1from __future__ import annotations
2
3import operator
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 TYPE_CHECKING,
11 Iterator,
12 Literal,
13 Sequence,
14 TypeVar,
15 Union,
16 cast,
17 overload,
18)
19
20import numpy as np
21
22from pandas._config import get_option
23
24from pandas._libs import lib
25from pandas._libs.interval import (
26 VALID_CLOSED,
27 Interval,
28 IntervalMixin,
29 intervals_to_interval_bounds,
30)
31from pandas._libs.missing import NA
32from pandas._typing import (
33 ArrayLike,
34 AxisInt,
35 Dtype,
36 IntervalClosedType,
37 NpDtype,
38 PositionalIndexer,
39 ScalarIndexer,
40 SequenceIndexer,
41 SortKind,
42 TimeArrayLike,
43 npt,
44)
45from pandas.compat.numpy import function as nv
46from pandas.errors import IntCastingNaNError
47from pandas.util._decorators import Appender
48
49from pandas.core.dtypes.cast import (
50 LossySetitemError,
51 maybe_upcast_numeric_to_64bit,
52)
53from pandas.core.dtypes.common import (
54 is_categorical_dtype,
55 is_dtype_equal,
56 is_float_dtype,
57 is_integer_dtype,
58 is_interval_dtype,
59 is_list_like,
60 is_object_dtype,
61 is_scalar,
62 is_string_dtype,
63 needs_i8_conversion,
64 pandas_dtype,
65)
66from pandas.core.dtypes.dtypes import IntervalDtype
67from pandas.core.dtypes.generic import (
68 ABCDataFrame,
69 ABCDatetimeIndex,
70 ABCIntervalIndex,
71 ABCPeriodIndex,
72)
73from pandas.core.dtypes.missing import (
74 is_valid_na_for_dtype,
75 isna,
76 notna,
77)
78
79from pandas.core.algorithms import (
80 isin,
81 take,
82 unique,
83 value_counts,
84)
85from pandas.core.arrays.base import (
86 ExtensionArray,
87 _extension_array_shared_docs,
88)
89from pandas.core.arrays.datetimes import DatetimeArray
90from pandas.core.arrays.timedeltas import TimedeltaArray
91import pandas.core.common as com
92from pandas.core.construction import (
93 array as pd_array,
94 ensure_wrapped_if_datetimelike,
95 extract_array,
96)
97from pandas.core.indexers import check_array_indexer
98from pandas.core.ops import (
99 invalid_comparison,
100 unpack_zerodim_and_defer,
101)
102
103if TYPE_CHECKING:
104 from pandas import (
105 Index,
106 Series,
107 )
108
109
# Self-type used by constructors and copies so subclasses get their own type back.
IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
# Storage for one side (left or right) of the intervals: a datetime-like
# array (DatetimeArray/TimedeltaArray) or a plain ndarray.
IntervalSideT = Union[TimeArrayLike, np.ndarray]
# Scalar element type: an Interval, or a float for a missing entry
# (IntervalArray uses np.nan as its NA/fill value).
IntervalOrNA = Union[Interval, float]

# Docstring templates, keyed by the name of the object they document.
# Placeholders such as %(klass)s are filled in with the ``%`` operator.
_interval_shared_docs: dict[str, str] = {}

# Substitution values for templates rendered on behalf of IntervalArray.
_shared_docs_kwargs = {
    "klass": "IntervalArray",
    "qualname": "arrays.IntervalArray",
    "name": "",
}


# Class-level docstring template. The cross-references use the real class
# names ``DatetimeArray`` / ``TimedeltaArray`` so that they can resolve.
_interval_shared_docs[
    "class"
] = """
%(summary)s

.. versionadded:: %(versionadded)s

Parameters
----------
data : array-like (1-dimensional)
    Array-like (ndarray, :class:`DatetimeArray`, :class:`TimedeltaArray`) containing
    Interval objects from which to build the %(klass)s.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
    Whether the intervals are closed on the left-side, right-side, both or
    neither.
dtype : dtype or None, default None
    If None, dtype will be inferred.
copy : bool, default False
    Copy the input data.
%(name)s\
verify_integrity : bool, default True
    Verify that the %(klass)s is valid.

Attributes
----------
left
right
closed
mid
length
is_empty
is_non_overlapping_monotonic
%(extra_attributes)s\

Methods
-------
from_arrays
from_tuples
from_breaks
contains
overlaps
set_closed
to_tuples
%(extra_methods)s\

See Also
--------
Index : The base pandas Index type.
Interval : A bounded slice-like interval; the elements of an %(klass)s.
interval_range : Function to create a fixed frequency IntervalIndex.
cut : Bin values into discrete Intervals.
qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.

Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__
for more.

%(examples)s\
"""
184
185
186@Appender(
187 _interval_shared_docs["class"]
188 % {
189 "klass": "IntervalArray",
190 "summary": "Pandas array for interval data that are closed on the same side.",
191 "versionadded": "0.24.0",
192 "name": "",
193 "extra_attributes": "",
194 "extra_methods": "",
195 "examples": textwrap.dedent(
196 """\
197 Examples
198 --------
199 A new ``IntervalArray`` can be constructed directly from an array-like of
200 ``Interval`` objects:
201
202 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
203 <IntervalArray>
204 [(0, 1], (1, 5]]
205 Length: 2, dtype: interval[int64, right]
206
207 It may also be constructed using one of the constructor
208 methods: :meth:`IntervalArray.from_arrays`,
209 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
210 """
211 ),
212 }
213)
214class IntervalArray(IntervalMixin, ExtensionArray):
215 can_hold_na = True
216 _na_value = _fill_value = np.nan
217
@property
def ndim(self) -> Literal[1]:
    """Number of dimensions; this array type always reports 1."""
    dimensions: Literal[1] = 1
    return dimensions
221
222 # To make mypy recognize the fields
223 _left: IntervalSideT
224 _right: IntervalSideT
225 _dtype: IntervalDtype
226
227 # ---------------------------------------------------------------------
228 # Constructors
229
def __new__(
    cls: type[IntervalArrayT],
    data,
    closed=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    verify_integrity: bool = True,
):
    """
    Build a new array from ``data``.

    ``data`` is first unwrapped with ``extract_array``. An existing instance
    of ``cls`` has its left/right sides reused directly; any other input must
    be a non-scalar collection, which is split into left/right bounds and
    normalised by ``_ensure_simple_new_inputs``.

    Raises
    ------
    TypeError
        If ``data`` is a scalar.
    """
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, cls):
        # Already an interval array: reuse its sides without conversion.
        # NOTE(review): the ``dtype`` and ``copy`` arguments are not consulted
        # in this branch -- confirm that this is intended.
        left: IntervalSideT = data._left
        right: IntervalSideT = data._right
        closed = closed or data.closed
        dtype = IntervalDtype(left.dtype, closed=closed)
    else:
        # don't allow scalars
        if is_scalar(data):
            msg = (
                f"{cls.__name__}(...) must be called with a collection "
                f"of some kind, {data} was passed"
            )
            raise TypeError(msg)

        # might need to convert empty or purely na data
        data = _maybe_convert_platform_interval(data)
        # ``closed`` is only validated against the data when the caller did
        # not specify it explicitly.
        left, right, infer_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None
        )
        if left.dtype == object:
            left = lib.maybe_convert_objects(left)
            right = lib.maybe_convert_objects(right)
        # An explicit ``closed`` takes precedence over the inferred one.
        closed = closed or infer_closed

        left, right, dtype = cls._ensure_simple_new_inputs(
            left,
            right,
            closed=closed,
            copy=copy,
            dtype=dtype,
        )

    if verify_integrity:
        cls._validate(left, right, dtype=dtype)

    return cls._simple_new(
        left,
        right,
        dtype=dtype,
    )
280
@classmethod
def _simple_new(
    cls: type[IntervalArrayT],
    left: IntervalSideT,
    right: IntervalSideT,
    dtype: IntervalDtype,
) -> IntervalArrayT:
    """
    Create an instance directly from its parts, without any checks.

    A bare object is allocated through ``IntervalMixin.__new__`` and the given
    ``left``, ``right`` and ``dtype`` are attached to it unchanged.
    """
    obj = IntervalMixin.__new__(cls)
    obj._left, obj._right = left, right
    obj._dtype = dtype
    return obj
294
@classmethod
def _ensure_simple_new_inputs(
    cls,
    left,
    right,
    closed: IntervalClosedType | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> tuple[IntervalSideT, IntervalSideT, IntervalDtype]:
    """
    Ensure correctness of input parameters for cls._simple_new.

    Both sides are converted to Index objects, optionally cast to the subtype
    of ``dtype``, checked for compatibility with each other and finally
    unwrapped to the arrays that are stored on the instance.

    Returns
    -------
    tuple
        ``(left, right, dtype)`` ready to be passed to ``_simple_new``.

    Raises
    ------
    TypeError
        If ``dtype`` is given but is not an interval dtype, or if the sides
        have a categorical or string dtype.
    ValueError
        If ``closed`` conflicts with ``dtype.closed``, if the two sides end up
        with different types, if they are period-based, or if datetime sides
        have different time zones.
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import -- TODO confirm).
    from pandas.core.indexes.base import ensure_index

    left = ensure_index(left, copy=copy)
    left = maybe_upcast_numeric_to_64bit(left)

    right = ensure_index(right, copy=copy)
    right = maybe_upcast_numeric_to_64bit(right)

    # Resolve ``closed``: explicit argument, then the dtype's, then "right".
    if closed is None and isinstance(dtype, IntervalDtype):
        closed = dtype.closed

    closed = closed or "right"

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if is_interval_dtype(dtype):
            dtype = cast(IntervalDtype, dtype)
            if dtype.subtype is not None:
                left = left.astype(dtype.subtype)
                right = right.astype(dtype.subtype)
        else:
            msg = f"dtype must be an IntervalDtype, got {dtype}"
            raise TypeError(msg)

        if dtype.closed is None:
            # possibly loading an old pickle
            dtype = IntervalDtype(dtype.subtype, closed)
        elif closed != dtype.closed:
            raise ValueError("closed keyword does not match dtype.closed")

    # coerce dtypes to match if needed
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        msg = (
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
        raise ValueError(msg)
    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
        raise TypeError(msg)
    if isinstance(left, ABCPeriodIndex):
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        raise ValueError(msg)
    # Time zones are compared through their string representation.
    if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        msg = (
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )
        raise ValueError(msg)

    # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
    left = ensure_wrapped_if_datetimelike(left)
    left = extract_array(left, extract_numpy=True)
    right = ensure_wrapped_if_datetimelike(right)
    right = extract_array(right, extract_numpy=True)

    # Datetime-like arrays keep their data in ``_ndarray``; plain ndarrays are
    # inspected directly.
    lbase = getattr(left, "_ndarray", left).base
    rbase = getattr(right, "_ndarray", right).base
    if lbase is not None and lbase is rbase:
        # If these share data, then setitem could corrupt our IA
        right = right.copy()

    # The final dtype always reflects the (possibly coerced) left side.
    dtype = IntervalDtype(left.dtype, closed=closed)

    return left, right, dtype
380
@classmethod
def _from_sequence(
    cls: type[IntervalArrayT],
    scalars,
    *,
    dtype: Dtype | None = None,
    copy: bool = False,
) -> IntervalArrayT:
    """Construct a new array from ``scalars`` by delegating to the constructor."""
    ctor_kwargs = {"dtype": dtype, "copy": copy}
    return cls(scalars, **ctor_kwargs)
390
@classmethod
def _from_factorized(
    cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT
) -> IntervalArrayT:
    """
    Rebuild an array from factorized ``values``.

    The ``closed`` side is taken from ``original``; empty ``values`` are first
    cast to the subtype of ``original``.
    """
    if not len(values):
        # An empty array arrives as object dtype, which the constructor
        # cannot interpret, so give it the subtype of the original instead.
        values = values.astype(original.dtype.subtype)
    closed = original.closed
    return cls(values, closed=closed)
401
# Docstring template for ``from_breaks``. The %(klass)s, %(name)s and
# %(examples)s placeholders are filled in with the ``%`` operator before the
# text is attached to a method through ``Appender``.
_interval_shared_docs["from_breaks"] = textwrap.dedent(
    """
        Construct an %(klass)s from an array of splits.

        Parameters
        ----------
        breaks : array-like (1-dimensional)
            Left and right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            Copy the data.
        dtype : dtype or None, default None
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_arrays : Construct from a left and right array.
        %(klass)s.from_tuples : Construct from a sequence of tuples.

        %(examples)s\
        """
)
432
@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
            <IntervalArray>
            [(0, 1], (1, 2], (2, 3]]
            Length: 3, dtype: interval[int64, right]
            """
        ),
    }
)
def from_breaks(
    cls: type[IntervalArrayT],
    breaks,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # The docstring is supplied by the Appender decorator above.
    edges = _maybe_convert_platform_interval(breaks)

    # Consecutive break points pair up: every edge but the last is a left
    # bound and every edge but the first is a right bound.
    left_edges = edges[:-1]
    right_edges = edges[1:]
    return cls.from_arrays(left_edges, right_edges, closed, copy=copy, dtype=dtype)
461
# Docstring template for ``from_arrays``. The %(klass)s, %(name)s and
# %(examples)s placeholders are filled in with the ``%`` operator before the
# text is attached to a method through ``Appender``.
_interval_shared_docs["from_arrays"] = textwrap.dedent(
    """
        Construct from two arrays defining the left and right bounds.

        Parameters
        ----------
        left : array-like (1-dimensional)
            Left bounds for each interval.
        right : array-like (1-dimensional)
            Right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            Copy the data.
        dtype : dtype, optional
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        Raises
        ------
        ValueError
            When a value is missing in only one of `left` or `right`.
            When a value in `left` is greater than the corresponding value
            in `right`.

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_breaks : Construct an %(klass)s from an array of
            splits.
        %(klass)s.from_tuples : Construct an %(klass)s from an
            array-like of tuples.

        Notes
        -----
        Each element of `left` must be less than or equal to the `right`
        element at the same position. If an element is missing, it must be
        missing in both `left` and `right`. A TypeError is raised when
        using an unsupported type for `left` or `right`. At the moment,
        'category', 'object', and 'string' subtypes are not supported.

        %(examples)s\
        """
)
511
@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % {
        "klass": "IntervalArray",
        "name": "",
        # The "Examples" header is required so the doctest below forms its own
        # section instead of being appended to the template's Notes section;
        # it mirrors the examples given for from_breaks and from_tuples.
        "examples": textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
            <IntervalArray>
            [(0, 1], (1, 2], (2, 3]]
            Length: 3, dtype: interval[int64, right]
            """
        ),
    }
)
def from_arrays(
    cls: type[IntervalArrayT],
    left,
    right,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # The docstring is supplied by the Appender decorator above.
    left = _maybe_convert_platform_interval(left)
    right = _maybe_convert_platform_interval(right)

    # Normalise both sides and resolve the final dtype.
    left, right, dtype = cls._ensure_simple_new_inputs(
        left,
        right,
        closed=closed,
        copy=copy,
        dtype=dtype,
    )
    # Unlike the main constructor, validation here is unconditional.
    cls._validate(left, right, dtype=dtype)

    return cls._simple_new(left, right, dtype=dtype)
549
# Docstring template for ``from_tuples``. The %(klass)s, %(name)s and
# %(examples)s placeholders are filled in with the ``%`` operator before the
# text is attached to a method through ``Appender``.
_interval_shared_docs["from_tuples"] = textwrap.dedent(
    """
        Construct an %(klass)s from an array-like of tuples.

        Parameters
        ----------
        data : array-like (1-dimensional)
            Array of tuples.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            By-default copy the data, this is compat only and ignored.
        dtype : dtype or None, default None
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_arrays : Construct an %(klass)s from a left and
                                    right array.
        %(klass)s.from_breaks : Construct an %(klass)s from an array of
                                    splits.

        %(examples)s\
        """
)
582
@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
            <IntervalArray>
            [(0, 1], (1, 2]]
            Length: 2, dtype: interval[int64, right]
            """
        ),
    }
)
def from_tuples(
    cls: type[IntervalArrayT],
    data,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # The docstring is supplied by the Appender decorator above.
    klass_name = cls.__name__

    if len(data):
        lefts, rights = [], []
    else:
        # Empty input is handed on untouched so that its dtype is kept; the
        # loop below does not run in that case.
        lefts = rights = data

    for item in data:
        if not isinstance(item, tuple) and isna(item):
            # A missing entry is missing on both sides.
            lower = upper = np.nan
        else:
            try:
                # every entry must unpack into exactly two bounds,
                # e.g. [(0, 1), (1, 2), ...]
                lower, upper = item
            except ValueError as err:
                raise ValueError(
                    f"{klass_name}.from_tuples requires tuples of length 2, "
                    f"got {item}"
                ) from err
            except TypeError as err:
                raise TypeError(
                    f"{klass_name}.from_tuples received an invalid item, {item}"
                ) from err
        lefts.append(lower)
        rights.append(upper)

    # ``copy`` is accepted for compatibility only and is not forwarded.
    return cls.from_arrays(lefts, rights, closed, copy=False, dtype=dtype)
632
@classmethod
def _validate(cls, left, right, dtype: IntervalDtype) -> None:
    """
    Check that ``left``, ``right`` and ``dtype`` describe a valid IntervalArray.

    Raises
    ------
    ValueError
        If ``dtype`` is not an IntervalDtype, if the two sides differ in
        length, if a value is missing on only one side, or if some
        non-missing left bound is not ``<=`` its right bound.
    """
    if not isinstance(dtype, IntervalDtype):
        raise ValueError(f"invalid dtype: {dtype}")

    if len(left) != len(right):
        raise ValueError("left and right must have the same length")

    left_present = notna(left)
    right_present = notna(right)
    if (left_present != right_present).any():
        raise ValueError(
            "missing values must be missing in the same "
            "location both left and right sides"
        )

    # Only positions holding actual values are compared.
    ordered = left[left_present] <= right[left_present]
    if not ordered.all():
        raise ValueError("left side of interval must be <= right side")
662
def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT:
    """
    Build a new array from replacement sides, keeping this array's ``closed``.

    Parameters
    ----------
    left : Index
        Values to be used for the left-side of the intervals.
    right : Index
        Values to be used for the right-side of the intervals.
    """
    requested = IntervalDtype(left.dtype, closed=self.closed)
    new_left, new_right, new_dtype = self._ensure_simple_new_inputs(
        left, right, dtype=requested
    )
    return self._simple_new(new_left, new_right, dtype=new_dtype)
678
679 # ---------------------------------------------------------------------
680 # Descriptive
681
@property
def dtype(self) -> IntervalDtype:
    """The IntervalDtype stored on this array."""
    stored_dtype = self._dtype
    return stored_dtype
685
@property
def nbytes(self) -> int:
    """Combined ``nbytes`` of the left and right sides."""
    left_bytes = self.left.nbytes
    right_bytes = self.right.nbytes
    return left_bytes + right_bytes
689
@property
def size(self) -> int:
    """Number of elements, read from the left side."""
    # Reading it from one side avoids materializing self.values.
    left_side = self.left
    return left_side.size
694
695 # ---------------------------------------------------------------------
696 # EA Interface
697
def __iter__(self) -> Iterator:
    """Iterate over the elements produced by ``np.asarray(self)``."""
    as_ndarray = np.asarray(self)
    return iter(as_ndarray)
700
def __len__(self) -> int:
    """Number of intervals, taken from the length of the left side."""
    n_intervals = len(self._left)
    return n_intervals
703
@overload
def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
    ...

@overload
def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT:
    ...

def __getitem__(
    self: IntervalArrayT, key: PositionalIndexer
) -> IntervalArrayT | IntervalOrNA:
    """
    Select one element or a sub-array.

    Returns an ``Interval`` (or the fill value for a missing entry) when the
    key selects a single position, and a new array otherwise.

    Raises
    ------
    ValueError
        If the key produces a result with more than one dimension.
    """
    key = check_array_indexer(self, key)
    lower = self._left[key]
    upper = self._right[key]

    if isinstance(lower, (np.ndarray, ExtensionArray)):
        if np.ndim(lower) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        # mypy cannot tell that ``upper`` is array-like here: it sees
        # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray,
        # TimedeltaArray, ndarray[Any, Any]]" where "_simple_new" expects
        # "Union[Union[DatetimeArray, TimedeltaArray], ndarray[Any, Any]]"
        return self._simple_new(lower, upper, dtype=self.dtype)  # type: ignore[arg-type]

    # Anything else is a scalar lookup.
    if is_scalar(lower) and isna(lower):
        return self._fill_value
    return Interval(lower, upper, self.closed)
732
def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key``, writing to both sides in place.

    The value is validated before the key is checked, so an invalid value
    raises without anything being modified.
    """
    new_left, new_right = self._validate_setitem_value(value)
    indexer = check_array_indexer(self, key)

    self._left[indexer], self._right[indexer] = new_left, new_right
739
def _cmp_method(self, other, op):
    """
    Shared implementation behind the rich-comparison dunder methods.

    Parameters
    ----------
    other : Interval, list-like or scalar
        Right-hand operand of the comparison.
    op : callable
        One of the ``operator`` comparison functions (``eq``, ``ne``, ``gt``,
        ``ge``, ``lt`` or ``le``).

    Returns
    -------
    array-like
        Element-wise result. A fully masked ``BooleanArray`` is returned when
        ``other`` is ``NA``; operands that cannot match are delegated to
        ``invalid_comparison``.

    Raises
    ------
    ValueError
        If ``other`` is list-like and its length differs from ``len(self)``.
    """
    # ensure pandas array for list-like and eliminate non-interval scalars
    if is_list_like(other):
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")
        other = pd_array(other)
    elif not isinstance(other, Interval):
        # non-interval scalar -> no matches
        if other is NA:
            # GH#31882
            from pandas.core.arrays import BooleanArray

            # Every position is masked, so the values in ``arr`` are never
            # read and can stay uninitialised.
            arr = np.empty(self.shape, dtype=bool)
            mask = np.ones(self.shape, dtype=bool)
            return BooleanArray(arr, mask)
        return invalid_comparison(self, other, op)

    # determine the dtype of the elements we want to compare
    if isinstance(other, Interval):
        other_dtype = pandas_dtype("interval")
    elif not is_categorical_dtype(other.dtype):
        other_dtype = other.dtype
    else:
        # for categorical defer to categories for dtype
        other_dtype = other.categories.dtype

        # extract intervals if we have interval categories with matching closed
        if is_interval_dtype(other_dtype):
            if self.closed != other.categories.closed:
                return invalid_comparison(self, other, op)

            other = other.categories.take(
                other.codes, allow_fill=True, fill_value=other.categories._na_value
            )

    # interval-like -> need same closed and matching endpoints
    if is_interval_dtype(other_dtype):
        if self.closed != other.closed:
            return invalid_comparison(self, other, op)
        elif not isinstance(other, Interval):
            other = type(self)(other)

        # Ordering comparisons look at the left bound first and fall back to
        # the right bound when the left bounds are equal.
        if op is operator.eq:
            return (self._left == other.left) & (self._right == other.right)
        elif op is operator.ne:
            return (self._left != other.left) | (self._right != other.right)
        elif op is operator.gt:
            return (self._left > other.left) | (
                (self._left == other.left) & (self._right > other.right)
            )
        elif op is operator.ge:
            return (self == other) | (self > other)
        elif op is operator.lt:
            return (self._left < other.left) | (
                (self._left == other.left) & (self._right < other.right)
            )
        else:
            # operator.le
            return (self == other) | (self < other)

    # non-interval/non-object dtype -> no matches
    if not is_object_dtype(other_dtype):
        return invalid_comparison(self, other, op)

    # object dtype -> iteratively check for intervals
    result = np.zeros(len(self), dtype=bool)
    for i, obj in enumerate(other):
        try:
            result[i] = op(self[i], obj)
        except TypeError:
            if obj is NA:
                # comparison with NA returns NA; the result is switched to
                # object dtype so that it can hold it
                # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
                result = result.astype(object)
                result[i] = NA
            else:
                raise
    return result
818
@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
    """Compare with ``==`` by delegating to ``_cmp_method`` with ``operator.eq``."""
    compare = operator.eq
    return self._cmp_method(other, compare)
822
@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
    """Compare with ``!=`` by delegating to ``_cmp_method`` with ``operator.ne``."""
    compare = operator.ne
    return self._cmp_method(other, compare)
826
@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
    """Compare with ``>`` by delegating to ``_cmp_method`` with ``operator.gt``."""
    compare = operator.gt
    return self._cmp_method(other, compare)
830
@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
    """Compare with ``>=`` by delegating to ``_cmp_method`` with ``operator.ge``."""
    compare = operator.ge
    return self._cmp_method(other, compare)
834
@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
    """Compare with ``<`` by delegating to ``_cmp_method`` with ``operator.lt``."""
    compare = operator.lt
    return self._cmp_method(other, compare)
838
@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
    """Compare with ``<=`` by delegating to ``_cmp_method`` with ``operator.le``."""
    compare = operator.le
    return self._cmp_method(other, compare)
842
def argsort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
    **kwargs,
) -> np.ndarray:
    """
    Return the indices that would sort this array.

    With the default options the result comes from ``np.lexsort`` using the
    left bounds as the primary key and the right bounds as the secondary key;
    any other combination is handled by the parent class.
    """
    ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

    use_lexsort = ascending and kind == "quicksort" and na_position == "last"
    if not use_lexsort:
        # TODO: other cases we can use lexsort for? much more performant.
        return super().argsort(
            ascending=ascending, kind=kind, na_position=na_position, **kwargs
        )

    # TODO: in an IntervalIndex we can re-use the cached
    #  IntervalTree.left_sorter
    # np.lexsort treats the LAST key as the primary one.
    sort_keys = (self.right, self.left)
    return np.lexsort(sort_keys)
862
def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the smallest interval according to the ``argsort`` ordering.

    The NA value is returned for an empty array, and also when missing
    entries are present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    missing = self.isna()
    candidates = self
    if missing.any():
        if not skipna:
            return self._na_value
        candidates = self[~missing]

    first = candidates.argsort()[0]
    return candidates[first]
879
def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the largest interval according to the ``argsort`` ordering.

    The NA value is returned for an empty array, and also when missing
    entries are present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    missing = self.isna()
    candidates = self
    if missing.any():
        if not skipna:
            return self._na_value
        candidates = self[~missing]

    last = candidates.argsort()[-1]
    return candidates[last]
896
def fillna(
    self: IntervalArrayT, value=None, method=None, limit=None
) -> IntervalArrayT:
    """
    Fill NA/NaN values with a single replacement value.

    Parameters
    ----------
    value : Interval or NA
        Replacement for every missing entry. It is validated with
        ``_validate_scalar``, so anything other than an Interval or a valid
        NA value is rejected.
    method : None
        (Not implemented yet for IntervalArray)
        Any value other than None raises.
    limit : None
        (Not implemented yet for IntervalArray)
        Any value other than None raises.

    Returns
    -------
    filled : IntervalArray with NA/NaN filled

    Raises
    ------
    TypeError
        If ``method`` or ``limit`` is given, or if ``value`` is neither an
        Interval nor NA.
    """
    if method is not None:
        raise TypeError("Filling by method is not supported for IntervalArray.")
    if limit is not None:
        raise TypeError("limit is not supported for IntervalArray.")

    fill_left, fill_right = self._validate_scalar(value)

    # Each side is filled independently with its half of the replacement.
    filled_left = self.left.fillna(value=fill_left)
    filled_right = self.right.fillna(value=fill_right)
    return self._shallow_copy(filled_left, filled_right)
936
def astype(self, dtype, copy: bool = True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.

    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the interval subtypes are incompatible, or if the cast to a
        non-interval dtype fails.
    IntCastingNaNError
        Propagated unchanged when it is raised while casting the sides.
    """
    from pandas import Index

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if is_interval_dtype(dtype):
        # Interval -> interval: only the subtype of the two sides changes.
        if dtype == self.dtype:
            return self.copy() if copy else self

        if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(
            dtype.subtype
        ):
            # This is allowed on the Index.astype but we disallow it here
            msg = (
                f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
            )
            raise TypeError(msg)

        # need to cast to different subtype
        try:
            # We need to use Index rules for astype to prevent casting
            #  np.nan entries to int subtypes
            new_left = Index(self._left, copy=False).astype(dtype.subtype)
            new_right = Index(self._right, copy=False).astype(dtype.subtype)
        except IntCastingNaNError:
            # e.g test_subtype_integer
            # Re-raised as-is; this handler comes first so that the broader
            # handler below does not convert it into a TypeError.
            raise
        except (TypeError, ValueError) as err:
            # e.g. test_subtype_integer_errors f8->u8 can be lossy
            #  and raises ValueError
            msg = (
                f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
            )
            raise TypeError(msg) from err
        return self._shallow_copy(new_left, new_right)
    else:
        # Any other target dtype is handled by the parent implementation;
        # its failures are reported uniformly as TypeError.
        try:
            return super().astype(dtype, copy=copy)
        except (TypeError, ValueError) as err:
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg) from err
997
def equals(self, other) -> bool:
    """
    Return True if ``other`` has exactly the same type, the same ``closed``
    side and equal left and right bounds.
    """
    if type(self) != type(other):
        return False

    if not self.closed == other.closed:
        return False
    if not self.left.equals(other.left):
        return False
    return bool(self.right.equals(other.right))
1007
@classmethod
def _concat_same_type(
    cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
) -> IntervalArrayT:
    """
    Concatenate multiple IntervalArray

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the arrays do not all share one ``closed`` side.
    """
    distinct_closed = {arr.closed for arr in to_concat}
    if len(distinct_closed) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (closed,) = distinct_closed

    left_parts = [arr.left for arr in to_concat]
    right_parts = [arr.right for arr in to_concat]
    joined_left = np.concatenate(left_parts)
    joined_right = np.concatenate(right_parts)

    new_left, new_right, new_dtype = cls._ensure_simple_new_inputs(
        joined_left, joined_right, closed=closed
    )
    return cls._simple_new(new_left, new_right, dtype=new_dtype)
1034
def copy(self: IntervalArrayT) -> IntervalArrayT:
    """
    Return a copy of the array.

    Both sides are copied; the dtype object is shared with the original.

    Returns
    -------
    IntervalArray
    """
    return self._simple_new(
        self._left.copy(),
        self._right.copy(),
        dtype=self.dtype,
    )
1047
def isna(self) -> np.ndarray:
    """Boolean mask of missing entries, computed from the left side only."""
    left_side = self._left
    return isna(left_side)
1050
def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
    """
    Shift the intervals by ``periods`` positions.

    Parameters
    ----------
    periods : int, default 1
        Number of positions to shift by; negative values shift towards the
        start of the array.
    fill_value : Interval or NA, optional
        Value placed in the vacated positions. NA (the default) fills them
        with missing intervals.

    Returns
    -------
    IntervalArray
        A new array of the same length. An empty array or ``periods == 0``
        yields a plain copy.
    """
    if not len(self) or periods == 0:
        return self.copy()

    # Called for validation only: it raises for anything that is neither an
    # Interval nor NA. The returned bounds are not needed here.
    self._validate_scalar(fill_value)

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT

    empty_len = min(abs(periods), len(self))
    if isna(fill_value):
        from pandas import Index

        fill_value = Index(self._left, copy=False)._na_value
        # from_breaks defaults to closed="right", so our own ``closed`` is
        # passed explicitly; otherwise the concatenation below raises for any
        # array that is not right-closed.
        empty = IntervalArray.from_breaks(
            [fill_value] * (empty_len + 1), closed=self.closed
        )
    else:
        empty = self._from_sequence([fill_value] * empty_len)

    if periods > 0:
        a = empty
        b = self[:-periods]
    else:
        a = self[abs(periods) :]
        b = empty
    return self._concat_same_type([a, b])
1078
def take(
    self: IntervalArrayT,
    indices,
    *,
    allow_fill: bool = False,
    fill_value=None,
    axis=None,
    **kwargs,
) -> IntervalArrayT:
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take((), kwargs)

    if allow_fill:
        # Split the boxed fill value into one bound per side.
        fill_left, fill_right = self._validate_scalar(fill_value)
    else:
        fill_left = fill_right = fill_value

    # The same take is applied to each side, left first.
    per_side = ((self._left, fill_left), (self._right, fill_right))
    taken_left, taken_right = [
        take(side, indices, allow_fill=allow_fill, fill_value=side_fill)
        for side, side_fill in per_side
    ]

    return self._shallow_copy(taken_left, taken_right)
1147
1148 def _validate_listlike(self, value):
1149 # list-like of intervals
1150 try:
1151 array = IntervalArray(value)
1152 self._check_closed_matches(array, name="value")
1153 value_left, value_right = array.left, array.right
1154 except TypeError as err:
1155 # wrong type: not interval or NA
1156 msg = f"'value' should be an interval type, got {type(value)} instead."
1157 raise TypeError(msg) from err
1158
1159 try:
1160 self.left._validate_fill_value(value_left)
1161 except (LossySetitemError, TypeError) as err:
1162 msg = (
1163 "'value' should be a compatible interval type, "
1164 f"got {type(value)} instead."
1165 )
1166 raise TypeError(msg) from err
1167
1168 return value_left, value_right
1169
1170 def _validate_scalar(self, value):
1171 if isinstance(value, Interval):
1172 self._check_closed_matches(value, name="value")
1173 left, right = value.left, value.right
1174 # TODO: check subdtype match like _validate_setitem_value?
1175 elif is_valid_na_for_dtype(value, self.left.dtype):
1176 # GH#18295
1177 left = right = self.left._na_value
1178 else:
1179 raise TypeError(
1180 "can only insert Interval objects and NA into an IntervalArray"
1181 )
1182 return left, right
1183
1184 def _validate_setitem_value(self, value):
1185 if is_valid_na_for_dtype(value, self.left.dtype):
1186 # na value: need special casing to set directly on numpy arrays
1187 value = self.left._na_value
1188 if is_integer_dtype(self.dtype.subtype):
1189 # can't set NaN on a numpy integer array
1190 # GH#45484 TypeError, not ValueError, matches what we get with
1191 # non-NA un-holdable value.
1192 raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
1193 value_left, value_right = value, value
1194
1195 elif isinstance(value, Interval):
1196 # scalar interval
1197 self._check_closed_matches(value, name="value")
1198 value_left, value_right = value.left, value.right
1199 self.left._validate_fill_value(value_left)
1200 self.left._validate_fill_value(value_right)
1201
1202 else:
1203 return self._validate_listlike(value)
1204
1205 return value_left, value_right
1206
1207 def value_counts(self, dropna: bool = True) -> Series:
1208 """
1209 Returns a Series containing counts of each interval.
1210
1211 Parameters
1212 ----------
1213 dropna : bool, default True
1214 Don't include counts of NaN.
1215
1216 Returns
1217 -------
1218 counts : Series
1219
1220 See Also
1221 --------
1222 Series.value_counts
1223 """
1224 # TODO: implement this is a non-naive way!
1225 return value_counts(np.asarray(self), dropna=dropna)
1226
1227 # ---------------------------------------------------------------------
1228 # Rendering Methods
1229
1230 def _format_data(self) -> str:
1231 # TODO: integrate with categorical and make generic
1232 # name argument is unused here; just for compat with base / categorical
1233 n = len(self)
1234 max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)
1235
1236 formatter = str
1237
1238 if n == 0:
1239 summary = "[]"
1240 elif n == 1:
1241 first = formatter(self[0])
1242 summary = f"[{first}]"
1243 elif n == 2:
1244 first = formatter(self[0])
1245 last = formatter(self[-1])
1246 summary = f"[{first}, {last}]"
1247 else:
1248 if n > max_seq_items:
1249 n = min(max_seq_items // 2, 10)
1250 head = [formatter(x) for x in self[:n]]
1251 tail = [formatter(x) for x in self[-n:]]
1252 head_str = ", ".join(head)
1253 tail_str = ", ".join(tail)
1254 summary = f"[{head_str} ... {tail_str}]"
1255 else:
1256 tail = [formatter(x) for x in self]
1257 tail_str = ", ".join(tail)
1258 summary = f"[{tail_str}]"
1259
1260 return summary
1261
1262 def __repr__(self) -> str:
1263 # the short repr has no trailing newline, while the truncated
1264 # repr does. So we include a newline in our template, and strip
1265 # any trailing newlines from format_object_summary
1266 data = self._format_data()
1267 class_name = f"<{type(self).__name__}>\n"
1268
1269 template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
1270 return template
1271
1272 def _format_space(self) -> str:
1273 space = " " * (len(type(self).__name__) + 1)
1274 return f"\n{space}"
1275
1276 # ---------------------------------------------------------------------
1277 # Vectorized Interval Properties/Attributes
1278
1279 @property
1280 def left(self):
1281 """
1282 Return the left endpoints of each Interval in the IntervalArray as an Index.
1283 """
1284 from pandas import Index
1285
1286 return Index(self._left, copy=False)
1287
1288 @property
1289 def right(self):
1290 """
1291 Return the right endpoints of each Interval in the IntervalArray as an Index.
1292 """
1293 from pandas import Index
1294
1295 return Index(self._right, copy=False)
1296
1297 @property
1298 def length(self) -> Index:
1299 """
1300 Return an Index with entries denoting the length of each Interval.
1301 """
1302 return self.right - self.left
1303
1304 @property
1305 def mid(self) -> Index:
1306 """
1307 Return the midpoint of each Interval in the IntervalArray as an Index.
1308 """
1309 try:
1310 return 0.5 * (self.left + self.right)
1311 except TypeError:
1312 # datetime safe version
1313 return self.left + 0.5 * self.length
1314
1315 _interval_shared_docs["overlaps"] = textwrap.dedent(
1316 """
1317 Check elementwise if an Interval overlaps the values in the %(klass)s.
1318
1319 Two intervals overlap if they share a common point, including closed
1320 endpoints. Intervals that only have an open endpoint in common do not
1321 overlap.
1322
1323 Parameters
1324 ----------
1325 other : %(klass)s
1326 Interval to check against for an overlap.
1327
1328 Returns
1329 -------
1330 ndarray
1331 Boolean array positionally indicating where an overlap occurs.
1332
1333 See Also
1334 --------
1335 Interval.overlaps : Check whether two Interval objects overlap.
1336
1337 Examples
1338 --------
1339 %(examples)s
1340 >>> intervals.overlaps(pd.Interval(0.5, 1.5))
1341 array([ True, True, False])
1342
1343 Intervals that share closed endpoints overlap:
1344
1345 >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
1346 array([ True, True, True])
1347
1348 Intervals that only have an open endpoint in common do not overlap:
1349
1350 >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
1351 array([False, True, False])
1352 """
1353 )
1354
1355 @Appender(
1356 _interval_shared_docs["overlaps"]
1357 % {
1358 "klass": "IntervalArray",
1359 "examples": textwrap.dedent(
1360 """\
1361 >>> data = [(0, 1), (1, 3), (2, 4)]
1362 >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
1363 >>> intervals
1364 <IntervalArray>
1365 [(0, 1], (1, 3], (2, 4]]
1366 Length: 3, dtype: interval[int64, right]
1367 """
1368 ),
1369 }
1370 )
1371 def overlaps(self, other):
1372 if isinstance(other, (IntervalArray, ABCIntervalIndex)):
1373 raise NotImplementedError
1374 if not isinstance(other, Interval):
1375 msg = f"`other` must be Interval-like, got {type(other).__name__}"
1376 raise TypeError(msg)
1377
1378 # equality is okay if both endpoints are closed (overlap at a point)
1379 op1 = le if (self.closed_left and other.closed_right) else lt
1380 op2 = le if (other.closed_left and self.closed_right) else lt
1381
1382 # overlaps is equivalent negation of two interval being disjoint:
1383 # disjoint = (A.left > B.right) or (B.left > A.right)
1384 # (simplifying the negation allows this to be done in less operations)
1385 return op1(self.left, other.right) & op2(other.left, self.right)
1386
1387 # ---------------------------------------------------------------------
1388
1389 @property
1390 def closed(self) -> IntervalClosedType:
1391 """
1392 String describing the inclusive side the intervals.
1393
1394 Either ``left``, ``right``, ``both`` or ``neither``.
1395 """
1396 return self.dtype.closed
1397
1398 _interval_shared_docs["set_closed"] = textwrap.dedent(
1399 """
1400 Return an identical %(klass)s closed on the specified side.
1401
1402 Parameters
1403 ----------
1404 closed : {'left', 'right', 'both', 'neither'}
1405 Whether the intervals are closed on the left-side, right-side, both
1406 or neither.
1407
1408 Returns
1409 -------
1410 %(klass)s
1411
1412 %(examples)s\
1413 """
1414 )
1415
1416 @Appender(
1417 _interval_shared_docs["set_closed"]
1418 % {
1419 "klass": "IntervalArray",
1420 "examples": textwrap.dedent(
1421 """\
1422 Examples
1423 --------
1424 >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
1425 >>> index
1426 <IntervalArray>
1427 [(0, 1], (1, 2], (2, 3]]
1428 Length: 3, dtype: interval[int64, right]
1429 >>> index.set_closed('both')
1430 <IntervalArray>
1431 [[0, 1], [1, 2], [2, 3]]
1432 Length: 3, dtype: interval[int64, both]
1433 """
1434 ),
1435 }
1436 )
1437 def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
1438 if closed not in VALID_CLOSED:
1439 msg = f"invalid option for 'closed': {closed}"
1440 raise ValueError(msg)
1441
1442 left, right = self._left, self._right
1443 dtype = IntervalDtype(left.dtype, closed=closed)
1444 return self._simple_new(left, right, dtype=dtype)
1445
1446 _interval_shared_docs[
1447 "is_non_overlapping_monotonic"
1448 ] = """
1449 Return a boolean whether the %(klass)s is non-overlapping and monotonic.
1450
1451 Non-overlapping means (no Intervals share points), and monotonic means
1452 either monotonic increasing or monotonic decreasing.
1453 """
1454
1455 @property
1456 @Appender(
1457 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
1458 )
1459 def is_non_overlapping_monotonic(self) -> bool:
1460 # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
1461 # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
1462 # we already require left <= right
1463
1464 # strict inequality for closed == 'both'; equality implies overlapping
1465 # at a point when both sides of intervals are included
1466 if self.closed == "both":
1467 return bool(
1468 (self._right[:-1] < self._left[1:]).all()
1469 or (self._left[:-1] > self._right[1:]).all()
1470 )
1471
1472 # non-strict inequality when closed != 'both'; at least one side is
1473 # not included in the intervals, so equality does not imply overlapping
1474 return bool(
1475 (self._right[:-1] <= self._left[1:]).all()
1476 or (self._left[:-1] >= self._right[1:]).all()
1477 )
1478
1479 # ---------------------------------------------------------------------
1480 # Conversion
1481
1482 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1483 """
1484 Return the IntervalArray's data as a numpy array of Interval
1485 objects (with dtype='object')
1486 """
1487 left = self._left
1488 right = self._right
1489 mask = self.isna()
1490 closed = self.closed
1491
1492 result = np.empty(len(left), dtype=object)
1493 for i, left_value in enumerate(left):
1494 if mask[i]:
1495 result[i] = np.nan
1496 else:
1497 result[i] = Interval(left_value, right[i], closed)
1498 return result
1499
    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.

        Parameters
        ----------
        type : pyarrow type, optional
            Requested result type. ``None`` and an ``ArrowIntervalType``
            equal to ours both give the extension array; the struct storage
            type of our interval type gives the plain struct array.

        Returns
        -------
        pyarrow array
            An extension array of ``ArrowIntervalType(subtype, closed)``, or
            its underlying struct array with fields ``left`` and ``right``
            when the storage type was requested.

        Raises
        ------
        TypeError
            If the subtype cannot be converted with
            ``pyarrow.from_numpy_dtype``, if `type` is an
            ``ArrowIntervalType`` that differs from ours, or if `type` is
            any other unsupported type.
        """
        import pyarrow

        from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

        try:
            subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
        except TypeError as err:
            raise TypeError(
                f"Conversion to arrow with subtype '{self.dtype.subtype}' "
                "is not supported"
            ) from err
        interval_type = ArrowIntervalType(subtype, self.closed)
        # storage is a struct with one child array per endpoint side
        storage_array = pyarrow.StructArray.from_arrays(
            [
                pyarrow.array(self._left, type=subtype, from_pandas=True),
                pyarrow.array(self._right, type=subtype, from_pandas=True),
            ],
            names=["left", "right"],
        )
        mask = self.isna()
        if mask.any():
            # if there are missing values, set validity bitmap also on the array level
            # (built from ``~mask``, i.e. True where the interval is present)
            null_bitmap = pyarrow.array(~mask).buffers()[1]
            # rebuild the struct from the same children plus that bitmap
            storage_array = pyarrow.StructArray.from_buffers(
                storage_array.type,
                len(storage_array),
                [null_bitmap],
                children=[storage_array.field(0), storage_array.field(1)],
            )

        if type is not None:
            if type.equals(interval_type.storage_type):
                # caller asked for the plain struct storage, not the extension
                return storage_array
            elif isinstance(type, ArrowIntervalType):
                # ensure we have the same subtype and closed attributes
                if not type.equals(interval_type):
                    raise TypeError(
                        "Not supported to convert IntervalArray to type with "
                        f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
                        f"and 'closed' ({self.closed} vs {type.closed}) attributes"
                    )
            else:
                raise TypeError(
                    f"Not supported to convert IntervalArray to '{type}' type"
                )

        return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
1551
1552 _interval_shared_docs[
1553 "to_tuples"
1554 ] = """
1555 Return an %(return_type)s of tuples of the form (left, right).
1556
1557 Parameters
1558 ----------
1559 na_tuple : bool, default True
1560 Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA
1561 value itself if False, ``nan``.
1562
1563 Returns
1564 -------
1565 tuples: %(return_type)s
1566 %(examples)s\
1567 """
1568
1569 @Appender(
1570 _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
1571 )
1572 def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
1573 tuples = com.asarray_tuplesafe(zip(self._left, self._right))
1574 if not na_tuple:
1575 # GH 18756
1576 tuples = np.where(~self.isna(), tuples, np.nan)
1577 return tuples
1578
1579 # ---------------------------------------------------------------------
1580
1581 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
1582 value_left, value_right = self._validate_setitem_value(value)
1583
1584 if isinstance(self._left, np.ndarray):
1585 np.putmask(self._left, mask, value_left)
1586 assert isinstance(self._right, np.ndarray)
1587 np.putmask(self._right, mask, value_right)
1588 else:
1589 self._left._putmask(mask, value_left)
1590 assert not isinstance(self._right, np.ndarray)
1591 self._right._putmask(mask, value_right)
1592
1593 def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
1594 """
1595 Return a new IntervalArray inserting new item at location. Follows
1596 Python numpy.insert semantics for negative values. Only Interval
1597 objects and NA can be inserted into an IntervalIndex
1598
1599 Parameters
1600 ----------
1601 loc : int
1602 item : Interval
1603
1604 Returns
1605 -------
1606 IntervalArray
1607 """
1608 left_insert, right_insert = self._validate_scalar(item)
1609
1610 new_left = self.left.insert(loc, left_insert)
1611 new_right = self.right.insert(loc, right_insert)
1612
1613 return self._shallow_copy(new_left, new_right)
1614
1615 def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
1616 if isinstance(self._left, np.ndarray):
1617 new_left = np.delete(self._left, loc)
1618 assert isinstance(self._right, np.ndarray)
1619 new_right = np.delete(self._right, loc)
1620 else:
1621 new_left = self._left.delete(loc)
1622 assert not isinstance(self._right, np.ndarray)
1623 new_right = self._right.delete(loc)
1624 return self._shallow_copy(left=new_left, right=new_right)
1625
1626 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
1627 def repeat(
1628 self: IntervalArrayT,
1629 repeats: int | Sequence[int],
1630 axis: AxisInt | None = None,
1631 ) -> IntervalArrayT:
1632 nv.validate_repeat((), {"axis": axis})
1633 left_repeat = self.left.repeat(repeats)
1634 right_repeat = self.right.repeat(repeats)
1635 return self._shallow_copy(left=left_repeat, right=right_repeat)
1636
1637 _interval_shared_docs["contains"] = textwrap.dedent(
1638 """
1639 Check elementwise if the Intervals contain the value.
1640
1641 Return a boolean mask whether the value is contained in the Intervals
1642 of the %(klass)s.
1643
1644 Parameters
1645 ----------
1646 other : scalar
1647 The value to check whether it is contained in the Intervals.
1648
1649 Returns
1650 -------
1651 boolean array
1652
1653 See Also
1654 --------
1655 Interval.contains : Check whether Interval object contains value.
1656 %(klass)s.overlaps : Check if an Interval overlaps the values in the
1657 %(klass)s.
1658
1659 Examples
1660 --------
1661 %(examples)s
1662 >>> intervals.contains(0.5)
1663 array([ True, False, False])
1664 """
1665 )
1666
1667 @Appender(
1668 _interval_shared_docs["contains"]
1669 % {
1670 "klass": "IntervalArray",
1671 "examples": textwrap.dedent(
1672 """\
1673 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
1674 >>> intervals
1675 <IntervalArray>
1676 [(0, 1], (1, 3], (2, 4]]
1677 Length: 3, dtype: interval[int64, right]
1678 """
1679 ),
1680 }
1681 )
1682 def contains(self, other):
1683 if isinstance(other, Interval):
1684 raise NotImplementedError("contains not implemented for two intervals")
1685
1686 return (self._left < other if self.open_left else self._left <= other) & (
1687 other < self._right if self.open_right else other <= self._right
1688 )
1689
1690 def isin(self, values) -> npt.NDArray[np.bool_]:
1691 if not hasattr(values, "dtype"):
1692 values = np.array(values)
1693 values = extract_array(values, extract_numpy=True)
1694
1695 if is_interval_dtype(values.dtype):
1696 if self.closed != values.closed:
1697 # not comparable -> no overlap
1698 return np.zeros(self.shape, dtype=bool)
1699
1700 if is_dtype_equal(self.dtype, values.dtype):
1701 # GH#38353 instead of casting to object, operating on a
1702 # complex128 ndarray is much more performant.
1703 left = self._combined.view("complex128")
1704 right = values._combined.view("complex128")
1705 # error: Argument 1 to "in1d" has incompatible type
1706 # "Union[ExtensionArray, ndarray[Any, Any],
1707 # ndarray[Any, dtype[Any]]]"; expected
1708 # "Union[_SupportsArray[dtype[Any]],
1709 # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
1710 # int, float, complex, str, bytes, _NestedSequence[
1711 # Union[bool, int, float, complex, str, bytes]]]"
1712 return np.in1d(left, right) # type: ignore[arg-type]
1713
1714 elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
1715 values.left.dtype
1716 ):
1717 # not comparable -> no overlap
1718 return np.zeros(self.shape, dtype=bool)
1719
1720 return isin(self.astype(object), values.astype(object))
1721
1722 @property
1723 def _combined(self) -> IntervalSideT:
1724 left = self.left._values.reshape(-1, 1)
1725 right = self.right._values.reshape(-1, 1)
1726 if needs_i8_conversion(left.dtype):
1727 comb = left._concat_same_type([left, right], axis=1)
1728 else:
1729 comb = np.concatenate([left, right], axis=1)
1730 return comb
1731
1732 def _from_combined(self, combined: np.ndarray) -> IntervalArray:
1733 """
1734 Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
1735 """
1736 nc = combined.view("i8").reshape(-1, 2)
1737
1738 dtype = self._left.dtype
1739 if needs_i8_conversion(dtype):
1740 assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
1741 new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
1742 assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
1743 new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
1744 else:
1745 assert isinstance(dtype, np.dtype)
1746 new_left = nc[:, 0].view(dtype)
1747 new_right = nc[:, 1].view(dtype)
1748 return self._shallow_copy(left=new_left, right=new_right)
1749
1750 def unique(self) -> IntervalArray:
1751 # No overload variant of "__getitem__" of "ExtensionArray" matches argument
1752 # type "Tuple[slice, int]"
1753 nc = unique(
1754 self._combined.view("complex128")[:, 0] # type: ignore[call-overload]
1755 )
1756 nc = nc[:, None]
1757 return self._from_combined(nc)
1758
1759
1760def _maybe_convert_platform_interval(values) -> ArrayLike:
1761 """
1762 Try to do platform conversion, with special casing for IntervalArray.
1763 Wrapper around maybe_convert_platform that alters the default return
1764 dtype in certain cases to be compatible with IntervalArray. For example,
1765 empty lists return with integer dtype instead of object dtype, which is
1766 prohibited for IntervalArray.
1767
1768 Parameters
1769 ----------
1770 values : array-like
1771
1772 Returns
1773 -------
1774 array
1775 """
1776 if isinstance(values, (list, tuple)) and len(values) == 0:
1777 # GH 19016
1778 # empty lists/tuples get object dtype by default, but this is
1779 # prohibited for IntervalArray, so coerce to integer instead
1780 return np.array([], dtype=np.int64)
1781 elif not is_list_like(values) or isinstance(values, ABCDataFrame):
1782 # This will raise later, but we avoid passing to maybe_convert_platform
1783 return values
1784 elif is_categorical_dtype(values):
1785 values = np.asarray(values)
1786 elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
1787 # TODO: should we just cast these to list?
1788 return values
1789 else:
1790 values = extract_array(values, extract_numpy=True)
1791
1792 if not hasattr(values, "dtype"):
1793 values = np.asarray(values)
1794 if is_integer_dtype(values) and values.dtype != np.int64:
1795 values = values.astype(np.int64)
1796 return values