1from __future__ import annotations
2
3import operator
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 TYPE_CHECKING,
11 Literal,
12 Union,
13 overload,
14)
15import warnings
16
17import numpy as np
18
19from pandas._libs import lib
20from pandas._libs.interval import (
21 VALID_CLOSED,
22 Interval,
23 IntervalMixin,
24 intervals_to_interval_bounds,
25)
26from pandas._libs.missing import NA
27from pandas._typing import (
28 ArrayLike,
29 AxisInt,
30 Dtype,
31 FillnaOptions,
32 IntervalClosedType,
33 NpDtype,
34 PositionalIndexer,
35 ScalarIndexer,
36 Self,
37 SequenceIndexer,
38 SortKind,
39 TimeArrayLike,
40 npt,
41)
42from pandas.compat.numpy import function as nv
43from pandas.errors import IntCastingNaNError
44from pandas.util._decorators import Appender
45
46from pandas.core.dtypes.cast import (
47 LossySetitemError,
48 maybe_upcast_numeric_to_64bit,
49)
50from pandas.core.dtypes.common import (
51 is_float_dtype,
52 is_integer_dtype,
53 is_list_like,
54 is_object_dtype,
55 is_scalar,
56 is_string_dtype,
57 needs_i8_conversion,
58 pandas_dtype,
59)
60from pandas.core.dtypes.dtypes import (
61 CategoricalDtype,
62 IntervalDtype,
63)
64from pandas.core.dtypes.generic import (
65 ABCDataFrame,
66 ABCDatetimeIndex,
67 ABCIntervalIndex,
68 ABCPeriodIndex,
69)
70from pandas.core.dtypes.missing import (
71 is_valid_na_for_dtype,
72 isna,
73 notna,
74)
75
76from pandas.core.algorithms import (
77 isin,
78 take,
79 unique,
80 value_counts_internal as value_counts,
81)
82from pandas.core.arrays import ArrowExtensionArray
83from pandas.core.arrays.base import (
84 ExtensionArray,
85 _extension_array_shared_docs,
86)
87from pandas.core.arrays.datetimes import DatetimeArray
88from pandas.core.arrays.timedeltas import TimedeltaArray
89import pandas.core.common as com
90from pandas.core.construction import (
91 array as pd_array,
92 ensure_wrapped_if_datetimelike,
93 extract_array,
94)
95from pandas.core.indexers import check_array_indexer
96from pandas.core.ops import (
97 invalid_comparison,
98 unpack_zerodim_and_defer,
99)
100
101if TYPE_CHECKING:
102 from collections.abc import (
103 Iterator,
104 Sequence,
105 )
106
107 from pandas import (
108 Index,
109 Series,
110 )
111
112
113IntervalSide = Union[TimeArrayLike, np.ndarray]
114IntervalOrNA = Union[Interval, float]
115
116_interval_shared_docs: dict[str, str] = {}
117
118_shared_docs_kwargs = {
119 "klass": "IntervalArray",
120 "qualname": "arrays.IntervalArray",
121 "name": "",
122}
123
124
125_interval_shared_docs[
126 "class"
127] = """
128%(summary)s
129
130Parameters
131----------
132data : array-like (1-dimensional)
    Array-like (ndarray, :class:`DatetimeArray`, :class:`TimedeltaArray`)
    containing Interval objects from which to build the %(klass)s.
135closed : {'left', 'right', 'both', 'neither'}, default 'right'
136 Whether the intervals are closed on the left-side, right-side, both or
137 neither.
138dtype : dtype or None, default None
139 If None, dtype will be inferred.
140copy : bool, default False
141 Copy the input data.
142%(name)s\
143verify_integrity : bool, default True
144 Verify that the %(klass)s is valid.
145
146Attributes
147----------
148left
149right
150closed
151mid
152length
153is_empty
154is_non_overlapping_monotonic
155%(extra_attributes)s\
156
157Methods
158-------
159from_arrays
160from_tuples
161from_breaks
162contains
163overlaps
164set_closed
165to_tuples
166%(extra_methods)s\
167
168See Also
169--------
170Index : The base pandas Index type.
171Interval : A bounded slice-like interval; the elements of an %(klass)s.
172interval_range : Function to create a fixed frequency IntervalIndex.
173cut : Bin values into discrete Intervals.
174qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.
175
176Notes
177-----
178See the `user guide
179<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__
180for more.
181
182%(examples)s\
183"""
184
185
186@Appender(
187 _interval_shared_docs["class"]
188 % {
189 "klass": "IntervalArray",
190 "summary": "Pandas array for interval data that are closed on the same side.",
191 "name": "",
192 "extra_attributes": "",
193 "extra_methods": "",
194 "examples": textwrap.dedent(
195 """\
196 Examples
197 --------
198 A new ``IntervalArray`` can be constructed directly from an array-like of
199 ``Interval`` objects:
200
201 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
202 <IntervalArray>
203 [(0, 1], (1, 5]]
204 Length: 2, dtype: interval[int64, right]
205
206 It may also be constructed using one of the constructor
207 methods: :meth:`IntervalArray.from_arrays`,
208 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
209 """
210 ),
211 }
212)
213class IntervalArray(IntervalMixin, ExtensionArray):
214 can_hold_na = True
215 _na_value = _fill_value = np.nan
216
217 @property
218 def ndim(self) -> Literal[1]:
219 return 1
220
221 # To make mypy recognize the fields
222 _left: IntervalSide
223 _right: IntervalSide
224 _dtype: IntervalDtype
225
226 # ---------------------------------------------------------------------
227 # Constructors
228
229 def __new__(
230 cls,
231 data,
232 closed: IntervalClosedType | None = None,
233 dtype: Dtype | None = None,
234 copy: bool = False,
235 verify_integrity: bool = True,
236 ):
237 data = extract_array(data, extract_numpy=True)
238
239 if isinstance(data, cls):
240 left: IntervalSide = data._left
241 right: IntervalSide = data._right
242 closed = closed or data.closed
243 dtype = IntervalDtype(left.dtype, closed=closed)
244 else:
245 # don't allow scalars
246 if is_scalar(data):
247 msg = (
248 f"{cls.__name__}(...) must be called with a collection "
249 f"of some kind, {data} was passed"
250 )
251 raise TypeError(msg)
252
253 # might need to convert empty or purely na data
254 data = _maybe_convert_platform_interval(data)
255 left, right, infer_closed = intervals_to_interval_bounds(
256 data, validate_closed=closed is None
257 )
258 if left.dtype == object:
259 left = lib.maybe_convert_objects(left)
260 right = lib.maybe_convert_objects(right)
261 closed = closed or infer_closed
262
263 left, right, dtype = cls._ensure_simple_new_inputs(
264 left,
265 right,
266 closed=closed,
267 copy=copy,
268 dtype=dtype,
269 )
270
271 if verify_integrity:
272 cls._validate(left, right, dtype=dtype)
273
274 return cls._simple_new(
275 left,
276 right,
277 dtype=dtype,
278 )
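    # Illustrative sketch: when ``closed`` is not given, it is inferred from the
    # Interval objects themselves, e.g.
    #
    #   pd.arrays.IntervalArray([pd.Interval(0, 1, closed="left"),
    #                            pd.Interval(1, 2, closed="left")])
    #
    # produces dtype ``interval[int64, left]``, while mixing differently-closed
    # Intervals without an explicit ``closed=`` raises a ValueError.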
279
280 @classmethod
281 def _simple_new(
282 cls,
283 left: IntervalSide,
284 right: IntervalSide,
285 dtype: IntervalDtype,
286 ) -> Self:
287 result = IntervalMixin.__new__(cls)
288 result._left = left
289 result._right = right
290 result._dtype = dtype
291
292 return result
293
294 @classmethod
295 def _ensure_simple_new_inputs(
296 cls,
297 left,
298 right,
299 closed: IntervalClosedType | None = None,
300 copy: bool = False,
301 dtype: Dtype | None = None,
302 ) -> tuple[IntervalSide, IntervalSide, IntervalDtype]:
303 """Ensure correctness of input parameters for cls._simple_new."""
304 from pandas.core.indexes.base import ensure_index
305
306 left = ensure_index(left, copy=copy)
307 left = maybe_upcast_numeric_to_64bit(left)
308
309 right = ensure_index(right, copy=copy)
310 right = maybe_upcast_numeric_to_64bit(right)
311
312 if closed is None and isinstance(dtype, IntervalDtype):
313 closed = dtype.closed
314
315 closed = closed or "right"
316
317 if dtype is not None:
318 # GH 19262: dtype must be an IntervalDtype to override inferred
319 dtype = pandas_dtype(dtype)
320 if isinstance(dtype, IntervalDtype):
321 if dtype.subtype is not None:
322 left = left.astype(dtype.subtype)
323 right = right.astype(dtype.subtype)
324 else:
325 msg = f"dtype must be an IntervalDtype, got {dtype}"
326 raise TypeError(msg)
327
328 if dtype.closed is None:
329 # possibly loading an old pickle
330 dtype = IntervalDtype(dtype.subtype, closed)
331 elif closed != dtype.closed:
332 raise ValueError("closed keyword does not match dtype.closed")
333
334 # coerce dtypes to match if needed
335 if is_float_dtype(left.dtype) and is_integer_dtype(right.dtype):
336 right = right.astype(left.dtype)
337 elif is_float_dtype(right.dtype) and is_integer_dtype(left.dtype):
338 left = left.astype(right.dtype)
339
340 if type(left) != type(right):
341 msg = (
342 f"must not have differing left [{type(left).__name__}] and "
343 f"right [{type(right).__name__}] types"
344 )
345 raise ValueError(msg)
346 if isinstance(left.dtype, CategoricalDtype) or is_string_dtype(left.dtype):
347 # GH 19016
348 msg = (
349 "category, object, and string subtypes are not supported "
350 "for IntervalArray"
351 )
352 raise TypeError(msg)
353 if isinstance(left, ABCPeriodIndex):
354 msg = "Period dtypes are not supported, use a PeriodIndex instead"
355 raise ValueError(msg)
356 if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
357 msg = (
358 "left and right must have the same time zone, got "
359 f"'{left.tz}' and '{right.tz}'"
360 )
361 raise ValueError(msg)
362 elif needs_i8_conversion(left.dtype) and left.unit != right.unit:
363 # e.g. m8[s] vs m8[ms], try to cast to a common dtype GH#55714
364 left_arr, right_arr = left._data._ensure_matching_resos(right._data)
365 left = ensure_index(left_arr)
366 right = ensure_index(right_arr)
367
368 # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
369 left = ensure_wrapped_if_datetimelike(left)
370 left = extract_array(left, extract_numpy=True)
371 right = ensure_wrapped_if_datetimelike(right)
372 right = extract_array(right, extract_numpy=True)
373
374 if isinstance(left, ArrowExtensionArray) or isinstance(
375 right, ArrowExtensionArray
376 ):
377 pass
378 else:
379 lbase = getattr(left, "_ndarray", left)
380 lbase = getattr(lbase, "_data", lbase).base
381 rbase = getattr(right, "_ndarray", right)
382 rbase = getattr(rbase, "_data", rbase).base
383 if lbase is not None and lbase is rbase:
384 # If these share data, then setitem could corrupt our IA
385 right = right.copy()
386
387 dtype = IntervalDtype(left.dtype, closed=closed)
388
389 return left, right, dtype
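    # Coercion sketch (illustrative): mixing integer and float bounds upcasts
    # the integer side, e.g. ``IntervalArray.from_arrays([0, 1], [0.5, 2.5])``
    # yields dtype ``interval[float64, right]``.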
390
391 @classmethod
392 def _from_sequence(
393 cls,
394 scalars,
395 *,
396 dtype: Dtype | None = None,
397 copy: bool = False,
398 ) -> Self:
399 return cls(scalars, dtype=dtype, copy=copy)
400
401 @classmethod
402 def _from_factorized(cls, values: np.ndarray, original: IntervalArray) -> Self:
403 return cls._from_sequence(values, dtype=original.dtype)
404
405 _interval_shared_docs["from_breaks"] = textwrap.dedent(
406 """
407 Construct an %(klass)s from an array of splits.
408
409 Parameters
410 ----------
411 breaks : array-like (1-dimensional)
412 Left and right bounds for each interval.
413 closed : {'left', 'right', 'both', 'neither'}, default 'right'
414 Whether the intervals are closed on the left-side, right-side, both
415 or neither.\
416 %(name)s
417 copy : bool, default False
418 Copy the data.
419 dtype : dtype or None, default None
420 If None, dtype will be inferred.
421
422 Returns
423 -------
424 %(klass)s
425
426 See Also
427 --------
428 interval_range : Function to create a fixed frequency IntervalIndex.
429 %(klass)s.from_arrays : Construct from a left and right array.
430 %(klass)s.from_tuples : Construct from a sequence of tuples.
431
432 %(examples)s\
433 """
434 )
435
436 @classmethod
437 @Appender(
438 _interval_shared_docs["from_breaks"]
439 % {
440 "klass": "IntervalArray",
441 "name": "",
442 "examples": textwrap.dedent(
443 """\
444 Examples
445 --------
446 >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
447 <IntervalArray>
448 [(0, 1], (1, 2], (2, 3]]
449 Length: 3, dtype: interval[int64, right]
450 """
451 ),
452 }
453 )
454 def from_breaks(
455 cls,
456 breaks,
457 closed: IntervalClosedType | None = "right",
458 copy: bool = False,
459 dtype: Dtype | None = None,
460 ) -> Self:
461 breaks = _maybe_convert_platform_interval(breaks)
462
463 return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype)
464
465 _interval_shared_docs["from_arrays"] = textwrap.dedent(
466 """
467 Construct from two arrays defining the left and right bounds.
468
469 Parameters
470 ----------
471 left : array-like (1-dimensional)
472 Left bounds for each interval.
473 right : array-like (1-dimensional)
474 Right bounds for each interval.
475 closed : {'left', 'right', 'both', 'neither'}, default 'right'
476 Whether the intervals are closed on the left-side, right-side, both
477 or neither.\
478 %(name)s
479 copy : bool, default False
480 Copy the data.
481 dtype : dtype, optional
482 If None, dtype will be inferred.
483
484 Returns
485 -------
486 %(klass)s
487
488 Raises
489 ------
490 ValueError
491 When a value is missing in only one of `left` or `right`.
492 When a value in `left` is greater than the corresponding value
493 in `right`.
494
495 See Also
496 --------
497 interval_range : Function to create a fixed frequency IntervalIndex.
498 %(klass)s.from_breaks : Construct an %(klass)s from an array of
499 splits.
500 %(klass)s.from_tuples : Construct an %(klass)s from an
501 array-like of tuples.
502
503 Notes
504 -----
505 Each element of `left` must be less than or equal to the `right`
506 element at the same position. If an element is missing, it must be
507 missing in both `left` and `right`. A TypeError is raised when
508 using an unsupported type for `left` or `right`. At the moment,
509 'category', 'object', and 'string' subtypes are not supported.
510
511 %(examples)s\
512 """
513 )
514
515 @classmethod
516 @Appender(
517 _interval_shared_docs["from_arrays"]
518 % {
519 "klass": "IntervalArray",
520 "name": "",
521 "examples": textwrap.dedent(
522 """\
523 Examples
524 --------
525 >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
526 <IntervalArray>
527 [(0, 1], (1, 2], (2, 3]]
528 Length: 3, dtype: interval[int64, right]
529 """
530 ),
531 }
532 )
533 def from_arrays(
534 cls,
535 left,
536 right,
537 closed: IntervalClosedType | None = "right",
538 copy: bool = False,
539 dtype: Dtype | None = None,
540 ) -> Self:
541 left = _maybe_convert_platform_interval(left)
542 right = _maybe_convert_platform_interval(right)
543
544 left, right, dtype = cls._ensure_simple_new_inputs(
545 left,
546 right,
547 closed=closed,
548 copy=copy,
549 dtype=dtype,
550 )
551 cls._validate(left, right, dtype=dtype)
552
553 return cls._simple_new(left, right, dtype=dtype)
554
555 _interval_shared_docs["from_tuples"] = textwrap.dedent(
556 """
557 Construct an %(klass)s from an array-like of tuples.
558
559 Parameters
560 ----------
561 data : array-like (1-dimensional)
562 Array of tuples.
563 closed : {'left', 'right', 'both', 'neither'}, default 'right'
564 Whether the intervals are closed on the left-side, right-side, both
565 or neither.\
566 %(name)s
567 copy : bool, default False
            Whether to copy the data; this argument is for compatibility only
            and is ignored.
569 dtype : dtype or None, default None
570 If None, dtype will be inferred.
571
572 Returns
573 -------
574 %(klass)s
575
576 See Also
577 --------
578 interval_range : Function to create a fixed frequency IntervalIndex.
579 %(klass)s.from_arrays : Construct an %(klass)s from a left and
580 right array.
581 %(klass)s.from_breaks : Construct an %(klass)s from an array of
582 splits.
583
584 %(examples)s\
585 """
586 )
587
588 @classmethod
589 @Appender(
590 _interval_shared_docs["from_tuples"]
591 % {
592 "klass": "IntervalArray",
593 "name": "",
594 "examples": textwrap.dedent(
595 """\
596 Examples
597 --------
598 >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
599 <IntervalArray>
600 [(0, 1], (1, 2]]
601 Length: 2, dtype: interval[int64, right]
602 """
603 ),
604 }
605 )
606 def from_tuples(
607 cls,
608 data,
609 closed: IntervalClosedType | None = "right",
610 copy: bool = False,
611 dtype: Dtype | None = None,
612 ) -> Self:
613 if len(data):
614 left, right = [], []
615 else:
616 # ensure that empty data keeps input dtype
617 left = right = data
618
619 for d in data:
620 if not isinstance(d, tuple) and isna(d):
621 lhs = rhs = np.nan
622 else:
623 name = cls.__name__
624 try:
625 # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
626 lhs, rhs = d
627 except ValueError as err:
628 msg = f"{name}.from_tuples requires tuples of length 2, got {d}"
629 raise ValueError(msg) from err
630 except TypeError as err:
631 msg = f"{name}.from_tuples received an invalid item, {d}"
632 raise TypeError(msg) from err
633 left.append(lhs)
634 right.append(rhs)
635
636 return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
637
638 @classmethod
639 def _validate(cls, left, right, dtype: IntervalDtype) -> None:
640 """
641 Verify that the IntervalArray is valid.
642
643 Checks that
644
645 * dtype is correct
646 * left and right match lengths
647 * left and right have the same missing values
648 * left is always below right
649 """
650 if not isinstance(dtype, IntervalDtype):
651 msg = f"invalid dtype: {dtype}"
652 raise ValueError(msg)
653 if len(left) != len(right):
654 msg = "left and right must have the same length"
655 raise ValueError(msg)
656 left_mask = notna(left)
657 right_mask = notna(right)
658 if not (left_mask == right_mask).all():
659 msg = (
660 "missing values must be missing in the same "
661 "location both left and right sides"
662 )
663 raise ValueError(msg)
664 if not (left[left_mask] <= right[left_mask]).all():
665 msg = "left side of interval must be <= right side"
666 raise ValueError(msg)
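    # Minimal illustration of the checks above:
    #
    #   pd.arrays.IntervalArray.from_arrays([0, 2], [1, 1])
    #
    # raises ``ValueError: left side of interval must be <= right side`` because
    # the second left bound (2) exceeds its right bound (1).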
667
668 def _shallow_copy(self, left, right) -> Self:
669 """
670 Return a new IntervalArray with the replacement attributes
671
672 Parameters
673 ----------
674 left : Index
675 Values to be used for the left-side of the intervals.
676 right : Index
677 Values to be used for the right-side of the intervals.
678 """
679 dtype = IntervalDtype(left.dtype, closed=self.closed)
680 left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype)
681
682 return self._simple_new(left, right, dtype=dtype)
683
684 # ---------------------------------------------------------------------
685 # Descriptive
686
687 @property
688 def dtype(self) -> IntervalDtype:
689 return self._dtype
690
691 @property
692 def nbytes(self) -> int:
693 return self.left.nbytes + self.right.nbytes
694
695 @property
696 def size(self) -> int:
697 # Avoid materializing self.values
698 return self.left.size
699
700 # ---------------------------------------------------------------------
701 # EA Interface
702
703 def __iter__(self) -> Iterator:
704 return iter(np.asarray(self))
705
706 def __len__(self) -> int:
707 return len(self._left)
708
709 @overload
710 def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
711 ...
712
713 @overload
714 def __getitem__(self, key: SequenceIndexer) -> Self:
715 ...
716
717 def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
718 key = check_array_indexer(self, key)
719 left = self._left[key]
720 right = self._right[key]
721
722 if not isinstance(left, (np.ndarray, ExtensionArray)):
723 # scalar
724 if is_scalar(left) and isna(left):
725 return self._fill_value
726 return Interval(left, right, self.closed)
727 if np.ndim(left) > 1:
728 # GH#30588 multi-dimensional indexer disallowed
729 raise ValueError("multi-dimensional indexing not allowed")
730 # Argument 2 to "_simple_new" of "IntervalArray" has incompatible type
731 # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
732 # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
733 # ndarray[Any, Any]]"
734 return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]
735
736 def __setitem__(self, key, value) -> None:
737 value_left, value_right = self._validate_setitem_value(value)
738 key = check_array_indexer(self, key)
739
740 self._left[key] = value_left
741 self._right[key] = value_right
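    # Usage sketch (illustrative): item assignment accepts Interval objects with
    # a matching ``closed`` side, or NA:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    #   arr[0] = pd.Interval(5, 6)                   # ok, both closed on "right"
    #   arr[0] = pd.Interval(5, 6, closed="left")    # raises ValueError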
742
743 def _cmp_method(self, other, op):
744 # ensure pandas array for list-like and eliminate non-interval scalars
745 if is_list_like(other):
746 if len(self) != len(other):
747 raise ValueError("Lengths must match to compare")
748 other = pd_array(other)
749 elif not isinstance(other, Interval):
750 # non-interval scalar -> no matches
751 if other is NA:
752 # GH#31882
753 from pandas.core.arrays import BooleanArray
754
755 arr = np.empty(self.shape, dtype=bool)
756 mask = np.ones(self.shape, dtype=bool)
757 return BooleanArray(arr, mask)
758 return invalid_comparison(self, other, op)
759
760 # determine the dtype of the elements we want to compare
761 if isinstance(other, Interval):
762 other_dtype = pandas_dtype("interval")
763 elif not isinstance(other.dtype, CategoricalDtype):
764 other_dtype = other.dtype
765 else:
766 # for categorical defer to categories for dtype
767 other_dtype = other.categories.dtype
768
769 # extract intervals if we have interval categories with matching closed
770 if isinstance(other_dtype, IntervalDtype):
771 if self.closed != other.categories.closed:
772 return invalid_comparison(self, other, op)
773
774 other = other.categories._values.take(
775 other.codes, allow_fill=True, fill_value=other.categories._na_value
776 )
777
778 # interval-like -> need same closed and matching endpoints
779 if isinstance(other_dtype, IntervalDtype):
780 if self.closed != other.closed:
781 return invalid_comparison(self, other, op)
782 elif not isinstance(other, Interval):
783 other = type(self)(other)
784
785 if op is operator.eq:
786 return (self._left == other.left) & (self._right == other.right)
787 elif op is operator.ne:
788 return (self._left != other.left) | (self._right != other.right)
789 elif op is operator.gt:
790 return (self._left > other.left) | (
791 (self._left == other.left) & (self._right > other.right)
792 )
793 elif op is operator.ge:
794 return (self == other) | (self > other)
795 elif op is operator.lt:
796 return (self._left < other.left) | (
797 (self._left == other.left) & (self._right < other.right)
798 )
799 else:
                # operator.le
801 return (self == other) | (self < other)
802
803 # non-interval/non-object dtype -> no matches
804 if not is_object_dtype(other_dtype):
805 return invalid_comparison(self, other, op)
806
807 # object dtype -> iteratively check for intervals
808 result = np.zeros(len(self), dtype=bool)
809 for i, obj in enumerate(other):
810 try:
811 result[i] = op(self[i], obj)
812 except TypeError:
813 if obj is NA:
814 # comparison with np.nan returns NA
815 # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
816 result = result.astype(object)
817 result[i] = NA
818 else:
819 raise
820 return result
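    # Comparison semantics sketch: intervals are ordered lexicographically on
    # their endpoints (left first, then right), matching scalar Interval
    # comparisons, e.g.
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (0, 2), (1, 2)])
    #   arr < pd.Interval(0, 2)   # -> array([ True, False, False])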
821
822 @unpack_zerodim_and_defer("__eq__")
823 def __eq__(self, other):
824 return self._cmp_method(other, operator.eq)
825
826 @unpack_zerodim_and_defer("__ne__")
827 def __ne__(self, other):
828 return self._cmp_method(other, operator.ne)
829
830 @unpack_zerodim_and_defer("__gt__")
831 def __gt__(self, other):
832 return self._cmp_method(other, operator.gt)
833
834 @unpack_zerodim_and_defer("__ge__")
835 def __ge__(self, other):
836 return self._cmp_method(other, operator.ge)
837
838 @unpack_zerodim_and_defer("__lt__")
839 def __lt__(self, other):
840 return self._cmp_method(other, operator.lt)
841
842 @unpack_zerodim_and_defer("__le__")
843 def __le__(self, other):
844 return self._cmp_method(other, operator.le)
845
846 def argsort(
847 self,
848 *,
849 ascending: bool = True,
850 kind: SortKind = "quicksort",
851 na_position: str = "last",
852 **kwargs,
853 ) -> np.ndarray:
854 ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)
855
856 if ascending and kind == "quicksort" and na_position == "last":
857 # TODO: in an IntervalIndex we can reuse the cached
858 # IntervalTree.left_sorter
859 return np.lexsort((self.right, self.left))
860
861 # TODO: other cases we can use lexsort for? much more performant.
862 return super().argsort(
863 ascending=ascending, kind=kind, na_position=na_position, **kwargs
864 )
865
866 def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
867 nv.validate_minmax_axis(axis, self.ndim)
868
869 if not len(self):
870 return self._na_value
871
872 mask = self.isna()
873 if mask.any():
874 if not skipna:
875 return self._na_value
876 obj = self[~mask]
877 else:
878 obj = self
879
880 indexer = obj.argsort()[0]
881 return obj[indexer]
882
883 def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
884 nv.validate_minmax_axis(axis, self.ndim)
885
886 if not len(self):
887 return self._na_value
888
889 mask = self.isna()
890 if mask.any():
891 if not skipna:
892 return self._na_value
893 obj = self[~mask]
894 else:
895 obj = self
896
897 indexer = obj.argsort()[-1]
898 return obj[indexer]
899
900 def _pad_or_backfill( # pylint: disable=useless-parent-delegation
901 self,
902 *,
903 method: FillnaOptions,
904 limit: int | None = None,
905 limit_area: Literal["inside", "outside"] | None = None,
906 copy: bool = True,
907 ) -> Self:
908 # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
909 # this method entirely.
910 return super()._pad_or_backfill(
911 method=method, limit=limit, limit_area=limit_area, copy=copy
912 )
913
914 def fillna(
915 self, value=None, method=None, limit: int | None = None, copy: bool = True
916 ) -> Self:
917 """
918 Fill NA/NaN values using the specified method.
919
920 Parameters
921 ----------
922 value : scalar, dict, Series
923 If a scalar value is passed it is used to fill all missing values.
924 Alternatively, a Series or dict can be used to fill in different
925 values for each index. The value should not be a list. The
926 value(s) passed should be either Interval objects or NA/NaN.
927 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
928 (Not implemented yet for IntervalArray)
929 Method to use for filling holes in reindexed Series
930 limit : int, default None
931 (Not implemented yet for IntervalArray)
932 If method is specified, this is the maximum number of consecutive
933 NaN values to forward/backward fill. In other words, if there is
934 a gap with more than this number of consecutive NaNs, it will only
935 be partially filled. If method is not specified, this is the
936 maximum number of entries along the entire axis where NaNs will be
937 filled.
938 copy : bool, default True
939 Whether to make a copy of the data before filling. If False, then
940 the original should be modified and no new memory should be allocated.
941 For ExtensionArray subclasses that cannot do this, it is at the
942 author's discretion whether to ignore "copy=False" or to raise.
943
944 Returns
945 -------
946 filled : IntervalArray with NA/NaN filled
947 """
948 if copy is False:
949 raise NotImplementedError
950 if method is not None:
951 return super().fillna(value=value, method=method, limit=limit)
952
953 value_left, value_right = self._validate_scalar(value)
954
955 left = self.left.fillna(value=value_left)
956 right = self.right.fillna(value=value_right)
957 return self._shallow_copy(left, right)
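    # Usage sketch (illustrative): only a scalar Interval (with matching
    # ``closed``) or NA is accepted as the fill value:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), np.nan, (2, 3)])
    #   arr.fillna(pd.Interval(1, 2))   # fills the missing slot with (1.0, 2.0]
    #
    # List-like fill values raise TypeError.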
958
959 def astype(self, dtype, copy: bool = True):
960 """
961 Cast to an ExtensionArray or NumPy array with dtype 'dtype'.
962
963 Parameters
964 ----------
965 dtype : str or dtype
966 Typecode or data-type to which the array is cast.
967
968 copy : bool, default True
969 Whether to copy the data, even if not necessary. If False,
970 a copy is made only if the old dtype does not match the
971 new dtype.
972
973 Returns
974 -------
975 array : ExtensionArray or ndarray
976 ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
977 """
978 from pandas import Index
979
980 if dtype is not None:
981 dtype = pandas_dtype(dtype)
982
983 if isinstance(dtype, IntervalDtype):
984 if dtype == self.dtype:
985 return self.copy() if copy else self
986
987 if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(
988 dtype.subtype
989 ):
990 # This is allowed on the Index.astype but we disallow it here
991 msg = (
992 f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
993 )
994 raise TypeError(msg)
995
996 # need to cast to different subtype
997 try:
998 # We need to use Index rules for astype to prevent casting
999 # np.nan entries to int subtypes
1000 new_left = Index(self._left, copy=False).astype(dtype.subtype)
1001 new_right = Index(self._right, copy=False).astype(dtype.subtype)
1002 except IntCastingNaNError:
1003 # e.g test_subtype_integer
1004 raise
1005 except (TypeError, ValueError) as err:
1006 # e.g. test_subtype_integer_errors f8->u8 can be lossy
1007 # and raises ValueError
1008 msg = (
1009 f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
1010 )
1011 raise TypeError(msg) from err
1012 return self._shallow_copy(new_left, new_right)
1013 else:
1014 try:
1015 return super().astype(dtype, copy=copy)
1016 except (TypeError, ValueError) as err:
1017 msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
1018 raise TypeError(msg) from err
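    # Usage sketch (illustrative): casting changes the subtype while keeping the
    # closed side:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    #   arr.astype("interval[float64, right]")   # subtype int64 -> float64
    #   arr.astype(object)                       # ndarray of Interval objects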
1019
1020 def equals(self, other) -> bool:
1021 if type(self) != type(other):
1022 return False
1023
1024 return bool(
1025 self.closed == other.closed
1026 and self.left.equals(other.left)
1027 and self.right.equals(other.right)
1028 )
1029
1030 @classmethod
1031 def _concat_same_type(cls, to_concat: Sequence[IntervalArray]) -> Self:
1032 """
1033 Concatenate multiple IntervalArray
1034
1035 Parameters
1036 ----------
1037 to_concat : sequence of IntervalArray
1038
1039 Returns
1040 -------
1041 IntervalArray
1042 """
1043 closed_set = {interval.closed for interval in to_concat}
1044 if len(closed_set) != 1:
1045 raise ValueError("Intervals must all be closed on the same side.")
1046 closed = closed_set.pop()
1047
1048 left: IntervalSide = np.concatenate([interval.left for interval in to_concat])
1049 right: IntervalSide = np.concatenate([interval.right for interval in to_concat])
1050
1051 left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)
1052
1053 return cls._simple_new(left, right, dtype=dtype)
1054
1055 def copy(self) -> Self:
1056 """
1057 Return a copy of the array.
1058
1059 Returns
1060 -------
1061 IntervalArray
1062 """
1063 left = self._left.copy()
1064 right = self._right.copy()
1065 dtype = self.dtype
1066 return self._simple_new(left, right, dtype=dtype)
1067
1068 def isna(self) -> np.ndarray:
1069 return isna(self._left)
1070
1071 def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
1072 if not len(self) or periods == 0:
1073 return self.copy()
1074
1075 self._validate_scalar(fill_value)
1076
1077 # ExtensionArray.shift doesn't work for two reasons
1078 # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
1079 # 2. IntervalArray._from_sequence only accepts NaN for missing values,
1080 # not other values like NaT
1081
1082 empty_len = min(abs(periods), len(self))
1083 if isna(fill_value):
1084 from pandas import Index
1085
1086 fill_value = Index(self._left, copy=False)._na_value
1087 empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
1088 else:
1089 empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)
1090
1091 if periods > 0:
1092 a = empty
1093 b = self[:-periods]
1094 else:
1095 a = self[abs(periods) :]
1096 b = empty
1097 return self._concat_same_type([a, b])
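    # Usage sketch (illustrative): shifting introduces missing intervals in the
    # vacated positions:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
    #   arr.shift(1)    # -> [NaN, (0.0, 1.0], (1.0, 2.0]]
    #   arr.shift(-1)   # -> [(1.0, 2.0], (2.0, 3.0], NaN]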
1098
1099 def take(
1100 self,
1101 indices,
1102 *,
1103 allow_fill: bool = False,
1104 fill_value=None,
1105 axis=None,
1106 **kwargs,
1107 ) -> Self:
1108 """
1109 Take elements from the IntervalArray.
1110
1111 Parameters
1112 ----------
1113 indices : sequence of integers
1114 Indices to be taken.
1115
1116 allow_fill : bool, default False
1117 How to handle negative values in `indices`.
1118
1119 * False: negative values in `indices` indicate positional indices
1120 from the right (the default). This is similar to
1121 :func:`numpy.take`.
1122
1123 * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any other
              negative values raise a ``ValueError``.
1126
1127 fill_value : Interval or NA, optional
1128 Fill value to use for NA-indices when `allow_fill` is True.
1129 This may be ``None``, in which case the default NA value for
1130 the type, ``self.dtype.na_value``, is used.
1131
1132 For many ExtensionArrays, there will be two representations of
1133 `fill_value`: a user-facing "boxed" scalar, and a low-level
1134 physical NA value. `fill_value` should be the user-facing version,
1135 and the implementation should handle translating that to the
1136 physical version for processing the take if necessary.
1137
1138 axis : any, default None
1139 Present for compat with IntervalIndex; does nothing.
1140
1141 Returns
1142 -------
1143 IntervalArray
1144
1145 Raises
1146 ------
1147 IndexError
1148 When the indices are out of bounds for the array.
1149 ValueError
1150 When `indices` contains negative values other than ``-1``
1151 and `allow_fill` is True.
1152 """
1153 nv.validate_take((), kwargs)
1154
1155 fill_left = fill_right = fill_value
1156 if allow_fill:
1157 fill_left, fill_right = self._validate_scalar(fill_value)
1158
1159 left_take = take(
1160 self._left, indices, allow_fill=allow_fill, fill_value=fill_left
1161 )
1162 right_take = take(
1163 self._right, indices, allow_fill=allow_fill, fill_value=fill_right
1164 )
1165
1166 return self._shallow_copy(left_take, right_take)
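    # Usage sketch (illustrative):
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
    #   arr.take([2, 0])                      # -> [(2, 3], (0, 1]]
    #   arr.take([0, -1], allow_fill=True)    # -1 becomes a missing interval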
1167
1168 def _validate_listlike(self, value):
1169 # list-like of intervals
1170 try:
1171 array = IntervalArray(value)
1172 self._check_closed_matches(array, name="value")
1173 value_left, value_right = array.left, array.right
1174 except TypeError as err:
1175 # wrong type: not interval or NA
1176 msg = f"'value' should be an interval type, got {type(value)} instead."
1177 raise TypeError(msg) from err
1178
1179 try:
1180 self.left._validate_fill_value(value_left)
1181 except (LossySetitemError, TypeError) as err:
1182 msg = (
1183 "'value' should be a compatible interval type, "
1184 f"got {type(value)} instead."
1185 )
1186 raise TypeError(msg) from err
1187
1188 return value_left, value_right
1189
1190 def _validate_scalar(self, value):
1191 if isinstance(value, Interval):
1192 self._check_closed_matches(value, name="value")
1193 left, right = value.left, value.right
1194 # TODO: check subdtype match like _validate_setitem_value?
1195 elif is_valid_na_for_dtype(value, self.left.dtype):
1196 # GH#18295
1197 left = right = self.left._na_value
1198 else:
1199 raise TypeError(
1200 "can only insert Interval objects and NA into an IntervalArray"
1201 )
1202 return left, right
1203
1204 def _validate_setitem_value(self, value):
1205 if is_valid_na_for_dtype(value, self.left.dtype):
1206 # na value: need special casing to set directly on numpy arrays
1207 value = self.left._na_value
1208 if is_integer_dtype(self.dtype.subtype):
1209 # can't set NaN on a numpy integer array
1210 # GH#45484 TypeError, not ValueError, matches what we get with
1211 # non-NA un-holdable value.
1212 raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
1213 value_left, value_right = value, value
1214
1215 elif isinstance(value, Interval):
1216 # scalar interval
1217 self._check_closed_matches(value, name="value")
1218 value_left, value_right = value.left, value.right
1219 self.left._validate_fill_value(value_left)
1220 self.left._validate_fill_value(value_right)
1221
1222 else:
1223 return self._validate_listlike(value)
1224
1225 return value_left, value_right
1226
1227 def value_counts(self, dropna: bool = True) -> Series:
1228 """
1229 Returns a Series containing counts of each interval.
1230
1231 Parameters
1232 ----------
1233 dropna : bool, default True
1234 Don't include counts of NaN.
1235
1236 Returns
1237 -------
1238 counts : Series
1239
1240 See Also
1241 --------
1242 Series.value_counts
1243 """
        # TODO: implement this in a non-naive way!
1245 with warnings.catch_warnings():
1246 warnings.filterwarnings(
1247 "ignore",
1248 "The behavior of value_counts with object-dtype is deprecated",
1249 category=FutureWarning,
1250 )
1251 result = value_counts(np.asarray(self), dropna=dropna)
1252 # Once the deprecation is enforced, we will need to do
1253 # `result.index = result.index.astype(self.dtype)`
1254 return result
1255
1256 # ---------------------------------------------------------------------
1257 # Rendering Methods
1258
1259 def _formatter(self, boxed: bool = False):
1260 # returning 'str' here causes us to render as e.g. "(0, 1]" instead of
1261 # "Interval(0, 1, closed='right')"
1262 return str
1263
1264 # ---------------------------------------------------------------------
1265 # Vectorized Interval Properties/Attributes
1266
1267 @property
1268 def left(self) -> Index:
1269 """
1270 Return the left endpoints of each Interval in the IntervalArray as an Index.
1271
1272 Examples
1273 --------
1274
1275 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(2, 5)])
1276 >>> interv_arr
1277 <IntervalArray>
1278 [(0, 1], (2, 5]]
1279 Length: 2, dtype: interval[int64, right]
1280 >>> interv_arr.left
1281 Index([0, 2], dtype='int64')
1282 """
1283 from pandas import Index
1284
1285 return Index(self._left, copy=False)
1286
1287 @property
1288 def right(self) -> Index:
1289 """
1290 Return the right endpoints of each Interval in the IntervalArray as an Index.
1291
1292 Examples
1293 --------
1294
1295 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(2, 5)])
1296 >>> interv_arr
1297 <IntervalArray>
1298 [(0, 1], (2, 5]]
1299 Length: 2, dtype: interval[int64, right]
1300 >>> interv_arr.right
1301 Index([1, 5], dtype='int64')
1302 """
1303 from pandas import Index
1304
1305 return Index(self._right, copy=False)
1306
1307 @property
1308 def length(self) -> Index:
1309 """
1310 Return an Index with entries denoting the length of each Interval.
1311
1312 Examples
1313 --------
1314
1315 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1316 >>> interv_arr
1317 <IntervalArray>
1318 [(0, 1], (1, 5]]
1319 Length: 2, dtype: interval[int64, right]
1320 >>> interv_arr.length
1321 Index([1, 4], dtype='int64')
1322 """
1323 return self.right - self.left
1324
1325 @property
1326 def mid(self) -> Index:
1327 """
1328 Return the midpoint of each Interval in the IntervalArray as an Index.
1329
1330 Examples
1331 --------
1332
1333 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1334 >>> interv_arr
1335 <IntervalArray>
1336 [(0, 1], (1, 5]]
1337 Length: 2, dtype: interval[int64, right]
1338 >>> interv_arr.mid
1339 Index([0.5, 3.0], dtype='float64')
1340 """
1341 try:
1342 return 0.5 * (self.left + self.right)
1343 except TypeError:
1344 # datetime safe version
1345 return self.left + 0.5 * self.length
1346
1347 _interval_shared_docs["overlaps"] = textwrap.dedent(
1348 """
1349 Check elementwise if an Interval overlaps the values in the %(klass)s.
1350
1351 Two intervals overlap if they share a common point, including closed
1352 endpoints. Intervals that only have an open endpoint in common do not
1353 overlap.
1354
1355 Parameters
1356 ----------
1357 other : %(klass)s
1358 Interval to check against for an overlap.
1359
1360 Returns
1361 -------
1362 ndarray
1363 Boolean array positionally indicating where an overlap occurs.
1364
1365 See Also
1366 --------
1367 Interval.overlaps : Check whether two Interval objects overlap.
1368
1369 Examples
1370 --------
1371 %(examples)s
1372 >>> intervals.overlaps(pd.Interval(0.5, 1.5))
1373 array([ True, True, False])
1374
1375 Intervals that share closed endpoints overlap:
1376
1377 >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
1378 array([ True, True, True])
1379
1380 Intervals that only have an open endpoint in common do not overlap:
1381
1382 >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
1383 array([False, True, False])
1384 """
1385 )
1386
1387 @Appender(
1388 _interval_shared_docs["overlaps"]
1389 % {
1390 "klass": "IntervalArray",
1391 "examples": textwrap.dedent(
1392 """\
1393 >>> data = [(0, 1), (1, 3), (2, 4)]
1394 >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
1395 >>> intervals
1396 <IntervalArray>
1397 [(0, 1], (1, 3], (2, 4]]
1398 Length: 3, dtype: interval[int64, right]
1399 """
1400 ),
1401 }
1402 )
1403 def overlaps(self, other):
1404 if isinstance(other, (IntervalArray, ABCIntervalIndex)):
1405 raise NotImplementedError
1406 if not isinstance(other, Interval):
1407 msg = f"`other` must be Interval-like, got {type(other).__name__}"
1408 raise TypeError(msg)
1409
1410 # equality is okay if both endpoints are closed (overlap at a point)
1411 op1 = le if (self.closed_left and other.closed_right) else lt
1412 op2 = le if (other.closed_left and self.closed_right) else lt
1413
        # "overlaps" is equivalent to the negation of the two intervals being
        # disjoint:
        #   disjoint = (A.left > B.right) or (B.left > A.right)
        # (simplifying the negation allows this to be done in fewer operations)
1417 return op1(self.left, other.right) & op2(other.left, self.right)
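    # Worked illustration: for A = (0, 2] and B = (1, 3] neither disjoint
    # condition holds, so they overlap; for A = (0, 1] and B = (1, 2] the shared
    # endpoint 1 is open on B's left side, so op2 uses strict ``lt`` and the
    # intervals do not overlap.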
1418
1419 # ---------------------------------------------------------------------
1420
1421 @property
1422 def closed(self) -> IntervalClosedType:
1423 """
        String describing the inclusive side of the intervals.
1425
1426 Either ``left``, ``right``, ``both`` or ``neither``.
1427
1428 Examples
1429 --------
1430
1431 For arrays:
1432
1433 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1434 >>> interv_arr
1435 <IntervalArray>
1436 [(0, 1], (1, 5]]
1437 Length: 2, dtype: interval[int64, right]
1438 >>> interv_arr.closed
1439 'right'
1440
1441 For Interval Index:
1442
1443 >>> interv_idx = pd.interval_range(start=0, end=2)
1444 >>> interv_idx
1445 IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
1446 >>> interv_idx.closed
1447 'right'
1448 """
1449 return self.dtype.closed
1450
1451 _interval_shared_docs["set_closed"] = textwrap.dedent(
1452 """
1453 Return an identical %(klass)s closed on the specified side.
1454
1455 Parameters
1456 ----------
1457 closed : {'left', 'right', 'both', 'neither'}
1458 Whether the intervals are closed on the left-side, right-side, both
1459 or neither.
1460
1461 Returns
1462 -------
1463 %(klass)s
1464
1465 %(examples)s\
1466 """
1467 )
1468
1469 @Appender(
1470 _interval_shared_docs["set_closed"]
1471 % {
1472 "klass": "IntervalArray",
1473 "examples": textwrap.dedent(
1474 """\
1475 Examples
1476 --------
1477 >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
1478 >>> index
1479 <IntervalArray>
1480 [(0, 1], (1, 2], (2, 3]]
1481 Length: 3, dtype: interval[int64, right]
1482 >>> index.set_closed('both')
1483 <IntervalArray>
1484 [[0, 1], [1, 2], [2, 3]]
1485 Length: 3, dtype: interval[int64, both]
1486 """
1487 ),
1488 }
1489 )
1490 def set_closed(self, closed: IntervalClosedType) -> Self:
1491 if closed not in VALID_CLOSED:
1492 msg = f"invalid option for 'closed': {closed}"
1493 raise ValueError(msg)
1494
1495 left, right = self._left, self._right
1496 dtype = IntervalDtype(left.dtype, closed=closed)
1497 return self._simple_new(left, right, dtype=dtype)
1498
1499 _interval_shared_docs[
1500 "is_non_overlapping_monotonic"
1501 ] = """
    Return a boolean indicating whether the %(klass)s is non-overlapping
    and monotonic.

    Non-overlapping means that no Intervals share points, and monotonic means
    either monotonically increasing or monotonically decreasing.
1506
1507 Examples
1508 --------
1509 For arrays:
1510
1511 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1512 >>> interv_arr
1513 <IntervalArray>
1514 [(0, 1], (1, 5]]
1515 Length: 2, dtype: interval[int64, right]
1516 >>> interv_arr.is_non_overlapping_monotonic
1517 True
1518
1519 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1),
1520 ... pd.Interval(-1, 0.1)])
1521 >>> interv_arr
1522 <IntervalArray>
1523 [(0.0, 1.0], (-1.0, 0.1]]
1524 Length: 2, dtype: interval[float64, right]
1525 >>> interv_arr.is_non_overlapping_monotonic
1526 False
1527
1528 For Interval Index:
1529
1530 >>> interv_idx = pd.interval_range(start=0, end=2)
1531 >>> interv_idx
1532 IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
1533 >>> interv_idx.is_non_overlapping_monotonic
1534 True
1535
1536 >>> interv_idx = pd.interval_range(start=0, end=2, closed='both')
1537 >>> interv_idx
1538 IntervalIndex([[0, 1], [1, 2]], dtype='interval[int64, both]')
1539 >>> interv_idx.is_non_overlapping_monotonic
1540 False
1541 """
1542
1543 @property
1544 @Appender(
1545 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
1546 )
1547 def is_non_overlapping_monotonic(self) -> bool:
1548 # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
1549 # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
1550 # we already require left <= right
1551
1552 # strict inequality for closed == 'both'; equality implies overlapping
1553 # at a point when both sides of intervals are included
1554 if self.closed == "both":
1555 return bool(
1556 (self._right[:-1] < self._left[1:]).all()
1557 or (self._left[:-1] > self._right[1:]).all()
1558 )
1559
1560 # non-strict inequality when closed != 'both'; at least one side is
1561 # not included in the intervals, so equality does not imply overlapping
1562 return bool(
1563 (self._right[:-1] <= self._left[1:]).all()
1564 or (self._left[:-1] >= self._right[1:]).all()
1565 )
1566
1567 # ---------------------------------------------------------------------
1568 # Conversion
1569
1570 def __array__(
1571 self, dtype: NpDtype | None = None, copy: bool | None = None
1572 ) -> np.ndarray:
1573 """
1574 Return the IntervalArray's data as a numpy array of Interval
1575 objects (with dtype='object')
1576 """
1577 left = self._left
1578 right = self._right
1579 mask = self.isna()
1580 closed = self.closed
1581
1582 result = np.empty(len(left), dtype=object)
1583 for i, left_value in enumerate(left):
1584 if mask[i]:
1585 result[i] = np.nan
1586 else:
1587 result[i] = Interval(left_value, right[i], closed)
1588 return result
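    # Conversion sketch (illustrative): ``np.asarray(arr)`` gives an object-dtype
    # ndarray of scalar Interval objects, with ``np.nan`` in missing positions:
    #
    #   np.asarray(pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]))
    #   # array([Interval(0, 1, closed='right'), Interval(1, 2, closed='right')],
    #   #       dtype=object)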
1589
1590 def __arrow_array__(self, type=None):
1591 """
1592 Convert myself into a pyarrow Array.
1593 """
1594 import pyarrow
1595
1596 from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
1597
1598 try:
1599 subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
1600 except TypeError as err:
1601 raise TypeError(
1602 f"Conversion to arrow with subtype '{self.dtype.subtype}' "
1603 "is not supported"
1604 ) from err
1605 interval_type = ArrowIntervalType(subtype, self.closed)
1606 storage_array = pyarrow.StructArray.from_arrays(
1607 [
1608 pyarrow.array(self._left, type=subtype, from_pandas=True),
1609 pyarrow.array(self._right, type=subtype, from_pandas=True),
1610 ],
1611 names=["left", "right"],
1612 )
1613 mask = self.isna()
1614 if mask.any():
1615 # if there are missing values, set validity bitmap also on the array level
1616 null_bitmap = pyarrow.array(~mask).buffers()[1]
1617 storage_array = pyarrow.StructArray.from_buffers(
1618 storage_array.type,
1619 len(storage_array),
1620 [null_bitmap],
1621 children=[storage_array.field(0), storage_array.field(1)],
1622 )
1623
1624 if type is not None:
1625 if type.equals(interval_type.storage_type):
1626 return storage_array
1627 elif isinstance(type, ArrowIntervalType):
1628 # ensure we have the same subtype and closed attributes
1629 if not type.equals(interval_type):
1630 raise TypeError(
1631 "Not supported to convert IntervalArray to type with "
1632 f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
1633 f"and 'closed' ({self.closed} vs {type.closed}) attributes"
1634 )
1635 else:
1636 raise TypeError(
1637 f"Not supported to convert IntervalArray to '{type}' type"
1638 )
1639
1640 return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
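    # Usage sketch (illustrative, requires pyarrow): ``pyarrow.array`` picks up
    # this protocol and returns an extension array whose storage is a
    # struct<left, right> array:
    #
    #   import pyarrow as pa
    #   pa.array(pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]))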
1641
1642 _interval_shared_docs["to_tuples"] = textwrap.dedent(
1643 """
1644 Return an %(return_type)s of tuples of the form (left, right).
1645
1646 Parameters
1647 ----------
1648 na_tuple : bool, default True
1649 If ``True``, return ``NA`` as a tuple ``(nan, nan)``. If ``False``,
1650 just return ``NA`` as ``nan``.
1651
1652 Returns
1653 -------
1654 tuples: %(return_type)s
1655 %(examples)s\
1656 """
1657 )
1658
1659 @Appender(
1660 _interval_shared_docs["to_tuples"]
1661 % {
1662 "return_type": (
1663 "ndarray (if self is IntervalArray) or Index (if self is IntervalIndex)"
1664 ),
1665 "examples": textwrap.dedent(
1666 """\
1667
1668 Examples
1669 --------
1670 For :class:`pandas.IntervalArray`:
1671
1672 >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
1673 >>> idx
1674 <IntervalArray>
1675 [(0, 1], (1, 2]]
1676 Length: 2, dtype: interval[int64, right]
1677 >>> idx.to_tuples()
1678 array([(0, 1), (1, 2)], dtype=object)
1679
1680 For :class:`pandas.IntervalIndex`:
1681
1682 >>> idx = pd.interval_range(start=0, end=2)
1683 >>> idx
1684 IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
1685 >>> idx.to_tuples()
1686 Index([(0, 1), (1, 2)], dtype='object')
1687 """
1688 ),
1689 }
1690 )
1691 def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
1692 tuples = com.asarray_tuplesafe(zip(self._left, self._right))
1693 if not na_tuple:
1694 # GH 18756
1695 tuples = np.where(~self.isna(), tuples, np.nan)
1696 return tuples
1697
1698 # ---------------------------------------------------------------------
1699
1700 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
1701 value_left, value_right = self._validate_setitem_value(value)
1702
1703 if isinstance(self._left, np.ndarray):
1704 np.putmask(self._left, mask, value_left)
1705 assert isinstance(self._right, np.ndarray)
1706 np.putmask(self._right, mask, value_right)
1707 else:
1708 self._left._putmask(mask, value_left)
1709 assert not isinstance(self._right, np.ndarray)
1710 self._right._putmask(mask, value_right)
1711
1712 def insert(self, loc: int, item: Interval) -> Self:
1713 """
        Return a new IntervalArray inserting the new item at the given location.
        Follows numpy.insert semantics for negative values. Only Interval
        objects and NA can be inserted into an IntervalArray.
1717
1718 Parameters
1719 ----------
1720 loc : int
1721 item : Interval
1722
1723 Returns
1724 -------
1725 IntervalArray
1726 """
1727 left_insert, right_insert = self._validate_scalar(item)
1728
1729 new_left = self.left.insert(loc, left_insert)
1730 new_right = self.right.insert(loc, right_insert)
1731
1732 return self._shallow_copy(new_left, new_right)
1733
1734 def delete(self, loc) -> Self:
1735 if isinstance(self._left, np.ndarray):
1736 new_left = np.delete(self._left, loc)
1737 assert isinstance(self._right, np.ndarray)
1738 new_right = np.delete(self._right, loc)
1739 else:
1740 new_left = self._left.delete(loc)
1741 assert not isinstance(self._right, np.ndarray)
1742 new_right = self._right.delete(loc)
1743 return self._shallow_copy(left=new_left, right=new_right)
1744
1745 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
1746 def repeat(
1747 self,
1748 repeats: int | Sequence[int],
1749 axis: AxisInt | None = None,
1750 ) -> Self:
1751 nv.validate_repeat((), {"axis": axis})
1752 left_repeat = self.left.repeat(repeats)
1753 right_repeat = self.right.repeat(repeats)
1754 return self._shallow_copy(left=left_repeat, right=right_repeat)
1755
1756 _interval_shared_docs["contains"] = textwrap.dedent(
1757 """
1758 Check elementwise if the Intervals contain the value.
1759
1760 Return a boolean mask whether the value is contained in the Intervals
1761 of the %(klass)s.
1762
1763 Parameters
1764 ----------
1765 other : scalar
1766 The value to check whether it is contained in the Intervals.
1767
1768 Returns
1769 -------
1770 boolean array
1771
1772 See Also
1773 --------
1774 Interval.contains : Check whether Interval object contains value.
1775 %(klass)s.overlaps : Check if an Interval overlaps the values in the
1776 %(klass)s.
1777
1778 Examples
1779 --------
1780 %(examples)s
1781 >>> intervals.contains(0.5)
1782 array([ True, False, False])
1783 """
1784 )
1785
1786 @Appender(
1787 _interval_shared_docs["contains"]
1788 % {
1789 "klass": "IntervalArray",
1790 "examples": textwrap.dedent(
1791 """\
1792 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
1793 >>> intervals
1794 <IntervalArray>
1795 [(0, 1], (1, 3], (2, 4]]
1796 Length: 3, dtype: interval[int64, right]
1797 """
1798 ),
1799 }
1800 )
1801 def contains(self, other):
1802 if isinstance(other, Interval):
1803 raise NotImplementedError("contains not implemented for two intervals")
1804
1805 return (self._left < other if self.open_left else self._left <= other) & (
1806 other < self._right if self.open_right else other <= self._right
1807 )
1808
1809 def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
1810 if isinstance(values, IntervalArray):
1811 if self.closed != values.closed:
1812 # not comparable -> no overlap
1813 return np.zeros(self.shape, dtype=bool)
1814
1815 if self.dtype == values.dtype:
1816 # GH#38353 instead of casting to object, operating on a
1817 # complex128 ndarray is much more performant.
1818 left = self._combined.view("complex128")
1819 right = values._combined.view("complex128")
1820 # error: Argument 1 to "isin" has incompatible type
1821 # "Union[ExtensionArray, ndarray[Any, Any],
1822 # ndarray[Any, dtype[Any]]]"; expected
1823 # "Union[_SupportsArray[dtype[Any]],
1824 # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
1825 # int, float, complex, str, bytes, _NestedSequence[
1826 # Union[bool, int, float, complex, str, bytes]]]"
1827 return np.isin(left, right).ravel() # type: ignore[arg-type]
1828
1829 elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
1830 values.left.dtype
1831 ):
1832 # not comparable -> no overlap
1833 return np.zeros(self.shape, dtype=bool)
1834
1835 return isin(self.astype(object), values.astype(object))
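    # Sketch of the complex128 trick above: when the dtypes match exactly, each
    # (left, right) pair of 64-bit bounds is reinterpreted as one complex128
    # value (for a float64 subtype, (0.0, 1.0] maps to 0+1j), so membership
    # testing reduces to ``np.isin`` on 1-D data instead of an object-dtype
    # comparison.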
1836
1837 @property
1838 def _combined(self) -> IntervalSide:
1839 # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
1840 # has no attribute "reshape" [union-attr]
1841 left = self.left._values.reshape(-1, 1) # type: ignore[union-attr]
1842 right = self.right._values.reshape(-1, 1) # type: ignore[union-attr]
1843 if needs_i8_conversion(left.dtype):
1844 # error: Item "ndarray[Any, Any]" of "Any | ndarray[Any, Any]" has
1845 # no attribute "_concat_same_type"
1846 comb = left._concat_same_type( # type: ignore[union-attr]
1847 [left, right], axis=1
1848 )
1849 else:
1850 comb = np.concatenate([left, right], axis=1)
1851 return comb
1852
1853 def _from_combined(self, combined: np.ndarray) -> IntervalArray:
1854 """
1855 Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
1856 """
1857 nc = combined.view("i8").reshape(-1, 2)
1858
1859 dtype = self._left.dtype
1860 if needs_i8_conversion(dtype):
1861 assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
1862 new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
1863 assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
1864 new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
1865 else:
1866 assert isinstance(dtype, np.dtype)
1867 new_left = nc[:, 0].view(dtype)
1868 new_right = nc[:, 1].view(dtype)
1869 return self._shallow_copy(left=new_left, right=new_right)
1870
1871 def unique(self) -> IntervalArray:
1872 # No overload variant of "__getitem__" of "ExtensionArray" matches argument
1873 # type "Tuple[slice, int]"
1874 nc = unique(
1875 self._combined.view("complex128")[:, 0] # type: ignore[call-overload]
1876 )
1877 nc = nc[:, None]
1878 return self._from_combined(nc)
1879
1880
1881def _maybe_convert_platform_interval(values) -> ArrayLike:
1882 """
1883 Try to do platform conversion, with special casing for IntervalArray.
1884 Wrapper around maybe_convert_platform that alters the default return
1885 dtype in certain cases to be compatible with IntervalArray. For example,
1886 empty lists return with integer dtype instead of object dtype, which is
1887 prohibited for IntervalArray.
1888
1889 Parameters
1890 ----------
1891 values : array-like
1892
1893 Returns
1894 -------
1895 array
1896 """
1897 if isinstance(values, (list, tuple)) and len(values) == 0:
1898 # GH 19016
1899 # empty lists/tuples get object dtype by default, but this is
1900 # prohibited for IntervalArray, so coerce to integer instead
1901 return np.array([], dtype=np.int64)
1902 elif not is_list_like(values) or isinstance(values, ABCDataFrame):
1903 # This will raise later, but we avoid passing to maybe_convert_platform
1904 return values
1905 elif isinstance(getattr(values, "dtype", None), CategoricalDtype):
1906 values = np.asarray(values)
1907 elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
1908 # TODO: should we just cast these to list?
1909 return values
1910 else:
1911 values = extract_array(values, extract_numpy=True)
1912
1913 if not hasattr(values, "dtype"):
1914 values = np.asarray(values)
1915 if values.dtype.kind in "iu" and values.dtype != np.int64:
1916 values = values.astype(np.int64)
1917 return values
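# Behavior sketch (illustrative) for the helper above: an empty list is coerced
# to an int64 ndarray rather than object dtype, which is why an empty
# ``pd.arrays.IntervalArray([])`` ends up with dtype ``interval[int64, right]``.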