Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/interval.py: 25%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

616 statements  

1from __future__ import annotations 

2 

3import operator 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 TYPE_CHECKING, 

11 Literal, 

12 Union, 

13 overload, 

14) 

15import warnings 

16 

17import numpy as np 

18 

19from pandas._libs import lib 

20from pandas._libs.interval import ( 

21 VALID_CLOSED, 

22 Interval, 

23 IntervalMixin, 

24 intervals_to_interval_bounds, 

25) 

26from pandas._libs.missing import NA 

27from pandas._typing import ( 

28 ArrayLike, 

29 AxisInt, 

30 Dtype, 

31 FillnaOptions, 

32 IntervalClosedType, 

33 NpDtype, 

34 PositionalIndexer, 

35 ScalarIndexer, 

36 Self, 

37 SequenceIndexer, 

38 SortKind, 

39 TimeArrayLike, 

40 npt, 

41) 

42from pandas.compat.numpy import function as nv 

43from pandas.errors import IntCastingNaNError 

44from pandas.util._decorators import Appender 

45 

46from pandas.core.dtypes.cast import ( 

47 LossySetitemError, 

48 maybe_upcast_numeric_to_64bit, 

49) 

50from pandas.core.dtypes.common import ( 

51 is_float_dtype, 

52 is_integer_dtype, 

53 is_list_like, 

54 is_object_dtype, 

55 is_scalar, 

56 is_string_dtype, 

57 needs_i8_conversion, 

58 pandas_dtype, 

59) 

60from pandas.core.dtypes.dtypes import ( 

61 CategoricalDtype, 

62 IntervalDtype, 

63) 

64from pandas.core.dtypes.generic import ( 

65 ABCDataFrame, 

66 ABCDatetimeIndex, 

67 ABCIntervalIndex, 

68 ABCPeriodIndex, 

69) 

70from pandas.core.dtypes.missing import ( 

71 is_valid_na_for_dtype, 

72 isna, 

73 notna, 

74) 

75 

76from pandas.core.algorithms import ( 

77 isin, 

78 take, 

79 unique, 

80 value_counts_internal as value_counts, 

81) 

82from pandas.core.arrays import ArrowExtensionArray 

83from pandas.core.arrays.base import ( 

84 ExtensionArray, 

85 _extension_array_shared_docs, 

86) 

87from pandas.core.arrays.datetimes import DatetimeArray 

88from pandas.core.arrays.timedeltas import TimedeltaArray 

89import pandas.core.common as com 

90from pandas.core.construction import ( 

91 array as pd_array, 

92 ensure_wrapped_if_datetimelike, 

93 extract_array, 

94) 

95from pandas.core.indexers import check_array_indexer 

96from pandas.core.ops import ( 

97 invalid_comparison, 

98 unpack_zerodim_and_defer, 

99) 

100 

101if TYPE_CHECKING: 

102 from collections.abc import ( 

103 Iterator, 

104 Sequence, 

105 ) 

106 

107 from pandas import ( 

108 Index, 

109 Series, 

110 ) 

111 

112 

# Type of the container holding one side (left or right endpoints) of the
# intervals; used below to annotate ``IntervalArray._left`` / ``_right``.
IntervalSide = Union[TimeArrayLike, np.ndarray]
# Type of a scalar lookup result: an ``Interval``, or a float for a missing
# entry (the class-level ``_fill_value`` is ``np.nan``).
IntervalOrNA = Union[Interval, float]

115 

116_interval_shared_docs: dict[str, str] = {} 

117 

118_shared_docs_kwargs = { 

119 "klass": "IntervalArray", 

120 "qualname": "arrays.IntervalArray", 

121 "name": "", 

122} 

123 

124 

125_interval_shared_docs[ 

126 "class" 

127] = """ 

128%(summary)s 

129 

130Parameters 

131---------- 

132data : array-like (1-dimensional) 

133 Array-like (ndarray, :class:`DateTimeArray`, :class:`TimeDeltaArray`) containing 

134 Interval objects from which to build the %(klass)s. 

135closed : {'left', 'right', 'both', 'neither'}, default 'right' 

136 Whether the intervals are closed on the left-side, right-side, both or 

137 neither. 

138dtype : dtype or None, default None 

139 If None, dtype will be inferred. 

140copy : bool, default False 

141 Copy the input data. 

142%(name)s\ 

143verify_integrity : bool, default True 

144 Verify that the %(klass)s is valid. 

145 

146Attributes 

147---------- 

148left 

149right 

150closed 

151mid 

152length 

153is_empty 

154is_non_overlapping_monotonic 

155%(extra_attributes)s\ 

156 

157Methods 

158------- 

159from_arrays 

160from_tuples 

161from_breaks 

162contains 

163overlaps 

164set_closed 

165to_tuples 

166%(extra_methods)s\ 

167 

168See Also 

169-------- 

170Index : The base pandas Index type. 

171Interval : A bounded slice-like interval; the elements of an %(klass)s. 

172interval_range : Function to create a fixed frequency IntervalIndex. 

173cut : Bin values into discrete Intervals. 

174qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. 

175 

176Notes 

177----- 

178See the `user guide 

179<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__ 

180for more. 

181 

182%(examples)s\ 

183""" 

184 

185 

186@Appender( 

187 _interval_shared_docs["class"] 

188 % { 

189 "klass": "IntervalArray", 

190 "summary": "Pandas array for interval data that are closed on the same side.", 

191 "name": "", 

192 "extra_attributes": "", 

193 "extra_methods": "", 

194 "examples": textwrap.dedent( 

195 """\ 

196 Examples 

197 -------- 

198 A new ``IntervalArray`` can be constructed directly from an array-like of 

199 ``Interval`` objects: 

200 

201 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) 

202 <IntervalArray> 

203 [(0, 1], (1, 5]] 

204 Length: 2, dtype: interval[int64, right] 

205 

206 It may also be constructed using one of the constructor 

207 methods: :meth:`IntervalArray.from_arrays`, 

208 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 

209 """ 

210 ), 

211 } 

212) 

213class IntervalArray(IntervalMixin, ExtensionArray): 

214 can_hold_na = True 

215 _na_value = _fill_value = np.nan 

216 

@property
def ndim(self) -> Literal[1]:
    """Number of dimensions of the array; always 1."""
    return 1

220 

221 # To make mypy recognize the fields 

222 _left: IntervalSide 

223 _right: IntervalSide 

224 _dtype: IntervalDtype 

225 

226 # --------------------------------------------------------------------- 

227 # Constructors 

228 

def __new__(
    cls,
    data,
    closed: IntervalClosedType | None = None,
    dtype: Dtype | None = None,
    copy: bool = False,
    verify_integrity: bool = True,
):
    # Build an instance from ``data``, which is either an existing instance
    # of ``cls`` or a collection that is split into left/right bounds.
    #
    # Raises TypeError when ``data`` is a scalar. When ``verify_integrity``
    # is true the resulting sides are checked with ``cls._validate``.
    #
    # NOTE(review): block nesting below was reconstructed from a listing
    # whose indentation was flattened -- confirm against the real file.
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, cls):
        # Reuse the existing sides; an explicit ``closed`` takes precedence
        # over the one stored on ``data``. The ``dtype`` argument is
        # replaced by one derived from the left side.
        left: IntervalSide = data._left
        right: IntervalSide = data._right
        closed = closed or data.closed
        dtype = IntervalDtype(left.dtype, closed=closed)
    else:
        # don't allow scalars
        if is_scalar(data):
            msg = (
                f"{cls.__name__}(...) must be called with a collection "
                f"of some kind, {data} was passed"
            )
            raise TypeError(msg)

        # might need to convert empty or purely na data
        data = _maybe_convert_platform_interval(data)
        # ``closed`` is only validated across elements when the caller did
        # not supply one.
        left, right, infer_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None
        )
        if left.dtype == object:
            left = lib.maybe_convert_objects(left)
            right = lib.maybe_convert_objects(right)
        closed = closed or infer_closed

        left, right, dtype = cls._ensure_simple_new_inputs(
            left,
            right,
            closed=closed,
            copy=copy,
            dtype=dtype,
        )

    if verify_integrity:
        cls._validate(left, right, dtype=dtype)

    return cls._simple_new(
        left,
        right,
        dtype=dtype,
    )

279 

@classmethod
def _simple_new(
    cls,
    left: IntervalSide,
    right: IntervalSide,
    dtype: IntervalDtype,
) -> Self:
    """
    Create an instance directly from prepared sides and dtype.

    The arguments are stored as ``_left``, ``_right`` and ``_dtype`` as
    given; this method performs no conversion or validation itself.
    """
    # Allocate via IntervalMixin.__new__ so that cls.__new__ is not re-entered.
    result = IntervalMixin.__new__(cls)
    result._left = left
    result._right = right
    result._dtype = dtype

    return result

293 

@classmethod
def _ensure_simple_new_inputs(
    cls,
    left,
    right,
    closed: IntervalClosedType | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> tuple[IntervalSide, IntervalSide, IntervalDtype]:
    """
    Ensure correctness of input parameters for cls._simple_new.

    Parameters
    ----------
    left, right
        Bounds for each interval; each is passed through ``ensure_index``.
    closed : {'left', 'right', 'both', 'neither'} or None
        Falls back to ``dtype.closed`` when ``dtype`` is an IntervalDtype,
        and finally to ``"right"``.
    copy : bool, default False
        Forwarded to ``ensure_index`` for both sides.
    dtype : dtype or None
        If given, must resolve to an IntervalDtype.

    Returns
    -------
    tuple of (left, right, IntervalDtype)

    Raises
    ------
    TypeError
        If ``dtype`` is given but is not an IntervalDtype, or the left side
        has a categorical or string dtype.
    ValueError
        If ``closed`` conflicts with ``dtype.closed``, the two sides have
        different container types, the left side is a PeriodIndex, or
        datetime sides have different time zones.

    Notes
    -----
    NOTE(review): block nesting was reconstructed from a listing whose
    indentation was flattened -- confirm against the real file.
    """
    from pandas.core.indexes.base import ensure_index

    left = ensure_index(left, copy=copy)
    left = maybe_upcast_numeric_to_64bit(left)

    right = ensure_index(right, copy=copy)
    right = maybe_upcast_numeric_to_64bit(right)

    if closed is None and isinstance(dtype, IntervalDtype):
        closed = dtype.closed

    closed = closed or "right"

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if isinstance(dtype, IntervalDtype):
            if dtype.subtype is not None:
                left = left.astype(dtype.subtype)
                right = right.astype(dtype.subtype)
        else:
            msg = f"dtype must be an IntervalDtype, got {dtype}"
            raise TypeError(msg)

        if dtype.closed is None:
            # possibly loading an old pickle
            dtype = IntervalDtype(dtype.subtype, closed)
        elif closed != dtype.closed:
            raise ValueError("closed keyword does not match dtype.closed")

    # coerce dtypes to match if needed
    if is_float_dtype(left.dtype) and is_integer_dtype(right.dtype):
        right = right.astype(left.dtype)
    elif is_float_dtype(right.dtype) and is_integer_dtype(left.dtype):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        msg = (
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
        raise ValueError(msg)
    if isinstance(left.dtype, CategoricalDtype) or is_string_dtype(left.dtype):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
        raise TypeError(msg)
    if isinstance(left, ABCPeriodIndex):
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        raise ValueError(msg)
    if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        msg = (
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )
        raise ValueError(msg)
    elif needs_i8_conversion(left.dtype) and left.unit != right.unit:
        # e.g. m8[s] vs m8[ms], try to cast to a common dtype GH#55714
        left_arr, right_arr = left._data._ensure_matching_resos(right._data)
        left = ensure_index(left_arr)
        right = ensure_index(right_arr)

    # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
    left = ensure_wrapped_if_datetimelike(left)
    left = extract_array(left, extract_numpy=True)
    right = ensure_wrapped_if_datetimelike(right)
    right = extract_array(right, extract_numpy=True)

    if isinstance(left, ArrowExtensionArray) or isinstance(
        right, ArrowExtensionArray
    ):
        # The shared-buffer check below is skipped for Arrow-backed sides.
        pass
    else:
        # Unwrap to the underlying ndarray (via ``_ndarray`` / ``_data`` when
        # present) and compare the ``.base`` buffers of the two sides.
        lbase = getattr(left, "_ndarray", left)
        lbase = getattr(lbase, "_data", lbase).base
        rbase = getattr(right, "_ndarray", right)
        rbase = getattr(rbase, "_data", rbase).base
        if lbase is not None and lbase is rbase:
            # If these share data, then setitem could corrupt our IA
            right = right.copy()

    # The returned dtype is always rebuilt from the final left side.
    dtype = IntervalDtype(left.dtype, closed=closed)

    return left, right, dtype

390 

@classmethod
def _from_sequence(
    cls,
    scalars,
    *,
    dtype: Dtype | None = None,
    copy: bool = False,
) -> Self:
    """Construct an array from ``scalars`` by calling the class constructor
    with the given ``dtype`` and ``copy``."""
    return cls(scalars, dtype=dtype, copy=copy)

400 

@classmethod
def _from_factorized(cls, values: np.ndarray, original: IntervalArray) -> Self:
    """Rebuild an array from ``values``, reusing the dtype of ``original``."""
    return cls._from_sequence(values, dtype=original.dtype)

404 

405 _interval_shared_docs["from_breaks"] = textwrap.dedent( 

406 """ 

407 Construct an %(klass)s from an array of splits. 

408 

409 Parameters 

410 ---------- 

411 breaks : array-like (1-dimensional) 

412 Left and right bounds for each interval. 

413 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

414 Whether the intervals are closed on the left-side, right-side, both 

415 or neither.\ 

416 %(name)s 

417 copy : bool, default False 

418 Copy the data. 

419 dtype : dtype or None, default None 

420 If None, dtype will be inferred. 

421 

422 Returns 

423 ------- 

424 %(klass)s 

425 

426 See Also 

427 -------- 

428 interval_range : Function to create a fixed frequency IntervalIndex. 

429 %(klass)s.from_arrays : Construct from a left and right array. 

430 %(klass)s.from_tuples : Construct from a sequence of tuples. 

431 

432 %(examples)s\ 

433 """ 

434 ) 

435 

@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def from_breaks(
    cls,
    breaks,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> Self:
    # The docstring is supplied by @Appender; keep this body docstring-free.
    edges = _maybe_convert_platform_interval(breaks)

    # Consecutive edges form the intervals: every edge except the last is a
    # left bound, every edge except the first is a right bound.
    lower_bounds = edges[:-1]
    upper_bounds = edges[1:]
    return cls.from_arrays(
        lower_bounds, upper_bounds, closed, copy=copy, dtype=dtype
    )

464 

465 _interval_shared_docs["from_arrays"] = textwrap.dedent( 

466 """ 

467 Construct from two arrays defining the left and right bounds. 

468 

469 Parameters 

470 ---------- 

471 left : array-like (1-dimensional) 

472 Left bounds for each interval. 

473 right : array-like (1-dimensional) 

474 Right bounds for each interval. 

475 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

476 Whether the intervals are closed on the left-side, right-side, both 

477 or neither.\ 

478 %(name)s 

479 copy : bool, default False 

480 Copy the data. 

481 dtype : dtype, optional 

482 If None, dtype will be inferred. 

483 

484 Returns 

485 ------- 

486 %(klass)s 

487 

488 Raises 

489 ------ 

490 ValueError 

491 When a value is missing in only one of `left` or `right`. 

492 When a value in `left` is greater than the corresponding value 

493 in `right`. 

494 

495 See Also 

496 -------- 

497 interval_range : Function to create a fixed frequency IntervalIndex. 

498 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

499 splits. 

500 %(klass)s.from_tuples : Construct an %(klass)s from an 

501 array-like of tuples. 

502 

503 Notes 

504 ----- 

505 Each element of `left` must be less than or equal to the `right` 

506 element at the same position. If an element is missing, it must be 

507 missing in both `left` and `right`. A TypeError is raised when 

508 using an unsupported type for `left` or `right`. At the moment, 

509 'category', 'object', and 'string' subtypes are not supported. 

510 

511 %(examples)s\ 

512 """ 

513 ) 

514 

@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def from_arrays(
    cls,
    left,
    right,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> Self:
    # The docstring is supplied by @Appender; keep this body docstring-free.
    lower = _maybe_convert_platform_interval(left)
    upper = _maybe_convert_platform_interval(right)

    # Normalize both sides and resolve the interval dtype, then always
    # validate (this constructor has no verify_integrity switch).
    lower, upper, interval_dtype = cls._ensure_simple_new_inputs(
        lower,
        upper,
        closed=closed,
        copy=copy,
        dtype=dtype,
    )
    cls._validate(lower, upper, dtype=interval_dtype)

    return cls._simple_new(lower, upper, dtype=interval_dtype)

554 

555 _interval_shared_docs["from_tuples"] = textwrap.dedent( 

556 """ 

557 Construct an %(klass)s from an array-like of tuples. 

558 

559 Parameters 

560 ---------- 

561 data : array-like (1-dimensional) 

562 Array of tuples. 

563 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

564 Whether the intervals are closed on the left-side, right-side, both 

565 or neither.\ 

566 %(name)s 

567 copy : bool, default False 

568 By-default copy the data, this is compat only and ignored. 

569 dtype : dtype or None, default None 

570 If None, dtype will be inferred. 

571 

572 Returns 

573 ------- 

574 %(klass)s 

575 

576 See Also 

577 -------- 

578 interval_range : Function to create a fixed frequency IntervalIndex. 

579 %(klass)s.from_arrays : Construct an %(klass)s from a left and 

580 right array. 

581 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

582 splits. 

583 

584 %(examples)s\ 

585 """ 

586 ) 

587 

@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    <IntervalArray>
    [(0, 1], (1, 2]]
    Length: 2, dtype: interval[int64, right]
    """
        ),
    }
)
def from_tuples(
    cls,
    data,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> Self:
    # The docstring is supplied by @Appender; keep this body docstring-free.
    # ``copy`` is accepted for signature compatibility only: copy=False is
    # always forwarded to from_arrays.
    if len(data):
        lefts, rights = [], []
    else:
        # ensure that empty data keeps input dtype
        lefts = rights = data

    klass_name = cls.__name__
    for item in data:
        if not isinstance(item, tuple) and isna(item):
            # A missing entry is missing on both sides.
            lower = upper = np.nan
        else:
            try:
                # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                lower, upper = item
            except ValueError as err:
                raise ValueError(
                    f"{klass_name}.from_tuples requires tuples of length 2, got {item}"
                ) from err
            except TypeError as err:
                raise TypeError(
                    f"{klass_name}.from_tuples received an invalid item, {item}"
                ) from err
        lefts.append(lower)
        rights.append(upper)

    return cls.from_arrays(lefts, rights, closed, copy=False, dtype=dtype)

637 

@classmethod
def _validate(cls, left, right, dtype: IntervalDtype) -> None:
    """
    Check that ``left``, ``right`` and ``dtype`` describe a valid array.

    Raises
    ------
    ValueError
        If ``dtype`` is not an IntervalDtype, the sides differ in length,
        a value is missing on only one side, or any non-missing left
        bound is greater than its right bound.
    """
    if not isinstance(dtype, IntervalDtype):
        raise ValueError(f"invalid dtype: {dtype}")

    if len(left) != len(right):
        raise ValueError("left and right must have the same length")

    left_present = notna(left)
    right_present = notna(right)
    if not (left_present == right_present).all():
        raise ValueError(
            "missing values must be missing in the same "
            "location both left and right sides"
        )

    # The presence masks agree at this point, so the left mask selects the
    # non-missing entries of both sides.
    if not (left[left_present] <= right[left_present]).all():
        raise ValueError("left side of interval must be <= right side")

667 

def _shallow_copy(self, left, right) -> Self:
    """
    Build a new array from replacement sides, keeping ``self.closed``.

    Parameters
    ----------
    left : Index
        Values to be used for the left-side of the intervals.
    right : Index
        Values to be used for the right-side of the intervals.
    """
    # The subtype comes from the replacement left side, not from self.
    target_dtype = IntervalDtype(left.dtype, closed=self.closed)
    new_left, new_right, target_dtype = self._ensure_simple_new_inputs(
        left, right, dtype=target_dtype
    )
    return self._simple_new(new_left, new_right, dtype=target_dtype)

683 

684 # --------------------------------------------------------------------- 

685 # Descriptive 

686 

@property
def dtype(self) -> IntervalDtype:
    """The IntervalDtype stored on this array (``self._dtype``)."""
    return self._dtype

690 

@property
def nbytes(self) -> int:
    """Sum of the ``nbytes`` of the left and right sides."""
    return self.left.nbytes + self.right.nbytes

694 

@property
def size(self) -> int:
    """Number of elements, taken from the size of the left side."""
    # Avoid materializing self.values
    return self.left.size

699 

700 # --------------------------------------------------------------------- 

701 # EA Interface 

702 

def __iter__(self) -> Iterator:
    """Iterate over the elements of ``np.asarray(self)``."""
    return iter(np.asarray(self))

705 

def __len__(self) -> int:
    """Number of elements, taken from the length of the left side."""
    return len(self._left)

708 

@overload
def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
    ...

@overload
def __getitem__(self, key: SequenceIndexer) -> Self:
    ...

def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
    """
    Select by position.

    Returns a single ``Interval`` (or the fill value for a missing entry)
    when indexing the sides yields a scalar, otherwise a new array built
    from the selected left and right values.

    Raises
    ------
    ValueError
        If the selection is more than one-dimensional.
    """
    key = check_array_indexer(self, key)
    picked_left = self._left[key]
    picked_right = self._right[key]

    if isinstance(picked_left, (np.ndarray, ExtensionArray)):
        if np.ndim(picked_left) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        # Argument 2 to "_simple_new" of "IntervalArray" has incompatible type
        # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
        # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
        # ndarray[Any, Any]]"
        return self._simple_new(picked_left, picked_right, dtype=self.dtype)  # type: ignore[arg-type]

    # scalar
    if is_scalar(picked_left) and isna(picked_left):
        return self._fill_value
    return Interval(picked_left, picked_right, self.closed)

735 

def __setitem__(self, key, value) -> None:
    """Assign ``value`` at ``key`` by writing its left and right parts into
    the two underlying sides in place."""
    # The value is validated (and split into sides) before the key is checked.
    value_left, value_right = self._validate_setitem_value(value)
    key = check_array_indexer(self, key)

    self._left[key] = value_left
    self._right[key] = value_right

742 

def _cmp_method(self, other, op):
    # Shared implementation behind the rich-comparison dunders; ``op`` is one
    # of the ``operator`` comparison functions.
    #
    # NOTE(review): block nesting was reconstructed from a listing whose
    # indentation was flattened -- confirm against the real file.

    # ensure pandas array for list-like and eliminate non-interval scalars
    if is_list_like(other):
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")
        other = pd_array(other)
    elif not isinstance(other, Interval):
        # non-interval scalar -> no matches
        if other is NA:
            # GH#31882
            from pandas.core.arrays import BooleanArray

            # Every position is masked, so the uninitialized values in
            # ``arr`` are never exposed.
            arr = np.empty(self.shape, dtype=bool)
            mask = np.ones(self.shape, dtype=bool)
            return BooleanArray(arr, mask)
        return invalid_comparison(self, other, op)

    # determine the dtype of the elements we want to compare
    if isinstance(other, Interval):
        other_dtype = pandas_dtype("interval")
    elif not isinstance(other.dtype, CategoricalDtype):
        other_dtype = other.dtype
    else:
        # for categorical defer to categories for dtype
        other_dtype = other.categories.dtype

        # extract intervals if we have interval categories with matching closed
        if isinstance(other_dtype, IntervalDtype):
            if self.closed != other.categories.closed:
                return invalid_comparison(self, other, op)

            other = other.categories._values.take(
                other.codes, allow_fill=True, fill_value=other.categories._na_value
            )

    # interval-like -> need same closed and matching endpoints
    if isinstance(other_dtype, IntervalDtype):
        if self.closed != other.closed:
            return invalid_comparison(self, other, op)
        elif not isinstance(other, Interval):
            other = type(self)(other)

        # Ordering compares the left bound first and uses the right bound
        # to break ties.
        if op is operator.eq:
            return (self._left == other.left) & (self._right == other.right)
        elif op is operator.ne:
            return (self._left != other.left) | (self._right != other.right)
        elif op is operator.gt:
            return (self._left > other.left) | (
                (self._left == other.left) & (self._right > other.right)
            )
        elif op is operator.ge:
            return (self == other) | (self > other)
        elif op is operator.lt:
            return (self._left < other.left) | (
                (self._left == other.left) & (self._right < other.right)
            )
        else:
            # operator.le
            return (self == other) | (self < other)

    # non-interval/non-object dtype -> no matches
    if not is_object_dtype(other_dtype):
        return invalid_comparison(self, other, op)

    # object dtype -> iteratively check for intervals
    result = np.zeros(len(self), dtype=bool)
    for i, obj in enumerate(other):
        try:
            result[i] = op(self[i], obj)
        except TypeError:
            if obj is NA:
                # comparison with np.nan returns NA
                # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
                result = result.astype(object)
                result[i] = NA
            else:
                raise
    return result

821 

@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
    # Element-wise ``==``; delegates to _cmp_method with operator.eq.
    return self._cmp_method(other, operator.eq)

825 

@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
    # Element-wise ``!=``; delegates to _cmp_method with operator.ne.
    return self._cmp_method(other, operator.ne)

829 

@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
    # Element-wise ``>``; delegates to _cmp_method with operator.gt.
    return self._cmp_method(other, operator.gt)

833 

@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
    # Element-wise ``>=``; delegates to _cmp_method with operator.ge.
    return self._cmp_method(other, operator.ge)

837 

@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
    # Element-wise ``<``; delegates to _cmp_method with operator.lt.
    return self._cmp_method(other, operator.lt)

841 

@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
    # Element-wise ``<=``; delegates to _cmp_method with operator.le.
    return self._cmp_method(other, operator.le)

845 

def argsort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
    **kwargs,
) -> np.ndarray:
    """
    Return the indices that would sort this array.

    With the default options (ascending, ``kind="quicksort"``,
    ``na_position="last"``) the order is computed with ``np.lexsort`` using
    the left bounds as primary key and the right bounds as tie-breaker.
    Any other combination is forwarded to the parent implementation.
    """
    ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

    uses_defaults = ascending and kind == "quicksort" and na_position == "last"
    if not uses_defaults:
        # TODO: other cases we can use lexsort for?  much more performant.
        return super().argsort(
            ascending=ascending, kind=kind, na_position=na_position, **kwargs
        )

    # TODO: in an IntervalIndex we can reuse the cached
    #  IntervalTree.left_sorter
    # np.lexsort treats the LAST key as the primary one.
    sort_keys = (self.right, self.left)
    return np.lexsort(sort_keys)

865 

def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the smallest element according to ``argsort`` order.

    Returns the NA value for an empty array, and also when missing values
    are present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    missing = self.isna()
    candidates = self
    if missing.any():
        if not skipna:
            return self._na_value
        # Drop the missing entries before ordering.
        candidates = self[~missing]

    first = candidates.argsort()[0]
    return candidates[first]

882 

def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the largest element according to ``argsort`` order.

    Returns the NA value for an empty array, and also when missing values
    are present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    missing = self.isna()
    candidates = self
    if missing.any():
        if not skipna:
            return self._na_value
        # Drop the missing entries before ordering.
        candidates = self[~missing]

    last = candidates.argsort()[-1]
    return candidates[last]

899 

def _pad_or_backfill(  # pylint: disable=useless-parent-delegation
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    # Pure pass-through: every argument is forwarded unchanged to the parent
    # class implementation.
    # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
    #  this method entirely.
    return super()._pad_or_backfill(
        method=method, limit=limit, limit_area=limit_area, copy=copy
    )

913 

def fillna(
    self, value=None, method=None, limit: int | None = None, copy: bool = True
) -> Self:
    """
    Fill NA/NaN values.

    Parameters
    ----------
    value : Interval or NA/NaN
        Fill value. On the default path it is validated as a scalar and
        split into a left and a right fill value.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        When not None, the call is forwarded to the parent class
        implementation together with ``value`` and ``limit``.
    limit : int, default None
        Only forwarded on the ``method`` path; otherwise unused here.
    copy : bool, default True
        Passing ``False`` is not supported and raises.

    Returns
    -------
    IntervalArray
        A new array with missing entries filled.

    Raises
    ------
    NotImplementedError
        If ``copy`` is False.
    """
    if copy is False:
        raise NotImplementedError
    if method is not None:
        return super().fillna(value=value, method=method, limit=limit)

    fill_left, fill_right = self._validate_scalar(value)

    # Fill each side independently, then reassemble with the same closed side.
    filled_left = self.left.fillna(value=fill_left)
    filled_right = self.right.fillna(value=fill_right)
    return self._shallow_copy(filled_left, filled_right)

958 

def astype(self, dtype, copy: bool = True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.

    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the interval subtypes are incompatible, or the cast to a
        non-interval dtype fails.
    IntCastingNaNError
        Re-raised unchanged when casting a side to the new subtype hits it.

    Notes
    -----
    NOTE(review): block nesting was reconstructed from a listing whose
    indentation was flattened -- confirm against the real file.
    """
    from pandas import Index

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(dtype, IntervalDtype):
        if dtype == self.dtype:
            return self.copy() if copy else self

        if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(
            dtype.subtype
        ):
            # This is allowed on the Index.astype but we disallow it here
            msg = (
                f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
            )
            raise TypeError(msg)

        # need to cast to different subtype
        try:
            # We need to use Index rules for astype to prevent casting
            #  np.nan entries to int subtypes
            new_left = Index(self._left, copy=False).astype(dtype.subtype)
            new_right = Index(self._right, copy=False).astype(dtype.subtype)
        except IntCastingNaNError:
            # e.g test_subtype_integer
            # This clause must stay ahead of the generic handler below so
            # the error propagates as-is instead of being rewrapped.
            raise
        except (TypeError, ValueError) as err:
            # e.g. test_subtype_integer_errors f8->u8 can be lossy
            #  and raises ValueError
            msg = (
                f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
            )
            raise TypeError(msg) from err
        return self._shallow_copy(new_left, new_right)
    else:
        # Non-interval target: defer to the parent implementation and
        # normalize failures to TypeError.
        try:
            return super().astype(dtype, copy=copy)
        except (TypeError, ValueError) as err:
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg) from err

1019 

def equals(self, other) -> bool:
    """
    Return whether ``other`` is the same kind of array with the same content.

    True only when ``other`` has exactly the same type, the same ``closed``
    value, and both sides compare equal via their own ``equals``.
    """
    if type(self) != type(other):
        return False

    # Checked in the same order as a chained ``and``: closed, left, right.
    if not self.closed == other.closed:
        return False
    if not self.left.equals(other.left):
        return False
    return bool(self.right.equals(other.right))

1029 

@classmethod
def _concat_same_type(cls, to_concat: Sequence[IntervalArray]) -> Self:
    """
    Concatenate multiple IntervalArray

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the inputs do not all share one ``closed`` value.
    """
    distinct_closed = {arr.closed for arr in to_concat}
    if len(distinct_closed) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (closed,) = distinct_closed

    lefts = [arr.left for arr in to_concat]
    rights = [arr.right for arr in to_concat]
    left: IntervalSide = np.concatenate(lefts)
    right: IntervalSide = np.concatenate(rights)

    left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)

    return cls._simple_new(left, right, dtype=dtype)

1054 

def copy(self) -> Self:
    """
    Return a copy of the array.

    Both endpoint arrays are copied; the dtype object is reused.

    Returns
    -------
    IntervalArray
    """
    return self._simple_new(
        self._left.copy(),
        self._right.copy(),
        dtype=self.dtype,
    )

1067 

def isna(self) -> np.ndarray:
    """
    Return a boolean mask flagging the missing intervals.

    Only the left endpoints are inspected to decide whether an entry is
    missing.
    """
    left_endpoints = self._left
    return isna(left_endpoints)

1070 

def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
    """
    Shift the intervals by ``periods`` positions, padding the vacated slots.

    Parameters
    ----------
    periods : int, default 1
        Number of positions to shift. Positive values pad at the start and
        drop elements from the end; negative values do the opposite.
    fill_value : Interval or NA, optional
        Value placed in the vacated positions. NA-like values produce
        missing intervals.

    Returns
    -------
    IntervalArray
        A new array. When the array is empty or ``periods`` is 0 this is a
        plain copy of ``self``.

    Raises
    ------
    TypeError
        If ``fill_value`` is neither an Interval nor a valid NA.
    """
    if not len(self) or periods == 0:
        return self.copy()

    self._validate_scalar(fill_value)

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT

    empty_len = min(abs(periods), len(self))
    if isna(fill_value):
        from pandas import Index

        fill_value = Index(self._left, copy=False)._na_value
        # The padding must carry our own ``closed`` side: _concat_same_type
        # rejects inputs whose ``closed`` differ, and from_breaks would
        # otherwise fall back to its default side.
        empty = IntervalArray.from_breaks(
            [fill_value] * (empty_len + 1), closed=self.closed
        )
    else:
        empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)

    if periods > 0:
        a = empty
        b = self[:-periods]
    else:
        a = self[abs(periods) :]
        b = empty
    return self._concat_same_type([a, b])

1098 

def take(
    self,
    indices,
    *,
    allow_fill: bool = False,
    fill_value=None,
    axis=None,
    **kwargs,
) -> Self:
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take((), kwargs)

    if allow_fill:
        # Split the boxed fill value into one physical value per side.
        fill_left, fill_right = self._validate_scalar(fill_value)
    else:
        fill_left = fill_right = fill_value

    new_left = take(
        self._left, indices, allow_fill=allow_fill, fill_value=fill_left
    )
    new_right = take(
        self._right, indices, allow_fill=allow_fill, fill_value=fill_right
    )

    return self._shallow_copy(new_left, new_right)

1167 

def _validate_listlike(self, value):
    """
    Validate a list-like of intervals and split it into its two sides.

    Parameters
    ----------
    value : list-like
        Anything the IntervalArray constructor accepts.

    Returns
    -------
    tuple
        ``(left, right)`` endpoints of the converted value.

    Raises
    ------
    TypeError
        If ``value`` cannot be converted to intervals, or its left
        endpoints cannot be held by this array's left endpoints.
    """
    # list-like of intervals
    try:
        converted = IntervalArray(value)
        self._check_closed_matches(converted, name="value")
        value_left = converted.left
        value_right = converted.right
    except TypeError as err:
        # wrong type: not interval or NA
        raise TypeError(
            f"'value' should be an interval type, got {type(value)} instead."
        ) from err

    try:
        self.left._validate_fill_value(value_left)
    except (LossySetitemError, TypeError) as err:
        raise TypeError(
            "'value' should be a compatible interval type, "
            f"got {type(value)} instead."
        ) from err

    return value_left, value_right

1189 

def _validate_scalar(self, value):
    """
    Split a scalar Interval or NA into ``(left, right)`` endpoint values.

    Raises
    ------
    TypeError
        If ``value`` is neither an Interval nor a valid NA for the dtype
        of the left endpoints.
    """
    if isinstance(value, Interval):
        self._check_closed_matches(value, name="value")
        # TODO: check subdtype match like _validate_setitem_value?
        return value.left, value.right

    if is_valid_na_for_dtype(value, self.left.dtype):
        # GH#18295
        na_value = self.left._na_value
        return na_value, na_value

    raise TypeError(
        "can only insert Interval objects and NA into an IntervalArray"
    )

1203 

def _validate_setitem_value(self, value):
    """
    Validate a value for setting and return it as ``(left, right)``.

    Accepts an NA value, a scalar Interval, or a list-like of intervals
    (the latter is delegated to ``_validate_listlike``).

    Raises
    ------
    TypeError
        If an NA value is set on an integer-backed array.
    """
    if is_valid_na_for_dtype(value, self.left.dtype):
        # na value: need special casing to set directly on numpy arrays
        na_value = self.left._na_value
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            # GH#45484 TypeError, not ValueError, matches what we get with
            #  non-NA un-holdable value.
            raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
        return na_value, na_value

    if isinstance(value, Interval):
        # scalar interval
        self._check_closed_matches(value, name="value")
        left_value, right_value = value.left, value.right
        self.left._validate_fill_value(left_value)
        self.left._validate_fill_value(right_value)
        return left_value, right_value

    return self._validate_listlike(value)

1226 

def value_counts(self, dropna: bool = True) -> Series:
    """
    Returns a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this in a non-naive way!
    with warnings.catch_warnings():
        # Counting goes through an object array, which triggers a
        # deprecation warning that is silenced here.
        warnings.filterwarnings(
            action="ignore",
            message="The behavior of value_counts with object-dtype is deprecated",
            category=FutureWarning,
        )
        # Once the deprecation is enforced, we will need to do
        #  `result.index = result.index.astype(self.dtype)`
        return value_counts(np.asarray(self), dropna=dropna)

1255 

1256 # --------------------------------------------------------------------- 

1257 # Rendering Methods 

1258 

def _formatter(self, boxed: bool = False):
    """
    Return the callable used to render each element.

    ``str`` is returned regardless of ``boxed`` so that elements render as
    e.g. "(0, 1]" instead of "Interval(0, 1, closed='right')".
    """
    formatter = str
    return formatter

1263 

1264 # --------------------------------------------------------------------- 

1265 # Vectorized Interval Properties/Attributes 

1266 

@property
def left(self) -> Index:
    """
    Return the left endpoints of each Interval in the IntervalArray as an Index.

    The Index wraps the stored left endpoints without copying them.

    Examples
    --------

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(2, 5)])
    >>> interv_arr
    <IntervalArray>
    [(0, 1], (2, 5]]
    Length: 2, dtype: interval[int64, right]
    >>> interv_arr.left
    Index([0, 2], dtype='int64')
    """
    from pandas import Index

    endpoints = self._left
    return Index(endpoints, copy=False)

1286 

@property
def right(self) -> Index:
    """
    Return the right endpoints of each Interval in the IntervalArray as an Index.

    The Index wraps the stored right endpoints without copying them.

    Examples
    --------

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(2, 5)])
    >>> interv_arr
    <IntervalArray>
    [(0, 1], (2, 5]]
    Length: 2, dtype: interval[int64, right]
    >>> interv_arr.right
    Index([1, 5], dtype='int64')
    """
    from pandas import Index

    endpoints = self._right
    return Index(endpoints, copy=False)

1306 

@property
def length(self) -> Index:
    """
    Return an Index with entries denoting the length of each Interval.

    The length is the right endpoint minus the left endpoint.

    Examples
    --------

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
    >>> interv_arr
    <IntervalArray>
    [(0, 1], (1, 5]]
    Length: 2, dtype: interval[int64, right]
    >>> interv_arr.length
    Index([1, 4], dtype='int64')
    """
    upper = self.right
    lower = self.left
    return upper - lower

1324 

@property
def mid(self) -> Index:
    """
    Return the midpoint of each Interval in the IntervalArray as an Index.

    Examples
    --------

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
    >>> interv_arr
    <IntervalArray>
    [(0, 1], (1, 5]]
    Length: 2, dtype: interval[int64, right]
    >>> interv_arr.mid
    Index([0.5, 3.0], dtype='float64')
    """
    try:
        endpoint_sum = self.left + self.right
        return 0.5 * endpoint_sum
    except TypeError:
        # datetime safe version: endpoints that cannot be added together
        # are handled as "left plus half the length".
        half_length = 0.5 * self.length
        return self.left + half_length

1346 

# Docstring template shared through ``_interval_shared_docs``; the
# %(klass)s and %(examples)s placeholders are filled in by each user.
_interval_shared_docs["overlaps"] = textwrap.dedent(
    """
    Check elementwise if an Interval overlaps the values in the %(klass)s.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    Parameters
    ----------
    other : %(klass)s
        Interval to check against for an overlap.

    Returns
    -------
    ndarray
        Boolean array positionally indicating where an overlap occurs.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.

    Examples
    --------
    %(examples)s
    >>> intervals.overlaps(pd.Interval(0.5, 1.5))
    array([ True, True, False])

    Intervals that share closed endpoints overlap:

    >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
    array([ True, True, True])

    Intervals that only have an open endpoint in common do not overlap:

    >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
    array([False, True, False])
    """
)


@Appender(
    _interval_shared_docs["overlaps"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
        >>> data = [(0, 1), (1, 3), (2, 4)]
        >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
        >>> intervals
        <IntervalArray>
        [(0, 1], (1, 3], (2, 4]]
        Length: 3, dtype: interval[int64, right]
        """
        ),
    }
)
def overlaps(self, other):
    # Array-vs-array overlap is not implemented; only a scalar Interval
    # is accepted.
    if isinstance(other, (IntervalArray, ABCIntervalIndex)):
        raise NotImplementedError
    if not isinstance(other, Interval):
        raise TypeError(
            f"`other` must be Interval-like, got {type(other).__name__}"
        )

    # equality is okay if both endpoints are closed (overlap at a point)
    left_touch_ok = self.closed_left and other.closed_right
    right_touch_ok = other.closed_left and self.closed_right
    left_cmp = le if left_touch_ok else lt
    right_cmp = le if right_touch_ok else lt

    # overlapping is the negation of the two intervals being disjoint:
    # disjoint = (A.left > B.right) or (B.left > A.right)
    # (simplifying the negation allows this to be done in fewer operations)
    starts_before_other_ends = left_cmp(self.left, other.right)
    other_starts_before_end = right_cmp(other.left, self.right)
    return starts_before_other_ends & other_starts_before_end

1418 

1419 # --------------------------------------------------------------------- 

1420 

@property
def closed(self) -> IntervalClosedType:
    """
    String describing the inclusive side of the intervals.

    Either ``left``, ``right``, ``both`` or ``neither``.

    Examples
    --------

    For arrays:

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
    >>> interv_arr
    <IntervalArray>
    [(0, 1], (1, 5]]
    Length: 2, dtype: interval[int64, right]
    >>> interv_arr.closed
    'right'

    For Interval Index:

    >>> interv_idx = pd.interval_range(start=0, end=2)
    >>> interv_idx
    IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
    >>> interv_idx.closed
    'right'
    """
    # The closed side is stored on the dtype, not on the array itself.
    interval_dtype = self.dtype
    return interval_dtype.closed

1450 

# Docstring template shared through ``_interval_shared_docs``; the
# %(klass)s and %(examples)s placeholders are filled in by each user.
_interval_shared_docs["set_closed"] = textwrap.dedent(
    """
    Return an identical %(klass)s closed on the specified side.

    Parameters
    ----------
    closed : {'left', 'right', 'both', 'neither'}
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    %(klass)s

    %(examples)s\
    """
)


@Appender(
    _interval_shared_docs["set_closed"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
        >>> index
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        >>> index.set_closed('both')
        <IntervalArray>
        [[0, 1], [1, 2], [2, 3]]
        Length: 3, dtype: interval[int64, both]
        """
        ),
    }
)
def set_closed(self, closed: IntervalClosedType) -> Self:
    if closed not in VALID_CLOSED:
        raise ValueError(f"invalid option for 'closed': {closed}")

    # The endpoint arrays are reused as-is; only the dtype is rebuilt with
    # the requested closed side.
    new_dtype = IntervalDtype(self._left.dtype, closed=closed)
    return self._simple_new(self._left, self._right, dtype=new_dtype)

1498 

# Docstring template shared through ``_interval_shared_docs``; the
# %(klass)s placeholder is filled in by each user.
_interval_shared_docs["is_non_overlapping_monotonic"] = """
    Return a boolean whether the %(klass)s is non-overlapping and monotonic.

    Non-overlapping means (no Intervals share points), and monotonic means
    either monotonic increasing or monotonic decreasing.

    Examples
    --------
    For arrays:

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
    >>> interv_arr
    <IntervalArray>
    [(0, 1], (1, 5]]
    Length: 2, dtype: interval[int64, right]
    >>> interv_arr.is_non_overlapping_monotonic
    True

    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1),
    ... pd.Interval(-1, 0.1)])
    >>> interv_arr
    <IntervalArray>
    [(0.0, 1.0], (-1.0, 0.1]]
    Length: 2, dtype: interval[float64, right]
    >>> interv_arr.is_non_overlapping_monotonic
    False

    For Interval Index:

    >>> interv_idx = pd.interval_range(start=0, end=2)
    >>> interv_idx
    IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
    >>> interv_idx.is_non_overlapping_monotonic
    True

    >>> interv_idx = pd.interval_range(start=0, end=2, closed='both')
    >>> interv_idx
    IntervalIndex([[0, 1], [1, 2]], dtype='interval[int64, both]')
    >>> interv_idx.is_non_overlapping_monotonic
    False
    """


@property
@Appender(
    _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
)
def is_non_overlapping_monotonic(self) -> bool:
    # must be increasing  (e.g., [0, 1), [1, 2), [2, 3), ... )
    # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
    # we already require left <= right
    lefts, rights = self._left, self._right

    if self.closed == "both":
        # strict inequality for closed == 'both'; equality implies
        # overlapping at a point when both sides of intervals are included
        if (rights[:-1] < lefts[1:]).all():
            return True
        return bool((lefts[:-1] > rights[1:]).all())

    # non-strict inequality when closed != 'both'; at least one side is
    # not included in the intervals, so equality does not imply overlapping
    if (rights[:-1] <= lefts[1:]).all():
        return True
    return bool((lefts[:-1] >= rights[1:]).all())

1566 

1567 # --------------------------------------------------------------------- 

1568 # Conversion 

1569 

def __array__(
    self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
    """
    Return the IntervalArray's data as a numpy array of Interval
    objects (with dtype='object')

    Parameters
    ----------
    dtype : NpDtype, optional
        Accepted for compatibility with the ``__array__`` protocol. It is
        not used here: the result is always an object array.
    copy : bool, optional
        ``None`` and ``True`` both return a freshly built array.
        ``False`` cannot be honoured, because the result is always newly
        allocated.

    Returns
    -------
    np.ndarray
        Object array holding an ``Interval`` per element, with ``np.nan``
        at missing positions.

    Raises
    ------
    ValueError
        If ``copy`` is False.
    """
    if copy is False:
        # Under the __array__ protocol copy=False means "never copy"; the
        # object array below is always a new allocation, so refuse rather
        # than silently ignore the request.
        raise ValueError(
            "Unable to avoid copy while creating an array as requested."
        )

    left = self._left
    right = self._right
    mask = self.isna()
    closed = self.closed

    result = np.empty(len(left), dtype=object)
    for i, (left_value, right_value) in enumerate(zip(left, right)):
        if mask[i]:
            result[i] = np.nan
        else:
            result[i] = Interval(left_value, right_value, closed)
    return result

1589 

def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    Parameters
    ----------
    type : optional
        Requested arrow type. Accepted values are the struct storage type
        of this array's interval type, or an ``ArrowIntervalType`` equal to
        it.

    Returns
    -------
    The struct storage array when ``type`` equals the storage type,
    otherwise an extension array built from that storage.

    Raises
    ------
    TypeError
        If the subtype cannot be converted to an arrow type, or ``type`` is
        neither the storage type nor a matching ``ArrowIntervalType``.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    try:
        subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
    except TypeError as err:
        raise TypeError(
            f"Conversion to arrow with subtype '{self.dtype.subtype}' "
            "is not supported"
        ) from err
    interval_type = ArrowIntervalType(subtype, self.closed)
    # Storage is a struct with one "left" and one "right" child array.
    storage_array = pyarrow.StructArray.from_arrays(
        [
            pyarrow.array(self._left, type=subtype, from_pandas=True),
            pyarrow.array(self._right, type=subtype, from_pandas=True),
        ],
        names=["left", "right"],
    )
    mask = self.isna()
    if mask.any():
        # if there are missing values, set validity bitmap also on the array level
        null_bitmap = pyarrow.array(~mask).buffers()[1]
        storage_array = pyarrow.StructArray.from_buffers(
            storage_array.type,
            len(storage_array),
            [null_bitmap],
            children=[storage_array.field(0), storage_array.field(1)],
        )

    if type is not None:
        if type.equals(interval_type.storage_type):
            # Caller asked for the plain struct, not the extension type.
            return storage_array
        elif isinstance(type, ArrowIntervalType):
            # ensure we have the same subtype and closed attributes
            if not type.equals(interval_type):
                raise TypeError(
                    "Not supported to convert IntervalArray to type with "
                    f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
                    f"and 'closed' ({self.closed} vs {type.closed}) attributes"
                )
        else:
            raise TypeError(
                f"Not supported to convert IntervalArray to '{type}' type"
            )

    return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)

1641 

# Docstring template shared through ``_interval_shared_docs``; the
# %(return_type)s and %(examples)s placeholders are filled in by each user.
_interval_shared_docs["to_tuples"] = textwrap.dedent(
    """
    Return an %(return_type)s of tuples of the form (left, right).

    Parameters
    ----------
    na_tuple : bool, default True
        If ``True``, return ``NA`` as a tuple ``(nan, nan)``. If ``False``,
        just return ``NA`` as ``nan``.

    Returns
    -------
    tuples: %(return_type)s
    %(examples)s\
    """
)


@Appender(
    _interval_shared_docs["to_tuples"]
    % {
        "return_type": (
            "ndarray (if self is IntervalArray) or Index (if self is IntervalIndex)"
        ),
        "examples": textwrap.dedent(
            """\

         Examples
         --------
         For :class:`pandas.IntervalArray`:

         >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
         >>> idx
         <IntervalArray>
         [(0, 1], (1, 2]]
         Length: 2, dtype: interval[int64, right]
         >>> idx.to_tuples()
         array([(0, 1), (1, 2)], dtype=object)

         For :class:`pandas.IntervalIndex`:

         >>> idx = pd.interval_range(start=0, end=2)
         >>> idx
         IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
         >>> idx.to_tuples()
         Index([(0, 1), (1, 2)], dtype='object')
         """
        ),
    }
)
def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
    endpoint_pairs = zip(self._left, self._right)
    tuples = com.asarray_tuplesafe(endpoint_pairs)
    if na_tuple:
        return tuples
    # GH 18756: missing entries become a bare nan rather than a tuple.
    return np.where(~self.isna(), tuples, np.nan)

1697 

1698 # --------------------------------------------------------------------- 

1699 

def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Set ``value`` in place at the positions where ``mask`` is True.

    The value is validated first and split into its left and right parts,
    which are written to the two endpoint arrays separately.
    """
    left_fill, right_fill = self._validate_setitem_value(value)

    if not isinstance(self._left, np.ndarray):
        # Non-ndarray endpoints provide their own _putmask.
        self._left._putmask(mask, left_fill)
        assert not isinstance(self._right, np.ndarray)
        self._right._putmask(mask, right_fill)
        return

    np.putmask(self._left, mask, left_fill)
    assert isinstance(self._right, np.ndarray)
    np.putmask(self._right, mask, right_fill)

1711 

def insert(self, loc: int, item: Interval) -> Self:
    """
    Return a new IntervalArray with ``item`` inserted at position ``loc``.

    Negative positions follow numpy.insert semantics. Only Interval
    objects and NA can be inserted into an IntervalArray.

    Parameters
    ----------
    loc : int
    item : Interval

    Returns
    -------
    IntervalArray
    """
    item_left, item_right = self._validate_scalar(item)

    return self._shallow_copy(
        self.left.insert(loc, item_left),
        self.right.insert(loc, item_right),
    )

1733 

def delete(self, loc) -> Self:
    """
    Return a new IntervalArray with the entries at ``loc`` removed.

    ``loc`` is forwarded unchanged to ``np.delete`` for ndarray endpoints,
    or to the endpoint array's own ``delete`` otherwise.
    """
    left, right = self._left, self._right
    if not isinstance(left, np.ndarray):
        trimmed_left = left.delete(loc)
        assert not isinstance(right, np.ndarray)
        trimmed_right = right.delete(loc)
    else:
        trimmed_left = np.delete(left, loc)
        assert isinstance(right, np.ndarray)
        trimmed_right = np.delete(right, loc)
    return self._shallow_copy(left=trimmed_left, right=trimmed_right)

1744 

@Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
def repeat(
    self,
    repeats: int | Sequence[int],
    axis: AxisInt | None = None,
) -> Self:
    # ``axis`` is only validated; the repetition itself is applied to each
    # endpoint Index separately.
    nv.validate_repeat((), {"axis": axis})
    return self._shallow_copy(
        left=self.left.repeat(repeats),
        right=self.right.repeat(repeats),
    )

1755 

# Docstring template shared through ``_interval_shared_docs``; the
# %(klass)s and %(examples)s placeholders are filled in by each user.
_interval_shared_docs["contains"] = textwrap.dedent(
    """
    Check elementwise if the Intervals contain the value.

    Return a boolean mask whether the value is contained in the Intervals
    of the %(klass)s.

    Parameters
    ----------
    other : scalar
        The value to check whether it is contained in the Intervals.

    Returns
    -------
    boolean array

    See Also
    --------
    Interval.contains : Check whether Interval object contains value.
    %(klass)s.overlaps : Check if an Interval overlaps the values in the
        %(klass)s.

    Examples
    --------
    %(examples)s
    >>> intervals.contains(0.5)
    array([ True, False, False])
    """
)


@Appender(
    _interval_shared_docs["contains"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
    >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
    >>> intervals
    <IntervalArray>
    [(0, 1], (1, 3], (2, 4]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def contains(self, other):
    if isinstance(other, Interval):
        raise NotImplementedError("contains not implemented for two intervals")

    # An open side excludes its endpoint, hence the strict comparison.
    if self.open_left:
        past_left = self._left < other
    else:
        past_left = self._left <= other

    if self.open_right:
        before_right = other < self._right
    else:
        before_right = other <= self._right

    return past_left & before_right

1808 

def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask of which intervals are present in ``values``.

    IntervalArray inputs are compared on their endpoint pairs when the
    dtypes match, and are treated as containing no matches when the
    closed sides differ or only one side is datetime-like. Everything else
    is compared after casting both operands to object.
    """
    if not isinstance(values, IntervalArray):
        return isin(self.astype(object), values.astype(object))

    if self.closed != values.closed:
        # not comparable -> no overlap
        return np.zeros(self.shape, dtype=bool)

    if self.dtype == values.dtype:
        # GH#38353 instead of casting to object, operating on a
        #  complex128 ndarray is much more performant.
        own_pairs = self._combined.view("complex128")
        other_pairs = values._combined.view("complex128")
        # error: Argument 1 to "isin" has incompatible type
        # "Union[ExtensionArray, ndarray[Any, Any],
        # ndarray[Any, dtype[Any]]]"; expected
        # "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
        # int, float, complex, str, bytes, _NestedSequence[
        # Union[bool, int, float, complex, str, bytes]]]"
        return np.isin(own_pairs, other_pairs).ravel()  # type: ignore[arg-type]

    own_is_i8 = needs_i8_conversion(self.left.dtype)
    other_is_i8 = needs_i8_conversion(values.left.dtype)
    if own_is_i8 ^ other_is_i8:
        # not comparable -> no overlap
        return np.zeros(self.shape, dtype=bool)

    return isin(self.astype(object), values.astype(object))

1836 

@property
def _combined(self) -> IntervalSide:
    """
    Return both endpoints side by side as a two-column array.

    Column 0 holds the left endpoints and column 1 the right endpoints.
    """
    # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
    # has no attribute "reshape"  [union-attr]
    left_col = self.left._values.reshape(-1, 1)  # type: ignore[union-attr]
    right_col = self.right._values.reshape(-1, 1)  # type: ignore[union-attr]
    columns = [left_col, right_col]

    if not needs_i8_conversion(left_col.dtype):
        return np.concatenate(columns, axis=1)

    # error: Item "ndarray[Any, Any]" of "Any | ndarray[Any, Any]" has
    # no attribute "_concat_same_type"
    return left_col._concat_same_type(  # type: ignore[union-attr]
        columns, axis=1
    )

1852 

def _from_combined(self, combined: np.ndarray) -> IntervalArray:
    """
    Create a new IntervalArray with our dtype from a complex128 ndarray.

    The input is reinterpreted as pairs of int64 values: the first of each
    pair becomes the left endpoint and the second the right endpoint.
    """
    pairs = combined.view("i8").reshape(-1, 2)
    left_i8 = pairs[:, 0]
    right_i8 = pairs[:, 1]

    dtype = self._left.dtype
    if not needs_i8_conversion(dtype):
        assert isinstance(dtype, np.dtype)
        new_left = left_i8.view(dtype)
        new_right = right_i8.view(dtype)
    else:
        assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
        new_left = type(self._left)._from_sequence(left_i8, dtype=dtype)
        assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
        new_right = type(self._right)._from_sequence(right_i8, dtype=dtype)
    return self._shallow_copy(left=new_left, right=new_right)

1870 

def unique(self) -> IntervalArray:
    """
    Return a new IntervalArray holding the distinct intervals of ``self``.

    Each (left, right) pair is viewed as a single complex128 value so that
    both endpoints are deduplicated together.
    """
    # No overload variant of "__getitem__" of "ExtensionArray" matches argument
    # type "Tuple[slice, int]"
    packed = self._combined.view("complex128")[:, 0]  # type: ignore[call-overload]
    distinct = unique(packed)
    return self._from_combined(distinct[:, None])

1879 

1880 

def _maybe_convert_platform_interval(values) -> ArrayLike:
    """
    Try to do platform conversion, with special casing for IntervalArray.

    Certain inputs get a different default dtype so that they are
    compatible with IntervalArray. For example, empty lists return with
    integer dtype instead of object dtype, which is prohibited for
    IntervalArray.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
        Inputs that are not list-like, are DataFrames, or are dtype-less
        objects other than list/tuple/range are returned unchanged.
    """
    if isinstance(values, (list, tuple)) and len(values) == 0:
        # GH 19016
        # empty lists/tuples get object dtype by default, but this is
        # prohibited for IntervalArray, so coerce to integer instead
        return np.array([], dtype=np.int64)

    if isinstance(values, ABCDataFrame) or not is_list_like(values):
        # This will raise later, but we avoid passing to maybe_convert_platform
        return values

    if isinstance(getattr(values, "dtype", None), CategoricalDtype):
        values = np.asarray(values)
    elif not (hasattr(values, "dtype") or isinstance(values, (list, tuple, range))):
        # TODO: should we just cast these to list?
        return values
    else:
        values = extract_array(values, extract_numpy=True)

    if hasattr(values, "dtype"):
        return values

    # Plain sequences become ndarrays; integer results are normalised to
    # int64 whatever integer type np.asarray picked.
    values = np.asarray(values)
    if values.dtype.kind in "iu" and values.dtype != np.int64:
        values = values.astype(np.int64)
    return values