Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/interval.py: 24%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

638 statements  

1from __future__ import annotations 

2 

3import operator 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 TYPE_CHECKING, 

11 Iterator, 

12 Literal, 

13 Sequence, 

14 TypeVar, 

15 Union, 

16 cast, 

17 overload, 

18) 

19 

20import numpy as np 

21 

22from pandas._config import get_option 

23 

24from pandas._libs import lib 

25from pandas._libs.interval import ( 

26 VALID_CLOSED, 

27 Interval, 

28 IntervalMixin, 

29 intervals_to_interval_bounds, 

30) 

31from pandas._libs.missing import NA 

32from pandas._typing import ( 

33 ArrayLike, 

34 AxisInt, 

35 Dtype, 

36 IntervalClosedType, 

37 NpDtype, 

38 PositionalIndexer, 

39 ScalarIndexer, 

40 SequenceIndexer, 

41 SortKind, 

42 TimeArrayLike, 

43 npt, 

44) 

45from pandas.compat.numpy import function as nv 

46from pandas.errors import IntCastingNaNError 

47from pandas.util._decorators import Appender 

48 

49from pandas.core.dtypes.cast import ( 

50 LossySetitemError, 

51 maybe_upcast_numeric_to_64bit, 

52) 

53from pandas.core.dtypes.common import ( 

54 is_categorical_dtype, 

55 is_dtype_equal, 

56 is_float_dtype, 

57 is_integer_dtype, 

58 is_interval_dtype, 

59 is_list_like, 

60 is_object_dtype, 

61 is_scalar, 

62 is_string_dtype, 

63 needs_i8_conversion, 

64 pandas_dtype, 

65) 

66from pandas.core.dtypes.dtypes import IntervalDtype 

67from pandas.core.dtypes.generic import ( 

68 ABCDataFrame, 

69 ABCDatetimeIndex, 

70 ABCIntervalIndex, 

71 ABCPeriodIndex, 

72) 

73from pandas.core.dtypes.missing import ( 

74 is_valid_na_for_dtype, 

75 isna, 

76 notna, 

77) 

78 

79from pandas.core.algorithms import ( 

80 isin, 

81 take, 

82 unique, 

83 value_counts, 

84) 

85from pandas.core.arrays.base import ( 

86 ExtensionArray, 

87 _extension_array_shared_docs, 

88) 

89from pandas.core.arrays.datetimes import DatetimeArray 

90from pandas.core.arrays.timedeltas import TimedeltaArray 

91import pandas.core.common as com 

92from pandas.core.construction import ( 

93 array as pd_array, 

94 ensure_wrapped_if_datetimelike, 

95 extract_array, 

96) 

97from pandas.core.indexers import check_array_indexer 

98from pandas.core.ops import ( 

99 invalid_comparison, 

100 unpack_zerodim_and_defer, 

101) 

102 

103if TYPE_CHECKING: 

104 from pandas import ( 

105 Index, 

106 Series, 

107 ) 

108 

109 

# Self-type for IntervalArray and its subclasses, so that constructors and
# copy helpers can be annotated as returning the class they were called on.
IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")

# Storage used for one side (left or right) of the intervals.
IntervalSideT = Union[TimeArrayLike, np.ndarray]

# A single element of the array: an Interval, or a float standing in for a
# missing value (the class below uses np.nan as its NA/fill value).
IntervalOrNA = Union[Interval, float]

113 

# Registry of docstring templates keyed by the documented object's name.
# Templates use %-style placeholders and are interpolated before being
# attached to a class or method with ``Appender``.
_interval_shared_docs: dict[str, str] = {}

# Placeholder values describing IntervalArray.
_shared_docs_kwargs = {
    "klass": "IntervalArray",
    "qualname": "arrays.IntervalArray",
    "name": "",
}


# Class-level docstring template.  The cross-references name the real array
# classes (``DatetimeArray`` / ``TimedeltaArray``); the previous spellings
# ``DateTimeArray`` / ``TimeDeltaArray`` do not exist.
_interval_shared_docs[
    "class"
] = """
%(summary)s

.. versionadded:: %(versionadded)s

Parameters
----------
data : array-like (1-dimensional)
    Array-like (ndarray, :class:`DatetimeArray`, :class:`TimedeltaArray`) containing
    Interval objects from which to build the %(klass)s.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
    Whether the intervals are closed on the left-side, right-side, both or
    neither.
dtype : dtype or None, default None
    If None, dtype will be inferred.
copy : bool, default False
    Copy the input data.
%(name)s\
verify_integrity : bool, default True
    Verify that the %(klass)s is valid.

Attributes
----------
left
right
closed
mid
length
is_empty
is_non_overlapping_monotonic
%(extra_attributes)s\

Methods
-------
from_arrays
from_tuples
from_breaks
contains
overlaps
set_closed
to_tuples
%(extra_methods)s\

See Also
--------
Index : The base pandas Index type.
Interval : A bounded slice-like interval; the elements of an %(klass)s.
interval_range : Function to create a fixed frequency IntervalIndex.
cut : Bin values into discrete Intervals.
qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.

Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__
for more.

%(examples)s\
"""

184 

185 

186@Appender( 

187 _interval_shared_docs["class"] 

188 % { 

189 "klass": "IntervalArray", 

190 "summary": "Pandas array for interval data that are closed on the same side.", 

191 "versionadded": "0.24.0", 

192 "name": "", 

193 "extra_attributes": "", 

194 "extra_methods": "", 

195 "examples": textwrap.dedent( 

196 """\ 

197 Examples 

198 -------- 

199 A new ``IntervalArray`` can be constructed directly from an array-like of 

200 ``Interval`` objects: 

201 

202 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) 

203 <IntervalArray> 

204 [(0, 1], (1, 5]] 

205 Length: 2, dtype: interval[int64, right] 

206 

207 It may also be constructed using one of the constructor 

208 methods: :meth:`IntervalArray.from_arrays`, 

209 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 

210 """ 

211 ), 

212 } 

213) 

214class IntervalArray(IntervalMixin, ExtensionArray): 

215 can_hold_na = True 

216 _na_value = _fill_value = np.nan 

217 

@property
def ndim(self) -> Literal[1]:
    """Number of dimensions of the array; always 1."""
    return 1

221 

222 # To make mypy recognize the fields 

223 _left: IntervalSideT 

224 _right: IntervalSideT 

225 _dtype: IntervalDtype 

226 

227 # --------------------------------------------------------------------- 

228 # Constructors 

229 

def __new__(
    cls: type[IntervalArrayT],
    data,
    closed=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    verify_integrity: bool = True,
):
    """
    Construct the array from an existing interval array or a collection.

    Parameters
    ----------
    data : array-like
        Either an instance of this class or a collection from which interval
        bounds are extracted. Scalars are rejected.
    closed : optional
        Closed side to use. When falsy, it is taken from ``data`` (existing
        array) or inferred from the extracted intervals.
    dtype : Dtype or None, default None
        Forwarded to ``_ensure_simple_new_inputs`` for non-instance input.
    copy : bool, default False
        Forwarded to ``_ensure_simple_new_inputs`` for non-instance input.
    verify_integrity : bool, default True
        Whether to run ``_validate`` on the resulting components.

    Raises
    ------
    TypeError
        If ``data`` is a scalar.
    """
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, cls):
        # Already one of ours: reuse its sides and rebuild the dtype around
        # the requested (or existing) closed side.
        left: IntervalSideT = data._left
        right: IntervalSideT = data._right
        closed = closed or data.closed
        dtype = IntervalDtype(left.dtype, closed=closed)
    else:
        if is_scalar(data):
            # scalars are not a valid input
            raise TypeError(
                f"{cls.__name__}(...) must be called with a collection "
                f"of some kind, {data} was passed"
            )

        # empty or all-NA input may need a dtype conversion first
        data = _maybe_convert_platform_interval(data)
        left, right, infer_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None
        )
        if left.dtype == object:
            left = lib.maybe_convert_objects(left)
            right = lib.maybe_convert_objects(right)
        closed = closed or infer_closed

        left, right, dtype = cls._ensure_simple_new_inputs(
            left, right, closed=closed, copy=copy, dtype=dtype
        )

    if verify_integrity:
        cls._validate(left, right, dtype=dtype)

    return cls._simple_new(left, right, dtype=dtype)

280 

@classmethod
def _simple_new(
    cls: type[IntervalArrayT],
    left: IntervalSideT,
    right: IntervalSideT,
    dtype: IntervalDtype,
) -> IntervalArrayT:
    """
    Assemble an instance from already-prepared components.

    No validation or conversion is performed here; the three arguments are
    stored on the new object as ``_left``, ``_right`` and ``_dtype``.
    """
    # Allocate through IntervalMixin.__new__ rather than cls.__new__, which
    # expects ``data`` and would re-run input processing.
    obj = IntervalMixin.__new__(cls)
    obj._left, obj._right, obj._dtype = left, right, dtype
    return obj

294 

@classmethod
def _ensure_simple_new_inputs(
    cls,
    left,
    right,
    closed: IntervalClosedType | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> tuple[IntervalSideT, IntervalSideT, IntervalDtype]:
    """
    Normalize ``left``/``right``/``dtype`` into the form ``_simple_new`` takes.

    Returns
    -------
    tuple of (left, right, dtype)
        The two sides after conversion and an IntervalDtype built from the
        left side's dtype and the resolved ``closed``.

    Raises
    ------
    TypeError
        If ``dtype`` is given but is not an interval dtype, or the sides have
        a categorical or string dtype.
    ValueError
        If ``closed`` conflicts with ``dtype.closed``, the two sides end up
        with different types, the sides are period-like, or datetime sides
        have different time zones.
    """
    from pandas.core.indexes.base import ensure_index

    left = maybe_upcast_numeric_to_64bit(ensure_index(left, copy=copy))
    right = maybe_upcast_numeric_to_64bit(ensure_index(right, copy=copy))

    # Resolve ``closed``: explicit argument, then the dtype's, then "right".
    if closed is None and isinstance(dtype, IntervalDtype):
        closed = dtype.closed
    closed = closed or "right"

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if not is_interval_dtype(dtype):
            raise TypeError(f"dtype must be an IntervalDtype, got {dtype}")

        dtype = cast(IntervalDtype, dtype)
        if dtype.subtype is not None:
            left = left.astype(dtype.subtype)
            right = right.astype(dtype.subtype)

        if dtype.closed is None:
            # possibly loading an old pickle
            dtype = IntervalDtype(dtype.subtype, closed)
        elif closed != dtype.closed:
            raise ValueError("closed keyword does not match dtype.closed")

    # A float side paired with an integer side: cast the integer side.
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        raise ValueError(
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        raise TypeError(
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
    if isinstance(left, ABCPeriodIndex):
        raise ValueError("Period dtypes are not supported, use a PeriodIndex instead")
    if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        raise ValueError(
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )

    # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
    left = extract_array(ensure_wrapped_if_datetimelike(left), extract_numpy=True)
    right = extract_array(ensure_wrapped_if_datetimelike(right), extract_numpy=True)

    left_base = getattr(left, "_ndarray", left).base
    right_base = getattr(right, "_ndarray", right).base
    if left_base is not None and left_base is right_base:
        # Both sides are views on the same buffer; a later setitem on one
        # side would corrupt the other, so detach the right side.
        right = right.copy()

    return left, right, IntervalDtype(left.dtype, closed=closed)

380 

@classmethod
def _from_sequence(
    cls: type[IntervalArrayT],
    scalars,
    *,
    dtype: Dtype | None = None,
    copy: bool = False,
) -> IntervalArrayT:
    """Build an array from ``scalars`` by delegating to the class constructor."""
    return cls(scalars, dtype=dtype, copy=copy)

390 

@classmethod
def _from_factorized(
    cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT
) -> IntervalArrayT:
    """
    Rebuild an array from factorized ``values``.

    The closed side is taken from ``original``; for empty ``values`` the
    subtype of ``original`` is applied as well.
    """
    if not len(values):
        # An empty array arrives as object dtype, which the constructor
        # cannot turn into intervals, so cast it to the original subtype.
        values = values.astype(original.dtype.subtype)
    return cls(values, closed=original.closed)

401 

402 _interval_shared_docs["from_breaks"] = textwrap.dedent( 

403 """ 

404 Construct an %(klass)s from an array of splits. 

405 

406 Parameters 

407 ---------- 

408 breaks : array-like (1-dimensional) 

409 Left and right bounds for each interval. 

410 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

411 Whether the intervals are closed on the left-side, right-side, both 

412 or neither.\ 

413 %(name)s 

414 copy : bool, default False 

415 Copy the data. 

416 dtype : dtype or None, default None 

417 If None, dtype will be inferred. 

418 

419 Returns 

420 ------- 

421 %(klass)s 

422 

423 See Also 

424 -------- 

425 interval_range : Function to create a fixed frequency IntervalIndex. 

426 %(klass)s.from_arrays : Construct from a left and right array. 

427 %(klass)s.from_tuples : Construct from a sequence of tuples. 

428 

429 %(examples)s\ 

430 """ 

431 ) 

432 

# NOTE: the docstring is supplied entirely by ``Appender`` from the shared
# "from_breaks" template, so the function itself carries no docstring.
@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        """
        ),
    }
)
def from_breaks(
    cls: type[IntervalArrayT],
    breaks,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    breaks = _maybe_convert_platform_interval(breaks)

    # Consecutive break points pair up: each one (except the last) is a left
    # bound, and each one (except the first) is a right bound.
    left_bounds = breaks[:-1]
    right_bounds = breaks[1:]
    return cls.from_arrays(left_bounds, right_bounds, closed, copy=copy, dtype=dtype)

461 

462 _interval_shared_docs["from_arrays"] = textwrap.dedent( 

463 """ 

464 Construct from two arrays defining the left and right bounds. 

465 

466 Parameters 

467 ---------- 

468 left : array-like (1-dimensional) 

469 Left bounds for each interval. 

470 right : array-like (1-dimensional) 

471 Right bounds for each interval. 

472 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

473 Whether the intervals are closed on the left-side, right-side, both 

474 or neither.\ 

475 %(name)s 

476 copy : bool, default False 

477 Copy the data. 

478 dtype : dtype, optional 

479 If None, dtype will be inferred. 

480 

481 Returns 

482 ------- 

483 %(klass)s 

484 

485 Raises 

486 ------ 

487 ValueError 

488 When a value is missing in only one of `left` or `right`. 

489 When a value in `left` is greater than the corresponding value 

490 in `right`. 

491 

492 See Also 

493 -------- 

494 interval_range : Function to create a fixed frequency IntervalIndex. 

495 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

496 splits. 

497 %(klass)s.from_tuples : Construct an %(klass)s from an 

498 array-like of tuples. 

499 

500 Notes 

501 ----- 

502 Each element of `left` must be less than or equal to the `right` 

503 element at the same position. If an element is missing, it must be 

504 missing in both `left` and `right`. A TypeError is raised when 

505 using an unsupported type for `left` or `right`. At the moment, 

506 'category', 'object', and 'string' subtypes are not supported. 

507 

508 %(examples)s\ 

509 """ 

510 ) 

511 

# NOTE: the docstring is supplied entirely by ``Appender`` from the shared
# "from_arrays" template, so the function itself carries no docstring.
# The examples text starts with an "Examples" section header, matching the
# sibling constructors ``from_breaks`` and ``from_tuples``; without it the
# doctest would be rendered as part of the preceding "Notes" section.
@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        """
        ),
    }
)
def from_arrays(
    cls: type[IntervalArrayT],
    left,
    right,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    left = _maybe_convert_platform_interval(left)
    right = _maybe_convert_platform_interval(right)

    left, right, dtype = cls._ensure_simple_new_inputs(
        left,
        right,
        closed=closed,
        copy=copy,
        dtype=dtype,
    )
    # Unlike ``__new__`` there is no opt-out: the bounds are always validated.
    cls._validate(left, right, dtype=dtype)

    return cls._simple_new(left, right, dtype=dtype)

549 

550 _interval_shared_docs["from_tuples"] = textwrap.dedent( 

551 """ 

552 Construct an %(klass)s from an array-like of tuples. 

553 

554 Parameters 

555 ---------- 

556 data : array-like (1-dimensional) 

557 Array of tuples. 

558 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

559 Whether the intervals are closed on the left-side, right-side, both 

560 or neither.\ 

561 %(name)s 

562 copy : bool, default False 

563 By-default copy the data, this is compat only and ignored. 

564 dtype : dtype or None, default None 

565 If None, dtype will be inferred. 

566 

567 Returns 

568 ------- 

569 %(klass)s 

570 

571 See Also 

572 -------- 

573 interval_range : Function to create a fixed frequency IntervalIndex. 

574 %(klass)s.from_arrays : Construct an %(klass)s from a left and 

575 right array. 

576 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

577 splits. 

578 

579 %(examples)s\ 

580 """ 

581 ) 

582 

# NOTE: the docstring is supplied entirely by ``Appender`` from the shared
# "from_tuples" template, so the function itself carries no docstring.
@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
        <IntervalArray>
        [(0, 1], (1, 2]]
        Length: 2, dtype: interval[int64, right]
        """
        ),
    }
)
def from_tuples(
    cls: type[IntervalArrayT],
    data,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    if len(data):
        left, right = [], []
    else:
        # ensure that empty data keeps input dtype
        left = right = data

    for item in data:
        if not isinstance(item, tuple) and isna(item):
            # a missing entry is missing on both sides
            lower = upper = np.nan
        else:
            try:
                # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                lower, upper = item
            except ValueError as err:
                raise ValueError(
                    f"{cls.__name__}.from_tuples requires tuples of length 2, "
                    f"got {item}"
                ) from err
            except TypeError as err:
                raise TypeError(
                    f"{cls.__name__}.from_tuples received an invalid item, {item}"
                ) from err
        left.append(lower)
        right.append(upper)

    # ``copy`` is accepted for compatibility only; the lists built above are
    # always fresh, so no additional copy is requested.
    return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)

632 

@classmethod
def _validate(cls, left, right, dtype: IntervalDtype) -> None:
    """
    Check that ``left``, ``right`` and ``dtype`` describe a valid array.

    The checks run in this order and the first failure raises:

    * ``dtype`` is an IntervalDtype
    * ``left`` and ``right`` have equal length
    * missing values occur at the same positions on both sides
    * every non-missing left bound is <= its right bound

    Raises
    ------
    ValueError
        If any of the checks fails.
    """
    if not isinstance(dtype, IntervalDtype):
        raise ValueError(f"invalid dtype: {dtype}")

    if len(left) != len(right):
        raise ValueError("left and right must have the same length")

    left_present = notna(left)
    right_present = notna(right)
    if not (left_present == right_present).all():
        raise ValueError(
            "missing values must be missing in the same "
            "location both left and right sides"
        )

    # Only positions where the left side is present are compared.
    if not (left[left_present] <= right[left_present]).all():
        raise ValueError("left side of interval must be <= right side")

662 

def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT:
    """
    Return a new array built from replacement sides, keeping ``self.closed``.

    Parameters
    ----------
    left : Index
        Values to be used for the left-side of the intervals.
    right : Index
        Values to be used for the right-side of the intervals.
    """
    # The new dtype follows the replacement left side, not self's subtype.
    target_dtype = IntervalDtype(left.dtype, closed=self.closed)
    new_left, new_right, new_dtype = self._ensure_simple_new_inputs(
        left, right, dtype=target_dtype
    )
    return self._simple_new(new_left, new_right, dtype=new_dtype)

678 

679 # --------------------------------------------------------------------- 

680 # Descriptive 

681 

@property
def dtype(self) -> IntervalDtype:
    """The IntervalDtype stored on this array."""
    return self._dtype

@property
def nbytes(self) -> int:
    """Combined ``nbytes`` of the left and right sides."""
    left_bytes = self.left.nbytes
    right_bytes = self.right.nbytes
    return left_bytes + right_bytes

@property
def size(self) -> int:
    """Number of elements, read from the left side."""
    # Avoid materializing self.values
    return self.left.size

694 

695 # --------------------------------------------------------------------- 

696 # EA Interface 

697 

def __iter__(self) -> Iterator:
    """Iterate over the elements of ``np.asarray(self)``."""
    as_ndarray = np.asarray(self)
    return iter(as_ndarray)

def __len__(self) -> int:
    """Length of the array, taken from the left side."""
    return len(self._left)

703 

@overload
def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
    ...

@overload
def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT:
    ...

def __getitem__(
    self: IntervalArrayT, key: PositionalIndexer
) -> IntervalArrayT | IntervalOrNA:
    """
    Select by position.

    Returns an Interval (or the fill value when the selected left bound is
    missing) for a scalar result, and a new array otherwise.

    Raises
    ------
    ValueError
        If the selection has more than one dimension.
    """
    key = check_array_indexer(self, key)
    left = self._left[key]
    right = self._right[key]

    if isinstance(left, (np.ndarray, ExtensionArray)):
        if np.ndim(left) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        # Argument 2 to "_simple_new" of "IntervalArray" has incompatible type
        # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
        # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
        # ndarray[Any, Any]]"
        return self._simple_new(left, right, dtype=self.dtype)  # type: ignore[arg-type]

    # scalar result
    if is_scalar(left) and isna(left):
        return self._fill_value
    return Interval(left, right, self.closed)

732 

def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key`` in place.

    The value is validated and split into its two bounds before the key is
    checked; both sides are then updated with the same key.
    """
    new_left, new_right = self._validate_setitem_value(value)
    key = check_array_indexer(self, key)

    self._left[key] = new_left
    self._right[key] = new_right

739 

def _cmp_method(self, other, op):
    """
    Element-wise comparison of this array with ``other`` using ``op``.

    ``op`` is expected to be one of the six ``operator`` comparison functions.
    Interval-like operands with a matching closed side are compared on their
    bounds; object-dtype operands are compared element by element; anything
    else is handed to ``invalid_comparison``.

    Raises
    ------
    ValueError
        If ``other`` is list-like with a different length.
    """
    # ensure pandas array for list-like and eliminate non-interval scalars
    if is_list_like(other):
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")
        other = pd_array(other)
    elif not isinstance(other, Interval):
        # non-interval scalar -> no matches
        if other is NA:
            # GH#31882
            from pandas.core.arrays import BooleanArray

            arr = np.empty(self.shape, dtype=bool)
            mask = np.ones(self.shape, dtype=bool)
            return BooleanArray(arr, mask)
        return invalid_comparison(self, other, op)

    # determine the dtype of the elements we want to compare
    if isinstance(other, Interval):
        other_dtype = pandas_dtype("interval")
    elif not is_categorical_dtype(other.dtype):
        other_dtype = other.dtype
    else:
        # for categorical defer to categories for dtype
        other_dtype = other.categories.dtype

        # extract intervals if we have interval categories with matching closed
        if is_interval_dtype(other_dtype):
            if self.closed != other.categories.closed:
                return invalid_comparison(self, other, op)

            other = other.categories.take(
                other.codes, allow_fill=True, fill_value=other.categories._na_value
            )

    # interval-like -> need same closed and matching endpoints
    if is_interval_dtype(other_dtype):
        if self.closed != other.closed:
            return invalid_comparison(self, other, op)
        if not isinstance(other, Interval):
            other = type(self)(other)

        # Ordering is lexicographic: left bounds first, right bounds break ties.
        if op is operator.eq:
            return (self._left == other.left) & (self._right == other.right)
        if op is operator.ne:
            return (self._left != other.left) | (self._right != other.right)
        if op is operator.gt:
            return (self._left > other.left) | (
                (self._left == other.left) & (self._right > other.right)
            )
        if op is operator.ge:
            return (self == other) | (self > other)
        if op is operator.lt:
            return (self._left < other.left) | (
                (self._left == other.left) & (self._right < other.right)
            )
        # operator.le
        return (self == other) | (self < other)

    # non-interval/non-object dtype -> no matches
    if not is_object_dtype(other_dtype):
        return invalid_comparison(self, other, op)

    # object dtype -> iteratively check for intervals
    result = np.zeros(len(self), dtype=bool)
    for pos, element in enumerate(other):
        try:
            result[pos] = op(self[pos], element)
        except TypeError:
            if element is not NA:
                raise
            # comparison with np.nan returns NA
            # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
            result = result.astype(object)
            result[pos] = NA
    return result

818 

# The six rich comparisons all delegate to ``_cmp_method`` with the matching
# ``operator`` function.

@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
    """Element-wise ``==``."""
    return self._cmp_method(other, operator.eq)

@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
    """Element-wise ``!=``."""
    return self._cmp_method(other, operator.ne)

@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
    """Element-wise ``>``."""
    return self._cmp_method(other, operator.gt)

@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
    """Element-wise ``>=``."""
    return self._cmp_method(other, operator.ge)

@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
    """Element-wise ``<``."""
    return self._cmp_method(other, operator.lt)

@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
    """Element-wise ``<=``."""
    return self._cmp_method(other, operator.le)

842 

def argsort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
    **kwargs,
) -> np.ndarray:
    """
    Return the indices that would sort the array.

    With the default arguments the result comes from ``np.lexsort`` keyed on
    the left bounds, then the right bounds; any other combination is handled
    by the parent class implementation.
    """
    ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

    default_request = ascending and kind == "quicksort" and na_position == "last"
    if not default_request:
        # TODO: other cases we can use lexsort for? much more performant.
        return super().argsort(
            ascending=ascending, kind=kind, na_position=na_position, **kwargs
        )

    # TODO: in an IntervalIndex we can re-use the cached
    # IntervalTree.left_sorter
    # np.lexsort treats the LAST key as the primary one, hence (right, left).
    return np.lexsort((self.right, self.left))

862 

def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the first element in ``argsort`` order.

    The NA value is returned for an empty array, or when the array contains
    missing values and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    missing = self.isna()
    has_missing = missing.any()
    if has_missing and not skipna:
        return self._na_value

    candidates = self[~missing] if has_missing else self
    first = candidates.argsort()[0]
    return candidates[first]

879 

def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the last element in ``argsort`` order.

    The NA value is returned for an empty array, or when the array contains
    missing values and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    missing = self.isna()
    has_missing = missing.any()
    if has_missing and not skipna:
        return self._na_value

    candidates = self[~missing] if has_missing else self
    last = candidates.argsort()[-1]
    return candidates[last]

896 

def fillna(
    self: IntervalArrayT, value=None, method=None, limit=None
) -> IntervalArrayT:
    """
    Replace missing entries with ``value``.

    Parameters
    ----------
    value : scalar, dict, Series
        If a scalar value is passed it is used to fill all missing values.
        Alternatively, a Series or dict can be used to fill in different
        values for each index. The value should not be a list. The
        value(s) passed should be either Interval objects or NA/NaN.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Not supported for IntervalArray; must be left as None.
    limit : int, default None
        Not supported for IntervalArray; must be left as None.

    Returns
    -------
    IntervalArray
        New array with NA/NaN filled.

    Raises
    ------
    TypeError
        If ``method`` or ``limit`` is given.
    """
    if method is not None:
        raise TypeError("Filling by method is not supported for IntervalArray.")
    if limit is not None:
        raise TypeError("limit is not supported for IntervalArray.")

    # Split the fill value into bounds and fill each side independently.
    fill_left, fill_right = self._validate_scalar(value)
    filled_left = self.left.fillna(value=fill_left)
    filled_right = self.right.fillna(value=fill_right)
    return self._shallow_copy(filled_left, filled_right)

936 

def astype(self, dtype, copy: bool = True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.

    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the cast is not possible.
    """
    from pandas import Index

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if not is_interval_dtype(dtype):
        # Non-interval target: defer to the parent class and normalize the
        # error type.
        try:
            return super().astype(dtype, copy=copy)
        except (TypeError, ValueError) as err:
            raise TypeError(
                f"Cannot cast {type(self).__name__} to dtype {dtype}"
            ) from err

    if dtype == self.dtype:
        return self.copy() if copy else self

    if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(dtype.subtype):
        # This is allowed on the Index.astype but we disallow it here
        raise TypeError(
            f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
        )

    # need to cast to different subtype
    try:
        # We need to use Index rules for astype to prevent casting
        # np.nan entries to int subtypes
        new_left = Index(self._left, copy=False).astype(dtype.subtype)
        new_right = Index(self._right, copy=False).astype(dtype.subtype)
    except IntCastingNaNError:
        # e.g test_subtype_integer
        raise
    except (TypeError, ValueError) as err:
        # e.g. test_subtype_integer_errors f8->u8 can be lossy
        # and raises ValueError
        raise TypeError(
            f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
        ) from err
    return self._shallow_copy(new_left, new_right)

997 

def equals(self, other) -> bool:
    """
    Return True when ``other`` has exactly the same type, the same closed
    side, and equal left and right bounds.
    """
    if type(self) != type(other):
        return False

    same_closed = self.closed == other.closed
    return bool(
        same_closed
        and self.left.equals(other.left)
        and self.right.equals(other.right)
    )

1007 

@classmethod
def _concat_same_type(
    cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
) -> IntervalArrayT:
    """
    Concatenate multiple IntervalArray

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the arrays do not all share one closed side.
    """
    closed_values = {arr.closed for arr in to_concat}
    if len(closed_values) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (closed,) = closed_values

    all_left = np.concatenate([arr.left for arr in to_concat])
    all_right = np.concatenate([arr.right for arr in to_concat])

    new_left, new_right, new_dtype = cls._ensure_simple_new_inputs(
        all_left, all_right, closed=closed
    )
    return cls._simple_new(new_left, new_right, dtype=new_dtype)

1034 

def copy(self: IntervalArrayT) -> IntervalArrayT:
    """
    Return a copy of the array.

    Both sides are copied; the dtype object is reused.

    Returns
    -------
    IntervalArray
    """
    return self._simple_new(
        self._left.copy(),
        self._right.copy(),
        dtype=self.dtype,
    )

1047 

def isna(self) -> np.ndarray:
    """Boolean mask of missing entries, determined from the left side only."""
    left_side = self._left
    return isna(left_side)

1050 

def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
    """
    Shift the intervals by ``periods`` positions, padding with ``fill_value``.

    Parameters
    ----------
    periods : int, default 1
        Number of positions to shift. Positive values pad at the start and
        drop elements from the end; negative values do the opposite.
    fill_value : Interval or NA, optional
        Value used for the vacated positions. NA-like values are replaced
        by the NA value of the left endpoints' Index.

    Returns
    -------
    IntervalArray
        A new array; for an empty array or ``periods == 0`` this is a copy.

    Raises
    ------
    TypeError
        If ``fill_value`` is rejected by ``_validate_scalar``.
    """
    if not len(self) or periods == 0:
        return self.copy()

    self._validate_scalar(fill_value)

    # ExtensionArray.shift doesn't work for two reasons
    #  1. IntervalArray.dtype.na_value may not be correct for the dtype.
    #  2. IntervalArray._from_sequence only accepts NaN for missing values,
    #     not other values like NaT

    empty_len = min(abs(periods), len(self))
    if isna(fill_value):
        from pandas import Index

        fill_value = Index(self._left, copy=False)._na_value
        # ``from_breaks`` defaults to closed="right". The padding must carry
        # our own ``closed`` or ``_concat_same_type`` below raises
        # ValueError for arrays closed on any other side.
        empty = IntervalArray.from_breaks(
            [fill_value] * (empty_len + 1), closed=self.closed
        )
    else:
        empty = self._from_sequence([fill_value] * empty_len)

    if periods > 0:
        a = empty
        b = self[:-periods]
    else:
        a = self[abs(periods) :]
        b = empty
    return self._concat_same_type([a, b])

1078 

def take(
    self: IntervalArrayT,
    indices,
    *,
    allow_fill: bool = False,
    fill_value=None,
    axis=None,
    **kwargs,
) -> IntervalArrayT:
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take((), kwargs)

    # Only unbox the fill value into per-side scalars when filling is on;
    # otherwise it is forwarded untouched to both sides.
    if allow_fill:
        left_fill, right_fill = self._validate_scalar(fill_value)
    else:
        left_fill = right_fill = fill_value

    # ``take`` below is the module-level algorithm, not this method.
    taken_left = take(
        self._left, indices, allow_fill=allow_fill, fill_value=left_fill
    )
    taken_right = take(
        self._right, indices, allow_fill=allow_fill, fill_value=right_fill
    )
    return self._shallow_copy(taken_left, taken_right)

1147 

def _validate_listlike(self, value):
    """
    Validate a list-like of intervals and split it into its two sides.

    Parameters
    ----------
    value : list-like
        Converted with ``IntervalArray(value)``.

    Returns
    -------
    tuple
        ``(left, right)`` endpoints of the converted array.

    Raises
    ------
    TypeError
        If the conversion or closed-side check raises TypeError, or if the
        left endpoints are rejected by ``self.left._validate_fill_value``.
    """
    try:
        converted = IntervalArray(value)
        self._check_closed_matches(converted, name="value")
        value_left = converted.left
        value_right = converted.right
    except TypeError as err:
        # wrong type: not interval or NA
        raise TypeError(
            f"'value' should be an interval type, got {type(value)} instead."
        ) from err

    try:
        self.left._validate_fill_value(value_left)
    except (LossySetitemError, TypeError) as err:
        raise TypeError(
            "'value' should be a compatible interval type, "
            f"got {type(value)} instead."
        ) from err

    return value_left, value_right

1169 

def _validate_scalar(self, value):
    """
    Unbox a scalar into a ``(left, right)`` pair of endpoint values.

    Parameters
    ----------
    value : Interval or NA
        An Interval must pass ``_check_closed_matches``. A value that is a
        valid NA for the left endpoints' dtype yields that Index's NA value
        for both sides.

    Returns
    -------
    tuple
        ``(left, right)``.

    Raises
    ------
    TypeError
        If ``value`` is neither an Interval nor a valid NA.
    """
    if isinstance(value, Interval):
        self._check_closed_matches(value, name="value")
        # TODO: check subdtype match like _validate_setitem_value?
        return value.left, value.right

    if is_valid_na_for_dtype(value, self.left.dtype):
        # GH#18295
        na_value = self.left._na_value
        return na_value, na_value

    raise TypeError("can only insert Interval objects and NA into an IntervalArray")

1183 

def _validate_setitem_value(self, value):
    """
    Validate a value to be set and return it as ``(left, right)``.

    Parameters
    ----------
    value : NA, Interval or list-like of intervals

    Returns
    -------
    tuple
        ``(value_left, value_right)``. For NA both entries are the NA
        value of the left endpoints' Index. Anything that is neither NA
        nor an Interval is delegated to ``_validate_listlike``.

    Raises
    ------
    TypeError
        If NA is being set on an integer-subtype array.
    """
    if is_valid_na_for_dtype(value, self.left.dtype):
        # na value: need special casing to set directly on numpy arrays
        na_value = self.left._na_value
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            # GH#45484 TypeError, not ValueError, matches what we get with
            # non-NA un-holdable value.
            raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
        return na_value, na_value

    if isinstance(value, Interval):
        # scalar interval
        self._check_closed_matches(value, name="value")
        lower, upper = value.left, value.right
        # Both endpoints are checked against the left Index.
        self.left._validate_fill_value(lower)
        self.left._validate_fill_value(upper)
        return lower, upper

    return self._validate_listlike(value)

1206 

def value_counts(self, dropna: bool = True) -> Series:
    """
    Return a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this in a non-naive way!
    # The array is first converted with ``np.asarray`` and then handed to
    # the module-level ``value_counts`` (not this method).
    boxed = np.asarray(self)
    return value_counts(boxed, dropna=dropna)

1226 

1227 # --------------------------------------------------------------------- 

1228 # Rendering Methods 

1229 

def _format_data(self) -> str:
    """
    Render the elements as a bracketed, comma-separated summary string.

    Arrays of length 0, 1 or 2 are always rendered in full. Longer arrays
    are rendered in full only if their length does not exceed a budget of
    ``min((display.max_seq_items or len(self)) // 10, 10)``. Otherwise an
    equal number of leading and trailing elements is shown, separated by
    ``" ... "``.

    Returns
    -------
    str
    """
    # TODO: integrate with categorical and make generic
    n = len(self)
    max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)

    formatter = str

    if n == 0:
        return "[]"
    if n == 1:
        return f"[{formatter(self[0])}]"
    if n == 2:
        return f"[{formatter(self[0])}, {formatter(self[-1])}]"

    if n > max_seq_items:
        # Keep at least one element per side. With a window of 0 the tail
        # slice ``self[-0:]`` equals ``self[0:]`` and would emit every
        # element after the ellipsis instead of truncating.
        window = max(min(max_seq_items // 2, 10), 1)
        head_str = ", ".join(formatter(x) for x in self[:window])
        tail_str = ", ".join(formatter(x) for x in self[-window:])
        return f"[{head_str} ... {tail_str}]"

    body = ", ".join(formatter(x) for x in self)
    return f"[{body}]"

1261 

def __repr__(self) -> str:
    """
    Return a three-line representation of the array.

    The lines are the class name in angle brackets, the output of
    ``_format_data``, and a footer with the length and dtype.
    """
    header = f"<{type(self).__name__}>"
    body = self._format_data()
    footer = f"Length: {len(self)}, dtype: {self.dtype}"
    # The pieces are joined with single newlines; no trailing newline.
    return "\n".join((header, body, footer))

1271 

def _format_space(self) -> str:
    """
    Return a newline followed by padding one wider than the class name.
    """
    width = len(type(self).__name__) + 1
    return "\n" + " " * width

1275 

1276 # --------------------------------------------------------------------- 

1277 # Vectorized Interval Properties/Attributes 

1278 

1279 @property 

def left(self):
    """
    Return the left endpoints of each Interval in the IntervalArray as an Index.
    """
    # Imported at call time rather than at module import.
    from pandas import Index

    endpoints = self._left
    return Index(endpoints, copy=False)

1287 

1288 @property 

def right(self):
    """
    Return the right endpoints of each Interval in the IntervalArray as an Index.
    """
    # Imported at call time rather than at module import.
    from pandas import Index

    endpoints = self._right
    return Index(endpoints, copy=False)

1296 

1297 @property 

def length(self) -> Index:
    """
    Return an Index with entries denoting the length of each Interval.

    Computed as the right endpoints minus the left endpoints.
    """
    upper, lower = self.right, self.left
    return upper - lower

1303 

1304 @property 

def mid(self) -> Index:
    """
    Return the midpoint of each Interval in the IntervalArray as an Index.

    The midpoint is first computed as half the sum of both endpoints. If
    that raises TypeError, it is computed as the left endpoint plus half
    the interval length instead.
    """
    try:
        endpoint_sum = self.left + self.right
        return 0.5 * endpoint_sum
    except TypeError:
        # datetime safe version
        half_length = 0.5 * self.length
        return self.left + half_length

1314 

1315 _interval_shared_docs["overlaps"] = textwrap.dedent( 

1316 """ 

1317 Check elementwise if an Interval overlaps the values in the %(klass)s. 

1318 

1319 Two intervals overlap if they share a common point, including closed 

1320 endpoints. Intervals that only have an open endpoint in common do not 

1321 overlap. 

1322 

1323 Parameters 

1324 ---------- 

1325 other : %(klass)s 

1326 Interval to check against for an overlap. 

1327 

1328 Returns 

1329 ------- 

1330 ndarray 

1331 Boolean array positionally indicating where an overlap occurs. 

1332 

1333 See Also 

1334 -------- 

1335 Interval.overlaps : Check whether two Interval objects overlap. 

1336 

1337 Examples 

1338 -------- 

1339 %(examples)s 

1340 >>> intervals.overlaps(pd.Interval(0.5, 1.5)) 

1341 array([ True, True, False]) 

1342 

1343 Intervals that share closed endpoints overlap: 

1344 

1345 >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) 

1346 array([ True, True, True]) 

1347 

1348 Intervals that only have an open endpoint in common do not overlap: 

1349 

1350 >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) 

1351 array([False, True, False]) 

1352 """ 

1353 ) 

1354 

1355 @Appender( 

1356 _interval_shared_docs["overlaps"] 

1357 % { 

1358 "klass": "IntervalArray", 

1359 "examples": textwrap.dedent( 

1360 """\ 

1361 >>> data = [(0, 1), (1, 3), (2, 4)] 

1362 >>> intervals = pd.arrays.IntervalArray.from_tuples(data) 

1363 >>> intervals 

1364 <IntervalArray> 

1365 [(0, 1], (1, 3], (2, 4]] 

1366 Length: 3, dtype: interval[int64, right] 

1367 """ 

1368 ), 

1369 } 

1370 ) 

def overlaps(self, other):
    # The docstring is attached by the Appender decorator above.
    if isinstance(other, (IntervalArray, ABCIntervalIndex)):
        raise NotImplementedError
    if not isinstance(other, Interval):
        raise TypeError(
            f"`other` must be Interval-like, got {type(other).__name__}"
        )

    # Touching endpoints count as an overlap only when both of the
    # endpoints that meet are closed; otherwise the comparison is strict.
    if self.closed_left and other.closed_right:
        starts_before_other_ends = le
    else:
        starts_before_other_ends = lt
    if other.closed_left and self.closed_right:
        other_starts_before_end = le
    else:
        other_starts_before_end = lt

    # Overlapping is the negation of being disjoint, where
    #   disjoint = (A.left > B.right) or (B.left > A.right)
    # Writing out the negated form directly needs fewer operations.
    first = starts_before_other_ends(self.left, other.right)
    second = other_starts_before_end(other.left, self.right)
    return first & second

1386 

1387 # --------------------------------------------------------------------- 

1388 

1389 @property 

def closed(self) -> IntervalClosedType:
    """
    String describing the inclusive side of the intervals.

    Either ``left``, ``right``, ``both`` or ``neither``.
    """
    # The value is stored on the dtype, not on the array itself.
    dtype = self.dtype
    return dtype.closed

1397 

1398 _interval_shared_docs["set_closed"] = textwrap.dedent( 

1399 """ 

1400 Return an identical %(klass)s closed on the specified side. 

1401 

1402 Parameters 

1403 ---------- 

1404 closed : {'left', 'right', 'both', 'neither'} 

1405 Whether the intervals are closed on the left-side, right-side, both 

1406 or neither. 

1407 

1408 Returns 

1409 ------- 

1410 %(klass)s 

1411 

1412 %(examples)s\ 

1413 """ 

1414 ) 

1415 

1416 @Appender( 

1417 _interval_shared_docs["set_closed"] 

1418 % { 

1419 "klass": "IntervalArray", 

1420 "examples": textwrap.dedent( 

1421 """\ 

1422 Examples 

1423 -------- 

1424 >>> index = pd.arrays.IntervalArray.from_breaks(range(4)) 

1425 >>> index 

1426 <IntervalArray> 

1427 [(0, 1], (1, 2], (2, 3]] 

1428 Length: 3, dtype: interval[int64, right] 

1429 >>> index.set_closed('both') 

1430 <IntervalArray> 

1431 [[0, 1], [1, 2], [2, 3]] 

1432 Length: 3, dtype: interval[int64, both] 

1433 """ 

1434 ), 

1435 } 

1436 ) 

def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
    # The docstring is attached by the Appender decorator above.
    if closed not in VALID_CLOSED:
        raise ValueError(f"invalid option for 'closed': {closed}")

    # The endpoint data is passed through unchanged; only the dtype, which
    # carries ``closed``, is rebuilt.
    new_dtype = IntervalDtype(self._left.dtype, closed=closed)
    return self._simple_new(self._left, self._right, dtype=new_dtype)

1445 

1446 _interval_shared_docs[ 

1447 "is_non_overlapping_monotonic" 

1448 ] = """ 

1449 Return a boolean whether the %(klass)s is non-overlapping and monotonic. 

1450 

1451 Non-overlapping means (no Intervals share points), and monotonic means 

1452 either monotonic increasing or monotonic decreasing. 

1453 """ 

1454 

1455 @property 

1456 @Appender( 

1457 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs 

1458 ) 

def is_non_overlapping_monotonic(self) -> bool:
    # The docstring is attached by the Appender decorator above.
    #
    # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
    # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
    # we already require left <= right

    # closed == 'both': equal neighbouring endpoints share a point, so the
    # comparison must be strict. For every other ``closed`` at least one
    # side is excluded, so equality does not imply an overlap.
    strict = self.closed == "both"
    increasing = operator.lt if strict else operator.le
    decreasing = operator.gt if strict else operator.ge

    prev_left, prev_right = self._left[:-1], self._right[:-1]
    next_left, next_right = self._left[1:], self._right[1:]

    return bool(
        increasing(prev_right, next_left).all()
        or decreasing(prev_left, next_right).all()
    )

1478 

1479 # --------------------------------------------------------------------- 

1480 # Conversion 

1481 

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
    """
    Return the IntervalArray's data as a numpy array of Interval
    objects (with dtype='object')

    Positions flagged by ``self.isna()`` hold ``np.nan`` instead of an
    Interval. The ``dtype`` argument is not used.
    """
    lefts = self._left
    rights = self._right
    na_mask = self.isna()
    closed = self.closed

    boxed = np.empty(len(lefts), dtype=object)
    for pos, (lower, upper, missing) in enumerate(zip(lefts, rights, na_mask)):
        boxed[pos] = np.nan if missing else Interval(lower, upper, closed)
    return boxed

1499 

def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    Parameters
    ----------
    type : pyarrow type, optional
        If it equals the storage type of the interval type, the plain
        storage array is returned. If it is an ``ArrowIntervalType`` it
        must equal the interval type derived from this array.

    Raises
    ------
    TypeError
        If the subtype cannot be converted by ``pyarrow.from_numpy_dtype``,
        or if ``type`` is neither the storage type nor a matching
        ``ArrowIntervalType``.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    np_subtype = self.dtype.subtype
    try:
        subtype = pyarrow.from_numpy_dtype(np_subtype)
    except TypeError as err:
        raise TypeError(
            f"Conversion to arrow with subtype '{np_subtype}' "
            "is not supported"
        ) from err

    interval_type = ArrowIntervalType(subtype, self.closed)
    sides = [
        pyarrow.array(self._left, type=subtype, from_pandas=True),
        pyarrow.array(self._right, type=subtype, from_pandas=True),
    ]
    storage_array = pyarrow.StructArray.from_arrays(sides, names=["left", "right"])

    na_mask = self.isna()
    if na_mask.any():
        # if there are missing values, set validity bitmap also on the array level
        validity = pyarrow.array(~na_mask).buffers()[1]
        storage_array = pyarrow.StructArray.from_buffers(
            storage_array.type,
            len(storage_array),
            [validity],
            children=[storage_array.field(0), storage_array.field(1)],
        )

    if type is None:
        return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)

    if type.equals(interval_type.storage_type):
        return storage_array

    if not isinstance(type, ArrowIntervalType):
        raise TypeError(f"Not supported to convert IntervalArray to '{type}' type")

    # ensure we have the same subtype and closed attributes
    if not type.equals(interval_type):
        raise TypeError(
            "Not supported to convert IntervalArray to type with "
            f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
            f"and 'closed' ({self.closed} vs {type.closed}) attributes"
        )

    return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)

1551 

1552 _interval_shared_docs[ 

1553 "to_tuples" 

1554 ] = """ 

1555 Return an %(return_type)s of tuples of the form (left, right). 

1556 

1557 Parameters 

1558 ---------- 

1559 na_tuple : bool, default True 

1560 Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA 

1561 value itself if False, ``nan``. 

1562 

1563 Returns 

1564 ------- 

1565 tuples: %(return_type)s 

1566 %(examples)s\ 

1567 """ 

1568 

1569 @Appender( 

1570 _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""} 

1571 ) 

def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
    # The docstring is attached by the Appender decorator above.
    pairs = com.asarray_tuplesafe(zip(self._left, self._right))
    if na_tuple:
        return pairs
    # GH 18756: replace the tuple at each missing position by a bare NaN.
    return np.where(~self.isna(), pairs, np.nan)

1578 

1579 # --------------------------------------------------------------------- 

1580 

def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Set ``value`` in place at the positions selected by ``mask``.

    ``value`` is first split into left and right parts by
    ``_validate_setitem_value``; each part is then written to the matching
    side. ndarray-backed sides use ``np.putmask``; other sides use their
    own ``_putmask`` method.
    """
    value_left, value_right = self._validate_setitem_value(value)

    if not isinstance(self._left, np.ndarray):
        self._left._putmask(mask, value_left)
        # Both sides are expected to use the same kind of container.
        assert not isinstance(self._right, np.ndarray)
        self._right._putmask(mask, value_right)
        return

    np.putmask(self._left, mask, value_left)
    assert isinstance(self._right, np.ndarray)
    np.putmask(self._right, mask, value_right)

1592 

def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
    """
    Return a new IntervalArray inserting new item at location. Follows
    Python numpy.insert semantics for negative values. Only Interval
    objects and NA can be inserted into an IntervalArray

    Parameters
    ----------
    loc : int
    item : Interval

    Returns
    -------
    IntervalArray
    """
    # ``_validate_scalar`` unboxes the item into one value per side.
    lower, upper = self._validate_scalar(item)

    return self._shallow_copy(
        self.left.insert(loc, lower),
        self.right.insert(loc, upper),
    )

1614 

def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
    """
    Return a new array with the entries at ``loc`` removed from both sides.

    ndarray-backed sides are handled with ``np.delete``; other sides use
    their own ``delete`` method.
    """
    left_side, right_side = self._left, self._right

    if not isinstance(left_side, np.ndarray):
        new_left = left_side.delete(loc)
        # Both sides are expected to use the same kind of container.
        assert not isinstance(right_side, np.ndarray)
        new_right = right_side.delete(loc)
    else:
        new_left = np.delete(left_side, loc)
        assert isinstance(right_side, np.ndarray)
        new_right = np.delete(right_side, loc)

    return self._shallow_copy(left=new_left, right=new_right)

1625 

1626 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) 

def repeat(
    self: IntervalArrayT,
    repeats: int | Sequence[int],
    axis: AxisInt | None = None,
) -> IntervalArrayT:
    # The docstring is attached by the Appender decorator above.
    # ``axis`` is only passed to the numpy-compat validator.
    nv.validate_repeat((), {"axis": axis})

    return self._shallow_copy(
        left=self.left.repeat(repeats),
        right=self.right.repeat(repeats),
    )

1636 

1637 _interval_shared_docs["contains"] = textwrap.dedent( 

1638 """ 

1639 Check elementwise if the Intervals contain the value. 

1640 

1641 Return a boolean mask whether the value is contained in the Intervals 

1642 of the %(klass)s. 

1643 

1644 Parameters 

1645 ---------- 

1646 other : scalar 

1647 The value to check whether it is contained in the Intervals. 

1648 

1649 Returns 

1650 ------- 

1651 boolean array 

1652 

1653 See Also 

1654 -------- 

1655 Interval.contains : Check whether Interval object contains value. 

1656 %(klass)s.overlaps : Check if an Interval overlaps the values in the 

1657 %(klass)s. 

1658 

1659 Examples 

1660 -------- 

1661 %(examples)s 

1662 >>> intervals.contains(0.5) 

1663 array([ True, False, False]) 

1664 """ 

1665 ) 

1666 

1667 @Appender( 

1668 _interval_shared_docs["contains"] 

1669 % { 

1670 "klass": "IntervalArray", 

1671 "examples": textwrap.dedent( 

1672 """\ 

1673 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)]) 

1674 >>> intervals 

1675 <IntervalArray> 

1676 [(0, 1], (1, 3], (2, 4]] 

1677 Length: 3, dtype: interval[int64, right] 

1678 """ 

1679 ), 

1680 } 

1681 ) 

def contains(self, other):
    # The docstring is attached by the Appender decorator above.
    if isinstance(other, Interval):
        raise NotImplementedError("contains not implemented for two intervals")

    # An open side excludes its endpoint, so the comparison is strict there.
    if self.open_left:
        above_left = self._left < other
    else:
        above_left = self._left <= other

    if self.open_right:
        below_right = other < self._right
    else:
        below_right = other <= self._right

    return above_left & below_right

1689 

def isin(self, values) -> npt.NDArray[np.bool_]:
    """
    Return a boolean array marking which elements occur in ``values``.

    Interval-typed ``values`` with a different ``closed``, or where exactly
    one of the two sides needs i8 conversion, can never match and give an
    all-False result. Equal dtypes are compared through a complex128 view
    of the combined endpoints. Everything else falls back to the
    module-level ``isin`` on object-dtype casts.
    """
    if not hasattr(values, "dtype"):
        values = np.array(values)
    values = extract_array(values, extract_numpy=True)

    if is_interval_dtype(values.dtype):
        if self.closed != values.closed:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

        if is_dtype_equal(self.dtype, values.dtype):
            # GH#38353 instead of casting to object, operating on a
            #  complex128 ndarray is much more performant.
            mine = self._combined.view("complex128")
            theirs = values._combined.view("complex128")
            # error: Argument 1 to "in1d" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any],
            # ndarray[Any, dtype[Any]]]"; expected
            # "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[
            # Union[bool, int, float, complex, str, bytes]]]"
            return np.in1d(mine, theirs)  # type: ignore[arg-type]

        self_needs_i8 = needs_i8_conversion(self.left.dtype)
        other_needs_i8 = needs_i8_conversion(values.left.dtype)
        if self_needs_i8 ^ other_needs_i8:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

    # ``isin`` below is the module-level algorithm, not this method.
    return isin(self.astype(object), values.astype(object))

1721 

1722 @property 

def _combined(self) -> IntervalSideT:
    """
    Return both endpoint columns joined side by side along axis 1.

    Each side's ``_values`` is reshaped to one column. Sides that need i8
    conversion are joined with their own ``_concat_same_type``; all others
    with ``np.concatenate``.
    """
    left_col = self.left._values.reshape(-1, 1)
    right_col = self.right._values.reshape(-1, 1)
    columns = [left_col, right_col]

    if needs_i8_conversion(left_col.dtype):
        return left_col._concat_same_type(columns, axis=1)
    return np.concatenate(columns, axis=1)

1731 

def _from_combined(self, combined: np.ndarray) -> IntervalArray:
    """
    Create a new IntervalArray with our dtype from a 1D complex128 ndarray.

    ``combined`` is viewed as int64 and reshaped to two columns; column 0
    becomes the left side and column 1 the right side.
    """
    pairs = combined.view("i8").reshape(-1, 2)
    left_bits, right_bits = pairs[:, 0], pairs[:, 1]

    dtype = self._left.dtype
    if not needs_i8_conversion(dtype):
        assert isinstance(dtype, np.dtype)
        # Reinterpret the int64 bit patterns as the original numpy dtype.
        new_left = left_bits.view(dtype)
        new_right = right_bits.view(dtype)
    else:
        assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
        new_left = type(self._left)._from_sequence(left_bits, dtype=dtype)
        assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
        new_right = type(self._right)._from_sequence(right_bits, dtype=dtype)

    return self._shallow_copy(left=new_left, right=new_right)

1749 

def unique(self) -> IntervalArray:
    """
    Return an IntervalArray holding each distinct interval.

    Every (left, right) pair is viewed as one complex128 value so the
    module-level ``unique`` can deduplicate both endpoints at once.
    """
    # No overload variant of "__getitem__" of "ExtensionArray" matches argument
    # type "Tuple[slice, int]"
    as_complex = self._combined.view("complex128")[:, 0]  # type: ignore[call-overload]
    # ``unique`` below is the module-level algorithm, not this method.
    distinct = unique(as_complex)
    return self._from_combined(distinct[:, None])

1758 

1759 

def _maybe_convert_platform_interval(values) -> ArrayLike:
    """
    Try to do platform conversion, with special casing for IntervalArray.

    Alters the default return dtype in certain cases to be compatible with
    IntervalArray. For example, empty lists return with integer dtype
    instead of object dtype, which is prohibited for IntervalArray.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
    """
    if isinstance(values, (list, tuple)) and len(values) == 0:
        # GH 19016
        # empty lists/tuples get object dtype by default, but this is
        # prohibited for IntervalArray, so coerce to integer instead
        return np.array([], dtype=np.int64)

    if not is_list_like(values) or isinstance(values, ABCDataFrame):
        # This will raise later, but we avoid passing to maybe_convert_platform
        return values

    if is_categorical_dtype(values):
        values = np.asarray(values)
    elif hasattr(values, "dtype") or isinstance(values, (list, tuple, range)):
        values = extract_array(values, extract_numpy=True)
    else:
        # TODO: should we just cast these to list?
        return values

    if not hasattr(values, "dtype"):
        values = np.asarray(values)
        # Integer results that are not int64 are widened to int64.
        if is_integer_dtype(values) and values.dtype != np.int64:
            values = values.astype(np.int64)
    return values