Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/interval.py: 24%

1from __future__ import annotations

3import operator

4from operator import (

5 le,

6 lt,

8import textwrap

9from typing import (

10 TYPE_CHECKING,

11 Iterator,

12 Literal,

13 Sequence,

14 TypeVar,

15 Union,

16 cast,

17 overload,

18)

20import numpy as np

22from pandas._config import get_option

24from pandas._libs import lib

25from pandas._libs.interval import (

26 VALID_CLOSED,

27 Interval,

28 IntervalMixin,

29 intervals_to_interval_bounds,

30)

31from pandas._libs.missing import NA

32from pandas._typing import (

33 ArrayLike,

34 AxisInt,

35 Dtype,

36 IntervalClosedType,

37 NpDtype,

38 PositionalIndexer,

39 ScalarIndexer,

40 SequenceIndexer,

41 SortKind,

42 TimeArrayLike,

43 npt,

44)

45from pandas.compat.numpy import function as nv

46from pandas.errors import IntCastingNaNError

47from pandas.util._decorators import Appender

49from pandas.core.dtypes.cast import (

50 LossySetitemError,

51 maybe_upcast_numeric_to_64bit,

52)

53from pandas.core.dtypes.common import (

54 is_categorical_dtype,

55 is_dtype_equal,

56 is_float_dtype,

57 is_integer_dtype,

58 is_interval_dtype,

59 is_list_like,

60 is_object_dtype,

61 is_scalar,

62 is_string_dtype,

63 needs_i8_conversion,

64 pandas_dtype,

65)

66from pandas.core.dtypes.dtypes import IntervalDtype

67from pandas.core.dtypes.generic import (

68 ABCDataFrame,

69 ABCDatetimeIndex,

70 ABCIntervalIndex,

71 ABCPeriodIndex,

72)

73from pandas.core.dtypes.missing import (

74 is_valid_na_for_dtype,

75 isna,

76 notna,

77)

79from pandas.core.algorithms import (

80 isin,

81 take,

82 unique,

83 value_counts,

84)

85from pandas.core.arrays.base import (

86 ExtensionArray,

87 _extension_array_shared_docs,

88)

89from pandas.core.arrays.datetimes import DatetimeArray

90from pandas.core.arrays.timedeltas import TimedeltaArray

91import pandas.core.common as com

92from pandas.core.construction import (

93 array as pd_array,

94 ensure_wrapped_if_datetimelike,

95 extract_array,

96)

97from pandas.core.indexers import check_array_indexer

98from pandas.core.ops import (

99 invalid_comparison,

100 unpack_zerodim_and_defer,

101)

102

103if TYPE_CHECKING:

104 from pandas import (

105 Index,

106 Series,

107 )

108

109

110IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")

111IntervalSideT = Union[TimeArrayLike, np.ndarray]

112IntervalOrNA = Union[Interval, float]

113

114_interval_shared_docs: dict[str, str] = {}

115

116_shared_docs_kwargs = {

117 "klass": "IntervalArray",

118 "qualname": "arrays.IntervalArray",

119 "name": "",

120}

121

122

123_interval_shared_docs[

124 "class"

125] = """

126%(summary)s

127

128.. versionadded:: %(versionadded)s

129

130Parameters

131----------

132data : array-like (1-dimensional)

133 Array-like (ndarray, :class:`DateTimeArray`, :class:`TimeDeltaArray`) containing

134 Interval objects from which to build the %(klass)s.

135closed : {'left', 'right', 'both', 'neither'}, default 'right'

136 Whether the intervals are closed on the left-side, right-side, both or

137 neither.

138dtype : dtype or None, default None

139 If None, dtype will be inferred.

140copy : bool, default False

141 Copy the input data.

142%(name)s\

143verify_integrity : bool, default True

144 Verify that the %(klass)s is valid.

145

146Attributes

147----------

148left

149right

150closed

151mid

152length

153is_empty

154is_non_overlapping_monotonic

155%(extra_attributes)s\

156

157Methods

158-------

159from_arrays

160from_tuples

161from_breaks

162contains

163overlaps

164set_closed

165to_tuples

166%(extra_methods)s\

167

168See Also

169--------

170Index : The base pandas Index type.

171Interval : A bounded slice-like interval; the elements of an %(klass)s.

172interval_range : Function to create a fixed frequency IntervalIndex.

173cut : Bin values into discrete Intervals.

174qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.

175

176Notes

177-----

178See the `user guide

179<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__

180for more.

181

182%(examples)s\

183"""

184

185

186@Appender(

187 _interval_shared_docs["class"]

188 % {

189 "klass": "IntervalArray",

190 "summary": "Pandas array for interval data that are closed on the same side.",

191 "versionadded": "0.24.0",

192 "name": "",

193 "extra_attributes": "",

194 "extra_methods": "",

195 "examples": textwrap.dedent(

196 """\

197 Examples

198 --------

199 A new ``IntervalArray`` can be constructed directly from an array-like of

200 ``Interval`` objects:

201

202 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])

203 <IntervalArray>

204 [(0, 1], (1, 5]]

205 Length: 2, dtype: interval[int64, right]

206

207 It may also be constructed using one of the constructor

208 methods: :meth:`IntervalArray.from_arrays`,

209 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.

210 """

211 ),

212 }

213)

214class IntervalArray(IntervalMixin, ExtensionArray):

215 can_hold_na = True

216 _na_value = _fill_value = np.nan

217

@property
def ndim(self) -> Literal[1]:
    """Number of dimensions; this array type always reports 1."""
    dimensions: Literal[1] = 1
    return dimensions

221

222 # To make mypy recognize the fields

223 _left: IntervalSideT

224 _right: IntervalSideT

225 _dtype: IntervalDtype

226

227 # ---------------------------------------------------------------------

228 # Constructors

229

def __new__(
    cls: type[IntervalArrayT],
    data,
    closed=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    verify_integrity: bool = True,
):
    """
    Build an array from a collection of intervals or an existing instance.

    Parameters
    ----------
    data : collection
        Interval-like data; a scalar raises ``TypeError``.
    closed : str, optional
        Closed side; when falsy it is taken from ``data``.
    dtype : dtype, optional
        Forwarded to ``_ensure_simple_new_inputs`` when ``data`` is not
        already an instance of ``cls``.
    copy : bool, default False
        Forwarded to ``_ensure_simple_new_inputs`` in the same case.
    verify_integrity : bool, default True
        Whether to run ``cls._validate`` on the resulting sides.
    """
    data = extract_array(data, extract_numpy=True)

    if not isinstance(data, cls):
        # scalars are rejected outright
        if is_scalar(data):
            raise TypeError(
                f"{cls.__name__}(...) must be called with a collection "
                f"of some kind, {data} was passed"
            )

        # empty or all-NA input may need converting first
        data = _maybe_convert_platform_interval(data)
        left, right, inferred_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None
        )
        if left.dtype == object:
            left = lib.maybe_convert_objects(left)
            right = lib.maybe_convert_objects(right)
        closed = closed or inferred_closed

        left, right, dtype = cls._ensure_simple_new_inputs(
            left, right, closed=closed, copy=copy, dtype=dtype
        )
    else:
        # Re-use the sides of the existing array as they are.
        left, right = data._left, data._right
        closed = closed or data.closed
        dtype = IntervalDtype(left.dtype, closed=closed)

    if verify_integrity:
        cls._validate(left, right, dtype=dtype)

    return cls._simple_new(left, right, dtype=dtype)

280

@classmethod
def _simple_new(
    cls: type[IntervalArrayT],
    left: IntervalSideT,
    right: IntervalSideT,
    dtype: IntervalDtype,
) -> IntervalArrayT:
    """
    Create an instance directly from prepared sides; nothing is checked here.

    The three arguments are stored as ``_left``, ``_right`` and ``_dtype``.
    """
    obj = IntervalMixin.__new__(cls)
    obj._left, obj._right, obj._dtype = left, right, dtype
    return obj

294

@classmethod
def _ensure_simple_new_inputs(
    cls,
    left,
    right,
    closed: IntervalClosedType | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> tuple[IntervalSideT, IntervalSideT, IntervalDtype]:
    """
    Normalise ``left``/``right`` and derive the dtype for ``cls._simple_new``.

    Returns
    -------
    tuple
        ``(left, right, dtype)`` where ``dtype`` is built from the final
        ``left.dtype`` and the resolved ``closed``.

    Raises
    ------
    TypeError
        If ``dtype`` is given but is not an interval dtype, or the sides
        have a categorical/string dtype.
    ValueError
        If ``closed`` disagrees with ``dtype.closed``, the two sides end up
        with different container types, the sides are period-based, or
        datetime sides carry different time zones.
    """
    from pandas.core.indexes.base import ensure_index

    left = maybe_upcast_numeric_to_64bit(ensure_index(left, copy=copy))
    right = maybe_upcast_numeric_to_64bit(ensure_index(right, copy=copy))

    # Resolve ``closed``: explicit value, then the dtype's, then "right".
    if closed is None and isinstance(dtype, IntervalDtype):
        closed = dtype.closed
    closed = closed or "right"

    if dtype is not None:
        # GH 19262: only an IntervalDtype may override the inferred dtype
        dtype = pandas_dtype(dtype)
        if not is_interval_dtype(dtype):
            raise TypeError(f"dtype must be an IntervalDtype, got {dtype}")

        dtype = cast(IntervalDtype, dtype)
        if dtype.subtype is not None:
            left = left.astype(dtype.subtype)
            right = right.astype(dtype.subtype)

        if dtype.closed is None:
            # possibly loading an old pickle
            dtype = IntervalDtype(dtype.subtype, closed)
        elif closed != dtype.closed:
            raise ValueError("closed keyword does not match dtype.closed")

    # A float side paired with an integer side: cast the integer side.
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        raise ValueError(
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        raise TypeError(
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
    if isinstance(left, ABCPeriodIndex):
        raise ValueError("Period dtypes are not supported, use a PeriodIndex instead")
    if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        raise ValueError(
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )

    # dt64/td64 sides are kept as DatetimeArray/TimedeltaArray, not ndarray
    left = extract_array(ensure_wrapped_if_datetimelike(left), extract_numpy=True)
    right = extract_array(ensure_wrapped_if_datetimelike(right), extract_numpy=True)

    left_base = getattr(left, "_ndarray", left).base
    right_base = getattr(right, "_ndarray", right).base
    if left_base is not None and left_base is right_base:
        # Sides backed by the same buffer: a later setitem could corrupt
        # the array, so give the right side its own data.
        right = right.copy()

    return left, right, IntervalDtype(left.dtype, closed=closed)

380

@classmethod
def _from_sequence(
    cls: type[IntervalArrayT],
    scalars,
    *,
    dtype: Dtype | None = None,
    copy: bool = False,
) -> IntervalArrayT:
    """Construct an instance by passing ``scalars`` to the class constructor."""
    constructor_kwargs = {"dtype": dtype, "copy": copy}
    return cls(scalars, **constructor_kwargs)

390

@classmethod
def _from_factorized(
    cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT
) -> IntervalArrayT:
    """
    Rebuild an array from factorized ``values`` using ``original.closed``.
    """
    if not len(values):
        # An empty result arrives as object dtype, from which an array
        # cannot be built; cast it to the subtype of ``original`` first.
        values = values.astype(original.dtype.subtype)
    return cls(values, closed=original.closed)

401

# Docstring template for ``from_breaks``; filled in with %-formatting below.
# The indentation inside the literal is part of the rendered text (see the
# backslash continuations), so it is kept exactly as is.
_interval_shared_docs["from_breaks"] = textwrap.dedent(
    """
        Construct an %(klass)s from an array of splits.

        Parameters
        ----------
        breaks : array-like (1-dimensional)
            Left and right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            Copy the data.
        dtype : dtype or None, default None
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_arrays : Construct from a left and right array.
        %(klass)s.from_tuples : Construct from a sequence of tuples.

        %(examples)s\
        """
)


@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        """
        ),
    }
)
def from_breaks(
    cls: type[IntervalArrayT],
    breaks,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # The user-facing docstring is attached by the Appender decorator above.
    breaks = _maybe_convert_platform_interval(breaks)

    # Consecutive break points form the left and right bound of each interval.
    left_bounds = breaks[:-1]
    right_bounds = breaks[1:]
    return cls.from_arrays(left_bounds, right_bounds, closed, copy=copy, dtype=dtype)

461

# Docstring template for ``from_arrays``; filled in with %-formatting below.
# The indentation inside the literal is part of the rendered text (see the
# backslash continuations), so it is kept exactly as is.
_interval_shared_docs["from_arrays"] = textwrap.dedent(
    """
        Construct from two arrays defining the left and right bounds.

        Parameters
        ----------
        left : array-like (1-dimensional)
            Left bounds for each interval.
        right : array-like (1-dimensional)
            Right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            Copy the data.
        dtype : dtype, optional
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        Raises
        ------
        ValueError
            When a value is missing in only one of `left` or `right`.
            When a value in `left` is greater than the corresponding value
            in `right`.

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_breaks : Construct an %(klass)s from an array of
            splits.
        %(klass)s.from_tuples : Construct an %(klass)s from an
            array-like of tuples.

        Notes
        -----
        Each element of `left` must be less than or equal to the `right`
        element at the same position. If an element is missing, it must be
        missing in both `left` and `right`. A TypeError is raised when
        using an unsupported type for `left` or `right`. At the moment,
        'category', 'object', and 'string' subtypes are not supported.

        %(examples)s\
        """
)


@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % {
        "klass": "IntervalArray",
        "name": "",
        # The "Examples" header keeps the doctest out of the Notes section
        # and matches the from_breaks / from_tuples docstrings.
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        """
        ),
    }
)
def from_arrays(
    cls: type[IntervalArrayT],
    left,
    right,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # The user-facing docstring is attached by the Appender decorator above.
    left = _maybe_convert_platform_interval(left)
    right = _maybe_convert_platform_interval(right)

    left, right, dtype = cls._ensure_simple_new_inputs(
        left,
        right,
        closed=closed,
        copy=copy,
        dtype=dtype,
    )
    # Unlike the main constructor, validation here is unconditional.
    cls._validate(left, right, dtype=dtype)

    return cls._simple_new(left, right, dtype=dtype)

549

# Docstring template for ``from_tuples``; filled in with %-formatting below.
# The indentation inside the literal is part of the rendered text (see the
# backslash continuations), so it is kept exactly as is.
_interval_shared_docs["from_tuples"] = textwrap.dedent(
    """
        Construct an %(klass)s from an array-like of tuples.

        Parameters
        ----------
        data : array-like (1-dimensional)
            Array of tuples.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            By-default copy the data, this is compat only and ignored.
        dtype : dtype or None, default None
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_arrays : Construct an %(klass)s from a left and
                                    right array.
        %(klass)s.from_breaks : Construct an %(klass)s from an array of
                                    splits.

        %(examples)s\
        """
)


@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % {
        "klass": "IntervalArray",
        "name": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
        <IntervalArray>
        [(0, 1], (1, 2]]
        Length: 2, dtype: interval[int64, right]
        """
        ),
    }
)
def from_tuples(
    cls: type[IntervalArrayT],
    data,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # The user-facing docstring is attached by the Appender decorator above.
    # Empty input is passed through unchanged so that its dtype is kept.
    lefts, rights = ([], []) if len(data) else (data, data)

    klass_name = cls.__name__
    for item in data:
        if isinstance(item, tuple) or not isna(item):
            try:
                # each item must unpack into exactly two bounds
                low, high = item
            except ValueError as err:
                msg = f"{klass_name}.from_tuples requires tuples of length 2, got {item}"
                raise ValueError(msg) from err
            except TypeError as err:
                msg = f"{klass_name}.from_tuples received an invalid item, {item}"
                raise TypeError(msg) from err
        else:
            # a non-tuple NA entry marks a missing interval
            low = high = np.nan
        lefts.append(low)
        rights.append(high)

    # ``copy`` is accepted for compatibility only and is not forwarded.
    return cls.from_arrays(lefts, rights, closed, copy=False, dtype=dtype)

632

@classmethod
def _validate(cls, left, right, dtype: IntervalDtype) -> None:
    """
    Check that ``left``, ``right`` and ``dtype`` describe a valid array.

    Raises
    ------
    ValueError
        If ``dtype`` is not an IntervalDtype, the sides differ in length,
        a value is missing on only one side, or any non-missing left bound
        is greater than its right bound.
    """
    if not isinstance(dtype, IntervalDtype):
        raise ValueError(f"invalid dtype: {dtype}")
    if len(left) != len(right):
        raise ValueError("left and right must have the same length")

    left_present = notna(left)
    right_present = notna(right)
    masks_agree = (left_present == right_present).all()
    if not masks_agree:
        raise ValueError(
            "missing values must be missing in the same "
            "location both left and right sides"
        )

    # Only positions present on the left are compared.
    ordered = (left[left_present] <= right[left_present]).all()
    if not ordered:
        raise ValueError("left side of interval must be <= right side")

662

def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT:
    """
    Return a new array built from ``left`` and ``right``, keeping ``closed``.

    Parameters
    ----------
    left : Index
        Values to use as the left bounds.
    right : Index
        Values to use as the right bounds.
    """
    requested = IntervalDtype(left.dtype, closed=self.closed)
    new_left, new_right, new_dtype = self._ensure_simple_new_inputs(
        left, right, dtype=requested
    )
    return self._simple_new(new_left, new_right, dtype=new_dtype)

678

679 # ---------------------------------------------------------------------

680 # Descriptive

681

@property
def dtype(self) -> IntervalDtype:
    """The IntervalDtype stored on this array."""
    stored = self._dtype
    return stored

685

@property
def nbytes(self) -> int:
    """Sum of the ``nbytes`` reported by the left and the right side."""
    left_bytes = self.left.nbytes
    right_bytes = self.right.nbytes
    return left_bytes + right_bytes

689

@property
def size(self) -> int:
    """Number of elements, read from the left side."""
    # Taken from ``left`` so that self.values need not be materialized.
    left_side = self.left
    return left_side.size

694

695 # ---------------------------------------------------------------------

696 # EA Interface

697

def __iter__(self) -> Iterator:
    """Iterate over the result of ``np.asarray(self)``."""
    as_ndarray = np.asarray(self)
    return iter(as_ndarray)

700

def __len__(self) -> int:
    """Length of the array, i.e. the length of the stored left side."""
    left_side = self._left
    return len(left_side)

703

@overload
def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
    ...


@overload
def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT:
    ...


def __getitem__(
    self: IntervalArrayT, key: PositionalIndexer
) -> IntervalArrayT | IntervalOrNA:
    """
    Select a single element or a sub-array.

    Returns
    -------
    Interval, the fill value, or an array
        An array result yields a new array of the same dtype; a missing
        scalar yields ``self._fill_value``; any other scalar is wrapped in
        an ``Interval`` using ``self.closed``.

    Raises
    ------
    ValueError
        If indexing produces a result with more than one dimension.
    """
    key = check_array_indexer(self, key)
    picked_left = self._left[key]
    picked_right = self._right[key]

    if isinstance(picked_left, (np.ndarray, ExtensionArray)):
        if np.ndim(picked_left) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        # mypy: indexing may in principle return a scalar for the right
        # side, which _simple_new's annotation does not accept.
        return self._simple_new(
            picked_left, picked_right, dtype=self.dtype  # type: ignore[arg-type]
        )

    # scalar result
    if is_scalar(picked_left) and isna(picked_left):
        return self._fill_value
    return Interval(picked_left, picked_right, self.closed)

732

def __setitem__(self, key, value) -> None:
    """Assign ``value`` at ``key`` by writing into both stored sides in place."""
    # The value is validated before the key is normalised.
    new_left, new_right = self._validate_setitem_value(value)
    indexer = check_array_indexer(self, key)

    self._left[indexer] = new_left
    self._right[indexer] = new_right

739

def _cmp_method(self, other, op):
    """
    Element-wise comparison of this array with ``other`` using ``op``.

    ``op`` is one of the comparison functions from :mod:`operator`.
    Intervals are ordered by their left bound first, then their right bound.

    Raises
    ------
    ValueError
        If ``other`` is list-like and its length differs from ``len(self)``.
    """
    # List-likes become pandas arrays; non-Interval scalars never match.
    if is_list_like(other):
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")
        other = pd_array(other)
    elif not isinstance(other, Interval):
        if other is not NA:
            return invalid_comparison(self, other, op)

        # GH#31882: comparing against NA gives a fully masked BooleanArray
        from pandas.core.arrays import BooleanArray

        values = np.empty(self.shape, dtype=bool)
        all_masked = np.ones(self.shape, dtype=bool)
        return BooleanArray(values, all_masked)

    # Work out the dtype of the elements being compared against.
    if isinstance(other, Interval):
        other_dtype = pandas_dtype("interval")
    elif is_categorical_dtype(other.dtype):
        # for categoricals the dtype of the categories is what matters
        other_dtype = other.categories.dtype

        # interval categories: unpack into intervals if ``closed`` matches
        if is_interval_dtype(other_dtype):
            if self.closed != other.categories.closed:
                return invalid_comparison(self, other, op)

            other = other.categories.take(
                other.codes, allow_fill=True, fill_value=other.categories._na_value
            )
    else:
        other_dtype = other.dtype

    # Interval-like: needs the same ``closed``, then compare the endpoints.
    if is_interval_dtype(other_dtype):
        if self.closed != other.closed:
            return invalid_comparison(self, other, op)
        if not isinstance(other, Interval):
            other = type(self)(other)

        if op is operator.eq:
            return (self._left == other.left) & (self._right == other.right)
        if op is operator.ne:
            return (self._left != other.left) | (self._right != other.right)
        if op is operator.gt:
            return (self._left > other.left) | (
                (self._left == other.left) & (self._right > other.right)
            )
        if op is operator.ge:
            return (self == other) | (self > other)
        if op is operator.lt:
            return (self._left < other.left) | (
                (self._left == other.left) & (self._right < other.right)
            )
        # remaining case: operator.le
        return (self == other) | (self < other)

    # Neither interval nor object dtype: nothing can match.
    if not is_object_dtype(other_dtype):
        return invalid_comparison(self, other, op)

    # Object dtype: compare element by element.
    outcome = np.zeros(len(self), dtype=bool)
    for pos, element in enumerate(other):
        try:
            outcome[pos] = op(self[pos], element)
        except TypeError:
            if element is not NA:
                raise
            # a comparison with NA yields NA, so the result becomes object
            # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
            outcome = outcome.astype(object)
            outcome[pos] = NA
    return outcome

818

# Rich comparisons.  Each operator hands off to ``_cmp_method`` together with
# the matching function from the ``operator`` module (``le``/``lt`` are
# imported by name at the top of the file).


@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
    # element-wise ``==``
    return self._cmp_method(other, operator.eq)


@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
    # element-wise ``!=``
    return self._cmp_method(other, operator.ne)


@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
    # element-wise ``>``
    return self._cmp_method(other, operator.gt)


@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
    # element-wise ``>=``
    return self._cmp_method(other, operator.ge)


@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
    # element-wise ``<``
    return self._cmp_method(other, lt)


@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
    # element-wise ``<=``
    return self._cmp_method(other, le)

842

def argsort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
    **kwargs,
) -> np.ndarray:
    """
    Return the indices that would sort this array.

    With the default options the result comes from ``np.lexsort`` with the
    left side as primary key and the right side as secondary key; any other
    combination is delegated to the parent class.
    """
    ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

    default_options = ascending and kind == "quicksort" and na_position == "last"
    if not default_options:
        # TODO: other cases we can use lexsort for? much more performant.
        return super().argsort(
            ascending=ascending, kind=kind, na_position=na_position, **kwargs
        )

    # TODO: in an IntervalIndex we can re-use the cached
    #  IntervalTree.left_sorter
    sort_keys = (self.right, self.left)
    return np.lexsort(sort_keys)

862

def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the first element in sorted order.

    The NA value is returned for an empty array, and also when missing
    entries are present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if not len(self):
        return self._na_value

    missing = self.isna()
    candidates = self
    if missing.any():
        if not skipna:
            return self._na_value
        candidates = self[~missing]

    first = candidates.argsort()[0]
    return candidates[first]

879

def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the last element in sorted order.

    The NA value is returned for an empty array, and also when missing
    entries are present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if not len(self):
        return self._na_value

    missing = self.isna()
    candidates = self
    if missing.any():
        if not skipna:
            return self._na_value
        candidates = self[~missing]

    last = candidates.argsort()[-1]
    return candidates[last]

896

def fillna(
    self: IntervalArrayT, value=None, method=None, limit=None
) -> IntervalArrayT:
    """
    Replace missing entries with ``value``.

    Parameters
    ----------
    value : scalar, dict, Series
        If a scalar value is passed it is used to fill all missing values.
        Alternatively, a Series or dict can be used to fill in different
        values for each index. The value should not be a list. The
        value(s) passed should be either Interval objects or NA/NaN.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Not supported for IntervalArray; anything but None raises.
    limit : int, default None
        Not supported for IntervalArray; anything but None raises.

    Returns
    -------
    filled : IntervalArray with NA/NaN filled

    Raises
    ------
    TypeError
        If ``method`` or ``limit`` is not None.
    """
    unsupported = (
        (method, "Filling by method is not supported for IntervalArray."),
        (limit, "limit is not supported for IntervalArray."),
    )
    for supplied, message in unsupported:
        if supplied is not None:
            raise TypeError(message)

    fill_left, fill_right = self._validate_scalar(value)

    # Each side is filled independently with its half of the fill value.
    filled_left = self.left.fillna(value=fill_left)
    filled_right = self.right.fillna(value=fill_right)
    return self._shallow_copy(filled_left, filled_right)

936

def astype(self, dtype, copy: bool = True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.

    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the requested interval subtype is incompatible, or the parent
        class cannot perform the cast.
    """
    from pandas import Index

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if not is_interval_dtype(dtype):
        # Non-interval targets are handled by the parent class.
        try:
            return super().astype(dtype, copy=copy)
        except (TypeError, ValueError) as err:
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg) from err

    if dtype == self.dtype:
        return self.copy() if copy else self

    if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(dtype.subtype):
        # This is allowed on the Index.astype but we disallow it here
        msg = f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
        raise TypeError(msg)

    # A different subtype was requested: cast both sides.
    try:
        # Index casting rules are used so that np.nan entries are not
        # cast to integer subtypes.
        new_left = Index(self._left, copy=False).astype(dtype.subtype)
        new_right = Index(self._right, copy=False).astype(dtype.subtype)
    except IntCastingNaNError:
        # e.g test_subtype_integer
        raise
    except (TypeError, ValueError) as err:
        # e.g. test_subtype_integer_errors f8->u8 can be lossy
        #  and raises ValueError
        msg = f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
        raise TypeError(msg) from err
    return self._shallow_copy(new_left, new_right)

997

998 def equals(self, other) -> bool:

999 if type(self) != type(other):

1000 return False

1001

1002 return bool(

1003 self.closed == other.closed

1004 and self.left.equals(other.left)

1005 and self.right.equals(other.right)

1006 )

1007

@classmethod
def _concat_same_type(
    cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
) -> IntervalArrayT:
    """
    Concatenate multiple IntervalArray

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the arrays do not all share a single ``closed`` value.
    """
    closed_seen = set()
    for arr in to_concat:
        closed_seen.add(arr.closed)
    if len(closed_seen) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (closed,) = closed_seen

    left_parts = [arr.left for arr in to_concat]
    right_parts = [arr.right for arr in to_concat]
    left = np.concatenate(left_parts)
    right = np.concatenate(right_parts)

    left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)

    return cls._simple_new(left, right, dtype=dtype)

1034

def copy(self: IntervalArrayT) -> IntervalArrayT:
    """
    Return a copy of the array.

    Both stored sides are copied; the dtype object is reused.

    Returns
    -------
    IntervalArray
    """
    duplicated_sides = (self._left.copy(), self._right.copy())
    return self._simple_new(*duplicated_sides, dtype=self.dtype)

1047

def isna(self) -> np.ndarray:
    """Return the missing-value mask, computed from the left side only."""
    left_side = self._left
    return isna(left_side)

1050

def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
    """
    Shift the elements by ``periods`` positions, filling the vacated slots.

    Parameters
    ----------
    periods : int, default 1
        Number of positions to shift; positive moves elements towards the
        end, negative towards the start.
    fill_value : object, optional
        Value for the vacated positions; a missing value (the default)
        fills them with missing intervals.

    Returns
    -------
    IntervalArray
        A new array; an empty array or ``periods == 0`` gives a plain copy.
    """
    if not len(self) or periods == 0:
        return self.copy()

    # Return value is discarded on purpose; the call is presumably here to
    # reject an unsupported fill_value early.
    self._validate_scalar(fill_value)

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT

    empty_len = min(abs(periods), len(self))
    if isna(fill_value):
        from pandas import Index

        fill_value = Index(self._left, copy=False)._na_value
        # The filler must carry this array's ``closed``: from_breaks would
        # otherwise default to "right" and _concat_same_type rejects arrays
        # that are not all closed on the same side.
        empty = IntervalArray.from_breaks(
            [fill_value] * (empty_len + 1), closed=self.closed
        )
    else:
        empty = self._from_sequence([fill_value] * empty_len)

    if periods > 0:
        a = empty
        b = self[:-periods]
    else:
        a = self[abs(periods) :]
        b = empty
    return self._concat_same_type([a, b])

1078

def take(
    self: IntervalArrayT,
    indices,
    *,
    allow_fill: bool = False,
    fill_value=None,
    axis=None,
    **kwargs,
) -> IntervalArrayT:
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take((), kwargs)

    if allow_fill:
        # Split the user-facing fill value into one value per side.
        fill_left, fill_right = self._validate_scalar(fill_value)
    else:
        fill_left = fill_right = fill_value

    taken_left = take(
        self._left, indices, allow_fill=allow_fill, fill_value=fill_left
    )
    taken_right = take(
        self._right, indices, allow_fill=allow_fill, fill_value=fill_right
    )

    return self._shallow_copy(taken_left, taken_right)

1147

def _validate_listlike(self, value):
    """
    Validate a list-like of intervals and split it into endpoint Indexes.

    Returns
    -------
    tuple
        ``(left, right)`` endpoints of ``IntervalArray(value)``.

    Raises
    ------
    TypeError
        If ``value`` cannot be interpreted as intervals, or if its left
        endpoints are rejected by ``self.left._validate_fill_value``.
    """
    # list-like of intervals
    try:
        candidate = IntervalArray(value)
        self._check_closed_matches(candidate, name="value")
        new_left, new_right = candidate.left, candidate.right
    except TypeError as err:
        # wrong type: not interval or NA
        raise TypeError(
            f"'value' should be an interval type, got {type(value)} instead."
        ) from err

    try:
        self.left._validate_fill_value(new_left)
    except (LossySetitemError, TypeError) as err:
        raise TypeError(
            "'value' should be a compatible interval type, "
            f"got {type(value)} instead."
        ) from err

    return new_left, new_right

1169

def _validate_scalar(self, value):
    """
    Validate a scalar and return its ``(left, right)`` endpoint pair.

    An ``Interval`` must match this array's ``closed``; an NA value that is
    valid for the left endpoints' dtype maps to ``self.left._na_value`` on
    both sides.

    Raises
    ------
    TypeError
        If ``value`` is neither an Interval nor a valid NA.
    """
    if isinstance(value, Interval):
        self._check_closed_matches(value, name="value")
        # TODO: check subdtype match like _validate_setitem_value?
        return value.left, value.right

    if is_valid_na_for_dtype(value, self.left.dtype):
        # GH#18295
        na_endpoint = self.left._na_value
        return na_endpoint, na_endpoint

    raise TypeError(
        "can only insert Interval objects and NA into an IntervalArray"
    )

1183

def _validate_setitem_value(self, value):
    """
    Validate a value for setting and return ``(value_left, value_right)``.

    Handles three cases in order: an NA scalar, a scalar ``Interval``, and
    anything else (delegated to ``_validate_listlike``).

    Raises
    ------
    TypeError
        If an NA is being set on an integer-backed array.
    """
    if is_valid_na_for_dtype(value, self.left.dtype):
        # na value: need special casing to set directly on numpy arrays
        na_endpoint = self.left._na_value
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            # GH#45484 TypeError, not ValueError, matches what we get with
            # non-NA un-holdable value.
            raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
        return na_endpoint, na_endpoint

    if not isinstance(value, Interval):
        return self._validate_listlike(value)

    # scalar interval
    self._check_closed_matches(value, name="value")
    lower, upper = value.left, value.right
    self.left._validate_fill_value(lower)
    self.left._validate_fill_value(upper)
    return lower, upper

1206

def value_counts(self, dropna: bool = True) -> Series:
    """
    Return a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this in a non-naive way!
    # Goes through ``np.asarray(self)`` and counts on that result.
    as_ndarray = np.asarray(self)
    return value_counts(as_ndarray, dropna=dropna)

1226

1227 # ---------------------------------------------------------------------

1228 # Rendering Methods

1229

def _format_data(self) -> str:
    """
    Build the bracketed, comma-separated summary of the intervals.

    Arrays longer than the display limit (derived from the
    ``display.max_seq_items`` option, capped at 10) are abbreviated as
    ``[head ... tail]``; shorter ones are listed in full.

    Returns
    -------
    str
    """
    # TODO: integrate with categorical and make generic
    n = len(self)
    max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)

    formatter = str

    if n == 0:
        summary = "[]"
    elif n == 1:
        first = formatter(self[0])
        summary = f"[{first}]"
    elif n == 2:
        first = formatter(self[0])
        last = formatter(self[-1])
        summary = f"[{first}, {last}]"
    elif n > max_seq_items:
        # Always show at least one element per side. With a small
        # ``display.max_seq_items`` the count would otherwise be 0, and
        # ``self[-0:]`` is the *whole* array rather than an empty tail.
        n = max(min(max_seq_items // 2, 10), 1)
        head = [formatter(x) for x in self[:n]]
        tail = [formatter(x) for x in self[-n:]]
        head_str = ", ".join(head)
        tail_str = ", ".join(tail)
        summary = f"[{head_str} ... {tail_str}]"
    else:
        tail = [formatter(x) for x in self]
        tail_str = ", ".join(tail)
        summary = f"[{tail_str}]"

    return summary

1261

def __repr__(self) -> str:
    """
    Return ``<ClassName>``, the formatted data, and a length/dtype footer,
    each on its own line.
    """
    # The header line ends with a newline and the footer starts with one,
    # so the formatted data always sits on its own line(s).
    header = f"<{type(self).__name__}>\n"
    body = self._format_data()
    footer = f"\nLength: {len(self)}, dtype: {self.dtype}"
    return f"{header}{body}{footer}"

1271

def _format_space(self) -> str:
    """
    Return a newline followed by padding one character wider than the
    class name.
    """
    width = len(type(self).__name__) + 1
    return "\n" + " " * width

1275

1276 # ---------------------------------------------------------------------

1277 # Vectorized Interval Properties/Attributes

1278

1279 @property

def left(self):
    """
    Return the left endpoints of each Interval in the IntervalArray as an Index.
    """
    from pandas import Index

    # copy=False: wrap the stored endpoints without requesting a copy.
    left_endpoints = self._left
    return Index(left_endpoints, copy=False)

1287

1288 @property

def right(self):
    """
    Return the right endpoints of each Interval in the IntervalArray as an Index.
    """
    from pandas import Index

    # copy=False: wrap the stored endpoints without requesting a copy.
    right_endpoints = self._right
    return Index(right_endpoints, copy=False)

1296

1297 @property

def length(self) -> Index:
    """
    Return an Index with entries denoting the length of each Interval.
    """
    upper = self.right
    lower = self.left
    return upper - lower

1303

1304 @property

def mid(self) -> Index:
    """
    Return the midpoint of each Interval in the IntervalArray as an Index.
    """
    try:
        endpoint_sum = self.left + self.right
        return 0.5 * endpoint_sum
    except TypeError:
        # datetime safe version: offset the left endpoint by half the length
        half_length = 0.5 * self.length
        return self.left + half_length

1314

1315 _interval_shared_docs["overlaps"] = textwrap.dedent(

1316 """

1317 Check elementwise if an Interval overlaps the values in the %(klass)s.

1318

1319 Two intervals overlap if they share a common point, including closed

1320 endpoints. Intervals that only have an open endpoint in common do not

1321 overlap.

1322

1323 Parameters

1324 ----------

1325 other : %(klass)s

1326 Interval to check against for an overlap.

1327

1328 Returns

1329 -------

1330 ndarray

1331 Boolean array positionally indicating where an overlap occurs.

1332

1333 See Also

1334 --------

1335 Interval.overlaps : Check whether two Interval objects overlap.

1336

1337 Examples

1338 --------

1339 %(examples)s

1340 >>> intervals.overlaps(pd.Interval(0.5, 1.5))

1341 array([ True, True, False])

1342

1343 Intervals that share closed endpoints overlap:

1344

1345 >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))

1346 array([ True, True, True])

1347

1348 Intervals that only have an open endpoint in common do not overlap:

1349

1350 >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))

1351 array([False, True, False])

1352 """

1353 )

1354

1355 @Appender(

1356 _interval_shared_docs["overlaps"]

1357 % {

1358 "klass": "IntervalArray",

1359 "examples": textwrap.dedent(

1360 """\

1361 >>> data = [(0, 1), (1, 3), (2, 4)]

1362 >>> intervals = pd.arrays.IntervalArray.from_tuples(data)

1363 >>> intervals

1364 <IntervalArray>

1365 [(0, 1], (1, 3], (2, 4]]

1366 Length: 3, dtype: interval[int64, right]

1367 """

1368 ),

1369 }

1370 )

def overlaps(self, other):
    # Array-like ``other`` is not supported; anything else that is not a
    # scalar Interval is rejected outright.
    if isinstance(other, (IntervalArray, ABCIntervalIndex)):
        raise NotImplementedError
    if not isinstance(other, Interval):
        raise TypeError(
            f"`other` must be Interval-like, got {type(other).__name__}"
        )

    # equality is okay if both endpoints are closed (overlap at a point)
    starts_cmp = le if (self.closed_left and other.closed_right) else lt
    ends_cmp = le if (other.closed_left and self.closed_right) else lt

    # overlaps is equivalent negation of two interval being disjoint:
    # disjoint = (A.left > B.right) or (B.left > A.right)
    # (simplifying the negation allows this to be done in less operations)
    self_starts_in_time = starts_cmp(self.left, other.right)
    other_starts_in_time = ends_cmp(other.left, self.right)
    return self_starts_in_time & other_starts_in_time

1386

1387 # ---------------------------------------------------------------------

1388

1389 @property

def closed(self) -> IntervalClosedType:
    """
    String describing the inclusive side of the intervals.

    Either ``left``, ``right``, ``both`` or ``neither``.
    """
    interval_dtype = self.dtype
    return interval_dtype.closed

1397

1398 _interval_shared_docs["set_closed"] = textwrap.dedent(

1399 """

1400 Return an identical %(klass)s closed on the specified side.

1401

1402 Parameters

1403 ----------

1404 closed : {'left', 'right', 'both', 'neither'}

1405 Whether the intervals are closed on the left-side, right-side, both

1406 or neither.

1407

1408 Returns

1409 -------

1410 %(klass)s

1411

1412 %(examples)s\

1413 """

1414 )

1415

1416 @Appender(

1417 _interval_shared_docs["set_closed"]

1418 % {

1419 "klass": "IntervalArray",

1420 "examples": textwrap.dedent(

1421 """\

1422 Examples

1423 --------

1424 >>> index = pd.arrays.IntervalArray.from_breaks(range(4))

1425 >>> index

1426 <IntervalArray>

1427 [(0, 1], (1, 2], (2, 3]]

1428 Length: 3, dtype: interval[int64, right]

1429 >>> index.set_closed('both')

1430 <IntervalArray>

1431 [[0, 1], [1, 2], [2, 3]]

1432 Length: 3, dtype: interval[int64, both]

1433 """

1434 ),

1435 }

1436 )

def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
    # Reject anything outside VALID_CLOSED before touching the data.
    if closed not in VALID_CLOSED:
        raise ValueError(f"invalid option for 'closed': {closed}")

    # The endpoint arrays are passed through as-is; only the dtype changes.
    new_dtype = IntervalDtype(self._left.dtype, closed=closed)
    return self._simple_new(self._left, self._right, dtype=new_dtype)

1445

1446 _interval_shared_docs[

1447 "is_non_overlapping_monotonic"

1448 ] = """

1449 Return a boolean whether the %(klass)s is non-overlapping and monotonic.

1450

1451 Non-overlapping means (no Intervals share points), and monotonic means

1452 either monotonic increasing or monotonic decreasing.

1453 """

1454

1455 @property

1456 @Appender(

1457 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs

1458 )

def is_non_overlapping_monotonic(self) -> bool:
    # must be increasing  (e.g., [0, 1), [1, 2), [2, 3), ... )
    # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
    # we already require left <= right

    if self.closed == "both":
        # strict inequality for closed == 'both'; equality implies
        # overlapping at a point when both sides of intervals are included
        increasing_cmp, decreasing_cmp = operator.lt, operator.gt
    else:
        # non-strict inequality when closed != 'both'; at least one side is
        # not included in the intervals, so equality does not imply
        # overlapping
        increasing_cmp, decreasing_cmp = operator.le, operator.ge

    # Compare every interval with its successor.
    prev_left, next_left = self._left[:-1], self._left[1:]
    prev_right, next_right = self._right[:-1], self._right[1:]
    return bool(
        increasing_cmp(prev_right, next_left).all()
        or decreasing_cmp(prev_left, next_right).all()
    )

1478

1479 # ---------------------------------------------------------------------

1480 # Conversion

1481

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
    """
    Return the IntervalArray's data as a numpy array of Interval
    objects (with dtype='object').

    Missing positions hold ``np.nan``. The ``dtype`` argument is accepted
    but not used.
    """
    lefts = self._left
    rights = self._right
    missing = self.isna()
    closed = self.closed

    boxed = np.empty(len(lefts), dtype=object)
    for pos, (lower, upper, is_missing) in enumerate(zip(lefts, rights, missing)):
        boxed[pos] = np.nan if is_missing else Interval(lower, upper, closed)
    return boxed

1499

def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    Parameters
    ----------
    type : pyarrow type, optional
        Requested result type. If it equals the interval storage type, the
        plain struct array is returned; if it is an ``ArrowIntervalType`` it
        must equal the type derived from this array.

    Returns
    -------
    pyarrow array
        The struct storage array (fields ``left`` and ``right``) or an
        extension array built from it.

    Raises
    ------
    TypeError
        If the subtype cannot be converted by ``pyarrow.from_numpy_dtype``,
        or if ``type`` is neither the storage type nor a matching
        ``ArrowIntervalType``.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    try:
        subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
    except TypeError as err:
        raise TypeError(
            f"Conversion to arrow with subtype '{self.dtype.subtype}' "
            "is not supported"
        ) from err
    interval_type = ArrowIntervalType(subtype, self.closed)
    # Store the endpoints as a two-field struct array.
    storage_array = pyarrow.StructArray.from_arrays(
        [
            pyarrow.array(self._left, type=subtype, from_pandas=True),
            pyarrow.array(self._right, type=subtype, from_pandas=True),
        ],
        names=["left", "right"],
    )
    mask = self.isna()
    if mask.any():
        # if there are missing values, set validity bitmap also on the array level
        # NOTE(review): buffers()[1] of the boolean array built from ~mask
        # is presumably its packed data buffer - confirm against pyarrow docs.
        null_bitmap = pyarrow.array(~mask).buffers()[1]
        storage_array = pyarrow.StructArray.from_buffers(
            storage_array.type,
            len(storage_array),
            [null_bitmap],
            children=[storage_array.field(0), storage_array.field(1)],
        )

    if type is not None:
        if type.equals(interval_type.storage_type):
            # Caller asked for the raw storage representation.
            return storage_array
        elif isinstance(type, ArrowIntervalType):
            # ensure we have the same subtype and closed attributes
            if not type.equals(interval_type):
                raise TypeError(
                    "Not supported to convert IntervalArray to type with "
                    f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
                    f"and 'closed' ({self.closed} vs {type.closed}) attributes"
                )
        else:
            raise TypeError(
                f"Not supported to convert IntervalArray to '{type}' type"
            )

    return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)

1551

1552 _interval_shared_docs[

1553 "to_tuples"

1554 ] = """

1555 Return an %(return_type)s of tuples of the form (left, right).

1556

1557 Parameters

1558 ----------

1559 na_tuple : bool, default True

1560 Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA

1561 value itself if False, ``nan``.

1562

1563 Returns

1564 -------

1565 tuples: %(return_type)s

1566 %(examples)s\

1567 """

1568

1569 @Appender(

1570 _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}

1571 )

def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
    # Pair up the endpoints position by position.
    pairs = com.asarray_tuplesafe(zip(self._left, self._right))
    if na_tuple:
        return pairs
    # GH 18756: replace the tuple at each missing position with NaN
    return np.where(~self.isna(), pairs, np.nan)

1578

1579 # ---------------------------------------------------------------------

1580

def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Set ``value`` at the positions selected by ``mask``, in place.

    ``value`` is validated and split into endpoints first; both endpoint
    arrays are then updated with the same mask.
    """
    new_left, new_right = self._validate_setitem_value(value)

    if not isinstance(self._left, np.ndarray):
        # Non-ndarray endpoints: use their own ``_putmask``.
        self._left._putmask(mask, new_left)
        assert not isinstance(self._right, np.ndarray)
        self._right._putmask(mask, new_right)
        return

    np.putmask(self._left, mask, new_left)
    assert isinstance(self._right, np.ndarray)
    np.putmask(self._right, mask, new_right)

1592

def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
    """
    Return a new IntervalArray with ``item`` inserted at position ``loc``.

    Follows numpy.insert semantics for negative values. Only Interval
    objects and NA can be inserted into an IntervalArray.

    Parameters
    ----------
    loc : int
    item : Interval

    Returns
    -------
    IntervalArray
    """
    item_left, item_right = self._validate_scalar(item)

    # Insert each endpoint into its own side, then rebuild the array.
    lefts = self.left.insert(loc, item_left)
    rights = self.right.insert(loc, item_right)
    return self._shallow_copy(lefts, rights)

1614

def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
    """
    Return a new IntervalArray with the entries at ``loc`` removed.
    """
    if not isinstance(self._left, np.ndarray):
        # Non-ndarray endpoints: use their own ``delete``.
        kept_left = self._left.delete(loc)
        assert not isinstance(self._right, np.ndarray)
        kept_right = self._right.delete(loc)
    else:
        kept_left = np.delete(self._left, loc)
        assert isinstance(self._right, np.ndarray)
        kept_right = np.delete(self._right, loc)
    return self._shallow_copy(left=kept_left, right=kept_right)

1625

1626 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)

def repeat(
    self: IntervalArrayT,
    repeats: int | Sequence[int],
    axis: AxisInt | None = None,
) -> IntervalArrayT:
    # ``axis`` is only validated for numpy compatibility; it is not used.
    nv.validate_repeat((), {"axis": axis})
    # Repeat both endpoint sides identically so pairs stay aligned.
    repeated_left = self.left.repeat(repeats)
    repeated_right = self.right.repeat(repeats)
    return self._shallow_copy(left=repeated_left, right=repeated_right)

1636

1637 _interval_shared_docs["contains"] = textwrap.dedent(

1638 """

1639 Check elementwise if the Intervals contain the value.

1640

1641 Return a boolean mask whether the value is contained in the Intervals

1642 of the %(klass)s.

1643

1644 Parameters

1645 ----------

1646 other : scalar

1647 The value to check whether it is contained in the Intervals.

1648

1649 Returns

1650 -------

1651 boolean array

1652

1653 See Also

1654 --------

1655 Interval.contains : Check whether Interval object contains value.

1656 %(klass)s.overlaps : Check if an Interval overlaps the values in the

1657 %(klass)s.

1658

1659 Examples

1660 --------

1661 %(examples)s

1662 >>> intervals.contains(0.5)

1663 array([ True, False, False])

1664 """

1665 )

1666

1667 @Appender(

1668 _interval_shared_docs["contains"]

1669 % {

1670 "klass": "IntervalArray",

1671 "examples": textwrap.dedent(

1672 """\

1673 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])

1674 >>> intervals

1675 <IntervalArray>

1676 [(0, 1], (1, 3], (2, 4]]

1677 Length: 3, dtype: interval[int64, right]

1678 """

1679 ),

1680 }

1681 )

def contains(self, other):
    if isinstance(other, Interval):
        raise NotImplementedError("contains not implemented for two intervals")

    # An open side excludes its endpoint, so the comparison is strict there.
    if self.open_left:
        past_left = self._left < other
    else:
        past_left = self._left <= other

    if self.open_right:
        before_right = other < self._right
    else:
        before_right = other <= self._right

    return past_left & before_right

1689

def isin(self, values) -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask marking which intervals appear in ``values``.

    Interval-typed ``values`` with a different ``closed`` never match.
    Equal dtypes are compared through a complex128 view of the endpoint
    pairs; everything else falls back to an object-dtype comparison.
    """
    if not hasattr(values, "dtype"):
        values = np.array(values)
    values = extract_array(values, extract_numpy=True)

    if not is_interval_dtype(values.dtype):
        return isin(self.astype(object), values.astype(object))

    if self.closed != values.closed:
        # not comparable -> no overlap
        return np.zeros(self.shape, dtype=bool)

    if is_dtype_equal(self.dtype, values.dtype):
        # GH#38353 instead of casting to object, operating on a
        #  complex128 ndarray is much more performant.
        own_pairs = self._combined.view("complex128")
        other_pairs = values._combined.view("complex128")
        # error: Argument 1 to "in1d" has incompatible type
        # "Union[ExtensionArray, ndarray[Any, Any],
        # ndarray[Any, dtype[Any]]]"; expected
        # "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
        # int, float, complex, str, bytes, _NestedSequence[
        # Union[bool, int, float, complex, str, bytes]]]"
        return np.in1d(own_pairs, other_pairs)  # type: ignore[arg-type]

    own_needs_i8 = needs_i8_conversion(self.left.dtype)
    other_needs_i8 = needs_i8_conversion(values.left.dtype)
    if own_needs_i8 ^ other_needs_i8:
        # not comparable -> no overlap
        return np.zeros(self.shape, dtype=bool)

    return isin(self.astype(object), values.astype(object))

1721

1722 @property

def _combined(self) -> IntervalSideT:
    """
    Return the endpoints as one two-column array: left in column 0,
    right in column 1.
    """
    left_col = self.left._values.reshape(-1, 1)
    right_col = self.right._values.reshape(-1, 1)
    columns = [left_col, right_col]
    if not needs_i8_conversion(left_col.dtype):
        return np.concatenate(columns, axis=1)
    # dtypes needing i8 conversion are joined via their own
    # ``_concat_same_type`` instead of ``np.concatenate``.
    return left_col._concat_same_type(columns, axis=1)

1731

def _from_combined(self, combined: np.ndarray) -> IntervalArray:
    """
    Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
    """
    # Reinterpret the data as int64 pairs: column 0 is left, column 1 right.
    pairs = combined.view("i8").reshape(-1, 2)
    left_i8, right_i8 = pairs[:, 0], pairs[:, 1]

    side_dtype = self._left.dtype
    if not needs_i8_conversion(side_dtype):
        assert isinstance(side_dtype, np.dtype)
        rebuilt_left = left_i8.view(side_dtype)
        rebuilt_right = right_i8.view(side_dtype)
    else:
        assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
        rebuilt_left = type(self._left)._from_sequence(left_i8, dtype=side_dtype)
        assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
        rebuilt_right = type(self._right)._from_sequence(right_i8, dtype=side_dtype)
    return self._shallow_copy(left=rebuilt_left, right=rebuilt_right)

1749

def unique(self) -> IntervalArray:
    """
    Return the distinct intervals of the array.

    Each (left, right) pair is viewed as a single complex128 value so the
    pair can be de-duplicated as one scalar.
    """
    # No overload variant of "__getitem__" of "ExtensionArray" matches argument
    # type "Tuple[slice, int]"
    flat_pairs = self._combined.view("complex128")[:, 0]  # type: ignore[call-overload]
    distinct = unique(flat_pairs)
    return self._from_combined(distinct[:, None])

1758

1759

def _maybe_convert_platform_interval(values) -> ArrayLike:
    """
    Try to do platform conversion, with special casing for IntervalArray.

    Adjusts the default result dtype in certain cases to be compatible with
    IntervalArray. For example, empty lists come back with integer dtype
    instead of object dtype, which is prohibited for IntervalArray, and
    integer data is always widened to int64.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
        Inputs that are not list-like, are DataFrames, or are dtype-less
        containers other than list/tuple/range are returned unchanged.
    """
    if isinstance(values, (list, tuple)) and len(values) == 0:
        # GH 19016
        # empty lists/tuples get object dtype by default, but this is
        # prohibited for IntervalArray, so coerce to integer instead
        return np.array([], dtype=np.int64)

    if not is_list_like(values) or isinstance(values, ABCDataFrame):
        # This will raise later, but we avoid passing to maybe_convert_platform
        return values

    if is_categorical_dtype(values):
        values = np.asarray(values)
    elif hasattr(values, "dtype") or isinstance(values, (list, tuple, range)):
        values = extract_array(values, extract_numpy=True)
    else:
        # TODO: should we just cast these to list?
        return values

    if not hasattr(values, "dtype"):
        values = np.asarray(values)

    needs_widening = is_integer_dtype(values) and values.dtype != np.int64
    if needs_widening:
        values = values.astype(np.int64)
    return values