Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/interval.py: 29%

1""" define the IntervalIndex """

2from __future__ import annotations

4from operator import (

5 le,

6 lt,

8import textwrap

9from typing import (

10 TYPE_CHECKING,

11 Any,

12 Literal,

13)

15import numpy as np

17from pandas._libs import lib

18from pandas._libs.interval import (

19 Interval,

20 IntervalMixin,

21 IntervalTree,

22)

23from pandas._libs.tslibs import (

24 BaseOffset,

25 Period,

26 Timedelta,

27 Timestamp,

28 to_offset,

29)

30from pandas.errors import InvalidIndexError

31from pandas.util._decorators import (

32 Appender,

33 cache_readonly,

34)

35from pandas.util._exceptions import rewrite_exception

37from pandas.core.dtypes.cast import (

38 find_common_type,

39 infer_dtype_from_scalar,

40 maybe_box_datetimelike,

41 maybe_downcast_numeric,

42 maybe_upcast_numeric_to_64bit,

43)

44from pandas.core.dtypes.common import (

45 ensure_platform_int,

46 is_float_dtype,

47 is_integer,

48 is_integer_dtype,

49 is_list_like,

50 is_number,

51 is_object_dtype,

52 is_scalar,

53 pandas_dtype,

54)

55from pandas.core.dtypes.dtypes import (

56 DatetimeTZDtype,

57 IntervalDtype,

58)

59from pandas.core.dtypes.missing import is_valid_na_for_dtype

61from pandas.core.algorithms import unique

62from pandas.core.arrays.datetimelike import validate_periods

63from pandas.core.arrays.interval import (

64 IntervalArray,

65 _interval_shared_docs,

66)

67import pandas.core.common as com

68from pandas.core.indexers import is_valid_positional_slice

69import pandas.core.indexes.base as ibase

70from pandas.core.indexes.base import (

71 Index,

72 _index_shared_docs,

73 ensure_index,

74 maybe_extract_name,

75)

76from pandas.core.indexes.datetimes import (

77 DatetimeIndex,

78 date_range,

79)

80from pandas.core.indexes.extension import (

81 ExtensionIndex,

82 inherit_names,

83)

84from pandas.core.indexes.multi import MultiIndex

85from pandas.core.indexes.timedeltas import (

86 TimedeltaIndex,

87 timedelta_range,

88)

90if TYPE_CHECKING:

91 from collections.abc import Hashable

93 from pandas._typing import (

94 Dtype,

95 DtypeObj,

96 IntervalClosedType,

97 Self,

98 npt,

99 )

100_index_doc_kwargs = dict(ibase._index_doc_kwargs)

101

102_index_doc_kwargs.update(

103 {

104 "klass": "IntervalIndex",

105 "qualname": "IntervalIndex",

106 "target_klass": "IntervalIndex or list of Intervals",

107 "name": textwrap.dedent(

108 """\

109 name : object, optional

110 Name to be stored in the index.

111 """

112 ),

113 }

114)

115

116

117def _get_next_label(label):

118 # see test_slice_locs_with_ints_and_floats_succeeds

119 dtype = getattr(label, "dtype", type(label))

120 if isinstance(label, (Timestamp, Timedelta)):

121 dtype = "datetime64[ns]"

122 dtype = pandas_dtype(dtype)

123

124 if lib.is_np_dtype(dtype, "mM") or isinstance(dtype, DatetimeTZDtype):

125 return label + np.timedelta64(1, "ns")

126 elif is_integer_dtype(dtype):

127 return label + 1

128 elif is_float_dtype(dtype):

129 return np.nextafter(label, np.inf)

130 else:

131 raise TypeError(f"cannot determine next label for type {repr(type(label))}")

132

133

134def _get_prev_label(label):

135 # see test_slice_locs_with_ints_and_floats_succeeds

136 dtype = getattr(label, "dtype", type(label))

137 if isinstance(label, (Timestamp, Timedelta)):

138 dtype = "datetime64[ns]"

139 dtype = pandas_dtype(dtype)

140

141 if lib.is_np_dtype(dtype, "mM") or isinstance(dtype, DatetimeTZDtype):

142 return label - np.timedelta64(1, "ns")

143 elif is_integer_dtype(dtype):

144 return label - 1

145 elif is_float_dtype(dtype):

146 return np.nextafter(label, -np.inf)

147 else:

148 raise TypeError(f"cannot determine next label for type {repr(type(label))}")

149

150

151def _new_IntervalIndex(cls, d):

152 """

153 This is called upon unpickling, rather than the default which doesn't have

154 arguments and breaks __new__.

155 """

156 return cls.from_arrays(**d)

157

158

159@Appender(

160 _interval_shared_docs["class"]

161 % {

162 "klass": "IntervalIndex",

163 "summary": "Immutable index of intervals that are closed on the same side.",

164 "name": _index_doc_kwargs["name"],

165 "extra_attributes": "is_overlapping\nvalues\n",

166 "extra_methods": "",

167 "examples": textwrap.dedent(

168 """\

169 Examples

170 --------

171 A new ``IntervalIndex`` is typically constructed using

172 :func:`interval_range`:

173

174 >>> pd.interval_range(start=0, end=5)

175 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],

176 dtype='interval[int64, right]')

177

178 It may also be constructed using one of the constructor

179 methods: :meth:`IntervalIndex.from_arrays`,

180 :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.

181

182 See further examples in the doc strings of ``interval_range`` and the

183 mentioned constructor methods.

184 """

185 ),

186 }

187)

188@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)

189@inherit_names(

190 [

191 "__array__",

192 "overlaps",

193 "contains",

194 "closed_left",

195 "closed_right",

196 "open_left",

197 "open_right",

198 "is_empty",

199 ],

200 IntervalArray,

201)

202@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)

203class IntervalIndex(ExtensionIndex):

204 _typ = "intervalindex"

205

206 # annotate properties pinned via inherit_names

207 closed: IntervalClosedType

208 is_non_overlapping_monotonic: bool

209 closed_left: bool

210 closed_right: bool

211 open_left: bool

212 open_right: bool

213

214 _data: IntervalArray

215 _values: IntervalArray

216 _can_hold_strings = False

217 _data_cls = IntervalArray

218

219 # --------------------------------------------------------------------

220 # Constructors

221

222 def __new__(

223 cls,

224 data,

225 closed: IntervalClosedType | None = None,

226 dtype: Dtype | None = None,

227 copy: bool = False,

228 name: Hashable | None = None,

229 verify_integrity: bool = True,

230 ) -> Self:

231 name = maybe_extract_name(name, data, cls)

232

233 with rewrite_exception("IntervalArray", cls.__name__):

234 array = IntervalArray(

235 data,

236 closed=closed,

237 copy=copy,

238 dtype=dtype,

239 verify_integrity=verify_integrity,

240 )

241

242 return cls._simple_new(array, name)

243

@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % dict(
        klass="IntervalIndex",
        name=textwrap.dedent(
            """
            name : str, optional
                Name of the resulting IntervalIndex."""
        ),
        examples=textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
            IntervalIndex([(0, 1], (1, 2], (2, 3]],
                          dtype='interval[int64, right]')
            """
        ),
    )
)
def from_breaks(
    cls,
    breaks,
    closed: IntervalClosedType | None = "right",
    name: Hashable | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalIndex:
    # The public docstring is attached by the Appender decorator above.
    array_options = {"closed": closed, "copy": copy, "dtype": dtype}
    # Errors raised by IntervalArray are reworded to mention this class.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray.from_breaks(breaks, **array_options)
    return cls._simple_new(values, name=name)

278

@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % dict(
        klass="IntervalIndex",
        name=textwrap.dedent(
            """
            name : str, optional
                Name of the resulting IntervalIndex."""
        ),
        examples=textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
            IntervalIndex([(0, 1], (1, 2], (2, 3]],
                          dtype='interval[int64, right]')
            """
        ),
    )
)
def from_arrays(
    cls,
    left,
    right,
    closed: IntervalClosedType = "right",
    name: Hashable | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalIndex:
    # The public docstring is attached by the Appender decorator above.
    array_options = {"copy": copy, "dtype": dtype}
    # Errors raised by IntervalArray are reworded to mention this class.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray.from_arrays(left, right, closed, **array_options)
    return cls._simple_new(values, name=name)

314

@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % dict(
        klass="IntervalIndex",
        name=textwrap.dedent(
            """
            name : str, optional
                Name of the resulting IntervalIndex."""
        ),
        examples=textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
            IntervalIndex([(0, 1], (1, 2]],
                           dtype='interval[int64, right]')
            """
        ),
    )
)
def from_tuples(
    cls,
    data,
    closed: IntervalClosedType = "right",
    name: Hashable | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalIndex:
    # The public docstring is attached by the Appender decorator above.
    array_options = {"closed": closed, "copy": copy, "dtype": dtype}
    # Errors raised by IntervalArray are reworded to mention this class.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray.from_tuples(data, **array_options)
    return cls._simple_new(values, name=name)

347

348 # --------------------------------------------------------------------

349 # error: Return type "IntervalTree" of "_engine" incompatible with return type

350 # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"

351 @cache_readonly

352 def _engine(self) -> IntervalTree: # type: ignore[override]

353 # IntervalTree does not supports numpy array unless they are 64 bit

354 left = self._maybe_convert_i8(self.left)

355 left = maybe_upcast_numeric_to_64bit(left)

356 right = self._maybe_convert_i8(self.right)

357 right = maybe_upcast_numeric_to_64bit(right)

358 return IntervalTree(left, right, closed=self.closed)

359

360 def __contains__(self, key: Any) -> bool:

361 """

362 return a boolean if this key is IN the index

363 We *only* accept an Interval

364

365 Parameters

366 ----------

367 key : Interval

368

369 Returns

370 -------

371 bool

372 """

373 hash(key)

374 if not isinstance(key, Interval):

375 if is_valid_na_for_dtype(key, self.dtype):

376 return self.hasnans

377 return False

378

379 try:

380 self.get_loc(key)

381 return True

382 except KeyError:

383 return False

384

385 def _getitem_slice(self, slobj: slice) -> IntervalIndex:

386 """

387 Fastpath for __getitem__ when we know we have a slice.

388 """

389 res = self._data[slobj]

390 return type(self)._simple_new(res, name=self._name)

391

392 @cache_readonly

393 def _multiindex(self) -> MultiIndex:

394 return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

395

def __reduce__(self):
    """
    Pickle support: reconstruct through ``_new_IntervalIndex`` using the
    endpoints, closed side and name of this index.
    """
    state = {
        attr: getattr(self, attr) for attr in ("left", "right", "closed", "name")
    }
    return _new_IntervalIndex, (type(self), state), None

404

@property
def inferred_type(self) -> str:
    """Return the inferred type of the values, which is always "interval"."""
    kind = "interval"
    return kind

409

410 # Cannot determine type of "memory_usage"

@Appender(Index.memory_usage.__doc__)  # type: ignore[has-type]
def memory_usage(self, deep: bool = False) -> int:
    # No explicit engine is used here, so the footprint is reported as the
    # bytes held by the two endpoint indexes.
    left_bytes = self.left.memory_usage(deep=deep)
    right_bytes = self.right.memory_usage(deep=deep)
    return left_bytes + right_bytes

416

417 # IntervalTree doesn't have a is_monotonic_decreasing, so have to override

418 # the Index implementation

@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """
    Return True if the IntervalIndex is monotonic decreasing (only equal or
    decreasing values), else False.

    Computed by reversing the index and checking that the result is
    monotonic increasing.
    """
    reversed_index = self[::-1]
    return reversed_index.is_monotonic_increasing

426

@cache_readonly
def is_unique(self) -> bool:
    """
    Return True if the IntervalIndex contains unique elements, else False.
    """
    lower = self.left
    upper = self.right

    # more than one missing interval is already a duplicate
    if self.isna().sum() > 1:
        return False

    # if either side has no repeats, no (left, right) pair can repeat
    if lower.is_unique or upper.is_unique:
        return True

    # only positions whose left endpoint repeats can hold a duplicate pair
    candidates = np.flatnonzero(lower.duplicated(keep=False))
    observed = set()
    for pos in candidates:
        endpoints = (lower[pos], upper[pos])
        if endpoints in observed:
            return False
        observed.add(endpoints)

    return True

450

@property
def is_overlapping(self) -> bool:
    """
    Return True if the IntervalIndex has overlapping intervals, else False.

    Two intervals overlap when they have at least one point in common;
    a shared closed endpoint counts. Intervals whose only common point is
    an open endpoint do not overlap.

    Returns
    -------
    bool
        Boolean indicating if the IntervalIndex has overlapping intervals.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.
    IntervalIndex.overlaps : Check an IntervalIndex elementwise for
        overlaps.

    Examples
    --------
    >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
    >>> index
    IntervalIndex([(0, 2], (1, 3], (4, 5]],
          dtype='interval[int64, right]')
    >>> index.is_overlapping
    True

    Intervals that share closed endpoints overlap:

    >>> index = pd.interval_range(0, 3, closed='both')
    >>> index
    IntervalIndex([[0, 1], [1, 2], [2, 3]],
          dtype='interval[int64, both]')
    >>> index.is_overlapping
    True

    Intervals that only have an open endpoint in common do not overlap:

    >>> index = pd.interval_range(0, 3, closed='left')
    >>> index
    IntervalIndex([[0, 1), [1, 2), [2, 3)],
          dtype='interval[int64, left]')
    >>> index.is_overlapping
    False
    """
    # GH 23309: the answer comes from the interval tree engine
    tree = self._engine
    return tree.is_overlapping

500

501 def _needs_i8_conversion(self, key) -> bool:

502 """

503 Check if a given key needs i8 conversion. Conversion is necessary for

504 Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An

505 Interval-like requires conversion if its endpoints are one of the

506 aforementioned types.

507

508 Assumes that any list-like data has already been cast to an Index.

509

510 Parameters

511 ----------

512 key : scalar or Index-like

513 The key that should be checked for i8 conversion

514

515 Returns

516 -------

517 bool

518 """

519 key_dtype = getattr(key, "dtype", None)

520 if isinstance(key_dtype, IntervalDtype) or isinstance(key, Interval):

521 return self._needs_i8_conversion(key.left)

522

523 i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)

524 return isinstance(key, i8_types)

525

526 def _maybe_convert_i8(self, key):

527 """

528 Maybe convert a given key to its equivalent i8 value(s). Used as a

529 preprocessing step prior to IntervalTree queries (self._engine), which

530 expects numeric data.

531

532 Parameters

533 ----------

534 key : scalar or list-like

535 The key that should maybe be converted to i8.

536

537 Returns

538 -------

539 scalar or list-like

540 The original key if no conversion occurred, int if converted scalar,

541 Index with an int64 dtype if converted list-like.

542 """

543 if is_list_like(key):

544 key = ensure_index(key)

545 key = maybe_upcast_numeric_to_64bit(key)

546

547 if not self._needs_i8_conversion(key):

548 return key

549

550 scalar = is_scalar(key)

551 key_dtype = getattr(key, "dtype", None)

552 if isinstance(key_dtype, IntervalDtype) or isinstance(key, Interval):

553 # convert left/right and reconstruct

554 left = self._maybe_convert_i8(key.left)

555 right = self._maybe_convert_i8(key.right)

556 constructor = Interval if scalar else IntervalIndex.from_arrays

557 # error: "object" not callable

558 return constructor(

559 left, right, closed=self.closed

560 ) # type: ignore[operator]

561

562 if scalar:

563 # Timestamp/Timedelta

564 key_dtype, key_i8 = infer_dtype_from_scalar(key)

565 if isinstance(key, Period):

566 key_i8 = key.ordinal

567 elif isinstance(key_i8, Timestamp):

568 key_i8 = key_i8._value

569 elif isinstance(key_i8, (np.datetime64, np.timedelta64)):

570 key_i8 = key_i8.view("i8")

571 else:

572 # DatetimeIndex/TimedeltaIndex

573 key_dtype, key_i8 = key.dtype, Index(key.asi8)

574 if key.hasnans:

575 # convert NaT from its i8 value to np.nan so it's not viewed

576 # as a valid value, maybe causing errors (e.g. is_overlapping)

577 key_i8 = key_i8.where(~key._isnan)

578

579 # ensure consistency with IntervalIndex subtype

580 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],

581 # ExtensionDtype]" has no attribute "subtype"

582 subtype = self.dtype.subtype # type: ignore[union-attr]

583

584 if subtype != key_dtype:

585 raise ValueError(

586 f"Cannot index an IntervalIndex of subtype {subtype} with "

587 f"values of dtype {key_dtype}"

588 )

589

590 return key_i8

591

592 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):

593 if not self.is_non_overlapping_monotonic:

594 raise KeyError(

595 "can only get slices from an IntervalIndex if bounds are "

596 "non-overlapping and all monotonic increasing or decreasing"

597 )

598

599 if isinstance(label, (IntervalMixin, IntervalIndex)):

600 raise NotImplementedError("Interval objects are not currently supported")

601

602 # GH 20921: "not is_monotonic_increasing" for the second condition

603 # instead of "is_monotonic_decreasing" to account for single element

604 # indexes being both increasing and decreasing

605 if (side == "left" and self.left.is_monotonic_increasing) or (

606 side == "right" and not self.left.is_monotonic_increasing

607 ):

608 sub_idx = self.right

609 if self.open_right:

610 label = _get_next_label(label)

611 else:

612 sub_idx = self.left

613 if self.open_left:

614 label = _get_prev_label(label)

615

616 return sub_idx._searchsorted_monotonic(label, side)

617

618 # --------------------------------------------------------------------

619 # Indexing Methods

620

def get_loc(self, key) -> int | slice | np.ndarray:
    """
    Get integer location, slice or boolean mask for requested label.

    Parameters
    ----------
    key : label

    Returns
    -------
    int if unique index, slice if monotonic index, else mask

    Raises
    ------
    KeyError
        If no interval matches ``key``, if ``key`` is an Interval closed on
        a different side than this index, or if ``key`` cannot be compared
        with the endpoints.

    Examples
    --------
    >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
    >>> index = pd.IntervalIndex([i1, i2])
    >>> index.get_loc(1)
    0

    You can also supply a point inside an interval.

    >>> index.get_loc(1.5)
    1

    If a label is in several intervals, you get the locations of all the
    relevant intervals.

    >>> i3 = pd.Interval(0, 2)
    >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
    >>> overlapping_index.get_loc(0.5)
    array([ True, False,  True])

    Only exact matches will be returned if an interval is provided.

    >>> index.get_loc(pd.Interval(0, 1))
    0
    """
    self._check_indexing_error(key)

    if isinstance(key, Interval):
        # exact match: same closed side and identical endpoints
        if key.closed != self.closed:
            raise KeyError(key)
        hits = (self.left == key.left) & (self.right == key.right)
    elif is_valid_na_for_dtype(key, self.dtype):
        hits = self.isna()
    else:
        # assume scalar: find the intervals that contain the point
        lower_cmp = le if self.closed_left else lt
        upper_cmp = le if self.closed_right else lt
        try:
            hits = lower_cmp(self.left, key) & upper_cmp(key, self.right)
        except TypeError as err:
            # scalar is not comparable to II subtype --> invalid label
            raise KeyError(key) from err

    n_hits = hits.sum()
    if n_hits == 0:
        raise KeyError(key)
    if n_hits == 1:
        return hits.argmax()

    loc = lib.maybe_booleans_to_slice(hits.view("u1"))
    if not isinstance(loc, slice) or loc.stop is not None:
        return loc
    # TODO: DO this in maybe_booleans_to_slice?
    return slice(loc.start, len(self), loc.step)

687

688 def _get_indexer(

689 self,

690 target: Index,

691 method: str | None = None,

692 limit: int | None = None,

693 tolerance: Any | None = None,

694 ) -> npt.NDArray[np.intp]:

695 if isinstance(target, IntervalIndex):

696 # We only get here with not self.is_overlapping

697 # -> at most one match per interval in target

698 # want exact matches -> need both left/right to match, so defer to

699 # left/right get_indexer, compare elementwise, equality -> match

700 indexer = self._get_indexer_unique_sides(target)

701

702 elif not is_object_dtype(target.dtype):

703 # homogeneous scalar index: use IntervalTree

704 # we should always have self._should_partial_index(target) here

705 target = self._maybe_convert_i8(target)

706 indexer = self._engine.get_indexer(target.values)

707 else:

708 # heterogeneous scalar index: defer elementwise to get_loc

709 # we should always have self._should_partial_index(target) here

710 return self._get_indexer_pointwise(target)[0]

711

712 return ensure_platform_int(indexer)

713

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(
    self, target: Index
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    # The public docstring is attached by the Appender decorator above.
    target = ensure_index(target)

    if not (self._should_compare(target) or self._should_partial_index(target)):
        # e.g. IntervalIndex with different closed or incompatible subtype
        # -> no matches
        return self._get_indexer_non_comparable(target, None, unique=False)

    if isinstance(target, IntervalIndex):
        if not (self.left.is_unique and self.right.is_unique):
            return self._get_indexer_pointwise(target)
        # fastpath available even if we don't have self._index_as_unique
        indexer = self._get_indexer_unique_sides(target)
        missing = (indexer == -1).nonzero()[0]
    elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
        # target might contain intervals: defer elementwise to get_loc
        return self._get_indexer_pointwise(target)
    else:
        # Note: this case behaves differently from other Index subclasses
        # because IntervalIndex does partial-int indexing
        target = self._maybe_convert_i8(target)
        indexer, missing = self._engine.get_indexer_non_unique(target.values)

    return ensure_platform_int(indexer), ensure_platform_int(missing)

744

745 def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:

746 """

747 _get_indexer specialized to the case where both of our sides are unique.

748 """

749 # Caller is responsible for checking

750 # `self.left.is_unique and self.right.is_unique`

751

752 left_indexer = self.left.get_indexer(target.left)

753 right_indexer = self.right.get_indexer(target.right)

754 indexer = np.where(left_indexer == right_indexer, left_indexer, -1)

755 return indexer

756

757 def _get_indexer_pointwise(

758 self, target: Index

759 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:

760 """

761 pointwise implementation for get_indexer and get_indexer_non_unique.

762 """

763 indexer, missing = [], []

764 for i, key in enumerate(target):

765 try:

766 locs = self.get_loc(key)

767 if isinstance(locs, slice):

768 # Only needed for get_indexer_non_unique

769 locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")

770 elif lib.is_integer(locs):

771 locs = np.array(locs, ndmin=1)

772 else:

773 # otherwise we have ndarray[bool]

774 locs = np.where(locs)[0]

775 except KeyError:

776 missing.append(i)

777 locs = np.array([-1])

778 except InvalidIndexError:

779 # i.e. non-scalar key e.g. a tuple.

780 # see test_append_different_columns_types_raises

781 missing.append(i)

782 locs = np.array([-1])

783

784 indexer.append(locs)

785

786 indexer = np.concatenate(indexer)

787 return ensure_platform_int(indexer), ensure_platform_int(missing)

788

789 @cache_readonly

790 def _index_as_unique(self) -> bool:

791 return not self.is_overlapping and self._engine._na_count < 2

792

793 _requires_unique_msg = (

794 "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"

795 )

796

797 def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):

798 if not (key.step is None or key.step == 1):

799 # GH#31658 if label-based, we require step == 1,

800 # if positional, we disallow float start/stop

801 msg = "label-based slicing with step!=1 is not supported for IntervalIndex"

802 if kind == "loc":

803 raise ValueError(msg)

804 if kind == "getitem":

805 if not is_valid_positional_slice(key):

806 # i.e. this cannot be interpreted as a positional slice

807 raise ValueError(msg)

808

809 return super()._convert_slice_indexer(key, kind)

810

811 @cache_readonly

812 def _should_fallback_to_positional(self) -> bool:

813 # integer lookups in Series.__getitem__ are unambiguously

814 # positional in this case

815 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],

816 # ExtensionDtype]" has no attribute "subtype"

817 return self.dtype.subtype.kind in "mM" # type: ignore[union-attr]

818

819 def _maybe_cast_slice_bound(self, label, side: str):

820 return getattr(self, side)._maybe_cast_slice_bound(label, side)

821

822 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:

823 if not isinstance(dtype, IntervalDtype):

824 return False

825 common_subtype = find_common_type([self.dtype, dtype])

826 return not is_object_dtype(common_subtype)

827

828 # --------------------------------------------------------------------

829

@cache_readonly
def left(self) -> Index:
    """Index wrapping the left endpoints of the backing array (no copy requested)."""
    left_values = self._data.left
    return Index(left_values, copy=False)

833

@cache_readonly
def right(self) -> Index:
    """Index wrapping the right endpoints of the backing array (no copy requested)."""
    right_values = self._data.right
    return Index(right_values, copy=False)

837

@cache_readonly
def mid(self) -> Index:
    """Index wrapping the ``mid`` values of the backing array (no copy requested)."""
    mid_values = self._data.mid
    return Index(mid_values, copy=False)

841

@property
def length(self) -> Index:
    """Index wrapping the ``length`` values of the backing array (no copy requested)."""
    length_values = self._data.length
    return Index(length_values, copy=False)

845

846 # --------------------------------------------------------------------

847 # Set Operations

848

849 def _intersection(self, other, sort):

850 """

851 intersection specialized to the case with matching dtypes.

852 """

853 # For IntervalIndex we also know other.closed == self.closed

854 if self.left.is_unique and self.right.is_unique:

855 taken = self._intersection_unique(other)

856 elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:

857 # Swap other/self if other is unique and self does not have

858 # multiple NaNs

859 taken = other._intersection_unique(self)

860 else:

861 # duplicates

862 taken = self._intersection_non_unique(other)

863

864 if sort is None:

865 taken = taken.sort_values()

866

867 return taken

868

869 def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:

870 """

871 Used when the IntervalIndex does not have any common endpoint,

872 no matter left or right.

873 Return the intersection with another IntervalIndex.

874 Parameters

875 ----------

876 other : IntervalIndex

877 Returns

878 -------

879 IntervalIndex

880 """

881 # Note: this is much more performant than super()._intersection(other)

882 lindexer = self.left.get_indexer(other.left)

883 rindexer = self.right.get_indexer(other.right)

884

885 match = (lindexer == rindexer) & (lindexer != -1)

886 indexer = lindexer.take(match.nonzero()[0])

887 indexer = unique(indexer)

888

889 return self.take(indexer)

890

891 def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:

892 """

893 Used when the IntervalIndex does have some common endpoints,

894 on either sides.

895 Return the intersection with another IntervalIndex.

896

897 Parameters

898 ----------

899 other : IntervalIndex

900

901 Returns

902 -------

903 IntervalIndex

904 """

905 # Note: this is about 3.25x faster than super()._intersection(other)

906 # in IntervalIndexMethod.time_intersection_both_duplicate(1000)

907 mask = np.zeros(len(self), dtype=bool)

908

909 if self.hasnans and other.hasnans:

910 first_nan_loc = np.arange(len(self))[self.isna()][0]

911 mask[first_nan_loc] = True

912

913 other_tups = set(zip(other.left, other.right))

914 for i, tup in enumerate(zip(self.left, self.right)):

915 if tup in other_tups:

916 mask[i] = True

917

918 return self[mask]

919

920 # --------------------------------------------------------------------

921

922 def _get_engine_target(self) -> np.ndarray:

923 # Note: we _could_ use libjoin functions by either casting to object

924 # dtype or constructing tuples (faster than constructing Intervals)

925 # but the libjoin fastpaths are no longer fast in these cases.

926 raise NotImplementedError(

927 "IntervalIndex does not use libjoin fastpaths or pass values to "

928 "IndexEngine objects"

929 )

930

931 def _from_join_target(self, result):

932 raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

933

934 # TODO: arithmetic operations

935

936

937def _is_valid_endpoint(endpoint) -> bool:

938 """

939 Helper for interval_range to check if start/end are valid types.

940 """

941 return any(

942 [

943 is_number(endpoint),

944 isinstance(endpoint, Timestamp),

945 isinstance(endpoint, Timedelta),

946 endpoint is None,

947 ]

948 )

949

950

951def _is_type_compatible(a, b) -> bool:

952 """

953 Helper for interval_range to check type compat of start/end/freq.

954 """

955 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset))

956 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset))

957 return (

958 (is_number(a) and is_number(b))

959 or (is_ts_compat(a) and is_ts_compat(b))

960 or (is_td_compat(a) and is_td_compat(b))

961 or com.any_none(a, b)

962 )

963

964

965def interval_range(

966 start=None,

967 end=None,

968 periods=None,

969 freq=None,

970 name: Hashable | None = None,

971 closed: IntervalClosedType = "right",

972) -> IntervalIndex:

973 """

974 Return a fixed frequency IntervalIndex.

975

976 Parameters

977 ----------

978 start : numeric or datetime-like, default None

979 Left bound for generating intervals.

980 end : numeric or datetime-like, default None

981 Right bound for generating intervals.

982 periods : int, default None

983 Number of periods to generate.

984 freq : numeric, str, Timedelta, datetime.timedelta, or DateOffset, default None

985 The length of each interval. Must be consistent with the type of start

986 and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1

987 for numeric and 'D' for datetime-like.

988 name : str, default None

989 Name of the resulting IntervalIndex.

990 closed : {'left', 'right', 'both', 'neither'}, default 'right'

991 Whether the intervals are closed on the left-side, right-side, both

992 or neither.

993

994 Returns

995 -------

996 IntervalIndex

997

998 See Also

999 --------

1000 IntervalIndex : An Index of intervals that are all closed on the same side.

1001

1002 Notes

1003 -----

1004 Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,

1005 exactly three must be specified. If ``freq`` is omitted, the resulting

1006 ``IntervalIndex`` will have ``periods`` linearly spaced elements between

1007 ``start`` and ``end``, inclusively.

1008

1009 To learn more about datetime-like frequency strings, please see `this link

1010 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

1011

1012 Examples

1013 --------

1014 Numeric ``start`` and ``end`` is supported.

1015

1016 >>> pd.interval_range(start=0, end=5)

1017 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],

1018 dtype='interval[int64, right]')

1019

1020 Additionally, datetime-like input is also supported.

1021

1022 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),

1023 ... end=pd.Timestamp('2017-01-04'))

1024 IntervalIndex([(2017-01-01 00:00:00, 2017-01-02 00:00:00],

1025 (2017-01-02 00:00:00, 2017-01-03 00:00:00],

1026 (2017-01-03 00:00:00, 2017-01-04 00:00:00]],

1027 dtype='interval[datetime64[ns], right]')

1028

1029 The ``freq`` parameter specifies the frequency between the left and right.

1030 endpoints of the individual intervals within the ``IntervalIndex``. For

1031 numeric ``start`` and ``end``, the frequency must also be numeric.

1032

1033 >>> pd.interval_range(start=0, periods=4, freq=1.5)

1034 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],

1035 dtype='interval[float64, right]')

1036

1037 Similarly, for datetime-like ``start`` and ``end``, the frequency must be

1038 convertible to a DateOffset.

1039

1040 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),

1041 ... periods=3, freq='MS')

1042 IntervalIndex([(2017-01-01 00:00:00, 2017-02-01 00:00:00],

1043 (2017-02-01 00:00:00, 2017-03-01 00:00:00],

1044 (2017-03-01 00:00:00, 2017-04-01 00:00:00]],

1045 dtype='interval[datetime64[ns], right]')

1046

1047 Specify ``start``, ``end``, and ``periods``; the frequency is generated

1048 automatically (linearly spaced).

1049

1050 >>> pd.interval_range(start=0, end=6, periods=4)

1051 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],

1052 dtype='interval[float64, right]')

1053

1054 The ``closed`` parameter specifies which endpoints of the individual

1055 intervals within the ``IntervalIndex`` are closed.

1056

1057 >>> pd.interval_range(end=5, periods=4, closed='both')

1058 IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],

1059 dtype='interval[int64, both]')

1060 """

1061 start = maybe_box_datetimelike(start)

1062 end = maybe_box_datetimelike(end)

1063 endpoint = start if start is not None else end

1064

1065 if freq is None and com.any_none(periods, start, end):

1066 freq = 1 if is_number(endpoint) else "D"

1067

1068 if com.count_not_none(start, end, periods, freq) != 3:

1069 raise ValueError(

1070 "Of the four parameters: start, end, periods, and "

1071 "freq, exactly three must be specified"

1072 )

1073

1074 if not _is_valid_endpoint(start):

1075 raise ValueError(f"start must be numeric or datetime-like, got {start}")

1076 if not _is_valid_endpoint(end):

1077 raise ValueError(f"end must be numeric or datetime-like, got {end}")

1078

1079 periods = validate_periods(periods)

1080

1081 if freq is not None and not is_number(freq):

1082 try:

1083 freq = to_offset(freq)

1084 except ValueError as err:

1085 raise ValueError(

1086 f"freq must be numeric or convertible to DateOffset, got {freq}"

1087 ) from err

1088

1089 # verify type compatibility

1090 if not all(

1091 [

1092 _is_type_compatible(start, end),

1093 _is_type_compatible(start, freq),

1094 _is_type_compatible(end, freq),

1095 ]

1096 ):

1097 raise TypeError("start, end, freq need to be type compatible")

1098

1099 # +1 to convert interval count to breaks count (n breaks = n-1 intervals)

1100 if periods is not None:

1101 periods += 1

1102

1103 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

1104

1105 if is_number(endpoint):

1106 if com.all_not_none(start, end, freq):

1107 # 0.1 ensures we capture end

1108 breaks = np.arange(start, end + (freq * 0.1), freq)

1109 else:

1110 # compute the period/start/end if unspecified (at most one)

1111 if periods is None:

1112 periods = int((end - start) // freq) + 1

1113 elif start is None:

1114 start = end - (periods - 1) * freq

1115 elif end is None:

1116 end = start + (periods - 1) * freq

1117

1118 breaks = np.linspace(start, end, periods)

1119 if all(is_integer(x) for x in com.not_none(start, end, freq)):

1120 # np.linspace always produces float output

1121

1122 # error: Argument 1 to "maybe_downcast_numeric" has incompatible type

1123 # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";

1124 # expected "ndarray[Any, Any]" [arg-type]

1125 breaks = maybe_downcast_numeric(

1126 breaks, # type: ignore[arg-type]

1127 np.dtype("int64"),

1128 )

1129 else:

1130 # delegate to the appropriate range function

1131 if isinstance(endpoint, Timestamp):

1132 breaks = date_range(start=start, end=end, periods=periods, freq=freq)

1133 else:

1134 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

1135

1136 return IntervalIndex.from_breaks(breaks, name=name, closed=closed)