Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/interval.py: 29%


377 statements  

1""" define the IntervalIndex """ 

2from __future__ import annotations 

3 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 TYPE_CHECKING, 

11 Any, 

12 Literal, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import lib 

18from pandas._libs.interval import ( 

19 Interval, 

20 IntervalMixin, 

21 IntervalTree, 

22) 

23from pandas._libs.tslibs import ( 

24 BaseOffset, 

25 Period, 

26 Timedelta, 

27 Timestamp, 

28 to_offset, 

29) 

30from pandas.errors import InvalidIndexError 

31from pandas.util._decorators import ( 

32 Appender, 

33 cache_readonly, 

34) 

35from pandas.util._exceptions import rewrite_exception 

36 

37from pandas.core.dtypes.cast import ( 

38 find_common_type, 

39 infer_dtype_from_scalar, 

40 maybe_box_datetimelike, 

41 maybe_downcast_numeric, 

42 maybe_upcast_numeric_to_64bit, 

43) 

44from pandas.core.dtypes.common import ( 

45 ensure_platform_int, 

46 is_float_dtype, 

47 is_integer, 

48 is_integer_dtype, 

49 is_list_like, 

50 is_number, 

51 is_object_dtype, 

52 is_scalar, 

53 pandas_dtype, 

54) 

55from pandas.core.dtypes.dtypes import ( 

56 DatetimeTZDtype, 

57 IntervalDtype, 

58) 

59from pandas.core.dtypes.missing import is_valid_na_for_dtype 

60 

61from pandas.core.algorithms import unique 

62from pandas.core.arrays.datetimelike import validate_periods 

63from pandas.core.arrays.interval import ( 

64 IntervalArray, 

65 _interval_shared_docs, 

66) 

67import pandas.core.common as com 

68from pandas.core.indexers import is_valid_positional_slice 

69import pandas.core.indexes.base as ibase 

70from pandas.core.indexes.base import ( 

71 Index, 

72 _index_shared_docs, 

73 ensure_index, 

74 maybe_extract_name, 

75) 

76from pandas.core.indexes.datetimes import ( 

77 DatetimeIndex, 

78 date_range, 

79) 

80from pandas.core.indexes.extension import ( 

81 ExtensionIndex, 

82 inherit_names, 

83) 

84from pandas.core.indexes.multi import MultiIndex 

85from pandas.core.indexes.timedeltas import ( 

86 TimedeltaIndex, 

87 timedelta_range, 

88) 

89 

90if TYPE_CHECKING: 

91 from collections.abc import Hashable 

92 

93 from pandas._typing import ( 

94 Dtype, 

95 DtypeObj, 

96 IntervalClosedType, 

97 Self, 

98 npt, 

99 ) 

100_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

101 

102_index_doc_kwargs.update( 

103 { 

104 "klass": "IntervalIndex", 

105 "qualname": "IntervalIndex", 

106 "target_klass": "IntervalIndex or list of Intervals", 

107 "name": textwrap.dedent( 

108 """\ 

109 name : object, optional 

110 Name to be stored in the index. 

111 """ 

112 ), 

113 } 

114) 



def _get_next_label(label):
    # see test_slice_locs_with_ints_and_floats_succeeds
    dtype = getattr(label, "dtype", type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = "datetime64[ns]"
    dtype = pandas_dtype(dtype)

    if lib.is_np_dtype(dtype, "mM") or isinstance(dtype, DatetimeTZDtype):
        return label + np.timedelta64(1, "ns")
    elif is_integer_dtype(dtype):
        return label + 1
    elif is_float_dtype(dtype):
        return np.nextafter(label, np.inf)
    else:
        raise TypeError(f"cannot determine next label for type {repr(type(label))}")


def _get_prev_label(label):
    # see test_slice_locs_with_ints_and_floats_succeeds
    dtype = getattr(label, "dtype", type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = "datetime64[ns]"
    dtype = pandas_dtype(dtype)

    if lib.is_np_dtype(dtype, "mM") or isinstance(dtype, DatetimeTZDtype):
        return label - np.timedelta64(1, "ns")
    elif is_integer_dtype(dtype):
        return label - 1
    elif is_float_dtype(dtype):
        return np.nextafter(label, -np.inf)
    else:
        raise TypeError(f"cannot determine previous label for type {repr(type(label))}")

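# Illustrative note (editor-added, not part of the pandas source): the two helpers above
# nudge a slice label by the smallest representable step for its dtype, e.g. (assuming a
# recent pandas/numpy) _get_next_label(5) == 6 for integer labels,
# _get_next_label(1.5) == np.nextafter(1.5, np.inf) for float labels, and datetime-like
# labels move by one nanosecond.
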

def _new_IntervalIndex(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't have
    arguments and breaks __new__.
    """
    return cls.from_arrays(**d)


@Appender(
    _interval_shared_docs["class"]
    % {
        "klass": "IntervalIndex",
        "summary": "Immutable index of intervals that are closed on the same side.",
        "name": _index_doc_kwargs["name"],
        "extra_attributes": "is_overlapping\nvalues\n",
        "extra_methods": "",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    A new ``IntervalIndex`` is typically constructed using
    :func:`interval_range`:

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    It may also be constructed using one of the constructor
    methods: :meth:`IntervalIndex.from_arrays`,
    :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.

    See further examples in the doc strings of ``interval_range`` and the
    mentioned constructor methods.
    """
        ),
    }
)
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
@inherit_names(
    [
        "__array__",
        "overlaps",
        "contains",
        "closed_left",
        "closed_right",
        "open_left",
        "open_right",
        "is_empty",
    ],
    IntervalArray,
)
@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
class IntervalIndex(ExtensionIndex):
    _typ = "intervalindex"

    # annotate properties pinned via inherit_names
    closed: IntervalClosedType
    is_non_overlapping_monotonic: bool
    closed_left: bool
    closed_right: bool
    open_left: bool
    open_right: bool

    _data: IntervalArray
    _values: IntervalArray
    _can_hold_strings = False
    _data_cls = IntervalArray


    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data,
        closed: IntervalClosedType | None = None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
        verify_integrity: bool = True,
    ) -> Self:
        name = maybe_extract_name(name, data, cls)

        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray(
                data,
                closed=closed,
                copy=copy,
                dtype=dtype,
                verify_integrity=verify_integrity,
            )

        return cls._simple_new(array, name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_breaks"]
        % {
            "klass": "IntervalIndex",
            "name": textwrap.dedent(
                """
                name : str, optional
                    Name of the resulting IntervalIndex."""
            ),
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_breaks(
        cls,
        breaks,
        closed: IntervalClosedType | None = "right",
        name: Hashable | None = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_breaks(
                breaks, closed=closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_arrays"]
        % {
            "klass": "IntervalIndex",
            "name": textwrap.dedent(
                """
                name : str, optional
                    Name of the resulting IntervalIndex."""
            ),
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_arrays(
        cls,
        left,
        right,
        closed: IntervalClosedType = "right",
        name: Hashable | None = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_arrays(
                left, right, closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_tuples"]
        % {
            "klass": "IntervalIndex",
            "name": textwrap.dedent(
                """
                name : str, optional
                    Name of the resulting IntervalIndex."""
            ),
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
        IntervalIndex([(0, 1], (1, 2]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_tuples(
        cls,
        data,
        closed: IntervalClosedType = "right",
        name: Hashable | None = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
        return cls._simple_new(arr, name=name)


    # --------------------------------------------------------------------
    # error: Return type "IntervalTree" of "_engine" incompatible with return type
    # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
    @cache_readonly
    def _engine(self) -> IntervalTree:  # type: ignore[override]
        # IntervalTree does not support numpy arrays unless they are 64 bit
        left = self._maybe_convert_i8(self.left)
        left = maybe_upcast_numeric_to_64bit(left)
        right = self._maybe_convert_i8(self.right)
        right = maybe_upcast_numeric_to_64bit(right)
        return IntervalTree(left, right, closed=self.closed)

    def __contains__(self, key: Any) -> bool:
        """
        Return a boolean if this key is IN the index.
        We *only* accept an Interval.

        Parameters
        ----------
        key : Interval

        Returns
        -------
        bool
        """
        hash(key)
        if not isinstance(key, Interval):
            if is_valid_na_for_dtype(key, self.dtype):
                return self.hasnans
            return False

        try:
            self.get_loc(key)
            return True
        except KeyError:
            return False

    def _getitem_slice(self, slobj: slice) -> IntervalIndex:
        """
        Fastpath for __getitem__ when we know we have a slice.
        """
        res = self._data[slobj]
        return type(self)._simple_new(res, name=self._name)

    @cache_readonly
    def _multiindex(self) -> MultiIndex:
        return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

    def __reduce__(self):
        d = {
            "left": self.left,
            "right": self.right,
            "closed": self.closed,
            "name": self.name,
        }
        return _new_IntervalIndex, (type(self), d), None

    @property
    def inferred_type(self) -> str:
        """Return a string of the type inferred from the values"""
        return "interval"

    # Cannot determine type of "memory_usage"
    @Appender(Index.memory_usage.__doc__)  # type: ignore[has-type]
    def memory_usage(self, deep: bool = False) -> int:
        # we don't use an explicit engine
        # so return the bytes here
        return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)


    # IntervalTree doesn't have an is_monotonic_decreasing, so have to override
    # the Index implementation
    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic decreasing (only equal or
        decreasing values), else False.
        """
        return self[::-1].is_monotonic_increasing

    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return True if the IntervalIndex contains unique elements, else False.
        """
        left = self.left
        right = self.right

        if self.isna().sum() > 1:
            return False

        if left.is_unique or right.is_unique:
            return True

        seen_pairs = set()
        check_idx = np.where(left.duplicated(keep=False))[0]
        for idx in check_idx:
            pair = (left[idx], right[idx])
            if pair in seen_pairs:
                return False
            seen_pairs.add(pair)

        return True

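    # Illustrative example (editor-added, not part of the pandas source): an interval is
    # identified by its (left, right) pair, so the index is unique whenever either
    # endpoint array is unique, e.g. (assuming a recent pandas):
    #
    # >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 2)]).is_unique   # duplicate lefts only
    # True
    # >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 1)]).is_unique   # duplicate pairs
    # False
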

    @property
    def is_overlapping(self) -> bool:
        """
        Return True if the IntervalIndex has overlapping intervals, else False.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        Returns
        -------
        bool
            Boolean indicating if the IntervalIndex has overlapping intervals.

        See Also
        --------
        Interval.overlaps : Check whether two Interval objects overlap.
        IntervalIndex.overlaps : Check an IntervalIndex elementwise for
            overlaps.

        Examples
        --------
        >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
        >>> index
        IntervalIndex([(0, 2], (1, 3], (4, 5]],
                      dtype='interval[int64, right]')
        >>> index.is_overlapping
        True

        Intervals that share closed endpoints overlap:

        >>> index = pd.interval_range(0, 3, closed='both')
        >>> index
        IntervalIndex([[0, 1], [1, 2], [2, 3]],
                      dtype='interval[int64, both]')
        >>> index.is_overlapping
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> index = pd.interval_range(0, 3, closed='left')
        >>> index
        IntervalIndex([[0, 1), [1, 2), [2, 3)],
                      dtype='interval[int64, left]')
        >>> index.is_overlapping
        False
        """
        # GH 23309
        return self._engine.is_overlapping

    def _needs_i8_conversion(self, key) -> bool:
        """
        Check if a given key needs i8 conversion. Conversion is necessary for
        Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
        Interval-like requires conversion if its endpoints are one of the
        aforementioned types.

        Assumes that any list-like data has already been cast to an Index.

        Parameters
        ----------
        key : scalar or Index-like
            The key that should be checked for i8 conversion

        Returns
        -------
        bool
        """
        key_dtype = getattr(key, "dtype", None)
        if isinstance(key_dtype, IntervalDtype) or isinstance(key, Interval):
            return self._needs_i8_conversion(key.left)

        i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
        return isinstance(key, i8_types)


    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        scalar or list-like
            The original key if no conversion occurred, int if converted scalar,
            Index with an int64 dtype if converted list-like.
        """
        if is_list_like(key):
            key = ensure_index(key)
            key = maybe_upcast_numeric_to_64bit(key)

        if not self._needs_i8_conversion(key):
            return key

        scalar = is_scalar(key)
        key_dtype = getattr(key, "dtype", None)
        if isinstance(key_dtype, IntervalDtype) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            # error: "object" not callable
            return constructor(
                left, right, closed=self.closed
            )  # type: ignore[operator]

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key)
            if isinstance(key, Period):
                key_i8 = key.ordinal
            elif isinstance(key_i8, Timestamp):
                key_i8 = key_i8._value
            elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
                key_i8 = key_i8.view("i8")
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        subtype = self.dtype.subtype  # type: ignore[union-attr]

        if subtype != key_dtype:
            raise ValueError(
                f"Cannot index an IntervalIndex of subtype {subtype} with "
                f"values of dtype {key_dtype}"
            )

        return key_i8

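    # Illustrative example (editor-added, not part of the pandas source): datetime-like
    # keys are reduced to their nanosecond integer ("i8") values before being handed to
    # the IntervalTree engine. Calling the internal helper directly, e.g. (assuming a
    # recent pandas):
    #
    # >>> ii = pd.interval_range(pd.Timestamp("1970-01-01"), periods=2, freq="s")
    # >>> ii._maybe_convert_i8(ii.right)
    # Index([1000000000, 2000000000], dtype='int64')
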

    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
        if not self.is_non_overlapping_monotonic:
            raise KeyError(
                "can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing"
            )

        if isinstance(label, (IntervalMixin, IntervalIndex)):
            raise NotImplementedError("Interval objects are not currently supported")

        # GH 20921: "not is_monotonic_increasing" for the second condition
        # instead of "is_monotonic_decreasing" to account for single element
        # indexes being both increasing and decreasing
        if (side == "left" and self.left.is_monotonic_increasing) or (
            side == "right" and not self.left.is_monotonic_increasing
        ):
            sub_idx = self.right
            if self.open_right:
                label = _get_next_label(label)
        else:
            sub_idx = self.left
            if self.open_left:
                label = _get_prev_label(label)

        return sub_idx._searchsorted_monotonic(label, side)

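    # Illustrative example (editor-added, not part of the pandas source): this is what
    # makes label-based slicing with scalar bounds select every interval touched by the
    # requested range, e.g. (assuming a recent pandas):
    #
    # >>> ii = pd.IntervalIndex.from_breaks([0, 1, 2, 3])
    # >>> pd.Series(['a', 'b', 'c'], index=ii).loc[0.5:2.5]
    # (0, 1]    a
    # (1, 2]    b
    # (2, 3]    c
    # dtype: object
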

    # --------------------------------------------------------------------
    # Indexing Methods

    def get_loc(self, key) -> int | slice | np.ndarray:
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply a point inside an interval.

        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
        >>> overlapping_index.get_loc(0.5)
        array([ True, False,  True])

        Only exact matches will be returned if an interval is provided.

        >>> index.get_loc(pd.Interval(0, 1))
        0
        """
        self._check_indexing_error(key)

        if isinstance(key, Interval):
            if self.closed != key.closed:
                raise KeyError(key)
            mask = (self.left == key.left) & (self.right == key.right)
        elif is_valid_na_for_dtype(key, self.dtype):
            mask = self.isna()
        else:
            # assume scalar
            op_left = le if self.closed_left else lt
            op_right = le if self.closed_right else lt
            try:
                mask = op_left(self.left, key) & op_right(key, self.right)
            except TypeError as err:
                # scalar is not comparable to II subtype --> invalid label
                raise KeyError(key) from err

        matches = mask.sum()
        if matches == 0:
            raise KeyError(key)
        if matches == 1:
            return mask.argmax()

        res = lib.maybe_booleans_to_slice(mask.view("u1"))
        if isinstance(res, slice) and res.stop is None:
            # TODO: DO this in maybe_booleans_to_slice?
            res = slice(res.start, len(self), res.step)
        return res

    def _get_indexer(
        self,
        target: Index,
        method: str | None = None,
        limit: int | None = None,
        tolerance: Any | None = None,
    ) -> npt.NDArray[np.intp]:
        if isinstance(target, IntervalIndex):
            # We only get here with not self.is_overlapping
            # -> at most one match per interval in target
            # want exact matches -> need both left/right to match, so defer to
            # left/right get_indexer, compare elementwise, equality -> match
            indexer = self._get_indexer_unique_sides(target)

        elif not is_object_dtype(target.dtype):
            # homogeneous scalar index: use IntervalTree
            # we should always have self._should_partial_index(target) here
            target = self._maybe_convert_i8(target)
            indexer = self._engine.get_indexer(target.values)
        else:
            # heterogeneous scalar index: defer elementwise to get_loc
            # we should always have self._should_partial_index(target) here
            return self._get_indexer_pointwise(target)[0]

        return ensure_platform_int(indexer)

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # e.g. IntervalIndex with different closed or incompatible subtype
            # -> no matches
            return self._get_indexer_non_comparable(target, None, unique=False)

        elif isinstance(target, IntervalIndex):
            if self.left.is_unique and self.right.is_unique:
                # fastpath available even if we don't have self._index_as_unique
                indexer = self._get_indexer_unique_sides(target)
                missing = (indexer == -1).nonzero()[0]
            else:
                return self._get_indexer_pointwise(target)

        elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
            # target might contain intervals: defer elementwise to get_loc
            return self._get_indexer_pointwise(target)

        else:
            # Note: this case behaves differently from other Index subclasses
            # because IntervalIndex does partial-int indexing
            target = self._maybe_convert_i8(target)
            indexer, missing = self._engine.get_indexer_non_unique(target.values)

        return ensure_platform_int(indexer), ensure_platform_int(missing)


    def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:
        """
        _get_indexer specialized to the case where both of our sides are unique.
        """
        # Caller is responsible for checking
        # `self.left.is_unique and self.right.is_unique`

        left_indexer = self.left.get_indexer(target.left)
        right_indexer = self.right.get_indexer(target.right)
        indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
        return indexer

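    # Illustrative example (editor-added, not part of the pandas source): an interval in
    # ``target`` matches only when its left and right endpoints resolve to the same
    # position on both sides, e.g. (assuming a recent pandas):
    #
    # >>> ii = pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
    # >>> target = pd.IntervalIndex.from_tuples([(0, 1), (1, 3)])
    # >>> ii._get_indexer_unique_sides(target)
    # array([ 0, -1])
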

    def _get_indexer_pointwise(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        pointwise implementation for get_indexer and get_indexer_non_unique.
        """
        indexer, missing = [], []
        for i, key in enumerate(target):
            try:
                locs = self.get_loc(key)
                if isinstance(locs, slice):
                    # Only needed for get_indexer_non_unique
                    locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
                elif lib.is_integer(locs):
                    locs = np.array(locs, ndmin=1)
                else:
                    # otherwise we have ndarray[bool]
                    locs = np.where(locs)[0]
            except KeyError:
                missing.append(i)
                locs = np.array([-1])
            except InvalidIndexError:
                # i.e. non-scalar key e.g. a tuple.
                # see test_append_different_columns_types_raises
                missing.append(i)
                locs = np.array([-1])

            indexer.append(locs)

        indexer = np.concatenate(indexer)
        return ensure_platform_int(indexer), ensure_platform_int(missing)

    @cache_readonly
    def _index_as_unique(self) -> bool:
        return not self.is_overlapping and self._engine._na_count < 2

    _requires_unique_msg = (
        "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
    )

    def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
        if not (key.step is None or key.step == 1):
            # GH#31658 if label-based, we require step == 1,
            # if positional, we disallow float start/stop
            msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
            if kind == "loc":
                raise ValueError(msg)
            if kind == "getitem":
                if not is_valid_positional_slice(key):
                    # i.e. this cannot be interpreted as a positional slice
                    raise ValueError(msg)

        return super()._convert_slice_indexer(key, kind)

    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        # integer lookups in Series.__getitem__ are unambiguously
        # positional in this case
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        return self.dtype.subtype.kind in "mM"  # type: ignore[union-attr]

    def _maybe_cast_slice_bound(self, label, side: str):
        return getattr(self, side)._maybe_cast_slice_bound(label, side)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        if not isinstance(dtype, IntervalDtype):
            return False
        common_subtype = find_common_type([self.dtype, dtype])
        return not is_object_dtype(common_subtype)

    # --------------------------------------------------------------------

    @cache_readonly
    def left(self) -> Index:
        return Index(self._data.left, copy=False)

    @cache_readonly
    def right(self) -> Index:
        return Index(self._data.right, copy=False)

    @cache_readonly
    def mid(self) -> Index:
        return Index(self._data.mid, copy=False)

    @property
    def length(self) -> Index:
        return Index(self._data.length, copy=False)

    # --------------------------------------------------------------------
    # Set Operations


    def _intersection(self, other, sort):
        """
        intersection specialized to the case with matching dtypes.
        """
        # For IntervalIndex we also know other.closed == self.closed
        if self.left.is_unique and self.right.is_unique:
            taken = self._intersection_unique(other)
        elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
            # Swap other/self if other is unique and self does not have
            # multiple NaNs
            taken = other._intersection_unique(self)
        else:
            # duplicates
            taken = self._intersection_non_unique(other)

        if sort is None:
            taken = taken.sort_values()

        return taken

    def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex has no duplicated endpoints, on either the
        left or the right side.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is much more performant than super()._intersection(other)
        lindexer = self.left.get_indexer(other.left)
        rindexer = self.right.get_indexer(other.right)

        match = (lindexer == rindexer) & (lindexer != -1)
        indexer = lindexer.take(match.nonzero()[0])
        indexer = unique(indexer)

        return self.take(indexer)

    def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex has duplicated endpoints, on one or both
        sides.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is about 3.25x faster than super()._intersection(other)
        # in IntervalIndexMethod.time_intersection_both_duplicate(1000)
        mask = np.zeros(len(self), dtype=bool)

        if self.hasnans and other.hasnans:
            first_nan_loc = np.arange(len(self))[self.isna()][0]
            mask[first_nan_loc] = True

        other_tups = set(zip(other.left, other.right))
        for i, tup in enumerate(zip(self.left, self.right)):
            if tup in other_tups:
                mask[i] = True

        return self[mask]

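    # Illustrative example (editor-added, not part of the pandas source): the non-unique
    # path keeps every row of ``self`` whose (left, right) pair also appears in
    # ``other``, e.g. (assuming a recent pandas):
    #
    # >>> left = pd.IntervalIndex.from_tuples([(0, 1), (0, 1), (1, 2)])
    # >>> other = pd.IntervalIndex.from_tuples([(0, 1), (2, 3)])
    # >>> left._intersection_non_unique(other)
    # IntervalIndex([(0, 1], (0, 1]], dtype='interval[int64, right]')
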

    # --------------------------------------------------------------------

    def _get_engine_target(self) -> np.ndarray:
        # Note: we _could_ use libjoin functions by either casting to object
        # dtype or constructing tuples (faster than constructing Intervals)
        # but the libjoin fastpaths are no longer fast in these cases.
        raise NotImplementedError(
            "IntervalIndex does not use libjoin fastpaths or pass values to "
            "IndexEngine objects"
        )

    def _from_join_target(self, result):
        raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

    # TODO: arithmetic operations


def _is_valid_endpoint(endpoint) -> bool:
    """
    Helper for interval_range to check if start/end are valid types.
    """
    return any(
        [
            is_number(endpoint),
            isinstance(endpoint, Timestamp),
            isinstance(endpoint, Timedelta),
            endpoint is None,
        ]
    )


def _is_type_compatible(a, b) -> bool:
    """
    Helper for interval_range to check type compat of start/end/freq.
    """
    is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset))
    is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset))
    return (
        (is_number(a) and is_number(b))
        or (is_ts_compat(a) and is_ts_compat(b))
        or (is_td_compat(a) and is_td_compat(b))
        or com.any_none(a, b)
    )


def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable | None = None,
    closed: IntervalClosedType = "right",
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, Timedelta, datetime.timedelta, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01 00:00:00, 2017-01-02 00:00:00],
                   (2017-01-02 00:00:00, 2017-01-03 00:00:00],
                   (2017-01-03 00:00:00, 2017-01-04 00:00:00]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``. For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01 00:00:00, 2017-02-01 00:00:00],
                   (2017-02-01 00:00:00, 2017-03-01 00:00:00],
                   (2017-03-01 00:00:00, 2017-04-01 00:00:00]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    endpoint = start if start is not None else end

    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    if not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    periods = validate_periods(periods)

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        if com.all_not_none(start, end, freq):
            # 0.1 ensures we capture end
            breaks = np.arange(start, end + (freq * 0.1), freq)
        else:
            # compute the period/start/end if unspecified (at most one)
            if periods is None:
                periods = int((end - start) // freq) + 1
            elif start is None:
                start = end - (periods - 1) * freq
            elif end is None:
                end = start + (periods - 1) * freq

            breaks = np.linspace(start, end, periods)
            if all(is_integer(x) for x in com.not_none(start, end, freq)):
                # np.linspace always produces float output

                # error: Argument 1 to "maybe_downcast_numeric" has incompatible type
                # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";
                # expected "ndarray[Any, Any]"
                breaks = maybe_downcast_numeric(
                    breaks,  # type: ignore[arg-type]
                    np.dtype("int64"),
                )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
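
# Illustrative note (editor-added, not part of the pandas source): because n breaks
# describe n-1 intervals, ``interval_range`` bumps ``periods`` by one before computing
# the breaks. For example, start=0, end=6, periods=4 becomes np.linspace(0, 6, 5),
# i.e. breaks [0.0, 1.5, 3.0, 4.5, 6.0], which yield the four intervals
# (0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0] shown in the docstring above.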