Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/interval.py: 29%

376 statements  

1""" define the IntervalIndex """ 

2from __future__ import annotations 

3 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 Any, 

11 Hashable, 

12 Literal, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import lib 

18from pandas._libs.interval import ( 

19 Interval, 

20 IntervalMixin, 

21 IntervalTree, 

22) 

23from pandas._libs.tslibs import ( 

24 BaseOffset, 

25 Timedelta, 

26 Timestamp, 

27 to_offset, 

28) 

29from pandas._typing import ( 

30 Dtype, 

31 DtypeObj, 

32 IntervalClosedType, 

33 npt, 

34) 

35from pandas.errors import InvalidIndexError 

36from pandas.util._decorators import ( 

37 Appender, 

38 cache_readonly, 

39) 

40from pandas.util._exceptions import rewrite_exception 

41 

42from pandas.core.dtypes.cast import ( 

43 find_common_type, 

44 infer_dtype_from_scalar, 

45 maybe_box_datetimelike, 

46 maybe_downcast_numeric, 

47 maybe_upcast_numeric_to_64bit, 

48) 

49from pandas.core.dtypes.common import ( 

50 ensure_platform_int, 

51 is_datetime64tz_dtype, 

52 is_datetime_or_timedelta_dtype, 

53 is_dtype_equal, 

54 is_float, 

55 is_float_dtype, 

56 is_integer, 

57 is_integer_dtype, 

58 is_interval_dtype, 

59 is_list_like, 

60 is_number, 

61 is_object_dtype, 

62 is_scalar, 

63) 

64from pandas.core.dtypes.dtypes import IntervalDtype 

65from pandas.core.dtypes.missing import is_valid_na_for_dtype 

66 

67from pandas.core.algorithms import unique 

68from pandas.core.arrays.interval import ( 

69 IntervalArray, 

70 _interval_shared_docs, 

71) 

72import pandas.core.common as com 

73from pandas.core.indexers import is_valid_positional_slice 

74import pandas.core.indexes.base as ibase 

75from pandas.core.indexes.base import ( 

76 Index, 

77 _index_shared_docs, 

78 ensure_index, 

79 maybe_extract_name, 

80) 

81from pandas.core.indexes.datetimes import ( 

82 DatetimeIndex, 

83 date_range, 

84) 

85from pandas.core.indexes.extension import ( 

86 ExtensionIndex, 

87 inherit_names, 

88) 

89from pandas.core.indexes.multi import MultiIndex 

90from pandas.core.indexes.timedeltas import ( 

91 TimedeltaIndex, 

92 timedelta_range, 

93) 

94 

95_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

96 

97_index_doc_kwargs.update( 

98 { 

99 "klass": "IntervalIndex", 

100 "qualname": "IntervalIndex", 

101 "target_klass": "IntervalIndex or list of Intervals", 

102 "name": textwrap.dedent( 

103 """\ 

104 name : object, optional 

105 Name to be stored in the index. 

106 """ 

107 ), 

108 } 

109) 

110 

111 

112def _get_next_label(label): 

113 dtype = getattr(label, "dtype", type(label)) 

114 if isinstance(label, (Timestamp, Timedelta)): 

115 dtype = "datetime64" 

116 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

117 return label + np.timedelta64(1, "ns") 

118 elif is_integer_dtype(dtype): 

119 return label + 1 

120 elif is_float_dtype(dtype): 

121 return np.nextafter(label, np.infty) 

122 else: 

123 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

124 

125 

126def _get_prev_label(label): 

127 dtype = getattr(label, "dtype", type(label)) 

128 if isinstance(label, (Timestamp, Timedelta)): 

129 dtype = "datetime64" 

130 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

131 return label - np.timedelta64(1, "ns") 

132 elif is_integer_dtype(dtype): 

133 return label - 1 

134 elif is_float_dtype(dtype): 

135 return np.nextafter(label, -np.infty) 

136 else: 

137 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

138 
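# Illustrative sketch (added for clarity, not part of the upstream file): the two
# helpers above nudge a label by the smallest meaningful step for its type, which
# _searchsorted_monotonic below uses to handle open endpoints. Assuming
# ``import pandas as pd``:
#
#   >>> _get_next_label(5)                            # integers step by 1 -> 6
#   >>> _get_next_label(5.0)                          # floats use np.nextafter toward +inf
#   >>> _get_prev_label(pd.Timestamp("2020-01-01"))   # datetime-likes step back by 1 ns
#
# Unsupported label types (e.g. strings) raise TypeError.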

139 

140def _new_IntervalIndex(cls, d): 

141 """ 

142 This is called upon unpickling, rather than the default which doesn't have 

143 arguments and breaks __new__. 

144 """ 

145 return cls.from_arrays(**d) 

146 

147 

148@Appender( 

149 _interval_shared_docs["class"] 

150 % { 

151 "klass": "IntervalIndex", 

152 "summary": "Immutable index of intervals that are closed on the same side.", 

153 "name": _index_doc_kwargs["name"], 

154 "versionadded": "0.20.0", 

155 "extra_attributes": "is_overlapping\nvalues\n", 

156 "extra_methods": "", 

157 "examples": textwrap.dedent( 

158 """\ 

159 Examples 

160 -------- 

161 A new ``IntervalIndex`` is typically constructed using 

162 :func:`interval_range`: 

163 

164 >>> pd.interval_range(start=0, end=5) 

165 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 

166 dtype='interval[int64, right]') 

167 

168 It may also be constructed using one of the constructor 

169 methods: :meth:`IntervalIndex.from_arrays`, 

170 :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. 

171 

172 See further examples in the doc strings of ``interval_range`` and the 

173 mentioned constructor methods. 

174 """ 

175 ), 

176 } 

177) 

178@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) 

179@inherit_names( 

180 [ 

181 "__array__", 

182 "overlaps", 

183 "contains", 

184 "closed_left", 

185 "closed_right", 

186 "open_left", 

187 "open_right", 

188 "is_empty", 

189 ], 

190 IntervalArray, 

191) 

192@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True) 

193class IntervalIndex(ExtensionIndex): 

194 _typ = "intervalindex" 

195 

196 # annotate properties pinned via inherit_names 

197 closed: IntervalClosedType 

198 is_non_overlapping_monotonic: bool 

199 closed_left: bool 

200 closed_right: bool 

201 open_left: bool 

202 open_right: bool 

203 

204 _data: IntervalArray 

205 _values: IntervalArray 

206 _can_hold_strings = False 

207 _data_cls = IntervalArray 

208 

209 # -------------------------------------------------------------------- 

210 # Constructors 

211 

212 def __new__( 

213 cls, 

214 data, 

215 closed=None, 

216 dtype: Dtype | None = None, 

217 copy: bool = False, 

218 name: Hashable = None, 

219 verify_integrity: bool = True, 

220 ) -> IntervalIndex: 

221 name = maybe_extract_name(name, data, cls) 

222 

223 with rewrite_exception("IntervalArray", cls.__name__): 

224 array = IntervalArray( 

225 data, 

226 closed=closed, 

227 copy=copy, 

228 dtype=dtype, 

229 verify_integrity=verify_integrity, 

230 ) 

231 

232 return cls._simple_new(array, name) 

233 

234 @classmethod 

235 @Appender( 

236 _interval_shared_docs["from_breaks"] 

237 % { 

238 "klass": "IntervalIndex", 

239 "name": textwrap.dedent( 

240 """ 

241 name : str, optional 

242 Name of the resulting IntervalIndex.""" 

243 ), 

244 "examples": textwrap.dedent( 

245 """\ 

246 Examples 

247 -------- 

248 >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) 

249 IntervalIndex([(0, 1], (1, 2], (2, 3]], 

250 dtype='interval[int64, right]') 

251 """ 

252 ), 

253 } 

254 ) 

255 def from_breaks( 

256 cls, 

257 breaks, 

258 closed: IntervalClosedType | None = "right", 

259 name: Hashable = None, 

260 copy: bool = False, 

261 dtype: Dtype | None = None, 

262 ) -> IntervalIndex: 

263 with rewrite_exception("IntervalArray", cls.__name__): 

264 array = IntervalArray.from_breaks( 

265 breaks, closed=closed, copy=copy, dtype=dtype 

266 ) 

267 return cls._simple_new(array, name=name) 

268 

269 @classmethod 

270 @Appender( 

271 _interval_shared_docs["from_arrays"] 

272 % { 

273 "klass": "IntervalIndex", 

274 "name": textwrap.dedent( 

275 """ 

276 name : str, optional 

277 Name of the resulting IntervalIndex.""" 

278 ), 

279 "examples": textwrap.dedent( 

280 """\ 

281 Examples 

282 -------- 

283 >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) 

284 IntervalIndex([(0, 1], (1, 2], (2, 3]], 

285 dtype='interval[int64, right]') 

286 """ 

287 ), 

288 } 

289 ) 

290 def from_arrays( 

291 cls, 

292 left, 

293 right, 

294 closed: IntervalClosedType = "right", 

295 name: Hashable = None, 

296 copy: bool = False, 

297 dtype: Dtype | None = None, 

298 ) -> IntervalIndex: 

299 with rewrite_exception("IntervalArray", cls.__name__): 

300 array = IntervalArray.from_arrays( 

301 left, right, closed, copy=copy, dtype=dtype 

302 ) 

303 return cls._simple_new(array, name=name) 

304 

305 @classmethod 

306 @Appender( 

307 _interval_shared_docs["from_tuples"] 

308 % { 

309 "klass": "IntervalIndex", 

310 "name": textwrap.dedent( 

311 """ 

312 name : str, optional 

313 Name of the resulting IntervalIndex.""" 

314 ), 

315 "examples": textwrap.dedent( 

316 """\ 

317 Examples 

318 -------- 

319 >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) 

320 IntervalIndex([(0, 1], (1, 2]], 

321 dtype='interval[int64, right]') 

322 """ 

323 ), 

324 } 

325 ) 

326 def from_tuples( 

327 cls, 

328 data, 

329 closed: IntervalClosedType = "right", 

330 name: Hashable = None, 

331 copy: bool = False, 

332 dtype: Dtype | None = None, 

333 ) -> IntervalIndex: 

334 with rewrite_exception("IntervalArray", cls.__name__): 

335 arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) 

336 return cls._simple_new(arr, name=name) 

337 

338 # -------------------------------------------------------------------- 

339 # error: Return type "IntervalTree" of "_engine" incompatible with return type 

340 # "Union[IndexEngine, ExtensionEngine]" in supertype "Index" 

341 @cache_readonly 

342 def _engine(self) -> IntervalTree: # type: ignore[override] 

343 # IntervalTree does not support numpy arrays unless they are 64-bit

344 left = self._maybe_convert_i8(self.left) 

345 left = maybe_upcast_numeric_to_64bit(left) 

346 right = self._maybe_convert_i8(self.right) 

347 right = maybe_upcast_numeric_to_64bit(right) 

348 return IntervalTree(left, right, closed=self.closed) 

349 

350 def __contains__(self, key: Any) -> bool: 

351 """ 

352 Return a boolean indicating whether this key is in the index.

353 We *only* accept an Interval key.

354 

355 Parameters 

356 ---------- 

357 key : Interval 

358 

359 Returns 

360 ------- 

361 bool 

362 """ 

363 hash(key) 

364 if not isinstance(key, Interval): 

365 if is_valid_na_for_dtype(key, self.dtype): 

366 return self.hasnans 

367 return False 

368 

369 try: 

370 self.get_loc(key) 

371 return True 

372 except KeyError: 

373 return False 

374 
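# Hedged usage sketch (not from the original source), assuming a small
# integer-backed index and ``import pandas as pd``:
#
#   >>> idx = pd.IntervalIndex.from_breaks([0, 1, 2])
#   >>> pd.Interval(0, 1) in idx                  # exact match -> True
#   >>> pd.Interval(0, 1, closed="left") in idx   # closed mismatch -> False
#   >>> 0.5 in idx                                # scalars are never "in" -> False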

375 @cache_readonly 

376 def _multiindex(self) -> MultiIndex: 

377 return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) 

378 

379 def __reduce__(self): 

380 d = { 

381 "left": self.left, 

382 "right": self.right, 

383 "closed": self.closed, 

384 "name": self.name, 

385 } 

386 return _new_IntervalIndex, (type(self), d), None 

387 

388 @property 

389 def inferred_type(self) -> str: 

390 """Return a string of the type inferred from the values""" 

391 return "interval" 

392 

393 # Cannot determine type of "memory_usage" 

394 @Appender(Index.memory_usage.__doc__) # type: ignore[has-type] 

395 def memory_usage(self, deep: bool = False) -> int: 

396 # we don't use an explicit engine 

397 # so return the bytes here 

398 return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) 

399 

400 # IntervalTree doesn't have an is_monotonic_decreasing, so we have to override

401 # the Index implementation 

402 @cache_readonly 

403 def is_monotonic_decreasing(self) -> bool: 

404 """ 

405 Return True if the IntervalIndex is monotonic decreasing (only equal or 

406 decreasing values), else False 

407 """ 

408 return self[::-1].is_monotonic_increasing 

409 
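# A minimal sketch of the reversal trick above (illustrative, not upstream):
#
#   >>> idx = pd.interval_range(0, 3)        # (0, 1], (1, 2], (2, 3]
#   >>> idx.is_monotonic_decreasing          # False
#   >>> idx[::-1].is_monotonic_decreasing    # True, via is_monotonic_increasing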

410 @cache_readonly 

411 def is_unique(self) -> bool: 

412 """ 

413 Return True if the IntervalIndex contains unique elements, else False. 

414 """ 

415 left = self.left 

416 right = self.right 

417 

418 if self.isna().sum() > 1: 

419 return False 

420 

421 if left.is_unique or right.is_unique: 

422 return True 

423 

424 seen_pairs = set() 

425 check_idx = np.where(left.duplicated(keep=False))[0] 

426 for idx in check_idx: 

427 pair = (left[idx], right[idx]) 

428 if pair in seen_pairs: 

429 return False 

430 seen_pairs.add(pair) 

431 

432 return True 

433 
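# Illustrative sketch (not part of the original file): uniqueness only fails when
# a full (left, right) pair repeats, or when more than one NaN is present.
#
#   >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 2)]).is_unique   # shared left -> True
#   >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 1)]).is_unique   # repeated pair -> False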

434 @property 

435 def is_overlapping(self) -> bool: 

436 """ 

437 Return True if the IntervalIndex has overlapping intervals, else False. 

438 

439 Two intervals overlap if they share a common point, including closed 

440 endpoints. Intervals that only have an open endpoint in common do not 

441 overlap. 

442 

443 Returns 

444 ------- 

445 bool 

446 Boolean indicating if the IntervalIndex has overlapping intervals. 

447 

448 See Also 

449 -------- 

450 Interval.overlaps : Check whether two Interval objects overlap. 

451 IntervalIndex.overlaps : Check an IntervalIndex elementwise for 

452 overlaps. 

453 

454 Examples 

455 -------- 

456 >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)]) 

457 >>> index 

458 IntervalIndex([(0, 2], (1, 3], (4, 5]], 

459 dtype='interval[int64, right]') 

460 >>> index.is_overlapping 

461 True 

462 

463 Intervals that share closed endpoints overlap: 

464 

465 >>> index = pd.interval_range(0, 3, closed='both') 

466 >>> index 

467 IntervalIndex([[0, 1], [1, 2], [2, 3]], 

468 dtype='interval[int64, both]') 

469 >>> index.is_overlapping 

470 True 

471 

472 Intervals that only have an open endpoint in common do not overlap: 

473 

474 >>> index = pd.interval_range(0, 3, closed='left') 

475 >>> index 

476 IntervalIndex([[0, 1), [1, 2), [2, 3)], 

477 dtype='interval[int64, left]') 

478 >>> index.is_overlapping 

479 False 

480 """ 

481 # GH 23309 

482 return self._engine.is_overlapping 

483 

484 def _needs_i8_conversion(self, key) -> bool: 

485 """ 

486 Check if a given key needs i8 conversion. Conversion is necessary for 

487 Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An 

488 Interval-like requires conversion if its endpoints are one of the 

489 aforementioned types. 

490 

491 Assumes that any list-like data has already been cast to an Index. 

492 

493 Parameters 

494 ---------- 

495 key : scalar or Index-like 

496 The key that should be checked for i8 conversion 

497 

498 Returns 

499 ------- 

500 bool 

501 """ 

502 if is_interval_dtype(key) or isinstance(key, Interval): 

503 return self._needs_i8_conversion(key.left) 

504 

505 i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex) 

506 return isinstance(key, i8_types) 

507 

508 def _maybe_convert_i8(self, key): 

509 """ 

510 Maybe convert a given key to its equivalent i8 value(s). Used as a 

511 preprocessing step prior to IntervalTree queries (self._engine), which 

512 expects numeric data. 

513 

514 Parameters 

515 ---------- 

516 key : scalar or list-like 

517 The key that should maybe be converted to i8. 

518 

519 Returns 

520 ------- 

521 scalar or list-like 

522 The original key if no conversion occurred, int if converted scalar, 

523 Index with an int64 dtype if converted list-like. 

524 """ 

525 if is_list_like(key): 

526 key = ensure_index(key) 

527 key = maybe_upcast_numeric_to_64bit(key) 

528 

529 if not self._needs_i8_conversion(key): 

530 return key 

531 

532 scalar = is_scalar(key) 

533 if is_interval_dtype(key) or isinstance(key, Interval): 

534 # convert left/right and reconstruct 

535 left = self._maybe_convert_i8(key.left) 

536 right = self._maybe_convert_i8(key.right) 

537 constructor = Interval if scalar else IntervalIndex.from_arrays 

538 # error: "object" not callable 

539 return constructor( 

540 left, right, closed=self.closed 

541 ) # type: ignore[operator] 

542 

543 if scalar: 

544 # Timestamp/Timedelta 

545 key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) 

546 if lib.is_period(key): 

547 key_i8 = key.ordinal 

548 elif isinstance(key_i8, Timestamp): 

549 key_i8 = key_i8._value 

550 elif isinstance(key_i8, (np.datetime64, np.timedelta64)): 

551 key_i8 = key_i8.view("i8") 

552 else: 

553 # DatetimeIndex/TimedeltaIndex 

554 key_dtype, key_i8 = key.dtype, Index(key.asi8) 

555 if key.hasnans: 

556 # convert NaT from its i8 value to np.nan so it's not viewed 

557 # as a valid value, maybe causing errors (e.g. is_overlapping) 

558 key_i8 = key_i8.where(~key._isnan) 

559 

560 # ensure consistency with IntervalIndex subtype 

561 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], 

562 # ExtensionDtype]" has no attribute "subtype" 

563 subtype = self.dtype.subtype # type: ignore[union-attr] 

564 

565 if not is_dtype_equal(subtype, key_dtype): 

566 raise ValueError( 

567 f"Cannot index an IntervalIndex of subtype {subtype} with " 

568 f"values of dtype {key_dtype}" 

569 ) 

570 

571 return key_i8 

572 
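# Rough sketch of the conversion above (added for clarity, not upstream): datetime-like
# keys are reduced to their nanosecond integer ("i8") form before being handed to the
# IntervalTree engine.
#
#   >>> ts = pd.Timestamp("2020-01-01")
#   >>> ts.value                       # the i8 value a scalar Timestamp key becomes
#   >>> pd.DatetimeIndex([ts]).asi8    # list-likes use asi8, with NaT masked to NaN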

573 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): 

574 if not self.is_non_overlapping_monotonic: 

575 raise KeyError( 

576 "can only get slices from an IntervalIndex if bounds are " 

577 "non-overlapping and all monotonic increasing or decreasing" 

578 ) 

579 

580 if isinstance(label, (IntervalMixin, IntervalIndex)): 

581 raise NotImplementedError("Interval objects are not currently supported") 

582 

583 # GH 20921: "not is_monotonic_increasing" for the second condition 

584 # instead of "is_monotonic_decreasing" to account for single element 

585 # indexes being both increasing and decreasing 

586 if (side == "left" and self.left.is_monotonic_increasing) or ( 

587 side == "right" and not self.left.is_monotonic_increasing 

588 ): 

589 sub_idx = self.right 

590 if self.open_right: 

591 label = _get_next_label(label) 

592 else: 

593 sub_idx = self.left 

594 if self.open_left: 

595 label = _get_prev_label(label) 

596 

597 return sub_idx._searchsorted_monotonic(label, side) 

598 

599 # -------------------------------------------------------------------- 

600 # Indexing Methods 

601 

602 def get_loc(self, key) -> int | slice | np.ndarray: 

603 """ 

604 Get integer location, slice or boolean mask for requested label. 

605 

606 Parameters 

607 ---------- 

608 key : label 

609 

610 Returns 

611 ------- 

612 int if unique index, slice if monotonic index, else mask 

613 

614 Examples 

615 -------- 

616 >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) 

617 >>> index = pd.IntervalIndex([i1, i2]) 

618 >>> index.get_loc(1) 

619 0 

620 

621 You can also supply a point inside an interval. 

622 

623 >>> index.get_loc(1.5) 

624 1 

625 

626 If a label is in several intervals, you get the locations of all the 

627 relevant intervals. 

628 

629 >>> i3 = pd.Interval(0, 2) 

630 >>> overlapping_index = pd.IntervalIndex([i1, i2, i3]) 

631 >>> overlapping_index.get_loc(0.5) 

632 array([ True, False, True]) 

633 

634 Only exact matches will be returned if an interval is provided. 

635 

636 >>> index.get_loc(pd.Interval(0, 1)) 

637 0 

638 """ 

639 self._check_indexing_error(key) 

640 

641 if isinstance(key, Interval): 

642 if self.closed != key.closed: 

643 raise KeyError(key) 

644 mask = (self.left == key.left) & (self.right == key.right) 

645 elif is_valid_na_for_dtype(key, self.dtype): 

646 mask = self.isna() 

647 else: 

648 # assume scalar 

649 op_left = le if self.closed_left else lt 

650 op_right = le if self.closed_right else lt 

651 try: 

652 mask = op_left(self.left, key) & op_right(key, self.right) 

653 except TypeError as err: 

654 # scalar is not comparable to II subtype --> invalid label 

655 raise KeyError(key) from err 

656 

657 matches = mask.sum() 

658 if matches == 0: 

659 raise KeyError(key) 

660 if matches == 1: 

661 return mask.argmax() 

662 

663 res = lib.maybe_booleans_to_slice(mask.view("u1")) 

664 if isinstance(res, slice) and res.stop is None: 

665 # TODO: DO this in maybe_booleans_to_slice? 

666 res = slice(res.start, len(self), res.step) 

667 return res 

668 

669 def _get_indexer( 

670 self, 

671 target: Index, 

672 method: str | None = None, 

673 limit: int | None = None, 

674 tolerance: Any | None = None, 

675 ) -> npt.NDArray[np.intp]: 

676 if isinstance(target, IntervalIndex): 

677 # We only get here with not self.is_overlapping 

678 # -> at most one match per interval in target 

679 # want exact matches -> need both left/right to match, so defer to 

680 # left/right get_indexer, compare elementwise, equality -> match 

681 indexer = self._get_indexer_unique_sides(target) 

682 

683 elif not is_object_dtype(target.dtype): 

684 # homogeneous scalar index: use IntervalTree 

685 # we should always have self._should_partial_index(target) here 

686 target = self._maybe_convert_i8(target) 

687 indexer = self._engine.get_indexer(target.values) 

688 else: 

689 # heterogeneous scalar index: defer elementwise to get_loc 

690 # we should always have self._should_partial_index(target) here 

691 return self._get_indexer_pointwise(target)[0] 

692 

693 return ensure_platform_int(indexer) 

694 

695 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) 

696 def get_indexer_non_unique( 

697 self, target: Index 

698 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

699 target = ensure_index(target) 

700 

701 if not self._should_compare(target) and not self._should_partial_index(target): 

702 # e.g. IntervalIndex with different closed or incompatible subtype 

703 # -> no matches 

704 return self._get_indexer_non_comparable(target, None, unique=False) 

705 

706 elif isinstance(target, IntervalIndex): 

707 if self.left.is_unique and self.right.is_unique: 

708 # fastpath available even if we don't have self._index_as_unique 

709 indexer = self._get_indexer_unique_sides(target) 

710 missing = (indexer == -1).nonzero()[0] 

711 else: 

712 return self._get_indexer_pointwise(target) 

713 

714 elif is_object_dtype(target.dtype) or not self._should_partial_index(target): 

715 # target might contain intervals: defer elementwise to get_loc 

716 return self._get_indexer_pointwise(target) 

717 

718 else: 

719 # Note: this case behaves differently from other Index subclasses 

720 # because IntervalIndex does partial-int indexing 

721 target = self._maybe_convert_i8(target) 

722 indexer, missing = self._engine.get_indexer_non_unique(target.values) 

723 

724 return ensure_platform_int(indexer), ensure_platform_int(missing) 

725 

726 def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]: 

727 """ 

728 _get_indexer specialized to the case where both of our sides are unique. 

729 """ 

730 # Caller is responsible for checking 

731 # `self.left.is_unique and self.right.is_unique` 

732 

733 left_indexer = self.left.get_indexer(target.left) 

734 right_indexer = self.right.get_indexer(target.right) 

735 indexer = np.where(left_indexer == right_indexer, left_indexer, -1) 

736 return indexer 

737 
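# Illustrative sketch (not from the upstream source): an exact interval match requires
# the same position in both the left and the right lookup.
#
#   >>> idx = pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
#   >>> tgt = pd.IntervalIndex.from_tuples([(1, 2), (5, 6)])
#   >>> idx.get_indexer(tgt)   # array([ 1, -1]): (1, 2] matches, (5, 6] does not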

738 def _get_indexer_pointwise( 

739 self, target: Index 

740 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

741 """ 

742 pointwise implementation for get_indexer and get_indexer_non_unique. 

743 """ 

744 indexer, missing = [], [] 

745 for i, key in enumerate(target): 

746 try: 

747 locs = self.get_loc(key) 

748 if isinstance(locs, slice): 

749 # Only needed for get_indexer_non_unique 

750 locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") 

751 elif lib.is_integer(locs): 

752 locs = np.array(locs, ndmin=1) 

753 else: 

754 # otherwise we have ndarray[bool] 

755 locs = np.where(locs)[0] 

756 except KeyError: 

757 missing.append(i) 

758 locs = np.array([-1]) 

759 except InvalidIndexError: 

760 # i.e. non-scalar key e.g. a tuple. 

761 # see test_append_different_columns_types_raises 

762 missing.append(i) 

763 locs = np.array([-1]) 

764 

765 indexer.append(locs) 

766 

767 indexer = np.concatenate(indexer) 

768 return ensure_platform_int(indexer), ensure_platform_int(missing) 

769 

770 @cache_readonly 

771 def _index_as_unique(self) -> bool: 

772 return not self.is_overlapping and self._engine._na_count < 2 

773 

774 _requires_unique_msg = ( 

775 "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique" 

776 ) 

777 

778 def _convert_slice_indexer(self, key: slice, kind: str): 

779 if not (key.step is None or key.step == 1): 

780 # GH#31658 if label-based, we require step == 1, 

781 # if positional, we disallow float start/stop 

782 msg = "label-based slicing with step!=1 is not supported for IntervalIndex" 

783 if kind == "loc": 

784 raise ValueError(msg) 

785 if kind == "getitem": 

786 if not is_valid_positional_slice(key): 

787 # i.e. this cannot be interpreted as a positional slice 

788 raise ValueError(msg) 

789 

790 return super()._convert_slice_indexer(key, kind) 

791 

792 @cache_readonly 

793 def _should_fallback_to_positional(self) -> bool: 

794 # integer lookups in Series.__getitem__ are unambiguously 

795 # positional in this case 

796 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], 

797 # ExtensionDtype]" has no attribute "subtype" 

798 return self.dtype.subtype.kind in ["m", "M"] # type: ignore[union-attr] 

799 

800 def _maybe_cast_slice_bound(self, label, side: str): 

801 return getattr(self, side)._maybe_cast_slice_bound(label, side) 

802 

803 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

804 if not isinstance(dtype, IntervalDtype): 

805 return False 

806 common_subtype = find_common_type([self.dtype, dtype]) 

807 return not is_object_dtype(common_subtype) 

808 

809 # -------------------------------------------------------------------- 

810 

811 @cache_readonly 

812 def left(self) -> Index: 

813 return Index(self._data.left, copy=False) 

814 

815 @cache_readonly 

816 def right(self) -> Index: 

817 return Index(self._data.right, copy=False) 

818 

819 @cache_readonly 

820 def mid(self) -> Index: 

821 return Index(self._data.mid, copy=False) 

822 

823 @property 

824 def length(self) -> Index: 

825 return Index(self._data.length, copy=False) 

826 
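# Illustrative sketch of the endpoint accessors above (added, not upstream):
#
#   >>> idx = pd.interval_range(0, 3)
#   >>> idx.left     # Index([0, 1, 2])
#   >>> idx.right    # Index([1, 2, 3])
#   >>> idx.mid      # Index([0.5, 1.5, 2.5])
#   >>> idx.length   # Index([1, 1, 1])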

827 # -------------------------------------------------------------------- 

828 # Rendering Methods 

829 # __repr__ associated methods are based on MultiIndex 

830 

831 def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: 

832 # matches base class except for whitespace padding 

833 return header + list(self._format_native_types(na_rep=na_rep)) 

834 

835 def _format_native_types( 

836 self, *, na_rep: str = "NaN", quoting=None, **kwargs 

837 ) -> npt.NDArray[np.object_]: 

838 # GH 28210: use base method but with different default na_rep 

839 return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) 

840 

841 def _format_data(self, name=None) -> str: 

842 # TODO: integrate with categorical and make generic 

843 # name argument is unused here; just for compat with base / categorical 

844 return f"{self._data._format_data()},{self._format_space()}" 

845 

846 # -------------------------------------------------------------------- 

847 # Set Operations 

848 

849 def _intersection(self, other, sort): 

850 """ 

851 intersection specialized to the case with matching dtypes. 

852 """ 

853 # For IntervalIndex we also know other.closed == self.closed 

854 if self.left.is_unique and self.right.is_unique: 

855 taken = self._intersection_unique(other) 

856 elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1: 

857 # Swap other/self if other is unique and self does not have 

858 # multiple NaNs 

859 taken = other._intersection_unique(self) 

860 else: 

861 # duplicates 

862 taken = self._intersection_non_unique(other) 

863 

864 if sort is None: 

865 taken = taken.sort_values() 

866 

867 return taken 

868 
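# Hedged usage sketch (not part of the original file): _intersection above is reached
# via the public Index.intersection and dispatches to the specializations below.
#
#   >>> a = pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
#   >>> b = pd.IntervalIndex.from_tuples([(1, 2), (2, 3)])
#   >>> a.intersection(b)   # IntervalIndex([(1, 2]], dtype='interval[int64, right]')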

869 def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex: 

870 """ 

871 Used when both the left and the right endpoints of the IntervalIndex

872 are unique (no duplicates on either side).

873 Return the intersection with another IntervalIndex. 

874 Parameters 

875 ---------- 

876 other : IntervalIndex 

877 Returns 

878 ------- 

879 IntervalIndex 

880 """ 

881 # Note: this is much more performant than super()._intersection(other) 

882 lindexer = self.left.get_indexer(other.left) 

883 rindexer = self.right.get_indexer(other.right) 

884 

885 match = (lindexer == rindexer) & (lindexer != -1) 

886 indexer = lindexer.take(match.nonzero()[0]) 

887 indexer = unique(indexer) 

888 

889 return self.take(indexer) 

890 

891 def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex: 

892 """ 

893 Used when the IntervalIndex does have some common endpoints, 

894 on either side.

895 Return the intersection with another IntervalIndex. 

896 

897 Parameters 

898 ---------- 

899 other : IntervalIndex 

900 

901 Returns 

902 ------- 

903 IntervalIndex 

904 """ 

905 # Note: this is about 3.25x faster than super()._intersection(other) 

906 # in IntervalIndexMethod.time_intersection_both_duplicate(1000) 

907 mask = np.zeros(len(self), dtype=bool) 

908 

909 if self.hasnans and other.hasnans: 

910 first_nan_loc = np.arange(len(self))[self.isna()][0] 

911 mask[first_nan_loc] = True 

912 

913 other_tups = set(zip(other.left, other.right)) 

914 for i, tup in enumerate(zip(self.left, self.right)): 

915 if tup in other_tups: 

916 mask[i] = True 

917 

918 return self[mask] 

919 

920 # -------------------------------------------------------------------- 

921 

922 def _get_engine_target(self) -> np.ndarray: 

923 # Note: we _could_ use libjoin functions by either casting to object 

924 # dtype or constructing tuples (faster than constructing Intervals) 

925 # but the libjoin fastpaths are no longer fast in these cases. 

926 raise NotImplementedError( 

927 "IntervalIndex does not use libjoin fastpaths or pass values to " 

928 "IndexEngine objects" 

929 ) 

930 

931 def _from_join_target(self, result): 

932 raise NotImplementedError("IntervalIndex does not use libjoin fastpaths") 

933 

934 # TODO: arithmetic operations 

935 

936 

937def _is_valid_endpoint(endpoint) -> bool: 

938 """ 

939 Helper for interval_range to check if start/end are valid types. 

940 """ 

941 return any( 

942 [ 

943 is_number(endpoint), 

944 isinstance(endpoint, Timestamp), 

945 isinstance(endpoint, Timedelta), 

946 endpoint is None, 

947 ] 

948 ) 

949 

950 

951def _is_type_compatible(a, b) -> bool: 

952 """ 

953 Helper for interval_range to check type compat of start/end/freq. 

954 """ 

955 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset)) 

956 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset)) 

957 return ( 

958 (is_number(a) and is_number(b)) 

959 or (is_ts_compat(a) and is_ts_compat(b)) 

960 or (is_td_compat(a) and is_td_compat(b)) 

961 or com.any_none(a, b) 

962 ) 

963 

964 

965def interval_range( 

966 start=None, 

967 end=None, 

968 periods=None, 

969 freq=None, 

970 name: Hashable = None, 

971 closed: IntervalClosedType = "right", 

972) -> IntervalIndex: 

973 """ 

974 Return a fixed frequency IntervalIndex. 

975 

976 Parameters 

977 ---------- 

978 start : numeric or datetime-like, default None 

979 Left bound for generating intervals. 

980 end : numeric or datetime-like, default None 

981 Right bound for generating intervals. 

982 periods : int, default None 

983 Number of periods to generate. 

984 freq : numeric, str, datetime.timedelta, or DateOffset, default None 

985 The length of each interval. Must be consistent with the type of start 

986 and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 

987 for numeric and 'D' for datetime-like. 

988 name : str, default None 

989 Name of the resulting IntervalIndex. 

990 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

991 Whether the intervals are closed on the left-side, right-side, both 

992 or neither. 

993 

994 Returns 

995 ------- 

996 IntervalIndex 

997 

998 See Also 

999 -------- 

1000 IntervalIndex : An Index of intervals that are all closed on the same side. 

1001 

1002 Notes 

1003 ----- 

1004 Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, 

1005 exactly three must be specified. If ``freq`` is omitted, the resulting 

1006 ``IntervalIndex`` will have ``periods`` linearly spaced elements between 

1007 ``start`` and ``end``, inclusively. 

1008 

1009 To learn more about datetime-like frequency strings, please see `this link 

1010 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

1011 

1012 Examples 

1013 -------- 

1014 Numeric ``start`` and ``end`` are supported.

1015 

1016 >>> pd.interval_range(start=0, end=5) 

1017 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 

1018 dtype='interval[int64, right]') 

1019 

1020 Additionally, datetime-like input is also supported. 

1021 

1022 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 

1023 ... end=pd.Timestamp('2017-01-04')) 

1024 IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], 

1025 (2017-01-03, 2017-01-04]], 

1026 dtype='interval[datetime64[ns], right]') 

1027 

1028 The ``freq`` parameter specifies the frequency between the left and right

1029 endpoints of the individual intervals within the ``IntervalIndex``. For 

1030 numeric ``start`` and ``end``, the frequency must also be numeric. 

1031 

1032 >>> pd.interval_range(start=0, periods=4, freq=1.5) 

1033 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 

1034 dtype='interval[float64, right]') 

1035 

1036 Similarly, for datetime-like ``start`` and ``end``, the frequency must be 

1037 convertible to a DateOffset. 

1038 

1039 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 

1040 ... periods=3, freq='MS') 

1041 IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], 

1042 (2017-03-01, 2017-04-01]], 

1043 dtype='interval[datetime64[ns], right]') 

1044 

1045 Specify ``start``, ``end``, and ``periods``; the frequency is generated 

1046 automatically (linearly spaced). 

1047 

1048 >>> pd.interval_range(start=0, end=6, periods=4) 

1049 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 

1050 dtype='interval[float64, right]') 

1051 

1052 The ``closed`` parameter specifies which endpoints of the individual 

1053 intervals within the ``IntervalIndex`` are closed. 

1054 

1055 >>> pd.interval_range(end=5, periods=4, closed='both') 

1056 IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], 

1057 dtype='interval[int64, both]') 

1058 """ 

1059 start = maybe_box_datetimelike(start) 

1060 end = maybe_box_datetimelike(end) 

1061 endpoint = start if start is not None else end 

1062 

1063 if freq is None and com.any_none(periods, start, end): 

1064 freq = 1 if is_number(endpoint) else "D" 

1065 

1066 if com.count_not_none(start, end, periods, freq) != 3: 

1067 raise ValueError( 

1068 "Of the four parameters: start, end, periods, and " 

1069 "freq, exactly three must be specified" 

1070 ) 

1071 

1072 if not _is_valid_endpoint(start): 

1073 raise ValueError(f"start must be numeric or datetime-like, got {start}") 

1074 if not _is_valid_endpoint(end): 

1075 raise ValueError(f"end must be numeric or datetime-like, got {end}") 

1076 

1077 if is_float(periods): 

1078 periods = int(periods) 

1079 elif not is_integer(periods) and periods is not None: 

1080 raise TypeError(f"periods must be a number, got {periods}") 

1081 

1082 if freq is not None and not is_number(freq): 

1083 try: 

1084 freq = to_offset(freq) 

1085 except ValueError as err: 

1086 raise ValueError( 

1087 f"freq must be numeric or convertible to DateOffset, got {freq}" 

1088 ) from err 

1089 

1090 # verify type compatibility 

1091 if not all( 

1092 [ 

1093 _is_type_compatible(start, end), 

1094 _is_type_compatible(start, freq), 

1095 _is_type_compatible(end, freq), 

1096 ] 

1097 ): 

1098 raise TypeError("start, end, freq need to be type compatible") 

1099 

1100 # +1 to convert interval count to breaks count (n breaks = n-1 intervals) 

1101 if periods is not None: 

1102 periods += 1 

1103 

1104 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex 

1105 

1106 if is_number(endpoint): 

1107 # force consistency between start/end/freq (lower end if freq skips it) 

1108 if com.all_not_none(start, end, freq): 

1109 end -= (end - start) % freq 

1110 

1111 # compute the period/start/end if unspecified (at most one) 

1112 if periods is None: 

1113 periods = int((end - start) // freq) + 1 

1114 elif start is None: 

1115 start = end - (periods - 1) * freq 

1116 elif end is None: 

1117 end = start + (periods - 1) * freq 

1118 

1119 breaks = np.linspace(start, end, periods) 

1120 if all(is_integer(x) for x in com.not_none(start, end, freq)): 

1121 # np.linspace always produces float output 

1122 

1123 # error: Argument 1 to "maybe_downcast_numeric" has incompatible type 

1124 # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]"; 

1125 # expected "ndarray[Any, Any]" [ 

1126 breaks = maybe_downcast_numeric( 

1127 breaks, # type: ignore[arg-type] 

1128 np.dtype("int64"), 

1129 ) 

1130 else: 

1131 # delegate to the appropriate range function 

1132 if isinstance(endpoint, Timestamp): 

1133 breaks = date_range(start=start, end=end, periods=periods, freq=freq) 

1134 else: 

1135 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) 

1136 

1137 return IntervalIndex.from_breaks(breaks, name=name, closed=closed)