1"""
2Base and utility classes for tseries type pandas objects.
3"""
4from __future__ import annotations
5
6from abc import (
7 ABC,
8 abstractmethod,
9)
10from datetime import datetime
11from typing import (
12 TYPE_CHECKING,
13 Any,
14 Callable,
15 Sequence,
16 TypeVar,
17 cast,
18 final,
19)
20
21import numpy as np
22
23from pandas._libs import (
24 NaT,
25 Timedelta,
26 lib,
27)
28from pandas._libs.tslibs import (
29 BaseOffset,
30 Resolution,
31 Tick,
32 parsing,
33 to_offset,
34)
35from pandas._typing import (
36 Axis,
37 npt,
38)
39from pandas.compat.numpy import function as nv
40from pandas.errors import NullFrequencyError
41from pandas.util._decorators import (
42 Appender,
43 cache_readonly,
44 doc,
45)
46
47from pandas.core.dtypes.common import (
48 is_categorical_dtype,
49 is_dtype_equal,
50 is_integer,
51 is_list_like,
52)
53from pandas.core.dtypes.concat import concat_compat
54
55from pandas.core.arrays import (
56 DatetimeArray,
57 ExtensionArray,
58 PeriodArray,
59 TimedeltaArray,
60)
61from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
62import pandas.core.common as com
63import pandas.core.indexes.base as ibase
64from pandas.core.indexes.base import (
65 Index,
66 _index_shared_docs,
67)
68from pandas.core.indexes.extension import NDArrayBackedExtensionIndex
69from pandas.core.indexes.range import RangeIndex
70from pandas.core.tools.timedeltas import to_timedelta
71
72if TYPE_CHECKING:
73 from pandas import CategoricalIndex
74
75_index_doc_kwargs = dict(ibase._index_doc_kwargs)
76
77_T = TypeVar("_T", bound="DatetimeIndexOpsMixin")
78_TDT = TypeVar("_TDT", bound="DatetimeTimedeltaMixin")
79
80
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC):
    """
    Common ops mixin to support a unified interface datetimelike Index.
    """

    # Datetimelike indexes handle strings by parsing them, not storing them.
    _can_hold_strings = False
    # Backing extension array; always one of the three datetimelike EA types.
    _data: DatetimeArray | TimedeltaArray | PeriodArray
88
    @doc(DatetimeLikeArrayMixin.mean)
    def mean(self, *, skipna: bool = True, axis: int | None = 0):
        # Delegate to the backing datetimelike array; docstring comes from @doc.
        return self._data.mean(skipna=skipna, axis=axis)
92
    @property
    def freq(self) -> BaseOffset | None:
        # Frequency offset of the backing array, or None if no set frequency.
        return self._data.freq

    @freq.setter
    def freq(self, value) -> None:
        # Setting is delegated to the array; PeriodArray declares freq
        # read-only, hence the ignore.
        # error: Property "freq" defined in "PeriodArray" is read-only [misc]
        self._data.freq = value  # type: ignore[misc]
101
    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        # Integer (i8) view of the underlying datetimelike values.
        return self._data.asi8
105
    @property
    @doc(DatetimeLikeArrayMixin.freqstr)
    def freqstr(self) -> str | None:
        # String alias of self.freq (e.g. "D"), or None when freq is unset.
        return self._data.freqstr
110
    @cache_readonly
    @abstractmethod
    def _resolution_obj(self) -> Resolution:
        # Abstract: subclasses report the Resolution of their own values,
        # used by _can_partial_date_slice to compare against parsed strings.
        ...
115
    @cache_readonly
    @doc(DatetimeLikeArrayMixin.resolution)
    def resolution(self) -> str:
        # Human-readable resolution string from the backing array.
        return self._data.resolution
120
121 # ------------------------------------------------------------------------
122
    @cache_readonly
    def hasnans(self) -> bool:
        # True if any entry is NaT; cached, mirroring the array's _hasna.
        return self._data._hasna
126
127 def equals(self, other: Any) -> bool:
128 """
129 Determines if two Index objects contain the same elements.
130 """
131 if self.is_(other):
132 return True
133
134 if not isinstance(other, Index):
135 return False
136 elif other.dtype.kind in ["f", "i", "u", "c"]:
137 return False
138 elif not isinstance(other, type(self)):
139 should_try = False
140 inferable = self._data._infer_matches
141 if other.dtype == object:
142 should_try = other.inferred_type in inferable
143 elif is_categorical_dtype(other.dtype):
144 other = cast("CategoricalIndex", other)
145 should_try = other.categories.inferred_type in inferable
146
147 if should_try:
148 try:
149 other = type(self)(other)
150 except (ValueError, TypeError, OverflowError):
151 # e.g.
152 # ValueError -> cannot parse str entry, or OutOfBoundsDatetime
153 # TypeError -> trying to convert IntervalIndex to DatetimeIndex
154 # OverflowError -> Index([very_large_timedeltas])
155 return False
156
157 if not is_dtype_equal(self.dtype, other.dtype):
158 # have different timezone
159 return False
160
161 return np.array_equal(self.asi8, other.asi8)
162
163 @Appender(Index.__contains__.__doc__)
164 def __contains__(self, key: Any) -> bool:
165 hash(key)
166 try:
167 self.get_loc(key)
168 except (KeyError, TypeError, ValueError):
169 return False
170 return True
171
    def _convert_tolerance(self, tolerance, target):
        # Coerce tolerance to a timedelta64 ndarray before base-class
        # validation against the target's length/dtype.
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
        return super()._convert_tolerance(tolerance, target)
175
176 # --------------------------------------------------------------------
177 # Rendering Methods
178
179 def format(
180 self,
181 name: bool = False,
182 formatter: Callable | None = None,
183 na_rep: str = "NaT",
184 date_format: str | None = None,
185 ) -> list[str]:
186 """
187 Render a string representation of the Index.
188 """
189 header = []
190 if name:
191 header.append(
192 ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
193 if self.name is not None
194 else ""
195 )
196
197 if formatter is not None:
198 return header + list(self.map(formatter))
199
200 return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
201
    def _format_with_header(
        self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
    ) -> list[str]:
        # Prepend the header rows to the stringified values.
        # matches base class except for whitespace padding and date_format
        return header + list(
            self._format_native_types(na_rep=na_rep, date_format=date_format)
        )
209
    @property
    def _formatter_func(self):
        # Elementwise formatter callable supplied by the backing array.
        return self._data._formatter()
213
    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).

        Extends the base attrs with a repr'd "freq" entry when this
        index type carries one.
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            # iterating over _attributes prevents us from doing this for PeriodIndex
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)  # e.g. D -> 'D'
                attrs.append(("freq", freq))
        return attrs
227
    @Appender(Index._summary.__doc__)
    def _summary(self, name=None) -> str:
        # Base summary plus a trailing "Freq:" line when freq is set.
        result = super()._summary(name=name)
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        return result
235
236 # --------------------------------------------------------------------
237 # Indexing Methods
238
    @final
    def _can_partial_date_slice(self, reso: Resolution) -> bool:
        # A parsed-string resolution coarser than the index's own resolution
        # can select a range of values (e.g. "2020-01" on a daily index).
        # e.g. test_getitem_setitem_periodindex
        # History of conversation GH#3452, GH#3931, GH#2369, GH#14826
        return reso > self._resolution_obj
        # NB: for DTI/PI, not TDI
245
    def _parsed_string_to_bounds(self, reso: Resolution, parsed):
        # Hook: subclasses return the (lower, upper) bounds implied by a
        # parsed string at the given resolution.
        raise NotImplementedError
248
249 def _parse_with_reso(self, label: str):
250 # overridden by TimedeltaIndex
251 try:
252 if self.freq is None or hasattr(self.freq, "rule_code"):
253 freq = self.freq
254 except NotImplementedError:
255 freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
256
257 freqstr: str | None
258 if freq is not None and not isinstance(freq, str):
259 freqstr = freq.rule_code
260 else:
261 freqstr = freq
262
263 if isinstance(label, np.str_):
264 # GH#45580
265 label = str(label)
266
267 parsed, reso_str = parsing.parse_datetime_string_with_reso(label, freqstr)
268 reso = Resolution.from_attrname(reso_str)
269 return parsed, reso
270
    def _get_string_slice(self, key: str):
        # Parse the string and take the implied partial-date slice,
        # re-raising with the original key for a clearer error.
        # overridden by TimedeltaIndex
        parsed, reso = self._parse_with_reso(key)
        try:
            return self._partial_date_slice(reso, parsed)
        except KeyError as err:
            raise KeyError(key) from err
278
    @final
    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
    ):
        """
        Select the positions covered by a partially-specified datetime string.

        Parameters
        ----------
        reso : Resolution
        parsed : datetime

        Returns
        -------
        slice or ndarray[intp]
            A slice when monotonic increasing, else an integer indexer.

        Raises
        ------
        ValueError
            If `reso` is not coarser than the index's own resolution.
        KeyError
            If the implied bounds fall entirely outside a monotonic index.
        """
        if not self._can_partial_date_slice(reso):
            raise ValueError

        # Bounds of the period implied by the parsed string.
        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        vals = self._data._ndarray
        unbox = self._data._unbox

        if self.is_monotonic_increasing:
            if len(self) and (
                (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            left = vals.searchsorted(unbox(t1), side="left")
            right = vals.searchsorted(unbox(t2), side="right")
            return slice(left, right)

        else:
            # Non-monotonic: build a boolean mask for [t1, t2] and return
            # the matching positions.
            lhs_mask = vals >= unbox(t1)
            rhs_mask = vals <= unbox(t2)

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]
322
    def _maybe_cast_slice_bound(self, label, side: str):
        """
        If label is a string, cast it to scalar type according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        if isinstance(label, str):
            try:
                parsed, reso = self._parse_with_reso(label)
            except ValueError as err:
                # DTI -> parsing.DateParseError
                # TDI -> 'unit abbreviation w/o a number'
                # PI -> string cannot be parsed as datetime-like
                self._raise_invalid_indexer("slice", label, err)

            # A string selects a whole period; pick the bound matching `side`.
            lower, upper = self._parsed_string_to_bounds(reso, parsed)
            return lower if side == "left" else upper
        elif not isinstance(label, self._data._recognized_scalars):
            self._raise_invalid_indexer("slice", label)

        # Recognized scalars pass through unchanged.
        return label
355
356 # --------------------------------------------------------------------
357 # Arithmetic Methods
358
    def shift(self: _T, periods: int = 1, freq=None) -> _T:
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.
        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        # Concrete implementations live on the subclasses
        # (e.g. DatetimeTimedeltaMixin.shift).
        raise NotImplementedError
387
388 # --------------------------------------------------------------------
389
    @doc(Index._maybe_cast_listlike_indexer)
    def _maybe_cast_listlike_indexer(self, keyarr):
        # Try to coerce the indexer to our datetimelike dtype; on failure
        # fall back to an object-safe array (or keep an EA unchanged).
        try:
            res = self._data._validate_listlike(keyarr, allow_object=True)
        except (ValueError, TypeError):
            if not isinstance(keyarr, ExtensionArray):
                # e.g. we don't want to cast DTA to ndarray[object]
                res = com.asarray_tuplesafe(keyarr)
                # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
            else:
                res = keyarr
        return Index(res, dtype=res.dtype)
402
403
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, ABC):
    """
    Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
    but not PeriodIndex
    """

    # Backing array; never PeriodArray for this mixin.
    _data: DatetimeArray | TimedeltaArray
    _comparables = ["name", "freq"]
    _attributes = ["name", "freq"]

    # Compat for frequency inference, see GH#23789
    _is_monotonic_increasing = Index.is_monotonic_increasing
    _is_monotonic_decreasing = Index.is_monotonic_decreasing
    _is_unique = Index.is_unique

    # NOTE(review): presumably the relative priority used by Index join
    # machinery — confirm against Index._join_precedence usage.
    _join_precedence = 10
420
    @property
    def unit(self) -> str:
        # Time-unit resolution of the backing array (e.g. "ns").
        return self._data.unit
424
    def as_unit(self: _TDT, unit: str) -> _TDT:
        """
        Convert to a dtype with the given unit resolution.

        Parameters
        ----------
        unit : {'s', 'ms', 'us', 'ns'}

        Returns
        -------
        same type as self
        """
        # Conversion happens on the array; re-wrap preserving the name.
        arr = self._data.as_unit(unit)
        return type(self)._simple_new(arr, name=self.name)
439
    def _with_freq(self, freq):
        # Return a copy of self with the given freq attached (None clears it;
        # "infer" asks the array to infer one).
        arr = self._data._with_freq(freq)
        return type(self)._simple_new(arr, name=self._name)
443
    @property
    def values(self) -> np.ndarray:
        # Raw numpy backing; drops timezone information for tz-aware data.
        # NB: For Datetime64TZ this is lossy
        return self._data._ndarray
448
449 @doc(DatetimeIndexOpsMixin.shift)
450 def shift(self: _TDT, periods: int = 1, freq=None) -> _TDT:
451 if freq is not None and freq != self.freq:
452 if isinstance(freq, str):
453 freq = to_offset(freq)
454 offset = periods * freq
455 return self + offset
456
457 if periods == 0 or len(self) == 0:
458 # GH#14811 empty case
459 return self.copy()
460
461 if self.freq is None:
462 raise NullFrequencyError("Cannot shift with no freq")
463
464 start = self[0] + periods * self.freq
465 end = self[-1] + periods * self.freq
466
467 # Note: in the DatetimeTZ case, _generate_range will infer the
468 # appropriate timezone from `start` and `end`, so tz does not need
469 # to be passed explicitly.
470 result = self._data._generate_range(
471 start=start, end=end, periods=None, freq=self.freq
472 )
473 return type(self)._simple_new(result, name=self.name)
474
    @cache_readonly
    @doc(DatetimeLikeArrayMixin.inferred_freq)
    def inferred_freq(self) -> str | None:
        # Frequency string inferred from the values, cached.
        return self._data.inferred_freq
479
480 # --------------------------------------------------------------------
481 # Set Operation Methods
482
    @cache_readonly
    def _as_range_index(self) -> RangeIndex:
        # Convert our i8 representations to RangeIndex
        # Caller is responsible for checking isinstance(self.freq, Tick)
        freq = cast(Tick, self.freq)
        # Step of the range in integer (i8) units.
        tick = freq.delta._value
        rng = range(self[0]._value, self[-1]._value + tick, tick)
        return RangeIndex(rng)
491
    def _can_range_setop(self, other):
        # RangeIndex-based set ops only work for fixed (Tick) frequencies.
        return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
494
    def _wrap_range_setop(self, other, res_i8):
        """
        Convert an i8 set-op result back to an index of our dtype,
        attaching the appropriate freq.
        """
        new_freq = None
        if not len(res_i8):
            # RangeIndex defaults to step=1, which we don't want.
            new_freq = self.freq
        elif isinstance(res_i8, RangeIndex):
            # The step of the result range determines the result's freq.
            new_freq = to_offset(Timedelta(res_i8.step))

        # TODO(GH#41493): we cannot just do
        # type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
        # because test_setops_preserve_freq fails with _validate_frequency raising.
        # This raising is incorrect, as 'on_freq' is incorrect. This will
        # be fixed by GH#41493
        res_values = res_i8.values.view(self._data._ndarray.dtype)
        result = type(self._data)._simple_new(
            res_values, dtype=self.dtype, freq=new_freq
        )
        return self._wrap_setop_result(other, result)
513
    def _range_intersect(self, other, sort):
        # Dispatch to RangeIndex intersection logic.
        left = self._as_range_index
        right = other._as_range_index
        res_i8 = left.intersection(right, sort=sort)
        return self._wrap_range_setop(other, res_i8)
520
    def _range_union(self, other, sort):
        # Dispatch to RangeIndex union logic.
        left = self._as_range_index
        right = other._as_range_index
        res_i8 = left.union(right, sort=sort)
        return self._wrap_range_setop(other, res_i8)
527
    def _intersection(self, other: Index, sort: bool = False) -> Index:
        """
        intersection specialized to the case with matching dtypes and both non-empty.
        """
        other = cast("DatetimeTimedeltaMixin", other)

        if self._can_range_setop(other):
            # Tick freqs: delegate to the RangeIndex-based fast path.
            return self._range_intersect(other, sort=sort)

        if not self._can_fast_intersect(other):
            result = Index._intersection(self, other, sort=sort)
            # We need to invalidate the freq because Index._intersection
            # uses _shallow_copy on a view of self._data, which will preserve
            # self.freq if we're not careful.
            # At this point we should have result.dtype == self.dtype
            # and type(result) is type(self._data)
            result = self._wrap_setop_result(other, result)
            return result._with_freq(None)._with_freq("infer")

        else:
            # Aligned, same-freq monotonic indexes: slice-based fast path.
            return self._fast_intersect(other, sort)
549
550 def _fast_intersect(self, other, sort):
551 # to make our life easier, "sort" the two ranges
552 if self[0] <= other[0]:
553 left, right = self, other
554 else:
555 left, right = other, self
556
557 # after sorting, the intersection always starts with the right index
558 # and ends with the index of which the last elements is smallest
559 end = min(left[-1], right[-1])
560 start = right[0]
561
562 if end < start:
563 result = self[:0]
564 else:
565 lslice = slice(*left.slice_locs(start, end))
566 result = left._values[lslice]
567
568 return result
569
570 def _can_fast_intersect(self: _T, other: _T) -> bool:
571 # Note: we only get here with len(self) > 0 and len(other) > 0
572 if self.freq is None:
573 return False
574
575 elif other.freq != self.freq:
576 return False
577
578 elif not self.is_monotonic_increasing:
579 # Because freq is not None, we must then be monotonic decreasing
580 return False
581
582 # this along with matching freqs ensure that we "line up",
583 # so intersection will preserve freq
584 # Note we are assuming away Ticks, as those go through _range_intersect
585 # GH#42104
586 return self.freq.n == 1
587
588 def _can_fast_union(self: _T, other: _T) -> bool:
589 # Assumes that type(self) == type(other), as per the annotation
590 # The ability to fast_union also implies that `freq` should be
591 # retained on union.
592 freq = self.freq
593
594 if freq is None or freq != other.freq:
595 return False
596
597 if not self.is_monotonic_increasing:
598 # Because freq is not None, we must then be monotonic decreasing
599 # TODO: do union on the reversed indexes?
600 return False
601
602 if len(self) == 0 or len(other) == 0:
603 # only reached via union_many
604 return True
605
606 # to make our life easier, "sort" the two ranges
607 if self[0] <= other[0]:
608 left, right = self, other
609 else:
610 left, right = other, self
611
612 right_start = right[0]
613 left_end = left[-1]
614
615 # Only need to "adjoin", not overlap
616 return (right_start == left_end + freq) or right_start in left
617
    def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT:
        # Union of two same-freq, "lined up" indexes by concatenating
        # non-overlapping chunks, preserving freq.
        # Caller is responsible for ensuring self and other are non-empty

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        elif sort is False:
            # TDIs are not in the "correct" order and we don't want
            # to sort but want to remove overlaps
            left, right = self, other
            left_start = left[0]
            loc = right.searchsorted(left_start, side="left")
            right_chunk = right._values[:loc]
            dates = concat_compat((left._values, right_chunk))
            result = type(self)._simple_new(dates, name=self.name)
            return result
        else:
            left, right = other, self

        left_end = left[-1]
        right_end = right[-1]

        # concatenate
        if left_end < right_end:
            # Append only the part of `right` that extends past `left`.
            loc = right.searchsorted(left_end, side="right")
            right_chunk = right._values[loc:]
            dates = concat_compat([left._values, right_chunk])
            # The can_fast_union check ensures that the result.freq
            # should match self.freq
            dates = type(self._data)(dates, freq=self.freq)
            result = type(self)._simple_new(dates)
            return result
        else:
            # `left` already covers `right` entirely.
            return left
652
    def _union(self, other, sort):
        # Dispatch to the cheapest applicable union strategy.
        # We are called by `union`, which is responsible for this validation
        assert isinstance(other, type(self))
        assert self.dtype == other.dtype

        if self._can_range_setop(other):
            return self._range_union(other, sort=sort)

        if self._can_fast_union(other):
            result = self._fast_union(other, sort=sort)
            # in the case with sort=None, the _can_fast_union check ensures
            # that result.freq == self.freq
            return result
        else:
            # Generic union loses freq; try to re-infer it.
            return super()._union(other, sort)._with_freq("infer")
668
669 # --------------------------------------------------------------------
670 # Join Methods
671
672 def _get_join_freq(self, other):
673 """
674 Get the freq to attach to the result of a join operation.
675 """
676 freq = None
677 if self._can_fast_union(other):
678 freq = self.freq
679 return freq
680
    def _wrap_joined_index(
        self, joined, other, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp]
    ):
        # Wrap the joined values and attach the freq implied by the join.
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        result = super()._wrap_joined_index(joined, other, lidx, ridx)
        result._data._freq = self._get_join_freq(other)
        return result
688
    def _get_engine_target(self) -> np.ndarray:
        # engine methods and libjoin methods need dt64/td64 values cast to i8
        return self._data._ndarray.view("i8")
692
    def _from_join_target(self, result: np.ndarray):
        # view e.g. i8 back to M8[ns] and re-wrap in our array type
        result = result.view(self._data._ndarray.dtype)
        return self._data._from_backing_data(result)
697
698 # --------------------------------------------------------------------
699 # List-like Methods
700
    def _get_delete_freq(self, loc: int | slice | Sequence[int]):
        """
        Find the `freq` for self.delete(loc).

        freq is preserved only when the deletion removes a contiguous chunk
        from either end of the index; otherwise the result has no freq.
        """
        freq = None
        if self.freq is not None:
            if is_integer(loc):
                # Deleting the first or last element keeps the spacing intact.
                if loc in (0, -len(self), -1, len(self) - 1):
                    freq = self.freq
            else:
                if is_list_like(loc):
                    # error: Incompatible types in assignment (expression has
                    # type "Union[slice, ndarray]", variable has type
                    # "Union[int, slice, Sequence[int]]")
                    loc = lib.maybe_indices_to_slice(  # type: ignore[assignment]
                        np.asarray(loc, dtype=np.intp), len(self)
                    )
                # A step-1 slice touching either end removes a prefix/suffix.
                if isinstance(loc, slice) and loc.step in (1, None):
                    if loc.start in (0, None) or loc.stop in (len(self), None):
                        freq = self.freq
        return freq
722
    def _get_insert_freq(self, loc: int, item):
        """
        Find the `freq` for self.insert(loc, item).

        freq is preserved only when the inserted item extends the existing
        spacing at either end (or when inserting into an empty index).
        """
        value = self._data._validate_scalar(item)
        item = self._data._box_func(value)

        freq = None
        if self.freq is not None:
            # freq can be preserved on edge cases
            if self.size:
                if item is NaT:
                    pass
                elif loc in (0, -len(self)) and item + self.freq == self[0]:
                    # Prepending exactly one freq-step before the start.
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    # Appending exactly one freq-step after the end.
                    freq = self.freq
            else:
                # Adding a single item to an empty index may preserve freq
                if isinstance(self.freq, Tick):
                    # all TimedeltaIndex cases go through here; is_on_offset
                    # would raise TypeError
                    freq = self.freq
                elif self.freq.is_on_offset(item):
                    freq = self.freq
        return freq
749
    @doc(NDArrayBackedExtensionIndex.delete)
    def delete(self, loc) -> DatetimeTimedeltaMixin:
        # Delegate to base delete, then fix up the freq of the result.
        result = super().delete(loc)
        result._data._freq = self._get_delete_freq(loc)
        return result
755
    @doc(NDArrayBackedExtensionIndex.insert)
    def insert(self, loc: int, item):
        # Delegate to base insert, then fix up the freq of the result.
        result = super().insert(loc, item)
        if isinstance(result, type(self)):
            # i.e. parent class method did not cast
            result._data._freq = self._get_insert_freq(loc, item)
        return result
763
764 # --------------------------------------------------------------------
765 # NDArray-Like Methods
766
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ):
        nv.validate_take((), kwargs)
        indices = np.asarray(indices, dtype=np.intp)

        result = NDArrayBackedExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

        # If the indices collapse to a slice, the slice's getitem freq
        # semantics apply to the taken result.
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            freq = self._data._get_getitem_freq(maybe_slice)
            result._data._freq = freq
        return result