from __future__ import annotations

from datetime import (
    datetime,
    timedelta,
)
from functools import wraps
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    Union,
    cast,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._libs import (
    algos,
    lib,
)
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import (
    BaseOffset,
    IncompatibleFrequency,
    NaT,
    NaTType,
    Period,
    Resolution,
    Tick,
    Timedelta,
    Timestamp,
    add_overflowsafe,
    astype_overflowsafe,
    get_unit_from_dtype,
    iNaT,
    ints_to_pydatetime,
    ints_to_pytimedelta,
    periods_per_day,
    to_offset,
)
from pandas._libs.tslibs.fields import (
    RoundTo,
    round_nsint64,
)
from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
from pandas._libs.tslibs.timedeltas import get_unit_for_round
from pandas._libs.tslibs.timestamps import integer_op_not_supported
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DatetimeLikeScalar,
    Dtype,
    DtypeObj,
    F,
    InterpolateOptions,
    NpDtype,
    PositionalIndexer2D,
    PositionalIndexerTuple,
    ScalarIndexer,
    Self,
    SequenceIndexer,
    TimeAmbiguous,
    TimeNonexistent,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
    AbstractMethodError,
    InvalidComparison,
    PerformanceWarning,
)
from pandas.util._decorators import (
    Appender,
    Substitution,
    cache_readonly,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
    is_all_strings,
    is_integer_dtype,
    is_list_like,
    is_object_dtype,
    is_string_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
    ArrowDtype,
    CategoricalDtype,
    DatetimeTZDtype,
    ExtensionDtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import (
    ABCCategorical,
    ABCMultiIndex,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
)

from pandas.core import (
    algorithms,
    missing,
    nanops,
    ops,
)
from pandas.core.algorithms import (
    isin,
    map_array,
    unique1d,
)
from pandas.core.array_algos import datetimelike_accumulations
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._mixins import (
    NDArrayBackedExtensionArray,
    ravel_compat,
)
from pandas.core.arrays.arrow.array import ArrowExtensionArray
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.integer import IntegerArray
import pandas.core.common as com
from pandas.core.construction import (
    array as pd_array,
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import (
    check_array_indexer,
    check_setitem_lengths,
)
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import (
    invalid_comparison,
    make_invalid_op,
)

from pandas.tseries import frequencies

if TYPE_CHECKING:
    from collections.abc import (
        Iterator,
        Sequence,
    )

    from pandas import Index
    from pandas.core.arrays import (
        DatetimeArray,
        PeriodArray,
        TimedeltaArray,
    )

DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]


def _make_unpacked_invalid_op(op_name: str):
    op = make_invalid_op(op_name)
    return unpack_zerodim_and_defer(op_name)(op)


def _period_dispatch(meth: F) -> F:
    """
    For PeriodArray methods, dispatch to DatetimeArray and re-wrap the results
    in PeriodArray. We cannot use ._ndarray directly for the affected
    methods because the i8 data has different semantics on NaT values.
    """

    @wraps(meth)
    def new_meth(self, *args, **kwargs):
        if not isinstance(self.dtype, PeriodDtype):
            return meth(self, *args, **kwargs)

        arr = self.view("M8[ns]")
        result = meth(arr, *args, **kwargs)
        if result is NaT:
            return NaT
        elif isinstance(result, Timestamp):
            return self._box_func(result._value)

        res_i8 = result.view("i8")
        return self._from_backing_data(res_i8)

    return cast(F, new_meth)
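
# A minimal illustrative sketch of what the dispatch above buys us (not part
# of the API; assumes ``import pandas as pd``): PeriodArray stores NaT as iNaT
# in its i8 data, so ordinal-based reductions are routed through a datetime64
# view, where NaT handling is already correct, and the scalar result is
# re-wrapped as a Period.
#
# >>> pidx = pd.PeriodIndex(["2020-01", "NaT", "2020-03"], freq="M")
# >>> pidx.min()  # dispatches via an M8[ns] view under the hood
# Period('2020-01', 'M')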


# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class DatetimeLikeArrayMixin(  # type: ignore[misc]
    OpsMixin, NDArrayBackedExtensionArray
):
    """
    Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

    Assumes that __new__/__init__ defines:
        _ndarray

    and that inheriting subclass implements:
        freq
    """

    # _infer_matches -> which infer_dtype strings are close enough to our own
    _infer_matches: tuple[str, ...]
    _is_recognized_dtype: Callable[[DtypeObj], bool]
    _recognized_scalars: tuple[type, ...]
    _ndarray: np.ndarray
    freq: BaseOffset | None

    @cache_readonly
    def _can_hold_na(self) -> bool:
        return True

    def __init__(
        self, data, dtype: Dtype | None = None, freq=None, copy: bool = False
    ) -> None:
        raise AbstractMethodError(self)

    @property
    def _scalar_type(self) -> type[DatetimeLikeScalar]:
        """
        The scalar associated with this datelike

        * PeriodArray : Period
        * DatetimeArray : Timestamp
        * TimedeltaArray : Timedelta
        """
        raise AbstractMethodError(self)

    def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
        """
        Construct a scalar type from a string.

        Parameters
        ----------
        value : str

        Returns
        -------
        Period, Timestamp, Timedelta, or NaT
            Whatever the type of ``self._scalar_type`` is.

        Notes
        -----
        This should call ``self._check_compatible_with`` before
        unboxing the result.
        """
        raise AbstractMethodError(self)

    def _unbox_scalar(
        self, value: DTScalarOrNaT
    ) -> np.int64 | np.datetime64 | np.timedelta64:
        """
        Unbox the integer value of a scalar `value`.

        Parameters
        ----------
        value : Period, Timestamp, Timedelta, or NaT
            Depending on subclass.

        Returns
        -------
        int

        Examples
        --------
        >>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
        >>> arr._unbox_scalar(arr[0])
        numpy.datetime64('1970-01-01T00:00:00.000000000')
        """
        raise AbstractMethodError(self)

    def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
        """
        Verify that `self` and `other` are compatible.

        * DatetimeArray verifies that the timezones (if any) match
        * PeriodArray verifies that the freq matches
        * Timedelta has no verification

        In each case, NaT is considered compatible.

        Parameters
        ----------
        other

        Raises
        ------
        Exception
        """
        raise AbstractMethodError(self)

    # ------------------------------------------------------------------

    def _box_func(self, x):
        """
        box function to get object from internal representation
        """
        raise AbstractMethodError(self)

    def _box_values(self, values) -> np.ndarray:
        """
        apply box func to passed values
        """
        return lib.map_infer(values, self._box_func, convert=False)

    def __iter__(self) -> Iterator:
        if self.ndim > 1:
            return (self[n] for n in range(len(self)))
        else:
            return (self._box_func(v) for v in self.asi8)

    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        """
        Integer representation of the values.

        Returns
        -------
        ndarray
            An ndarray with int64 dtype.
        """
        # do not cache or you'll create a memory leak
        return self._ndarray.view("i8")

    # ----------------------------------------------------------------
    # Rendering Methods

    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None
    ) -> npt.NDArray[np.object_]:
        """
        Helper method for astype when converting to strings.

        Returns
        -------
        ndarray[str]
        """
        raise AbstractMethodError(self)

    def _formatter(self, boxed: bool = False):
        # TODO: Remove Datetime & DatetimeTZ formatters.
        return "'{}'".format

    # ----------------------------------------------------------------
    # Array-Like / EA-Interface Methods

    def __array__(
        self, dtype: NpDtype | None = None, copy: bool | None = None
    ) -> np.ndarray:
        # used for Timedelta/DatetimeArray, overwritten by PeriodArray
        if is_object_dtype(dtype):
            return np.array(list(self), dtype=object)
        return self._ndarray

    @overload
    def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
        ...

    @overload
    def __getitem__(
        self,
        item: SequenceIndexer | PositionalIndexerTuple,
    ) -> Self:
        ...

    def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT:
        """
        This getitem defers to the underlying array, which by definition can
        only handle list-likes, slices, and integer scalars.
377 """
378 # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
379 # but skip evaluating the Union at runtime for performance
380 # (see https://github.com/pandas-dev/pandas/pull/44624)
381 result = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
382 if lib.is_scalar(result):
383 return result
384 else:
385 # At this point we know the result is an array.
386 result = cast(Self, result)
387 result._freq = self._get_getitem_freq(key)
388 return result
389
390 def _get_getitem_freq(self, key) -> BaseOffset | None:
391 """
392 Find the `freq` attribute to assign to the result of a __getitem__ lookup.
393 """
394 is_period = isinstance(self.dtype, PeriodDtype)
395 if is_period:
396 freq = self.freq
397 elif self.ndim != 1:
398 freq = None
399 else:
400 key = check_array_indexer(self, key) # maybe ndarray[bool] -> slice
401 freq = None
402 if isinstance(key, slice):
403 if self.freq is not None and key.step is not None:
404 freq = key.step * self.freq
405 else:
406 freq = self.freq
407 elif key is Ellipsis:
408 # GH#21282 indexing with Ellipsis is similar to a full slice,
409 # should preserve `freq` attribute
410 freq = self.freq
411 elif com.is_bool_indexer(key):
412 new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
413 if isinstance(new_key, slice):
414 return self._get_getitem_freq(new_key)
415 return freq
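
    # Illustrative sketch (assuming a daily range; not part of the API):
    # slicing with a step scales a preserved Tick freq, while fancy integer
    # indexing drops it.
    #
    # >>> dti = pd.date_range("2020-01-01", periods=5, freq="D")
    # >>> dti[::2].freq
    # <2 * Days>
    # >>> dti[[0, 2, 3]].freq is None
    # True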

    # error: Argument 1 of "__setitem__" is incompatible with supertype
    # "ExtensionArray"; supertype defines the argument type as "Union[int,
    # ndarray]"
    def __setitem__(
        self,
        key: int | Sequence[int] | Sequence[bool] | slice,
        value: NaTType | Any | Sequence[Any],
    ) -> None:
        # I'm fudging the types a bit here. "Any" above really depends
        # on type(self). For PeriodArray, it's Period (or stuff coercible
        # to a period in from_sequence). For DatetimeArray, it's Timestamp...
        # I don't know if mypy can do that, possibly with Generics.
        # https://mypy.readthedocs.io/en/latest/generics.html

        no_op = check_setitem_lengths(key, value, self)

        # Calling super() before the no_op short-circuit means that we raise
        # on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array.
        super().__setitem__(key, value)

        if no_op:
            return

        self._maybe_clear_freq()

    def _maybe_clear_freq(self) -> None:
        # inplace operations like __setitem__ may invalidate the freq of
        # DatetimeArray and TimedeltaArray
        pass

    def astype(self, dtype, copy: bool = True):
        # Some notes on cases we don't have to handle here in the base class:
        #   1. PeriodArray.astype handles period -> period
        #   2. DatetimeArray.astype handles conversion between tz.
        #   3. DatetimeArray.astype handles datetime -> period
        dtype = pandas_dtype(dtype)

        if dtype == object:
            if self.dtype.kind == "M":
                self = cast("DatetimeArray", self)
                # *much* faster than self._box_values
                # for e.g. test_get_loc_tuple_monotonic_above_size_cutoff
                i8data = self.asi8
                converted = ints_to_pydatetime(
                    i8data,
                    tz=self.tz,
                    box="timestamp",
                    reso=self._creso,
                )
                return converted

            elif self.dtype.kind == "m":
                return ints_to_pytimedelta(self._ndarray, box=True)

            return self._box_values(self.asi8.ravel()).reshape(self.shape)

        elif isinstance(dtype, ExtensionDtype):
            return super().astype(dtype, copy=copy)
        elif is_string_dtype(dtype):
            return self._format_native_types()
        elif dtype.kind in "iu":
            # we deliberately ignore int32 vs. int64 here.
            # See https://github.com/pandas-dev/pandas/issues/24381 for more.
            values = self.asi8
            if dtype != np.int64:
                raise TypeError(
                    f"Converting from {self.dtype} to {dtype} is not supported. "
                    "Do obj.astype('int64').astype(dtype) instead"
                )

            if copy:
                values = values.copy()
            return values
        elif (dtype.kind in "mM" and self.dtype != dtype) or dtype.kind == "f":
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg)
        else:
            return np.asarray(self, dtype=dtype)
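
    # Illustrative sketch: for datetimelike arrays, astype("int64") returns
    # the raw epoch integers, equivalent to ``asi8`` (copied when copy=True).
    #
    # >>> dti = pd.date_range("1970-01-01", periods=2, freq="s")
    # >>> dti.astype("int64")
    # Index([0, 1000000000], dtype='int64')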

    @overload
    def view(self) -> Self:
        ...

    @overload
    def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
        ...

    @overload
    def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
        ...

    @overload
    def view(self, dtype: Dtype | None = ...) -> ArrayLike:
        ...

    # pylint: disable-next=useless-parent-delegation
    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        # we need to explicitly call super() method as long as the `@overload`s
        # are present in this file.
        return super().view(dtype)

    # ------------------------------------------------------------------
    # Validation Methods
    # TODO: try to de-duplicate these, ensure identical behavior

    def _validate_comparison_value(self, other):
        if isinstance(other, str):
            try:
                # GH#18435 strings get a pass from tzawareness compat
                other = self._scalar_from_string(other)
            except (ValueError, IncompatibleFrequency):
                # failed to parse as Timestamp/Timedelta/Period
                raise InvalidComparison(other)

        if isinstance(other, self._recognized_scalars) or other is NaT:
            other = self._scalar_type(other)
            try:
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                # e.g. tzawareness mismatch
                raise InvalidComparison(other) from err

        elif not is_list_like(other):
            raise InvalidComparison(other)

        elif len(other) != len(self):
            raise ValueError("Lengths must match")

        else:
            try:
                other = self._validate_listlike(other, allow_object=True)
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                if is_object_dtype(getattr(other, "dtype", None)):
                    # We will have to operate element-wise
                    pass
                else:
                    raise InvalidComparison(other) from err

        return other

    def _validate_scalar(
        self,
        value,
        *,
        allow_listlike: bool = False,
        unbox: bool = True,
    ):
        """
        Validate that the input value can be cast to our scalar_type.

        Parameters
        ----------
        value : object
        allow_listlike: bool, default False
            When raising an exception, whether the message should say
            listlike inputs are allowed.
        unbox : bool, default True
            Whether to unbox the result before returning. Note: unbox=False
            skips the setitem compatibility check.

        Returns
        -------
        self._scalar_type or NaT
        """
        if isinstance(value, self._scalar_type):
            pass

        elif isinstance(value, str):
            # NB: Careful about tzawareness
            try:
                value = self._scalar_from_string(value)
            except ValueError as err:
                msg = self._validation_error_message(value, allow_listlike)
                raise TypeError(msg) from err

        elif is_valid_na_for_dtype(value, self.dtype):
            # GH#18295
            value = NaT

        elif isna(value):
            # if we are dt64tz and value is dt64("NaT"), don't cast to NaT,
            # or else we'll fail to raise in _unbox_scalar
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        elif isinstance(value, self._recognized_scalars):
            # error: Argument 1 to "Timestamp" has incompatible type "object"; expected
            # "integer[Any] | float | str | date | datetime | datetime64"
            value = self._scalar_type(value)  # type: ignore[arg-type]

        else:
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        if not unbox:
            # NB: In general NDArrayBackedExtensionArray will unbox here;
            # this option exists to prevent a performance hit in
            # TimedeltaIndex.get_loc
            return value
        return self._unbox_scalar(value)

    def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
        """
        Construct an exception message on validation error.

        Some methods allow only scalar inputs, while others allow either scalar
        or listlike.

        Parameters
        ----------
        allow_listlike: bool, default False

        Returns
        -------
        str
        """
        if hasattr(value, "dtype") and getattr(value, "ndim", 0) > 0:
            msg_got = f"{value.dtype} array"
        else:
            msg_got = f"'{type(value).__name__}'"
        if allow_listlike:
            msg = (
                f"value should be a '{self._scalar_type.__name__}', 'NaT', "
                f"or array of those. Got {msg_got} instead."
            )
        else:
            msg = (
                f"value should be a '{self._scalar_type.__name__}' or 'NaT'. "
                f"Got {msg_got} instead."
            )
        return msg

    def _validate_listlike(self, value, allow_object: bool = False):
        if isinstance(value, type(self)):
            if self.dtype.kind in "mM" and not allow_object:
                # error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
                value = value.as_unit(self.unit, round_ok=False)  # type: ignore[attr-defined]
            return value

        if isinstance(value, list) and len(value) == 0:
            # We treat empty list as our own dtype.
            return type(self)._from_sequence([], dtype=self.dtype)

        if hasattr(value, "dtype") and value.dtype == object:
            # `array` below won't do inference if value is an Index or Series.
            # so do so here. in the Index case, inferred_type may be cached.
            if lib.infer_dtype(value) in self._infer_matches:
                try:
                    value = type(self)._from_sequence(value)
                except (ValueError, TypeError):
                    if allow_object:
                        return value
                    msg = self._validation_error_message(value, True)
                    raise TypeError(msg)

        # Do type inference if necessary up front (after unpacking
        # NumpyExtensionArray)
        # e.g. we passed PeriodIndex.values and got an ndarray of Periods
        value = extract_array(value, extract_numpy=True)
        value = pd_array(value)
        value = extract_array(value, extract_numpy=True)

        if is_all_strings(value):
            # We got a StringArray
            try:
                # TODO: Could use from_sequence_of_strings if implemented
                # Note: passing dtype is necessary for PeriodArray tests
                value = type(self)._from_sequence(value, dtype=self.dtype)
            except ValueError:
                pass

        if isinstance(value.dtype, CategoricalDtype):
            # e.g. we have a Categorical holding self.dtype
            if value.categories.dtype == self.dtype:
                # TODO: do we need equal dtype or just comparable?
                value = value._internal_get_values()
                value = extract_array(value, extract_numpy=True)

        if allow_object and is_object_dtype(value.dtype):
            pass

        elif not type(self)._is_recognized_dtype(value.dtype):
            msg = self._validation_error_message(value, True)
            raise TypeError(msg)

        if self.dtype.kind in "mM" and not allow_object:
            # error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
            value = value.as_unit(self.unit, round_ok=False)  # type: ignore[attr-defined]
        return value

    def _validate_setitem_value(self, value):
        if is_list_like(value):
            value = self._validate_listlike(value)
        else:
            return self._validate_scalar(value, allow_listlike=True)

        return self._unbox(value)

    @final
    def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray:
        """
        Unbox either a scalar with _unbox_scalar or an instance of our own type.
        """
        if lib.is_scalar(other):
            other = self._unbox_scalar(other)
        else:
            # same type as self
            self._check_compatible_with(other)
            other = other._ndarray
        return other

    # ------------------------------------------------------------------
    # Additional array methods
    # These are not part of the EA API, but we implement them because
    # pandas assumes they're there.

    @ravel_compat
    def map(self, mapper, na_action=None):
        from pandas import Index

        result = map_array(self, mapper, na_action=na_action)
        result = Index(result)

        if isinstance(result, ABCMultiIndex):
            return result.to_numpy()
        else:
            return result.array

    def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
        """
        Compute boolean array of whether each value is found in the
        passed set of values.

        Parameters
        ----------
        values : np.ndarray or ExtensionArray

        Returns
        -------
        ndarray[bool]
        """
        if values.dtype.kind in "fiuc":
            # TODO: de-duplicate with equals, validate_comparison_value
            return np.zeros(self.shape, dtype=bool)

        values = ensure_wrapped_if_datetimelike(values)

        if not isinstance(values, type(self)):
            inferable = [
                "timedelta",
                "timedelta64",
                "datetime",
                "datetime64",
                "date",
                "period",
            ]
            if values.dtype == object:
                values = lib.maybe_convert_objects(
                    values,  # type: ignore[arg-type]
                    convert_non_numeric=True,
                    dtype_if_all_nat=self.dtype,
                )
                if values.dtype != object:
                    return self.isin(values)

                inferred = lib.infer_dtype(values, skipna=False)
                if inferred not in inferable:
                    if inferred == "string":
                        pass

                    elif "mixed" in inferred:
                        return isin(self.astype(object), values)
                    else:
                        return np.zeros(self.shape, dtype=bool)

            try:
                values = type(self)._from_sequence(values)
            except ValueError:
                return isin(self.astype(object), values)
            else:
                warnings.warn(
                    # GH#53111
                    f"The behavior of 'isin' with dtype={self.dtype} and "
                    "castable values (e.g. strings) is deprecated. In a "
                    "future version, these will not be considered matching "
                    "by isin. Explicitly cast to the appropriate dtype before "
                    "calling isin instead.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        if self.dtype.kind in "mM":
            self = cast("DatetimeArray | TimedeltaArray", self)
            # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
            # has no attribute "as_unit"
            values = values.as_unit(self.unit)  # type: ignore[union-attr]

        try:
            # error: Argument 1 to "_check_compatible_with" of "DatetimeLikeArrayMixin"
            # has incompatible type "ExtensionArray | ndarray[Any, Any]"; expected
            # "Period | Timestamp | Timedelta | NaTType"
            self._check_compatible_with(values)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            # Includes tzawareness mismatch and IncompatibleFrequencyError
            return np.zeros(self.shape, dtype=bool)

        # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
        # has no attribute "asi8"
        return isin(self.asi8, values.asi8)  # type: ignore[union-attr]

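    # Illustrative sketch: values of numeric dtype can never match a
    # datetimelike array, so the result is all-False with no element-wise
    # comparison.
    #
    # >>> dti = pd.date_range("2020-01-01", periods=2)
    # >>> dti.isin(np.array([1, 2], dtype="int64"))
    # array([False, False])
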
    # ------------------------------------------------------------------
    # Null Handling

    def isna(self) -> npt.NDArray[np.bool_]:
        return self._isnan

    @property  # NB: override with cache_readonly in immutable subclasses
    def _isnan(self) -> npt.NDArray[np.bool_]:
        """
        return if each value is nan
        """
        return self.asi8 == iNaT

    @property  # NB: override with cache_readonly in immutable subclasses
    def _hasna(self) -> bool:
        """
        return if I have any nans; enables various perf speedups
        """
        return bool(self._isnan.any())

    def _maybe_mask_results(
        self, result: np.ndarray, fill_value=iNaT, convert=None
    ) -> np.ndarray:
        """
        Parameters
        ----------
        result : np.ndarray
        fill_value : object, default iNaT
        convert : str, dtype or None

        Returns
        -------
        result : ndarray with values replaced by the fill_value

        Mask the result if needed and convert to the provided dtype if it is
        not None.

        This is an internal routine.
        """
        if self._hasna:
            if convert:
                result = result.astype(convert)
            if fill_value is None:
                fill_value = np.nan
            np.putmask(result, self._isnan, fill_value)
        return result

    # ------------------------------------------------------------------
    # Frequency Properties/Methods

    @property
    def freqstr(self) -> str | None:
        """
        Return the frequency object as a string if it's set, otherwise None.

        Examples
        --------
        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D")
        >>> idx.freqstr
        'D'

        The frequency can be inferred if there are more than 2 points:

        >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"],
        ...                        freq="infer")
        >>> idx.freqstr
        '2D'

        For PeriodIndex:

        >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M")
        >>> idx.freqstr
        'M'
        """
        if self.freq is None:
            return None
        return self.freq.freqstr

    @property  # NB: override with cache_readonly in immutable subclasses
    def inferred_freq(self) -> str | None:
        """
        Tries to return a string representing a frequency generated by infer_freq.

        Returns None if it can't autodetect the frequency.

        Examples
        --------
        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])
        >>> idx.inferred_freq
        '2D'

        For TimedeltaIndex:

        >>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
        >>> tdelta_idx
        TimedeltaIndex(['0 days', '10 days', '20 days'],
                       dtype='timedelta64[ns]', freq=None)
        >>> tdelta_idx.inferred_freq
        '10D'
        """
        if self.ndim != 1:
            return None
        try:
            return frequencies.infer_freq(self)
        except ValueError:
            return None

    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution | None:
        freqstr = self.freqstr
        if freqstr is None:
            return None
        try:
            return Resolution.get_reso_from_freqstr(freqstr)
        except KeyError:
            return None

    @property  # NB: override with cache_readonly in immutable subclasses
    def resolution(self) -> str:
        """
        Returns day, hour, minute, second, millisecond or microsecond
        """
        # error: Item "None" of "Optional[Any]" has no attribute "attrname"
        return self._resolution_obj.attrname  # type: ignore[union-attr]

    # monotonicity/uniqueness properties are called via frequencies.infer_freq,
    # see GH#23789

    @property
    def _is_monotonic_increasing(self) -> bool:
        return algos.is_monotonic(self.asi8, timelike=True)[0]

    @property
    def _is_monotonic_decreasing(self) -> bool:
        return algos.is_monotonic(self.asi8, timelike=True)[1]

    @property
    def _is_unique(self) -> bool:
        return len(unique1d(self.asi8.ravel("K"))) == self.size

    # ------------------------------------------------------------------
    # Arithmetic Methods

    def _cmp_method(self, other, op):
        if self.ndim > 1 and getattr(other, "shape", None) == self.shape:
            # TODO: handle 2D-like listlikes
            return op(self.ravel(), other.ravel()).reshape(self.shape)

        try:
            other = self._validate_comparison_value(other)
        except InvalidComparison:
            return invalid_comparison(self, other, op)

        dtype = getattr(other, "dtype", None)
        if is_object_dtype(dtype):
            # We have to use comp_method_OBJECT_ARRAY instead of numpy
            # comparison otherwise it would raise when comparing to None
            result = ops.comp_method_OBJECT_ARRAY(
                op, np.asarray(self.astype(object)), other
            )
            return result
        if other is NaT:
            if op is operator.ne:
                result = np.ones(self.shape, dtype=bool)
            else:
                result = np.zeros(self.shape, dtype=bool)
            return result

        if not isinstance(self.dtype, PeriodDtype):
            self = cast(TimelikeOps, self)
            if self._creso != other._creso:
                if not isinstance(other, type(self)):
                    # i.e. Timedelta/Timestamp, cast to ndarray and let
                    # compare_mismatched_resolutions handle broadcasting
                    try:
                        # GH#52080 see if we can losslessly cast to shared unit
                        other = other.as_unit(self.unit, round_ok=False)
                    except ValueError:
                        other_arr = np.array(other.asm8)
                        return compare_mismatched_resolutions(
                            self._ndarray, other_arr, op
                        )
                else:
                    other_arr = other._ndarray
                    return compare_mismatched_resolutions(self._ndarray, other_arr, op)

        other_vals = self._unbox(other)
        # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
        result = op(self._ndarray.view("i8"), other_vals.view("i8"))

        o_mask = isna(other)
        mask = self._isnan | o_mask
        if mask.any():
            nat_result = op is operator.ne
            np.putmask(result, mask, nat_result)

        return result
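
    # Illustrative sketch of the NaT semantics enforced above: NaT compares
    # unequal to everything, including itself, mirroring float NaN.
    #
    # >>> left = pd.DatetimeIndex(["2020-01-01", "NaT"])
    # >>> left == left
    # array([ True, False])
    # >>> left != left
    # array([False,  True])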

    # pow is invalid for all three subclasses; TimedeltaArray will override
    # the multiplication and division ops
    __pow__ = _make_unpacked_invalid_op("__pow__")
    __rpow__ = _make_unpacked_invalid_op("__rpow__")
    __mul__ = _make_unpacked_invalid_op("__mul__")
    __rmul__ = _make_unpacked_invalid_op("__rmul__")
    __truediv__ = _make_unpacked_invalid_op("__truediv__")
    __rtruediv__ = _make_unpacked_invalid_op("__rtruediv__")
    __floordiv__ = _make_unpacked_invalid_op("__floordiv__")
    __rfloordiv__ = _make_unpacked_invalid_op("__rfloordiv__")
    __mod__ = _make_unpacked_invalid_op("__mod__")
    __rmod__ = _make_unpacked_invalid_op("__rmod__")
    __divmod__ = _make_unpacked_invalid_op("__divmod__")
    __rdivmod__ = _make_unpacked_invalid_op("__rdivmod__")

    @final
    def _get_i8_values_and_mask(
        self, other
    ) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
        """
        Get the int64 values and b_mask to pass to add_overflowsafe.
        """
        if isinstance(other, Period):
            i8values = other.ordinal
            mask = None
        elif isinstance(other, (Timestamp, Timedelta)):
            i8values = other._value
            mask = None
        else:
            # PeriodArray, DatetimeArray, TimedeltaArray
            mask = other._isnan
            i8values = other.asi8
        return i8values, mask

    @final
    def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
        """
        Check if we can preserve self.freq in addition or subtraction.
        """
        # Adding or subtracting a Timedelta/Timestamp scalar is freq-preserving
        # whenever self.freq is a Tick
        if isinstance(self.dtype, PeriodDtype):
            return self.freq
        elif not lib.is_scalar(other):
            return None
        elif isinstance(self.freq, Tick):
            # In these cases adding a scalar preserves the Tick freq
            return self.freq
        return None
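
    # Illustrative sketch: adding a Timedelta scalar to a Tick-freq index
    # preserves the freq, while adding an array-like does not.
    #
    # >>> dti = pd.date_range("2020-01-01", periods=3, freq="D")
    # >>> (dti + pd.Timedelta("1h")).freq
    # <Day>
    # >>> (dti + pd.TimedeltaIndex(["1h", "2h", "3h"])).freq is None
    # True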

    @final
    def _add_datetimelike_scalar(self, other) -> DatetimeArray:
        if not lib.is_np_dtype(self.dtype, "m"):
            raise TypeError(
                f"cannot add {type(self).__name__} and {type(other).__name__}"
            )

        self = cast("TimedeltaArray", self)

        from pandas.core.arrays import DatetimeArray
        from pandas.core.arrays.datetimes import tz_to_dtype

        assert other is not NaT
        if isna(other):
            # i.e. np.datetime64("NaT")
            # In this case we specifically interpret NaT as a datetime, not
            # the timedelta interpretation we would get by returning self + NaT
            result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self.unit}]")
            # Preserve our resolution
            return DatetimeArray._simple_new(result, dtype=result.dtype)

        other = Timestamp(other)
        self, other = self._ensure_matching_resos(other)
        self = cast("TimedeltaArray", self)

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        result = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
        res_values = result.view(f"M8[{self.unit}]")

        dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
        new_freq = self._get_arithmetic_result_freq(other)
        return DatetimeArray._simple_new(res_values, dtype=dtype, freq=new_freq)

    @final
    def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray:
        if not lib.is_np_dtype(self.dtype, "m"):
            raise TypeError(
                f"cannot add {type(self).__name__} and {type(other).__name__}"
            )

        # defer to DatetimeArray.__add__
        return other + self

    @final
    def _sub_datetimelike_scalar(
        self, other: datetime | np.datetime64
    ) -> TimedeltaArray:
        if self.dtype.kind != "M":
            raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")

        self = cast("DatetimeArray", self)
        # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]

        if isna(other):
            # i.e. np.datetime64("NaT")
            return self - NaT

        ts = Timestamp(other)

        self, ts = self._ensure_matching_resos(ts)
        return self._sub_datetimelike(ts)

    @final
    def _sub_datetime_arraylike(self, other: DatetimeArray) -> TimedeltaArray:
        if self.dtype.kind != "M":
            raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")

        if len(self) != len(other):
            raise ValueError("cannot add indices of unequal length")

        self = cast("DatetimeArray", self)

        self, other = self._ensure_matching_resos(other)
        return self._sub_datetimelike(other)

    @final
    def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
        self = cast("DatetimeArray", self)

        from pandas.core.arrays import TimedeltaArray

        try:
            self._assert_tzawareness_compat(other)
        except TypeError as err:
            new_message = str(err).replace("compare", "subtract")
            raise type(err)(new_message) from err

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        res_values = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
        res_m8 = res_values.view(f"timedelta64[{self.unit}]")

        new_freq = self._get_arithmetic_result_freq(other)
        new_freq = cast("Tick | None", new_freq)
        return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)
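
    # Illustrative sketch: datetime - datetime yields timedelta64, with
    # resolutions matched first and NaT propagated through the mask.
    #
    # >>> start = pd.DatetimeIndex(["2020-01-01", "NaT"])
    # >>> end = pd.DatetimeIndex(["2020-01-03", "2020-01-04"])
    # >>> end - start
    # TimedeltaIndex(['2 days', NaT], dtype='timedelta64[ns]', freq=None)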

    @final
    def _add_period(self, other: Period) -> PeriodArray:
        if not lib.is_np_dtype(self.dtype, "m"):
            raise TypeError(f"cannot add Period to a {type(self).__name__}")

        # We will wrap in a PeriodArray and defer to the reversed operation
        from pandas.core.arrays.period import PeriodArray

        i8vals = np.broadcast_to(other.ordinal, self.shape)
        dtype = PeriodDtype(other.freq)
        parr = PeriodArray(i8vals, dtype=dtype)
        return parr + self

    def _add_offset(self, offset):
        raise AbstractMethodError(self)

    def _add_timedeltalike_scalar(self, other):
        """
        Add a delta of a timedeltalike

        Returns
        -------
        Same type as self
        """
        if isna(other):
            # i.e. np.timedelta64("NaT")
            new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype)
            new_values.fill(iNaT)
            return type(self)._simple_new(new_values, dtype=self.dtype)

        # PeriodArray overrides, so we only get here with DTA/TDA
        self = cast("DatetimeArray | TimedeltaArray", self)
        other = Timedelta(other)
        self, other = self._ensure_matching_resos(other)
        return self._add_timedeltalike(other)

    def _add_timedelta_arraylike(self, other: TimedeltaArray):
        """
        Add a delta of a TimedeltaIndex

        Returns
        -------
        Same type as self
        """
        # overridden by PeriodArray

        if len(self) != len(other):
            raise ValueError("cannot add indices of unequal length")

        self = cast("DatetimeArray | TimedeltaArray", self)

        self, other = self._ensure_matching_resos(other)
        return self._add_timedeltalike(other)

    @final
    def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
        self = cast("DatetimeArray | TimedeltaArray", self)

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        new_values = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
        res_values = new_values.view(self._ndarray.dtype)

        new_freq = self._get_arithmetic_result_freq(other)

        # error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
        # incompatible type "Union[dtype[datetime64], DatetimeTZDtype,
        # dtype[timedelta64]]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
        return type(self)._simple_new(
            res_values, dtype=self.dtype, freq=new_freq  # type: ignore[arg-type]
        )

    @final
    def _add_nat(self):
        """
        Add pd.NaT to self
        """
        if isinstance(self.dtype, PeriodDtype):
            raise TypeError(
                f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
            )
        self = cast("TimedeltaArray | DatetimeArray", self)

        # GH#19124 pd.NaT is treated like a timedelta for both timedelta
        # and datetime dtypes
        result = np.empty(self.shape, dtype=np.int64)
        result.fill(iNaT)
        result = result.view(self._ndarray.dtype)  # preserve reso
        # error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
        # incompatible type "Union[dtype[timedelta64], dtype[datetime64],
        # DatetimeTZDtype]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
        return type(self)._simple_new(
            result, dtype=self.dtype, freq=None  # type: ignore[arg-type]
        )

    @final
    def _sub_nat(self):
        """
        Subtract pd.NaT from self
        """
        # GH#19124 Timedelta - datetime is not in general well-defined.
        # We make an exception for pd.NaT, which in this case quacks
        # like a timedelta.
        # For datetime64 dtypes by convention we treat NaT as a datetime, so
        # this subtraction returns a timedelta64 dtype.
        # For period dtype, timedelta64 is a close-enough return dtype.
        result = np.empty(self.shape, dtype=np.int64)
        result.fill(iNaT)
        if self.dtype.kind in "mM":
            # We can retain unit in dtype
            self = cast("DatetimeArray | TimedeltaArray", self)
            return result.view(f"timedelta64[{self.unit}]")
        else:
            return result.view("timedelta64[ns]")

    @final
    def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_]:
        # If the operation is well-defined, we return an object-dtype ndarray
        # of DateOffsets. Null entries are filled with pd.NaT
        if not isinstance(self.dtype, PeriodDtype):
            raise TypeError(
                f"cannot subtract {type(other).__name__} from {type(self).__name__}"
            )

        self = cast("PeriodArray", self)
        self._check_compatible_with(other)

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        new_i8_data = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
        new_data = np.array([self.freq.base * x for x in new_i8_data])

        if o_mask is None:
            # i.e. Period scalar
            mask = self._isnan
        else:
            # i.e. PeriodArray
            mask = self._isnan | o_mask
        new_data[mask] = NaT
        return new_data
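
    # Illustrative sketch: Period - Period returns an object-dtype array of
    # DateOffsets (multiples of the freq's base), with NaT where either side
    # is missing.
    #
    # >>> pi = pd.PeriodIndex(["2020-01", "2020-03"], freq="M")
    # >>> pi - pd.Period("2020-01", freq="M")
    # Index([<0 * MonthEnds>, <2 * MonthEnds>], dtype='object')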

    @final
    def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
        """
        Add or subtract array-like of DateOffset objects

        Parameters
        ----------
        other : np.ndarray[object]
        op : {operator.add, operator.sub}

        Returns
        -------
        np.ndarray[object]
            Except in fastpath case with length 1 where we operate on the
            contained scalar.
        """
        assert op in [operator.add, operator.sub]
        if len(other) == 1 and self.ndim == 1:
            # Note: without this special case, we could annotate return type
            # as ndarray[object]
            # If both 1D then broadcasting is unambiguous
            return op(self, other[0])

        warnings.warn(
            "Adding/subtracting object-dtype array to "
            f"{type(self).__name__} not vectorized.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        # Caller is responsible for broadcasting if necessary
        assert self.shape == other.shape, (self.shape, other.shape)

        res_values = op(self.astype("O"), np.asarray(other))
        return res_values
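
    # Illustrative sketch: mixed DateOffsets are applied element-wise after
    # casting to object, which emits the PerformanceWarning above.
    #
    # >>> dti = pd.date_range("2020-01-01", periods=2)
    # >>> offs = np.array([pd.offsets.MonthEnd(), pd.offsets.Day()], dtype=object)
    # >>> dti + offs  # doctest: +SKIP  (warns: PerformanceWarning)
    # DatetimeIndex(['2020-01-31', '2020-01-03'], dtype='datetime64[ns]', freq=None)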

    def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self:
        if name not in {"cummin", "cummax"}:
            raise TypeError(f"Accumulation {name} not supported for {type(self)}")

        op = getattr(datetimelike_accumulations, name)
        result = op(self.copy(), skipna=skipna, **kwargs)

        return type(self)._simple_new(result, dtype=self.dtype)

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        other_dtype = getattr(other, "dtype", None)
        other = ensure_wrapped_if_datetimelike(other)

        # scalar others
        if other is NaT:
            result = self._add_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_timedeltalike_scalar(other)
        elif isinstance(other, BaseOffset):
            # specifically _not_ a Tick
            result = self._add_offset(other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._add_datetimelike_scalar(other)
        elif isinstance(other, Period) and lib.is_np_dtype(self.dtype, "m"):
            result = self._add_period(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            if not isinstance(self.dtype, PeriodDtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(
                other * obj.dtype._n, operator.add
            )

        # array-like others
        elif lib.is_np_dtype(other_dtype, "m"):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_timedelta_arraylike(other)
        elif is_object_dtype(other_dtype):
            # e.g. Array/Index of DateOffset objects
            result = self._addsub_object_array(other, operator.add)
        elif lib.is_np_dtype(other_dtype, "M") or isinstance(
            other_dtype, DatetimeTZDtype
        ):
            # DatetimeIndex, ndarray[datetime64]
            return self._add_datetime_arraylike(other)
        elif is_integer_dtype(other_dtype):
            if not isinstance(self.dtype, PeriodDtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(
                other * obj.dtype._n, operator.add
            )
        else:
            # Includes Categorical, other ExtensionArrays
            # For PeriodDtype, if self is a TimedeltaArray and other is a
            # PeriodArray with a timedelta-like (i.e. Tick) freq, this
            # operation is valid. Defer to the PeriodArray implementation.
            # In remaining cases, this will end up raising TypeError.
            return NotImplemented

        if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray._from_sequence(result)
        return result
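
    # Illustrative sketch: integer addition is only defined for PeriodDtype,
    # where the integer is interpreted in units of the dtype's freq; other
    # datetimelike dtypes raise TypeError via integer_op_not_supported.
    #
    # >>> pi = pd.period_range("2020Q1", periods=2, freq="Q")
    # >>> pi + 1
    # PeriodIndex(['2020Q2', '2020Q3'], dtype='period[Q-DEC]')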

    def __radd__(self, other):
        # alias for __add__
        return self.__add__(other)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        other_dtype = getattr(other, "dtype", None)
        other = ensure_wrapped_if_datetimelike(other)

        # scalar others
        if other is NaT:
            result = self._sub_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_timedeltalike_scalar(-other)
        elif isinstance(other, BaseOffset):
            # specifically _not_ a Tick
            result = self._add_offset(-other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._sub_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            if not isinstance(self.dtype, PeriodDtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(
                other * obj.dtype._n, operator.sub
            )

        elif isinstance(other, Period):
            result = self._sub_periodlike(other)

        # array-like others
        elif lib.is_np_dtype(other_dtype, "m"):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_timedelta_arraylike(-other)
        elif is_object_dtype(other_dtype):
            # e.g. Array/Index of DateOffset objects
            result = self._addsub_object_array(other, operator.sub)
        elif lib.is_np_dtype(other_dtype, "M") or isinstance(
            other_dtype, DatetimeTZDtype
        ):
            # DatetimeIndex, ndarray[datetime64]
            result = self._sub_datetime_arraylike(other)
        elif isinstance(other_dtype, PeriodDtype):
            # PeriodIndex
            result = self._sub_periodlike(other)
        elif is_integer_dtype(other_dtype):
            if not isinstance(self.dtype, PeriodDtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(
                other * obj.dtype._n, operator.sub
            )
        else:
            # Includes ExtensionArrays, float_dtype
            return NotImplemented

        if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray._from_sequence(result)
        return result

    def __rsub__(self, other):
        other_dtype = getattr(other, "dtype", None)
        other_is_dt64 = lib.is_np_dtype(other_dtype, "M") or isinstance(
            other_dtype, DatetimeTZDtype
        )

        if other_is_dt64 and lib.is_np_dtype(self.dtype, "m"):
            # ndarray[datetime64] cannot be subtracted from self, so
            # we need to wrap in DatetimeArray/Index and flip the operation
            if lib.is_scalar(other):
                # i.e. np.datetime64 object
                return Timestamp(other) - self
            if not isinstance(other, DatetimeLikeArrayMixin):
                # Avoid down-casting DatetimeIndex
                from pandas.core.arrays import DatetimeArray

                other = DatetimeArray._from_sequence(other)
            return other - self
        elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64:
            # GH#19959 datetime - datetime is well-defined as timedelta,
            # but any other type - datetime is not well-defined.
            raise TypeError(
                f"cannot subtract {type(self).__name__} from {type(other).__name__}"
            )
        elif isinstance(self.dtype, PeriodDtype) and lib.is_np_dtype(other_dtype, "m"):
            # TODO: Can we simplify/generalize these cases at all?
            raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
        elif lib.is_np_dtype(self.dtype, "m"):
            self = cast("TimedeltaArray", self)
            return (-self) + other

        # We get here with e.g. datetime objects
        return -(self - other)
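
    # Illustrative sketch: a datetime64 scalar minus a timedelta array flips
    # into a Timestamp-based operation rather than downcasting.
    #
    # >>> tdi = pd.TimedeltaIndex(["1 days", "2 days"])
    # >>> np.datetime64("2020-01-05") - tdi
    # DatetimeIndex(['2020-01-04', '2020-01-03'], dtype='datetime64[ns]', freq=None)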

    def __iadd__(self, other) -> Self:
        result = self + other
        self[:] = result[:]

        if not isinstance(self.dtype, PeriodDtype):
            # restore freq, which is invalidated by setitem
            self._freq = result.freq
        return self

    def __isub__(self, other) -> Self:
        result = self - other
        self[:] = result[:]

        if not isinstance(self.dtype, PeriodDtype):
            # restore freq, which is invalidated by setitem
            self._freq = result.freq
        return self

    # --------------------------------------------------------------
    # Reductions

    @_period_dispatch
    def _quantile(
        self,
        qs: npt.NDArray[np.float64],
        interpolation: str,
    ) -> Self:
        return super()._quantile(qs=qs, interpolation=interpolation)

    @_period_dispatch
    def min(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
        """
        Return the minimum value of the Array or minimum along
        an axis.

        See Also
        --------
        numpy.ndarray.min
        Index.min : Return the minimum value in an Index.
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min((), kwargs)
        nv.validate_minmax_axis(axis, self.ndim)

        result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    @_period_dispatch
    def max(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
        """
        Return the maximum value of the Array or maximum along
        an axis.

        See Also
        --------
        numpy.ndarray.max
        Index.max : Return the maximum value in an Index.
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max((), kwargs)
        nv.validate_minmax_axis(axis, self.ndim)

        result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):
        """
        Return the mean value of the Array.

        Parameters
        ----------
        skipna : bool, default True
            Whether to ignore any NaT elements.
        axis : int, optional, default 0

        Returns
        -------
        scalar
            Timestamp or Timedelta.

        See Also
        --------
        numpy.ndarray.mean : Returns the average of array elements along a given axis.
        Series.mean : Return the mean value in a Series.

        Notes
        -----
        mean is only defined for Datetime and Timedelta dtypes, not for Period.

        Examples
        --------
        For :class:`pandas.DatetimeIndex`:

        >>> idx = pd.date_range('2001-01-01 00:00', periods=3)
        >>> idx
        DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
                      dtype='datetime64[ns]', freq='D')
        >>> idx.mean()
        Timestamp('2001-01-02 00:00:00')

        For :class:`pandas.TimedeltaIndex`:

        >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')
        >>> tdelta_idx
        TimedeltaIndex(['1 days', '2 days', '3 days'],
                       dtype='timedelta64[ns]', freq=None)
        >>> tdelta_idx.mean()
        Timedelta('2 days 00:00:00')
        """
        if isinstance(self.dtype, PeriodDtype):
            # See discussion in GH#24757
            raise TypeError(
                f"mean is not implemented for {type(self).__name__} since the "
                "meaning is ambiguous. An alternative is "
                "obj.to_timestamp(how='start').mean()"
            )

        result = nanops.nanmean(
            self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
        )
        return self._wrap_reduction_result(axis, result)

    @_period_dispatch
    def median(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
        nv.validate_median((), kwargs)

        if axis is not None and abs(axis) >= self.ndim:
            raise ValueError("abs(axis) must be less than ndim")

        result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def _mode(self, dropna: bool = True):
        mask = None
        if dropna:
            mask = self.isna()

        i8modes = algorithms.mode(self.view("i8"), mask=mask)
        npmodes = i8modes.view(self._ndarray.dtype)
        npmodes = cast(np.ndarray, npmodes)
        return self._from_backing_data(npmodes)
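
    # Illustrative sketch: the mode is computed on the i8 view and re-wrapped,
    # with NaT dropped by default (dropna=True).
    #
    # >>> ser = pd.Series(pd.to_datetime(["2020-01-01", "2020-01-01", "NaT"]))
    # >>> ser.mode()
    # 0   2020-01-01
    # dtype: datetime64[ns]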

    # ------------------------------------------------------------------
    # GroupBy Methods

    def _groupby_op(
        self,
        *,
        how: str,
        has_dropped_na: bool,
        min_count: int,
        ngroups: int,
        ids: npt.NDArray[np.intp],
        **kwargs,
    ):
        dtype = self.dtype
        if dtype.kind == "M":
            # Adding/multiplying datetimes is not valid
            if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
                raise TypeError(f"datetime64 type does not support {how} operations")
            if how in ["any", "all"]:
                # GH#34479
                warnings.warn(
                    f"'{how}' with datetime64 dtypes is deprecated and will raise in a "
                    f"future version. Use (obj != pd.Timestamp(0)).{how}() instead.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        elif isinstance(dtype, PeriodDtype):
            # Adding/multiplying Periods is not valid
            if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
                raise TypeError(f"Period type does not support {how} operations")
            if how in ["any", "all"]:
                # GH#34479
                warnings.warn(
                    f"'{how}' with PeriodDtype is deprecated and will raise in a "
                    f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
        else:
            # timedeltas we can add but not multiply
            if how in ["prod", "cumprod", "skew", "var"]:
                raise TypeError(f"timedelta64 type does not support {how} operations")

        # All of the functions implemented here are ordinal, so we can
        # operate on the tz-naive equivalents
        npvalues = self._ndarray.view("M8[ns]")

        from pandas.core.groupby.ops import WrappedCythonOp

        kind = WrappedCythonOp.get_kind_from_how(how)
        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)

        res_values = op._cython_op_ndim_compat(
            npvalues,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=ids,
            mask=None,
            **kwargs,
        )

        if op.how in op.cast_blocklist:
            # i.e. how in ["rank"], since other cast_blocklist methods don't go
            # through cython_operation
            return res_values

        # We did a view to M8[ns] above, now we go the other direction
        assert res_values.dtype == "M8[ns]"
        if how in ["std", "sem"]:
            from pandas.core.arrays import TimedeltaArray

            if isinstance(self.dtype, PeriodDtype):
                raise TypeError("'std' and 'sem' are not valid for PeriodDtype")
            self = cast("DatetimeArray | TimedeltaArray", self)
            new_dtype = f"m8[{self.unit}]"
            res_values = res_values.view(new_dtype)
            return TimedeltaArray._simple_new(res_values, dtype=res_values.dtype)

        res_values = res_values.view(self._ndarray.dtype)
        return self._from_backing_data(res_values)
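
    # Illustrative sketch: groupby reductions run on the tz-naive M8[ns] view;
    # "std"/"sem" come back as timedelta64 rather than the original dtype.
    #
    # >>> df = pd.DataFrame(
    # ...     {"g": [1, 1, 1], "ts": pd.date_range("2020-01-01", periods=3)}
    # ... )
    # >>> df.groupby("g")["ts"].std()
    # g
    # 1   1 days
    # Name: ts, dtype: timedelta64[ns]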


class DatelikeOps(DatetimeLikeArrayMixin):
    """
    Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
    """

    @Substitution(
        URL="https://docs.python.org/3/library/datetime.html"
        "#strftime-and-strptime-behavior"
    )
    def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
        """
        Convert to Index using specified date_format.

        Return an Index of formatted strings specified by date_format, which
        supports the same string format as the python standard library. Details
        of the string format can be found in `python string format
        doc <%(URL)s>`__.

        Formats supported by the C `strftime` API but not by the python string format
        doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
        preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
        `"%%I:%%M:%%S %%p"`).

        Note that `PeriodIndex` supports additional directives, detailed in
        `Period.strftime`.

        Parameters
        ----------
        date_format : str
            Date format string (e.g. "%%Y-%%m-%%d").

        Returns
        -------
        ndarray[object]
            NumPy ndarray of formatted strings.

        See Also
        --------
        to_datetime : Convert the given argument to datetime.
        DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
        DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
        DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
        Timestamp.strftime : Format a single Timestamp.
        Period.strftime : Format a single Period.

        Examples
        --------
        >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
        ...                     periods=3, freq='s')
        >>> rng.strftime('%%B %%d, %%Y, %%r')
        Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
               'March 10, 2018, 09:00:02 AM'],
              dtype='object')
        """
        result = self._format_native_types(date_format=date_format, na_rep=np.nan)
        return result.astype(object, copy=False)


_round_doc = """
    Perform {op} operation on the data to the specified `freq`.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to {op} the index to. Must be a fixed
        frequency like 's' (second), not 'ME' (month end). See
        :ref:`frequency aliases <timeseries.offset_aliases>` for
        a list of possible `freq` values.
    ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
        Only relevant for DatetimeIndex:

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False designates
          a non-DST time (note that this flag is only applicable for
          ambiguous times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous
          times.

    nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
        A nonexistent time does not exist in a particular timezone
        where clocks moved forward due to DST.

        - 'shift_forward' will shift the nonexistent time forward to the
          closest existing time
        - 'shift_backward' will shift the nonexistent time backward to the
          closest existing time
        - 'NaT' will return NaT where there are nonexistent times
        - timedelta objects will shift nonexistent times by the timedelta
        - 'raise' will raise a NonExistentTimeError if there are
          nonexistent times.

    Returns
    -------
    DatetimeIndex, TimedeltaIndex, or Series
        Index of the same type for a DatetimeIndex or TimedeltaIndex,
        or a Series with the same index for a Series.

    Raises
    ------
    ValueError
        If the `freq` cannot be converted.

    Notes
    -----
    If the timestamps have a timezone, {op}ing will take place relative to the
    local ("wall") time and the result re-localized to the same timezone. When
    {op}ing near daylight savings time, use ``nonexistent`` and ``ambiguous``
    to control the re-localization behavior.

    Examples
    --------
    **DatetimeIndex**

    >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng
    DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:01:00'],
                  dtype='datetime64[ns]', freq='min')
    """

_round_example = """>>> rng.round('h')
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Series**

    >>> pd.Series(rng).dt.round("h")
    0   2018-01-01 12:00:00
    1   2018-01-01 12:00:00
    2   2018-01-01 12:00:00
    dtype: datetime64[ns]

    When rounding near a daylight savings time transition, use ``ambiguous`` or
    ``nonexistent`` to control how the timestamp should be re-localized.

    >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")

    >>> rng_tz.floor("2h", ambiguous=False)
    DatetimeIndex(['2021-10-31 02:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)

    >>> rng_tz.floor("2h", ambiguous=True)
    DatetimeIndex(['2021-10-31 02:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
    """

_floor_example = """>>> rng.floor('h')
    DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Series**

    >>> pd.Series(rng).dt.floor("h")
    0   2018-01-01 11:00:00
    1   2018-01-01 12:00:00
    2   2018-01-01 12:00:00
    dtype: datetime64[ns]

    When rounding near a daylight savings time transition, use ``ambiguous`` or
    ``nonexistent`` to control how the timestamp should be re-localized.

    >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")

    >>> rng_tz.floor("2h", ambiguous=False)
    DatetimeIndex(['2021-10-31 02:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)

    >>> rng_tz.floor("2h", ambiguous=True)
    DatetimeIndex(['2021-10-31 02:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
    """

_ceil_example = """>>> rng.ceil('h')
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 13:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Series**

    >>> pd.Series(rng).dt.ceil("h")
    0   2018-01-01 12:00:00
    1   2018-01-01 12:00:00
    2   2018-01-01 13:00:00
    dtype: datetime64[ns]

    When rounding near a daylight savings time transition, use ``ambiguous`` or
    ``nonexistent`` to control how the timestamp should be re-localized.

    >>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam")

    >>> rng_tz.ceil("h", ambiguous=False)
    DatetimeIndex(['2021-10-31 02:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)

    >>> rng_tz.ceil("h", ambiguous=True)
    DatetimeIndex(['2021-10-31 02:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
    """


class TimelikeOps(DatetimeLikeArrayMixin):
    """
    Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
    """

    _default_dtype: np.dtype

    def __init__(
        self, values, dtype=None, freq=lib.no_default, copy: bool = False
    ) -> None:
        warnings.warn(
            # GH#55623
            f"{type(self).__name__}.__init__ is deprecated and will be "
            "removed in a future version. Use pd.array instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        values = extract_array(values, extract_numpy=True)
        if isinstance(values, IntegerArray):
            values = values.to_numpy("int64", na_value=iNaT)

        inferred_freq = getattr(values, "_freq", None)
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None
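        # From here on, `freq is None` can mean either "explicitly passed as
        # None" (tracked above via `explicit_none`) or "not passed at all".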

        if isinstance(values, type(self)):
            if explicit_none:
                # don't inherit from values
                pass
            elif freq is None:
                freq = values.freq
            elif freq and values.freq:
                freq = to_offset(freq)
                freq = _validate_inferred_freq(freq, values.freq)

            if dtype is not None and dtype != values.dtype:
                # TODO: we only have tests for this for DTA, not TDA (2022-07-01)
                raise TypeError(
                    f"dtype={dtype} does not match data dtype {values.dtype}"
                )

            dtype = values.dtype
            values = values._ndarray

        elif dtype is None:
            if isinstance(values, np.ndarray) and values.dtype.kind in "Mm":
                dtype = values.dtype
            else:
                dtype = self._default_dtype
                if isinstance(values, np.ndarray) and values.dtype == "i8":
                    values = values.view(dtype)

        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"Unexpected type '{type(values).__name__}'. 'values' must be a "
                f"{type(self).__name__}, ndarray, or Series or Index "
                "containing one of those."
            )
        if values.ndim not in [1, 2]:
            raise ValueError("Only 1- and 2-dimensional input arrays are supported.")

        if values.dtype == "i8":
            # for compat with datetime/timedelta/period shared methods,
            # we can sometimes get here with int64 values. These represent
            # nanosecond UTC (or tz-naive) unix timestamps
            if dtype is None:
                dtype = self._default_dtype
                values = values.view(self._default_dtype)
            elif lib.is_np_dtype(dtype, "mM"):
                values = values.view(dtype)
            elif isinstance(dtype, DatetimeTZDtype):
                kind = self._default_dtype.kind
                new_dtype = f"{kind}8[{dtype.unit}]"
                values = values.view(new_dtype)

        dtype = self._validate_dtype(values, dtype)

        if freq == "infer":
            raise ValueError(
                f"Frequency inference not allowed in {type(self).__name__}.__init__. "
                "Use 'pd.array()' instead."
            )

        if copy:
            values = values.copy()
        if freq:
            freq = to_offset(freq)
            if values.dtype.kind == "m" and not isinstance(freq, Tick):
                raise TypeError("TimedeltaArray/Index freq must be a Tick")

        NDArrayBacked.__init__(self, values=values, dtype=dtype)
        self._freq = freq

        if inferred_freq is None and freq is not None:
            type(self)._validate_frequency(self, freq)

    @classmethod
    def _validate_dtype(cls, values, dtype):
        raise AbstractMethodError(cls)

    @property
    def freq(self):
        """
        Return the frequency object if it is set, otherwise None.
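
        Examples
        --------
        >>> pd.DatetimeIndex(["2020-01-01", "2020-01-02"], freq="D").freq
        <Day>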
        """
        return self._freq

    @freq.setter
    def freq(self, value) -> None:
        if value is not None:
            value = to_offset(value)
            self._validate_frequency(self, value)
            if self.dtype.kind == "m" and not isinstance(value, Tick):
                raise TypeError("TimedeltaArray/Index freq must be a Tick")

        if self.ndim > 1:
            raise ValueError("Cannot set freq with ndim > 1")

        self._freq = value

    @final
    def _maybe_pin_freq(self, freq, validate_kwds: dict):
        """
        Constructor helper to pin the appropriate `freq` attribute. Assumes
        that self._freq is currently set to any freq inferred in
        _from_sequence_not_strict.
        """
        if freq is None:
            # user explicitly passed None -> override any inferred_freq
            self._freq = None
        elif freq == "infer":
            # if self._freq is *not* None then we already inferred a freq
            # and there is nothing left to do
            if self._freq is None:
                # Set _freq directly to bypass duplicative _validate_frequency
                # check.
                self._freq = to_offset(self.inferred_freq)
        elif freq is lib.no_default:
            # user did not specify anything, keep inferred freq if the original
            # data had one, otherwise do nothing
            pass
        elif self._freq is None:
            # We cannot inherit a freq from the data, so we need to validate
            # the user-passed freq
            freq = to_offset(freq)
            type(self)._validate_frequency(self, freq, **validate_kwds)
            self._freq = freq
        else:
            # Otherwise we just need to check that the user-passed freq
            # doesn't conflict with the one we already have.
            freq = to_offset(freq)
            _validate_inferred_freq(freq, self._freq)

    @final
    @classmethod
    def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
        """
        Validate that a frequency is compatible with the values of a given
        Datetime Array/Index or Timedelta Array/Index.

        Parameters
        ----------
        index : DatetimeIndex or TimedeltaIndex
            The index on which to determine if the given frequency is valid
        freq : DateOffset
            The frequency to validate

        Raises
        ------
        ValueError
            If the values do not conform to the given ``freq``.
        """
        inferred = index.inferred_freq
        if index.size == 0 or inferred == freq.freqstr:
            return None

        try:
            on_freq = cls._generate_range(
                start=index[0],
                end=None,
                periods=len(index),
                freq=freq,
                unit=index.unit,
                **kwargs,
            )
            if not np.array_equal(index.asi8, on_freq.asi8):
                raise ValueError
        except ValueError as err:
            if "non-fixed" in str(err):
                # non-fixed frequencies are not meaningful for timedelta64;
                # we retain that error message
                raise err
            # GH#11587 the main way this is reached is if the `np.array_equal`
            # check above is False. This can also be reached if index[0]
            # is `NaT`, in which case the call to `cls._generate_range` will
            # raise a ValueError, which we re-raise with a more targeted
            # message.
            raise ValueError(
                f"Inferred frequency {inferred} from passed values "
                f"does not conform to passed frequency {freq.freqstr}"
            ) from err

    @classmethod
    def _generate_range(
        cls, start, end, periods: int | None, freq, *args, **kwargs
    ) -> Self:
        raise AbstractMethodError(cls)

    # --------------------------------------------------------------

    @cache_readonly
    def _creso(self) -> int:
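        # the numpy NPY_DATETIMEUNIT code for this array's resolution; finer
        # units map to larger codes, which _ensure_matching_resos relies on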
        return get_unit_from_dtype(self._ndarray.dtype)

    @cache_readonly
    def unit(self) -> str:
        # e.g. "ns", "us", "ms"
        # error: Argument 1 to "dtype_to_unit" has incompatible type
        # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
        return dtype_to_unit(self.dtype)  # type: ignore[arg-type]

    def as_unit(self, unit: str, round_ok: bool = True) -> Self:
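        """
        Convert to a dtype with the given unit resolution.

        Parameters
        ----------
        unit : {'s', 'ms', 'us', 'ns'}
        round_ok : bool, default True
            If False and the conversion requires rounding, raise instead of
            rounding.

        Returns
        -------
        Same type as self
        """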
        if unit not in ["s", "ms", "us", "ns"]:
            raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")

        dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
        new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=round_ok)

        if isinstance(self.dtype, np.dtype):
            new_dtype = new_values.dtype
        else:
            tz = cast("DatetimeArray", self).tz
            new_dtype = DatetimeTZDtype(tz=tz, unit=unit)

        # error: Unexpected keyword argument "freq" for "_simple_new" of
        # "NDArrayBacked" [call-arg]
        return type(self)._simple_new(
            new_values, dtype=new_dtype, freq=self.freq  # type: ignore[call-arg]
        )

    # TODO: annotate other as DatetimeArray | TimedeltaArray | Timestamp | Timedelta
    #  with the return type matching input type. TypeVar?
    def _ensure_matching_resos(self, other):
        if self._creso != other._creso:
            # Just as with Timestamp/Timedelta, we cast to the higher resolution
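            # e.g. pairing a "s"-unit array with a "ns"-unit one casts the
            # "s" side up to "ns", so no information is lost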
            if self._creso < other._creso:
                self = self.as_unit(other.unit)
            else:
                other = other.as_unit(self.unit)
        return self, other

    # --------------------------------------------------------------

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        if (
            ufunc in [np.isnan, np.isinf, np.isfinite]
            and len(inputs) == 1
            and inputs[0] is self
        ):
            # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
            return getattr(ufunc, method)(self._ndarray, **kwargs)

        return super().__array_ufunc__(ufunc, method, *inputs, **kwargs)

    def _round(self, freq, mode, ambiguous, nonexistent):
        # round the local times
        if isinstance(self.dtype, DatetimeTZDtype):
            # operate on naive timestamps, then convert back to aware
            self = cast("DatetimeArray", self)
            naive = self.tz_localize(None)
            result = naive._round(freq, mode, ambiguous, nonexistent)
            return result.tz_localize(
                self.tz, ambiguous=ambiguous, nonexistent=nonexistent
            )

        values = self.view("i8")
        values = cast(np.ndarray, values)
        nanos = get_unit_for_round(freq, self._creso)
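        # `nanos` is the rounding quantum expressed in this array's own unit;
        # the name is historical, and it is nanoseconds only for "ns" arrays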
        if nanos == 0:
            # GH 52761
            return self.copy()
        result_i8 = round_nsint64(values, mode, nanos)
        result = self._maybe_mask_results(result_i8, fill_value=iNaT)
        result = result.view(self._ndarray.dtype)
        return self._simple_new(result, dtype=self.dtype)

    @Appender((_round_doc + _round_example).format(op="round"))
    def round(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ) -> Self:
        return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)

    @Appender((_round_doc + _floor_example).format(op="floor"))
    def floor(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ) -> Self:
        return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)

    @Appender((_round_doc + _ceil_example).format(op="ceil"))
    def ceil(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ) -> Self:
        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)

    # --------------------------------------------------------------
    # Reductions

    def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
        # GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())

    def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
        # GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())

    # --------------------------------------------------------------
    # Frequency Methods

    def _maybe_clear_freq(self) -> None:
        self._freq = None

    def _with_freq(self, freq) -> Self:
        """
        Helper to get a view on the same data, with a new freq.

        Parameters
        ----------
        freq : DateOffset, None, or "infer"

        Returns
        -------
        Same type as self
        """
        # GH#29843
        if freq is None:
            # Always valid
            pass
        elif len(self) == 0 and isinstance(freq, BaseOffset):
            # Always valid. In the TimedeltaArray case, we require a Tick offset
            if self.dtype.kind == "m" and not isinstance(freq, Tick):
                raise TypeError("TimedeltaArray/Index freq must be a Tick")
        else:
            # As an internal method, we can ensure this assertion always holds
            assert freq == "infer"
            freq = to_offset(self.inferred_freq)

        arr = self.view()
        arr._freq = freq
        return arr

    # --------------------------------------------------------------
    # ExtensionArray Interface

    def _values_for_json(self) -> np.ndarray:
        # Small performance bump vs the base class which calls np.asarray(self)
        if isinstance(self.dtype, np.dtype):
            return self._ndarray
        return super()._values_for_json()

    def factorize(
        self,
        use_na_sentinel: bool = True,
        sort: bool = False,
    ):
        if self.freq is not None:
            # We must be unique, so can short-circuit (and retain freq)
            codes = np.arange(len(self), dtype=np.intp)
            uniques = self.copy()  # TODO: copy or view?
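            # a negative freq.n means the values are monotonically decreasing;
            # reverse both so that `uniques` comes out ascending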
            if sort and self.freq.n < 0:
                codes = codes[::-1]
                uniques = uniques[::-1]
            return codes, uniques

        if sort:
            # algorithms.factorize only passes sort=True here when freq is
            # not None, so this should not be reached.
            raise NotImplementedError(
                f"The 'sort' keyword in {type(self).__name__}.factorize is "
                "ignored unless arr.freq is not None. To factorize with sort, "
                "call pd.factorize(obj, sort=True) instead."
            )
        return super().factorize(use_na_sentinel=use_na_sentinel)

    @classmethod
    def _concat_same_type(
        cls,
        to_concat: Sequence[Self],
        axis: AxisInt = 0,
    ) -> Self:
        new_obj = super()._concat_same_type(to_concat, axis)

        obj = to_concat[0]

        if axis == 0:
            # GH 3232: If the concat result is evenly spaced, we can retain the
            # original frequency
            to_concat = [x for x in to_concat if len(x)]

            if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
                pairs = zip(to_concat[:-1], to_concat[1:])
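                # evenly spaced iff each chunk starts exactly one freq step
                # after the previous chunk ends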
                if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
                    new_freq = obj.freq
                    new_obj._freq = new_freq
        return new_obj

    def copy(self, order: str = "C") -> Self:
        new_obj = super().copy(order=order)
        new_obj._freq = self.freq
        return new_obj

    def interpolate(
        self,
        *,
        method: InterpolateOptions,
        axis: int,
        index: Index,
        limit,
        limit_direction,
        limit_area,
        copy: bool,
        **kwargs,
    ) -> Self:
        """
        See NDFrame.interpolate.__doc__.
        """
        # NB: we return type(self) even if copy=False
        if method != "linear":
            raise NotImplementedError

        if not copy:
            out_data = self._ndarray
        else:
            out_data = self._ndarray.copy()

        missing.interpolate_2d_inplace(
            out_data,
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            **kwargs,
        )
        if not copy:
            return self
        return type(self)._simple_new(out_data, dtype=self.dtype)

    # --------------------------------------------------------------
    # Unsorted

    @property
    def _is_dates_only(self) -> bool:
        """
        Check whether all values are round times at midnight (and tz-naive),
        in which case we can give a more compact __repr__ than otherwise.
        For TimedeltaArray we check for multiples of 24 hours.
        """
        if not lib.is_np_dtype(self.dtype):
            # i.e. we have a timezone
            return False

        values_int = self.asi8
        consider_values = values_int != iNaT
        reso = get_unit_from_dtype(self.dtype)
        ppd = periods_per_day(reso)

        # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
        #  (first attempt at this was less performant than this implementation)
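        # even_days: every non-NaT value is an exact multiple of one day,
        # i.e. a midnight timestamp (or a whole number of days for timedeltas)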
        even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
        return even_days


# -------------------------------------------------------------------
# Shared Constructor Helpers


def ensure_arraylike_for_datetimelike(
    data, copy: bool, cls_name: str
) -> tuple[ArrayLike, bool]:
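    """
    Coerce `data` to an ndarray or ExtensionArray that the
    Datetime/Timedelta/Period array constructors can handle, unboxing
    Series/Index and converting masked and Arrow integer arrays to int64
    ndarrays along the way.

    Returns
    -------
    data : np.ndarray or ExtensionArray
    copy : bool
        Set to False whenever a conversion above has already produced a
        fresh array.
    """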
    if not hasattr(data, "dtype"):
        # e.g. list, tuple
        if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
            # i.e. generator
            data = list(data)

        data = construct_1d_object_array_from_listlike(data)
        copy = False
    elif isinstance(data, ABCMultiIndex):
        raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
    else:
        data = extract_array(data, extract_numpy=True)

    if isinstance(data, IntegerArray) or (
        isinstance(data, ArrowExtensionArray) and data.dtype.kind in "iu"
    ):
        data = data.to_numpy("int64", na_value=iNaT)
        copy = False
    elif isinstance(data, ArrowExtensionArray):
        data = data._maybe_convert_datelike_array()
        data = data.to_numpy()
        copy = False
    elif not isinstance(data, (np.ndarray, ExtensionArray)):
        # GH#24539 e.g. xarray, dask object
        data = np.asarray(data)

    elif isinstance(data, ABCCategorical):
        # GH#18664 preserve tz in going DTI->Categorical->DTI
        # TODO: cases where we need to do another pass through maybe_convert_dtype,
        #  e.g. the categories are timedelta64s
        data = data.categories.take(data.codes, fill_value=NaT)._values
        copy = False

    return data, copy


@overload
def validate_periods(periods: None) -> None:
    ...


@overload
def validate_periods(periods: int | float) -> int:
    ...


def validate_periods(periods: int | float | None) -> int | None:
    """
    If a `periods` argument is passed to the Datetime/Timedelta Array/Index
    constructor, cast it to an integer.

    Parameters
    ----------
    periods : None, float, int

    Returns
    -------
    periods : None or int

    Raises
    ------
    TypeError
        If periods is not None and is neither an integer nor a float.
    """
    if periods is not None:
        if lib.is_float(periods):
            warnings.warn(
                # GH#56036
                "Non-integer 'periods' in pd.date_range, pd.timedelta_range, "
                "pd.period_range, and pd.interval_range are deprecated and "
                "will raise in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            periods = int(periods)
        elif not lib.is_integer(periods):
            raise TypeError(f"periods must be a number, got {periods}")
    return periods


def _validate_inferred_freq(
    freq: BaseOffset | None, inferred_freq: BaseOffset | None
) -> BaseOffset | None:
    """
    If the user passes a freq and another freq is inferred from passed data,
    require that they match.

    Parameters
    ----------
    freq : DateOffset or None
    inferred_freq : DateOffset or None

    Returns
    -------
    freq : DateOffset or None
    """
    if inferred_freq is not None:
        if freq is not None and freq != inferred_freq:
            raise ValueError(
                f"Inferred frequency {inferred_freq} from passed "
                "values does not conform to passed frequency "
                f"{freq.freqstr}"
            )
        if freq is None:
            freq = inferred_freq

    return freq


def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype | ArrowDtype) -> str:
    """
    Return the unit str corresponding to the dtype's resolution.

    Parameters
    ----------
    dtype : DatetimeTZDtype, np.dtype, or ArrowDtype
        If np.dtype, we assume it is a datetime64 or timedelta64 dtype.

    Returns
    -------
    str
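
    Examples
    --------
    >>> dtype_to_unit(np.dtype("datetime64[ms]"))
    'ms'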
    """
    if isinstance(dtype, DatetimeTZDtype):
        return dtype.unit
    elif isinstance(dtype, ArrowDtype):
        if dtype.kind not in "mM":
            raise ValueError(f"{dtype=} does not have a resolution.")
        return dtype.pyarrow_dtype.unit
    return np.datetime_data(dtype)[0]