from __future__ import annotations

from datetime import (
    datetime,
    timedelta,
)
from functools import wraps
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Iterator,
    Literal,
    Sequence,
    TypeVar,
    Union,
    cast,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._libs import (
    algos,
    lib,
)
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import (
    BaseOffset,
    IncompatibleFrequency,
    NaT,
    NaTType,
    Period,
    Resolution,
    Tick,
    Timedelta,
    Timestamp,
    astype_overflowsafe,
    delta_to_nanoseconds,
    get_unit_from_dtype,
    iNaT,
    ints_to_pydatetime,
    ints_to_pytimedelta,
    to_offset,
)
from pandas._libs.tslibs.fields import (
    RoundTo,
    round_nsint64,
)
from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
from pandas._libs.tslibs.timestamps import integer_op_not_supported
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DatetimeLikeScalar,
    Dtype,
    DtypeObj,
    F,
    NpDtype,
    PositionalIndexer2D,
    PositionalIndexerTuple,
    ScalarIndexer,
    SequenceIndexer,
    TimeAmbiguous,
    TimeNonexistent,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
    AbstractMethodError,
    InvalidComparison,
    PerformanceWarning,
)
from pandas.util._decorators import (
    Appender,
    Substitution,
    cache_readonly,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
    is_all_strings,
    is_categorical_dtype,
    is_datetime64_any_dtype,
    is_datetime64_dtype,
    is_datetime64tz_dtype,
    is_datetime_or_timedelta_dtype,
    is_dtype_equal,
    is_float_dtype,
    is_integer_dtype,
    is_list_like,
    is_object_dtype,
    is_period_dtype,
    is_string_dtype,
    is_timedelta64_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.generic import (
    ABCCategorical,
    ABCMultiIndex,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
)

from pandas.core import (
    algorithms,
    nanops,
    ops,
)
from pandas.core.algorithms import (
    checked_add_with_arr,
    isin,
    unique1d,
)
from pandas.core.array_algos import datetimelike_accumulations
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._mixins import (
    NDArrayBackedExtensionArray,
    ravel_compat,
)
from pandas.core.arrays.arrow.array import ArrowExtensionArray
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.integer import IntegerArray
import pandas.core.common as com
from pandas.core.construction import (
    array as pd_array,
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import (
    check_array_indexer,
    check_setitem_lengths,
)
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import (
    invalid_comparison,
    make_invalid_op,
)

from pandas.tseries import frequencies

if TYPE_CHECKING:
    from pandas.core.arrays import (
        DatetimeArray,
        PeriodArray,
        TimedeltaArray,
    )

DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]
DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin")


def _period_dispatch(meth: F) -> F:
    """
    For PeriodArray methods, dispatch to DatetimeArray and re-wrap the results
    in PeriodArray. We cannot use ._ndarray directly for the affected
    methods because the i8 data has different semantics on NaT values.
    """

    @wraps(meth)
    def new_meth(self, *args, **kwargs):
        if not is_period_dtype(self.dtype):
            return meth(self, *args, **kwargs)

        arr = self.view("M8[ns]")
        result = meth(arr, *args, **kwargs)
        if result is NaT:
            return NaT
        elif isinstance(result, Timestamp):
            return self._box_func(result._value)

        res_i8 = result.view("i8")
        return self._from_backing_data(res_i8)

    return cast(F, new_meth)
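
# A hypothetical illustration (not used by the code): a PeriodArray method such
# as ``min`` decorated with ``_period_dispatch`` runs on an "M8[ns]" view of
# the data and re-wraps the i8 result via ``_from_backing_data``, so NaT
# handling follows DatetimeArray semantics.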


class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
    """
    Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

    Assumes that __new__/__init__ defines:
        _ndarray

    and that the inheriting subclass implements:
        freq
    """

    # _infer_matches -> which infer_dtype strings are close enough to our own
    _infer_matches: tuple[str, ...]
    _is_recognized_dtype: Callable[[DtypeObj], bool]
    _recognized_scalars: tuple[type, ...]
    _ndarray: np.ndarray
    freq: BaseOffset | None

    @cache_readonly
    def _can_hold_na(self) -> bool:
        return True

    def __init__(
        self, data, dtype: Dtype | None = None, freq=None, copy: bool = False
    ) -> None:
        raise AbstractMethodError(self)

    @property
    def _scalar_type(self) -> type[DatetimeLikeScalar]:
        """
        The scalar associated with this datetime-like:

        * PeriodArray : Period
        * DatetimeArray : Timestamp
        * TimedeltaArray : Timedelta
        """
        raise AbstractMethodError(self)

    def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
        """
        Construct a scalar type from a string.

        Parameters
        ----------
        value : str

        Returns
        -------
        Period, Timestamp, or Timedelta, or NaT
            Whatever the type of ``self._scalar_type`` is.

        Notes
        -----
        This should call ``self._check_compatible_with`` before
        unboxing the result.
        """
        raise AbstractMethodError(self)

    def _unbox_scalar(
        self, value: DTScalarOrNaT
    ) -> np.int64 | np.datetime64 | np.timedelta64:
        """
        Unbox the integer value of a scalar `value`.

        Parameters
        ----------
        value : Period, Timestamp, Timedelta, or NaT
            Depending on subclass.

        Returns
        -------
        int

        Examples
        --------
        >>> self._unbox_scalar(Timedelta("10s"))  # doctest: +SKIP
        10000000000
        """
        raise AbstractMethodError(self)

    def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
        """
        Verify that `self` and `other` are compatible.

        * DatetimeArray verifies that the timezones (if any) match
        * PeriodArray verifies that the freq matches
        * Timedelta has no verification

        In each case, NaT is considered compatible.

        Parameters
        ----------
        other

        Raises
        ------
        Exception
        """
        raise AbstractMethodError(self)

    # ------------------------------------------------------------------

    def _box_func(self, x):
        """
        box function to get object from internal representation
        """
        raise AbstractMethodError(self)

    def _box_values(self, values) -> np.ndarray:
        """
        apply box func to passed values
        """
        return lib.map_infer(values, self._box_func, convert=False)

    def __iter__(self) -> Iterator:
        if self.ndim > 1:
            return (self[n] for n in range(len(self)))
        else:
            return (self._box_func(v) for v in self.asi8)

    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        """
        Integer representation of the values.

        Returns
        -------
        ndarray
            An ndarray with int64 dtype.
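
        Examples
        --------
        A sketch (assumed values; not executed):

        >>> dta = pd.array(pd.to_datetime(["2020-01-01"]))  # doctest: +SKIP
        >>> dta.asi8  # doctest: +SKIP
        array([1577836800000000000])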
316 """
317 # do not cache or you'll create a memory leak
318 return self._ndarray.view("i8")
319
320 # ----------------------------------------------------------------
321 # Rendering Methods
322
323 def _format_native_types(
324 self, *, na_rep: str | float = "NaT", date_format=None
325 ) -> npt.NDArray[np.object_]:
326 """
327 Helper method for astype when converting to strings.
328
329 Returns
330 -------
331 ndarray[str]
332 """
333 raise AbstractMethodError(self)
334
335 def _formatter(self, boxed: bool = False):
336 # TODO: Remove Datetime & DatetimeTZ formatters.
337 return "'{}'".format
338
339 # ----------------------------------------------------------------
340 # Array-Like / EA-Interface Methods
341
342 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
343 # used for Timedelta/DatetimeArray, overwritten by PeriodArray
344 if is_object_dtype(dtype):
345 return np.array(list(self), dtype=object)
346 return self._ndarray
347
348 @overload
349 def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
350 ...
351
352 @overload
353 def __getitem__(
354 self: DatetimeLikeArrayT,
355 item: SequenceIndexer | PositionalIndexerTuple,
356 ) -> DatetimeLikeArrayT:
357 ...
358
359 def __getitem__(
360 self: DatetimeLikeArrayT, key: PositionalIndexer2D
361 ) -> DatetimeLikeArrayT | DTScalarOrNaT:
362 """
363 This getitem defers to the underlying array, which by-definition can
364 only handle list-likes, slices, and integer scalars
365 """
366 # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
367 # but skip evaluating the Union at runtime for performance
368 # (see https://github.com/pandas-dev/pandas/pull/44624)
369 result = cast(
370 "Union[DatetimeLikeArrayT, DTScalarOrNaT]", super().__getitem__(key)
371 )
372 if lib.is_scalar(result):
373 return result
374 else:
375 # At this point we know the result is an array.
376 result = cast(DatetimeLikeArrayT, result)
377 result._freq = self._get_getitem_freq(key)
378 return result
379
380 def _get_getitem_freq(self, key) -> BaseOffset | None:
381 """
382 Find the `freq` attribute to assign to the result of a __getitem__ lookup.
383 """
384 is_period = is_period_dtype(self.dtype)
385 if is_period:
386 freq = self.freq
387 elif self.ndim != 1:
388 freq = None
389 else:
390 key = check_array_indexer(self, key) # maybe ndarray[bool] -> slice
391 freq = None
392 if isinstance(key, slice):
393 if self.freq is not None and key.step is not None:
394 freq = key.step * self.freq
395 else:
396 freq = self.freq
397 elif key is Ellipsis:
398 # GH#21282 indexing with Ellipsis is similar to a full slice,
399 # should preserve `freq` attribute
400 freq = self.freq
401 elif com.is_bool_indexer(key):
402 new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
403 if isinstance(new_key, slice):
404 return self._get_getitem_freq(new_key)
405 return freq
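
    # Illustrative freq propagation (assumed hourly data): ``arr[::2]`` gets
    # freq ``2 * arr.freq``, ``arr[...]`` keeps ``arr.freq``, and a boolean
    # mask that cannot be converted to a slice yields freq=None.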

    # error: Argument 1 of "__setitem__" is incompatible with supertype
    # "ExtensionArray"; supertype defines the argument type as "Union[int,
    # ndarray]"
    def __setitem__(
        self,
        key: int | Sequence[int] | Sequence[bool] | slice,
        value: NaTType | Any | Sequence[Any],
    ) -> None:
        # I'm fudging the types a bit here. "Any" above really depends
        # on type(self). For PeriodArray, it's Period (or stuff coercible
        # to a period in from_sequence). For DatetimeArray, it's Timestamp...
        # I don't know if mypy can do that, possibly with Generics.
        # https://mypy.readthedocs.io/en/latest/generics.html

        no_op = check_setitem_lengths(key, value, self)

        # Calling super() before the no_op short-circuit means that we raise
        # on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array.
        super().__setitem__(key, value)

        if no_op:
            return

        self._maybe_clear_freq()

    def _maybe_clear_freq(self) -> None:
        # inplace operations like __setitem__ may invalidate the freq of
        # DatetimeArray and TimedeltaArray
        pass

    def astype(self, dtype, copy: bool = True):
        # Some notes on cases we don't have to handle here in the base class:
        # 1. PeriodArray.astype handles period -> period
        # 2. DatetimeArray.astype handles conversion between tz.
        # 3. DatetimeArray.astype handles datetime -> period
        dtype = pandas_dtype(dtype)

        if is_object_dtype(dtype):
            if self.dtype.kind == "M":
                self = cast("DatetimeArray", self)
                # *much* faster than self._box_values
                # for e.g. test_get_loc_tuple_monotonic_above_size_cutoff
                i8data = self.asi8
                converted = ints_to_pydatetime(
                    i8data,
                    tz=self.tz,
                    box="timestamp",
                    reso=self._creso,
                )
                return converted

            elif self.dtype.kind == "m":
                return ints_to_pytimedelta(self._ndarray, box=True)

            return self._box_values(self.asi8.ravel()).reshape(self.shape)

        elif isinstance(dtype, ExtensionDtype):
            return super().astype(dtype, copy=copy)
        elif is_string_dtype(dtype):
            return self._format_native_types()
        elif is_integer_dtype(dtype):
            # we deliberately ignore int32 vs. int64 here.
            # See https://github.com/pandas-dev/pandas/issues/24381 for more.
            values = self.asi8
            if dtype != np.int64:
                raise TypeError(
                    f"Converting from {self.dtype} to {dtype} is not supported. "
                    "Do obj.astype('int64').astype(dtype) instead"
                )

            if copy:
                values = values.copy()
            return values
        elif (
            is_datetime_or_timedelta_dtype(dtype)
            and not is_dtype_equal(self.dtype, dtype)
        ) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg)
        else:
            return np.asarray(self, dtype=dtype)

    @overload
    def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
        ...

    @overload
    def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
        ...

    @overload
    def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
        ...

    @overload
    def view(self, dtype: Dtype | None = ...) -> ArrayLike:
        ...

    # pylint: disable-next=useless-parent-delegation
    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        # we need to explicitly call super() method as long as the `@overload`s
        # are present in this file.
        return super().view(dtype)

    # ------------------------------------------------------------------
    # ExtensionArray Interface

    @classmethod
    def _concat_same_type(
        cls: type[DatetimeLikeArrayT],
        to_concat: Sequence[DatetimeLikeArrayT],
        axis: AxisInt = 0,
    ) -> DatetimeLikeArrayT:
        new_obj = super()._concat_same_type(to_concat, axis)

        obj = to_concat[0]
        dtype = obj.dtype

        new_freq = None
        if is_period_dtype(dtype):
            new_freq = obj.freq
        elif axis == 0:
            # GH 3232: If the concat result is evenly spaced, we can retain the
            # original frequency
            to_concat = [x for x in to_concat if len(x)]

            if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
                pairs = zip(to_concat[:-1], to_concat[1:])
                if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
                    new_freq = obj.freq

        new_obj._freq = new_freq
        return new_obj
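
    # Illustrative (assumed values): concatenating daily pieces
    # ["2020-01-01", "2020-01-02"] and ["2020-01-03"] along axis=0 stays evenly
    # spaced end-to-end, so the result retains freq="D"; a gap would drop it.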

    def copy(self: DatetimeLikeArrayT, order: str = "C") -> DatetimeLikeArrayT:
        # error: Unexpected keyword argument "order" for "copy"
        new_obj = super().copy(order=order)  # type: ignore[call-arg]
        new_obj._freq = self.freq
        return new_obj

    # ------------------------------------------------------------------
    # Validation Methods
    # TODO: try to de-duplicate these, ensure identical behavior

    def _validate_comparison_value(self, other):
        if isinstance(other, str):
            try:
                # GH#18435 strings get a pass from tzawareness compat
                other = self._scalar_from_string(other)
            except (ValueError, IncompatibleFrequency):
                # failed to parse as Timestamp/Timedelta/Period
                raise InvalidComparison(other)

        if isinstance(other, self._recognized_scalars) or other is NaT:
            other = self._scalar_type(other)
            try:
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                # e.g. tzawareness mismatch
                raise InvalidComparison(other) from err

        elif not is_list_like(other):
            raise InvalidComparison(other)

        elif len(other) != len(self):
            raise ValueError("Lengths must match")

        else:
            try:
                other = self._validate_listlike(other, allow_object=True)
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                if is_object_dtype(getattr(other, "dtype", None)):
                    # We will have to operate element-wise
                    pass
                else:
                    raise InvalidComparison(other) from err

        return other

    def _validate_scalar(
        self,
        value,
        *,
        allow_listlike: bool = False,
        unbox: bool = True,
    ):
        """
        Validate that the input value can be cast to our scalar_type.

        Parameters
        ----------
        value : object
        allow_listlike: bool, default False
            When raising an exception, whether the message should say
            listlike inputs are allowed.
        unbox : bool, default True
            Whether to unbox the result before returning. Note: unbox=False
            skips the setitem compatibility check.

        Returns
        -------
        self._scalar_type or NaT
        """
        if isinstance(value, self._scalar_type):
            pass

        elif isinstance(value, str):
            # NB: Careful about tzawareness
            try:
                value = self._scalar_from_string(value)
            except ValueError as err:
                msg = self._validation_error_message(value, allow_listlike)
                raise TypeError(msg) from err

        elif is_valid_na_for_dtype(value, self.dtype):
            # GH#18295
            value = NaT

        elif isna(value):
            # if we are dt64tz and value is dt64("NaT"), don't cast to NaT,
            # or else we'll fail to raise in _unbox_scalar
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        elif isinstance(value, self._recognized_scalars):
            value = self._scalar_type(value)

        else:
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        if not unbox:
            # NB: In general NDArrayBackedExtensionArray will unbox here;
            # this option exists to prevent a performance hit in
            # TimedeltaIndex.get_loc
            return value
        return self._unbox_scalar(value)

    def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
        """
        Construct an exception message on validation error.

        Some methods allow only scalar inputs, while others allow either scalar
        or listlike.

        Parameters
        ----------
        allow_listlike: bool, default False

        Returns
        -------
        str
        """
        if allow_listlike:
            msg = (
                f"value should be a '{self._scalar_type.__name__}', 'NaT', "
                f"or array of those. Got '{type(value).__name__}' instead."
            )
        else:
            msg = (
                f"value should be a '{self._scalar_type.__name__}' or 'NaT'. "
                f"Got '{type(value).__name__}' instead."
            )
        return msg

    def _validate_listlike(self, value, allow_object: bool = False):
        if isinstance(value, type(self)):
            return value

        if isinstance(value, list) and len(value) == 0:
            # We treat empty list as our own dtype.
            return type(self)._from_sequence([], dtype=self.dtype)

        if hasattr(value, "dtype") and value.dtype == object:
            # `array` below won't do inference if value is an Index or Series,
            # so do so here. In the Index case, inferred_type may be cached.
            if lib.infer_dtype(value) in self._infer_matches:
                try:
                    value = type(self)._from_sequence(value)
                except (ValueError, TypeError):
                    if allow_object:
                        return value
                    msg = self._validation_error_message(value, True)
                    raise TypeError(msg)

        # Do type inference if necessary up front (after unpacking PandasArray)
        # e.g. we passed PeriodIndex.values and got an ndarray of Periods
        value = extract_array(value, extract_numpy=True)
        value = pd_array(value)
        value = extract_array(value, extract_numpy=True)

        if is_all_strings(value):
            # We got a StringArray
            try:
                # TODO: Could use from_sequence_of_strings if implemented
                # Note: passing dtype is necessary for PeriodArray tests
                value = type(self)._from_sequence(value, dtype=self.dtype)
            except ValueError:
                pass

        if is_categorical_dtype(value.dtype):
            # e.g. we have a Categorical holding self.dtype
            if is_dtype_equal(value.categories.dtype, self.dtype):
                # TODO: do we need equal dtype or just comparable?
                value = value._internal_get_values()
                value = extract_array(value, extract_numpy=True)

        if allow_object and is_object_dtype(value.dtype):
            pass

        elif not type(self)._is_recognized_dtype(value.dtype):
            msg = self._validation_error_message(value, True)
            raise TypeError(msg)

        return value

    def _validate_setitem_value(self, value):
        if is_list_like(value):
            value = self._validate_listlike(value)
        else:
            return self._validate_scalar(value, allow_listlike=True)

        return self._unbox(value)

    @final
    def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray:
        """
        Unbox either a scalar with _unbox_scalar or an instance of our own type.
        """
        if lib.is_scalar(other):
            other = self._unbox_scalar(other)
        else:
            # same type as self
            self._check_compatible_with(other)
            other = other._ndarray
        return other

    # ------------------------------------------------------------------
    # Additional array methods
    # These are not part of the EA API, but we implement them because
    # pandas assumes they're there.

    @ravel_compat
    def map(self, mapper):
        # TODO(GH-23179): Add ExtensionArray.map
        # Need to figure out if we want ExtensionArray.map first.
        # If so, then we can refactor IndexOpsMixin._map_values to
        # a standalone function and call from here.
        # Else, just rewrite _map_infer_values to do the right thing.
        from pandas import Index

        return Index(self).map(mapper).array

    def isin(self, values) -> npt.NDArray[np.bool_]:
        """
        Compute boolean array of whether each value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        ndarray[bool]
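
        Examples
        --------
        A sketch (assumed values; not executed):

        >>> dta = pd.array(pd.to_datetime(["2020-01-01", "2021-01-01"]))
        >>> dta.isin([pd.Timestamp("2020-01-01")])  # doctest: +SKIP
        array([ True, False])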
775 """
776 if not hasattr(values, "dtype"):
777 values = np.asarray(values)
778
779 if values.dtype.kind in ["f", "i", "u", "c"]:
780 # TODO: de-duplicate with equals, validate_comparison_value
781 return np.zeros(self.shape, dtype=bool)
782
783 if not isinstance(values, type(self)):
784 inferable = [
785 "timedelta",
786 "timedelta64",
787 "datetime",
788 "datetime64",
789 "date",
790 "period",
791 ]
792 if values.dtype == object:
793 inferred = lib.infer_dtype(values, skipna=False)
794 if inferred not in inferable:
795 if inferred == "string":
796 pass
797
798 elif "mixed" in inferred:
799 return isin(self.astype(object), values)
800 else:
801 return np.zeros(self.shape, dtype=bool)
802
803 try:
804 values = type(self)._from_sequence(values)
805 except ValueError:
806 return isin(self.astype(object), values)
807
808 if self.dtype.kind in ["m", "M"]:
809 self = cast("DatetimeArray | TimedeltaArray", self)
810 values = values.as_unit(self.unit)
811
812 try:
813 self._check_compatible_with(values)
814 except (TypeError, ValueError):
815 # Includes tzawareness mismatch and IncompatibleFrequencyError
816 return np.zeros(self.shape, dtype=bool)
817
818 return isin(self.asi8, values.asi8)
819
820 # ------------------------------------------------------------------
821 # Null Handling
822
823 def isna(self) -> npt.NDArray[np.bool_]:
824 return self._isnan
825
826 @property # NB: override with cache_readonly in immutable subclasses
827 def _isnan(self) -> npt.NDArray[np.bool_]:
828 """
829 return if each value is nan
830 """
831 return self.asi8 == iNaT
832
833 @property # NB: override with cache_readonly in immutable subclasses
834 def _hasna(self) -> bool:
835 """
836 return if I have any nans; enables various perf speedups
837 """
838 return bool(self._isnan.any())
839
840 def _maybe_mask_results(
841 self, result: np.ndarray, fill_value=iNaT, convert=None
842 ) -> np.ndarray:
843 """
844 Parameters
845 ----------
846 result : np.ndarray
847 fill_value : object, default iNaT
848 convert : str, dtype or None
849
850 Returns
851 -------
852 result : ndarray with values replace by the fill_value
853
854 mask the result if needed, convert to the provided dtype if its not
855 None
856
857 This is an internal routine.
858 """
859 if self._hasna:
860 if convert:
861 result = result.astype(convert)
862 if fill_value is None:
863 fill_value = np.nan
864 np.putmask(result, self._isnan, fill_value)
865 return result
866
867 # ------------------------------------------------------------------
868 # Frequency Properties/Methods
869
870 @property
871 def freqstr(self) -> str | None:
872 """
873 Return the frequency object as a string if its set, otherwise None.
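
        Examples
        --------
        A sketch (assumed values; not executed):

        >>> dti = pd.date_range("2020-01-01", periods=3, freq="D")
        >>> dti.freqstr  # doctest: +SKIP
        'D'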
874 """
875 if self.freq is None:
876 return None
877 return self.freq.freqstr
878
879 @property # NB: override with cache_readonly in immutable subclasses
880 def inferred_freq(self) -> str | None:
881 """
882 Tries to return a string representing a frequency generated by infer_freq.
883
884 Returns None if it can't autodetect the frequency.
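
        Examples
        --------
        A sketch (assumed values; not executed):

        >>> dti = pd.DatetimeIndex(["2020-01-01", "2020-01-02", "2020-01-03"])
        >>> dti.inferred_freq  # doctest: +SKIP
        'D'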
885 """
886 if self.ndim != 1:
887 return None
888 try:
889 return frequencies.infer_freq(self)
890 except ValueError:
891 return None
892
893 @property # NB: override with cache_readonly in immutable subclasses
894 def _resolution_obj(self) -> Resolution | None:
895 freqstr = self.freqstr
896 if freqstr is None:
897 return None
898 try:
899 return Resolution.get_reso_from_freqstr(freqstr)
900 except KeyError:
901 return None
902
903 @property # NB: override with cache_readonly in immutable subclasses
904 def resolution(self) -> str:
905 """
906 Returns day, hour, minute, second, millisecond or microsecond
907 """
908 # error: Item "None" of "Optional[Any]" has no attribute "attrname"
909 return self._resolution_obj.attrname # type: ignore[union-attr]
910
911 # monotonicity/uniqueness properties are called via frequencies.infer_freq,
912 # see GH#23789
913
914 @property
915 def _is_monotonic_increasing(self) -> bool:
916 return algos.is_monotonic(self.asi8, timelike=True)[0]
917
918 @property
919 def _is_monotonic_decreasing(self) -> bool:
920 return algos.is_monotonic(self.asi8, timelike=True)[1]
921
922 @property
923 def _is_unique(self) -> bool:
924 return len(unique1d(self.asi8.ravel("K"))) == self.size
925
926 # ------------------------------------------------------------------
927 # Arithmetic Methods
928
929 def _cmp_method(self, other, op):
930 if self.ndim > 1 and getattr(other, "shape", None) == self.shape:
931 # TODO: handle 2D-like listlikes
932 return op(self.ravel(), other.ravel()).reshape(self.shape)
933
934 try:
935 other = self._validate_comparison_value(other)
936 except InvalidComparison:
937 return invalid_comparison(self, other, op)
938
939 dtype = getattr(other, "dtype", None)
940 if is_object_dtype(dtype):
941 # We have to use comp_method_OBJECT_ARRAY instead of numpy
942 # comparison otherwise it would fail to raise when
943 # comparing tz-aware and tz-naive
944 with np.errstate(all="ignore"):
945 result = ops.comp_method_OBJECT_ARRAY(
946 op, np.asarray(self.astype(object)), other
947 )
948 return result
949
950 if other is NaT:
951 if op is operator.ne:
952 result = np.ones(self.shape, dtype=bool)
953 else:
954 result = np.zeros(self.shape, dtype=bool)
955 return result
956
957 if not is_period_dtype(self.dtype):
958 self = cast(TimelikeOps, self)
959 if self._creso != other._creso:
960 if not isinstance(other, type(self)):
961 # i.e. Timedelta/Timestamp, cast to ndarray and let
962 # compare_mismatched_resolutions handle broadcasting
963 try:
964 # GH#52080 see if we can losslessly cast to shared unit
965 other = other.as_unit(self.unit, round_ok=False)
966 except ValueError:
967 other_arr = np.array(other.asm8)
968 return compare_mismatched_resolutions(
969 self._ndarray, other_arr, op
970 )
971 else:
972 other_arr = other._ndarray
973 return compare_mismatched_resolutions(self._ndarray, other_arr, op)
974
975 other_vals = self._unbox(other)
976 # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
977 result = op(self._ndarray.view("i8"), other_vals.view("i8"))
978
979 o_mask = isna(other)
980 mask = self._isnan | o_mask
981 if mask.any():
982 nat_result = op is operator.ne
983 np.putmask(result, mask, nat_result)
984
985 return result
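
    # Illustrative NaT semantics (assumed values): ``arr == NaT`` is all-False
    # and ``arr != NaT`` is all-True, with NaT positions in ``arr`` masked the
    # same way, mirroring NaN comparison behavior.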

    # pow is invalid for all three subclasses; TimedeltaArray will override
    # the multiplication and division ops
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")

    @final
    def _get_i8_values_and_mask(
        self, other
    ) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
        """
        Get the int64 values and b_mask to pass to checked_add_with_arr.
        """
        if isinstance(other, Period):
            i8values = other.ordinal
            mask = None
        elif isinstance(other, (Timestamp, Timedelta)):
            i8values = other._value
            mask = None
        else:
            # PeriodArray, DatetimeArray, TimedeltaArray
            mask = other._isnan
            i8values = other.asi8
        return i8values, mask

    @final
    def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
        """
        Check if we can preserve self.freq in addition or subtraction.
        """
        # Adding or subtracting a Timedelta/Timestamp scalar is freq-preserving
        # whenever self.freq is a Tick
        if is_period_dtype(self.dtype):
            return self.freq
        elif not lib.is_scalar(other):
            return None
        elif isinstance(self.freq, Tick):
            # In these cases adding a scalar preserves the existing freq
            return self.freq
        return None
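
    # Illustrative (assumed values): for a DatetimeArray with freq="D", adding
    # the scalar Timedelta("1D") preserves freq="D" (a Tick freq), while adding
    # a timedelta64 array returns freq=None.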

    @final
    def _add_datetimelike_scalar(self, other) -> DatetimeArray:
        if not is_timedelta64_dtype(self.dtype):
            raise TypeError(
                f"cannot add {type(self).__name__} and {type(other).__name__}"
            )

        self = cast("TimedeltaArray", self)

        from pandas.core.arrays import DatetimeArray
        from pandas.core.arrays.datetimes import tz_to_dtype

        assert other is not NaT
        if isna(other):
            # i.e. np.datetime64("NaT")
            # In this case we specifically interpret NaT as a datetime, not
            # the timedelta interpretation we would get by returning self + NaT
            result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self.unit}]")
            # Preserve our resolution
            return DatetimeArray._simple_new(result, dtype=result.dtype)

        other = Timestamp(other)
        self, other = self._ensure_matching_resos(other)
        self = cast("TimedeltaArray", self)

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        result = checked_add_with_arr(
            self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
        )
        res_values = result.view(f"M8[{self.unit}]")

        dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
        new_freq = self._get_arithmetic_result_freq(other)
        return DatetimeArray._simple_new(res_values, dtype=dtype, freq=new_freq)

    @final
    def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray:
        if not is_timedelta64_dtype(self.dtype):
            raise TypeError(
                f"cannot add {type(self).__name__} and {type(other).__name__}"
            )

        # defer to DatetimeArray.__add__
        return other + self

    @final
    def _sub_datetimelike_scalar(self, other: datetime | np.datetime64):
        if self.dtype.kind != "M":
            raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")

        self = cast("DatetimeArray", self)
        # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]

        if isna(other):
            # i.e. np.datetime64("NaT")
            return self - NaT

        ts = Timestamp(other)

        self, ts = self._ensure_matching_resos(ts)
        return self._sub_datetimelike(ts)

    @final
    def _sub_datetime_arraylike(self, other: DatetimeArray):
        if self.dtype.kind != "M":
            raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")

        if len(self) != len(other):
            raise ValueError("cannot add indices of unequal length")

        self = cast("DatetimeArray", self)

        self, other = self._ensure_matching_resos(other)
        return self._sub_datetimelike(other)

    @final
    def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
        self = cast("DatetimeArray", self)

        from pandas.core.arrays import TimedeltaArray

        try:
            self._assert_tzawareness_compat(other)
        except TypeError as err:
            new_message = str(err).replace("compare", "subtract")
            raise type(err)(new_message) from err

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        res_values = checked_add_with_arr(
            self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
        )
        res_m8 = res_values.view(f"timedelta64[{self.unit}]")

        new_freq = self._get_arithmetic_result_freq(other)
        return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)

    @final
    def _add_period(self, other: Period) -> PeriodArray:
        if not is_timedelta64_dtype(self.dtype):
            raise TypeError(f"cannot add Period to a {type(self).__name__}")

        # We will wrap in a PeriodArray and defer to the reversed operation
        from pandas.core.arrays.period import PeriodArray

        i8vals = np.broadcast_to(other.ordinal, self.shape)
        parr = PeriodArray(i8vals, freq=other.freq)
        return parr + self

    def _add_offset(self, offset):
        raise AbstractMethodError(self)

    def _add_timedeltalike_scalar(self, other):
        """
        Add a delta of a timedeltalike

        Returns
        -------
        Same type as self
        """
        if isna(other):
            # i.e. np.timedelta64("NaT")
            new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype)
            new_values.fill(iNaT)
            return type(self)._simple_new(new_values, dtype=self.dtype)

        # PeriodArray overrides, so we only get here with DTA/TDA
        self = cast("DatetimeArray | TimedeltaArray", self)
        other = Timedelta(other)
        self, other = self._ensure_matching_resos(other)
        return self._add_timedeltalike(other)

    def _add_timedelta_arraylike(self, other: TimedeltaArray):
        """
        Add a delta of a TimedeltaIndex

        Returns
        -------
        Same type as self
        """
        # overridden by PeriodArray

        if len(self) != len(other):
            raise ValueError("cannot add indices of unequal length")

        self = cast("DatetimeArray | TimedeltaArray", self)

        self, other = self._ensure_matching_resos(other)
        return self._add_timedeltalike(other)

    @final
    def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
        self = cast("DatetimeArray | TimedeltaArray", self)

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        new_values = checked_add_with_arr(
            self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
        )
        res_values = new_values.view(self._ndarray.dtype)

        new_freq = self._get_arithmetic_result_freq(other)

        return type(self)._simple_new(res_values, dtype=self.dtype, freq=new_freq)

    @final
    def _add_nat(self):
        """
        Add pd.NaT to self
        """
        if is_period_dtype(self.dtype):
            raise TypeError(
                f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
            )
        self = cast("TimedeltaArray | DatetimeArray", self)

        # GH#19124 pd.NaT is treated like a timedelta for both timedelta
        # and datetime dtypes
        result = np.empty(self.shape, dtype=np.int64)
        result.fill(iNaT)
        result = result.view(self._ndarray.dtype)  # preserve reso
        return type(self)._simple_new(result, dtype=self.dtype, freq=None)

    @final
    def _sub_nat(self):
        """
        Subtract pd.NaT from self
        """
        # GH#19124 Timedelta - datetime is not in general well-defined.
        # We make an exception for pd.NaT, which in this case quacks
        # like a timedelta.
        # For datetime64 dtypes by convention we treat NaT as a datetime, so
        # this subtraction returns a timedelta64 dtype.
        # For period dtype, timedelta64 is a close-enough return dtype.
        result = np.empty(self.shape, dtype=np.int64)
        result.fill(iNaT)
        if self.dtype.kind in ["m", "M"]:
            # We can retain unit in dtype
            self = cast("DatetimeArray | TimedeltaArray", self)
            return result.view(f"timedelta64[{self.unit}]")
        else:
            return result.view("timedelta64[ns]")

    @final
    def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_]:
        # If the operation is well-defined, we return an object-dtype ndarray
        # of DateOffsets. Null entries are filled with pd.NaT
        if not is_period_dtype(self.dtype):
            raise TypeError(
                f"cannot subtract {type(other).__name__} from {type(self).__name__}"
            )

        self = cast("PeriodArray", self)
        self._check_compatible_with(other)

        other_i8, o_mask = self._get_i8_values_and_mask(other)
        new_i8_data = checked_add_with_arr(
            self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
        )
        new_data = np.array([self.freq.base * x for x in new_i8_data])

        if o_mask is None:
            # i.e. Period scalar
            mask = self._isnan
        else:
            # i.e. PeriodArray
            mask = self._isnan | o_mask
        new_data[mask] = NaT
        return new_data
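
    # Illustrative (assumed values): with monthly periods,
    # pd.PeriodIndex(["2020-03"], freq="M") - pd.Period("2020-01", freq="M")
    # gives an object-dtype array of offsets like [<2 * MonthEnds>].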

    @final
    def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
        """
        Add or subtract array-like of DateOffset objects

        Parameters
        ----------
        other : np.ndarray[object]
        op : {operator.add, operator.sub}

        Returns
        -------
        np.ndarray[object]
            Except in fastpath case with length 1 where we operate on the
            contained scalar.
        """
        assert op in [operator.add, operator.sub]
        if len(other) == 1 and self.ndim == 1:
            # Note: without this special case, we could annotate return type
            # as ndarray[object]
            # If both 1D then broadcasting is unambiguous
            return op(self, other[0])

        warnings.warn(
            "Adding/subtracting object-dtype array to "
            f"{type(self).__name__} not vectorized.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        # Caller is responsible for broadcasting if necessary
        assert self.shape == other.shape, (self.shape, other.shape)

        res_values = op(self.astype("O"), np.asarray(other))
        return res_values

    def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
        if name not in {"cummin", "cummax"}:
            raise TypeError(f"Accumulation {name} not supported for {type(self)}")

        op = getattr(datetimelike_accumulations, name)
        result = op(self.copy(), skipna=skipna, **kwargs)

        return type(self)._simple_new(
            result, freq=None, dtype=self.dtype  # type: ignore[call-arg]
        )

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        other_dtype = getattr(other, "dtype", None)
        other = ensure_wrapped_if_datetimelike(other)

        # scalar others
        if other is NaT:
            result = self._add_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_timedeltalike_scalar(other)
        elif isinstance(other, BaseOffset):
            # specifically _not_ a Tick
            result = self._add_offset(other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._add_datetimelike_scalar(other)
        elif isinstance(other, Period) and is_timedelta64_dtype(self.dtype):
            result = self._add_period(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            if not is_period_dtype(self.dtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add)

        # array-like others
        elif is_timedelta64_dtype(other_dtype):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_timedelta_arraylike(other)
        elif is_object_dtype(other_dtype):
            # e.g. Array/Index of DateOffset objects
            result = self._addsub_object_array(other, operator.add)
        elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
            # DatetimeIndex, ndarray[datetime64]
            return self._add_datetime_arraylike(other)
        elif is_integer_dtype(other_dtype):
            if not is_period_dtype(self.dtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add)
        else:
            # Includes Categorical, other ExtensionArrays
            # For PeriodDtype, if self is a TimedeltaArray and other is a
            # PeriodArray with a timedelta-like (i.e. Tick) freq, this
            # operation is valid. Defer to the PeriodArray implementation.
            # In remaining cases, this will end up raising TypeError.
            return NotImplemented

        if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray(result)
        return result

    def __radd__(self, other):
        # alias for __add__
        return self.__add__(other)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        other_dtype = getattr(other, "dtype", None)
        other = ensure_wrapped_if_datetimelike(other)

        # scalar others
        if other is NaT:
            result = self._sub_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_timedeltalike_scalar(-other)
        elif isinstance(other, BaseOffset):
            # specifically _not_ a Tick
            result = self._add_offset(-other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._sub_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            if not is_period_dtype(self.dtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub)

        elif isinstance(other, Period):
            result = self._sub_periodlike(other)

        # array-like others
        elif is_timedelta64_dtype(other_dtype):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_timedelta_arraylike(-other)
        elif is_object_dtype(other_dtype):
            # e.g. Array/Index of DateOffset objects
            result = self._addsub_object_array(other, operator.sub)
        elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
            # DatetimeIndex, ndarray[datetime64]
            result = self._sub_datetime_arraylike(other)
        elif is_period_dtype(other_dtype):
            # PeriodIndex
            result = self._sub_periodlike(other)
        elif is_integer_dtype(other_dtype):
            if not is_period_dtype(self.dtype):
                raise integer_op_not_supported(self)
            obj = cast("PeriodArray", self)
            result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub)
        else:
            # Includes ExtensionArrays, float_dtype
            return NotImplemented

        if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray(result)
        return result

    def __rsub__(self, other):
        other_dtype = getattr(other, "dtype", None)

        if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype):
            # ndarray[datetime64] cannot be subtracted from self, so
            # we need to wrap in DatetimeArray/Index and flip the operation
            if lib.is_scalar(other):
                # i.e. np.datetime64 object
                return Timestamp(other) - self
            if not isinstance(other, DatetimeLikeArrayMixin):
                # Avoid down-casting DatetimeIndex
                from pandas.core.arrays import DatetimeArray

                other = DatetimeArray(other)
            return other - self
        elif (
            is_datetime64_any_dtype(self.dtype)
            and hasattr(other, "dtype")
            and not is_datetime64_any_dtype(other.dtype)
        ):
            # GH#19959 datetime - datetime is well-defined as timedelta,
            # but any other type - datetime is not well-defined.
            raise TypeError(
                f"cannot subtract {type(self).__name__} from {type(other).__name__}"
            )
        elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other_dtype):
            # TODO: Can we simplify/generalize these cases at all?
            raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
        elif is_timedelta64_dtype(self.dtype):
            self = cast("TimedeltaArray", self)
            return (-self) + other

        # We get here with e.g. datetime objects
        return -(self - other)

    def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT:
        result = self + other
        self[:] = result[:]

        if not is_period_dtype(self.dtype):
            # restore freq, which is invalidated by setitem
            self._freq = result.freq
        return self

    def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT:
        result = self - other
        self[:] = result[:]

        if not is_period_dtype(self.dtype):
            # restore freq, which is invalidated by setitem
            self._freq = result.freq
        return self

    # --------------------------------------------------------------
    # Reductions

    @_period_dispatch
    def _quantile(
        self: DatetimeLikeArrayT,
        qs: npt.NDArray[np.float64],
        interpolation: str,
    ) -> DatetimeLikeArrayT:
        return super()._quantile(qs=qs, interpolation=interpolation)

    @_period_dispatch
    def min(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
        """
        Return the minimum value of the Array or minimum along
        an axis.

        See Also
        --------
        numpy.ndarray.min
        Index.min : Return the minimum value in an Index.
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min((), kwargs)
        nv.validate_minmax_axis(axis, self.ndim)

        result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    @_period_dispatch
    def max(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
        """
        Return the maximum value of the Array or maximum along
        an axis.

        See Also
        --------
        numpy.ndarray.max
        Index.max : Return the maximum value in an Index.
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max((), kwargs)
        nv.validate_minmax_axis(axis, self.ndim)

        result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):
        """
        Return the mean value of the Array.

        Parameters
        ----------
        skipna : bool, default True
            Whether to ignore any NaT elements.
        axis : int, optional, default 0

        Returns
        -------
        scalar
            Timestamp or Timedelta.

        See Also
        --------
        numpy.ndarray.mean : Returns the average of array elements along a given axis.
        Series.mean : Return the mean value in a Series.

        Notes
        -----
        mean is only defined for Datetime and Timedelta dtypes, not for Period.
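
        Examples
        --------
        A sketch (assumed values; not executed):

        >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D")
        >>> tdelta_idx.mean()  # doctest: +SKIP
        Timedelta('2 days 00:00:00')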
1548 """
1549 if is_period_dtype(self.dtype):
1550 # See discussion in GH#24757
1551 raise TypeError(
1552 f"mean is not implemented for {type(self).__name__} since the "
1553 "meaning is ambiguous. An alternative is "
1554 "obj.to_timestamp(how='start').mean()"
1555 )
1556
1557 result = nanops.nanmean(
1558 self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
1559 )
1560 return self._wrap_reduction_result(axis, result)
1561
1562 @_period_dispatch
1563 def median(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
1564 nv.validate_median((), kwargs)
1565
1566 if axis is not None and abs(axis) >= self.ndim:
1567 raise ValueError("abs(axis) must be less than ndim")
1568
1569 result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
1570 return self._wrap_reduction_result(axis, result)
1571
1572 def _mode(self, dropna: bool = True):
1573 mask = None
1574 if dropna:
1575 mask = self.isna()
1576
1577 i8modes = algorithms.mode(self.view("i8"), mask=mask)
1578 npmodes = i8modes.view(self._ndarray.dtype)
1579 npmodes = cast(np.ndarray, npmodes)
1580 return self._from_backing_data(npmodes)
1581
1582
1583class DatelikeOps(DatetimeLikeArrayMixin):
1584 """
1585 Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
1586 """
1587
1588 @Substitution(
1589 URL="https://docs.python.org/3/library/datetime.html"
1590 "#strftime-and-strptime-behavior"
1591 )
1592 def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
1593 """
1594 Convert to Index using specified date_format.
1595
1596 Return an Index of formatted strings specified by date_format, which
1597 supports the same string format as the python standard library. Details
1598 of the string format can be found in `python string format
1599 doc <%(URL)s>`__.
1600
1601 Formats supported by the C `strftime` API but not by the python string format
1602 doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
1603 preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
1604 `"%%I:%%M:%%S %%p"`).
1605
1606 Note that `PeriodIndex` support additional directives, detailed in
1607 `Period.strftime`.
1608
1609 Parameters
1610 ----------
1611 date_format : str
1612 Date format string (e.g. "%%Y-%%m-%%d").
1613
1614 Returns
1615 -------
1616 ndarray[object]
1617 NumPy ndarray of formatted strings.
1618
1619 See Also
1620 --------
1621 to_datetime : Convert the given argument to datetime.
1622 DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
1623 DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
1624 DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
1625 Timestamp.strftime : Format a single Timestamp.
1626 Period.strftime : Format a single Period.
1627
1628 Examples
1629 --------
1630 >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
1631 ... periods=3, freq='s')
1632 >>> rng.strftime('%%B %%d, %%Y, %%r')
1633 Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
1634 'March 10, 2018, 09:00:02 AM'],
1635 dtype='object')
1636 """
1637 result = self._format_native_types(date_format=date_format, na_rep=np.nan)
1638 return result.astype(object, copy=False)
1639
1640
1641_round_doc = """
1642 Perform {op} operation on the data to the specified `freq`.
1643
1644 Parameters
1645 ----------
1646 freq : str or Offset
1647 The frequency level to {op} the index to. Must be a fixed
1648 frequency like 'S' (second) not 'ME' (month end). See
1649 :ref:`frequency aliases <timeseries.offset_aliases>` for
1650 a list of possible `freq` values.
1651 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
1652 Only relevant for DatetimeIndex:
1653
1654 - 'infer' will attempt to infer fall dst-transition hours based on
1655 order
1656 - bool-ndarray where True signifies a DST time, False designates
1657 a non-DST time (note that this flag is only applicable for
1658 ambiguous times)
1659 - 'NaT' will return NaT where there are ambiguous times
1660 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
1661 times.
1662
1663 nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
1664 A nonexistent time does not exist in a particular timezone
1665 where clocks moved forward due to DST.
1666
1667 - 'shift_forward' will shift the nonexistent time forward to the
1668 closest existing time
1669 - 'shift_backward' will shift the nonexistent time backward to the
1670 closest existing time
1671 - 'NaT' will return NaT where there are nonexistent times
1672 - timedelta objects will shift nonexistent times by the timedelta
1673 - 'raise' will raise an NonExistentTimeError if there are
1674 nonexistent times.
1675
1676 Returns
1677 -------
1678 DatetimeIndex, TimedeltaIndex, or Series
1679 Index of the same type for a DatetimeIndex or TimedeltaIndex,
1680 or a Series with the same index for a Series.
1681
1682 Raises
1683 ------
1684 ValueError if the `freq` cannot be converted.
1685
1686 Notes
1687 -----
1688 If the timestamps have a timezone, {op}ing will take place relative to the
1689 local ("wall") time and re-localized to the same timezone. When {op}ing
1690 near daylight savings time, use ``nonexistent`` and ``ambiguous`` to
1691 control the re-localization behavior.
1692
1693 Examples
1694 --------
1695 **DatetimeIndex**
1696
1697 >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
1698 >>> rng
1699 DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
1700 '2018-01-01 12:01:00'],
1701 dtype='datetime64[ns]', freq='T')
1702 """
1703
1704_round_example = """>>> rng.round('H')
1705 DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
1706 '2018-01-01 12:00:00'],
1707 dtype='datetime64[ns]', freq=None)
1708
1709 **Series**
1710
1711 >>> pd.Series(rng).dt.round("H")
1712 0 2018-01-01 12:00:00
1713 1 2018-01-01 12:00:00
1714 2 2018-01-01 12:00:00
1715 dtype: datetime64[ns]
1716
1717 When rounding near a daylight savings time transition, use ``ambiguous`` or
1718 ``nonexistent`` to control how the timestamp should be re-localized.
1719
1720 >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
1721
1722 >>> rng_tz.floor("2H", ambiguous=False)
1723 DatetimeIndex(['2021-10-31 02:00:00+01:00'],
1724 dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
1725
1726 >>> rng_tz.floor("2H", ambiguous=True)
1727 DatetimeIndex(['2021-10-31 02:00:00+02:00'],
1728 dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
1729 """
1730
1731_floor_example = """>>> rng.floor('H')
1732 DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
1733 '2018-01-01 12:00:00'],
1734 dtype='datetime64[ns]', freq=None)
1735
1736 **Series**
1737
1738 >>> pd.Series(rng).dt.floor("H")
1739 0 2018-01-01 11:00:00
1740 1 2018-01-01 12:00:00
1741 2 2018-01-01 12:00:00
1742 dtype: datetime64[ns]
1743
1744 When rounding near a daylight savings time transition, use ``ambiguous`` or
1745 ``nonexistent`` to control how the timestamp should be re-localized.
1746
1747 >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
1748
1749 >>> rng_tz.floor("2H", ambiguous=False)
1750 DatetimeIndex(['2021-10-31 02:00:00+01:00'],
1751 dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
1752
1753 >>> rng_tz.floor("2H", ambiguous=True)
1754 DatetimeIndex(['2021-10-31 02:00:00+02:00'],
1755 dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
1756 """
1757
1758_ceil_example = """>>> rng.ceil('H')
1759 DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
1760 '2018-01-01 13:00:00'],
1761 dtype='datetime64[ns]', freq=None)
1762
1763 **Series**
1764
1765 >>> pd.Series(rng).dt.ceil("H")
1766 0 2018-01-01 12:00:00
1767 1 2018-01-01 12:00:00
1768 2 2018-01-01 13:00:00
1769 dtype: datetime64[ns]
1770
1771 When rounding near a daylight savings time transition, use ``ambiguous`` or
1772 ``nonexistent`` to control how the timestamp should be re-localized.
1773
1774 >>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam")
1775
1776 >>> rng_tz.ceil("H", ambiguous=False)
1777 DatetimeIndex(['2021-10-31 02:00:00+01:00'],
1778 dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
1779
1780 >>> rng_tz.ceil("H", ambiguous=True)
1781 DatetimeIndex(['2021-10-31 02:00:00+02:00'],
1782 dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
1783 """
1784
1785
1786TimelikeOpsT = TypeVar("TimelikeOpsT", bound="TimelikeOps")
1787
1788
1789class TimelikeOps(DatetimeLikeArrayMixin):
1790 """
1791 Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
1792 """
1793
1794 _default_dtype: np.dtype
1795
1796 def __init__(
1797 self, values, dtype=None, freq=lib.no_default, copy: bool = False
1798 ) -> None:
1799 values = extract_array(values, extract_numpy=True)
1800 if isinstance(values, IntegerArray):
1801 values = values.to_numpy("int64", na_value=iNaT)
1802
1803 inferred_freq = getattr(values, "_freq", None)
1804 explicit_none = freq is None
1805 freq = freq if freq is not lib.no_default else None
1806
1807 if isinstance(values, type(self)):
1808 if explicit_none:
1809 # don't inherit from values
1810 pass
1811 elif freq is None:
1812 freq = values.freq
1813 elif freq and values.freq:
1814 freq = to_offset(freq)
1815 freq, _ = validate_inferred_freq(freq, values.freq, False)
1816
1817 if dtype is not None:
1818 dtype = pandas_dtype(dtype)
1819 if not is_dtype_equal(dtype, values.dtype):
1820 # TODO: we only have tests for this for DTA, not TDA (2022-07-01)
1821 raise TypeError(
1822 f"dtype={dtype} does not match data dtype {values.dtype}"
1823 )
1824
1825 dtype = values.dtype
1826 values = values._ndarray
1827
1828 elif dtype is None:
1829 if isinstance(values, np.ndarray) and values.dtype.kind in "Mm":
1830 dtype = values.dtype
1831 else:
1832 dtype = self._default_dtype
1833
1834 if not isinstance(values, np.ndarray):
1835 raise ValueError(
1836 f"Unexpected type '{type(values).__name__}'. 'values' must be a "
1837 f"{type(self).__name__}, ndarray, or Series or Index "
1838 "containing one of those."
1839 )
        if values.ndim not in [1, 2]:
            raise ValueError("Only 1- and 2-dimensional input arrays are supported.")

        if values.dtype == "i8":
            # for compat with datetime/timedelta/period shared methods,
            # we can sometimes get here with int64 values.  These represent
            # nanosecond UTC (or tz-naive) unix timestamps
            values = values.view(self._default_dtype)

        dtype = self._validate_dtype(values, dtype)

        if freq == "infer":
            raise ValueError(
                f"Frequency inference not allowed in {type(self).__name__}.__init__. "
                "Use 'pd.array()' instead."
            )

        if copy:
            values = values.copy()
        if freq:
            freq = to_offset(freq)

        NDArrayBacked.__init__(self, values=values, dtype=dtype)
        self._freq = freq

        if inferred_freq is None and freq is not None:
            type(self)._validate_frequency(self, freq)
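
    # Usage sketch (illustrative comment, not a verified doctest): frequency
    # inference is routed through ``pd.array`` or the Index constructor
    # rather than ``__init__``, e.g.
    #
    #     >>> import pandas as pd
    #     >>> pd.DatetimeIndex(["2016-01-01", "2016-01-02"], freq="infer").freq
    #     <Day>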

    @classmethod
    def _validate_dtype(cls, values, dtype):
        raise AbstractMethodError(cls)

    @property
    def freq(self):
        """
        Return the frequency object if it is set, otherwise None.
        """
        return self._freq

    @freq.setter
    def freq(self, value) -> None:
        if value is not None:
            value = to_offset(value)
            self._validate_frequency(self, value)

            if self.ndim > 1:
                raise ValueError("Cannot set freq with ndim > 1")

        self._freq = value
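
    # Setter sketch (illustrative, not a verified doctest): assigning None
    # always succeeds because only non-None values are validated:
    #
    #     >>> dti = pd.date_range("2016-01-01", periods=3, freq="D")
    #     >>> dti.freq = None  # skips _validate_frequency
    #     >>> dti.freq is None
    #     True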

    @classmethod
    def _validate_frequency(cls, index, freq, **kwargs):
        """
        Validate that a frequency is compatible with the values of a given
        Datetime Array/Index or Timedelta Array/Index

        Parameters
        ----------
        index : DatetimeIndex or TimedeltaIndex
            The index on which to determine if the given frequency is valid
        freq : DateOffset
            The frequency to validate
        """
        inferred = index.inferred_freq
        if index.size == 0 or inferred == freq.freqstr:
            return None

        try:
            on_freq = cls._generate_range(
                start=index[0],
                end=None,
                periods=len(index),
                freq=freq,
                unit=index.unit,
                **kwargs,
            )
            if not np.array_equal(index.asi8, on_freq.asi8):
                raise ValueError
        except ValueError as err:
            if "non-fixed" in str(err):
                # non-fixed frequencies are not meaningful for timedelta64;
                # we retain that error message
                raise err
            # GH#11587 the main way this is reached is if the `np.array_equal`
            # check above is False.  This can also be reached if index[0]
            # is `NaT`, in which case the call to `cls._generate_range` will
            # raise a ValueError, which we re-raise with a more targeted
            # message.
            raise ValueError(
                f"Inferred frequency {inferred} from passed values "
                f"does not conform to passed frequency {freq.freqstr}"
            ) from err
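
    # Failure-mode sketch (representative error text, not a verified
    # doctest): values spaced two days apart do not conform to a daily freq:
    #
    #     >>> dti = pd.DatetimeIndex(["2016-01-01", "2016-01-03"])
    #     >>> dti.freq = "D"
    #     ValueError: Inferred frequency 2D from passed values does not
    #     conform to passed frequency D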

    @classmethod
    def _generate_range(
        cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs
    ) -> DatetimeLikeArrayT:
        raise AbstractMethodError(cls)

    # --------------------------------------------------------------

    @cache_readonly
    def _creso(self) -> int:
        return get_unit_from_dtype(self._ndarray.dtype)

    @cache_readonly
    def unit(self) -> str:
        # e.g. "ns", "us", "ms"
        # error: Argument 1 to "dtype_to_unit" has incompatible type
        # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
        return dtype_to_unit(self.dtype)  # type: ignore[arg-type]

    def as_unit(self: TimelikeOpsT, unit: str) -> TimelikeOpsT:
        if unit not in ["s", "ms", "us", "ns"]:
            raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")

        dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
        new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=True)

        if isinstance(self.dtype, np.dtype):
            new_dtype = new_values.dtype
        else:
            tz = cast("DatetimeArray", self).tz
            new_dtype = DatetimeTZDtype(tz=tz, unit=unit)

        # error: Unexpected keyword argument "freq" for "_simple_new" of
        # "NDArrayBacked"  [call-arg]
        return type(self)._simple_new(
            new_values, dtype=new_dtype, freq=self.freq  # type: ignore[call-arg]
        )
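
    # Usage sketch (illustrative, not a verified doctest):
    #
    #     >>> dti = pd.date_range("2016-01-01", periods=2, freq="D")
    #     >>> dti.as_unit("s").dtype
    #     dtype('<M8[s]')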

    # TODO: annotate other as DatetimeArray | TimedeltaArray | Timestamp | Timedelta
    #  with the return type matching input type.  TypeVar?
    def _ensure_matching_resos(self, other):
        if self._creso != other._creso:
            # Just as with Timestamp/Timedelta, we cast to the higher resolution
            if self._creso < other._creso:
                self = self.as_unit(other.unit)
            else:
                other = other.as_unit(self.unit)
        return self, other
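
    # Behavior sketch (illustrative, not a verified doctest): mixed
    # resolutions are reconciled by casting to the finer unit, so a
    # second-resolution array plus a nanosecond Timedelta should yield
    # nanoseconds:
    #
    #     >>> dti = pd.date_range("2016-01-01", periods=2).as_unit("s")
    #     >>> (dti + pd.Timedelta(1, "ns")).dtype
    #     dtype('<M8[ns]')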

    # --------------------------------------------------------------

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        if (
            ufunc in [np.isnan, np.isinf, np.isfinite]
            and len(inputs) == 1
            and inputs[0] is self
        ):
            # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
            return getattr(ufunc, method)(self._ndarray, **kwargs)

        return super().__array_ufunc__(ufunc, method, *inputs, **kwargs)

    def _round(self, freq, mode, ambiguous, nonexistent):
        # round the local times
        if is_datetime64tz_dtype(self.dtype):
            # operate on naive timestamps, then convert back to aware
            self = cast("DatetimeArray", self)
            naive = self.tz_localize(None)
            result = naive._round(freq, mode, ambiguous, nonexistent)
            return result.tz_localize(
                self.tz, ambiguous=ambiguous, nonexistent=nonexistent
            )

        values = self.view("i8")
        values = cast(np.ndarray, values)
        offset = to_offset(freq)
        offset.nanos  # raises on non-fixed frequencies
        nanos = delta_to_nanoseconds(offset, self._creso)
        if nanos == 0:
            # GH#52761
            return self.copy()
        result_i8 = round_nsint64(values, mode, nanos)
        result = self._maybe_mask_results(result_i8, fill_value=iNaT)
        result = result.view(self._ndarray.dtype)
        return self._simple_new(result, dtype=self.dtype)
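
    # Failure-mode sketch (illustrative, not a verified doctest): non-fixed
    # frequencies have no well-defined nanosecond span, so ``offset.nanos``
    # raises before any rounding happens:
    #
    #     >>> pd.date_range("2016-01-01", periods=2).round("M")
    #     ValueError: <MonthEnd> is a non-fixed frequency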

    @Appender((_round_doc + _round_example).format(op="round"))
    def round(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ):
        return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)

    @Appender((_round_doc + _floor_example).format(op="floor"))
    def floor(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ):
        return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)

    @Appender((_round_doc + _ceil_example).format(op="ceil"))
    def ceil(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ):
        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)

    # --------------------------------------------------------------
    # Reductions

    def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
        # GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())

    def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
        # GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
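
    # Truthiness sketch (illustrative, not a verified doctest): zero
    # timedeltas are falsey, so with one zero and one nonzero value
    # ``any`` is True and ``all`` is False:
    #
    #     >>> tda = pd.array([pd.Timedelta(0), pd.Timedelta("1s")])
    #     >>> tda.any(), tda.all()
    #     (True, False)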

    # --------------------------------------------------------------
    # Frequency Methods

    def _maybe_clear_freq(self) -> None:
        self._freq = None

    def _with_freq(self, freq):
        """
        Helper to get a view on the same data, with a new freq.

        Parameters
        ----------
        freq : DateOffset, None, or "infer"

        Returns
        -------
        Same type as self
        """
        # GH#29843
        if freq is None:
            # Always valid
            pass
        elif len(self) == 0 and isinstance(freq, BaseOffset):
            # Always valid.  In the TimedeltaArray case, we assume this
            # is a Tick offset.
            pass
        else:
            # As an internal method, we can ensure this assertion always holds
            assert freq == "infer"
            freq = to_offset(self.inferred_freq)

        arr = self.view()
        arr._freq = freq
        return arr
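
    # Internal-usage sketch (illustrative, not a verified doctest):
    #
    #     >>> dti = pd.date_range("2016-01-01", periods=3, freq="D")
    #     >>> dti._data._with_freq(None).freq is None
    #     True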

    # --------------------------------------------------------------

    def factorize(
        self,
        use_na_sentinel: bool = True,
        sort: bool = False,
    ):
        if self.freq is not None:
            # We must be unique, so can short-circuit (and retain freq)
            codes = np.arange(len(self), dtype=np.intp)
            uniques = self.copy()  # TODO: copy or view?
            if sort and self.freq.n < 0:
                codes = codes[::-1]
                uniques = uniques[::-1]
            return codes, uniques

        # FIXME: shouldn't get here; we are ignoring sort
        return super().factorize(use_na_sentinel=use_na_sentinel)
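
    # Short-circuit sketch (illustrative, not a verified doctest): a
    # freq-carrying index is necessarily unique, so the codes are just a
    # range and ``uniques`` keeps the freq:
    #
    #     >>> dti = pd.date_range("2016-01-01", periods=3, freq="D")
    #     >>> codes, uniques = dti.factorize()
    #     >>> codes
    #     array([0, 1, 2])
    #     >>> uniques.freq
    #     <Day>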


# -------------------------------------------------------------------
# Shared Constructor Helpers


def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
    if not hasattr(data, "dtype"):
        # e.g. list, tuple
        if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
        data = np.asarray(data)
        copy = False
    elif isinstance(data, ABCMultiIndex):
        raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
    else:
        data = extract_array(data, extract_numpy=True)

    if isinstance(data, IntegerArray) or (
        isinstance(data, ArrowExtensionArray) and data.dtype.kind in "iu"
    ):
        data = data.to_numpy("int64", na_value=iNaT)
        copy = False
    elif not isinstance(data, (np.ndarray, ExtensionArray)) or isinstance(
        data, ArrowExtensionArray
    ):
        # GH#24539 e.g. xarray, dask object
        data = np.asarray(data)

    elif isinstance(data, ABCCategorical):
        # GH#18664 preserve tz in going DTI->Categorical->DTI
        # TODO: cases where we need to do another pass through maybe_convert_dtype,
        #  e.g. the categories are timedelta64s
        data = data.categories.take(data.codes, fill_value=NaT)._values
        copy = False

    return data, copy
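
# Dispatch sketch (illustrative, not a verified doctest): generators are
# materialized and coerced to an ndarray, and ``copy`` is reset since a
# fresh array was just allocated:
#
#     >>> gen = (x for x in ["2016-01-01", "2016-01-02"])
#     >>> data, copy = ensure_arraylike_for_datetimelike(
#     ...     gen, copy=True, cls_name="DatetimeArray"
#     ... )
#     >>> type(data).__name__, copy
#     ('ndarray', False)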


@overload
def validate_periods(periods: None) -> None:
    ...


@overload
def validate_periods(periods: int | float) -> int:
    ...


def validate_periods(periods: int | float | None) -> int | None:
    """
    If a `periods` argument is passed to the Datetime/Timedelta Array/Index
    constructor, cast it to an integer.

    Parameters
    ----------
    periods : None, float, int

    Returns
    -------
    periods : None or int

    Raises
    ------
    TypeError
        if periods is not None, float, or int
2177 """
2178 if periods is not None:
2179 if lib.is_float(periods):
2180 periods = int(periods)
2181 elif not lib.is_integer(periods):
2182 raise TypeError(f"periods must be a number, got {periods}")
2183 periods = cast(int, periods)
2184 return periods
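
# Casting sketch (illustrative, not a verified doctest):
#
#     >>> validate_periods(3.0)
#     3
#     >>> validate_periods("3")
#     Traceback (most recent call last):
#         ...
#     TypeError: periods must be a number, got 3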


def validate_inferred_freq(
    freq, inferred_freq, freq_infer
) -> tuple[BaseOffset | None, bool]:
    """
    If the user passes a freq and another freq is inferred from passed data,
    require that they match.

    Parameters
    ----------
    freq : DateOffset or None
    inferred_freq : DateOffset or None
    freq_infer : bool

    Returns
    -------
    freq : DateOffset or None
    freq_infer : bool

    Notes
    -----
    We assume at this point that `maybe_infer_freq` has been called, so
    `freq` is either a DateOffset object or None.
    """
    if inferred_freq is not None:
        if freq is not None and freq != inferred_freq:
            raise ValueError(
                f"Inferred frequency {inferred_freq} from passed "
                "values does not conform to passed frequency "
                f"{freq.freqstr}"
            )
        if freq is None:
            freq = inferred_freq
        freq_infer = False

    return freq, freq_infer
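
# Reconciliation sketch (illustrative, not a verified doctest): an inferred
# freq fills in a missing user freq and turns inference off:
#
#     >>> from pandas.tseries.offsets import Day
#     >>> validate_inferred_freq(None, Day(), freq_infer=True)
#     (<Day>, False)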


def maybe_infer_freq(freq):
    """
    Comparing a DateOffset to the string "infer" raises, so we need to
    be careful about comparisons.  Make a dummy variable `freq_infer` to
    signify the case where the given freq is "infer" and set freq to None
    to avoid comparison trouble later on.

    Parameters
    ----------
    freq : {DateOffset, None, str}

    Returns
    -------
    freq : {DateOffset, None}
    freq_infer : bool
        Whether we should inherit the freq of passed data.
    """
    freq_infer = False
    if not isinstance(freq, BaseOffset):
        # if a passed freq is None, don't infer automatically
        if freq != "infer":
            freq = to_offset(freq)
        else:
            freq_infer = True
            freq = None
    return freq, freq_infer
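
# Normalization sketch (illustrative, not a verified doctest):
#
#     >>> maybe_infer_freq("infer")
#     (None, True)
#     >>> maybe_infer_freq("D")
#     (<Day>, False)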


def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype) -> str:
    """
    Return the unit str corresponding to the dtype's resolution.

    Parameters
    ----------
    dtype : DatetimeTZDtype or np.dtype
        If np.dtype, we assume it is a datetime64 dtype.

    Returns
    -------
    str
    """
    if isinstance(dtype, DatetimeTZDtype):
        return dtype.unit
    return np.datetime_data(dtype)[0]
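
# Resolution sketch (illustrative, not a verified doctest):
#
#     >>> dtype_to_unit(np.dtype("datetime64[ms]"))
#     'ms'
#     >>> dtype_to_unit(DatetimeTZDtype(tz="UTC", unit="s"))
#     's'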