1from __future__ import annotations
2
3from datetime import (
4 datetime,
5 timedelta,
6 tzinfo,
7)
8from typing import (
9 TYPE_CHECKING,
10 cast,
11 overload,
12)
13import warnings
14
15import numpy as np
16
17from pandas._libs import (
18 lib,
19 tslib,
20)
21from pandas._libs.tslibs import (
22 BaseOffset,
23 NaT,
24 NaTType,
25 Resolution,
26 Timestamp,
27 astype_overflowsafe,
28 fields,
29 get_resolution,
30 get_supported_dtype,
31 get_unit_from_dtype,
32 ints_to_pydatetime,
33 is_date_array_normalized,
34 is_supported_dtype,
35 is_unitless,
36 normalize_i8_timestamps,
37 timezones,
38 to_offset,
39 tz_convert_from_utc,
40 tzconversion,
41)
42from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
43from pandas.errors import PerformanceWarning
44from pandas.util._exceptions import find_stack_level
45from pandas.util._validators import validate_inclusive
46
47from pandas.core.dtypes.common import (
48 DT64NS_DTYPE,
49 INT64_DTYPE,
50 is_bool_dtype,
51 is_float_dtype,
52 is_string_dtype,
53 pandas_dtype,
54)
55from pandas.core.dtypes.dtypes import (
56 DatetimeTZDtype,
57 ExtensionDtype,
58 PeriodDtype,
59)
60from pandas.core.dtypes.missing import isna
61
62from pandas.core.arrays import datetimelike as dtl
63from pandas.core.arrays._ranges import generate_regular_range
64import pandas.core.common as com
65
66from pandas.tseries.frequencies import get_period_alias
67from pandas.tseries.offsets import (
68 Day,
69 Tick,
70)
71
72if TYPE_CHECKING:
73 from collections.abc import Iterator
74
75 from pandas._typing import (
76 ArrayLike,
77 DateTimeErrorChoices,
78 DtypeObj,
79 IntervalClosedType,
80 Self,
81 TimeAmbiguous,
82 TimeNonexistent,
83 npt,
84 )
85
86 from pandas import DataFrame
87 from pandas.core.arrays import PeriodArray
88
89
90_ITER_CHUNKSIZE = 10_000
91
92
93@overload
94def tz_to_dtype(tz: tzinfo, unit: str = ...) -> DatetimeTZDtype:
95 ...
96
97
98@overload
99def tz_to_dtype(tz: None, unit: str = ...) -> np.dtype[np.datetime64]:
100 ...
101
102
103def tz_to_dtype(
104 tz: tzinfo | None, unit: str = "ns"
105) -> np.dtype[np.datetime64] | DatetimeTZDtype:
106 """
107 Return a datetime64[ns] dtype appropriate for the given timezone.
108
109 Parameters
110 ----------
111 tz : tzinfo or None
112 unit : str, default "ns"
113
114 Returns
115 -------
116 np.dtype or Datetime64TZDType
117 """
118 if tz is None:
119 return np.dtype(f"M8[{unit}]")
120 else:
121 return DatetimeTZDtype(tz=tz, unit=unit)
122
123
124def _field_accessor(name: str, field: str, docstring: str | None = None):
125 def f(self):
126 values = self._local_timestamps()
127
128 if field in self._bool_ops:
129 result: np.ndarray
130
131 if field.endswith(("start", "end")):
132 freq = self.freq
133 month_kw = 12
134 if freq:
135 kwds = freq.kwds
136 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
137
138 result = fields.get_start_end_field(
139 values, field, self.freqstr, month_kw, reso=self._creso
140 )
141 else:
142 result = fields.get_date_field(values, field, reso=self._creso)
143
144 # these return a boolean by-definition
145 return result
146
147 if field in self._object_ops:
148 result = fields.get_date_name_field(values, field, reso=self._creso)
149 result = self._maybe_mask_results(result, fill_value=None)
150
151 else:
152 result = fields.get_date_field(values, field, reso=self._creso)
153 result = self._maybe_mask_results(
154 result, fill_value=None, convert="float64"
155 )
156
157 return result
158
159 f.__name__ = name
160 f.__doc__ = docstring
161 return property(f)
162
163
164# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
165# incompatible with definition in base class "ExtensionArray"
166class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc]
167 """
168 Pandas ExtensionArray for tz-naive or tz-aware datetime data.
169
170 .. warning::
171
172 DatetimeArray is currently experimental, and its API may change
173 without warning. In particular, :attr:`DatetimeArray.dtype` is
174 expected to change to always be an instance of an ``ExtensionDtype``
175 subclass.
176
177 Parameters
178 ----------
179 values : Series, Index, DatetimeArray, ndarray
180 The datetime data.
181
182 For DatetimeArray `values` (or a Series or Index boxing one),
183 `dtype` and `freq` will be extracted from `values`.
184
185 dtype : numpy.dtype or DatetimeTZDtype
186 Note that the only NumPy dtype allowed is 'datetime64[ns]'.
187 freq : str or Offset, optional
188 The frequency.
189 copy : bool, default False
190 Whether to copy the underlying array of values.
191
192 Attributes
193 ----------
194 None
195
196 Methods
197 -------
198 None
199
200 Examples
201 --------
202 >>> pd.arrays.DatetimeArray._from_sequence(
203 ... pd.DatetimeIndex(['2023-01-01', '2023-01-02'], freq='D'))
204 <DatetimeArray>
205 ['2023-01-01 00:00:00', '2023-01-02 00:00:00']
206 Length: 2, dtype: datetime64[ns]
207 """
208
    # sentinel used by pandas type-introspection machinery
    _typ = "datetimearray"
    # NA fill value for the backing ndarray
    _internal_fill_value = np.datetime64("NaT", "ns")
    # scalar types accepted as datetime-like by this array
    _recognized_scalars = (datetime, np.datetime64)
    # predicate: dtype is numpy datetime64-based or tz-aware DatetimeTZDtype
    _is_recognized_dtype = lambda x: lib.is_np_dtype(x, "M") or isinstance(
        x, DatetimeTZDtype
    )
    _infer_matches = ("datetime", "datetime64", "date")

    @property
    def _scalar_type(self) -> type[Timestamp]:
        # box class for individual elements of this array
        return Timestamp

    # define my properties & methods for delegation
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    _object_ops: list[str] = ["freq", "tz"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    _other_ops: list[str] = ["date", "time", "timetz"]
    _datetimelike_ops: list[str] = (
        _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
    )
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
        "as_unit",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    # backing dtype: plain numpy M8 when tz-naive, DatetimeTZDtype when aware
    _dtype: np.dtype[np.datetime64] | DatetimeTZDtype
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__
280
    @classmethod
    def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
        """
        Construct from a sequence of scalars, rejecting input whose inferred
        type is not datetime-like.

        Raises
        ------
        ValueError
            If ``scalars`` is not inferred as "datetime" or "datetime64".
        """
        if lib.infer_dtype(scalars, skipna=True) not in ["datetime", "datetime64"]:
            # TODO: require any NAs be valid-for-DTA
            # TODO: if dtype is passed, check for tzawareness compat?
            raise ValueError
        return cls._from_sequence(scalars, dtype=dtype)
288
    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        # Validate that ``dtype`` is a legal datetime64/DatetimeTZDtype and that
        # its resolution agrees with the resolution of ``values``.
        dtype = _validate_dt64_dtype(dtype)
        _validate_dt64_dtype(values.dtype)
        if isinstance(dtype, np.dtype):
            if values.dtype != dtype:
                raise ValueError("Values resolution does not match dtype.")
        else:
            # DatetimeTZDtype: compare the numpy unit embedded in values.dtype
            vunit = np.datetime_data(values.dtype)[0]
            if vunit != dtype.unit:
                raise ValueError("Values resolution does not match dtype.")
        return dtype
302
    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: npt.NDArray[np.datetime64],
        freq: BaseOffset | None = None,
        dtype: np.dtype[np.datetime64] | DatetimeTZDtype = DT64NS_DTYPE,
    ) -> Self:
        """
        Fastpath constructor: wrap an already-validated datetime64 ndarray.

        Inputs are assumed consistent; invariants are only checked with
        assertions (stripped under ``python -O``).
        """
        assert isinstance(values, np.ndarray)
        assert dtype.kind == "M"
        if isinstance(dtype, np.dtype):
            assert dtype == values.dtype
            assert not is_unitless(dtype)
        else:
            # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
            # then values.dtype should be M8[us].
            assert dtype._creso == get_unit_from_dtype(values.dtype)

        result = super()._simple_new(values, dtype)
        result._freq = freq
        return result
324
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        # Strict public constructor; all parsing/validation happens in the
        # non-strict variant below.
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
328
    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        *,
        dtype=None,
        copy: bool = False,
        tz=lib.no_default,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous: TimeAmbiguous = "raise",
    ) -> Self:
        """
        A non-strict version of _from_sequence, called from DatetimeIndex.__new__.
        """

        # if the user either explicitly passes tz=None or a tz-naive dtype, we
        # disallows inferring a tz.
        explicit_tz_none = tz is None
        if tz is lib.no_default:
            tz = None
        else:
            tz = timezones.maybe_get_tz(tz)

        dtype = _validate_dt64_dtype(dtype)
        # if dtype has an embedded tz, capture it
        tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

        # resolution requested via the user-passed dtype, if any
        unit = None
        if dtype is not None:
            unit = dtl.dtype_to_unit(dtype)

        data, copy = dtl.ensure_arraylike_for_datetimelike(
            data, copy, cls_name="DatetimeArray"
        )
        inferred_freq = None
        if isinstance(data, DatetimeArray):
            inferred_freq = data.freq

        subarr, tz = _sequence_to_dt64(
            data,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
            out_unit=unit,
        )
        # We have to call this again after possibly inferring a tz above
        _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
        if tz is not None and explicit_tz_none:
            raise ValueError(
                "Passed data is timezone-aware, incompatible with 'tz=None'. "
                "Use obj.tz_localize(None) instead."
            )

        data_unit = np.datetime_data(subarr.dtype)[0]
        data_dtype = tz_to_dtype(tz, data_unit)
        result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype)
        if unit is not None and unit != result.unit:
            # If unit was specified in user-passed dtype, cast to it here
            result = result.as_unit(unit)

        validate_kwds = {"ambiguous": ambiguous}
        result._maybe_pin_freq(freq, validate_kwds)
        return result
396
    @classmethod
    def _generate_range(
        cls,
        start,
        end,
        periods: int | None,
        freq,
        tz=None,
        normalize: bool = False,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
        inclusive: IntervalClosedType = "both",
        *,
        unit: str | None = None,
    ) -> Self:
        """
        Generate a DatetimeArray from start/end/periods/freq.

        Exactly three of start, end, periods, freq must be specified.
        With a freq, values are generated by offset arithmetic; without one,
        values are linearly spaced between start and end.  ``inclusive``
        controls whether the endpoints are kept.
        """
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        if unit is not None:
            if unit not in ["s", "ms", "us", "ns"]:
                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
        else:
            unit = "ns"

        # round_ok=False: a start/end that doesn't fit the requested unit
        # should raise rather than be silently rounded
        if start is not None:
            start = start.as_unit(unit, round_ok=False)
        if end is not None:
            end = end.as_unit(unit, round_ok=False)

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start = _maybe_localize_point(start, freq, tz, ambiguous, nonexistent)
            end = _maybe_localize_point(end, freq, tz, ambiguous, nonexistent)

        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                i8values = generate_regular_range(start, end, periods, freq, unit=unit)
            else:
                # non-fixed offsets: materialize each Timestamp individually
                xdr = _generate_range(
                    start=start, end=end, periods=periods, offset=freq, unit=unit
                )
                i8values = np.array([x._value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:
                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    creso = abbrev_to_npy_unit(unit)
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values,
                        tz,
                        ambiguous=ambiguous,
                        nonexistent=nonexistent,
                        creso=creso,
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            periods = cast(int, periods)
            i8values = (
                np.linspace(0, end._value - start._value, periods, dtype="int64")
                + start._value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints according to ``inclusive``
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start)._value
            end_i8 = Timestamp(end)._value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view(f"datetime64[{unit}]")
        dtype = tz_to_dtype(tz, unit=unit)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
521
522 # -----------------------------------------------------------------
523 # DatetimeLike Interface
524
525 def _unbox_scalar(self, value) -> np.datetime64:
526 if not isinstance(value, self._scalar_type) and value is not NaT:
527 raise ValueError("'value' should be a Timestamp.")
528 self._check_compatible_with(value)
529 if value is NaT:
530 return np.datetime64(value._value, self.unit)
531 else:
532 return value.as_unit(self.unit).asm8
533
    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        # Parse a string into a scalar carrying our timezone (if any).
        return Timestamp(value, tz=self.tz)
536
537 def _check_compatible_with(self, other) -> None:
538 if other is NaT:
539 return
540 self._assert_tzawareness_compat(other)
541
542 # -----------------------------------------------------------------
543 # Descriptive Properties
544
    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        # GH#42228
        # Box a raw np.datetime64 as a Timestamp at our resolution and tz.
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz)
        return ts
550
    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype[np.datetime64] | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

            A future version of pandas will change dtype to never be a
            ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
            always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        return self._dtype
575
    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.

        Examples
        --------
        For Series:

        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-02-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.tz
        datetime.timezone.utc

        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
        ...                         "2/1/2020 11:00:00+00:00"])
        >>> idx.tz
        datetime.timezone.utc
        """
        # GH 18595
        # The tz lives on the dtype (DatetimeTZDtype); tz-naive numpy dtypes
        # have no "tz" attribute, hence the getattr default.
        return getattr(self.dtype, "tz", None)
608
    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        # Assignment is always an error; the tz can only change via the
        # dedicated methods named in the message.
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )
616
    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz
623
    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._creso)
630
    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Finest Resolution needed to represent the wall-times in this array.
        return get_resolution(self.asi8, self.tz, reso=self._creso)
634
635 # ----------------------------------------------------------------
636 # Array-Like / EA-Interface Methods
637
638 def __array__(self, dtype=None, copy=None) -> np.ndarray:
639 if dtype is None and self.tz:
640 # The default for tz-aware is object, to preserve tz info
641 dtype = object
642
643 return super().__array__(dtype=dtype, copy=copy)
644
645 def __iter__(self) -> Iterator:
646 """
647 Return an iterator over the boxed values
648
649 Yields
650 ------
651 tstamp : Timestamp
652 """
653 if self.ndim > 1:
654 for i in range(len(self)):
655 yield self[i]
656 else:
657 # convert in chunks of 10k for efficiency
658 data = self.asi8
659 length = len(self)
660 chunksize = _ITER_CHUNKSIZE
661 chunks = (length // chunksize) + 1
662
663 for i in range(chunks):
664 start_i = i * chunksize
665 end_i = min((i + 1) * chunksize, length)
666 converted = ints_to_pydatetime(
667 data[start_i:end_i],
668 tz=self.tz,
669 box="timestamp",
670 reso=self._creso,
671 )
672 yield from converted
673
    def astype(self, dtype, copy: bool = True):
        """
        Cast to ``dtype``.

        Cases handled here: same dtype (optionally copying), tz-aware unit
        conversion via DatetimeTZDtype, numpy datetime64 unit conversion,
        and PeriodDtype via ``to_period``.  Naive->aware and aware->naive
        casts raise TypeError with guidance toward tz_localize/tz_convert.
        Everything else is delegated to DatetimeLikeArrayMixin.astype.
        """
        # We handle
        #   --> datetime
        #   --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if dtype == self.dtype:
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, ExtensionDtype):
            if not isinstance(dtype, DatetimeTZDtype):
                # e.g. Sparse[datetime64[ns]]
                return super().astype(dtype, copy=copy)
            elif self.tz is None:
                # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
                # the Series behavior which did
                # values.tz_localize("UTC").tz_convert(dtype.tz)
                raise TypeError(
                    "Cannot use .astype to convert from timezone-naive dtype to "
                    "timezone-aware dtype. Use obj.tz_localize instead or "
                    "series.dt.tz_localize instead"
                )
            else:
                # tzaware unit conversion e.g. datetime64[s, UTC]
                np_dtype = np.dtype(dtype.str)
                res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
                return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

        elif (
            self.tz is None
            and lib.is_np_dtype(dtype, "M")
            and not is_unitless(dtype)
            and is_supported_dtype(dtype)
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif self.tz is not None and lib.is_np_dtype(dtype, "M"):
            # pre-2.0 behavior for DTA/DTI was
            # values.tz_convert("UTC").tz_localize(None), which did not match
            # the Series behavior
            raise TypeError(
                "Cannot use .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype. Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead."
            )

        elif (
            self.tz is None
            and lib.is_np_dtype(dtype, "M")
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            raise TypeError(
                "Casting to unit-less dtype 'datetime64' is not supported. "
                "Pass e.g. 'datetime64[ns]' instead."
            )

        elif isinstance(dtype, PeriodDtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
740
741 # -----------------------------------------------------------------
742 # Rendering Methods
743
    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Render each element as a string, using ``na_rep`` for missing values.
        """
        if date_format is None and self._is_dates_only:
            # Only dates and no timezone: provide a default format
            date_format = "%Y-%m-%d"

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso
        )
754
755 # -----------------------------------------------------------------
756 # Comparison Methods
757
758 def _has_same_tz(self, other) -> bool:
759 # vzone shouldn't be None if value is non-datetime like
760 if isinstance(other, np.datetime64):
761 # convert to Timestamp as np.datetime64 doesn't have tz attr
762 other = Timestamp(other)
763
764 if not hasattr(other, "tzinfo"):
765 return False
766 other_tz = other.tzinfo
767 return timezones.tz_compare(self.tzinfo, other_tz)
768
    def _assert_tzawareness_compat(self, other) -> None:
        # adapted from _Timestamp._assert_tzawareness_compat
        # Raise TypeError when one side is tz-aware and the other is not.
        other_tz = getattr(other, "tzinfo", None)
        other_dtype = getattr(other, "dtype", None)

        if isinstance(other_dtype, DatetimeTZDtype):
            # Get tzinfo from Series dtype
            other_tz = other.dtype.tz
        if other is NaT:
            # pd.NaT quacks both aware and naive
            pass
        elif self.tz is None:
            if other_tz is not None:
                raise TypeError(
                    "Cannot compare tz-naive and tz-aware datetime-like objects."
                )
        elif other_tz is None:
            raise TypeError(
                "Cannot compare tz-naive and tz-aware datetime-like objects"
            )
789
790 # -----------------------------------------------------------------
791 # Arithmetic Methods
792
    def _add_offset(self, offset: BaseOffset) -> Self:
        """
        Add a non-Tick DateOffset to each element.

        Tz-aware arrays are operated on as wall-clock (tz-naive) values and
        re-localized afterwards.  If the offset has no vectorized path, fall
        back to elementwise object addition with a PerformanceWarning.
        """
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            values = self.tz_localize(None)
        else:
            values = self

        try:
            res_values = offset._apply_array(values._ndarray)
            if res_values.dtype.kind == "i":
                # error: Argument 1 to "view" of "ndarray" has incompatible type
                # "dtype[datetime64] | DatetimeTZDtype"; expected
                # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]"
                res_values = res_values.view(values.dtype)  # type: ignore[arg-type]
        except NotImplementedError:
            # Elementwise fallback path
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            res_values = self.astype("O") + offset
            # TODO(GH#55564): as_unit will be unnecessary
            result = type(self)._from_sequence(res_values).as_unit(self.unit)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = type(self)._simple_new(res_values, dtype=res_values.dtype)
            if offset.normalize:
                result = result.normalize()
                result._freq = None

            if self.tz is not None:
                result = result.tz_localize(self.tz)

        return result
831
832 # -----------------------------------------------------------------
833 # Timezone Conversion and Localization Methods
834
835 def _local_timestamps(self) -> npt.NDArray[np.int64]:
836 """
837 Convert to an i8 (unix-like nanosecond timestamp) representation
838 while keeping the local timezone and not using UTC.
839 This is used to calculate time-of-day information as if the timestamps
840 were timezone-naive.
841 """
842 if self.tz is None or timezones.is_utc(self.tz):
843 # Avoid the copy that would be made in tzconversion
844 return self.asi8
845 return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
846
    def tz_convert(self, tz) -> Self:
        """
        Convert tz-aware Datetime Array/Index from one time zone to another.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone for time. Corresponding timestamps would be converted
            to this time zone of the Datetime Array/Index. A `tz` of None will
            convert to UTC and remove the timezone information.

        Returns
        -------
        Array or Index

        Raises
        ------
        TypeError
            If Datetime Array/Index is tz-naive.

        See Also
        --------
        DatetimeIndex.tz : A timezone that has a variable offset from UTC.
        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
            given time zone, or remove timezone from a tz-aware DatetimeIndex.

        Examples
        --------
        With the `tz` parameter, we can change the DatetimeIndex
        to other time zones:

        >>> dti = pd.date_range(start='2014-08-01 09:00',
        ...                     freq='h', periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='h')

        >>> dti.tz_convert('US/Central')
        DatetimeIndex(['2014-08-01 02:00:00-05:00',
                       '2014-08-01 03:00:00-05:00',
                       '2014-08-01 04:00:00-05:00'],
                      dtype='datetime64[ns, US/Central]', freq='h')

        With the ``tz=None``, we can remove the timezone (after converting
        to UTC if necessary):

        >>> dti = pd.date_range(start='2014-08-01 09:00', freq='h',
        ...                     periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='h')

        >>> dti.tz_convert(None)
        DatetimeIndex(['2014-08-01 07:00:00',
                       '2014-08-01 08:00:00',
                       '2014-08-01 09:00:00'],
                      dtype='datetime64[ns]', freq='h')
        """
        tz = timezones.maybe_get_tz(tz)

        if self.tz is None:
            # tz naive, use tz_localize
            raise TypeError(
                "Cannot convert tz-naive timestamps, use tz_localize to localize"
            )

        # No conversion since timestamps are all UTC to begin with
        # Only the dtype (i.e. the tz used for rendering) changes; the
        # underlying i8/ndarray data is reused as-is.
        dtype = tz_to_dtype(tz, unit=self.unit)
        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
922
923 @dtl.ravel_compat
924 def tz_localize(
925 self,
926 tz,
927 ambiguous: TimeAmbiguous = "raise",
928 nonexistent: TimeNonexistent = "raise",
929 ) -> Self:
930 """
931 Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.
932
933 This method takes a time zone (tz) naive Datetime Array/Index object
934 and makes this time zone aware. It does not move the time to another
935 time zone.
936
937 This method can also be used to do the inverse -- to create a time
938 zone unaware object from an aware object. To that end, pass `tz=None`.
939
940 Parameters
941 ----------
942 tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
943 Time zone to convert timestamps to. Passing ``None`` will
944 remove the time zone information preserving local time.
945 ambiguous : 'infer', 'NaT', bool array, default 'raise'
946 When clocks moved backward due to DST, ambiguous times may arise.
947 For example in Central European Time (UTC+01), when going from
948 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
949 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
950 `ambiguous` parameter dictates how ambiguous times should be
951 handled.
952
953 - 'infer' will attempt to infer fall dst-transition hours based on
954 order
955 - bool-ndarray where True signifies a DST time, False signifies a
956 non-DST time (note that this flag is only applicable for
957 ambiguous times)
958 - 'NaT' will return NaT where there are ambiguous times
959 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
960 times.
961
962 nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
963default 'raise'
964 A nonexistent time does not exist in a particular timezone
965 where clocks moved forward due to DST.
966
967 - 'shift_forward' will shift the nonexistent time forward to the
968 closest existing time
969 - 'shift_backward' will shift the nonexistent time backward to the
970 closest existing time
971 - 'NaT' will return NaT where there are nonexistent times
972 - timedelta objects will shift nonexistent times by the timedelta
973 - 'raise' will raise an NonExistentTimeError if there are
974 nonexistent times.
975
976 Returns
977 -------
978 Same type as self
979 Array/Index converted to the specified time zone.
980
981 Raises
982 ------
983 TypeError
984 If the Datetime Array/Index is tz-aware and tz is not None.
985
986 See Also
987 --------
988 DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
989 one time zone to another.
990
991 Examples
992 --------
993 >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
994 >>> tz_naive
995 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
996 '2018-03-03 09:00:00'],
997 dtype='datetime64[ns]', freq='D')
998
999 Localize DatetimeIndex in US/Eastern time zone:
1000
1001 >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
1002 >>> tz_aware
1003 DatetimeIndex(['2018-03-01 09:00:00-05:00',
1004 '2018-03-02 09:00:00-05:00',
1005 '2018-03-03 09:00:00-05:00'],
1006 dtype='datetime64[ns, US/Eastern]', freq=None)
1007
1008 With the ``tz=None``, we can remove the time zone information
1009 while keeping the local time (not converted to UTC):
1010
1011 >>> tz_aware.tz_localize(None)
1012 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
1013 '2018-03-03 09:00:00'],
1014 dtype='datetime64[ns]', freq=None)
1015
1016 Be careful with DST changes. When there is sequential data, pandas can
1017 infer the DST time:
1018
1019 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
1020 ... '2018-10-28 02:00:00',
1021 ... '2018-10-28 02:30:00',
1022 ... '2018-10-28 02:00:00',
1023 ... '2018-10-28 02:30:00',
1024 ... '2018-10-28 03:00:00',
1025 ... '2018-10-28 03:30:00']))
1026 >>> s.dt.tz_localize('CET', ambiguous='infer')
1027 0 2018-10-28 01:30:00+02:00
1028 1 2018-10-28 02:00:00+02:00
1029 2 2018-10-28 02:30:00+02:00
1030 3 2018-10-28 02:00:00+01:00
1031 4 2018-10-28 02:30:00+01:00
1032 5 2018-10-28 03:00:00+01:00
1033 6 2018-10-28 03:30:00+01:00
1034 dtype: datetime64[ns, CET]
1035
1036 In some cases, inferring the DST is impossible. In such cases, you can
1037 pass an ndarray to the ambiguous parameter to set the DST explicitly
1038
1039 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
1040 ... '2018-10-28 02:36:00',
1041 ... '2018-10-28 03:46:00']))
1042 >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
1043 0 2018-10-28 01:20:00+02:00
1044 1 2018-10-28 02:36:00+02:00
1045 2 2018-10-28 03:46:00+01:00
1046 dtype: datetime64[ns, CET]
1047
1048 If the DST transition causes nonexistent times, you can shift these
1049 dates forward or backwards with a timedelta object or `'shift_forward'`
        or `'shift_backward'`.
1051
1052 >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
1053 ... '2015-03-29 03:30:00']))
1054 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
1055 0 2015-03-29 03:00:00+02:00
1056 1 2015-03-29 03:30:00+02:00
1057 dtype: datetime64[ns, Europe/Warsaw]
1058
1059 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
1060 0 2015-03-29 01:59:59.999999999+01:00
1061 1 2015-03-29 03:30:00+02:00
1062 dtype: datetime64[ns, Europe/Warsaw]
1063
1064 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h'))
1065 0 2015-03-29 03:30:00+02:00
1066 1 2015-03-29 03:30:00+02:00
1067 dtype: datetime64[ns, Europe/Warsaw]
1068 """
1069 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
1070 if nonexistent not in nonexistent_options and not isinstance(
1071 nonexistent, timedelta
1072 ):
1073 raise ValueError(
1074 "The nonexistent argument must be one of 'raise', "
1075 "'NaT', 'shift_forward', 'shift_backward' or "
1076 "a timedelta object"
1077 )
1078
1079 if self.tz is not None:
1080 if tz is None:
1081 new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
1082 else:
1083 raise TypeError("Already tz-aware, use tz_convert to convert.")
1084 else:
1085 tz = timezones.maybe_get_tz(tz)
1086 # Convert to UTC
1087
1088 new_dates = tzconversion.tz_localize_to_utc(
1089 self.asi8,
1090 tz,
1091 ambiguous=ambiguous,
1092 nonexistent=nonexistent,
1093 creso=self._creso,
1094 )
1095 new_dates_dt64 = new_dates.view(f"M8[{self.unit}]")
1096 dtype = tz_to_dtype(tz, unit=self.unit)
1097
1098 freq = None
1099 if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates_dt64[0])):
1100 # we can preserve freq
1101 # TODO: Also for fixed-offsets
1102 freq = self.freq
1103 elif tz is None and self.tz is None:
1104 # no-op
1105 freq = self.freq
1106 return self._simple_new(new_dates_dt64, dtype=dtype, freq=freq)
1107
1108 # ----------------------------------------------------------------
1109 # Conversion Methods - Vectorized analogues of Timestamp methods
1110
1111 def to_pydatetime(self) -> npt.NDArray[np.object_]:
1112 """
1113 Return an ndarray of ``datetime.datetime`` objects.
1114
1115 Returns
1116 -------
1117 numpy.ndarray
1118
1119 Examples
1120 --------
1121 >>> idx = pd.date_range('2018-02-27', periods=3)
1122 >>> idx.to_pydatetime()
1123 array([datetime.datetime(2018, 2, 27, 0, 0),
1124 datetime.datetime(2018, 2, 28, 0, 0),
1125 datetime.datetime(2018, 3, 1, 0, 0)], dtype=object)
1126 """
1127 return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)
1128
1129 def normalize(self) -> Self:
1130 """
1131 Convert times to midnight.
1132
1133 The time component of the date-time is converted to midnight i.e.
1134 00:00:00. This is useful in cases, when the time does not matter.
1135 Length is unaltered. The timezones are unaffected.
1136
1137 This method is available on Series with datetime values under
1138 the ``.dt`` accessor, and directly on Datetime Array/Index.
1139
1140 Returns
1141 -------
1142 DatetimeArray, DatetimeIndex or Series
1143 The same type as the original data. Series will have the same
1144 name and index. DatetimeIndex will have the same name.
1145
1146 See Also
1147 --------
1148 floor : Floor the datetimes to the specified freq.
1149 ceil : Ceil the datetimes to the specified freq.
1150 round : Round the datetimes to the specified freq.
1151
1152 Examples
1153 --------
1154 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='h',
1155 ... periods=3, tz='Asia/Calcutta')
1156 >>> idx
1157 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1158 '2014-08-01 11:00:00+05:30',
1159 '2014-08-01 12:00:00+05:30'],
1160 dtype='datetime64[ns, Asia/Calcutta]', freq='h')
1161 >>> idx.normalize()
1162 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1163 '2014-08-01 00:00:00+05:30',
1164 '2014-08-01 00:00:00+05:30'],
1165 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1166 """
1167 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso)
1168 dt64_values = new_values.view(self._ndarray.dtype)
1169
1170 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
1171 dta = dta._with_freq("infer")
1172 if self.tz is not None:
1173 dta = dta.tz_localize(self.tz)
1174 return dta
1175
    def to_period(self, freq=None) -> PeriodArray:
        """
        Cast to PeriodArray/PeriodIndex at a particular frequency.

        Converts DatetimeArray/Index to PeriodArray/PeriodIndex.

        Parameters
        ----------
        freq : str or Period, optional
            One of pandas' :ref:`period aliases <timeseries.period_aliases>`
            or a Period object. Will be inferred by default.

        Returns
        -------
        PeriodArray/PeriodIndex

        Raises
        ------
        ValueError
            When converting a DatetimeArray/Index with non-regular values,
            so that a frequency cannot be inferred.

        See Also
        --------
        PeriodIndex: Immutable ndarray holding ordinal values.
        DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.

        Examples
        --------
        >>> df = pd.DataFrame({"y": [1, 2, 3]},
        ...                   index=pd.to_datetime(["2000-03-31 00:00:00",
        ...                                         "2000-05-31 00:00:00",
        ...                                         "2000-08-31 00:00:00"]))
        >>> df.index.to_period("M")
        PeriodIndex(['2000-03', '2000-05', '2000-08'],
                    dtype='period[M]')

        Infer the daily frequency

        >>> idx = pd.date_range("2017-01-01", periods=2)
        >>> idx.to_period()
        PeriodIndex(['2017-01-01', '2017-01-02'],
                    dtype='period[D]')
        """
        from pandas.core.arrays import PeriodArray

        if self.tz is not None:
            # Periods have no timezone concept; conversion uses wall times,
            # so warn that the tz is silently discarded.
            warnings.warn(
                "Converting to PeriodArray/Index representation "
                "will drop timezone information.",
                UserWarning,
                stacklevel=find_stack_level(),
            )

        if freq is None:
            # Prefer the explicit freq attribute; fall back to inferring one
            # from the values themselves.
            freq = self.freqstr or self.inferred_freq
            if isinstance(self.freq, BaseOffset) and hasattr(
                self.freq, "_period_dtype_code"
            ):
                # Offsets that carry a period dtype code map directly onto a
                # Period frequency string.
                freq = PeriodDtype(self.freq)._freqstr

            if freq is None:
                raise ValueError(
                    "You must pass a freq argument as current index has none."
                )

            res = get_period_alias(freq)

            # https://github.com/pandas-dev/pandas/issues/33358
            if res is None:
                res = freq

            freq = res
        return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
1250
1251 # -----------------------------------------------------------------
1252 # Properties - Vectorized Timestamp Properties/Methods
1253
1254 def month_name(self, locale=None) -> npt.NDArray[np.object_]:
1255 """
1256 Return the month names with specified locale.
1257
1258 Parameters
1259 ----------
1260 locale : str, optional
1261 Locale determining the language in which to return the month name.
1262 Default is English locale (``'en_US.utf8'``). Use the command
1263 ``locale -a`` on your terminal on Unix systems to find your locale
1264 language code.
1265
1266 Returns
1267 -------
1268 Series or Index
1269 Series or Index of month names.
1270
1271 Examples
1272 --------
1273 >>> s = pd.Series(pd.date_range(start='2018-01', freq='ME', periods=3))
1274 >>> s
1275 0 2018-01-31
1276 1 2018-02-28
1277 2 2018-03-31
1278 dtype: datetime64[ns]
1279 >>> s.dt.month_name()
1280 0 January
1281 1 February
1282 2 March
1283 dtype: object
1284
1285 >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
1286 >>> idx
1287 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1288 dtype='datetime64[ns]', freq='ME')
1289 >>> idx.month_name()
1290 Index(['January', 'February', 'March'], dtype='object')
1291
1292 Using the ``locale`` parameter you can set a different locale language,
1293 for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month
1294 names in Brazilian Portuguese language.
1295
1296 >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
1297 >>> idx
1298 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1299 dtype='datetime64[ns]', freq='ME')
1300 >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
1301 Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
1302 """
1303 values = self._local_timestamps()
1304
1305 result = fields.get_date_name_field(
1306 values, "month_name", locale=locale, reso=self._creso
1307 )
1308 result = self._maybe_mask_results(result, fill_value=None)
1309 return result
1310
1311 def day_name(self, locale=None) -> npt.NDArray[np.object_]:
1312 """
1313 Return the day names with specified locale.
1314
1315 Parameters
1316 ----------
1317 locale : str, optional
1318 Locale determining the language in which to return the day name.
1319 Default is English locale (``'en_US.utf8'``). Use the command
1320 ``locale -a`` on your terminal on Unix systems to find your locale
1321 language code.
1322
1323 Returns
1324 -------
1325 Series or Index
1326 Series or Index of day names.
1327
1328 Examples
1329 --------
1330 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
1331 >>> s
1332 0 2018-01-01
1333 1 2018-01-02
1334 2 2018-01-03
1335 dtype: datetime64[ns]
1336 >>> s.dt.day_name()
1337 0 Monday
1338 1 Tuesday
1339 2 Wednesday
1340 dtype: object
1341
1342 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1343 >>> idx
1344 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1345 dtype='datetime64[ns]', freq='D')
1346 >>> idx.day_name()
1347 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1348
1349 Using the ``locale`` parameter you can set a different locale language,
1350 for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day
1351 names in Brazilian Portuguese language.
1352
1353 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1354 >>> idx
1355 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1356 dtype='datetime64[ns]', freq='D')
1357 >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
1358 Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
1359 """
1360 values = self._local_timestamps()
1361
1362 result = fields.get_date_name_field(
1363 values, "day_name", locale=locale, reso=self._creso
1364 )
1365 result = self._maybe_mask_results(result, fill_value=None)
1366 return result
1367
1368 @property
1369 def time(self) -> npt.NDArray[np.object_]:
1370 """
1371 Returns numpy array of :class:`datetime.time` objects.
1372
1373 The time part of the Timestamps.
1374
1375 Examples
1376 --------
1377 For Series:
1378
1379 >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
1380 >>> s = pd.to_datetime(s)
1381 >>> s
1382 0 2020-01-01 10:00:00+00:00
1383 1 2020-02-01 11:00:00+00:00
1384 dtype: datetime64[ns, UTC]
1385 >>> s.dt.time
1386 0 10:00:00
1387 1 11:00:00
1388 dtype: object
1389
1390 For DatetimeIndex:
1391
1392 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
1393 ... "2/1/2020 11:00:00+00:00"])
1394 >>> idx.time
1395 array([datetime.time(10, 0), datetime.time(11, 0)], dtype=object)
1396 """
1397 # If the Timestamps have a timezone that is not UTC,
1398 # convert them into their i8 representation while
1399 # keeping their timezone and not using UTC
1400 timestamps = self._local_timestamps()
1401
1402 return ints_to_pydatetime(timestamps, box="time", reso=self._creso)
1403
1404 @property
1405 def timetz(self) -> npt.NDArray[np.object_]:
1406 """
1407 Returns numpy array of :class:`datetime.time` objects with timezones.
1408
1409 The time part of the Timestamps.
1410
1411 Examples
1412 --------
1413 For Series:
1414
1415 >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
1416 >>> s = pd.to_datetime(s)
1417 >>> s
1418 0 2020-01-01 10:00:00+00:00
1419 1 2020-02-01 11:00:00+00:00
1420 dtype: datetime64[ns, UTC]
1421 >>> s.dt.timetz
1422 0 10:00:00+00:00
1423 1 11:00:00+00:00
1424 dtype: object
1425
1426 For DatetimeIndex:
1427
1428 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
1429 ... "2/1/2020 11:00:00+00:00"])
1430 >>> idx.timetz
1431 array([datetime.time(10, 0, tzinfo=datetime.timezone.utc),
1432 datetime.time(11, 0, tzinfo=datetime.timezone.utc)], dtype=object)
1433 """
1434 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso)
1435
1436 @property
1437 def date(self) -> npt.NDArray[np.object_]:
1438 """
1439 Returns numpy array of python :class:`datetime.date` objects.
1440
1441 Namely, the date part of Timestamps without time and
1442 timezone information.
1443
1444 Examples
1445 --------
1446 For Series:
1447
1448 >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
1449 >>> s = pd.to_datetime(s)
1450 >>> s
1451 0 2020-01-01 10:00:00+00:00
1452 1 2020-02-01 11:00:00+00:00
1453 dtype: datetime64[ns, UTC]
1454 >>> s.dt.date
1455 0 2020-01-01
1456 1 2020-02-01
1457 dtype: object
1458
1459 For DatetimeIndex:
1460
1461 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
1462 ... "2/1/2020 11:00:00+00:00"])
1463 >>> idx.date
1464 array([datetime.date(2020, 1, 1), datetime.date(2020, 2, 1)], dtype=object)
1465 """
1466 # If the Timestamps have a timezone that is not UTC,
1467 # convert them into their i8 representation while
1468 # keeping their timezone and not using UTC
1469 timestamps = self._local_timestamps()
1470
1471 return ints_to_pydatetime(timestamps, box="date", reso=self._creso)
1472
1473 def isocalendar(self) -> DataFrame:
1474 """
1475 Calculate year, week, and day according to the ISO 8601 standard.
1476
1477 Returns
1478 -------
1479 DataFrame
1480 With columns year, week and day.
1481
1482 See Also
1483 --------
1484 Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
1485 week number, and weekday for the given Timestamp object.
1486 datetime.date.isocalendar : Return a named tuple object with
1487 three components: year, week and weekday.
1488
1489 Examples
1490 --------
1491 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1492 >>> idx.isocalendar()
1493 year week day
1494 2019-12-29 2019 52 7
1495 2019-12-30 2020 1 1
1496 2019-12-31 2020 1 2
1497 2020-01-01 2020 1 3
1498 >>> idx.isocalendar().week
1499 2019-12-29 52
1500 2019-12-30 1
1501 2019-12-31 1
1502 2020-01-01 1
1503 Freq: D, Name: week, dtype: UInt32
1504 """
1505 from pandas import DataFrame
1506
1507 values = self._local_timestamps()
1508 sarray = fields.build_isocalendar_sarray(values, reso=self._creso)
1509 iso_calendar_df = DataFrame(
1510 sarray, columns=["year", "week", "day"], dtype="UInt32"
1511 )
1512 if self._hasna:
1513 iso_calendar_df.iloc[self._isnan] = None
1514 return iso_calendar_df
1515
    # The accessors below are generated at class-definition time by
    # ``_field_accessor(name, field, docstring)``; each extracts the named
    # field from the underlying i8 timestamps and returns int32 values.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="YE")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int32
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ME")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="min")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0       0
        1       1
        2       2
        dtype: int32
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0       0
        1       1
        2       2
        dtype: int32
        """,
    )
    # Shared docstring for ``day_of_week`` and its two aliases below.
    _dayofweek_doc = """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. It is assumed the week starts on
    Monday, which is denoted by 0 and ends on Sunday which is denoted
    by 6. This method is available on both Series with datetime
    values (using the `dt` accessor) or DatetimeIndex.

    Returns
    -------
    Series or Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
    >>> s.dt.dayofweek
    2016-12-31    5
    2017-01-01    6
    2017-01-02    0
    2017-01-03    1
    2017-01-04    2
    2017-01-05    3
    2017-01-06    4
    2017-01-07    5
    2017-01-08    6
    Freq: D, dtype: int32
    """
    day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
    # ``dayofweek`` and ``weekday`` are aliases for the same property.
    dayofweek = day_of_week
    weekday = day_of_week
1737
    # Note: the accessor name is "dayofyear"; ``day_of_year`` is the
    # class-level attribute, with ``dayofyear`` aliased below.
    day_of_year = _field_accessor(
        "dayofyear",
        "doy",
        """
        The ordinal day of the year.

        Examples
        --------
        For Series:

        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-02-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.dayofyear
        0     1
        1    32
        dtype: int32

        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
        ...                         "2/1/2020 11:00:00+00:00"])
        >>> idx.dayofyear
        Index([1, 32], dtype='int32')
        """,
    )
    dayofyear = day_of_year
    quarter = _field_accessor(
        "quarter",
        "q",
        """
        The quarter of the date.

        Examples
        --------
        For Series:

        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "4/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-04-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.quarter
        0    1
        1    2
        dtype: int32

        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
        ...                         "2/1/2020 11:00:00+00:00"])
        >>> idx.quarter
        Index([1, 1], dtype='int32')
        """,
    )
    days_in_month = _field_accessor(
        "days_in_month",
        "dim",
        """
        The number of days in the month.

        Examples
        --------
        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-02-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.daysinmonth
        0    31
        1    29
        dtype: int32
        """,
    )
    daysinmonth = days_in_month
    # Template docstring shared by is_month_start / is_month_end; the
    # {first_or_last} placeholder is filled in per accessor below.
    _is_month_doc = """
        Indicates whether the date is the {first_or_last} day of the month.

        Returns
        -------
        Series or array
            For Series, returns a Series with boolean values.
            For DatetimeIndex, returns a boolean array.

        See Also
        --------
        is_month_start : Return a boolean indicating whether the date
            is the first day of the month.
        is_month_end : Return a boolean indicating whether the date
            is the last day of the month.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
        >>> s
        0   2018-02-27
        1   2018-02-28
        2   2018-03-01
        dtype: datetime64[ns]
        >>> s.dt.is_month_start
        0    False
        1    False
        2     True
        dtype: bool
        >>> s.dt.is_month_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2018-02-27", periods=3)
        >>> idx.is_month_start
        array([False, False, True])
        >>> idx.is_month_end
        array([False, True, False])
    """
    is_month_start = _field_accessor(
        "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
    )

    is_month_end = _field_accessor(
        "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
    )
1869
    # Boolean period-boundary accessors; each returns a boolean ndarray
    # (DatetimeIndex) or boolean Series (via the .dt accessor).
    is_quarter_start = _field_accessor(
        "is_quarter_start",
        "is_quarter_start",
        """
        Indicator for whether the date is the first day of a quarter.

        Returns
        -------
        is_quarter_start : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_end : Similar property for indicating the quarter end.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_start=df.dates.dt.is_quarter_start)
               dates  quarter  is_quarter_start
        0 2017-03-30        1             False
        1 2017-03-31        1             False
        2 2017-04-01        2              True
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_start
        array([False, False,  True, False])
        """,
    )
    is_quarter_end = _field_accessor(
        "is_quarter_end",
        "is_quarter_end",
        """
        Indicator for whether the date is the last day of a quarter.

        Returns
        -------
        is_quarter_end : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_start : Similar property indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_end=df.dates.dt.is_quarter_end)
               dates  quarter  is_quarter_end
        0 2017-03-30        1           False
        1 2017-03-31        1            True
        2 2017-04-01        2           False
        3 2017-04-02        2           False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_end
        array([False,  True, False, False])
        """,
    )
    is_year_start = _field_accessor(
        "is_year_start",
        "is_year_start",
        """
        Indicate whether the date is the first day of a year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_end : Similar property indicating the last day of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_start
        0    False
        1    False
        2     True
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_start
        array([False, False,  True])
        """,
    )
    is_year_end = _field_accessor(
        "is_year_end",
        "is_year_end",
        """
        Indicate whether the date is the last day of the year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_start : Similar property indicating the start of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_end
        array([False,  True, False])
        """,
    )
    is_leap_year = _field_accessor(
        "is_leap_year",
        "is_leap_year",
        """
        Boolean indicator if the date belongs to a leap year.

        A leap year is a year, which has 366 days (instead of 365) including
        29th of February as an intercalary day.
        Leap years are years which are multiples of four with the exception
        of years divisible by 100 but not by 400.

        Returns
        -------
        Series or ndarray
            Booleans indicating if dates belong to a leap year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="YE")
        >>> idx
        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                      dtype='datetime64[ns]', freq='YE-DEC')
        >>> idx.is_leap_year
        array([ True, False, False])

        >>> dates_series = pd.Series(idx)
        >>> dates_series
        0   2012-12-31
        1   2013-12-31
        2   2014-12-31
        dtype: datetime64[ns]
        >>> dates_series.dt.is_leap_year
        0     True
        1    False
        2    False
        dtype: bool
        """,
    )
2083
2084 def to_julian_date(self) -> npt.NDArray[np.float64]:
2085 """
2086 Convert Datetime Array to float64 ndarray of Julian Dates.
2087 0 Julian date is noon January 1, 4713 BC.
2088 https://en.wikipedia.org/wiki/Julian_day
2089 """
2090
2091 # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
2092 year = np.asarray(self.year)
2093 month = np.asarray(self.month)
2094 day = np.asarray(self.day)
2095 testarr = month < 3
2096 year[testarr] -= 1
2097 month[testarr] += 12
2098 return (
2099 day
2100 + np.fix((153 * month - 457) / 5)
2101 + 365 * year
2102 + np.floor(year / 4)
2103 - np.floor(year / 100)
2104 + np.floor(year / 400)
2105 + 1_721_118.5
2106 + (
2107 self.hour
2108 + self.minute / 60
2109 + self.second / 3600
2110 + self.microsecond / 3600 / 10**6
2111 + self.nanosecond / 3600 / 10**9
2112 )
2113 / 24
2114 )
2115
2116 # -----------------------------------------------------------------
2117 # Reductions
2118
2119 def std(
2120 self,
2121 axis=None,
2122 dtype=None,
2123 out=None,
2124 ddof: int = 1,
2125 keepdims: bool = False,
2126 skipna: bool = True,
2127 ):
2128 """
2129 Return sample standard deviation over requested axis.
2130
2131 Normalized by `N-1` by default. This can be changed using ``ddof``.
2132
2133 Parameters
2134 ----------
2135 axis : int, optional
2136 Axis for the function to be applied on. For :class:`pandas.Series`
2137 this parameter is unused and defaults to ``None``.
2138 ddof : int, default 1
2139 Degrees of Freedom. The divisor used in calculations is `N - ddof`,
2140 where `N` represents the number of elements.
2141 skipna : bool, default True
2142 Exclude NA/null values. If an entire row/column is ``NA``, the result
2143 will be ``NA``.
2144
2145 Returns
2146 -------
2147 Timedelta
2148
2149 See Also
2150 --------
2151 numpy.ndarray.std : Returns the standard deviation of the array elements
2152 along given axis.
2153 Series.std : Return sample standard deviation over requested axis.
2154
2155 Examples
2156 --------
2157 For :class:`pandas.DatetimeIndex`:
2158
2159 >>> idx = pd.date_range('2001-01-01 00:00', periods=3)
2160 >>> idx
2161 DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
2162 dtype='datetime64[ns]', freq='D')
2163 >>> idx.std()
2164 Timedelta('1 days 00:00:00')
2165 """
2166 # Because std is translation-invariant, we can get self.std
2167 # by calculating (self - Timestamp(0)).std, and we can do it
2168 # without creating a copy by using a view on self._ndarray
2169 from pandas.core.arrays import TimedeltaArray
2170
2171 # Find the td64 dtype with the same resolution as our dt64 dtype
2172 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
2173 dtype = np.dtype(dtype_str)
2174
2175 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype)
2176
2177 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
2178
2179
2180# -------------------------------------------------------------------
2181# Constructor Helpers
2182
2183
def _sequence_to_dt64(
    data: ArrayLike,
    *,
    copy: bool = False,
    tz: tzinfo | None = None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    out_unit: str | None = None,
) -> tuple[np.ndarray, tzinfo | None]:
    """
    Convert a sequence to a datetime64 ndarray plus an inferred/validated tz.

    Parameters
    ----------
    data : np.ndarray or ExtensionArray
        dtl.ensure_arraylike_for_datetimelike has already been called.
    copy : bool, default False
        Whether a defensive copy of the input may be required; internally
        flipped to False once a conversion step has already allocated a
        new array.
    tz : tzinfo or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    out_unit : str or None, default None
        Desired output resolution.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[unit]``.
        Where `unit` is "ns" unless specified otherwise by `out_unit`.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.

    Raises
    ------
    TypeError : PeriodDtype data is passed
    """

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    if out_unit is None:
        out_unit = "ns"
    out_dtype = np.dtype(f"M8[{out_unit}]")

    if data_dtype == object or is_string_dtype(data_dtype):
        # TODO: We do not have tests specific to string-dtypes,
        # also complex or categorical or other extension
        data = cast(np.ndarray, data)
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # Much more performant than going through array_to_datetime
            data = data.astype(np.int64)
        elif tz is not None and ambiguous == "raise":
            # Parse straight to tz-aware values at the requested resolution.
            obj_data = np.asarray(data, dtype=object)
            result = tslib.array_to_datetime_with_tz(
                obj_data,
                tz=tz,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                creso=abbrev_to_npy_unit(out_unit),
            )
            return result, tz
        else:
            converted, inferred_tz = objects_to_datetime64(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
                out_unit=out_unit or "ns",
            )
            # objects_to_datetime64 returned a freshly allocated array.
            copy = False
            if tz and inferred_tz:
                # two timezones: convert to intended from base UTC repr
                # GH#42505 by convention, these are _already_ UTC
                result = converted

            elif inferred_tz:
                # No tz requested; adopt the one inferred from the data.
                tz = inferred_tz
                result = converted

            else:
                # Wall times: localize to `tz` (if any) via the naive path.
                result, _ = _construct_from_dt64_naive(
                    converted, tz=tz, copy=copy, ambiguous=ambiguous
                )
            return result, tz

    data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if isinstance(data_dtype, DatetimeTZDtype):
        # DatetimeArray -> ndarray
        data = cast(DatetimeArray, data)
        # Ensure any user-provided tz agrees with the data's own tz.
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif lib.is_np_dtype(data_dtype, "M"):
        # tz-naive DatetimeArray or ndarray[datetime64]
        if isinstance(data, DatetimeArray):
            data = data._ndarray

        data = cast(np.ndarray, data)
        result, copy = _construct_from_dt64_naive(
            data, tz=tz, copy=copy, ambiguous=ambiguous
        )

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
            copy = False
        data = cast(np.ndarray, data)
        result = data.view(out_dtype)

    if copy:
        # A defensive copy is still owed to the caller.
        result = result.copy()

    # Post-conditions: a supported, resolution-bearing datetime64 dtype.
    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype.kind == "M"
    assert result.dtype != "M8"
    assert is_supported_dtype(result.dtype)
    return result, tz
2308
2309
def _construct_from_dt64_naive(
    data: np.ndarray, *, tz: tzinfo | None, copy: bool, ambiguous: TimeAmbiguous
) -> tuple[np.ndarray, bool]:
    """
    Convert datetime64 data to a supported dtype, localizing if necessary.

    Parameters
    ----------
    data : np.ndarray
        datetime64-dtype ndarray (any resolution, any byte order).
    tz : tzinfo or None
        If not None, `data` is treated as wall times in `tz` and converted
        to UTC.
    copy : bool
        Whether the caller still intends to copy the data; flipped to False
        below whenever a conversion has already allocated a new array.
    ambiguous : str, bool, or arraylike
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.

    Returns
    -------
    result : np.ndarray
        datetime64 ndarray with a supported, little-endian dtype.
    copy : bool
        Whether the caller still needs to make a defensive copy.
    """
    # Caller is responsible for ensuring
    # lib.is_np_dtype(data.dtype)

    new_dtype = data.dtype
    if not is_supported_dtype(new_dtype):
        # Cast to the nearest supported unit, generally "s"
        new_dtype = get_supported_dtype(new_dtype)
        data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
        # astype_overflowsafe allocated a new array; no further copy needed.
        copy = False

    if data.dtype.byteorder == ">":
        # TODO: better way to handle this? non-copying alternative?
        # without this, test_constructor_datetime64_bigendian fails
        data = data.astype(data.dtype.newbyteorder("<"))
        new_dtype = data.dtype
        copy = False

    if tz is not None:
        # Convert tz-naive to UTC
        # TODO: if tz is UTC, are there situations where we *don't* want a
        # copy? tz_localize_to_utc always makes one.
        # tz_localize_to_utc works on 1D i8 data, so flatten then restore.
        shape = data.shape
        if data.ndim > 1:
            data = data.ravel()

        data_unit = get_unit_from_dtype(new_dtype)
        data = tzconversion.tz_localize_to_utc(
            data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
        )
        data = data.view(new_dtype)
        data = data.reshape(shape)

    assert data.dtype == new_dtype, data.dtype
    result = data

    return result, copy
2352
2353
2354def objects_to_datetime64(
2355 data: np.ndarray,
2356 dayfirst,
2357 yearfirst,
2358 utc: bool = False,
2359 errors: DateTimeErrorChoices = "raise",
2360 allow_object: bool = False,
2361 out_unit: str = "ns",
2362):
2363 """
2364 Convert data to array of timestamps.
2365
2366 Parameters
2367 ----------
2368 data : np.ndarray[object]
2369 dayfirst : bool
2370 yearfirst : bool
2371 utc : bool, default False
2372 Whether to convert/localize timestamps to UTC.
2373 errors : {'raise', 'ignore', 'coerce'}
2374 allow_object : bool
2375 Whether to return an object-dtype ndarray instead of raising if the
2376 data contains more than one timezone.
2377 out_unit : str, default "ns"
2378
2379 Returns
2380 -------
2381 result : ndarray
2382 np.datetime64[out_unit] if returned values represent wall times or UTC
2383 timestamps.
2384 object if mixed timezones
2385 inferred_tz : tzinfo or None
2386 If not None, then the datetime64 values in `result` denote UTC timestamps.
2387
2388 Raises
2389 ------
2390 ValueError : if data cannot be converted to datetimes
2391 TypeError : When a type cannot be converted to datetime
2392 """
2393 assert errors in ["raise", "ignore", "coerce"]
2394
2395 # if str-dtype, convert
2396 data = np.asarray(data, dtype=np.object_)
2397
2398 result, tz_parsed = tslib.array_to_datetime(
2399 data,
2400 errors=errors,
2401 utc=utc,
2402 dayfirst=dayfirst,
2403 yearfirst=yearfirst,
2404 creso=abbrev_to_npy_unit(out_unit),
2405 )
2406
2407 if tz_parsed is not None:
2408 # We can take a shortcut since the datetime64 numpy array
2409 # is in UTC
2410 return result, tz_parsed
2411 elif result.dtype.kind == "M":
2412 return result, tz_parsed
2413 elif result.dtype == object:
2414 # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
2415 # array is allowed. When called via `pd.DatetimeIndex`, we can
2416 # only accept datetime64 dtype, so raise TypeError if object-dtype
2417 # is returned, as that indicates the values can be recognized as
2418 # datetimes but they have conflicting timezones/awareness
2419 if allow_object:
2420 return result, tz_parsed
2421 raise TypeError("DatetimeIndex has mixed timezones")
2422 else: # pragma: no cover
2423 # GH#23675 this TypeError should never be hit, whereas the TypeError
2424 # in the object-dtype branch above is reachable.
2425 raise TypeError(result)
2426
2427
2428def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
2429 """
2430 Convert data based on dtype conventions, issuing
2431 errors where appropriate.
2432
2433 Parameters
2434 ----------
2435 data : np.ndarray or pd.Index
2436 copy : bool
2437 tz : tzinfo or None, default None
2438
2439 Returns
2440 -------
2441 data : np.ndarray or pd.Index
2442 copy : bool
2443
2444 Raises
2445 ------
2446 TypeError : PeriodDType data is passed
2447 """
2448 if not hasattr(data, "dtype"):
2449 # e.g. collections.deque
2450 return data, copy
2451
2452 if is_float_dtype(data.dtype):
2453 # pre-2.0 we treated these as wall-times, inconsistent with ints
2454 # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
2455 # Note: data.astype(np.int64) fails ARM tests, see
2456 # https://github.com/pandas-dev/pandas/issues/49468.
2457 data = data.astype(DT64NS_DTYPE).view("i8")
2458 copy = False
2459
2460 elif lib.is_np_dtype(data.dtype, "m") or is_bool_dtype(data.dtype):
2461 # GH#29794 enforcing deprecation introduced in GH#23539
2462 raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
2463 elif isinstance(data.dtype, PeriodDtype):
2464 # Note: without explicitly raising here, PeriodIndex
2465 # test_setops.test_join_does_not_recur fails
2466 raise TypeError(
2467 "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
2468 )
2469
2470 elif isinstance(data.dtype, ExtensionDtype) and not isinstance(
2471 data.dtype, DatetimeTZDtype
2472 ):
2473 # TODO: We have no tests for these
2474 data = np.array(data, dtype=np.object_)
2475 copy = False
2476
2477 return data, copy
2478
2479
2480# -------------------------------------------------------------------
2481# Validation and Inference
2482
2483
2484def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
2485 """
2486 If a timezone is inferred from data, check that it is compatible with
2487 the user-provided timezone, if any.
2488
2489 Parameters
2490 ----------
2491 tz : tzinfo or None
2492 inferred_tz : tzinfo or None
2493
2494 Returns
2495 -------
2496 tz : tzinfo or None
2497
2498 Raises
2499 ------
2500 TypeError : if both timezones are present but do not match
2501 """
2502 if tz is None:
2503 tz = inferred_tz
2504 elif inferred_tz is None:
2505 pass
2506 elif not timezones.tz_compare(tz, inferred_tz):
2507 raise TypeError(
2508 f"data is already tz-aware {inferred_tz}, unable to "
2509 f"set specified tz: {tz}"
2510 )
2511 return tz
2512
2513
2514def _validate_dt64_dtype(dtype):
2515 """
2516 Check that a dtype, if passed, represents either a numpy datetime64[ns]
2517 dtype or a pandas DatetimeTZDtype.
2518
2519 Parameters
2520 ----------
2521 dtype : object
2522
2523 Returns
2524 -------
2525 dtype : None, numpy.dtype, or DatetimeTZDtype
2526
2527 Raises
2528 ------
2529 ValueError : invalid dtype
2530
2531 Notes
2532 -----
2533 Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
2534 tz errors to go through
2535 """
2536 if dtype is not None:
2537 dtype = pandas_dtype(dtype)
2538 if dtype == np.dtype("M8"):
2539 # no precision, disallowed GH#24806
2540 msg = (
2541 "Passing in 'datetime64' dtype with no precision is not allowed. "
2542 "Please pass in 'datetime64[ns]' instead."
2543 )
2544 raise ValueError(msg)
2545
2546 if (
2547 isinstance(dtype, np.dtype)
2548 and (dtype.kind != "M" or not is_supported_dtype(dtype))
2549 ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
2550 raise ValueError(
2551 f"Unexpected value for 'dtype': '{dtype}'. "
2552 "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
2553 "'datetime64[ns]' or DatetimeTZDtype'."
2554 )
2555
2556 if getattr(dtype, "tz", None):
2557 # https://github.com/pandas-dev/pandas/issues/18595
2558 # Ensure that we have a standard timezone for pytz objects.
2559 # Without this, things like adding an array of timedeltas and
2560 # a tz-aware Timestamp (with a tz specific to its datetime) will
2561 # be incorrect(ish?) for the array as a whole
2562 dtype = cast(DatetimeTZDtype, dtype)
2563 dtype = DatetimeTZDtype(
2564 unit=dtype.unit, tz=timezones.tz_standardize(dtype.tz)
2565 )
2566
2567 return dtype
2568
2569
2570def _validate_tz_from_dtype(
2571 dtype, tz: tzinfo | None, explicit_tz_none: bool = False
2572) -> tzinfo | None:
2573 """
2574 If the given dtype is a DatetimeTZDtype, extract the implied
2575 tzinfo object from it and check that it does not conflict with the given
2576 tz.
2577
2578 Parameters
2579 ----------
2580 dtype : dtype, str
2581 tz : None, tzinfo
2582 explicit_tz_none : bool, default False
2583 Whether tz=None was passed explicitly, as opposed to lib.no_default.
2584
2585 Returns
2586 -------
2587 tz : consensus tzinfo
2588
2589 Raises
2590 ------
2591 ValueError : on tzinfo mismatch
2592 """
2593 if dtype is not None:
2594 if isinstance(dtype, str):
2595 try:
2596 dtype = DatetimeTZDtype.construct_from_string(dtype)
2597 except TypeError:
2598 # Things like `datetime64[ns]`, which is OK for the
2599 # constructors, but also nonsense, which should be validated
2600 # but not by us. We *do* allow non-existent tz errors to
2601 # go through
2602 pass
2603 dtz = getattr(dtype, "tz", None)
2604 if dtz is not None:
2605 if tz is not None and not timezones.tz_compare(tz, dtz):
2606 raise ValueError("cannot supply both a tz and a dtype with a tz")
2607 if explicit_tz_none:
2608 raise ValueError("Cannot pass both a timezone-aware dtype and tz=None")
2609 tz = dtz
2610
2611 if tz is not None and lib.is_np_dtype(dtype, "M"):
2612 # We also need to check for the case where the user passed a
2613 # tz-naive dtype (i.e. datetime64[ns])
2614 if tz is not None and not timezones.tz_compare(tz, dtz):
2615 raise ValueError(
2616 "cannot supply both a tz and a "
2617 "timezone-naive dtype (i.e. datetime64[ns])"
2618 )
2619
2620 return tz
2621
2622
2623def _infer_tz_from_endpoints(
2624 start: Timestamp, end: Timestamp, tz: tzinfo | None
2625) -> tzinfo | None:
2626 """
2627 If a timezone is not explicitly given via `tz`, see if one can
2628 be inferred from the `start` and `end` endpoints. If more than one
2629 of these inputs provides a timezone, require that they all agree.
2630
2631 Parameters
2632 ----------
2633 start : Timestamp
2634 end : Timestamp
2635 tz : tzinfo or None
2636
2637 Returns
2638 -------
2639 tz : tzinfo or None
2640
2641 Raises
2642 ------
2643 TypeError : if start and end timezones do not agree
2644 """
2645 try:
2646 inferred_tz = timezones.infer_tzinfo(start, end)
2647 except AssertionError as err:
2648 # infer_tzinfo raises AssertionError if passed mismatched timezones
2649 raise TypeError(
2650 "Start and end cannot both be tz-aware with different timezones"
2651 ) from err
2652
2653 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2654 tz = timezones.maybe_get_tz(tz)
2655
2656 if tz is not None and inferred_tz is not None:
2657 if not timezones.tz_compare(inferred_tz, tz):
2658 raise AssertionError("Inferred time zone not equal to passed time zone")
2659
2660 elif inferred_tz is not None:
2661 tz = inferred_tz
2662
2663 return tz
2664
2665
2666def _maybe_normalize_endpoints(
2667 start: Timestamp | None, end: Timestamp | None, normalize: bool
2668):
2669 if normalize:
2670 if start is not None:
2671 start = start.normalize()
2672
2673 if end is not None:
2674 end = end.normalize()
2675
2676 return start, end
2677
2678
2679def _maybe_localize_point(
2680 ts: Timestamp | None, freq, tz, ambiguous, nonexistent
2681) -> Timestamp | None:
2682 """
2683 Localize a start or end Timestamp to the timezone of the corresponding
2684 start or end Timestamp
2685
2686 Parameters
2687 ----------
2688 ts : start or end Timestamp to potentially localize
2689 freq : Tick, DateOffset, or None
2690 tz : str, timezone object or None
2691 ambiguous: str, localization behavior for ambiguous times
2692 nonexistent: str, localization behavior for nonexistent times
2693
2694 Returns
2695 -------
2696 ts : Timestamp
2697 """
2698 # Make sure start and end are timezone localized if:
2699 # 1) freq = a Timedelta-like frequency (Tick)
2700 # 2) freq = None i.e. generating a linspaced range
2701 if ts is not None and ts.tzinfo is None:
2702 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2703 # we have historically defaulted ambiguous=False
2704 ambiguous = ambiguous if ambiguous != "infer" else False
2705 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2706 if isinstance(freq, Tick) or freq is None:
2707 localize_args["tz"] = tz
2708 ts = ts.tz_localize(**localize_args)
2709 return ts
2710
2711
def _generate_range(
    start: Timestamp | None,
    end: Timestamp | None,
    periods: int | None,
    offset: BaseOffset,
    *,
    unit: str,
) -> Iterator[Timestamp]:
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : Timestamp or None
    end : Timestamp or None
    periods : int or None
    offset : DateOffset
    unit : str
        Resolution each endpoint / yielded Timestamp is cast to via
        ``as_unit``.

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object

    Raises
    ------
    ValueError
        If applying the offset fails to advance the current date in the
        iteration direction (which would otherwise loop forever).
    """
    offset = to_offset(offset)

    # Timestamp(None) yields NaT; map NaT endpoints back to None below.
    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    start = Timestamp(start)  # type: ignore[arg-type]
    if start is not NaT:
        start = start.as_unit(unit)
    else:
        start = None

    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    end = Timestamp(end)  # type: ignore[arg-type]
    if end is not NaT:
        end = end.as_unit(unit)
    else:
        end = None

    # Snap endpoints onto the offset so iteration lands on valid dates.
    # NOTE(review): the `elif` means end is only rolled back when start is
    # already on-offset (or None) — confirm this asymmetry is intentional.
    if start and not offset.is_on_offset(start):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        start = offset.rollforward(start)  # type: ignore[assignment]

    elif end and not offset.is_on_offset(end):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        end = offset.rollback(end)  # type: ignore[assignment]

    # Forward range that is already empty: yield nothing.
    # Unsupported operand types for < ("Timestamp" and "None")
    if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
        end = None
        periods = 0

    # Derive whichever endpoint is missing from `periods` and the offset.
    if end is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        end = start + (periods - 1) * offset  # type: ignore[operator]

    if start is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        start = end - (periods - 1) * offset  # type: ignore[operator]

    start = cast(Timestamp, start)
    end = cast(Timestamp, end)

    cur = start
    if offset.n >= 0:
        # Forward iteration: yield while cur <= end, stepping by `offset`.
        while cur <= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur)
            next_date = next_date.as_unit(unit)
            if next_date <= cur:
                raise ValueError(f"Offset {offset} did not increment date")
            cur = next_date
    else:
        # Backward iteration (negative step): yield while cur >= end.
        while cur >= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur)
            next_date = next_date.as_unit(unit)
            if next_date >= cur:
                raise ValueError(f"Offset {offset} did not decrement date")
            cur = next_date