1from __future__ import annotations
2
3from datetime import (
4 datetime,
5 time,
6 timedelta,
7 tzinfo,
8)
9from typing import (
10 TYPE_CHECKING,
11 Iterator,
12 cast,
13)
14import warnings
15
16import numpy as np
17
18from pandas._libs import (
19 lib,
20 tslib,
21)
22from pandas._libs.tslibs import (
23 BaseOffset,
24 NaT,
25 NaTType,
26 Resolution,
27 Timestamp,
28 astype_overflowsafe,
29 fields,
30 get_resolution,
31 get_supported_reso,
32 get_unit_from_dtype,
33 ints_to_pydatetime,
34 is_date_array_normalized,
35 is_supported_unit,
36 is_unitless,
37 normalize_i8_timestamps,
38 npy_unit_to_abbrev,
39 timezones,
40 to_offset,
41 tz_convert_from_utc,
42 tzconversion,
43)
44from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
45from pandas._typing import (
46 DateTimeErrorChoices,
47 IntervalClosedType,
48 TimeAmbiguous,
49 TimeNonexistent,
50 npt,
51)
52from pandas.errors import PerformanceWarning
53from pandas.util._exceptions import find_stack_level
54from pandas.util._validators import validate_inclusive
55
56from pandas.core.dtypes.common import (
57 DT64NS_DTYPE,
58 INT64_DTYPE,
59 is_bool_dtype,
60 is_datetime64_any_dtype,
61 is_datetime64_dtype,
62 is_datetime64tz_dtype,
63 is_dtype_equal,
64 is_extension_array_dtype,
65 is_float_dtype,
66 is_object_dtype,
67 is_period_dtype,
68 is_sparse,
69 is_string_dtype,
70 is_timedelta64_dtype,
71 pandas_dtype,
72)
73from pandas.core.dtypes.dtypes import (
74 DatetimeTZDtype,
75 ExtensionDtype,
76)
77from pandas.core.dtypes.missing import isna
78
79from pandas.core.arrays import datetimelike as dtl
80from pandas.core.arrays._ranges import generate_regular_range
81import pandas.core.common as com
82
83from pandas.tseries.frequencies import get_period_alias
84from pandas.tseries.offsets import (
85 Day,
86 Tick,
87)
88
89if TYPE_CHECKING:
90 from pandas import DataFrame
91 from pandas.core.arrays import PeriodArray
92
# Cached midnight time-of-day; used when checking for "no time" components.
_midnight = time(0, 0)
94
95
def tz_to_dtype(
    tz: tzinfo | None, unit: str = "ns"
) -> np.dtype | DatetimeTZDtype:
    """
    Return a datetime64[unit] dtype appropriate for the given timezone.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone for the dtype; None means tz-naive.
    unit : str, default "ns"
        Resolution abbreviation ("s", "ms", "us", "ns").

    Returns
    -------
    np.dtype or DatetimeTZDtype
        ``np.dtype("M8[unit]")`` when tz is None, else a DatetimeTZDtype.
    """
    if tz is None:
        return np.dtype(f"M8[{unit}]")
    else:
        return DatetimeTZDtype(tz=tz, unit=unit)
113
114
115def _field_accessor(name: str, field: str, docstring=None):
116 def f(self):
117 values = self._local_timestamps()
118
119 if field in self._bool_ops:
120 result: np.ndarray
121
122 if field.endswith(("start", "end")):
123 freq = self.freq
124 month_kw = 12
125 if freq:
126 kwds = freq.kwds
127 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
128
129 result = fields.get_start_end_field(
130 values, field, self.freqstr, month_kw, reso=self._creso
131 )
132 else:
133 result = fields.get_date_field(values, field, reso=self._creso)
134
135 # these return a boolean by-definition
136 return result
137
138 if field in self._object_ops:
139 result = fields.get_date_name_field(values, field, reso=self._creso)
140 result = self._maybe_mask_results(result, fill_value=None)
141
142 else:
143 result = fields.get_date_field(values, field, reso=self._creso)
144 result = self._maybe_mask_results(
145 result, fill_value=None, convert="float64"
146 )
147
148 return result
149
150 f.__name__ = name
151 f.__doc__ = docstring
152 return property(f)
153
154
class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
    """
    Pandas ExtensionArray for tz-naive or tz-aware datetime data.

    .. warning::

       DatetimeArray is currently experimental, and its API may change
       without warning. In particular, :attr:`DatetimeArray.dtype` is
       expected to change to always be an instance of an ``ExtensionDtype``
       subclass.

    Parameters
    ----------
    values : Series, Index, DatetimeArray, ndarray
        The datetime data.

        For DatetimeArray `values` (or a Series or Index boxing one),
        `dtype` and `freq` will be extracted from `values`.

    dtype : numpy.dtype or DatetimeTZDtype
        Note that the only NumPy dtype allowed is 'datetime64[ns]'.
    freq : str or Offset, optional
        The frequency.
    copy : bool, default False
        Whether to copy the underlying array of values.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # Internal type tag used by pandas' ABC/isinstance machinery.
    _typ = "datetimearray"
    # Scalar stored in the backing ndarray for missing entries.
    _internal_fill_value = np.datetime64("NaT", "ns")
    # Scalar types accepted as datetime-like values.
    _recognized_scalars = (datetime, np.datetime64)
    # Predicate for dtypes this array can hold (tz-naive or tz-aware).
    _is_recognized_dtype = is_datetime64_any_dtype
    # lib.infer_dtype results that map to this array type.
    _infer_matches = ("datetime", "datetime64", "date")
195
    @property
    def _scalar_type(self) -> type[Timestamp]:
        # Boxing class for individual elements of this array.
        return Timestamp
199
    # define my properties & methods for delegation
    # Boolean-valued field accessors; returned without NaT masking
    # (see _field_accessor).
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    # Delegated attributes whose values are arbitrary objects.
    _object_ops: list[str] = ["freq", "tz"]
    # Integer-valued calendar field accessors.
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    # Remaining delegated attributes ("date", "time", "timetz").
    _other_ops: list[str] = ["date", "time", "timetz"]
    # Full set of attributes exposed through delegation (e.g. Series.dt).
    _datetimelike_ops: list[str] = (
        _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
    )
    # Methods exposed through delegation (e.g. Series.dt).
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
        "as_unit",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    # Backing dtype: np.dtype("M8[...]") when tz-naive, DatetimeTZDtype when aware.
    _dtype: np.dtype | DatetimeTZDtype
    # Cached frequency offset; None when irregular/unknown.
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__
259
    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        # Check the backing values have a valid datetime64 dtype, then
        # validate and return the user-requested dtype.
        _validate_dt64_dtype(values.dtype)
        dtype = _validate_dt64_dtype(dtype)
        return dtype
266
267 # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
268 @classmethod
269 def _simple_new( # type: ignore[override]
270 cls,
271 values: np.ndarray,
272 freq: BaseOffset | None = None,
273 dtype=DT64NS_DTYPE,
274 ) -> DatetimeArray:
275 assert isinstance(values, np.ndarray)
276 assert dtype.kind == "M"
277 if isinstance(dtype, np.dtype):
278 assert dtype == values.dtype
279 assert not is_unitless(dtype)
280 else:
281 # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
282 # then values.dtype should be M8[us].
283 assert dtype._creso == get_unit_from_dtype(values.dtype)
284
285 result = super()._simple_new(values, dtype)
286 result._freq = freq
287 return result
288
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        # Strict ExtensionArray-interface constructor; delegates to the
        # lenient variant used by DatetimeIndex.__new__.
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
292
    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        *,
        dtype=None,
        copy: bool = False,
        tz=lib.no_default,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous: TimeAmbiguous = "raise",
    ):
        """
        A non-strict version of _from_sequence, called from DatetimeIndex.__new__.

        Notes
        -----
        ``tz`` and ``freq`` default to ``lib.no_default`` so an explicit
        ``None`` from the caller can be distinguished from "not passed".
        """
        # Remember whether the caller explicitly passed freq=None; an
        # explicit None must win over any freq inferred from the data.
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None
        freq, freq_infer = dtl.maybe_infer_freq(freq)

        # if the user either explicitly passes tz=None or a tz-naive dtype, we
        # disallows inferring a tz.
        explicit_tz_none = tz is None
        if tz is lib.no_default:
            tz = None
        else:
            tz = timezones.maybe_get_tz(tz)

        dtype = _validate_dt64_dtype(dtype)
        # if dtype has an embedded tz, capture it
        tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

        # Extract the requested resolution ("s"/"ms"/"us"/"ns") if any.
        unit = None
        if dtype is not None:
            if isinstance(dtype, np.dtype):
                unit = np.datetime_data(dtype)[0]
            else:
                # DatetimeTZDtype
                unit = dtype.unit

        subarr, tz, inferred_freq = _sequence_to_dt64ns(
            data,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
            out_unit=unit,
        )
        # We have to call this again after possibly inferring a tz above
        _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
        if tz is not None and explicit_tz_none:
            raise ValueError(
                "Passed data is timezone-aware, incompatible with 'tz=None'. "
                "Use obj.tz_localize(None) instead."
            )

        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        data_unit = np.datetime_data(subarr.dtype)[0]
        data_dtype = tz_to_dtype(tz, data_unit)
        result = cls._simple_new(subarr, freq=freq, dtype=data_dtype)
        if unit is not None and unit != result.unit:
            # If unit was specified in user-passed dtype, cast to it here
            result = result.as_unit(unit)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq, ambiguous=ambiguous)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result
371
    # error: Signature of "_generate_range" incompatible with supertype
    # "DatetimeLikeArrayMixin"
    @classmethod
    def _generate_range(  # type: ignore[override]
        cls,
        start,
        end,
        periods,
        freq,
        tz=None,
        normalize: bool = False,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
        inclusive: IntervalClosedType = "both",
        *,
        unit: str | None = None,
    ) -> DatetimeArray:
        """
        Generate a regular range of timestamps.

        Exactly three of start/end/periods/freq must be specified; neither
        endpoint may be NaT.  With ``freq=None`` the points are linearly
        spaced between the endpoints; otherwise they are stepped by ``freq``.
        ``inclusive`` controls whether the endpoints themselves are kept.
        ``unit`` selects the resolution of the result (default nanoseconds).
        """
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        if unit is not None:
            if unit not in ["s", "ms", "us", "ns"]:
                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
        else:
            unit = "ns"

        # Cast endpoints to the requested resolution up front; round_ok=False
        # raises if an endpoint cannot be represented exactly in that unit.
        if start is not None and unit is not None:
            start = start.as_unit(unit, round_ok=False)
        if end is not None and unit is not None:
            end = end.as_unit(unit, round_ok=False)

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start_tz = None if start is None else start.tz
            end_tz = None if end is None else end.tz
            start = _maybe_localize_point(
                start, start_tz, start, freq, tz, ambiguous, nonexistent
            )
            end = _maybe_localize_point(
                end, end_tz, end, freq, tz, ambiguous, nonexistent
            )

        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                # fixed-frequency fast path: pure integer arithmetic
                i8values = generate_regular_range(start, end, periods, freq, unit=unit)
            else:
                # calendar-aware offsets: step one timestamp at a time using
                # the module-level _generate_range helper (defined elsewhere
                # in this file)
                xdr = _generate_range(
                    start=start, end=end, periods=periods, offset=freq, unit=unit
                )
                i8values = np.array([x._value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:
                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    creso = abbrev_to_npy_unit(unit)
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values,
                        tz,
                        ambiguous=ambiguous,
                        nonexistent=nonexistent,
                        creso=creso,
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            i8values = (
                np.linspace(0, end._value - start._value, periods, dtype="int64")
                + start._value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints according to `inclusive`.
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start)._value
            end_i8 = Timestamp(end)._value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view(f"datetime64[{unit}]")
        dtype = tz_to_dtype(tz, unit=unit)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
503
504 # -----------------------------------------------------------------
505 # DatetimeLike Interface
506
507 def _unbox_scalar(self, value) -> np.datetime64:
508 if not isinstance(value, self._scalar_type) and value is not NaT:
509 raise ValueError("'value' should be a Timestamp.")
510 self._check_compatible_with(value)
511 if value is NaT:
512 return np.datetime64(value._value, self.unit)
513 else:
514 return value.as_unit(self.unit).asm8
515
    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        # Parse a string scalar, attaching this array's timezone.
        return Timestamp(value, tz=self.tz)
518
    def _check_compatible_with(self, other) -> None:
        # Raise TypeError if `other`'s tz-awareness conflicts with ours.
        # NaT is always compatible.
        if other is NaT:
            return
        self._assert_tzawareness_compat(other)
523
524 # -----------------------------------------------------------------
525 # Descriptive Properties
526
    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        # GH#42228
        # Box an np.datetime64 into a Timestamp carrying this array's
        # resolution and timezone.
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz)
        return ts
532
    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

           A future version of pandas will change dtype to never be a
           ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
           always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        # set by _simple_new / TimeLikeOps.__init__
        return self._dtype
557
    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.
        """
        # GH 18595
        # tz-naive np.dtype has no "tz" attribute, so getattr yields None.
        return getattr(self.dtype, "tz", None)

    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )
578
    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz
585
    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._creso)
592
    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Finest Resolution needed to represent the (localized) values.
        return get_resolution(self.asi8, self.tz, reso=self._creso)
596
597 # ----------------------------------------------------------------
598 # Array-Like / EA-Interface Methods
599
600 def __array__(self, dtype=None) -> np.ndarray:
601 if dtype is None and self.tz:
602 # The default for tz-aware is object, to preserve tz info
603 dtype = object
604
605 return super().__array__(dtype=dtype)
606
607 def __iter__(self) -> Iterator:
608 """
609 Return an iterator over the boxed values
610
611 Yields
612 ------
613 tstamp : Timestamp
614 """
615 if self.ndim > 1:
616 for i in range(len(self)):
617 yield self[i]
618 else:
619 # convert in chunks of 10k for efficiency
620 data = self.asi8
621 length = len(self)
622 chunksize = 10000
623 chunks = (length // chunksize) + 1
624
625 for i in range(chunks):
626 start_i = i * chunksize
627 end_i = min((i + 1) * chunksize, length)
628 converted = ints_to_pydatetime(
629 data[start_i:end_i],
630 tz=self.tz,
631 box="timestamp",
632 reso=self._creso,
633 )
634 yield from converted
635
    def astype(self, dtype, copy: bool = True):
        """
        Cast to another dtype.

        Datetime-to-datetime (resolution and/or timezone) and
        datetime-to-period casts are handled here; everything else is
        delegated to DatetimeLikeArrayMixin.astype.
        """
        # We handle
        #   --> datetime
        #   --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            # same dtype: copy (or not) and return
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, ExtensionDtype):
            if not isinstance(dtype, DatetimeTZDtype):
                # e.g. Sparse[datetime64[ns]]
                return super().astype(dtype, copy=copy)
            elif self.tz is None:
                # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
                # the Series behavior which did
                # values.tz_localize("UTC").tz_convert(dtype.tz)
                raise TypeError(
                    "Cannot use .astype to convert from timezone-naive dtype to "
                    "timezone-aware dtype. Use obj.tz_localize instead or "
                    "series.dt.tz_localize instead"
                )
            else:
                # tzaware unit conversion e.g. datetime64[s, UTC]
                np_dtype = np.dtype(dtype.str)
                res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
                return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and not is_unitless(dtype)
            and is_supported_unit(get_unit_from_dtype(dtype))
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif self.tz is not None and is_datetime64_dtype(dtype):
            # pre-2.0 behavior for DTA/DTI was
            # values.tz_convert("UTC").tz_localize(None), which did not match
            # the Series behavior
            raise TypeError(
                "Cannot use .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype. Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead."
            )

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            # plain "datetime64" without a unit is ambiguous; refuse
            raise TypeError(
                "Casting to unit-less dtype 'datetime64' is not supported. "
                "Pass e.g. 'datetime64[ns]' instead."
            )

        elif is_period_dtype(dtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
702
703 # -----------------------------------------------------------------
704 # Rendering Methods
705
    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        # Format values as strings for rendering; `na_rep` replaces NaT.
        # Local import to avoid a circular dependency with pandas.io.formats.
        from pandas.io.formats.format import get_format_datetime64_from_values

        fmt = get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._creso
        )
716
717 # -----------------------------------------------------------------
718 # Comparison Methods
719
720 def _has_same_tz(self, other) -> bool:
721 # vzone shouldn't be None if value is non-datetime like
722 if isinstance(other, np.datetime64):
723 # convert to Timestamp as np.datetime64 doesn't have tz attr
724 other = Timestamp(other)
725
726 if not hasattr(other, "tzinfo"):
727 return False
728 other_tz = other.tzinfo
729 return timezones.tz_compare(self.tzinfo, other_tz)
730
731 def _assert_tzawareness_compat(self, other) -> None:
732 # adapted from _Timestamp._assert_tzawareness_compat
733 other_tz = getattr(other, "tzinfo", None)
734 other_dtype = getattr(other, "dtype", None)
735
736 if is_datetime64tz_dtype(other_dtype):
737 # Get tzinfo from Series dtype
738 other_tz = other.dtype.tz
739 if other is NaT:
740 # pd.NaT quacks both aware and naive
741 pass
742 elif self.tz is None:
743 if other_tz is not None:
744 raise TypeError(
745 "Cannot compare tz-naive and tz-aware datetime-like objects."
746 )
747 elif other_tz is None:
748 raise TypeError(
749 "Cannot compare tz-naive and tz-aware datetime-like objects"
750 )
751
752 # -----------------------------------------------------------------
753 # Arithmetic Methods
754
    def _add_offset(self, offset) -> DatetimeArray:
        """
        Add a non-Tick DateOffset to every element.

        Tz-aware arrays are shifted on wall time (localize to None, apply,
        re-localize).  Offsets without a vectorized `_apply_array` fall back
        to slow elementwise application with a PerformanceWarning.
        """
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            # apply the offset on local wall time; re-attach tz afterwards
            values = self.tz_localize(None)
        else:
            values = self

        try:
            result = offset._apply_array(values).view(values.dtype)
        except NotImplementedError:
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            # elementwise fallback via object dtype
            result = self.astype("O") + offset
            result = type(self)._from_sequence(result).as_unit(self.unit)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = DatetimeArray._simple_new(result, dtype=result.dtype)
            if self.tz is not None:
                result = result.tz_localize(self.tz)

        return result
783
784 # -----------------------------------------------------------------
785 # Timezone Conversion and Localization Methods
786
787 def _local_timestamps(self) -> npt.NDArray[np.int64]:
788 """
789 Convert to an i8 (unix-like nanosecond timestamp) representation
790 while keeping the local timezone and not using UTC.
791 This is used to calculate time-of-day information as if the timestamps
792 were timezone-naive.
793 """
794 if self.tz is None or timezones.is_utc(self.tz):
795 # Avoid the copy that would be made in tzconversion
796 return self.asi8
797 return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
798
    def tz_convert(self, tz) -> DatetimeArray:
        """
        Convert tz-aware Datetime Array/Index from one time zone to another.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone for time. Corresponding timestamps would be converted
            to this time zone of the Datetime Array/Index. A `tz` of None will
            convert to UTC and remove the timezone information.

        Returns
        -------
        Array or Index

        Raises
        ------
        TypeError
            If Datetime Array/Index is tz-naive.

        See Also
        --------
        DatetimeIndex.tz : A timezone that has a variable offset from UTC.
        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
            given time zone, or remove timezone from a tz-aware DatetimeIndex.

        Examples
        --------
        With the `tz` parameter, we can change the DatetimeIndex
        to other time zones:

        >>> dti = pd.date_range(start='2014-08-01 09:00',
        ...                     freq='H', periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert('US/Central')
        DatetimeIndex(['2014-08-01 02:00:00-05:00',
                       '2014-08-01 03:00:00-05:00',
                       '2014-08-01 04:00:00-05:00'],
                      dtype='datetime64[ns, US/Central]', freq='H')

        With the ``tz=None``, we can remove the timezone (after converting
        to UTC if necessary):

        >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
        ...                     periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert(None)
        DatetimeIndex(['2014-08-01 07:00:00',
                       '2014-08-01 08:00:00',
                       '2014-08-01 09:00:00'],
                      dtype='datetime64[ns]', freq='H')
        """
        tz = timezones.maybe_get_tz(tz)

        if self.tz is None:
            # tz naive, use tz_localize
            raise TypeError(
                "Cannot convert tz-naive timestamps, use tz_localize to localize"
            )

        # No conversion since timestamps are all UTC to begin with;
        # only the dtype (and hence the displayed wall time) changes.
        dtype = tz_to_dtype(tz, unit=self.unit)
        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
874
    @dtl.ravel_compat
    def tz_localize(
        self,
        tz,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ) -> DatetimeArray:
        """
        Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.

        This method takes a time zone (tz) naive Datetime Array/Index object
        and makes this time zone aware. It does not move the time to another
        time zone.

        This method can also be used to do the inverse -- to create a time
        zone unaware object from an aware object. To that end, pass `tz=None`.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone to convert timestamps to. Passing ``None`` will
            remove the time zone information preserving local time.
        ambiguous : 'infer', 'NaT', bool array, default 'raise'
            When clocks moved backward due to DST, ambiguous times may arise.
            For example in Central European Time (UTC+01), when going from
            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
            `ambiguous` parameter dictates how ambiguous times should be
            handled.

            - 'infer' will attempt to infer fall dst-transition hours based on
              order
            - bool-ndarray where True signifies a DST time, False signifies a
              non-DST time (note that this flag is only applicable for
              ambiguous times)
            - 'NaT' will return NaT where there are ambiguous times
            - 'raise' will raise an AmbiguousTimeError if there are ambiguous
              times.

        nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
default 'raise'
            A nonexistent time does not exist in a particular timezone
            where clocks moved forward due to DST.

            - 'shift_forward' will shift the nonexistent time forward to the
              closest existing time
            - 'shift_backward' will shift the nonexistent time backward to the
              closest existing time
            - 'NaT' will return NaT where there are nonexistent times
            - timedelta objects will shift nonexistent times by the timedelta
            - 'raise' will raise an NonExistentTimeError if there are
              nonexistent times.

        Returns
        -------
        Same type as self
            Array/Index converted to the specified time zone.

        Raises
        ------
        TypeError
            If the Datetime Array/Index is tz-aware and tz is not None.

        See Also
        --------
        DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
            one time zone to another.

        Examples
        --------
        >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
        >>> tz_naive
        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                       '2018-03-03 09:00:00'],
                      dtype='datetime64[ns]', freq='D')

        Localize DatetimeIndex in US/Eastern time zone:

        >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
        >>> tz_aware
        DatetimeIndex(['2018-03-01 09:00:00-05:00',
                       '2018-03-02 09:00:00-05:00',
                       '2018-03-03 09:00:00-05:00'],
                      dtype='datetime64[ns, US/Eastern]', freq=None)

        With the ``tz=None``, we can remove the time zone information
        while keeping the local time (not converted to UTC):

        >>> tz_aware.tz_localize(None)
        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                       '2018-03-03 09:00:00'],
                      dtype='datetime64[ns]', freq=None)

        Be careful with DST changes. When there is sequential data, pandas can
        infer the DST time:

        >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
        ...                               '2018-10-28 02:00:00',
        ...                               '2018-10-28 02:30:00',
        ...                               '2018-10-28 02:00:00',
        ...                               '2018-10-28 02:30:00',
        ...                               '2018-10-28 03:00:00',
        ...                               '2018-10-28 03:30:00']))
        >>> s.dt.tz_localize('CET', ambiguous='infer')
        0   2018-10-28 01:30:00+02:00
        1   2018-10-28 02:00:00+02:00
        2   2018-10-28 02:30:00+02:00
        3   2018-10-28 02:00:00+01:00
        4   2018-10-28 02:30:00+01:00
        5   2018-10-28 03:00:00+01:00
        6   2018-10-28 03:30:00+01:00
        dtype: datetime64[ns, CET]

        In some cases, inferring the DST is impossible. In such cases, you can
        pass an ndarray to the ambiguous parameter to set the DST explicitly

        >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
        ...                               '2018-10-28 02:36:00',
        ...                               '2018-10-28 03:46:00']))
        >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
        0   2018-10-28 01:20:00+02:00
        1   2018-10-28 02:36:00+02:00
        2   2018-10-28 03:46:00+01:00
        dtype: datetime64[ns, CET]

        If the DST transition causes nonexistent times, you can shift these
        dates forward or backwards with a timedelta object or `'shift_forward'`
        or `'shift_backwards'`.

        >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
        ...                               '2015-03-29 03:30:00']))
        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
        0   2015-03-29 03:00:00+02:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]

        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
        0   2015-03-29 01:59:59.999999999+01:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]

        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
        0   2015-03-29 03:30:00+02:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]
        """
        nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
        if nonexistent not in nonexistent_options and not isinstance(
            nonexistent, timedelta
        ):
            raise ValueError(
                "The nonexistent argument must be one of 'raise', "
                "'NaT', 'shift_forward', 'shift_backward' or "
                "a timedelta object"
            )

        if self.tz is not None:
            if tz is None:
                # remove tz info: shift stored UTC values to local wall time
                new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
            else:
                raise TypeError("Already tz-aware, use tz_convert to convert.")
        else:
            tz = timezones.maybe_get_tz(tz)
            # Convert to UTC

            new_dates = tzconversion.tz_localize_to_utc(
                self.asi8,
                tz,
                ambiguous=ambiguous,
                nonexistent=nonexistent,
                creso=self._creso,
            )
        new_dates = new_dates.view(f"M8[{self.unit}]")
        dtype = tz_to_dtype(tz, unit=self.unit)

        freq = None
        if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
            # we can preserve freq
            # TODO: Also for fixed-offsets
            freq = self.freq
        elif tz is None and self.tz is None:
            # no-op
            freq = self.freq
        return self._simple_new(new_dates, dtype=dtype, freq=freq)
1059
1060 # ----------------------------------------------------------------
1061 # Conversion Methods - Vectorized analogues of Timestamp methods
1062
1063 def to_pydatetime(self) -> npt.NDArray[np.object_]:
1064 """
1065 Return an ndarray of datetime.datetime objects.
1066
1067 Returns
1068 -------
1069 numpy.ndarray
1070 """
1071 return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)
1072
1073 def normalize(self) -> DatetimeArray:
1074 """
1075 Convert times to midnight.
1076
1077 The time component of the date-time is converted to midnight i.e.
1078 00:00:00. This is useful in cases, when the time does not matter.
1079 Length is unaltered. The timezones are unaffected.
1080
1081 This method is available on Series with datetime values under
1082 the ``.dt`` accessor, and directly on Datetime Array/Index.
1083
1084 Returns
1085 -------
1086 DatetimeArray, DatetimeIndex or Series
1087 The same type as the original data. Series will have the same
1088 name and index. DatetimeIndex will have the same name.
1089
1090 See Also
1091 --------
1092 floor : Floor the datetimes to the specified freq.
1093 ceil : Ceil the datetimes to the specified freq.
1094 round : Round the datetimes to the specified freq.
1095
1096 Examples
1097 --------
1098 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
1099 ... periods=3, tz='Asia/Calcutta')
1100 >>> idx
1101 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1102 '2014-08-01 11:00:00+05:30',
1103 '2014-08-01 12:00:00+05:30'],
1104 dtype='datetime64[ns, Asia/Calcutta]', freq='H')
1105 >>> idx.normalize()
1106 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1107 '2014-08-01 00:00:00+05:30',
1108 '2014-08-01 00:00:00+05:30'],
1109 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1110 """
1111 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso)
1112 dt64_values = new_values.view(self._ndarray.dtype)
1113
1114 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
1115 dta = dta._with_freq("infer")
1116 if self.tz is not None:
1117 dta = dta.tz_localize(self.tz)
1118 return dta
1119
1120 def to_period(self, freq=None) -> PeriodArray:
1121 """
1122 Cast to PeriodArray/Index at a particular frequency.
1123
1124 Converts DatetimeArray/Index to PeriodArray/Index.
1125
1126 Parameters
1127 ----------
1128 freq : str or Offset, optional
1129 One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
1130 or an Offset object. Will be inferred by default.
1131
1132 Returns
1133 -------
1134 PeriodArray/Index
1135
1136 Raises
1137 ------
1138 ValueError
1139 When converting a DatetimeArray/Index with non-regular values,
1140 so that a frequency cannot be inferred.
1141
1142 See Also
1143 --------
1144 PeriodIndex: Immutable ndarray holding ordinal values.
1145 DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.
1146
1147 Examples
1148 --------
1149 >>> df = pd.DataFrame({"y": [1, 2, 3]},
1150 ... index=pd.to_datetime(["2000-03-31 00:00:00",
1151 ... "2000-05-31 00:00:00",
1152 ... "2000-08-31 00:00:00"]))
1153 >>> df.index.to_period("M")
1154 PeriodIndex(['2000-03', '2000-05', '2000-08'],
1155 dtype='period[M]')
1156
1157 Infer the daily frequency
1158
1159 >>> idx = pd.date_range("2017-01-01", periods=2)
1160 >>> idx.to_period()
1161 PeriodIndex(['2017-01-01', '2017-01-02'],
1162 dtype='period[D]')
1163 """
1164 from pandas.core.arrays import PeriodArray
1165
1166 if self.tz is not None:
1167 warnings.warn(
1168 "Converting to PeriodArray/Index representation "
1169 "will drop timezone information.",
1170 UserWarning,
1171 stacklevel=find_stack_level(),
1172 )
1173
1174 if freq is None:
1175 freq = self.freqstr or self.inferred_freq
1176
1177 if freq is None:
1178 raise ValueError(
1179 "You must pass a freq argument as current index has none."
1180 )
1181
1182 res = get_period_alias(freq)
1183
1184 # https://github.com/pandas-dev/pandas/issues/33358
1185 if res is None:
1186 res = freq
1187
1188 freq = res
1189
1190 return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
1191
1192 # -----------------------------------------------------------------
1193 # Properties - Vectorized Timestamp Properties/Methods
1194
1195 def month_name(self, locale=None) -> npt.NDArray[np.object_]:
1196 """
1197 Return the month names with specified locale.
1198
1199 Parameters
1200 ----------
1201 locale : str, optional
1202 Locale determining the language in which to return the month name.
1203 Default is English locale (``'en_US.utf8'``). Use the command
1204 ``locale -a`` on your terminal on Unix systems to find your locale
1205 language code.
1206
1207 Returns
1208 -------
1209 Series or Index
1210 Series or Index of month names.
1211
1212 Examples
1213 --------
1214 >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3))
1215 >>> s
1216 0 2018-01-31
1217 1 2018-02-28
1218 2 2018-03-31
1219 dtype: datetime64[ns]
1220 >>> s.dt.month_name()
1221 0 January
1222 1 February
1223 2 March
1224 dtype: object
1225
1226 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1227 >>> idx
1228 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1229 dtype='datetime64[ns]', freq='M')
1230 >>> idx.month_name()
1231 Index(['January', 'February', 'March'], dtype='object')
1232
1233 Using the ``locale`` parameter you can set a different locale language,
1234 for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month
1235 names in Brazilian Portuguese language.
1236
1237 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1238 >>> idx
1239 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1240 dtype='datetime64[ns]', freq='M')
1241 >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
1242 Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
1243 """
1244 values = self._local_timestamps()
1245
1246 result = fields.get_date_name_field(
1247 values, "month_name", locale=locale, reso=self._creso
1248 )
1249 result = self._maybe_mask_results(result, fill_value=None)
1250 return result
1251
1252 def day_name(self, locale=None) -> npt.NDArray[np.object_]:
1253 """
1254 Return the day names with specified locale.
1255
1256 Parameters
1257 ----------
1258 locale : str, optional
1259 Locale determining the language in which to return the day name.
1260 Default is English locale (``'en_US.utf8'``). Use the command
1261 ``locale -a`` on your terminal on Unix systems to find your locale
1262 language code.
1263
1264 Returns
1265 -------
1266 Series or Index
1267 Series or Index of day names.
1268
1269 Examples
1270 --------
1271 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
1272 >>> s
1273 0 2018-01-01
1274 1 2018-01-02
1275 2 2018-01-03
1276 dtype: datetime64[ns]
1277 >>> s.dt.day_name()
1278 0 Monday
1279 1 Tuesday
1280 2 Wednesday
1281 dtype: object
1282
1283 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1284 >>> idx
1285 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1286 dtype='datetime64[ns]', freq='D')
1287 >>> idx.day_name()
1288 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1289
1290 Using the ``locale`` parameter you can set a different locale language,
1291 for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day
1292 names in Brazilian Portuguese language.
1293
1294 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1295 >>> idx
1296 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1297 dtype='datetime64[ns]', freq='D')
1298 >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
1299 Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
1300 """
1301 values = self._local_timestamps()
1302
1303 result = fields.get_date_name_field(
1304 values, "day_name", locale=locale, reso=self._creso
1305 )
1306 result = self._maybe_mask_results(result, fill_value=None)
1307 return result
1308
1309 @property
1310 def time(self) -> npt.NDArray[np.object_]:
1311 """
1312 Returns numpy array of :class:`datetime.time` objects.
1313
1314 The time part of the Timestamps.
1315 """
1316 # If the Timestamps have a timezone that is not UTC,
1317 # convert them into their i8 representation while
1318 # keeping their timezone and not using UTC
1319 timestamps = self._local_timestamps()
1320
1321 return ints_to_pydatetime(timestamps, box="time", reso=self._creso)
1322
1323 @property
1324 def timetz(self) -> npt.NDArray[np.object_]:
1325 """
1326 Returns numpy array of :class:`datetime.time` objects with timezones.
1327
1328 The time part of the Timestamps.
1329 """
1330 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso)
1331
1332 @property
1333 def date(self) -> npt.NDArray[np.object_]:
1334 """
1335 Returns numpy array of python :class:`datetime.date` objects.
1336
1337 Namely, the date part of Timestamps without time and
1338 timezone information.
1339 """
1340 # If the Timestamps have a timezone that is not UTC,
1341 # convert them into their i8 representation while
1342 # keeping their timezone and not using UTC
1343 timestamps = self._local_timestamps()
1344
1345 return ints_to_pydatetime(timestamps, box="date", reso=self._creso)
1346
1347 def isocalendar(self) -> DataFrame:
1348 """
1349 Calculate year, week, and day according to the ISO 8601 standard.
1350
1351 .. versionadded:: 1.1.0
1352
1353 Returns
1354 -------
1355 DataFrame
1356 With columns year, week and day.
1357
1358 See Also
1359 --------
1360 Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
1361 week number, and weekday for the given Timestamp object.
1362 datetime.date.isocalendar : Return a named tuple object with
1363 three components: year, week and weekday.
1364
1365 Examples
1366 --------
1367 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1368 >>> idx.isocalendar()
1369 year week day
1370 2019-12-29 2019 52 7
1371 2019-12-30 2020 1 1
1372 2019-12-31 2020 1 2
1373 2020-01-01 2020 1 3
1374 >>> idx.isocalendar().week
1375 2019-12-29 52
1376 2019-12-30 1
1377 2019-12-31 1
1378 2020-01-01 1
1379 Freq: D, Name: week, dtype: UInt32
1380 """
1381 from pandas import DataFrame
1382
1383 values = self._local_timestamps()
1384 sarray = fields.build_isocalendar_sarray(values, reso=self._creso)
1385 iso_calendar_df = DataFrame(
1386 sarray, columns=["year", "week", "day"], dtype="UInt32"
1387 )
1388 if self._hasna:
1389 iso_calendar_df.iloc[self._isnan] = None
1390 return iso_calendar_df
1391
    # Numeric datetime-field accessors. Each is generated by the module-level
    # `_field_accessor` factory: the first argument is the property name, the
    # second the field code dispatched to pandas._libs.tslibs.fields, and the
    # long string becomes the resulting property's docstring.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="Y")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int32
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="M")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="T")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
1576 _dayofweek_doc = """
1577 The day of the week with Monday=0, Sunday=6.
1578
1579 Return the day of the week. It is assumed the week starts on
1580 Monday, which is denoted by 0 and ends on Sunday which is denoted
1581 by 6. This method is available on both Series with datetime
1582 values (using the `dt` accessor) or DatetimeIndex.
1583
1584 Returns
1585 -------
1586 Series or Index
1587 Containing integers indicating the day number.
1588
1589 See Also
1590 --------
1591 Series.dt.dayofweek : Alias.
1592 Series.dt.weekday : Alias.
1593 Series.dt.day_name : Returns the name of the day of the week.
1594
1595 Examples
1596 --------
1597 >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
1598 >>> s.dt.dayofweek
1599 2016-12-31 5
1600 2017-01-01 6
1601 2017-01-02 0
1602 2017-01-03 1
1603 2017-01-04 2
1604 2017-01-05 3
1605 2017-01-06 4
1606 2017-01-07 5
1607 2017-01-08 6
1608 Freq: D, dtype: int32
1609 """
1610 day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
1611 dayofweek = day_of_week
1612 weekday = day_of_week
1613
1614 day_of_year = _field_accessor(
1615 "dayofyear",
1616 "doy",
1617 """
1618 The ordinal day of the year.
1619 """,
1620 )
1621 dayofyear = day_of_year
1622 quarter = _field_accessor(
1623 "quarter",
1624 "q",
1625 """
1626 The quarter of the date.
1627 """,
1628 )
1629 days_in_month = _field_accessor(
1630 "days_in_month",
1631 "dim",
1632 """
1633 The number of days in the month.
1634 """,
1635 )
1636 daysinmonth = days_in_month
1637 _is_month_doc = """
1638 Indicates whether the date is the {first_or_last} day of the month.
1639
1640 Returns
1641 -------
1642 Series or array
1643 For Series, returns a Series with boolean values.
1644 For DatetimeIndex, returns a boolean array.
1645
1646 See Also
1647 --------
1648 is_month_start : Return a boolean indicating whether the date
1649 is the first day of the month.
1650 is_month_end : Return a boolean indicating whether the date
1651 is the last day of the month.
1652
1653 Examples
1654 --------
1655 This method is available on Series with datetime values under
1656 the ``.dt`` accessor, and directly on DatetimeIndex.
1657
1658 >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
1659 >>> s
1660 0 2018-02-27
1661 1 2018-02-28
1662 2 2018-03-01
1663 dtype: datetime64[ns]
1664 >>> s.dt.is_month_start
1665 0 False
1666 1 False
1667 2 True
1668 dtype: bool
1669 >>> s.dt.is_month_end
1670 0 False
1671 1 True
1672 2 False
1673 dtype: bool
1674
1675 >>> idx = pd.date_range("2018-02-27", periods=3)
1676 >>> idx.is_month_start
1677 array([False, False, True])
1678 >>> idx.is_month_end
1679 array([False, True, False])
1680 """
1681 is_month_start = _field_accessor(
1682 "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
1683 )
1684
1685 is_month_end = _field_accessor(
1686 "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
1687 )
1688
1689 is_quarter_start = _field_accessor(
1690 "is_quarter_start",
1691 "is_quarter_start",
1692 """
1693 Indicator for whether the date is the first day of a quarter.
1694
1695 Returns
1696 -------
1697 is_quarter_start : Series or DatetimeIndex
1698 The same type as the original data with boolean values. Series will
1699 have the same name and index. DatetimeIndex will have the same
1700 name.
1701
1702 See Also
1703 --------
1704 quarter : Return the quarter of the date.
1705 is_quarter_end : Similar property for indicating the quarter end.
1706
1707 Examples
1708 --------
1709 This method is available on Series with datetime values under
1710 the ``.dt`` accessor, and directly on DatetimeIndex.
1711
1712 >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
1713 ... periods=4)})
1714 >>> df.assign(quarter=df.dates.dt.quarter,
1715 ... is_quarter_start=df.dates.dt.is_quarter_start)
1716 dates quarter is_quarter_start
1717 0 2017-03-30 1 False
1718 1 2017-03-31 1 False
1719 2 2017-04-01 2 True
1720 3 2017-04-02 2 False
1721
1722 >>> idx = pd.date_range('2017-03-30', periods=4)
1723 >>> idx
1724 DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
1725 dtype='datetime64[ns]', freq='D')
1726
1727 >>> idx.is_quarter_start
1728 array([False, False, True, False])
1729 """,
1730 )
1731 is_quarter_end = _field_accessor(
1732 "is_quarter_end",
1733 "is_quarter_end",
1734 """
1735 Indicator for whether the date is the last day of a quarter.
1736
1737 Returns
1738 -------
1739 is_quarter_end : Series or DatetimeIndex
1740 The same type as the original data with boolean values. Series will
1741 have the same name and index. DatetimeIndex will have the same
1742 name.
1743
1744 See Also
1745 --------
1746 quarter : Return the quarter of the date.
1747 is_quarter_start : Similar property indicating the quarter start.
1748
1749 Examples
1750 --------
1751 This method is available on Series with datetime values under
1752 the ``.dt`` accessor, and directly on DatetimeIndex.
1753
1754 >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
1755 ... periods=4)})
1756 >>> df.assign(quarter=df.dates.dt.quarter,
1757 ... is_quarter_end=df.dates.dt.is_quarter_end)
1758 dates quarter is_quarter_end
1759 0 2017-03-30 1 False
1760 1 2017-03-31 1 True
1761 2 2017-04-01 2 False
1762 3 2017-04-02 2 False
1763
1764 >>> idx = pd.date_range('2017-03-30', periods=4)
1765 >>> idx
1766 DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
1767 dtype='datetime64[ns]', freq='D')
1768
1769 >>> idx.is_quarter_end
1770 array([False, True, False, False])
1771 """,
1772 )
1773 is_year_start = _field_accessor(
1774 "is_year_start",
1775 "is_year_start",
1776 """
1777 Indicate whether the date is the first day of a year.
1778
1779 Returns
1780 -------
1781 Series or DatetimeIndex
1782 The same type as the original data with boolean values. Series will
1783 have the same name and index. DatetimeIndex will have the same
1784 name.
1785
1786 See Also
1787 --------
1788 is_year_end : Similar property indicating the last day of the year.
1789
1790 Examples
1791 --------
1792 This method is available on Series with datetime values under
1793 the ``.dt`` accessor, and directly on DatetimeIndex.
1794
1795 >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
1796 >>> dates
1797 0 2017-12-30
1798 1 2017-12-31
1799 2 2018-01-01
1800 dtype: datetime64[ns]
1801
1802 >>> dates.dt.is_year_start
1803 0 False
1804 1 False
1805 2 True
1806 dtype: bool
1807
1808 >>> idx = pd.date_range("2017-12-30", periods=3)
1809 >>> idx
1810 DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
1811 dtype='datetime64[ns]', freq='D')
1812
1813 >>> idx.is_year_start
1814 array([False, False, True])
1815 """,
1816 )
1817 is_year_end = _field_accessor(
1818 "is_year_end",
1819 "is_year_end",
1820 """
1821 Indicate whether the date is the last day of the year.
1822
1823 Returns
1824 -------
1825 Series or DatetimeIndex
1826 The same type as the original data with boolean values. Series will
1827 have the same name and index. DatetimeIndex will have the same
1828 name.
1829
1830 See Also
1831 --------
1832 is_year_start : Similar property indicating the start of the year.
1833
1834 Examples
1835 --------
1836 This method is available on Series with datetime values under
1837 the ``.dt`` accessor, and directly on DatetimeIndex.
1838
1839 >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
1840 >>> dates
1841 0 2017-12-30
1842 1 2017-12-31
1843 2 2018-01-01
1844 dtype: datetime64[ns]
1845
1846 >>> dates.dt.is_year_end
1847 0 False
1848 1 True
1849 2 False
1850 dtype: bool
1851
1852 >>> idx = pd.date_range("2017-12-30", periods=3)
1853 >>> idx
1854 DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
1855 dtype='datetime64[ns]', freq='D')
1856
1857 >>> idx.is_year_end
1858 array([False, True, False])
1859 """,
1860 )
1861 is_leap_year = _field_accessor(
1862 "is_leap_year",
1863 "is_leap_year",
1864 """
1865 Boolean indicator if the date belongs to a leap year.
1866
1867 A leap year is a year, which has 366 days (instead of 365) including
1868 29th of February as an intercalary day.
1869 Leap years are years which are multiples of four with the exception
1870 of years divisible by 100 but not by 400.
1871
1872 Returns
1873 -------
1874 Series or ndarray
1875 Booleans indicating if dates belong to a leap year.
1876
1877 Examples
1878 --------
1879 This method is available on Series with datetime values under
1880 the ``.dt`` accessor, and directly on DatetimeIndex.
1881
1882 >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
1883 >>> idx
1884 DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
1885 dtype='datetime64[ns]', freq='A-DEC')
1886 >>> idx.is_leap_year
1887 array([ True, False, False])
1888
1889 >>> dates_series = pd.Series(idx)
1890 >>> dates_series
1891 0 2012-12-31
1892 1 2013-12-31
1893 2 2014-12-31
1894 dtype: datetime64[ns]
1895 >>> dates_series.dt.is_leap_year
1896 0 True
1897 1 False
1898 2 False
1899 dtype: bool
1900 """,
1901 )
1902
1903 def to_julian_date(self) -> npt.NDArray[np.float64]:
1904 """
1905 Convert Datetime Array to float64 ndarray of Julian Dates.
1906 0 Julian date is noon January 1, 4713 BC.
1907 https://en.wikipedia.org/wiki/Julian_day
1908 """
1909
1910 # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
1911 year = np.asarray(self.year)
1912 month = np.asarray(self.month)
1913 day = np.asarray(self.day)
1914 testarr = month < 3
1915 year[testarr] -= 1
1916 month[testarr] += 12
1917 return (
1918 day
1919 + np.fix((153 * month - 457) / 5)
1920 + 365 * year
1921 + np.floor(year / 4)
1922 - np.floor(year / 100)
1923 + np.floor(year / 400)
1924 + 1_721_118.5
1925 + (
1926 self.hour
1927 + self.minute / 60
1928 + self.second / 3600
1929 + self.microsecond / 3600 / 10**6
1930 + self.nanosecond / 3600 / 10**9
1931 )
1932 / 24
1933 )
1934
1935 # -----------------------------------------------------------------
1936 # Reductions
1937
1938 def std(
1939 self,
1940 axis=None,
1941 dtype=None,
1942 out=None,
1943 ddof: int = 1,
1944 keepdims: bool = False,
1945 skipna: bool = True,
1946 ):
1947 """
1948 Return sample standard deviation over requested axis.
1949
1950 Normalized by N-1 by default. This can be changed using the ddof argument
1951
1952 Parameters
1953 ----------
1954 axis : int optional, default None
1955 Axis for the function to be applied on.
1956 For `Series` this parameter is unused and defaults to `None`.
1957 ddof : int, default 1
1958 Degrees of Freedom. The divisor used in calculations is N - ddof,
1959 where N represents the number of elements.
1960 skipna : bool, default True
1961 Exclude NA/null values. If an entire row/column is NA, the result will be
1962 NA.
1963
1964 Returns
1965 -------
1966 Timedelta
1967 """
1968 # Because std is translation-invariant, we can get self.std
1969 # by calculating (self - Timestamp(0)).std, and we can do it
1970 # without creating a copy by using a view on self._ndarray
1971 from pandas.core.arrays import TimedeltaArray
1972
1973 # Find the td64 dtype with the same resolution as our dt64 dtype
1974 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
1975 dtype = np.dtype(dtype_str)
1976
1977 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype)
1978
1979 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
1980
1981
1982# -------------------------------------------------------------------
1983# Constructor Helpers
1984
1985
def _sequence_to_dt64ns(
    data,
    *,
    copy: bool = False,
    tz: tzinfo | None = None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    out_unit: str | None = None,
):
    """
    Convert a datetime-like sequence to a datetime64 ndarray plus metadata.

    Parameters
    ----------
    data : list-like
    copy : bool, default False
    tz : tzinfo or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    out_unit : str or None, default None
        Desired output resolution.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    inferred_freq = None

    # Coerce list-likes/scalars into something array-like; may flip `copy`
    # off when a new array was already materialized.
    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="DatetimeArray"
    )

    if isinstance(data, DatetimeArray):
        inferred_freq = data.freq

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    out_dtype = DT64NS_DTYPE
    if out_unit is not None:
        out_dtype = np.dtype(f"M8[{out_unit}]")

    if (
        is_object_dtype(data_dtype)
        or is_string_dtype(data_dtype)
        or is_sparse(data_dtype)
    ):
        # TODO: We do not have tests specific to string-dtypes,
        #  also complex or categorical or other extension
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # object array of ints: treat as epoch timestamps (fall through
            # to the integer branch at the bottom).
            data = data.astype(np.int64)
        elif tz is not None and ambiguous == "raise":
            # TODO: yearfirst/dayfirst/etc?
            # Fast path: parse strings/objects directly in the target tz.
            obj_data = np.asarray(data, dtype=object)
            i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
            return i8data.view(DT64NS_DTYPE), tz, None
        else:
            # data comes back here as either i8 to denote UTC timestamps
            # or M8[ns] to denote wall times
            data, inferred_tz = objects_to_datetime64ns(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
            )
            if tz and inferred_tz:
                # two timezones: convert to intended from base UTC repr
                assert data.dtype == "i8"
                # GH#42505
                # by convention, these are _already_ UTC, e.g
                return data.view(DT64NS_DTYPE), tz, None

            elif inferred_tz:
                tz = inferred_tz

        data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if is_datetime64tz_dtype(data_dtype):
        # DatetimeArray -> ndarray
        # tz-aware input: reconcile explicit tz with the data's own tz.
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif is_datetime64_dtype(data_dtype):
        # tz-naive DatetimeArray or ndarray[datetime64]
        data = getattr(data, "_ndarray", data)
        new_dtype = data.dtype
        data_unit = get_unit_from_dtype(new_dtype)
        if not is_supported_unit(data_unit):
            # Cast to the nearest supported unit, generally "s"
            new_reso = get_supported_reso(data_unit)
            new_unit = npy_unit_to_abbrev(new_reso)
            new_dtype = np.dtype(f"M8[{new_unit}]")
            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
            data_unit = get_unit_from_dtype(new_dtype)
            copy = False

        if data.dtype.byteorder == ">":
            # TODO: better way to handle this?  non-copying alternative?
            #  without this, test_constructor_datetime64_bigendian fails
            data = data.astype(data.dtype.newbyteorder("<"))
            new_dtype = data.dtype
            copy = False

        if tz is not None:
            # Convert tz-naive to UTC
            # TODO: if tz is UTC, are there situations where we *don't* want a
            #  copy?  tz_localize_to_utc always makes one.
            # tz_localize_to_utc operates on 1-D i8, so flatten and restore.
            shape = data.shape
            if data.ndim > 1:
                data = data.ravel()

            data = tzconversion.tz_localize_to_utc(
                data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
            )
            data = data.view(new_dtype)
            data = data.reshape(shape)

        assert data.dtype == new_dtype, data.dtype
        result = data

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
        result = data.view(out_dtype)

    if copy:
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype.kind == "M"
    assert result.dtype != "M8"
    assert is_supported_unit(get_unit_from_dtype(result.dtype))
    return result, tz, inferred_freq
2135
2136
def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc: bool = False,
    errors: DateTimeErrorChoices = "raise",
    allow_object: bool = False,
):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert/localize timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    """
    assert errors in ["raise", "ignore", "coerce"]

    # Coerce str-dtype (and anything else) up front to object dtype.
    data = np.array(data, copy=False, dtype=np.object_)

    result, tz_parsed = tslib.array_to_datetime(
        data,
        errors=errors,
        utc=utc,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array is in UTC.
        # Return i8 values to denote unix timestamps.
        return result.view("i8"), tz_parsed

    if is_datetime64_dtype(result):
        # M8[ns] denotes wall-times; since tz is None the distinction is thin.
        return result, tz_parsed

    if is_object_dtype(result):
        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
        #  array is allowed.  When called via `pd.DatetimeIndex`, we can
        #  only accept datetime64 dtype, so raise TypeError if object-dtype
        #  is returned, as that indicates the values can be recognized as
        #  datetimes but they have conflicting timezones/awareness
        if allow_object:
            return result, tz_parsed
        raise TypeError(result)

    # GH#23675 this TypeError should never be hit, whereas the TypeError
    #  in the object-dtype branch above is reachable.
    raise TypeError(result)  # pragma: no cover
2207
2208
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
    """
    Apply dtype-based conversion conventions to the input, raising on
    dtypes that cannot be interpreted as datetimes.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool
        Set to False when a conversion already produced a fresh array.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    if not hasattr(data, "dtype"):
        # e.g. collections.deque — nothing to inspect
        return data, copy

    dtype = data.dtype

    if is_float_dtype(dtype):
        # pre-2.0 we treated these as wall-times, inconsistent with ints
        # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
        # Note: data.astype(np.int64) fails ARM tests, see
        # https://github.com/pandas-dev/pandas/issues/49468.
        data = data.astype(DT64NS_DTYPE).view("i8")
        copy = False

    elif is_timedelta64_dtype(dtype) or is_bool_dtype(dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")

    elif is_period_dtype(dtype):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )

    elif is_extension_array_dtype(dtype) and not is_datetime64tz_dtype(dtype):
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy
2257
2258
2259# -------------------------------------------------------------------
2260# Validation and Inference
2261
2262
2263def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
2264 """
2265 If a timezone is inferred from data, check that it is compatible with
2266 the user-provided timezone, if any.
2267
2268 Parameters
2269 ----------
2270 tz : tzinfo or None
2271 inferred_tz : tzinfo or None
2272
2273 Returns
2274 -------
2275 tz : tzinfo or None
2276
2277 Raises
2278 ------
2279 TypeError : if both timezones are present but do not match
2280 """
2281 if tz is None:
2282 tz = inferred_tz
2283 elif inferred_tz is None:
2284 pass
2285 elif not timezones.tz_compare(tz, inferred_tz):
2286 raise TypeError(
2287 f"data is already tz-aware {inferred_tz}, unable to "
2288 f"set specified tz: {tz}"
2289 )
2290 return tz
2291
2292
2293def _validate_dt64_dtype(dtype):
2294 """
2295 Check that a dtype, if passed, represents either a numpy datetime64[ns]
2296 dtype or a pandas DatetimeTZDtype.
2297
2298 Parameters
2299 ----------
2300 dtype : object
2301
2302 Returns
2303 -------
2304 dtype : None, numpy.dtype, or DatetimeTZDtype
2305
2306 Raises
2307 ------
2308 ValueError : invalid dtype
2309
2310 Notes
2311 -----
2312 Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
2313 tz errors to go through
2314 """
2315 if dtype is not None:
2316 dtype = pandas_dtype(dtype)
2317 if is_dtype_equal(dtype, np.dtype("M8")):
2318 # no precision, disallowed GH#24806
2319 msg = (
2320 "Passing in 'datetime64' dtype with no precision is not allowed. "
2321 "Please pass in 'datetime64[ns]' instead."
2322 )
2323 raise ValueError(msg)
2324
2325 if (
2326 isinstance(dtype, np.dtype)
2327 and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype)))
2328 ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
2329 raise ValueError(
2330 f"Unexpected value for 'dtype': '{dtype}'. "
2331 "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
2332 "'datetime64[ns]' or DatetimeTZDtype'."
2333 )
2334
2335 if getattr(dtype, "tz", None):
2336 # https://github.com/pandas-dev/pandas/issues/18595
2337 # Ensure that we have a standard timezone for pytz objects.
2338 # Without this, things like adding an array of timedeltas and
2339 # a tz-aware Timestamp (with a tz specific to its datetime) will
2340 # be incorrect(ish?) for the array as a whole
2341 dtype = cast(DatetimeTZDtype, dtype)
2342 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
2343
2344 return dtype
2345
2346
2347def _validate_tz_from_dtype(
2348 dtype, tz: tzinfo | None, explicit_tz_none: bool = False
2349) -> tzinfo | None:
2350 """
2351 If the given dtype is a DatetimeTZDtype, extract the implied
2352 tzinfo object from it and check that it does not conflict with the given
2353 tz.
2354
2355 Parameters
2356 ----------
2357 dtype : dtype, str
2358 tz : None, tzinfo
2359 explicit_tz_none : bool, default False
2360 Whether tz=None was passed explicitly, as opposed to lib.no_default.
2361
2362 Returns
2363 -------
2364 tz : consensus tzinfo
2365
2366 Raises
2367 ------
2368 ValueError : on tzinfo mismatch
2369 """
2370 if dtype is not None:
2371 if isinstance(dtype, str):
2372 try:
2373 dtype = DatetimeTZDtype.construct_from_string(dtype)
2374 except TypeError:
2375 # Things like `datetime64[ns]`, which is OK for the
2376 # constructors, but also nonsense, which should be validated
2377 # but not by us. We *do* allow non-existent tz errors to
2378 # go through
2379 pass
2380 dtz = getattr(dtype, "tz", None)
2381 if dtz is not None:
2382 if tz is not None and not timezones.tz_compare(tz, dtz):
2383 raise ValueError("cannot supply both a tz and a dtype with a tz")
2384 if explicit_tz_none:
2385 raise ValueError("Cannot pass both a timezone-aware dtype and tz=None")
2386 tz = dtz
2387
2388 if tz is not None and is_datetime64_dtype(dtype):
2389 # We also need to check for the case where the user passed a
2390 # tz-naive dtype (i.e. datetime64[ns])
2391 if tz is not None and not timezones.tz_compare(tz, dtz):
2392 raise ValueError(
2393 "cannot supply both a tz and a "
2394 "timezone-naive dtype (i.e. datetime64[ns])"
2395 )
2396
2397 return tz
2398
2399
2400def _infer_tz_from_endpoints(
2401 start: Timestamp, end: Timestamp, tz: tzinfo | None
2402) -> tzinfo | None:
2403 """
2404 If a timezone is not explicitly given via `tz`, see if one can
2405 be inferred from the `start` and `end` endpoints. If more than one
2406 of these inputs provides a timezone, require that they all agree.
2407
2408 Parameters
2409 ----------
2410 start : Timestamp
2411 end : Timestamp
2412 tz : tzinfo or None
2413
2414 Returns
2415 -------
2416 tz : tzinfo or None
2417
2418 Raises
2419 ------
2420 TypeError : if start and end timezones do not agree
2421 """
2422 try:
2423 inferred_tz = timezones.infer_tzinfo(start, end)
2424 except AssertionError as err:
2425 # infer_tzinfo raises AssertionError if passed mismatched timezones
2426 raise TypeError(
2427 "Start and end cannot both be tz-aware with different timezones"
2428 ) from err
2429
2430 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2431 tz = timezones.maybe_get_tz(tz)
2432
2433 if tz is not None and inferred_tz is not None:
2434 if not timezones.tz_compare(inferred_tz, tz):
2435 raise AssertionError("Inferred time zone not equal to passed time zone")
2436
2437 elif inferred_tz is not None:
2438 tz = inferred_tz
2439
2440 return tz
2441
2442
2443def _maybe_normalize_endpoints(
2444 start: Timestamp | None, end: Timestamp | None, normalize: bool
2445):
2446 if normalize:
2447 if start is not None:
2448 start = start.normalize()
2449
2450 if end is not None:
2451 end = end.normalize()
2452
2453 return start, end
2454
2455
2456def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent):
2457 """
2458 Localize a start or end Timestamp to the timezone of the corresponding
2459 start or end Timestamp
2460
2461 Parameters
2462 ----------
2463 ts : start or end Timestamp to potentially localize
2464 is_none : argument that should be None
2465 is_not_none : argument that should not be None
2466 freq : Tick, DateOffset, or None
2467 tz : str, timezone object or None
2468 ambiguous: str, localization behavior for ambiguous times
2469 nonexistent: str, localization behavior for nonexistent times
2470
2471 Returns
2472 -------
2473 ts : Timestamp
2474 """
2475 # Make sure start and end are timezone localized if:
2476 # 1) freq = a Timedelta-like frequency (Tick)
2477 # 2) freq = None i.e. generating a linspaced range
2478 if is_none is None and is_not_none is not None:
2479 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2480 # we have historically defaulted ambiguous=False
2481 ambiguous = ambiguous if ambiguous != "infer" else False
2482 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2483 if isinstance(freq, Tick) or freq is None:
2484 localize_args["tz"] = tz
2485 ts = ts.tz_localize(**localize_args)
2486 return ts
2487
2488
def _generate_range(
    start: Timestamp | None,
    end: Timestamp | None,
    periods: int | None,
    offset: BaseOffset,
    *,
    unit: str,
) -> Iterator[Timestamp]:
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : Timestamp or None
    end : Timestamp or None
    periods : int or None
    offset : DateOffset
    unit : str
        Resolution abbreviation (e.g. "s", "ms", "us", "ns") applied to
        the yielded Timestamps via ``as_unit``.

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    offset = to_offset(offset)

    # Timestamp(None) yields NaT, which serves as the "endpoint not
    # provided" marker below; a real endpoint is converted to `unit`.
    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    start = Timestamp(start)  # type: ignore[arg-type]
    if start is not NaT:
        start = start.as_unit(unit)
    else:
        start = None

    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    end = Timestamp(end)  # type: ignore[arg-type]
    if end is not NaT:
        end = end.as_unit(unit)
    else:
        end = None

    # Snap off-offset endpoints inward so all yielded dates land on-offset.
    # NOTE(review): because this is an elif, end is only rolled back when
    # start is absent or already on-offset — presumably intentional, but
    # worth confirming against date_range expectations.
    if start and not offset.is_on_offset(start):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        start = offset.rollforward(start)  # type: ignore[assignment]

    elif end and not offset.is_on_offset(end):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        end = offset.rollback(end)  # type: ignore[assignment]

    # Forward stride with end before start: nothing to emit.
    # Unsupported operand types for < ("Timestamp" and "None")
    if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
        end = None
        periods = 0

    # Derive whichever endpoint is missing from the other plus `periods`.
    if end is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        end = start + (periods - 1) * offset  # type: ignore[operator]

    if start is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        start = end - (periods - 1) * offset  # type: ignore[operator]

    start = cast(Timestamp, start)
    end = cast(Timestamp, end)

    cur = start
    if offset.n >= 0:
        # Non-negative stride: walk forward, yielding until we reach end.
        while cur <= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date <= cur:
                # Guard against a no-op offset producing an infinite loop.
                raise ValueError(f"Offset {offset} did not increment date")
            cur = next_date
    else:
        # Negative stride: walk backward from start down to end.
        while cur >= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date >= cur:
                # Guard against a no-op offset producing an infinite loop.
                raise ValueError(f"Offset {offset} did not decrement date")
            cur = next_date