1from __future__ import annotations
2
3from datetime import (
4 datetime,
5 time,
6 timedelta,
7 tzinfo,
8)
9from typing import (
10 TYPE_CHECKING,
11 Iterator,
12 cast,
13)
14import warnings
15
16import numpy as np
17
18from pandas._libs import (
19 lib,
20 tslib,
21)
22from pandas._libs.tslibs import (
23 BaseOffset,
24 NaT,
25 NaTType,
26 Resolution,
27 Timestamp,
28 astype_overflowsafe,
29 fields,
30 get_resolution,
31 get_supported_reso,
32 get_unit_from_dtype,
33 ints_to_pydatetime,
34 is_date_array_normalized,
35 is_supported_unit,
36 is_unitless,
37 normalize_i8_timestamps,
38 npy_unit_to_abbrev,
39 timezones,
40 to_offset,
41 tz_convert_from_utc,
42 tzconversion,
43)
44from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
45from pandas._typing import (
46 DateTimeErrorChoices,
47 IntervalClosedType,
48 TimeAmbiguous,
49 TimeNonexistent,
50 npt,
51)
52from pandas.errors import PerformanceWarning
53from pandas.util._exceptions import find_stack_level
54from pandas.util._validators import validate_inclusive
55
56from pandas.core.dtypes.common import (
57 DT64NS_DTYPE,
58 INT64_DTYPE,
59 is_bool_dtype,
60 is_datetime64_any_dtype,
61 is_datetime64_dtype,
62 is_datetime64tz_dtype,
63 is_dtype_equal,
64 is_extension_array_dtype,
65 is_float_dtype,
66 is_object_dtype,
67 is_period_dtype,
68 is_sparse,
69 is_string_dtype,
70 is_timedelta64_dtype,
71 pandas_dtype,
72)
73from pandas.core.dtypes.dtypes import (
74 DatetimeTZDtype,
75 ExtensionDtype,
76)
77from pandas.core.dtypes.missing import isna
78
79from pandas.core.arrays import datetimelike as dtl
80from pandas.core.arrays._ranges import generate_regular_range
81import pandas.core.common as com
82
83from pandas.tseries.frequencies import get_period_alias
84from pandas.tseries.offsets import (
85 Day,
86 Tick,
87)
88
89if TYPE_CHECKING:
90 from pandas import DataFrame
91 from pandas.core.arrays import PeriodArray
92
# Cached midnight time-of-day; used when checking for "no time" components.
_midnight = time(0, 0)
94
95
def tz_to_dtype(
    tz: tzinfo | None, unit: str = "ns"
) -> np.dtype | DatetimeTZDtype:
    """
    Return a datetime64[unit] dtype appropriate for the given timezone.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone for the dtype; None means tz-naive.
    unit : str, default "ns"
        Resolution abbreviation ("s", "ms", "us", "ns").

    Returns
    -------
    np.dtype or DatetimeTZDtype
        ``np.dtype("M8[unit]")`` when tz is None, else a DatetimeTZDtype.
    """
    if tz is None:
        return np.dtype(f"M8[{unit}]")
    else:
        return DatetimeTZDtype(tz=tz, unit=unit)
113
114
115def _field_accessor(name: str, field: str, docstring=None):
116 def f(self):
117 values = self._local_timestamps()
118
119 if field in self._bool_ops:
120 result: np.ndarray
121
122 if field.endswith(("start", "end")):
123 freq = self.freq
124 month_kw = 12
125 if freq:
126 kwds = freq.kwds
127 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
128
129 result = fields.get_start_end_field(
130 values, field, self.freqstr, month_kw, reso=self._creso
131 )
132 else:
133 result = fields.get_date_field(values, field, reso=self._creso)
134
135 # these return a boolean by-definition
136 return result
137
138 if field in self._object_ops:
139 result = fields.get_date_name_field(values, field, reso=self._creso)
140 result = self._maybe_mask_results(result, fill_value=None)
141
142 else:
143 result = fields.get_date_field(values, field, reso=self._creso)
144 result = self._maybe_mask_results(
145 result, fill_value=None, convert="float64"
146 )
147
148 return result
149
150 f.__name__ = name
151 f.__doc__ = docstring
152 return property(f)
153
154
class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
    """
    Pandas ExtensionArray for tz-naive or tz-aware datetime data.

    .. warning::

       DatetimeArray is currently experimental, and its API may change
       without warning. In particular, :attr:`DatetimeArray.dtype` is
       expected to change to always be an instance of an ``ExtensionDtype``
       subclass.

    Parameters
    ----------
    values : Series, Index, DatetimeArray, ndarray
        The datetime data.

        For DatetimeArray `values` (or a Series or Index boxing one),
        `dtype` and `freq` will be extracted from `values`.

    dtype : numpy.dtype or DatetimeTZDtype
        Note that the only NumPy dtype allowed is 'datetime64[ns]'.
    freq : str or Offset, optional
        The frequency.
    copy : bool, default False
        Whether to copy the underlying array of values.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # Internal type tag used by pandas' ABC/isinstance machinery.
    _typ = "datetimearray"
    # Scalar stored in the backing ndarray for missing entries.
    _internal_fill_value = np.datetime64("NaT", "ns")
    # Scalar types accepted as datetime-like values.
    _recognized_scalars = (datetime, np.datetime64)
    # Predicate for dtypes this array can hold (tz-naive or tz-aware).
    _is_recognized_dtype = is_datetime64_any_dtype
    # lib.infer_dtype results that map to this array type.
    _infer_matches = ("datetime", "datetime64", "date")
195
    @property
    def _scalar_type(self) -> type[Timestamp]:
        # Boxing class for individual elements of this array.
        return Timestamp
199
    # define my properties & methods for delegation
    # Boolean-valued field accessors; returned without NaT masking
    # (see _field_accessor).
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    # Delegated attributes whose values are arbitrary objects.
    _object_ops: list[str] = ["freq", "tz"]
    # Integer-valued calendar field accessors.
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    # Remaining delegated attributes ("date", "time", "timetz").
    _other_ops: list[str] = ["date", "time", "timetz"]
    # Full set of attributes exposed through delegation (e.g. Series.dt).
    _datetimelike_ops: list[str] = (
        _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
    )
    # Methods exposed through delegation (e.g. Series.dt).
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
        "as_unit",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    # Backing dtype: np.dtype("M8[...]") when tz-naive, DatetimeTZDtype when aware.
    _dtype: np.dtype | DatetimeTZDtype
    # Cached frequency offset; None when irregular/unknown.
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__
259
    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        # Check the backing values have a valid datetime64 dtype, then
        # validate and return the user-requested dtype.
        _validate_dt64_dtype(values.dtype)
        dtype = _validate_dt64_dtype(dtype)
        return dtype
266
267 # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
268 @classmethod
269 def _simple_new( # type: ignore[override]
270 cls,
271 values: np.ndarray,
272 freq: BaseOffset | None = None,
273 dtype=DT64NS_DTYPE,
274 ) -> DatetimeArray:
275 assert isinstance(values, np.ndarray)
276 assert dtype.kind == "M"
277 if isinstance(dtype, np.dtype):
278 assert dtype == values.dtype
279 assert not is_unitless(dtype)
280 else:
281 # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
282 # then values.dtype should be M8[us].
283 assert dtype._creso == get_unit_from_dtype(values.dtype)
284
285 result = super()._simple_new(values, dtype)
286 result._freq = freq
287 return result
288
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        # Strict ExtensionArray-interface constructor; delegates to the
        # lenient variant used by DatetimeIndex.__new__.
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
292
    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        *,
        dtype=None,
        copy: bool = False,
        tz=lib.no_default,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous: TimeAmbiguous = "raise",
    ):
        """
        A non-strict version of _from_sequence, called from DatetimeIndex.__new__.

        Notes
        -----
        ``tz`` and ``freq`` default to ``lib.no_default`` so an explicit
        ``None`` from the caller can be distinguished from "not passed".
        """
        # Remember whether the caller explicitly passed freq=None; an
        # explicit None must win over any freq inferred from the data.
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None
        freq, freq_infer = dtl.maybe_infer_freq(freq)

        # if the user either explicitly passes tz=None or a tz-naive dtype, we
        # disallows inferring a tz.
        explicit_tz_none = tz is None
        if tz is lib.no_default:
            tz = None
        else:
            tz = timezones.maybe_get_tz(tz)

        dtype = _validate_dt64_dtype(dtype)
        # if dtype has an embedded tz, capture it
        tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

        # Extract the requested resolution ("s"/"ms"/"us"/"ns") if any.
        unit = None
        if dtype is not None:
            if isinstance(dtype, np.dtype):
                unit = np.datetime_data(dtype)[0]
            else:
                # DatetimeTZDtype
                unit = dtype.unit

        subarr, tz, inferred_freq = _sequence_to_dt64ns(
            data,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
            out_unit=unit,
        )
        # We have to call this again after possibly inferring a tz above
        _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
        if tz is not None and explicit_tz_none:
            raise ValueError(
                "Passed data is timezone-aware, incompatible with 'tz=None'. "
                "Use obj.tz_localize(None) instead."
            )

        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        data_unit = np.datetime_data(subarr.dtype)[0]
        data_dtype = tz_to_dtype(tz, data_unit)
        result = cls._simple_new(subarr, freq=freq, dtype=data_dtype)
        if unit is not None and unit != result.unit:
            # If unit was specified in user-passed dtype, cast to it here
            result = result.as_unit(unit)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq, ambiguous=ambiguous)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result
371
    # error: Signature of "_generate_range" incompatible with supertype
    # "DatetimeLikeArrayMixin"
    @classmethod
    def _generate_range(  # type: ignore[override]
        cls,
        start,
        end,
        periods,
        freq,
        tz=None,
        normalize: bool = False,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
        inclusive: IntervalClosedType = "both",
        *,
        unit: str | None = None,
    ) -> DatetimeArray:
        """
        Generate a regular range of timestamps.

        Exactly three of start/end/periods/freq must be specified; neither
        endpoint may be NaT.  With ``freq=None`` the points are linearly
        spaced between the endpoints; otherwise they are stepped by ``freq``.
        ``inclusive`` controls whether the endpoints themselves are kept.
        ``unit`` selects the resolution of the result (default nanoseconds).
        """
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        if unit is not None:
            if unit not in ["s", "ms", "us", "ns"]:
                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
        else:
            unit = "ns"

        # Cast endpoints to the requested resolution up front; round_ok=False
        # raises if an endpoint cannot be represented exactly in that unit.
        if start is not None and unit is not None:
            start = start.as_unit(unit, round_ok=False)
        if end is not None and unit is not None:
            end = end.as_unit(unit, round_ok=False)

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start_tz = None if start is None else start.tz
            end_tz = None if end is None else end.tz
            start = _maybe_localize_point(
                start, start_tz, start, freq, tz, ambiguous, nonexistent
            )
            end = _maybe_localize_point(
                end, end_tz, end, freq, tz, ambiguous, nonexistent
            )

        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                # fixed-frequency fast path: pure integer arithmetic
                i8values = generate_regular_range(start, end, periods, freq, unit=unit)
            else:
                # calendar-aware offsets: step one timestamp at a time using
                # the module-level _generate_range helper (defined elsewhere
                # in this file)
                xdr = _generate_range(
                    start=start, end=end, periods=periods, offset=freq, unit=unit
                )
                i8values = np.array([x._value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:
                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    creso = abbrev_to_npy_unit(unit)
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values,
                        tz,
                        ambiguous=ambiguous,
                        nonexistent=nonexistent,
                        creso=creso,
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            i8values = (
                np.linspace(0, end._value - start._value, periods, dtype="int64")
                + start._value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints according to `inclusive`.
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start)._value
            end_i8 = Timestamp(end)._value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view(f"datetime64[{unit}]")
        dtype = tz_to_dtype(tz, unit=unit)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
503
504 # -----------------------------------------------------------------
505 # DatetimeLike Interface
506
507 def _unbox_scalar(self, value) -> np.datetime64:
508 if not isinstance(value, self._scalar_type) and value is not NaT:
509 raise ValueError("'value' should be a Timestamp.")
510 self._check_compatible_with(value)
511 if value is NaT:
512 return np.datetime64(value._value, self.unit)
513 else:
514 return value.as_unit(self.unit).asm8
515
    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        # Parse a string scalar, attaching this array's timezone.
        return Timestamp(value, tz=self.tz)
518
    def _check_compatible_with(self, other) -> None:
        # Raise TypeError if `other`'s tz-awareness conflicts with ours.
        # NaT is always compatible.
        if other is NaT:
            return
        self._assert_tzawareness_compat(other)
523
524 # -----------------------------------------------------------------
525 # Descriptive Properties
526
    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        # GH#42228
        # Box an np.datetime64 into a Timestamp carrying this array's
        # resolution and timezone.
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz)
        return ts
532
    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

           A future version of pandas will change dtype to never be a
           ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
           always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        # set by _simple_new / TimeLikeOps.__init__
        return self._dtype
557
    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.
        """
        # GH 18595
        # tz-naive np.dtype has no "tz" attribute, so getattr yields None.
        return getattr(self.dtype, "tz", None)

    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )
578
    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz
585
    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._creso)
592
    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Finest Resolution needed to represent the (localized) values.
        return get_resolution(self.asi8, self.tz, reso=self._creso)
596
597 # ----------------------------------------------------------------
598 # Array-Like / EA-Interface Methods
599
600 def __array__(self, dtype=None) -> np.ndarray:
601 if dtype is None and self.tz:
602 # The default for tz-aware is object, to preserve tz info
603 dtype = object
604
605 return super().__array__(dtype=dtype)
606
607 def __iter__(self) -> Iterator:
608 """
609 Return an iterator over the boxed values
610
611 Yields
612 ------
613 tstamp : Timestamp
614 """
615 if self.ndim > 1:
616 for i in range(len(self)):
617 yield self[i]
618 else:
619 # convert in chunks of 10k for efficiency
620 data = self.asi8
621 length = len(self)
622 chunksize = 10000
623 chunks = (length // chunksize) + 1
624
625 for i in range(chunks):
626 start_i = i * chunksize
627 end_i = min((i + 1) * chunksize, length)
628 converted = ints_to_pydatetime(
629 data[start_i:end_i],
630 tz=self.tz,
631 box="timestamp",
632 reso=self._creso,
633 )
634 yield from converted
635
    def astype(self, dtype, copy: bool = True):
        """
        Cast to another dtype.

        Datetime-to-datetime (resolution and/or timezone) and
        datetime-to-period casts are handled here; everything else is
        delegated to DatetimeLikeArrayMixin.astype.
        """
        # We handle
        #   --> datetime
        #   --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            # same dtype: copy (or not) and return
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, ExtensionDtype):
            if not isinstance(dtype, DatetimeTZDtype):
                # e.g. Sparse[datetime64[ns]]
                return super().astype(dtype, copy=copy)
            elif self.tz is None:
                # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
                # the Series behavior which did
                # values.tz_localize("UTC").tz_convert(dtype.tz)
                raise TypeError(
                    "Cannot use .astype to convert from timezone-naive dtype to "
                    "timezone-aware dtype. Use obj.tz_localize instead or "
                    "series.dt.tz_localize instead"
                )
            else:
                # tzaware unit conversion e.g. datetime64[s, UTC]
                np_dtype = np.dtype(dtype.str)
                res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
                return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and not is_unitless(dtype)
            and is_supported_unit(get_unit_from_dtype(dtype))
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif self.tz is not None and is_datetime64_dtype(dtype):
            # pre-2.0 behavior for DTA/DTI was
            # values.tz_convert("UTC").tz_localize(None), which did not match
            # the Series behavior
            raise TypeError(
                "Cannot use .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype. Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead."
            )

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            # plain "datetime64" without a unit is ambiguous; refuse
            raise TypeError(
                "Casting to unit-less dtype 'datetime64' is not supported. "
                "Pass e.g. 'datetime64[ns]' instead."
            )

        elif is_period_dtype(dtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
702
703 # -----------------------------------------------------------------
704 # Rendering Methods
705
    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        # Format values as strings for rendering; `na_rep` replaces NaT.
        # Local import to avoid a circular dependency with pandas.io.formats.
        from pandas.io.formats.format import get_format_datetime64_from_values

        fmt = get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._creso
        )
716
717 # -----------------------------------------------------------------
718 # Comparison Methods
719
720 def _has_same_tz(self, other) -> bool:
721 # vzone shouldn't be None if value is non-datetime like
722 if isinstance(other, np.datetime64):
723 # convert to Timestamp as np.datetime64 doesn't have tz attr
724 other = Timestamp(other)
725
726 if not hasattr(other, "tzinfo"):
727 return False
728 other_tz = other.tzinfo
729 return timezones.tz_compare(self.tzinfo, other_tz)
730
731 def _assert_tzawareness_compat(self, other) -> None:
732 # adapted from _Timestamp._assert_tzawareness_compat
733 other_tz = getattr(other, "tzinfo", None)
734 other_dtype = getattr(other, "dtype", None)
735
736 if is_datetime64tz_dtype(other_dtype):
737 # Get tzinfo from Series dtype
738 other_tz = other.dtype.tz
739 if other is NaT:
740 # pd.NaT quacks both aware and naive
741 pass
742 elif self.tz is None:
743 if other_tz is not None:
744 raise TypeError(
745 "Cannot compare tz-naive and tz-aware datetime-like objects."
746 )
747 elif other_tz is None:
748 raise TypeError(
749 "Cannot compare tz-naive and tz-aware datetime-like objects"
750 )
751
752 # -----------------------------------------------------------------
753 # Arithmetic Methods
754
    def _add_offset(self, offset) -> DatetimeArray:
        """
        Add a non-Tick DateOffset to every element.

        Tz-aware arrays are shifted on wall time (localize to None, apply,
        re-localize).  Offsets without a vectorized `_apply_array` fall back
        to slow elementwise application with a PerformanceWarning.
        """
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            # apply the offset on local wall time; re-attach tz afterwards
            values = self.tz_localize(None)
        else:
            values = self

        try:
            result = offset._apply_array(values).view(values.dtype)
        except NotImplementedError:
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            # elementwise fallback via object dtype
            result = self.astype("O") + offset
            result = type(self)._from_sequence(result).as_unit(self.unit)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = DatetimeArray._simple_new(result, dtype=result.dtype)
            if self.tz is not None:
                result = result.tz_localize(self.tz)

        return result
783
784 # -----------------------------------------------------------------
785 # Timezone Conversion and Localization Methods
786
787 def _local_timestamps(self) -> npt.NDArray[np.int64]:
788 """
789 Convert to an i8 (unix-like nanosecond timestamp) representation
790 while keeping the local timezone and not using UTC.
791 This is used to calculate time-of-day information as if the timestamps
792 were timezone-naive.
793 """
794 if self.tz is None or timezones.is_utc(self.tz):
795 # Avoid the copy that would be made in tzconversion
796 return self.asi8
797 return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
798
    def tz_convert(self, tz) -> DatetimeArray:
        """
        Convert tz-aware Datetime Array/Index from one time zone to another.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone for time. Corresponding timestamps would be converted
            to this time zone of the Datetime Array/Index. A `tz` of None will
            convert to UTC and remove the timezone information.

        Returns
        -------
        Array or Index

        Raises
        ------
        TypeError
            If Datetime Array/Index is tz-naive.

        See Also
        --------
        DatetimeIndex.tz : A timezone that has a variable offset from UTC.
        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
            given time zone, or remove timezone from a tz-aware DatetimeIndex.

        Examples
        --------
        With the `tz` parameter, we can change the DatetimeIndex
        to other time zones:

        >>> dti = pd.date_range(start='2014-08-01 09:00',
        ...                     freq='H', periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert('US/Central')
        DatetimeIndex(['2014-08-01 02:00:00-05:00',
                       '2014-08-01 03:00:00-05:00',
                       '2014-08-01 04:00:00-05:00'],
                      dtype='datetime64[ns, US/Central]', freq='H')

        With the ``tz=None``, we can remove the timezone (after converting
        to UTC if necessary):

        >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
        ...                     periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert(None)
        DatetimeIndex(['2014-08-01 07:00:00',
                       '2014-08-01 08:00:00',
                       '2014-08-01 09:00:00'],
                      dtype='datetime64[ns]', freq='H')
        """
        tz = timezones.maybe_get_tz(tz)

        if self.tz is None:
            # tz naive, use tz_localize
            raise TypeError(
                "Cannot convert tz-naive timestamps, use tz_localize to localize"
            )

        # No conversion since timestamps are all UTC to begin with;
        # only the dtype (and hence the displayed wall time) changes.
        dtype = tz_to_dtype(tz, unit=self.unit)
        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
874
    @dtl.ravel_compat
    def tz_localize(
        self,
        tz,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ) -> DatetimeArray:
        """
        Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.

        This method takes a time zone (tz) naive Datetime Array/Index object
        and makes this time zone aware. It does not move the time to another
        time zone.

        This method can also be used to do the inverse -- to create a time
        zone unaware object from an aware object. To that end, pass `tz=None`.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone to convert timestamps to. Passing ``None`` will
            remove the time zone information preserving local time.
        ambiguous : 'infer', 'NaT', bool array, default 'raise'
            When clocks moved backward due to DST, ambiguous times may arise.
            For example in Central European Time (UTC+01), when going from
            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
            `ambiguous` parameter dictates how ambiguous times should be
            handled.

            - 'infer' will attempt to infer fall dst-transition hours based on
              order
            - bool-ndarray where True signifies a DST time, False signifies a
              non-DST time (note that this flag is only applicable for
              ambiguous times)
            - 'NaT' will return NaT where there are ambiguous times
            - 'raise' will raise an AmbiguousTimeError if there are ambiguous
              times.

        nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
default 'raise'
            A nonexistent time does not exist in a particular timezone
            where clocks moved forward due to DST.

            - 'shift_forward' will shift the nonexistent time forward to the
              closest existing time
            - 'shift_backward' will shift the nonexistent time backward to the
              closest existing time
            - 'NaT' will return NaT where there are nonexistent times
            - timedelta objects will shift nonexistent times by the timedelta
            - 'raise' will raise an NonExistentTimeError if there are
              nonexistent times.

        Returns
        -------
        Same type as self
            Array/Index converted to the specified time zone.

        Raises
        ------
        TypeError
            If the Datetime Array/Index is tz-aware and tz is not None.

        See Also
        --------
        DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
            one time zone to another.

        Examples
        --------
        >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
        >>> tz_naive
        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                       '2018-03-03 09:00:00'],
                      dtype='datetime64[ns]', freq='D')

        Localize DatetimeIndex in US/Eastern time zone:

        >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
        >>> tz_aware
        DatetimeIndex(['2018-03-01 09:00:00-05:00',
                       '2018-03-02 09:00:00-05:00',
                       '2018-03-03 09:00:00-05:00'],
                      dtype='datetime64[ns, US/Eastern]', freq=None)

        With the ``tz=None``, we can remove the time zone information
        while keeping the local time (not converted to UTC):

        >>> tz_aware.tz_localize(None)
        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                       '2018-03-03 09:00:00'],
                      dtype='datetime64[ns]', freq=None)

        Be careful with DST changes. When there is sequential data, pandas can
        infer the DST time:

        >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
        ...                               '2018-10-28 02:00:00',
        ...                               '2018-10-28 02:30:00',
        ...                               '2018-10-28 02:00:00',
        ...                               '2018-10-28 02:30:00',
        ...                               '2018-10-28 03:00:00',
        ...                               '2018-10-28 03:30:00']))
        >>> s.dt.tz_localize('CET', ambiguous='infer')
        0   2018-10-28 01:30:00+02:00
        1   2018-10-28 02:00:00+02:00
        2   2018-10-28 02:30:00+02:00
        3   2018-10-28 02:00:00+01:00
        4   2018-10-28 02:30:00+01:00
        5   2018-10-28 03:00:00+01:00
        6   2018-10-28 03:30:00+01:00
        dtype: datetime64[ns, CET]

        In some cases, inferring the DST is impossible. In such cases, you can
        pass an ndarray to the ambiguous parameter to set the DST explicitly

        >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
        ...                               '2018-10-28 02:36:00',
        ...                               '2018-10-28 03:46:00']))
        >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
        0   2018-10-28 01:20:00+02:00
        1   2018-10-28 02:36:00+02:00
        2   2018-10-28 03:46:00+01:00
        dtype: datetime64[ns, CET]

        If the DST transition causes nonexistent times, you can shift these
        dates forward or backwards with a timedelta object or `'shift_forward'`
        or `'shift_backwards'`.

        >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
        ...                               '2015-03-29 03:30:00']))
        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
        0   2015-03-29 03:00:00+02:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]

        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
        0   2015-03-29 01:59:59.999999999+01:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]

        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
        0   2015-03-29 03:30:00+02:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]
        """
        nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
        if nonexistent not in nonexistent_options and not isinstance(
            nonexistent, timedelta
        ):
            raise ValueError(
                "The nonexistent argument must be one of 'raise', "
                "'NaT', 'shift_forward', 'shift_backward' or "
                "a timedelta object"
            )

        if self.tz is not None:
            if tz is None:
                # remove tz info: shift stored UTC values to local wall time
                new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
            else:
                raise TypeError("Already tz-aware, use tz_convert to convert.")
        else:
            tz = timezones.maybe_get_tz(tz)
            # Convert to UTC

            new_dates = tzconversion.tz_localize_to_utc(
                self.asi8,
                tz,
                ambiguous=ambiguous,
                nonexistent=nonexistent,
                creso=self._creso,
            )
        new_dates = new_dates.view(f"M8[{self.unit}]")
        dtype = tz_to_dtype(tz, unit=self.unit)

        freq = None
        if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
            # we can preserve freq
            # TODO: Also for fixed-offsets
            freq = self.freq
        elif tz is None and self.tz is None:
            # no-op
            freq = self.freq
        return self._simple_new(new_dates, dtype=dtype, freq=freq)
1059
1060 # ----------------------------------------------------------------
1061 # Conversion Methods - Vectorized analogues of Timestamp methods
1062
1063 def to_pydatetime(self) -> npt.NDArray[np.object_]:
1064 """
1065 Return an ndarray of datetime.datetime objects.
1066
1067 Returns
1068 -------
1069 numpy.ndarray
1070 """
1071 return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)
1072
1073 def normalize(self) -> DatetimeArray:
1074 """
1075 Convert times to midnight.
1076
1077 The time component of the date-time is converted to midnight i.e.
1078 00:00:00. This is useful in cases, when the time does not matter.
1079 Length is unaltered. The timezones are unaffected.
1080
1081 This method is available on Series with datetime values under
1082 the ``.dt`` accessor, and directly on Datetime Array/Index.
1083
1084 Returns
1085 -------
1086 DatetimeArray, DatetimeIndex or Series
1087 The same type as the original data. Series will have the same
1088 name and index. DatetimeIndex will have the same name.
1089
1090 See Also
1091 --------
1092 floor : Floor the datetimes to the specified freq.
1093 ceil : Ceil the datetimes to the specified freq.
1094 round : Round the datetimes to the specified freq.
1095
1096 Examples
1097 --------
1098 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
1099 ... periods=3, tz='Asia/Calcutta')
1100 >>> idx
1101 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1102 '2014-08-01 11:00:00+05:30',
1103 '2014-08-01 12:00:00+05:30'],
1104 dtype='datetime64[ns, Asia/Calcutta]', freq='H')
1105 >>> idx.normalize()
1106 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1107 '2014-08-01 00:00:00+05:30',
1108 '2014-08-01 00:00:00+05:30'],
1109 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1110 """
1111 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso)
1112 dt64_values = new_values.view(self._ndarray.dtype)
1113
1114 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
1115 dta = dta._with_freq("infer")
1116 if self.tz is not None:
1117 dta = dta.tz_localize(self.tz)
1118 return dta
1119
1120 def to_period(self, freq=None) -> PeriodArray:
1121 """
1122 Cast to PeriodArray/Index at a particular frequency.
1123
1124 Converts DatetimeArray/Index to PeriodArray/Index.
1125
1126 Parameters
1127 ----------
1128 freq : str or Offset, optional
1129 One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
1130 or an Offset object. Will be inferred by default.
1131
1132 Returns
1133 -------
1134 PeriodArray/Index
1135
1136 Raises
1137 ------
1138 ValueError
1139 When converting a DatetimeArray/Index with non-regular values,
1140 so that a frequency cannot be inferred.
1141
1142 See Also
1143 --------
1144 PeriodIndex: Immutable ndarray holding ordinal values.
1145 DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.
1146
1147 Examples
1148 --------
1149 >>> df = pd.DataFrame({"y": [1, 2, 3]},
1150 ... index=pd.to_datetime(["2000-03-31 00:00:00",
1151 ... "2000-05-31 00:00:00",
1152 ... "2000-08-31 00:00:00"]))
1153 >>> df.index.to_period("M")
1154 PeriodIndex(['2000-03', '2000-05', '2000-08'],
1155 dtype='period[M]')
1156
1157 Infer the daily frequency
1158
1159 >>> idx = pd.date_range("2017-01-01", periods=2)
1160 >>> idx.to_period()
1161 PeriodIndex(['2017-01-01', '2017-01-02'],
1162 dtype='period[D]')
1163 """
1164 from pandas.core.arrays import PeriodArray
1165
1166 if self.tz is not None:
1167 warnings.warn(
1168 "Converting to PeriodArray/Index representation "
1169 "will drop timezone information.",
1170 UserWarning,
1171 stacklevel=find_stack_level(),
1172 )
1173
1174 if freq is None:
1175 freq = self.freqstr or self.inferred_freq
1176
1177 if freq is None:
1178 raise ValueError(
1179 "You must pass a freq argument as current index has none."
1180 )
1181
1182 res = get_period_alias(freq)
1183
1184 # https://github.com/pandas-dev/pandas/issues/33358
1185 if res is None:
1186 res = freq
1187
1188 freq = res
1189
1190 return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
1191
1192 # -----------------------------------------------------------------
1193 # Properties - Vectorized Timestamp Properties/Methods
1194
1195 def month_name(self, locale=None) -> npt.NDArray[np.object_]:
1196 """
1197 Return the month names with specified locale.
1198
1199 Parameters
1200 ----------
1201 locale : str, optional
1202 Locale determining the language in which to return the month name.
1203 Default is English locale (``'en_US.utf8'``). Use the command
1204 ``locale -a`` on your terminal on Unix systems to find your locale
1205 language code.
1206
1207 Returns
1208 -------
1209 Series or Index
1210 Series or Index of month names.
1211
1212 Examples
1213 --------
1214 >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3))
1215 >>> s
1216 0 2018-01-31
1217 1 2018-02-28
1218 2 2018-03-31
1219 dtype: datetime64[ns]
1220 >>> s.dt.month_name()
1221 0 January
1222 1 February
1223 2 March
1224 dtype: object
1225
1226 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1227 >>> idx
1228 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1229 dtype='datetime64[ns]', freq='M')
1230 >>> idx.month_name()
1231 Index(['January', 'February', 'March'], dtype='object')
1232
1233 Using the ``locale`` parameter you can set a different locale language,
1234 for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month
1235 names in Brazilian Portuguese language.
1236
1237 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1238 >>> idx
1239 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1240 dtype='datetime64[ns]', freq='M')
1241 >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
1242 Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
1243 """
1244 values = self._local_timestamps()
1245
1246 result = fields.get_date_name_field(
1247 values, "month_name", locale=locale, reso=self._creso
1248 )
1249 result = self._maybe_mask_results(result, fill_value=None)
1250 return result
1251
1252 def day_name(self, locale=None) -> npt.NDArray[np.object_]:
1253 """
1254 Return the day names with specified locale.
1255
1256 Parameters
1257 ----------
1258 locale : str, optional
1259 Locale determining the language in which to return the day name.
1260 Default is English locale (``'en_US.utf8'``). Use the command
1261 ``locale -a`` on your terminal on Unix systems to find your locale
1262 language code.
1263
1264 Returns
1265 -------
1266 Series or Index
1267 Series or Index of day names.
1268
1269 Examples
1270 --------
1271 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
1272 >>> s
1273 0 2018-01-01
1274 1 2018-01-02
1275 2 2018-01-03
1276 dtype: datetime64[ns]
1277 >>> s.dt.day_name()
1278 0 Monday
1279 1 Tuesday
1280 2 Wednesday
1281 dtype: object
1282
1283 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1284 >>> idx
1285 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1286 dtype='datetime64[ns]', freq='D')
1287 >>> idx.day_name()
1288 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1289
1290 Using the ``locale`` parameter you can set a different locale language,
1291 for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day
1292 names in Brazilian Portuguese language.
1293
1294 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1295 >>> idx
1296 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1297 dtype='datetime64[ns]', freq='D')
1298 >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
1299 Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
1300 """
1301 values = self._local_timestamps()
1302
1303 result = fields.get_date_name_field(
1304 values, "day_name", locale=locale, reso=self._creso
1305 )
1306 result = self._maybe_mask_results(result, fill_value=None)
1307 return result
1308
1309 @property
1310 def time(self) -> npt.NDArray[np.object_]:
1311 """
1312 Returns numpy array of :class:`datetime.time` objects.
1313
1314 The time part of the Timestamps.
1315 """
1316 # If the Timestamps have a timezone that is not UTC,
1317 # convert them into their i8 representation while
1318 # keeping their timezone and not using UTC
1319 timestamps = self._local_timestamps()
1320
1321 return ints_to_pydatetime(timestamps, box="time", reso=self._creso)
1322
1323 @property
1324 def timetz(self) -> npt.NDArray[np.object_]:
1325 """
1326 Returns numpy array of :class:`datetime.time` objects with timezones.
1327
1328 The time part of the Timestamps.
1329 """
1330 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso)
1331
1332 @property
1333 def date(self) -> npt.NDArray[np.object_]:
1334 """
1335 Returns numpy array of python :class:`datetime.date` objects.
1336
1337 Namely, the date part of Timestamps without time and
1338 timezone information.
1339 """
1340 # If the Timestamps have a timezone that is not UTC,
1341 # convert them into their i8 representation while
1342 # keeping their timezone and not using UTC
1343 timestamps = self._local_timestamps()
1344
1345 return ints_to_pydatetime(timestamps, box="date", reso=self._creso)
1346
1347 def isocalendar(self) -> DataFrame:
1348 """
1349 Calculate year, week, and day according to the ISO 8601 standard.
1350
1351 .. versionadded:: 1.1.0
1352
1353 Returns
1354 -------
1355 DataFrame
1356 With columns year, week and day.
1357
1358 See Also
1359 --------
1360 Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
1361 week number, and weekday for the given Timestamp object.
1362 datetime.date.isocalendar : Return a named tuple object with
1363 three components: year, week and weekday.
1364
1365 Examples
1366 --------
1367 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1368 >>> idx.isocalendar()
1369 year week day
1370 2019-12-29 2019 52 7
1371 2019-12-30 2020 1 1
1372 2019-12-31 2020 1 2
1373 2020-01-01 2020 1 3
1374 >>> idx.isocalendar().week
1375 2019-12-29 52
1376 2019-12-30 1
1377 2019-12-31 1
1378 2020-01-01 1
1379 Freq: D, Name: week, dtype: UInt32
1380 """
1381 from pandas import DataFrame
1382
1383 values = self._local_timestamps()
1384 sarray = fields.build_isocalendar_sarray(values, reso=self._creso)
1385 iso_calendar_df = DataFrame(
1386 sarray, columns=["year", "week", "day"], dtype="UInt32"
1387 )
1388 if self._hasna:
1389 iso_calendar_df.iloc[self._isnan] = None
1390 return iso_calendar_df
1391
    # Numeric datetime-field accessors. Each is generated by the module-level
    # `_field_accessor` factory: the first argument is the property name, the
    # second the field code dispatched to pandas._libs.tslibs.fields, and the
    # long string becomes the resulting property's docstring.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="Y")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int32
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="M")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="T")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
1576 _dayofweek_doc = """
1577 The day of the week with Monday=0, Sunday=6.
1578
1579 Return the day of the week. It is assumed the week starts on
1580 Monday, which is denoted by 0 and ends on Sunday which is denoted
1581 by 6. This method is available on both Series with datetime
1582 values (using the `dt` accessor) or DatetimeIndex.
1583
1584 Returns
1585 -------
1586 Series or Index
1587 Containing integers indicating the day number.
1588
1589 See Also
1590 --------
1591 Series.dt.dayofweek : Alias.
1592 Series.dt.weekday : Alias.
1593 Series.dt.day_name : Returns the name of the day of the week.
1594
1595 Examples
1596 --------
1597 >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
1598 >>> s.dt.dayofweek
1599 2016-12-31 5
1600 2017-01-01 6
1601 2017-01-02 0
1602 2017-01-03 1
1603 2017-01-04 2
1604 2017-01-05 3
1605 2017-01-06 4
1606 2017-01-07 5
1607 2017-01-08 6
1608 Freq: D, dtype: int32
1609 """
1610 day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
1611 dayofweek = day_of_week
1612 weekday = day_of_week
1613
1614 day_of_year = _field_accessor(
1615 "dayofyear",
1616 "doy",
1617 """
1618 The ordinal day of the year.
1619 """,
1620 )
1621 dayofyear = day_of_year
1622 quarter = _field_accessor(
1623 "quarter",
1624 "q",
1625 """
1626 The quarter of the date.
1627 """,
1628 )
1629 days_in_month = _field_accessor(
1630 "days_in_month",
1631 "dim",
1632 """
1633 The number of days in the month.
1634 """,
1635 )
1636 daysinmonth = days_in_month
1637 _is_month_doc = """
1638 Indicates whether the date is the {first_or_last} day of the month.
1639
1640 Returns
1641 -------
1642 Series or array
1643 For Series, returns a Series with boolean values.
1644 For DatetimeIndex, returns a boolean array.
1645
1646 See Also
1647 --------
1648 is_month_start : Return a boolean indicating whether the date
1649 is the first day of the month.
1650 is_month_end : Return a boolean indicating whether the date
1651 is the last day of the month.
1652
1653 Examples
1654 --------
1655 This method is available on Series with datetime values under
1656 the ``.dt`` accessor, and directly on DatetimeIndex.
1657
1658 >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
1659 >>> s
1660 0 2018-02-27
1661 1 2018-02-28
1662 2 2018-03-01
1663 dtype: datetime64[ns]
1664 >>> s.dt.is_month_start
1665 0 False
1666 1 False
1667 2 True
1668 dtype: bool
1669 >>> s.dt.is_month_end
1670 0 False
1671 1 True
1672 2 False
1673 dtype: bool
1674
1675 >>> idx = pd.date_range("2018-02-27", periods=3)
1676 >>> idx.is_month_start
1677 array([False, False, True])
1678 >>> idx.is_month_end
1679 array([False, True, False])
1680 """
1681 is_month_start = _field_accessor(
1682 "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
1683 )
1684
1685 is_month_end = _field_accessor(
1686 "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
1687 )
1688
1689 is_quarter_start = _field_accessor(
1690 "is_quarter_start",
1691 "is_quarter_start",
1692 """
1693 Indicator for whether the date is the first day of a quarter.
1694
1695 Returns
1696 -------
1697 is_quarter_start : Series or DatetimeIndex
1698 The same type as the original data with boolean values. Series will
1699 have the same name and index. DatetimeIndex will have the same
1700 name.
1701
1702 See Also
1703 --------
1704 quarter : Return the quarter of the date.
1705 is_quarter_end : Similar property for indicating the quarter end.
1706
1707 Examples
1708 --------
1709 This method is available on Series with datetime values under
1710 the ``.dt`` accessor, and directly on DatetimeIndex.
1711
1712 >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
1713 ... periods=4)})
1714 >>> df.assign(quarter=df.dates.dt.quarter,
1715 ... is_quarter_start=df.dates.dt.is_quarter_start)
1716 dates quarter is_quarter_start
1717 0 2017-03-30 1 False
1718 1 2017-03-31 1 False
1719 2 2017-04-01 2 True
1720 3 2017-04-02 2 False
1721
1722 >>> idx = pd.date_range('2017-03-30', periods=4)
1723 >>> idx
1724 DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
1725 dtype='datetime64[ns]', freq='D')
1726
1727 >>> idx.is_quarter_start
1728 array([False, False, True, False])
1729 """,
1730 )
1731 is_quarter_end = _field_accessor(
1732 "is_quarter_end",
1733 "is_quarter_end",
1734 """
1735 Indicator for whether the date is the last day of a quarter.
1736
1737 Returns
1738 -------
1739 is_quarter_end : Series or DatetimeIndex
1740 The same type as the original data with boolean values. Series will
1741 have the same name and index. DatetimeIndex will have the same
1742 name.
1743
1744 See Also
1745 --------
1746 quarter : Return the quarter of the date.
1747 is_quarter_start : Similar property indicating the quarter start.
1748
1749 Examples
1750 --------
1751 This method is available on Series with datetime values under
1752 the ``.dt`` accessor, and directly on DatetimeIndex.
1753
1754 >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
1755 ... periods=4)})
1756 >>> df.assign(quarter=df.dates.dt.quarter,
1757 ... is_quarter_end=df.dates.dt.is_quarter_end)
1758 dates quarter is_quarter_end
1759 0 2017-03-30 1 False
1760 1 2017-03-31 1 True
1761 2 2017-04-01 2 False
1762 3 2017-04-02 2 False
1763
1764 >>> idx = pd.date_range('2017-03-30', periods=4)
1765 >>> idx
1766 DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
1767 dtype='datetime64[ns]', freq='D')
1768
1769 >>> idx.is_quarter_end
1770 array([False, True, False, False])
1771 """,
1772 )
1773 is_year_start = _field_accessor(
1774 "is_year_start",
1775 "is_year_start",
1776 """
1777 Indicate whether the date is the first day of a year.
1778
1779 Returns
1780 -------
1781 Series or DatetimeIndex
1782 The same type as the original data with boolean values. Series will
1783 have the same name and index. DatetimeIndex will have the same
1784 name.
1785
1786 See Also
1787 --------
1788 is_year_end : Similar property indicating the last day of the year.
1789
1790 Examples
1791 --------
1792 This method is available on Series with datetime values under
1793 the ``.dt`` accessor, and directly on DatetimeIndex.
1794
1795 >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
1796 >>> dates
1797 0 2017-12-30
1798 1 2017-12-31
1799 2 2018-01-01
1800 dtype: datetime64[ns]
1801
1802 >>> dates.dt.is_year_start
1803 0 False
1804 1 False
1805 2 True
1806 dtype: bool
1807
1808 >>> idx = pd.date_range("2017-12-30", periods=3)
1809 >>> idx
1810 DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
1811 dtype='datetime64[ns]', freq='D')
1812
1813 >>> idx.is_year_start
1814 array([False, False, True])
1815 """,
1816 )
1817 is_year_end = _field_accessor(
1818 "is_year_end",
1819 "is_year_end",
1820 """
1821 Indicate whether the date is the last day of the year.
1822
1823 Returns
1824 -------
1825 Series or DatetimeIndex
1826 The same type as the original data with boolean values. Series will
1827 have the same name and index. DatetimeIndex will have the same
1828 name.
1829
1830 See Also
1831 --------
1832 is_year_start : Similar property indicating the start of the year.
1833
1834 Examples
1835 --------
1836 This method is available on Series with datetime values under
1837 the ``.dt`` accessor, and directly on DatetimeIndex.
1838
1839 >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
1840 >>> dates
1841 0 2017-12-30
1842 1 2017-12-31
1843 2 2018-01-01
1844 dtype: datetime64[ns]
1845
1846 >>> dates.dt.is_year_end
1847 0 False
1848 1 True
1849 2 False
1850 dtype: bool
1851
1852 >>> idx = pd.date_range("2017-12-30", periods=3)
1853 >>> idx
1854 DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
1855 dtype='datetime64[ns]', freq='D')
1856
1857 >>> idx.is_year_end
1858 array([False, True, False])
1859 """,
1860 )
1861 is_leap_year = _field_accessor(
1862 "is_leap_year",
1863 "is_leap_year",
1864 """
1865 Boolean indicator if the date belongs to a leap year.
1866
1867 A leap year is a year, which has 366 days (instead of 365) including
1868 29th of February as an intercalary day.
1869 Leap years are years which are multiples of four with the exception
1870 of years divisible by 100 but not by 400.
1871
1872 Returns
1873 -------
1874 Series or ndarray
1875 Booleans indicating if dates belong to a leap year.
1876
1877 Examples
1878 --------
1879 This method is available on Series with datetime values under
1880 the ``.dt`` accessor, and directly on DatetimeIndex.
1881
1882 >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
1883 >>> idx
1884 DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
1885 dtype='datetime64[ns]', freq='A-DEC')
1886 >>> idx.is_leap_year
1887 array([ True, False, False])
1888
1889 >>> dates_series = pd.Series(idx)
1890 >>> dates_series
1891 0 2012-12-31
1892 1 2013-12-31
1893 2 2014-12-31
1894 dtype: datetime64[ns]
1895 >>> dates_series.dt.is_leap_year
1896 0 True
1897 1 False
1898 2 False
1899 dtype: bool
1900 """,
1901 )
1902
1903 def to_julian_date(self) -> npt.NDArray[np.float64]:
1904 """
1905 Convert Datetime Array to float64 ndarray of Julian Dates.
1906 0 Julian date is noon January 1, 4713 BC.
1907 https://en.wikipedia.org/wiki/Julian_day
1908 """
1909
1910 # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
1911 year = np.asarray(self.year)
1912 month = np.asarray(self.month)
1913 day = np.asarray(self.day)
1914 testarr = month < 3
1915 year[testarr] -= 1
1916 month[testarr] += 12
1917 return (
1918 day
1919 + np.fix((153 * month - 457) / 5)
1920 + 365 * year
1921 + np.floor(year / 4)
1922 - np.floor(year / 100)
1923 + np.floor(year / 400)
1924 + 1_721_118.5
1925 + (
1926 self.hour
1927 + self.minute / 60
1928 + self.second / 3600
1929 + self.microsecond / 3600 / 10**6
1930 + self.nanosecond / 3600 / 10**9
1931 )
1932 / 24
1933 )
1934
1935 # -----------------------------------------------------------------
1936 # Reductions
1937
1938 def std(
1939 self,
1940 axis=None,
1941 dtype=None,
1942 out=None,
1943 ddof: int = 1,
1944 keepdims: bool = False,
1945 skipna: bool = True,
1946 ):
1947 """
1948 Return sample standard deviation over requested axis.
1949
1950 Normalized by N-1 by default. This can be changed using the ddof argument
1951
1952 Parameters
1953 ----------
1954 axis : int optional, default None
1955 Axis for the function to be applied on.
1956 For `Series` this parameter is unused and defaults to `None`.
1957 ddof : int, default 1
1958 Degrees of Freedom. The divisor used in calculations is N - ddof,
1959 where N represents the number of elements.
1960 skipna : bool, default True
1961 Exclude NA/null values. If an entire row/column is NA, the result will be
1962 NA.
1963
1964 Returns
1965 -------
1966 Timedelta
1967 """
1968 # Because std is translation-invariant, we can get self.std
1969 # by calculating (self - Timestamp(0)).std, and we can do it
1970 # without creating a copy by using a view on self._ndarray
1971 from pandas.core.arrays import TimedeltaArray
1972
1973 # Find the td64 dtype with the same resolution as our dt64 dtype
1974 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
1975 dtype = np.dtype(dtype_str)
1976
1977 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype)
1978
1979 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
1980
1981
1982# -------------------------------------------------------------------
1983# Constructor Helpers
1984
1985
def _sequence_to_dt64ns(
    data,
    *,
    copy: bool = False,
    tz: tzinfo | None = None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    out_unit: str | None = None,
):
    """
    Convert a datetime-like sequence to a datetime64 ndarray plus metadata.

    Parameters
    ----------
    data : list-like
    copy : bool, default False
    tz : tzinfo or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    out_unit : str or None, default None
        Desired output resolution.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    inferred_freq = None

    # Coerce list-likes/scalars into something array-like; may flip `copy`
    # off when a new array was already materialized.
    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="DatetimeArray"
    )

    if isinstance(data, DatetimeArray):
        inferred_freq = data.freq

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    out_dtype = DT64NS_DTYPE
    if out_unit is not None:
        out_dtype = np.dtype(f"M8[{out_unit}]")

    if (
        is_object_dtype(data_dtype)
        or is_string_dtype(data_dtype)
        or is_sparse(data_dtype)
    ):
        # TODO: We do not have tests specific to string-dtypes,
        #  also complex or categorical or other extension
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # object array of ints: treat as epoch timestamps (fall through
            # to the integer branch at the bottom).
            data = data.astype(np.int64)
        elif tz is not None and ambiguous == "raise":
            # TODO: yearfirst/dayfirst/etc?
            # Fast path: parse strings/objects directly in the target tz.
            obj_data = np.asarray(data, dtype=object)
            i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
            return i8data.view(DT64NS_DTYPE), tz, None
        else:
            # data comes back here as either i8 to denote UTC timestamps
            # or M8[ns] to denote wall times
            data, inferred_tz = objects_to_datetime64ns(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
            )
            if tz and inferred_tz:
                # two timezones: convert to intended from base UTC repr
                assert data.dtype == "i8"
                # GH#42505
                # by convention, these are _already_ UTC, e.g
                return data.view(DT64NS_DTYPE), tz, None

            elif inferred_tz:
                tz = inferred_tz

        data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if is_datetime64tz_dtype(data_dtype):
        # DatetimeArray -> ndarray
        # tz-aware input: reconcile explicit tz with the data's own tz.
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif is_datetime64_dtype(data_dtype):
        # tz-naive DatetimeArray or ndarray[datetime64]
        data = getattr(data, "_ndarray", data)
        new_dtype = data.dtype
        data_unit = get_unit_from_dtype(new_dtype)
        if not is_supported_unit(data_unit):
            # Cast to the nearest supported unit, generally "s"
            new_reso = get_supported_reso(data_unit)
            new_unit = npy_unit_to_abbrev(new_reso)
            new_dtype = np.dtype(f"M8[{new_unit}]")
            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
            data_unit = get_unit_from_dtype(new_dtype)
            copy = False

        if data.dtype.byteorder == ">":
            # TODO: better way to handle this?  non-copying alternative?
            #  without this, test_constructor_datetime64_bigendian fails
            data = data.astype(data.dtype.newbyteorder("<"))
            new_dtype = data.dtype
            copy = False

        if tz is not None:
            # Convert tz-naive to UTC
            # TODO: if tz is UTC, are there situations where we *don't* want a
            #  copy?  tz_localize_to_utc always makes one.
            # tz_localize_to_utc operates on 1-D i8, so flatten and restore.
            shape = data.shape
            if data.ndim > 1:
                data = data.ravel()

            data = tzconversion.tz_localize_to_utc(
                data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
            )
            data = data.view(new_dtype)
            data = data.reshape(shape)

        assert data.dtype == new_dtype, data.dtype
        result = data

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
        result = data.view(out_dtype)

    if copy:
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype.kind == "M"
    assert result.dtype != "M8"
    assert is_supported_unit(get_unit_from_dtype(result.dtype))
    return result, tz, inferred_freq
2135
2136
def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc: bool = False,
    errors: DateTimeErrorChoices = "raise",
    allow_object: bool = False,
):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert/localize timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    """
    assert errors in ["raise", "ignore", "coerce"]

    # Coerce str-dtype (and anything else) up front to object dtype.
    data = np.array(data, copy=False, dtype=np.object_)

    result, tz_parsed = tslib.array_to_datetime(
        data,
        errors=errors,
        utc=utc,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array is in UTC.
        # Return i8 values to denote unix timestamps.
        return result.view("i8"), tz_parsed

    if is_datetime64_dtype(result):
        # M8[ns] denotes wall-times; since tz is None the distinction is thin.
        return result, tz_parsed

    if is_object_dtype(result):
        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
        #  array is allowed.  When called via `pd.DatetimeIndex`, we can
        #  only accept datetime64 dtype, so raise TypeError if object-dtype
        #  is returned, as that indicates the values can be recognized as
        #  datetimes but they have conflicting timezones/awareness
        if allow_object:
            return result, tz_parsed
        raise TypeError(result)

    # GH#23675 this TypeError should never be hit, whereas the TypeError
    #  in the object-dtype branch above is reachable.
    raise TypeError(result)  # pragma: no cover
2207
2208
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
    """
    Apply dtype-based conversion conventions to the input, raising on
    dtypes that cannot be interpreted as datetimes.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool
        Set to False when a conversion already produced a fresh array.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    if not hasattr(data, "dtype"):
        # e.g. collections.deque — nothing to inspect
        return data, copy

    dtype = data.dtype

    if is_float_dtype(dtype):
        # pre-2.0 we treated these as wall-times, inconsistent with ints
        # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
        # Note: data.astype(np.int64) fails ARM tests, see
        # https://github.com/pandas-dev/pandas/issues/49468.
        data = data.astype(DT64NS_DTYPE).view("i8")
        copy = False

    elif is_timedelta64_dtype(dtype) or is_bool_dtype(dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")

    elif is_period_dtype(dtype):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )

    elif is_extension_array_dtype(dtype) and not is_datetime64tz_dtype(dtype):
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy
2257
2258
2259# -------------------------------------------------------------------
2260# Validation and Inference
2261
2262
2263def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
2264 """
2265 If a timezone is inferred from data, check that it is compatible with
2266 the user-provided timezone, if any.
2267
2268 Parameters
2269 ----------
2270 tz : tzinfo or None
2271 inferred_tz : tzinfo or None
2272
2273 Returns
2274 -------
2275 tz : tzinfo or None
2276
2277 Raises
2278 ------
2279 TypeError : if both timezones are present but do not match
2280 """
2281 if tz is None:
2282 tz = inferred_tz
2283 elif inferred_tz is None:
2284 pass
2285 elif not timezones.tz_compare(tz, inferred_tz):
2286 raise TypeError(
2287 f"data is already tz-aware {inferred_tz}, unable to "
2288 f"set specified tz: {tz}"
2289 )
2290 return tz
2291
2292
2293def _validate_dt64_dtype(dtype):
2294 """
2295 Check that a dtype, if passed, represents either a numpy datetime64[ns]
2296 dtype or a pandas DatetimeTZDtype.
2297
2298 Parameters
2299 ----------
2300 dtype : object
2301
2302 Returns
2303 -------
2304 dtype : None, numpy.dtype, or DatetimeTZDtype
2305
2306 Raises
2307 ------
2308 ValueError : invalid dtype
2309
2310 Notes
2311 -----
2312 Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
2313 tz errors to go through
2314 """
2315 if dtype is not None:
2316 dtype = pandas_dtype(dtype)
2317 if is_dtype_equal(dtype, np.dtype("M8")):
2318 # no precision, disallowed GH#24806
2319 msg = (
2320 "Passing in 'datetime64' dtype with no precision is not allowed. "
2321 "Please pass in 'datetime64[ns]' instead."
2322 )
2323 raise ValueError(msg)
2324
2325 if (
2326 isinstance(dtype, np.dtype)
2327 and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype)))
2328 ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
2329 raise ValueError(
2330 f"Unexpected value for 'dtype': '{dtype}'. "
2331 "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
2332 "'datetime64[ns]' or DatetimeTZDtype'."
2333 )
2334
2335 if getattr(dtype, "tz", None):
2336 # https://github.com/pandas-dev/pandas/issues/18595
2337 # Ensure that we have a standard timezone for pytz objects.
2338 # Without this, things like adding an array of timedeltas and
2339 # a tz-aware Timestamp (with a tz specific to its datetime) will
2340 # be incorrect(ish?) for the array as a whole
2341 dtype = cast(DatetimeTZDtype, dtype)
2342 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
2343
2344 return dtype
2345
2346
2347def _validate_tz_from_dtype(
2348 dtype, tz: tzinfo | None, explicit_tz_none: bool = False
2349) -> tzinfo | None:
2350 """
2351 If the given dtype is a DatetimeTZDtype, extract the implied
2352 tzinfo object from it and check that it does not conflict with the given
2353 tz.
2354
2355 Parameters
2356 ----------
2357 dtype : dtype, str
2358 tz : None, tzinfo
2359 explicit_tz_none : bool, default False
2360 Whether tz=None was passed explicitly, as opposed to lib.no_default.
2361
2362 Returns
2363 -------
2364 tz : consensus tzinfo
2365
2366 Raises
2367 ------
2368 ValueError : on tzinfo mismatch
2369 """
2370 if dtype is not None:
2371 if isinstance(dtype, str):
2372 try:
2373 dtype = DatetimeTZDtype.construct_from_string(dtype)
2374 except TypeError:
2375 # Things like `datetime64[ns]`, which is OK for the
2376 # constructors, but also nonsense, which should be validated
2377 # but not by us. We *do* allow non-existent tz errors to
2378 # go through
2379 pass
2380 dtz = getattr(dtype, "tz", None)
2381 if dtz is not None:
2382 if tz is not None and not timezones.tz_compare(tz, dtz):
2383 raise ValueError("cannot supply both a tz and a dtype with a tz")
2384 if explicit_tz_none:
2385 raise ValueError("Cannot pass both a timezone-aware dtype and tz=None")
2386 tz = dtz
2387
2388 if tz is not None and is_datetime64_dtype(dtype):
2389 # We also need to check for the case where the user passed a
2390 # tz-naive dtype (i.e. datetime64[ns])
2391 if tz is not None and not timezones.tz_compare(tz, dtz):
2392 raise ValueError(
2393 "cannot supply both a tz and a "
2394 "timezone-naive dtype (i.e. datetime64[ns])"
2395 )
2396
2397 return tz
2398
2399
2400def _infer_tz_from_endpoints(
2401 start: Timestamp, end: Timestamp, tz: tzinfo | None
2402) -> tzinfo | None:
2403 """
2404 If a timezone is not explicitly given via `tz`, see if one can
2405 be inferred from the `start` and `end` endpoints. If more than one
2406 of these inputs provides a timezone, require that they all agree.
2407
2408 Parameters
2409 ----------
2410 start : Timestamp
2411 end : Timestamp
2412 tz : tzinfo or None
2413
2414 Returns
2415 -------
2416 tz : tzinfo or None
2417
2418 Raises
2419 ------
2420 TypeError : if start and end timezones do not agree
2421 """
2422 try:
2423 inferred_tz = timezones.infer_tzinfo(start, end)
2424 except AssertionError as err:
2425 # infer_tzinfo raises AssertionError if passed mismatched timezones
2426 raise TypeError(
2427 "Start and end cannot both be tz-aware with different timezones"
2428 ) from err
2429
2430 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2431 tz = timezones.maybe_get_tz(tz)
2432
2433 if tz is not None and inferred_tz is not None:
2434 if not timezones.tz_compare(inferred_tz, tz):
2435 raise AssertionError("Inferred time zone not equal to passed time zone")
2436
2437 elif inferred_tz is not None:
2438 tz = inferred_tz
2439
2440 return tz
2441
2442
2443def _maybe_normalize_endpoints(
2444 start: Timestamp | None, end: Timestamp | None, normalize: bool
2445):
2446 if normalize:
2447 if start is not None:
2448 start = start.normalize()
2449
2450 if end is not None:
2451 end = end.normalize()
2452
2453 return start, end
2454
2455
2456def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent):
2457 """
2458 Localize a start or end Timestamp to the timezone of the corresponding
2459 start or end Timestamp
2460
2461 Parameters
2462 ----------
2463 ts : start or end Timestamp to potentially localize
2464 is_none : argument that should be None
2465 is_not_none : argument that should not be None
2466 freq : Tick, DateOffset, or None
2467 tz : str, timezone object or None
2468 ambiguous: str, localization behavior for ambiguous times
2469 nonexistent: str, localization behavior for nonexistent times
2470
2471 Returns
2472 -------
2473 ts : Timestamp
2474 """
2475 # Make sure start and end are timezone localized if:
2476 # 1) freq = a Timedelta-like frequency (Tick)
2477 # 2) freq = None i.e. generating a linspaced range
2478 if is_none is None and is_not_none is not None:
2479 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2480 # we have historically defaulted ambiguous=False
2481 ambiguous = ambiguous if ambiguous != "infer" else False
2482 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2483 if isinstance(freq, Tick) or freq is None:
2484 localize_args["tz"] = tz
2485 ts = ts.tz_localize(**localize_args)
2486 return ts
2487
2488
def _generate_range(
    start: Timestamp | None,
    end: Timestamp | None,
    periods: int | None,
    offset: BaseOffset,
    *,
    unit: str,
) -> Iterator[Timestamp]:
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : Timestamp or None
    end : Timestamp or None
    periods : int or None
    offset : DateOffset
    unit : str
        Resolution abbreviation (e.g. "s", "ms", "us", "ns") applied to
        the yielded Timestamps via ``as_unit``.

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    offset = to_offset(offset)

    # Timestamp(None) yields NaT, which serves as the "endpoint not
    # provided" marker below; a real endpoint is converted to `unit`.
    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    start = Timestamp(start)  # type: ignore[arg-type]
    if start is not NaT:
        start = start.as_unit(unit)
    else:
        start = None

    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    end = Timestamp(end)  # type: ignore[arg-type]
    if end is not NaT:
        end = end.as_unit(unit)
    else:
        end = None

    # Snap off-offset endpoints inward so all yielded dates land on-offset.
    # NOTE(review): because this is an elif, end is only rolled back when
    # start is absent or already on-offset — presumably intentional, but
    # worth confirming against date_range expectations.
    if start and not offset.is_on_offset(start):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        start = offset.rollforward(start)  # type: ignore[assignment]

    elif end and not offset.is_on_offset(end):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        end = offset.rollback(end)  # type: ignore[assignment]

    # Forward stride with end before start: nothing to emit.
    # Unsupported operand types for < ("Timestamp" and "None")
    if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
        end = None
        periods = 0

    # Derive whichever endpoint is missing from the other plus `periods`.
    if end is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        end = start + (periods - 1) * offset  # type: ignore[operator]

    if start is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        start = end - (periods - 1) * offset  # type: ignore[operator]

    start = cast(Timestamp, start)
    end = cast(Timestamp, end)

    cur = start
    if offset.n >= 0:
        # Non-negative stride: walk forward, yielding until we reach end.
        while cur <= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date <= cur:
                # Guard against a no-op offset producing an infinite loop.
                raise ValueError(f"Offset {offset} did not increment date")
            cur = next_date
    else:
        # Negative stride: walk backward from start down to end.
        while cur >= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date >= cur:
                # Guard against a no-op offset producing an infinite loop.
                raise ValueError(f"Offset {offset} did not decrement date")
            cur = next_date