1from __future__ import annotations
2
3from datetime import (
4 datetime,
5 timedelta,
6 tzinfo,
7)
8from typing import (
9 TYPE_CHECKING,
10 cast,
11 overload,
12)
13import warnings
14
15import numpy as np
16
17from pandas._libs import (
18 lib,
19 tslib,
20)
21from pandas._libs.tslibs import (
22 BaseOffset,
23 NaT,
24 NaTType,
25 Resolution,
26 Timestamp,
27 astype_overflowsafe,
28 fields,
29 get_resolution,
30 get_supported_dtype,
31 get_unit_from_dtype,
32 ints_to_pydatetime,
33 is_date_array_normalized,
34 is_supported_dtype,
35 is_unitless,
36 normalize_i8_timestamps,
37 timezones,
38 to_offset,
39 tz_convert_from_utc,
40 tzconversion,
41)
42from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
43from pandas.errors import PerformanceWarning
44from pandas.util._exceptions import find_stack_level
45from pandas.util._validators import validate_inclusive
46
47from pandas.core.dtypes.common import (
48 DT64NS_DTYPE,
49 INT64_DTYPE,
50 is_bool_dtype,
51 is_float_dtype,
52 is_string_dtype,
53 pandas_dtype,
54)
55from pandas.core.dtypes.dtypes import (
56 DatetimeTZDtype,
57 ExtensionDtype,
58 PeriodDtype,
59)
60from pandas.core.dtypes.missing import isna
61
62from pandas.core.arrays import datetimelike as dtl
63from pandas.core.arrays._ranges import generate_regular_range
64import pandas.core.common as com
65
66from pandas.tseries.frequencies import get_period_alias
67from pandas.tseries.offsets import (
68 Day,
69 Tick,
70)
71
72if TYPE_CHECKING:
73 from collections.abc import Iterator
74
75 from pandas._typing import (
76 ArrayLike,
77 DateTimeErrorChoices,
78 DtypeObj,
79 IntervalClosedType,
80 Self,
81 TimeAmbiguous,
82 TimeNonexistent,
83 npt,
84 )
85
86 from pandas import DataFrame
87 from pandas.core.arrays import PeriodArray
88
89
90_ITER_CHUNKSIZE = 10_000
91
92
93@overload
94def tz_to_dtype(tz: tzinfo, unit: str = ...) -> DatetimeTZDtype:
95 ...
96
97
98@overload
99def tz_to_dtype(tz: None, unit: str = ...) -> np.dtype[np.datetime64]:
100 ...
101
102
103def tz_to_dtype(
104 tz: tzinfo | None, unit: str = "ns"
105) -> np.dtype[np.datetime64] | DatetimeTZDtype:
106 """
107 Return a datetime64[ns] dtype appropriate for the given timezone.
108
109 Parameters
110 ----------
111 tz : tzinfo or None
112 unit : str, default "ns"
113
114 Returns
115 -------
116 np.dtype or Datetime64TZDType
117 """
118 if tz is None:
119 return np.dtype(f"M8[{unit}]")
120 else:
121 return DatetimeTZDtype(tz=tz, unit=unit)
122
123
124def _field_accessor(name: str, field: str, docstring: str | None = None):
125 def f(self):
126 values = self._local_timestamps()
127
128 if field in self._bool_ops:
129 result: np.ndarray
130
131 if field.endswith(("start", "end")):
132 freq = self.freq
133 month_kw = 12
134 if freq:
135 kwds = freq.kwds
136 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
137
138 result = fields.get_start_end_field(
139 values, field, self.freqstr, month_kw, reso=self._creso
140 )
141 else:
142 result = fields.get_date_field(values, field, reso=self._creso)
143
144 # these return a boolean by-definition
145 return result
146
147 if field in self._object_ops:
148 result = fields.get_date_name_field(values, field, reso=self._creso)
149 result = self._maybe_mask_results(result, fill_value=None)
150
151 else:
152 result = fields.get_date_field(values, field, reso=self._creso)
153 result = self._maybe_mask_results(
154 result, fill_value=None, convert="float64"
155 )
156
157 return result
158
159 f.__name__ = name
160 f.__doc__ = docstring
161 return property(f)
162
163
164# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
165# incompatible with definition in base class "ExtensionArray"
166class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc]
167 """
168 Pandas ExtensionArray for tz-naive or tz-aware datetime data.
169
170 .. warning::
171
172 DatetimeArray is currently experimental, and its API may change
173 without warning. In particular, :attr:`DatetimeArray.dtype` is
174 expected to change to always be an instance of an ``ExtensionDtype``
175 subclass.
176
177 Parameters
178 ----------
179 values : Series, Index, DatetimeArray, ndarray
180 The datetime data.
181
182 For DatetimeArray `values` (or a Series or Index boxing one),
183 `dtype` and `freq` will be extracted from `values`.
184
185 dtype : numpy.dtype or DatetimeTZDtype
186 Note that the only NumPy dtype allowed is 'datetime64[ns]'.
187 freq : str or Offset, optional
188 The frequency.
189 copy : bool, default False
190 Whether to copy the underlying array of values.
191
192 Attributes
193 ----------
194 None
195
196 Methods
197 -------
198 None
199
200 Examples
201 --------
202 >>> pd.arrays.DatetimeArray._from_sequence(
203 ... pd.DatetimeIndex(['2023-01-01', '2023-01-02'], freq='D'))
204 <DatetimeArray>
205 ['2023-01-01 00:00:00', '2023-01-02 00:00:00']
206 Length: 2, dtype: datetime64[ns]
207 """
208
    # sentinel used by pandas type-introspection machinery
    _typ = "datetimearray"
    # NA fill value for the backing ndarray
    _internal_fill_value = np.datetime64("NaT", "ns")
    # scalar types accepted as datetime-like by this array
    _recognized_scalars = (datetime, np.datetime64)
    # predicate: dtype is numpy datetime64-based or tz-aware DatetimeTZDtype
    _is_recognized_dtype = lambda x: lib.is_np_dtype(x, "M") or isinstance(
        x, DatetimeTZDtype
    )
    _infer_matches = ("datetime", "datetime64", "date")

    @property
    def _scalar_type(self) -> type[Timestamp]:
        # box class for individual elements of this array
        return Timestamp

    # define my properties & methods for delegation
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    _object_ops: list[str] = ["freq", "tz"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    _other_ops: list[str] = ["date", "time", "timetz"]
    _datetimelike_ops: list[str] = (
        _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
    )
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
        "as_unit",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    # backing dtype: plain numpy M8 when tz-naive, DatetimeTZDtype when aware
    _dtype: np.dtype[np.datetime64] | DatetimeTZDtype
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__
280
    @classmethod
    def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
        """
        Construct from a sequence of scalars, rejecting input whose inferred
        type is not datetime-like.

        Raises
        ------
        ValueError
            If ``scalars`` is not inferred as "datetime" or "datetime64".
        """
        if lib.infer_dtype(scalars, skipna=True) not in ["datetime", "datetime64"]:
            # TODO: require any NAs be valid-for-DTA
            # TODO: if dtype is passed, check for tzawareness compat?
            raise ValueError
        return cls._from_sequence(scalars, dtype=dtype)
288
    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        # Validate that ``dtype`` is a legal datetime64/DatetimeTZDtype and that
        # its resolution agrees with the resolution of ``values``.
        dtype = _validate_dt64_dtype(dtype)
        _validate_dt64_dtype(values.dtype)
        if isinstance(dtype, np.dtype):
            if values.dtype != dtype:
                raise ValueError("Values resolution does not match dtype.")
        else:
            # DatetimeTZDtype: compare the numpy unit embedded in values.dtype
            vunit = np.datetime_data(values.dtype)[0]
            if vunit != dtype.unit:
                raise ValueError("Values resolution does not match dtype.")
        return dtype
302
    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: npt.NDArray[np.datetime64],
        freq: BaseOffset | None = None,
        dtype: np.dtype[np.datetime64] | DatetimeTZDtype = DT64NS_DTYPE,
    ) -> Self:
        """
        Fastpath constructor: wrap an already-validated datetime64 ndarray.

        Inputs are assumed consistent; invariants are only checked with
        assertions (stripped under ``python -O``).
        """
        assert isinstance(values, np.ndarray)
        assert dtype.kind == "M"
        if isinstance(dtype, np.dtype):
            assert dtype == values.dtype
            assert not is_unitless(dtype)
        else:
            # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
            # then values.dtype should be M8[us].
            assert dtype._creso == get_unit_from_dtype(values.dtype)

        result = super()._simple_new(values, dtype)
        result._freq = freq
        return result
324
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        # Strict public constructor; all parsing/validation happens in the
        # non-strict variant below.
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
328
    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        *,
        dtype=None,
        copy: bool = False,
        tz=lib.no_default,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous: TimeAmbiguous = "raise",
    ) -> Self:
        """
        A non-strict version of _from_sequence, called from DatetimeIndex.__new__.
        """

        # if the user either explicitly passes tz=None or a tz-naive dtype, we
        # disallows inferring a tz.
        explicit_tz_none = tz is None
        if tz is lib.no_default:
            tz = None
        else:
            tz = timezones.maybe_get_tz(tz)

        dtype = _validate_dt64_dtype(dtype)
        # if dtype has an embedded tz, capture it
        tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

        # resolution requested via the user-passed dtype, if any
        unit = None
        if dtype is not None:
            unit = dtl.dtype_to_unit(dtype)

        data, copy = dtl.ensure_arraylike_for_datetimelike(
            data, copy, cls_name="DatetimeArray"
        )
        inferred_freq = None
        if isinstance(data, DatetimeArray):
            inferred_freq = data.freq

        subarr, tz = _sequence_to_dt64(
            data,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
            out_unit=unit,
        )
        # We have to call this again after possibly inferring a tz above
        _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
        if tz is not None and explicit_tz_none:
            raise ValueError(
                "Passed data is timezone-aware, incompatible with 'tz=None'. "
                "Use obj.tz_localize(None) instead."
            )

        data_unit = np.datetime_data(subarr.dtype)[0]
        data_dtype = tz_to_dtype(tz, data_unit)
        result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype)
        if unit is not None and unit != result.unit:
            # If unit was specified in user-passed dtype, cast to it here
            result = result.as_unit(unit)

        validate_kwds = {"ambiguous": ambiguous}
        result._maybe_pin_freq(freq, validate_kwds)
        return result
396
    @classmethod
    def _generate_range(
        cls,
        start,
        end,
        periods: int | None,
        freq,
        tz=None,
        normalize: bool = False,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
        inclusive: IntervalClosedType = "both",
        *,
        unit: str | None = None,
    ) -> Self:
        """
        Generate a DatetimeArray from start/end/periods/freq.

        Exactly three of start, end, periods, freq must be specified.
        With a freq, values are generated by offset arithmetic; without one,
        values are linearly spaced between start and end.  ``inclusive``
        controls whether the endpoints are kept.
        """
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        if unit is not None:
            if unit not in ["s", "ms", "us", "ns"]:
                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
        else:
            unit = "ns"

        # round_ok=False: a start/end that doesn't fit the requested unit
        # should raise rather than be silently rounded
        if start is not None:
            start = start.as_unit(unit, round_ok=False)
        if end is not None:
            end = end.as_unit(unit, round_ok=False)

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start = _maybe_localize_point(start, freq, tz, ambiguous, nonexistent)
            end = _maybe_localize_point(end, freq, tz, ambiguous, nonexistent)

        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                i8values = generate_regular_range(start, end, periods, freq, unit=unit)
            else:
                # non-fixed offsets: materialize each Timestamp individually
                xdr = _generate_range(
                    start=start, end=end, periods=periods, offset=freq, unit=unit
                )
                i8values = np.array([x._value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:
                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    creso = abbrev_to_npy_unit(unit)
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values,
                        tz,
                        ambiguous=ambiguous,
                        nonexistent=nonexistent,
                        creso=creso,
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            periods = cast(int, periods)
            i8values = (
                np.linspace(0, end._value - start._value, periods, dtype="int64")
                + start._value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints according to ``inclusive``
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start)._value
            end_i8 = Timestamp(end)._value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view(f"datetime64[{unit}]")
        dtype = tz_to_dtype(tz, unit=unit)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
521
522 # -----------------------------------------------------------------
523 # DatetimeLike Interface
524
525 def _unbox_scalar(self, value) -> np.datetime64:
526 if not isinstance(value, self._scalar_type) and value is not NaT:
527 raise ValueError("'value' should be a Timestamp.")
528 self._check_compatible_with(value)
529 if value is NaT:
530 return np.datetime64(value._value, self.unit)
531 else:
532 return value.as_unit(self.unit).asm8
533
    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        # Parse a string into a scalar carrying our timezone (if any).
        return Timestamp(value, tz=self.tz)
536
537 def _check_compatible_with(self, other) -> None:
538 if other is NaT:
539 return
540 self._assert_tzawareness_compat(other)
541
542 # -----------------------------------------------------------------
543 # Descriptive Properties
544
    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        # GH#42228
        # Box a raw np.datetime64 as a Timestamp at our resolution and tz.
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz)
        return ts
550
    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype[np.datetime64] | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

            A future version of pandas will change dtype to never be a
            ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
            always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        return self._dtype
575
    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.

        Examples
        --------
        For Series:

        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-02-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.tz
        datetime.timezone.utc

        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
        ...                         "2/1/2020 11:00:00+00:00"])
        >>> idx.tz
        datetime.timezone.utc
        """
        # GH 18595
        # The tz lives on the dtype (DatetimeTZDtype); tz-naive numpy dtypes
        # have no "tz" attribute, hence the getattr default.
        return getattr(self.dtype, "tz", None)
608
    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        # Assignment is always an error; the tz can only change via the
        # dedicated methods named in the message.
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )
616
    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz
623
    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._creso)
630
    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Finest Resolution needed to represent the wall-times in this array.
        return get_resolution(self.asi8, self.tz, reso=self._creso)
634
635 # ----------------------------------------------------------------
636 # Array-Like / EA-Interface Methods
637
638 def __array__(self, dtype=None, copy=None) -> np.ndarray:
639 if dtype is None and self.tz:
640 # The default for tz-aware is object, to preserve tz info
641 dtype = object
642
643 return super().__array__(dtype=dtype, copy=copy)
644
645 def __iter__(self) -> Iterator:
646 """
647 Return an iterator over the boxed values
648
649 Yields
650 ------
651 tstamp : Timestamp
652 """
653 if self.ndim > 1:
654 for i in range(len(self)):
655 yield self[i]
656 else:
657 # convert in chunks of 10k for efficiency
658 data = self.asi8
659 length = len(self)
660 chunksize = _ITER_CHUNKSIZE
661 chunks = (length // chunksize) + 1
662
663 for i in range(chunks):
664 start_i = i * chunksize
665 end_i = min((i + 1) * chunksize, length)
666 converted = ints_to_pydatetime(
667 data[start_i:end_i],
668 tz=self.tz,
669 box="timestamp",
670 reso=self._creso,
671 )
672 yield from converted
673
    def astype(self, dtype, copy: bool = True):
        """
        Cast to ``dtype``.

        Cases handled here: same dtype (optionally copying), tz-aware unit
        conversion via DatetimeTZDtype, numpy datetime64 unit conversion,
        and PeriodDtype via ``to_period``.  Naive->aware and aware->naive
        casts raise TypeError with guidance toward tz_localize/tz_convert.
        Everything else is delegated to DatetimeLikeArrayMixin.astype.
        """
        # We handle
        #   --> datetime
        #   --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if dtype == self.dtype:
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, ExtensionDtype):
            if not isinstance(dtype, DatetimeTZDtype):
                # e.g. Sparse[datetime64[ns]]
                return super().astype(dtype, copy=copy)
            elif self.tz is None:
                # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
                # the Series behavior which did
                # values.tz_localize("UTC").tz_convert(dtype.tz)
                raise TypeError(
                    "Cannot use .astype to convert from timezone-naive dtype to "
                    "timezone-aware dtype. Use obj.tz_localize instead or "
                    "series.dt.tz_localize instead"
                )
            else:
                # tzaware unit conversion e.g. datetime64[s, UTC]
                np_dtype = np.dtype(dtype.str)
                res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
                return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

        elif (
            self.tz is None
            and lib.is_np_dtype(dtype, "M")
            and not is_unitless(dtype)
            and is_supported_dtype(dtype)
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif self.tz is not None and lib.is_np_dtype(dtype, "M"):
            # pre-2.0 behavior for DTA/DTI was
            # values.tz_convert("UTC").tz_localize(None), which did not match
            # the Series behavior
            raise TypeError(
                "Cannot use .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype. Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead."
            )

        elif (
            self.tz is None
            and lib.is_np_dtype(dtype, "M")
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            raise TypeError(
                "Casting to unit-less dtype 'datetime64' is not supported. "
                "Pass e.g. 'datetime64[ns]' instead."
            )

        elif isinstance(dtype, PeriodDtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
740
741 # -----------------------------------------------------------------
742 # Rendering Methods
743
    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Render each element as a string, using ``na_rep`` for missing values.
        """
        if date_format is None and self._is_dates_only:
            # Only dates and no timezone: provide a default format
            date_format = "%Y-%m-%d"

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso
        )
754
755 # -----------------------------------------------------------------
756 # Comparison Methods
757
758 def _has_same_tz(self, other) -> bool:
759 # vzone shouldn't be None if value is non-datetime like
760 if isinstance(other, np.datetime64):
761 # convert to Timestamp as np.datetime64 doesn't have tz attr
762 other = Timestamp(other)
763
764 if not hasattr(other, "tzinfo"):
765 return False
766 other_tz = other.tzinfo
767 return timezones.tz_compare(self.tzinfo, other_tz)
768
    def _assert_tzawareness_compat(self, other) -> None:
        # adapted from _Timestamp._assert_tzawareness_compat
        # Raise TypeError when one side is tz-aware and the other is not.
        other_tz = getattr(other, "tzinfo", None)
        other_dtype = getattr(other, "dtype", None)

        if isinstance(other_dtype, DatetimeTZDtype):
            # Get tzinfo from Series dtype
            other_tz = other.dtype.tz
        if other is NaT:
            # pd.NaT quacks both aware and naive
            pass
        elif self.tz is None:
            if other_tz is not None:
                raise TypeError(
                    "Cannot compare tz-naive and tz-aware datetime-like objects."
                )
        elif other_tz is None:
            raise TypeError(
                "Cannot compare tz-naive and tz-aware datetime-like objects"
            )
789
790 # -----------------------------------------------------------------
791 # Arithmetic Methods
792
    def _add_offset(self, offset: BaseOffset) -> Self:
        """
        Add a non-Tick DateOffset to each element.

        Tz-aware arrays are operated on as wall-clock (tz-naive) values and
        re-localized afterwards.  If the offset has no vectorized path, fall
        back to elementwise object addition with a PerformanceWarning.
        """
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            values = self.tz_localize(None)
        else:
            values = self

        try:
            res_values = offset._apply_array(values._ndarray)
            if res_values.dtype.kind == "i":
                # error: Argument 1 to "view" of "ndarray" has incompatible type
                # "dtype[datetime64] | DatetimeTZDtype"; expected
                # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]"
                res_values = res_values.view(values.dtype)  # type: ignore[arg-type]
        except NotImplementedError:
            # Elementwise fallback path
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            res_values = self.astype("O") + offset
            # TODO(GH#55564): as_unit will be unnecessary
            result = type(self)._from_sequence(res_values).as_unit(self.unit)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = type(self)._simple_new(res_values, dtype=res_values.dtype)
            if offset.normalize:
                result = result.normalize()
                result._freq = None

            if self.tz is not None:
                result = result.tz_localize(self.tz)

        return result
831
832 # -----------------------------------------------------------------
833 # Timezone Conversion and Localization Methods
834
835 def _local_timestamps(self) -> npt.NDArray[np.int64]:
836 """
837 Convert to an i8 (unix-like nanosecond timestamp) representation
838 while keeping the local timezone and not using UTC.
839 This is used to calculate time-of-day information as if the timestamps
840 were timezone-naive.
841 """
842 if self.tz is None or timezones.is_utc(self.tz):
843 # Avoid the copy that would be made in tzconversion
844 return self.asi8
845 return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
846
    def tz_convert(self, tz) -> Self:
        """
        Convert tz-aware Datetime Array/Index from one time zone to another.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone for time. Corresponding timestamps would be converted
            to this time zone of the Datetime Array/Index. A `tz` of None will
            convert to UTC and remove the timezone information.

        Returns
        -------
        Array or Index

        Raises
        ------
        TypeError
            If Datetime Array/Index is tz-naive.

        See Also
        --------
        DatetimeIndex.tz : A timezone that has a variable offset from UTC.
        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
            given time zone, or remove timezone from a tz-aware DatetimeIndex.

        Examples
        --------
        With the `tz` parameter, we can change the DatetimeIndex
        to other time zones:

        >>> dti = pd.date_range(start='2014-08-01 09:00',
        ...                     freq='h', periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='h')

        >>> dti.tz_convert('US/Central')
        DatetimeIndex(['2014-08-01 02:00:00-05:00',
                       '2014-08-01 03:00:00-05:00',
                       '2014-08-01 04:00:00-05:00'],
                      dtype='datetime64[ns, US/Central]', freq='h')

        With the ``tz=None``, we can remove the timezone (after converting
        to UTC if necessary):

        >>> dti = pd.date_range(start='2014-08-01 09:00', freq='h',
        ...                     periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='h')

        >>> dti.tz_convert(None)
        DatetimeIndex(['2014-08-01 07:00:00',
                       '2014-08-01 08:00:00',
                       '2014-08-01 09:00:00'],
                      dtype='datetime64[ns]', freq='h')
        """
        tz = timezones.maybe_get_tz(tz)

        if self.tz is None:
            # tz naive, use tz_localize
            raise TypeError(
                "Cannot convert tz-naive timestamps, use tz_localize to localize"
            )

        # No conversion since timestamps are all UTC to begin with
        # Only the dtype (i.e. the tz used for rendering) changes; the
        # underlying i8/ndarray data is reused as-is.
        dtype = tz_to_dtype(tz, unit=self.unit)
        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
922
923 @dtl.ravel_compat
924 def tz_localize(
925 self,
926 tz,
927 ambiguous: TimeAmbiguous = "raise",
928 nonexistent: TimeNonexistent = "raise",
929 ) -> Self:
930 """
931 Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.
932
933 This method takes a time zone (tz) naive Datetime Array/Index object
934 and makes this time zone aware. It does not move the time to another
935 time zone.
936
937 This method can also be used to do the inverse -- to create a time
938 zone unaware object from an aware object. To that end, pass `tz=None`.
939
940 Parameters
941 ----------
942 tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
943 Time zone to convert timestamps to. Passing ``None`` will
944 remove the time zone information preserving local time.
945 ambiguous : 'infer', 'NaT', bool array, default 'raise'
946 When clocks moved backward due to DST, ambiguous times may arise.
947 For example in Central European Time (UTC+01), when going from
948 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
949 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
950 `ambiguous` parameter dictates how ambiguous times should be
951 handled.
952
953 - 'infer' will attempt to infer fall dst-transition hours based on
954 order
955 - bool-ndarray where True signifies a DST time, False signifies a
956 non-DST time (note that this flag is only applicable for
957 ambiguous times)
958 - 'NaT' will return NaT where there are ambiguous times
959 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
960 times.
961
962 nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
963default 'raise'
964 A nonexistent time does not exist in a particular timezone
965 where clocks moved forward due to DST.
966
967 - 'shift_forward' will shift the nonexistent time forward to the
968 closest existing time
969 - 'shift_backward' will shift the nonexistent time backward to the
970 closest existing time
971 - 'NaT' will return NaT where there are nonexistent times
972 - timedelta objects will shift nonexistent times by the timedelta
973 - 'raise' will raise an NonExistentTimeError if there are
974 nonexistent times.
975
976 Returns
977 -------
978 Same type as self
979 Array/Index converted to the specified time zone.
980
981 Raises
982 ------
983 TypeError
984 If the Datetime Array/Index is tz-aware and tz is not None.
985
986 See Also
987 --------
988 DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
989 one time zone to another.
990
991 Examples
992 --------
993 >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
994 >>> tz_naive
995 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
996 '2018-03-03 09:00:00'],
997 dtype='datetime64[ns]', freq='D')
998
999 Localize DatetimeIndex in US/Eastern time zone:
1000
1001 >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
1002 >>> tz_aware
1003 DatetimeIndex(['2018-03-01 09:00:00-05:00',
1004 '2018-03-02 09:00:00-05:00',
1005 '2018-03-03 09:00:00-05:00'],
1006 dtype='datetime64[ns, US/Eastern]', freq=None)
1007
1008 With the ``tz=None``, we can remove the time zone information
1009 while keeping the local time (not converted to UTC):
1010
1011 >>> tz_aware.tz_localize(None)
1012 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
1013 '2018-03-03 09:00:00'],
1014 dtype='datetime64[ns]', freq=None)
1015
1016 Be careful with DST changes. When there is sequential data, pandas can
1017 infer the DST time:
1018
1019 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
1020 ... '2018-10-28 02:00:00',
1021 ... '2018-10-28 02:30:00',
1022 ... '2018-10-28 02:00:00',
1023 ... '2018-10-28 02:30:00',
1024 ... '2018-10-28 03:00:00',
1025 ... '2018-10-28 03:30:00']))
1026 >>> s.dt.tz_localize('CET', ambiguous='infer')
1027 0 2018-10-28 01:30:00+02:00
1028 1 2018-10-28 02:00:00+02:00
1029 2 2018-10-28 02:30:00+02:00
1030 3 2018-10-28 02:00:00+01:00
1031 4 2018-10-28 02:30:00+01:00
1032 5 2018-10-28 03:00:00+01:00
1033 6 2018-10-28 03:30:00+01:00
1034 dtype: datetime64[ns, CET]
1035
1036 In some cases, inferring the DST is impossible. In such cases, you can
1037 pass an ndarray to the ambiguous parameter to set the DST explicitly
1038
1039 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
1040 ... '2018-10-28 02:36:00',
1041 ... '2018-10-28 03:46:00']))
1042 >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
1043 0 2018-10-28 01:20:00+02:00
1044 1 2018-10-28 02:36:00+02:00
1045 2 2018-10-28 03:46:00+01:00
1046 dtype: datetime64[ns, CET]
1047
1048 If the DST transition causes nonexistent times, you can shift these
1049 dates forward or backwards with a timedelta object or `'shift_forward'`
        or `'shift_backward'`.
1051
1052 >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
1053 ... '2015-03-29 03:30:00']))
1054 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
1055 0 2015-03-29 03:00:00+02:00
1056 1 2015-03-29 03:30:00+02:00
1057 dtype: datetime64[ns, Europe/Warsaw]
1058
1059 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
1060 0 2015-03-29 01:59:59.999999999+01:00
1061 1 2015-03-29 03:30:00+02:00
1062 dtype: datetime64[ns, Europe/Warsaw]
1063
1064 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h'))
1065 0 2015-03-29 03:30:00+02:00
1066 1 2015-03-29 03:30:00+02:00
1067 dtype: datetime64[ns, Europe/Warsaw]
1068 """
1069 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
1070 if nonexistent not in nonexistent_options and not isinstance(
1071 nonexistent, timedelta
1072 ):
1073 raise ValueError(
1074 "The nonexistent argument must be one of 'raise', "
1075 "'NaT', 'shift_forward', 'shift_backward' or "
1076 "a timedelta object"
1077 )
1078
1079 if self.tz is not None:
1080 if tz is None:
1081 new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
1082 else:
1083 raise TypeError("Already tz-aware, use tz_convert to convert.")
1084 else:
1085 tz = timezones.maybe_get_tz(tz)
1086 # Convert to UTC
1087
1088 new_dates = tzconversion.tz_localize_to_utc(
1089 self.asi8,
1090 tz,
1091 ambiguous=ambiguous,
1092 nonexistent=nonexistent,
1093 creso=self._creso,
1094 )
1095 new_dates_dt64 = new_dates.view(f"M8[{self.unit}]")
1096 dtype = tz_to_dtype(tz, unit=self.unit)
1097
1098 freq = None
1099 if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates_dt64[0])):
1100 # we can preserve freq
1101 # TODO: Also for fixed-offsets
1102 freq = self.freq
1103 elif tz is None and self.tz is None:
1104 # no-op
1105 freq = self.freq
1106 return self._simple_new(new_dates_dt64, dtype=dtype, freq=freq)
1107
1108 # ----------------------------------------------------------------
1109 # Conversion Methods - Vectorized analogues of Timestamp methods
1110
1111 def to_pydatetime(self) -> npt.NDArray[np.object_]:
1112 """
1113 Return an ndarray of ``datetime.datetime`` objects.
1114
1115 Returns
1116 -------
1117 numpy.ndarray
1118
1119 Examples
1120 --------
1121 >>> idx = pd.date_range('2018-02-27', periods=3)
1122 >>> idx.to_pydatetime()
1123 array([datetime.datetime(2018, 2, 27, 0, 0),
1124 datetime.datetime(2018, 2, 28, 0, 0),
1125 datetime.datetime(2018, 3, 1, 0, 0)], dtype=object)
1126 """
1127 return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)
1128
1129 def normalize(self) -> Self:
1130 """
1131 Convert times to midnight.
1132
1133 The time component of the date-time is converted to midnight i.e.
1134 00:00:00. This is useful in cases, when the time does not matter.
1135 Length is unaltered. The timezones are unaffected.
1136
1137 This method is available on Series with datetime values under
1138 the ``.dt`` accessor, and directly on Datetime Array/Index.
1139
1140 Returns
1141 -------
1142 DatetimeArray, DatetimeIndex or Series
1143 The same type as the original data. Series will have the same
1144 name and index. DatetimeIndex will have the same name.
1145
1146 See Also
1147 --------
1148 floor : Floor the datetimes to the specified freq.
1149 ceil : Ceil the datetimes to the specified freq.
1150 round : Round the datetimes to the specified freq.
1151
1152 Examples
1153 --------
1154 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='h',
1155 ... periods=3, tz='Asia/Calcutta')
1156 >>> idx
1157 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1158 '2014-08-01 11:00:00+05:30',
1159 '2014-08-01 12:00:00+05:30'],
1160 dtype='datetime64[ns, Asia/Calcutta]', freq='h')
1161 >>> idx.normalize()
1162 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1163 '2014-08-01 00:00:00+05:30',
1164 '2014-08-01 00:00:00+05:30'],
1165 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1166 """
1167 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso)
1168 dt64_values = new_values.view(self._ndarray.dtype)
1169
1170 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
1171 dta = dta._with_freq("infer")
1172 if self.tz is not None:
1173 dta = dta.tz_localize(self.tz)
1174 return dta
1175
    def to_period(self, freq=None) -> PeriodArray:
        """
        Cast to PeriodArray/PeriodIndex at a particular frequency.

        Converts DatetimeArray/Index to PeriodArray/PeriodIndex.

        Parameters
        ----------
        freq : str or Period, optional
            One of pandas' :ref:`period aliases <timeseries.period_aliases>`
            or a Period object. Will be inferred by default.

        Returns
        -------
        PeriodArray/PeriodIndex

        Raises
        ------
        ValueError
            When converting a DatetimeArray/Index with non-regular values,
            so that a frequency cannot be inferred.

        See Also
        --------
        PeriodIndex: Immutable ndarray holding ordinal values.
        DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.

        Examples
        --------
        >>> df = pd.DataFrame({"y": [1, 2, 3]},
        ...                   index=pd.to_datetime(["2000-03-31 00:00:00",
        ...                                         "2000-05-31 00:00:00",
        ...                                         "2000-08-31 00:00:00"]))
        >>> df.index.to_period("M")
        PeriodIndex(['2000-03', '2000-05', '2000-08'],
                    dtype='period[M]')

        Infer the daily frequency

        >>> idx = pd.date_range("2017-01-01", periods=2)
        >>> idx.to_period()
        PeriodIndex(['2017-01-01', '2017-01-02'],
                    dtype='period[D]')
        """
        from pandas.core.arrays import PeriodArray

        if self.tz is not None:
            # Periods have no timezone concept; conversion uses wall times,
            # so warn that the tz is silently discarded.
            warnings.warn(
                "Converting to PeriodArray/Index representation "
                "will drop timezone information.",
                UserWarning,
                stacklevel=find_stack_level(),
            )

        if freq is None:
            # Prefer the explicit freq attribute; fall back to inferring one
            # from the values themselves.
            freq = self.freqstr or self.inferred_freq
            if isinstance(self.freq, BaseOffset) and hasattr(
                self.freq, "_period_dtype_code"
            ):
                # Offsets that carry a period dtype code map directly onto a
                # Period frequency string.
                freq = PeriodDtype(self.freq)._freqstr

            if freq is None:
                raise ValueError(
                    "You must pass a freq argument as current index has none."
                )

            res = get_period_alias(freq)

            # https://github.com/pandas-dev/pandas/issues/33358
            if res is None:
                res = freq

            freq = res
        return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
1250
1251 # -----------------------------------------------------------------
1252 # Properties - Vectorized Timestamp Properties/Methods
1253
1254 def month_name(self, locale=None) -> npt.NDArray[np.object_]:
1255 """
1256 Return the month names with specified locale.
1257
1258 Parameters
1259 ----------
1260 locale : str, optional
1261 Locale determining the language in which to return the month name.
1262 Default is English locale (``'en_US.utf8'``). Use the command
1263 ``locale -a`` on your terminal on Unix systems to find your locale
1264 language code.
1265
1266 Returns
1267 -------
1268 Series or Index
1269 Series or Index of month names.
1270
1271 Examples
1272 --------
1273 >>> s = pd.Series(pd.date_range(start='2018-01', freq='ME', periods=3))
1274 >>> s
1275 0 2018-01-31
1276 1 2018-02-28
1277 2 2018-03-31
1278 dtype: datetime64[ns]
1279 >>> s.dt.month_name()
1280 0 January
1281 1 February
1282 2 March
1283 dtype: object
1284
1285 >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
1286 >>> idx
1287 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1288 dtype='datetime64[ns]', freq='ME')
1289 >>> idx.month_name()
1290 Index(['January', 'February', 'March'], dtype='object')
1291
1292 Using the ``locale`` parameter you can set a different locale language,
1293 for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month
1294 names in Brazilian Portuguese language.
1295
1296 >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
1297 >>> idx
1298 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1299 dtype='datetime64[ns]', freq='ME')
1300 >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
1301 Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
1302 """
1303 values = self._local_timestamps()
1304
1305 result = fields.get_date_name_field(
1306 values, "month_name", locale=locale, reso=self._creso
1307 )
1308 result = self._maybe_mask_results(result, fill_value=None)
1309 return result
1310
1311 def day_name(self, locale=None) -> npt.NDArray[np.object_]:
1312 """
1313 Return the day names with specified locale.
1314
1315 Parameters
1316 ----------
1317 locale : str, optional
1318 Locale determining the language in which to return the day name.
1319 Default is English locale (``'en_US.utf8'``). Use the command
1320 ``locale -a`` on your terminal on Unix systems to find your locale
1321 language code.
1322
1323 Returns
1324 -------
1325 Series or Index
1326 Series or Index of day names.
1327
1328 Examples
1329 --------
1330 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
1331 >>> s
1332 0 2018-01-01
1333 1 2018-01-02
1334 2 2018-01-03
1335 dtype: datetime64[ns]
1336 >>> s.dt.day_name()
1337 0 Monday
1338 1 Tuesday
1339 2 Wednesday
1340 dtype: object
1341
1342 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1343 >>> idx
1344 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1345 dtype='datetime64[ns]', freq='D')
1346 >>> idx.day_name()
1347 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1348
1349 Using the ``locale`` parameter you can set a different locale language,
1350 for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day
1351 names in Brazilian Portuguese language.
1352
1353 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1354 >>> idx
1355 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1356 dtype='datetime64[ns]', freq='D')
1357 >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
1358 Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
1359 """
1360 values = self._local_timestamps()
1361
1362 result = fields.get_date_name_field(
1363 values, "day_name", locale=locale, reso=self._creso
1364 )
1365 result = self._maybe_mask_results(result, fill_value=None)
1366 return result
1367
1368 @property
1369 def time(self) -> npt.NDArray[np.object_]:
1370 """
1371 Returns numpy array of :class:`datetime.time` objects.
1372
1373 The time part of the Timestamps.
1374
1375 Examples
1376 --------
1377 For Series:
1378
1379 >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
1380 >>> s = pd.to_datetime(s)
1381 >>> s
1382 0 2020-01-01 10:00:00+00:00
1383 1 2020-02-01 11:00:00+00:00
1384 dtype: datetime64[ns, UTC]
1385 >>> s.dt.time
1386 0 10:00:00
1387 1 11:00:00
1388 dtype: object
1389
1390 For DatetimeIndex:
1391
1392 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
1393 ... "2/1/2020 11:00:00+00:00"])
1394 >>> idx.time
1395 array([datetime.time(10, 0), datetime.time(11, 0)], dtype=object)
1396 """
1397 # If the Timestamps have a timezone that is not UTC,
1398 # convert them into their i8 representation while
1399 # keeping their timezone and not using UTC
1400 timestamps = self._local_timestamps()
1401
1402 return ints_to_pydatetime(timestamps, box="time", reso=self._creso)
1403
1404 @property
1405 def timetz(self) -> npt.NDArray[np.object_]:
1406 """
1407 Returns numpy array of :class:`datetime.time` objects with timezones.
1408
1409 The time part of the Timestamps.
1410
1411 Examples
1412 --------
1413 For Series:
1414
1415 >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
1416 >>> s = pd.to_datetime(s)
1417 >>> s
1418 0 2020-01-01 10:00:00+00:00
1419 1 2020-02-01 11:00:00+00:00
1420 dtype: datetime64[ns, UTC]
1421 >>> s.dt.timetz
1422 0 10:00:00+00:00
1423 1 11:00:00+00:00
1424 dtype: object
1425
1426 For DatetimeIndex:
1427
1428 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
1429 ... "2/1/2020 11:00:00+00:00"])
1430 >>> idx.timetz
1431 array([datetime.time(10, 0, tzinfo=datetime.timezone.utc),
1432 datetime.time(11, 0, tzinfo=datetime.timezone.utc)], dtype=object)
1433 """
1434 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso)
1435
1436 @property
1437 def date(self) -> npt.NDArray[np.object_]:
1438 """
1439 Returns numpy array of python :class:`datetime.date` objects.
1440
1441 Namely, the date part of Timestamps without time and
1442 timezone information.
1443
1444 Examples
1445 --------
1446 For Series:
1447
1448 >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
1449 >>> s = pd.to_datetime(s)
1450 >>> s
1451 0 2020-01-01 10:00:00+00:00
1452 1 2020-02-01 11:00:00+00:00
1453 dtype: datetime64[ns, UTC]
1454 >>> s.dt.date
1455 0 2020-01-01
1456 1 2020-02-01
1457 dtype: object
1458
1459 For DatetimeIndex:
1460
1461 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
1462 ... "2/1/2020 11:00:00+00:00"])
1463 >>> idx.date
1464 array([datetime.date(2020, 1, 1), datetime.date(2020, 2, 1)], dtype=object)
1465 """
1466 # If the Timestamps have a timezone that is not UTC,
1467 # convert them into their i8 representation while
1468 # keeping their timezone and not using UTC
1469 timestamps = self._local_timestamps()
1470
1471 return ints_to_pydatetime(timestamps, box="date", reso=self._creso)
1472
1473 def isocalendar(self) -> DataFrame:
1474 """
1475 Calculate year, week, and day according to the ISO 8601 standard.
1476
1477 Returns
1478 -------
1479 DataFrame
1480 With columns year, week and day.
1481
1482 See Also
1483 --------
1484 Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
1485 week number, and weekday for the given Timestamp object.
1486 datetime.date.isocalendar : Return a named tuple object with
1487 three components: year, week and weekday.
1488
1489 Examples
1490 --------
1491 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1492 >>> idx.isocalendar()
1493 year week day
1494 2019-12-29 2019 52 7
1495 2019-12-30 2020 1 1
1496 2019-12-31 2020 1 2
1497 2020-01-01 2020 1 3
1498 >>> idx.isocalendar().week
1499 2019-12-29 52
1500 2019-12-30 1
1501 2019-12-31 1
1502 2020-01-01 1
1503 Freq: D, Name: week, dtype: UInt32
1504 """
1505 from pandas import DataFrame
1506
1507 values = self._local_timestamps()
1508 sarray = fields.build_isocalendar_sarray(values, reso=self._creso)
1509 iso_calendar_df = DataFrame(
1510 sarray, columns=["year", "week", "day"], dtype="UInt32"
1511 )
1512 if self._hasna:
1513 iso_calendar_df.iloc[self._isnan] = None
1514 return iso_calendar_df
1515
    # The accessors below are generated at class-definition time by
    # ``_field_accessor(name, field, docstring)``; each extracts the named
    # field from the underlying i8 timestamps and returns int32 values.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="YE")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int32
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ME")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="min")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0       0
        1       1
        2       2
        dtype: int32
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0       0
        1       1
        2       2
        dtype: int32
        """,
    )
    # Shared docstring for ``day_of_week`` and its two aliases below.
    _dayofweek_doc = """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. It is assumed the week starts on
    Monday, which is denoted by 0 and ends on Sunday which is denoted
    by 6. This method is available on both Series with datetime
    values (using the `dt` accessor) or DatetimeIndex.

    Returns
    -------
    Series or Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
    >>> s.dt.dayofweek
    2016-12-31    5
    2017-01-01    6
    2017-01-02    0
    2017-01-03    1
    2017-01-04    2
    2017-01-05    3
    2017-01-06    4
    2017-01-07    5
    2017-01-08    6
    Freq: D, dtype: int32
    """
    day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
    # ``dayofweek`` and ``weekday`` are aliases for the same property.
    dayofweek = day_of_week
    weekday = day_of_week
1737
    # Note: the accessor name is "dayofyear"; ``day_of_year`` is the
    # class-level attribute, with ``dayofyear`` aliased below.
    day_of_year = _field_accessor(
        "dayofyear",
        "doy",
        """
        The ordinal day of the year.

        Examples
        --------
        For Series:

        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-02-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.dayofyear
        0     1
        1    32
        dtype: int32

        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
        ...                         "2/1/2020 11:00:00+00:00"])
        >>> idx.dayofyear
        Index([1, 32], dtype='int32')
        """,
    )
    dayofyear = day_of_year
    quarter = _field_accessor(
        "quarter",
        "q",
        """
        The quarter of the date.

        Examples
        --------
        For Series:

        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "4/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-04-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.quarter
        0    1
        1    2
        dtype: int32

        For DatetimeIndex:

        >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
        ...                         "2/1/2020 11:00:00+00:00"])
        >>> idx.quarter
        Index([1, 1], dtype='int32')
        """,
    )
    days_in_month = _field_accessor(
        "days_in_month",
        "dim",
        """
        The number of days in the month.

        Examples
        --------
        >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
        >>> s = pd.to_datetime(s)
        >>> s
        0   2020-01-01 10:00:00+00:00
        1   2020-02-01 11:00:00+00:00
        dtype: datetime64[ns, UTC]
        >>> s.dt.daysinmonth
        0    31
        1    29
        dtype: int32
        """,
    )
    daysinmonth = days_in_month
    # Template docstring shared by is_month_start / is_month_end; the
    # {first_or_last} placeholder is filled in per accessor below.
    _is_month_doc = """
        Indicates whether the date is the {first_or_last} day of the month.

        Returns
        -------
        Series or array
            For Series, returns a Series with boolean values.
            For DatetimeIndex, returns a boolean array.

        See Also
        --------
        is_month_start : Return a boolean indicating whether the date
            is the first day of the month.
        is_month_end : Return a boolean indicating whether the date
            is the last day of the month.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
        >>> s
        0   2018-02-27
        1   2018-02-28
        2   2018-03-01
        dtype: datetime64[ns]
        >>> s.dt.is_month_start
        0    False
        1    False
        2     True
        dtype: bool
        >>> s.dt.is_month_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2018-02-27", periods=3)
        >>> idx.is_month_start
        array([False, False, True])
        >>> idx.is_month_end
        array([False, True, False])
    """
    is_month_start = _field_accessor(
        "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
    )

    is_month_end = _field_accessor(
        "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
    )
1869
    # Boolean period-boundary accessors; each returns a boolean ndarray
    # (DatetimeIndex) or boolean Series (via the .dt accessor).
    is_quarter_start = _field_accessor(
        "is_quarter_start",
        "is_quarter_start",
        """
        Indicator for whether the date is the first day of a quarter.

        Returns
        -------
        is_quarter_start : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_end : Similar property for indicating the quarter end.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_start=df.dates.dt.is_quarter_start)
               dates  quarter  is_quarter_start
        0 2017-03-30        1             False
        1 2017-03-31        1             False
        2 2017-04-01        2              True
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_start
        array([False, False,  True, False])
        """,
    )
    is_quarter_end = _field_accessor(
        "is_quarter_end",
        "is_quarter_end",
        """
        Indicator for whether the date is the last day of a quarter.

        Returns
        -------
        is_quarter_end : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_start : Similar property indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_end=df.dates.dt.is_quarter_end)
               dates  quarter  is_quarter_end
        0 2017-03-30        1           False
        1 2017-03-31        1            True
        2 2017-04-01        2           False
        3 2017-04-02        2           False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_end
        array([False,  True, False, False])
        """,
    )
    is_year_start = _field_accessor(
        "is_year_start",
        "is_year_start",
        """
        Indicate whether the date is the first day of a year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_end : Similar property indicating the last day of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_start
        0    False
        1    False
        2     True
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_start
        array([False, False,  True])
        """,
    )
    is_year_end = _field_accessor(
        "is_year_end",
        "is_year_end",
        """
        Indicate whether the date is the last day of the year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_start : Similar property indicating the start of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_end
        array([False,  True, False])
        """,
    )
    is_leap_year = _field_accessor(
        "is_leap_year",
        "is_leap_year",
        """
        Boolean indicator if the date belongs to a leap year.

        A leap year is a year, which has 366 days (instead of 365) including
        29th of February as an intercalary day.
        Leap years are years which are multiples of four with the exception
        of years divisible by 100 but not by 400.

        Returns
        -------
        Series or ndarray
            Booleans indicating if dates belong to a leap year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="YE")
        >>> idx
        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                      dtype='datetime64[ns]', freq='YE-DEC')
        >>> idx.is_leap_year
        array([ True, False, False])

        >>> dates_series = pd.Series(idx)
        >>> dates_series
        0   2012-12-31
        1   2013-12-31
        2   2014-12-31
        dtype: datetime64[ns]
        >>> dates_series.dt.is_leap_year
        0     True
        1    False
        2    False
        dtype: bool
        """,
    )
2083
2084 def to_julian_date(self) -> npt.NDArray[np.float64]:
2085 """
2086 Convert Datetime Array to float64 ndarray of Julian Dates.
2087 0 Julian date is noon January 1, 4713 BC.
2088 https://en.wikipedia.org/wiki/Julian_day
2089 """
2090
2091 # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
2092 year = np.asarray(self.year)
2093 month = np.asarray(self.month)
2094 day = np.asarray(self.day)
2095 testarr = month < 3
2096 year[testarr] -= 1
2097 month[testarr] += 12
2098 return (
2099 day
2100 + np.fix((153 * month - 457) / 5)
2101 + 365 * year
2102 + np.floor(year / 4)
2103 - np.floor(year / 100)
2104 + np.floor(year / 400)
2105 + 1_721_118.5
2106 + (
2107 self.hour
2108 + self.minute / 60
2109 + self.second / 3600
2110 + self.microsecond / 3600 / 10**6
2111 + self.nanosecond / 3600 / 10**9
2112 )
2113 / 24
2114 )
2115
2116 # -----------------------------------------------------------------
2117 # Reductions
2118
2119 def std(
2120 self,
2121 axis=None,
2122 dtype=None,
2123 out=None,
2124 ddof: int = 1,
2125 keepdims: bool = False,
2126 skipna: bool = True,
2127 ):
2128 """
2129 Return sample standard deviation over requested axis.
2130
2131 Normalized by `N-1` by default. This can be changed using ``ddof``.
2132
2133 Parameters
2134 ----------
2135 axis : int, optional
2136 Axis for the function to be applied on. For :class:`pandas.Series`
2137 this parameter is unused and defaults to ``None``.
2138 ddof : int, default 1
2139 Degrees of Freedom. The divisor used in calculations is `N - ddof`,
2140 where `N` represents the number of elements.
2141 skipna : bool, default True
2142 Exclude NA/null values. If an entire row/column is ``NA``, the result
2143 will be ``NA``.
2144
2145 Returns
2146 -------
2147 Timedelta
2148
2149 See Also
2150 --------
2151 numpy.ndarray.std : Returns the standard deviation of the array elements
2152 along given axis.
2153 Series.std : Return sample standard deviation over requested axis.
2154
2155 Examples
2156 --------
2157 For :class:`pandas.DatetimeIndex`:
2158
2159 >>> idx = pd.date_range('2001-01-01 00:00', periods=3)
2160 >>> idx
2161 DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
2162 dtype='datetime64[ns]', freq='D')
2163 >>> idx.std()
2164 Timedelta('1 days 00:00:00')
2165 """
2166 # Because std is translation-invariant, we can get self.std
2167 # by calculating (self - Timestamp(0)).std, and we can do it
2168 # without creating a copy by using a view on self._ndarray
2169 from pandas.core.arrays import TimedeltaArray
2170
2171 # Find the td64 dtype with the same resolution as our dt64 dtype
2172 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
2173 dtype = np.dtype(dtype_str)
2174
2175 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype)
2176
2177 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
2178
2179
2180# -------------------------------------------------------------------
2181# Constructor Helpers
2182
2183
def _sequence_to_dt64(
    data: ArrayLike,
    *,
    copy: bool = False,
    tz: tzinfo | None = None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    out_unit: str | None = None,
) -> tuple[np.ndarray, tzinfo | None]:
    """
    Convert a sequence to a datetime64 ndarray plus an inferred/validated tz.

    Parameters
    ----------
    data : np.ndarray or ExtensionArray
        dtl.ensure_arraylike_for_datetimelike has already been called.
    copy : bool, default False
        Whether a defensive copy of the input may be required; internally
        flipped to False once a conversion step has already allocated a
        new array.
    tz : tzinfo or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    out_unit : str or None, default None
        Desired output resolution.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[unit]``.
        Where `unit` is "ns" unless specified otherwise by `out_unit`.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.

    Raises
    ------
    TypeError : PeriodDtype data is passed
    """

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    if out_unit is None:
        out_unit = "ns"
    out_dtype = np.dtype(f"M8[{out_unit}]")

    if data_dtype == object or is_string_dtype(data_dtype):
        # TODO: We do not have tests specific to string-dtypes,
        # also complex or categorical or other extension
        data = cast(np.ndarray, data)
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # Much more performant than going through array_to_datetime
            data = data.astype(np.int64)
        elif tz is not None and ambiguous == "raise":
            # Parse straight to tz-aware values at the requested resolution.
            obj_data = np.asarray(data, dtype=object)
            result = tslib.array_to_datetime_with_tz(
                obj_data,
                tz=tz,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                creso=abbrev_to_npy_unit(out_unit),
            )
            return result, tz
        else:
            converted, inferred_tz = objects_to_datetime64(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
                out_unit=out_unit or "ns",
            )
            # objects_to_datetime64 returned a freshly allocated array.
            copy = False
            if tz and inferred_tz:
                # two timezones: convert to intended from base UTC repr
                # GH#42505 by convention, these are _already_ UTC
                result = converted

            elif inferred_tz:
                # No tz requested; adopt the one inferred from the data.
                tz = inferred_tz
                result = converted

            else:
                # Wall times: localize to `tz` (if any) via the naive path.
                result, _ = _construct_from_dt64_naive(
                    converted, tz=tz, copy=copy, ambiguous=ambiguous
                )
            return result, tz

    data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if isinstance(data_dtype, DatetimeTZDtype):
        # DatetimeArray -> ndarray
        data = cast(DatetimeArray, data)
        # Ensure any user-provided tz agrees with the data's own tz.
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif lib.is_np_dtype(data_dtype, "M"):
        # tz-naive DatetimeArray or ndarray[datetime64]
        if isinstance(data, DatetimeArray):
            data = data._ndarray

        data = cast(np.ndarray, data)
        result, copy = _construct_from_dt64_naive(
            data, tz=tz, copy=copy, ambiguous=ambiguous
        )

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
            copy = False
        data = cast(np.ndarray, data)
        result = data.view(out_dtype)

    if copy:
        # A defensive copy is still owed to the caller.
        result = result.copy()

    # Post-conditions: a supported, resolution-bearing datetime64 dtype.
    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype.kind == "M"
    assert result.dtype != "M8"
    assert is_supported_dtype(result.dtype)
    return result, tz
2308
2309
def _construct_from_dt64_naive(
    data: np.ndarray, *, tz: tzinfo | None, copy: bool, ambiguous: TimeAmbiguous
) -> tuple[np.ndarray, bool]:
    """
    Convert datetime64 data to a supported dtype, localizing if necessary.

    Parameters
    ----------
    data : np.ndarray
        datetime64-dtype ndarray (any resolution, any byte order).
    tz : tzinfo or None
        If not None, `data` is treated as wall times in `tz` and converted
        to UTC.
    copy : bool
        Whether the caller still intends to copy the data; flipped to False
        below whenever a conversion has already allocated a new array.
    ambiguous : str, bool, or arraylike
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.

    Returns
    -------
    result : np.ndarray
        datetime64 ndarray with a supported, little-endian dtype.
    copy : bool
        Whether the caller still needs to make a defensive copy.
    """
    # Caller is responsible for ensuring
    # lib.is_np_dtype(data.dtype)

    new_dtype = data.dtype
    if not is_supported_dtype(new_dtype):
        # Cast to the nearest supported unit, generally "s"
        new_dtype = get_supported_dtype(new_dtype)
        data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
        # astype_overflowsafe allocated a new array; no further copy needed.
        copy = False

    if data.dtype.byteorder == ">":
        # TODO: better way to handle this? non-copying alternative?
        # without this, test_constructor_datetime64_bigendian fails
        data = data.astype(data.dtype.newbyteorder("<"))
        new_dtype = data.dtype
        copy = False

    if tz is not None:
        # Convert tz-naive to UTC
        # TODO: if tz is UTC, are there situations where we *don't* want a
        # copy? tz_localize_to_utc always makes one.
        # tz_localize_to_utc works on 1D i8 data, so flatten then restore.
        shape = data.shape
        if data.ndim > 1:
            data = data.ravel()

        data_unit = get_unit_from_dtype(new_dtype)
        data = tzconversion.tz_localize_to_utc(
            data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
        )
        data = data.view(new_dtype)
        data = data.reshape(shape)

    assert data.dtype == new_dtype, data.dtype
    result = data

    return result, copy
2352
2353
2354def objects_to_datetime64(
2355 data: np.ndarray,
2356 dayfirst,
2357 yearfirst,
2358 utc: bool = False,
2359 errors: DateTimeErrorChoices = "raise",
2360 allow_object: bool = False,
2361 out_unit: str = "ns",
2362):
2363 """
2364 Convert data to array of timestamps.
2365
2366 Parameters
2367 ----------
2368 data : np.ndarray[object]
2369 dayfirst : bool
2370 yearfirst : bool
2371 utc : bool, default False
2372 Whether to convert/localize timestamps to UTC.
2373 errors : {'raise', 'ignore', 'coerce'}
2374 allow_object : bool
2375 Whether to return an object-dtype ndarray instead of raising if the
2376 data contains more than one timezone.
2377 out_unit : str, default "ns"
2378
2379 Returns
2380 -------
2381 result : ndarray
2382 np.datetime64[out_unit] if returned values represent wall times or UTC
2383 timestamps.
2384 object if mixed timezones
2385 inferred_tz : tzinfo or None
2386 If not None, then the datetime64 values in `result` denote UTC timestamps.
2387
2388 Raises
2389 ------
2390 ValueError : if data cannot be converted to datetimes
2391 TypeError : When a type cannot be converted to datetime
2392 """
2393 assert errors in ["raise", "ignore", "coerce"]
2394
2395 # if str-dtype, convert
2396 data = np.asarray(data, dtype=np.object_)
2397
2398 result, tz_parsed = tslib.array_to_datetime(
2399 data,
2400 errors=errors,
2401 utc=utc,
2402 dayfirst=dayfirst,
2403 yearfirst=yearfirst,
2404 creso=abbrev_to_npy_unit(out_unit),
2405 )
2406
2407 if tz_parsed is not None:
2408 # We can take a shortcut since the datetime64 numpy array
2409 # is in UTC
2410 return result, tz_parsed
2411 elif result.dtype.kind == "M":
2412 return result, tz_parsed
2413 elif result.dtype == object:
2414 # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
2415 # array is allowed. When called via `pd.DatetimeIndex`, we can
2416 # only accept datetime64 dtype, so raise TypeError if object-dtype
2417 # is returned, as that indicates the values can be recognized as
2418 # datetimes but they have conflicting timezones/awareness
2419 if allow_object:
2420 return result, tz_parsed
2421 raise TypeError("DatetimeIndex has mixed timezones")
2422 else: # pragma: no cover
2423 # GH#23675 this TypeError should never be hit, whereas the TypeError
2424 # in the object-dtype branch above is reachable.
2425 raise TypeError(result)
2426
2427
2428def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
2429 """
2430 Convert data based on dtype conventions, issuing
2431 errors where appropriate.
2432
2433 Parameters
2434 ----------
2435 data : np.ndarray or pd.Index
2436 copy : bool
2437 tz : tzinfo or None, default None
2438
2439 Returns
2440 -------
2441 data : np.ndarray or pd.Index
2442 copy : bool
2443
2444 Raises
2445 ------
2446 TypeError : PeriodDType data is passed
2447 """
2448 if not hasattr(data, "dtype"):
2449 # e.g. collections.deque
2450 return data, copy
2451
2452 if is_float_dtype(data.dtype):
2453 # pre-2.0 we treated these as wall-times, inconsistent with ints
2454 # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
2455 # Note: data.astype(np.int64) fails ARM tests, see
2456 # https://github.com/pandas-dev/pandas/issues/49468.
2457 data = data.astype(DT64NS_DTYPE).view("i8")
2458 copy = False
2459
2460 elif lib.is_np_dtype(data.dtype, "m") or is_bool_dtype(data.dtype):
2461 # GH#29794 enforcing deprecation introduced in GH#23539
2462 raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
2463 elif isinstance(data.dtype, PeriodDtype):
2464 # Note: without explicitly raising here, PeriodIndex
2465 # test_setops.test_join_does_not_recur fails
2466 raise TypeError(
2467 "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
2468 )
2469
2470 elif isinstance(data.dtype, ExtensionDtype) and not isinstance(
2471 data.dtype, DatetimeTZDtype
2472 ):
2473 # TODO: We have no tests for these
2474 data = np.array(data, dtype=np.object_)
2475 copy = False
2476
2477 return data, copy
2478
2479
2480# -------------------------------------------------------------------
2481# Validation and Inference
2482
2483
2484def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
2485 """
2486 If a timezone is inferred from data, check that it is compatible with
2487 the user-provided timezone, if any.
2488
2489 Parameters
2490 ----------
2491 tz : tzinfo or None
2492 inferred_tz : tzinfo or None
2493
2494 Returns
2495 -------
2496 tz : tzinfo or None
2497
2498 Raises
2499 ------
2500 TypeError : if both timezones are present but do not match
2501 """
2502 if tz is None:
2503 tz = inferred_tz
2504 elif inferred_tz is None:
2505 pass
2506 elif not timezones.tz_compare(tz, inferred_tz):
2507 raise TypeError(
2508 f"data is already tz-aware {inferred_tz}, unable to "
2509 f"set specified tz: {tz}"
2510 )
2511 return tz
2512
2513
2514def _validate_dt64_dtype(dtype):
2515 """
2516 Check that a dtype, if passed, represents either a numpy datetime64[ns]
2517 dtype or a pandas DatetimeTZDtype.
2518
2519 Parameters
2520 ----------
2521 dtype : object
2522
2523 Returns
2524 -------
2525 dtype : None, numpy.dtype, or DatetimeTZDtype
2526
2527 Raises
2528 ------
2529 ValueError : invalid dtype
2530
2531 Notes
2532 -----
2533 Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
2534 tz errors to go through
2535 """
2536 if dtype is not None:
2537 dtype = pandas_dtype(dtype)
2538 if dtype == np.dtype("M8"):
2539 # no precision, disallowed GH#24806
2540 msg = (
2541 "Passing in 'datetime64' dtype with no precision is not allowed. "
2542 "Please pass in 'datetime64[ns]' instead."
2543 )
2544 raise ValueError(msg)
2545
2546 if (
2547 isinstance(dtype, np.dtype)
2548 and (dtype.kind != "M" or not is_supported_dtype(dtype))
2549 ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
2550 raise ValueError(
2551 f"Unexpected value for 'dtype': '{dtype}'. "
2552 "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
2553 "'datetime64[ns]' or DatetimeTZDtype'."
2554 )
2555
2556 if getattr(dtype, "tz", None):
2557 # https://github.com/pandas-dev/pandas/issues/18595
2558 # Ensure that we have a standard timezone for pytz objects.
2559 # Without this, things like adding an array of timedeltas and
2560 # a tz-aware Timestamp (with a tz specific to its datetime) will
2561 # be incorrect(ish?) for the array as a whole
2562 dtype = cast(DatetimeTZDtype, dtype)
2563 dtype = DatetimeTZDtype(
2564 unit=dtype.unit, tz=timezones.tz_standardize(dtype.tz)
2565 )
2566
2567 return dtype
2568
2569
2570def _validate_tz_from_dtype(
2571 dtype, tz: tzinfo | None, explicit_tz_none: bool = False
2572) -> tzinfo | None:
2573 """
2574 If the given dtype is a DatetimeTZDtype, extract the implied
2575 tzinfo object from it and check that it does not conflict with the given
2576 tz.
2577
2578 Parameters
2579 ----------
2580 dtype : dtype, str
2581 tz : None, tzinfo
2582 explicit_tz_none : bool, default False
2583 Whether tz=None was passed explicitly, as opposed to lib.no_default.
2584
2585 Returns
2586 -------
2587 tz : consensus tzinfo
2588
2589 Raises
2590 ------
2591 ValueError : on tzinfo mismatch
2592 """
2593 if dtype is not None:
2594 if isinstance(dtype, str):
2595 try:
2596 dtype = DatetimeTZDtype.construct_from_string(dtype)
2597 except TypeError:
2598 # Things like `datetime64[ns]`, which is OK for the
2599 # constructors, but also nonsense, which should be validated
2600 # but not by us. We *do* allow non-existent tz errors to
2601 # go through
2602 pass
2603 dtz = getattr(dtype, "tz", None)
2604 if dtz is not None:
2605 if tz is not None and not timezones.tz_compare(tz, dtz):
2606 raise ValueError("cannot supply both a tz and a dtype with a tz")
2607 if explicit_tz_none:
2608 raise ValueError("Cannot pass both a timezone-aware dtype and tz=None")
2609 tz = dtz
2610
2611 if tz is not None and lib.is_np_dtype(dtype, "M"):
2612 # We also need to check for the case where the user passed a
2613 # tz-naive dtype (i.e. datetime64[ns])
2614 if tz is not None and not timezones.tz_compare(tz, dtz):
2615 raise ValueError(
2616 "cannot supply both a tz and a "
2617 "timezone-naive dtype (i.e. datetime64[ns])"
2618 )
2619
2620 return tz
2621
2622
2623def _infer_tz_from_endpoints(
2624 start: Timestamp, end: Timestamp, tz: tzinfo | None
2625) -> tzinfo | None:
2626 """
2627 If a timezone is not explicitly given via `tz`, see if one can
2628 be inferred from the `start` and `end` endpoints. If more than one
2629 of these inputs provides a timezone, require that they all agree.
2630
2631 Parameters
2632 ----------
2633 start : Timestamp
2634 end : Timestamp
2635 tz : tzinfo or None
2636
2637 Returns
2638 -------
2639 tz : tzinfo or None
2640
2641 Raises
2642 ------
2643 TypeError : if start and end timezones do not agree
2644 """
2645 try:
2646 inferred_tz = timezones.infer_tzinfo(start, end)
2647 except AssertionError as err:
2648 # infer_tzinfo raises AssertionError if passed mismatched timezones
2649 raise TypeError(
2650 "Start and end cannot both be tz-aware with different timezones"
2651 ) from err
2652
2653 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2654 tz = timezones.maybe_get_tz(tz)
2655
2656 if tz is not None and inferred_tz is not None:
2657 if not timezones.tz_compare(inferred_tz, tz):
2658 raise AssertionError("Inferred time zone not equal to passed time zone")
2659
2660 elif inferred_tz is not None:
2661 tz = inferred_tz
2662
2663 return tz
2664
2665
2666def _maybe_normalize_endpoints(
2667 start: Timestamp | None, end: Timestamp | None, normalize: bool
2668):
2669 if normalize:
2670 if start is not None:
2671 start = start.normalize()
2672
2673 if end is not None:
2674 end = end.normalize()
2675
2676 return start, end
2677
2678
2679def _maybe_localize_point(
2680 ts: Timestamp | None, freq, tz, ambiguous, nonexistent
2681) -> Timestamp | None:
2682 """
2683 Localize a start or end Timestamp to the timezone of the corresponding
2684 start or end Timestamp
2685
2686 Parameters
2687 ----------
2688 ts : start or end Timestamp to potentially localize
2689 freq : Tick, DateOffset, or None
2690 tz : str, timezone object or None
2691 ambiguous: str, localization behavior for ambiguous times
2692 nonexistent: str, localization behavior for nonexistent times
2693
2694 Returns
2695 -------
2696 ts : Timestamp
2697 """
2698 # Make sure start and end are timezone localized if:
2699 # 1) freq = a Timedelta-like frequency (Tick)
2700 # 2) freq = None i.e. generating a linspaced range
2701 if ts is not None and ts.tzinfo is None:
2702 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2703 # we have historically defaulted ambiguous=False
2704 ambiguous = ambiguous if ambiguous != "infer" else False
2705 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2706 if isinstance(freq, Tick) or freq is None:
2707 localize_args["tz"] = tz
2708 ts = ts.tz_localize(**localize_args)
2709 return ts
2710
2711
def _generate_range(
    start: Timestamp | None,
    end: Timestamp | None,
    periods: int | None,
    offset: BaseOffset,
    *,
    unit: str,
) -> Iterator[Timestamp]:
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : Timestamp or None
    end : Timestamp or None
    periods : int or None
    offset : DateOffset
    unit : str
        Resolution each endpoint / yielded Timestamp is cast to via
        ``as_unit``.

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object

    Raises
    ------
    ValueError
        If applying the offset fails to advance the current date in the
        iteration direction (which would otherwise loop forever).
    """
    offset = to_offset(offset)

    # Timestamp(None) yields NaT; map NaT endpoints back to None below.
    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    start = Timestamp(start)  # type: ignore[arg-type]
    if start is not NaT:
        start = start.as_unit(unit)
    else:
        start = None

    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    end = Timestamp(end)  # type: ignore[arg-type]
    if end is not NaT:
        end = end.as_unit(unit)
    else:
        end = None

    # Snap endpoints onto the offset so iteration lands on valid dates.
    # NOTE(review): the `elif` means end is only rolled back when start is
    # already on-offset (or None) — confirm this asymmetry is intentional.
    if start and not offset.is_on_offset(start):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        start = offset.rollforward(start)  # type: ignore[assignment]

    elif end and not offset.is_on_offset(end):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        end = offset.rollback(end)  # type: ignore[assignment]

    # Forward range that is already empty: yield nothing.
    # Unsupported operand types for < ("Timestamp" and "None")
    if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
        end = None
        periods = 0

    # Derive whichever endpoint is missing from `periods` and the offset.
    if end is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        end = start + (periods - 1) * offset  # type: ignore[operator]

    if start is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        start = end - (periods - 1) * offset  # type: ignore[operator]

    start = cast(Timestamp, start)
    end = cast(Timestamp, end)

    cur = start
    if offset.n >= 0:
        # Forward iteration: yield while cur <= end, stepping by `offset`.
        while cur <= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur)
            next_date = next_date.as_unit(unit)
            if next_date <= cur:
                raise ValueError(f"Offset {offset} did not increment date")
            cur = next_date
    else:
        # Backward iteration (negative step): yield while cur >= end.
        while cur >= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur)
            next_date = next_date.as_unit(unit)
            if next_date >= cur:
                raise ValueError(f"Offset {offset} did not decrement date")
            cur = next_date