Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

617 statements  

1from __future__ import annotations 

2 

3from datetime import ( 

4 datetime, 

5 time, 

6 timedelta, 

7 tzinfo, 

8) 

9from typing import ( 

10 TYPE_CHECKING, 

11 Iterator, 

12 cast, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 lib, 

20 tslib, 

21) 

22from pandas._libs.tslibs import ( 

23 BaseOffset, 

24 NaT, 

25 NaTType, 

26 Resolution, 

27 Timestamp, 

28 astype_overflowsafe, 

29 fields, 

30 get_resolution, 

31 get_supported_reso, 

32 get_unit_from_dtype, 

33 ints_to_pydatetime, 

34 is_date_array_normalized, 

35 is_supported_unit, 

36 is_unitless, 

37 normalize_i8_timestamps, 

38 npy_unit_to_abbrev, 

39 timezones, 

40 to_offset, 

41 tz_convert_from_utc, 

42 tzconversion, 

43) 

44from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit 

45from pandas._typing import ( 

46 DateTimeErrorChoices, 

47 IntervalClosedType, 

48 TimeAmbiguous, 

49 TimeNonexistent, 

50 npt, 

51) 

52from pandas.errors import PerformanceWarning 

53from pandas.util._exceptions import find_stack_level 

54from pandas.util._validators import validate_inclusive 

55 

56from pandas.core.dtypes.common import ( 

57 DT64NS_DTYPE, 

58 INT64_DTYPE, 

59 is_bool_dtype, 

60 is_datetime64_any_dtype, 

61 is_datetime64_dtype, 

62 is_datetime64tz_dtype, 

63 is_dtype_equal, 

64 is_extension_array_dtype, 

65 is_float_dtype, 

66 is_object_dtype, 

67 is_period_dtype, 

68 is_sparse, 

69 is_string_dtype, 

70 is_timedelta64_dtype, 

71 pandas_dtype, 

72) 

73from pandas.core.dtypes.dtypes import ( 

74 DatetimeTZDtype, 

75 ExtensionDtype, 

76) 

77from pandas.core.dtypes.missing import isna 

78 

79from pandas.core.arrays import datetimelike as dtl 

80from pandas.core.arrays._ranges import generate_regular_range 

81import pandas.core.common as com 

82 

83from pandas.tseries.frequencies import get_period_alias 

84from pandas.tseries.offsets import ( 

85 Day, 

86 Tick, 

87) 

88 

89if TYPE_CHECKING: 

90 from pandas import DataFrame 

91 from pandas.core.arrays import PeriodArray 

92 

# Module-level constant: midnight (00:00) wall-clock time, used when
# checking/producing "normalized" (time-less) datetimes.
_midnight = time(0, 0)

94 

95 

def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"):
    """
    Return a datetime64 dtype appropriate for the given timezone.

    Parameters
    ----------
    tz : tzinfo or None
    unit : str, default "ns"

    Returns
    -------
    np.dtype or Datetime64TZDType
    """
    # tz-aware data needs the pandas extension dtype to carry the timezone;
    # tz-naive data is represented by a plain numpy M8 dtype.
    if tz is not None:
        return DatetimeTZDtype(tz=tz, unit=unit)
    return np.dtype(f"M8[{unit}]")

113 

114 

115def _field_accessor(name: str, field: str, docstring=None): 

116 def f(self): 

117 values = self._local_timestamps() 

118 

119 if field in self._bool_ops: 

120 result: np.ndarray 

121 

122 if field.endswith(("start", "end")): 

123 freq = self.freq 

124 month_kw = 12 

125 if freq: 

126 kwds = freq.kwds 

127 month_kw = kwds.get("startingMonth", kwds.get("month", 12)) 

128 

129 result = fields.get_start_end_field( 

130 values, field, self.freqstr, month_kw, reso=self._creso 

131 ) 

132 else: 

133 result = fields.get_date_field(values, field, reso=self._creso) 

134 

135 # these return a boolean by-definition 

136 return result 

137 

138 if field in self._object_ops: 

139 result = fields.get_date_name_field(values, field, reso=self._creso) 

140 result = self._maybe_mask_results(result, fill_value=None) 

141 

142 else: 

143 result = fields.get_date_field(values, field, reso=self._creso) 

144 result = self._maybe_mask_results( 

145 result, fill_value=None, convert="float64" 

146 ) 

147 

148 return result 

149 

150 f.__name__ = name 

151 f.__doc__ = docstring 

152 return property(f) 

153 

154 

155class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): 

156 """ 

157 Pandas ExtensionArray for tz-naive or tz-aware datetime data. 

158 

159 .. warning:: 

160 

161 DatetimeArray is currently experimental, and its API may change 

162 without warning. In particular, :attr:`DatetimeArray.dtype` is 

163 expected to change to always be an instance of an ``ExtensionDtype`` 

164 subclass. 

165 

166 Parameters 

167 ---------- 

168 values : Series, Index, DatetimeArray, ndarray 

169 The datetime data. 

170 

171 For DatetimeArray `values` (or a Series or Index boxing one), 

172 `dtype` and `freq` will be extracted from `values`. 

173 

174 dtype : numpy.dtype or DatetimeTZDtype 

175 Note that the only NumPy dtype allowed is 'datetime64[ns]'. 

176 freq : str or Offset, optional 

177 The frequency. 

178 copy : bool, default False 

179 Whether to copy the underlying array of values. 

180 

181 Attributes 

182 ---------- 

183 None 

184 

185 Methods 

186 ------- 

187 None 

188 """ 

189 

    _typ = "datetimearray"
    # Fill value used by the NDArrayBacked machinery for missing entries.
    _internal_fill_value = np.datetime64("NaT", "ns")
    # Scalar types accepted where a datetime-like scalar is expected.
    _recognized_scalars = (datetime, np.datetime64)
    _is_recognized_dtype = is_datetime64_any_dtype
    # lib.infer_dtype results considered a match for this array type.
    _infer_matches = ("datetime", "datetime64", "date")

195 

    @property
    def _scalar_type(self) -> type[Timestamp]:
        # Scalars of this array are boxed as pandas Timestamps.
        return Timestamp

199 

    # define my properties & methods for delegation
    # Boolean-valued field accessors (delegated to e.g. the .dt namespace).
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    # Object-valued attributes.
    _object_ops: list[str] = ["freq", "tz"]
    # Integer-valued field accessors.
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    # Attributes returning non-array python objects (date/time components).
    _other_ops: list[str] = ["date", "time", "timetz"]
    # Full set of delegated attributes.
    _datetimelike_ops: list[str] = (
        _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
    )
    # Full set of delegated methods.
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
        "as_unit",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    _dtype: np.dtype | DatetimeTZDtype
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__

259 

260 @classmethod 

261 def _validate_dtype(cls, values, dtype): 

262 # used in TimeLikeOps.__init__ 

263 _validate_dt64_dtype(values.dtype) 

264 dtype = _validate_dt64_dtype(dtype) 

265 return dtype 

266 

    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: np.ndarray,
        freq: BaseOffset | None = None,
        dtype=DT64NS_DTYPE,
    ) -> DatetimeArray:
        """
        Construct a DatetimeArray without validation or inference.

        ``values`` must be an M8 ndarray whose resolution matches ``dtype``;
        ``freq`` is attached as-is without checking it against the data.
        """
        assert isinstance(values, np.ndarray)
        assert dtype.kind == "M"
        if isinstance(dtype, np.dtype):
            # tz-naive: the ndarray dtype must match exactly and carry a unit
            assert dtype == values.dtype
            assert not is_unitless(dtype)
        else:
            # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
            # then values.dtype should be M8[us].
            assert dtype._creso == get_unit_from_dtype(values.dtype)

        result = super()._simple_new(values, dtype)
        result._freq = freq
        return result

288 

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        # Strict EA constructor: delegate to the permissive path also used
        # by DatetimeIndex.__new__.
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)

292 

    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        *,
        dtype=None,
        copy: bool = False,
        tz=lib.no_default,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous: TimeAmbiguous = "raise",
    ):
        """
        A non-strict version of _from_sequence, called from DatetimeIndex.__new__.
        """
        # Distinguish an explicit freq=None (suppress freq) from "not passed".
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None
        freq, freq_infer = dtl.maybe_infer_freq(freq)

        # if the user either explicitly passes tz=None or a tz-naive dtype, we
        # disallows inferring a tz.
        explicit_tz_none = tz is None
        if tz is lib.no_default:
            tz = None
        else:
            tz = timezones.maybe_get_tz(tz)

        dtype = _validate_dt64_dtype(dtype)
        # if dtype has an embedded tz, capture it
        tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

        # Extract the requested resolution unit (e.g. "ns", "s") from dtype,
        # if a dtype was given.
        unit = None
        if dtype is not None:
            if isinstance(dtype, np.dtype):
                unit = np.datetime_data(dtype)[0]
            else:
                # DatetimeTZDtype
                unit = dtype.unit

        subarr, tz, inferred_freq = _sequence_to_dt64ns(
            data,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
            out_unit=unit,
        )
        # We have to call this again after possibly inferring a tz above
        _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
        if tz is not None and explicit_tz_none:
            raise ValueError(
                "Passed data is timezone-aware, incompatible with 'tz=None'. "
                "Use obj.tz_localize(None) instead."
            )

        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        data_unit = np.datetime_data(subarr.dtype)[0]
        data_dtype = tz_to_dtype(tz, data_unit)
        result = cls._simple_new(subarr, freq=freq, dtype=data_dtype)
        if unit is not None and unit != result.unit:
            # If unit was specified in user-passed dtype, cast to it here
            result = result.as_unit(unit)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq, ambiguous=ambiguous)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result

371 

    # error: Signature of "_generate_range" incompatible with supertype
    # "DatetimeLikeArrayMixin"
    @classmethod
    def _generate_range(  # type: ignore[override]
        cls,
        start,
        end,
        periods,
        freq,
        tz=None,
        normalize: bool = False,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
        inclusive: IntervalClosedType = "both",
        *,
        unit: str | None = None,
    ) -> DatetimeArray:
        """
        Generate a regular range of timestamps; this backs ``pd.date_range``.

        Exactly three of ``start``/``end``/``periods``/``freq`` must be
        specified.  The result is localized to ``tz`` (if any) and the
        endpoints are trimmed according to ``inclusive``.
        """
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        # Resolve the target resolution; default to nanoseconds.
        if unit is not None:
            if unit not in ["s", "ms", "us", "ns"]:
                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
        else:
            unit = "ns"

        # Cast endpoints to the target unit, refusing lossy rounding.
        if start is not None and unit is not None:
            start = start.as_unit(unit, round_ok=False)
        if end is not None and unit is not None:
            end = end.as_unit(unit, round_ok=False)

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start_tz = None if start is None else start.tz
            end_tz = None if end is None else end.tz
            start = _maybe_localize_point(
                start, start_tz, start, freq, tz, ambiguous, nonexistent
            )
            end = _maybe_localize_point(
                end, end_tz, end, freq, tz, ambiguous, nonexistent
            )

        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                # Fixed-size steps: generate directly via i8 arithmetic.
                i8values = generate_regular_range(start, end, periods, freq, unit=unit)
            else:
                # Non-fixed offsets (e.g. BusinessDay): walk Timestamp by
                # Timestamp using the module-level _generate_range helper.
                xdr = _generate_range(
                    start=start, end=end, periods=periods, offset=freq, unit=unit
                )
                i8values = np.array([x._value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:
                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    creso = abbrev_to_npy_unit(unit)
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values,
                        tz,
                        ambiguous=ambiguous,
                        nonexistent=nonexistent,
                        creso=creso,
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            i8values = (
                np.linspace(0, end._value - start._value, periods, dtype="int64")
                + start._value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints that the requested ``inclusive`` excludes.
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start)._value
            end_i8 = Timestamp(end)._value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view(f"datetime64[{unit}]")
        dtype = tz_to_dtype(tz, unit=unit)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)

503 

504 # ----------------------------------------------------------------- 

505 # DatetimeLike Interface 

506 

507 def _unbox_scalar(self, value) -> np.datetime64: 

508 if not isinstance(value, self._scalar_type) and value is not NaT: 

509 raise ValueError("'value' should be a Timestamp.") 

510 self._check_compatible_with(value) 

511 if value is NaT: 

512 return np.datetime64(value._value, self.unit) 

513 else: 

514 return value.as_unit(self.unit).asm8 

515 

    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        # Parse a string into a Timestamp carrying this array's timezone.
        return Timestamp(value, tz=self.tz)

518 

519 def _check_compatible_with(self, other) -> None: 

520 if other is NaT: 

521 return 

522 self._assert_tzawareness_compat(other) 

523 

524 # ----------------------------------------------------------------- 

525 # Descriptive Properties 

526 

527 def _box_func(self, x: np.datetime64) -> Timestamp | NaTType: 

528 # GH#42228 

529 value = x.view("i8") 

530 ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz) 

531 return ts 

532 

    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

           A future version of pandas will change dtype to never be a
           ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
           always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        return self._dtype

557 

    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.
        """
        # GH 18595: the tz lives on the dtype (DatetimeTZDtype); a plain
        # np.dtype has no "tz" attribute, hence the getattr default.
        return getattr(self.dtype, "tz", None)

570 

    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )

578 

    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz

585 

    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._creso)

592 

    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Finest Resolution (e.g. day, hour, second) present in the data,
        # computed in local time.
        return get_resolution(self.asi8, self.tz, reso=self._creso)

596 

597 # ---------------------------------------------------------------- 

598 # Array-Like / EA-Interface Methods 

599 

    def __array__(self, dtype=None) -> np.ndarray:
        if dtype is None and self.tz:
            # The default for tz-aware is object, to preserve tz info
            dtype = object

        return super().__array__(dtype=dtype)

606 

607 def __iter__(self) -> Iterator: 

608 """ 

609 Return an iterator over the boxed values 

610 

611 Yields 

612 ------ 

613 tstamp : Timestamp 

614 """ 

615 if self.ndim > 1: 

616 for i in range(len(self)): 

617 yield self[i] 

618 else: 

619 # convert in chunks of 10k for efficiency 

620 data = self.asi8 

621 length = len(self) 

622 chunksize = 10000 

623 chunks = (length // chunksize) + 1 

624 

625 for i in range(chunks): 

626 start_i = i * chunksize 

627 end_i = min((i + 1) * chunksize, length) 

628 converted = ints_to_pydatetime( 

629 data[start_i:end_i], 

630 tz=self.tz, 

631 box="timestamp", 

632 reso=self._creso, 

633 ) 

634 yield from converted 

635 

    def astype(self, dtype, copy: bool = True):
        # We handle
        #   --> datetime
        #   --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            # Same dtype: return self (or a copy), no conversion needed.
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, ExtensionDtype):
            if not isinstance(dtype, DatetimeTZDtype):
                # e.g. Sparse[datetime64[ns]]
                return super().astype(dtype, copy=copy)
            elif self.tz is None:
                # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
                # the Series behavior which did
                #  values.tz_localize("UTC").tz_convert(dtype.tz)
                raise TypeError(
                    "Cannot use .astype to convert from timezone-naive dtype to "
                    "timezone-aware dtype. Use obj.tz_localize instead or "
                    "series.dt.tz_localize instead"
                )
            else:
                # tzaware unit conversion e.g. datetime64[s, UTC]
                np_dtype = np.dtype(dtype.str)
                res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
                return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and not is_unitless(dtype)
            and is_supported_unit(get_unit_from_dtype(dtype))
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif self.tz is not None and is_datetime64_dtype(dtype):
            # pre-2.0 behavior for DTA/DTI was
            #  values.tz_convert("UTC").tz_localize(None), which did not match
            #  the Series behavior
            raise TypeError(
                "Cannot use .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype. Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead."
            )

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            # Bare "datetime64" with no unit is ambiguous; refuse it.
            raise TypeError(
                "Casting to unit-less dtype 'datetime64' is not supported. "
                "Pass e.g. 'datetime64[ns]' instead."
            )

        elif is_period_dtype(dtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)

702 

703 # ----------------------------------------------------------------- 

704 # Rendering Methods 

705 

    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        # Render the array as an object ndarray of strings (used by repr,
        # to_csv, etc.).  Local import avoids a circular dependency with
        # pandas.io.formats.
        from pandas.io.formats.format import get_format_datetime64_from_values

        fmt = get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._creso
        )

716 

717 # ----------------------------------------------------------------- 

718 # Comparison Methods 

719 

720 def _has_same_tz(self, other) -> bool: 

721 # vzone shouldn't be None if value is non-datetime like 

722 if isinstance(other, np.datetime64): 

723 # convert to Timestamp as np.datetime64 doesn't have tz attr 

724 other = Timestamp(other) 

725 

726 if not hasattr(other, "tzinfo"): 

727 return False 

728 other_tz = other.tzinfo 

729 return timezones.tz_compare(self.tzinfo, other_tz) 

730 

    def _assert_tzawareness_compat(self, other) -> None:
        # adapted from _Timestamp._assert_tzawareness_compat
        # Raise TypeError if one side is tz-aware and the other tz-naive.
        other_tz = getattr(other, "tzinfo", None)
        other_dtype = getattr(other, "dtype", None)

        if is_datetime64tz_dtype(other_dtype):
            # Get tzinfo from Series dtype
            other_tz = other.dtype.tz
        if other is NaT:
            # pd.NaT quacks both aware and naive
            pass
        elif self.tz is None:
            if other_tz is not None:
                raise TypeError(
                    "Cannot compare tz-naive and tz-aware datetime-like objects."
                )
        elif other_tz is None:
            raise TypeError(
                "Cannot compare tz-naive and tz-aware datetime-like objects"
            )

751 

752 # ----------------------------------------------------------------- 

753 # Arithmetic Methods 

754 

    def _add_offset(self, offset) -> DatetimeArray:
        """
        Add a non-Tick DateOffset to every element.

        Tick (fixed-duration) offsets are handled via timedelta addition
        elsewhere; here the offset is applied in wall-clock (local) time.
        """
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            # strip tz so the offset is applied to local wall-clock times
            values = self.tz_localize(None)
        else:
            values = self

        try:
            result = offset._apply_array(values).view(values.dtype)
        except NotImplementedError:
            # Offset lacks a vectorized path: fall back to slow elementwise
            # application over object dtype.
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            result = self.astype("O") + offset
            result = type(self)._from_sequence(result).as_unit(self.unit)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = DatetimeArray._simple_new(result, dtype=result.dtype)
            if self.tz is not None:
                # re-attach the timezone stripped above
                result = result.tz_localize(self.tz)

        return result

783 

784 # ----------------------------------------------------------------- 

785 # Timezone Conversion and Localization Methods 

786 

    def _local_timestamps(self) -> npt.NDArray[np.int64]:
        """
        Convert to an i8 (unix-like nanosecond timestamp) representation
        while keeping the local timezone and not using UTC.
        This is used to calculate time-of-day information as if the timestamps
        were timezone-naive.
        """
        if self.tz is None or timezones.is_utc(self.tz):
            # Avoid the copy that would be made in tzconversion
            return self.asi8
        return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)

798 

    def tz_convert(self, tz) -> DatetimeArray:
        """
        Convert tz-aware Datetime Array/Index from one time zone to another.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
            Time zone for time. Corresponding timestamps would be converted
            to this time zone of the Datetime Array/Index. A `tz` of None will
            convert to UTC and remove the timezone information.

        Returns
        -------
        Array or Index

        Raises
        ------
        TypeError
            If Datetime Array/Index is tz-naive.

        See Also
        --------
        DatetimeIndex.tz : A timezone that has a variable offset from UTC.
        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
            given time zone, or remove timezone from a tz-aware DatetimeIndex.

        Examples
        --------
        With the `tz` parameter, we can change the DatetimeIndex
        to other time zones:

        >>> dti = pd.date_range(start='2014-08-01 09:00',
        ...                     freq='H', periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert('US/Central')
        DatetimeIndex(['2014-08-01 02:00:00-05:00',
                       '2014-08-01 03:00:00-05:00',
                       '2014-08-01 04:00:00-05:00'],
                      dtype='datetime64[ns, US/Central]', freq='H')

        With the ``tz=None``, we can remove the timezone (after converting
        to UTC if necessary):

        >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
        ...                     periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert(None)
        DatetimeIndex(['2014-08-01 07:00:00',
                       '2014-08-01 08:00:00',
                       '2014-08-01 09:00:00'],
                      dtype='datetime64[ns]', freq='H')
        """
        tz = timezones.maybe_get_tz(tz)

        if self.tz is None:
            # tz naive, use tz_localize
            raise TypeError(
                "Cannot convert tz-naive timestamps, use tz_localize to localize"
            )

        # No conversion since timestamps are all UTC to begin with; only the
        # dtype (and hence the displayed wall times) changes.
        dtype = tz_to_dtype(tz, unit=self.unit)
        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)

874 

875 @dtl.ravel_compat 

876 def tz_localize( 

877 self, 

878 tz, 

879 ambiguous: TimeAmbiguous = "raise", 

880 nonexistent: TimeNonexistent = "raise", 

881 ) -> DatetimeArray: 

882 """ 

883 Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. 

884 

885 This method takes a time zone (tz) naive Datetime Array/Index object 

886 and makes this time zone aware. It does not move the time to another 

887 time zone. 

888 

889 This method can also be used to do the inverse -- to create a time 

890 zone unaware object from an aware object. To that end, pass `tz=None`. 

891 

892 Parameters 

893 ---------- 

894 tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None 

895 Time zone to convert timestamps to. Passing ``None`` will 

896 remove the time zone information preserving local time. 

897 ambiguous : 'infer', 'NaT', bool array, default 'raise' 

898 When clocks moved backward due to DST, ambiguous times may arise. 

899 For example in Central European Time (UTC+01), when going from 

900 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 

901 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the 

902 `ambiguous` parameter dictates how ambiguous times should be 

903 handled. 

904 

905 - 'infer' will attempt to infer fall dst-transition hours based on 

906 order 

907 - bool-ndarray where True signifies a DST time, False signifies a 

908 non-DST time (note that this flag is only applicable for 

909 ambiguous times) 

910 - 'NaT' will return NaT where there are ambiguous times 

911 - 'raise' will raise an AmbiguousTimeError if there are ambiguous 

912 times. 

913 

914 nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ 

915default 'raise' 

916 A nonexistent time does not exist in a particular timezone 

917 where clocks moved forward due to DST. 

918 

919 - 'shift_forward' will shift the nonexistent time forward to the 

920 closest existing time 

921 - 'shift_backward' will shift the nonexistent time backward to the 

922 closest existing time 

923 - 'NaT' will return NaT where there are nonexistent times 

924 - timedelta objects will shift nonexistent times by the timedelta 

925 - 'raise' will raise an NonExistentTimeError if there are 

926 nonexistent times. 

927 

928 Returns 

929 ------- 

930 Same type as self 

931 Array/Index converted to the specified time zone. 

932 

933 Raises 

934 ------ 

935 TypeError 

936 If the Datetime Array/Index is tz-aware and tz is not None. 

937 

938 See Also 

939 -------- 

940 DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from 

941 one time zone to another. 

942 

943 Examples 

944 -------- 

945 >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) 

946 >>> tz_naive 

947 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', 

948 '2018-03-03 09:00:00'], 

949 dtype='datetime64[ns]', freq='D') 

950 

951 Localize DatetimeIndex in US/Eastern time zone: 

952 

953 >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') 

954 >>> tz_aware 

955 DatetimeIndex(['2018-03-01 09:00:00-05:00', 

956 '2018-03-02 09:00:00-05:00', 

957 '2018-03-03 09:00:00-05:00'], 

958 dtype='datetime64[ns, US/Eastern]', freq=None) 

959 

960 With the ``tz=None``, we can remove the time zone information 

961 while keeping the local time (not converted to UTC): 

962 

963 >>> tz_aware.tz_localize(None) 

964 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', 

965 '2018-03-03 09:00:00'], 

966 dtype='datetime64[ns]', freq=None) 

967 

968 Be careful with DST changes. When there is sequential data, pandas can 

969 infer the DST time: 

970 

971 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', 

972 ... '2018-10-28 02:00:00', 

973 ... '2018-10-28 02:30:00', 

974 ... '2018-10-28 02:00:00', 

975 ... '2018-10-28 02:30:00', 

976 ... '2018-10-28 03:00:00', 

977 ... '2018-10-28 03:30:00'])) 

978 >>> s.dt.tz_localize('CET', ambiguous='infer') 

979 0 2018-10-28 01:30:00+02:00 

980 1 2018-10-28 02:00:00+02:00 

981 2 2018-10-28 02:30:00+02:00 

982 3 2018-10-28 02:00:00+01:00 

983 4 2018-10-28 02:30:00+01:00 

984 5 2018-10-28 03:00:00+01:00 

985 6 2018-10-28 03:30:00+01:00 

986 dtype: datetime64[ns, CET] 

987 

988 In some cases, inferring the DST is impossible. In such cases, you can 

989 pass an ndarray to the ambiguous parameter to set the DST explicitly 

990 

991 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', 

992 ... '2018-10-28 02:36:00', 

993 ... '2018-10-28 03:46:00'])) 

994 >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) 

995 0 2018-10-28 01:20:00+02:00 

996 1 2018-10-28 02:36:00+02:00 

997 2 2018-10-28 03:46:00+01:00 

998 dtype: datetime64[ns, CET] 

999 

1000 If the DST transition causes nonexistent times, you can shift these 

1001 dates forward or backwards with a timedelta object or `'shift_forward'` 

1002 or `'shift_backwards'`. 

1003 

1004 >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', 

1005 ... '2015-03-29 03:30:00'])) 

1006 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 

1007 0 2015-03-29 03:00:00+02:00 

1008 1 2015-03-29 03:30:00+02:00 

1009 dtype: datetime64[ns, Europe/Warsaw] 

1010 

1011 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 

1012 0 2015-03-29 01:59:59.999999999+01:00 

1013 1 2015-03-29 03:30:00+02:00 

1014 dtype: datetime64[ns, Europe/Warsaw] 

1015 

1016 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) 

1017 0 2015-03-29 03:30:00+02:00 

1018 1 2015-03-29 03:30:00+02:00 

1019 dtype: datetime64[ns, Europe/Warsaw] 

1020 """ 

1021 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") 

1022 if nonexistent not in nonexistent_options and not isinstance( 

1023 nonexistent, timedelta 

1024 ): 

1025 raise ValueError( 

1026 "The nonexistent argument must be one of 'raise', " 

1027 "'NaT', 'shift_forward', 'shift_backward' or " 

1028 "a timedelta object" 

1029 ) 

1030 

1031 if self.tz is not None: 

1032 if tz is None: 

1033 new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso) 

1034 else: 

1035 raise TypeError("Already tz-aware, use tz_convert to convert.") 

1036 else: 

1037 tz = timezones.maybe_get_tz(tz) 

1038 # Convert to UTC 

1039 

1040 new_dates = tzconversion.tz_localize_to_utc( 

1041 self.asi8, 

1042 tz, 

1043 ambiguous=ambiguous, 

1044 nonexistent=nonexistent, 

1045 creso=self._creso, 

1046 ) 

1047 new_dates = new_dates.view(f"M8[{self.unit}]") 

1048 dtype = tz_to_dtype(tz, unit=self.unit) 

1049 

1050 freq = None 

1051 if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])): 

1052 # we can preserve freq 

1053 # TODO: Also for fixed-offsets 

1054 freq = self.freq 

1055 elif tz is None and self.tz is None: 

1056 # no-op 

1057 freq = self.freq 

1058 return self._simple_new(new_dates, dtype=dtype, freq=freq) 

1059 

1060 # ---------------------------------------------------------------- 

1061 # Conversion Methods - Vectorized analogues of Timestamp methods 

1062 

1063 def to_pydatetime(self) -> npt.NDArray[np.object_]: 

1064 """ 

1065 Return an ndarray of datetime.datetime objects. 

1066 

1067 Returns 

1068 ------- 

1069 numpy.ndarray 

1070 """ 

1071 return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso) 

1072 

1073 def normalize(self) -> DatetimeArray: 

1074 """ 

1075 Convert times to midnight. 

1076 

1077 The time component of the date-time is converted to midnight i.e. 

1078 00:00:00. This is useful in cases, when the time does not matter. 

1079 Length is unaltered. The timezones are unaffected. 

1080 

1081 This method is available on Series with datetime values under 

1082 the ``.dt`` accessor, and directly on Datetime Array/Index. 

1083 

1084 Returns 

1085 ------- 

1086 DatetimeArray, DatetimeIndex or Series 

1087 The same type as the original data. Series will have the same 

1088 name and index. DatetimeIndex will have the same name. 

1089 

1090 See Also 

1091 -------- 

1092 floor : Floor the datetimes to the specified freq. 

1093 ceil : Ceil the datetimes to the specified freq. 

1094 round : Round the datetimes to the specified freq. 

1095 

1096 Examples 

1097 -------- 

1098 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H', 

1099 ... periods=3, tz='Asia/Calcutta') 

1100 >>> idx 

1101 DatetimeIndex(['2014-08-01 10:00:00+05:30', 

1102 '2014-08-01 11:00:00+05:30', 

1103 '2014-08-01 12:00:00+05:30'], 

1104 dtype='datetime64[ns, Asia/Calcutta]', freq='H') 

1105 >>> idx.normalize() 

1106 DatetimeIndex(['2014-08-01 00:00:00+05:30', 

1107 '2014-08-01 00:00:00+05:30', 

1108 '2014-08-01 00:00:00+05:30'], 

1109 dtype='datetime64[ns, Asia/Calcutta]', freq=None) 

1110 """ 

1111 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso) 

1112 dt64_values = new_values.view(self._ndarray.dtype) 

1113 

1114 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype) 

1115 dta = dta._with_freq("infer") 

1116 if self.tz is not None: 

1117 dta = dta.tz_localize(self.tz) 

1118 return dta 

1119 

1120 def to_period(self, freq=None) -> PeriodArray: 

1121 """ 

1122 Cast to PeriodArray/Index at a particular frequency. 

1123 

1124 Converts DatetimeArray/Index to PeriodArray/Index. 

1125 

1126 Parameters 

1127 ---------- 

1128 freq : str or Offset, optional 

1129 One of pandas' :ref:`offset strings <timeseries.offset_aliases>` 

1130 or an Offset object. Will be inferred by default. 

1131 

1132 Returns 

1133 ------- 

1134 PeriodArray/Index 

1135 

1136 Raises 

1137 ------ 

1138 ValueError 

1139 When converting a DatetimeArray/Index with non-regular values, 

1140 so that a frequency cannot be inferred. 

1141 

1142 See Also 

1143 -------- 

1144 PeriodIndex: Immutable ndarray holding ordinal values. 

1145 DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. 

1146 

1147 Examples 

1148 -------- 

1149 >>> df = pd.DataFrame({"y": [1, 2, 3]}, 

1150 ... index=pd.to_datetime(["2000-03-31 00:00:00", 

1151 ... "2000-05-31 00:00:00", 

1152 ... "2000-08-31 00:00:00"])) 

1153 >>> df.index.to_period("M") 

1154 PeriodIndex(['2000-03', '2000-05', '2000-08'], 

1155 dtype='period[M]') 

1156 

1157 Infer the daily frequency 

1158 

1159 >>> idx = pd.date_range("2017-01-01", periods=2) 

1160 >>> idx.to_period() 

1161 PeriodIndex(['2017-01-01', '2017-01-02'], 

1162 dtype='period[D]') 

1163 """ 

1164 from pandas.core.arrays import PeriodArray 

1165 

1166 if self.tz is not None: 

1167 warnings.warn( 

1168 "Converting to PeriodArray/Index representation " 

1169 "will drop timezone information.", 

1170 UserWarning, 

1171 stacklevel=find_stack_level(), 

1172 ) 

1173 

1174 if freq is None: 

1175 freq = self.freqstr or self.inferred_freq 

1176 

1177 if freq is None: 

1178 raise ValueError( 

1179 "You must pass a freq argument as current index has none." 

1180 ) 

1181 

1182 res = get_period_alias(freq) 

1183 

1184 # https://github.com/pandas-dev/pandas/issues/33358 

1185 if res is None: 

1186 res = freq 

1187 

1188 freq = res 

1189 

1190 return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) 

1191 

1192 # ----------------------------------------------------------------- 

1193 # Properties - Vectorized Timestamp Properties/Methods 

1194 

1195 def month_name(self, locale=None) -> npt.NDArray[np.object_]: 

1196 """ 

1197 Return the month names with specified locale. 

1198 

1199 Parameters 

1200 ---------- 

1201 locale : str, optional 

1202 Locale determining the language in which to return the month name. 

1203 Default is English locale (``'en_US.utf8'``). Use the command 

1204 ``locale -a`` on your terminal on Unix systems to find your locale 

1205 language code. 

1206 

1207 Returns 

1208 ------- 

1209 Series or Index 

1210 Series or Index of month names. 

1211 

1212 Examples 

1213 -------- 

1214 >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3)) 

1215 >>> s 

1216 0 2018-01-31 

1217 1 2018-02-28 

1218 2 2018-03-31 

1219 dtype: datetime64[ns] 

1220 >>> s.dt.month_name() 

1221 0 January 

1222 1 February 

1223 2 March 

1224 dtype: object 

1225 

1226 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) 

1227 >>> idx 

1228 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], 

1229 dtype='datetime64[ns]', freq='M') 

1230 >>> idx.month_name() 

1231 Index(['January', 'February', 'March'], dtype='object') 

1232 

1233 Using the ``locale`` parameter you can set a different locale language, 

1234 for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month 

1235 names in Brazilian Portuguese language. 

1236 

1237 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) 

1238 >>> idx 

1239 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], 

1240 dtype='datetime64[ns]', freq='M') 

1241 >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP 

1242 Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object') 

1243 """ 

1244 values = self._local_timestamps() 

1245 

1246 result = fields.get_date_name_field( 

1247 values, "month_name", locale=locale, reso=self._creso 

1248 ) 

1249 result = self._maybe_mask_results(result, fill_value=None) 

1250 return result 

1251 

1252 def day_name(self, locale=None) -> npt.NDArray[np.object_]: 

1253 """ 

1254 Return the day names with specified locale. 

1255 

1256 Parameters 

1257 ---------- 

1258 locale : str, optional 

1259 Locale determining the language in which to return the day name. 

1260 Default is English locale (``'en_US.utf8'``). Use the command 

1261 ``locale -a`` on your terminal on Unix systems to find your locale 

1262 language code. 

1263 

1264 Returns 

1265 ------- 

1266 Series or Index 

1267 Series or Index of day names. 

1268 

1269 Examples 

1270 -------- 

1271 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3)) 

1272 >>> s 

1273 0 2018-01-01 

1274 1 2018-01-02 

1275 2 2018-01-03 

1276 dtype: datetime64[ns] 

1277 >>> s.dt.day_name() 

1278 0 Monday 

1279 1 Tuesday 

1280 2 Wednesday 

1281 dtype: object 

1282 

1283 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) 

1284 >>> idx 

1285 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], 

1286 dtype='datetime64[ns]', freq='D') 

1287 >>> idx.day_name() 

1288 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object') 

1289 

1290 Using the ``locale`` parameter you can set a different locale language, 

1291 for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day 

1292 names in Brazilian Portuguese language. 

1293 

1294 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) 

1295 >>> idx 

1296 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], 

1297 dtype='datetime64[ns]', freq='D') 

1298 >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP 

1299 Index(['Segunda', 'Terça', 'Quarta'], dtype='object') 

1300 """ 

1301 values = self._local_timestamps() 

1302 

1303 result = fields.get_date_name_field( 

1304 values, "day_name", locale=locale, reso=self._creso 

1305 ) 

1306 result = self._maybe_mask_results(result, fill_value=None) 

1307 return result 

1308 

1309 @property 

1310 def time(self) -> npt.NDArray[np.object_]: 

1311 """ 

1312 Returns numpy array of :class:`datetime.time` objects. 

1313 

1314 The time part of the Timestamps. 

1315 """ 

1316 # If the Timestamps have a timezone that is not UTC, 

1317 # convert them into their i8 representation while 

1318 # keeping their timezone and not using UTC 

1319 timestamps = self._local_timestamps() 

1320 

1321 return ints_to_pydatetime(timestamps, box="time", reso=self._creso) 

1322 

1323 @property 

1324 def timetz(self) -> npt.NDArray[np.object_]: 

1325 """ 

1326 Returns numpy array of :class:`datetime.time` objects with timezones. 

1327 

1328 The time part of the Timestamps. 

1329 """ 

1330 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso) 

1331 

1332 @property 

1333 def date(self) -> npt.NDArray[np.object_]: 

1334 """ 

1335 Returns numpy array of python :class:`datetime.date` objects. 

1336 

1337 Namely, the date part of Timestamps without time and 

1338 timezone information. 

1339 """ 

1340 # If the Timestamps have a timezone that is not UTC, 

1341 # convert them into their i8 representation while 

1342 # keeping their timezone and not using UTC 

1343 timestamps = self._local_timestamps() 

1344 

1345 return ints_to_pydatetime(timestamps, box="date", reso=self._creso) 

1346 

1347 def isocalendar(self) -> DataFrame: 

1348 """ 

1349 Calculate year, week, and day according to the ISO 8601 standard. 

1350 

1351 .. versionadded:: 1.1.0 

1352 

1353 Returns 

1354 ------- 

1355 DataFrame 

1356 With columns year, week and day. 

1357 

1358 See Also 

1359 -------- 

1360 Timestamp.isocalendar : Function return a 3-tuple containing ISO year, 

1361 week number, and weekday for the given Timestamp object. 

1362 datetime.date.isocalendar : Return a named tuple object with 

1363 three components: year, week and weekday. 

1364 

1365 Examples 

1366 -------- 

1367 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4) 

1368 >>> idx.isocalendar() 

1369 year week day 

1370 2019-12-29 2019 52 7 

1371 2019-12-30 2020 1 1 

1372 2019-12-31 2020 1 2 

1373 2020-01-01 2020 1 3 

1374 >>> idx.isocalendar().week 

1375 2019-12-29 52 

1376 2019-12-30 1 

1377 2019-12-31 1 

1378 2020-01-01 1 

1379 Freq: D, Name: week, dtype: UInt32 

1380 """ 

1381 from pandas import DataFrame 

1382 

1383 values = self._local_timestamps() 

1384 sarray = fields.build_isocalendar_sarray(values, reso=self._creso) 

1385 iso_calendar_df = DataFrame( 

1386 sarray, columns=["year", "week", "day"], dtype="UInt32" 

1387 ) 

1388 if self._hasna: 

1389 iso_calendar_df.iloc[self._isnan] = None 

1390 return iso_calendar_df 

1391 

    # Vectorized datetime-component accessors (year .. nanosecond).
    # Each is produced by the `_field_accessor` factory (defined elsewhere in
    # this module); arguments are: attribute name, cython field code, and the
    # docstring for the generated property. The docstrings are runtime string
    # literals passed to the factory, not attached directly here.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="Y")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int32
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="M")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int32
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="T")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0    0
        1    1
        2    2
        dtype: int32
        """,
    )

    # Shared docstring for the three day-of-week aliases below; passed to the
    # `_field_accessor` factory as a runtime string literal.
    _dayofweek_doc = """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. It is assumed the week starts on
    Monday, which is denoted by 0 and ends on Sunday which is denoted
    by 6. This method is available on both Series with datetime
    values (using the `dt` accessor) or DatetimeIndex.

    Returns
    -------
    Series or Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
    >>> s.dt.dayofweek
    2016-12-31    5
    2017-01-01    6
    2017-01-02    0
    2017-01-03    1
    2017-01-04    2
    2017-01-05    3
    2017-01-06    4
    2017-01-07    5
    2017-01-08    6
    Freq: D, dtype: int32
    """
    day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
    # `dayofweek` and `weekday` are aliases bound to the same accessor object.
    dayofweek = day_of_week
    weekday = day_of_week

    day_of_year = _field_accessor(
        "dayofyear",
        "doy",
        """
        The ordinal day of the year.
        """,
    )
    # Alias for day_of_year.
    dayofyear = day_of_year
    quarter = _field_accessor(
        "quarter",
        "q",
        """
        The quarter of the date.
        """,
    )
    days_in_month = _field_accessor(
        "days_in_month",
        "dim",
        """
        The number of days in the month.
        """,
    )
    # Alias for days_in_month.
    daysinmonth = days_in_month

    # Boolean calendar-position accessors. `_is_month_doc` is a template
    # docstring formatted twice below (first/last day of month); all of these
    # go through the `_field_accessor` factory with the docstring passed as a
    # runtime string literal.
    _is_month_doc = """
    Indicates whether the date is the {first_or_last} day of the month.

    Returns
    -------
    Series or array
        For Series, returns a Series with boolean values.
        For DatetimeIndex, returns a boolean array.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
    >>> s
    0   2018-02-27
    1   2018-02-28
    2   2018-03-01
    dtype: datetime64[ns]
    >>> s.dt.is_month_start
    0    False
    1    False
    2    True
    dtype: bool
    >>> s.dt.is_month_end
    0    False
    1    True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2018-02-27", periods=3)
    >>> idx.is_month_start
    array([False, False, True])
    >>> idx.is_month_end
    array([False, True, False])
    """
    is_month_start = _field_accessor(
        "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
    )

    is_month_end = _field_accessor(
        "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
    )

    is_quarter_start = _field_accessor(
        "is_quarter_start",
        "is_quarter_start",
        """
        Indicator for whether the date is the first day of a quarter.

        Returns
        -------
        is_quarter_start : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_end : Similar property for indicating the quarter end.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_start=df.dates.dt.is_quarter_start)
               dates  quarter  is_quarter_start
        0 2017-03-30        1             False
        1 2017-03-31        1             False
        2 2017-04-01        2              True
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_start
        array([False, False,  True, False])
        """,
    )
    is_quarter_end = _field_accessor(
        "is_quarter_end",
        "is_quarter_end",
        """
        Indicator for whether the date is the last day of a quarter.

        Returns
        -------
        is_quarter_end : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_start : Similar property indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_end=df.dates.dt.is_quarter_end)
               dates  quarter    is_quarter_end
        0 2017-03-30        1             False
        1 2017-03-31        1              True
        2 2017-04-01        2             False
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_end
        array([False,  True, False, False])
        """,
    )
    is_year_start = _field_accessor(
        "is_year_start",
        "is_year_start",
        """
        Indicate whether the date is the first day of a year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_end : Similar property indicating the last day of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_start
        0    False
        1    False
        2    True
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_start
        array([False, False,  True])
        """,
    )
    is_year_end = _field_accessor(
        "is_year_end",
        "is_year_end",
        """
        Indicate whether the date is the last day of the year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_start : Similar property indicating the start of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_end
        array([False,  True, False])
        """,
    )
    is_leap_year = _field_accessor(
        "is_leap_year",
        "is_leap_year",
        """
        Boolean indicator if the date belongs to a leap year.

        A leap year is a year, which has 366 days (instead of 365) including
        29th of February as an intercalary day.
        Leap years are years which are multiples of four with the exception
        of years divisible by 100 but not by 400.

        Returns
        -------
        Series or ndarray
             Booleans indicating if dates belong to a leap year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
        >>> idx
        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                      dtype='datetime64[ns]', freq='A-DEC')
        >>> idx.is_leap_year
        array([ True, False, False])

        >>> dates_series = pd.Series(idx)
        >>> dates_series
        0   2012-12-31
        1   2013-12-31
        2   2014-12-31
        dtype: datetime64[ns]
        >>> dates_series.dt.is_leap_year
        0     True
        1    False
        2    False
        dtype: bool
        """,
    )

1902 

1903 def to_julian_date(self) -> npt.NDArray[np.float64]: 

1904 """ 

1905 Convert Datetime Array to float64 ndarray of Julian Dates. 

1906 0 Julian date is noon January 1, 4713 BC. 

1907 https://en.wikipedia.org/wiki/Julian_day 

1908 """ 

1909 

1910 # http://mysite.verizon.net/aesir_research/date/jdalg2.htm 

1911 year = np.asarray(self.year) 

1912 month = np.asarray(self.month) 

1913 day = np.asarray(self.day) 

1914 testarr = month < 3 

1915 year[testarr] -= 1 

1916 month[testarr] += 12 

1917 return ( 

1918 day 

1919 + np.fix((153 * month - 457) / 5) 

1920 + 365 * year 

1921 + np.floor(year / 4) 

1922 - np.floor(year / 100) 

1923 + np.floor(year / 400) 

1924 + 1_721_118.5 

1925 + ( 

1926 self.hour 

1927 + self.minute / 60 

1928 + self.second / 3600 

1929 + self.microsecond / 3600 / 10**6 

1930 + self.nanosecond / 3600 / 10**9 

1931 ) 

1932 / 24 

1933 ) 

1934 

1935 # ----------------------------------------------------------------- 

1936 # Reductions 

1937 

1938 def std( 

1939 self, 

1940 axis=None, 

1941 dtype=None, 

1942 out=None, 

1943 ddof: int = 1, 

1944 keepdims: bool = False, 

1945 skipna: bool = True, 

1946 ): 

1947 """ 

1948 Return sample standard deviation over requested axis. 

1949 

1950 Normalized by N-1 by default. This can be changed using the ddof argument 

1951 

1952 Parameters 

1953 ---------- 

1954 axis : int optional, default None 

1955 Axis for the function to be applied on. 

1956 For `Series` this parameter is unused and defaults to `None`. 

1957 ddof : int, default 1 

1958 Degrees of Freedom. The divisor used in calculations is N - ddof, 

1959 where N represents the number of elements. 

1960 skipna : bool, default True 

1961 Exclude NA/null values. If an entire row/column is NA, the result will be 

1962 NA. 

1963 

1964 Returns 

1965 ------- 

1966 Timedelta 

1967 """ 

1968 # Because std is translation-invariant, we can get self.std 

1969 # by calculating (self - Timestamp(0)).std, and we can do it 

1970 # without creating a copy by using a view on self._ndarray 

1971 from pandas.core.arrays import TimedeltaArray 

1972 

1973 # Find the td64 dtype with the same resolution as our dt64 dtype 

1974 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64") 

1975 dtype = np.dtype(dtype_str) 

1976 

1977 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype) 

1978 

1979 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna) 

1980 

1981 

1982# ------------------------------------------------------------------- 

1983# Constructor Helpers 

1984 

1985 

def _sequence_to_dt64ns(
    data,
    *,
    copy: bool = False,
    tz: tzinfo | None = None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    out_unit: str | None = None,
):
    """
    Convert a datetime-like sequence to (ndarray[datetime64], tz, freq).

    Parameters
    ----------
    data : list-like
    copy : bool, default False
    tz : tzinfo or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    out_unit : str or None, default None
        Desired output resolution.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    inferred_freq = None

    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="DatetimeArray"
    )

    # If the input already carries a freq, preserve it for the caller.
    if isinstance(data, DatetimeArray):
        inferred_freq = data.freq

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    # Default to nanosecond resolution unless the caller requested otherwise.
    out_dtype = DT64NS_DTYPE
    if out_unit is not None:
        out_dtype = np.dtype(f"M8[{out_unit}]")

    if (
        is_object_dtype(data_dtype)
        or is_string_dtype(data_dtype)
        or is_sparse(data_dtype)
    ):
        # TODO: We do not have tests specific to string-dtypes,
        #  also complex or categorical or other extension
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # All-integer object array: treat as epoch values (handled in the
            # integer branch at the bottom of this function).
            data = data.astype(np.int64)
        elif tz is not None and ambiguous == "raise":
            # TODO: yearfirst/dayfirst/etc?
            # Fast path for parsing strings directly to the target tz.
            obj_data = np.asarray(data, dtype=object)
            i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
            return i8data.view(DT64NS_DTYPE), tz, None
        else:
            # data comes back here as either i8 to denote UTC timestamps
            #  or M8[ns] to denote wall times
            data, inferred_tz = objects_to_datetime64ns(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
            )
            if tz and inferred_tz:
                #  two timezones: convert to intended from base UTC repr
                assert data.dtype == "i8"
                # GH#42505
                # by convention, these are _already_ UTC, e.g
                return data.view(DT64NS_DTYPE), tz, None

            elif inferred_tz:
                tz = inferred_tz

        data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if is_datetime64tz_dtype(data_dtype):
        # DatetimeArray -> ndarray
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif is_datetime64_dtype(data_dtype):
        # tz-naive DatetimeArray or ndarray[datetime64]
        data = getattr(data, "_ndarray", data)
        new_dtype = data.dtype
        data_unit = get_unit_from_dtype(new_dtype)
        if not is_supported_unit(data_unit):
            # Cast to the nearest supported unit, generally "s"
            new_reso = get_supported_reso(data_unit)
            new_unit = npy_unit_to_abbrev(new_reso)
            new_dtype = np.dtype(f"M8[{new_unit}]")
            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
            data_unit = get_unit_from_dtype(new_dtype)
            copy = False

        if data.dtype.byteorder == ">":
            # TODO: better way to handle this? non-copying alternative?
            #  without this, test_constructor_datetime64_bigendian fails
            data = data.astype(data.dtype.newbyteorder("<"))
            new_dtype = data.dtype
            copy = False

        if tz is not None:
            # Convert tz-naive to UTC
            # TODO: if tz is UTC, are there situations where we *don't* want a
            #  copy? tz_localize_to_utc always makes one.
            shape = data.shape
            if data.ndim > 1:
                # tz_localize_to_utc operates on 1-D i8 data; flatten and
                # restore the shape afterwards.
                data = data.ravel()

            data = tzconversion.tz_localize_to_utc(
                data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
            )
            data = data.view(new_dtype)
            data = data.reshape(shape)

        assert data.dtype == new_dtype, data.dtype
        result = data

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
        result = data.view(out_dtype)

    if copy:
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype.kind == "M"
    assert result.dtype != "M8"
    assert is_supported_unit(get_unit_from_dtype(result.dtype))
    return result, tz, inferred_freq

2135 

2136 

def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc: bool = False,
    errors: DateTimeErrorChoices = "raise",
    allow_object: bool = False,
):
    """
    Convert an object-dtype array of datetime-like scalars to timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert/localize timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    """
    assert errors in ["raise", "ignore", "coerce"]

    # Coerce str-dtype (or anything else) to a true object ndarray first.
    values = np.array(data, copy=False, dtype=np.object_)

    converted, tz_parsed = tslib.array_to_datetime(
        values,
        errors=errors,
        utc=utc,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
    )

    if tz_parsed is not None:
        # Shortcut: the datetime64 values are already in UTC, so return i8
        # values denoting unix timestamps.
        return converted.view("i8"), tz_parsed

    if is_datetime64_dtype(converted):
        # M8[ns] denotes wall times; since tz is None the distinction is thin.
        return converted, tz_parsed

    if is_object_dtype(converted):
        # GH#23675 object dtype indicates values recognizable as datetimes but
        # with conflicting timezones/awareness. `pd.to_datetime` callers may
        # accept this (allow_object=True); `pd.DatetimeIndex` may not.
        if allow_object:
            return converted, tz_parsed
        raise TypeError(converted)

    # GH#23675 this TypeError should never be hit, whereas the one in the
    # object-dtype branch above is reachable.
    raise TypeError(converted)  # pragma: no cover

2207 

2208 

def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
    """
    Convert data based on dtype conventions, issuing
    errors where appropriate.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    if not hasattr(data, "dtype"):
        # e.g. collections.deque — nothing dtype-based to convert
        return data, copy

    dtype = data.dtype

    if is_float_dtype(dtype):
        # pre-2.0 we treated these as wall-times, inconsistent with ints
        # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
        # Note: data.astype(np.int64) fails ARM tests, see
        # https://github.com/pandas-dev/pandas/issues/49468.
        data = data.astype(DT64NS_DTYPE).view("i8")
        copy = False
    elif is_timedelta64_dtype(dtype) or is_bool_dtype(dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
    elif is_period_dtype(dtype):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )
    elif is_extension_array_dtype(dtype) and not is_datetime64tz_dtype(dtype):
        # TODO: We have no tests for these
        # Fall back to object dtype for other extension arrays.
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy

2257 

2258 

2259# ------------------------------------------------------------------- 

2260# Validation and Inference 

2261 

2262 

2263def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None: 

2264 """ 

2265 If a timezone is inferred from data, check that it is compatible with 

2266 the user-provided timezone, if any. 

2267 

2268 Parameters 

2269 ---------- 

2270 tz : tzinfo or None 

2271 inferred_tz : tzinfo or None 

2272 

2273 Returns 

2274 ------- 

2275 tz : tzinfo or None 

2276 

2277 Raises 

2278 ------ 

2279 TypeError : if both timezones are present but do not match 

2280 """ 

2281 if tz is None: 

2282 tz = inferred_tz 

2283 elif inferred_tz is None: 

2284 pass 

2285 elif not timezones.tz_compare(tz, inferred_tz): 

2286 raise TypeError( 

2287 f"data is already tz-aware {inferred_tz}, unable to " 

2288 f"set specified tz: {tz}" 

2289 ) 

2290 return tz 

2291 

2292 

2293def _validate_dt64_dtype(dtype): 

2294 """ 

2295 Check that a dtype, if passed, represents either a numpy datetime64[ns] 

2296 dtype or a pandas DatetimeTZDtype. 

2297 

2298 Parameters 

2299 ---------- 

2300 dtype : object 

2301 

2302 Returns 

2303 ------- 

2304 dtype : None, numpy.dtype, or DatetimeTZDtype 

2305 

2306 Raises 

2307 ------ 

2308 ValueError : invalid dtype 

2309 

2310 Notes 

2311 ----- 

2312 Unlike _validate_tz_from_dtype, this does _not_ allow non-existent 

2313 tz errors to go through 

2314 """ 

2315 if dtype is not None: 

2316 dtype = pandas_dtype(dtype) 

2317 if is_dtype_equal(dtype, np.dtype("M8")): 

2318 # no precision, disallowed GH#24806 

2319 msg = ( 

2320 "Passing in 'datetime64' dtype with no precision is not allowed. " 

2321 "Please pass in 'datetime64[ns]' instead." 

2322 ) 

2323 raise ValueError(msg) 

2324 

2325 if ( 

2326 isinstance(dtype, np.dtype) 

2327 and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype))) 

2328 ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)): 

2329 raise ValueError( 

2330 f"Unexpected value for 'dtype': '{dtype}'. " 

2331 "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', " 

2332 "'datetime64[ns]' or DatetimeTZDtype'." 

2333 ) 

2334 

2335 if getattr(dtype, "tz", None): 

2336 # https://github.com/pandas-dev/pandas/issues/18595 

2337 # Ensure that we have a standard timezone for pytz objects. 

2338 # Without this, things like adding an array of timedeltas and 

2339 # a tz-aware Timestamp (with a tz specific to its datetime) will 

2340 # be incorrect(ish?) for the array as a whole 

2341 dtype = cast(DatetimeTZDtype, dtype) 

2342 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) 

2343 

2344 return dtype 

2345 

2346 

2347def _validate_tz_from_dtype( 

2348 dtype, tz: tzinfo | None, explicit_tz_none: bool = False 

2349) -> tzinfo | None: 

2350 """ 

2351 If the given dtype is a DatetimeTZDtype, extract the implied 

2352 tzinfo object from it and check that it does not conflict with the given 

2353 tz. 

2354 

2355 Parameters 

2356 ---------- 

2357 dtype : dtype, str 

2358 tz : None, tzinfo 

2359 explicit_tz_none : bool, default False 

2360 Whether tz=None was passed explicitly, as opposed to lib.no_default. 

2361 

2362 Returns 

2363 ------- 

2364 tz : consensus tzinfo 

2365 

2366 Raises 

2367 ------ 

2368 ValueError : on tzinfo mismatch 

2369 """ 

2370 if dtype is not None: 

2371 if isinstance(dtype, str): 

2372 try: 

2373 dtype = DatetimeTZDtype.construct_from_string(dtype) 

2374 except TypeError: 

2375 # Things like `datetime64[ns]`, which is OK for the 

2376 # constructors, but also nonsense, which should be validated 

2377 # but not by us. We *do* allow non-existent tz errors to 

2378 # go through 

2379 pass 

2380 dtz = getattr(dtype, "tz", None) 

2381 if dtz is not None: 

2382 if tz is not None and not timezones.tz_compare(tz, dtz): 

2383 raise ValueError("cannot supply both a tz and a dtype with a tz") 

2384 if explicit_tz_none: 

2385 raise ValueError("Cannot pass both a timezone-aware dtype and tz=None") 

2386 tz = dtz 

2387 

2388 if tz is not None and is_datetime64_dtype(dtype): 

2389 # We also need to check for the case where the user passed a 

2390 # tz-naive dtype (i.e. datetime64[ns]) 

2391 if tz is not None and not timezones.tz_compare(tz, dtz): 

2392 raise ValueError( 

2393 "cannot supply both a tz and a " 

2394 "timezone-naive dtype (i.e. datetime64[ns])" 

2395 ) 

2396 

2397 return tz 

2398 

2399 

2400def _infer_tz_from_endpoints( 

2401 start: Timestamp, end: Timestamp, tz: tzinfo | None 

2402) -> tzinfo | None: 

2403 """ 

2404 If a timezone is not explicitly given via `tz`, see if one can 

2405 be inferred from the `start` and `end` endpoints. If more than one 

2406 of these inputs provides a timezone, require that they all agree. 

2407 

2408 Parameters 

2409 ---------- 

2410 start : Timestamp 

2411 end : Timestamp 

2412 tz : tzinfo or None 

2413 

2414 Returns 

2415 ------- 

2416 tz : tzinfo or None 

2417 

2418 Raises 

2419 ------ 

2420 TypeError : if start and end timezones do not agree 

2421 """ 

2422 try: 

2423 inferred_tz = timezones.infer_tzinfo(start, end) 

2424 except AssertionError as err: 

2425 # infer_tzinfo raises AssertionError if passed mismatched timezones 

2426 raise TypeError( 

2427 "Start and end cannot both be tz-aware with different timezones" 

2428 ) from err 

2429 

2430 inferred_tz = timezones.maybe_get_tz(inferred_tz) 

2431 tz = timezones.maybe_get_tz(tz) 

2432 

2433 if tz is not None and inferred_tz is not None: 

2434 if not timezones.tz_compare(inferred_tz, tz): 

2435 raise AssertionError("Inferred time zone not equal to passed time zone") 

2436 

2437 elif inferred_tz is not None: 

2438 tz = inferred_tz 

2439 

2440 return tz 

2441 

2442 

2443def _maybe_normalize_endpoints( 

2444 start: Timestamp | None, end: Timestamp | None, normalize: bool 

2445): 

2446 if normalize: 

2447 if start is not None: 

2448 start = start.normalize() 

2449 

2450 if end is not None: 

2451 end = end.normalize() 

2452 

2453 return start, end 

2454 

2455 

2456def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent): 

2457 """ 

2458 Localize a start or end Timestamp to the timezone of the corresponding 

2459 start or end Timestamp 

2460 

2461 Parameters 

2462 ---------- 

2463 ts : start or end Timestamp to potentially localize 

2464 is_none : argument that should be None 

2465 is_not_none : argument that should not be None 

2466 freq : Tick, DateOffset, or None 

2467 tz : str, timezone object or None 

2468 ambiguous: str, localization behavior for ambiguous times 

2469 nonexistent: str, localization behavior for nonexistent times 

2470 

2471 Returns 

2472 ------- 

2473 ts : Timestamp 

2474 """ 

2475 # Make sure start and end are timezone localized if: 

2476 # 1) freq = a Timedelta-like frequency (Tick) 

2477 # 2) freq = None i.e. generating a linspaced range 

2478 if is_none is None and is_not_none is not None: 

2479 # Note: We can't ambiguous='infer' a singular ambiguous time; however, 

2480 # we have historically defaulted ambiguous=False 

2481 ambiguous = ambiguous if ambiguous != "infer" else False 

2482 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None} 

2483 if isinstance(freq, Tick) or freq is None: 

2484 localize_args["tz"] = tz 

2485 ts = ts.tz_localize(**localize_args) 

2486 return ts 

2487 

2488 

def _generate_range(
    start: Timestamp | None,
    end: Timestamp | None,
    periods: int | None,
    offset: BaseOffset,
    *,
    unit: str,
):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : Timestamp or None
    end : Timestamp or None
    periods : int or None
    offset : DateOffset
    unit : str
        Resolution unit each yielded Timestamp is cast to via ``as_unit``.

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    offset = to_offset(offset)

    # Normalize endpoints: Timestamp(None) gives NaT, which we map back to
    # None; otherwise cast to the requested unit.
    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    start = Timestamp(start)  # type: ignore[arg-type]
    if start is not NaT:
        start = start.as_unit(unit)
    else:
        start = None

    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    end = Timestamp(end)  # type: ignore[arg-type]
    if end is not NaT:
        end = end.as_unit(unit)
    else:
        end = None

    # Snap an off-offset endpoint onto the offset grid: start rolls forward,
    # end rolls back, so the range stays within [start, end].
    if start and not offset.is_on_offset(start):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        start = offset.rollforward(start)  # type: ignore[assignment]

    elif end and not offset.is_on_offset(end):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        end = offset.rollback(end)  # type: ignore[assignment]

    # With a positive offset and end before start there is nothing to yield.
    # Unsupported operand types for < ("Timestamp" and "None")
    if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
        end = None
        periods = 0

    # Derive the missing endpoint from `periods` steps of `offset`.
    if end is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        end = start + (periods - 1) * offset  # type: ignore[operator]

    if start is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        start = end - (periods - 1) * offset  # type: ignore[operator]

    start = cast(Timestamp, start)
    end = cast(Timestamp, end)

    cur = start
    if offset.n >= 0:
        # Walk forward from start until end (inclusive).
        while cur <= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date <= cur:
                # Guard against a broken offset that would loop forever.
                raise ValueError(f"Offset {offset} did not increment date")
            cur = next_date
    else:
        # Negative offset: walk backward from start until end (inclusive).
        while cur >= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date >= cur:
                # Guard against a broken offset that would loop forever.
                raise ValueError(f"Offset {offset} did not decrement date")
            cur = next_date