Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/datetimes.py: 34%

1from __future__ import annotations

3import datetime as dt

4import operator

5from typing import TYPE_CHECKING

6import warnings

8import numpy as np

9import pytz

11from pandas._libs import (

12 NaT,

13 Period,

14 Timestamp,

15 index as libindex,

16 lib,

17)

18from pandas._libs.tslibs import (

19 Resolution,

20 Tick,

21 Timedelta,

22 periods_per_day,

23 timezones,

24 to_offset,

25)

26from pandas._libs.tslibs.offsets import prefix_mapping

27from pandas.util._decorators import (

28 cache_readonly,

29 doc,

30)

31from pandas.util._exceptions import find_stack_level

33from pandas.core.dtypes.common import is_scalar

34from pandas.core.dtypes.dtypes import DatetimeTZDtype

35from pandas.core.dtypes.generic import ABCSeries

36from pandas.core.dtypes.missing import is_valid_na_for_dtype

38from pandas.core.arrays.datetimes import (

39 DatetimeArray,

40 tz_to_dtype,

41)

42import pandas.core.common as com

43from pandas.core.indexes.base import (

44 Index,

45 maybe_extract_name,

46)

47from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin

48from pandas.core.indexes.extension import inherit_names

49from pandas.core.tools.times import to_time

51if TYPE_CHECKING:

52 from collections.abc import Hashable

54 from pandas._typing import (

55 Dtype,

56 DtypeObj,

57 Frequency,

58 IntervalClosedType,

59 Self,

60 TimeAmbiguous,

61 TimeNonexistent,

62 npt,

63 )

65 from pandas.core.api import (

66 DataFrame,

67 PeriodIndex,

68 )

70from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR

def _new_DatetimeIndex(cls, d):
    """
    Rebuild a DatetimeIndex from pickled keyword state.

    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        Index class to instantiate.
    d : dict
        Keyword state. May hold ``"data"``, ``"tz"`` and ``"freq"``; whatever
        remains is passed as keywords to ``cls._simple_new`` or
        ``cls.__new__``. The mapping is copied first, so the caller's dict is
        never modified.

    Returns
    -------
    instance of ``cls``

    Raises
    ------
    AssertionError
        If ``"tz"`` or ``"freq"`` is present in ``d`` and does not equal the
        value already stored on the DatetimeArray given as ``"data"``.
    """
    # Keys are consumed below; do that on a private copy.
    d = dict(d)

    if "data" in d and not isinstance(d["data"], DatetimeIndex):
        # Avoid need to verify integrity by calling simple_new directly
        data = d.pop("data")
        if not isinstance(data, DatetimeArray):
            # For backward compat with older pickles, we may need to construct
            #  a DatetimeArray to adapt to the newer _simple_new signature
            tz = d.pop("tz")
            freq = d.pop("freq")
            dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq)
        else:
            dta = data
            for key in ["tz", "freq"]:
                # These are already stored in our DatetimeArray; if they are
                #  also in the pickle and don't match, we have a problem.
                if key in d:
                    # Explicit raise (not ``assert``) so the check survives -O.
                    if not d[key] == getattr(dta, key):
                        raise AssertionError(
                            f"pickled {key!r} does not match the stored array"
                        )
                    d.pop(key)
        result = cls._simple_new(dta, **d)
    else:
        with warnings.catch_warnings():
            # TODO: If we knew what was going in to **d, we might be able to
            #  go through _simple_new instead
            warnings.simplefilter("ignore")
            result = cls.__new__(cls, **d)

    return result

104

105

106@inherit_names(

107 DatetimeArray._field_ops

108 + [

109 method

110 for method in DatetimeArray._datetimelike_methods

111 if method not in ("tz_localize", "tz_convert", "strftime")

112 ],

113 DatetimeArray,

114 wrap=True,

115)

116@inherit_names(["is_normalized"], DatetimeArray, cache=True)

117@inherit_names(

118 [

119 "tz",

120 "tzinfo",

121 "dtype",

122 "to_pydatetime",

123 "date",

124 "time",

125 "timetz",

126 "std",

127 ]

128 + DatetimeArray._bool_ops,

129 DatetimeArray,

130)

131class DatetimeIndex(DatetimeTimedeltaMixin):

132 """

133 Immutable ndarray-like of datetime64 data.

134

135 Represented internally as int64, and which can be boxed to Timestamp objects

136 that are subclasses of datetime and carry metadata.

137

138 .. versionchanged:: 2.0.0

139 The various numeric date/time attributes (:attr:`~DatetimeIndex.day`,

140 :attr:`~DatetimeIndex.month`, :attr:`~DatetimeIndex.year` etc.) now have dtype

141 ``int32``. Previously they had dtype ``int64``.

142

143 Parameters

144 ----------

145 data : array-like (1-dimensional)

146 Datetime-like data to construct index with.

147 freq : str or pandas offset object, optional

148 One of pandas date offset strings or corresponding objects. The string

149 'infer' can be passed in order to set the frequency of the index as the

150 inferred frequency upon creation.

151 tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str

152 Set the Timezone of the data.

153 normalize : bool, default False

154 Normalize start/end dates to midnight before generating date range.

155

156 .. deprecated:: 2.1.0

157

158 closed : {'left', 'right'}, optional

159 Set whether to include `start` and `end` that are on the

160 boundary. The default includes boundary points on either end.

161

162 .. deprecated:: 2.1.0

163

164 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'

165 When clocks moved backward due to DST, ambiguous times may arise.

166 For example in Central European Time (UTC+01), when going from 03:00

167 DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC

168 and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter

169 dictates how ambiguous times should be handled.

170

171 - 'infer' will attempt to infer fall dst-transition hours based on

172 order

173 - bool-ndarray where True signifies a DST time, False signifies a

174 non-DST time (note that this flag is only applicable for ambiguous

175 times)

176 - 'NaT' will return NaT where there are ambiguous times

177 - 'raise' will raise an AmbiguousTimeError if there are ambiguous times.

178 dayfirst : bool, default False

179 If True, parse dates in `data` with the day first order.

180 yearfirst : bool, default False

181 If True parse dates in `data` with the year first order.

182 dtype : numpy.dtype or DatetimeTZDtype or str, default None

183 Note that the only NumPy dtype allowed is `datetime64[ns]`.

184 copy : bool, default False

185 Make a copy of input ndarray.

186 name : label, default None

187 Name to be stored in the index.

188

189 Attributes

190 ----------

191 year

192 month

193 day

194 hour

195 minute

196 second

197 microsecond

198 nanosecond

199 date

200 time

201 timetz

202 dayofyear

203 day_of_year

204 dayofweek

205 day_of_week

206 weekday

207 quarter

208 tz

209 freq

210 freqstr

211 is_month_start

212 is_month_end

213 is_quarter_start

214 is_quarter_end

215 is_year_start

216 is_year_end

217 is_leap_year

218 inferred_freq

219

220 Methods

221 -------

222 normalize

223 strftime

224 snap

225 tz_convert

226 tz_localize

227 round

228 floor

229 ceil

230 to_period

231 to_pydatetime

232 to_series

233 to_frame

234 month_name

235 day_name

236 mean

237 std

238

239 See Also

240 --------

241 Index : The base pandas Index type.

242 TimedeltaIndex : Index of timedelta64 data.

243 PeriodIndex : Index of Period data.

244 to_datetime : Convert argument to datetime.

245 date_range : Create a fixed-frequency DatetimeIndex.

246

247 Notes

248 -----

249 To learn more about the frequency strings, please see `this link

250 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

251

252 Examples

253 --------

254 >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])

255 >>> idx

256 DatetimeIndex(['2020-01-01 10:00:00+00:00', '2020-02-01 11:00:00+00:00'],

257 dtype='datetime64[ns, UTC]', freq=None)

258 """

259

260 _typ = "datetimeindex"

261

262 _data_cls = DatetimeArray

263 _supports_partial_string_indexing = True

264

@property
def _engine_type(self) -> type[libindex.DatetimeEngine]:
    """Return ``libindex.DatetimeEngine``, the engine class used by this index."""
    return libindex.DatetimeEngine

268

269 _data: DatetimeArray

270 _values: DatetimeArray

271 tz: dt.tzinfo | None

272

273 # --------------------------------------------------------------------

274 # methods that dispatch to DatetimeArray and wrap result

275

@doc(DatetimeArray.strftime)
def strftime(self, date_format) -> Index:
    # No docstring here on purpose: @doc builds it from DatetimeArray.strftime.
    # Format on the underlying array, then wrap as an object-dtype Index that
    # keeps this index's name.
    arr = self._data.strftime(date_format)
    return Index(arr, name=self.name, dtype=object)

280

@doc(DatetimeArray.tz_convert)
def tz_convert(self, tz) -> Self:
    # No docstring here on purpose: @doc builds it from DatetimeArray.tz_convert.
    # The result is built with this index's name and ``_references``.
    arr = self._data.tz_convert(tz)
    return type(self)._simple_new(arr, name=self.name, refs=self._references)

285

@doc(DatetimeArray.tz_localize)
def tz_localize(
    self,
    tz,
    ambiguous: TimeAmbiguous = "raise",
    nonexistent: TimeNonexistent = "raise",
) -> Self:
    # No docstring here on purpose: @doc builds it from DatetimeArray.tz_localize.
    # Localize the underlying array and re-wrap it under this index's name.
    arr = self._data.tz_localize(tz, ambiguous, nonexistent)
    return type(self)._simple_new(arr, name=self.name)

295

@doc(DatetimeArray.to_period)
def to_period(self, freq=None) -> PeriodIndex:
    # No docstring here on purpose: @doc builds it from DatetimeArray.to_period.
    # PeriodIndex is imported inside the function rather than at module level.
    from pandas.core.indexes.api import PeriodIndex

    arr = self._data.to_period(freq)
    return PeriodIndex._simple_new(arr, name=self.name)

302

@doc(DatetimeArray.to_julian_date)
def to_julian_date(self) -> Index:
    # No docstring here on purpose: @doc builds it from
    # DatetimeArray.to_julian_date. The array result is wrapped in a plain
    # Index that keeps this index's name.
    arr = self._data.to_julian_date()
    return Index._simple_new(arr, name=self.name)

307

@doc(DatetimeArray.isocalendar)
def isocalendar(self) -> DataFrame:
    # No docstring here on purpose: @doc builds it from DatetimeArray.isocalendar.
    # The frame returned by the array is re-indexed by this DatetimeIndex.
    df = self._data.isocalendar()
    return df.set_index(self)

312

@cache_readonly
def _resolution_obj(self) -> Resolution:
    """Return the ``_resolution_obj`` of the underlying DatetimeArray (cached)."""
    return self._data._resolution_obj

316

317 # --------------------------------------------------------------------

318 # Constructors

319

def __new__(
    cls,
    data=None,
    freq: Frequency | lib.NoDefault = lib.no_default,
    tz=lib.no_default,
    normalize: bool | lib.NoDefault = lib.no_default,
    closed=lib.no_default,
    ambiguous: TimeAmbiguous = "raise",
    dayfirst: bool = False,
    yearfirst: bool = False,
    dtype: Dtype | None = None,
    copy: bool = False,
    name: Hashable | None = None,
) -> Self:
    """
    Construct the index from datetime-like ``data``.

    ``closed`` and ``normalize`` are deprecated: passing either only emits a
    FutureWarning; neither is forwarded to the array constructor. Scalar
    ``data`` is rejected via ``cls._raise_scalar_data_error``.

    A DatetimeArray passed without ``freq``, ``tz`` or ``dtype`` is wrapped
    directly (copied first when ``copy`` is True). Anything else goes through
    ``DatetimeArray._from_sequence_not_strict``.
    """
    # GH#52628: same deprecation message for both keywords, 'closed' first.
    for keyword, value in (("closed", closed), ("normalize", normalize)):
        if value is not lib.no_default:
            warnings.warn(
                f"The '{keyword}' keyword in {cls.__name__} construction is "
                "deprecated and will be removed in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

    if is_scalar(data):
        cls._raise_scalar_data_error(data)

    # - Cases checked above all return/raise before reaching here - #

    name = maybe_extract_name(name, data, cls)

    if (
        isinstance(data, DatetimeArray)
        and freq is lib.no_default
        and tz is lib.no_default
        and dtype is None
    ):
        # fastpath, similar logic in TimedeltaIndex.__new__;
        # Note in this particular case we retain non-nano.
        if copy:
            data = data.copy()
        return cls._simple_new(data, name=name)

    dtarr = DatetimeArray._from_sequence_not_strict(
        data,
        dtype=dtype,
        copy=copy,
        tz=tz,
        freq=freq,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        ambiguous=ambiguous,
    )

    # Only carry over the source's references when no copy was requested and
    # the input was itself an Index or Series.
    refs = None
    if not copy and isinstance(data, (Index, ABCSeries)):
        refs = data._references

    return cls._simple_new(dtarr, name=name, refs=refs)

386

387 # --------------------------------------------------------------------

388

@cache_readonly
def _is_dates_only(self) -> bool:
    """
    Return a boolean if we are only dates (and don't have a timezone)

    Returns
    -------
    bool
    """
    if isinstance(self.freq, Tick):
        delta = Timedelta(self.freq)

        # A Tick frequency that is not a whole number of days rules out a
        # dates-only index without looking at the values.
        if delta % dt.timedelta(days=1) != dt.timedelta(days=0):
            return False

    return self._values._is_dates_only

405

def __reduce__(self):
    """
    Pickle support: rebuild through ``_new_DatetimeIndex``.

    Returns the 3-tuple ``(_new_DatetimeIndex, (type(self), state), None)``
    where ``state`` holds the underlying array under ``"data"`` and the index
    name under ``"name"``.
    """
    d = {"data": self._data, "name": self.name}
    return _new_DatetimeIndex, (type(self), d), None

409

410 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:

411 """

412 Can we compare values of the given dtype to our own?

413 """

414 if self.tz is not None:

415 # If we have tz, we can compare to tzaware

416 return isinstance(dtype, DatetimeTZDtype)

417 # if we dont have tz, we can only compare to tznaive

418 return lib.is_np_dtype(dtype, "M")

419

420 # --------------------------------------------------------------------

421 # Rendering Methods

422

423 @cache_readonly

424 def _formatter_func(self):

425 # Note this is equivalent to the DatetimeIndexOpsMixin method but

426 # uses the maybe-cached self._is_dates_only instead of re-computing it.

427 from pandas.io.formats.format import get_format_datetime64

428

429 formatter = get_format_datetime64(is_dates_only=self._is_dates_only)

430 return lambda x: f"'{formatter(x)}'"

431

432 # --------------------------------------------------------------------

433 # Set Operation Methods

434

def _can_range_setop(self, other) -> bool:
    """
    Decide whether a set operation with ``other`` may use the range fast path.

    Returns False as soon as ``self`` or ``other`` carries a timezone that is
    neither UTC nor a fixed offset; otherwise defers to the parent class.
    """
    # GH 46702: If self or other have non-UTC tzs, DST transitions prevent
    # range representation due to no singular step
    for index in (self, other):
        tz = index.tz
        if (
            tz is not None
            and not timezones.is_utc(tz)
            and not timezones.is_fixed_offset(tz)
        ):
            return False
    return super()._can_range_setop(other)

451

452 # --------------------------------------------------------------------

453

def _get_time_micros(self) -> npt.NDArray[np.int64]:
    """
    Return the number of microseconds since midnight.

    Positions flagged by ``self._isnan`` are set to -1.

    Returns
    -------
    ndarray[int64_t]
    """
    values = self._data._local_timestamps()

    ppd = periods_per_day(self._data._creso)

    # Remainder within the day, still expressed in the index's own unit.
    frac = values % ppd
    if self.unit == "ns":
        micros = frac // 1000
    elif self.unit == "us":
        micros = frac
    elif self.unit == "ms":
        micros = frac * 1000
    elif self.unit == "s":
        micros = frac * 1_000_000
    else:  # pragma: no cover
        raise NotImplementedError(self.unit)

    micros[self._isnan] = -1
    return micros

480

def snap(self, freq: Frequency = "S") -> DatetimeIndex:
    """
    Snap time stamps to nearest occurring frequency.

    Parameters
    ----------
    freq : str or offset object, default 'S'
        Frequency to snap to; converted with ``to_offset``.

    Returns
    -------
    DatetimeIndex

    Examples
    --------
    >>> idx = pd.DatetimeIndex(['2023-01-01', '2023-01-02',
    ...                        '2023-02-01', '2023-02-02'])
    >>> idx
    DatetimeIndex(['2023-01-01', '2023-01-02', '2023-02-01', '2023-02-02'],
    dtype='datetime64[ns]', freq=None)
    >>> idx.snap('MS')
    DatetimeIndex(['2023-01-01', '2023-01-01', '2023-02-01', '2023-02-01'],
    dtype='datetime64[ns]', freq=None)
    """
    # Superdumb, punting on any optimizing
    freq = to_offset(freq)

    dta = self._data.copy()

    for i, v in enumerate(self):
        s = v
        if not freq.is_on_offset(s):
            t0 = freq.rollback(s)
            t1 = freq.rollforward(s)
            # Strictly closer to the earlier point -> roll back; ties and
            # everything else roll forward.
            if abs(s - t0) < abs(t1 - s):
                s = t0
            else:
                s = t1
        dta[i] = s

    return DatetimeIndex._simple_new(dta, name=self.name)

517

518 # --------------------------------------------------------------------

519 # Indexing Methods

520

def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime):
    """
    Calculate datetime bounds for parsed time string and its resolution.

    Parameters
    ----------
    reso : Resolution
        Resolution provided by parsed string.
    parsed : datetime
        Datetime from parsed string.

    Returns
    -------
    lower, upper: pd.Timestamp

    Raises
    ------
    ValueError
        If ``parsed`` carries a tzinfo but this index is timezone-naive.
    """
    # Map the resolution's offset abbreviation to a Period freq string,
    # falling back to the abbreviation itself when there is no mapping.
    freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
    per = Period(parsed, freq=freq)
    start, end = per.start_time, per.end_time

    # GH 24076
    # If an incoming date string contained a UTC offset, need to localize
    # the parsed date to this offset first before aligning with the index's
    # timezone
    start = start.tz_localize(parsed.tzinfo)
    end = end.tz_localize(parsed.tzinfo)

    if parsed.tzinfo is not None:
        if self.tz is None:
            raise ValueError(
                "The index must be timezone aware when indexing "
                "with a date string with a UTC offset"
            )
    # The flipped case with parsed.tz is None and self.tz is not None
    #  is ruled out bc parsed and reso are produced by _parse_with_reso,
    #  which localizes parsed.
    return start, end

557

def _parse_with_reso(self, label: str):
    """
    Parse ``label`` via the parent class and return ``(Timestamp, reso)``.

    The parsed value is converted to a Timestamp; when this index is
    timezone-aware and the parsed value is naive, it is localized to the
    index's timezone.
    """
    parsed, reso = super()._parse_with_reso(label)

    parsed = Timestamp(parsed)

    if self.tz is not None and parsed.tzinfo is None:
        # we special-case timezone-naive strings and timezone-aware
        #  DatetimeIndex
        # https://github.com/pandas-dev/pandas/pull/36148#issuecomment-687883081
        parsed = parsed.tz_localize(self.tz)

    return parsed, reso

570

def _disallow_mismatched_indexing(self, key) -> None:
    """
    Check for mismatched-tzawareness indexing and re-raise as KeyError.

    Raises
    ------
    KeyError
        When the array's tz-awareness check raises TypeError for ``key``; the
        TypeError is kept as the cause.
    """
    # we get here with isinstance(key, self._data._recognized_scalars)
    try:
        # GH#36148
        self._data._assert_tzawareness_compat(key)
    except TypeError as err:
        raise KeyError(key) from err

581

def get_loc(self, key):
    """
    Get integer location for requested label

    Returns
    -------
    loc : int
        For a string key that qualifies for partial date slicing, the result
        of ``_partial_date_slice`` is returned instead; for a
        ``datetime.time`` key, the result of ``indexer_at_time``.

    Raises
    ------
    KeyError
        If the key cannot be parsed, mismatches the index's tz-awareness, is
        of an unrecognized type, or is not found.
    TypeError
        If the key is a ``datetime.timedelta``.
    """
    self._check_indexing_error(key)

    # Remember what the caller passed so KeyError reports it, not the
    # converted value.
    orig_key = key
    if is_valid_na_for_dtype(key, self.dtype):
        key = NaT

    if isinstance(key, self._data._recognized_scalars):
        # needed to localize naive datetimes
        self._disallow_mismatched_indexing(key)
        key = Timestamp(key)

    elif isinstance(key, str):
        try:
            parsed, reso = self._parse_with_reso(key)
        except (ValueError, pytz.NonExistentTimeError) as err:
            raise KeyError(key) from err
        self._disallow_mismatched_indexing(parsed)

        if self._can_partial_date_slice(reso):
            try:
                return self._partial_date_slice(reso, parsed)
            except KeyError as err:
                raise KeyError(key) from err

        key = parsed

    elif isinstance(key, dt.timedelta):
        # GH#20464
        raise TypeError(
            f"Cannot index {type(self).__name__} with {type(key).__name__}"
        )

    elif isinstance(key, dt.time):
        return self.indexer_at_time(key)

    else:
        # unrecognized type
        raise KeyError(key)

    try:
        return Index.get_loc(self, key)
    except KeyError as err:
        raise KeyError(orig_key) from err

633

@doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound)
def _maybe_cast_slice_bound(self, label, side: str):
    # No docstring here on purpose: @doc builds it from the mixin's method.
    # GH#42855 handle date here instead of get_slice_bound
    if isinstance(label, dt.date) and not isinstance(label, dt.datetime):
        # Pandas supports slicing with dates, treated as datetimes at midnight.
        # https://github.com/pandas-dev/pandas/issues/31501
        label = Timestamp(label).to_pydatetime()

    label = super()._maybe_cast_slice_bound(label, side)
    # Raises if the bound's tz-awareness does not match the index's.
    self._data._assert_tzawareness_compat(label)
    return Timestamp(label)

645

def slice_indexer(self, start=None, end=None, step=None):
    """
    Return indexer for specified label slice.
    Index.slice_indexer, customized to handle time slicing.

    In addition to functionality provided by Index.slice_indexer, does the
    following:

    - if both `start` and `end` are instances of `datetime.time`, it
      invokes `indexer_between_time`
    - if `start` and `end` are both either string or None perform
      value-based selection in non-monotonic cases.

    Raises
    ------
    ValueError
        If both bounds are times and `step` is neither None nor 1.
    KeyError
        If exactly one bound is a time, or if value-based selection is asked
        for a bound that does not occur in the index.
    """
    # For historical reasons DatetimeIndex supports slices between two
    # instances of datetime.time as if it were applying a slice mask to
    # an array of (self.hour, self.minute, self.seconds, self.microsecond).
    if isinstance(start, dt.time) and isinstance(end, dt.time):
        if step is not None and step != 1:
            raise ValueError("Must have step size of 1 with time slices")
        return self.indexer_between_time(start, end)

    if isinstance(start, dt.time) or isinstance(end, dt.time):
        raise KeyError("Cannot mix time and non-time slice keys")

    def is_non_str_label(point) -> bool:
        # True for a bound that is given and is not a string.
        return point is not None and not isinstance(point, str)

    # GH#33146 if start and end are combinations of str and None and Index is not
    # monotonic, we can not use Index.slice_indexer because it does not honor the
    # actual elements, is only searching for start and end
    #
    # With no bound at all there is nothing value-based to do, so that case
    # goes to the base implementation as well. Previously it fell through and
    # called ``nonzero`` on a 0-d boolean mask, which selected only position 0
    # (and is rejected outright by newer NumPy).
    if (
        (start is None and end is None)
        or is_non_str_label(start)
        or is_non_str_label(end)
        or self.is_monotonic_increasing
    ):
        return Index.slice_indexer(self, start, end, step)

    mask = np.array(True)
    in_index = True
    if start is not None:
        start_casted = self._maybe_cast_slice_bound(start, "left")
        mask = start_casted <= self
        in_index &= (start_casted == self).any()

    if end is not None:
        end_casted = self._maybe_cast_slice_bound(end, "right")
        mask = (self <= end_casted) & mask
        in_index &= (end_casted == self).any()

    if not in_index:
        raise KeyError(
            "Value based partial slicing on non-monotonic DatetimeIndexes "
            "with non-existing keys is not allowed.",
        )
    indexer = mask.nonzero()[0][::step]
    # NOTE(review): when every position is selected this returns slice(None)
    # regardless of `step` - confirm that is intended for negative steps.
    if len(indexer) == len(self):
        return slice(None)
    else:
        return indexer

706

707 # --------------------------------------------------------------------

708

@property
def inferred_type(self) -> str:
    """Return the constant string ``"datetime64"``."""
    # b/c datetime is represented as microseconds since the epoch, make
    # sure we can't have ambiguous indexing
    return "datetime64"

714

def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]:
    """
    Return index locations of values at particular time of day.

    Parameters
    ----------
    time : datetime.time or str
        Time passed in either as object (datetime.time) or as string in
        appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
        "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p").
    asof : bool, default False
        Not supported; passing True raises NotImplementedError.

    Returns
    -------
    np.ndarray[np.intp]

    Raises
    ------
    NotImplementedError
        If `asof` is True.
    ValueError
        If `time` is timezone-aware but the index is not.

    See Also
    --------
    indexer_between_time : Get index locations of values between particular
        times of day.
    DataFrame.at_time : Select values at particular time of day.

    Examples
    --------
    >>> idx = pd.DatetimeIndex(["1/1/2020 10:00", "2/1/2020 11:00",
    ...                         "3/1/2020 10:00"])
    >>> idx.indexer_at_time("10:00")
    array([0, 2])
    """
    if asof:
        raise NotImplementedError("'asof' argument is not supported")

    if isinstance(time, str):
        from dateutil.parser import parse

        time = parse(time).time()

    if time.tzinfo:
        if self.tz is None:
            raise ValueError("Index must be timezone aware.")
        # Compare in the time's own zone: convert the index first.
        time_micros = self.tz_convert(time.tzinfo)._get_time_micros()
    else:
        time_micros = self._get_time_micros()
    micros = _time_to_micros(time)
    return (time_micros == micros).nonzero()[0]

759

def indexer_between_time(
    self, start_time, end_time, include_start: bool = True, include_end: bool = True
) -> npt.NDArray[np.intp]:
    """
    Return index locations of values between particular times of day.

    When `start_time` is later than `end_time` the interval wraps around
    midnight: positions at or after the start *or* at or before the end are
    returned.

    Parameters
    ----------
    start_time, end_time : datetime.time, str
        Time passed either as object (datetime.time) or as string in
        appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
        "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p").
    include_start : bool, default True
        Whether values exactly at `start_time` are included.
    include_end : bool, default True
        Whether values exactly at `end_time` are included.

    Returns
    -------
    np.ndarray[np.intp]

    See Also
    --------
    indexer_at_time : Get index locations of values at particular time of day.
    DataFrame.between_time : Select values between particular times of day.

    Examples
    --------
    >>> idx = pd.date_range("2023-01-01", periods=4, freq="h")
    >>> idx
    DatetimeIndex(['2023-01-01 00:00:00', '2023-01-01 01:00:00',
                   '2023-01-01 02:00:00', '2023-01-01 03:00:00'],
                  dtype='datetime64[ns]', freq='h')
    >>> idx.indexer_between_time("00:00", "2:00", include_end=False)
    array([0, 1])
    """
    start_time = to_time(start_time)
    end_time = to_time(end_time)
    time_micros = self._get_time_micros()
    start_micros = _time_to_micros(start_time)
    end_micros = _time_to_micros(end_time)

    # Closed bound -> <=, open bound -> <.
    lop = operator.le if include_start else operator.lt
    rop = operator.le if include_end else operator.lt

    # An ordinary interval needs both comparisons to hold; one that wraps
    # past midnight needs either.
    join_op = operator.and_ if start_time <= end_time else operator.or_

    mask = join_op(lop(start_micros, time_micros), rop(time_micros, end_micros))

    return mask.nonzero()[0]

819

820

def date_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    tz=None,
    normalize: bool = False,
    name: Hashable | None = None,
    inclusive: IntervalClosedType = "both",
    *,
    unit: str | None = None,
    **kwargs,
) -> DatetimeIndex:
    """
    Return a fixed frequency DatetimeIndex.

    Returns the range of equally spaced time points (where the difference between any
    two adjacent points is specified by the given frequency) such that they all
    satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp.,
    the first and last time points in that range that fall on the boundary of ``freq``
    (if given as a frequency string) or that are valid for ``freq`` (if given as a
    :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of ``start``,
    ``end``, or ``freq`` is *not* specified, this missing parameter can be computed
    given ``periods``, the number of timesteps in the range. See the note below.)

    Parameters
    ----------
    start : str or datetime-like, optional
        Left bound for generating dates.
    end : str or datetime-like, optional
        Right bound for generating dates.
    periods : int, optional
        Number of periods to generate.
    freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5h'. See
        :ref:`here <timeseries.offset_aliases>` for a list of
        frequency aliases.
    tz : str or tzinfo, optional
        Time zone name for returning localized DatetimeIndex, for example
        'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
        timezone-naive unless timezone-aware datetime-likes are passed.
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    inclusive : {"both", "neither", "left", "right"}, default "both"
        Include boundaries; Whether to set each bound as closed or open.

        .. versionadded:: 1.4.0
    unit : str, default None
        Specify the desired resolution of the result.

        .. versionadded:: 2.0.0
    **kwargs
        Forwarded unchanged to ``DatetimeArray._generate_range``.

    Returns
    -------
    DatetimeIndex

    See Also
    --------
    DatetimeIndex : An immutable container for datetimes.
    timedelta_range : Return a fixed frequency TimedeltaIndex.
    period_range : Return a fixed frequency PeriodIndex.
    interval_range : Return a fixed frequency IntervalIndex.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``DatetimeIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    **Specifying the values**

    The next four examples generate the same `DatetimeIndex`, but vary
    the combination of `start`, `end` and `periods`.

    Specify `start` and `end`, with the default daily frequency.

    >>> pd.date_range(start='1/1/2018', end='1/08/2018')
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify timezone-aware `start` and `end`, with the default daily frequency.

    >>> pd.date_range(
    ...     start=pd.to_datetime("1/1/2018").tz_localize("Europe/Berlin"),
    ...     end=pd.to_datetime("1/08/2018").tz_localize("Europe/Berlin"),
    ... )
    DatetimeIndex(['2018-01-01 00:00:00+01:00', '2018-01-02 00:00:00+01:00',
                   '2018-01-03 00:00:00+01:00', '2018-01-04 00:00:00+01:00',
                   '2018-01-05 00:00:00+01:00', '2018-01-06 00:00:00+01:00',
                   '2018-01-07 00:00:00+01:00', '2018-01-08 00:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='D')

    Specify `start` and `periods`, the number of periods (days).

    >>> pd.date_range(start='1/1/2018', periods=8)
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `end` and `periods`, the number of periods (days).

    >>> pd.date_range(end='1/1/2018', periods=8)
    DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
                   '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start`, `end`, and `periods`; the frequency is generated
    automatically (linearly spaced).

    >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
    DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
                   '2018-04-27 00:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Other Parameters**

    Changed the `freq` (frequency) to ``'ME'`` (month end frequency).

    >>> pd.date_range(start='1/1/2018', periods=5, freq='ME')
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
                   '2018-05-31'],
                  dtype='datetime64[ns]', freq='ME')

    Multiples are allowed

    >>> pd.date_range(start='1/1/2018', periods=5, freq='3ME')
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq='3ME')

    `freq` can also be specified as an Offset object.

    >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3))
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq='3ME')

    Specify `tz` to set the timezone.

    >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo')
    DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00',
                   '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00',
                   '2018-01-05 00:00:00+09:00'],
                  dtype='datetime64[ns, Asia/Tokyo]', freq='D')

    `inclusive` controls whether to include `start` and `end` that are on the
    boundary. The default, "both", includes boundary points on either end.

    >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both")
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq='D')

    Use ``inclusive='left'`` to exclude `end` if it falls on the boundary.

    >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left')
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
                  dtype='datetime64[ns]', freq='D')

    Use ``inclusive='right'`` to exclude `start` if it falls on the boundary, and
    similarly ``inclusive='neither'`` will exclude both `start` and `end`.

    >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right')
    DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq='D')

    **Specify a unit**

    >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s")
    DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
                   '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
                   '2817-01-01', '2917-01-01'],
                  dtype='datetime64[s]', freq='100YS-JAN')
    """
    # Daily frequency is the default only while at least one of periods /
    # start / end is missing; with all three given and no freq, freq stays
    # None.
    if freq is None and com.any_none(periods, start, end):
        freq = "D"

    dtarr = DatetimeArray._generate_range(
        start=start,
        end=end,
        periods=periods,
        freq=freq,
        tz=tz,
        normalize=normalize,
        inclusive=inclusive,
        unit=unit,
        **kwargs,
    )
    return DatetimeIndex._simple_new(dtarr, name=name)

1020

1021

def bdate_range(
    start=None,
    end=None,
    periods: int | None = None,
    freq: Frequency | dt.timedelta = "B",
    tz=None,
    normalize: bool = True,
    name: Hashable | None = None,
    weekmask=None,
    holidays=None,
    inclusive: IntervalClosedType = "both",
    **kwargs,
) -> DatetimeIndex:
    """
    Return a fixed frequency DatetimeIndex with business day as the default.

    Parameters
    ----------
    start : str or datetime-like, default None
        Left bound for generating dates.
    end : str or datetime-like, default None
        Right bound for generating dates.
    periods : int, default None
        Number of periods to generate.
    freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'B'
        Frequency strings can have multiples, e.g. '5h'. The default is
        business daily ('B').
    tz : str or None
        Time zone name for returning localized DatetimeIndex, for example
        Asia/Beijing.
    normalize : bool, default True
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    weekmask : str or None, default None
        Weekmask of valid business days, passed to ``numpy.busdaycalendar``,
        only used when custom frequency strings are passed.  The default
        value None is equivalent to 'Mon Tue Wed Thu Fri'.
    holidays : list-like or None, default None
        Dates to exclude from the set of valid business days, passed to
        ``numpy.busdaycalendar``, only used when custom frequency strings
        are passed.
    inclusive : {"both", "neither", "left", "right"}, default "both"
        Include boundaries; Whether to set each bound as closed or open.

        .. versionadded:: 1.4.0
    **kwargs
        Additional keyword arguments, forwarded unchanged to ``date_range``.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    TypeError
        If ``freq`` is None.
    ValueError
        If ``freq`` is a string starting with 'C' that cannot be turned into
        a custom offset, or if ``holidays`` or ``weekmask`` is passed
        together with a frequency that is not a custom frequency string.

    Notes
    -----
    Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified.  Specifying ``freq`` is a requirement
    for ``bdate_range``.  Use ``date_range`` if specifying ``freq`` is not
    desired.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Note how the two weekend days are skipped in the result.

    >>> pd.bdate_range(start='1/1/2018', end='1/08/2018')
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08'],
              dtype='datetime64[ns]', freq='B')
    """
    if freq is None:
        msg = "freq must be specified for bdate_range; use date_range instead"
        raise TypeError(msg)

    if isinstance(freq, str) and freq.startswith("C"):
        # Custom business frequencies: build the offset object here so that
        # the holidays / weekmask calendar is attached to it.
        try:
            weekmask = weekmask or "Mon Tue Wed Thu Fri"
            freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask)
        except (KeyError, TypeError) as err:
            # ``freq`` is still the original string here because the
            # assignment above did not complete.
            msg = f"invalid custom frequency string: {freq}"
            raise ValueError(msg) from err
    elif holidays or weekmask:
        # A calendar was supplied but the frequency cannot make use of it.
        msg = (
            "a custom frequency string is required when holidays or "
            f"weekmask are passed, got frequency {freq}"
        )
        raise ValueError(msg)

    return date_range(
        start=start,
        end=end,
        periods=periods,
        freq=freq,
        tz=tz,
        normalize=normalize,
        name=name,
        inclusive=inclusive,
        **kwargs,
    )

1123

1124

1125def _time_to_micros(time_obj: dt.time) -> int:

1126 seconds = time_obj.hour * 60 * 60 + 60 * time_obj.minute + time_obj.second

1127 return 1_000_000 * seconds + time_obj.microsecond