1from __future__ import annotations
2
3from datetime import (
4 datetime,
5 timedelta,
6)
7from typing import TYPE_CHECKING
8import warnings
9
10import numpy as np
11
12from pandas._libs import index as libindex
13from pandas._libs.tslibs import (
14 BaseOffset,
15 NaT,
16 Period,
17 Resolution,
18 Tick,
19)
20from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR
21from pandas.util._decorators import (
22 cache_readonly,
23 doc,
24)
25from pandas.util._exceptions import find_stack_level
26
27from pandas.core.dtypes.common import is_integer
28from pandas.core.dtypes.dtypes import PeriodDtype
29from pandas.core.dtypes.generic import ABCSeries
30from pandas.core.dtypes.missing import is_valid_na_for_dtype
31
32from pandas.core.arrays.period import (
33 PeriodArray,
34 period_array,
35 raise_on_incompatible,
36 validate_dtype_freq,
37)
38import pandas.core.common as com
39import pandas.core.indexes.base as ibase
40from pandas.core.indexes.base import maybe_extract_name
41from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
42from pandas.core.indexes.datetimes import (
43 DatetimeIndex,
44 Index,
45)
46from pandas.core.indexes.extension import inherit_names
47
48if TYPE_CHECKING:
49 from collections.abc import Hashable
50
51 from pandas._typing import (
52 Dtype,
53 DtypeObj,
54 Self,
55 npt,
56 )
57
58
# Docstring-template substitutions: start from a copy of the base Index
# values and override the description of the accepted ``target`` type.
_index_doc_kwargs = {
    **ibase._index_doc_kwargs,
    "target_klass": "PeriodIndex or list of Periods",
}
# Substitutions passed to ``@doc`` when a docstring is shared with PeriodArray.
_shared_doc_kwargs = dict(klass="PeriodArray")
64
65# --- Period index sketch
66
67
def _new_PeriodIndex(cls, **d):
    """
    Rebuild a PeriodIndex from keyword state when unpickling (GH13277).

    ``d`` must contain ``"data"``. When that data has dtype ``int64`` it is
    wrapped in a PeriodArray using the (optional) ``"freq"`` entry and handed
    to ``cls._simple_new``; otherwise the regular ``cls`` constructor is used.
    All remaining entries of ``d`` are forwarded as keyword arguments.
    """
    data = d.pop("data")
    is_ordinal = data.dtype == "int64"
    if not is_ordinal:
        # Anything that is not raw int64 goes through the normal constructor.
        return cls(data, **d)

    period_dtype = PeriodDtype(d.pop("freq", None))
    return cls._simple_new(PeriodArray(data, dtype=period_dtype), **d)
78
79
@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (eg,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Deprecated; use PeriodIndex.from_ordinals instead.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    month : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    quarter : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    day : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    hour : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    minute : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    second : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    dtype : str or PeriodDtype, default None
    name : Hashable, optional
        Name of the resulting index.

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp
    from_fields
    from_ordinals

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> Self:
        # Convert on the backing array, then re-wrap keeping this index's name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # Convert on the backing array and wrap the result in a DatetimeIndex.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
        **fields,
    ) -> Self:
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # When not copying from an Index/Series, carry over its reference
        # tracker so it is passed along to the new index.
        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        if not set(fields).issubset(valid_field_set):
            argument = next(iter(set(fields) - valid_field_set))
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
        elif len(fields):
            # GH#55960
            warnings.warn(
                "Constructing PeriodIndex from fields is deprecated. Use "
                "PeriodIndex.from_fields instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if ordinal is not None:
            # GH#55960
            warnings.warn(
                "The 'ordinal' keyword in PeriodIndex is deprecated and will "
                "be removed in a future version. Use PeriodIndex.from_ordinals "
                "instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)
            data = cls.from_fields(**fields, freq=freq)._data
            # from_fields already produced a fresh array; no extra copy needed.
            copy = False

        elif fields:
            if data is not None:
                raise ValueError("Cannot pass both data and fields")
            raise ValueError("Cannot pass both ordinal and fields")

        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                ordinal = np.asarray(ordinal, dtype=np.int64)
                dtype = PeriodDtype(freq)
                data = PeriodArray(ordinal, dtype=dtype)
            elif data is not None and ordinal is not None:
                raise ValueError("Cannot pass both data and ordinal")
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    @classmethod
    def from_fields(
        cls,
        *,
        year=None,
        quarter=None,
        month=None,
        day=None,
        hour=None,
        minute=None,
        second=None,
        freq=None,
    ) -> Self:
        """
        Construct a PeriodIndex from fields (year, month, day, etc.).

        Parameters
        ----------
        year : int, array, or Series, default None
        quarter : int, array, or Series, default None
        month : int, array, or Series, default None
        day : int, array, or Series, default None
        hour : int, array, or Series, default None
        minute : int, array, or Series, default None
        second : int, array, or Series, default None
        freq : str or period object, optional
            One of pandas period strings or corresponding objects.

        Returns
        -------
        PeriodIndex

        Examples
        --------
        >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
        >>> idx
        PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
        """
        fields = {
            "year": year,
            "quarter": quarter,
            "month": month,
            "day": day,
            "hour": hour,
            "minute": minute,
            "second": second,
        }
        # Only fields that were actually supplied are forwarded.
        fields = {key: value for key, value in fields.items() if value is not None}
        arr = PeriodArray._from_fields(fields=fields, freq=freq)
        return cls._simple_new(arr)

    @classmethod
    def from_ordinals(cls, ordinals, *, freq, name=None) -> Self:
        """
        Construct a PeriodIndex from ordinals.

        Parameters
        ----------
        ordinals : array-like of int
            Integer ordinals; converted to an int64 ndarray.
        freq : str or period object
            One of pandas period strings or corresponding objects.
        name : str, default None
            Name of the resulting PeriodIndex.

        Returns
        -------
        PeriodIndex
        """
        ordinals = np.asarray(ordinals, dtype=np.int64)
        dtype = PeriodDtype(freq)
        data = PeriodArray._simple_new(ordinals, dtype=dtype)
        return cls._simple_new(data, name=name)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> npt.NDArray[np.object_]:
        # Object-dtype ndarray view of the index contents.
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # Non-Tick freq: fall through to the incompatible-frequency error.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # is_integer also accepts NumPy integer scalars, which are not
            # instances of the builtin ``int``.  Normalize rather than assert
            # so that e.g. ``np.int64`` input does not raise AssertionError
            # (and behaves the same when asserts are stripped under -O).
            return int(other)

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        return self.dtype == dtype

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        where : array of timestamps
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.
        """
        if isinstance(where, DatetimeIndex):
            # Express the timestamps as periods of our own frequency.
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Consecutive ordinals may differ by at most one.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Kept so that the final KeyError reports what the caller passed in.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        # A Period whose dtype differs from ours is reported as a missing key.
        if key._dtype != self.dtype:
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        # Build a Period at our frequency; failure is reported as KeyError.
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        # Build a Period at the parsed resolution, then take its first and
        # last sub-period at this index's frequency.
        freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
        iv = Period(parsed, freq=freq)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None) -> Self:
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        # Shifting is plain integer addition on the index.
        return self + periods
545
546
547def period_range(
548 start=None,
549 end=None,
550 periods: int | None = None,
551 freq=None,
552 name: Hashable | None = None,
553) -> PeriodIndex:
554 """
555 Return a fixed frequency PeriodIndex.
556
557 The day (calendar) is the default frequency.
558
559 Parameters
560 ----------
561 start : str, datetime, date, pandas.Timestamp, or period-like, default None
562 Left bound for generating periods.
563 end : str, datetime, date, pandas.Timestamp, or period-like, default None
564 Right bound for generating periods.
565 periods : int, default None
566 Number of periods to generate.
567 freq : str or DateOffset, optional
568 Frequency alias. By default the freq is taken from `start` or `end`
569 if those are Period objects. Otherwise, the default is ``"D"`` for
570 daily frequency.
571 name : str, default None
572 Name of the resulting PeriodIndex.
573
574 Returns
575 -------
576 PeriodIndex
577
578 Notes
579 -----
580 Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
581 must be specified.
582
583 To learn more about the frequency strings, please see `this link
584 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
585
586 Examples
587 --------
588 >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
589 PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
590 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
591 '2018-01'],
592 dtype='period[M]')
593
594 If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
595 endpoints for a ``PeriodIndex`` with frequency matching that of the
596 ``period_range`` constructor.
597
598 >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
599 ... end=pd.Period('2017Q2', freq='Q'), freq='M')
600 PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
601 dtype='period[M]')
602 """
603 if com.count_not_none(start, end, periods) != 2:
604 raise ValueError(
605 "Of the three parameters: start, end, and periods, "
606 "exactly two must be specified"
607 )
608 if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
609 freq = "D"
610
611 data, freq = PeriodArray._generate_range(start, end, periods, freq)
612 dtype = PeriodDtype(freq)
613 data = PeriodArray(data, dtype=dtype)
614 return PeriodIndex(data, name=name)