1from __future__ import annotations
2
3from datetime import (
4 datetime,
5 timedelta,
6)
7from typing import Hashable
8
9import numpy as np
10
11from pandas._libs import index as libindex
12from pandas._libs.tslibs import (
13 BaseOffset,
14 NaT,
15 Period,
16 Resolution,
17 Tick,
18)
19from pandas._typing import (
20 Dtype,
21 DtypeObj,
22 npt,
23)
24from pandas.util._decorators import (
25 cache_readonly,
26 doc,
27)
28
29from pandas.core.dtypes.common import is_integer
30from pandas.core.dtypes.dtypes import PeriodDtype
31from pandas.core.dtypes.generic import ABCSeries
32from pandas.core.dtypes.missing import is_valid_na_for_dtype
33
34from pandas.core.arrays.period import (
35 PeriodArray,
36 period_array,
37 raise_on_incompatible,
38 validate_dtype_freq,
39)
40import pandas.core.common as com
41import pandas.core.indexes.base as ibase
42from pandas.core.indexes.base import maybe_extract_name
43from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
44from pandas.core.indexes.datetimes import (
45 DatetimeIndex,
46 Index,
47)
48from pandas.core.indexes.extension import inherit_names
49
# Docstring-template substitutions for this module: start from a copy of the
# base Index kwargs (so the shared dict in ``ibase`` is never mutated) and
# override the description of acceptable ``target`` arguments.
_index_doc_kwargs = {
    **ibase._index_doc_kwargs,
    "target_klass": "PeriodIndex or list of Periods",
}
# Substitutions used when borrowing docstrings written for PeriodArray.
_shared_doc_kwargs = {"klass": "PeriodArray"}
55
56# --- Period index sketch
57
58
def _new_PeriodIndex(cls, **d):
    """
    Rebuild an index of type ``cls`` from keyword state (GH13277, unpickling).

    Parameters
    ----------
    cls : type
        Class to instantiate.
    **d
        Must contain ``"data"``. ``"freq"`` is consumed here only when the
        data has int64 dtype; all remaining entries are forwarded unchanged.

    Returns
    -------
    Instance produced by ``cls._simple_new`` (int64 data) or ``cls(...)``.
    """
    data = d.pop("data")

    if data.dtype != "int64":
        # Not raw ordinals: defer to the regular constructor, which still
        # receives ``freq`` (if any) through the remaining keywords.
        return cls(data, **d)

    # int64 data is wrapped in a PeriodArray using the stored freq, then
    # handed to the fastpath constructor.
    period_values = PeriodArray(data, freq=d.pop("freq", None))
    return cls._simple_new(period_values, **d)
68
69
@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (eg,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Integer period ordinals, converted to int64 and interpreted using
        ``freq``. Only used when ``data`` is not given; ignored otherwise.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None
    name : Hashable, default None
        Name of the resulting index.

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        """Engine class used for lookups on this index."""
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    #
    # NOTE: the methods below deliberately carry no docstring of their own;
    # their documentation is supplied by the ``@doc`` decorator from the
    # corresponding PeriodArray member.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        # Convert on the underlying array, then re-wrap keeping our name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # The array does the conversion; the result is wrapped in a
        # DatetimeIndex carrying the same name.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:
        """
        Construct a PeriodIndex from ``data``, from ``ordinal`` or from fields.

        See the class docstring for the meaning of the parameters.

        Raises
        ------
        TypeError
            If a keyword other than year, month, day, quarter, hour, minute
            or second is passed through ``**fields``.
        """
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # Only carry over the input's references when no copy is requested.
        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        unexpected = [key for key in fields if key not in valid_field_set]
        if unexpected:
            # Report the first offending keyword in call order. Picking an
            # element out of a set difference would make the message depend
            # on set iteration order when several bad keywords are passed.
            argument = unexpected[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        # Object-dtype ndarray view of the index contents.
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # non-Tick freq: falls through to the final raise below
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Normalize ``where`` to a PeriodIndex and defer to the parent class.

        where : array of timestamps
            A DatetimeIndex is converted to a PeriodIndex using ``self.freq``;
            a PeriodIndex is passed through unchanged.
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Raises
        ------
        TypeError
            If ``where`` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Consecutive ordinals may differ by at most one.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Kept so that a failed lookup reports the key as the caller passed it.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        """Raise KeyError if ``key``'s freq does not match this index's freq."""
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        """Box ``label`` as a Period with our freq; KeyError if that fails."""
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        # datetime labels are first boxed as a Period of our freq; everything
        # else is handled by the parent implementation.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """
        Return the (start, end) Periods, at our freq, spanned by ``parsed``
        when it is taken as a Period of resolution ``reso``.
        """
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None):
        # Shifting is plain integer addition on the index; a ``freq``
        # argument is rejected.
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods
483
484
def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Build a PeriodIndex of regularly spaced periods.

    When no frequency can be determined from the arguments, daily
    (calendar day) frequency is used.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. If omitted and neither `start` nor `end` is a
        ``Period`` object, ``"D"`` (daily) is used; otherwise the frequency
        is taken from `start` or `end`.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of non-None arguments among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    n_specified = com.count_not_none(start, end, periods)
    if n_specified != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

    # Fall back to daily frequency only when nothing else can supply one.
    if freq is None:
        bound_is_period = isinstance(start, Period) or isinstance(end, Period)
        if not bound_is_period:
            freq = "D"

    ordinals, resolved_freq = PeriodArray._generate_range(
        start, end, periods, freq, fields={}
    )
    period_values = PeriodArray(ordinals, freq=resolved_freq)
    return PeriodIndex(period_values, name=name)