1from __future__ import annotations
2
3from datetime import timedelta
4import operator
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Literal,
10 TypeVar,
11 cast,
12 overload,
13)
14import warnings
15
16import numpy as np
17
18from pandas._libs import (
19 algos as libalgos,
20 lib,
21)
22from pandas._libs.arrays import NDArrayBacked
23from pandas._libs.tslibs import (
24 BaseOffset,
25 NaT,
26 NaTType,
27 Timedelta,
28 add_overflowsafe,
29 astype_overflowsafe,
30 dt64arr_to_periodarr as c_dt64arr_to_periodarr,
31 get_unit_from_dtype,
32 iNaT,
33 parsing,
34 period as libperiod,
35 to_offset,
36)
37from pandas._libs.tslibs.dtypes import (
38 FreqGroup,
39 PeriodDtypeBase,
40 freq_to_period_freqstr,
41)
42from pandas._libs.tslibs.fields import isleapyear_arr
43from pandas._libs.tslibs.offsets import (
44 Tick,
45 delta_to_tick,
46)
47from pandas._libs.tslibs.period import (
48 DIFFERENT_FREQ,
49 IncompatibleFrequency,
50 Period,
51 get_period_field_arr,
52 period_asfreq_arr,
53)
54from pandas.util._decorators import (
55 cache_readonly,
56 doc,
57)
58from pandas.util._exceptions import find_stack_level
59
60from pandas.core.dtypes.common import (
61 ensure_object,
62 pandas_dtype,
63)
64from pandas.core.dtypes.dtypes import (
65 DatetimeTZDtype,
66 PeriodDtype,
67)
68from pandas.core.dtypes.generic import (
69 ABCIndex,
70 ABCPeriodIndex,
71 ABCSeries,
72 ABCTimedeltaArray,
73)
74from pandas.core.dtypes.missing import isna
75
76from pandas.core.arrays import datetimelike as dtl
77import pandas.core.common as com
78
79if TYPE_CHECKING:
80 from collections.abc import Sequence
81
82 from pandas._typing import (
83 AnyArrayLike,
84 Dtype,
85 FillnaOptions,
86 NpDtype,
87 NumpySorter,
88 NumpyValueArrayLike,
89 Self,
90 npt,
91 )
92
93 from pandas.core.arrays import (
94 DatetimeArray,
95 TimedeltaArray,
96 )
97 from pandas.core.arrays.base import ExtensionArray
98
99
# Type variable bounded by BaseOffset; used in the ``validate_dtype_freq``
# signatures in this module so an offset argument keeps its concrete type.
BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset)


# Keyword substitutions passed to the ``@doc`` decorator on
# ``PeriodArray.asfreq`` (fills the ``{klass}`` placeholder in its docstring).
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}
106
107
108def _field_accessor(name: str, docstring: str | None = None):
109 def f(self):
110 base = self.dtype._dtype_code
111 result = get_period_field_arr(name, self.asi8, base)
112 return result
113
114 f.__name__ = name
115 f.__doc__ = docstring
116 return property(f)
117
118
119# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
120# incompatible with definition in base class "ExtensionArray"
121class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc]
122 """
123 Pandas ExtensionArray for storing Period data.
124
125 Users should use :func:`~pandas.array` to create new instances.
126
127 Parameters
128 ----------
129 values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
130 The data to store. These should be arrays that can be directly
131 converted to ordinals without inference or copy (PeriodArray,
132 ndarray[int64]), or a box around such an array (Series[period],
133 PeriodIndex).
134 dtype : PeriodDtype, optional
135 A PeriodDtype instance from which to extract a `freq`. If both
136 `freq` and `dtype` are specified, then the frequencies must match.
137 freq : str or DateOffset
138 The `freq` to use for the array. Mostly applicable when `values`
139 is an ndarray of integers, when `freq` is required. When `values`
140 is a PeriodArray (or box around), it's checked that ``values.freq``
141 matches `freq`.
142 copy : bool, default False
143 Whether to copy the ordinals before storing.
144
145 Attributes
146 ----------
147 None
148
149 Methods
150 -------
151 None
152
153 See Also
154 --------
155 Period: Represents a period of time.
156 PeriodIndex : Immutable Index for period data.
    period_range: Create a fixed-frequency PeriodIndex.
158 array: Construct a pandas array.
159
160 Notes
161 -----
162 There are two components to a PeriodArray
163
164 - ordinals : integer ndarray
165 - freq : pd.tseries.offsets.Offset
166
167 The values are physically stored as a 1-D ndarray of integers. These are
168 called "ordinals" and represent some kind of offset from a base.
169
170 The `freq` indicates the span covered by each element of the array.
171 All elements in the PeriodArray have the same `freq`.
172
173 Examples
174 --------
175 >>> pd.arrays.PeriodArray(pd.PeriodIndex(['2023-01-01',
176 ... '2023-01-02'], freq='D'))
177 <PeriodArray>
178 ['2023-01-01', '2023-01-02']
179 Length: 2, dtype: period[D]
180 """
181
    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _typ = "periodarray"  # ABCPeriodArray
    # Integer sentinel (iNaT as int64) used as the internal fill value.
    _internal_fill_value = np.int64(iNaT)
    # Scalar types / dtypes / inferred-type strings treated as "ours".
    _recognized_scalars = (Period,)
    _is_recognized_dtype = lambda x: isinstance(
        x, PeriodDtype
    )  # check_compatible_with checks freq match
    _infer_matches = ("period",)

    @property
    def _scalar_type(self) -> type[Period]:
        # Scalar class for elements of this array; always ``Period``.
        return Period

    # Names others delegate to us
    _other_ops: list[str] = []
    _bool_ops: list[str] = ["is_leap_year"]
    _object_ops: list[str] = ["start_time", "end_time", "freq"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "weekday",
        "week",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "qyear",
        "days_in_month",
        "daysinmonth",
    ]
    # Union of the three attribute-name lists above.
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]

    # Declared for type checkers; the value is supplied at construction
    # via ``NDArrayBacked.__init__`` (see ``__init__``).
    _dtype: PeriodDtype
223
224 # --------------------------------------------------------------------
225 # Constructors
226
227 def __init__(
228 self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
229 ) -> None:
230 if freq is not None:
231 # GH#52462
232 warnings.warn(
233 "The 'freq' keyword in the PeriodArray constructor is deprecated "
234 "and will be removed in a future version. Pass 'dtype' instead",
235 FutureWarning,
236 stacklevel=find_stack_level(),
237 )
238 freq = validate_dtype_freq(dtype, freq)
239 dtype = PeriodDtype(freq)
240
241 if dtype is not None:
242 dtype = pandas_dtype(dtype)
243 if not isinstance(dtype, PeriodDtype):
244 raise ValueError(f"Invalid dtype {dtype} for PeriodArray")
245
246 if isinstance(values, ABCSeries):
247 values = values._values
248 if not isinstance(values, type(self)):
249 raise TypeError("Incorrect dtype")
250
251 elif isinstance(values, ABCPeriodIndex):
252 values = values._values
253
254 if isinstance(values, type(self)):
255 if dtype is not None and dtype != values.dtype:
256 raise raise_on_incompatible(values, dtype.freq)
257 values, dtype = values._ndarray, values.dtype
258
259 if not copy:
260 values = np.asarray(values, dtype="int64")
261 else:
262 values = np.array(values, dtype="int64", copy=copy)
263 if dtype is None:
264 raise ValueError("dtype is not specified and cannot be inferred")
265 dtype = cast(PeriodDtype, dtype)
266 NDArrayBacked.__init__(self, values, dtype)
267
268 # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
269 @classmethod
270 def _simple_new( # type: ignore[override]
271 cls,
272 values: npt.NDArray[np.int64],
273 dtype: PeriodDtype,
274 ) -> Self:
275 # alias for PeriodArray.__init__
276 assertion_msg = "Should be numpy array of type i8"
277 assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
278 return cls(values, dtype=dtype)
279
280 @classmethod
281 def _from_sequence(
282 cls,
283 scalars,
284 *,
285 dtype: Dtype | None = None,
286 copy: bool = False,
287 ) -> Self:
288 if dtype is not None:
289 dtype = pandas_dtype(dtype)
290 if dtype and isinstance(dtype, PeriodDtype):
291 freq = dtype.freq
292 else:
293 freq = None
294
295 if isinstance(scalars, cls):
296 validate_dtype_freq(scalars.dtype, freq)
297 if copy:
298 scalars = scalars.copy()
299 return scalars
300
301 periods = np.asarray(scalars, dtype=object)
302
303 freq = freq or libperiod.extract_freq(periods)
304 ordinals = libperiod.extract_ordinals(periods, freq)
305 dtype = PeriodDtype(freq)
306 return cls(ordinals, dtype=dtype)
307
    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> Self:
        # Strings need no special handling here: delegate to _from_sequence
        # with the same dtype/copy arguments.
        return cls._from_sequence(strings, dtype=dtype, copy=copy)
313
314 @classmethod
315 def _from_datetime64(cls, data, freq, tz=None) -> Self:
316 """
317 Construct a PeriodArray from a datetime64 array
318
319 Parameters
320 ----------
321 data : ndarray[datetime64[ns], datetime64[ns, tz]]
322 freq : str or Tick
323 tz : tzinfo, optional
324
325 Returns
326 -------
327 PeriodArray[freq]
328 """
329 if isinstance(freq, BaseOffset):
330 freq = freq_to_period_freqstr(freq.n, freq.name)
331 data, freq = dt64arr_to_periodarr(data, freq, tz)
332 dtype = PeriodDtype(freq)
333 return cls(data, dtype=dtype)
334
335 @classmethod
336 def _generate_range(cls, start, end, periods, freq):
337 periods = dtl.validate_periods(periods)
338
339 if freq is not None:
340 freq = Period._maybe_convert_freq(freq)
341
342 if start is not None or end is not None:
343 subarr, freq = _get_ordinal_range(start, end, periods, freq)
344 else:
345 raise ValueError("Not enough parameters to construct Period range")
346
347 return subarr, freq
348
349 @classmethod
350 def _from_fields(cls, *, fields: dict, freq) -> Self:
351 subarr, freq = _range_from_fields(freq=freq, **fields)
352 dtype = PeriodDtype(freq)
353 return cls._simple_new(subarr, dtype=dtype)
354
355 # -----------------------------------------------------------------
356 # DatetimeLike Interface
357
358 # error: Argument 1 of "_unbox_scalar" is incompatible with supertype
359 # "DatetimeLikeArrayMixin"; supertype defines the argument type as
360 # "Union[Union[Period, Any, Timedelta], NaTType]"
361 def _unbox_scalar( # type: ignore[override]
362 self,
363 value: Period | NaTType,
364 ) -> np.int64:
365 if value is NaT:
366 # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value"
367 return np.int64(value._value) # type: ignore[union-attr]
368 elif isinstance(value, self._scalar_type):
369 self._check_compatible_with(value)
370 return np.int64(value.ordinal)
371 else:
372 raise ValueError(f"'value' should be a Period. Got '{value}' instead.")
373
    def _scalar_from_string(self, value: str) -> Period:
        # Parse the string as a Period using this array's own frequency.
        return Period(value, freq=self.freq)
376
377 # error: Argument 1 of "_check_compatible_with" is incompatible with
378 # supertype "DatetimeLikeArrayMixin"; supertype defines the argument type
379 # as "Period | Timestamp | Timedelta | NaTType"
380 def _check_compatible_with(self, other: Period | NaTType | PeriodArray) -> None: # type: ignore[override]
381 if other is NaT:
382 return
383 # error: Item "NaTType" of "Period | NaTType | PeriodArray" has no
384 # attribute "freq"
385 self._require_matching_freq(other.freq) # type: ignore[union-attr]
386
387 # --------------------------------------------------------------------
388 # Data / Attributes
389
    @cache_readonly
    def dtype(self) -> PeriodDtype:
        # Expose the stored ``_dtype``; wrapped in ``cache_readonly``.
        return self._dtype
393
    # error: Cannot override writeable attribute with read-only property
    @property  # type: ignore[override]
    def freq(self) -> BaseOffset:
        """
        Return the frequency object for this PeriodArray.

        The value is read from ``self.dtype.freq``.
        """
        return self.dtype.freq
401
402 @property
403 def freqstr(self) -> str:
404 return freq_to_period_freqstr(self.freq.n, self.freq.name)
405
406 def __array__(
407 self, dtype: NpDtype | None = None, copy: bool | None = None
408 ) -> np.ndarray:
409 if dtype == "i8":
410 return self.asi8
411 elif dtype == bool:
412 return ~self._isnan
413
414 # This will raise TypeError for non-object dtypes
415 return np.array(list(self), dtype=object)
416
417 def __arrow_array__(self, type=None):
418 """
419 Convert myself into a pyarrow Array.
420 """
421 import pyarrow
422
423 from pandas.core.arrays.arrow.extension_types import ArrowPeriodType
424
425 if type is not None:
426 if pyarrow.types.is_integer(type):
427 return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
428 elif isinstance(type, ArrowPeriodType):
429 # ensure we have the same freq
430 if self.freqstr != type.freq:
431 raise TypeError(
432 "Not supported to convert PeriodArray to array with different "
433 f"'freq' ({self.freqstr} vs {type.freq})"
434 )
435 else:
436 raise TypeError(
437 f"Not supported to convert PeriodArray to '{type}' type"
438 )
439
440 period_type = ArrowPeriodType(self.freqstr)
441 storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
442 return pyarrow.ExtensionArray.from_storage(period_type, storage_array)
443
444 # --------------------------------------------------------------------
445 # Vectorized analogues of Period properties
446
447 year = _field_accessor(
448 "year",
449 """
450 The year of the period.
451
452 Examples
453 --------
454 >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
455 >>> idx.year
456 Index([2023, 2024, 2025], dtype='int64')
457 """,
458 )
459 month = _field_accessor(
460 "month",
461 """
462 The month as January=1, December=12.
463
464 Examples
465 --------
466 >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
467 >>> idx.month
468 Index([1, 2, 3], dtype='int64')
469 """,
470 )
471 day = _field_accessor(
472 "day",
473 """
474 The days of the period.
475
476 Examples
477 --------
478 >>> idx = pd.PeriodIndex(['2020-01-31', '2020-02-28'], freq='D')
479 >>> idx.day
480 Index([31, 28], dtype='int64')
481 """,
482 )
483 hour = _field_accessor(
484 "hour",
485 """
486 The hour of the period.
487
488 Examples
489 --------
490 >>> idx = pd.PeriodIndex(["2023-01-01 10:00", "2023-01-01 11:00"], freq='h')
491 >>> idx.hour
492 Index([10, 11], dtype='int64')
493 """,
494 )
495 minute = _field_accessor(
496 "minute",
497 """
498 The minute of the period.
499
500 Examples
501 --------
502 >>> idx = pd.PeriodIndex(["2023-01-01 10:30:00",
503 ... "2023-01-01 11:50:00"], freq='min')
504 >>> idx.minute
505 Index([30, 50], dtype='int64')
506 """,
507 )
508 second = _field_accessor(
509 "second",
510 """
511 The second of the period.
512
513 Examples
514 --------
515 >>> idx = pd.PeriodIndex(["2023-01-01 10:00:30",
516 ... "2023-01-01 10:00:31"], freq='s')
517 >>> idx.second
518 Index([30, 31], dtype='int64')
519 """,
520 )
521 weekofyear = _field_accessor(
522 "week",
523 """
524 The week ordinal of the year.
525
526 Examples
527 --------
528 >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
529 >>> idx.week # It can be written `weekofyear`
530 Index([5, 9, 13], dtype='int64')
531 """,
532 )
    # ``week`` is the very same property object as ``weekofyear``.
    week = weekofyear
534 day_of_week = _field_accessor(
535 "day_of_week",
536 """
537 The day of the week with Monday=0, Sunday=6.
538
539 Examples
540 --------
541 >>> idx = pd.PeriodIndex(["2023-01-01", "2023-01-02", "2023-01-03"], freq="D")
542 >>> idx.weekday
543 Index([6, 0, 1], dtype='int64')
544 """,
545 )
    # Aliases: ``dayofweek`` and ``weekday`` both refer to the ``day_of_week``
    # property object.
    dayofweek = day_of_week
    weekday = dayofweek
548 dayofyear = day_of_year = _field_accessor(
549 "day_of_year",
550 """
551 The ordinal day of the year.
552
553 Examples
554 --------
555 >>> idx = pd.PeriodIndex(["2023-01-10", "2023-02-01", "2023-03-01"], freq="D")
556 >>> idx.dayofyear
557 Index([10, 32, 60], dtype='int64')
558
559 >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
560 >>> idx
561 PeriodIndex(['2023', '2024', '2025'], dtype='period[Y-DEC]')
562 >>> idx.dayofyear
563 Index([365, 366, 365], dtype='int64')
564 """,
565 )
566 quarter = _field_accessor(
567 "quarter",
568 """
569 The quarter of the date.
570
571 Examples
572 --------
573 >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
574 >>> idx.quarter
575 Index([1, 1, 1], dtype='int64')
576 """,
577 )
    # No docstring argument is given, so this property's ``__doc__`` is None.
    qyear = _field_accessor("qyear")
579 days_in_month = _field_accessor(
580 "days_in_month",
581 """
582 The number of days in the month.
583
584 Examples
585 --------
586 For Series:
587
588 >>> period = pd.period_range('2020-1-1 00:00', '2020-3-1 00:00', freq='M')
589 >>> s = pd.Series(period)
590 >>> s
591 0 2020-01
592 1 2020-02
593 2 2020-03
594 dtype: period[M]
595 >>> s.dt.days_in_month
596 0 31
597 1 29
598 2 31
599 dtype: int64
600
601 For PeriodIndex:
602
603 >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
604 >>> idx.days_in_month # It can be also entered as `daysinmonth`
605 Index([31, 28, 31], dtype='int64')
606 """,
607 )
    # Alias bound to the same property object as ``days_in_month``.
    daysinmonth = days_in_month
609
610 @property
611 def is_leap_year(self) -> npt.NDArray[np.bool_]:
612 """
613 Logical indicating if the date belongs to a leap year.
614
615 Examples
616 --------
617 >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
618 >>> idx.is_leap_year
619 array([False, True, False])
620 """
621 return isleapyear_arr(np.asarray(self.year))
622
623 def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
624 """
625 Cast to DatetimeArray/Index.
626
627 Parameters
628 ----------
629 freq : str or DateOffset, optional
630 Target frequency. The default is 'D' for week or longer,
631 's' otherwise.
632 how : {'s', 'e', 'start', 'end'}
633 Whether to use the start or end of the time period being converted.
634
635 Returns
636 -------
637 DatetimeArray/Index
638
639 Examples
640 --------
641 >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
642 >>> idx.to_timestamp()
643 DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'],
644 dtype='datetime64[ns]', freq='MS')
645 """
646 from pandas.core.arrays import DatetimeArray
647
648 how = libperiod.validate_end_alias(how)
649
650 end = how == "E"
651 if end:
652 if freq == "B" or self.freq == "B":
653 # roll forward to ensure we land on B date
654 adjust = Timedelta(1, "D") - Timedelta(1, "ns")
655 return self.to_timestamp(how="start") + adjust
656 else:
657 adjust = Timedelta(1, "ns")
658 return (self + self.freq).to_timestamp(how="start") - adjust
659
660 if freq is None:
661 freq_code = self._dtype._get_to_timestamp_base()
662 dtype = PeriodDtypeBase(freq_code, 1)
663 freq = dtype._freqstr
664 base = freq_code
665 else:
666 freq = Period._maybe_convert_freq(freq)
667 base = freq._period_dtype_code
668
669 new_parr = self.asfreq(freq, how=how)
670
671 new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
672 dta = DatetimeArray._from_sequence(new_data)
673
674 if self.freq.name == "B":
675 # See if we can retain BDay instead of Day in cases where
676 # len(self) is too small for infer_freq to distinguish between them
677 diffs = libalgos.unique_deltas(self.asi8)
678 if len(diffs) == 1:
679 diff = diffs[0]
680 if diff == self.dtype._n:
681 dta._freq = self.freq
682 elif diff == 1:
683 dta._freq = self.freq.base
684 # TODO: other cases?
685 return dta
686 else:
687 return dta._with_freq("infer")
688
689 # --------------------------------------------------------------------
690
    def _box_func(self, x) -> Period | NaTType:
        # Box a single ordinal into a scalar carrying this array's frequency.
        return Period._from_ordinal(ordinal=x, freq=self.freq)
693
694 @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex")
695 def asfreq(self, freq=None, how: str = "E") -> Self:
696 """
697 Convert the {klass} to the specified frequency `freq`.
698
699 Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments
700 to each :class:`~pandas.Period` in this {klass}.
701
702 Parameters
703 ----------
704 freq : str
705 A frequency.
706 how : str {{'E', 'S'}}, default 'E'
707 Whether the elements should be aligned to the end
708 or start within pa period.
709
710 * 'E', 'END', or 'FINISH' for end,
711 * 'S', 'START', or 'BEGIN' for start.
712
713 January 31st ('END') vs. January 1st ('START') for example.
714
715 Returns
716 -------
717 {klass}
718 The transformed {klass} with the new frequency.
719
720 See Also
721 --------
722 {other}.asfreq: Convert each Period in a {other_name} to the given frequency.
723 Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.
724
725 Examples
726 --------
727 >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='Y')
728 >>> pidx
729 PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
730 dtype='period[Y-DEC]')
731
732 >>> pidx.asfreq('M')
733 PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
734 '2015-12'], dtype='period[M]')
735
736 >>> pidx.asfreq('M', how='S')
737 PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
738 '2015-01'], dtype='period[M]')
739 """
740 how = libperiod.validate_end_alias(how)
741 if isinstance(freq, BaseOffset) and hasattr(freq, "_period_dtype_code"):
742 freq = PeriodDtype(freq)._freqstr
743 freq = Period._maybe_convert_freq(freq)
744
745 base1 = self._dtype._dtype_code
746 base2 = freq._period_dtype_code
747
748 asi8 = self.asi8
749 # self.freq.n can't be negative or 0
750 end = how == "E"
751 if end:
752 ordinal = asi8 + self.dtype._n - 1
753 else:
754 ordinal = asi8
755
756 new_data = period_asfreq_arr(ordinal, base1, base2, end)
757
758 if self._hasna:
759 new_data[self._isnan] = iNaT
760
761 dtype = PeriodDtype(freq)
762 return type(self)(new_data, dtype=dtype)
763
764 # ------------------------------------------------------------------
765 # Rendering Methods
766
767 def _formatter(self, boxed: bool = False):
768 if boxed:
769 return str
770 return "'{}'".format
771
772 def _format_native_types(
773 self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
774 ) -> npt.NDArray[np.object_]:
775 """
776 actually format my specific types
777 """
778 return libperiod.period_array_strftime(
779 self.asi8, self.dtype._dtype_code, na_rep, date_format
780 )
781
782 # ------------------------------------------------------------------
783
784 def astype(self, dtype, copy: bool = True):
785 # We handle Period[T] -> Period[U]
786 # Our parent handles everything else.
787 dtype = pandas_dtype(dtype)
788 if dtype == self._dtype:
789 if not copy:
790 return self
791 else:
792 return self.copy()
793 if isinstance(dtype, PeriodDtype):
794 return self.asfreq(dtype.freq)
795
796 if lib.is_np_dtype(dtype, "M") or isinstance(dtype, DatetimeTZDtype):
797 # GH#45038 match PeriodIndex behavior.
798 tz = getattr(dtype, "tz", None)
799 unit = dtl.dtype_to_unit(dtype)
800 return self.to_timestamp().tz_localize(tz).as_unit(unit)
801
802 return super().astype(dtype, copy=copy)
803
804 def searchsorted(
805 self,
806 value: NumpyValueArrayLike | ExtensionArray,
807 side: Literal["left", "right"] = "left",
808 sorter: NumpySorter | None = None,
809 ) -> npt.NDArray[np.intp] | np.intp:
810 npvalue = self._validate_setitem_value(value).view("M8[ns]")
811
812 # Cast to M8 to get datetime-like NaT placement,
813 # similar to dtl._period_dispatch
814 m8arr = self._ndarray.view("M8[ns]")
815 return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
816
817 def _pad_or_backfill(
818 self,
819 *,
820 method: FillnaOptions,
821 limit: int | None = None,
822 limit_area: Literal["inside", "outside"] | None = None,
823 copy: bool = True,
824 ) -> Self:
825 # view as dt64 so we get treated as timelike in core.missing,
826 # similar to dtl._period_dispatch
827 dta = self.view("M8[ns]")
828 result = dta._pad_or_backfill(
829 method=method, limit=limit, limit_area=limit_area, copy=copy
830 )
831 if copy:
832 return cast("Self", result.view(self.dtype))
833 else:
834 return self
835
836 def fillna(
837 self, value=None, method=None, limit: int | None = None, copy: bool = True
838 ) -> Self:
839 if method is not None:
840 # view as dt64 so we get treated as timelike in core.missing,
841 # similar to dtl._period_dispatch
842 dta = self.view("M8[ns]")
843 result = dta.fillna(value=value, method=method, limit=limit, copy=copy)
844 # error: Incompatible return value type (got "Union[ExtensionArray,
845 # ndarray[Any, Any]]", expected "PeriodArray")
846 return result.view(self.dtype) # type: ignore[return-value]
847 return super().fillna(value=value, method=method, limit=limit, copy=copy)
848
849 # ------------------------------------------------------------------
850 # Arithmetic Methods
851
852 def _addsub_int_array_or_scalar(
853 self, other: np.ndarray | int, op: Callable[[Any, Any], Any]
854 ) -> Self:
855 """
856 Add or subtract array of integers.
857
858 Parameters
859 ----------
860 other : np.ndarray[int64] or int
861 op : {operator.add, operator.sub}
862
863 Returns
864 -------
865 result : PeriodArray
866 """
867 assert op in [operator.add, operator.sub]
868 if op is operator.sub:
869 other = -other
870 res_values = add_overflowsafe(self.asi8, np.asarray(other, dtype="i8"))
871 return type(self)(res_values, dtype=self.dtype)
872
    def _add_offset(self, other: BaseOffset):
        # Tick offsets must not reach this method.
        assert not isinstance(other, Tick)

        # Check the offset against our freq (with base=True), then add its
        # multiple ``n`` to the ordinals.
        self._require_matching_freq(other, base=True)
        return self._addsub_int_array_or_scalar(other.n, operator.add)
878
879 # TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
880 def _add_timedeltalike_scalar(self, other):
881 """
882 Parameters
883 ----------
884 other : timedelta, Tick, np.timedelta64
885
886 Returns
887 -------
888 PeriodArray
889 """
890 if not isinstance(self.freq, Tick):
891 # We cannot add timedelta-like to non-tick PeriodArray
892 raise raise_on_incompatible(self, other)
893
894 if isna(other):
895 # i.e. np.timedelta64("NaT")
896 return super()._add_timedeltalike_scalar(other)
897
898 td = np.asarray(Timedelta(other).asm8)
899 return self._add_timedelta_arraylike(td)
900
901 def _add_timedelta_arraylike(
902 self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
903 ) -> Self:
904 """
905 Parameters
906 ----------
907 other : TimedeltaArray or ndarray[timedelta64]
908
909 Returns
910 -------
911 PeriodArray
912 """
913 if not self.dtype._is_tick_like():
914 # We cannot add timedelta-like to non-tick PeriodArray
915 raise TypeError(
916 f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
917 )
918
919 dtype = np.dtype(f"m8[{self.dtype._td64_unit}]")
920
921 # Similar to _check_timedeltalike_freq_compat, but we raise with a
922 # more specific exception message if necessary.
923 try:
924 delta = astype_overflowsafe(
925 np.asarray(other), dtype=dtype, copy=False, round_ok=False
926 )
927 except ValueError as err:
928 # e.g. if we have minutes freq and try to add 30s
929 # "Cannot losslessly convert units"
930 raise IncompatibleFrequency(
931 "Cannot add/subtract timedelta-like from PeriodArray that is "
932 "not an integer multiple of the PeriodArray's freq."
933 ) from err
934
935 res_values = add_overflowsafe(self.asi8, np.asarray(delta.view("i8")))
936 return type(self)(res_values, dtype=self.dtype)
937
938 def _check_timedeltalike_freq_compat(self, other):
939 """
940 Arithmetic operations with timedelta-like scalars or array `other`
941 are only valid if `other` is an integer multiple of `self.freq`.
942 If the operation is valid, find that integer multiple. Otherwise,
943 raise because the operation is invalid.
944
945 Parameters
946 ----------
947 other : timedelta, np.timedelta64, Tick,
948 ndarray[timedelta64], TimedeltaArray, TimedeltaIndex
949
950 Returns
951 -------
952 multiple : int or ndarray[int64]
953
954 Raises
955 ------
956 IncompatibleFrequency
957 """
958 assert self.dtype._is_tick_like() # checked by calling function
959
960 dtype = np.dtype(f"m8[{self.dtype._td64_unit}]")
961
962 if isinstance(other, (timedelta, np.timedelta64, Tick)):
963 td = np.asarray(Timedelta(other).asm8)
964 else:
965 td = np.asarray(other)
966
967 try:
968 delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False)
969 except ValueError as err:
970 raise raise_on_incompatible(self, other) from err
971
972 delta = delta.view("i8")
973 return lib.item_from_zerodim(delta)
974
975
def raise_on_incompatible(left, right) -> IncompatibleFrequency:
    """
    Build (without raising) an IncompatibleFrequency with a uniform message.

    Parameters
    ----------
    left : PeriodArray
    right : None, DateOffset, Period, ndarray, or timedelta-like

    Returns
    -------
    IncompatibleFrequency
        Exception to be raised by the caller.
    """
    # GH#24283 error message format depends on whether right is scalar
    if right is None or isinstance(right, (np.ndarray, ABCTimedeltaArray)):
        their_freq = None
    elif isinstance(right, BaseOffset):
        their_freq = freq_to_period_freqstr(right.n, right.name)
    elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period)):
        their_freq = right.freqstr
    else:
        # Anything else is treated as timedelta-like
        their_freq = delta_to_tick(Timedelta(right)).freqstr

    our_freq = freq_to_period_freqstr(left.freq.n, left.freq.name)
    message = DIFFERENT_FREQ.format(
        cls=type(left).__name__, own_freq=our_freq, other_freq=their_freq
    )
    return IncompatibleFrequency(message)
1006
1007
1008# -------------------------------------------------------------------
1009# Constructor Helpers
1010
1011
def period_array(
    data: Sequence[Period | str | None] | AnyArrayLike,
    freq: str | Tick | BaseOffset | None = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq.`` Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` is a non-empty array of floating point values.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='Y'),
    ...               pd.Period('2018', freq='Y')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[Y-DEC]

    >>> period_array([pd.Period('2017', freq='Y'),
    ...               pd.Period('2018', freq='Y'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[Y-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    data_dtype = getattr(data, "dtype", None)

    # datetime64 input has its own conversion path
    if lib.is_np_dtype(data_dtype, "M"):
        return PeriodArray._from_datetime64(data, freq)

    # Already period-typed: wrap, converting only when freq really differs
    if isinstance(data_dtype, PeriodDtype):
        out = PeriodArray(data)
        if freq is None or freq == data_dtype.freq:
            return out
        return out.asfreq(freq)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
        data = list(data)

    arrdata = np.asarray(data)

    dtype: PeriodDtype | None = PeriodDtype(freq) if freq else None

    kind = arrdata.dtype.kind
    if kind == "f" and len(arrdata) > 0:
        raise TypeError("PeriodIndex does not allow floating point in construction")

    if kind in "iu":
        as_int64 = arrdata.astype(np.int64, copy=False)
        # error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
        # Tick, None]"; expected "Union[timedelta, BaseOffset, str]"
        ordinals = libperiod.from_ordinals(as_int64, freq)  # type: ignore[arg-type]
        return PeriodArray(ordinals, dtype=dtype)

    objects = ensure_object(arrdata)
    if freq is None:
        freq = libperiod.extract_freq(objects)
    return PeriodArray._from_sequence(objects, dtype=PeriodDtype(freq))
1109
1110
@overload
def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT:
    ...


@overload
def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset:
    ...


def validate_dtype_freq(
    dtype, freq: BaseOffsetT | BaseOffset | timedelta | str | None
) -> BaseOffsetT:
    """
    Reconcile a dtype with a frequency.

    When both are given they must agree; when only ``dtype`` is given, the
    frequency it implies is returned.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    offset = None if freq is None else to_offset(freq, is_period=True)

    if dtype is None:
        # Nothing to validate against; hand back the converted freq (or None)
        # error: Incompatible return value type (got "Union[BaseOffset, Any,
        # None]", expected "BaseOffset")
        return offset  # type: ignore[return-value]

    dtype = pandas_dtype(dtype)
    if not isinstance(dtype, PeriodDtype):
        raise ValueError("dtype must be PeriodDtype")

    if offset is None:
        return dtype.freq
    if offset != dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    return offset  # type: ignore[return-value]
1156
1157
def dt64arr_to_periodarr(
    data, freq, tz=None
) -> tuple[npt.NDArray[np.int64], BaseOffset]:
    """
    Convert a datetime64 array-like into Period ordinals.

    Parameters
    ----------
    data : Series, Index or ndarray
        Must expose a numpy datetime64 dtype (``dtype.kind == "M"``).
    freq : str, BaseOffset or None
        Target frequency. When None and ``data`` is an Index or Series, the
        frequency is read from ``data.freq`` or ``data.dt.freq`` respectively.
    tz : tzinfo, optional
        Forwarded unchanged to the low-level conversion routine.

    Returns
    -------
    ordinals : ndarray[int64]
    freq : BaseOffset
        The frequency after ``Period._maybe_convert_freq`` has been applied.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not a numpy datetime64 dtype.
    """
    source_dtype = data.dtype
    if not (isinstance(source_dtype, np.dtype) and source_dtype.kind == "M"):
        raise ValueError(f"Wrong dtype: {data.dtype}")

    if isinstance(data, (ABCIndex, ABCSeries)):
        if freq is None:
            # Fall back on the frequency carried by the pandas container.
            freq = data.freq if isinstance(data, ABCIndex) else data.dt.freq
        # Work on the underlying array from here on.
        data = data._values

    resolution = get_unit_from_dtype(data.dtype)
    freq = Period._maybe_convert_freq(freq)
    dtype_code = freq._period_dtype_code

    ordinals = c_dt64arr_to_periodarr(
        data.view("i8"), dtype_code, tz, reso=resolution
    )
    return ordinals, freq
1196
1197
1198def _get_ordinal_range(start, end, periods, freq, mult: int = 1):
1199 if com.count_not_none(start, end, periods) != 2:
1200 raise ValueError(
1201 "Of the three parameters: start, end, and periods, "
1202 "exactly two must be specified"
1203 )
1204
1205 if freq is not None:
1206 freq = to_offset(freq, is_period=True)
1207 mult = freq.n
1208
1209 if start is not None:
1210 start = Period(start, freq)
1211 if end is not None:
1212 end = Period(end, freq)
1213
1214 is_start_per = isinstance(start, Period)
1215 is_end_per = isinstance(end, Period)
1216
1217 if is_start_per and is_end_per and start.freq != end.freq:
1218 raise ValueError("start and end must have same freq")
1219 if start is NaT or end is NaT:
1220 raise ValueError("start and end must not be NaT")
1221
1222 if freq is None:
1223 if is_start_per:
1224 freq = start.freq
1225 elif is_end_per:
1226 freq = end.freq
1227 else: # pragma: no cover
1228 raise ValueError("Could not infer freq from start/end")
1229 mult = freq.n
1230
1231 if periods is not None:
1232 periods = periods * mult
1233 if start is None:
1234 data = np.arange(
1235 end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64
1236 )
1237 else:
1238 data = np.arange(
1239 start.ordinal, start.ordinal + periods, mult, dtype=np.int64
1240 )
1241 else:
1242 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
1243
1244 return data, freq
1245
1246
def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
) -> tuple[np.ndarray, BaseOffset]:
    """
    Compute period ordinals from separate date/time field values.

    Fields may be scalars or equal-length sequences; they are aligned with
    ``_make_field_arrays``. Missing ``hour``, ``minute`` and ``second``
    default to 0 and a missing ``day`` defaults to 1.

    When ``quarter`` is given, only ``year`` and ``quarter`` are used and the
    freq must map to the quarterly dtype code (``"Q"`` is assumed if ``freq``
    is None). Otherwise ``year``/``month``/``day``/``hour``/``minute``/
    ``second`` are combined under ``freq``.

    Returns
    -------
    ordinals : ndarray[int64]
    freq : BaseOffset

    Raises
    ------
    AssertionError
        If ``quarter`` is given together with a freq whose dtype code is not
        ``FreqGroup.FR_QTR``.
    """
    day = 1 if day is None else day
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second

    if quarter is None:
        freq = to_offset(freq, is_period=True)
        base = libperiod.freq_to_dtype_code(freq)
        field_arrays = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)
            for y, mth, d, h, mn, s in zip(*field_arrays)
        ]
        return np.array(ordinals, dtype=np.int64), freq

    # Quarterly path: resolve the freq and make sure it really is quarterly.
    if freq is None:
        freq = to_offset("Q", is_period=True)
        base = FreqGroup.FR_QTR.value
    else:
        freq = to_offset(freq, is_period=True)
        base = libperiod.freq_to_dtype_code(freq)
        if base != FreqGroup.FR_QTR.value:
            raise AssertionError("base must equal FR_QTR")

    freqstr = freq.freqstr
    years, quarters = _make_field_arrays(year, quarter)
    # Each (year, quarter) pair is first mapped to a calendar (year, month);
    # the day/hour/minute/second slots of period_ordinal are filled with 1.
    ordinals = [
        libperiod.period_ordinal(
            *parsing.quarter_to_myear(y, q, freqstr), 1, 1, 1, 1, 0, 0, base
        )
        for y, q in zip(years, quarters)
    ]
    return np.array(ordinals, dtype=np.int64), freq
1294
1295
1296def _make_field_arrays(*fields) -> list[np.ndarray]:
1297 length = None
1298 for x in fields:
1299 if isinstance(x, (list, np.ndarray, ABCSeries)):
1300 if length is not None and len(x) != length:
1301 raise ValueError("Mismatched Period array lengths")
1302 if length is None:
1303 length = len(x)
1304
1305 # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected
1306 # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int,
1307 # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]"
1308 return [
1309 np.asarray(x)
1310 if isinstance(x, (np.ndarray, list, ABCSeries))
1311 else np.repeat(x, length) # type: ignore[arg-type]
1312 for x in fields
1313 ]