1from __future__ import annotations
2
3from datetime import timedelta
4import operator
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Literal,
10 Sequence,
11 TypeVar,
12 overload,
13)
14
15import numpy as np
16
17from pandas._libs import (
18 algos as libalgos,
19 lib,
20)
21from pandas._libs.arrays import NDArrayBacked
22from pandas._libs.tslibs import (
23 BaseOffset,
24 NaT,
25 NaTType,
26 Timedelta,
27 astype_overflowsafe,
28 dt64arr_to_periodarr as c_dt64arr_to_periodarr,
29 get_unit_from_dtype,
30 iNaT,
31 parsing,
32 period as libperiod,
33 to_offset,
34)
35from pandas._libs.tslibs.dtypes import FreqGroup
36from pandas._libs.tslibs.fields import isleapyear_arr
37from pandas._libs.tslibs.offsets import (
38 Tick,
39 delta_to_tick,
40)
41from pandas._libs.tslibs.period import (
42 DIFFERENT_FREQ,
43 IncompatibleFrequency,
44 Period,
45 get_period_field_arr,
46 period_asfreq_arr,
47)
48from pandas._typing import (
49 AnyArrayLike,
50 Dtype,
51 NpDtype,
52 npt,
53)
54from pandas.util._decorators import (
55 cache_readonly,
56 doc,
57)
58
59from pandas.core.dtypes.common import (
60 ensure_object,
61 is_datetime64_any_dtype,
62 is_datetime64_dtype,
63 is_dtype_equal,
64 is_float_dtype,
65 is_integer_dtype,
66 is_period_dtype,
67 pandas_dtype,
68)
69from pandas.core.dtypes.dtypes import PeriodDtype
70from pandas.core.dtypes.generic import (
71 ABCIndex,
72 ABCPeriodIndex,
73 ABCSeries,
74 ABCTimedeltaArray,
75)
76from pandas.core.dtypes.missing import isna
77
78import pandas.core.algorithms as algos
79from pandas.core.arrays import datetimelike as dtl
80import pandas.core.common as com
81
82if TYPE_CHECKING:
83 from pandas._typing import (
84 NumpySorter,
85 NumpyValueArrayLike,
86 )
87
88 from pandas.core.arrays import (
89 DatetimeArray,
90 TimedeltaArray,
91 )
92 from pandas.core.arrays.base import ExtensionArray
93
94
# TypeVar bound to BaseOffset; used by the ``validate_dtype_freq`` overloads so
# that passing an offset instance is typed as returning that same offset type.
BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset)


# Keyword substitutions forwarded to the ``@doc`` decorator (fills ``{klass}``
# in the ``PeriodArray.asfreq`` docstring template).
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}
101
102
103def _field_accessor(name: str, docstring=None):
104 def f(self):
105 base = self.freq._period_dtype_code
106 result = get_period_field_arr(name, self.asi8, base)
107 return result
108
109 f.__name__ = name
110 f.__doc__ = docstring
111 return property(f)
112
113
class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):
    """
    Pandas ExtensionArray for storing Period data.

    Users should use :func:`~pandas.period_array` to create new instances.
    Alternatively, :func:`~pandas.array` can be used to create new instances
    from a sequence of Period scalars.

    Parameters
    ----------
    values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
        The data to store. These should be arrays that can be directly
        converted to ordinals without inference or copy (PeriodArray,
        ndarray[int64]), or a box around such an array (Series[period],
        PeriodIndex).
    dtype : PeriodDtype, optional
        A PeriodDtype instance from which to extract a `freq`. If both
        `freq` and `dtype` are specified, then the frequencies must match.
    freq : str or DateOffset
        The `freq` to use for the array. Mostly applicable when `values`
        is an ndarray of integers, when `freq` is required. When `values`
        is a PeriodArray (or box around), it's checked that ``values.freq``
        matches `freq`.
    copy : bool, default False
        Whether to copy the ordinals before storing.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    Period: Represents a period of time.
    PeriodIndex : Immutable Index for period data.
    period_range: Create a fixed-frequency PeriodIndex.
    array: Construct a pandas array.

    Notes
    -----
    There are two components to a PeriodArray

    - ordinals : integer ndarray
    - freq : pd.tseries.offsets.Offset

    The values are physically stored as a 1-D ndarray of integers. These are
    called "ordinals" and represent some kind of offset from a base.

    The `freq` indicates the span covered by each element of the array.
    All elements in the PeriodArray have the same `freq`.
    """

    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _typ = "periodarray"  # ABCPeriodArray
    _internal_fill_value = np.int64(iNaT)
    _recognized_scalars = (Period,)
    _is_recognized_dtype = is_period_dtype  # check_compatible_with checks freq match
    _infer_matches = ("period",)

    @property
    def _scalar_type(self) -> type[Period]:
        """Scalar class boxed by this array (always ``Period``)."""
        return Period

    # Names others delegate to us
    _other_ops: list[str] = []
    _bool_ops: list[str] = ["is_leap_year"]
    _object_ops: list[str] = ["start_time", "end_time", "freq"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "weekday",
        "week",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "qyear",
        "days_in_month",
        "daysinmonth",
    ]
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]

    _dtype: PeriodDtype

    # --------------------------------------------------------------------
    # Constructors

    def __init__(
        self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
    ) -> None:
        """
        Store int64 ordinals together with a PeriodDtype built from ``freq``.

        See the class docstring for the meaning of the parameters.

        Raises
        ------
        TypeError
            If ``values`` is a Series that does not wrap a PeriodArray.
        IncompatibleFrequency
            If ``values`` already carries a freq that differs from the
            requested one.
        ValueError
            If no freq was given and none can be taken from ``values``.
        """
        freq = validate_dtype_freq(dtype, freq)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        if isinstance(values, ABCSeries):
            values = values._values
            if not isinstance(values, type(self)):
                raise TypeError("Incorrect dtype")

        elif isinstance(values, ABCPeriodIndex):
            values = values._values

        if isinstance(values, type(self)):
            if freq is not None and freq != values.freq:
                raise raise_on_incompatible(values, freq)
            values, freq = values._ndarray, values.freq

        if copy:
            values = np.array(values, dtype="int64", copy=True)
        else:
            # On NumPy >= 2, ``np.array(..., copy=False)`` raises whenever a
            # copy cannot be avoided (e.g. list or int32 input).  ``asarray``
            # keeps the "copy only if needed" semantics on every NumPy version.
            values = np.asarray(values, dtype="int64")
        if freq is None:
            raise ValueError("freq is not specified and cannot be inferred")
        NDArrayBacked.__init__(self, values, PeriodDtype(freq))

    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: np.ndarray,
        freq: BaseOffset | None = None,
        dtype: Dtype | None = None,
    ) -> PeriodArray:
        """
        Construct from an int64 ndarray of ordinals.

        Asserts that ``values`` is an ``i8`` ndarray and then defers to the
        regular constructor.
        """
        # alias for PeriodArray.__init__
        assertion_msg = "Should be numpy array of type i8"
        assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
        return cls(values, freq=freq, dtype=dtype)

    @classmethod
    def _from_sequence(
        cls: type[PeriodArray],
        scalars: Sequence[Period | None] | AnyArrayLike,
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
    ) -> PeriodArray:
        """
        Construct a PeriodArray from a sequence of Period-like scalars.

        Parameters
        ----------
        scalars : sequence of Period or None, or array-like
        dtype : PeriodDtype or str, optional
            When it resolves to a PeriodDtype its freq is used; otherwise the
            freq is extracted from ``scalars``.
        copy : bool, default False
            Only consulted when ``scalars`` is already a PeriodArray.

        Returns
        -------
        PeriodArray
        """
        if dtype is not None:
            # Normalize so that string aliases such as "period[D]" are honored
            # instead of being silently ignored.
            dtype = pandas_dtype(dtype)
        freq = dtype.freq if isinstance(dtype, PeriodDtype) else None

        if isinstance(scalars, cls):
            validate_dtype_freq(scalars.dtype, freq)
            if copy:
                scalars = scalars.copy()
            return scalars

        periods = np.asarray(scalars, dtype=object)

        freq = freq or libperiod.extract_freq(periods)
        ordinals = libperiod.extract_ordinals(periods, freq)
        return cls(ordinals, freq=freq)

    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> PeriodArray:
        """Construct from a sequence of strings; defers to ``_from_sequence``."""
        return cls._from_sequence(strings, dtype=dtype, copy=copy)

    @classmethod
    def _from_datetime64(cls, data, freq, tz=None) -> PeriodArray:
        """
        Construct a PeriodArray from a datetime64 array

        Parameters
        ----------
        data : ndarray[datetime64[ns], datetime64[ns, tz]]
        freq : str or Tick
        tz : tzinfo, optional

        Returns
        -------
        PeriodArray[freq]
        """
        data, freq = dt64arr_to_periodarr(data, freq, tz)
        return cls(data, freq=freq)

    @classmethod
    def _generate_range(cls, start, end, periods, freq, fields):
        """
        Compute ordinals and freq for a period range.

        The range is described either by endpoints (``start`` / ``end`` /
        ``periods``) or by ``fields`` (keyword arguments forwarded to
        ``_range_from_fields``), never both.

        Returns
        -------
        subarr : ndarray[int64]
        freq : BaseOffset

        Raises
        ------
        ValueError
            If both endpoints and fields are given, or neither is.
        """
        periods = dtl.validate_periods(periods)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        field_count = len(fields)
        if start is not None or end is not None:
            if field_count > 0:
                raise ValueError(
                    "Can either instantiate from fields or endpoints, but not both"
                )
            subarr, freq = _get_ordinal_range(start, end, periods, freq)
        elif field_count > 0:
            subarr, freq = _range_from_fields(freq=freq, **fields)
        else:
            raise ValueError("Not enough parameters to construct Period range")

        return subarr, freq

    # -----------------------------------------------------------------
    # DatetimeLike Interface

    # error: Argument 1 of "_unbox_scalar" is incompatible with supertype
    # "DatetimeLikeArrayMixin"; supertype defines the argument type as
    # "Union[Union[Period, Any, Timedelta], NaTType]"
    def _unbox_scalar(  # type: ignore[override]
        self,
        value: Period | NaTType,
    ) -> np.int64:
        """
        Convert a Period or NaT into the int64 stored in the array.

        Raises
        ------
        ValueError
            If ``value`` is neither NaT nor a Period.
        """
        if value is NaT:
            # error: Item "Period" of "Union[Period, NaTType]" has no attribute
            # "_value"
            return np.int64(value._value)  # type: ignore[union-attr]
        elif isinstance(value, self._scalar_type):
            self._check_compatible_with(value)
            return np.int64(value.ordinal)
        else:
            raise ValueError(f"'value' should be a Period. Got '{value}' instead.")

    def _scalar_from_string(self, value: str) -> Period:
        """Parse ``value`` into a Period that uses this array's freq."""
        return Period(value, freq=self.freq)

    def _check_compatible_with(self, other) -> None:
        """
        Check that ``other`` can be combined with this array.

        NaT is always accepted; anything else is passed to
        ``_require_matching_freq``.
        """
        if other is NaT:
            return
        self._require_matching_freq(other)

    # --------------------------------------------------------------------
    # Data / Attributes

    @cache_readonly
    def dtype(self) -> PeriodDtype:
        """The PeriodDtype of this array."""
        return self._dtype

    # error: Cannot override writeable attribute with read-only property
    @property  # type: ignore[override]
    def freq(self) -> BaseOffset:
        """
        Return the frequency object for this PeriodArray.
        """
        return self.dtype.freq

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        """
        Convert to a NumPy array.

        ``"i8"`` returns the ordinals and ``bool`` returns the not-NaT mask;
        every other request goes through an object array of the elements.
        """
        if dtype == "i8":
            return self.asi8
        elif dtype == bool:
            return ~self._isnan

        # This will raise TypeError for non-object dtypes
        return np.array(list(self), dtype=object)

    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.
        """
        import pyarrow

        from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

        if type is not None:
            if pyarrow.types.is_integer(type):
                return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
            elif isinstance(type, ArrowPeriodType):
                # ensure we have the same freq
                if self.freqstr != type.freq:
                    raise TypeError(
                        "Not supported to convert PeriodArray to array with different "
                        f"'freq' ({self.freqstr} vs {type.freq})"
                    )
            else:
                raise TypeError(
                    f"Not supported to convert PeriodArray to '{type}' type"
                )

        period_type = ArrowPeriodType(self.freqstr)
        storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
        return pyarrow.ExtensionArray.from_storage(period_type, storage_array)

    # --------------------------------------------------------------------
    # Vectorized analogues of Period properties

    year = _field_accessor(
        "year",
        """
        The year of the period.
        """,
    )
    month = _field_accessor(
        "month",
        """
        The month as January=1, December=12.
        """,
    )
    day = _field_accessor(
        "day",
        """
        The days of the period.
        """,
    )
    hour = _field_accessor(
        "hour",
        """
        The hour of the period.
        """,
    )
    minute = _field_accessor(
        "minute",
        """
        The minute of the period.
        """,
    )
    second = _field_accessor(
        "second",
        """
        The second of the period.
        """,
    )
    weekofyear = _field_accessor(
        "week",
        """
        The week ordinal of the year.
        """,
    )
    week = weekofyear
    day_of_week = _field_accessor(
        "day_of_week",
        """
        The day of the week with Monday=0, Sunday=6.
        """,
    )
    dayofweek = day_of_week
    weekday = dayofweek
    dayofyear = day_of_year = _field_accessor(
        "day_of_year",
        """
        The ordinal day of the year.
        """,
    )
    quarter = _field_accessor(
        "quarter",
        """
        The quarter of the date.
        """,
    )
    qyear = _field_accessor("qyear")
    days_in_month = _field_accessor(
        "days_in_month",
        """
        The number of days in the month.
        """,
    )
    daysinmonth = days_in_month

    @property
    def is_leap_year(self) -> np.ndarray:
        """
        Logical indicating if the date belongs to a leap year.
        """
        return isleapyear_arr(np.asarray(self.year))

    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
        """
        Cast to DatetimeArray/Index.

        Parameters
        ----------
        freq : str or DateOffset, optional
            Target frequency. The default is 'D' for week or longer,
            'S' otherwise.
        how : {'s', 'e', 'start', 'end'}
            Whether to use the start or end of the time period being converted.

        Returns
        -------
        DatetimeArray/Index
        """
        from pandas.core.arrays import DatetimeArray

        how = libperiod.validate_end_alias(how)

        end = how == "E"
        if end:
            # End timestamps are derived from a start-of-period conversion
            # shifted to the last nanosecond of the period.
            if freq == "B" or self.freq == "B":
                # roll forward to ensure we land on B date
                adjust = Timedelta(1, "D") - Timedelta(1, "ns")
                return self.to_timestamp(how="start") + adjust
            else:
                adjust = Timedelta(1, "ns")
                return (self + self.freq).to_timestamp(how="start") - adjust

        if freq is None:
            freq = self._dtype._get_to_timestamp_base()
            base = freq
        else:
            freq = Period._maybe_convert_freq(freq)
            base = freq._period_dtype_code

        new_parr = self.asfreq(freq, how=how)

        new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
        dta = DatetimeArray(new_data)

        if self.freq.name == "B":
            # See if we can retain BDay instead of Day in cases where
            #  len(self) is too small for infer_freq to distinguish between them
            diffs = libalgos.unique_deltas(self.asi8)
            if len(diffs) == 1:
                diff = diffs[0]
                if diff == self.freq.n:
                    dta._freq = self.freq
                elif diff == 1:
                    dta._freq = self.freq.base
                # TODO: other cases?
            return dta
        else:
            return dta._with_freq("infer")

    # --------------------------------------------------------------------

    def _box_func(self, x) -> Period | NaTType:
        """Box the ordinal ``x`` as a scalar that uses this array's freq."""
        return Period._from_ordinal(ordinal=x, freq=self.freq)

    @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex")
    def asfreq(self, freq=None, how: str = "E") -> PeriodArray:
        """
        Convert the {klass} to the specified frequency `freq`.

        Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments
        to each :class:`~pandas.Period` in this {klass}.

        Parameters
        ----------
        freq : str
            A frequency.
        how : str {{'E', 'S'}}, default 'E'
            Whether the elements should be aligned to the end
            or start within a period.

            * 'E', 'END', or 'FINISH' for end,
            * 'S', 'START', or 'BEGIN' for start.

            January 31st ('END') vs. January 1st ('START') for example.

        Returns
        -------
        {klass}
            The transformed {klass} with the new frequency.

        See Also
        --------
        {other}.asfreq: Convert each Period in a {other_name} to the given frequency.
        Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.

        Examples
        --------
        >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
        >>> pidx
        PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
        dtype='period[A-DEC]')

        >>> pidx.asfreq('M')
        PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
        '2015-12'], dtype='period[M]')

        >>> pidx.asfreq('M', how='S')
        PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
        '2015-01'], dtype='period[M]')
        """
        how = libperiod.validate_end_alias(how)

        freq = Period._maybe_convert_freq(freq)

        base1 = self._dtype._dtype_code
        base2 = freq._period_dtype_code

        asi8 = self.asi8
        # self.freq.n can't be negative or 0
        end = how == "E"
        if end:
            ordinal = asi8 + self.freq.n - 1
        else:
            ordinal = asi8

        new_data = period_asfreq_arr(ordinal, base1, base2, end)

        if self._hasna:
            # Restore the missing-value marker at positions that were NaT.
            new_data[self._isnan] = iNaT

        return type(self)(new_data, freq=freq)

    # ------------------------------------------------------------------
    # Rendering Methods

    def _formatter(self, boxed: bool = False):
        """
        Return the callable used to render one element.

        ``str`` when ``boxed`` is True, otherwise a formatter that wraps the
        element in single quotes.
        """
        if boxed:
            return str
        return "'{}'".format

    @dtl.ravel_compat
    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        actually format my specific types
        """
        values = self.astype(object)

        # Create the formatter function
        if date_format:
            formatter = lambda per: per.strftime(date_format)
        else:
            # Uses `_Period.str` which in turn uses `format_period`
            formatter = lambda per: str(per)

        # Apply the formatter to all values in the array, possibly with a mask
        if self._hasna:
            mask = self._isnan
            values[mask] = na_rep
            imask = ~mask
            values[imask] = np.array([formatter(per) for per in values[imask]])
        else:
            values = np.array([formatter(per) for per in values])
        return values

    # ------------------------------------------------------------------

    def astype(self, dtype, copy: bool = True):
        """
        Cast to ``dtype``.

        An equal dtype returns ``self`` (or a copy when ``copy`` is True), a
        period dtype goes through ``asfreq``, a datetime64 dtype goes through
        ``to_timestamp`` followed by ``tz_localize``; anything else is
        handled by the parent class.
        """
        # We handle Period[T] -> Period[U]
        # Our parent handles everything else.
        dtype = pandas_dtype(dtype)
        if is_dtype_equal(dtype, self._dtype):
            if not copy:
                return self
            else:
                return self.copy()
        if is_period_dtype(dtype):
            return self.asfreq(dtype.freq)

        if is_datetime64_any_dtype(dtype):
            # GH#45038 match PeriodIndex behavior.
            tz = getattr(dtype, "tz", None)
            return self.to_timestamp().tz_localize(tz)

        return super().astype(dtype, copy=copy)

    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        """
        Find insertion positions for ``value`` that keep the array sorted.

        ``value`` is validated with ``_validate_setitem_value`` and the search
        itself runs on datetime64 views of the ordinals.
        """
        npvalue = self._validate_setitem_value(value).view("M8[ns]")

        # Cast to M8 to get datetime-like NaT placement,
        #  similar to dtl._period_dispatch
        m8arr = self._ndarray.view("M8[ns]")
        return m8arr.searchsorted(npvalue, side=side, sorter=sorter)

    def fillna(self, value=None, method=None, limit=None) -> PeriodArray:
        """
        Fill missing values.

        When ``method`` is given the fill is performed on a datetime64 view
        and the result is viewed back as this dtype; otherwise the parent
        implementation is used.
        """
        if method is not None:
            # view as dt64 so we get treated as timelike in core.missing,
            #  similar to dtl._period_dispatch
            dta = self.view("M8[ns]")
            result = dta.fillna(value=value, method=method, limit=limit)
            # error: Incompatible return value type (got "Union[ExtensionArray,
            # ndarray[Any, Any]]", expected "PeriodArray")
            return result.view(self.dtype)  # type: ignore[return-value]
        return super().fillna(value=value, method=method, limit=limit)

    # ------------------------------------------------------------------
    # Arithmetic Methods

    def _addsub_int_array_or_scalar(
        self, other: np.ndarray | int, op: Callable[[Any, Any], Any]
    ) -> PeriodArray:
        """
        Add or subtract array of integers.

        Parameters
        ----------
        other : np.ndarray[int64] or int
        op : {operator.add, operator.sub}

        Returns
        -------
        result : PeriodArray
        """
        assert op in [operator.add, operator.sub]
        if op is operator.sub:
            # Subtraction is implemented as addition of the negated operand.
            other = -other
        res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
        return type(self)(res_values, freq=self.freq)

    def _add_offset(self, other: BaseOffset):
        """
        Add a non-Tick offset by shifting the ordinals by ``other.n``.

        ``other`` is first checked with
        ``_require_matching_freq(other, base=True)``.
        """
        assert not isinstance(other, Tick)

        self._require_matching_freq(other, base=True)
        return self._addsub_int_array_or_scalar(other.n, operator.add)

    # TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
    def _add_timedeltalike_scalar(self, other):
        """
        Parameters
        ----------
        other : timedelta, Tick, np.timedelta64

        Returns
        -------
        PeriodArray
        """
        if not isinstance(self.freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise raise_on_incompatible(self, other)

        if isna(other):
            # i.e. np.timedelta64("NaT")
            return super()._add_timedeltalike_scalar(other)

        td = np.asarray(Timedelta(other).asm8)
        return self._add_timedelta_arraylike(td)

    def _add_timedelta_arraylike(
        self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
    ) -> PeriodArray:
        """
        Parameters
        ----------
        other : TimedeltaArray or ndarray[timedelta64]

        Returns
        -------
        PeriodArray
        """
        freq = self.freq
        if not isinstance(freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise TypeError(
                f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
            )

        dtype = np.dtype(f"m8[{freq._td64_unit}]")

        try:
            delta = astype_overflowsafe(
                np.asarray(other), dtype=dtype, copy=False, round_ok=False
            )
        except ValueError as err:
            # e.g. if we have minutes freq and try to add 30s
            # "Cannot losslessly convert units"
            raise IncompatibleFrequency(
                "Cannot add/subtract timedelta-like from PeriodArray that is "
                "not an integer multiple of the PeriodArray's freq."
            ) from err

        b_mask = np.isnat(delta)

        res_values = algos.checked_add_with_arr(
            self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
        )
        # NaT on either side produces NaT in the result.
        np.putmask(res_values, self._isnan | b_mask, iNaT)
        return type(self)(res_values, freq=self.freq)

    def _check_timedeltalike_freq_compat(self, other):
        """
        Arithmetic operations with timedelta-like scalars or array `other`
        are only valid if `other` is an integer multiple of `self.freq`.
        If the operation is valid, find that integer multiple.  Otherwise,
        raise because the operation is invalid.

        Parameters
        ----------
        other : timedelta, np.timedelta64, Tick,
                ndarray[timedelta64], TimedeltaArray, TimedeltaIndex

        Returns
        -------
        multiple : int or ndarray[int64]

        Raises
        ------
        IncompatibleFrequency
        """
        assert isinstance(self.freq, Tick)  # checked by calling function

        dtype = np.dtype(f"m8[{self.freq._td64_unit}]")

        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            td = np.asarray(Timedelta(other).asm8)
        else:
            td = np.asarray(other)

        try:
            delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False)
        except ValueError as err:
            raise raise_on_incompatible(self, other) from err

        delta = delta.view("i8")
        return lib.item_from_zerodim(delta)
819
820
def raise_on_incompatible(left, right):
    """
    Build (without raising) a uniformly worded IncompatibleFrequency error.

    Parameters
    ----------
    left : PeriodArray
        Object whose class name and ``freqstr`` appear in the message.
    right : None, DateOffset, Period, ndarray, or timedelta-like
        The other operand; determines the "other freq" part of the message.

    Returns
    -------
    IncompatibleFrequency
        Exception to be raised by the caller.
    """
    # GH#24283 error message format depends on whether right is scalar
    if right is None or isinstance(right, (np.ndarray, ABCTimedeltaArray)):
        # Array-likes (and a missing operand) have no single freq to report.
        right_freqstr = None
    elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, BaseOffset)):
        right_freqstr = right.freqstr
    else:
        # Timedelta-like scalar: report the freq of the equivalent Tick.
        right_freqstr = delta_to_tick(Timedelta(right)).freqstr

    return IncompatibleFrequency(
        DIFFERENT_FREQ.format(
            cls=type(left).__name__,
            own_freq=left.freqstr,
            other_freq=right_freqstr,
        )
    )
848
849
850# -------------------------------------------------------------------
851# Constructor Helpers
852
853
def period_array(
    data: Sequence[Period | str | None] | AnyArrayLike,
    freq: str | Tick | None = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq.`` Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made. This is forwarded to
        the PeriodArray constructor when `data` already has a period dtype.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` is a non-empty array of floating point values.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[A-DEC]

    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[A-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    data_dtype = getattr(data, "dtype", None)

    if is_datetime64_dtype(data_dtype):
        return PeriodArray._from_datetime64(data, freq)
    if is_period_dtype(data_dtype):
        # Honor ``copy`` here: without it the result always shares the
        # ordinals of ``data`` and the documented parameter has no effect.
        return PeriodArray(data, freq=freq, copy=copy)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
        data = list(data)

    arrdata = np.asarray(data)

    dtype: PeriodDtype | None
    if freq:
        dtype = PeriodDtype(freq)
    else:
        dtype = None

    if is_float_dtype(arrdata) and len(arrdata) > 0:
        raise TypeError("PeriodIndex does not allow floating point in construction")

    if is_integer_dtype(arrdata.dtype):
        arr = arrdata.astype(np.int64, copy=False)
        # error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
        # Tick, None]"; expected "Union[timedelta, BaseOffset, str]"
        ordinals = libperiod.from_ordinals(arr, freq)  # type: ignore[arg-type]
        return PeriodArray(ordinals, dtype=dtype)

    # Remaining inputs (Period objects, strings, None/NaT) are handled
    # element-wise from an object array.
    data = ensure_object(arrdata)

    return PeriodArray._from_sequence(data, dtype=dtype)
944
945
@overload
def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT:
    ...


@overload
def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset:
    ...


def validate_dtype_freq(
    dtype, freq: BaseOffsetT | timedelta | str | None
) -> BaseOffsetT:
    """
    Reconcile a dtype and a freq into a single frequency.

    When both are supplied they must describe the same frequency; when only
    the dtype is supplied its freq is returned.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if freq is not None:
        # error: Incompatible types in assignment (expression has type
        # "BaseOffset", variable has type "Union[BaseOffsetT, timedelta,
        # str, None]")
        freq = to_offset(freq)  # type: ignore[assignment]

    if dtype is None:
        # Nothing to cross-check against; hand back the (converted) freq.
        # error: Incompatible return value type (got "Union[BaseOffset, Any, None]",
        # expected "BaseOffset")
        return freq  # type: ignore[return-value]

    dtype = pandas_dtype(dtype)
    if not is_period_dtype(dtype):
        raise ValueError("dtype must be PeriodDtype")

    if freq is None:
        return dtype.freq
    if freq != dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    return freq  # type: ignore[return-value]
994
995
def dt64arr_to_periodarr(
    data, freq, tz=None
) -> tuple[npt.NDArray[np.int64], BaseOffset]:
    """
    Convert a datetime64 array-like into Period ordinals.

    Parameters
    ----------
    data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
    freq : Optional[Union[str, Tick]]
        Must match the `freq` on the `data` if `data` is a DatetimeIndex
        or Series.
    tz : Optional[tzinfo]

    Returns
    -------
    ordinals : ndarray[int64]
    freq : Tick
        The frequency extracted from the Series or DatetimeIndex if that's
        used.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not a NumPy datetime64 dtype.
    """
    is_np_dt64 = isinstance(data.dtype, np.dtype) and data.dtype.kind == "M"
    if not is_np_dt64:
        raise ValueError(f"Wrong dtype: {data.dtype}")

    # Unwrap Index/Series boxes; when no freq was passed, take it from the box
    # before the reference to the box is replaced by its values.
    if isinstance(data, ABCIndex):
        if freq is None:
            freq = data.freq
        data = data._values
    elif isinstance(data, ABCSeries):
        if freq is None:
            freq = data.dt.freq
        data = data._values

    freq = Period._maybe_convert_freq(freq)
    ordinals = c_dt64arr_to_periodarr(
        data.view("i8"),
        freq._period_dtype_code,
        tz,
        reso=get_unit_from_dtype(data.dtype),
    )
    return ordinals, freq
1034
1035
1036def _get_ordinal_range(start, end, periods, freq, mult: int = 1):
1037 if com.count_not_none(start, end, periods) != 2:
1038 raise ValueError(
1039 "Of the three parameters: start, end, and periods, "
1040 "exactly two must be specified"
1041 )
1042
1043 if freq is not None:
1044 freq = to_offset(freq)
1045 mult = freq.n
1046
1047 if start is not None:
1048 start = Period(start, freq)
1049 if end is not None:
1050 end = Period(end, freq)
1051
1052 is_start_per = isinstance(start, Period)
1053 is_end_per = isinstance(end, Period)
1054
1055 if is_start_per and is_end_per and start.freq != end.freq:
1056 raise ValueError("start and end must have same freq")
1057 if start is NaT or end is NaT:
1058 raise ValueError("start and end must not be NaT")
1059
1060 if freq is None:
1061 if is_start_per:
1062 freq = start.freq
1063 elif is_end_per:
1064 freq = end.freq
1065 else: # pragma: no cover
1066 raise ValueError("Could not infer freq from start/end")
1067
1068 if periods is not None:
1069 periods = periods * mult
1070 if start is None:
1071 data = np.arange(
1072 end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64
1073 )
1074 else:
1075 data = np.arange(
1076 start.ordinal, start.ordinal + periods, mult, dtype=np.int64
1077 )
1078 else:
1079 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
1080
1081 return data, freq
1082
1083
def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
) -> tuple[np.ndarray, BaseOffset]:
    """
    Compute period ordinals from separate date/time field values.

    Missing ``hour``, ``minute`` and ``second`` default to 0 and a missing
    ``day`` defaults to 1.  When ``quarter`` is given the ordinals are built
    from ``year``/``quarter`` pairs and the freq must be quarterly (it
    defaults to ``"Q"``); otherwise they are built from the
    year/month/day/hour/minute/second fields.

    Returns
    -------
    ordinals : ndarray[int64]
    freq : BaseOffset

    Raises
    ------
    AssertionError
        If ``quarter`` is given together with a non-quarterly ``freq``.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    if quarter is not None:
        qtr_code = FreqGroup.FR_QTR.value
        if freq is None:
            freq = to_offset("Q")
            base = qtr_code
        else:
            freq = to_offset(freq)
            base = libperiod.freq_to_dtype_code(freq)
            if base != qtr_code:
                raise AssertionError("base must equal FR_QTR")

        freqstr = freq.freqstr
        year, quarter = _make_field_arrays(year, quarter)
        ordinals = []
        for yr, qtr in zip(year, quarter):
            # Translate the (year, quarter) pair into a calendar year/month.
            cal_year, cal_month = parsing.quarter_to_myear(yr, qtr, freqstr)
            ordinals.append(
                libperiod.period_ordinal(cal_year, cal_month, 1, 1, 1, 1, 0, 0, base)
            )
    else:
        freq = to_offset(freq)
        base = libperiod.freq_to_dtype_code(freq)
        field_arrays = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(yr, mth, d, h, mn, s, 0, 0, base)
            for yr, mth, d, h, mn, s in zip(*field_arrays)
        ]

    return np.array(ordinals, dtype=np.int64), freq
1129
1130
1131def _make_field_arrays(*fields) -> list[np.ndarray]:
1132 length = None
1133 for x in fields:
1134 if isinstance(x, (list, np.ndarray, ABCSeries)):
1135 if length is not None and len(x) != length:
1136 raise ValueError("Mismatched Period array lengths")
1137 if length is None:
1138 length = len(x)
1139
1140 # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected
1141 # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int,
1142 # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]"
1143 return [
1144 np.asarray(x)
1145 if isinstance(x, (np.ndarray, list, ABCSeries))
1146 else np.repeat(x, length) # type: ignore[arg-type]
1147 for x in fields
1148 ]