Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/period.py: 25%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

448 statements  

1from __future__ import annotations 

2 

3from datetime import timedelta 

4import operator 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Literal, 

10 TypeVar, 

11 cast, 

12 overload, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 algos as libalgos, 

20 lib, 

21) 

22from pandas._libs.arrays import NDArrayBacked 

23from pandas._libs.tslibs import ( 

24 BaseOffset, 

25 NaT, 

26 NaTType, 

27 Timedelta, 

28 add_overflowsafe, 

29 astype_overflowsafe, 

30 dt64arr_to_periodarr as c_dt64arr_to_periodarr, 

31 get_unit_from_dtype, 

32 iNaT, 

33 parsing, 

34 period as libperiod, 

35 to_offset, 

36) 

37from pandas._libs.tslibs.dtypes import ( 

38 FreqGroup, 

39 PeriodDtypeBase, 

40 freq_to_period_freqstr, 

41) 

42from pandas._libs.tslibs.fields import isleapyear_arr 

43from pandas._libs.tslibs.offsets import ( 

44 Tick, 

45 delta_to_tick, 

46) 

47from pandas._libs.tslibs.period import ( 

48 DIFFERENT_FREQ, 

49 IncompatibleFrequency, 

50 Period, 

51 get_period_field_arr, 

52 period_asfreq_arr, 

53) 

54from pandas.util._decorators import ( 

55 cache_readonly, 

56 doc, 

57) 

58from pandas.util._exceptions import find_stack_level 

59 

60from pandas.core.dtypes.common import ( 

61 ensure_object, 

62 pandas_dtype, 

63) 

64from pandas.core.dtypes.dtypes import ( 

65 DatetimeTZDtype, 

66 PeriodDtype, 

67) 

68from pandas.core.dtypes.generic import ( 

69 ABCIndex, 

70 ABCPeriodIndex, 

71 ABCSeries, 

72 ABCTimedeltaArray, 

73) 

74from pandas.core.dtypes.missing import isna 

75 

76from pandas.core.arrays import datetimelike as dtl 

77import pandas.core.common as com 

78 

79if TYPE_CHECKING: 

80 from collections.abc import Sequence 

81 

82 from pandas._typing import ( 

83 AnyArrayLike, 

84 Dtype, 

85 FillnaOptions, 

86 NpDtype, 

87 NumpySorter, 

88 NumpyValueArrayLike, 

89 Self, 

90 npt, 

91 ) 

92 

93 from pandas.core.arrays import ( 

94 DatetimeArray, 

95 TimedeltaArray, 

96 ) 

97 from pandas.core.arrays.base import ExtensionArray 

98 

99 

# Type variable constrained to BaseOffset and its subclasses.
BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset)


# Substitution values for docstring templates; ``PeriodArray.asfreq`` unpacks
# this mapping into ``@doc`` to fill its ``{klass}`` placeholder.
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}

106 

107 

108def _field_accessor(name: str, docstring: str | None = None): 

109 def f(self): 

110 base = self.dtype._dtype_code 

111 result = get_period_field_arr(name, self.asi8, base) 

112 return result 

113 

114 f.__name__ = name 

115 f.__doc__ = docstring 

116 return property(f) 

117 

118 

# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):  # type: ignore[misc]
    """
    Pandas ExtensionArray for storing Period data.

    Users should use :func:`~pandas.array` to create new instances.

    Parameters
    ----------
    values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
        The data to store. These should be arrays that can be directly
        converted to ordinals without inference or copy (PeriodArray,
        ndarray[int64]), or a box around such an array (Series[period],
        PeriodIndex).
    dtype : PeriodDtype, optional
        A PeriodDtype instance from which to extract a `freq`. If both
        `freq` and `dtype` are specified, then the frequencies must match.
    freq : str or DateOffset
        The `freq` to use for the array. Mostly applicable when `values`
        is an ndarray of integers, when `freq` is required. When `values`
        is a PeriodArray (or box around), it's checked that ``values.freq``
        matches `freq`.
    copy : bool, default False
        Whether to copy the ordinals before storing.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    Period: Represents a period of time.
    PeriodIndex : Immutable Index for period data.
    period_range: Create a fixed-frequency PeriodArray.
    array: Construct a pandas array.

    Notes
    -----
    There are two components to a PeriodArray

    - ordinals : integer ndarray
    - freq : pd.tseries.offsets.Offset

    The values are physically stored as a 1-D ndarray of integers. These are
    called "ordinals" and represent some kind of offset from a base.

    The `freq` indicates the span covered by each element of the array.
    All elements in the PeriodArray have the same `freq`.

    Examples
    --------
    >>> pd.arrays.PeriodArray(pd.PeriodIndex(['2023-01-01',
    ...                                       '2023-01-02'], freq='D'))
    <PeriodArray>
    ['2023-01-01', '2023-01-02']
    Length: 2, dtype: period[D]
    """

    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _typ = "periodarray"  # ABCPeriodArray
    # int64 form of iNaT; asfreq writes iNaT into the positions flagged
    # by ``_isnan``.
    _internal_fill_value = np.int64(iNaT)
    # NOTE(review): presumably consumed by the datetimelike base class to
    # decide which scalars count as native elements -- confirm there.
    _recognized_scalars = (Period,)
    _is_recognized_dtype = lambda x: isinstance(
        x, PeriodDtype
    )  # check_compatible_with checks freq match
    # NOTE(review): looks like the inferred-type label(s) accepted for this
    # array; the consumer is outside this file section -- confirm.
    _infer_matches = ("period",)

    @property
    def _scalar_type(self) -> type[Period]:
        # Scalar class used by _unbox_scalar for its isinstance check.
        return Period

    # Names others delegate to us
    _other_ops: list[str] = []
    _bool_ops: list[str] = ["is_leap_year"]
    _object_ops: list[str] = ["start_time", "end_time", "freq"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "weekday",
        "week",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "qyear",
        "days_in_month",
        "daysinmonth",
    ]
    # Union of the field, object and boolean attribute names above.
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]

    # Declared for type checkers only; the ``dtype`` property returns it.
    _dtype: PeriodDtype

223 

224 # -------------------------------------------------------------------- 

225 # Constructors 

226 

227 def __init__( 

228 self, values, dtype: Dtype | None = None, freq=None, copy: bool = False 

229 ) -> None: 

230 if freq is not None: 

231 # GH#52462 

232 warnings.warn( 

233 "The 'freq' keyword in the PeriodArray constructor is deprecated " 

234 "and will be removed in a future version. Pass 'dtype' instead", 

235 FutureWarning, 

236 stacklevel=find_stack_level(), 

237 ) 

238 freq = validate_dtype_freq(dtype, freq) 

239 dtype = PeriodDtype(freq) 

240 

241 if dtype is not None: 

242 dtype = pandas_dtype(dtype) 

243 if not isinstance(dtype, PeriodDtype): 

244 raise ValueError(f"Invalid dtype {dtype} for PeriodArray") 

245 

246 if isinstance(values, ABCSeries): 

247 values = values._values 

248 if not isinstance(values, type(self)): 

249 raise TypeError("Incorrect dtype") 

250 

251 elif isinstance(values, ABCPeriodIndex): 

252 values = values._values 

253 

254 if isinstance(values, type(self)): 

255 if dtype is not None and dtype != values.dtype: 

256 raise raise_on_incompatible(values, dtype.freq) 

257 values, dtype = values._ndarray, values.dtype 

258 

259 if not copy: 

260 values = np.asarray(values, dtype="int64") 

261 else: 

262 values = np.array(values, dtype="int64", copy=copy) 

263 if dtype is None: 

264 raise ValueError("dtype is not specified and cannot be inferred") 

265 dtype = cast(PeriodDtype, dtype) 

266 NDArrayBacked.__init__(self, values, dtype) 

267 

268 # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" 

269 @classmethod 

270 def _simple_new( # type: ignore[override] 

271 cls, 

272 values: npt.NDArray[np.int64], 

273 dtype: PeriodDtype, 

274 ) -> Self: 

275 # alias for PeriodArray.__init__ 

276 assertion_msg = "Should be numpy array of type i8" 

277 assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg 

278 return cls(values, dtype=dtype) 

279 

280 @classmethod 

281 def _from_sequence( 

282 cls, 

283 scalars, 

284 *, 

285 dtype: Dtype | None = None, 

286 copy: bool = False, 

287 ) -> Self: 

288 if dtype is not None: 

289 dtype = pandas_dtype(dtype) 

290 if dtype and isinstance(dtype, PeriodDtype): 

291 freq = dtype.freq 

292 else: 

293 freq = None 

294 

295 if isinstance(scalars, cls): 

296 validate_dtype_freq(scalars.dtype, freq) 

297 if copy: 

298 scalars = scalars.copy() 

299 return scalars 

300 

301 periods = np.asarray(scalars, dtype=object) 

302 

303 freq = freq or libperiod.extract_freq(periods) 

304 ordinals = libperiod.extract_ordinals(periods, freq) 

305 dtype = PeriodDtype(freq) 

306 return cls(ordinals, dtype=dtype) 

307 

308 @classmethod 

309 def _from_sequence_of_strings( 

310 cls, strings, *, dtype: Dtype | None = None, copy: bool = False 

311 ) -> Self: 

312 return cls._from_sequence(strings, dtype=dtype, copy=copy) 

313 

@classmethod
def _from_datetime64(cls, data, freq, tz=None) -> Self:
    """
    Construct a PeriodArray from a datetime64 array.

    Parameters
    ----------
    data : ndarray[datetime64[ns], datetime64[ns, tz]]
    freq : str or Tick
    tz : tzinfo, optional

    Returns
    -------
    PeriodArray[freq]
    """
    if isinstance(freq, BaseOffset):
        # Offset objects are first reduced to their period freq string.
        freq = freq_to_period_freqstr(freq.n, freq.name)
    ordinals, resolved_freq = dt64arr_to_periodarr(data, freq, tz)
    return cls(ordinals, dtype=PeriodDtype(resolved_freq))

334 

335 @classmethod 

336 def _generate_range(cls, start, end, periods, freq): 

337 periods = dtl.validate_periods(periods) 

338 

339 if freq is not None: 

340 freq = Period._maybe_convert_freq(freq) 

341 

342 if start is not None or end is not None: 

343 subarr, freq = _get_ordinal_range(start, end, periods, freq) 

344 else: 

345 raise ValueError("Not enough parameters to construct Period range") 

346 

347 return subarr, freq 

348 

@classmethod
def _from_fields(cls, *, fields: dict, freq) -> Self:
    """
    Build an array from a mapping of field arrays.

    ``fields`` is unpacked as keyword arguments to ``_range_from_fields``,
    which returns the ordinals and the freq to use.
    """
    ordinals, resolved_freq = _range_from_fields(freq=freq, **fields)
    return cls._simple_new(ordinals, dtype=PeriodDtype(resolved_freq))

354 

355 # ----------------------------------------------------------------- 

356 # DatetimeLike Interface 

357 

358 # error: Argument 1 of "_unbox_scalar" is incompatible with supertype 

359 # "DatetimeLikeArrayMixin"; supertype defines the argument type as 

360 # "Union[Union[Period, Any, Timedelta], NaTType]" 

361 def _unbox_scalar( # type: ignore[override] 

362 self, 

363 value: Period | NaTType, 

364 ) -> np.int64: 

365 if value is NaT: 

366 # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value" 

367 return np.int64(value._value) # type: ignore[union-attr] 

368 elif isinstance(value, self._scalar_type): 

369 self._check_compatible_with(value) 

370 return np.int64(value.ordinal) 

371 else: 

372 raise ValueError(f"'value' should be a Period. Got '{value}' instead.") 

373 

374 def _scalar_from_string(self, value: str) -> Period: 

375 return Period(value, freq=self.freq) 

376 

377 # error: Argument 1 of "_check_compatible_with" is incompatible with 

378 # supertype "DatetimeLikeArrayMixin"; supertype defines the argument type 

379 # as "Period | Timestamp | Timedelta | NaTType" 

380 def _check_compatible_with(self, other: Period | NaTType | PeriodArray) -> None: # type: ignore[override] 

381 if other is NaT: 

382 return 

383 # error: Item "NaTType" of "Period | NaTType | PeriodArray" has no 

384 # attribute "freq" 

385 self._require_matching_freq(other.freq) # type: ignore[union-attr] 

386 

387 # -------------------------------------------------------------------- 

388 # Data / Attributes 

389 

@cache_readonly
def dtype(self) -> PeriodDtype:
    """
    Return the PeriodDtype stored on this array as ``self._dtype``.
    """
    return self._dtype

393 

# error: Cannot override writeable attribute with read-only property
@property  # type: ignore[override]
def freq(self) -> BaseOffset:
    """
    Return the frequency object for this PeriodArray.

    This is the ``freq`` attribute of the array's dtype.
    """
    return self.dtype.freq

401 

402 @property 

403 def freqstr(self) -> str: 

404 return freq_to_period_freqstr(self.freq.n, self.freq.name) 

405 

406 def __array__( 

407 self, dtype: NpDtype | None = None, copy: bool | None = None 

408 ) -> np.ndarray: 

409 if dtype == "i8": 

410 return self.asi8 

411 elif dtype == bool: 

412 return ~self._isnan 

413 

414 # This will raise TypeError for non-object dtypes 

415 return np.array(list(self), dtype=object) 

416 

def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    An integer ``type`` yields a plain integer array; an ``ArrowPeriodType``
    (or no type) yields an extension array over int64 storage. Positions
    flagged by ``self.isna()`` are masked in both cases.

    Raises
    ------
    TypeError
        If ``type`` is an ``ArrowPeriodType`` with a different freq, or any
        other unsupported type.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    if type is not None:
        if pyarrow.types.is_integer(type):
            return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
        if not isinstance(type, ArrowPeriodType):
            raise TypeError(
                f"Not supported to convert PeriodArray to '{type}' type"
            )
        # ensure we have the same freq
        if self.freqstr != type.freq:
            raise TypeError(
                "Not supported to convert PeriodArray to array with different "
                f"'freq' ({self.freqstr} vs {type.freq})"
            )

    ext_type = ArrowPeriodType(self.freqstr)
    storage = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
    return pyarrow.ExtensionArray.from_storage(ext_type, storage)

443 

444 # -------------------------------------------------------------------- 

445 # Vectorized analogues of Period properties 

446 

# Each property below is produced by ``_field_accessor``: the first argument
# is the field name requested from ``get_period_field_arr`` and the second
# is the docstring attached to the generated getter.
year = _field_accessor(
    "year",
    """
    The year of the period.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
    >>> idx.year
    Index([2023, 2024, 2025], dtype='int64')
    """,
)
month = _field_accessor(
    "month",
    """
    The month as January=1, December=12.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
    >>> idx.month
    Index([1, 2, 3], dtype='int64')
    """,
)
day = _field_accessor(
    "day",
    """
    The days of the period.

    Examples
    --------
    >>> idx = pd.PeriodIndex(['2020-01-31', '2020-02-28'], freq='D')
    >>> idx.day
    Index([31, 28], dtype='int64')
    """,
)
hour = _field_accessor(
    "hour",
    """
    The hour of the period.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01-01 10:00", "2023-01-01 11:00"], freq='h')
    >>> idx.hour
    Index([10, 11], dtype='int64')
    """,
)
minute = _field_accessor(
    "minute",
    """
    The minute of the period.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01-01 10:30:00",
    ...                       "2023-01-01 11:50:00"], freq='min')
    >>> idx.minute
    Index([30, 50], dtype='int64')
    """,
)
second = _field_accessor(
    "second",
    """
    The second of the period.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01-01 10:00:30",
    ...                       "2023-01-01 10:00:31"], freq='s')
    >>> idx.second
    Index([30, 31], dtype='int64')
    """,
)
# ``weekofyear`` requests the "week" field; ``week`` is the same property
# object under a second name.
weekofyear = _field_accessor(
    "week",
    """
    The week ordinal of the year.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
    >>> idx.week  # It can be written `weekofyear`
    Index([5, 9, 13], dtype='int64')
    """,
)
week = weekofyear
day_of_week = _field_accessor(
    "day_of_week",
    """
    The day of the week with Monday=0, Sunday=6.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01-01", "2023-01-02", "2023-01-03"], freq="D")
    >>> idx.weekday
    Index([6, 0, 1], dtype='int64')
    """,
)
# Both spellings below are aliases of the ``day_of_week`` property.
dayofweek = day_of_week
weekday = dayofweek
dayofyear = day_of_year = _field_accessor(
    "day_of_year",
    """
    The ordinal day of the year.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01-10", "2023-02-01", "2023-03-01"], freq="D")
    >>> idx.dayofyear
    Index([10, 32, 60], dtype='int64')

    >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
    >>> idx
    PeriodIndex(['2023', '2024', '2025'], dtype='period[Y-DEC]')
    >>> idx.dayofyear
    Index([365, 366, 365], dtype='int64')
    """,
)
quarter = _field_accessor(
    "quarter",
    """
    The quarter of the date.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
    >>> idx.quarter
    Index([1, 1, 1], dtype='int64')
    """,
)
# No docstring is supplied here, so the generated getter's __doc__ is None.
qyear = _field_accessor("qyear")
days_in_month = _field_accessor(
    "days_in_month",
    """
    The number of days in the month.

    Examples
    --------
    For Series:

    >>> period = pd.period_range('2020-1-1 00:00', '2020-3-1 00:00', freq='M')
    >>> s = pd.Series(period)
    >>> s
    0   2020-01
    1   2020-02
    2   2020-03
    dtype: period[M]
    >>> s.dt.days_in_month
    0    31
    1    29
    2    31
    dtype: int64

    For PeriodIndex:

    >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
    >>> idx.days_in_month   # It can be also entered as `daysinmonth`
    Index([31, 28, 31], dtype='int64')
    """,
)
daysinmonth = days_in_month

609 

@property
def is_leap_year(self) -> npt.NDArray[np.bool_]:
    """
    Logical indicating if the date belongs to a leap year.

    The result is computed by ``isleapyear_arr`` from the ``year`` field.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
    >>> idx.is_leap_year
    array([False,  True, False])
    """
    years = np.asarray(self.year)
    return isleapyear_arr(years)

622 

623 def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: 

624 """ 

625 Cast to DatetimeArray/Index. 

626 

627 Parameters 

628 ---------- 

629 freq : str or DateOffset, optional 

630 Target frequency. The default is 'D' for week or longer, 

631 's' otherwise. 

632 how : {'s', 'e', 'start', 'end'} 

633 Whether to use the start or end of the time period being converted. 

634 

635 Returns 

636 ------- 

637 DatetimeArray/Index 

638 

639 Examples 

640 -------- 

641 >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M") 

642 >>> idx.to_timestamp() 

643 DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], 

644 dtype='datetime64[ns]', freq='MS') 

645 """ 

646 from pandas.core.arrays import DatetimeArray 

647 

648 how = libperiod.validate_end_alias(how) 

649 

650 end = how == "E" 

651 if end: 

652 if freq == "B" or self.freq == "B": 

653 # roll forward to ensure we land on B date 

654 adjust = Timedelta(1, "D") - Timedelta(1, "ns") 

655 return self.to_timestamp(how="start") + adjust 

656 else: 

657 adjust = Timedelta(1, "ns") 

658 return (self + self.freq).to_timestamp(how="start") - adjust 

659 

660 if freq is None: 

661 freq_code = self._dtype._get_to_timestamp_base() 

662 dtype = PeriodDtypeBase(freq_code, 1) 

663 freq = dtype._freqstr 

664 base = freq_code 

665 else: 

666 freq = Period._maybe_convert_freq(freq) 

667 base = freq._period_dtype_code 

668 

669 new_parr = self.asfreq(freq, how=how) 

670 

671 new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base) 

672 dta = DatetimeArray._from_sequence(new_data) 

673 

674 if self.freq.name == "B": 

675 # See if we can retain BDay instead of Day in cases where 

676 # len(self) is too small for infer_freq to distinguish between them 

677 diffs = libalgos.unique_deltas(self.asi8) 

678 if len(diffs) == 1: 

679 diff = diffs[0] 

680 if diff == self.dtype._n: 

681 dta._freq = self.freq 

682 elif diff == 1: 

683 dta._freq = self.freq.base 

684 # TODO: other cases? 

685 return dta 

686 else: 

687 return dta._with_freq("infer") 

688 

689 # -------------------------------------------------------------------- 

690 

691 def _box_func(self, x) -> Period | NaTType: 

692 return Period._from_ordinal(ordinal=x, freq=self.freq) 

693 

@doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex")
def asfreq(self, freq=None, how: str = "E") -> Self:
    """
    Convert the {klass} to the specified frequency `freq`.

    Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments
    to each :class:`~pandas.Period` in this {klass}.

    Parameters
    ----------
    freq : str
        A frequency.
    how : str {{'E', 'S'}}, default 'E'
        Whether the elements should be aligned to the end
        or start within a period.

        * 'E', 'END', or 'FINISH' for end,
        * 'S', 'START', or 'BEGIN' for start.

        January 31st ('END') vs. January 1st ('START') for example.

    Returns
    -------
    {klass}
        The transformed {klass} with the new frequency.

    See Also
    --------
    {other}.asfreq: Convert each Period in a {other_name} to the given frequency.
    Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.

    Examples
    --------
    >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='Y')
    >>> pidx
    PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
    dtype='period[Y-DEC]')

    >>> pidx.asfreq('M')
    PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
    '2015-12'], dtype='period[M]')

    >>> pidx.asfreq('M', how='S')
    PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
    '2015-01'], dtype='period[M]')
    """
    how = libperiod.validate_end_alias(how)
    if isinstance(freq, BaseOffset) and hasattr(freq, "_period_dtype_code"):
        freq = PeriodDtype(freq)._freqstr
    freq = Period._maybe_convert_freq(freq)

    src_code = self._dtype._dtype_code
    dst_code = freq._period_dtype_code

    ordinals = self.asi8
    align_end = how == "E"
    if align_end:
        # self.freq.n can't be negative or 0
        ordinals = ordinals + self.dtype._n - 1

    converted = period_asfreq_arr(ordinals, src_code, dst_code, align_end)

    if self._hasna:
        # Put the missing-value sentinel back in the NaT positions.
        converted[self._isnan] = iNaT

    return type(self)(converted, dtype=PeriodDtype(freq))

763 

764 # ------------------------------------------------------------------ 

765 # Rendering Methods 

766 

767 def _formatter(self, boxed: bool = False): 

768 if boxed: 

769 return str 

770 return "'{}'".format 

771 

772 def _format_native_types( 

773 self, *, na_rep: str | float = "NaT", date_format=None, **kwargs 

774 ) -> npt.NDArray[np.object_]: 

775 """ 

776 actually format my specific types 

777 """ 

778 return libperiod.period_array_strftime( 

779 self.asi8, self.dtype._dtype_code, na_rep, date_format 

780 ) 

781 

782 # ------------------------------------------------------------------ 

783 

784 def astype(self, dtype, copy: bool = True): 

785 # We handle Period[T] -> Period[U] 

786 # Our parent handles everything else. 

787 dtype = pandas_dtype(dtype) 

788 if dtype == self._dtype: 

789 if not copy: 

790 return self 

791 else: 

792 return self.copy() 

793 if isinstance(dtype, PeriodDtype): 

794 return self.asfreq(dtype.freq) 

795 

796 if lib.is_np_dtype(dtype, "M") or isinstance(dtype, DatetimeTZDtype): 

797 # GH#45038 match PeriodIndex behavior. 

798 tz = getattr(dtype, "tz", None) 

799 unit = dtl.dtype_to_unit(dtype) 

800 return self.to_timestamp().tz_localize(tz).as_unit(unit) 

801 

802 return super().astype(dtype, copy=copy) 

803 

def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Find insertion indices for ``value``.

    ``value`` is validated with ``_validate_setitem_value``; both sides
    are then viewed as ``M8[ns]`` before calling numpy's ``searchsorted``.
    """
    validated = self._validate_setitem_value(value)
    needle = validated.view("M8[ns]")

    # Cast to M8 to get datetime-like NaT placement,
    #  similar to dtl._period_dispatch
    haystack = self._ndarray.view("M8[ns]")
    return haystack.searchsorted(needle, side=side, sorter=sorter)

816 

817 def _pad_or_backfill( 

818 self, 

819 *, 

820 method: FillnaOptions, 

821 limit: int | None = None, 

822 limit_area: Literal["inside", "outside"] | None = None, 

823 copy: bool = True, 

824 ) -> Self: 

825 # view as dt64 so we get treated as timelike in core.missing, 

826 # similar to dtl._period_dispatch 

827 dta = self.view("M8[ns]") 

828 result = dta._pad_or_backfill( 

829 method=method, limit=limit, limit_area=limit_area, copy=copy 

830 ) 

831 if copy: 

832 return cast("Self", result.view(self.dtype)) 

833 else: 

834 return self 

835 

def fillna(
    self, value=None, method=None, limit: int | None = None, copy: bool = True
) -> Self:
    """
    Fill missing values.

    Without ``method`` the call goes to the parent class. With a
    ``method`` the fill runs on an ``M8[ns]`` view and the result is viewed
    back as ``self.dtype``.
    """
    if method is None:
        return super().fillna(value=value, method=method, limit=limit, copy=copy)

    # view as dt64 so we get treated as timelike in core.missing,
    #  similar to dtl._period_dispatch
    as_dt64 = self.view("M8[ns]")
    filled = as_dt64.fillna(value=value, method=method, limit=limit, copy=copy)
    # error: Incompatible return value type (got "Union[ExtensionArray,
    # ndarray[Any, Any]]", expected "PeriodArray")
    return filled.view(self.dtype)  # type: ignore[return-value]

848 

849 # ------------------------------------------------------------------ 

850 # Arithmetic Methods 

851 

852 def _addsub_int_array_or_scalar( 

853 self, other: np.ndarray | int, op: Callable[[Any, Any], Any] 

854 ) -> Self: 

855 """ 

856 Add or subtract array of integers. 

857 

858 Parameters 

859 ---------- 

860 other : np.ndarray[int64] or int 

861 op : {operator.add, operator.sub} 

862 

863 Returns 

864 ------- 

865 result : PeriodArray 

866 """ 

867 assert op in [operator.add, operator.sub] 

868 if op is operator.sub: 

869 other = -other 

870 res_values = add_overflowsafe(self.asi8, np.asarray(other, dtype="i8")) 

871 return type(self)(res_values, dtype=self.dtype) 

872 

873 def _add_offset(self, other: BaseOffset): 

874 assert not isinstance(other, Tick) 

875 

876 self._require_matching_freq(other, base=True) 

877 return self._addsub_int_array_or_scalar(other.n, operator.add) 

878 

879 # TODO: can we de-duplicate with Period._add_timedeltalike_scalar? 

880 def _add_timedeltalike_scalar(self, other): 

881 """ 

882 Parameters 

883 ---------- 

884 other : timedelta, Tick, np.timedelta64 

885 

886 Returns 

887 ------- 

888 PeriodArray 

889 """ 

890 if not isinstance(self.freq, Tick): 

891 # We cannot add timedelta-like to non-tick PeriodArray 

892 raise raise_on_incompatible(self, other) 

893 

894 if isna(other): 

895 # i.e. np.timedelta64("NaT") 

896 return super()._add_timedeltalike_scalar(other) 

897 

898 td = np.asarray(Timedelta(other).asm8) 

899 return self._add_timedelta_arraylike(td) 

900 

901 def _add_timedelta_arraylike( 

902 self, other: TimedeltaArray | npt.NDArray[np.timedelta64] 

903 ) -> Self: 

904 """ 

905 Parameters 

906 ---------- 

907 other : TimedeltaArray or ndarray[timedelta64] 

908 

909 Returns 

910 ------- 

911 PeriodArray 

912 """ 

913 if not self.dtype._is_tick_like(): 

914 # We cannot add timedelta-like to non-tick PeriodArray 

915 raise TypeError( 

916 f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}" 

917 ) 

918 

919 dtype = np.dtype(f"m8[{self.dtype._td64_unit}]") 

920 

921 # Similar to _check_timedeltalike_freq_compat, but we raise with a 

922 # more specific exception message if necessary. 

923 try: 

924 delta = astype_overflowsafe( 

925 np.asarray(other), dtype=dtype, copy=False, round_ok=False 

926 ) 

927 except ValueError as err: 

928 # e.g. if we have minutes freq and try to add 30s 

929 # "Cannot losslessly convert units" 

930 raise IncompatibleFrequency( 

931 "Cannot add/subtract timedelta-like from PeriodArray that is " 

932 "not an integer multiple of the PeriodArray's freq." 

933 ) from err 

934 

935 res_values = add_overflowsafe(self.asi8, np.asarray(delta.view("i8"))) 

936 return type(self)(res_values, dtype=self.dtype) 

937 

938 def _check_timedeltalike_freq_compat(self, other): 

939 """ 

940 Arithmetic operations with timedelta-like scalars or array `other` 

941 are only valid if `other` is an integer multiple of `self.freq`. 

942 If the operation is valid, find that integer multiple. Otherwise, 

943 raise because the operation is invalid. 

944 

945 Parameters 

946 ---------- 

947 other : timedelta, np.timedelta64, Tick, 

948 ndarray[timedelta64], TimedeltaArray, TimedeltaIndex 

949 

950 Returns 

951 ------- 

952 multiple : int or ndarray[int64] 

953 

954 Raises 

955 ------ 

956 IncompatibleFrequency 

957 """ 

958 assert self.dtype._is_tick_like() # checked by calling function 

959 

960 dtype = np.dtype(f"m8[{self.dtype._td64_unit}]") 

961 

962 if isinstance(other, (timedelta, np.timedelta64, Tick)): 

963 td = np.asarray(Timedelta(other).asm8) 

964 else: 

965 td = np.asarray(other) 

966 

967 try: 

968 delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False) 

969 except ValueError as err: 

970 raise raise_on_incompatible(self, other) from err 

971 

972 delta = delta.view("i8") 

973 return lib.item_from_zerodim(delta) 

974 

975 

976def raise_on_incompatible(left, right) -> IncompatibleFrequency: 

977 """ 

978 Helper function to render a consistent error message when raising 

979 IncompatibleFrequency. 

980 

981 Parameters 

982 ---------- 

983 left : PeriodArray 

984 right : None, DateOffset, Period, ndarray, or timedelta-like 

985 

986 Returns 

987 ------- 

988 IncompatibleFrequency 

989 Exception to be raised by the caller. 

990 """ 

991 # GH#24283 error message format depends on whether right is scalar 

992 if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None: 

993 other_freq = None 

994 elif isinstance(right, BaseOffset): 

995 other_freq = freq_to_period_freqstr(right.n, right.name) 

996 elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period)): 

997 other_freq = right.freqstr 

998 else: 

999 other_freq = delta_to_tick(Timedelta(right)).freqstr 

1000 

1001 own_freq = freq_to_period_freqstr(left.freq.n, left.freq.name) 

1002 msg = DIFFERENT_FREQ.format( 

1003 cls=type(left).__name__, own_freq=own_freq, other_freq=other_freq 

1004 ) 

1005 return IncompatibleFrequency(msg) 

1006 

1007 

1008# ------------------------------------------------------------------- 

1009# Constructor Helpers 

1010 

1011 

1012def period_array( 

1013 data: Sequence[Period | str | None] | AnyArrayLike, 

1014 freq: str | Tick | BaseOffset | None = None, 

1015 copy: bool = False, 

1016) -> PeriodArray: 

1017 """ 

1018 Construct a new PeriodArray from a sequence of Period scalars. 

1019 

1020 Parameters 

1021 ---------- 

1022 data : Sequence of Period objects 

1023 A sequence of Period objects. These are required to all have 

1024 the same ``freq.`` Missing values can be indicated by ``None`` 

1025 or ``pandas.NaT``. 

1026 freq : str, Tick, or Offset 

1027 The frequency of every element of the array. This can be specified 

1028 to avoid inferring the `freq` from `data`. 

1029 copy : bool, default False 

1030 Whether to ensure a copy of the data is made. 

1031 

1032 Returns 

1033 ------- 

1034 PeriodArray 

1035 

1036 See Also 

1037 -------- 

1038 PeriodArray 

1039 pandas.PeriodIndex 

1040 

1041 Examples 

1042 -------- 

1043 >>> period_array([pd.Period('2017', freq='Y'), 

1044 ... pd.Period('2018', freq='Y')]) 

1045 <PeriodArray> 

1046 ['2017', '2018'] 

1047 Length: 2, dtype: period[Y-DEC] 

1048 

1049 >>> period_array([pd.Period('2017', freq='Y'), 

1050 ... pd.Period('2018', freq='Y'), 

1051 ... pd.NaT]) 

1052 <PeriodArray> 

1053 ['2017', '2018', 'NaT'] 

1054 Length: 3, dtype: period[Y-DEC] 

1055 

1056 Integers that look like years are handled 

1057 

1058 >>> period_array([2000, 2001, 2002], freq='D') 

1059 <PeriodArray> 

1060 ['2000-01-01', '2001-01-01', '2002-01-01'] 

1061 Length: 3, dtype: period[D] 

1062 

1063 Datetime-like strings may also be passed 

1064 

1065 >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') 

1066 <PeriodArray> 

1067 ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] 

1068 Length: 4, dtype: period[Q-DEC] 

1069 """ 

1070 data_dtype = getattr(data, "dtype", None) 

1071 

1072 if lib.is_np_dtype(data_dtype, "M"): 

1073 return PeriodArray._from_datetime64(data, freq) 

1074 if isinstance(data_dtype, PeriodDtype): 

1075 out = PeriodArray(data) 

1076 if freq is not None: 

1077 if freq == data_dtype.freq: 

1078 return out 

1079 return out.asfreq(freq) 

1080 return out 

1081 

1082 # other iterable of some kind 

1083 if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)): 

1084 data = list(data) 

1085 

1086 arrdata = np.asarray(data) 

1087 

1088 dtype: PeriodDtype | None 

1089 if freq: 

1090 dtype = PeriodDtype(freq) 

1091 else: 

1092 dtype = None 

1093 

1094 if arrdata.dtype.kind == "f" and len(arrdata) > 0: 

1095 raise TypeError("PeriodIndex does not allow floating point in construction") 

1096 

1097 if arrdata.dtype.kind in "iu": 

1098 arr = arrdata.astype(np.int64, copy=False) 

1099 # error: Argument 2 to "from_ordinals" has incompatible type "Union[str, 

1100 # Tick, None]"; expected "Union[timedelta, BaseOffset, str]" 

1101 ordinals = libperiod.from_ordinals(arr, freq) # type: ignore[arg-type] 

1102 return PeriodArray(ordinals, dtype=dtype) 

1103 

1104 data = ensure_object(arrdata) 

1105 if freq is None: 

1106 freq = libperiod.extract_freq(data) 

1107 dtype = PeriodDtype(freq) 

1108 return PeriodArray._from_sequence(data, dtype=dtype) 

1109 

1110 

@overload
def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT:
    ...


@overload
def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset:
    ...


def validate_dtype_freq(
    dtype, freq: BaseOffsetT | BaseOffset | timedelta | str | None
) -> BaseOffsetT:
    """
    Reconcile an optional dtype with an optional freq.

    When both are given they must agree; when only the dtype is given, its
    freq is returned. When no dtype is given, the (converted) freq is
    returned unchanged, which is ``None`` if no freq was passed either.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    offset = None if freq is None else to_offset(freq, is_period=True)

    if dtype is None:
        # Nothing to validate against; hand back whatever freq resolved to.
        # error: Incompatible return value type (got "Union[BaseOffset, Any, None]",
        # expected "BaseOffset")
        return offset  # type: ignore[return-value]

    period_dtype = pandas_dtype(dtype)
    if not isinstance(period_dtype, PeriodDtype):
        raise ValueError("dtype must be PeriodDtype")

    if offset is None:
        offset = period_dtype.freq
    elif offset != period_dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    return offset

1156 

1157 

def dt64arr_to_periodarr(
    data, freq, tz=None
) -> tuple[npt.NDArray[np.int64], BaseOffset]:
    """
    Convert a datetime64 array-like to Period ordinals.

    Parameters
    ----------
    data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
    freq : Optional[Union[str, Tick]]
        Must match the `freq` on the `data` if `data` is a DatetimeIndex
        or Series.
    tz : Optional[tzinfo]

    Returns
    -------
    ordinals : ndarray[int64]
    freq : Tick
        The frequency extracted from the Series or DatetimeIndex if that's
        used.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not a numpy datetime64 dtype.
    """
    dtype = data.dtype
    if not (isinstance(dtype, np.dtype) and dtype.kind == "M"):
        raise ValueError(f"Wrong dtype: {data.dtype}")

    # Unwrap Index/Series to the underlying values; when no freq was passed,
    # fall back to the one carried by the container.
    if isinstance(data, ABCIndex):
        if freq is None:
            data, freq = data._values, data.freq
        else:
            data = data._values
    elif isinstance(data, ABCSeries):
        if freq is None:
            data, freq = data._values, data.dt.freq
        else:
            data = data._values

    reso = get_unit_from_dtype(data.dtype)
    freq = Period._maybe_convert_freq(freq)
    ordinals = c_dt64arr_to_periodarr(
        data.view("i8"), freq._period_dtype_code, tz, reso=reso
    )
    return ordinals, freq

1196 

1197 

1198def _get_ordinal_range(start, end, periods, freq, mult: int = 1): 

1199 if com.count_not_none(start, end, periods) != 2: 

1200 raise ValueError( 

1201 "Of the three parameters: start, end, and periods, " 

1202 "exactly two must be specified" 

1203 ) 

1204 

1205 if freq is not None: 

1206 freq = to_offset(freq, is_period=True) 

1207 mult = freq.n 

1208 

1209 if start is not None: 

1210 start = Period(start, freq) 

1211 if end is not None: 

1212 end = Period(end, freq) 

1213 

1214 is_start_per = isinstance(start, Period) 

1215 is_end_per = isinstance(end, Period) 

1216 

1217 if is_start_per and is_end_per and start.freq != end.freq: 

1218 raise ValueError("start and end must have same freq") 

1219 if start is NaT or end is NaT: 

1220 raise ValueError("start and end must not be NaT") 

1221 

1222 if freq is None: 

1223 if is_start_per: 

1224 freq = start.freq 

1225 elif is_end_per: 

1226 freq = end.freq 

1227 else: # pragma: no cover 

1228 raise ValueError("Could not infer freq from start/end") 

1229 mult = freq.n 

1230 

1231 if periods is not None: 

1232 periods = periods * mult 

1233 if start is None: 

1234 data = np.arange( 

1235 end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64 

1236 ) 

1237 else: 

1238 data = np.arange( 

1239 start.ordinal, start.ordinal + periods, mult, dtype=np.int64 

1240 ) 

1241 else: 

1242 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) 

1243 

1244 return data, freq 

1245 

1246 

def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
) -> tuple[np.ndarray, BaseOffset]:
    """
    Compute period ordinals from separate date/time field values.

    Missing ``hour``, ``minute`` and ``second`` default to 0 and a missing
    ``day`` defaults to 1. The fields are aligned with
    ``_make_field_arrays`` and one ordinal is produced per aligned row.

    Parameters
    ----------
    year, month, quarter, day, hour, minute, second : scalar or list-like
        If `quarter` is given, only `year` and `quarter` are used.
    freq : str, offset or None
        With `quarter`, defaults to ``"Q"``.

    Returns
    -------
    ordinals : ndarray[int64]
    freq : the resolved frequency

    Raises
    ------
    AssertionError
        If `quarter` is given with a `freq` whose dtype code is not
        ``FreqGroup.FR_QTR``.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    if quarter is None:
        freq = to_offset(freq, is_period=True)
        base = libperiod.freq_to_dtype_code(freq)
        columns = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)
            for y, mth, d, h, mn, s in zip(*columns)
        ]
        return np.array(ordinals, dtype=np.int64), freq

    # Quarterly path: only year and quarter take part.
    if freq is None:
        freq = to_offset("Q", is_period=True)
        base = FreqGroup.FR_QTR.value
    else:
        freq = to_offset(freq, is_period=True)
        base = libperiod.freq_to_dtype_code(freq)
        if base != FreqGroup.FR_QTR.value:
            raise AssertionError("base must equal FR_QTR")

    freqstr = freq.freqstr
    year, quarter = _make_field_arrays(year, quarter)
    ordinals = []
    for y, q in zip(year, quarter):
        # Translate (year, quarter) under this freq into a calendar
        # year/month before asking for the ordinal.
        calendar_year, calendar_month = parsing.quarter_to_myear(y, q, freqstr)
        ordinals.append(
            libperiod.period_ordinal(
                calendar_year, calendar_month, 1, 1, 1, 1, 0, 0, base
            )
        )

    return np.array(ordinals, dtype=np.int64), freq

1294 

1295 

1296def _make_field_arrays(*fields) -> list[np.ndarray]: 

1297 length = None 

1298 for x in fields: 

1299 if isinstance(x, (list, np.ndarray, ABCSeries)): 

1300 if length is not None and len(x) != length: 

1301 raise ValueError("Mismatched Period array lengths") 

1302 if length is None: 

1303 length = len(x) 

1304 

1305 # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected 

1306 # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int, 

1307 # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]" 

1308 return [ 

1309 np.asarray(x) 

1310 if isinstance(x, (np.ndarray, list, ABCSeries)) 

1311 else np.repeat(x, length) # type: ignore[arg-type] 

1312 for x in fields 

1313 ]