Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/period.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

216 statements  

1from __future__ import annotations 

2 

3from datetime import ( 

4 datetime, 

5 timedelta, 

6) 

7from typing import TYPE_CHECKING 

8import warnings 

9 

10import numpy as np 

11 

12from pandas._libs import index as libindex 

13from pandas._libs.tslibs import ( 

14 BaseOffset, 

15 NaT, 

16 Period, 

17 Resolution, 

18 Tick, 

19) 

20from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR 

21from pandas.util._decorators import ( 

22 cache_readonly, 

23 doc, 

24) 

25from pandas.util._exceptions import find_stack_level 

26 

27from pandas.core.dtypes.common import is_integer 

28from pandas.core.dtypes.dtypes import PeriodDtype 

29from pandas.core.dtypes.generic import ABCSeries 

30from pandas.core.dtypes.missing import is_valid_na_for_dtype 

31 

32from pandas.core.arrays.period import ( 

33 PeriodArray, 

34 period_array, 

35 raise_on_incompatible, 

36 validate_dtype_freq, 

37) 

38import pandas.core.common as com 

39import pandas.core.indexes.base as ibase 

40from pandas.core.indexes.base import maybe_extract_name 

41from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin 

42from pandas.core.indexes.datetimes import ( 

43 DatetimeIndex, 

44 Index, 

45) 

46from pandas.core.indexes.extension import inherit_names 

47 

48if TYPE_CHECKING: 

49 from collections.abc import Hashable 

50 

51 from pandas._typing import ( 

52 Dtype, 

53 DtypeObj, 

54 Self, 

55 npt, 

56 ) 

57 

58 

# Private copy of the base Index doc-template substitutions with the
# ``target_klass`` entry overridden for PeriodIndex; the base mapping itself
# is left untouched.
_index_doc_kwargs = {
    **ibase._index_doc_kwargs,
    "target_klass": "PeriodIndex or list of Periods",
}

# Substitutions forwarded to ``@doc(...)`` when re-using PeriodArray docstrings
# (see ``PeriodIndex.asfreq``).
_shared_doc_kwargs = {"klass": "PeriodArray"}

64 

65# --- Period index sketch 

66 

67 

def _new_PeriodIndex(cls, **d):
    """
    Rebuild an index of type ``cls`` from keyword state (GH13277, unpickling).

    ``d`` must contain a ``"data"`` entry. When that data has dtype
    ``"int64"`` it is treated as raw ordinals: an optional ``"freq"`` entry is
    removed from ``d`` and used to build the PeriodDtype, and the result is
    wrapped via ``cls._simple_new``. Any other data is handed to the regular
    ``cls`` constructor. Remaining entries of ``d`` are forwarded as keyword
    arguments in both cases.
    """
    payload = d.pop("data")

    if payload.dtype == "int64":
        # Raw ordinals: "freq" is consumed here so it is not forwarded twice.
        period_dtype = PeriodDtype(d.pop("freq", None))
        return cls._simple_new(PeriodArray(payload, dtype=period_dtype), **d)

    return cls(payload, **d)

78 

79 

@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Deprecated; use PeriodIndex.from_ordinals instead. Cannot be combined
        with ``data`` or with any of the field keywords.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    month : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    quarter : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    day : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    hour : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    minute : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    second : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    dtype : str or PeriodDtype, default None
        Validated together with ``freq`` to determine the index frequency.
    name : Hashable, optional
        Name of the resulting index.

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp
    from_fields
    from_ordinals

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    # Type tag for this index class.
    _typ = "periodindex"

    # Annotations only: the backing array, its frequency and its dtype.
    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    # Array class wrapped by this index.
    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        """Return the libindex engine class used by this index type."""
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        """Return the Resolution of this index, as reported by its dtype."""
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    #
    # NOTE: the methods below are decorated with ``@doc``, so they are
    # documented with comments only, leaving the decorator-supplied
    # docstring untouched.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> Self:
        # Delegate the conversion to the backing PeriodArray, then re-wrap
        # the result in the same index type, preserving the name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # Delegate to the backing PeriodArray and wrap the result in a
        # DatetimeIndex carrying the same name.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        # Wrap the array-level field in an Index that keeps this index's name.
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        # Wrap the array-level field in an Index that keeps this index's name.
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        # Wrap the array-level field in an Index that keeps this index's name.
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
        **fields,
    ) -> Self:
        """
        Build a PeriodIndex from ``data``, from ``ordinal`` or from date fields.

        Exactly one of the three sources may be used. Passing date fields
        (``year``, ``month``, ...) or ``ordinal`` emits a FutureWarning that
        points to ``from_fields`` / ``from_ordinals`` respectively.

        Raises
        ------
        TypeError
            If a keyword outside the supported date fields is passed.
        ValueError
            If more than one of ``data``, ``ordinal`` and the date fields is
            supplied.

        Notes
        -----
        When neither ``data``, ``ordinal`` nor any field is given,
        ``cls._raise_scalar_data_error(None)`` is invoked (presumably raising;
        its behavior is defined outside this class).
        """
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # When not copying from an Index/Series, forward the source's
        # ``_references`` object to the new index.
        # NOTE(review): presumably copy-on-write bookkeeping; confirm.
        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        if not set(fields).issubset(valid_field_set):
            # Report one (arbitrary) offending keyword, mimicking the
            # interpreter's own unexpected-keyword error.
            argument = next(iter(set(fields) - valid_field_set))
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
        elif len(fields):
            # GH#55960
            warnings.warn(
                "Constructing PeriodIndex from fields is deprecated. Use "
                "PeriodIndex.from_fields instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if ordinal is not None:
            # GH#55960
            warnings.warn(
                "The 'ordinal' keyword in PeriodIndex is deprecated and will "
                "be removed in a future version. Use PeriodIndex.from_ordinals "
                "instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)
            data = cls.from_fields(**fields, freq=freq)._data
            # The array was just built by from_fields, so the copy requested
            # by the caller is skipped.
            copy = False

        elif fields:
            # Fields were combined with data and/or ordinal; data takes
            # precedence when choosing the error message.
            if data is not None:
                raise ValueError("Cannot pass both data and fields")
            raise ValueError("Cannot pass both ordinal and fields")

        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # Ordinal-only construction (deprecated path).
                ordinal = np.asarray(ordinal, dtype=np.int64)
                dtype = PeriodDtype(freq)
                data = PeriodArray(ordinal, dtype=dtype)
            elif data is not None and ordinal is not None:
                raise ValueError("Cannot pass both data and ordinal")
            else:
                # Only ``data`` was given.
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    @classmethod
    def from_fields(
        cls,
        *,
        year=None,
        quarter=None,
        month=None,
        day=None,
        hour=None,
        minute=None,
        second=None,
        freq=None,
    ) -> Self:
        """
        Construct a PeriodIndex from date/time fields.

        Only the fields that are not None are forwarded to
        ``PeriodArray._from_fields`` together with ``freq``. The resulting
        index has no name.

        Parameters
        ----------
        year, quarter, month, day, hour, minute, second : optional
            Field values (keyword-only); fields left as None are omitted.
        freq : optional
            Frequency passed through to ``PeriodArray._from_fields``.

        Returns
        -------
        PeriodIndex

        Examples
        --------
        >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
        >>> idx
        PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
        """
        fields = {
            "year": year,
            "quarter": quarter,
            "month": month,
            "day": day,
            "hour": hour,
            "minute": minute,
            "second": second,
        }
        # Drop the fields the caller did not supply.
        fields = {key: value for key, value in fields.items() if value is not None}
        arr = PeriodArray._from_fields(fields=fields, freq=freq)
        return cls._simple_new(arr)

    @classmethod
    def from_ordinals(cls, ordinals, *, freq, name=None) -> Self:
        """
        Construct a PeriodIndex from integer ordinals.

        Parameters
        ----------
        ordinals : array-like
            Converted to an int64 ndarray with ``np.asarray``.
        freq : str or period object
            Frequency used to build the PeriodDtype (keyword-only, required).
        name : Hashable, optional
            Name of the resulting index.

        Returns
        -------
        PeriodIndex
        """
        ordinals = np.asarray(ordinals, dtype=np.int64)
        dtype = PeriodDtype(freq)
        data = PeriodArray._simple_new(ordinals, dtype=dtype)
        return cls._simple_new(data, name=name)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> npt.NDArray[np.object_]:
        """Return the index contents as an object-dtype ndarray."""
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # A non-Tick freq falls through to the final raise below.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            # The object returned by raise_on_incompatible is what gets raised.
            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # Narrows the type for static checkers.
            assert isinstance(other, int)
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        return self.dtype == dtype

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Compute as-of locations for ``where``, delegating to the parent class.

        Parameters
        ----------
        where : DatetimeIndex or PeriodIndex
            A DatetimeIndex is first converted to a PeriodIndex with this
            index's ``freq``.
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Returns
        -------
        np.ndarray

        Raises
        ------
        TypeError
            If ``where`` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Every consecutive difference must be below 2; given the monotonic
        # check above that means a step of 0 or 1, so repeated entries do not
        # make the index "not full".
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        """Return the string "period"."""
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` via the parent class, then to integer units of
        ``self.freq`` when ``target`` has the same dtype as this index.
        """
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Kept so the final KeyError reports what the caller actually passed,
        # not the normalized key.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            # Normalize any NA value accepted for this dtype to NaT.
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                # Partial-string match: return the slice result directly.
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            # A Period key is used as-is once its dtype has been checked.
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        """Raise KeyError if ``key``'s dtype differs from this index's dtype."""
        if key._dtype != self.dtype:
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        """
        Convert ``label`` to a Period with this index's ``freq``.

        Raises
        ------
        KeyError
            If the Period constructor raises ValueError for ``label``.
        """
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        # datetime labels are first converted to a Period at self.freq; all
        # other handling is left to the parent implementation.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(
        self, reso: Resolution, parsed: datetime
    ) -> tuple[Period, Period]:
        """
        Return the (start, end) bounds, at ``self.freq``, of the period that
        contains ``parsed`` at resolution ``reso``.
        """
        # Translate the resolution's abbreviation to a period freq string,
        # falling back to the abbreviation itself when there is no mapping.
        freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
        iv = Period(parsed, freq=freq)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None) -> Self:
        # ``freq`` exists only for signature compatibility; any non-None value
        # is rejected. Shifting is plain integer addition on the index.
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods

545 

546 

547def period_range( 

548 start=None, 

549 end=None, 

550 periods: int | None = None, 

551 freq=None, 

552 name: Hashable | None = None, 

553) -> PeriodIndex: 

554 """ 

555 Return a fixed frequency PeriodIndex. 

556 

557 The day (calendar) is the default frequency. 

558 

559 Parameters 

560 ---------- 

561 start : str, datetime, date, pandas.Timestamp, or period-like, default None 

562 Left bound for generating periods. 

563 end : str, datetime, date, pandas.Timestamp, or period-like, default None 

564 Right bound for generating periods. 

565 periods : int, default None 

566 Number of periods to generate. 

567 freq : str or DateOffset, optional 

568 Frequency alias. By default the freq is taken from `start` or `end` 

569 if those are Period objects. Otherwise, the default is ``"D"`` for 

570 daily frequency. 

571 name : str, default None 

572 Name of the resulting PeriodIndex. 

573 

574 Returns 

575 ------- 

576 PeriodIndex 

577 

578 Notes 

579 ----- 

580 Of the three parameters: ``start``, ``end``, and ``periods``, exactly two 

581 must be specified. 

582 

583 To learn more about the frequency strings, please see `this link 

584 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

585 

586 Examples 

587 -------- 

588 >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') 

589 PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', 

590 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', 

591 '2018-01'], 

592 dtype='period[M]') 

593 

594 If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor 

595 endpoints for a ``PeriodIndex`` with frequency matching that of the 

596 ``period_range`` constructor. 

597 

598 >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), 

599 ... end=pd.Period('2017Q2', freq='Q'), freq='M') 

600 PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], 

601 dtype='period[M]') 

602 """ 

603 if com.count_not_none(start, end, periods) != 2: 

604 raise ValueError( 

605 "Of the three parameters: start, end, and periods, " 

606 "exactly two must be specified" 

607 ) 

608 if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): 

609 freq = "D" 

610 

611 data, freq = PeriodArray._generate_range(start, end, periods, freq) 

612 dtype = PeriodDtype(freq) 

613 data = PeriodArray(data, dtype=dtype) 

614 return PeriodIndex(data, name=name)