Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/period.py: 36%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

191 statements  

1from __future__ import annotations 

2 

3from datetime import ( 

4 datetime, 

5 timedelta, 

6) 

7from typing import Hashable 

8 

9import numpy as np 

10 

11from pandas._libs import index as libindex 

12from pandas._libs.tslibs import ( 

13 BaseOffset, 

14 NaT, 

15 Period, 

16 Resolution, 

17 Tick, 

18) 

19from pandas._typing import ( 

20 Dtype, 

21 DtypeObj, 

22 npt, 

23) 

24from pandas.util._decorators import ( 

25 cache_readonly, 

26 doc, 

27) 

28 

29from pandas.core.dtypes.common import is_integer 

30from pandas.core.dtypes.dtypes import PeriodDtype 

31from pandas.core.dtypes.generic import ABCSeries 

32from pandas.core.dtypes.missing import is_valid_na_for_dtype 

33 

34from pandas.core.arrays.period import ( 

35 PeriodArray, 

36 period_array, 

37 raise_on_incompatible, 

38 validate_dtype_freq, 

39) 

40import pandas.core.common as com 

41import pandas.core.indexes.base as ibase 

42from pandas.core.indexes.base import maybe_extract_name 

43from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin 

44from pandas.core.indexes.datetimes import ( 

45 DatetimeIndex, 

46 Index, 

47) 

48from pandas.core.indexes.extension import inherit_names 

49 

# Docstring substitution values for Index methods: start from the base Index
# values and override only the description of the accepted target type.
_index_doc_kwargs = {
    **ibase._index_doc_kwargs,
    "target_klass": "PeriodIndex or list of Periods",
}

# Substitution values for docstrings shared with the backing array class.
_shared_doc_kwargs = {"klass": "PeriodArray"}

55 

56# --- Period index sketch 

57 

58 

def _new_PeriodIndex(cls, **d):
    """
    Rebuild an index of type ``cls`` from keyword state (GH13277, unpickling).

    ``d`` must contain a ``"data"`` entry. When that data has dtype ``int64``
    it is wrapped in a PeriodArray using the optional ``"freq"`` entry and
    handed to ``cls._simple_new``; any other data is passed to the regular
    ``cls`` constructor together with the remaining keywords.
    """
    payload = d.pop("data")
    holds_ordinals = payload.dtype == "int64"

    if not holds_ordinals:
        # `freq` (if present) stays in `d` and is forwarded to the constructor.
        return cls(payload, **d)

    period_values = PeriodArray(payload, freq=d.pop("freq", None))
    return cls._simple_new(period_values, **d)

68 

69 

@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (eg,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Integer period ordinals. Only used when ``data`` is None; ignored
        when ``data`` is passed.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None
    name : Hashable, default None
        Name of the resulting index.

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        """Engine class used for this index (``libindex.PeriodEngine``)."""
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        """Resolution of this index, taken from its dtype."""
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    #
    # NOTE: the methods below take their docstrings from the `doc` decorator,
    # so they are annotated with comments rather than docstrings of their own.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        # Convert on the backing array, then re-wrap keeping the index name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # Convert on the backing array and wrap the result in a DatetimeIndex.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:
        """
        Construct a PeriodIndex from data, ordinals, or date fields.

        Exactly one construction path is taken:

        * neither ``data`` nor ``ordinal`` given: build from ``fields``
          (year, month, ...), which must then be non-empty;
        * ``data`` given: convert it with ``period_array`` (``ordinal`` is
          ignored in that case);
        * only ``ordinal`` given: wrap the int64 ordinals with ``freq``.

        Raises
        ------
        TypeError
            If ``fields`` contains a keyword other than year, month, day,
            quarter, hour, minute or second.
        """
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            # Not copying: keep the source's reference tracker so the new
            # index is registered as sharing data with it.
            refs = data._references

        unexpected = [key for key in fields if key not in valid_field_set]
        if unexpected:
            # Name the first offending keyword in call order (``**fields``
            # preserves it). Picking from a set difference made the reported
            # keyword depend on hash ordering when several were invalid.
            argument = unexpected[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        """Return the index contents as an object-dtype ndarray."""
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # Non-Tick freq: fall through to the final raise below.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Delegate to the parent ``asof_locs`` after coercing `where`.

        A DatetimeIndex `where` is first converted to a PeriodIndex with this
        index's freq; a PeriodIndex is passed through unchanged.

        Parameters
        ----------
        where : array of timestamps
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Raises
        ------
        TypeError
            If `where` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full. Consecutive ordinals may differ by
        at most one, so repeated periods do not make the index non-full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # A gap exists exactly when two neighbouring ordinals differ by 2+.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        """Return the string ``"period"``."""
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        """
        Convert `tolerance` via the parent class and, when `target` shares
        this index's dtype, further into an integer multiple of self.freq.
        """
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Kept so the final KeyError reports what the caller passed, not the
        # Period/NaT it was converted to.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        """
        Raise KeyError if `key`'s freq does not match this index's freq.
        """
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        """
        Convert `label` to a Period with this index's freq.

        Raises
        ------
        KeyError
            If the Period constructor rejects `label` with a ValueError.
        """
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        # datetime bounds are converted to a Period first; everything else is
        # left to the parent implementation.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """
        Return the (start, end) Periods, at this index's freq, of the period
        of resolution `reso` that contains `parsed`.
        """
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None):
        # Shifting is plain integer addition; a separate `freq` is rejected.
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods

483 

484 

def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Build a PeriodIndex of evenly spaced periods.

    Daily (calendar day) frequency is used unless told otherwise.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound of the generated periods.
    end : str or period-like, default None
        Right bound of the generated periods.
    periods : int, default None
        How many periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. When omitted, the freq of `start` or `end` is used
        if either is a Period object; otherwise ``"D"`` (daily) is used.
    name : str, default None
        Name given to the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of arguments supplied among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Exactly two of ``start``, ``end`` and ``periods`` must be given.

    Frequency strings are described `here
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    n_given = com.count_not_none(start, end, periods)
    if n_given != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

    # Fall back to daily frequency only when no endpoint can supply one.
    if freq is None and not (isinstance(start, Period) or isinstance(end, Period)):
        freq = "D"

    ordinals, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
    return PeriodIndex(PeriodArray(ordinals, freq=freq), name=name)