Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/period.py: 36%

1from __future__ import annotations

3from datetime import (

4 datetime,

5 timedelta,

7from typing import Hashable

9import numpy as np

11from pandas._libs import index as libindex

12from pandas._libs.tslibs import (

13 BaseOffset,

14 NaT,

15 Period,

16 Resolution,

17 Tick,

18)

19from pandas._typing import (

20 Dtype,

21 DtypeObj,

22 npt,

23)

24from pandas.util._decorators import (

25 cache_readonly,

26 doc,

27)

29from pandas.core.dtypes.common import is_integer

30from pandas.core.dtypes.dtypes import PeriodDtype

31from pandas.core.dtypes.generic import ABCSeries

32from pandas.core.dtypes.missing import is_valid_na_for_dtype

34from pandas.core.arrays.period import (

35 PeriodArray,

36 period_array,

37 raise_on_incompatible,

38 validate_dtype_freq,

39)

40import pandas.core.common as com

41import pandas.core.indexes.base as ibase

42from pandas.core.indexes.base import maybe_extract_name

43from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin

44from pandas.core.indexes.datetimes import (

45 DatetimeIndex,

46 Index,

47)

48from pandas.core.indexes.extension import inherit_names

# Substitution values for shared docstring templates.  The index-level
# mapping starts from a copy of the base Index values, so the mapping owned
# by ``ibase`` is never mutated, and overrides only ``target_klass``.
_index_doc_kwargs = {
    **ibase._index_doc_kwargs,
    "target_klass": "PeriodIndex or list of Periods",
}
_shared_doc_kwargs = {"klass": "PeriodArray"}

56# --- Period index sketch

def _new_PeriodIndex(cls, **d):
    """
    Rebuild an index of type ``cls`` from keyword state (GH13277, unpickling).

    ``d`` must contain ``"data"``.  When that data has dtype ``int64`` it is
    wrapped in a PeriodArray using the optional ``"freq"`` entry and passed to
    ``cls._simple_new``; any other data is handed to the regular ``cls``
    constructor.  In both cases the remaining entries of ``d`` are forwarded
    as keyword arguments.
    """
    payload = d.pop("data")
    holds_ordinals = payload.dtype == "int64"
    if holds_ordinals:
        # "freq" is consumed only on this path; otherwise it stays in ``d``
        # and reaches the constructor untouched.
        period_values = PeriodArray(payload, freq=d.pop("freq", None))
        return cls._simple_new(period_values, **d)
    return cls(payload, **d)

@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects that carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Period ordinals to construct the index from. Only used when `data`
        is not passed.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    name : Hashable, optional
        Name of the resulting index.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        """Engine class used for this index: ``libindex.PeriodEngine``."""
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    #
    # NOTE: the methods below are decorated with ``@doc``, which supplies
    # their documentation; they are annotated with comments rather than
    # docstrings so that the generated documentation is not altered.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        # Convert the backing PeriodArray, then re-wrap keeping the name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # The array-level result is wrapped in a DatetimeIndex with our name.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:
        """
        Construct a PeriodIndex from data, from ordinals, or from field arrays.

        Raises
        ------
        TypeError
            If a keyword other than year, month, day, quarter, hour, minute
            or second is passed in ``fields``.
        """
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            # Not copying: carry over the source object's references.
            refs = data._references

        # Report the first offending keyword in call order.  Picking an
        # element out of a set difference made the reported name depend on
        # string hashing, i.e. vary between interpreter runs.
        unexpected = [key for key in fields if key not in valid_field_set]
        if unexpected:
            argument = unexpected[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        """Return the index contents as an object-dtype ndarray."""
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # A non-Tick freq falls through to the final raise below.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Coerce `where` to a PeriodIndex and defer to the parent ``asof_locs``.

        Parameters
        ----------
        where : DatetimeIndex or PeriodIndex
            A DatetimeIndex is converted to a PeriodIndex with this index's
            freq before the parent implementation is called.
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Returns
        -------
        np.ndarray

        Raises
        ------
        TypeError
            If `where` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Consecutive ordinals may differ by at most one.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Kept so that a final KeyError reports what the caller passed in,
        # not the converted key.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        """
        Raise KeyError if `key` does not have the same freq as this index.
        """
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        """
        Build a Period with this index's freq from `label`.

        Raises
        ------
        KeyError
            If the Period constructor raises ValueError for `label`.
        """
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        # datetime labels become Periods before the parent handles the bound.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """
        Return the (start, end) Periods at self.freq spanned by `parsed`,
        where `parsed` is first taken as a Period at resolution `reso`.
        """
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None):
        # Shifting is plain integer addition; a separate `freq` is rejected.
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods

483

484

485def period_range(

486 start=None, end=None, periods: int | None = None, freq=None, name=None

487) -> PeriodIndex:

488 """

489 Return a fixed frequency PeriodIndex.

490

491 The day (calendar) is the default frequency.

492

493 Parameters

494 ----------

495 start : str or period-like, default None

496 Left bound for generating periods.

497 end : str or period-like, default None

498 Right bound for generating periods.

499 periods : int, default None

500 Number of periods to generate.

501 freq : str or DateOffset, optional

502 Frequency alias. By default the freq is taken from `start` or `end`

503 if those are Period objects. Otherwise, the default is ``"D"`` for

504 daily frequency.

505 name : str, default None

506 Name of the resulting PeriodIndex.

507

508 Returns

509 -------

510 PeriodIndex

511

512 Notes

513 -----

514 Of the three parameters: ``start``, ``end``, and ``periods``, exactly two

515 must be specified.

516

517 To learn more about the frequency strings, please see `this link

518 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

519

520 Examples

521 --------

522 >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')

523 PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',

524 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',

525 '2018-01'],

526 dtype='period[M]')

527

528 If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor

529 endpoints for a ``PeriodIndex`` with frequency matching that of the

530 ``period_range`` constructor.

531

532 >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),

533 ... end=pd.Period('2017Q2', freq='Q'), freq='M')

534 PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],

535 dtype='period[M]')

536 """

537 if com.count_not_none(start, end, periods) != 2:

538 raise ValueError(

539 "Of the three parameters: start, end, and periods, "

540 "exactly two must be specified"

541 )

542 if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):

543 freq = "D"

544

545 data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})

546 data = PeriodArray(data, freq=freq)

547 return PeriodIndex(data, name=name)