Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/period.py: 35%

1from __future__ import annotations

3from datetime import (

4 datetime,

5 timedelta,

7from typing import TYPE_CHECKING

8import warnings

10import numpy as np

12from pandas._libs import index as libindex

13from pandas._libs.tslibs import (

14 BaseOffset,

15 NaT,

16 Period,

17 Resolution,

18 Tick,

19)

20from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR

21from pandas.util._decorators import (

22 cache_readonly,

23 doc,

24)

25from pandas.util._exceptions import find_stack_level

27from pandas.core.dtypes.common import is_integer

28from pandas.core.dtypes.dtypes import PeriodDtype

29from pandas.core.dtypes.generic import ABCSeries

30from pandas.core.dtypes.missing import is_valid_na_for_dtype

32from pandas.core.arrays.period import (

33 PeriodArray,

34 period_array,

35 raise_on_incompatible,

36 validate_dtype_freq,

37)

38import pandas.core.common as com

39import pandas.core.indexes.base as ibase

40from pandas.core.indexes.base import maybe_extract_name

41from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin

42from pandas.core.indexes.datetimes import (

43 DatetimeIndex,

44 Index,

45)

46from pandas.core.indexes.extension import inherit_names

48if TYPE_CHECKING:

49 from collections.abc import Hashable

51 from pandas._typing import (

52 Dtype,

53 DtypeObj,

54 Self,

55 npt,

56 )

# Copy the base Index doc kwargs so the update below does not mutate the
# dict owned by pandas.core.indexes.base.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})

# Substitutions passed to the ``@doc`` decorator on PeriodIndex.asfreq.
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}

# --- Period index sketch

def _new_PeriodIndex(cls, **d):
    """
    Rebuild a PeriodIndex-like object from its pickled state (GH13277).

    Parameters
    ----------
    cls : type
        Class to instantiate; must provide ``_simple_new`` and be callable
        with ``(values, **kwargs)``.
    **d
        Pickled state. Must contain ``"data"``. When the data has dtype
        ``int64`` an optional ``"freq"`` entry is consumed to build the
        period dtype. All remaining entries are forwarded to the constructor.

    Returns
    -------
    Instance of ``cls``.
    """
    values = d.pop("data")

    if values.dtype != "int64":
        # Already period-like (or something the regular constructor
        # understands): defer to the public constructor.
        return cls(values, **d)

    # Raw int64 ordinals: wrap them in a PeriodArray of the stored frequency
    # and use the fastpath constructor.
    freq = d.pop("freq", None)
    dtype = PeriodDtype(freq)
    values = PeriodArray(values, dtype=dtype)
    return cls._simple_new(values, **d)

@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    month : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    quarter : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    day : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    hour : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    minute : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    second : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp
    from_fields
    from_ordinals

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> Self:
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
        **fields,
    ) -> Self:
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # When not copying from an Index/Series, carry over its reference
        # tracker so it is passed along to _simple_new below.
        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        if not set(fields).issubset(valid_field_set):
            argument = next(iter(set(fields) - valid_field_set))
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
        elif len(fields):
            # GH#55960
            warnings.warn(
                "Constructing PeriodIndex from fields is deprecated. Use "
                "PeriodIndex.from_fields instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if ordinal is not None:
            # GH#55960
            warnings.warn(
                "The 'ordinal' keyword in PeriodIndex is deprecated and will "
                "be removed in a future version. Use PeriodIndex.from_ordinals "
                "instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)
            data = cls.from_fields(**fields, freq=freq)._data
            # The array was just built from the fields; no need to copy it.
            copy = False

        elif fields:
            if data is not None:
                raise ValueError("Cannot pass both data and fields")
            raise ValueError("Cannot pass both ordinal and fields")

        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                ordinal = np.asarray(ordinal, dtype=np.int64)
                dtype = PeriodDtype(freq)
                data = PeriodArray(ordinal, dtype=dtype)
            elif data is not None and ordinal is not None:
                raise ValueError("Cannot pass both data and ordinal")
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    @classmethod
    def from_fields(
        cls,
        *,
        year=None,
        quarter=None,
        month=None,
        day=None,
        hour=None,
        minute=None,
        second=None,
        freq=None,
    ) -> Self:
        """
        Construct a PeriodIndex from individual date/time field values.

        Fields left as ``None`` are dropped; the remaining ones are passed,
        together with ``freq``, to ``PeriodArray._from_fields``.

        Parameters
        ----------
        year, quarter, month, day, hour, minute, second : optional
            Field values. ``None`` means the field is not supplied.
        freq : optional
            Frequency forwarded to ``PeriodArray._from_fields``.

        Returns
        -------
        PeriodIndex

        Examples
        --------
        >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
        >>> idx
        PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
        """
        fields = {
            "year": year,
            "quarter": quarter,
            "month": month,
            "day": day,
            "hour": hour,
            "minute": minute,
            "second": second,
        }
        fields = {key: value for key, value in fields.items() if value is not None}
        arr = PeriodArray._from_fields(fields=fields, freq=freq)
        return cls._simple_new(arr)

    @classmethod
    def from_ordinals(cls, ordinals, *, freq, name=None) -> Self:
        """
        Construct a PeriodIndex from integer ordinals.

        Parameters
        ----------
        ordinals : array-like
            Converted to an ``int64`` ndarray with ``np.asarray``.
        freq : str or period object
            Frequency used to build the ``PeriodDtype`` of the result.
        name : optional
            Name of the resulting index.

        Returns
        -------
        PeriodIndex
        """
        ordinals = np.asarray(ordinals, dtype=np.int64)
        dtype = PeriodDtype(freq)
        data = PeriodArray._simple_new(ordinals, dtype=dtype)
        return cls._simple_new(data, name=name)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> npt.NDArray[np.object_]:
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # is_integer also accepts NumPy integer scalars, which are not
            # ``int`` instances; normalise to a Python int instead of
            # asserting (an assert would fail for them and is stripped
            # under ``-O``).
            return int(other)

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        return self.dtype == dtype

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        where : array of timestamps
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            # Report the key the caller passed, not the converted one.
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        if key._dtype != self.dtype:
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
        iv = Period(parsed, freq=freq)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None) -> Self:
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods

545

546

547def period_range(

548 start=None,

549 end=None,

550 periods: int | None = None,

551 freq=None,

552 name: Hashable | None = None,

553) -> PeriodIndex:

554 """

555 Return a fixed frequency PeriodIndex.

556

557 The day (calendar) is the default frequency.

558

559 Parameters

560 ----------

561 start : str, datetime, date, pandas.Timestamp, or period-like, default None

562 Left bound for generating periods.

563 end : str, datetime, date, pandas.Timestamp, or period-like, default None

564 Right bound for generating periods.

565 periods : int, default None

566 Number of periods to generate.

567 freq : str or DateOffset, optional

568 Frequency alias. By default the freq is taken from `start` or `end`

569 if those are Period objects. Otherwise, the default is ``"D"`` for

570 daily frequency.

571 name : str, default None

572 Name of the resulting PeriodIndex.

573

574 Returns

575 -------

576 PeriodIndex

577

578 Notes

579 -----

580 Of the three parameters: ``start``, ``end``, and ``periods``, exactly two

581 must be specified.

582

583 To learn more about the frequency strings, please see `this link

584 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

585

586 Examples

587 --------

588 >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')

589 PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',

590 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',

591 '2018-01'],

592 dtype='period[M]')

593

594 If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor

595 endpoints for a ``PeriodIndex`` with frequency matching that of the

596 ``period_range`` constructor.

597

598 >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),

599 ... end=pd.Period('2017Q2', freq='Q'), freq='M')

600 PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],

601 dtype='period[M]')

602 """

603 if com.count_not_none(start, end, periods) != 2:

604 raise ValueError(

605 "Of the three parameters: start, end, and periods, "

606 "exactly two must be specified"

607 )

608 if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):

609 freq = "D"

610

611 data, freq = PeriodArray._generate_range(start, end, periods, freq)

612 dtype = PeriodDtype(freq)

613 data = PeriodArray(data, dtype=dtype)

614 return PeriodIndex(data, name=name)