from __future__ import annotations

import copy
from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Callable,
    Literal,
    cast,
    final,
    no_type_check,
)
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import (
    BaseOffset,
    IncompatibleFrequency,
    NaT,
    Period,
    Timedelta,
    Timestamp,
    to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas._typing import NDFrameT
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
    Appender,
    Substitution,
    doc,
)
from pandas.util._exceptions import (
    find_stack_level,
    rewrite_warning,
)

from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

import pandas.core.algorithms as algos
from pandas.core.apply import (
    ResamplerWindowApply,
    warn_alias_replacement,
)
from pandas.core.arrays import ArrowExtensionArray
from pandas.core.base import (
    PandasObject,
    SelectionMixin,
)
import pandas.core.common as com
from pandas.core.generic import (
    NDFrame,
    _shared_docs,
)
from pandas.core.groupby.generic import SeriesGroupBy
from pandas.core.groupby.groupby import (
    BaseGroupBy,
    GroupBy,
    _apply_groupings_depr,
    _pipe_template,
    get_groupby,
)
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.api import MultiIndex
from pandas.core.indexes.base import Index
from pandas.core.indexes.datetimes import (
    DatetimeIndex,
    date_range,
)
from pandas.core.indexes.period import (
    PeriodIndex,
    period_range,
)
from pandas.core.indexes.timedeltas import (
    TimedeltaIndex,
    timedelta_range,
)

from pandas.tseries.frequencies import (
    is_subperiod,
    is_superperiod,
)
from pandas.tseries.offsets import (
    Day,
    Tick,
)

if TYPE_CHECKING:
    from collections.abc import Hashable

    from pandas._typing import (
        AnyArrayLike,
        Axis,
        AxisInt,
        Frequency,
        IndexLabel,
        InterpolateOptions,
        T,
        TimedeltaConvertibleTypes,
        TimeGrouperOrigin,
        TimestampConvertibleTypes,
        npt,
    )

    from pandas import (
        DataFrame,
        Series,
    )

_shared_docs_kwargs: dict[str, str] = {}


class Resampler(BaseGroupBy, PandasObject):
122 """
123 Class for resampling datetimelike data, a groupby-like operation.
124 See aggregate, transform, and apply functions on this object.
125
126 It's easiest to use obj.resample(...) to use Resampler.
127
128 Parameters
129 ----------
130 obj : Series or DataFrame
131 groupby : TimeGrouper
132 axis : int, default 0
133 kind : str or None
134 'period', 'timestamp' to override default index treatment
135
136 Returns
137 -------
138 a Resampler of the appropriate type
139
140 Notes
141 -----
142 After resampling, see aggregate, apply, and transform functions.
143 """
144
145 _grouper: BinGrouper
146 _timegrouper: TimeGrouper
147 binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass
148 exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat
149 _internal_names_set = set({"obj", "ax", "_indexer"})
150
151 # to the groupby descriptor
152 _attributes = [
153 "freq",
154 "axis",
155 "closed",
156 "label",
157 "convention",
158 "kind",
159 "origin",
160 "offset",
161 ]
162
163 def __init__(
164 self,
165 obj: NDFrame,
166 timegrouper: TimeGrouper,
167 axis: Axis = 0,
168 kind=None,
169 *,
170 gpr_index: Index,
171 group_keys: bool = False,
172 selection=None,
173 include_groups: bool = True,
174 ) -> None:
175 self._timegrouper = timegrouper
176 self.keys = None
177 self.sort = True
178 self.axis = obj._get_axis_number(axis)
179 self.kind = kind
180 self.group_keys = group_keys
181 self.as_index = True
182 self.include_groups = include_groups
183
184 self.obj, self.ax, self._indexer = self._timegrouper._set_grouper(
185 self._convert_obj(obj), sort=True, gpr_index=gpr_index
186 )
187 self.binner, self._grouper = self._get_binner()
188 self._selection = selection
189 if self._timegrouper.key is not None:
190 self.exclusions = frozenset([self._timegrouper.key])
191 else:
192 self.exclusions = frozenset()
193
194 @final
195 def __str__(self) -> str:
196 """
197 Provide a nice str repr of our rolling object.
198 """
199 attrs = (
200 f"{k}={getattr(self._timegrouper, k)}"
201 for k in self._attributes
202 if getattr(self._timegrouper, k, None) is not None
203 )
204 return f"{type(self).__name__} [{', '.join(attrs)}]"
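    # A sketch of the repr this yields (the attribute list varies with the
    # grouper's settings):
    #   >>> str(df.resample("2min"))  # doctest: +SKIP
    #   'DatetimeIndexResampler [freq=<2 * Minutes>, axis=0, closed=left, ...]'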

    @final
    def __getattr__(self, attr: str):
        if attr in self._internal_names_set:
            return object.__getattribute__(self, attr)
        if attr in self._attributes:
            return getattr(self._timegrouper, attr)
        if attr in self.obj:
            return self[attr]

        return object.__getattribute__(self, attr)

    @final
    @property
    def _from_selection(self) -> bool:
        """
        Is the resampling from a DataFrame column or MultiIndex level.
        """
        # upsampling and PeriodIndex resampling do not work
        # with selection, this state used to catch and raise an error
        return self._timegrouper is not None and (
            self._timegrouper.key is not None or self._timegrouper.level is not None
        )

    def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
        """
        Provide any conversions for the object in order to correctly handle.

        Parameters
        ----------
        obj : Series or DataFrame

        Returns
        -------
        Series or DataFrame
        """
        return obj._consolidate()

    def _get_binner_for_time(self):
        raise AbstractMethodError(self)

    @final
    def _get_binner(self):
        """
        Create the BinGrouper, assume that self.set_grouper(obj)
        has already been called.
        """
        binner, bins, binlabels = self._get_binner_for_time()
        assert len(bins) == len(binlabels)
        bin_grouper = BinGrouper(bins, binlabels, indexer=self._indexer)
        return binner, bin_grouper
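    # Sketch of the pieces (exact values depend on closed/label/origin): for
    # daily data resampled to "2D", `binner` is the DatetimeIndex of bin edges
    # at the target freq, `bins` the positions in the sorted data where each
    # bin ends, and `binlabels` the label each bin gets in the result.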

    @final
    @Substitution(
        klass="Resampler",
        examples="""
    >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
    ...                   index=pd.date_range('2012-08-02', periods=4))
    >>> df
                A
    2012-08-02  1
    2012-08-03  2
    2012-08-04  3
    2012-08-05  4

    To get the difference between each 2-day period's maximum and minimum
    value in one pass, you can do

    >>> df.resample('2D').pipe(lambda x: x.max() - x.min())
                A
    2012-08-02  1
    2012-08-04  1""",
    )
    @Appender(_pipe_template)
    def pipe(
        self,
        func: Callable[..., T] | tuple[Callable[..., T], str],
        *args,
        **kwargs,
    ) -> T:
        return super().pipe(func, *args, **kwargs)

    _agg_see_also_doc = dedent(
        """
    See Also
    --------
    DataFrame.groupby.aggregate : Aggregate using callable, string, dict,
        or list of string/callables.
    DataFrame.resample.transform : Transforms the Series on each group
        based on the given function.
    DataFrame.aggregate: Aggregate using one or more
        operations over the specified axis.
    """
    )

    _agg_examples_doc = dedent(
        """
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4, 5],
    ...               index=pd.date_range('20130101', periods=5, freq='s'))
    >>> s
    2013-01-01 00:00:00    1
    2013-01-01 00:00:01    2
    2013-01-01 00:00:02    3
    2013-01-01 00:00:03    4
    2013-01-01 00:00:04    5
    Freq: s, dtype: int64

    >>> r = s.resample('2s')

    >>> r.agg("sum")
    2013-01-01 00:00:00    3
    2013-01-01 00:00:02    7
    2013-01-01 00:00:04    5
    Freq: 2s, dtype: int64

    >>> r.agg(['sum', 'mean', 'max'])
                         sum  mean  max
    2013-01-01 00:00:00    3   1.5    2
    2013-01-01 00:00:02    7   3.5    4
    2013-01-01 00:00:04    5   5.0    5

    >>> r.agg({'result': lambda x: x.mean() / x.std(),
    ...        'total': "sum"})
                           result  total
    2013-01-01 00:00:00  2.121320      3
    2013-01-01 00:00:02  4.949747      7
    2013-01-01 00:00:04       NaN      5

    >>> r.agg(average="mean", total="sum")
                         average  total
    2013-01-01 00:00:00      1.5      3
    2013-01-01 00:00:02      3.5      7
    2013-01-01 00:00:04      5.0      5
    """
    )

    @final
    @doc(
        _shared_docs["aggregate"],
        see_also=_agg_see_also_doc,
        examples=_agg_examples_doc,
        klass="DataFrame",
        axis="",
    )
    def aggregate(self, func=None, *args, **kwargs):
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:
            how = func
            result = self._groupby_and_aggregate(how, *args, **kwargs)

        return result

    agg = aggregate
    apply = aggregate

    @final
    def transform(self, arg, *args, **kwargs):
        """
        Call function producing a like-indexed Series on each group.

        Return a Series with the transformed values.

        Parameters
        ----------
        arg : function
            To apply to each group. Should return a Series with the same index.

        Returns
        -------
        Series

        Examples
        --------
        >>> s = pd.Series([1, 2],
        ...               index=pd.date_range('20180101',
        ...                                   periods=2,
        ...                                   freq='1h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        Freq: h, dtype: int64

        >>> resampled = s.resample('15min')
        >>> resampled.transform(lambda x: (x - x.mean()) / x.std())
        2018-01-01 00:00:00   NaN
        2018-01-01 01:00:00   NaN
        Freq: h, dtype: float64
        """
        return self._selected_obj.groupby(self._timegrouper).transform(
            arg, *args, **kwargs
        )

    def _downsample(self, f, **kwargs):
        raise AbstractMethodError(self)

    def _upsample(self, f, limit: int | None = None, fill_value=None):
        raise AbstractMethodError(self)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        grouper = self._grouper
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None and ndim=1
                assert subset.ndim == 1
        if ndim == 1:
            assert subset.ndim == 1

        grouped = get_groupby(
            subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
        )
        return grouped
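    # Sketch: r = df.resample("2D"); r["A"] arrives here via SelectionMixin
    # with key="A" and ndim=1, grouping only the selected column over the
    # resample bins.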

    def _groupby_and_aggregate(self, how, *args, **kwargs):
        """
        Re-evaluate the obj with a groupby aggregation.
        """
        grouper = self._grouper

        # Excludes `on` column when provided
        obj = self._obj_with_exclusions

        grouped = get_groupby(
            obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
        )

        try:
            if callable(how):
                # TODO: test_resample_apply_with_additional_args fails if we go
                #  through the non-lambda path, not clear that it should.
                func = lambda x: how(x, *args, **kwargs)
                result = grouped.aggregate(func)
            else:
                result = grouped.aggregate(how, *args, **kwargs)
        except (AttributeError, KeyError):
            # we have a non-reducing function; try to evaluate
            # alternatively we want to evaluate only a column of the input

            # test_apply_to_one_column_of_df the function being applied references
            #  a DataFrame column, but aggregate_item_by_item operates column-wise
            #  on Series, raising AttributeError or KeyError
            #  (depending on whether the column lookup uses getattr/__getitem__)
            result = _apply(
                grouped, how, *args, include_groups=self.include_groups, **kwargs
            )

        except ValueError as err:
            if "Must produce aggregated value" in str(err):
                # raised in _aggregate_named
                # see test_apply_without_aggregation, test_apply_with_mutated_index
                pass
            else:
                raise

            # we have a non-reducing function
            # try to evaluate
            result = _apply(
                grouped, how, *args, include_groups=self.include_groups, **kwargs
            )

        return self._wrap_result(result)
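    # Example of the fallback path (a sketch): a callable that returns a
    # Series per group is not a reduction, so grouped.aggregate raises and the
    # call is re-routed through _apply, e.g.
    #   df.resample("2D").apply(lambda g: g["A"] - g["A"].mean())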

    @final
    def _get_resampler_for_grouping(
        self, groupby: GroupBy, key, include_groups: bool = True
    ):
        """
        Return the correct class for resampling with groupby.
        """
        return self._resampler_for_grouping(
            groupby=groupby, key=key, parent=self, include_groups=include_groups
        )

    def _wrap_result(self, result):
        """
        Potentially wrap any results.
        """
        # GH 47705
        obj = self.obj
        if (
            isinstance(result, ABCDataFrame)
            and len(result) == 0
            and not isinstance(result.index, PeriodIndex)
        ):
            result = result.set_index(
                _asfreq_compat(obj.index[:0], freq=self.freq), append=True
            )

        if isinstance(result, ABCSeries) and self._selection is not None:
            result.name = self._selection

        if isinstance(result, ABCSeries) and result.empty:
            # When index is all NaT, result is empty but index is not
            result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
            result.name = getattr(obj, "name", None)

        if self._timegrouper._arrow_dtype is not None:
            result.index = result.index.astype(self._timegrouper._arrow_dtype)

        return result
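    # e.g. resampling an all-NaT Series yields an empty result; the branch
    # above still converts its (empty) index to the target freq via
    # _asfreq_compat.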

    @final
    def ffill(self, limit: int | None = None):
        """
        Forward fill the values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        An upsampled Series.

        See Also
        --------
        Series.fillna: Fill NA/NaN values using the specified method.
        DataFrame.fillna: Fill NA/NaN values using the specified method.

        Examples
        --------
        Here we only create a ``Series``.

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64

        Example for ``ffill`` with downsampling (we have fewer dates after resampling):

        >>> ser.resample('MS').ffill()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64

        Example for ``ffill`` with upsampling (fill the new dates with
        the previous value):

        >>> ser.resample('W').ffill()
        2023-01-01    1
        2023-01-08    1
        2023-01-15    2
        2023-01-22    2
        2023-01-29    2
        2023-02-05    3
        2023-02-12    3
        2023-02-19    4
        Freq: W-SUN, dtype: int64

        With upsampling and limiting (only fill the first new date with the
        previous value):

        >>> ser.resample('W').ffill(limit=1)
        2023-01-01    1.0
        2023-01-08    1.0
        2023-01-15    2.0
        2023-01-22    2.0
        2023-01-29    NaN
        2023-02-05    3.0
        2023-02-12    NaN
        2023-02-19    4.0
        Freq: W-SUN, dtype: float64
        """
        return self._upsample("ffill", limit=limit)

    @final
    def nearest(self, limit: int | None = None):
        """
        Resample by using the nearest value.

        When resampling data, missing values may appear (e.g., when the
        resampling frequency is higher than the original frequency).
        The `nearest` method will replace ``NaN`` values that appeared in
        the resampled data with the value from the nearest member of the
        sequence, based on the index value.
        Missing values that existed in the original data will not be modified.
        If `limit` is given, fill only this many values in each direction for
        each of the original values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series or DataFrame
            An upsampled Series or DataFrame with ``NaN`` values filled with
            their nearest value.

        See Also
        --------
        backfill : Backward fill the new missing values in the resampled data.
        pad : Forward fill ``NaN`` values.

        Examples
        --------
        >>> s = pd.Series([1, 2],
        ...               index=pd.date_range('20180101',
        ...                                   periods=2,
        ...                                   freq='1h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        Freq: h, dtype: int64

        >>> s.resample('15min').nearest()
        2018-01-01 00:00:00    1
        2018-01-01 00:15:00    1
        2018-01-01 00:30:00    2
        2018-01-01 00:45:00    2
        2018-01-01 01:00:00    2
        Freq: 15min, dtype: int64

        Limit the number of upsampled values imputed by the nearest:

        >>> s.resample('15min').nearest(limit=1)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        Freq: 15min, dtype: float64
        """
        return self._upsample("nearest", limit=limit)

    @final
    def bfill(self, limit: int | None = None):
        """
        Backward fill the new missing values in the resampled data.

        In statistics, imputation is the process of replacing missing data with
        substituted values [1]_. When resampling data, missing values may
        appear (e.g., when the resampling frequency is higher than the original
        frequency). The backward fill will replace NaN values that appeared in
        the resampled data with the next value in the original sequence.
        Missing values that existed in the original data will not be modified.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series, DataFrame
            An upsampled Series or DataFrame with backward filled NaN values.

        See Also
        --------
        bfill : Alias of backfill.
        fillna : Fill NaN values using the specified method, which can be
            'backfill'.
        nearest : Fill NaN values with nearest neighbor starting from center.
        ffill : Forward fill NaN values.
        Series.fillna : Fill NaN values in the Series using the
            specified method, which can be 'backfill'.
        DataFrame.fillna : Fill NaN values in the DataFrame using the
            specified method, which can be 'backfill'.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)

        Examples
        --------
        Resampling a Series:

        >>> s = pd.Series([1, 2, 3],
        ...               index=pd.date_range('20180101', periods=3, freq='h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        2018-01-01 02:00:00    3
        Freq: h, dtype: int64

        >>> s.resample('30min').bfill()
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        >>> s.resample('15min').bfill(limit=2)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    NaN
        2018-01-01 00:30:00    2.0
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        2018-01-01 01:15:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 01:45:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 15min, dtype: float64

        Resampling a DataFrame that has missing values:

        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
        ...                   index=pd.date_range('20180101', periods=3,
        ...                                       freq='h'))
        >>> df
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 01:00:00  NaN  3
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('30min').bfill()
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 00:30:00  NaN  3
        2018-01-01 01:00:00  NaN  3
        2018-01-01 01:30:00  6.0  5
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('15min').bfill(limit=2)
                               a    b
        2018-01-01 00:00:00  2.0  1.0
        2018-01-01 00:15:00  NaN  NaN
        2018-01-01 00:30:00  NaN  3.0
        2018-01-01 00:45:00  NaN  3.0
        2018-01-01 01:00:00  NaN  3.0
        2018-01-01 01:15:00  NaN  NaN
        2018-01-01 01:30:00  6.0  5.0
        2018-01-01 01:45:00  6.0  5.0
        2018-01-01 02:00:00  6.0  5.0
        """
        return self._upsample("bfill", limit=limit)

    @final
    def fillna(self, method, limit: int | None = None):
        """
        Fill missing values introduced by upsampling.

        In statistics, imputation is the process of replacing missing data with
        substituted values [1]_. When resampling data, missing values may
        appear (e.g., when the resampling frequency is higher than the original
        frequency).

        Missing values that existed in the original data will
        not be modified.

        Parameters
        ----------
        method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'}
            Method to use for filling holes in resampled data

            * 'pad' or 'ffill': use previous valid observation to fill gap
              (forward fill).
            * 'backfill' or 'bfill': use next valid observation to fill gap.
            * 'nearest': use nearest valid observation to fill gap.

        limit : int, optional
            Limit of how many consecutive missing values to fill.

        Returns
        -------
        Series or DataFrame
            An upsampled Series or DataFrame with missing values filled.

        See Also
        --------
        bfill : Backward fill NaN values in the resampled data.
        ffill : Forward fill NaN values in the resampled data.
        nearest : Fill NaN values in the resampled data
            with nearest neighbor starting from center.
        interpolate : Fill NaN values using interpolation.
        Series.fillna : Fill NaN values in the Series using the
            specified method, which can be 'bfill' and 'ffill'.
        DataFrame.fillna : Fill NaN values in the DataFrame using the
            specified method, which can be 'bfill' and 'ffill'.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)

        Examples
        --------
        Resampling a Series:

        >>> s = pd.Series([1, 2, 3],
        ...               index=pd.date_range('20180101', periods=3, freq='h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        2018-01-01 02:00:00    3
        Freq: h, dtype: int64

        Without filling the missing values you get:

        >>> s.resample("30min").asfreq()
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    2.0
        2018-01-01 01:30:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        >>> s.resample('30min').fillna("backfill")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        >>> s.resample('15min').fillna("backfill", limit=2)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    NaN
        2018-01-01 00:30:00    2.0
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        2018-01-01 01:15:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 01:45:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 15min, dtype: float64

        >>> s.resample('30min').fillna("pad")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    1
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    2
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        >>> s.resample('30min').fillna("nearest")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        Missing values present before the upsampling are not affected.

        >>> sm = pd.Series([1, None, 3],
        ...                index=pd.date_range('20180101', periods=3, freq='h'))
        >>> sm
        2018-01-01 00:00:00    1.0
        2018-01-01 01:00:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: h, dtype: float64

        >>> sm.resample('30min').fillna('backfill')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        >>> sm.resample('30min').fillna('pad')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    1.0
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        >>> sm.resample('30min').fillna('nearest')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        DataFrame resampling is done column-wise. All the same options are
        available.

        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
        ...                   index=pd.date_range('20180101', periods=3,
        ...                                       freq='h'))
        >>> df
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 01:00:00  NaN  3
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('30min').fillna("bfill")
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 00:30:00  NaN  3
        2018-01-01 01:00:00  NaN  3
        2018-01-01 01:30:00  6.0  5
        2018-01-01 02:00:00  6.0  5
        """
        warnings.warn(
            f"{type(self).__name__}.fillna is deprecated and will be removed "
            "in a future version. Use obj.ffill(), obj.bfill(), "
            "or obj.nearest() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._upsample(method, limit=limit)

    @final
    def interpolate(
        self,
        method: InterpolateOptions = "linear",
        *,
        axis: Axis = 0,
        limit: int | None = None,
        inplace: bool = False,
        limit_direction: Literal["forward", "backward", "both"] = "forward",
        limit_area=None,
        downcast=lib.no_default,
        **kwargs,
    ):
        """
        Interpolate values between target timestamps according to different methods.

        The original index is first reindexed to target timestamps
        (see :meth:`core.resample.Resampler.asfreq`),
        then the interpolation of ``NaN`` values via :meth:`DataFrame.interpolate`
        happens.

        Parameters
        ----------
        method : str, default 'linear'
            Interpolation technique to use. One of:

            * 'linear': Ignore the index and treat the values as equally
              spaced. This is the only method supported on MultiIndexes.
            * 'time': Works on daily and higher resolution data to interpolate
              given length of interval.
            * 'index', 'values': use the actual numerical values of the index.
            * 'pad': Fill in NaNs using existing values.
            * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
              'barycentric', 'polynomial': Passed to
              `scipy.interpolate.interp1d`, whereas 'spline' is passed to
              `scipy.interpolate.UnivariateSpline`. These methods use the numerical
              values of the index. Both 'polynomial' and 'spline' require that
              you also specify an `order` (int), e.g.
              ``df.interpolate(method='polynomial', order=5)``. Note that,
              `slinear` method in Pandas refers to the Scipy first order `spline`
              instead of Pandas first order `spline`.
            * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
              'cubicspline': Wrappers around the SciPy interpolation methods of
              similar names. See `Notes`.
            * 'from_derivatives': Refers to
              `scipy.interpolate.BPoly.from_derivatives`.

        axis : {{0 or 'index', 1 or 'columns', None}}, default None
            Axis to interpolate along. For `Series` this parameter is unused
            and defaults to 0.
        limit : int, optional
            Maximum number of consecutive NaNs to fill. Must be greater than
            0.
        inplace : bool, default False
            Update the data in place if possible.
        limit_direction : {{'forward', 'backward', 'both'}}, Optional
            Consecutive NaNs will be filled in this direction.

            If limit is specified:

            * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
            * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
              'backward'.

            If 'limit' is not specified:

            * If 'method' is 'backfill' or 'bfill', the default is 'backward'
            * else the default is 'forward'

            raises ValueError if `limit_direction` is 'forward' or 'both' and
                method is 'backfill' or 'bfill'.
            raises ValueError if `limit_direction` is 'backward' or 'both' and
                method is 'pad' or 'ffill'.

        limit_area : {{`None`, 'inside', 'outside'}}, default None
            If limit is specified, consecutive NaNs will be filled with this
            restriction.

            * ``None``: No fill restriction.
            * 'inside': Only fill NaNs surrounded by valid values
              (interpolate).
            * 'outside': Only fill NaNs outside valid values (extrapolate).

        downcast : optional, 'infer' or None, defaults to None
            Downcast dtypes if possible.

            .. deprecated:: 2.1.0

        ``**kwargs`` : optional
            Keyword arguments to pass on to the interpolating function.

        Returns
        -------
        DataFrame or Series
            Interpolated values at the specified freq.

        See Also
        --------
        core.resample.Resampler.asfreq: Return the values at the new freq,
            essentially a reindex.
        DataFrame.interpolate: Fill NaN values using an interpolation method.

        Notes
        -----
        For high-frequency or non-equidistant time-series with timestamps
        the reindexing followed by interpolation may lead to information loss
        as shown in the last example.

        Examples
        --------

        >>> start = "2023-03-01T07:00:00"
        >>> timesteps = pd.date_range(start, periods=5, freq="s")
        >>> series = pd.Series(data=[1, -1, 2, 1, 3], index=timesteps)
        >>> series
        2023-03-01 07:00:00    1
        2023-03-01 07:00:01   -1
        2023-03-01 07:00:02    2
        2023-03-01 07:00:03    1
        2023-03-01 07:00:04    3
        Freq: s, dtype: int64

        Downsample the series to 0.5Hz by providing the period time of 2s.

        >>> series.resample("2s").interpolate("linear")
        2023-03-01 07:00:00    1
        2023-03-01 07:00:02    2
        2023-03-01 07:00:04    3
        Freq: 2s, dtype: int64

        Upsample the series to 2Hz by providing the period time of 500ms.

        >>> series.resample("500ms").interpolate("linear")
        2023-03-01 07:00:00.000    1.0
        2023-03-01 07:00:00.500    0.0
        2023-03-01 07:00:01.000   -1.0
        2023-03-01 07:00:01.500    0.5
        2023-03-01 07:00:02.000    2.0
        2023-03-01 07:00:02.500    1.5
        2023-03-01 07:00:03.000    1.0
        2023-03-01 07:00:03.500    2.0
        2023-03-01 07:00:04.000    3.0
        Freq: 500ms, dtype: float64

        Internal reindexing with ``asfreq()`` prior to interpolation leads to
        an interpolated timeseries on the basis of the reindexed timestamps
        (anchors). Since not all datapoints from original series become anchors,
        it can lead to misleading interpolation results as in the following example:

        >>> series.resample("400ms").interpolate("linear")
        2023-03-01 07:00:00.000    1.0
        2023-03-01 07:00:00.400    1.2
        2023-03-01 07:00:00.800    1.4
        2023-03-01 07:00:01.200    1.6
        2023-03-01 07:00:01.600    1.8
        2023-03-01 07:00:02.000    2.0
        2023-03-01 07:00:02.400    2.2
        2023-03-01 07:00:02.800    2.4
        2023-03-01 07:00:03.200    2.6
        2023-03-01 07:00:03.600    2.8
        2023-03-01 07:00:04.000    3.0
        Freq: 400ms, dtype: float64

        Note that the series erroneously increases between two anchors
        ``07:00:00`` and ``07:00:02``.
        """
        assert downcast is lib.no_default  # just checking coverage
        result = self._upsample("asfreq")
        return result.interpolate(
            method=method,
            axis=axis,
            limit=limit,
            inplace=inplace,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            **kwargs,
        )

    @final
    def asfreq(self, fill_value=None):
        """
        Return the values at the new freq, essentially a reindex.

        Parameters
        ----------
        fill_value : scalar, optional
            Value to use for missing values, applied during upsampling (note
            this does not fill NaNs that already were present).

        Returns
        -------
        DataFrame or Series
            Values at the specified freq.

        See Also
        --------
        Series.asfreq: Convert TimeSeries to specified frequency.
        DataFrame.asfreq: Convert TimeSeries to specified frequency.

        Examples
        --------

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-31', '2023-02-01', '2023-02-28']))
        >>> ser
        2023-01-01    1
        2023-01-31    2
        2023-02-01    3
        2023-02-28    4
        dtype: int64
        >>> ser.resample('MS').asfreq()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64
        """
        return self._upsample("asfreq", fill_value=fill_value)

    @final
    def sum(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute sum of group values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only float, int, boolean columns.

            .. versionchanged:: 2.0.0

                numeric_only no longer accepts ``None``.

        min_count : int, default 0
            The required number of valid values to perform the operation. If fewer
            than ``min_count`` non-NA values are present the result will be NA.

        Returns
        -------
        Series or DataFrame
            Computed sum of values within each group.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').sum()
        2023-01-01    3
        2023-02-01    7
        Freq: MS, dtype: int64
        """
        maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
        nv.validate_resampler_func("sum", args, kwargs)
        return self._downsample("sum", numeric_only=numeric_only, min_count=min_count)

    @final
    def prod(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute prod of group values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only float, int, boolean columns.

            .. versionchanged:: 2.0.0

                numeric_only no longer accepts ``None``.

        min_count : int, default 0
            The required number of valid values to perform the operation. If fewer
            than ``min_count`` non-NA values are present the result will be NA.

        Returns
        -------
        Series or DataFrame
            Computed prod of values within each group.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').prod()
        2023-01-01     2
        2023-02-01    12
        Freq: MS, dtype: int64
        """
        maybe_warn_args_and_kwargs(type(self), "prod", args, kwargs)
        nv.validate_resampler_func("prod", args, kwargs)
        return self._downsample("prod", numeric_only=numeric_only, min_count=min_count)

    @final
    def min(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute min value of group.

        Returns
        -------
        Series or DataFrame

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').min()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64
        """

        maybe_warn_args_and_kwargs(type(self), "min", args, kwargs)
        nv.validate_resampler_func("min", args, kwargs)
        return self._downsample("min", numeric_only=numeric_only, min_count=min_count)

    @final
    def max(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute max value of group.

        Returns
        -------
        Series or DataFrame

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').max()
        2023-01-01    2
        2023-02-01    4
        Freq: MS, dtype: int64
        """
        maybe_warn_args_and_kwargs(type(self), "max", args, kwargs)
        nv.validate_resampler_func("max", args, kwargs)
        return self._downsample("max", numeric_only=numeric_only, min_count=min_count)

    @final
    @doc(GroupBy.first)
    def first(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        skipna: bool = True,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "first", args, kwargs)
        nv.validate_resampler_func("first", args, kwargs)
        return self._downsample(
            "first", numeric_only=numeric_only, min_count=min_count, skipna=skipna
        )

    @final
    @doc(GroupBy.last)
    def last(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        skipna: bool = True,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "last", args, kwargs)
        nv.validate_resampler_func("last", args, kwargs)
        return self._downsample(
            "last", numeric_only=numeric_only, min_count=min_count, skipna=skipna
        )

    @final
    @doc(GroupBy.median)
    def median(self, numeric_only: bool = False, *args, **kwargs):
        maybe_warn_args_and_kwargs(type(self), "median", args, kwargs)
        nv.validate_resampler_func("median", args, kwargs)
        return self._downsample("median", numeric_only=numeric_only)

    @final
    def mean(
        self,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute mean of groups, excluding missing values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Mean of values within each group.

        Examples
        --------

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').mean()
        2023-01-01    1.5
        2023-02-01    3.5
        Freq: MS, dtype: float64
        """
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
        nv.validate_resampler_func("mean", args, kwargs)
        return self._downsample("mean", numeric_only=numeric_only)

    @final
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute standard deviation of groups, excluding missing values.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionadded:: 1.5.0

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Standard deviation of values within each group.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').std()
        2023-01-01    1.000000
        2023-02-01    2.645751
        Freq: MS, dtype: float64
        """
        maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
        nv.validate_resampler_func("std", args, kwargs)
        return self._downsample("std", ddof=ddof, numeric_only=numeric_only)

    @final
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute variance of groups, excluding missing values.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.

        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionadded:: 1.5.0

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Variance of values within each group.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').var()
        2023-01-01    1.0
        2023-02-01    7.0
        Freq: MS, dtype: float64

        >>> ser.resample('MS').var(ddof=0)
        2023-01-01    0.666667
        2023-02-01    4.666667
        Freq: MS, dtype: float64
        """
        maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
        nv.validate_resampler_func("var", args, kwargs)
        return self._downsample("var", ddof=ddof, numeric_only=numeric_only)

    @final
    @doc(GroupBy.sem)
    def sem(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs)
        nv.validate_resampler_func("sem", args, kwargs)
        return self._downsample("sem", ddof=ddof, numeric_only=numeric_only)

    @final
    @doc(GroupBy.ohlc)
    def ohlc(
        self,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "ohlc", args, kwargs)
        nv.validate_resampler_func("ohlc", args, kwargs)

        ax = self.ax
        obj = self._obj_with_exclusions
        if len(ax) == 0:
            # GH#42902
            obj = obj.copy()
            obj.index = _asfreq_compat(obj.index, self.freq)
            if obj.ndim == 1:
                obj = obj.to_frame()
                obj = obj.reindex(["open", "high", "low", "close"], axis=1)
            else:
                mi = MultiIndex.from_product(
                    [obj.columns, ["open", "high", "low", "close"]]
                )
                obj = obj.reindex(mi, axis=1)
            return obj

        return self._downsample("ohlc")

    @final
    @doc(SeriesGroupBy.nunique)
    def nunique(
        self,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "nunique", args, kwargs)
        nv.validate_resampler_func("nunique", args, kwargs)
        return self._downsample("nunique")

    @final
    @doc(GroupBy.size)
    def size(self):
        result = self._downsample("size")

        # If the result is a non-empty DataFrame we stack to get a Series
        # GH 46826
        if isinstance(result, ABCDataFrame) and not result.empty:
            result = result.stack(future_stack=True)

        if not len(self.ax):
            from pandas import Series

            if self._selected_obj.ndim == 1:
                name = self._selected_obj.name
            else:
                name = None
            result = Series([], index=result.index, dtype="int64", name=name)
        return result

    @final
    @doc(GroupBy.count)
    def count(self):
        result = self._downsample("count")
        if not len(self.ax):
            if self._selected_obj.ndim == 1:
                result = type(self._selected_obj)(
                    [], index=result.index, dtype="int64", name=self._selected_obj.name
                )
            else:
                from pandas import DataFrame

                result = DataFrame(
                    [], index=result.index, columns=result.columns, dtype="int64"
                )

        return result

    @final
    def quantile(self, q: float | list[float] | AnyArrayLike = 0.5, **kwargs):
        """
        Return value at the given quantile.

        Parameters
        ----------
        q : float or array-like, default 0.5 (50% quantile)

        Returns
        -------
        DataFrame or Series
            Quantile of values within each group.

        See Also
        --------
        Series.quantile
            Return a series, where the index is q and the values are the quantiles.
        DataFrame.quantile
            Return a DataFrame, where the columns are the columns of self,
            and the values are the quantiles.
        DataFrameGroupBy.quantile
            Return a DataFrame, where the columns are groupby columns,
            and the values are its quantiles.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').quantile()
        2023-01-01    2.0
        2023-02-01    4.0
        Freq: MS, dtype: float64

        >>> ser.resample('MS').quantile(.25)
        2023-01-01    1.5
        2023-02-01    3.5
        Freq: MS, dtype: float64
        """
        return self._downsample("quantile", q=q, **kwargs)


class _GroupByMixin(PandasObject, SelectionMixin):
    """
    Provide the groupby facilities.
    """

    _attributes: list[str]  # in practice the same as Resampler._attributes
    _selection: IndexLabel | None = None
    _groupby: GroupBy
    _timegrouper: TimeGrouper

    def __init__(
        self,
        *,
        parent: Resampler,
        groupby: GroupBy,
        key=None,
        selection: IndexLabel | None = None,
        include_groups: bool = False,
    ) -> None:
        # reached via ._gotitem and _get_resampler_for_grouping

        assert isinstance(groupby, GroupBy), type(groupby)

        # parent is always a Resampler, sometimes a _GroupByMixin
        assert isinstance(parent, Resampler), type(parent)

        # initialize our GroupByMixin object with
        # the resampler attributes
        for attr in self._attributes:
            setattr(self, attr, getattr(parent, attr))
        self._selection = selection

        self.binner = parent.binner
        self.key = key

        self._groupby = groupby
        self._timegrouper = copy.copy(parent._timegrouper)

        self.ax = parent.ax
        self.obj = parent.obj
        self.include_groups = include_groups

    @no_type_check
    def _apply(self, f, *args, **kwargs):
        """
        Dispatch to _upsample; we are stripping all of the _upsample kwargs and
        performing the original function call on the grouped object.
        """

        def func(x):
            x = self._resampler_cls(x, timegrouper=self._timegrouper, gpr_index=self.ax)

            if isinstance(f, str):
                return getattr(x, f)(**kwargs)

            return x.apply(f, *args, **kwargs)

        result = _apply(self._groupby, func, include_groups=self.include_groups)
        return self._wrap_result(result)
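    # Sketch: df.groupby("key").resample("2D").mean() reaches this path; each
    # group is re-wrapped in the underlying Resampler class (via `func` above)
    # and the reduction is applied per group.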

    _upsample = _apply
    _downsample = _apply
    _groupby_and_aggregate = _apply

    @final
    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None, ndim=1
                assert subset.ndim == 1

        # Try to select from a DataFrame, falling back to a Series
        try:
            if isinstance(key, list) and self.key not in key and self.key is not None:
                key.append(self.key)
            groupby = self._groupby[key]
        except IndexError:
            groupby = self._groupby

        selection = self._infer_selection(key, subset)

        new_rs = type(self)(
            groupby=groupby,
            parent=cast(Resampler, self),
            selection=selection,
        )
        return new_rs


class DatetimeIndexResampler(Resampler):
    ax: DatetimeIndex

    @property
    def _resampler_for_grouping(self):
        return DatetimeIndexResamplerGroupby

    def _get_binner_for_time(self):
        # this is how we are actually creating the bins
        if self.kind == "period":
            return self._timegrouper._get_time_period_bins(self.ax)
        return self._timegrouper._get_time_bins(self.ax)

    def _downsample(self, how, **kwargs):
        """
        Downsample the cython defined function.

        Parameters
        ----------
        how : string / cython mapped function
        **kwargs : kw args passed to how function
        """
        orig_how = how
        how = com.get_cython_func(how) or how
        if orig_how != how:
            warn_alias_replacement(self, orig_how, how)
        ax = self.ax

        # Excludes `on` column when provided
        obj = self._obj_with_exclusions

        if not len(ax):
            # reset to the new freq
            obj = obj.copy()
            obj.index = obj.index._with_freq(self.freq)
            assert obj.index.freq == self.freq, (obj.index.freq, self.freq)
            return obj

        # do we have a regular frequency

        # error: Item "None" of "Optional[Any]" has no attribute "binlabels"
        if (
            (ax.freq is not None or ax.inferred_freq is not None)
            and len(self._grouper.binlabels) > len(ax)
            and how is None
        ):
            # let's do an asfreq
            return self.asfreq()

        # we are downsampling
        # we want to call the actual grouper method here
        if self.axis == 0:
            result = obj.groupby(self._grouper).aggregate(how, **kwargs)
        else:
            # test_resample_axis1
            result = obj.T.groupby(self._grouper).aggregate(how, **kwargs).T

        return self._wrap_result(result)

    def _adjust_binner_for_upsample(self, binner):
        """
        Adjust our binner when upsampling.

        The range of a new index should not be outside specified range
        """
        if self.closed == "right":
            binner = binner[1:]
        else:
            binner = binner[:-1]
        return binner
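    # e.g. with closed="right" the first edge is trimmed (binner[1:]);
    # otherwise the trailing edge is (binner[:-1]), so the upsampled index
    # stays within the range implied by the original data.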

    def _upsample(self, method, limit: int | None = None, fill_value=None):
        """
        Parameters
        ----------
        method : string {'backfill', 'bfill', 'pad',
            'ffill', 'asfreq'} method for upsampling
        limit : int, default None
            Maximum size gap to fill when reindexing
        fill_value : scalar, default None
            Value to use for missing values

        See Also
        --------
        .fillna: Fill NA/NaN values using the specified method.

        """
        if self.axis:
            raise AssertionError("axis must be 0")
        if self._from_selection:
            raise ValueError(
                "Upsampling from level= or on= selection "
                "is not supported, use .set_index(...) "
                "to explicitly set index to datetime-like"
            )

        ax = self.ax
        obj = self._selected_obj
        binner = self.binner
        res_index = self._adjust_binner_for_upsample(binner)

        # if we have the same frequency as our axis, then we are equal sampling
        if (
            limit is None
            and to_offset(ax.inferred_freq) == self.freq
            and len(obj) == len(res_index)
        ):
            result = obj.copy()
            result.index = res_index
        else:
            if method == "asfreq":
                method = None
            result = obj.reindex(
                res_index, method=method, limit=limit, fill_value=fill_value
            )

        return self._wrap_result(result)
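    # Sketch: hourly data upsampled to "30min" reindexes onto res_index;
    # method=None (asfreq) leaves the new slots as NaN, while "ffill"/"bfill"
    # fill them from the neighboring original values.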

    def _wrap_result(self, result):
        result = super()._wrap_result(result)

        # we may have a different kind that we were asked originally
        # convert if needed
        if self.kind == "period" and not isinstance(result.index, PeriodIndex):
            if isinstance(result.index, MultiIndex):
                # GH 24103 - e.g. groupby resample
                if not isinstance(result.index.levels[-1], PeriodIndex):
                    new_level = result.index.levels[-1].to_period(self.freq)
                    result.index = result.index.set_levels(new_level, level=-1)
            else:
                result.index = result.index.to_period(self.freq)
        return result


# error: Definition of "ax" in base class "_GroupByMixin" is incompatible
# with definition in base class "DatetimeIndexResampler"
class DatetimeIndexResamplerGroupby(  # type: ignore[misc]
    _GroupByMixin, DatetimeIndexResampler
):
    """
    Provides a resample of a groupby implementation.
    """

    @property
    def _resampler_cls(self):
        return DatetimeIndexResampler


class PeriodIndexResampler(DatetimeIndexResampler):
    # error: Incompatible types in assignment (expression has type "PeriodIndex", base
    # class "DatetimeIndexResampler" defined the type as "DatetimeIndex")
    ax: PeriodIndex  # type: ignore[assignment]

    @property
    def _resampler_for_grouping(self):
        warnings.warn(
            "Resampling a groupby with a PeriodIndex is deprecated. "
            "Cast to DatetimeIndex before resampling instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return PeriodIndexResamplerGroupby

    def _get_binner_for_time(self):
        if self.kind == "timestamp":
            return super()._get_binner_for_time()
        return self._timegrouper._get_period_bins(self.ax)

    def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
        obj = super()._convert_obj(obj)

        if self._from_selection:
            # see GH 14008, GH 12871
            msg = (
                "Resampling from level= or on= selection "
                "with a PeriodIndex is not currently supported, "
                "use .set_index(...) to explicitly set index"
            )
            raise NotImplementedError(msg)

        # convert to timestamp
        if self.kind == "timestamp":
            obj = obj.to_timestamp(how=self.convention)

        return obj

    def _downsample(self, how, **kwargs):
        """
        Downsample the cython defined function.

        Parameters
        ----------
        how : string / cython mapped function
        **kwargs : kw args passed to how function
        """
        # we may need to actually resample as if we are timestamps
        if self.kind == "timestamp":
            return super()._downsample(how, **kwargs)

        orig_how = how
        how = com.get_cython_func(how) or how
        if orig_how != how:
            warn_alias_replacement(self, orig_how, how)
        ax = self.ax

        if is_subperiod(ax.freq, self.freq):
            # Downsampling
            return self._groupby_and_aggregate(how, **kwargs)
        elif is_superperiod(ax.freq, self.freq):
            if how == "ohlc":
                # GH #13083
                # upsampling to subperiods is handled as an asfreq, which works
                # for pure aggregating/reducing methods
                # OHLC reduces along the time dimension, but creates multiple
                # values for each period -> handle by _groupby_and_aggregate()
                return self._groupby_and_aggregate(how)
            return self.asfreq()
        elif ax.freq == self.freq:
            return self.asfreq()

        raise IncompatibleFrequency(
            f"Frequency {ax.freq} cannot be resampled to {self.freq}, "
            "as they are not sub or super periods"
        )
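    # e.g. is_subperiod(D, M) is True, so daily periods are aggregated into
    # monthly bins, while is_superperiod(M, D) routes through asfreq (or
    # through _groupby_and_aggregate for "ohlc").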
1954
1955 def _upsample(self, method, limit: int | None = None, fill_value=None):
1956 """
1957 Parameters
1958 ----------
1959 method : {'backfill', 'bfill', 'pad', 'ffill'}
1960 Method for upsampling.
1961 limit : int, default None
1962 Maximum size gap to fill when reindexing.
1963 fill_value : scalar, default None
1964 Value to use for missing values.
1965
1966 See Also
1967 --------
1968 .fillna: Fill NA/NaN values using the specified method.
1969
1970 """
1971 # we may need to actually resample as if we are timestamps
1972 if self.kind == "timestamp":
1973 return super()._upsample(method, limit=limit, fill_value=fill_value)
1974
1975 ax = self.ax
1976 obj = self.obj
1977 new_index = self.binner
1978
1979 # Start vs. end of period
1980 memb = ax.asfreq(self.freq, how=self.convention)
1981
1982 # Get the fill indexer
1983 if method == "asfreq":
1984 method = None
1985 indexer = memb.get_indexer(new_index, method=method, limit=limit)
1986 new_obj = _take_new_index(
1987 obj,
1988 indexer,
1989 new_index,
1990 axis=self.axis,
1991 )
1992 return self._wrap_result(new_obj)
1993
1994
1995# error: Definition of "ax" in base class "_GroupByMixin" is incompatible with
1996# definition in base class "PeriodIndexResampler"
1997class PeriodIndexResamplerGroupby( # type: ignore[misc]
1998 _GroupByMixin, PeriodIndexResampler
1999):
2000 """
2001 Provides a resample of a groupby implementation.
2002 """
2003
2004 @property
2005 def _resampler_cls(self):
2006 return PeriodIndexResampler
2007
2008
2009class TimedeltaIndexResampler(DatetimeIndexResampler):
2010 # error: Incompatible types in assignment (expression has type "TimedeltaIndex",
2011 # base class "DatetimeIndexResampler" defined the type as "DatetimeIndex")
2012 ax: TimedeltaIndex # type: ignore[assignment]
2013
2014 @property
2015 def _resampler_for_grouping(self):
2016 return TimedeltaIndexResamplerGroupby
2017
2018 def _get_binner_for_time(self):
2019 return self._timegrouper._get_time_delta_bins(self.ax)
2020
2021 def _adjust_binner_for_upsample(self, binner):
2022 """
2023 Adjust our binner when upsampling.
2024
2025 The range of a new index is allowed to be greater than original range
2026 so we don't need to change the length of a binner, GH 13022
2027 """
2028 return binner
2029
2030
2031# error: Definition of "ax" in base class "_GroupByMixin" is incompatible with
2032# definition in base class "DatetimeIndexResampler"
2033class TimedeltaIndexResamplerGroupby( # type: ignore[misc]
2034 _GroupByMixin, TimedeltaIndexResampler
2035):
2036 """
2037 Provides a resample of a groupby implementation.
2038 """
2039
2040 @property
2041 def _resampler_cls(self):
2042 return TimedeltaIndexResampler
2043
2044
2045def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler:
2046 """
2047 Create a TimeGrouper and return our resampler.
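
    A doctest-style sketch (values are illustrative; ``obj.resample(...)`` is
    the public path that ends up here):

    >>> import pandas as pd
    >>> s = pd.Series(range(4), index=pd.date_range("2000", periods=4, freq="h"))
    >>> get_resampler(s, freq="2h").sum()
    2000-01-01 00:00:00    1
    2000-01-01 02:00:00    5
    Freq: 2h, dtype: int64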
2048 """
2049 tg = TimeGrouper(obj, **kwds) # type: ignore[arg-type]
2050 return tg._get_resampler(obj, kind=kind)
2051
2052
2053get_resampler.__doc__ = Resampler.__doc__
2054
2055
2056def get_resampler_for_grouping(
2057 groupby: GroupBy,
2058 rule,
2059 how=None,
2060 fill_method=None,
2061 limit: int | None = None,
2062 kind=None,
2063 on=None,
2064 include_groups: bool = True,
2065 **kwargs,
2066) -> Resampler:
2067 """
    Return the appropriate resampler when resampling within a groupby.
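
    A minimal sketch of the public path that reaches this helper (values are
    illustrative):

    >>> import pandas as pd
    >>> df = pd.DataFrame(
    ...     {"key": ["a", "a", "b", "b"], "val": range(4)},
    ...     index=pd.date_range("2000", periods=4, freq="h"),
    ... )
    >>> r = df.groupby("key").resample("2h")  # DatetimeIndexResamplerGroupby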
2069 """
2070 # .resample uses 'on' similar to how .groupby uses 'key'
2071 tg = TimeGrouper(freq=rule, key=on, **kwargs)
2072 resampler = tg._get_resampler(groupby.obj, kind=kind)
2073 return resampler._get_resampler_for_grouping(
2074 groupby=groupby, include_groups=include_groups, key=tg.key
2075 )
2076
2077
2078class TimeGrouper(Grouper):
2079 """
2080 Custom groupby class for time-interval grouping.
2081
2082 Parameters
2083 ----------
    freq : pandas date offset or offset alias identifying the bin edges
    closed : {'left', 'right'}
        Closed end of the bin interval.
    label : {'left', 'right'}
        Interval boundary to use for labeling.
    convention : {'start', 'end', 'e', 's'}
        Which end of each period to use when the axis is a PeriodIndex.
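
    Examples
    --------
    An illustrative check: passing ``freq`` to ``pd.Grouper`` dispatches to
    this class.

    >>> import pandas as pd
    >>> type(pd.Grouper(freq="2h")).__name__
    'TimeGrouper'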
2089 """
2090
2091 _attributes = Grouper._attributes + (
2092 "closed",
2093 "label",
2094 "how",
2095 "kind",
2096 "convention",
2097 "origin",
2098 "offset",
2099 )
2100
2101 origin: TimeGrouperOrigin
2102
2103 def __init__(
2104 self,
2105 obj: Grouper | None = None,
2106 freq: Frequency = "Min",
2107 key: str | None = None,
2108 closed: Literal["left", "right"] | None = None,
2109 label: Literal["left", "right"] | None = None,
2110 how: str = "mean",
2111 axis: Axis = 0,
2112 fill_method=None,
2113 limit: int | None = None,
2114 kind: str | None = None,
2115 convention: Literal["start", "end", "e", "s"] | None = None,
2116 origin: Literal["epoch", "start", "start_day", "end", "end_day"]
2117 | TimestampConvertibleTypes = "start_day",
2118 offset: TimedeltaConvertibleTypes | None = None,
2119 group_keys: bool = False,
2120 **kwargs,
2121 ) -> None:
2122 # Check for correctness of the keyword arguments which would
2123 # otherwise silently use the default if misspelled
2124 if label not in {None, "left", "right"}:
2125 raise ValueError(f"Unsupported value {label} for `label`")
2126 if closed not in {None, "left", "right"}:
2127 raise ValueError(f"Unsupported value {closed} for `closed`")
2128 if convention not in {None, "start", "end", "e", "s"}:
2129 raise ValueError(f"Unsupported value {convention} for `convention`")
2130
2131 if (
2132 key is None
2133 and obj is not None
2134 and isinstance(obj.index, PeriodIndex) # type: ignore[attr-defined]
2135 or (
2136 key is not None
2137 and obj is not None
2138 and getattr(obj[key], "dtype", None) == "period" # type: ignore[index]
2139 )
2140 ):
2141 freq = to_offset(freq, is_period=True)
2142 else:
2143 freq = to_offset(freq)
2144
2145 end_types = {"ME", "YE", "QE", "BME", "BYE", "BQE", "W"}
2146 rule = freq.rule_code
2147 if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
2148 if closed is None:
2149 closed = "right"
2150 if label is None:
2151 label = "right"
2152 else:
2153 # The backward resample sets ``closed`` to ``'right'`` by default
2154 # since the last value should be considered as the edge point for
            # the last bin. When origin is "end" or "end_day", the value for a
2156 # specific ``Timestamp`` index stands for the resample result from
2157 # the current ``Timestamp`` minus ``freq`` to the current
2158 # ``Timestamp`` with a right close.
2159 if origin in ["end", "end_day"]:
2160 if closed is None:
2161 closed = "right"
2162 if label is None:
2163 label = "right"
2164 else:
2165 if closed is None:
2166 closed = "left"
2167 if label is None:
2168 label = "left"
2169
2170 self.closed = closed
2171 self.label = label
2172 self.kind = kind
2173 self.convention = convention if convention is not None else "e"
2174 self.how = how
2175 self.fill_method = fill_method
2176 self.limit = limit
2177 self.group_keys = group_keys
2178 self._arrow_dtype: ArrowDtype | None = None
2179
2180 if origin in ("epoch", "start", "start_day", "end", "end_day"):
2181 # error: Incompatible types in assignment (expression has type "Union[Union[
2182 # Timestamp, datetime, datetime64, signedinteger[_64Bit], float, str],
2183 # Literal['epoch', 'start', 'start_day', 'end', 'end_day']]", variable has
2184 # type "Union[Timestamp, Literal['epoch', 'start', 'start_day', 'end',
2185 # 'end_day']]")
2186 self.origin = origin # type: ignore[assignment]
2187 else:
2188 try:
2189 self.origin = Timestamp(origin)
2190 except (ValueError, TypeError) as err:
2191 raise ValueError(
2192 "'origin' should be equal to 'epoch', 'start', 'start_day', "
2193 "'end', 'end_day' or "
2194 f"should be a Timestamp convertible type. Got '{origin}' instead."
2195 ) from err
2196
2197 try:
2198 self.offset = Timedelta(offset) if offset is not None else None
2199 except (ValueError, TypeError) as err:
2200 raise ValueError(
2201 "'offset' should be a Timedelta convertible type. "
2202 f"Got '{offset}' instead."
2203 ) from err
2204
2205 # always sort time groupers
2206 kwargs["sort"] = True
2207
2208 super().__init__(freq=freq, key=key, axis=axis, **kwargs)
2209
2210 def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler:
2211 """
2212 Return my resampler or raise if we have an invalid axis.
2213
2214 Parameters
2215 ----------
2216 obj : Series or DataFrame
        kind : str, optional
            'period', 'timestamp' and 'timedelta' are valid
2219
2220 Returns
2221 -------
2222 Resampler
2223
2224 Raises
2225 ------
        TypeError
            If the axis is not a DatetimeIndex, TimedeltaIndex, or PeriodIndex.
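
        Examples
        --------
        Illustrative only:

        >>> import pandas as pd
        >>> s = pd.Series(range(3), index=pd.date_range("2000", periods=3, freq="D"))
        >>> type(s.resample("ME")).__name__
        'DatetimeIndexResampler'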
2227
2228 """
2229 _, ax, _ = self._set_grouper(obj, gpr_index=None)
2230 if isinstance(ax, DatetimeIndex):
2231 return DatetimeIndexResampler(
2232 obj,
2233 timegrouper=self,
2234 kind=kind,
2235 axis=self.axis,
2236 group_keys=self.group_keys,
2237 gpr_index=ax,
2238 )
2239 elif isinstance(ax, PeriodIndex) or kind == "period":
2240 if isinstance(ax, PeriodIndex):
2241 # GH#53481
2242 warnings.warn(
2243 "Resampling with a PeriodIndex is deprecated. "
2244 "Cast index to DatetimeIndex before resampling instead.",
2245 FutureWarning,
2246 stacklevel=find_stack_level(),
2247 )
2248 else:
2249 warnings.warn(
2250 "Resampling with kind='period' is deprecated. "
2251 "Use datetime paths instead.",
2252 FutureWarning,
2253 stacklevel=find_stack_level(),
2254 )
2255 return PeriodIndexResampler(
2256 obj,
2257 timegrouper=self,
2258 kind=kind,
2259 axis=self.axis,
2260 group_keys=self.group_keys,
2261 gpr_index=ax,
2262 )
2263 elif isinstance(ax, TimedeltaIndex):
2264 return TimedeltaIndexResampler(
2265 obj,
2266 timegrouper=self,
2267 axis=self.axis,
2268 group_keys=self.group_keys,
2269 gpr_index=ax,
2270 )
2271
2272 raise TypeError(
2273 "Only valid with DatetimeIndex, "
2274 "TimedeltaIndex or PeriodIndex, "
2275 f"but got an instance of '{type(ax).__name__}'"
2276 )
2277
2278 def _get_grouper(
2279 self, obj: NDFrameT, validate: bool = True
2280 ) -> tuple[BinGrouper, NDFrameT]:
2281 # create the resampler and return our binner
2282 r = self._get_resampler(obj)
2283 return r._grouper, cast(NDFrameT, r.obj)
2284
2285 def _get_time_bins(self, ax: DatetimeIndex):
2286 if not isinstance(ax, DatetimeIndex):
2287 raise TypeError(
2288 "axis must be a DatetimeIndex, but got "
2289 f"an instance of {type(ax).__name__}"
2290 )
2291
2292 if len(ax) == 0:
2293 binner = labels = DatetimeIndex(
2294 data=[], freq=self.freq, name=ax.name, dtype=ax.dtype
2295 )
2296 return binner, [], labels
2297
2298 first, last = _get_timestamp_range_edges(
2299 ax.min(),
2300 ax.max(),
2301 self.freq,
2302 unit=ax.unit,
2303 closed=self.closed,
2304 origin=self.origin,
2305 offset=self.offset,
2306 )
2307 # GH #12037
        # use first/last directly instead of calling replace() on them,
        # because replace() will swallow the nanosecond part; the last bin
        # may then end slightly before the true end if the end contains a
        # nanosecond part, leading to a `Values falls after last bin` error
2312 # GH 25758: If DST lands at midnight (e.g. 'America/Havana'), user feedback
2313 # has noted that ambiguous=True provides the most sensible result
2314 binner = labels = date_range(
2315 freq=self.freq,
2316 start=first,
2317 end=last,
2318 tz=ax.tz,
2319 name=ax.name,
2320 ambiguous=True,
2321 nonexistent="shift_forward",
2322 unit=ax.unit,
2323 )
2324
2325 ax_values = ax.asi8
2326 binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
2327
2328 # general version, knowing nothing about relative frequencies
2329 bins = lib.generate_bins_dt64(
2330 ax_values, bin_edges, self.closed, hasnans=ax.hasnans
2331 )
2332
2333 if self.closed == "right":
2334 labels = binner
2335 if self.label == "right":
2336 labels = labels[1:]
2337 elif self.label == "right":
2338 labels = labels[1:]
2339
2340 if ax.hasnans:
2341 binner = binner.insert(0, NaT)
2342 labels = labels.insert(0, NaT)
2343
2344 # if we end up with more labels than bins
2345 # adjust the labels
2346 # GH4076
2347 if len(bins) < len(labels):
2348 labels = labels[: len(bins)]
2349
2350 return binner, bins, labels
2351
2352 def _adjust_bin_edges(
2353 self, binner: DatetimeIndex, ax_values: npt.NDArray[np.int64]
2354 ) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]:
2355 # Some hacks for > daily data, see #1471, #1458, #1483
2356
2357 if self.freq.name in ("BME", "ME", "W") or self.freq.name.split("-")[0] in (
2358 "BQE",
2359 "BYE",
2360 "QE",
2361 "YE",
2362 "W",
2363 ):
2364 # If the right end-point is on the last day of the month, roll forwards
2365 # until the last moment of that day. Note that we only do this for offsets
2366 # which correspond to the end of a super-daily period - "month start", for
2367 # example, is excluded.
2368 if self.closed == "right":
2369 # GH 21459, GH 9119: Adjust the bins relative to the wall time
2370 edges_dti = binner.tz_localize(None)
2371 edges_dti = (
2372 edges_dti
2373 + Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit)
2374 - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit)
2375 )
2376 bin_edges = edges_dti.tz_localize(binner.tz).asi8
2377 else:
2378 bin_edges = binner.asi8
2379
2380 # intraday values on last day
2381 if bin_edges[-2] > ax_values.max():
2382 bin_edges = bin_edges[:-1]
2383 binner = binner[:-1]
2384 else:
2385 bin_edges = binner.asi8
2386 return binner, bin_edges
2387
2388 def _get_time_delta_bins(self, ax: TimedeltaIndex):
2389 if not isinstance(ax, TimedeltaIndex):
2390 raise TypeError(
2391 "axis must be a TimedeltaIndex, but got "
2392 f"an instance of {type(ax).__name__}"
2393 )
2394
2395 if not isinstance(self.freq, Tick):
2396 # GH#51896
2397 raise ValueError(
2398 "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
2399 f"e.g. '24h' or '3D', not {self.freq}"
2400 )
2401
2402 if not len(ax):
2403 binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name)
2404 return binner, [], labels
2405
2406 start, end = ax.min(), ax.max()
2407
2408 if self.closed == "right":
2409 end += self.freq
2410
2411 labels = binner = timedelta_range(
2412 start=start, end=end, freq=self.freq, name=ax.name
2413 )
2414
2415 end_stamps = labels
2416 if self.closed == "left":
2417 end_stamps += self.freq
2418
2419 bins = ax.searchsorted(end_stamps, side=self.closed)
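        # e.g. (illustrative): ax = ["0h", "1h", "2h"] with freq="2h" and
        # closed="left" gives binner/labels ["0h", "2h"], end_stamps
        # ["2h", "4h"], and bins = [2, 3].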
2420
2421 if self.offset:
2422 # GH 10530 & 31809
2423 labels += self.offset
2424
2425 return binner, bins, labels
2426
2427 def _get_time_period_bins(self, ax: DatetimeIndex):
2428 if not isinstance(ax, DatetimeIndex):
2429 raise TypeError(
2430 "axis must be a DatetimeIndex, but got "
2431 f"an instance of {type(ax).__name__}"
2432 )
2433
2434 freq = self.freq
2435
2436 if len(ax) == 0:
2437 binner = labels = PeriodIndex(
2438 data=[], freq=freq, name=ax.name, dtype=ax.dtype
2439 )
2440 return binner, [], labels
2441
        labels = binner = period_range(
            start=ax[0], end=ax[-1], freq=freq, name=ax.name
        )
2443
2444 end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp()
2445 if ax.tz:
2446 end_stamps = end_stamps.tz_localize(ax.tz)
2447 bins = ax.searchsorted(end_stamps, side="left")
2448
2449 return binner, bins, labels
2450
2451 def _get_period_bins(self, ax: PeriodIndex):
2452 if not isinstance(ax, PeriodIndex):
2453 raise TypeError(
2454 "axis must be a PeriodIndex, but got "
2455 f"an instance of {type(ax).__name__}"
2456 )
2457
2458 memb = ax.asfreq(self.freq, how=self.convention)
2459
        # NaT handling as in pandas._libs.lib.generate_bins_dt64()
2461 nat_count = 0
2462 if memb.hasnans:
2463 # error: Incompatible types in assignment (expression has type
2464 # "bool_", variable has type "int") [assignment]
2465 nat_count = np.sum(memb._isnan) # type: ignore[assignment]
2466 memb = memb[~memb._isnan]
2467
2468 if not len(memb):
2469 # index contains no valid (non-NaT) values
2470 bins = np.array([], dtype=np.int64)
2471 binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
2472 if len(ax) > 0:
2473 # index is all NaT
2474 binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax))
2475 return binner, bins, labels
2476
2477 freq_mult = self.freq.n
2478
2479 start = ax.min().asfreq(self.freq, how=self.convention)
2480 end = ax.max().asfreq(self.freq, how="end")
2481 bin_shift = 0
2482
2483 if isinstance(self.freq, Tick):
2484 # GH 23882 & 31809: get adjusted bin edge labels with 'origin'
2485 # and 'origin' support. This call only makes sense if the freq is a
2486 # Tick since offset and origin are only used in those cases.
2487 # Not doing this check could create an extra empty bin.
2488 p_start, end = _get_period_range_edges(
2489 start,
2490 end,
2491 self.freq,
2492 closed=self.closed,
2493 origin=self.origin,
2494 offset=self.offset,
2495 )
2496
2497 # Get offset for bin edge (not label edge) adjustment
2498 start_offset = Period(start, self.freq) - Period(p_start, self.freq)
2499 # error: Item "Period" of "Union[Period, Any]" has no attribute "n"
2500 bin_shift = start_offset.n % freq_mult # type: ignore[union-attr]
2501 start = p_start
2502
2503 labels = binner = period_range(
2504 start=start, end=end, freq=self.freq, name=ax.name
2505 )
2506
2507 i8 = memb.asi8
2508
2509 # when upsampling to subperiods, we need to generate enough bins
2510 expected_bins_count = len(binner) * freq_mult
2511 i8_extend = expected_bins_count - (i8[-1] - i8[0])
2512 rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult)
2513 rng += freq_mult
2514 # adjust bin edge indexes to account for base
2515 rng -= bin_shift
2516
2517 # Wrap in PeriodArray for PeriodArray.searchsorted
2518 prng = type(memb._data)(rng, dtype=memb.dtype)
2519 bins = memb.searchsorted(prng, side="left")
2520
2521 if nat_count > 0:
2522 binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)
2523
2524 return binner, bins, labels
2525
2526 def _set_grouper(
2527 self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None
2528 ) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]:
2529 obj, ax, indexer = super()._set_grouper(obj, sort, gpr_index=gpr_index)
2530 if isinstance(ax.dtype, ArrowDtype) and ax.dtype.kind in "Mm":
2531 self._arrow_dtype = ax.dtype
2532 ax = Index(
2533 cast(ArrowExtensionArray, ax.array)._maybe_convert_datelike_array()
2534 )
2535 return obj, ax, indexer
2536
2537
2538def _take_new_index(
2539 obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0
2540) -> NDFrameT:
2541 if isinstance(obj, ABCSeries):
2542 new_values = algos.take_nd(obj._values, indexer)
2543 # error: Incompatible return value type (got "Series", expected "NDFrameT")
2544 return obj._constructor( # type: ignore[return-value]
2545 new_values, index=new_index, name=obj.name
2546 )
2547 elif isinstance(obj, ABCDataFrame):
2548 if axis == 1:
2549 raise NotImplementedError("axis 1 is not supported")
2550 new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
2551 # error: Incompatible return value type (got "DataFrame", expected "NDFrameT")
2552 return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) # type: ignore[return-value]
2553 else:
2554 raise ValueError("'obj' should be either a Series or a DataFrame")
2555
2556
2557def _get_timestamp_range_edges(
2558 first: Timestamp,
2559 last: Timestamp,
2560 freq: BaseOffset,
2561 unit: str,
2562 closed: Literal["right", "left"] = "left",
2563 origin: TimeGrouperOrigin = "start_day",
2564 offset: Timedelta | None = None,
2565) -> tuple[Timestamp, Timestamp]:
2566 """
2567 Adjust the `first` Timestamp to the preceding Timestamp that resides on
2568 the provided offset. Adjust the `last` Timestamp to the following
2569 Timestamp that resides on the provided offset. Input Timestamps that
2570 already reside on the offset will be adjusted depending on the type of
2571 offset and the `closed` parameter.
2572
2573 Parameters
2574 ----------
2575 first : pd.Timestamp
2576 The beginning Timestamp of the range to be adjusted.
2577 last : pd.Timestamp
2578 The ending Timestamp of the range to be adjusted.
2579 freq : pd.DateOffset
2580 The dateoffset to which the Timestamps will be adjusted.
2581 closed : {'right', 'left'}, default "left"
2582 Which side of bin interval is closed.
2583 origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
2584 The timestamp on which to adjust the grouping. The timezone of origin must
2585 match the timezone of the index.
2586 If a timestamp is not used, these values are also supported:
2587
2588 - 'epoch': `origin` is 1970-01-01
2589 - 'start': `origin` is the first value of the timeseries
2590 - 'start_day': `origin` is the first day at midnight of the timeseries
2591 offset : pd.Timedelta, default is None
2592 An offset timedelta added to the origin.
2593
2594 Returns
2595 -------
2596 A tuple of length 2, containing the adjusted pd.Timestamp objects.
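
    Examples
    --------
    An illustrative check with arbitrary values:

    >>> import pandas as pd
    >>> _get_timestamp_range_edges(
    ...     pd.Timestamp("2000-10-01 23:30:00"),
    ...     pd.Timestamp("2000-10-02 00:30:00"),
    ...     pd.offsets.Minute(7),
    ...     unit="ns",
    ... )
    (Timestamp('2000-10-01 23:27:00'), Timestamp('2000-10-02 00:37:00'))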
2597 """
2598 if isinstance(freq, Tick):
2599 index_tz = first.tz
2600 if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
2601 raise ValueError("The origin must have the same timezone as the index.")
2602 if origin == "epoch":
2603 # set the epoch based on the timezone to have similar bins results when
2604 # resampling on the same kind of indexes on different timezones
2605 origin = Timestamp("1970-01-01", tz=index_tz)
2606
2607 if isinstance(freq, Day):
2608 # _adjust_dates_anchored assumes 'D' means 24h, but first/last
2609 # might contain a DST transition (23h, 24h, or 25h).
2610 # So "pretend" the dates are naive when adjusting the endpoints
2611 first = first.tz_localize(None)
2612 last = last.tz_localize(None)
2613 if isinstance(origin, Timestamp):
2614 origin = origin.tz_localize(None)
2615
2616 first, last = _adjust_dates_anchored(
2617 first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit
2618 )
2619 if isinstance(freq, Day):
2620 first = first.tz_localize(index_tz)
2621 last = last.tz_localize(index_tz)
2622 else:
2623 first = first.normalize()
2624 last = last.normalize()
2625
2626 if closed == "left":
2627 first = Timestamp(freq.rollback(first))
2628 else:
2629 first = Timestamp(first - freq)
2630
2631 last = Timestamp(last + freq)
2632
2633 return first, last
2634
2635
2636def _get_period_range_edges(
2637 first: Period,
2638 last: Period,
2639 freq: BaseOffset,
2640 closed: Literal["right", "left"] = "left",
2641 origin: TimeGrouperOrigin = "start_day",
2642 offset: Timedelta | None = None,
2643) -> tuple[Period, Period]:
2644 """
2645 Adjust the provided `first` and `last` Periods to the respective Period of
2646 the given offset that encompasses them.
2647
2648 Parameters
2649 ----------
2650 first : pd.Period
2651 The beginning Period of the range to be adjusted.
2652 last : pd.Period
2653 The ending Period of the range to be adjusted.
2654 freq : pd.DateOffset
2655 The freq to which the Periods will be adjusted.
2656 closed : {'right', 'left'}, default "left"
2657 Which side of bin interval is closed.
2658 origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day'
2659 The timestamp on which to adjust the grouping. The timezone of origin must
2660 match the timezone of the index.
2661
2662 If a timestamp is not used, these values are also supported:
2663
2664 - 'epoch': `origin` is 1970-01-01
2665 - 'start': `origin` is the first value of the timeseries
2666 - 'start_day': `origin` is the first day at midnight of the timeseries
2667 offset : pd.Timedelta, default is None
2668 An offset timedelta added to the origin.
2669
2670 Returns
2671 -------
2672 A tuple of length 2, containing the adjusted pd.Period objects.
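
    Examples
    --------
    A minimal sketch with arbitrary daily periods:

    >>> import pandas as pd
    >>> _get_period_range_edges(
    ...     pd.Period("2000-01-02", freq="D"),
    ...     pd.Period("2000-01-05", freq="D"),
    ...     pd.offsets.Day(),
    ... )
    (Period('2000-01-02', 'D'), Period('2000-01-05', 'D'))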
2673 """
2674 if not all(isinstance(obj, Period) for obj in [first, last]):
2675 raise TypeError("'first' and 'last' must be instances of type Period")
2676
2677 # GH 23882
2678 first_ts = first.to_timestamp()
2679 last_ts = last.to_timestamp()
2680 adjust_first = not freq.is_on_offset(first_ts)
2681 adjust_last = freq.is_on_offset(last_ts)
2682
2683 first_ts, last_ts = _get_timestamp_range_edges(
2684 first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset
2685 )
2686
2687 first = (first_ts + int(adjust_first) * freq).to_period(freq)
2688 last = (last_ts - int(adjust_last) * freq).to_period(freq)
2689 return first, last
2690
2691
2692def _insert_nat_bin(
2693 binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
2694) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]:
    # NaT handling as in pandas._libs.lib.generate_bins_dt64()
2696 # shift bins by the number of NaT
2697 assert nat_count > 0
2698 bins += nat_count
2699 bins = np.insert(bins, 0, nat_count)
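    # e.g. (illustrative): bins [1, 2] with nat_count=1 become [1, 2, 3]; the
    # NaT values sort first, so the existing edges shift right by nat_count
    # and a leading bin ending at position nat_count is prepended.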
2700
2701 # Incompatible types in assignment (expression has type "Index", variable
2702 # has type "PeriodIndex")
2703 binner = binner.insert(0, NaT) # type: ignore[assignment]
2704 # Incompatible types in assignment (expression has type "Index", variable
2705 # has type "PeriodIndex")
2706 labels = labels.insert(0, NaT) # type: ignore[assignment]
2707 return binner, bins, labels
2708
2709
2710def _adjust_dates_anchored(
2711 first: Timestamp,
2712 last: Timestamp,
2713 freq: Tick,
2714 closed: Literal["right", "left"] = "right",
2715 origin: TimeGrouperOrigin = "start_day",
2716 offset: Timedelta | None = None,
2717 unit: str = "ns",
2718) -> tuple[Timestamp, Timestamp]:
    # First and last offsets should be calculated from the start day to fix an
    # error caused by resampling across multiple days when a one-day period is
    # not a multiple of the frequency. See GH 8683.
    # To handle frequencies that are not multiples of, or divisible by, a day,
    # we allow a fixed origin timestamp to be defined. See GH 31809.
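    # Worked example (illustrative): freq="7min" (420s), origin="start_day",
    # first="2000-10-01 23:30" -> 84600s since midnight, and 84600 % 420 ==
    # 180, so with closed="left" the first edge rolls back 3 minutes to
    # "2000-10-01 23:27".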
2724 first = first.as_unit(unit)
2725 last = last.as_unit(unit)
2726 if offset is not None:
2727 offset = offset.as_unit(unit)
2728
2729 freq_value = Timedelta(freq).as_unit(unit)._value
2730
2731 origin_timestamp = 0 # origin == "epoch"
2732 if origin == "start_day":
2733 origin_timestamp = first.normalize()._value
2734 elif origin == "start":
2735 origin_timestamp = first._value
2736 elif isinstance(origin, Timestamp):
2737 origin_timestamp = origin.as_unit(unit)._value
2738 elif origin in ["end", "end_day"]:
2739 origin_last = last if origin == "end" else last.ceil("D")
2740 sub_freq_times = (origin_last._value - first._value) // freq_value
2741 if closed == "left":
2742 sub_freq_times += 1
2743 first = origin_last - sub_freq_times * freq
2744 origin_timestamp = first._value
2745 origin_timestamp += offset._value if offset else 0
2746
    # GH 10117 & GH 19375. If first and last contain timezone information,
    # perform the calculation in UTC in order to avoid localizing on an
    # ambiguous or nonexistent time.
2750 first_tzinfo = first.tzinfo
2751 last_tzinfo = last.tzinfo
2752 if first_tzinfo is not None:
2753 first = first.tz_convert("UTC")
2754 if last_tzinfo is not None:
2755 last = last.tz_convert("UTC")
2756
2757 foffset = (first._value - origin_timestamp) % freq_value
2758 loffset = (last._value - origin_timestamp) % freq_value
2759
2760 if closed == "right":
2761 if foffset > 0:
2762 # roll back
2763 fresult_int = first._value - foffset
2764 else:
2765 fresult_int = first._value - freq_value
2766
2767 if loffset > 0:
2768 # roll forward
2769 lresult_int = last._value + (freq_value - loffset)
2770 else:
2771 # already the end of the road
2772 lresult_int = last._value
2773 else: # closed == 'left'
2774 if foffset > 0:
2775 fresult_int = first._value - foffset
2776 else:
2777 # start of the road
2778 fresult_int = first._value
2779
2780 if loffset > 0:
2781 # roll forward
2782 lresult_int = last._value + (freq_value - loffset)
2783 else:
2784 lresult_int = last._value + freq_value
2785 fresult = Timestamp(fresult_int, unit=unit)
2786 lresult = Timestamp(lresult_int, unit=unit)
2787 if first_tzinfo is not None:
2788 fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
2789 if last_tzinfo is not None:
2790 lresult = lresult.tz_localize("UTC").tz_convert(last_tzinfo)
2791 return fresult, lresult
2792
2793
2794def asfreq(
2795 obj: NDFrameT,
2796 freq,
2797 method=None,
2798 how=None,
2799 normalize: bool = False,
2800 fill_value=None,
2801) -> NDFrameT:
2802 """
2803 Utility frequency conversion method for Series/DataFrame.
2804
2805 See :meth:`pandas.NDFrame.asfreq` for full documentation.
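
    Examples
    --------
    A small illustrative example (values are arbitrary):

    >>> import pandas as pd
    >>> s = pd.Series([1, 2], index=pd.to_datetime(["2000-01-01", "2000-01-03"]))
    >>> asfreq(s, "D")
    2000-01-01    1.0
    2000-01-02    NaN
    2000-01-03    2.0
    Freq: D, dtype: float64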
2806 """
2807 if isinstance(obj.index, PeriodIndex):
2808 if method is not None:
2809 raise NotImplementedError("'method' argument is not supported")
2810
2811 if how is None:
2812 how = "E"
2813
2814 if isinstance(freq, BaseOffset):
2815 if hasattr(freq, "_period_dtype_code"):
2816 freq = freq_to_period_freqstr(freq.n, freq.name)
2817 else:
2818 raise ValueError(
2819 f"Invalid offset: '{freq.base}' for converting time series "
2820 f"with PeriodIndex."
2821 )
2822
2823 new_obj = obj.copy()
2824 new_obj.index = obj.index.asfreq(freq, how=how)
2825
    elif len(obj.index) == 0:
        new_obj = obj.copy()
        new_obj.index = _asfreq_compat(obj.index, freq)
2830 else:
2831 unit = None
2832 if isinstance(obj.index, DatetimeIndex):
2833 # TODO: should we disallow non-DatetimeIndex?
2834 unit = obj.index.unit
2835 dti = date_range(obj.index.min(), obj.index.max(), freq=freq, unit=unit)
2836 dti.name = obj.index.name
2837 new_obj = obj.reindex(dti, method=method, fill_value=fill_value)
2838 if normalize:
2839 new_obj.index = new_obj.index.normalize()
2840
2841 return new_obj
2842
2843
2844def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq):
2845 """
2846 Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex.
2847
2848 Parameters
2849 ----------
2850 index : PeriodIndex, DatetimeIndex, or TimedeltaIndex
2851 freq : DateOffset
2852
2853 Returns
2854 -------
2855 same type as index
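
    Examples
    --------
    Illustrative only:

    >>> import pandas as pd
    >>> _asfreq_compat(pd.DatetimeIndex([]), freq="D")
    DatetimeIndex([], dtype='datetime64[ns]', freq='D')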
2856 """
2857 if len(index) != 0:
2858 # This should never be reached, always checked by the caller
2859 raise ValueError(
2860 "Can only set arbitrary freq for empty DatetimeIndex or TimedeltaIndex"
2861 )
2862 new_index: Index
2863 if isinstance(index, PeriodIndex):
2864 new_index = index.asfreq(freq=freq)
2865 elif isinstance(index, DatetimeIndex):
2866 new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name)
2867 elif isinstance(index, TimedeltaIndex):
2868 new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name)
2869 else: # pragma: no cover
2870 raise TypeError(type(index))
2871 return new_index
2872
2873
2874def maybe_warn_args_and_kwargs(cls, kernel: str, args, kwargs) -> None:
2875 """
2876 Warn for deprecation of args and kwargs in resample functions.
2877
2878 Parameters
2879 ----------
2880 cls : type
2881 Class to warn about.
2882 kernel : str
2883 Operation name.
2884 args : tuple or None
2885 args passed by user. Will be None if and only if kernel does not have args.
2886 kwargs : dict or None
2887 kwargs passed by user. Will be None if and only if kernel does not have kwargs.
2888 """
2889 warn_args = args is not None and len(args) > 0
2890 warn_kwargs = kwargs is not None and len(kwargs) > 0
2891 if warn_args and warn_kwargs:
2892 msg = "args and kwargs"
2893 elif warn_args:
2894 msg = "args"
2895 elif warn_kwargs:
2896 msg = "kwargs"
2897 else:
2898 return
2899 warnings.warn(
2900 f"Passing additional {msg} to {cls.__name__}.{kernel} has "
2901 "no impact on the result and is deprecated. This will "
2902 "raise a TypeError in a future version of pandas.",
2903 category=FutureWarning,
2904 stacklevel=find_stack_level(),
2905 )
2906
2907
2908def _apply(
2909 grouped: GroupBy, how: Callable, *args, include_groups: bool, **kwargs
2910) -> DataFrame:
2911 # GH#7155 - rewrite warning to appear as if it came from `.resample`
2912 target_message = "DataFrameGroupBy.apply operated on the grouping columns"
2913 new_message = _apply_groupings_depr.format("DataFrameGroupBy", "resample")
2914 with rewrite_warning(
2915 target_message=target_message,
2916 target_category=DeprecationWarning,
2917 new_message=new_message,
2918 ):
2919 result = grouped.apply(how, *args, include_groups=include_groups, **kwargs)
2920 return result