Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/resample.py: 25%


from __future__ import annotations

import copy
from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Callable,
    Literal,
    cast,
    final,
    no_type_check,
)
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import (
    BaseOffset,
    IncompatibleFrequency,
    NaT,
    Period,
    Timedelta,
    Timestamp,
    to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas._typing import NDFrameT
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
    Appender,
    Substitution,
    doc,
)
from pandas.util._exceptions import (
    find_stack_level,
    rewrite_warning,
)

from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

import pandas.core.algorithms as algos
from pandas.core.apply import (
    ResamplerWindowApply,
    warn_alias_replacement,
)
from pandas.core.arrays import ArrowExtensionArray
from pandas.core.base import (
    PandasObject,
    SelectionMixin,
)
import pandas.core.common as com
from pandas.core.generic import (
    NDFrame,
    _shared_docs,
)
from pandas.core.groupby.generic import SeriesGroupBy
from pandas.core.groupby.groupby import (
    BaseGroupBy,
    GroupBy,
    _apply_groupings_depr,
    _pipe_template,
    get_groupby,
)
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.api import MultiIndex
from pandas.core.indexes.base import Index
from pandas.core.indexes.datetimes import (
    DatetimeIndex,
    date_range,
)
from pandas.core.indexes.period import (
    PeriodIndex,
    period_range,
)
from pandas.core.indexes.timedeltas import (
    TimedeltaIndex,
    timedelta_range,
)

from pandas.tseries.frequencies import (
    is_subperiod,
    is_superperiod,
)
from pandas.tseries.offsets import (
    Day,
    Tick,
)

if TYPE_CHECKING:
    from collections.abc import Hashable

    from pandas._typing import (
        AnyArrayLike,
        Axis,
        AxisInt,
        Frequency,
        IndexLabel,
        InterpolateOptions,
        T,
        TimedeltaConvertibleTypes,
        TimeGrouperOrigin,
        TimestampConvertibleTypes,
        npt,
    )

    from pandas import (
        DataFrame,
        Series,
    )

_shared_docs_kwargs: dict[str, str] = {}


class Resampler(BaseGroupBy, PandasObject):
    """
    Class for resampling datetimelike data, a groupby-like operation.
    See aggregate, transform, and apply functions on this object.

    It's easiest to use obj.resample(...) to construct a Resampler.

    Parameters
    ----------
    obj : Series or DataFrame
    groupby : TimeGrouper
    axis : int, default 0
    kind : str or None
        'period', 'timestamp' to override default index treatment

    Returns
    -------
    a Resampler of the appropriate type

    Notes
    -----
    After resampling, see aggregate, apply, and transform functions.
    """

    _grouper: BinGrouper
    _timegrouper: TimeGrouper
    binner: DatetimeIndex | TimedeltaIndex | PeriodIndex  # depends on subclass
    exclusions: frozenset[Hashable] = frozenset()  # for SelectionMixin compat
    _internal_names_set = set({"obj", "ax", "_indexer"})

    # to the groupby descriptor
    _attributes = [
        "freq",
        "axis",
        "closed",
        "label",
        "convention",
        "kind",
        "origin",
        "offset",
    ]

    def __init__(
        self,
        obj: NDFrame,
        timegrouper: TimeGrouper,
        axis: Axis = 0,
        kind=None,
        *,
        gpr_index: Index,
        group_keys: bool = False,
        selection=None,
        include_groups: bool = True,
    ) -> None:
        self._timegrouper = timegrouper
        self.keys = None
        self.sort = True
        self.axis = obj._get_axis_number(axis)
        self.kind = kind
        self.group_keys = group_keys
        self.as_index = True
        self.include_groups = include_groups

        self.obj, self.ax, self._indexer = self._timegrouper._set_grouper(
            self._convert_obj(obj), sort=True, gpr_index=gpr_index
        )
        self.binner, self._grouper = self._get_binner()
        self._selection = selection
        if self._timegrouper.key is not None:
            self.exclusions = frozenset([self._timegrouper.key])
        else:
            self.exclusions = frozenset()

    @final
    def __str__(self) -> str:
        """
        Provide a nice str repr of our resample object.
        """
        attrs = (
            f"{k}={getattr(self._timegrouper, k)}"
            for k in self._attributes
            if getattr(self._timegrouper, k, None) is not None
        )
        return f"{type(self).__name__} [{', '.join(attrs)}]"

    @final
    def __getattr__(self, attr: str):
        if attr in self._internal_names_set:
            return object.__getattribute__(self, attr)
        if attr in self._attributes:
            return getattr(self._timegrouper, attr)
        if attr in self.obj:
            return self[attr]

        return object.__getattribute__(self, attr)
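
    # Given the dispatch above, the following are equivalent (an illustrative
    # sketch, not executed here; the column name "A" is hypothetical):
    #
    #     r = df.resample("2D")
    #     r.freq  # -> getattr(r._timegrouper, "freq"), as "freq" is in _attributes
    #     r.A     # -> r["A"], column selection, since "A" is a column of r.obj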

    @final
    @property
    def _from_selection(self) -> bool:
        """
        Is the resampling from a DataFrame column or MultiIndex level.
        """
        # upsampling and PeriodIndex resampling do not work
        # with selection; this state is used to catch and raise an error
        return self._timegrouper is not None and (
            self._timegrouper.key is not None or self._timegrouper.level is not None
        )
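
    # For example (an illustrative sketch; the column name "date" is
    # hypothetical):
    #
    #     df.resample("D", on="date")  # key is set   -> _from_selection is True
    #     df.resample("D", level=0)    # level is set -> _from_selection is True
    #     ser.resample("D")            # neither      -> _from_selection is False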

    def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
        """
        Provide any conversions for the object in order to handle it correctly.

        Parameters
        ----------
        obj : Series or DataFrame

        Returns
        -------
        Series or DataFrame
        """
        return obj._consolidate()

    def _get_binner_for_time(self):
        raise AbstractMethodError(self)

    @final
    def _get_binner(self):
        """
        Create the BinGrouper, assuming that self.set_grouper(obj)
        has already been called.
        """
        binner, bins, binlabels = self._get_binner_for_time()
        assert len(bins) == len(binlabels)
        bin_grouper = BinGrouper(bins, binlabels, indexer=self._indexer)
        return binner, bin_grouper
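
    # Rough sketch of what _get_binner_for_time returns for daily data
    # resampled to "2D" (values illustrative):
    #
    #     binner    -> index of bin edges, e.g. [2012-08-02, 2012-08-04, ...]
    #     bins      -> integer positions in the sorted axis where each bin ends
    #     binlabels -> the labels the aggregated result will carry, one per bin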

    @final
    @Substitution(
        klass="Resampler",
        examples="""
    >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
    ...                   index=pd.date_range('2012-08-02', periods=4))
    >>> df
                A
    2012-08-02  1
    2012-08-03  2
    2012-08-04  3
    2012-08-05  4

    To get the difference between each 2-day period's maximum and minimum
    value in one pass, you can do

    >>> df.resample('2D').pipe(lambda x: x.max() - x.min())
                A
    2012-08-02  1
    2012-08-04  1""",
    )
    @Appender(_pipe_template)
    def pipe(
        self,
        func: Callable[..., T] | tuple[Callable[..., T], str],
        *args,
        **kwargs,
    ) -> T:
        return super().pipe(func, *args, **kwargs)

    _agg_see_also_doc = dedent(
        """
    See Also
    --------
    DataFrame.groupby.aggregate : Aggregate using callable, string, dict,
        or list of string/callables.
    DataFrame.resample.transform : Transforms the Series on each group
        based on the given function.
    DataFrame.aggregate: Aggregate using one or more
        operations over the specified axis.
    """
    )

    _agg_examples_doc = dedent(
        """
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4, 5],
    ...               index=pd.date_range('20130101', periods=5, freq='s'))
    >>> s
    2013-01-01 00:00:00    1
    2013-01-01 00:00:01    2
    2013-01-01 00:00:02    3
    2013-01-01 00:00:03    4
    2013-01-01 00:00:04    5
    Freq: s, dtype: int64

    >>> r = s.resample('2s')

    >>> r.agg("sum")
    2013-01-01 00:00:00    3
    2013-01-01 00:00:02    7
    2013-01-01 00:00:04    5
    Freq: 2s, dtype: int64

    >>> r.agg(['sum', 'mean', 'max'])
                         sum  mean  max
    2013-01-01 00:00:00    3   1.5    2
    2013-01-01 00:00:02    7   3.5    4
    2013-01-01 00:00:04    5   5.0    5

    >>> r.agg({'result': lambda x: x.mean() / x.std(),
    ...        'total': "sum"})
                           result  total
    2013-01-01 00:00:00  2.121320      3
    2013-01-01 00:00:02  4.949747      7
    2013-01-01 00:00:04       NaN      5

    >>> r.agg(average="mean", total="sum")
                         average  total
    2013-01-01 00:00:00      1.5      3
    2013-01-01 00:00:02      3.5      7
    2013-01-01 00:00:04      5.0      5
    """
    )

    @final
    @doc(
        _shared_docs["aggregate"],
        see_also=_agg_see_also_doc,
        examples=_agg_examples_doc,
        klass="DataFrame",
        axis="",
    )
    def aggregate(self, func=None, *args, **kwargs):
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:
            how = func
            result = self._groupby_and_aggregate(how, *args, **kwargs)

        return result

    agg = aggregate
    apply = aggregate

    @final
    def transform(self, arg, *args, **kwargs):
        """
        Call function producing a like-indexed Series on each group.

        Return a Series with the transformed values.

        Parameters
        ----------
        arg : function
            To apply to each group. Should return a Series with the same index.

        Returns
        -------
        Series

        Examples
        --------
        >>> s = pd.Series([1, 2],
        ...               index=pd.date_range('20180101',
        ...                                   periods=2,
        ...                                   freq='1h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        Freq: h, dtype: int64

        >>> resampled = s.resample('15min')
        >>> resampled.transform(lambda x: (x - x.mean()) / x.std())
        2018-01-01 00:00:00   NaN
        2018-01-01 01:00:00   NaN
        Freq: h, dtype: float64
        """
        return self._selected_obj.groupby(self._timegrouper).transform(
            arg, *args, **kwargs
        )

    def _downsample(self, f, **kwargs):
        raise AbstractMethodError(self)

    def _upsample(self, f, limit: int | None = None, fill_value=None):
        raise AbstractMethodError(self)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        grouper = self._grouper
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None and ndim=1
                assert subset.ndim == 1
        if ndim == 1:
            assert subset.ndim == 1

        grouped = get_groupby(
            subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
        )
        return grouped

    def _groupby_and_aggregate(self, how, *args, **kwargs):
        """
        Re-evaluate the obj with a groupby aggregation.
        """
        grouper = self._grouper

        # Excludes `on` column when provided
        obj = self._obj_with_exclusions

        grouped = get_groupby(
            obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
        )

        try:
            if callable(how):
                # TODO: test_resample_apply_with_additional_args fails if we go
                #  through the non-lambda path, not clear that it should.
                func = lambda x: how(x, *args, **kwargs)
                result = grouped.aggregate(func)
            else:
                result = grouped.aggregate(how, *args, **kwargs)
        except (AttributeError, KeyError):
            # we have a non-reducing function; try to evaluate it
            # alternatively we want to evaluate only a column of the input

            # test_apply_to_one_column_of_df: the function being applied references
            #  a DataFrame column, but aggregate_item_by_item operates column-wise
            #  on Series, raising AttributeError or KeyError
            #  (depending on whether the column lookup uses getattr/__getitem__)
            result = _apply(
                grouped, how, *args, include_groups=self.include_groups, **kwargs
            )

        except ValueError as err:
            if "Must produce aggregated value" in str(err):
                # raised in _aggregate_named
                # see test_apply_without_aggregation, test_apply_with_mutated_index
                pass
            else:
                raise

            # we have a non-reducing function
            # try to evaluate
            result = _apply(
                grouped, how, *args, include_groups=self.include_groups, **kwargs
            )

        return self._wrap_result(result)
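
    # Illustrative sketch of the two paths above (not executed here): a
    # reducing function aggregates to one value per bin, while a non-reducing
    # function raises internally and is re-evaluated via the _apply fallback,
    # keeping one row per original observation.
    #
    #     r = s.resample("2s")
    #     r.agg(lambda x: x.sum())        # reduces -> one value per bin
    #     r.agg(lambda x: x - x.mean())   # non-reducing -> _apply fallback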

    @final
    def _get_resampler_for_grouping(
        self, groupby: GroupBy, key, include_groups: bool = True
    ):
        """
        Return the correct class for resampling with groupby.
        """
        return self._resampler_for_grouping(
            groupby=groupby, key=key, parent=self, include_groups=include_groups
        )

    def _wrap_result(self, result):
        """
        Potentially wrap any results.
        """
        # GH 47705
        obj = self.obj
        if (
            isinstance(result, ABCDataFrame)
            and len(result) == 0
            and not isinstance(result.index, PeriodIndex)
        ):
            result = result.set_index(
                _asfreq_compat(obj.index[:0], freq=self.freq), append=True
            )

        if isinstance(result, ABCSeries) and self._selection is not None:
            result.name = self._selection

        if isinstance(result, ABCSeries) and result.empty:
            # When index is all NaT, result is empty but index is not
            result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
            result.name = getattr(obj, "name", None)

        if self._timegrouper._arrow_dtype is not None:
            result.index = result.index.astype(self._timegrouper._arrow_dtype)

        return result

    @final
    def ffill(self, limit: int | None = None):
        """
        Forward fill the values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        An upsampled Series.

        See Also
        --------
        Series.fillna: Fill NA/NaN values using the specified method.
        DataFrame.fillna: Fill NA/NaN values using the specified method.

        Examples
        --------
        Here we only create a ``Series``.

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64

        Example for ``ffill`` with downsampling (we have fewer dates after resampling):

        >>> ser.resample('MS').ffill()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64

        Example for ``ffill`` with upsampling (fill the new dates with
        the previous value):

        >>> ser.resample('W').ffill()
        2023-01-01    1
        2023-01-08    1
        2023-01-15    2
        2023-01-22    2
        2023-01-29    2
        2023-02-05    3
        2023-02-12    3
        2023-02-19    4
        Freq: W-SUN, dtype: int64

        With upsampling and limiting (only fill the first new date with the
        previous value):

        >>> ser.resample('W').ffill(limit=1)
        2023-01-01    1.0
        2023-01-08    1.0
        2023-01-15    2.0
        2023-01-22    2.0
        2023-01-29    NaN
        2023-02-05    3.0
        2023-02-12    NaN
        2023-02-19    4.0
        Freq: W-SUN, dtype: float64
        """
        return self._upsample("ffill", limit=limit)

    @final
    def nearest(self, limit: int | None = None):
        """
        Resample by using the nearest value.

        When resampling data, missing values may appear (e.g., when the
        resampling frequency is higher than the original frequency).
        The `nearest` method will replace ``NaN`` values that appeared in
        the resampled data with the value from the nearest member of the
        sequence, based on the index value.
        Missing values that existed in the original data will not be modified.
        If `limit` is given, fill only this many values in each direction for
        each of the original values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series or DataFrame
            An upsampled Series or DataFrame with ``NaN`` values filled with
            their nearest value.

        See Also
        --------
        backfill : Backward fill the new missing values in the resampled data.
        pad : Forward fill ``NaN`` values.

        Examples
        --------
        >>> s = pd.Series([1, 2],
        ...               index=pd.date_range('20180101',
        ...                                   periods=2,
        ...                                   freq='1h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        Freq: h, dtype: int64

        >>> s.resample('15min').nearest()
        2018-01-01 00:00:00    1
        2018-01-01 00:15:00    1
        2018-01-01 00:30:00    2
        2018-01-01 00:45:00    2
        2018-01-01 01:00:00    2
        Freq: 15min, dtype: int64

        Limit the number of upsampled values imputed by the nearest:

        >>> s.resample('15min').nearest(limit=1)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        Freq: 15min, dtype: float64
        """
        return self._upsample("nearest", limit=limit)

    @final
    def bfill(self, limit: int | None = None):
        """
        Backward fill the new missing values in the resampled data.

        In statistics, imputation is the process of replacing missing data with
        substituted values [1]_. When resampling data, missing values may
        appear (e.g., when the resampling frequency is higher than the original
        frequency). The backward fill will replace NaN values that appeared in
        the resampled data with the next value in the original sequence.
        Missing values that existed in the original data will not be modified.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series, DataFrame
            An upsampled Series or DataFrame with backward filled NaN values.

        See Also
        --------
        bfill : Alias of backfill.
        fillna : Fill NaN values using the specified method, which can be
            'backfill'.
        nearest : Fill NaN values with nearest neighbor starting from center.
        ffill : Forward fill NaN values.
        Series.fillna : Fill NaN values in the Series using the
            specified method, which can be 'backfill'.
        DataFrame.fillna : Fill NaN values in the DataFrame using the
            specified method, which can be 'backfill'.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)

        Examples
        --------
        Resampling a Series:

        >>> s = pd.Series([1, 2, 3],
        ...               index=pd.date_range('20180101', periods=3, freq='h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        2018-01-01 02:00:00    3
        Freq: h, dtype: int64

        >>> s.resample('30min').bfill()
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        >>> s.resample('15min').bfill(limit=2)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    NaN
        2018-01-01 00:30:00    2.0
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        2018-01-01 01:15:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 01:45:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 15min, dtype: float64

        Resampling a DataFrame that has missing values:

        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
        ...                   index=pd.date_range('20180101', periods=3,
        ...                                       freq='h'))
        >>> df
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 01:00:00  NaN  3
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('30min').bfill()
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 00:30:00  NaN  3
        2018-01-01 01:00:00  NaN  3
        2018-01-01 01:30:00  6.0  5
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('15min').bfill(limit=2)
                               a    b
        2018-01-01 00:00:00  2.0  1.0
        2018-01-01 00:15:00  NaN  NaN
        2018-01-01 00:30:00  NaN  3.0
        2018-01-01 00:45:00  NaN  3.0
        2018-01-01 01:00:00  NaN  3.0
        2018-01-01 01:15:00  NaN  NaN
        2018-01-01 01:30:00  6.0  5.0
        2018-01-01 01:45:00  6.0  5.0
        2018-01-01 02:00:00  6.0  5.0
        """
        return self._upsample("bfill", limit=limit)

    @final
    def fillna(self, method, limit: int | None = None):
        """
        Fill missing values introduced by upsampling.

        In statistics, imputation is the process of replacing missing data with
        substituted values [1]_. When resampling data, missing values may
        appear (e.g., when the resampling frequency is higher than the original
        frequency).

        Missing values that existed in the original data will
        not be modified.

        Parameters
        ----------
        method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'}
            Method to use for filling holes in resampled data

            * 'pad' or 'ffill': use previous valid observation to fill gap
              (forward fill).
            * 'backfill' or 'bfill': use next valid observation to fill gap.
            * 'nearest': use nearest valid observation to fill gap.

        limit : int, optional
            Limit of how many consecutive missing values to fill.

        Returns
        -------
        Series or DataFrame
            An upsampled Series or DataFrame with missing values filled.

        See Also
        --------
        bfill : Backward fill NaN values in the resampled data.
        ffill : Forward fill NaN values in the resampled data.
        nearest : Fill NaN values in the resampled data
            with nearest neighbor starting from center.
        interpolate : Fill NaN values using interpolation.
        Series.fillna : Fill NaN values in the Series using the
            specified method, which can be 'bfill' and 'ffill'.
        DataFrame.fillna : Fill NaN values in the DataFrame using the
            specified method, which can be 'bfill' and 'ffill'.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)

        Examples
        --------
        Resampling a Series:

        >>> s = pd.Series([1, 2, 3],
        ...               index=pd.date_range('20180101', periods=3, freq='h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        2018-01-01 02:00:00    3
        Freq: h, dtype: int64

        Without filling the missing values you get:

        >>> s.resample("30min").asfreq()
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    2.0
        2018-01-01 01:30:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        >>> s.resample('30min').fillna("backfill")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        >>> s.resample('15min').fillna("backfill", limit=2)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    NaN
        2018-01-01 00:30:00    2.0
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        2018-01-01 01:15:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 01:45:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 15min, dtype: float64

        >>> s.resample('30min').fillna("pad")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    1
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    2
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        >>> s.resample('30min').fillna("nearest")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30min, dtype: int64

        Missing values present before the upsampling are not affected.

        >>> sm = pd.Series([1, None, 3],
        ...                index=pd.date_range('20180101', periods=3, freq='h'))
        >>> sm
        2018-01-01 00:00:00    1.0
        2018-01-01 01:00:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: h, dtype: float64

        >>> sm.resample('30min').fillna('backfill')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        >>> sm.resample('30min').fillna('pad')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    1.0
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        >>> sm.resample('30min').fillna('nearest')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 30min, dtype: float64

        DataFrame resampling is done column-wise. All the same options are
        available.

        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
        ...                   index=pd.date_range('20180101', periods=3,
        ...                                       freq='h'))
        >>> df
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 01:00:00  NaN  3
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('30min').fillna("bfill")
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 00:30:00  NaN  3
        2018-01-01 01:00:00  NaN  3
        2018-01-01 01:30:00  6.0  5
        2018-01-01 02:00:00  6.0  5
        """
        warnings.warn(
            f"{type(self).__name__}.fillna is deprecated and will be removed "
            "in a future version. Use obj.ffill(), obj.bfill(), "
            "or obj.nearest() instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._upsample(method, limit=limit)

    @final
    def interpolate(
        self,
        method: InterpolateOptions = "linear",
        *,
        axis: Axis = 0,
        limit: int | None = None,
        inplace: bool = False,
        limit_direction: Literal["forward", "backward", "both"] = "forward",
        limit_area=None,
        downcast=lib.no_default,
        **kwargs,
    ):
        """
        Interpolate values between target timestamps according to different methods.

        The original index is first reindexed to target timestamps
        (see :meth:`core.resample.Resampler.asfreq`),
        then the interpolation of ``NaN`` values via :meth:`DataFrame.interpolate`
        happens.

        Parameters
        ----------
        method : str, default 'linear'
            Interpolation technique to use. One of:

            * 'linear': Ignore the index and treat the values as equally
              spaced. This is the only method supported on MultiIndexes.
            * 'time': Works on daily and higher resolution data to interpolate
              given length of interval.
            * 'index', 'values': use the actual numerical values of the index.
            * 'pad': Fill in NaNs using existing values.
            * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
              'barycentric', 'polynomial': Passed to
              `scipy.interpolate.interp1d`, whereas 'spline' is passed to
              `scipy.interpolate.UnivariateSpline`. These methods use the numerical
              values of the index. Both 'polynomial' and 'spline' require that
              you also specify an `order` (int), e.g.
              ``df.interpolate(method='polynomial', order=5)``. Note that the
              `slinear` method in Pandas refers to the Scipy first order `spline`
              instead of Pandas first order `spline`.
            * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
              'cubicspline': Wrappers around the SciPy interpolation methods of
              similar names. See `Notes`.
            * 'from_derivatives': Refers to
              `scipy.interpolate.BPoly.from_derivatives`.

        axis : {0 or 'index', 1 or 'columns', None}, default None
            Axis to interpolate along. For `Series` this parameter is unused
            and defaults to 0.
        limit : int, optional
            Maximum number of consecutive NaNs to fill. Must be greater than
            0.
        inplace : bool, default False
            Update the data in place if possible.
        limit_direction : {'forward', 'backward', 'both'}, optional
            Consecutive NaNs will be filled in this direction.

            If limit is specified:

            * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
            * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
              'backward'.

            If 'limit' is not specified:

            * If 'method' is 'backfill' or 'bfill', the default is 'backward'
            * else the default is 'forward'

            Raises ValueError if `limit_direction` is 'forward' or 'both' and
            method is 'backfill' or 'bfill'.
            Raises ValueError if `limit_direction` is 'backward' or 'both' and
            method is 'pad' or 'ffill'.

        limit_area : {`None`, 'inside', 'outside'}, default None
            If limit is specified, consecutive NaNs will be filled with this
            restriction.

            * ``None``: No fill restriction.
            * 'inside': Only fill NaNs surrounded by valid values
              (interpolate).
            * 'outside': Only fill NaNs outside valid values (extrapolate).

        downcast : optional, 'infer' or None, defaults to None
            Downcast dtypes if possible.

            .. deprecated:: 2.1.0

        ``**kwargs`` : optional
            Keyword arguments to pass on to the interpolating function.

        Returns
        -------
        DataFrame or Series
            Interpolated values at the specified freq.

        See Also
        --------
        core.resample.Resampler.asfreq: Return the values at the new freq,
            essentially a reindex.
        DataFrame.interpolate: Fill NaN values using an interpolation method.

        Notes
        -----
        For high-frequency or non-equidistant time series with timestamps,
        the reindexing followed by interpolation may lead to information loss,
        as shown in the last example.

        Examples
        --------

        >>> start = "2023-03-01T07:00:00"
        >>> timesteps = pd.date_range(start, periods=5, freq="s")
        >>> series = pd.Series(data=[1, -1, 2, 1, 3], index=timesteps)
        >>> series
        2023-03-01 07:00:00    1
        2023-03-01 07:00:01   -1
        2023-03-01 07:00:02    2
        2023-03-01 07:00:03    1
        2023-03-01 07:00:04    3
        Freq: s, dtype: int64

        Downsample the series to 0.5Hz by providing the period time of 2s.

        >>> series.resample("2s").interpolate("linear")
        2023-03-01 07:00:00    1
        2023-03-01 07:00:02    2
        2023-03-01 07:00:04    3
        Freq: 2s, dtype: int64

        Upsample the series to 2Hz by providing the period time of 500ms.

        >>> series.resample("500ms").interpolate("linear")
        2023-03-01 07:00:00.000    1.0
        2023-03-01 07:00:00.500    0.0
        2023-03-01 07:00:01.000   -1.0
        2023-03-01 07:00:01.500    0.5
        2023-03-01 07:00:02.000    2.0
        2023-03-01 07:00:02.500    1.5
        2023-03-01 07:00:03.000    1.0
        2023-03-01 07:00:03.500    2.0
        2023-03-01 07:00:04.000    3.0
        Freq: 500ms, dtype: float64

        Internal reindexing with ``asfreq()`` prior to interpolation leads to
        an interpolated timeseries on the basis of the reindexed timestamps
        (anchors). Since not all datapoints from the original series become
        anchors, it can lead to misleading interpolation results, as in the
        following example:

        >>> series.resample("400ms").interpolate("linear")
        2023-03-01 07:00:00.000    1.0
        2023-03-01 07:00:00.400    1.2
        2023-03-01 07:00:00.800    1.4
        2023-03-01 07:00:01.200    1.6
        2023-03-01 07:00:01.600    1.8
        2023-03-01 07:00:02.000    2.0
        2023-03-01 07:00:02.400    2.2
        2023-03-01 07:00:02.800    2.4
        2023-03-01 07:00:03.200    2.6
        2023-03-01 07:00:03.600    2.8
        2023-03-01 07:00:04.000    3.0
        Freq: 400ms, dtype: float64

        Note that the series erroneously increases between two anchors
        ``07:00:00`` and ``07:00:02``.
        """
        assert downcast is lib.no_default  # just checking coverage
        result = self._upsample("asfreq")
        return result.interpolate(
            method=method,
            axis=axis,
            limit=limit,
            inplace=inplace,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            **kwargs,
        )

    @final
    def asfreq(self, fill_value=None):
        """
        Return the values at the new freq, essentially a reindex.

        Parameters
        ----------
        fill_value : scalar, optional
            Value to use for missing values, applied during upsampling (note
            this does not fill NaNs that already were present).

        Returns
        -------
        DataFrame or Series
            Values at the specified freq.

        See Also
        --------
        Series.asfreq: Convert TimeSeries to specified frequency.
        DataFrame.asfreq: Convert TimeSeries to specified frequency.

        Examples
        --------

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-31', '2023-02-01', '2023-02-28']))
        >>> ser
        2023-01-01    1
        2023-01-31    2
        2023-02-01    3
        2023-02-28    4
        dtype: int64
        >>> ser.resample('MS').asfreq()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64
        """
        return self._upsample("asfreq", fill_value=fill_value)

    @final
    def sum(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute sum of group values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only float, int, boolean columns.

            .. versionchanged:: 2.0.0

                numeric_only no longer accepts ``None``.

        min_count : int, default 0
            The required number of valid values to perform the operation. If fewer
            than ``min_count`` non-NA values are present the result will be NA.

        Returns
        -------
        Series or DataFrame
            Computed sum of values within each group.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').sum()
        2023-01-01    3
        2023-02-01    7
        Freq: MS, dtype: int64
        """
        maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
        nv.validate_resampler_func("sum", args, kwargs)
        return self._downsample("sum", numeric_only=numeric_only, min_count=min_count)

    @final
    def prod(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute prod of group values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only float, int, boolean columns.

            .. versionchanged:: 2.0.0

                numeric_only no longer accepts ``None``.

        min_count : int, default 0
            The required number of valid values to perform the operation. If fewer
            than ``min_count`` non-NA values are present the result will be NA.

        Returns
        -------
        Series or DataFrame
            Computed prod of values within each group.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').prod()
        2023-01-01     2
        2023-02-01    12
        Freq: MS, dtype: int64
        """
        maybe_warn_args_and_kwargs(type(self), "prod", args, kwargs)
        nv.validate_resampler_func("prod", args, kwargs)
        return self._downsample("prod", numeric_only=numeric_only, min_count=min_count)

    @final
    def min(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute min value of group.

        Returns
        -------
        Series or DataFrame

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').min()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64
        """

        maybe_warn_args_and_kwargs(type(self), "min", args, kwargs)
        nv.validate_resampler_func("min", args, kwargs)
        return self._downsample("min", numeric_only=numeric_only, min_count=min_count)

    @final
    def max(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        """
        Compute max value of group.

        Returns
        -------
        Series or DataFrame

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').max()
        2023-01-01    2
        2023-02-01    4
        Freq: MS, dtype: int64
        """
        maybe_warn_args_and_kwargs(type(self), "max", args, kwargs)
        nv.validate_resampler_func("max", args, kwargs)
        return self._downsample("max", numeric_only=numeric_only, min_count=min_count)

    @final
    @doc(GroupBy.first)
    def first(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        skipna: bool = True,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "first", args, kwargs)
        nv.validate_resampler_func("first", args, kwargs)
        return self._downsample(
            "first", numeric_only=numeric_only, min_count=min_count, skipna=skipna
        )

    @final
    @doc(GroupBy.last)
    def last(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        skipna: bool = True,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "last", args, kwargs)
        nv.validate_resampler_func("last", args, kwargs)
        return self._downsample(
            "last", numeric_only=numeric_only, min_count=min_count, skipna=skipna
        )

    @final
    @doc(GroupBy.median)
    def median(self, numeric_only: bool = False, *args, **kwargs):
        maybe_warn_args_and_kwargs(type(self), "median", args, kwargs)
        nv.validate_resampler_func("median", args, kwargs)
        return self._downsample("median", numeric_only=numeric_only)

    @final
    def mean(
        self,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute mean of groups, excluding missing values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Mean of values within each group.

        Examples
        --------

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
        >>> ser
        2023-01-01    1
        2023-01-15    2
        2023-02-01    3
        2023-02-15    4
        dtype: int64
        >>> ser.resample('MS').mean()
        2023-01-01    1.5
        2023-02-01    3.5
        Freq: MS, dtype: float64
        """
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
        nv.validate_resampler_func("mean", args, kwargs)
        return self._downsample("mean", numeric_only=numeric_only)

    @final
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute standard deviation of groups, excluding missing values.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionadded:: 1.5.0

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Standard deviation of values within each group.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').std()
        2023-01-01    1.000000
        2023-02-01    2.645751
        Freq: MS, dtype: float64
        """
        maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
        nv.validate_resampler_func("std", args, kwargs)
        return self._downsample("std", ddof=ddof, numeric_only=numeric_only)

    @final
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute variance of groups, excluding missing values.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.

        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionadded:: 1.5.0

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Variance of values within each group.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').var()
        2023-01-01    1.0
        2023-02-01    7.0
        Freq: MS, dtype: float64

        >>> ser.resample('MS').var(ddof=0)
        2023-01-01    0.666667
        2023-02-01    4.666667
        Freq: MS, dtype: float64
        """
        maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
        nv.validate_resampler_func("var", args, kwargs)
        return self._downsample("var", ddof=ddof, numeric_only=numeric_only)

    @final
    @doc(GroupBy.sem)
    def sem(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs)
        nv.validate_resampler_func("sem", args, kwargs)
        return self._downsample("sem", ddof=ddof, numeric_only=numeric_only)

    @final
    @doc(GroupBy.ohlc)
    def ohlc(
        self,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "ohlc", args, kwargs)
        nv.validate_resampler_func("ohlc", args, kwargs)

        ax = self.ax
        obj = self._obj_with_exclusions
        if len(ax) == 0:
            # GH#42902
            obj = obj.copy()
            obj.index = _asfreq_compat(obj.index, self.freq)
            if obj.ndim == 1:
                obj = obj.to_frame()
                obj = obj.reindex(["open", "high", "low", "close"], axis=1)
            else:
                mi = MultiIndex.from_product(
                    [obj.columns, ["open", "high", "low", "close"]]
                )
                obj = obj.reindex(mi, axis=1)
            return obj

        return self._downsample("ohlc")
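
    # For a DataFrame, each input column yields four output columns under a
    # MultiIndex (an illustrative sketch, not executed here):
    #
    #     df.resample("MS").ohlc()
    #     # columns: [(col, "open"), (col, "high"), (col, "low"), (col, "close")]
    #     # for each original column `col`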

    @final
    @doc(SeriesGroupBy.nunique)
    def nunique(
        self,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "nunique", args, kwargs)
        nv.validate_resampler_func("nunique", args, kwargs)
        return self._downsample("nunique")

    @final
    @doc(GroupBy.size)
    def size(self):
        result = self._downsample("size")

        # If the result is a non-empty DataFrame we stack to get a Series
        # GH 46826
        if isinstance(result, ABCDataFrame) and not result.empty:
            result = result.stack(future_stack=True)

        if not len(self.ax):
            from pandas import Series

            if self._selected_obj.ndim == 1:
                name = self._selected_obj.name
            else:
                name = None
            result = Series([], index=result.index, dtype="int64", name=name)
        return result

    @final
    @doc(GroupBy.count)
    def count(self):
        result = self._downsample("count")
        if not len(self.ax):
            if self._selected_obj.ndim == 1:
                result = type(self._selected_obj)(
                    [], index=result.index, dtype="int64", name=self._selected_obj.name
                )
            else:
                from pandas import DataFrame

                result = DataFrame(
                    [], index=result.index, columns=result.columns, dtype="int64"
                )

        return result

    @final
    def quantile(self, q: float | list[float] | AnyArrayLike = 0.5, **kwargs):
        """
        Return value at the given quantile.

        Parameters
        ----------
        q : float or array-like, default 0.5 (50% quantile)

        Returns
        -------
        DataFrame or Series
            Quantile of values within each group.

        See Also
        --------
        Series.quantile
            Return a series, where the index is q and the values are the quantiles.
        DataFrame.quantile
            Return a DataFrame, where the columns are the columns of self,
            and the values are the quantiles.
        DataFrameGroupBy.quantile
            Return a DataFrame, where the columns are groupby columns,
            and the values are its quantiles.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').quantile()
        2023-01-01    2.0
        2023-02-01    4.0
        Freq: MS, dtype: float64

        >>> ser.resample('MS').quantile(.25)
        2023-01-01    1.5
        2023-02-01    3.5
        Freq: MS, dtype: float64
        """
        return self._downsample("quantile", q=q, **kwargs)


class _GroupByMixin(PandasObject, SelectionMixin):
    """
    Provide the groupby facilities.
    """

    _attributes: list[str]  # in practice the same as Resampler._attributes
    _selection: IndexLabel | None = None
    _groupby: GroupBy
    _timegrouper: TimeGrouper

    def __init__(
        self,
        *,
        parent: Resampler,
        groupby: GroupBy,
        key=None,
        selection: IndexLabel | None = None,
        include_groups: bool = False,
    ) -> None:
        # reached via ._gotitem and _get_resampler_for_grouping

        assert isinstance(groupby, GroupBy), type(groupby)

        # parent is always a Resampler, sometimes a _GroupByMixin
        assert isinstance(parent, Resampler), type(parent)

        # initialize our GroupByMixin object with
        # the resampler attributes
        for attr in self._attributes:
            setattr(self, attr, getattr(parent, attr))
        self._selection = selection

        self.binner = parent.binner
        self.key = key

        self._groupby = groupby
        self._timegrouper = copy.copy(parent._timegrouper)

        self.ax = parent.ax
        self.obj = parent.obj
        self.include_groups = include_groups

    @no_type_check
    def _apply(self, f, *args, **kwargs):
        """
        Dispatch to _upsample; we are stripping all of the _upsample kwargs and
        performing the original function call on the grouped object.
        """

        def func(x):
            x = self._resampler_cls(x, timegrouper=self._timegrouper, gpr_index=self.ax)

            if isinstance(f, str):
                return getattr(x, f)(**kwargs)

            return x.apply(f, *args, **kwargs)

        result = _apply(self._groupby, func, include_groups=self.include_groups)
        return self._wrap_result(result)

    _upsample = _apply
    _downsample = _apply
    _groupby_and_aggregate = _apply

    @final
    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None, ndim=1
                assert subset.ndim == 1

        # Try to select from a DataFrame, falling back to a Series
        try:
            if isinstance(key, list) and self.key not in key and self.key is not None:
                key.append(self.key)
            groupby = self._groupby[key]
        except IndexError:
            groupby = self._groupby

        selection = self._infer_selection(key, subset)

        new_rs = type(self)(
            groupby=groupby,
            parent=cast(Resampler, self),
            selection=selection,
        )
        return new_rs


class DatetimeIndexResampler(Resampler):
    ax: DatetimeIndex

    @property
    def _resampler_for_grouping(self):
        return DatetimeIndexResamplerGroupby

    def _get_binner_for_time(self):
        # this is how we are actually creating the bins
        if self.kind == "period":
            return self._timegrouper._get_time_period_bins(self.ax)
        return self._timegrouper._get_time_bins(self.ax)

    def _downsample(self, how, **kwargs):
        """
        Downsample using the given (possibly Cython-mapped) aggregation function.

        Parameters
        ----------
        how : string / cython mapped function
        **kwargs : kw args passed to how function
        """
        orig_how = how
        how = com.get_cython_func(how) or how
        if orig_how != how:
            warn_alias_replacement(self, orig_how, how)
        ax = self.ax

        # Excludes `on` column when provided
        obj = self._obj_with_exclusions

        if not len(ax):
            # reset to the new freq
            obj = obj.copy()
            obj.index = obj.index._with_freq(self.freq)
            assert obj.index.freq == self.freq, (obj.index.freq, self.freq)
            return obj

        # do we have a regular frequency

        # error: Item "None" of "Optional[Any]" has no attribute "binlabels"
        if (
            (ax.freq is not None or ax.inferred_freq is not None)
            and len(self._grouper.binlabels) > len(ax)
            and how is None
        ):
            # let's do an asfreq
            return self.asfreq()

        # we are downsampling
        # we want to call the actual grouper method here
        if self.axis == 0:
            result = obj.groupby(self._grouper).aggregate(how, **kwargs)
        else:
            # test_resample_axis1
            result = obj.T.groupby(self._grouper).aggregate(how, **kwargs).T

        return self._wrap_result(result)

    def _adjust_binner_for_upsample(self, binner):
        """
        Adjust our binner when upsampling.

        The range of the new index should not be outside the specified range.
        """
        if self.closed == "right":
            binner = binner[1:]
        else:
            binner = binner[:-1]
        return binner
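
    # Sketch: with bin edges [e0, e1, e2], closed="right" keeps [e1, e2] and
    # closed="left" keeps [e0, e1], so the upsampled index never extends
    # beyond the specified range.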

1800 

1801 def _upsample(self, method, limit: int | None = None, fill_value=None): 

1802 """ 

1803 Parameters 

1804 ---------- 

1805 method : string {'backfill', 'bfill', 'pad', 

1806 'ffill', 'asfreq'} method for upsampling 

1807 limit : int, default None 

1808 Maximum size gap to fill when reindexing 

1809 fill_value : scalar, default None 

1810 Value to use for missing values 

1811 

1812 See Also 

1813 -------- 

1814 .fillna: Fill NA/NaN values using the specified method. 

1815 

1816 """ 

1817 if self.axis: 

1818 raise AssertionError("axis must be 0") 

1819 if self._from_selection: 

1820 raise ValueError( 

1821 "Upsampling from level= or on= selection " 

1822 "is not supported, use .set_index(...) " 

1823 "to explicitly set index to datetime-like" 

1824 ) 

1825 

1826 ax = self.ax 

1827 obj = self._selected_obj 

1828 binner = self.binner 

1829 res_index = self._adjust_binner_for_upsample(binner) 

1830 

1831 # if we have the same frequency as our axis, then we are equal sampling 

1832 if ( 

1833 limit is None 

1834 and to_offset(ax.inferred_freq) == self.freq 

1835 and len(obj) == len(res_index) 

1836 ): 

1837 result = obj.copy() 

1838 result.index = res_index 

1839 else: 

1840 if method == "asfreq": 

1841 method = None 

1842 result = obj.reindex( 

1843 res_index, method=method, limit=limit, fill_value=fill_value 

1844 ) 

1845 

1846 return self._wrap_result(result) 

1847 

1848 def _wrap_result(self, result): 

1849 result = super()._wrap_result(result) 

1850 

1851 # we may have a different kind that we were asked originally 

1852 # convert if needed 

1853 if self.kind == "period" and not isinstance(result.index, PeriodIndex): 

1854 if isinstance(result.index, MultiIndex): 

1855 # GH 24103 - e.g. groupby resample 

1856 if not isinstance(result.index.levels[-1], PeriodIndex): 

1857 new_level = result.index.levels[-1].to_period(self.freq) 

1858 result.index = result.index.set_levels(new_level, level=-1) 

1859 else: 

1860 result.index = result.index.to_period(self.freq) 

1861 return result 

1862 

1863 

1864# error: Definition of "ax" in base class "_GroupByMixin" is incompatible 

1865# with definition in base class "DatetimeIndexResampler" 

1866class DatetimeIndexResamplerGroupby( # type: ignore[misc] 

1867 _GroupByMixin, DatetimeIndexResampler 

1868): 

1869 """ 

1870 Provides a resample of a groupby implementation 

1871 """ 

1872 

1873 @property 

1874 def _resampler_cls(self): 

1875 return DatetimeIndexResampler 

1876 

1877 

1878class PeriodIndexResampler(DatetimeIndexResampler): 

1879 # error: Incompatible types in assignment (expression has type "PeriodIndex", base 

1880 # class "DatetimeIndexResampler" defined the type as "DatetimeIndex") 

1881 ax: PeriodIndex # type: ignore[assignment] 

1882 

1883 @property 

1884 def _resampler_for_grouping(self): 

1885 warnings.warn( 

1886 "Resampling a groupby with a PeriodIndex is deprecated. " 

1887 "Cast to DatetimeIndex before resampling instead.", 

1888 FutureWarning, 

1889 stacklevel=find_stack_level(), 

1890 ) 

1891 return PeriodIndexResamplerGroupby 

1892 

1893 def _get_binner_for_time(self): 

1894 if self.kind == "timestamp": 

1895 return super()._get_binner_for_time() 

1896 return self._timegrouper._get_period_bins(self.ax) 

1897 

1898 def _convert_obj(self, obj: NDFrameT) -> NDFrameT: 

1899 obj = super()._convert_obj(obj) 

1900 

1901 if self._from_selection: 

1902 # see GH 14008, GH 12871 

1903 msg = ( 

1904 "Resampling from level= or on= selection " 

1905 "with a PeriodIndex is not currently supported, " 

1906 "use .set_index(...) to explicitly set index" 

1907 ) 

1908 raise NotImplementedError(msg) 

1909 

1910 # convert to timestamp 

1911 if self.kind == "timestamp": 

1912 obj = obj.to_timestamp(how=self.convention) 

1913 

1914 return obj 

1915 

1916 def _downsample(self, how, **kwargs): 

1917 """ 

1918 Downsample using the cython-mapped function.

1919 

1920 Parameters 

1921 ---------- 

1922 how : string / cython mapped function 

1923 **kwargs : keyword args passed to the how function

1924 """ 

1925 # we may need to actually resample as if we are timestamps 

1926 if self.kind == "timestamp": 

1927 return super()._downsample(how, **kwargs) 

1928 

1929 orig_how = how 

1930 how = com.get_cython_func(how) or how 

1931 if orig_how != how: 

1932 warn_alias_replacement(self, orig_how, how) 

1933 ax = self.ax 

1934 

1935 if is_subperiod(ax.freq, self.freq): 

1936 # Downsampling 

1937 return self._groupby_and_aggregate(how, **kwargs) 

1938 elif is_superperiod(ax.freq, self.freq): 

1939 if how == "ohlc": 

1940 # GH #13083 

1941 # upsampling to subperiods is handled as an asfreq, which works 

1942 # for pure aggregating/reducing methods 

1943 # OHLC reduces along the time dimension, but creates multiple 

1944 # values for each period -> handle by _groupby_and_aggregate() 

1945 return self._groupby_and_aggregate(how) 

1946 return self.asfreq() 

1947 elif ax.freq == self.freq: 

1948 return self.asfreq() 

1949 

1950 raise IncompatibleFrequency( 

1951 f"Frequency {ax.freq} cannot be resampled to {self.freq}, " 

1952 "as they are not sub or super periods" 

1953 ) 

1954 

1955 def _upsample(self, method, limit: int | None = None, fill_value=None): 

1956 """ 

1957 Parameters 

1958 ---------- 

1959 method : {'backfill', 'bfill', 'pad', 'ffill', 'asfreq'}

1960 Method for upsampling. 

1961 limit : int, default None 

1962 Maximum size gap to fill when reindexing. 

1963 fill_value : scalar, default None 

1964 Value to use for missing values. 

1965 

1966 See Also 

1967 -------- 

1968 .fillna: Fill NA/NaN values using the specified method. 

1969 

1970 """ 

1971 # we may need to actually resample as if we are timestamps 

1972 if self.kind == "timestamp": 

1973 return super()._upsample(method, limit=limit, fill_value=fill_value) 

1974 

1975 ax = self.ax 

1976 obj = self.obj 

1977 new_index = self.binner 

1978 

1979 # Start vs. end of period 

1980 memb = ax.asfreq(self.freq, how=self.convention) 

1981 

1982 # Get the fill indexer 

1983 if method == "asfreq": 

1984 method = None 

1985 indexer = memb.get_indexer(new_index, method=method, limit=limit) 

1986 new_obj = _take_new_index( 

1987 obj, 

1988 indexer, 

1989 new_index, 

1990 axis=self.axis, 

1991 ) 

1992 return self._wrap_result(new_obj) 

1993 

1994 
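# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: the deprecation warnings above
# recommend casting to DatetimeIndex before resampling; this shows that
# recommended path for period data. `_example_period_resample` is a
# hypothetical name.
def _example_period_resample() -> None:
    import pandas as pd

    ser = pd.Series(
        range(6), index=pd.period_range("2024-01", periods=6, freq="M")
    )
    # PeriodIndex -> DatetimeIndex, downsample to quarters, then convert
    # the result back to periods if desired.
    out = ser.to_timestamp().resample("QE").sum().to_period("Q")
    print(out)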

1995# error: Definition of "ax" in base class "_GroupByMixin" is incompatible with 

1996# definition in base class "PeriodIndexResampler" 

1997class PeriodIndexResamplerGroupby( # type: ignore[misc] 

1998 _GroupByMixin, PeriodIndexResampler 

1999): 

2000 """ 

2001 Provides a resample of a groupby implementation. 

2002 """ 

2003 

2004 @property 

2005 def _resampler_cls(self): 

2006 return PeriodIndexResampler 

2007 

2008 

2009class TimedeltaIndexResampler(DatetimeIndexResampler): 

2010 # error: Incompatible types in assignment (expression has type "TimedeltaIndex", 

2011 # base class "DatetimeIndexResampler" defined the type as "DatetimeIndex") 

2012 ax: TimedeltaIndex # type: ignore[assignment] 

2013 

2014 @property 

2015 def _resampler_for_grouping(self): 

2016 return TimedeltaIndexResamplerGroupby 

2017 

2018 def _get_binner_for_time(self): 

2019 return self._timegrouper._get_time_delta_bins(self.ax) 

2020 

2021 def _adjust_binner_for_upsample(self, binner): 

2022 """ 

2023 Adjust our binner when upsampling. 

2024 

2025 The range of a new index is allowed to be greater than original range 

2026 so we don't need to change the length of a binner, GH 13022 

2027 """ 

2028 return binner 

2029 

2030 

2031# error: Definition of "ax" in base class "_GroupByMixin" is incompatible with 

2032# definition in base class "DatetimeIndexResampler" 

2033class TimedeltaIndexResamplerGroupby( # type: ignore[misc] 

2034 _GroupByMixin, TimedeltaIndexResampler 

2035): 

2036 """ 

2037 Provides a resample of a groupby implementation. 

2038 """ 

2039 

2040 @property 

2041 def _resampler_cls(self): 

2042 return TimedeltaIndexResampler 

2043 

2044 
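# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: TimedeltaIndex resampling, which is
# binned by TimeGrouper._get_time_delta_bins below. Name and data are
# hypothetical.
def _example_timedelta_resample() -> None:
    import pandas as pd

    ser = pd.Series(
        range(6), index=pd.timedelta_range("0s", periods=6, freq="30s")
    )
    # Only fixed-duration (Tick) frequencies are valid here; something like
    # "ME" raises the ValueError shown in _get_time_delta_bins.
    print(ser.resample("1min").sum())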

2045def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler: 

2046 """ 

2047 Create a TimeGrouper and return our resampler. 

2048 """ 

2049 tg = TimeGrouper(obj, **kwds) # type: ignore[arg-type] 

2050 return tg._get_resampler(obj, kind=kind) 

2051 

2052 

2053get_resampler.__doc__ = Resampler.__doc__ 

2054 

2055 

2056def get_resampler_for_grouping( 

2057 groupby: GroupBy, 

2058 rule, 

2059 how=None, 

2060 fill_method=None, 

2061 limit: int | None = None, 

2062 kind=None, 

2063 on=None, 

2064 include_groups: bool = True, 

2065 **kwargs, 

2066) -> Resampler: 

2067 """ 

2068 Return our appropriate resampler when grouping as well. 

2069 """ 

2070 # .resample uses 'on' similar to how .groupby uses 'key' 

2071 tg = TimeGrouper(freq=rule, key=on, **kwargs) 

2072 resampler = tg._get_resampler(groupby.obj, kind=kind) 

2073 return resampler._get_resampler_for_grouping( 

2074 groupby=groupby, include_groups=include_groups, key=tg.key 

2075 ) 

2076 

2077 
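# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: the groupby + resample entry point
# served by get_resampler_for_grouping above. Column names and data are made
# up for illustration.
def _example_groupby_resample() -> None:
    import pandas as pd

    df = pd.DataFrame(
        {
            "key": ["a", "a", "b", "b"],
            "date": pd.date_range("2024-01-01", periods=4, freq="D"),
            "val": [1, 2, 3, 4],
        }
    )
    # 'on' names the datetime column, mirroring how .groupby uses 'key'.
    print(df.groupby("key").resample("2D", on="date")["val"].sum())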

2078class TimeGrouper(Grouper): 

2079 """ 

2080 Custom groupby class for time-interval grouping. 

2081 

2082 Parameters 

2083 ---------- 

2084 freq : pandas date offset or offset alias for identifying bin edges 

2085 closed : closed end of interval; 'left' or 'right' 

2086 label : interval boundary to use for labeling; 'left' or 'right' 

2087 convention : {'start', 'end', 'e', 's'} 

2088 If axis is PeriodIndex 

2089 """ 

2090 

2091 _attributes = Grouper._attributes + ( 

2092 "closed", 

2093 "label", 

2094 "how", 

2095 "kind", 

2096 "convention", 

2097 "origin", 

2098 "offset", 

2099 ) 

2100 

2101 origin: TimeGrouperOrigin 

2102 

2103 def __init__( 

2104 self, 

2105 obj: Grouper | None = None, 

2106 freq: Frequency = "Min", 

2107 key: str | None = None, 

2108 closed: Literal["left", "right"] | None = None, 

2109 label: Literal["left", "right"] | None = None, 

2110 how: str = "mean", 

2111 axis: Axis = 0, 

2112 fill_method=None, 

2113 limit: int | None = None, 

2114 kind: str | None = None, 

2115 convention: Literal["start", "end", "e", "s"] | None = None, 

2116 origin: Literal["epoch", "start", "start_day", "end", "end_day"] 

2117 | TimestampConvertibleTypes = "start_day", 

2118 offset: TimedeltaConvertibleTypes | None = None, 

2119 group_keys: bool = False, 

2120 **kwargs, 

2121 ) -> None: 

2122 # Check for correctness of the keyword arguments which would 

2123 # otherwise silently use the default if misspelled 

2124 if label not in {None, "left", "right"}: 

2125 raise ValueError(f"Unsupported value {label} for `label`") 

2126 if closed not in {None, "left", "right"}: 

2127 raise ValueError(f"Unsupported value {closed} for `closed`") 

2128 if convention not in {None, "start", "end", "e", "s"}: 

2129 raise ValueError(f"Unsupported value {convention} for `convention`") 

2130 

2131 if ( 

2132 key is None 

2133 and obj is not None 

2134 and isinstance(obj.index, PeriodIndex) # type: ignore[attr-defined] 

2135 or ( 

2136 key is not None 

2137 and obj is not None 

2138 and getattr(obj[key], "dtype", None) == "period" # type: ignore[index] 

2139 ) 

2140 ): 

2141 freq = to_offset(freq, is_period=True) 

2142 else: 

2143 freq = to_offset(freq) 

2144 

2145 end_types = {"ME", "YE", "QE", "BME", "BYE", "BQE", "W"} 

2146 rule = freq.rule_code 

2147 if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): 

2148 if closed is None: 

2149 closed = "right" 

2150 if label is None: 

2151 label = "right" 

2152 else: 

2153 # The backward resample sets ``closed`` to ``'right'`` by default 

2154 # since the last value should be considered as the edge point for 

2155 # the last bin. When origin is "end" or "end_day", the value for a

2156 # specific ``Timestamp`` index stands for the resample result from 

2157 # the current ``Timestamp`` minus ``freq`` to the current 

2158 # ``Timestamp`` with a right close. 

2159 if origin in ["end", "end_day"]: 

2160 if closed is None: 

2161 closed = "right" 

2162 if label is None: 

2163 label = "right" 

2164 else: 

2165 if closed is None: 

2166 closed = "left" 

2167 if label is None: 

2168 label = "left" 

2169 

2170 self.closed = closed 

2171 self.label = label 

2172 self.kind = kind 

2173 self.convention = convention if convention is not None else "e" 

2174 self.how = how 

2175 self.fill_method = fill_method 

2176 self.limit = limit 

2177 self.group_keys = group_keys 

2178 self._arrow_dtype: ArrowDtype | None = None 

2179 

2180 if origin in ("epoch", "start", "start_day", "end", "end_day"): 

2181 # error: Incompatible types in assignment (expression has type "Union[Union[ 

2182 # Timestamp, datetime, datetime64, signedinteger[_64Bit], float, str], 

2183 # Literal['epoch', 'start', 'start_day', 'end', 'end_day']]", variable has 

2184 # type "Union[Timestamp, Literal['epoch', 'start', 'start_day', 'end', 

2185 # 'end_day']]") 

2186 self.origin = origin # type: ignore[assignment] 

2187 else: 

2188 try: 

2189 self.origin = Timestamp(origin) 

2190 except (ValueError, TypeError) as err: 

2191 raise ValueError( 

2192 "'origin' should be equal to 'epoch', 'start', 'start_day', " 

2193 "'end', 'end_day' or " 

2194 f"should be a Timestamp convertible type. Got '{origin}' instead." 

2195 ) from err 

2196 

2197 try: 

2198 self.offset = Timedelta(offset) if offset is not None else None 

2199 except (ValueError, TypeError) as err: 

2200 raise ValueError( 

2201 "'offset' should be a Timedelta convertible type. " 

2202 f"Got '{offset}' instead." 

2203 ) from err 

2204 

2205 # always sort time groupers 

2206 kwargs["sort"] = True 

2207 

2208 super().__init__(freq=freq, key=key, axis=axis, **kwargs) 

2209 

2210 def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: 

2211 """ 

2212 Return my resampler or raise if we have an invalid axis. 

2213 

2214 Parameters 

2215 ---------- 

2216 obj : Series or DataFrame 

2217 kind : string, optional 

2218 'period', 'timestamp', 'timedelta' are valid

2219 

2220 Returns 

2221 ------- 

2222 Resampler 

2223 

2224 Raises 

2225 ------ 

2226 TypeError if incompatible axis 

2227 

2228 """ 

2229 _, ax, _ = self._set_grouper(obj, gpr_index=None) 

2230 if isinstance(ax, DatetimeIndex): 

2231 return DatetimeIndexResampler( 

2232 obj, 

2233 timegrouper=self, 

2234 kind=kind, 

2235 axis=self.axis, 

2236 group_keys=self.group_keys, 

2237 gpr_index=ax, 

2238 ) 

2239 elif isinstance(ax, PeriodIndex) or kind == "period": 

2240 if isinstance(ax, PeriodIndex): 

2241 # GH#53481 

2242 warnings.warn( 

2243 "Resampling with a PeriodIndex is deprecated. " 

2244 "Cast index to DatetimeIndex before resampling instead.", 

2245 FutureWarning, 

2246 stacklevel=find_stack_level(), 

2247 ) 

2248 else: 

2249 warnings.warn( 

2250 "Resampling with kind='period' is deprecated. " 

2251 "Use datetime paths instead.", 

2252 FutureWarning, 

2253 stacklevel=find_stack_level(), 

2254 ) 

2255 return PeriodIndexResampler( 

2256 obj, 

2257 timegrouper=self, 

2258 kind=kind, 

2259 axis=self.axis, 

2260 group_keys=self.group_keys, 

2261 gpr_index=ax, 

2262 ) 

2263 elif isinstance(ax, TimedeltaIndex): 

2264 return TimedeltaIndexResampler( 

2265 obj, 

2266 timegrouper=self, 

2267 axis=self.axis, 

2268 group_keys=self.group_keys, 

2269 gpr_index=ax, 

2270 ) 

2271 

2272 raise TypeError( 

2273 "Only valid with DatetimeIndex, " 

2274 "TimedeltaIndex or PeriodIndex, " 

2275 f"but got an instance of '{type(ax).__name__}'" 

2276 ) 

2277 

2278 def _get_grouper( 

2279 self, obj: NDFrameT, validate: bool = True 

2280 ) -> tuple[BinGrouper, NDFrameT]: 

2281 # create the resampler and return our binner 

2282 r = self._get_resampler(obj) 

2283 return r._grouper, cast(NDFrameT, r.obj) 

2284 

2285 def _get_time_bins(self, ax: DatetimeIndex): 

2286 if not isinstance(ax, DatetimeIndex): 

2287 raise TypeError( 

2288 "axis must be a DatetimeIndex, but got " 

2289 f"an instance of {type(ax).__name__}" 

2290 ) 

2291 

2292 if len(ax) == 0: 

2293 binner = labels = DatetimeIndex( 

2294 data=[], freq=self.freq, name=ax.name, dtype=ax.dtype 

2295 ) 

2296 return binner, [], labels 

2297 

2298 first, last = _get_timestamp_range_edges( 

2299 ax.min(), 

2300 ax.max(), 

2301 self.freq, 

2302 unit=ax.unit, 

2303 closed=self.closed, 

2304 origin=self.origin, 

2305 offset=self.offset, 

2306 ) 

2307 # GH #12037 

2308 # use first/last directly instead of calling replace() on them,

2309 # because replace() would swallow the nanosecond part;

2310 # the last bin may then end slightly before the end if the end contains

2311 # a nanosecond part, leading to a `Values falls after last bin` error

2312 # GH 25758: If DST lands at midnight (e.g. 'America/Havana'), user feedback 

2313 # has noted that ambiguous=True provides the most sensible result 

2314 binner = labels = date_range( 

2315 freq=self.freq, 

2316 start=first, 

2317 end=last, 

2318 tz=ax.tz, 

2319 name=ax.name, 

2320 ambiguous=True, 

2321 nonexistent="shift_forward", 

2322 unit=ax.unit, 

2323 ) 

2324 

2325 ax_values = ax.asi8 

2326 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) 

2327 

2328 # general version, knowing nothing about relative frequencies 

2329 bins = lib.generate_bins_dt64( 

2330 ax_values, bin_edges, self.closed, hasnans=ax.hasnans 

2331 ) 

2332 

2333 if self.closed == "right": 

2334 labels = binner 

2335 if self.label == "right": 

2336 labels = labels[1:] 

2337 elif self.label == "right": 

2338 labels = labels[1:] 

2339 

2340 if ax.hasnans: 

2341 binner = binner.insert(0, NaT) 

2342 labels = labels.insert(0, NaT) 

2343 

2344 # if we end up with more labels than bins 

2345 # adjust the labels 

2346 # GH4076 

2347 if len(bins) < len(labels): 

2348 labels = labels[: len(bins)] 

2349 

2350 return binner, bins, labels 

2351 

2352 def _adjust_bin_edges( 

2353 self, binner: DatetimeIndex, ax_values: npt.NDArray[np.int64] 

2354 ) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]: 

2355 # Some hacks for > daily data, see #1471, #1458, #1483 

2356 

2357 if self.freq.name in ("BME", "ME", "W") or self.freq.name.split("-")[0] in ( 

2358 "BQE", 

2359 "BYE", 

2360 "QE", 

2361 "YE", 

2362 "W", 

2363 ): 

2364 # If the right end-point is on the last day of the month, roll forwards 

2365 # until the last moment of that day. Note that we only do this for offsets 

2366 # which correspond to the end of a super-daily period - "month start", for 

2367 # example, is excluded. 

2368 if self.closed == "right": 

2369 # GH 21459, GH 9119: Adjust the bins relative to the wall time 

2370 edges_dti = binner.tz_localize(None) 

2371 edges_dti = ( 

2372 edges_dti 

2373 + Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit) 

2374 - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit) 

2375 ) 

2376 bin_edges = edges_dti.tz_localize(binner.tz).asi8 

2377 else: 

2378 bin_edges = binner.asi8 

2379 

2380 # intraday values on last day 

2381 if bin_edges[-2] > ax_values.max(): 

2382 bin_edges = bin_edges[:-1] 

2383 binner = binner[:-1] 

2384 else: 

2385 bin_edges = binner.asi8 

2386 return binner, bin_edges 

2387 

2388 def _get_time_delta_bins(self, ax: TimedeltaIndex): 

2389 if not isinstance(ax, TimedeltaIndex): 

2390 raise TypeError( 

2391 "axis must be a TimedeltaIndex, but got " 

2392 f"an instance of {type(ax).__name__}" 

2393 ) 

2394 

2395 if not isinstance(self.freq, Tick): 

2396 # GH#51896 

2397 raise ValueError( 

2398 "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " 

2399 f"e.g. '24h' or '3D', not {self.freq}" 

2400 ) 

2401 

2402 if not len(ax): 

2403 binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) 

2404 return binner, [], labels 

2405 

2406 start, end = ax.min(), ax.max() 

2407 

2408 if self.closed == "right": 

2409 end += self.freq 

2410 

2411 labels = binner = timedelta_range( 

2412 start=start, end=end, freq=self.freq, name=ax.name 

2413 ) 

2414 

2415 end_stamps = labels 

2416 if self.closed == "left": 

2417 end_stamps += self.freq 

2418 

2419 bins = ax.searchsorted(end_stamps, side=self.closed) 

2420 

2421 if self.offset: 

2422 # GH 10530 & 31809 

2423 labels += self.offset 

2424 

2425 return binner, bins, labels 

2426 

2427 def _get_time_period_bins(self, ax: DatetimeIndex): 

2428 if not isinstance(ax, DatetimeIndex): 

2429 raise TypeError( 

2430 "axis must be a DatetimeIndex, but got " 

2431 f"an instance of {type(ax).__name__}" 

2432 ) 

2433 

2434 freq = self.freq 

2435 

2436 if len(ax) == 0: 

2437 binner = labels = PeriodIndex( 

2438 data=[], freq=freq, name=ax.name, dtype=ax.dtype 

2439 ) 

2440 return binner, [], labels 

2441 

2442 labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name) 

2443 

2444 end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp() 

2445 if ax.tz: 

2446 end_stamps = end_stamps.tz_localize(ax.tz) 

2447 bins = ax.searchsorted(end_stamps, side="left") 

2448 

2449 return binner, bins, labels 

2450 

2451 def _get_period_bins(self, ax: PeriodIndex): 

2452 if not isinstance(ax, PeriodIndex): 

2453 raise TypeError( 

2454 "axis must be a PeriodIndex, but got " 

2455 f"an instance of {type(ax).__name__}" 

2456 ) 

2457 

2458 memb = ax.asfreq(self.freq, how=self.convention) 

2459 

2460 # NaT handling as in pandas._lib.lib.generate_bins_dt64() 

2461 nat_count = 0 

2462 if memb.hasnans: 

2463 # error: Incompatible types in assignment (expression has type 

2464 # "bool_", variable has type "int") [assignment] 

2465 nat_count = np.sum(memb._isnan) # type: ignore[assignment] 

2466 memb = memb[~memb._isnan] 

2467 

2468 if not len(memb): 

2469 # index contains no valid (non-NaT) values 

2470 bins = np.array([], dtype=np.int64) 

2471 binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name) 

2472 if len(ax) > 0: 

2473 # index is all NaT 

2474 binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax)) 

2475 return binner, bins, labels 

2476 

2477 freq_mult = self.freq.n 

2478 

2479 start = ax.min().asfreq(self.freq, how=self.convention) 

2480 end = ax.max().asfreq(self.freq, how="end") 

2481 bin_shift = 0 

2482 

2483 if isinstance(self.freq, Tick): 

2484 # GH 23882 & 31809: get adjusted bin edge labels with 'origin' 

2485 # and 'offset' support. This call only makes sense if the freq is a

2486 # Tick since offset and origin are only used in those cases. 

2487 # Not doing this check could create an extra empty bin. 

2488 p_start, end = _get_period_range_edges( 

2489 start, 

2490 end, 

2491 self.freq, 

2492 closed=self.closed, 

2493 origin=self.origin, 

2494 offset=self.offset, 

2495 ) 

2496 

2497 # Get offset for bin edge (not label edge) adjustment 

2498 start_offset = Period(start, self.freq) - Period(p_start, self.freq) 

2499 # error: Item "Period" of "Union[Period, Any]" has no attribute "n" 

2500 bin_shift = start_offset.n % freq_mult # type: ignore[union-attr] 

2501 start = p_start 

2502 

2503 labels = binner = period_range( 

2504 start=start, end=end, freq=self.freq, name=ax.name 

2505 ) 

2506 

2507 i8 = memb.asi8 

2508 

2509 # when upsampling to subperiods, we need to generate enough bins 

2510 expected_bins_count = len(binner) * freq_mult 

2511 i8_extend = expected_bins_count - (i8[-1] - i8[0]) 

2512 rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult) 

2513 rng += freq_mult 

2514 # adjust bin edge indexes to account for base 

2515 rng -= bin_shift 

2516 

2517 # Wrap in PeriodArray for PeriodArray.searchsorted 

2518 prng = type(memb._data)(rng, dtype=memb.dtype) 

2519 bins = memb.searchsorted(prng, side="left") 

2520 

2521 if nat_count > 0: 

2522 binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count) 

2523 

2524 return binner, bins, labels 

2525 

2526 def _set_grouper( 

2527 self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None 

2528 ) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]: 

2529 obj, ax, indexer = super()._set_grouper(obj, sort, gpr_index=gpr_index) 

2530 if isinstance(ax.dtype, ArrowDtype) and ax.dtype.kind in "Mm": 

2531 self._arrow_dtype = ax.dtype 

2532 ax = Index( 

2533 cast(ArrowExtensionArray, ax.array)._maybe_convert_datelike_array() 

2534 ) 

2535 return obj, ax, indexer 

2536 

2537 
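# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: the closed/label defaults chosen in
# TimeGrouper.__init__ above, observed through the public API.
def _example_closed_label_defaults() -> None:
    import pandas as pd

    ser = pd.Series(
        range(4), index=pd.date_range("2024-01-01", periods=4, freq="D")
    )
    # "ME" is end-anchored, so closed/label default to "right" ...
    monthly = ser.resample("ME").sum()
    # ... while a plain Tick frequency like "2D" defaults to "left".
    two_daily = ser.resample("2D").sum()
    print(monthly, two_daily, sep="\n")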

2538def _take_new_index( 

2539 obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0 

2540) -> NDFrameT: 

2541 if isinstance(obj, ABCSeries): 

2542 new_values = algos.take_nd(obj._values, indexer) 

2543 # error: Incompatible return value type (got "Series", expected "NDFrameT") 

2544 return obj._constructor( # type: ignore[return-value] 

2545 new_values, index=new_index, name=obj.name 

2546 ) 

2547 elif isinstance(obj, ABCDataFrame): 

2548 if axis == 1: 

2549 raise NotImplementedError("axis 1 is not supported") 

2550 new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) 

2551 # error: Incompatible return value type (got "DataFrame", expected "NDFrameT") 

2552 return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) # type: ignore[return-value] 

2553 else: 

2554 raise ValueError("'obj' should be either a Series or a DataFrame") 

2555 

2556 

2557def _get_timestamp_range_edges( 

2558 first: Timestamp, 

2559 last: Timestamp, 

2560 freq: BaseOffset, 

2561 unit: str, 

2562 closed: Literal["right", "left"] = "left", 

2563 origin: TimeGrouperOrigin = "start_day", 

2564 offset: Timedelta | None = None, 

2565) -> tuple[Timestamp, Timestamp]: 

2566 """ 

2567 Adjust the `first` Timestamp to the preceding Timestamp that resides on 

2568 the provided offset. Adjust the `last` Timestamp to the following 

2569 Timestamp that resides on the provided offset. Input Timestamps that 

2570 already reside on the offset will be adjusted depending on the type of 

2571 offset and the `closed` parameter. 

2572 

2573 Parameters 

2574 ---------- 

2575 first : pd.Timestamp 

2576 The beginning Timestamp of the range to be adjusted. 

2577 last : pd.Timestamp 

2578 The ending Timestamp of the range to be adjusted. 

2579 freq : pd.DateOffset 

2580 The dateoffset to which the Timestamps will be adjusted. 

2581 closed : {'right', 'left'}, default "left" 

2582 Which side of bin interval is closed. 

2583 origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day' 

2584 The timestamp on which to adjust the grouping. The timezone of origin must 

2585 match the timezone of the index. 

2586 If a timestamp is not used, these values are also supported: 

2587 

2588 - 'epoch': `origin` is 1970-01-01 

2589 - 'start': `origin` is the first value of the timeseries 

2590 - 'start_day': `origin` is the first day at midnight of the timeseries 

2591 offset : pd.Timedelta, default is None 

2592 An offset timedelta added to the origin. 

2593 

2594 Returns 

2595 ------- 

2596 A tuple of length 2, containing the adjusted pd.Timestamp objects. 

2597 """ 

2598 if isinstance(freq, Tick): 

2599 index_tz = first.tz 

2600 if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): 

2601 raise ValueError("The origin must have the same timezone as the index.") 

2602 if origin == "epoch": 

2603 # set the epoch based on the timezone to have similar bins results when 

2604 # resampling on the same kind of indexes on different timezones 

2605 origin = Timestamp("1970-01-01", tz=index_tz) 

2606 

2607 if isinstance(freq, Day): 

2608 # _adjust_dates_anchored assumes 'D' means 24h, but first/last 

2609 # might contain a DST transition (23h, 24h, or 25h). 

2610 # So "pretend" the dates are naive when adjusting the endpoints 

2611 first = first.tz_localize(None) 

2612 last = last.tz_localize(None) 

2613 if isinstance(origin, Timestamp): 

2614 origin = origin.tz_localize(None) 

2615 

2616 first, last = _adjust_dates_anchored( 

2617 first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit 

2618 ) 

2619 if isinstance(freq, Day): 

2620 first = first.tz_localize(index_tz) 

2621 last = last.tz_localize(index_tz) 

2622 else: 

2623 first = first.normalize() 

2624 last = last.normalize() 

2625 

2626 if closed == "left": 

2627 first = Timestamp(freq.rollback(first)) 

2628 else: 

2629 first = Timestamp(first - freq) 

2630 

2631 last = Timestamp(last + freq) 

2632 

2633 return first, last 

2634 

2635 
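# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: how `origin` shifts the edges that
# _get_timestamp_range_edges computes above.
def _example_origin() -> None:
    import pandas as pd

    idx = pd.date_range("2000-10-01 23:30:00", periods=6, freq="7min")
    ser = pd.Series(range(6), index=idx)
    # 'start_day' (the default) anchors bins at midnight of the first day;
    # 'epoch' anchors them at 1970-01-01, so the bin edges differ.
    print(ser.resample("17min").sum())
    print(ser.resample("17min", origin="epoch").sum())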

2636def _get_period_range_edges( 

2637 first: Period, 

2638 last: Period, 

2639 freq: BaseOffset, 

2640 closed: Literal["right", "left"] = "left", 

2641 origin: TimeGrouperOrigin = "start_day", 

2642 offset: Timedelta | None = None, 

2643) -> tuple[Period, Period]: 

2644 """ 

2645 Adjust the provided `first` and `last` Periods to the respective Period of 

2646 the given offset that encompasses them. 

2647 

2648 Parameters 

2649 ---------- 

2650 first : pd.Period 

2651 The beginning Period of the range to be adjusted. 

2652 last : pd.Period 

2653 The ending Period of the range to be adjusted. 

2654 freq : pd.DateOffset 

2655 The freq to which the Periods will be adjusted. 

2656 closed : {'right', 'left'}, default "left" 

2657 Which side of bin interval is closed. 

2658 origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'

2659 The timestamp on which to adjust the grouping. The timezone of origin must 

2660 match the timezone of the index. 

2661 

2662 If a timestamp is not used, these values are also supported: 

2663 

2664 - 'epoch': `origin` is 1970-01-01 

2665 - 'start': `origin` is the first value of the timeseries 

2666 - 'start_day': `origin` is the first day at midnight of the timeseries 

2667 offset : pd.Timedelta, default is None 

2668 An offset timedelta added to the origin. 

2669 

2670 Returns 

2671 ------- 

2672 A tuple of length 2, containing the adjusted pd.Period objects. 

2673 """ 

2674 if not all(isinstance(obj, Period) for obj in [first, last]): 

2675 raise TypeError("'first' and 'last' must be instances of type Period") 

2676 

2677 # GH 23882 

2678 first_ts = first.to_timestamp() 

2679 last_ts = last.to_timestamp() 

2680 adjust_first = not freq.is_on_offset(first_ts) 

2681 adjust_last = freq.is_on_offset(last_ts) 

2682 

2683 first_ts, last_ts = _get_timestamp_range_edges( 

2684 first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset 

2685 ) 

2686 

2687 first = (first_ts + int(adjust_first) * freq).to_period(freq) 

2688 last = (last_ts - int(adjust_last) * freq).to_period(freq) 

2689 return first, last 

2690 

2691 

2692def _insert_nat_bin( 

2693 binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int 

2694) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]: 

2695 # NaT handling as in pandas._lib.lib.generate_bins_dt64() 

2696 # shift bins by the number of NaT 

2697 assert nat_count > 0 

2698 bins += nat_count 

2699 bins = np.insert(bins, 0, nat_count) 

2700 

2701 # Incompatible types in assignment (expression has type "Index", variable 

2702 # has type "PeriodIndex") 

2703 binner = binner.insert(0, NaT) # type: ignore[assignment] 

2704 # Incompatible types in assignment (expression has type "Index", variable 

2705 # has type "PeriodIndex") 

2706 labels = labels.insert(0, NaT) # type: ignore[assignment] 

2707 return binner, bins, labels 

2708 

2709 
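# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: the pure-numpy effect of
# _insert_nat_bin above. With two NaT values, every bin edge shifts up by two
# and a leading bin of size two collects the NaT rows.
def _example_insert_nat_shift() -> None:
    import numpy as np

    bins = np.array([3, 5, 8])
    nat_count = 2
    shifted = np.insert(bins + nat_count, 0, nat_count)
    assert shifted.tolist() == [2, 5, 7, 10]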

2710def _adjust_dates_anchored( 

2711 first: Timestamp, 

2712 last: Timestamp, 

2713 freq: Tick, 

2714 closed: Literal["right", "left"] = "right", 

2715 origin: TimeGrouperOrigin = "start_day", 

2716 offset: Timedelta | None = None, 

2717 unit: str = "ns", 

2718) -> tuple[Timestamp, Timestamp]: 

2719 # First and last offsets should be calculated from the start day to fix an 

2720 # error caused by resampling across multiple days when a one-day period is

2721 # not a multiple of the frequency. See GH 8683

2722 # To handle frequencies that are not multiples of, or divisible by, a day,

2723 # we allow a fixed origin timestamp to be defined. See GH 31809

2724 first = first.as_unit(unit) 

2725 last = last.as_unit(unit) 

2726 if offset is not None: 

2727 offset = offset.as_unit(unit) 

2728 

2729 freq_value = Timedelta(freq).as_unit(unit)._value 

2730 

2731 origin_timestamp = 0 # origin == "epoch" 

2732 if origin == "start_day": 

2733 origin_timestamp = first.normalize()._value 

2734 elif origin == "start": 

2735 origin_timestamp = first._value 

2736 elif isinstance(origin, Timestamp): 

2737 origin_timestamp = origin.as_unit(unit)._value 

2738 elif origin in ["end", "end_day"]: 

2739 origin_last = last if origin == "end" else last.ceil("D") 

2740 sub_freq_times = (origin_last._value - first._value) // freq_value 

2741 if closed == "left": 

2742 sub_freq_times += 1 

2743 first = origin_last - sub_freq_times * freq 

2744 origin_timestamp = first._value 

2745 origin_timestamp += offset._value if offset else 0 

2746 

2747 # GH 10117 & GH 19375. If first and last contain timezone information, 

2748 # perform the calculation in UTC in order to avoid localizing on an

2749 # ambiguous or nonexistent time.

2750 first_tzinfo = first.tzinfo 

2751 last_tzinfo = last.tzinfo 

2752 if first_tzinfo is not None: 

2753 first = first.tz_convert("UTC") 

2754 if last_tzinfo is not None: 

2755 last = last.tz_convert("UTC") 

2756 

2757 foffset = (first._value - origin_timestamp) % freq_value 

2758 loffset = (last._value - origin_timestamp) % freq_value 

2759 

2760 if closed == "right": 

2761 if foffset > 0: 

2762 # roll back 

2763 fresult_int = first._value - foffset 

2764 else: 

2765 fresult_int = first._value - freq_value 

2766 

2767 if loffset > 0: 

2768 # roll forward 

2769 lresult_int = last._value + (freq_value - loffset) 

2770 else: 

2771 # already the end of the road 

2772 lresult_int = last._value 

2773 else: # closed == 'left' 

2774 if foffset > 0: 

2775 fresult_int = first._value - foffset 

2776 else: 

2777 # start of the road 

2778 fresult_int = first._value 

2779 

2780 if loffset > 0: 

2781 # roll forward 

2782 lresult_int = last._value + (freq_value - loffset) 

2783 else: 

2784 lresult_int = last._value + freq_value 

2785 fresult = Timestamp(fresult_int, unit=unit) 

2786 lresult = Timestamp(lresult_int, unit=unit) 

2787 if first_tzinfo is not None: 

2788 fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) 

2789 if last_tzinfo is not None: 

2790 lresult = lresult.tz_localize("UTC").tz_convert(last_tzinfo) 

2791 return fresult, lresult 

2792 

2793 
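# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: the integer arithmetic at the heart
# of _adjust_dates_anchored above, with plain ints standing in for nanosecond
# timestamp values. All names here are hypothetical.
def _example_anchored_edges() -> None:
    freq = 10        # bin width
    origin = 3       # anchor point
    first, last = 17, 41

    foffset = (first - origin) % freq  # 4: distance past the previous edge
    loffset = (last - origin) % freq   # 8

    # closed="left": roll `first` back to the previous edge and `last`
    # forward to the next one, mirroring the branches above.
    first_edge = first - foffset if foffset else first
    last_edge = last + (freq - loffset) if loffset else last + freq
    assert (first_edge, last_edge) == (13, 43)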

2794def asfreq( 

2795 obj: NDFrameT, 

2796 freq, 

2797 method=None, 

2798 how=None, 

2799 normalize: bool = False, 

2800 fill_value=None, 

2801) -> NDFrameT: 

2802 """ 

2803 Utility frequency conversion method for Series/DataFrame. 

2804 

2805 See :meth:`pandas.NDFrame.asfreq` for full documentation. 

2806 """ 

2807 if isinstance(obj.index, PeriodIndex): 

2808 if method is not None: 

2809 raise NotImplementedError("'method' argument is not supported") 

2810 

2811 if how is None: 

2812 how = "E" 

2813 

2814 if isinstance(freq, BaseOffset): 

2815 if hasattr(freq, "_period_dtype_code"): 

2816 freq = freq_to_period_freqstr(freq.n, freq.name) 

2817 else: 

2818 raise ValueError( 

2819 f"Invalid offset: '{freq.base}' for converting time series " 

2820 f"with PeriodIndex." 

2821 ) 

2822 

2823 new_obj = obj.copy() 

2824 new_obj.index = obj.index.asfreq(freq, how=how) 

2825 

2826 elif len(obj.index) == 0: 

2827 new_obj = obj.copy() 

2828 

2829 new_obj.index = _asfreq_compat(obj.index, freq) 

2830 else: 

2831 unit = None 

2832 if isinstance(obj.index, DatetimeIndex): 

2833 # TODO: should we disallow non-DatetimeIndex? 

2834 unit = obj.index.unit 

2835 dti = date_range(obj.index.min(), obj.index.max(), freq=freq, unit=unit) 

2836 dti.name = obj.index.name 

2837 new_obj = obj.reindex(dti, method=method, fill_value=fill_value) 

2838 if normalize: 

2839 new_obj.index = new_obj.index.normalize() 

2840 

2841 return new_obj 

2842 

2843 
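# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: the public Series.asfreq that the
# helper above backs. Sample data is made up.
def _example_asfreq() -> None:
    import pandas as pd

    ser = pd.Series(
        [1.0, 2.0],
        index=pd.to_datetime(["2024-01-01 00:00", "2024-01-01 02:00"]),
    )
    # The missing 01:00 point is inserted; method="ffill" fills it from the
    # previous observation, matching the reindex path above.
    print(ser.asfreq("h", method="ffill"))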

2844def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq): 

2845 """ 

2846 Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex. 

2847 

2848 Parameters 

2849 ---------- 

2850 index : PeriodIndex, DatetimeIndex, or TimedeltaIndex 

2851 freq : DateOffset 

2852 

2853 Returns 

2854 ------- 

2855 same type as index 

2856 """ 

2857 if len(index) != 0: 

2858 # This should never be reached, always checked by the caller 

2859 raise ValueError( 

2860 "Can only set arbitrary freq for empty DatetimeIndex or TimedeltaIndex" 

2861 ) 

2862 new_index: Index 

2863 if isinstance(index, PeriodIndex): 

2864 new_index = index.asfreq(freq=freq) 

2865 elif isinstance(index, DatetimeIndex): 

2866 new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name) 

2867 elif isinstance(index, TimedeltaIndex): 

2868 new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) 

2869 else: # pragma: no cover 

2870 raise TypeError(type(index)) 

2871 return new_index 

2872 

2873 

2874def maybe_warn_args_and_kwargs(cls, kernel: str, args, kwargs) -> None: 

2875 """ 

2876 Warn for deprecation of args and kwargs in resample functions. 

2877 

2878 Parameters 

2879 ---------- 

2880 cls : type 

2881 Class to warn about. 

2882 kernel : str 

2883 Operation name. 

2884 args : tuple or None 

2885 args passed by user. Will be None if and only if kernel does not have args. 

2886 kwargs : dict or None 

2887 kwargs passed by user. Will be None if and only if kernel does not have kwargs. 

2888 """ 

2889 warn_args = args is not None and len(args) > 0 

2890 warn_kwargs = kwargs is not None and len(kwargs) > 0 

2891 if warn_args and warn_kwargs: 

2892 msg = "args and kwargs" 

2893 elif warn_args: 

2894 msg = "args" 

2895 elif warn_kwargs: 

2896 msg = "kwargs" 

2897 else: 

2898 return 

2899 warnings.warn( 

2900 f"Passing additional {msg} to {cls.__name__}.{kernel} has " 

2901 "no impact on the result and is deprecated. This will " 

2902 "raise a TypeError in a future version of pandas.", 

2903 category=FutureWarning, 

2904 stacklevel=find_stack_level(), 

2905 ) 

2906 

2907 
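# ---------------------------------------------------------------------------
# Illustrative sketch, not pandas source: exercising the deprecation shim
# above. `_Demo` is a hypothetical stand-in class.
def _example_warn_args() -> None:
    import warnings

    class _Demo:
        pass

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Positional args trigger the FutureWarning; None/empty would not.
        maybe_warn_args_and_kwargs(_Demo, "sum", args=(1,), kwargs=None)
    assert any("additional args" in str(w.message) for w in caught)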

2908def _apply( 

2909 grouped: GroupBy, how: Callable, *args, include_groups: bool, **kwargs 

2910) -> DataFrame: 

2911 # GH#7155 - rewrite warning to appear as if it came from `.resample` 

2912 target_message = "DataFrameGroupBy.apply operated on the grouping columns" 

2913 new_message = _apply_groupings_depr.format("DataFrameGroupBy", "resample") 

2914 with rewrite_warning( 

2915 target_message=target_message, 

2916 target_category=DeprecationWarning, 

2917 new_message=new_message, 

2918 ): 

2919 result = grouped.apply(how, *args, include_groups=include_groups, **kwargs) 

2920 return result