Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/window/expanding.py: 68%

from __future__ import annotations

from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
)

from pandas.util._decorators import (
    deprecate_kwarg,
    doc,
)

from pandas.core.indexers.objects import (
    BaseIndexer,
    ExpandingIndexer,
    GroupbyIndexer,
)
from pandas.core.window.doc import (
    _shared_docs,
    create_section_header,
    kwargs_numeric_only,
    numba_notes,
    template_header,
    template_returns,
    template_see_also,
    window_agg_numba_parameters,
    window_apply_parameters,
)
from pandas.core.window.rolling import (
    BaseWindowGroupby,
    RollingAndExpandingMixin,
)

if TYPE_CHECKING:
    from pandas._typing import (
        Axis,
        QuantileInterpolation,
        WindowingRankType,
    )

    from pandas import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame


class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding window calculations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

        For `Series` this parameter is unused and defaults to 0.

    method : str {'single', 'table'}, default 'single'
        Execute the expanding operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    pandas.api.typing.Expanding

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.expanding>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **min_periods**

    Expanding sum with 1 vs 3 observations needed to calculate a value.

    >>> df.expanding(1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    >>> df.expanding(3).sum()
         B
    0  NaN
    1  NaN
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes: list[str] = ["min_periods", "axis", "method"]

    def __init__(
        self,
        obj: NDFrame,
        min_periods: int = 1,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ) -> None:
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()
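
    # Illustrative note (not part of the original source): ExpandingIndexer is a
    # BaseIndexer subclass, so the bounds it produces can be inspected directly.
    # A minimal, hedged sketch of the expected shape of its output for four rows:
    #
    #     >>> ExpandingIndexer().get_window_bounds(num_values=4)  # doctest: +SKIP
    #     (array([0, 0, 0, 0]), array([1, 2, 3, 4]))
    #
    # i.e. every window starts at the first row and grows by one row at a time.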

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate
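
    # Illustrative note (not part of the original source): the shared "aggregate"
    # docstring above demonstrates ``df.ewm(...)``; a hedged sketch of the expanding
    # equivalent on the frame from that example would be:
    #
    #     >>> df.expanding().agg(["sum", "mean"])  # doctest: +SKIP
    #
    # which is expected to return one column level per aggregation ("sum", "mean")
    # for each of A, B and C.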

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().count()
        a    1.0
        b    2.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        return super().count(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().apply(lambda s: s.max() - 2 * s.min())
        a   -1.0
        b    0.0
        c    1.0
        d    2.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )
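
    # Illustrative note (not part of the original source): with ``raw=False`` the
    # callable receives each expanding window as a Series; with ``raw=True`` it
    # receives a NumPy ndarray, which is typically faster. A hedged sketch using
    # the ``ser`` from the example above:
    #
    #     >>> ser.expanding().apply(lambda a: a[-1] - a[0], raw=True)  # doctest: +SKIP
    #
    # ``raw=True`` is also what the ``engine="numba"`` path requires.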

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().sum()
        a     1.0
        b     3.0
        c     6.0
        d    10.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([3, 2, 1, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().max()
        a    3.0
        b    3.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([2, 3, 4, 1], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().min()
        a    2.0
        b    2.0
        c    2.0
        d    1.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().mean()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().median()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    0.957427
        4    0.894427
        5    0.836660
        6    0.786796
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().std(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
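
    # Illustrative note (not part of the original source): because ``ddof`` defaults
    # to 1 here but to 0 in NumPy, a hedged equivalence check for the final window of
    # the example above (assuming ``import numpy as np``) would be:
    #
    #     >>> np.std([5, 5, 6, 7, 5, 5, 5], ddof=1)  # doctest: +SKIP
    #
    # which rounds to 0.786796, the last value shown by ``s.expanding(3).std()``.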

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    0.916667
        4    0.800000
        5    0.700000
        6    0.619048
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().var(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False):
        return super().sem(ddof=ddof, numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([-1, 0, 2, -1, 2], index=['a', 'b', 'c', 'd', 'e'])
        >>> ser.expanding().skew()
        a         NaN
        b         NaN
        c    0.935220
        d    1.414214
        e    0.315356
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False):
        return super().skew(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show an expanding calculation with a minimum of
        four periods matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False):
        return super().kurt(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.

            .. deprecated:: 2.1.0
                This will be renamed to 'q' in a future version.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'])
        >>> ser.expanding(min_periods=4).quantile(.25)
        a     NaN
        b     NaN
        c     NaN
        d    1.75
        e    2.00
        f    2.25
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    @deprecate_kwarg(old_arg_name="quantile", new_arg_name="q")
    def quantile(
        self,
        q: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
        return super().quantile(
            q=q,
            interpolation=interpolation,
            numeric_only=numeric_only,
        )
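
    # Illustrative note (not part of the original source): ``deprecate_kwarg`` above
    # keeps the old keyword working but warns before forwarding it to ``q``. A hedged
    # sketch of the two spellings, using the ``ser`` from the example above:
    #
    #     >>> ser.expanding(min_periods=4).quantile(q=0.25)         # doctest: +SKIP
    #     >>> ser.expanding(min_periods=4).quantile(quantile=0.25)  # doctest: +SKIP
    #
    # The second call is expected to emit a FutureWarning, per the
    # ``.. deprecated:: 2.1.0`` note in the docstring.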

    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.expanding().rank()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.5
        dtype: float64

        >>> s.expanding().rank(method="max")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    4.0
        dtype: float64

        >>> s.expanding().rank(method="min")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().cov(ser2)
        a         NaN
        b    0.500000
        c    1.500000
        d    3.333333
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ),
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().corr(ser2)
        a         NaN
        b    1.000000
        c    0.981981
        d    0.975900
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide an expanding groupby implementation.
    """

    _attributes = Expanding._attributes + BaseWindowGroupby._attributes

    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        window_indexer = GroupbyIndexer(
            groupby_indices=self._grouper.indices,
            window_indexer=ExpandingIndexer,
        )
        return window_indexer
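
# Illustrative note (not part of the original source): ExpandingGroupby is not
# constructed directly; it is the object returned by chaining ``expanding`` after a
# groupby, so each group gets its own expanding window. A hedged usage sketch:
#
#     >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})  # doctest: +SKIP
#     >>> df.groupby("key")["val"].expanding().sum()  # doctest: +SKIP
#
# which is expected to return a Series indexed by (key, original row label), with
# the cumulative sums restarting at each group boundary (1.0, 3.0 for "a" and
# 3.0, 7.0 for "b").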