from __future__ import annotations

from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
)

from pandas.util._decorators import (
    deprecate_kwarg,
    doc,
)

from pandas.core.indexers.objects import (
    BaseIndexer,
    ExpandingIndexer,
    GroupbyIndexer,
)
from pandas.core.window.doc import (
    _shared_docs,
    create_section_header,
    kwargs_numeric_only,
    numba_notes,
    template_header,
    template_returns,
    template_see_also,
    window_agg_numba_parameters,
    window_apply_parameters,
)
from pandas.core.window.rolling import (
    BaseWindowGroupby,
    RollingAndExpandingMixin,
)

if TYPE_CHECKING:
    from pandas._typing import (
        Axis,
        QuantileInterpolation,
        WindowingRankType,
    )

    from pandas import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame


class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding window calculations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

        For `Series` this parameter is unused and defaults to 0.

    method : str {'single', 'table'}, default 'single'
        Execute the expanding operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    pandas.api.typing.Expanding

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.expanding>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **min_periods**

    Expanding sum with 1 vs 3 observations needed to calculate a value.

    >>> df.expanding(1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    >>> df.expanding(3).sum()
         B
    0  NaN
    1  NaN
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes: list[str] = ["min_periods", "axis", "method"]

    def __init__(
        self,
        obj: NDFrame,
        min_periods: int = 1,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ) -> None:
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate
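
    # A minimal ``aggregate`` sketch for an expanding window (values are easy
    # to verify by hand): ``df.expanding().agg(["sum", "mean"])`` on a column
    # A = [1, 2, 3] yields a "sum" column of [1.0, 3.0, 6.0] and a "mean"
    # column of [1.0, 1.5, 2.0].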

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().count()
        a    1.0
        b    2.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        return super().count(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().apply(lambda s: s.max() - 2 * s.min())
        a   -1.0
        b    0.0
        c    1.0
        d    2.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )
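
    # Usage sketch (illustrative, not executed here): when ``engine="numba"``
    # is requested, ``raw=True`` is required so that ``func`` receives each
    # window as a NumPy array rather than a Series, e.g.
    #
    #   ser.expanding().apply(lambda x: x.max() - 2 * x.min(), raw=True,
    #                         engine="numba")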

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().sum()
        a     1.0
        b     3.0
        c     6.0
        d    10.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
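
    # Relationship note (an observation, not extra behaviour): with the default
    # ``min_periods=1`` and no missing values, ``ser.expanding().sum()`` matches
    # ``ser.cumsum()``; the two differ only at positions holding NaN, where
    # ``cumsum`` reports NaN while the expanding sum keeps the running total.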

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([3, 2, 1, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().max()
        a    3.0
        b    3.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([2, 3, 4, 1], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().min()
        a    2.0
        b    2.0
        c    2.0
        d    1.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().mean()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().median()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the calculation.\n
468 """
469 ).replace("\n", "", 1),
470 create_section_header("Examples"),
471 dedent(
472 """
473 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
474
475 >>> s.expanding(3).std()
476 0 NaN
477 1 NaN
478 2 0.577350
479 3 0.957427
480 4 0.894427
481 5 0.836660
482 6 0.786796
483 dtype: float64
484 """
485 ).replace("\n", "", 1),
486 window_method="expanding",
487 aggregation_description="standard deviation",
488 agg_method="std",
489 )
490 def std(
491 self,
492 ddof: int = 1,
493 numeric_only: bool = False,
494 engine: Literal["cython", "numba"] | None = None,
495 engine_kwargs: dict[str, bool] | None = None,
496 ):
497 return super().std(
498 ddof=ddof,
499 numeric_only=numeric_only,
500 engine=engine,
501 engine_kwargs=engine_kwargs,
502 )
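
    # Worked check for the doctest above (hand arithmetic, not asserted
    # behaviour): at index 2 the expanding window holds [5, 5, 6]; the sample
    # variance with ddof=1 is ((5 - 16/3)**2 + (5 - 16/3)**2 + (6 - 16/3)**2) / 2
    # = 1/3, and sqrt(1/3) ~= 0.577350, matching the first non-NaN value.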

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the calculation.\n
528 """
529 ).replace("\n", "", 1),
530 create_section_header("Examples"),
531 dedent(
532 """
533 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
534
535 >>> s.expanding(3).var()
536 0 NaN
537 1 NaN
538 2 0.333333
539 3 0.916667
540 4 0.800000
541 5 0.700000
542 6 0.619048
543 dtype: float64
544 """
545 ).replace("\n", "", 1),
546 window_method="expanding",
547 aggregation_description="variance",
548 agg_method="var",
549 )
550 def var(
551 self,
552 ddof: int = 1,
553 numeric_only: bool = False,
554 engine: Literal["cython", "numba"] | None = None,
555 engine_kwargs: dict[str, bool] | None = None,
556 ):
557 return super().var(
558 ddof=ddof,
559 numeric_only=numeric_only,
560 engine=engine,
561 engine_kwargs=engine_kwargs,
562 )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False):
        return super().sem(ddof=ddof, numeric_only=numeric_only)
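
    # For reference (describing the shared window implementation, not adding
    # behaviour): the standard error of the mean is computed as
    # ``std(ddof) / sqrt(count - ddof)``, so in the doctest above the value at
    # index 1 is 0.707107 / sqrt(2 - 1) = 0.707107.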

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
611 "A minimum of three periods is required for the rolling calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([-1, 0, 2, -1, 2], index=['a', 'b', 'c', 'd', 'e'])
        >>> ser.expanding().skew()
        a         NaN
        b         NaN
        c    0.935220
        d    1.414214
        e    0.315356
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False):
        return super().skew(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show an expanding calculation with a minimum of
        four periods matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False):
        return super().kurt(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.

            .. deprecated:: 2.1.0
                This will be renamed to 'q' in a future version.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'])
        >>> ser.expanding(min_periods=4).quantile(.25)
        a     NaN
        b     NaN
        c     NaN
        d    1.75
        e    2.00
        f    2.25
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    @deprecate_kwarg(old_arg_name="quantile", new_arg_name="q")
    def quantile(
        self,
        q: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
        return super().quantile(
            q=q,
            interpolation=interpolation,
            numeric_only=numeric_only,
        )
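
    # Worked check for the doctest above (hand arithmetic only): at label 'd'
    # the window is [1, 2, 3, 4]; with linear interpolation the 0.25 quantile
    # sits at position 0.25 * (4 - 1) = 0.75, i.e. between 1 and 2, giving
    # 1 + 0.75 * (2 - 1) = 1.75.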

    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.expanding().rank()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.5
        dtype: float64

        >>> s.expanding().rank(method="max")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    4.0
        dtype: float64

        >>> s.expanding().rank(method="min")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
        )
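
    # Tie handling in the doctests above (an illustration of the hand result,
    # not extra behaviour): at index 5 the window holds [1, 4, 2, 3, 5, 3]; the
    # two 3s occupy ranks 3 and 4, so the last element ranks 3.5 under
    # "average", 4.0 under "max" and 3.0 under "min".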

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().cov(ser2)
        a         NaN
        b    0.500000
        c    1.500000
        d    3.333333
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ),
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().corr(ser2)
        a         NaN
        b    1.000000
        c    0.981981
        d    0.975900
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )
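
    # Consistency check for the doctests above (hand arithmetic, not asserted
    # behaviour): Pearson correlation is cov / (std1 * std2), so at label 'c'
    # 1.5 / (1.0 * 1.527525) ~= 0.981981, tying the expanding ``cov`` and
    # ``corr`` examples together.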


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide an expanding groupby implementation.
948 """
949
950 _attributes = Expanding._attributes + BaseWindowGroupby._attributes
951
952 def _get_window_indexer(self) -> GroupbyIndexer:
953 """
954 Return an indexer class that will compute the window start and end bounds
955
956 Returns
957 -------
958 GroupbyIndexer
959 """
960 window_indexer = GroupbyIndexer(
961 groupby_indices=self._grouper.indices,
962 window_indexer=ExpandingIndexer,
963 )
964 return window_indexer
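
    # Usage sketch (illustrative, not executed here): this class is reached via
    # ``DataFrame.groupby(...).expanding()``; the GroupbyIndexer built above
    # applies an ExpandingIndexer within each group's row positions, e.g.
    #
    #   df = pd.DataFrame({"g": ["a", "a", "b", "b"], "x": [1, 2, 3, 4]})
    #   df.groupby("g")["x"].expanding().sum()
    #   # -> a: [1.0, 3.0], b: [3.0, 7.0], keyed by (group, original index)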