from __future__ import annotations

from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
)

from pandas.util._decorators import (
    deprecate_kwarg,
    doc,
)

from pandas.core.indexers.objects import (
    BaseIndexer,
    ExpandingIndexer,
    GroupbyIndexer,
)
from pandas.core.window.doc import (
    _shared_docs,
    create_section_header,
    kwargs_numeric_only,
    numba_notes,
    template_header,
    template_returns,
    template_see_also,
    window_agg_numba_parameters,
    window_apply_parameters,
)
from pandas.core.window.rolling import (
    BaseWindowGroupby,
    RollingAndExpandingMixin,
)

if TYPE_CHECKING:
    from pandas._typing import (
        Axis,
        QuantileInterpolation,
        WindowingRankType,
    )

    from pandas import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame


class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding window calculations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

        For `Series` this parameter is unused and defaults to 0.

    method : str {'single', 'table'}, default 'single'
        Execute the expanding operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    pandas.api.typing.Expanding

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.expanding>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **min_periods**

    Expanding sum with 1 vs 3 observations needed to calculate a value.

    >>> df.expanding(1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    >>> df.expanding(3).sum()
         B
    0  NaN
    1  NaN
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes: list[str] = ["min_periods", "axis", "method"]

    def __init__(
        self,
        obj: NDFrame,
        min_periods: int = 1,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ) -> None:
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate
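
    # A minimal ``aggregate`` sketch for an expanding window (values are easy
    # to verify by hand): ``df.expanding().agg(["sum", "mean"])`` on a column
    # A = [1, 2, 3] yields a "sum" column of [1.0, 3.0, 6.0] and a "mean"
    # column of [1.0, 1.5, 2.0].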

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().count()
        a    1.0
        b    2.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        return super().count(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().apply(lambda s: s.max() - 2 * s.min())
        a   -1.0
        b    0.0
        c    1.0
        d    2.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )
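
    # Usage sketch (illustrative, not executed here): when ``engine="numba"``
    # is requested, ``raw=True`` is required so that ``func`` receives each
    # window as a NumPy array rather than a Series, e.g.
    #
    #   ser.expanding().apply(lambda x: x.max() - 2 * x.min(), raw=True,
    #                         engine="numba")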

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().sum()
        a     1.0
        b     3.0
        c     6.0
        d    10.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
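
    # Relationship note (an observation, not extra behaviour): with the default
    # ``min_periods=1`` and no missing values, ``ser.expanding().sum()`` matches
    # ``ser.cumsum()``; the two differ only at positions holding NaN, where
    # ``cumsum`` reports NaN while the expanding sum keeps the running total.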

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([3, 2, 1, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().max()
        a    3.0
        b    3.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([2, 3, 4, 1], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().min()
        a    2.0
        b    2.0
        c    2.0
        d    1.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().mean()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().median()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the calculation.\n
468 """
469 ).replace("\n", "", 1),
470 create_section_header("Examples"),
471 dedent(
472 """
473 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
474
475 >>> s.expanding(3).std()
476 0 NaN
477 1 NaN
478 2 0.577350
479 3 0.957427
480 4 0.894427
481 5 0.836660
482 6 0.786796
483 dtype: float64
484 """
485 ).replace("\n", "", 1),
486 window_method="expanding",
487 aggregation_description="standard deviation",
488 agg_method="std",
489 )
490 def std(
491 self,
492 ddof: int = 1,
493 numeric_only: bool = False,
494 engine: Literal["cython", "numba"] | None = None,
495 engine_kwargs: dict[str, bool] | None = None,
496 ):
497 return super().std(
498 ddof=ddof,
499 numeric_only=numeric_only,
500 engine=engine,
501 engine_kwargs=engine_kwargs,
502 )
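
    # Worked check for the doctest above (hand arithmetic, not asserted
    # behaviour): at index 2 the expanding window holds [5, 5, 6]; the sample
    # variance with ddof=1 is ((5 - 16/3)**2 + (5 - 16/3)**2 + (6 - 16/3)**2) / 2
    # = 1/3, and sqrt(1/3) ~= 0.577350, matching the first non-NaN value.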

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the calculation.\n
528 """
529 ).replace("\n", "", 1),
530 create_section_header("Examples"),
531 dedent(
532 """
533 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
534
535 >>> s.expanding(3).var()
536 0 NaN
537 1 NaN
538 2 0.333333
539 3 0.916667
540 4 0.800000
541 5 0.700000
542 6 0.619048
543 dtype: float64
544 """
545 ).replace("\n", "", 1),
546 window_method="expanding",
547 aggregation_description="variance",
548 agg_method="var",
549 )
550 def var(
551 self,
552 ddof: int = 1,
553 numeric_only: bool = False,
554 engine: Literal["cython", "numba"] | None = None,
555 engine_kwargs: dict[str, bool] | None = None,
556 ):
557 return super().var(
558 ddof=ddof,
559 numeric_only=numeric_only,
560 engine=engine,
561 engine_kwargs=engine_kwargs,
562 )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False):
        return super().sem(ddof=ddof, numeric_only=numeric_only)
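
    # For reference (describing the shared window implementation, not adding
    # behaviour): the standard error of the mean is computed as
    # ``std(ddof) / sqrt(count - ddof)``, so in the doctest above the value at
    # index 1 is 0.707107 / sqrt(2 - 1) = 0.707107.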

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
611 "A minimum of three periods is required for the rolling calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([-1, 0, 2, -1, 2], index=['a', 'b', 'c', 'd', 'e'])
        >>> ser.expanding().skew()
        a         NaN
        b         NaN
        c    0.935220
        d    1.414214
        e    0.315356
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False):
        return super().skew(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show an expanding calculation with a minimum of
        four periods matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False):
        return super().kurt(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.

            .. deprecated:: 2.1.0
                This will be renamed to 'q' in a future version.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'])
        >>> ser.expanding(min_periods=4).quantile(.25)
        a     NaN
        b     NaN
        c     NaN
        d    1.75
        e    2.00
        f    2.25
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    @deprecate_kwarg(old_arg_name="quantile", new_arg_name="q")
    def quantile(
        self,
        q: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
        return super().quantile(
            q=q,
            interpolation=interpolation,
            numeric_only=numeric_only,
        )
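
    # Worked check for the doctest above (hand arithmetic only): at label 'd'
    # the window is [1, 2, 3, 4]; with linear interpolation the 0.25 quantile
    # sits at position 0.25 * (4 - 1) = 0.75, i.e. between 1 and 2, giving
    # 1 + 0.75 * (2 - 1) = 1.75.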

    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.expanding().rank()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.5
        dtype: float64

        >>> s.expanding().rank(method="max")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    4.0
        dtype: float64

        >>> s.expanding().rank(method="min")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
        )
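
    # Tie handling in the doctests above (an illustration of the hand result,
    # not extra behaviour): at index 5 the window holds [1, 4, 2, 3, 5, 3]; the
    # two 3s occupy ranks 3 and 4, so the last element ranks 3.5 under
    # "average", 4.0 under "max" and 3.0 under "min".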

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().cov(ser2)
        a         NaN
        b    0.500000
        c    1.500000
        d    3.333333
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ),
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().corr(ser2)
        a         NaN
        b    1.000000
        c    0.981981
        d    0.975900
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )
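
    # Consistency check for the doctests above (hand arithmetic, not asserted
    # behaviour): Pearson correlation is cov / (std1 * std2), so at label 'c'
    # 1.5 / (1.0 * 1.527525) ~= 0.981981, tying the expanding ``cov`` and
    # ``corr`` examples together.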


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide an expanding groupby implementation.
948 """
949
950 _attributes = Expanding._attributes + BaseWindowGroupby._attributes
951
952 def _get_window_indexer(self) -> GroupbyIndexer:
953 """
954 Return an indexer class that will compute the window start and end bounds
955
956 Returns
957 -------
958 GroupbyIndexer
959 """
960 window_indexer = GroupbyIndexer(
961 groupby_indices=self._grouper.indices,
962 window_indexer=ExpandingIndexer,
963 )
964 return window_indexer
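
    # Usage sketch (illustrative, not executed here): this class is reached via
    # ``DataFrame.groupby(...).expanding()``; the GroupbyIndexer built above
    # applies an ExpandingIndexer within each group's row positions, e.g.
    #
    #   df = pd.DataFrame({"g": ["a", "a", "b", "b"], "x": [1, 2, 3, 4]})
    #   df.groupby("g")["x"].expanding().sum()
    #   # -> a: [1.0, 3.0], b: [3.0, 7.0], keyed by (group, original index)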