from __future__ import annotations

from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
)

from pandas._typing import (
    Axis,
    QuantileInterpolation,
    WindowingRankType,
)

if TYPE_CHECKING:
    from pandas import DataFrame, Series
    from pandas.core.generic import NDFrame

from pandas.util._decorators import doc

from pandas.core.indexers.objects import (
    BaseIndexer,
    ExpandingIndexer,
    GroupbyIndexer,
)
from pandas.core.window.doc import (
    _shared_docs,
    create_section_header,
    kwargs_numeric_only,
    numba_notes,
    template_header,
    template_returns,
    template_see_also,
    window_agg_numba_parameters,
    window_apply_parameters,
)
from pandas.core.window.rolling import (
    BaseWindowGroupby,
    RollingAndExpandingMixin,
)


class Expanding(RollingAndExpandingMixin):
45 """
46 Provide expanding window calculations.
47
48 Parameters
49 ----------
50 min_periods : int, default 1
51 Minimum number of observations in window required to have a value;
52 otherwise, result is ``np.nan``.
53
54 axis : int or str, default 0
55 If ``0`` or ``'index'``, roll across the rows.
56
57 If ``1`` or ``'columns'``, roll across the columns.
58
59 For `Series` this parameter is unused and defaults to 0.
60
61 method : str {'single', 'table'}, default 'single'
        Execute the expanding operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    ``Expanding`` subclass

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.expanding>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **min_periods**

    Expanding sum with 1 vs 3 observations needed to calculate a value.

    >>> df.expanding(1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    >>> df.expanding(3).sum()
         B
    0  NaN
    1  NaN
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes: list[str] = ["min_periods", "axis", "method"]

    def __init__(
        self,
        obj: NDFrame,
        min_periods: int = 1,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ) -> None:
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.expanding().mean()
             A    B    C
        0  1.0  4.0  7.0
        1  1.5  4.5  7.5
        2  2.0  5.0  8.0
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        return super().count(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
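        # Hedged usage sketch (comment only, not from the original source): with the
        # default ``min_periods=1``, something like
        #     pd.Series([1, 2, 3]).expanding().apply(lambda x: x.iloc[-1] - x.iloc[0])
        # should evaluate ``func`` on the growing windows [1], [1, 2], [1, 2, 3] and
        # return 0.0, 1.0, 2.0.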
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    0.957427
        4    0.894427
        5    0.836660
        6    0.786796
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().std(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    0.916667
        4    0.800000
        5    0.700000
        6    0.619048
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().var(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False):
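        # Note added for clarity (hedged, not original source commentary): in the
        # docstring example above each value appears to equal the expanding
        # std(ddof=1) divided by sqrt(count - ddof); for the window [0, 1, 2] that
        # is 1.0 / sqrt(2) ~= 0.707107.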
        return super().sem(ddof=ddof, numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
520 "A minimum of three periods is required for the rolling calculation.\n",
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False):
        return super().skew(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show an expanding calculation with a minimum of four
        periods matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False):
        return super().kurt(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
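        # Hedged illustration (comment only, values not taken from the original
        # source): interpolation matters once a window holds an even number of
        # points, e.g. pd.Series([1, 2, 3, 4]).expanding().quantile(0.5) should give
        # 1.0, 1.5, 2.0, 2.5, while interpolation="lower" should give
        # 1.0, 1.0, 2.0, 2.0.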
        return super().quantile(
            quantile=quantile,
            interpolation=interpolation,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.expanding().rank()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.5
        dtype: float64

        >>> s.expanding().rank(method="max")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    4.0
        dtype: float64

        >>> s.expanding().rank(method="min")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide an expanding groupby implementation.
    """

    _attributes = Expanding._attributes + BaseWindowGroupby._attributes

    def _get_window_indexer(self) -> GroupbyIndexer:
805 """
806 Return an indexer class that will compute the window start and end bounds
807
808 Returns
809 -------
810 GroupbyIndexer
811 """
        window_indexer = GroupbyIndexer(
            groupby_indices=self._grouper.indices,
            window_indexer=ExpandingIndexer,
        )
        return window_indexer