1"""
2masked_reductions.py is for reduction algorithms using a mask-based approach
3for missing values.
4"""
5from __future__ import annotations
6
7from typing import Callable
8import warnings
9
10import numpy as np
11
12from pandas._libs import missing as libmissing
13from pandas._typing import (
14 AxisInt,
15 npt,
16)
17
18from pandas.core.nanops import check_below_min_count
19
20
def _reductions(
    func: Callable,
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    min_count: int = 0,
    axis: AxisInt | None = None,
    **kwargs,
):
    """
    Apply a NumPy reduction (sum, prod, mean, var, std) to a masked array.

    Parameters
    ----------
    func : callable
        NumPy reduction that accepts ``axis`` and ``where`` keywords. Within
        this module it is one of np.sum, np.prod, np.mean, np.var or np.std.
    values : np.ndarray
        Numpy array with the values (can be of any dtype that support the
        operation).
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    min_count : int, default 0
        The required number of valid values to perform the operation. If fewer than
        ``min_count`` non-NA values are present the result will be NA.
    axis : int, optional, default None
        Axis forwarded to ``func``.
    **kwargs
        Additional keyword arguments forwarded unchanged to ``func``
        (for example ``ddof`` for np.var / np.std).

    Returns
    -------
    scalar, np.ndarray or NA
        ``libmissing.NA`` when the result is missing, otherwise whatever
        ``func`` returns.
    """
    if not skipna:
        # Without skipping, one missing value anywhere makes the result NA.
        # The mask is collapsed to a single boolean on purpose: for
        # multi-dimensional input ``mask.any(axis=axis)`` yields an array, and
        # evaluating that array in a boolean context raises ValueError.
        if mask.any() or check_below_min_count(values.shape, None, min_count):
            return libmissing.NA
        return func(values, axis=axis, **kwargs)

    # The min_count short-circuit only applies when the reduction produces a
    # single value (no axis given, or 1D input).
    if check_below_min_count(values.shape, mask, min_count) and (
        axis is None or values.ndim == 1
    ):
        return libmissing.NA

    # ``where=~mask`` restricts the reduction to the non-missing entries.
    return func(values, where=~mask, axis=axis, **kwargs)
61
62
def sum(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    min_count: int = 0,
    axis: AxisInt | None = None,
):
    """
    Sum of a masked array, computed by ``_reductions`` with ``np.sum``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    min_count : int, default 0
        The required number of valid values to perform the operation.
    axis : int, optional, default None
    """
    reduction_options = {"skipna": skipna, "min_count": min_count, "axis": axis}
    return _reductions(np.sum, values, mask, **reduction_options)
74
75
def prod(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    min_count: int = 0,
    axis: AxisInt | None = None,
):
    """
    Product of a masked array, computed by ``_reductions`` with ``np.prod``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    min_count : int, default 0
        The required number of valid values to perform the operation.
    axis : int, optional, default None
    """
    reduction_options = {"skipna": skipna, "min_count": min_count, "axis": axis}
    return _reductions(np.prod, values, mask, **reduction_options)
87
88
def _minmax(
    func: Callable,
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: AxisInt | None = None,
):
    """
    Minimum or maximum of a masked array.

    Parameters
    ----------
    func : np.min or np.max
    values : np.ndarray
        Numpy array with the values (can be of any dtype that support the
        operation).
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
        Forwarded to ``func`` when no value is masked. When masked values are
        skipped, the valid elements are selected with boolean indexing, which
        flattens them, so the reduction runs over all valid elements.

    Returns
    -------
    scalar, np.ndarray or NA
        ``libmissing.NA`` for empty input, for fully masked input, or when a
        value is masked and ``skipna`` is False; otherwise the result of
        ``func``.
    """
    if not values.size:
        # min/max with empty array raise in numpy, pandas returns NA
        return libmissing.NA

    if not mask.any():
        # Nothing is missing, so ``skipna`` makes no difference and the array
        # still has its original shape: ``axis`` can be honoured here instead
        # of being silently dropped.
        return func(values, axis=axis)

    if not skipna:
        # At least one missing value and we may not skip it.
        return libmissing.NA

    subset = values[~mask]
    if subset.size:
        return func(subset)
    # Every element was masked; min/max of nothing is NA.
    return libmissing.NA
125
126
def min(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: AxisInt | None = None,
):
    """
    Minimum of a masked array, computed by ``_minmax`` with ``np.min``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
    """
    reducer = np.min
    return _minmax(reducer, values, mask, skipna=skipna, axis=axis)
135
136
def max(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: AxisInt | None = None,
):
    """
    Maximum of a masked array, computed by ``_minmax`` with ``np.max``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
    """
    reducer = np.max
    return _minmax(reducer, values, mask, skipna=skipna, axis=axis)
145
146
def mean(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: AxisInt | None = None,
):
    """
    Mean of a masked array.

    Returns ``libmissing.NA`` when ``values`` is empty or every element is
    masked; otherwise the work is done by ``_reductions`` with ``np.mean``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
    """
    # Guard first: there is nothing to average over.
    nothing_valid = values.size == 0 or mask.all()
    if nothing_valid:
        return libmissing.NA

    return _reductions(np.mean, values, mask, skipna=skipna, axis=axis)
157
158
def var(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: AxisInt | None = None,
    ddof: int = 1,
):
    """
    Variance of a masked array.

    Returns ``libmissing.NA`` when ``values`` is empty or every element is
    masked; otherwise the work is done by ``_reductions`` with ``np.var``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
    ddof : int, default 1
        Forwarded to ``np.var`` as its ``ddof`` argument.
    """
    has_valid_data = values.size and not mask.all()
    if has_valid_data:
        # RuntimeWarnings raised while computing the variance are suppressed.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            return _reductions(
                np.var, values, mask, skipna=skipna, axis=axis, ddof=ddof
            )

    return libmissing.NA
175
176
def std(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: AxisInt | None = None,
    ddof: int = 1,
):
    """
    Standard deviation of a masked array.

    Returns ``libmissing.NA`` when ``values`` is empty or every element is
    masked; otherwise the work is done by ``_reductions`` with ``np.std``.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
    ddof : int, default 1
        Forwarded to ``np.std`` as its ``ddof`` argument.
    """
    has_valid_data = values.size and not mask.all()
    if has_valid_data:
        # RuntimeWarnings raised while computing the deviation are suppressed.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            return _reductions(
                np.std, values, mask, skipna=skipna, axis=axis, ddof=ddof
            )

    return libmissing.NA