# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_variation.py: 9%
# 65 statements
# coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
import numpy as np
from numpy.core.multiarray import normalize_axis_index
from scipy._lib._util import _nan_allsame, _contains_nan
from ._stats_py import _chk_asarray
8def _nanvariation(a, *, axis=0, ddof=0, keepdims=False):
9 """
10 Private version of `variation` that ignores nan.
12 `a` must be a numpy array.
13 `axis` is assumed to be normalized, i.e. 0 <= axis < a.ndim.
14 """
15 #
16 # In theory, this should be as simple as something like
17 # nanstd(a, ddof=ddof, axis=axis, keepdims=keepdims) /
18 # nanmean(a, axis=axis, keepdims=keepdims)
19 # In practice, annoying issues arise. Specifically, numpy
20 # generates warnings in certain edge cases that we don't want
21 # to propagate to the user. Unfortunately, there does not
22 # appear to be a thread-safe way to filter out the warnings,
23 # so we have to do the calculation in a way that doesn't
24 # generate numpy warnings.
25 #
26 # Let N be the number of non-nan inputs in a slice.
27 # Conditions that generate nan:
28 # * empty input (i.e. N = 0)
29 # * All non-nan values 0
30 # * N < ddof
31 # * N == ddof and the input is constant
32 # Conditions that generate inf:
33 # * non-constant input and either
34 # * the mean is 0, or
35 # * N == ddof
36 #
37 a_isnan = np.isnan(a)
38 all_nan = a_isnan.all(axis=axis, keepdims=True)
39 all_nan_full = np.broadcast_to(all_nan, a.shape)
40 all_zero = (a_isnan | (a == 0)).all(axis=axis, keepdims=True) & ~all_nan
42 # ngood is the number of non-nan values in each slice.
43 ngood = (a.shape[axis] -
44 np.expand_dims(np.count_nonzero(a_isnan, axis=axis), axis))
45 # The return value is nan where ddof > ngood.
46 ddof_too_big = ddof > ngood
47 # If ddof == ngood, the return value is nan where the input is constant and
48 # inf otherwise.
49 ddof_equal_n = ddof == ngood
51 is_const = _nan_allsame(a, axis=axis, keepdims=True)
53 a2 = a.copy()
54 # If an entire slice is nan, `np.nanmean` will generate a warning,
55 # so we replace those nan's with 1.0 before computing the mean.
56 # We'll fix the corresponding output later.
57 a2[all_nan_full] = 1.0
58 mean_a = np.nanmean(a2, axis=axis, keepdims=True)
60 # If ddof >= ngood (the number of non-nan values in the slice), `np.nanstd`
61 # will generate a warning, so set all the values in such a slice to 1.0.
62 # We'll fix the corresponding output later.
63 a2[np.broadcast_to(ddof_too_big, a2.shape) | ddof_equal_n] = 1.0
64 with np.errstate(invalid='ignore'):
65 std_a = np.nanstd(a2, axis=axis, ddof=ddof, keepdims=True)
66 del a2
68 sum_zero = np.nansum(a, axis=axis, keepdims=True) == 0
70 # Where the sum along the axis is 0, replace mean_a with 1. This avoids
71 # division by zero. We'll fix the corresponding output later.
72 mean_a[sum_zero] = 1.0
74 # Here--finally!--is the calculation of the variation.
75 result = std_a / mean_a
77 # Now fix the values that were given fake data to avoid warnings.
78 result[~is_const & sum_zero] = np.inf
79 signed_inf_mask = ~is_const & ddof_equal_n
80 result[signed_inf_mask] = np.sign(mean_a[signed_inf_mask]) * np.inf
81 nan_mask = all_zero | all_nan | ddof_too_big | (ddof_equal_n & is_const)
82 result[nan_mask] = np.nan
84 if not keepdims:
85 result = np.squeeze(result, axis=axis)
86 if result.shape == ():
87 result = result[()]
89 return result
def variation(a, axis=0, nan_policy='propagate', ddof=0, *, keepdims=False):
    """
    Compute the coefficient of variation.

    The coefficient of variation is the standard deviation divided by the
    mean.  This function is equivalent to::

        np.std(x, axis=axis, ddof=ddof) / np.mean(x, axis=axis)

    The default for ``ddof`` is 0, but many definitions of the coefficient
    of variation use the square root of the unbiased sample variance
    for the sample standard deviation, which corresponds to ``ddof=1``.

    The function does not take the absolute value of the mean of the data,
    so the return value is negative if the mean is negative.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int or None, optional
        Axis along which to calculate the coefficient of variation.
        Default is 0. If None, compute over the whole array `a`.
    nan_policy : {'propagate', 'raise', 'omit'}, optional
        Defines how to handle when input contains ``nan``.
        The following options are available:

          * 'propagate': return ``nan``
          * 'raise': raise an exception
          * 'omit': perform the calculation with ``nan`` values omitted

        The default is 'propagate'.
    ddof : int, optional
        Gives the "Delta Degrees Of Freedom" used when computing the
        standard deviation.  The divisor used in the calculation of the
        standard deviation is ``N - ddof``, where ``N`` is the number of
        elements.  `ddof` must be less than ``N``; if it isn't, the result
        will be ``nan`` or ``inf``, depending on ``N`` and the values in
        the array.  By default `ddof` is zero for backwards compatibility,
        but it is recommended to use ``ddof=1`` to ensure that the sample
        standard deviation is computed as the square root of the unbiased
        sample variance.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in the
        result as dimensions with size one. With this option, the result
        will broadcast correctly against the input array.

    Returns
    -------
    variation : ndarray
        The calculated variation along the requested axis.

    Notes
    -----
    There are several edge cases that are handled without generating a
    warning:

    * If both the mean and the standard deviation are zero, ``nan``
      is returned.
    * If the mean is zero and the standard deviation is nonzero, ``inf``
      is returned.
    * If the input has length zero (either because the array has zero
      length, or all the input values are ``nan`` and ``nan_policy`` is
      ``'omit'``), ``nan`` is returned.
    * If the input contains ``inf``, ``nan`` is returned.

    References
    ----------
    .. [1] Zwillinger, D. and Kokoska, S. (2000). CRC Standard
       Probability and Statistics Tables and Formulae. Chapman & Hall: New
       York. 2000.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import variation
    >>> variation([1, 2, 3, 4, 5], ddof=1)
    0.5270462766947299

    Compute the variation along a given dimension of an array that contains
    a few ``nan`` values:

    >>> x = np.array([[  10.0, np.nan, 11.0, 19.0, 23.0, 29.0, 98.0],
    ...               [  29.0,   30.0, 32.0, 33.0, 35.0, 56.0, 57.0],
    ...               [np.nan, np.nan, 12.0, 13.0, 16.0, 16.0, 17.0]])
    >>> variation(x, axis=1, ddof=1, nan_policy='omit')
    array([1.05109361, 0.31428986, 0.146483  ])

    """
    a, axis = _chk_asarray(a, axis)
    axis = normalize_axis_index(axis, ndim=a.ndim)
    n = a.shape[axis]

    contains_nan, nan_policy = _contains_nan(a, nan_policy)
    if contains_nan and nan_policy == 'omit':
        return _nanvariation(a, axis=axis, ddof=ddof, keepdims=keepdims)

    if a.size == 0 or ddof > n:
        # Handle as a special case to avoid spurious warnings.
        # The return values, if any, are all nan.
        shp = list(a.shape)
        if keepdims:
            shp[axis] = 1
        else:
            del shp[axis]
        if len(shp) == 0:
            result = np.nan
        else:
            result = np.full(shp, fill_value=np.nan)

        return result

    mean_a = a.mean(axis, keepdims=True)

    if ddof == n:
        # Another special case.  Result is either inf or nan: constant
        # slices give nan, non-constant slices give inf with the sign of
        # the slice's own mean.
        std_a = a.std(axis=axis, ddof=0, keepdims=True)
        result = np.full_like(std_a, fill_value=np.nan)
        # Index `result` and `mean_a` with the *same* mask so every slice
        # is paired with its own mean.  `copysign` (rather than
        # ``sign(mean) * inf``) yields inf instead of nan-plus-warning when
        # the mean is exactly zero.
        nonconst = std_a > 0
        result[nonconst] = np.copysign(np.inf, mean_a[nonconst])
    else:
        with np.errstate(divide='ignore', invalid='ignore'):
            std_a = a.std(axis, ddof=ddof, keepdims=True)
            result = std_a / mean_a

    # Both branches above produce a keepdims-shaped array; drop the reduced
    # axis here so `keepdims=False` is honored in every case.
    if not keepdims:
        result = np.squeeze(result, axis=axis)
        if result.shape == ():
            # Unwrap a 0-d array into a scalar.
            result = result[()]

    return result