Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_variation.py: 9%

65 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1 

2import numpy as np 

3from numpy.core.multiarray import normalize_axis_index 

4from scipy._lib._util import _nan_allsame, _contains_nan 

5from ._stats_py import _chk_asarray 

6 

7 

def _nanvariation(a, *, axis=0, ddof=0, keepdims=False):
    """
    Coefficient of variation along `axis` with nan values ignored.

    This is the private worker used by `variation` when the input
    contains nan and ``nan_policy='omit'``.

    `a` must be a numpy array, and `axis` must already be normalized,
    i.e. ``0 <= axis < a.ndim``.

    The reduced axis is kept with length one when `keepdims` is true;
    otherwise it is squeezed out, and a 0-d result is unwrapped to a
    scalar.
    """
    # Conceptually this is just
    #     nanstd(a, ddof=ddof, axis=axis, keepdims=keepdims) /
    #         nanmean(a, axis=axis, keepdims=keepdims)
    # but numpy emits warnings for several edge cases, and we do not
    # want those to reach the user.  There does not appear to be a
    # thread-safe way to filter the warnings, so the problematic slices
    # are temporarily filled with harmless data and the corresponding
    # outputs are patched afterwards.
    #
    # With N the number of non-nan values in a slice, the result is
    #   nan when:
    #     * N == 0 (empty input),
    #     * all the non-nan values are 0,
    #     * N < ddof,
    #     * N == ddof and the slice is constant;
    #   inf when the slice is not constant and either
    #     * its mean is 0, or
    #     * N == ddof.
    nan_mask = np.isnan(a)
    slice_all_nan = nan_mask.all(axis=axis, keepdims=True)
    zero_or_nan = nan_mask | (a == 0)
    # Slices holding only zeros and nans, with at least one zero.
    slice_all_zero = (zero_or_nan.all(axis=axis, keepdims=True)
                      & ~slice_all_nan)

    # Number of non-nan values in each slice.
    nan_count = np.count_nonzero(nan_mask, axis=axis)
    n_valid = a.shape[axis] - np.expand_dims(nan_count, axis)
    # ddof > N: the output is nan.
    too_few = ddof > n_valid
    # ddof == N: the output is nan for constant slices, inf otherwise.
    exactly_ddof = ddof == n_valid

    constant = _nan_allsame(a, axis=axis, keepdims=True)

    work = a.copy()
    # `np.nanmean` warns on an all-nan slice, so fill such slices
    # with 1.0 before taking the mean.  The output is repaired below.
    work[np.broadcast_to(slice_all_nan, a.shape)] = 1.0
    mean_a = np.nanmean(work, axis=axis, keepdims=True)

    # `np.nanstd` warns when ddof >= N, so fill every such slice
    # with 1.0 as well.  The output is repaired below.
    work[np.broadcast_to(too_few | exactly_ddof, work.shape)] = 1.0
    with np.errstate(invalid='ignore'):
        std_a = np.nanstd(work, axis=axis, ddof=ddof, keepdims=True)
    del work

    zero_sum = np.nansum(a, axis=axis, keepdims=True) == 0

    # Substitute 1 for the mean wherever the slice sums to 0 so that the
    # division below cannot divide by zero.  The output is repaired below.
    mean_a[zero_sum] = 1.0

    # The actual variation.
    result = std_a / mean_a

    # Repair the entries that were computed from placeholder data.
    varying = ~constant
    result[varying & zero_sum] = np.inf
    signed = varying & exactly_ddof
    result[signed] = np.sign(mean_a[signed]) * np.inf
    undefined = (slice_all_zero | slice_all_nan | too_few
                 | (exactly_ddof & constant))
    result[undefined] = np.nan

    if keepdims:
        return result

    result = np.squeeze(result, axis=axis)
    if result.shape == ():
        # Unwrap a 0-d array into a scalar.
        result = result[()]
    return result

90 

91 

def variation(a, axis=0, nan_policy='propagate', ddof=0, *, keepdims=False):
    """
    Compute the coefficient of variation.

    The coefficient of variation is the standard deviation divided by the
    mean.  This function is equivalent to::

        np.std(x, axis=axis, ddof=ddof) / np.mean(x, axis=axis)

    The default for ``ddof`` is 0, but many definitions of the coefficient
    of variation use the square root of the unbiased sample variance
    for the sample standard deviation, which corresponds to ``ddof=1``.

    The function does not take the absolute value of the mean of the data,
    so the return value is negative if the mean is negative.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int or None, optional
        Axis along which to calculate the coefficient of variation.
        Default is 0. If None, compute over the whole array `a`.
    nan_policy : {'propagate', 'raise', 'omit'}, optional
        Defines how to handle when input contains ``nan``.
        The following options are available:

          * 'propagate': return ``nan``
          * 'raise': raise an exception
          * 'omit': perform the calculation with ``nan`` values omitted

        The default is 'propagate'.
    ddof : int, optional
        Gives the "Delta Degrees Of Freedom" used when computing the
        standard deviation.  The divisor used in the calculation of the
        standard deviation is ``N - ddof``, where ``N`` is the number of
        elements.  `ddof` must be less than ``N``; if it isn't, the result
        will be ``nan`` or ``inf``, depending on ``N`` and the values in
        the array.  By default `ddof` is zero for backwards compatibility,
        but it is recommended to use ``ddof=1`` to ensure that the sample
        standard deviation is computed as the square root of the unbiased
        sample variance.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in the
        result as dimensions with size one. With this option, the result
        will broadcast correctly against the input array.

    Returns
    -------
    variation : ndarray
        The calculated variation along the requested axis.

    Notes
    -----
    There are several edge cases that are handled without generating a
    warning:

    * If both the mean and the standard deviation are zero, ``nan``
      is returned.
    * If the mean is zero and the standard deviation is nonzero, ``inf``
      is returned.
    * If the input has length zero (either because the array has zero
      length, or all the input values are ``nan`` and ``nan_policy`` is
      ``'omit'``), ``nan`` is returned.
    * If the input contains ``inf``, ``nan`` is returned.

    References
    ----------
    .. [1] Zwillinger, D. and Kokoska, S. (2000). CRC Standard
       Probability and Statistics Tables and Formulae. Chapman & Hall: New
       York. 2000.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import variation
    >>> variation([1, 2, 3, 4, 5], ddof=1)
    0.5270462766947299

    Compute the variation along a given dimension of an array that contains
    a few ``nan`` values:

    >>> x = np.array([[  10.0, np.nan, 11.0, 19.0, 23.0, 29.0, 98.0],
    ...               [  29.0,   30.0, 32.0, 33.0, 35.0, 56.0, 57.0],
    ...               [np.nan, np.nan, 12.0, 13.0, 16.0, 16.0, 17.0]])
    >>> variation(x, axis=1, ddof=1, nan_policy='omit')
    array([1.05109361, 0.31428986, 0.146483  ])

    """
    a, axis = _chk_asarray(a, axis)
    axis = normalize_axis_index(axis, ndim=a.ndim)
    n = a.shape[axis]

    contains_nan, nan_policy = _contains_nan(a, nan_policy)
    if contains_nan and nan_policy == 'omit':
        return _nanvariation(a, axis=axis, ddof=ddof, keepdims=keepdims)

    if a.size == 0 or ddof > n:
        # Handle as a special case to avoid spurious warnings.
        # The return values, if any, are all nan.
        shp = list(a.shape)
        if keepdims:
            shp[axis] = 1
        else:
            del shp[axis]
        if len(shp) == 0:
            return np.nan
        return np.full(shp, fill_value=np.nan)

    mean_a = a.mean(axis, keepdims=True)

    if ddof == n:
        # Another special case: the divisor N - ddof is zero, so each
        # output is inf (non-constant slice) or nan (constant slice).
        std_a = a.std(axis=axis, ddof=0, keepdims=True)
        result = np.full_like(std_a, fill_value=np.nan)
        nonconst = std_a > 0
        # Index both sides with the same mask so that every inf takes the
        # sign of the mean of its own slice.  `copysign` (rather than
        # `sign(mean) * inf`) yields inf instead of nan, and no warning,
        # when the mean is exactly zero.
        result[nonconst] = np.copysign(np.inf, mean_a[nonconst])
    else:
        with np.errstate(divide='ignore', invalid='ignore'):
            std_a = a.std(axis, ddof=ddof, keepdims=True)
            result = std_a / mean_a

    # Shared by both branches so that `keepdims` is always honored.
    if not keepdims:
        result = np.squeeze(result, axis=axis)
        if result.shape == ():
            # Unwrap a 0-d array into a scalar.
            result = result[()]

    return result

223 

224 return result