Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_entropy.py: 16%

88 statements  

coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

# -*- coding: utf-8 -*-
"""
Created on Fri Apr 2 09:06:05 2021

@author: matth
"""

from __future__ import annotations
import math
import numpy as np
from scipy import special
from typing import Optional, Union

__all__ = ['entropy', 'differential_entropy']


def entropy(pk: np.typing.ArrayLike,
            qk: Optional[np.typing.ArrayLike] = None,
            base: Optional[float] = None,
            axis: int = 0
            ) -> Union[np.number, np.ndarray]:

    """
    Calculate the Shannon entropy/relative entropy of given distribution(s).

    If only probabilities `pk` are given, the Shannon entropy is calculated as
    ``H = -sum(pk * log(pk))``.

    If `qk` is not None, then compute the relative entropy
    ``D = sum(pk * log(pk / qk))``. This quantity is also known
    as the Kullback-Leibler divergence.

    This routine will normalize `pk` and `qk` if they don't sum to 1.

    Parameters
    ----------
    pk : array_like
        Defines the (discrete) distribution. Along each axis-slice of ``pk``,
        element ``i`` is the (possibly unnormalized) probability of event
        ``i``.
    qk : array_like, optional
        Sequence against which the relative entropy is computed. Should be in
        the same format as `pk`.
    base : float, optional
        The logarithmic base to use, defaults to ``e`` (natural logarithm).
    axis : int, optional
        The axis along which the entropy is calculated. Default is 0.

    Returns
    -------
    S : {float, array_like}
        The calculated entropy.

    Notes
    -----
    Informally, the Shannon entropy quantifies the expected uncertainty
    inherent in the possible outcomes of a discrete random variable.
    For example,
    if messages consisting of sequences of symbols from a set are to be
    encoded and transmitted over a noiseless channel, then the Shannon entropy
    ``H(pk)`` gives a tight lower bound for the average number of units of
    information needed per symbol if the symbols occur with frequencies
    governed by the discrete distribution `pk` [1]_. The choice of base
    determines the choice of units; e.g., ``e`` for nats, ``2`` for bits, etc.

    The relative entropy, ``D(pk|qk)``, quantifies the increase in the average
    number of units of information needed per symbol if the encoding is
    optimized for the probability distribution `qk` instead of the true
    distribution `pk`. Informally, the relative entropy quantifies the expected
    excess in surprise experienced if one believes the true distribution is
    `qk` when it is actually `pk`.

    A related quantity, the cross entropy ``CE(pk, qk)``, satisfies the
    equation ``CE(pk, qk) = H(pk) + D(pk|qk)`` and can also be calculated with
    the formula ``CE = -sum(pk * log(qk))``. It gives the average
    number of units of information needed per symbol if an encoding is
    optimized for the probability distribution `qk` when the true distribution
    is `pk`. It is not computed directly by `entropy`, but it can be computed
    using two calls to the function (see Examples).

    See [2]_ for more information.

    References
    ----------
    .. [1] Shannon, C.E. (1948), A Mathematical Theory of Communication.
           Bell System Technical Journal, 27: 379-423.
           https://doi.org/10.1002/j.1538-7305.1948.tb01338.x
    .. [2] Thomas M. Cover and Joy A. Thomas. 2006. Elements of Information
           Theory (Wiley Series in Telecommunications and Signal Processing).
           Wiley-Interscience, USA.

    Examples
    --------
    The outcome of a fair coin is the most uncertain:

    >>> import numpy as np
    >>> from scipy.stats import entropy
    >>> base = 2  # work in units of bits
    >>> pk = np.array([1/2, 1/2])  # fair coin
    >>> H = entropy(pk, base=base)
    >>> H
    1.0
    >>> H == -np.sum(pk * np.log(pk)) / np.log(base)
    True

    The outcome of a biased coin is less uncertain:

    >>> qk = np.array([9/10, 1/10])  # biased coin
    >>> entropy(qk, base=base)
    0.46899559358928117

    The relative entropy between the fair coin and biased coin is calculated
    as:

    >>> D = entropy(pk, qk, base=base)
    >>> D
    0.7369655941662062
    >>> D == np.sum(pk * np.log(pk/qk)) / np.log(base)
    True

    The cross entropy can be calculated as the sum of the entropy and
    relative entropy:

    >>> CE = entropy(pk, base=base) + entropy(pk, qk, base=base)
    >>> CE
    1.736965594166206
    >>> CE == -np.sum(pk * np.log(qk)) / np.log(base)
    True

    """

    if base is not None and base <= 0:
        raise ValueError("`base` must be a positive number or `None`.")

    pk = np.asarray(pk)
    pk = 1.0*pk / np.sum(pk, axis=axis, keepdims=True)
    if qk is None:
        vec = special.entr(pk)
    else:
        qk = np.asarray(qk)
        pk, qk = np.broadcast_arrays(pk, qk)
        qk = 1.0*qk / np.sum(qk, axis=axis, keepdims=True)
        vec = special.rel_entr(pk, qk)
    S = np.sum(vec, axis=axis)
    if base is not None:
        S /= np.log(base)
    return S
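

# Illustrative sketch, not part of the SciPy source: the docstring above notes
# that `entropy` normalizes `pk` (and `qk`) along `axis`, so raw counts and the
# corresponding probabilities give identical results.  The helper name below is
# hypothetical and exists only for this demonstration.
def _demo_entropy_normalization():
    counts = np.array([30, 10, 10])      # unnormalized event counts
    probs = counts / counts.sum()        # [0.6, 0.2, 0.2]
    # Both inputs are divided by their sum internally, so the entropies agree.
    assert np.isclose(entropy(counts), entropy(probs))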



def differential_entropy(
    values: np.typing.ArrayLike,
    *,
    window_length: Optional[int] = None,
    base: Optional[float] = None,
    axis: int = 0,
    method: str = "auto",
) -> Union[np.number, np.ndarray]:

    r"""Given a sample of a distribution, estimate the differential entropy.

    Several estimation methods are available using the `method` parameter. By
    default, a method is selected based on the size of the sample.

    Parameters
    ----------
    values : sequence
        Sample from a continuous distribution.
    window_length : int, optional
        Window length for computing Vasicek estimate. Must be an integer
        between 1 and half of the sample size. If ``None`` (the default), it
        uses the heuristic value

        .. math::
            \left \lfloor \sqrt{n} + 0.5 \right \rfloor

        where :math:`n` is the sample size. This heuristic was originally
        proposed in [2]_ and has become common in the literature.
    base : float, optional
        The logarithmic base to use, defaults to ``e`` (natural logarithm).
    axis : int, optional
        The axis along which the differential entropy is calculated.
        Default is 0.
    method : {'vasicek', 'van es', 'ebrahimi', 'correa', 'auto'}, optional
        The method used to estimate the differential entropy from the sample.
        Default is ``'auto'``. See Notes for more information.

    Returns
    -------
    entropy : float
        The calculated differential entropy.

    Notes
    -----
    This function will converge to the true differential entropy in the limit

    .. math::
        n \to \infty, \quad m \to \infty, \quad \frac{m}{n} \to 0

    where :math:`n` is the sample size and :math:`m` is the window length
    (`window_length`).

    The optimal choice of ``window_length`` for a given sample size depends on
    the (unknown) distribution. Typically, the smoother the density of the
    distribution, the larger the optimal value of ``window_length`` [1]_.

    The following options are available for the `method` parameter.

    * ``'vasicek'`` uses the estimator presented in [1]_. This is one of the
      first and most influential estimators of differential entropy.
    * ``'van es'`` uses the bias-corrected estimator presented in [3]_, which
      is not only consistent but, under some conditions, asymptotically normal.
    * ``'ebrahimi'`` uses an estimator presented in [4]_, which was shown
      in simulation to have smaller bias and mean squared error than
      the Vasicek estimator.
    * ``'correa'`` uses the estimator presented in [5]_ based on local linear
      regression. In a simulation study, it had consistently smaller mean
      squared error than the Vasicek estimator, but it is more expensive to
      compute.
    * ``'auto'`` selects the method automatically (default). Currently,
      this selects ``'van es'`` for very small samples (``n <= 10``),
      ``'ebrahimi'`` for moderate sample sizes (``11 <= n <= 1000``), and
      ``'vasicek'`` for larger samples, but this behavior is subject to change
      in future versions.

    All estimators are implemented as described in [6]_.

    References
    ----------
    .. [1] Vasicek, O. (1976). A test for normality based on sample entropy.
           Journal of the Royal Statistical Society:
           Series B (Methodological), 38(1), 54-59.
    .. [2] Grzegorzewski, P., & Wieczorkowski, R. (1999). Entropy-based
           goodness-of-fit test for exponentiality. Communications in
           Statistics-Theory and Methods, 28(5), 1183-1202.
    .. [3] Van Es, B. (1992). Estimating functionals related to a density by a
           class of statistics based on spacings. Scandinavian Journal of
           Statistics, 61-72.
    .. [4] Ebrahimi, N., Pflughoeft, K., & Soofi, E. S. (1994). Two measures
           of sample entropy. Statistics & Probability Letters, 20(3), 225-234.
    .. [5] Correa, J. C. (1995). A new estimator of entropy. Communications
           in Statistics-Theory and Methods, 24(10), 2439-2449.
    .. [6] Noughabi, H. A. (2015). Entropy Estimation Using Numerical Methods.
           Annals of Data Science, 2(2), 231-241.
           https://link.springer.com/article/10.1007/s40745-015-0045-9

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import differential_entropy, norm

    Entropy of a standard normal distribution:

    >>> rng = np.random.default_rng()
    >>> values = rng.standard_normal(100)
    >>> differential_entropy(values)
    1.3407817436640392

    Compare with the true entropy:

    >>> float(norm.entropy())
    1.4189385332046727

    For several sample sizes between 5 and 1000, compare the accuracy of
    the ``'vasicek'``, ``'van es'``, and ``'ebrahimi'`` methods. Specifically,
    compare the root mean squared error (over 1000 trials) between the estimate
    and the true differential entropy of the distribution.

    >>> from scipy import stats
    >>> import matplotlib.pyplot as plt
    >>>
    >>>
    >>> def rmse(res, expected):
    ...     '''Root mean squared error'''
    ...     return np.sqrt(np.mean((res - expected)**2))
    >>>
    >>>
    >>> a, b = np.log10(5), np.log10(1000)
    >>> ns = np.round(np.logspace(a, b, 10)).astype(int)
    >>> reps = 1000  # number of repetitions for each sample size
    >>> expected = stats.expon.entropy()
    >>>
    >>> method_errors = {'vasicek': [], 'van es': [], 'ebrahimi': []}
    >>> for method in method_errors:
    ...     for n in ns:
    ...         rvs = stats.expon.rvs(size=(reps, n), random_state=rng)
    ...         res = stats.differential_entropy(rvs, method=method, axis=-1)
    ...         error = rmse(res, expected)
    ...         method_errors[method].append(error)
    >>>
    >>> for method, errors in method_errors.items():
    ...     plt.loglog(ns, errors, label=method)
    >>>
    >>> plt.legend()
    >>> plt.xlabel('sample size')
    >>> plt.ylabel('RMSE (1000 trials)')
    >>> plt.title('Entropy Estimator Error (Exponential Distribution)')

    """

    values = np.asarray(values)
    values = np.moveaxis(values, axis, -1)
    n = values.shape[-1]  # number of observations

    if window_length is None:
        window_length = math.floor(math.sqrt(n) + 0.5)

    if not 2 <= 2 * window_length < n:
        raise ValueError(
            f"Window length ({window_length}) must be positive and less "
            f"than half the sample size ({n}).",
        )

    if base is not None and base <= 0:
        raise ValueError("`base` must be a positive number or `None`.")

    sorted_data = np.sort(values, axis=-1)

    methods = {"vasicek": _vasicek_entropy,
               "van es": _van_es_entropy,
               "correa": _correa_entropy,
               "ebrahimi": _ebrahimi_entropy,
               "auto": _vasicek_entropy}
    method = method.lower()
    if method not in methods:
        message = f"`method` must be one of {set(methods)}"
        raise ValueError(message)

    if method == "auto":
        if n <= 10:
            method = 'van es'
        elif n <= 1000:
            method = 'ebrahimi'
        else:
            method = 'vasicek'

    res = methods[method](sorted_data, window_length)

    if base is not None:
        res /= np.log(base)

    return res
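

# Illustrative sketch, not part of the SciPy source: a worked example of the
# default `window_length` heuristic floor(sqrt(n) + 0.5) described in the
# docstring above.  The helper name and the sample size are hypothetical.
def _demo_default_window_length(n=100):
    m = math.floor(math.sqrt(n) + 0.5)   # for n = 100 this gives m = 10
    # `differential_entropy` then requires 2 <= 2*m < n (i.e. 1 <= m < n/2);
    # the heuristic satisfies this for every n >= 5.
    assert 2 <= 2 * m < n
    return m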



def _pad_along_last_axis(X, m):
    """Pad the data for computing the rolling window difference."""
    # scales a bit better than method in _vasicek_like_entropy
    shape = np.array(X.shape)
    shape[-1] = m
    Xl = np.broadcast_to(X[..., [0]], shape)  # [0] vs 0 to maintain shape
    Xr = np.broadcast_to(X[..., [-1]], shape)
    return np.concatenate((Xl, X, Xr), axis=-1)
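

# Illustrative sketch, not part of the SciPy source: the padding replicates the
# smallest and largest order statistics m times on each side, so windowed
# differences near the boundaries are well defined.  Hypothetical helper, for
# demonstration only.
def _demo_pad_along_last_axis():
    X = np.array([1., 2., 3.])
    padded = _pad_along_last_axis(X, m=2)
    # The edges are repeated: [1, 1, 1, 2, 3, 3, 3]
    assert np.array_equal(padded, [1., 1., 1., 2., 3., 3., 3.])
    return padded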



def _vasicek_entropy(X, m):
    """Compute the Vasicek estimator as described in [6] Eq. 1.3."""
    n = X.shape[-1]
    X = _pad_along_last_axis(X, m)
    differences = X[..., 2 * m:] - X[..., : -2 * m:]
    logs = np.log(n/(2*m) * differences)
    return np.mean(logs, axis=-1)
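

# Illustrative sketch, not part of the SciPy source: the Vasicek estimator is
#
#     H_V = (1/n) * sum_{i=1}^{n} log( n/(2m) * (X_(i+m) - X_(i-m)) ),
#
# with order statistics clipped to X_(1) and X_(n) outside 1..n (the padding
# above).  A hypothetical consistency check against the public entry point:
def _demo_vasicek_entropy(seed=12345):
    rng = np.random.default_rng(seed)
    sample = np.sort(rng.standard_normal(50))
    m = math.floor(math.sqrt(sample.size) + 0.5)  # default window length (7)
    direct = _vasicek_entropy(sample, m)
    via_public = differential_entropy(sample, method='vasicek')
    assert np.isclose(direct, via_public)
    return direct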



def _van_es_entropy(X, m):
    """Compute the van Es estimator as described in [6]."""
    # No equation number, but referred to as HVE_mn.
    # Typo in [6]: there should be a log within the summation.
    n = X.shape[-1]
    difference = X[..., m:] - X[..., :-m]
    term1 = 1/(n-m) * np.sum(np.log((n+1)/m * difference), axis=-1)
    k = np.arange(m, n+1)
    return term1 + np.sum(1/k) + np.log(m) - np.log(n+1)
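

# Illustrative note, not part of the SciPy source: with the missing log
# restored, the estimator implemented above reads
#
#     HVE = 1/(n-m) * sum_{i=1}^{n-m} log( (n+1)/m * (X_(i+m) - X_(i)) )
#           + sum_{k=m}^{n} 1/k + log(m) - log(n+1).
#
# A hypothetical check that 'auto' dispatches to this estimator for n <= 10:
def _demo_van_es_entropy(seed=1):
    rng = np.random.default_rng(seed)
    sample = rng.standard_normal(8)      # n = 8, so 'auto' selects 'van es'
    assert np.isclose(differential_entropy(sample, method='auto'),
                      differential_entropy(sample, method='van es'))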



def _ebrahimi_entropy(X, m):
    """Compute the Ebrahimi estimator as described in [6]."""
    # No equation number, but referred to as HE_mn
    n = X.shape[-1]
    X = _pad_along_last_axis(X, m)

    differences = X[..., 2 * m:] - X[..., : -2 * m:]

    i = np.arange(1, n+1).astype(float)
    ci = np.ones_like(i)*2
    ci[i <= m] = 1 + (i[i <= m] - 1)/m
    ci[i >= n - m + 1] = 1 + (n - i[i >= n-m+1])/m

    logs = np.log(n * differences / (ci * m))
    return np.mean(logs, axis=-1)
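

# Illustrative sketch, not part of the SciPy source: the boundary weights c_i
# used above equal 2 in the interior and shrink linearly toward 1 at the
# edges.  Hypothetical helper, shown for n = 6, m = 2.
def _demo_ebrahimi_weights(n=6, m=2):
    i = np.arange(1, n + 1).astype(float)
    ci = np.ones_like(i) * 2
    ci[i <= m] = 1 + (i[i <= m] - 1) / m
    ci[i >= n - m + 1] = 1 + (n - i[i >= n - m + 1]) / m
    # For n = 6, m = 2 this yields [1.0, 1.5, 2.0, 2.0, 1.5, 1.0].
    return ci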



def _correa_entropy(X, m):
    """Compute the Correa estimator as described in [6]."""
    # No equation number, but referred to as HC_mn
    n = X.shape[-1]
    X = _pad_along_last_axis(X, m)

    i = np.arange(1, n+1)
    dj = np.arange(-m, m+1)[:, None]
    j = i + dj
    j0 = j + m - 1  # 0-indexed version of j

    Xibar = np.mean(X[..., j0], axis=-2, keepdims=True)
    difference = X[..., j0] - Xibar
    num = np.sum(difference*dj, axis=-2)  # dj is j - i
    den = n*np.sum(difference**2, axis=-2)
    return -np.mean(np.log(num/den), axis=-1)
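

# Illustrative sketch, not part of the SciPy source: a quick side-by-side of
# the four estimators on one sorted sample.  The values differ slightly; all
# converge to the true differential entropy as the sample grows (see the
# `differential_entropy` docstring).  Hypothetical helper name.
def _demo_compare_estimators(seed=0, n=200):
    rng = np.random.default_rng(seed)
    sorted_sample = np.sort(rng.standard_normal(n))
    m = math.floor(math.sqrt(n) + 0.5)   # default window length
    estimators = {"vasicek": _vasicek_entropy,
                  "van es": _van_es_entropy,
                  "ebrahimi": _ebrahimi_entropy,
                  "correa": _correa_entropy}
    return {name: float(fn(sorted_sample, m)) for name, fn in estimators.items()}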