Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_binomtest.py: 13%

121 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1from math import sqrt 

2import numpy as np 

3from scipy._lib._util import _validate_int 

4from scipy.optimize import brentq 

5from scipy.special import ndtri 

6from ._discrete_distns import binom 

7from ._common import ConfidenceInterval 

8 

9 

10class BinomTestResult: 

11 """ 

12 Result of `scipy.stats.binomtest`. 

13 

14 Attributes 

15 ---------- 

16 k : int 

17 The number of successes (copied from `binomtest` input). 

18 n : int 

19 The number of trials (copied from `binomtest` input). 

20 alternative : str 

21 Indicates the alternative hypothesis specified in the input 

22 to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``, 

23 or ``'less'``. 

24 statistic: float 

25 The estimate of the proportion of successes. 

26 pvalue : float 

27 The p-value of the hypothesis test. 

28 

29 """ 

30 def __init__(self, k, n, alternative, statistic, pvalue): 

31 self.k = k 

32 self.n = n 

33 self.alternative = alternative 

34 self.statistic = statistic 

35 self.pvalue = pvalue 

36 

37 # add alias for backward compatibility 

38 self.proportion_estimate = statistic 

39 

40 def __repr__(self): 

41 s = ("BinomTestResult(" 

42 f"k={self.k}, " 

43 f"n={self.n}, " 

44 f"alternative={self.alternative!r}, " 

45 f"statistic={self.statistic}, " 

46 f"pvalue={self.pvalue})") 

47 return s 

48 

49 def proportion_ci(self, confidence_level=0.95, method='exact'): 

50 """ 

51 Compute the confidence interval for ``statistic``. 

52 

53 Parameters 

54 ---------- 

55 confidence_level : float, optional 

56 Confidence level for the computed confidence interval 

57 of the estimated proportion. Default is 0.95. 

58 method : {'exact', 'wilson', 'wilsoncc'}, optional 

59 Selects the method used to compute the confidence interval 

60 for the estimate of the proportion: 

61 

62 'exact' : 

63 Use the Clopper-Pearson exact method [1]_. 

64 'wilson' : 

65 Wilson's method, without continuity correction ([2]_, [3]_). 

66 'wilsoncc' : 

67 Wilson's method, with continuity correction ([2]_, [3]_). 

68 

69 Default is ``'exact'``. 

70 

71 Returns 

72 ------- 

73 ci : ``ConfidenceInterval`` object 

74 The object has attributes ``low`` and ``high`` that hold the 

75 lower and upper bounds of the confidence interval. 

76 

77 References 

78 ---------- 

79 .. [1] C. J. Clopper and E. S. Pearson, The use of confidence or 

80 fiducial limits illustrated in the case of the binomial, 

81 Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934). 

82 .. [2] E. B. Wilson, Probable inference, the law of succession, and 

83 statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212 

84 (1927). 

85 .. [3] Robert G. Newcombe, Two-sided confidence intervals for the 

86 single proportion: comparison of seven methods, Statistics 

87 in Medicine, 17, pp 857-872 (1998). 

88 

89 Examples 

90 -------- 

91 >>> from scipy.stats import binomtest 

92 >>> result = binomtest(k=7, n=50, p=0.1) 

93 >>> result.statistic 

94 0.14 

95 >>> result.proportion_ci() 

96 ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846) 

97 """ 

98 if method not in ('exact', 'wilson', 'wilsoncc'): 

99 raise ValueError("method must be one of 'exact', 'wilson' or " 

100 "'wilsoncc'.") 

101 if not (0 <= confidence_level <= 1): 

102 raise ValueError('confidence_level must be in the interval ' 

103 '[0, 1].') 

104 if method == 'exact': 

105 low, high = _binom_exact_conf_int(self.k, self.n, 

106 confidence_level, 

107 self.alternative) 

108 else: 

109 # method is 'wilson' or 'wilsoncc' 

110 low, high = _binom_wilson_conf_int(self.k, self.n, 

111 confidence_level, 

112 self.alternative, 

113 correction=method == 'wilsoncc') 

114 return ConfidenceInterval(low=low, high=high) 

115 

116 

117def _findp(func): 

118 try: 

119 p = brentq(func, 0, 1) 

120 except RuntimeError: 

121 raise RuntimeError('numerical solver failed to converge when ' 

122 'computing the confidence limits') from None 

123 except ValueError as exc: 

124 raise ValueError('brentq raised a ValueError; report this to the ' 

125 'SciPy developers') from exc 

126 return p 

127 

128 

129def _binom_exact_conf_int(k, n, confidence_level, alternative): 

130 """ 

131 Compute the estimate and confidence interval for the binomial test. 

132 

133 Returns proportion, prop_low, prop_high 

134 """ 

135 if alternative == 'two-sided': 

136 alpha = (1 - confidence_level) / 2 

137 if k == 0: 

138 plow = 0.0 

139 else: 

140 plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha) 

141 if k == n: 

142 phigh = 1.0 

143 else: 

144 phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha) 

145 elif alternative == 'less': 

146 alpha = 1 - confidence_level 

147 plow = 0.0 

148 if k == n: 

149 phigh = 1.0 

150 else: 

151 phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha) 

152 elif alternative == 'greater': 

153 alpha = 1 - confidence_level 

154 if k == 0: 

155 plow = 0.0 

156 else: 

157 plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha) 

158 phigh = 1.0 

159 return plow, phigh 

160 

161 

162def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction): 

163 # This function assumes that the arguments have already been validated. 

164 # In particular, `alternative` must be one of 'two-sided', 'less' or 

165 # 'greater'. 

166 p = k / n 

167 if alternative == 'two-sided': 

168 z = ndtri(0.5 + 0.5*confidence_level) 

169 else: 

170 z = ndtri(confidence_level) 

171 

172 # For reference, the formulas implemented here are from 

173 # Newcombe (1998) (ref. [3] in the proportion_ci docstring). 

174 denom = 2*(n + z**2) 

175 center = (2*n*p + z**2)/denom 

176 q = 1 - p 

177 if correction: 

178 if alternative == 'less' or k == 0: 

179 lo = 0.0 

180 else: 

181 dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom 

182 lo = center - dlo 

183 if alternative == 'greater' or k == n: 

184 hi = 1.0 

185 else: 

186 dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom 

187 hi = center + dhi 

188 else: 

189 delta = z/denom * sqrt(4*n*p*q + z**2) 

190 if alternative == 'less' or k == 0: 

191 lo = 0.0 

192 else: 

193 lo = center - delta 

194 if alternative == 'greater' or k == n: 

195 hi = 1.0 

196 else: 

197 hi = center + delta 

198 

199 return lo, hi 

200 

201 

202def binomtest(k, n, p=0.5, alternative='two-sided'): 

203 """ 

204 Perform a test that the probability of success is p. 

205 

206 The binomial test [1]_ is a test of the null hypothesis that the 

207 probability of success in a Bernoulli experiment is `p`. 

208 

209 Details of the test can be found in many texts on statistics, such 

210 as section 24.5 of [2]_. 

211 

212 Parameters 

213 ---------- 

214 k : int 

215 The number of successes. 

216 n : int 

217 The number of trials. 

218 p : float, optional 

219 The hypothesized probability of success, i.e. the expected 

220 proportion of successes. The value must be in the interval 

221 ``0 <= p <= 1``. The default value is ``p = 0.5``. 

222 alternative : {'two-sided', 'greater', 'less'}, optional 

223 Indicates the alternative hypothesis. The default value is 

224 'two-sided'. 

225 

226 Returns 

227 ------- 

228 result : `~scipy.stats._result_classes.BinomTestResult` instance 

229 The return value is an object with the following attributes: 

230 

231 k : int 

232 The number of successes (copied from `binomtest` input). 

233 n : int 

234 The number of trials (copied from `binomtest` input). 

235 alternative : str 

236 Indicates the alternative hypothesis specified in the input 

237 to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``, 

238 or ``'less'``. 

239 statistic : float 

240 The estimate of the proportion of successes. 

241 pvalue : float 

242 The p-value of the hypothesis test. 

243 

244 The object has the following methods: 

245 

246 proportion_ci(confidence_level=0.95, method='exact') : 

247 Compute the confidence interval for ``statistic``. 

248 

249 Notes 

250 ----- 

251 .. versionadded:: 1.7.0 

252 

253 References 

254 ---------- 

255 .. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test 

256 .. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition), 

257 Prentice Hall, Upper Saddle River, New Jersey USA (2010) 

258 

259 Examples 

260 -------- 

261 >>> from scipy.stats import binomtest 

262 

263 A car manufacturer claims that no more than 10% of their cars are unsafe. 

264 15 cars are inspected for safety, 3 were found to be unsafe. Test the 

265 manufacturer's claim: 

266 

267 >>> result = binomtest(3, n=15, p=0.1, alternative='greater') 

268 >>> result.pvalue 

269 0.18406106910639114 

270 

271 The null hypothesis cannot be rejected at the 5% level of significance 

272 because the returned p-value is greater than the critical value of 5%. 

273 

274 The test statistic is equal to the estimated proportion, which is simply 

275 ``3/15``: 

276 

277 >>> result.statistic 

278 0.2 

279 

280 We can use the `proportion_ci()` method of the result to compute the 

281 confidence interval of the estimate: 

282 

283 >>> result.proportion_ci(confidence_level=0.95) 

284 ConfidenceInterval(low=0.05684686759024681, high=1.0) 

285 

286 """ 

287 k = _validate_int(k, 'k', minimum=0) 

288 n = _validate_int(n, 'n', minimum=1) 

289 if k > n: 

290 raise ValueError('k must not be greater than n.') 

291 

292 if not (0 <= p <= 1): 

293 raise ValueError("p must be in range [0,1]") 

294 

295 if alternative not in ('two-sided', 'less', 'greater'): 

296 raise ValueError("alternative not recognized; \n" 

297 "must be 'two-sided', 'less' or 'greater'") 

298 if alternative == 'less': 

299 pval = binom.cdf(k, n, p) 

300 elif alternative == 'greater': 

301 pval = binom.sf(k-1, n, p) 

302 else: 

303 # alternative is 'two-sided' 

304 d = binom.pmf(k, n, p) 

305 rerr = 1 + 1e-7 

306 if k == p * n: 

307 # special case as shortcut, would also be handled by `else` below 

308 pval = 1. 

309 elif k < p * n: 

310 ix = _binary_search_for_binom_tst(lambda x1: -binom.pmf(x1, n, p), 

311 -d*rerr, np.ceil(p * n), n) 

312 # y is the number of terms between mode and n that are <= d*rerr. 

313 # ix gave us the first term where a(ix) <= d*rerr < a(ix-1) 

314 # if the first equality doesn't hold, y=n-ix. Otherwise, we 

315 # need to include ix as well as the equality holds. Note that 

316 # the equality will hold in very very rare situations due to rerr. 

317 y = n - ix + int(d*rerr == binom.pmf(ix, n, p)) 

318 pval = binom.cdf(k, n, p) + binom.sf(n - y, n, p) 

319 else: 

320 ix = _binary_search_for_binom_tst(lambda x1: binom.pmf(x1, n, p), 

321 d*rerr, 0, np.floor(p * n)) 

322 # y is the number of terms between 0 and mode that are <= d*rerr. 

323 # we need to add a 1 to account for the 0 index. 

324 # For comparing this with old behavior, see 

325 # tst_binary_srch_for_binom_tst method in test_morestats. 

326 y = ix + 1 

327 pval = binom.cdf(y-1, n, p) + binom.sf(k-1, n, p) 

328 

329 pval = min(1.0, pval) 

330 

331 result = BinomTestResult(k=k, n=n, alternative=alternative, 

332 statistic=k/n, pvalue=pval) 

333 return result 

334 

335 

336def _binary_search_for_binom_tst(a, d, lo, hi): 

337 """ 

338 Conducts an implicit binary search on a function specified by `a`. 

339 

340 Meant to be used on the binomial PMF for the case of two-sided tests 

341 to obtain the value on the other side of the mode where the tail 

342 probability should be computed. The values on either side of 

343 the mode are always in order, meaning binary search is applicable. 

344 

345 Parameters 

346 ---------- 

347 a : callable 

348 The function over which to perform binary search. Its values 

349 for inputs lo and hi should be in ascending order. 

350 d : float 

351 The value to search. 

352 lo : int 

353 The lower end of range to search. 

354 hi : int 

355 The higher end of the range to search. 

356 

357 Returns 

358 ------- 

359 int 

360 The index, i between lo and hi 

361 such that a(i)<=d<a(i+1) 

362 """ 

363 while lo < hi: 

364 mid = lo + (hi-lo)//2 

365 midval = a(mid) 

366 if midval < d: 

367 lo = mid+1 

368 elif midval > d: 

369 hi = mid-1 

370 else: 

371 return mid 

372 if a(lo) <= d: 

373 return lo 

374 else: 

375 return lo-1