Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/

1from math import sqrt

2import numpy as np

3from scipy._lib._util import _validate_int

4from scipy.optimize import brentq

5from scipy.special import ndtri

6from ._discrete_distns import binom

7from ._common import ConfidenceInterval

10class BinomTestResult:

11 """

12 Result of `scipy.stats.binomtest`.

14 Attributes

15 ----------

16 k : int

17 The number of successes (copied from `binomtest` input).

18 n : int

19 The number of trials (copied from `binomtest` input).

20 alternative : str

21 Indicates the alternative hypothesis specified in the input

22 to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``,

23 or ``'less'``.

24 statistic: float

25 The estimate of the proportion of successes.

26 pvalue : float

27 The p-value of the hypothesis test.

29 """

30 def __init__(self, k, n, alternative, statistic, pvalue):

31 self.k = k

32 self.n = n

33 self.alternative = alternative

34 self.statistic = statistic

35 self.pvalue = pvalue

37 # add alias for backward compatibility

38 self.proportion_estimate = statistic

40 def __repr__(self):

41 s = ("BinomTestResult("

42 f"k={self.k}, "

43 f"n={self.n}, "

44 f"alternative={self.alternative!r}, "

45 f"statistic={self.statistic}, "

46 f"pvalue={self.pvalue})")

47 return s

49 def proportion_ci(self, confidence_level=0.95, method='exact'):

50 """

51 Compute the confidence interval for ``statistic``.

53 Parameters

54 ----------

55 confidence_level : float, optional

56 Confidence level for the computed confidence interval

57 of the estimated proportion. Default is 0.95.

58 method : {'exact', 'wilson', 'wilsoncc'}, optional

59 Selects the method used to compute the confidence interval

60 for the estimate of the proportion:

62 'exact' :

63 Use the Clopper-Pearson exact method [1]_.

64 'wilson' :

65 Wilson's method, without continuity correction ([2]_, [3]_).

66 'wilsoncc' :

67 Wilson's method, with continuity correction ([2]_, [3]_).

69 Default is ``'exact'``.

71 Returns

72 -------

73 ci : ``ConfidenceInterval`` object

74 The object has attributes ``low`` and ``high`` that hold the

75 lower and upper bounds of the confidence interval.

77 References

78 ----------

79 .. [1] C. J. Clopper and E. S. Pearson, The use of confidence or

80 fiducial limits illustrated in the case of the binomial,

81 Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934).

82 .. [2] E. B. Wilson, Probable inference, the law of succession, and

83 statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212

84 (1927).

85 .. [3] Robert G. Newcombe, Two-sided confidence intervals for the

86 single proportion: comparison of seven methods, Statistics

87 in Medicine, 17, pp 857-872 (1998).

89 Examples

90 --------

91 >>> from scipy.stats import binomtest

92 >>> result = binomtest(k=7, n=50, p=0.1)

93 >>> result.statistic

94 0.14

95 >>> result.proportion_ci()

96 ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846)

97 """

98 if method not in ('exact', 'wilson', 'wilsoncc'):

99 raise ValueError("method must be one of 'exact', 'wilson' or "

100 "'wilsoncc'.")

101 if not (0 <= confidence_level <= 1):

102 raise ValueError('confidence_level must be in the interval '

103 '[0, 1].')

104 if method == 'exact':

105 low, high = _binom_exact_conf_int(self.k, self.n,

106 confidence_level,

107 self.alternative)

108 else:

109 # method is 'wilson' or 'wilsoncc'

110 low, high = _binom_wilson_conf_int(self.k, self.n,

111 confidence_level,

112 self.alternative,

113 correction=method == 'wilsoncc')

114 return ConfidenceInterval(low=low, high=high)

115

116

117def _findp(func):

118 try:

119 p = brentq(func, 0, 1)

120 except RuntimeError:

121 raise RuntimeError('numerical solver failed to converge when '

122 'computing the confidence limits') from None

123 except ValueError as exc:

124 raise ValueError('brentq raised a ValueError; report this to the '

125 'SciPy developers') from exc

126 return p

127

128

129def _binom_exact_conf_int(k, n, confidence_level, alternative):

130 """

131 Compute the estimate and confidence interval for the binomial test.

132

133 Returns proportion, prop_low, prop_high

134 """

135 if alternative == 'two-sided':

136 alpha = (1 - confidence_level) / 2

137 if k == 0:

138 plow = 0.0

139 else:

140 plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)

141 if k == n:

142 phigh = 1.0

143 else:

144 phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)

145 elif alternative == 'less':

146 alpha = 1 - confidence_level

147 plow = 0.0

148 if k == n:

149 phigh = 1.0

150 else:

151 phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)

152 elif alternative == 'greater':

153 alpha = 1 - confidence_level

154 if k == 0:

155 plow = 0.0

156 else:

157 plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)

158 phigh = 1.0

159 return plow, phigh

160

161

162def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction):

163 # This function assumes that the arguments have already been validated.

164 # In particular, `alternative` must be one of 'two-sided', 'less' or

165 # 'greater'.

166 p = k / n

167 if alternative == 'two-sided':

168 z = ndtri(0.5 + 0.5*confidence_level)

169 else:

170 z = ndtri(confidence_level)

171

172 # For reference, the formulas implemented here are from

173 # Newcombe (1998) (ref. [3] in the proportion_ci docstring).

174 denom = 2*(n + z**2)

175 center = (2*n*p + z**2)/denom

176 q = 1 - p

177 if correction:

178 if alternative == 'less' or k == 0:

179 lo = 0.0

180 else:

181 dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom

182 lo = center - dlo

183 if alternative == 'greater' or k == n:

184 hi = 1.0

185 else:

186 dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom

187 hi = center + dhi

188 else:

189 delta = z/denom * sqrt(4*n*p*q + z**2)

190 if alternative == 'less' or k == 0:

191 lo = 0.0

192 else:

193 lo = center - delta

194 if alternative == 'greater' or k == n:

195 hi = 1.0

196 else:

197 hi = center + delta

198

199 return lo, hi

200

201

202def binomtest(k, n, p=0.5, alternative='two-sided'):

203 """

204 Perform a test that the probability of success is p.

205

206 The binomial test [1]_ is a test of the null hypothesis that the

207 probability of success in a Bernoulli experiment is `p`.

208

209 Details of the test can be found in many texts on statistics, such

210 as section 24.5 of [2]_.

211

212 Parameters

213 ----------

214 k : int

215 The number of successes.

216 n : int

217 The number of trials.

218 p : float, optional

219 The hypothesized probability of success, i.e. the expected

220 proportion of successes. The value must be in the interval

221 ``0 <= p <= 1``. The default value is ``p = 0.5``.

222 alternative : {'two-sided', 'greater', 'less'}, optional

223 Indicates the alternative hypothesis. The default value is

224 'two-sided'.

225

226 Returns

227 -------

228 result : `~scipy.stats._result_classes.BinomTestResult` instance

229 The return value is an object with the following attributes:

230

231 k : int

232 The number of successes (copied from `binomtest` input).

233 n : int

234 The number of trials (copied from `binomtest` input).

235 alternative : str

236 Indicates the alternative hypothesis specified in the input

237 to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``,

238 or ``'less'``.

239 statistic : float

240 The estimate of the proportion of successes.

241 pvalue : float

242 The p-value of the hypothesis test.

243

244 The object has the following methods:

245

246 proportion_ci(confidence_level=0.95, method='exact') :

247 Compute the confidence interval for ``statistic``.

248

249 Notes

250 -----

251 .. versionadded:: 1.7.0

252

253 References

254 ----------

255 .. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test

256 .. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition),

257 Prentice Hall, Upper Saddle River, New Jersey USA (2010)

258

259 Examples

260 --------

261 >>> from scipy.stats import binomtest

262

263 A car manufacturer claims that no more than 10% of their cars are unsafe.

264 15 cars are inspected for safety, 3 were found to be unsafe. Test the

265 manufacturer's claim:

266

267 >>> result = binomtest(3, n=15, p=0.1, alternative='greater')

268 >>> result.pvalue

269 0.18406106910639114

270

271 The null hypothesis cannot be rejected at the 5% level of significance

272 because the returned p-value is greater than the critical value of 5%.

273

274 The test statistic is equal to the estimated proportion, which is simply

275 ``3/15``:

276

277 >>> result.statistic

278 0.2

279

280 We can use the `proportion_ci()` method of the result to compute the

281 confidence interval of the estimate:

282

283 >>> result.proportion_ci(confidence_level=0.95)

284 ConfidenceInterval(low=0.05684686759024681, high=1.0)

285

286 """

287 k = _validate_int(k, 'k', minimum=0)

288 n = _validate_int(n, 'n', minimum=1)

289 if k > n:

290 raise ValueError('k must not be greater than n.')

291

292 if not (0 <= p <= 1):

293 raise ValueError("p must be in range [0,1]")

294

295 if alternative not in ('two-sided', 'less', 'greater'):

296 raise ValueError("alternative not recognized; \n"

297 "must be 'two-sided', 'less' or 'greater'")

298 if alternative == 'less':

299 pval = binom.cdf(k, n, p)

300 elif alternative == 'greater':

301 pval = binom.sf(k-1, n, p)

302 else:

303 # alternative is 'two-sided'

304 d = binom.pmf(k, n, p)

305 rerr = 1 + 1e-7

306 if k == p * n:

307 # special case as shortcut, would also be handled by `else` below

308 pval = 1.

309 elif k < p * n:

310 ix = _binary_search_for_binom_tst(lambda x1: -binom.pmf(x1, n, p),

311 -d*rerr, np.ceil(p * n), n)

312 # y is the number of terms between mode and n that are <= d*rerr.

313 # ix gave us the first term where a(ix) <= d*rerr < a(ix-1)

314 # if the first equality doesn't hold, y=n-ix. Otherwise, we

315 # need to include ix as well as the equality holds. Note that

316 # the equality will hold in very very rare situations due to rerr.

317 y = n - ix + int(d*rerr == binom.pmf(ix, n, p))

318 pval = binom.cdf(k, n, p) + binom.sf(n - y, n, p)

319 else:

320 ix = _binary_search_for_binom_tst(lambda x1: binom.pmf(x1, n, p),

321 d*rerr, 0, np.floor(p * n))

322 # y is the number of terms between 0 and mode that are <= d*rerr.

323 # we need to add a 1 to account for the 0 index.

324 # For comparing this with old behavior, see

325 # tst_binary_srch_for_binom_tst method in test_morestats.

326 y = ix + 1

327 pval = binom.cdf(y-1, n, p) + binom.sf(k-1, n, p)

328

329 pval = min(1.0, pval)

330

331 result = BinomTestResult(k=k, n=n, alternative=alternative,

332 statistic=k/n, pvalue=pval)

333 return result

334

335

336def _binary_search_for_binom_tst(a, d, lo, hi):

337 """

338 Conducts an implicit binary search on a function specified by `a`.

339

340 Meant to be used on the binomial PMF for the case of two-sided tests

341 to obtain the value on the other side of the mode where the tail

342 probability should be computed. The values on either side of

343 the mode are always in order, meaning binary search is applicable.

344

345 Parameters

346 ----------

347 a : callable

348 The function over which to perform binary search. Its values

349 for inputs lo and hi should be in ascending order.

350 d : float

351 The value to search.

352 lo : int

353 The lower end of range to search.

354 hi : int

355 The higher end of the range to search.

356

357 Returns

358 -------

359 int

360 The index, i between lo and hi

361 such that a(i)<=d<a(i+1)

362 """

363 while lo < hi:

364 mid = lo + (hi-lo)//2

365 midval = a(mid)

366 if midval < d:

367 lo = mid+1

368 elif midval > d:

369 hi = mid-1

370 else:

371 return mid

372 if a(lo) <= d:

373 return lo

374 else:

375 return lo-1

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_binomtest.py: 13%

121 statements