Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_binomtest.py: 13%
121 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1from math import sqrt
2import numpy as np
3from scipy._lib._util import _validate_int
4from scipy.optimize import brentq
5from scipy.special import ndtri
6from ._discrete_distns import binom
7from ._common import ConfidenceInterval
10class BinomTestResult:
11 """
12 Result of `scipy.stats.binomtest`.
14 Attributes
15 ----------
16 k : int
17 The number of successes (copied from `binomtest` input).
18 n : int
19 The number of trials (copied from `binomtest` input).
20 alternative : str
21 Indicates the alternative hypothesis specified in the input
22 to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``,
23 or ``'less'``.
24 statistic: float
25 The estimate of the proportion of successes.
26 pvalue : float
27 The p-value of the hypothesis test.
29 """
30 def __init__(self, k, n, alternative, statistic, pvalue):
31 self.k = k
32 self.n = n
33 self.alternative = alternative
34 self.statistic = statistic
35 self.pvalue = pvalue
37 # add alias for backward compatibility
38 self.proportion_estimate = statistic
40 def __repr__(self):
41 s = ("BinomTestResult("
42 f"k={self.k}, "
43 f"n={self.n}, "
44 f"alternative={self.alternative!r}, "
45 f"statistic={self.statistic}, "
46 f"pvalue={self.pvalue})")
47 return s
49 def proportion_ci(self, confidence_level=0.95, method='exact'):
50 """
51 Compute the confidence interval for ``statistic``.
53 Parameters
54 ----------
55 confidence_level : float, optional
56 Confidence level for the computed confidence interval
57 of the estimated proportion. Default is 0.95.
58 method : {'exact', 'wilson', 'wilsoncc'}, optional
59 Selects the method used to compute the confidence interval
60 for the estimate of the proportion:
62 'exact' :
63 Use the Clopper-Pearson exact method [1]_.
64 'wilson' :
65 Wilson's method, without continuity correction ([2]_, [3]_).
66 'wilsoncc' :
67 Wilson's method, with continuity correction ([2]_, [3]_).
69 Default is ``'exact'``.
71 Returns
72 -------
73 ci : ``ConfidenceInterval`` object
74 The object has attributes ``low`` and ``high`` that hold the
75 lower and upper bounds of the confidence interval.
77 References
78 ----------
79 .. [1] C. J. Clopper and E. S. Pearson, The use of confidence or
80 fiducial limits illustrated in the case of the binomial,
81 Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934).
82 .. [2] E. B. Wilson, Probable inference, the law of succession, and
83 statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212
84 (1927).
85 .. [3] Robert G. Newcombe, Two-sided confidence intervals for the
86 single proportion: comparison of seven methods, Statistics
87 in Medicine, 17, pp 857-872 (1998).
89 Examples
90 --------
91 >>> from scipy.stats import binomtest
92 >>> result = binomtest(k=7, n=50, p=0.1)
93 >>> result.statistic
94 0.14
95 >>> result.proportion_ci()
96 ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846)
97 """
98 if method not in ('exact', 'wilson', 'wilsoncc'):
99 raise ValueError("method must be one of 'exact', 'wilson' or "
100 "'wilsoncc'.")
101 if not (0 <= confidence_level <= 1):
102 raise ValueError('confidence_level must be in the interval '
103 '[0, 1].')
104 if method == 'exact':
105 low, high = _binom_exact_conf_int(self.k, self.n,
106 confidence_level,
107 self.alternative)
108 else:
109 # method is 'wilson' or 'wilsoncc'
110 low, high = _binom_wilson_conf_int(self.k, self.n,
111 confidence_level,
112 self.alternative,
113 correction=method == 'wilsoncc')
114 return ConfidenceInterval(low=low, high=high)
117def _findp(func):
118 try:
119 p = brentq(func, 0, 1)
120 except RuntimeError:
121 raise RuntimeError('numerical solver failed to converge when '
122 'computing the confidence limits') from None
123 except ValueError as exc:
124 raise ValueError('brentq raised a ValueError; report this to the '
125 'SciPy developers') from exc
126 return p
129def _binom_exact_conf_int(k, n, confidence_level, alternative):
130 """
131 Compute the estimate and confidence interval for the binomial test.
133 Returns proportion, prop_low, prop_high
134 """
135 if alternative == 'two-sided':
136 alpha = (1 - confidence_level) / 2
137 if k == 0:
138 plow = 0.0
139 else:
140 plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
141 if k == n:
142 phigh = 1.0
143 else:
144 phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
145 elif alternative == 'less':
146 alpha = 1 - confidence_level
147 plow = 0.0
148 if k == n:
149 phigh = 1.0
150 else:
151 phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
152 elif alternative == 'greater':
153 alpha = 1 - confidence_level
154 if k == 0:
155 plow = 0.0
156 else:
157 plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
158 phigh = 1.0
159 return plow, phigh
162def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction):
163 # This function assumes that the arguments have already been validated.
164 # In particular, `alternative` must be one of 'two-sided', 'less' or
165 # 'greater'.
166 p = k / n
167 if alternative == 'two-sided':
168 z = ndtri(0.5 + 0.5*confidence_level)
169 else:
170 z = ndtri(confidence_level)
172 # For reference, the formulas implemented here are from
173 # Newcombe (1998) (ref. [3] in the proportion_ci docstring).
174 denom = 2*(n + z**2)
175 center = (2*n*p + z**2)/denom
176 q = 1 - p
177 if correction:
178 if alternative == 'less' or k == 0:
179 lo = 0.0
180 else:
181 dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom
182 lo = center - dlo
183 if alternative == 'greater' or k == n:
184 hi = 1.0
185 else:
186 dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom
187 hi = center + dhi
188 else:
189 delta = z/denom * sqrt(4*n*p*q + z**2)
190 if alternative == 'less' or k == 0:
191 lo = 0.0
192 else:
193 lo = center - delta
194 if alternative == 'greater' or k == n:
195 hi = 1.0
196 else:
197 hi = center + delta
199 return lo, hi
202def binomtest(k, n, p=0.5, alternative='two-sided'):
203 """
204 Perform a test that the probability of success is p.
206 The binomial test [1]_ is a test of the null hypothesis that the
207 probability of success in a Bernoulli experiment is `p`.
209 Details of the test can be found in many texts on statistics, such
210 as section 24.5 of [2]_.
212 Parameters
213 ----------
214 k : int
215 The number of successes.
216 n : int
217 The number of trials.
218 p : float, optional
219 The hypothesized probability of success, i.e. the expected
220 proportion of successes. The value must be in the interval
221 ``0 <= p <= 1``. The default value is ``p = 0.5``.
222 alternative : {'two-sided', 'greater', 'less'}, optional
223 Indicates the alternative hypothesis. The default value is
224 'two-sided'.
226 Returns
227 -------
228 result : `~scipy.stats._result_classes.BinomTestResult` instance
229 The return value is an object with the following attributes:
231 k : int
232 The number of successes (copied from `binomtest` input).
233 n : int
234 The number of trials (copied from `binomtest` input).
235 alternative : str
236 Indicates the alternative hypothesis specified in the input
237 to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``,
238 or ``'less'``.
239 statistic : float
240 The estimate of the proportion of successes.
241 pvalue : float
242 The p-value of the hypothesis test.
244 The object has the following methods:
246 proportion_ci(confidence_level=0.95, method='exact') :
247 Compute the confidence interval for ``statistic``.
249 Notes
250 -----
251 .. versionadded:: 1.7.0
253 References
254 ----------
255 .. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test
256 .. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition),
257 Prentice Hall, Upper Saddle River, New Jersey USA (2010)
259 Examples
260 --------
261 >>> from scipy.stats import binomtest
263 A car manufacturer claims that no more than 10% of their cars are unsafe.
264 15 cars are inspected for safety, 3 were found to be unsafe. Test the
265 manufacturer's claim:
267 >>> result = binomtest(3, n=15, p=0.1, alternative='greater')
268 >>> result.pvalue
269 0.18406106910639114
271 The null hypothesis cannot be rejected at the 5% level of significance
272 because the returned p-value is greater than the critical value of 5%.
274 The test statistic is equal to the estimated proportion, which is simply
275 ``3/15``:
277 >>> result.statistic
278 0.2
280 We can use the `proportion_ci()` method of the result to compute the
281 confidence interval of the estimate:
283 >>> result.proportion_ci(confidence_level=0.95)
284 ConfidenceInterval(low=0.05684686759024681, high=1.0)
286 """
287 k = _validate_int(k, 'k', minimum=0)
288 n = _validate_int(n, 'n', minimum=1)
289 if k > n:
290 raise ValueError('k must not be greater than n.')
292 if not (0 <= p <= 1):
293 raise ValueError("p must be in range [0,1]")
295 if alternative not in ('two-sided', 'less', 'greater'):
296 raise ValueError("alternative not recognized; \n"
297 "must be 'two-sided', 'less' or 'greater'")
298 if alternative == 'less':
299 pval = binom.cdf(k, n, p)
300 elif alternative == 'greater':
301 pval = binom.sf(k-1, n, p)
302 else:
303 # alternative is 'two-sided'
304 d = binom.pmf(k, n, p)
305 rerr = 1 + 1e-7
306 if k == p * n:
307 # special case as shortcut, would also be handled by `else` below
308 pval = 1.
309 elif k < p * n:
310 ix = _binary_search_for_binom_tst(lambda x1: -binom.pmf(x1, n, p),
311 -d*rerr, np.ceil(p * n), n)
312 # y is the number of terms between mode and n that are <= d*rerr.
313 # ix gave us the first term where a(ix) <= d*rerr < a(ix-1)
314 # if the first equality doesn't hold, y=n-ix. Otherwise, we
315 # need to include ix as well as the equality holds. Note that
316 # the equality will hold in very very rare situations due to rerr.
317 y = n - ix + int(d*rerr == binom.pmf(ix, n, p))
318 pval = binom.cdf(k, n, p) + binom.sf(n - y, n, p)
319 else:
320 ix = _binary_search_for_binom_tst(lambda x1: binom.pmf(x1, n, p),
321 d*rerr, 0, np.floor(p * n))
322 # y is the number of terms between 0 and mode that are <= d*rerr.
323 # we need to add a 1 to account for the 0 index.
324 # For comparing this with old behavior, see
325 # tst_binary_srch_for_binom_tst method in test_morestats.
326 y = ix + 1
327 pval = binom.cdf(y-1, n, p) + binom.sf(k-1, n, p)
329 pval = min(1.0, pval)
331 result = BinomTestResult(k=k, n=n, alternative=alternative,
332 statistic=k/n, pvalue=pval)
333 return result
336def _binary_search_for_binom_tst(a, d, lo, hi):
337 """
338 Conducts an implicit binary search on a function specified by `a`.
340 Meant to be used on the binomial PMF for the case of two-sided tests
341 to obtain the value on the other side of the mode where the tail
342 probability should be computed. The values on either side of
343 the mode are always in order, meaning binary search is applicable.
345 Parameters
346 ----------
347 a : callable
348 The function over which to perform binary search. Its values
349 for inputs lo and hi should be in ascending order.
350 d : float
351 The value to search.
352 lo : int
353 The lower end of range to search.
354 hi : int
355 The higher end of the range to search.
357 Returns
358 -------
359 int
360 The index, i between lo and hi
361 such that a(i)<=d<a(i+1)
362 """
363 while lo < hi:
364 mid = lo + (hi-lo)//2
365 midval = a(mid)
366 if midval < d:
367 lo = mid+1
368 elif midval > d:
369 hi = mid-1
370 else:
371 return mid
372 if a(lo) <= d:
373 return lo
374 else:
375 return lo-1