Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/

1from collections import namedtuple

2from dataclasses import make_dataclass

3from math import comb

4import numpy as np

5import warnings

6from itertools import combinations

7import scipy.stats

8from scipy.optimize import shgo

9from . import distributions

10from ._common import ConfidenceInterval

11from ._continuous_distns import chi2, norm

12from scipy.special import gamma, kv, gammaln

13from scipy.fft import ifft

14from ._stats_pythran import _a_ij_Aij_Dij2

15from ._stats_pythran import (

16 _concordant_pairs as _P, _discordant_pairs as _Q

17)

18from scipy.stats import _stats_py

20__all__ = ['epps_singleton_2samp', 'cramervonmises', 'somersd',

21 'barnard_exact', 'boschloo_exact', 'cramervonmises_2samp',

22 'tukey_hsd', 'poisson_means_test']

24Epps_Singleton_2sampResult = namedtuple('Epps_Singleton_2sampResult',

25 ('statistic', 'pvalue'))

28def epps_singleton_2samp(x, y, t=(0.4, 0.8)):

29 """Compute the Epps-Singleton (ES) test statistic.

31 Test the null hypothesis that two samples have the same underlying

32 probability distribution.

34 Parameters

35 ----------

36 x, y : array-like

37 The two samples of observations to be tested. Input must not have more

38 than one dimension. Samples can have different lengths.

39 t : array-like, optional

40 The points (t1, ..., tn) where the empirical characteristic function is

41 to be evaluated. It should be positive distinct numbers. The default

42 value (0.4, 0.8) is proposed in [1]_. Input must not have more than

43 one dimension.

45 Returns

46 -------

47 statistic : float

48 The test statistic.

49 pvalue : float

50 The associated p-value based on the asymptotic chi2-distribution.

52 See Also

53 --------

54 ks_2samp, anderson_ksamp

56 Notes

57 -----

58 Testing whether two samples are generated by the same underlying

59 distribution is a classical question in statistics. A widely used test is

60 the Kolmogorov-Smirnov (KS) test which relies on the empirical

61 distribution function. Epps and Singleton introduce a test based on the

62 empirical characteristic function in [1]_.

64 One advantage of the ES test compared to the KS test is that is does

65 not assume a continuous distribution. In [1]_, the authors conclude

66 that the test also has a higher power than the KS test in many

67 examples. They recommend the use of the ES test for discrete samples as

68 well as continuous samples with at least 25 observations each, whereas

69 `anderson_ksamp` is recommended for smaller sample sizes in the

70 continuous case.

72 The p-value is computed from the asymptotic distribution of the test

73 statistic which follows a `chi2` distribution. If the sample size of both

74 `x` and `y` is below 25, the small sample correction proposed in [1]_ is

75 applied to the test statistic.

77 The default values of `t` are determined in [1]_ by considering

78 various distributions and finding good values that lead to a high power

79 of the test in general. Table III in [1]_ gives the optimal values for

80 the distributions tested in that study. The values of `t` are scaled by

81 the semi-interquartile range in the implementation, see [1]_.

83 References

84 ----------

85 .. [1] T. W. Epps and K. J. Singleton, "An omnibus test for the two-sample

86 problem using the empirical characteristic function", Journal of

87 Statistical Computation and Simulation 26, p. 177--203, 1986.

89 .. [2] S. J. Goerg and J. Kaiser, "Nonparametric testing of distributions

90 - the Epps-Singleton two-sample test using the empirical characteristic

91 function", The Stata Journal 9(3), p. 454--465, 2009.

93 """

94 x, y, t = np.asarray(x), np.asarray(y), np.asarray(t)

95 # check if x and y are valid inputs

96 if x.ndim > 1:

97 raise ValueError('x must be 1d, but x.ndim equals {}.'.format(x.ndim))

98 if y.ndim > 1:

99 raise ValueError('y must be 1d, but y.ndim equals {}.'.format(y.ndim))

100 nx, ny = len(x), len(y)

101 if (nx < 5) or (ny < 5):

102 raise ValueError('x and y should have at least 5 elements, but len(x) '

103 '= {} and len(y) = {}.'.format(nx, ny))

104 if not np.isfinite(x).all():

105 raise ValueError('x must not contain nonfinite values.')

106 if not np.isfinite(y).all():

107 raise ValueError('y must not contain nonfinite values.')

108 n = nx + ny

109

110 # check if t is valid

111 if t.ndim > 1:

112 raise ValueError('t must be 1d, but t.ndim equals {}.'.format(t.ndim))

113 if np.less_equal(t, 0).any():

114 raise ValueError('t must contain positive elements only.')

115

116 # rescale t with semi-iqr as proposed in [1]; import iqr here to avoid

117 # circular import

118 from scipy.stats import iqr

119 sigma = iqr(np.hstack((x, y))) / 2

120 ts = np.reshape(t, (-1, 1)) / sigma

121

122 # covariance estimation of ES test

123 gx = np.vstack((np.cos(ts*x), np.sin(ts*x))).T # shape = (nx, 2*len(t))

124 gy = np.vstack((np.cos(ts*y), np.sin(ts*y))).T

125 cov_x = np.cov(gx.T, bias=True) # the test uses biased cov-estimate

126 cov_y = np.cov(gy.T, bias=True)

127 est_cov = (n/nx)*cov_x + (n/ny)*cov_y

128 est_cov_inv = np.linalg.pinv(est_cov)

129 r = np.linalg.matrix_rank(est_cov_inv)

130 if r < 2*len(t):

131 warnings.warn('Estimated covariance matrix does not have full rank. '

132 'This indicates a bad choice of the input t and the '

133 'test might not be consistent.') # see p. 183 in [1]_

134

135 # compute test statistic w distributed asympt. as chisquare with df=r

136 g_diff = np.mean(gx, axis=0) - np.mean(gy, axis=0)

137 w = n*np.dot(g_diff.T, np.dot(est_cov_inv, g_diff))

138

139 # apply small-sample correction

140 if (max(nx, ny) < 25):

141 corr = 1.0/(1.0 + n**(-0.45) + 10.1*(nx**(-1.7) + ny**(-1.7)))

142 w = corr * w

143

144 p = chi2.sf(w, r)

145

146 return Epps_Singleton_2sampResult(w, p)

147

148

149def poisson_means_test(k1, n1, k2, n2, *, diff=0, alternative='two-sided'):

150 r"""

151 Performs the Poisson means test, AKA the "E-test".

152

153 This is a test of the null hypothesis that the difference between means of

154 two Poisson distributions is `diff`. The samples are provided as the

155 number of events `k1` and `k2` observed within measurement intervals

156 (e.g. of time, space, number of observations) of sizes `n1` and `n2`.

157

158 Parameters

159 ----------

160 k1 : int

161 Number of events observed from distribution 1.

162 n1: float

163 Size of sample from distribution 1.

164 k2 : int

165 Number of events observed from distribution 2.

166 n2 : float

167 Size of sample from distribution 2.

168 diff : float, default=0

169 The hypothesized difference in means between the distributions

170 underlying the samples.

171 alternative : {'two-sided', 'less', 'greater'}, optional

172 Defines the alternative hypothesis.

173 The following options are available (default is 'two-sided'):

174

175 * 'two-sided': the difference between distribution means is not

176 equal to `diff`

177 * 'less': the difference between distribution means is less than

178 `diff`

179 * 'greater': the difference between distribution means is greater

180 than `diff`

181

182 Returns

183 -------

184 statistic : float

185 The test statistic (see [1]_ equation 3.3).

186 pvalue : float

187 The probability of achieving such an extreme value of the test

188 statistic under the null hypothesis.

189

190 Notes

191 -----

192

193 Let:

194

195 .. math:: X_1 \sim \mbox{Poisson}(\mathtt{n1}\lambda_1)

196

197 be a random variable independent of

198

199 .. math:: X_2 \sim \mbox{Poisson}(\mathtt{n2}\lambda_2)

200

201 and let ``k1`` and ``k2`` be the observed values of :math:`X_1`

202 and :math:`X_2`, respectively. Then `poisson_means_test` uses the number

203 of observed events ``k1`` and ``k2`` from samples of size ``n1`` and

204 ``n2``, respectively, to test the null hypothesis that

205

206 .. math::

207 H_0: \lambda_1 - \lambda_2 = \mathtt{diff}

208

209 A benefit of the E-test is that it has good power for small sample sizes,

210 which can reduce sampling costs [1]_. It has been evaluated and determined

211 to be more powerful than the comparable C-test, sometimes referred to as

212 the Poisson exact test.

213

214 References

215 ----------

216 .. [1] Krishnamoorthy, K., & Thomson, J. (2004). A more powerful test for

217 comparing two Poisson means. Journal of Statistical Planning and

218 Inference, 119(1), 23-35.

219

220 .. [2] Przyborowski, J., & Wilenski, H. (1940). Homogeneity of results in

221 testing samples from Poisson series: With an application to testing

222 clover seed for dodder. Biometrika, 31(3/4), 313-323.

223

224 Examples

225 --------

226

227 Suppose that a gardener wishes to test the number of dodder (weed) seeds

228 in a sack of clover seeds that they buy from a seed company. It has

229 previously been established that the number of dodder seeds in clover

230 follows the Poisson distribution.

231

232 A 100 gram sample is drawn from the sack before being shipped to the

233 gardener. The sample is analyzed, and it is found to contain no dodder

234 seeds; that is, `k1` is 0. However, upon arrival, the gardener draws

235 another 100 gram sample from the sack. This time, three dodder seeds are

236 found in the sample; that is, `k2` is 3. The gardener would like to

237 know if the difference is significant and not due to chance. The

238 null hypothesis is that the difference between the two samples is merely

239 due to chance, or that :math:`\lambda_1 - \lambda_2 = \mathtt{diff}`

240 where :math:`\mathtt{diff} = 0`. The alternative hypothesis is that the

241 difference is not due to chance, or :math:`\lambda_1 - \lambda_2 \ne 0`.

242 The gardener selects a significance level of 5% to reject the null

243 hypothesis in favor of the alternative [2]_.

244

245 >>> import scipy.stats as stats

246 >>> res = stats.poisson_means_test(0, 100, 3, 100)

247 >>> res.statistic, res.pvalue

248 (-1.7320508075688772, 0.08837900929018157)

249

250 The p-value is .088, indicating a near 9% chance of observing a value of

251 the test statistic under the null hypothesis. This exceeds 5%, so the

252 gardener does not reject the null hypothesis as the difference cannot be

253 regarded as significant at this level.

254 """

255

256 _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative)

257

258 # "for a given k_1 and k_2, an estimate of \lambda_2 is given by" [1] (3.4)

259 lmbd_hat2 = ((k1 + k2) / (n1 + n2) - diff * n1 / (n1 + n2))

260

261 # "\hat{\lambda_{2k}} may be less than or equal to zero ... and in this

262 # case the null hypothesis cannot be rejected ... [and] it is not necessary

263 # to compute the p-value". [1] page 26 below eq. (3.6).

264 if lmbd_hat2 <= 0:

265 return _stats_py.SignificanceResult(0, 1)

266

267 # The unbiased variance estimate [1] (3.2)

268 var = k1 / (n1 ** 2) + k2 / (n2 ** 2)

269

270 # The _observed_ pivot statistic from the input. It follows the

271 # unnumbered equation following equation (3.3) This is used later in

272 # comparison with the computed pivot statistics in an indicator function.

273 t_k1k2 = (k1 / n1 - k2 / n2 - diff) / np.sqrt(var)

274

275 # Equation (3.5) of [1] is lengthy, so it is broken into several parts,

276 # beginning here. Note that the probability mass function of poisson is

277 # exp^(-\mu)*\mu^k/k!, so and this is called with shape \mu, here noted

278 # here as nlmbd_hat*. The strategy for evaluating the double summation in

279 # (3.5) is to create two arrays of the values of the two products inside

280 # the summation and then broadcast them together into a matrix, and then

281 # sum across the entire matrix.

282

283 # Compute constants (as seen in the first and second separated products in

284 # (3.5).). (This is the shape (\mu) parameter of the poisson distribution.)

285 nlmbd_hat1 = n1 * (lmbd_hat2 + diff)

286 nlmbd_hat2 = n2 * lmbd_hat2

287

288 # Determine summation bounds for tail ends of distribution rather than

289 # summing to infinity. `x1*` is for the outer sum and `x2*` is the inner

290 # sum.

291 x1_lb, x1_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat1)

292 x2_lb, x2_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat2)

293

294 # Construct arrays to function as the x_1 and x_2 counters on the summation

295 # in (3.5). `x1` is in columns and `x2` is in rows to allow for

296 # broadcasting.

297 x1 = np.arange(x1_lb, x1_ub + 1)

298 x2 = np.arange(x2_lb, x2_ub + 1)[:, None]

299

300 # These are the two products in equation (3.5) with `prob_x1` being the

301 # first (left side) and `prob_x2` being the second (right side). (To

302 # make as clear as possible: the 1st contains a "+ d" term, the 2nd does

303 # not.)

304 prob_x1 = distributions.poisson.pmf(x1, nlmbd_hat1)

305 prob_x2 = distributions.poisson.pmf(x2, nlmbd_hat2)

306

307 # compute constants for use in the "pivot statistic" per the

308 # unnumbered equation following (3.3).

309 lmbd_x1 = x1 / n1

310 lmbd_x2 = x2 / n2

311 lmbds_diff = lmbd_x1 - lmbd_x2 - diff

312 var_x1x2 = lmbd_x1 / n1 + lmbd_x2 / n2

313

314 # This is the 'pivot statistic' for use in the indicator of the summation

315 # (left side of "I[.]").

316 with np.errstate(invalid='ignore', divide='ignore'):

317 t_x1x2 = lmbds_diff / np.sqrt(var_x1x2)

318

319 # `[indicator]` implements the "I[.] ... the indicator function" per

320 # the paragraph following equation (3.5).

321 if alternative == 'two-sided':

322 indicator = np.abs(t_x1x2) >= np.abs(t_k1k2)

323 elif alternative == 'less':

324 indicator = t_x1x2 <= t_k1k2

325 else:

326 indicator = t_x1x2 >= t_k1k2

327

328 # Multiply all combinations of the products together, exclude terms

329 # based on the `indicator` and then sum. (3.5)

330 pvalue = np.sum((prob_x1 * prob_x2)[indicator])

331 return _stats_py.SignificanceResult(t_k1k2, pvalue)

332

333

334def _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative):

335 # """check for valid types and values of input to `poisson_mean_test`."""

336 if k1 != int(k1) or k2 != int(k2):

337 raise TypeError('`k1` and `k2` must be integers.')

338

339 count_err = '`k1` and `k2` must be greater than or equal to 0.'

340 if k1 < 0 or k2 < 0:

341 raise ValueError(count_err)

342

343 if n1 <= 0 or n2 <= 0:

344 raise ValueError('`n1` and `n2` must be greater than 0.')

345

346 if diff < 0:

347 raise ValueError('diff must be greater than or equal to 0.')

348

349 alternatives = {'two-sided', 'less', 'greater'}

350 if alternative.lower() not in alternatives:

351 raise ValueError(f"Alternative must be one of '{alternatives}'.")

352

353

354class CramerVonMisesResult:

355 def __init__(self, statistic, pvalue):

356 self.statistic = statistic

357 self.pvalue = pvalue

358

359 def __repr__(self):

360 return (f"{self.__class__.__name__}(statistic={self.statistic}, "

361 f"pvalue={self.pvalue})")

362

363

364def _psi1_mod(x):

365 """

366 psi1 is defined in equation 1.10 in Csörgő, S. and Faraway, J. (1996).

367 This implements a modified version by excluding the term V(x) / 12

368 (here: _cdf_cvm_inf(x) / 12) to avoid evaluating _cdf_cvm_inf(x)

369 twice in _cdf_cvm.

370

371 Implementation based on MAPLE code of Julian Faraway and R code of the

372 function pCvM in the package goftest (v1.1.1), permission granted

373 by Adrian Baddeley. Main difference in the implementation: the code

374 here keeps adding terms of the series until the terms are small enough.

375 """

376

377 def _ed2(y):

378 z = y**2 / 4

379 b = kv(1/4, z) + kv(3/4, z)

380 return np.exp(-z) * (y/2)**(3/2) * b / np.sqrt(np.pi)

381

382 def _ed3(y):

383 z = y**2 / 4

384 c = np.exp(-z) / np.sqrt(np.pi)

385 return c * (y/2)**(5/2) * (2*kv(1/4, z) + 3*kv(3/4, z) - kv(5/4, z))

386

387 def _Ak(k, x):

388 m = 2*k + 1

389 sx = 2 * np.sqrt(x)

390 y1 = x**(3/4)

391 y2 = x**(5/4)

392

393 e1 = m * gamma(k + 1/2) * _ed2((4 * k + 3)/sx) / (9 * y1)

394 e2 = gamma(k + 1/2) * _ed3((4 * k + 1) / sx) / (72 * y2)

395 e3 = 2 * (m + 2) * gamma(k + 3/2) * _ed3((4 * k + 5) / sx) / (12 * y2)

396 e4 = 7 * m * gamma(k + 1/2) * _ed2((4 * k + 1) / sx) / (144 * y1)

397 e5 = 7 * m * gamma(k + 1/2) * _ed2((4 * k + 5) / sx) / (144 * y1)

398

399 return e1 + e2 + e3 + e4 + e5

400

401 x = np.asarray(x)

402 tot = np.zeros_like(x, dtype='float')

403 cond = np.ones_like(x, dtype='bool')

404 k = 0

405 while np.any(cond):

406 z = -_Ak(k, x[cond]) / (np.pi * gamma(k + 1))

407 tot[cond] = tot[cond] + z

408 cond[cond] = np.abs(z) >= 1e-7

409 k += 1

410

411 return tot

412

413

414def _cdf_cvm_inf(x):

415 """

416 Calculate the cdf of the Cramér-von Mises statistic (infinite sample size).

417

418 See equation 1.2 in Csörgő, S. and Faraway, J. (1996).

419

420 Implementation based on MAPLE code of Julian Faraway and R code of the

421 function pCvM in the package goftest (v1.1.1), permission granted

422 by Adrian Baddeley. Main difference in the implementation: the code

423 here keeps adding terms of the series until the terms are small enough.

424

425 The function is not expected to be accurate for large values of x, say

426 x > 4, when the cdf is very close to 1.

427 """

428 x = np.asarray(x)

429

430 def term(x, k):

431 # this expression can be found in [2], second line of (1.3)

432 u = np.exp(gammaln(k + 0.5) - gammaln(k+1)) / (np.pi**1.5 * np.sqrt(x))

433 y = 4*k + 1

434 q = y**2 / (16*x)

435 b = kv(0.25, q)

436 return u * np.sqrt(y) * np.exp(-q) * b

437

438 tot = np.zeros_like(x, dtype='float')

439 cond = np.ones_like(x, dtype='bool')

440 k = 0

441 while np.any(cond):

442 z = term(x[cond], k)

443 tot[cond] = tot[cond] + z

444 cond[cond] = np.abs(z) >= 1e-7

445 k += 1

446

447 return tot

448

449

450def _cdf_cvm(x, n=None):

451 """

452 Calculate the cdf of the Cramér-von Mises statistic for a finite sample

453 size n. If N is None, use the asymptotic cdf (n=inf).

454

455 See equation 1.8 in Csörgő, S. and Faraway, J. (1996) for finite samples,

456 1.2 for the asymptotic cdf.

457

458 The function is not expected to be accurate for large values of x, say

459 x > 2, when the cdf is very close to 1 and it might return values > 1

460 in that case, e.g. _cdf_cvm(2.0, 12) = 1.0000027556716846. Moreover, it

461 is not accurate for small values of n, especially close to the bounds of

462 the distribution's domain, [1/(12*n), n/3], where the value jumps to 0

463 and 1, respectively. These are limitations of the approximation by Csörgő

464 and Faraway (1996) implemented in this function.

465 """

466 x = np.asarray(x)

467 if n is None:

468 y = _cdf_cvm_inf(x)

469 else:

470 # support of the test statistic is [12/n, n/3], see 1.1 in [2]

471 y = np.zeros_like(x, dtype='float')

472 sup = (1./(12*n) < x) & (x < n/3.)

473 # note: _psi1_mod does not include the term _cdf_cvm_inf(x) / 12

474 # therefore, we need to add it here

475 y[sup] = _cdf_cvm_inf(x[sup]) * (1 + 1./(12*n)) + _psi1_mod(x[sup]) / n

476 y[x >= n/3] = 1

477

478 if y.ndim == 0:

479 return y[()]

480 return y

481

482

483def cramervonmises(rvs, cdf, args=()):

484 """Perform the one-sample Cramér-von Mises test for goodness of fit.

485

486 This performs a test of the goodness of fit of a cumulative distribution

487 function (cdf) :math:`F` compared to the empirical distribution function

488 :math:`F_n` of observed random variates :math:`X_1, ..., X_n` that are

489 assumed to be independent and identically distributed ([1]_).

490 The null hypothesis is that the :math:`X_i` have cumulative distribution

491 :math:`F`.

492

493 Parameters

494 ----------

495 rvs : array_like

496 A 1-D array of observed values of the random variables :math:`X_i`.

497 cdf : str or callable

498 The cumulative distribution function :math:`F` to test the

499 observations against. If a string, it should be the name of a

500 distribution in `scipy.stats`. If a callable, that callable is used

501 to calculate the cdf: ``cdf(x, *args) -> float``.

502 args : tuple, optional

503 Distribution parameters. These are assumed to be known; see Notes.

504

505 Returns

506 -------

507 res : object with attributes

508 statistic : float

509 Cramér-von Mises statistic.

510 pvalue : float

511 The p-value.

512

513 See Also

514 --------

515 kstest, cramervonmises_2samp

516

517 Notes

518 -----

519 .. versionadded:: 1.6.0

520

521 The p-value relies on the approximation given by equation 1.8 in [2]_.

522 It is important to keep in mind that the p-value is only accurate if

523 one tests a simple hypothesis, i.e. the parameters of the reference

524 distribution are known. If the parameters are estimated from the data

525 (composite hypothesis), the computed p-value is not reliable.

526

527 References

528 ----------

529 .. [1] Cramér-von Mises criterion, Wikipedia,

530 https://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion

531 .. [2] Csörgő, S. and Faraway, J. (1996). The Exact and Asymptotic

532 Distribution of Cramér-von Mises Statistics. Journal of the

533 Royal Statistical Society, pp. 221-234.

534

535 Examples

536 --------

537

538 Suppose we wish to test whether data generated by ``scipy.stats.norm.rvs``

539 were, in fact, drawn from the standard normal distribution. We choose a

540 significance level of alpha=0.05.

541

542 >>> import numpy as np

543 >>> from scipy import stats

544 >>> rng = np.random.default_rng()

545 >>> x = stats.norm.rvs(size=500, random_state=rng)

546 >>> res = stats.cramervonmises(x, 'norm')

547 >>> res.statistic, res.pvalue

548 (0.49121480855028343, 0.04189256516661377)

549

550 The p-value 0.79 exceeds our chosen significance level, so we do not

551 reject the null hypothesis that the observed sample is drawn from the

552 standard normal distribution.

553

554 Now suppose we wish to check whether the same samples shifted by 2.1 is

555 consistent with being drawn from a normal distribution with a mean of 2.

556

557 >>> y = x + 2.1

558 >>> res = stats.cramervonmises(y, 'norm', args=(2,))

559 >>> res.statistic, res.pvalue

560 (0.07400330012187435, 0.7274595666160468)

561

562 Here we have used the `args` keyword to specify the mean (``loc``)

563 of the normal distribution to test the data against. This is equivalent

564 to the following, in which we create a frozen normal distribution with

565 mean 2.1, then pass its ``cdf`` method as an argument.

566

567 >>> frozen_dist = stats.norm(loc=2)

568 >>> res = stats.cramervonmises(y, frozen_dist.cdf)

569 >>> res.statistic, res.pvalue

570 (0.07400330012187435, 0.7274595666160468)

571

572 In either case, we would reject the null hypothesis that the observed

573 sample is drawn from a normal distribution with a mean of 2 (and default

574 variance of 1) because the p-value 0.04 is less than our chosen

575 significance level.

576

577 """

578 if isinstance(cdf, str):

579 cdf = getattr(distributions, cdf).cdf

580

581 vals = np.sort(np.asarray(rvs))

582

583 if vals.size <= 1:

584 raise ValueError('The sample must contain at least two observations.')

585 if vals.ndim > 1:

586 raise ValueError('The sample must be one-dimensional.')

587

588 n = len(vals)

589 cdfvals = cdf(vals, *args)

590

591 u = (2*np.arange(1, n+1) - 1)/(2*n)

592 w = 1/(12*n) + np.sum((u - cdfvals)**2)

593

594 # avoid small negative values that can occur due to the approximation

595 p = max(0, 1. - _cdf_cvm(w, n))

596

597 return CramerVonMisesResult(statistic=w, pvalue=p)

598

599

600def _get_wilcoxon_distr(n):

601 """

602 Distribution of probability of the Wilcoxon ranksum statistic r_plus (sum

603 of ranks of positive differences).

604 Returns an array with the probabilities of all the possible ranks

605 r = 0, ..., n*(n+1)/2

606 """

607 c = np.ones(1, dtype=np.double)

608 for k in range(1, n + 1):

609 prev_c = c

610 c = np.zeros(k * (k + 1) // 2 + 1, dtype=np.double)

611 m = len(prev_c)

612 c[:m] = prev_c * 0.5

613 c[-m:] += prev_c * 0.5

614 return c

615

616

617def _get_wilcoxon_distr2(n):

618 """

619 Distribution of probability of the Wilcoxon ranksum statistic r_plus (sum

620 of ranks of positive differences).

621 Returns an array with the probabilities of all the possible ranks

622 r = 0, ..., n*(n+1)/2

623 This is a slower reference function

624 References

625 ----------

626 .. [1] 1. Harris T, Hardin JW. Exact Wilcoxon Signed-Rank and Wilcoxon

627 Mann-Whitney Ranksum Tests. The Stata Journal. 2013;13(2):337-343.

628 """

629 ai = np.arange(1, n+1)[:, None]

630 t = n*(n+1)/2

631 q = 2*t

632 j = np.arange(q)

633 theta = 2*np.pi/q*j

634 phi_sp = np.prod(np.cos(theta*ai), axis=0)

635 phi_s = np.exp(1j*theta*t) * phi_sp

636 p = np.real(ifft(phi_s))

637 res = np.zeros(int(t)+1)

638 res[:-1:] = p[::2]

639 res[0] /= 2

640 res[-1] = res[0]

641 return res

642

643

644def _tau_b(A):

645 """Calculate Kendall's tau-b and p-value from contingency table."""

646 # See [2] 2.2 and 4.2

647

648 # contingency table must be truly 2D

649 if A.shape[0] == 1 or A.shape[1] == 1:

650 return np.nan, np.nan

651

652 NA = A.sum()

653 PA = _P(A)

654 QA = _Q(A)

655 Sri2 = (A.sum(axis=1)**2).sum()

656 Scj2 = (A.sum(axis=0)**2).sum()

657 denominator = (NA**2 - Sri2)*(NA**2 - Scj2)

658

659 tau = (PA-QA)/(denominator)**0.5

660

661 numerator = 4*(_a_ij_Aij_Dij2(A) - (PA - QA)**2 / NA)

662 s02_tau_b = numerator/denominator

663 if s02_tau_b == 0: # Avoid divide by zero

664 return tau, 0

665 Z = tau/s02_tau_b**0.5

666 p = 2*norm.sf(abs(Z)) # 2-sided p-value

667

668 return tau, p

669

670

671def _somers_d(A, alternative='two-sided'):

672 """Calculate Somers' D and p-value from contingency table."""

673 # See [3] page 1740

674

675 # contingency table must be truly 2D

676 if A.shape[0] <= 1 or A.shape[1] <= 1:

677 return np.nan, np.nan

678

679 NA = A.sum()

680 NA2 = NA**2

681 PA = _P(A)

682 QA = _Q(A)

683 Sri2 = (A.sum(axis=1)**2).sum()

684

685 d = (PA - QA)/(NA2 - Sri2)

686

687 S = _a_ij_Aij_Dij2(A) - (PA-QA)**2/NA

688

689 with np.errstate(divide='ignore'):

690 Z = (PA - QA)/(4*(S))**0.5

691

692 _, p = scipy.stats._stats_py._normtest_finish(Z, alternative)

693

694 return d, p

695

696

697SomersDResult = make_dataclass("SomersDResult",

698 ("statistic", "pvalue", "table"))

699

700

701def somersd(x, y=None, alternative='two-sided'):

702 r"""Calculates Somers' D, an asymmetric measure of ordinal association.

703

704 Like Kendall's :math:`\tau`, Somers' :math:`D` is a measure of the

705 correspondence between two rankings. Both statistics consider the

706 difference between the number of concordant and discordant pairs in two

707 rankings :math:`X` and :math:`Y`, and both are normalized such that values

708 close to 1 indicate strong agreement and values close to -1 indicate

709 strong disagreement. They differ in how they are normalized. To show the

710 relationship, Somers' :math:`D` can be defined in terms of Kendall's

711 :math:`\tau_a`:

712

713 .. math::

714 D(Y|X) = \frac{\tau_a(X, Y)}{\tau_a(X, X)}

715

716 Suppose the first ranking :math:`X` has :math:`r` distinct ranks and the

717 second ranking :math:`Y` has :math:`s` distinct ranks. These two lists of

718 :math:`n` rankings can also be viewed as an :math:`r \times s` contingency

719 table in which element :math:`i, j` is the number of rank pairs with rank

720 :math:`i` in ranking :math:`X` and rank :math:`j` in ranking :math:`Y`.

721 Accordingly, `somersd` also allows the input data to be supplied as a

722 single, 2D contingency table instead of as two separate, 1D rankings.

723

724 Note that the definition of Somers' :math:`D` is asymmetric: in general,

725 :math:`D(Y|X) \neq D(X|Y)`. ``somersd(x, y)`` calculates Somers'

726 :math:`D(Y|X)`: the "row" variable :math:`X` is treated as an independent

727 variable, and the "column" variable :math:`Y` is dependent. For Somers'

728 :math:`D(X|Y)`, swap the input lists or transpose the input table.

729

730 Parameters

731 ----------

732 x : array_like

733 1D array of rankings, treated as the (row) independent variable.

734 Alternatively, a 2D contingency table.

735 y : array_like, optional

736 If `x` is a 1D array of rankings, `y` is a 1D array of rankings of the

737 same length, treated as the (column) dependent variable.

738 If `x` is 2D, `y` is ignored.

739 alternative : {'two-sided', 'less', 'greater'}, optional

740 Defines the alternative hypothesis. Default is 'two-sided'.

741 The following options are available:

742 * 'two-sided': the rank correlation is nonzero

743 * 'less': the rank correlation is negative (less than zero)

744 * 'greater': the rank correlation is positive (greater than zero)

745

746 Returns

747 -------

748 res : SomersDResult

749 A `SomersDResult` object with the following fields:

750

751 statistic : float

752 The Somers' :math:`D` statistic.

753 pvalue : float

754 The p-value for a hypothesis test whose null

755 hypothesis is an absence of association, :math:`D=0`.

756 See notes for more information.

757 table : 2D array

758 The contingency table formed from rankings `x` and `y` (or the

759 provided contingency table, if `x` is a 2D array)

760

761 See Also

762 --------

763 kendalltau : Calculates Kendall's tau, another correlation measure.

764 weightedtau : Computes a weighted version of Kendall's tau.

765 spearmanr : Calculates a Spearman rank-order correlation coefficient.

766 pearsonr : Calculates a Pearson correlation coefficient.

767

768 Notes

769 -----

770 This function follows the contingency table approach of [2]_ and

771 [3]_. *p*-values are computed based on an asymptotic approximation of

772 the test statistic distribution under the null hypothesis :math:`D=0`.

773

774 Theoretically, hypothesis tests based on Kendall's :math:`tau` and Somers'

775 :math:`D` should be identical.

776 However, the *p*-values returned by `kendalltau` are based

777 on the null hypothesis of *independence* between :math:`X` and :math:`Y`

778 (i.e. the population from which pairs in :math:`X` and :math:`Y` are

779 sampled contains equal numbers of all possible pairs), which is more

780 specific than the null hypothesis :math:`D=0` used here. If the null

781 hypothesis of independence is desired, it is acceptable to use the

782 *p*-value returned by `kendalltau` with the statistic returned by

783 `somersd` and vice versa. For more information, see [2]_.

784

785 Contingency tables are formatted according to the convention used by

786 SAS and R: the first ranking supplied (``x``) is the "row" variable, and

787 the second ranking supplied (``y``) is the "column" variable. This is

788 opposite the convention of Somers' original paper [1]_.

789

790 References

791 ----------

792 .. [1] Robert H. Somers, "A New Asymmetric Measure of Association for

793 Ordinal Variables", *American Sociological Review*, Vol. 27, No. 6,

794 pp. 799--811, 1962.

795

796 .. [2] Morton B. Brown and Jacqueline K. Benedetti, "Sampling Behavior of

797 Tests for Correlation in Two-Way Contingency Tables", *Journal of

798 the American Statistical Association* Vol. 72, No. 358, pp.

799 309--315, 1977.

800

801 .. [3] SAS Institute, Inc., "The FREQ Procedure (Book Excerpt)",

802 *SAS/STAT 9.2 User's Guide, Second Edition*, SAS Publishing, 2009.

803

804 .. [4] Laerd Statistics, "Somers' d using SPSS Statistics", *SPSS

805 Statistics Tutorials and Statistical Guides*,

806 https://statistics.laerd.com/spss-tutorials/somers-d-using-spss-statistics.php,

807 Accessed July 31, 2020.

808

809 Examples

810 --------

811 We calculate Somers' D for the example given in [4]_, in which a hotel

812 chain owner seeks to determine the association between hotel room

813 cleanliness and customer satisfaction. The independent variable, hotel

814 room cleanliness, is ranked on an ordinal scale: "below average (1)",

815 "average (2)", or "above average (3)". The dependent variable, customer

816 satisfaction, is ranked on a second scale: "very dissatisfied (1)",

817 "moderately dissatisfied (2)", "neither dissatisfied nor satisfied (3)",

818 "moderately satisfied (4)", or "very satisfied (5)". 189 customers

819 respond to the survey, and the results are cast into a contingency table

820 with the hotel room cleanliness as the "row" variable and customer

821 satisfaction as the "column" variable.

822

823 +-----+-----+-----+-----+-----+-----+

824 | | (1) | (2) | (3) | (4) | (5) |

825 +=====+=====+=====+=====+=====+=====+

826 | (1) | 27 | 25 | 14 | 7 | 0 |

827 +-----+-----+-----+-----+-----+-----+

828 | (2) | 7 | 14 | 18 | 35 | 12 |

829 +-----+-----+-----+-----+-----+-----+

830 | (3) | 1 | 3 | 2 | 7 | 17 |

831 +-----+-----+-----+-----+-----+-----+

832

833 For example, 27 customers assigned their room a cleanliness ranking of

834 "below average (1)" and a corresponding satisfaction of "very

835 dissatisfied (1)". We perform the analysis as follows.

836

837 >>> from scipy.stats import somersd

838 >>> table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]

839 >>> res = somersd(table)

840 >>> res.statistic

841 0.6032766111513396

842 >>> res.pvalue

843 1.0007091191074533e-27

844

845 The value of the Somers' D statistic is approximately 0.6, indicating

846 a positive correlation between room cleanliness and customer satisfaction

847 in the sample.

848 The *p*-value is very small, indicating a very small probability of

849 observing such an extreme value of the statistic under the null

850 hypothesis that the statistic of the entire population (from which

851 our sample of 189 customers is drawn) is zero. This supports the

852 alternative hypothesis that the true value of Somers' D for the population

853 is nonzero.

854

855 """

856 x, y = np.array(x), np.array(y)

857 if x.ndim == 1:

858 if x.size != y.size:

859 raise ValueError("Rankings must be of equal length.")

860 table = scipy.stats.contingency.crosstab(x, y)[1]

861 elif x.ndim == 2:

862 if np.any(x < 0):

863 raise ValueError("All elements of the contingency table must be "

864 "non-negative.")

865 if np.any(x != x.astype(int)):

866 raise ValueError("All elements of the contingency table must be "

867 "integer.")

868 if x.nonzero()[0].size < 2:

869 raise ValueError("At least two elements of the contingency table "

870 "must be nonzero.")

871 table = x

872 else:

873 raise ValueError("x must be either a 1D or 2D array")

874 d, p = _somers_d(table, alternative)

875

876 # add alias for consistency with other correlation functions

877 res = SomersDResult(d, p, table)

878 res.correlation = d

879 return res

880

881

882# This could be combined with `_all_partitions` in `_resampling.py`

883def _all_partitions(nx, ny):

884 """

885 Partition a set of indices into two fixed-length sets in all possible ways

886

887 Partition a set of indices 0 ... nx + ny - 1 into two sets of length nx and

888 ny in all possible ways (ignoring order of elements).

889 """

890 z = np.arange(nx+ny)

891 for c in combinations(z, nx):

892 x = np.array(c)

893 mask = np.ones(nx+ny, bool)

894 mask[x] = False

895 y = z[mask]

896 yield x, y

897

898

899def _compute_log_combinations(n):

900 """Compute all log combination of C(n, k)."""

901 gammaln_arr = gammaln(np.arange(n + 1) + 1)

902 return gammaln(n + 1) - gammaln_arr - gammaln_arr[::-1]

903

904

905BarnardExactResult = make_dataclass(

906 "BarnardExactResult", [("statistic", float), ("pvalue", float)]

907)

908

909

910def barnard_exact(table, alternative="two-sided", pooled=True, n=32):

911 r"""Perform a Barnard exact test on a 2x2 contingency table.

912

913 Parameters

914 ----------

915 table : array_like of ints

916 A 2x2 contingency table. Elements should be non-negative integers.

917

918 alternative : {'two-sided', 'less', 'greater'}, optional

919 Defines the null and alternative hypotheses. Default is 'two-sided'.

920 Please see explanations in the Notes section below.

921

922 pooled : bool, optional

923 Whether to compute score statistic with pooled variance (as in

924 Student's t-test, for example) or unpooled variance (as in Welch's

925 t-test). Default is ``True``.

926

927 n : int, optional

928 Number of sampling points used in the construction of the sampling

929 method. Note that this argument will automatically be converted to

930 the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to

931 select sample points. Default is 32. Must be positive. In most cases,

932 32 points is enough to reach good precision. More points comes at

933 performance cost.

934

935 Returns

936 -------

937 ber : BarnardExactResult

938 A result object with the following attributes.

939

940 statistic : float

941 The Wald statistic with pooled or unpooled variance, depending

942 on the user choice of `pooled`.

943

944 pvalue : float

945 P-value, the probability of obtaining a distribution at least as

946 extreme as the one that was actually observed, assuming that the

947 null hypothesis is true.

948

949 See Also

950 --------

951 chi2_contingency : Chi-square test of independence of variables in a

952 contingency table.

953 fisher_exact : Fisher exact test on a 2x2 contingency table.

954 boschloo_exact : Boschloo's exact test on a 2x2 contingency table,

955 which is an uniformly more powerful alternative to Fisher's exact test.

956

957 Notes

958 -----

959 Barnard's test is an exact test used in the analysis of contingency

960 tables. It examines the association of two categorical variables, and

961 is a more powerful alternative than Fisher's exact test

962 for 2x2 contingency tables.

963

964 Let's define :math:`X_0` a 2x2 matrix representing the observed sample,

965 where each column stores the binomial experiment, as in the example

966 below. Let's also define :math:`p_1, p_2` the theoretical binomial

967 probabilities for :math:`x_{11}` and :math:`x_{12}`. When using

968 Barnard exact test, we can assert three different null hypotheses :

969

970 - :math:`H_0 : p_1 \geq p_2` versus :math:`H_1 : p_1 < p_2`,

971 with `alternative` = "less"

972

973 - :math:`H_0 : p_1 \leq p_2` versus :math:`H_1 : p_1 > p_2`,

974 with `alternative` = "greater"

975

976 - :math:`H_0 : p_1 = p_2` versus :math:`H_1 : p_1 \neq p_2`,

977 with `alternative` = "two-sided" (default one)

978

979 In order to compute Barnard's exact test, we are using the Wald

980 statistic [3]_ with pooled or unpooled variance.

981 Under the default assumption that both variances are equal

982 (``pooled = True``), the statistic is computed as:

983

984 .. math::

985

986 T(X) = \frac{

987 \hat{p}_1 - \hat{p}_2

988 }{

989 \sqrt{

990 \hat{p}(1 - \hat{p})

991 (\frac{1}{c_1} +

992 \frac{1}{c_2})

993 }

994 }

995

996 with :math:`\hat{p}_1, \hat{p}_2` and :math:`\hat{p}` the estimator of

997 :math:`p_1, p_2` and :math:`p`, the latter being the combined probability,

998 given the assumption that :math:`p_1 = p_2`.

999

1000 If this assumption is invalid (``pooled = False``), the statistic is:

1001

1002 .. math::

1003

1004 T(X) = \frac{

1005 \hat{p}_1 - \hat{p}_2

1006 }{

1007 \sqrt{

1008 \frac{\hat{p}_1 (1 - \hat{p}_1)}{c_1} +

1009 \frac{\hat{p}_2 (1 - \hat{p}_2)}{c_2}

1010 }

1011 }

1012

1013 The p-value is then computed as:

1014

1015 .. math::

1016

1017 \sum

1018 \binom{c_1}{x_{11}}

1019 \binom{c_2}{x_{12}}

1020 \pi^{x_{11} + x_{12}}

1021 (1 - \pi)^{t - x_{11} - x_{12}}

1022

1023 where the sum is over all 2x2 contingency tables :math:`X` such that:

1024 * :math:`T(X) \leq T(X_0)` when `alternative` = "less",

1025 * :math:`T(X) \geq T(X_0)` when `alternative` = "greater", or

1026 * :math:`T(X) \geq |T(X_0)|` when `alternative` = "two-sided".

1027 Above, :math:`c_1, c_2` are the sum of the columns 1 and 2,

1028 and :math:`t` the total (sum of the 4 sample's element).

1029

1030 The returned p-value is the maximum p-value taken over the nuisance

1031 parameter :math:`\pi`, where :math:`0 \leq \pi \leq 1`.

1032

1033 This function's complexity is :math:`O(n c_1 c_2)`, where `n` is the

1034 number of sample points.

1035

1036 References

1037 ----------

1038 .. [1] Barnard, G. A. "Significance Tests for 2x2 Tables". *Biometrika*.

1039 34.1/2 (1947): 123-138. :doi:`dpgkg3`

1040

1041 .. [2] Mehta, Cyrus R., and Pralay Senchaudhuri. "Conditional versus

1042 unconditional exact tests for comparing two binomials."

1043 *Cytel Software Corporation* 675 (2003): 1-5.

1044

1045 .. [3] "Wald Test". *Wikipedia*. https://en.wikipedia.org/wiki/Wald_test

1046

1047 Examples

1048 --------

1049 An example use of Barnard's test is presented in [2]_.

1050

1051 Consider the following example of a vaccine efficacy study

1052 (Chan, 1998). In a randomized clinical trial of 30 subjects, 15 were

1053 inoculated with a recombinant DNA influenza vaccine and the 15 were

1054 inoculated with a placebo. Twelve of the 15 subjects in the placebo

1055 group (80%) eventually became infected with influenza whereas for the

1056 vaccine group, only 7 of the 15 subjects (47%) became infected. The

1057 data are tabulated as a 2 x 2 table::

1058

1059 Vaccine Placebo

1060 Yes 7 12

1061 No 8 3

1062

1063 When working with statistical hypothesis testing, we usually use a

1064 threshold probability or significance level upon which we decide

1065 to reject the null hypothesis :math:`H_0`. Suppose we choose the common

1066 significance level of 5%.

1067

1068 Our alternative hypothesis is that the vaccine will lower the chance of

1069 becoming infected with the virus; that is, the probability :math:`p_1` of

1070 catching the virus with the vaccine will be *less than* the probability

1071 :math:`p_2` of catching the virus without the vaccine. Therefore, we call

1072 `barnard_exact` with the ``alternative="less"`` option:

1073

1074 >>> import scipy.stats as stats

1075 >>> res = stats.barnard_exact([[7, 12], [8, 3]], alternative="less")

1076 >>> res.statistic

1077 -1.894...

1078 >>> res.pvalue

1079 0.03407...

1080

1081 Under the null hypothesis that the vaccine will not lower the chance of

1082 becoming infected, the probability of obtaining test results at least as

1083 extreme as the observed data is approximately 3.4%. Since this p-value is

1084 less than our chosen significance level, we have evidence to reject

1085 :math:`H_0` in favor of the alternative.

1086

1087 Suppose we had used Fisher's exact test instead:

1088

1089 >>> _, pvalue = stats.fisher_exact([[7, 12], [8, 3]], alternative="less")

1090 >>> pvalue

1091 0.0640...

1092

1093 With the same threshold significance of 5%, we would not have been able

1094 to reject the null hypothesis in favor of the alternative. As stated in

1095 [2]_, Barnard's test is uniformly more powerful than Fisher's exact test

1096 because Barnard's test does not condition on any margin. Fisher's test

1097 should only be used when both sets of marginals are fixed.

1098

1099 """

1100 if n <= 0:

1101 raise ValueError(

1102 "Number of points `n` must be strictly positive, "

1103 f"found {n!r}"

1104 )

1105

1106 table = np.asarray(table, dtype=np.int64)

1107

1108 if not table.shape == (2, 2):

1109 raise ValueError("The input `table` must be of shape (2, 2).")

1110

1111 if np.any(table < 0):

1112 raise ValueError("All values in `table` must be nonnegative.")

1113

1114 if 0 in table.sum(axis=0):

1115 # If both values in column are zero, the p-value is 1 and

1116 # the score's statistic is NaN.

1117 return BarnardExactResult(np.nan, 1.0)

1118

1119 total_col_1, total_col_2 = table.sum(axis=0)

1120

1121 x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(-1, 1)

1122 x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(1, -1)

1123

1124 # We need to calculate the wald statistics for each combination of x1 and

1125 # x2.

1126 p1, p2 = x1 / total_col_1, x2 / total_col_2

1127

1128 if pooled:

1129 p = (x1 + x2) / (total_col_1 + total_col_2)

1130 variances = p * (1 - p) * (1 / total_col_1 + 1 / total_col_2)

1131 else:

1132 variances = p1 * (1 - p1) / total_col_1 + p2 * (1 - p2) / total_col_2

1133

1134 # To avoid warning when dividing by 0

1135 with np.errstate(divide="ignore", invalid="ignore"):

1136 wald_statistic = np.divide((p1 - p2), np.sqrt(variances))

1137

1138 wald_statistic[p1 == p2] = 0 # Removing NaN values

1139

1140 wald_stat_obs = wald_statistic[table[0, 0], table[0, 1]]

1141

1142 if alternative == "two-sided":

1143 index_arr = np.abs(wald_statistic) >= abs(wald_stat_obs)

1144 elif alternative == "less":

1145 index_arr = wald_statistic <= wald_stat_obs

1146 elif alternative == "greater":

1147 index_arr = wald_statistic >= wald_stat_obs

1148 else:

1149 msg = (

1150 "`alternative` should be one of {'two-sided', 'less', 'greater'},"

1151 f" found {alternative!r}"

1152 )

1153 raise ValueError(msg)

1154

1155 x1_sum_x2 = x1 + x2

1156

1157 x1_log_comb = _compute_log_combinations(total_col_1)

1158 x2_log_comb = _compute_log_combinations(total_col_2)

1159 x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]

1160

1161 result = shgo(

1162 _get_binomial_log_p_value_with_nuisance_param,

1163 args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),

1164 bounds=((0, 1),),

1165 n=n,

1166 sampling_method="sobol",

1167 )

1168

1169 # result.fun is the negative log pvalue and therefore needs to be

1170 # changed before return

1171 p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)

1172 return BarnardExactResult(wald_stat_obs, p_value)

1173

1174

1175BoschlooExactResult = make_dataclass(

1176 "BoschlooExactResult", [("statistic", float), ("pvalue", float)]

1177)

1178

1179

1180def boschloo_exact(table, alternative="two-sided", n=32):

1181 r"""Perform Boschloo's exact test on a 2x2 contingency table.

1182

1183 Parameters

1184 ----------

1185 table : array_like of ints

1186 A 2x2 contingency table. Elements should be non-negative integers.

1187

1188 alternative : {'two-sided', 'less', 'greater'}, optional

1189 Defines the null and alternative hypotheses. Default is 'two-sided'.

1190 Please see explanations in the Notes section below.

1191

1192 n : int, optional

1193 Number of sampling points used in the construction of the sampling

1194 method. Note that this argument will automatically be converted to

1195 the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to

1196 select sample points. Default is 32. Must be positive. In most cases,

1197 32 points is enough to reach good precision. More points comes at

1198 performance cost.

1199

1200 Returns

1201 -------

1202 ber : BoschlooExactResult

1203 A result object with the following attributes.

1204

1205 statistic : float

1206 The statistic used in Boschloo's test; that is, the p-value

1207 from Fisher's exact test.

1208

1209 pvalue : float

1210 P-value, the probability of obtaining a distribution at least as

1211 extreme as the one that was actually observed, assuming that the

1212 null hypothesis is true.

1213

1214 See Also

1215 --------

1216 chi2_contingency : Chi-square test of independence of variables in a

1217 contingency table.

1218 fisher_exact : Fisher exact test on a 2x2 contingency table.

1219 barnard_exact : Barnard's exact test, which is a more powerful alternative

1220 than Fisher's exact test for 2x2 contingency tables.

1221

1222 Notes

1223 -----

1224 Boschloo's test is an exact test used in the analysis of contingency

1225 tables. It examines the association of two categorical variables, and

1226 is a uniformly more powerful alternative to Fisher's exact test

1227 for 2x2 contingency tables.

1228

1229 Boschloo's exact test uses the p-value of Fisher's exact test as a

1230 statistic, and Boschloo's p-value is the probability under the null

1231 hypothesis of observing such an extreme value of this statistic.

1232

1233 Let's define :math:`X_0` a 2x2 matrix representing the observed sample,

1234 where each column stores the binomial experiment, as in the example

1235 below. Let's also define :math:`p_1, p_2` the theoretical binomial

1236 probabilities for :math:`x_{11}` and :math:`x_{12}`. When using

1237 Boschloo exact test, we can assert three different alternative hypotheses:

1238

1239 - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 < p_2`,

1240 with `alternative` = "less"

1241

1242 - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 > p_2`,

1243 with `alternative` = "greater"

1244

1245 - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 \neq p_2`,

1246 with `alternative` = "two-sided" (default)

1247

1248 There are multiple conventions for computing a two-sided p-value when the

1249 null distribution is asymmetric. Here, we apply the convention that the

1250 p-value of a two-sided test is twice the minimum of the p-values of the

1251 one-sided tests (clipped to 1.0). Note that `fisher_exact` follows a

1252 different convention, so for a given `table`, the statistic reported by

1253 `boschloo_exact` may differ from the p-value reported by `fisher_exact`

1254 when ``alternative='two-sided'``.

1255

1256 .. versionadded:: 1.7.0

1257

1258 References

1259 ----------

1260 .. [1] R.D. Boschloo. "Raised conditional level of significance for the

1261 2 x 2-table when testing the equality of two probabilities",

1262 Statistica Neerlandica, 24(1), 1970

1263

1264 .. [2] "Boschloo's test", Wikipedia,

1265 https://en.wikipedia.org/wiki/Boschloo%27s_test

1266

1267 .. [3] Lise M. Saari et al. "Employee attitudes and job satisfaction",

1268 Human Resource Management, 43(4), 395-407, 2004,

1269 :doi:`10.1002/hrm.20032`.

1270

1271 Examples

1272 --------

1273 In the following example, we consider the article "Employee

1274 attitudes and job satisfaction" [3]_

1275 which reports the results of a survey from 63 scientists and 117 college

1276 professors. Of the 63 scientists, 31 said they were very satisfied with

1277 their jobs, whereas 74 of the college professors were very satisfied

1278 with their work. Is this significant evidence that college

1279 professors are happier with their work than scientists?

1280 The following table summarizes the data mentioned above::

1281

1282 college professors scientists

1283 Very Satisfied 74 31

1284 Dissatisfied 43 32

1285

1286 When working with statistical hypothesis testing, we usually use a

1287 threshold probability or significance level upon which we decide

1288 to reject the null hypothesis :math:`H_0`. Suppose we choose the common

1289 significance level of 5%.

1290

1291 Our alternative hypothesis is that college professors are truly more

1292 satisfied with their work than scientists. Therefore, we expect

1293 :math:`p_1` the proportion of very satisfied college professors to be

1294 greater than :math:`p_2`, the proportion of very satisfied scientists.

1295 We thus call `boschloo_exact` with the ``alternative="greater"`` option:

1296

1297 >>> import scipy.stats as stats

1298 >>> res = stats.boschloo_exact([[74, 31], [43, 32]], alternative="greater")

1299 >>> res.statistic

1300 0.0483...

1301 >>> res.pvalue

1302 0.0355...

1303

1304 Under the null hypothesis that scientists are happier in their work than

1305 college professors, the probability of obtaining test

1306 results at least as extreme as the observed data is approximately 3.55%.

1307 Since this p-value is less than our chosen significance level, we have

1308 evidence to reject :math:`H_0` in favor of the alternative hypothesis.

1309

1310 """

1311 hypergeom = distributions.hypergeom

1312

1313 if n <= 0:

1314 raise ValueError(

1315 "Number of points `n` must be strictly positive,"

1316 f" found {n!r}"

1317 )

1318

1319 table = np.asarray(table, dtype=np.int64)

1320

1321 if not table.shape == (2, 2):

1322 raise ValueError("The input `table` must be of shape (2, 2).")

1323

1324 if np.any(table < 0):

1325 raise ValueError("All values in `table` must be nonnegative.")

1326

1327 if 0 in table.sum(axis=0):

1328 # If both values in column are zero, the p-value is 1 and

1329 # the score's statistic is NaN.

1330 return BoschlooExactResult(np.nan, np.nan)

1331

1332 total_col_1, total_col_2 = table.sum(axis=0)

1333 total = total_col_1 + total_col_2

1334 x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(1, -1)

1335 x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(-1, 1)

1336 x1_sum_x2 = x1 + x2

1337

1338 if alternative == 'less':

1339 pvalues = hypergeom.cdf(x1, total, x1_sum_x2, total_col_1).T

1340 elif alternative == 'greater':

1341 # Same formula as the 'less' case, but with the second column.

1342 pvalues = hypergeom.cdf(x2, total, x1_sum_x2, total_col_2).T

1343 elif alternative == 'two-sided':

1344 boschloo_less = boschloo_exact(table, alternative="less", n=n)

1345 boschloo_greater = boschloo_exact(table, alternative="greater", n=n)

1346

1347 res = (

1348 boschloo_less if boschloo_less.pvalue < boschloo_greater.pvalue

1349 else boschloo_greater

1350 )

1351

1352 # Two-sided p-value is defined as twice the minimum of the one-sided

1353 # p-values

1354 pvalue = np.clip(2 * res.pvalue, a_min=0, a_max=1)

1355 return BoschlooExactResult(res.statistic, pvalue)

1356 else:

1357 msg = (

1358 f"`alternative` should be one of {'two-sided', 'less', 'greater'},"

1359 f" found {alternative!r}"

1360 )

1361 raise ValueError(msg)

1362

1363 fisher_stat = pvalues[table[0, 0], table[0, 1]]

1364

1365 # fisher_stat * (1+1e-13) guards us from small numerical error. It is

1366 # equivalent to np.isclose with relative tol of 1e-13 and absolute tol of 0

1367 # For more throughout explanations, see gh-14178

1368 index_arr = pvalues <= fisher_stat * (1+1e-13)

1369

1370 x1, x2, x1_sum_x2 = x1.T, x2.T, x1_sum_x2.T

1371 x1_log_comb = _compute_log_combinations(total_col_1)

1372 x2_log_comb = _compute_log_combinations(total_col_2)

1373 x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]

1374

1375 result = shgo(

1376 _get_binomial_log_p_value_with_nuisance_param,

1377 args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),

1378 bounds=((0, 1),),

1379 n=n,

1380 sampling_method="sobol",

1381 )

1382

1383 # result.fun is the negative log pvalue and therefore needs to be

1384 # changed before return

1385 p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)

1386 return BoschlooExactResult(fisher_stat, p_value)

1387

1388

1389def _get_binomial_log_p_value_with_nuisance_param(

1390 nuisance_param, x1_sum_x2, x1_sum_x2_log_comb, index_arr

1391):

1392 r"""

1393 Compute the log pvalue in respect of a nuisance parameter considering

1394 a 2x2 sample space.

1395

1396 Parameters

1397 ----------

1398 nuisance_param : float

1399 nuisance parameter used in the computation of the maximisation of

1400 the p-value. Must be between 0 and 1

1401

1402 x1_sum_x2 : ndarray

1403 Sum of x1 and x2 inside barnard_exact

1404

1405 x1_sum_x2_log_comb : ndarray

1406 sum of the log combination of x1 and x2

1407

1408 index_arr : ndarray of boolean

1409

1410 Returns

1411 -------

1412 p_value : float

1413 Return the maximum p-value considering every nuisance paramater

1414 between 0 and 1

1415

1416 Notes

1417 -----

1418

1419 Both Barnard's test and Boschloo's test iterate over a nuisance parameter

1420 :math:`\pi \in [0, 1]` to find the maximum p-value. To search this

1421 maxima, this function return the negative log pvalue with respect to the

1422 nuisance parameter passed in params. This negative log p-value is then

1423 used in `shgo` to find the minimum negative pvalue which is our maximum

1424 pvalue.

1425

1426 Also, to compute the different combination used in the

1427 p-values' computation formula, this function uses `gammaln` which is

1428 more tolerant for large value than `scipy.special.comb`. `gammaln` gives

1429 a log combination. For the little precision loss, performances are

1430 improved a lot.

1431 """

1432 t1, t2 = x1_sum_x2.shape

1433 n = t1 + t2 - 2

1434 with np.errstate(divide="ignore", invalid="ignore"):

1435 log_nuisance = np.log(

1436 nuisance_param,

1437 out=np.zeros_like(nuisance_param),

1438 where=nuisance_param >= 0,

1439 )

1440 log_1_minus_nuisance = np.log(

1441 1 - nuisance_param,

1442 out=np.zeros_like(nuisance_param),

1443 where=1 - nuisance_param >= 0,

1444 )

1445

1446 nuisance_power_x1_x2 = log_nuisance * x1_sum_x2

1447 nuisance_power_x1_x2[(x1_sum_x2 == 0)[:, :]] = 0

1448

1449 nuisance_power_n_minus_x1_x2 = log_1_minus_nuisance * (n - x1_sum_x2)

1450 nuisance_power_n_minus_x1_x2[(x1_sum_x2 == n)[:, :]] = 0

1451

1452 tmp_log_values_arr = (

1453 x1_sum_x2_log_comb

1454 + nuisance_power_x1_x2

1455 + nuisance_power_n_minus_x1_x2

1456 )

1457

1458 tmp_values_from_index = tmp_log_values_arr[index_arr]

1459

1460 # To avoid dividing by zero in log function and getting inf value,

1461 # values are centered according to the max

1462 max_value = tmp_values_from_index.max()

1463

1464 # To have better result's precision, the log pvalue is taken here.

1465 # Indeed, pvalue is included inside [0, 1] interval. Passing the

1466 # pvalue to log makes the interval a lot bigger ([-inf, 0]), and thus

1467 # help us to achieve better precision

1468 with np.errstate(divide="ignore", invalid="ignore"):

1469 log_probs = np.exp(tmp_values_from_index - max_value).sum()

1470 log_pvalue = max_value + np.log(

1471 log_probs,

1472 out=np.full_like(log_probs, -np.inf),

1473 where=log_probs > 0,

1474 )

1475

1476 # Since shgo find the minima, minus log pvalue is returned

1477 return -log_pvalue

1478

1479

1480def _pval_cvm_2samp_exact(s, m, n):

1481 """

1482 Compute the exact p-value of the Cramer-von Mises two-sample test

1483 for a given value s of the test statistic.

1484 m and n are the sizes of the samples.

1485

1486 [1] Y. Xiao, A. Gordon, and A. Yakovlev, "A C++ Program for

1487 the Cramér-Von Mises Two-Sample Test", J. Stat. Soft.,

1488 vol. 17, no. 8, pp. 1-15, Dec. 2006.

1489 [2] T. W. Anderson "On the Distribution of the Two-Sample Cramer-von Mises

1490 Criterion," The Annals of Mathematical Statistics, Ann. Math. Statist.

1491 33(3), 1148-1159, (September, 1962)

1492 """

1493

1494 # [1, p. 3]

1495 lcm = np.lcm(m, n)

1496 # [1, p. 4], below eq. 3

1497 a = lcm // m

1498 b = lcm // n

1499 # Combine Eq. 9 in [2] with Eq. 2 in [1] and solve for $\zeta$

1500 # Hint: `s` is $U$ in [2], and $T_2$ in [1] is $T$ in [2]

1501 mn = m * n

1502 zeta = lcm ** 2 * (m + n) * (6 * s - mn * (4 * mn - 1)) // (6 * mn ** 2)

1503

1504 # bound maximum value that may appear in `gs` (remember both rows!)

1505 zeta_bound = lcm**2 * (m + n) # bound elements in row 1

1506 combinations = comb(m + n, m) # sum of row 2

1507 max_gs = max(zeta_bound, combinations)

1508 dtype = np.min_scalar_type(max_gs)

1509

1510 # the frequency table of $g_{u, v}^+$ defined in [1, p. 6]

1511 gs = ([np.array([[0], [1]], dtype=dtype)]

1512 + [np.empty((2, 0), dtype=dtype) for _ in range(m)])

1513 for u in range(n + 1):

1514 next_gs = []

1515 tmp = np.empty((2, 0), dtype=dtype)

1516 for v, g in enumerate(gs):

1517 # Calculate g recursively with eq. 11 in [1]. Even though it

1518 # doesn't look like it, this also does 12/13 (all of Algorithm 1).

1519 vi, i0, i1 = np.intersect1d(tmp[0], g[0], return_indices=True)

1520 tmp = np.concatenate([

1521 np.stack([vi, tmp[1, i0] + g[1, i1]]),

1522 np.delete(tmp, i0, 1),

1523 np.delete(g, i1, 1)

1524 ], 1)

1525 tmp[0] += (a * v - b * u) ** 2

1526 next_gs.append(tmp)

1527 gs = next_gs

1528 value, freq = gs[m]

1529 return np.float64(np.sum(freq[value >= zeta]) / combinations)

1530

1531

1532def cramervonmises_2samp(x, y, method='auto'):

1533 """Perform the two-sample Cramér-von Mises test for goodness of fit.

1534

1535 This is the two-sample version of the Cramér-von Mises test ([1]_):

1536 for two independent samples :math:`X_1, ..., X_n` and

1537 :math:`Y_1, ..., Y_m`, the null hypothesis is that the samples

1538 come from the same (unspecified) continuous distribution.

1539

1540 Parameters

1541 ----------

1542 x : array_like

1543 A 1-D array of observed values of the random variables :math:`X_i`.

1544 y : array_like

1545 A 1-D array of observed values of the random variables :math:`Y_i`.

1546 method : {'auto', 'asymptotic', 'exact'}, optional

1547 The method used to compute the p-value, see Notes for details.

1548 The default is 'auto'.

1549

1550 Returns

1551 -------

1552 res : object with attributes

1553 statistic : float

1554 Cramér-von Mises statistic.

1555 pvalue : float

1556 The p-value.

1557

1558 See Also

1559 --------

1560 cramervonmises, anderson_ksamp, epps_singleton_2samp, ks_2samp

1561

1562 Notes

1563 -----

1564 .. versionadded:: 1.7.0

1565

1566 The statistic is computed according to equation 9 in [2]_. The

1567 calculation of the p-value depends on the keyword `method`:

1568

1569 - ``asymptotic``: The p-value is approximated by using the limiting

1570 distribution of the test statistic.

1571 - ``exact``: The exact p-value is computed by enumerating all

1572 possible combinations of the test statistic, see [2]_.

1573

1574 If ``method='auto'``, the exact approach is used

1575 if both samples contain equal to or less than 20 observations,

1576 otherwise the asymptotic distribution is used.

1577

1578 If the underlying distribution is not continuous, the p-value is likely to

1579 be conservative (Section 6.2 in [3]_). When ranking the data to compute

1580 the test statistic, midranks are used if there are ties.

1581

1582 References

1583 ----------

1584 .. [1] https://en.wikipedia.org/wiki/Cramer-von_Mises_criterion

1585 .. [2] Anderson, T.W. (1962). On the distribution of the two-sample

1586 Cramer-von-Mises criterion. The Annals of Mathematical

1587 Statistics, pp. 1148-1159.

1588 .. [3] Conover, W.J., Practical Nonparametric Statistics, 1971.

1589

1590 Examples

1591 --------

1592

1593 Suppose we wish to test whether two samples generated by

1594 ``scipy.stats.norm.rvs`` have the same distribution. We choose a

1595 significance level of alpha=0.05.

1596

1597 >>> import numpy as np

1598 >>> from scipy import stats

1599 >>> rng = np.random.default_rng()

1600 >>> x = stats.norm.rvs(size=100, random_state=rng)

1601 >>> y = stats.norm.rvs(size=70, random_state=rng)

1602 >>> res = stats.cramervonmises_2samp(x, y)

1603 >>> res.statistic, res.pvalue

1604 (0.29376470588235293, 0.1412873014573014)

1605

1606 The p-value exceeds our chosen significance level, so we do not

1607 reject the null hypothesis that the observed samples are drawn from the

1608 same distribution.

1609

1610 For small sample sizes, one can compute the exact p-values:

1611

1612 >>> x = stats.norm.rvs(size=7, random_state=rng)

1613 >>> y = stats.t.rvs(df=2, size=6, random_state=rng)

1614 >>> res = stats.cramervonmises_2samp(x, y, method='exact')

1615 >>> res.statistic, res.pvalue

1616 (0.197802197802198, 0.31643356643356646)

1617

1618 The p-value based on the asymptotic distribution is a good approximation

1619 even though the sample size is small.

1620

1621 >>> res = stats.cramervonmises_2samp(x, y, method='asymptotic')

1622 >>> res.statistic, res.pvalue

1623 (0.197802197802198, 0.2966041181527128)

1624

1625 Independent of the method, one would not reject the null hypothesis at the

1626 chosen significance level in this example.

1627

1628 """

1629 xa = np.sort(np.asarray(x))

1630 ya = np.sort(np.asarray(y))

1631

1632 if xa.size <= 1 or ya.size <= 1:

1633 raise ValueError('x and y must contain at least two observations.')

1634 if xa.ndim > 1 or ya.ndim > 1:

1635 raise ValueError('The samples must be one-dimensional.')

1636 if method not in ['auto', 'exact', 'asymptotic']:

1637 raise ValueError('method must be either auto, exact or asymptotic.')

1638

1639 nx = len(xa)

1640 ny = len(ya)

1641

1642 if method == 'auto':

1643 if max(nx, ny) > 20:

1644 method = 'asymptotic'

1645 else:

1646 method = 'exact'

1647

1648 # get ranks of x and y in the pooled sample

1649 z = np.concatenate([xa, ya])

1650 # in case of ties, use midrank (see [1])

1651 r = scipy.stats.rankdata(z, method='average')

1652 rx = r[:nx]

1653 ry = r[nx:]

1654

1655 # compute U (eq. 10 in [2])

1656 u = nx * np.sum((rx - np.arange(1, nx+1))**2)

1657 u += ny * np.sum((ry - np.arange(1, ny+1))**2)

1658

1659 # compute T (eq. 9 in [2])

1660 k, N = nx*ny, nx + ny

1661 t = u / (k*N) - (4*k - 1)/(6*N)

1662

1663 if method == 'exact':

1664 p = _pval_cvm_2samp_exact(u, nx, ny)

1665 else:

1666 # compute expected value and variance of T (eq. 11 and 14 in [2])

1667 et = (1 + 1/N)/6

1668 vt = (N+1) * (4*k*N - 3*(nx**2 + ny**2) - 2*k)

1669 vt = vt / (45 * N**2 * 4 * k)

1670

1671 # computed the normalized statistic (eq. 15 in [2])

1672 tn = 1/6 + (t - et) / np.sqrt(45 * vt)

1673

1674 # approximate distribution of tn with limiting distribution

1675 # of the one-sample test statistic

1676 # if tn < 0.003, the _cdf_cvm_inf(tn) < 1.28*1e-18, return 1.0 directly

1677 if tn < 0.003:

1678 p = 1.0

1679 else:

1680 p = max(0, 1. - _cdf_cvm_inf(tn))

1681

1682 return CramerVonMisesResult(statistic=t, pvalue=p)

1683

1684

1685class TukeyHSDResult:

1686 """Result of `scipy.stats.tukey_hsd`.

1687

1688 Attributes

1689 ----------

1690 statistic : float ndarray

1691 The computed statistic of the test for each comparison. The element

1692 at index ``(i, j)`` is the statistic for the comparison between groups

1693 ``i`` and ``j``.

1694 pvalue : float ndarray

1695 The associated p-value from the studentized range distribution. The

1696 element at index ``(i, j)`` is the p-value for the comparison

1697 between groups ``i`` and ``j``.

1698

1699 Notes

1700 -----

1701 The string representation of this object displays the most recently

1702 calculated confidence interval, and if none have been previously

1703 calculated, it will evaluate ``confidence_interval()``.

1704

1705 References

1706 ----------

1707 .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's

1708 Method."

1709 https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,

1710 28 November 2020.

1711 """

1712

1713 def __init__(self, statistic, pvalue, _nobs, _ntreatments, _stand_err):

1714 self.statistic = statistic

1715 self.pvalue = pvalue

1716 self._ntreatments = _ntreatments

1717 self._nobs = _nobs

1718 self._stand_err = _stand_err

1719 self._ci = None

1720 self._ci_cl = None

1721

1722 def __str__(self):

1723 # Note: `__str__` prints the confidence intervals from the most

1724 # recent call to `confidence_interval`. If it has not been called,

1725 # it will be called with the default CL of .95.

1726 if self._ci is None:

1727 self.confidence_interval(confidence_level=.95)

1728 s = ("Tukey's HSD Pairwise Group Comparisons"

1729 f" ({self._ci_cl*100:.1f}% Confidence Interval)\n")

1730 s += "Comparison Statistic p-value Lower CI Upper CI\n"

1731 for i in range(self.pvalue.shape[0]):

1732 for j in range(self.pvalue.shape[0]):

1733 if i != j:

1734 s += (f" ({i} - {j}) {self.statistic[i, j]:>10.3f}"

1735 f"{self.pvalue[i, j]:>10.3f}"

1736 f"{self._ci.low[i, j]:>10.3f}"

1737 f"{self._ci.high[i, j]:>10.3f}\n")

1738 return s

1739

1740 def confidence_interval(self, confidence_level=.95):

1741 """Compute the confidence interval for the specified confidence level.

1742

1743 Parameters

1744 ----------

1745 confidence_level : float, optional

1746 Confidence level for the computed confidence interval

1747 of the estimated proportion. Default is .95.

1748

1749 Returns

1750 -------

1751 ci : ``ConfidenceInterval`` object

1752 The object has attributes ``low`` and ``high`` that hold the

1753 lower and upper bounds of the confidence intervals for each

1754 comparison. The high and low values are accessible for each

1755 comparison at index ``(i, j)`` between groups ``i`` and ``j``.

1756

1757 References

1758 ----------

1759 .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1.

1760 Tukey's Method."

1761 https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,

1762 28 November 2020.

1763

1764 Examples

1765 --------

1766 >>> from scipy.stats import tukey_hsd

1767 >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]

1768 >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]

1769 >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]

1770 >>> result = tukey_hsd(group0, group1, group2)

1771 >>> ci = result.confidence_interval()

1772 >>> ci.low

1773 array([[-3.649159, -8.249159, -3.909159],

1774 [ 0.950841, -3.649159, 0.690841],

1775 [-3.389159, -7.989159, -3.649159]])

1776 >>> ci.high

1777 array([[ 3.649159, -0.950841, 3.389159],

1778 [ 8.249159, 3.649159, 7.989159],

1779 [ 3.909159, -0.690841, 3.649159]])

1780 """

1781 # check to see if the supplied confidence level matches that of the

1782 # previously computed CI.

1783 if (self._ci is not None and self._ci_cl is not None and

1784 confidence_level == self._ci_cl):

1785 return self._ci

1786

1787 if not 0 < confidence_level < 1:

1788 raise ValueError("Confidence level must be between 0 and 1.")

1789 # determine the critical value of the studentized range using the

1790 # appropriate confidence level, number of treatments, and degrees

1791 # of freedom as determined by the number of data less the number of

1792 # treatments. ("Confidence limits for Tukey's method")[1]. Note that

1793 # in the cases of unequal sample sizes there will be a criterion for

1794 # each group comparison.

1795 params = (confidence_level, self._nobs, self._ntreatments - self._nobs)

1796 srd = distributions.studentized_range.ppf(*params)

1797 # also called maximum critical value, the Tukey criterion is the

1798 # studentized range critical value * the square root of mean square

1799 # error over the sample size.

1800 tukey_criterion = srd * self._stand_err

1801 # the confidence levels are determined by the

1802 # `mean_differences` +- `tukey_criterion`

1803 upper_conf = self.statistic + tukey_criterion

1804 lower_conf = self.statistic - tukey_criterion

1805 self._ci = ConfidenceInterval(low=lower_conf, high=upper_conf)

1806 self._ci_cl = confidence_level

1807 return self._ci

1808

1809

1810def _tukey_hsd_iv(args):

1811 if (len(args)) < 2:

1812 raise ValueError("There must be more than 1 treatment.")

1813 args = [np.asarray(arg) for arg in args]

1814 for arg in args:

1815 if arg.ndim != 1:

1816 raise ValueError("Input samples must be one-dimensional.")

1817 if arg.size <= 1:

1818 raise ValueError("Input sample size must be greater than one.")

1819 if np.isinf(arg).any():

1820 raise ValueError("Input samples must be finite.")

1821 return args

1822

1823

1824def tukey_hsd(*args):

1825 """Perform Tukey's HSD test for equality of means over multiple treatments.

1826

1827 Tukey's honestly significant difference (HSD) test performs pairwise

1828 comparison of means for a set of samples. Whereas ANOVA (e.g. `f_oneway`)

1829 assesses whether the true means underlying each sample are identical,

1830 Tukey's HSD is a post hoc test used to compare the mean of each sample

1831 to the mean of each other sample.

1832

1833 The null hypothesis is that the distributions underlying the samples all

1834 have the same mean. The test statistic, which is computed for every

1835 possible pairing of samples, is simply the difference between the sample

1836 means. For each pair, the p-value is the probability under the null

1837 hypothesis (and other assumptions; see notes) of observing such an extreme

1838 value of the statistic, considering that many pairwise comparisons are

1839 being performed. Confidence intervals for the difference between each pair

1840 of means are also available.

1841

1842 Parameters

1843 ----------

1844 sample1, sample2, ... : array_like

1845 The sample measurements for each group. There must be at least

1846 two arguments.

1847

1848 Returns

1849 -------

1850 result : `~scipy.stats._result_classes.TukeyHSDResult` instance

1851 The return value is an object with the following attributes:

1852

1853 statistic : float ndarray

1854 The computed statistic of the test for each comparison. The element

1855 at index ``(i, j)`` is the statistic for the comparison between

1856 groups ``i`` and ``j``.

1857 pvalue : float ndarray

1858 The computed p-value of the test for each comparison. The element

1859 at index ``(i, j)`` is the p-value for the comparison between

1860 groups ``i`` and ``j``.

1861

1862 The object has the following methods:

1863

1864 confidence_interval(confidence_level=0.95):

1865 Compute the confidence interval for the specified confidence level.

1866

1867 Notes

1868 -----

1869 The use of this test relies on several assumptions.

1870

1871 1. The observations are independent within and among groups.

1872 2. The observations within each group are normally distributed.

1873 3. The distributions from which the samples are drawn have the same finite

1874 variance.

1875

1876 The original formulation of the test was for samples of equal size [6]_.

1877 In case of unequal sample sizes, the test uses the Tukey-Kramer method

1878 [4]_.

1879

1880 References

1881 ----------

1882 .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's

1883 Method."

1884 https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,

1885 28 November 2020.

1886 .. [2] Abdi, Herve & Williams, Lynne. (2021). "Tukey's Honestly Significant

1887 Difference (HSD) Test."

1888 https://personal.utdallas.edu/~herve/abdi-HSD2010-pretty.pdf

1889 .. [3] "One-Way ANOVA Using SAS PROC ANOVA & PROC GLM." SAS

1890 Tutorials, 2007, www.stattutorials.com/SAS/TUTORIAL-PROC-GLM.htm.

1891 .. [4] Kramer, Clyde Young. "Extension of Multiple Range Tests to Group

1892 Means with Unequal Numbers of Replications." Biometrics, vol. 12,

1893 no. 3, 1956, pp. 307-310. JSTOR, www.jstor.org/stable/3001469.

1894 Accessed 25 May 2021.

1895 .. [5] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.3.3.

1896 The ANOVA table and tests of hypotheses about means"

1897 https://www.itl.nist.gov/div898/handbook/prc/section4/prc433.htm,

1898 2 June 2021.

1899 .. [6] Tukey, John W. "Comparing Individual Means in the Analysis of

1900 Variance." Biometrics, vol. 5, no. 2, 1949, pp. 99-114. JSTOR,

1901 www.jstor.org/stable/3001913. Accessed 14 June 2021.

1902

1903

1904 Examples

1905 --------

1906 Here are some data comparing the time to relief of three brands of

1907 headache medicine, reported in minutes. Data adapted from [3]_.

1908

1909 >>> import numpy as np

1910 >>> from scipy.stats import tukey_hsd

1911 >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]

1912 >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]

1913 >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]

1914

1915 We would like to see if the means between any of the groups are

1916 significantly different. First, visually examine a box and whisker plot.

1917

1918 >>> import matplotlib.pyplot as plt

1919 >>> fig, ax = plt.subplots(1, 1)

1920 >>> ax.boxplot([group0, group1, group2])

1921 >>> ax.set_xticklabels(["group0", "group1", "group2"]) # doctest: +SKIP

1922 >>> ax.set_ylabel("mean") # doctest: +SKIP

1923 >>> plt.show()

1924

1925 From the box and whisker plot, we can see overlap in the interquartile

1926 ranges group 1 to group 2 and group 3, but we can apply the ``tukey_hsd``

1927 test to determine if the difference between means is significant. We

1928 set a significance level of .05 to reject the null hypothesis.

1929

1930 >>> res = tukey_hsd(group0, group1, group2)

1931 >>> print(res)

1932 Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)

1933 Comparison Statistic p-value Lower CI Upper CI

1934 (0 - 1) -4.600 0.014 -8.249 -0.951

1935 (0 - 2) -0.260 0.980 -3.909 3.389

1936 (1 - 0) 4.600 0.014 0.951 8.249

1937 (1 - 2) 4.340 0.020 0.691 7.989

1938 (2 - 0) 0.260 0.980 -3.389 3.909

1939 (2 - 1) -4.340 0.020 -7.989 -0.691

1940

1941 The null hypothesis is that each group has the same mean. The p-value for

1942 comparisons between ``group0`` and ``group1`` as well as ``group1`` and

1943 ``group2`` do not exceed .05, so we reject the null hypothesis that they

1944 have the same means. The p-value of the comparison between ``group0``

1945 and ``group2`` exceeds .05, so we accept the null hypothesis that there

1946 is not a significant difference between their means.

1947

1948 We can also compute the confidence interval associated with our chosen

1949 confidence level.

1950

1951 >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]

1952 >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]

1953 >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]

1954 >>> result = tukey_hsd(group0, group1, group2)

1955 >>> conf = res.confidence_interval(confidence_level=.99)

1956 >>> for ((i, j), l) in np.ndenumerate(conf.low):

1957 ... # filter out self comparisons

1958 ... if i != j:

1959 ... h = conf.high[i,j]

1960 ... print(f"({i} - {j}) {l:>6.3f} {h:>6.3f}")

1961 (0 - 1) -9.480 0.280

1962 (0 - 2) -5.140 4.620

1963 (1 - 0) -0.280 9.480

1964 (1 - 2) -0.540 9.220

1965 (2 - 0) -4.620 5.140

1966 (2 - 1) -9.220 0.540

1967 """

1968 args = _tukey_hsd_iv(args)

1969 ntreatments = len(args)

1970 means = np.asarray([np.mean(arg) for arg in args])

1971 nsamples_treatments = np.asarray([a.size for a in args])

1972 nobs = np.sum(nsamples_treatments)

1973

1974 # determine mean square error [5]. Note that this is sometimes called

1975 # mean square error within.

1976 mse = (np.sum([np.var(arg, ddof=1) for arg in args] *

1977 (nsamples_treatments - 1)) / (nobs - ntreatments))

1978

1979 # The calculation of the standard error differs when treatments differ in

1980 # size. See ("Unequal sample sizes")[1].

1981 if np.unique(nsamples_treatments).size == 1:

1982 # all input groups are the same length, so only one value needs to be

1983 # calculated [1].

1984 normalize = 2 / nsamples_treatments[0]

1985 else:

1986 # to compare groups of differing sizes, we must compute a variance

1987 # value for each individual comparison. Use broadcasting to get the

1988 # resulting matrix. [3], verified against [4] (page 308).

1989 normalize = 1 / nsamples_treatments + 1 / nsamples_treatments[None].T

1990

1991 # the standard error is used in the computation of the tukey criterion and

1992 # finding the p-values.

1993 stand_err = np.sqrt(normalize * mse / 2)

1994

1995 # the mean difference is the test statistic.

1996 mean_differences = means[None].T - means

1997

1998 # Calculate the t-statistic to use within the survival function of the

1999 # studentized range to get the p-value.

2000 t_stat = np.abs(mean_differences) / stand_err

2001

2002 params = t_stat, ntreatments, nobs - ntreatments

2003 pvalues = distributions.studentized_range.sf(*params)

2004

2005 return TukeyHSDResult(mean_differences, pvalues, ntreatments,

2006 nobs, stand_err)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_hypotests.py: 10%

467 statements