Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_resampling.py: 9%
403 statements
coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
import warnings
import numpy as np
from itertools import combinations, permutations, product
import inspect

from scipy._lib._util import check_random_state
from scipy.special import ndtr, ndtri, comb, factorial
from scipy._lib._util import rng_integers
from dataclasses import make_dataclass
from ._common import ConfidenceInterval
from ._axis_nan_policy import _broadcast_concatenate, _broadcast_arrays
from ._warnings_errors import DegenerateDataWarning

__all__ = ['bootstrap', 'monte_carlo_test', 'permutation_test']


def _vectorize_statistic(statistic):
    """Vectorize an n-sample statistic"""
    # This is a little cleaner than np.nditer at the expense of some data
    # copying: concatenate samples together, then use np.apply_along_axis
    def stat_nd(*data, axis=0):
        lengths = [sample.shape[axis] for sample in data]
        split_indices = np.cumsum(lengths)[:-1]
        z = _broadcast_concatenate(data, axis)

        # move working axis to position 0 so that new dimensions in the output
        # of `statistic` are _prepended_. ("This axis is removed, and replaced
        # with new dimensions...")
        z = np.moveaxis(z, axis, 0)

        def stat_1d(z):
            data = np.split(z, split_indices)
            return statistic(*data)

        return np.apply_along_axis(stat_1d, 0, z)[()]
    return stat_nd
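
# A minimal usage sketch (not part of the public API; assumes only NumPy
# semantics): the returned wrapper lets a 1-D-only statistic operate along
# an axis of an n-D array.
#
#     >>> vectorized_mean = _vectorize_statistic(lambda x: np.mean(x))
#     >>> vectorized_mean(np.array([[1., 2.], [3., 4.]]), axis=0)
#     array([2., 3.])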


def _jackknife_resample(sample, batch=None):
    """Jackknife resample the sample. Only one-sample stats for now."""
    n = sample.shape[-1]
    batch_nominal = batch or n

    for k in range(0, n, batch_nominal):
        # col_start:col_end are the observations to remove
        batch_actual = min(batch_nominal, n-k)

        # jackknife - each row leaves out one observation
        j = np.ones((batch_actual, n), dtype=bool)
        np.fill_diagonal(j[:, k:k+batch_actual], False)
        i = np.arange(n)
        i = np.broadcast_to(i, (batch_actual, n))
        i = i[j].reshape((batch_actual, n-1))

        resamples = sample[..., i]
        yield resamples
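
# A minimal usage sketch (not part of the public API): each yielded batch of
# jackknife resamples leaves one observation out per row.
#
#     >>> sample = np.array([1., 2., 3.])
#     >>> next(_jackknife_resample(sample))
#     array([[2., 3.],
#            [1., 3.],
#            [1., 2.]])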


def _bootstrap_resample(sample, n_resamples=None, random_state=None):
    """Bootstrap resample the sample."""
    n = sample.shape[-1]

    # bootstrap - each row is a random resample of original observations
    i = rng_integers(random_state, 0, n, (n_resamples, n))

    resamples = sample[..., i]
    return resamples
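
# A minimal usage sketch (not part of the public API): resamples are stacked
# along a new second-to-last axis.
#
#     >>> rng = np.random.default_rng()
#     >>> _bootstrap_resample(np.arange(5.), n_resamples=3,
#     ...                     random_state=rng).shape
#     (3, 5)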


def _percentile_of_score(a, score, axis):
    """Vectorized, simplified `scipy.stats.percentileofscore`.
    Uses logic of the 'mean' value of percentileofscore's kind parameter.

    Unlike `stats.percentileofscore`, the percentile returned is a fraction
    in [0, 1].
    """
    B = a.shape[axis]
    return ((a < score).sum(axis=axis) + (a <= score).sum(axis=axis)) / (2 * B)
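
# A minimal usage sketch (not part of the public API): values tied with
# `score` count half, per the 'mean' logic.
#
#     >>> _percentile_of_score(np.array([0., 1., 2., 3.]), 2, axis=-1)
#     0.625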


def _percentile_along_axis(theta_hat_b, alpha):
    """`np.percentile` with different percentile for each slice."""
    # the difference between _percentile_along_axis and np.percentile is that
    # np.percentile gets _all_ the qs for each axis slice, whereas
    # _percentile_along_axis gets the q corresponding with each axis slice
    shape = theta_hat_b.shape[:-1]
    alpha = np.broadcast_to(alpha, shape)
    percentiles = np.zeros_like(alpha, dtype=np.float64)
    for indices, alpha_i in np.ndenumerate(alpha):
        if np.isnan(alpha_i):
            # e.g. when bootstrap distribution has only one unique element
            msg = (
                "The BCa confidence interval cannot be calculated."
                " This problem is known to occur when the distribution"
                " is degenerate or the statistic is np.min."
            )
            warnings.warn(DegenerateDataWarning(msg))
            percentiles[indices] = np.nan
        else:
            theta_hat_b_i = theta_hat_b[indices]
            percentiles[indices] = np.percentile(theta_hat_b_i, alpha_i)
    return percentiles[()]  # return scalar instead of 0d array
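
# A minimal usage sketch (not part of the public API): one percentile per
# slice, with `alpha` given in percent as `np.percentile` expects.
#
#     >>> theta_hat_b = np.array([[0., 1., 2., 3., 4.],
#     ...                         [0., 10., 20., 30., 40.]])
#     >>> _percentile_along_axis(theta_hat_b, np.array([50., 25.]))
#     array([ 2., 10.])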


def _bca_interval(data, statistic, axis, alpha, theta_hat_b, batch):
    """Bias-corrected and accelerated interval."""
    # closely follows [1] 14.3 and 15.4 (Eq. 15.36)
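    # With Phi the standard normal CDF (`ndtr`) and Phi^{-1} its inverse
    # (`ndtri`), the endpoints computed below are
    #     alpha_1 = Phi(z0 + (z0 + z_alpha) / (1 - a*(z0 + z_alpha)))
    #     alpha_2 = Phi(z0 + (z0 + z_{1-alpha}) / (1 - a*(z0 + z_{1-alpha})))
    # where z0 is the bias correction and a is the acceleration.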

    # calculate z0_hat
    theta_hat = np.asarray(statistic(*data, axis=axis))[..., None]
    percentile = _percentile_of_score(theta_hat_b, theta_hat, axis=-1)
    z0_hat = ndtri(percentile)

    # calculate a_hat
    theta_hat_ji = []  # j is for sample of data, i is for jackknife resample
    for j, sample in enumerate(data):
        # _jackknife_resample will add an axis prior to the last axis that
        # corresponds with the different jackknife resamples. Do the same for
        # each sample of the data to ensure broadcastability. We need to
        # create a copy of the list containing the samples anyway, so do this
        # in the loop to simplify the code. This is not the bottleneck...
        samples = [np.expand_dims(sample, -2) for sample in data]
        theta_hat_i = []
        for jackknife_sample in _jackknife_resample(sample, batch):
            samples[j] = jackknife_sample
            broadcasted = _broadcast_arrays(samples, axis=-1)
            theta_hat_i.append(statistic(*broadcasted, axis=-1))
        theta_hat_ji.append(theta_hat_i)

    theta_hat_ji = [np.concatenate(theta_hat_i, axis=-1)
                    for theta_hat_i in theta_hat_ji]

    n_j = [theta_hat_i.shape[-1] for theta_hat_i in theta_hat_ji]

    theta_hat_j_dot = [theta_hat_i.mean(axis=-1, keepdims=True)
                       for theta_hat_i in theta_hat_ji]

    U_ji = [(n - 1) * (theta_hat_dot - theta_hat_i)
            for theta_hat_dot, theta_hat_i, n
            in zip(theta_hat_j_dot, theta_hat_ji, n_j)]

    nums = [(U_i**3).sum(axis=-1)/n**3 for U_i, n in zip(U_ji, n_j)]
    dens = [(U_i**2).sum(axis=-1)/n**2 for U_i, n in zip(U_ji, n_j)]
    a_hat = 1/6 * sum(nums) / sum(dens)**(3/2)

    # calculate alpha_1, alpha_2
    z_alpha = ndtri(alpha)
    z_1alpha = -z_alpha
    num1 = z0_hat + z_alpha
    alpha_1 = ndtr(z0_hat + num1/(1 - a_hat*num1))
    num2 = z0_hat + z_1alpha
    alpha_2 = ndtr(z0_hat + num2/(1 - a_hat*num2))
    return alpha_1, alpha_2, a_hat  # return a_hat for testing


def _bootstrap_iv(data, statistic, vectorized, paired, axis, confidence_level,
                  n_resamples, batch, method, bootstrap_result, random_state):
    """Input validation and standardization for `bootstrap`."""

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic = _vectorize_statistic(statistic)

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    n_samples = 0
    try:
        n_samples = len(data)
    except TypeError:
        raise ValueError("`data` must be a sequence of samples.")

    if n_samples == 0:
        raise ValueError("`data` must contain at least one sample.")

    data_iv = []
    for sample in data:
        sample = np.atleast_1d(sample)
        if sample.shape[axis_int] <= 1:
            raise ValueError("each sample in `data` must contain two or more "
                             "observations along `axis`.")
        sample = np.moveaxis(sample, axis_int, -1)
        data_iv.append(sample)

    if paired not in {True, False}:
        raise ValueError("`paired` must be `True` or `False`.")

    if paired:
        n = data_iv[0].shape[-1]
        for sample in data_iv[1:]:
            if sample.shape[-1] != n:
                message = ("When `paired is True`, all samples must have the "
                           "same length along `axis`")
                raise ValueError(message)

        # to generate the bootstrap distribution for paired-sample statistics,
        # resample the indices of the observations
        def statistic(i, axis=-1, data=data_iv, unpaired_statistic=statistic):
            data = [sample[..., i] for sample in data]
            return unpaired_statistic(*data, axis=axis)

        data_iv = [np.arange(n)]

    confidence_level_float = float(confidence_level)

    n_resamples_int = int(n_resamples)
    if n_resamples != n_resamples_int or n_resamples_int < 0:
        raise ValueError("`n_resamples` must be a non-negative integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    methods = {'percentile', 'basic', 'bca'}
    method = method.lower()
    if method not in methods:
        raise ValueError(f"`method` must be in {methods}")
    message = "`bootstrap_result` must have attribute `bootstrap_distribution`"
    if (bootstrap_result is not None
            and not hasattr(bootstrap_result, "bootstrap_distribution")):
        raise ValueError(message)

    message = ("Either `bootstrap_result.bootstrap_distribution.size` or "
               "`n_resamples` must be positive.")
    if ((not bootstrap_result or
         not bootstrap_result.bootstrap_distribution.size)
            and n_resamples_int == 0):
        raise ValueError(message)

    random_state = check_random_state(random_state)

    return (data_iv, statistic, vectorized, paired, axis_int,
            confidence_level_float, n_resamples_int, batch_iv,
            method, bootstrap_result, random_state)


fields = ['confidence_interval', 'bootstrap_distribution', 'standard_error']
BootstrapResult = make_dataclass("BootstrapResult", fields)


def bootstrap(data, statistic, *, n_resamples=9999, batch=None,
              vectorized=None, paired=False, axis=0, confidence_level=0.95,
              method='BCa', bootstrap_result=None, random_state=None):
    r"""
    Compute a two-sided bootstrap confidence interval of a statistic.

    When `method` is ``'percentile'``, a bootstrap confidence interval is
    computed according to the following procedure.

    1. Resample the data: for each sample in `data` and for each of
       `n_resamples`, take a random sample of the original sample
       (with replacement) of the same size as the original sample.

    2. Compute the bootstrap distribution of the statistic: for each set of
       resamples, compute the test statistic.

    3. Determine the confidence interval: find the interval of the bootstrap
       distribution that is

       - symmetric about the median and
       - contains `confidence_level` of the resampled statistic values.

    While the ``'percentile'`` method is the most intuitive, it is rarely
    used in practice. Two more common methods are available, ``'basic'``
    ('reverse percentile') and ``'BCa'`` ('bias-corrected and accelerated');
    they differ in how step 3 is performed.
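
    As an illustrative sketch (not part of the formal procedure), given a
    mock bootstrap distribution ``theta_hat_b``, step 3 of the
    ``'percentile'`` method amounts to:

    >>> import numpy as np
    >>> theta_hat_b = np.random.default_rng(0).standard_normal(9999)  # mock
    >>> alpha = (1 - 0.95) / 2  # for confidence_level=0.95
    >>> ci_l, ci_u = np.percentile(theta_hat_b, [100*alpha, 100*(1-alpha)])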

    If the samples in `data` are taken at random from their respective
    distributions :math:`n` times, the confidence interval returned by
    `bootstrap` will contain the true value of the statistic for those
    distributions approximately `confidence_level`:math:`\, \times \, n` times.

    Parameters
    ----------
    data : sequence of array-like
        Each element of data is a sample from an underlying distribution.
    statistic : callable
        Statistic for which the confidence interval is to be calculated.
        `statistic` must be a callable that accepts ``len(data)`` samples
        as separate arguments and returns the resulting statistic.
        If `vectorized` is set ``True``,
        `statistic` must also accept a keyword argument `axis` and be
        vectorized to compute the statistic along the provided `axis`.
    n_resamples : int, default: ``9999``
        The number of resamples performed to form the bootstrap distribution
        of the statistic.
    batch : int, optional
        The number of resamples to process in each vectorized call to
        `statistic`. Memory usage is O(`batch`*``n``), where ``n`` is the
        sample size. Default is ``None``, in which case ``batch = n_resamples``
        (or ``batch = max(n_resamples, n)`` for ``method='BCa'``).
    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of
        a vectorized statistic typically reduces computation time.
    paired : bool, default: ``False``
        Whether the statistic treats corresponding elements of the samples
        in `data` as paired.
    axis : int, default: ``0``
        The axis of the samples in `data` along which the `statistic` is
        calculated.
    confidence_level : float, default: ``0.95``
        The confidence level of the confidence interval.
    method : {'percentile', 'basic', 'bca'}, default: ``'BCa'``
        Whether to return the 'percentile' bootstrap confidence interval
        (``'percentile'``), the 'basic' (AKA 'reverse') bootstrap confidence
        interval (``'basic'``), or the bias-corrected and accelerated bootstrap
        confidence interval (``'BCa'``).
    bootstrap_result : BootstrapResult, optional
        Provide the result object returned by a previous call to `bootstrap`
        to include the previous bootstrap distribution in the new bootstrap
        distribution. This can be used, for example, to change
        `confidence_level`, change `method`, or see the effect of performing
        additional resampling without repeating computations.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        Pseudorandom number generator state used to generate resamples.

        If `random_state` is ``None`` (or `np.random`), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Returns
    -------
    res : BootstrapResult
        An object with attributes:

        confidence_interval : ConfidenceInterval
            The bootstrap confidence interval as an instance of
            `collections.namedtuple` with attributes `low` and `high`.
        bootstrap_distribution : ndarray
            The bootstrap distribution, that is, the value of `statistic` for
            each resample. The last dimension corresponds with the resamples
            (e.g. ``res.bootstrap_distribution.shape[-1] == n_resamples``).
        standard_error : float or ndarray
            The bootstrap standard error, that is, the sample standard
            deviation of the bootstrap distribution.

    Warns
    -----
    `~scipy.stats.DegenerateDataWarning`
        Generated when ``method='BCa'`` and the bootstrap distribution is
        degenerate (e.g. all elements are identical).

    Notes
    -----
    Elements of the confidence interval may be NaN for ``method='BCa'`` if
    the bootstrap distribution is degenerate (e.g. all elements are identical).
    In this case, consider using another `method` or inspecting `data` for
    indications that other analysis may be more appropriate (e.g. all
    observations are identical).

    References
    ----------
    .. [1] B. Efron and R. J. Tibshirani, An Introduction to the Bootstrap,
       Chapman & Hall/CRC, Boca Raton, FL, USA (1993)
    .. [2] Nathaniel E. Helwig, "Bootstrap Confidence Intervals",
       http://users.stat.umn.edu/~helwig/notes/bootci-Notes.pdf
    .. [3] Bootstrapping (statistics), Wikipedia,
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Examples
    --------
    Suppose we have sampled data from an unknown distribution.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> from scipy.stats import norm
    >>> dist = norm(loc=2, scale=4)  # our "unknown" distribution
    >>> data = dist.rvs(size=100, random_state=rng)

    We are interested in the standard deviation of the distribution.

    >>> std_true = dist.std()      # the true value of the statistic
    >>> print(std_true)
    4.0
    >>> std_sample = np.std(data)  # the sample statistic
    >>> print(std_sample)
    3.9460644295563863

    The bootstrap is used to approximate the variability we would expect if we
    were to repeatedly sample from the unknown distribution and calculate the
    statistic of the sample each time. It does this by repeatedly resampling
    values *from the original sample* with replacement and calculating the
    statistic of each resample. This results in a "bootstrap distribution" of
    the statistic.

    >>> import matplotlib.pyplot as plt
    >>> from scipy.stats import bootstrap
    >>> data = (data,)  # samples must be in a sequence
    >>> res = bootstrap(data, np.std, confidence_level=0.9,
    ...                 random_state=rng)
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.bootstrap_distribution, bins=25)
    >>> ax.set_title('Bootstrap Distribution')
    >>> ax.set_xlabel('statistic value')
    >>> ax.set_ylabel('frequency')
    >>> plt.show()

    The standard error quantifies this variability. It is calculated as the
    standard deviation of the bootstrap distribution.

    >>> res.standard_error
    0.24427002125829136
    >>> res.standard_error == np.std(res.bootstrap_distribution, ddof=1)
    True

    The bootstrap distribution of the statistic is often approximately normal
    with scale equal to the standard error.

    >>> x = np.linspace(3, 5)
    >>> pdf = norm.pdf(x, loc=std_sample, scale=res.standard_error)
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.bootstrap_distribution, bins=25, density=True)
    >>> ax.plot(x, pdf)
    >>> ax.set_title('Normal Approximation of the Bootstrap Distribution')
    >>> ax.set_xlabel('statistic value')
    >>> ax.set_ylabel('pdf')
    >>> plt.show()

    This suggests that we could construct a 90% confidence interval on the
    statistic based on quantiles of this normal distribution.

    >>> norm.interval(0.9, loc=std_sample, scale=res.standard_error)
    (3.5442759991341726, 4.3478528599786)

    Due to the central limit theorem, this normal approximation is accurate
    for a variety of statistics and distributions underlying the samples;
    however, the approximation is not reliable in all cases. Because
    `bootstrap` is designed to work with arbitrary underlying distributions
    and statistics, it uses more advanced techniques to generate an accurate
    confidence interval.

    >>> print(res.confidence_interval)
    ConfidenceInterval(low=3.57655333533867, high=4.382043696342881)

    If we sample from the original distribution 1000 times and form a bootstrap
    confidence interval for each sample, the confidence interval
    contains the true value of the statistic approximately 90% of the time.

    >>> n_trials = 1000
    >>> ci_contains_true_std = 0
    >>> for i in range(n_trials):
    ...     data = (dist.rvs(size=100, random_state=rng),)
    ...     ci = bootstrap(data, np.std, confidence_level=0.9,
    ...                    n_resamples=1000, random_state=rng).confidence_interval
    ...     if ci[0] < std_true < ci[1]:
    ...         ci_contains_true_std += 1
    >>> print(ci_contains_true_std)
    875

    Rather than writing a loop, we can also determine the confidence intervals
    for all 1000 samples at once.

    >>> data = (dist.rvs(size=(n_trials, 100), random_state=rng),)
    >>> res = bootstrap(data, np.std, axis=-1, confidence_level=0.9,
    ...                 n_resamples=1000, random_state=rng)
    >>> ci_l, ci_u = res.confidence_interval

    Here, `ci_l` and `ci_u` contain the confidence interval for each of the
    ``n_trials = 1000`` samples.

    >>> print(ci_l[995:])
    [3.77729695 3.75090233 3.45829131 3.34078217 3.48072829]
    >>> print(ci_u[995:])
    [4.88316666 4.86924034 4.32032996 4.2822427 4.59360598]

    And again, approximately 90% contain the true value, ``std_true = 4``.

    >>> print(np.sum((ci_l < std_true) & (std_true < ci_u)))
    900

    `bootstrap` can also be used to estimate confidence intervals of
    multi-sample statistics, including those calculated by hypothesis
    tests. `scipy.stats.mood` performs Mood's test for equal scale parameters,
    and it returns two outputs: a statistic and a p-value. To get a
    confidence interval for the test statistic, we first wrap
    `scipy.stats.mood` in a function that accepts two sample arguments,
    accepts an `axis` keyword argument, and returns only the statistic.

    >>> from scipy.stats import mood
    >>> def my_statistic(sample1, sample2, axis):
    ...     statistic, _ = mood(sample1, sample2, axis=axis)
    ...     return statistic

    Here, we use the 'basic' method with the default 95% confidence level.

    >>> sample1 = norm.rvs(scale=1, size=100, random_state=rng)
    >>> sample2 = norm.rvs(scale=2, size=100, random_state=rng)
    >>> data = (sample1, sample2)
    >>> res = bootstrap(data, my_statistic, method='basic', random_state=rng)
    >>> print(mood(sample1, sample2)[0])  # element 0 is the statistic
    -5.521109549096542
    >>> print(res.confidence_interval)
    ConfidenceInterval(low=-7.255994487314675, high=-4.016202624747605)

    The bootstrap estimate of the standard error is also available.

    >>> print(res.standard_error)
    0.8344963846318795

    Paired-sample statistics work, too. For example, consider the Pearson
    correlation coefficient.

    >>> from scipy.stats import pearsonr
    >>> n = 100
    >>> x = np.linspace(0, 10, n)
    >>> y = x + rng.uniform(size=n)
    >>> print(pearsonr(x, y)[0])  # element 0 is the statistic
    0.9962357936065914

    We wrap `pearsonr` so that it returns only the statistic.

    >>> def my_statistic(x, y):
    ...     return pearsonr(x, y)[0]

    We call `bootstrap` using ``paired=True``.
    Also, since ``my_statistic`` isn't vectorized to calculate the statistic
    along a given axis, we pass in ``vectorized=False``.

    >>> res = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                 random_state=rng)
    >>> print(res.confidence_interval)
    ConfidenceInterval(low=0.9950085825848624, high=0.9971212407917498)

    The result object can be passed back into `bootstrap` to perform additional
    resampling:

    >>> len(res.bootstrap_distribution)
    9999
    >>> res = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                 n_resamples=1001, random_state=rng,
    ...                 bootstrap_result=res)
    >>> len(res.bootstrap_distribution)
    11000

    or to change the confidence interval options:

    >>> res2 = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                  n_resamples=0, random_state=rng, bootstrap_result=res,
    ...                  method='percentile', confidence_level=0.9)
    >>> np.testing.assert_equal(res2.bootstrap_distribution,
    ...                         res.bootstrap_distribution)
    >>> res2.confidence_interval
    ConfidenceInterval(low=0.9950035351407804, high=0.9971170323404578)

    without repeating computation of the original bootstrap distribution.

    """
    # Input validation
    args = _bootstrap_iv(data, statistic, vectorized, paired, axis,
                         confidence_level, n_resamples, batch, method,
                         bootstrap_result, random_state)
    data, statistic, vectorized, paired, axis, confidence_level = args[:6]
    n_resamples, batch, method, bootstrap_result, random_state = args[6:]

    theta_hat_b = ([] if bootstrap_result is None
                   else [bootstrap_result.bootstrap_distribution])

    batch_nominal = batch or n_resamples or 1

    for k in range(0, n_resamples, batch_nominal):
        batch_actual = min(batch_nominal, n_resamples-k)
        # Generate resamples
        resampled_data = []
        for sample in data:
            resample = _bootstrap_resample(sample, n_resamples=batch_actual,
                                           random_state=random_state)
            resampled_data.append(resample)

        # Compute bootstrap distribution of statistic
        theta_hat_b.append(statistic(*resampled_data, axis=-1))
    theta_hat_b = np.concatenate(theta_hat_b, axis=-1)

    # Calculate percentile interval
    alpha = (1 - confidence_level)/2
    if method == 'bca':
        interval = _bca_interval(data, statistic, axis=-1, alpha=alpha,
                                 theta_hat_b=theta_hat_b, batch=batch)[:2]
        percentile_fun = _percentile_along_axis
    else:
        interval = alpha, 1-alpha

        def percentile_fun(a, q):
            return np.percentile(a=a, q=q, axis=-1)

    # Calculate confidence interval of statistic
    ci_l = percentile_fun(theta_hat_b, interval[0]*100)
    ci_u = percentile_fun(theta_hat_b, interval[1]*100)
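    # The 'basic' (reverse percentile) interval below reflects the
    # percentile interval about the observed statistic:
    # (2*theta_hat - ci_u, 2*theta_hat - ci_l).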
    if method == 'basic':  # see [3]
        theta_hat = statistic(*data, axis=-1)
        ci_l, ci_u = 2*theta_hat - ci_u, 2*theta_hat - ci_l

    return BootstrapResult(confidence_interval=ConfidenceInterval(ci_l, ci_u),
                           bootstrap_distribution=theta_hat_b,
                           standard_error=np.std(theta_hat_b, ddof=1, axis=-1))


def _monte_carlo_test_iv(sample, rvs, statistic, vectorized, n_resamples,
                         batch, alternative, axis):
    """Input validation for `monte_carlo_test`."""

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if not callable(rvs):
        raise TypeError("`rvs` must be callable.")

    if not callable(statistic):
        raise TypeError("`statistic` must be callable.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic_vectorized = _vectorize_statistic(statistic)
    else:
        statistic_vectorized = statistic

    sample = np.atleast_1d(sample)
    sample = np.moveaxis(sample, axis, -1)

    n_resamples_int = int(n_resamples)
    if n_resamples != n_resamples_int or n_resamples_int <= 0:
        raise ValueError("`n_resamples` must be a positive integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    alternatives = {'two-sided', 'greater', 'less'}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f"`alternative` must be in {alternatives}")

    return (sample, rvs, statistic_vectorized, vectorized, n_resamples_int,
            batch_iv, alternative, axis_int)


fields = ['statistic', 'pvalue', 'null_distribution']
MonteCarloTestResult = make_dataclass("MonteCarloTestResult", fields)


def monte_carlo_test(sample, rvs, statistic, *, vectorized=None,
                     n_resamples=9999, batch=None, alternative="two-sided",
                     axis=0):
    r"""
    Monte Carlo test that a sample is drawn from a given distribution.

    The null hypothesis is that the provided `sample` was drawn at random from
    the distribution for which `rvs` generates random variates. The value of
    the `statistic` for the given sample is compared against a Monte Carlo null
    distribution: the value of the statistic for each of `n_resamples`
    samples generated by `rvs`. This gives the p-value, the probability of
    observing such an extreme value of the test statistic under the null
    hypothesis.

    Parameters
    ----------
    sample : array-like
        An array of observations.
    rvs : callable
        Generates random variates from the distribution against which `sample`
        will be tested. `rvs` must be a callable that accepts keyword argument
        ``size`` (e.g. ``rvs(size=(m, n))``) and returns an N-d array sample
        of that shape.
    statistic : callable
        Statistic for which the p-value of the hypothesis test is to be
        calculated. `statistic` must be a callable that accepts a sample
        (e.g. ``statistic(sample)``) and returns the resulting statistic.
        If `vectorized` is set ``True``, `statistic` must also accept a keyword
        argument `axis` and be vectorized to compute the statistic along the
        provided `axis` of the sample array.
    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of
        a vectorized statistic typically reduces computation time.
    n_resamples : int, default: 9999
        Number of random samples drawn from `rvs` used to approximate the
        Monte Carlo null distribution.
    batch : int, optional
        The number of Monte Carlo samples to process in each call to
        `statistic`. Memory usage is O(`batch`*``sample.shape[axis]``).
        Default is ``None``, in which case `batch` equals `n_resamples`.
    alternative : {'two-sided', 'less', 'greater'}
        The alternative hypothesis for which the p-value is calculated.
        For each alternative, the p-value is defined as follows.

        - ``'greater'`` : the percentage of the null distribution that is
          greater than or equal to the observed value of the test statistic.
        - ``'less'`` : the percentage of the null distribution that is
          less than or equal to the observed value of the test statistic.
        - ``'two-sided'`` : twice the smaller of the p-values above.

    axis : int, default: 0
        The axis of `sample` over which to calculate the statistic.

    Returns
    -------
    statistic : float or ndarray
        The observed test statistic of the sample.
    pvalue : float or ndarray
        The p-value for the given alternative.
    null_distribution : ndarray
        The values of the test statistic generated under the null hypothesis.

    References
    ----------

    .. [1] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be
       Zero: Calculating Exact P-values When Permutations Are Randomly Drawn."
       Statistical Applications in Genetics and Molecular Biology 9.1 (2010).

    Examples
    --------

    Suppose we wish to test whether a small sample has been drawn from a normal
    distribution. We decide that we will use the skew of the sample as a
    test statistic, and we will consider a p-value of 0.05 to be statistically
    significant.

    >>> import numpy as np
    >>> from scipy import stats
    >>> def statistic(x, axis):
    ...     return stats.skew(x, axis)

    After collecting our data, we calculate the observed value of the test
    statistic.

    >>> rng = np.random.default_rng()
    >>> x = stats.skewnorm.rvs(a=1, size=50, random_state=rng)
    >>> statistic(x, axis=0)
    0.12457412450240658

    To determine the probability of observing such an extreme value of the
    skewness by chance if the sample were drawn from the normal distribution,
    we can perform a Monte Carlo hypothesis test. The test will draw many
    samples at random from the normal distribution, calculate the skewness
    of each sample, and compare our original skewness against this
    distribution to determine an approximate p-value.

    >>> from scipy.stats import monte_carlo_test
    >>> # because our statistic is vectorized, we pass `vectorized=True`
    >>> rvs = lambda size: stats.norm.rvs(size=size, random_state=rng)
    >>> res = monte_carlo_test(x, rvs, statistic, vectorized=True)
    >>> print(res.statistic)
    0.12457412450240658
    >>> print(res.pvalue)
    0.7012

    The probability of obtaining a test statistic less than or equal to the
    observed value under the null hypothesis is ~70%. This is greater than
    our chosen threshold of 5%, so we cannot consider this to be significant
    evidence against the null hypothesis.

    Note that this p-value essentially matches that of
    `scipy.stats.skewtest`, which relies on an asymptotic distribution of a
    test statistic based on the sample skewness.

    >>> stats.skewtest(x).pvalue
    0.6892046027110614

    This asymptotic approximation is not valid for small sample sizes, but
    `monte_carlo_test` can be used with samples of any size.

    >>> x = stats.skewnorm.rvs(a=1, size=7, random_state=rng)
    >>> # stats.skewtest(x) would produce an error due to small sample
    >>> res = monte_carlo_test(x, rvs, statistic, vectorized=True)

    The Monte Carlo distribution of the test statistic is provided for
    further investigation.

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.null_distribution, bins=50)
    >>> ax.set_title("Monte Carlo distribution of test statistic")
    >>> ax.set_xlabel("Value of Statistic")
    >>> ax.set_ylabel("Frequency")
    >>> plt.show()

    """
    args = _monte_carlo_test_iv(sample, rvs, statistic, vectorized,
                                n_resamples, batch, alternative, axis)
    (sample, rvs, statistic, vectorized,
     n_resamples, batch, alternative, axis) = args

    # Some statistics return plain floats; ensure they're at least np.float64
    observed = np.asarray(statistic(sample, axis=-1))[()]

    n_observations = sample.shape[-1]
    batch_nominal = batch or n_resamples
    null_distribution = []
    for k in range(0, n_resamples, batch_nominal):
        batch_actual = min(batch_nominal, n_resamples-k)
        resamples = rvs(size=(batch_actual, n_observations))
        null_distribution.append(statistic(resamples, axis=-1))
    null_distribution = np.concatenate(null_distribution)
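    # Append singleton dimensions so the 1-D null distribution broadcasts
    # against `observed`, which may be an array.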
    null_distribution = null_distribution.reshape([-1] + [1]*observed.ndim)

    def less(null_distribution, observed):
        cmps = null_distribution <= observed
        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
        return pvalues

    def greater(null_distribution, observed):
        cmps = null_distribution >= observed
        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
        return pvalues

    def two_sided(null_distribution, observed):
        pvalues_less = less(null_distribution, observed)
        pvalues_greater = greater(null_distribution, observed)
        pvalues = np.minimum(pvalues_less, pvalues_greater) * 2
        return pvalues

    compare = {"less": less,
               "greater": greater,
               "two-sided": two_sided}

    pvalues = compare[alternative](null_distribution, observed)
    pvalues = np.clip(pvalues, 0, 1)

    return MonteCarloTestResult(observed, pvalues, null_distribution)


attributes = ('statistic', 'pvalue', 'null_distribution')
PermutationTestResult = make_dataclass('PermutationTestResult', attributes)


def _all_partitions_concatenated(ns):
    """
    Generate all partitions of indices of groups of given sizes, concatenated

    `ns` is an iterable of ints.
    """
    def all_partitions(z, n):
        for c in combinations(z, n):
            x0 = set(c)
            x1 = z - x0
            yield [x0, x1]

    def all_partitions_n(z, ns):
        if len(ns) == 0:
            yield [z]
            return
        for c in all_partitions(z, ns[0]):
            for d in all_partitions_n(c[1], ns[1:]):
                yield c[0:1] + d

    z = set(range(np.sum(ns)))
    for partitioning in all_partitions_n(z, ns[:]):
        x = np.concatenate([list(partition)
                            for partition in partitioning]).astype(int)
        yield x
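
# A minimal usage sketch (not part of the public API): for group sizes
# ``(2, 1)`` there are comb(3, 2) == 3 partitions; the first two indices of
# each yielded array form one group and the last index forms the other,
# e.g. array([0, 1, 2]), array([0, 2, 1]), array([1, 2, 0]) (element order
# within a group is arbitrary because groups are built from sets).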


def _batch_generator(iterable, batch):
    """A generator that yields batches of elements from an iterable"""
    iterator = iter(iterable)
    if batch <= 0:
        raise ValueError("`batch` must be positive.")
    z = [item for i, item in zip(range(batch), iterator)]
    while z:    # we don't want StopIteration without yielding an empty list
        yield z
        z = [item for i, item in zip(range(batch), iterator)]
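
# A minimal usage sketch (not part of the public API): the final batch may
# be short.
#
#     >>> list(_batch_generator(range(5), batch=2))
#     [[0, 1], [2, 3], [4]]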


def _pairings_permutations_gen(n_permutations, n_samples, n_obs_sample, batch,
                               random_state):
    # Returns a generator that yields arrays of size
    # `(batch, n_samples, n_obs_sample)`.
    # Each row is an independent permutation of indices 0 to `n_obs_sample`.
    batch = min(batch, n_permutations)

    if hasattr(random_state, 'permuted'):
        def batched_perm_generator():
            indices = np.arange(n_obs_sample)
            indices = np.tile(indices, (batch, n_samples, 1))
            for k in range(0, n_permutations, batch):
                batch_actual = min(batch, n_permutations-k)
                # Don't permute in place, otherwise results depend on `batch`
                permuted_indices = random_state.permuted(indices, axis=-1)
                yield permuted_indices[:batch_actual]
    else:  # RandomState and early Generators don't have `permuted`
        def batched_perm_generator():
            for k in range(0, n_permutations, batch):
                batch_actual = min(batch, n_permutations-k)
                size = (batch_actual, n_samples, n_obs_sample)
                x = random_state.random(size=size)
                yield np.argsort(x, axis=-1)[:batch_actual]

    return batched_perm_generator()
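
# A minimal usage sketch (not part of the public API): each yielded array
# has shape `(batch_actual, n_samples, n_obs_sample)`.
#
#     >>> rng = np.random.default_rng()
#     >>> gen = _pairings_permutations_gen(5, n_samples=2, n_obs_sample=3,
#     ...                                  batch=2, random_state=rng)
#     >>> [perm.shape for perm in gen]
#     [(2, 2, 3), (2, 2, 3), (1, 2, 3)]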


def _calculate_null_both(data, statistic, n_permutations, batch,
                         random_state=None):
    """
    Calculate null distribution for independent sample tests.
    """
    n_samples = len(data)

    # compute number of permutations
    # (distinct partitions of data into samples of these sizes)
    n_obs_i = [sample.shape[-1] for sample in data]  # observations per sample
    n_obs_ic = np.cumsum(n_obs_i)
    n_obs = n_obs_ic[-1]  # total number of observations
    n_max = np.prod([comb(n_obs_ic[i], n_obs_ic[i-1])
                     for i in range(n_samples-1, 0, -1)])

    # perm_generator is an iterator that produces permutations of indices
    # from 0 to n_obs. We'll concatenate the samples, use these indices to
    # permute the data, then split the samples apart again.
    if n_permutations >= n_max:
        exact_test = True
        n_permutations = n_max
        perm_generator = _all_partitions_concatenated(n_obs_i)
    else:
        exact_test = False
        # Neither RandomState.permutation nor Generator.permutation
        # can permute axis-slices independently. If this feature is
        # added in the future, batches of the desired size should be
        # generated in a single call.
        perm_generator = (random_state.permutation(n_obs)
                          for i in range(n_permutations))

    batch = batch or int(n_permutations)
    null_distribution = []

    # First, concatenate all the samples. In batches, permute samples with
    # indices produced by the `perm_generator`, split them into new samples of
    # the original sizes, compute the statistic for each batch, and add these
    # statistic values to the null distribution.
    data = np.concatenate(data, axis=-1)
    for indices in _batch_generator(perm_generator, batch=batch):
        indices = np.array(indices)

        # `indices` is 2D: each row is a permutation of the indices.
        # We use it to index `data` along its last axis, which corresponds
        # with observations.
        # After indexing, the second to last axis of `data_batch` corresponds
        # with permutations, and the last axis corresponds with observations.
        data_batch = data[..., indices]

        # Move the permutation axis to the front: we'll concatenate a list
        # of batched statistic values along this zeroth axis to form the
        # null distribution.
        data_batch = np.moveaxis(data_batch, -2, 0)
        data_batch = np.split(data_batch, n_obs_ic[:-1], axis=-1)
        null_distribution.append(statistic(*data_batch, axis=-1))
    null_distribution = np.concatenate(null_distribution, axis=0)

    return null_distribution, n_permutations, exact_test


def _calculate_null_pairings(data, statistic, n_permutations, batch,
                             random_state=None):
    """
    Calculate null distribution for association tests.
    """
    n_samples = len(data)

    # compute number of permutations (factorial(n) permutations of each sample)
    n_obs_sample = data[0].shape[-1]  # observations per sample; same for each
    n_max = factorial(n_obs_sample)**n_samples

    # `perm_generator` is an iterator that produces a list of permutations of
    # indices from 0 to n_obs_sample, one for each sample.
    if n_permutations >= n_max:
        exact_test = True
        n_permutations = n_max
        batch = batch or int(n_permutations)
        # cartesian product of the sets of all permutations of indices
        perm_generator = product(*(permutations(range(n_obs_sample))
                                   for i in range(n_samples)))
        batched_perm_generator = _batch_generator(perm_generator, batch=batch)
    else:
        exact_test = False
        batch = batch or int(n_permutations)
        # Separate random permutations of indices for each sample.
        # Again, it would be nice if RandomState/Generator.permutation
        # could permute each axis-slice separately.
        args = n_permutations, n_samples, n_obs_sample, batch, random_state
        batched_perm_generator = _pairings_permutations_gen(*args)

    null_distribution = []

    for indices in batched_perm_generator:
        indices = np.array(indices)

        # `indices` is 3D: the zeroth axis is for permutations, the next is
        # for samples, and the last is for observations. Swap the first two
        # to make the zeroth axis correspond with samples, as it does for
        # `data`.
        indices = np.swapaxes(indices, 0, 1)

        # When we're done, `data_batch` will be a list of length `n_samples`.
        # Each element will be a batch of random permutations of one sample.
        # The zeroth axis of each batch will correspond with permutations,
        # and the last will correspond with observations. (This makes it
        # easy to pass into `statistic`.)
        data_batch = [None]*n_samples
        for i in range(n_samples):
            data_batch[i] = data[i][..., indices[i]]
            data_batch[i] = np.moveaxis(data_batch[i], -2, 0)

        null_distribution.append(statistic(*data_batch, axis=-1))
    null_distribution = np.concatenate(null_distribution, axis=0)

    return null_distribution, n_permutations, exact_test


def _calculate_null_samples(data, statistic, n_permutations, batch,
                            random_state=None):
    """
    Calculate null distribution for paired-sample tests.
    """
    n_samples = len(data)

    # By convention, the meaning of the "samples" permutations type for
    # data with only one sample is to flip the sign of the observations.
    # Achieve this by adding a second sample - the negative of the original.
    if n_samples == 1:
        data = [data[0], -data[0]]

    # The "samples" permutation strategy is the same as the "pairings"
    # strategy except the roles of samples and observations are flipped.
    # So swap these axes, then we'll use the function for the "pairings"
    # strategy to do all the work!
    data = np.swapaxes(data, 0, -1)

    # (Of course, the user's statistic doesn't know what we've done here,
    # so we need to pass it what it's expecting.)
    def statistic_wrapped(*data, axis):
        data = np.swapaxes(data, 0, -1)
        if n_samples == 1:
            data = data[0:1]
        return statistic(*data, axis=axis)

    return _calculate_null_pairings(data, statistic_wrapped, n_permutations,
                                    batch, random_state)


def _permutation_test_iv(data, statistic, permutation_type, vectorized,
                         n_resamples, batch, alternative, axis, random_state):
    """Input validation for `permutation_test`."""

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    permutation_types = {'samples', 'pairings', 'independent'}
    permutation_type = permutation_type.lower()
    if permutation_type not in permutation_types:
        raise ValueError(f"`permutation_type` must be in {permutation_types}.")

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic = _vectorize_statistic(statistic)

    message = "`data` must be a tuple containing at least two samples"
    try:
        if len(data) < 2 and permutation_type == 'independent':
            raise ValueError(message)
    except TypeError:
        raise TypeError(message)

    data = _broadcast_arrays(data, axis)
    data_iv = []
    for sample in data:
        sample = np.atleast_1d(sample)
        if sample.shape[axis] <= 1:
            raise ValueError("each sample in `data` must contain two or more "
                             "observations along `axis`.")
        sample = np.moveaxis(sample, axis_int, -1)
        data_iv.append(sample)

    n_resamples_int = (int(n_resamples) if not np.isinf(n_resamples)
                       else np.inf)
    if n_resamples != n_resamples_int or n_resamples_int <= 0:
        raise ValueError("`n_resamples` must be a positive integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    alternatives = {'two-sided', 'greater', 'less'}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f"`alternative` must be in {alternatives}")

    random_state = check_random_state(random_state)

    return (data_iv, statistic, permutation_type, vectorized, n_resamples_int,
            batch_iv, alternative, axis_int, random_state)


def permutation_test(data, statistic, *, permutation_type='independent',
                     vectorized=None, n_resamples=9999, batch=None,
                     alternative="two-sided", axis=0, random_state=None):
    r"""
    Performs a permutation test of a given statistic on provided data.

    For independent sample statistics, the null hypothesis is that the data are
    randomly sampled from the same distribution.
    For paired sample statistics, two null hypotheses can be tested:
    that the data are paired at random or that the data are assigned to samples
    at random.

    Parameters
    ----------
    data : iterable of array-like
        Contains the samples, each of which is an array of observations.
        Dimensions of sample arrays must be compatible for broadcasting except
        along `axis`.
    statistic : callable
        Statistic for which the p-value of the hypothesis test is to be
        calculated. `statistic` must be a callable that accepts samples
        as separate arguments (e.g. ``statistic(*data)``) and returns the
        resulting statistic.
        If `vectorized` is set ``True``, `statistic` must also accept a keyword
        argument `axis` and be vectorized to compute the statistic along the
        provided `axis` of the sample arrays.
    permutation_type : {'independent', 'samples', 'pairings'}, optional
        The type of permutations to be performed, in accordance with the
        null hypothesis. The first two permutation types are for paired sample
        statistics, in which all samples contain the same number of
        observations and observations with corresponding indices along `axis`
        are considered to be paired; the third is for independent sample
        statistics.

        - ``'samples'`` : observations are assigned to different samples
          but remain paired with the same observations from other samples.
          This permutation type is appropriate for paired sample hypothesis
          tests such as the Wilcoxon signed-rank test and the paired t-test.
        - ``'pairings'`` : observations are paired with different observations,
          but they remain within the same sample. This permutation type is
          appropriate for association/correlation tests with statistics such
          as Spearman's :math:`\rho`, Kendall's :math:`\tau`, and Pearson's
          :math:`r`.
        - ``'independent'`` (default) : observations are assigned to different
          samples. Samples may contain different numbers of observations. This
          permutation type is appropriate for independent sample hypothesis
          tests such as the Mann-Whitney :math:`U` test and the independent
          sample t-test.

        Please see the Notes section below for more detailed descriptions
        of the permutation types.

    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use
        of a vectorized statistic typically reduces computation time.
    n_resamples : int or np.inf, default: 9999
        Number of random permutations (resamples) used to approximate the null
        distribution. If greater than or equal to the number of distinct
        permutations, the exact null distribution will be computed.
        Note that the number of distinct permutations grows very rapidly with
        the sizes of samples, so exact tests are feasible only for very small
        data sets.
    batch : int, optional
        The number of permutations to process in each call to `statistic`.
        Memory usage is O(`batch`*``n``), where ``n`` is the total size
        of all samples, regardless of the value of `vectorized`. Default is
        ``None``, in which case ``batch`` is the number of permutations.
    alternative : {'two-sided', 'less', 'greater'}, optional
        The alternative hypothesis for which the p-value is calculated.
        For each alternative, the p-value is defined for exact tests as
        follows.

        - ``'greater'`` : the percentage of the null distribution that is
          greater than or equal to the observed value of the test statistic.
        - ``'less'`` : the percentage of the null distribution that is
          less than or equal to the observed value of the test statistic.
        - ``'two-sided'`` (default) : twice the smaller of the p-values above.

        Note that p-values for randomized tests are calculated according to the
        conservative (over-estimated) approximation suggested in [2]_ and [3]_
        rather than the unbiased estimator suggested in [4]_. That is, when
        calculating the proportion of the randomized null distribution that is
        as extreme as the observed value of the test statistic, the values in
        the numerator and denominator are both increased by one. An
        interpretation of this adjustment is that the observed value of the
        test statistic is always included as an element of the randomized
        null distribution.
        The convention used for two-sided p-values is not universal;
        the observed test statistic and null distribution are returned in
        case a different definition is preferred.

    axis : int, default: 0
        The axis of the (broadcasted) samples over which to calculate the
        statistic. If samples have a different number of dimensions,
        singleton dimensions are prepended to samples with fewer dimensions
        before `axis` is considered.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        Pseudorandom number generator state used to generate permutations.

        If `random_state` is ``None`` (default), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Returns
    -------
    statistic : float or ndarray
        The observed test statistic of the data.
    pvalue : float or ndarray
        The p-value for the given alternative.
    null_distribution : ndarray
        The values of the test statistic generated under the null hypothesis.

    Notes
    -----

    The three types of permutation tests supported by this function are
    described below.

    **Unpaired statistics** (``permutation_type='independent'``):

    The null hypothesis associated with this permutation type is that all
    observations are sampled from the same underlying distribution and that
    they have been assigned to one of the samples at random.

    Suppose ``data`` contains two samples; e.g. ``a, b = data``.
    When ``1 < n_resamples < binom(n, k)``, where

    * ``k`` is the number of observations in ``a``,
    * ``n`` is the total number of observations in ``a`` and ``b``, and
    * ``binom(n, k)`` is the binomial coefficient (``n`` choose ``k``),
    the data are pooled (concatenated), randomly assigned to either the first
    or second sample, and the statistic is calculated. This process is
    performed repeatedly, ``n_resamples`` times, generating a distribution of
    the statistic under the null hypothesis. The statistic of the original
    data is compared to this distribution to determine the p-value.

    When ``n_resamples >= binom(n, k)``, an exact test is performed: the data
    are *partitioned* between the samples in each distinct way exactly once,
    and the exact null distribution is formed.
    Note that for a given partitioning of the data between the samples,
    only one ordering/permutation of the data *within* each sample is
    considered. For statistics that do not depend on the order of the data
    within samples, this dramatically reduces computational cost without
    affecting the shape of the null distribution (because the frequency/count
    of each value is affected by the same factor).

    For ``a = [a1, a2, a3, a4]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``x = [b3, a1, a2, b2]`` and ``y = [a4, b1, a3]``.
    Because only one ordering/permutation of the data *within* each sample
    is considered in an exact test, a resampling like ``x = [b3, a1, b2, a2]``
    and ``y = [a4, a3, b1]`` would *not* be considered distinct from the
    example above.

    ``permutation_type='independent'`` does not support one-sample statistics,
    but it can be applied to statistics with more than two samples. In this
    case, if ``n`` is an array of the number of observations within each
    sample, the number of distinct partitions is::
        np.prod([binom(sum(n[i:]), sum(n[i+1:])) for i in range(len(n)-1)])

    For example, for two samples with two observations each (``n = [2, 2]``),
    this gives ``binom(4, 2) = 6`` distinct partitions.

    **Paired statistics, permute pairings** (``permutation_type='pairings'``):

    The null hypothesis associated with this permutation type is that
    observations within each sample are drawn from the same underlying
    distribution and that pairings with elements of other samples are
    assigned at random.

    Suppose ``data`` contains only one sample; e.g. ``a, = data``, and we
    wish to consider all possible pairings of elements of ``a`` with elements
    of a second sample, ``b``. Let ``n`` be the number of observations in
    ``a``, which must also equal the number of observations in ``b``.

    When ``1 < n_resamples < factorial(n)``, the elements of ``a`` are
    randomly permuted. The user-supplied statistic accepts one data argument,
    say ``a_perm``, and calculates the statistic considering ``a_perm`` and
    ``b``. This process is performed repeatedly, ``n_resamples`` times,
    generating a distribution of the statistic under the null hypothesis.
    The statistic of the original data is compared to this distribution to
    determine the p-value.

    When ``n_resamples >= factorial(n)``, an exact test is performed:
    ``a`` is permuted in each distinct way exactly once. Therefore, the
    `statistic` is computed for each unique pairing of samples between ``a``
    and ``b`` exactly once.

    For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``a_perm = [a3, a1, a2]`` while ``b`` is left
    in its original order.

    ``permutation_type='pairings'`` supports ``data`` containing any number
    of samples, each of which must contain the same number of observations.
    All samples provided in ``data`` are permuted *independently*. Therefore,
    if ``m`` is the number of samples and ``n`` is the number of observations
    within each sample, then the number of permutations in an exact test is::

        factorial(n)**m

    Note that if a two-sample statistic, for example, does not inherently
    depend on the order in which observations are provided - only on the
    *pairings* of observations - then only one of the two samples should be
    provided in ``data``. This dramatically reduces computational cost without
    affecting the shape of the null distribution (because the frequency/count
    of each value is affected by the same factor).

    **Paired statistics, permute samples** (``permutation_type='samples'``):

    The null hypothesis associated with this permutation type is that
    observations within each pair are drawn from the same underlying
    distribution and that the sample to which they are assigned is random.

    Suppose ``data`` contains two samples; e.g. ``a, b = data``.
    Let ``n`` be the number of observations in ``a``, which must also equal
    the number of observations in ``b``.

    When ``1 < n_resamples < 2**n``, the elements of ``a`` and ``b`` are
    randomly swapped between samples (maintaining their pairings) and the
    statistic is calculated. This process is performed repeatedly,
    ``n_resamples`` times, generating a distribution of the statistic under
    the null hypothesis. The statistic of the original data is compared to
    this distribution to determine the p-value.

    When ``n_resamples >= 2**n``, an exact test is performed: the observations
    are assigned to the two samples in each distinct way (while maintaining
    pairings) exactly once.

    For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``x = [b1, a2, b3]`` and ``y = [a1, b2, a3]``.

    ``permutation_type='samples'`` supports ``data`` containing any number
    of samples, each of which must contain the same number of observations.
    If ``data`` contains more than one sample, paired observations within
    ``data`` are exchanged between samples *independently*. Therefore, if ``m``
    is the number of samples and ``n`` is the number of observations within
    each sample, then the number of permutations in an exact test is::
1379 factorial(m)**n
1381 Several paired-sample statistical tests, such as the Wilcoxon signed rank
1382 test and paired-sample t-test, can be performed considering only the
1383 *difference* between two paired elements. Accordingly, if ``data`` contains
1384 only one sample, then the null distribution is formed by independently
1385 changing the *sign* of each observation.
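For example (a minimal sketch with made-up data), a paired comparison can
be carried out on the differences alone; `permutation_test` then forms the
null distribution by flipping signs:

>>> import numpy as np
>>> from scipy.stats import permutation_test
>>> rng = np.random.default_rng()
>>> before = rng.standard_normal(8)
>>> after = before + rng.standard_normal(8)
>>> def statistic(d, axis):
...     return np.mean(d, axis=axis)
>>> res = permutation_test((after - before,), statistic, vectorized=True,
...                        permutation_type='samples')  # 2**8 == 256, exact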
1387 .. warning::
1388 The p-value is calculated by counting the elements of the null
1389 distribution that are as extreme or more extreme than the observed
1390 value of the statistic. Due to the use of finite precision arithmetic,
1391 some statistic functions return numerically distinct values when the
1392 theoretical values would be exactly equal. In some cases, this could
1393 lead to a large error in the calculated p-value. `permutation_test`
1394 guards against this by considering elements in the null distribution
1395 that are "close" (within a factor of ``1+1e-14``) to the observed
1396 value of the test statistic as equal to the observed value of the
1397 test statistic. However, the user is advised to inspect the null
1398 distribution to assess whether this method of comparison is
1399 appropriate, and if not, calculate the p-value manually. See example
1400 below.
1402 References
1403 ----------
1405 .. [1] R. A. Fisher. The Design of Experiments, 6th Ed (1951).
1406 .. [2] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be
1407 Zero: Calculating Exact P-values When Permutations Are Randomly Drawn."
1408 Statistical Applications in Genetics and Molecular Biology 9.1 (2010).
1409 .. [3] M. D. Ernst. "Permutation Methods: A Basis for Exact Inference".
1410 Statistical Science (2004).
1411 .. [4] B. Efron and R. J. Tibshirani. An Introduction to the Bootstrap
1412 (1993).
1414 Examples
1415 --------
1417 Suppose we wish to test whether two samples are drawn from the same
1418 distribution. Assume that the underlying distributions are unknown to us,
1419 and that before observing the data, we hypothesized that the mean of the
1420 first sample would be less than that of the second sample. We decide that
1421 we will use the difference between the sample means as a test statistic,
1422 and we will consider a p-value of 0.05 to be statistically significant.
1424 For efficiency, we write the function defining the test statistic in a
1425 vectorized fashion: the samples ``x`` and ``y`` can be ND arrays, and the
1426 statistic will be calculated for each axis-slice along `axis`.
1428 >>> import numpy as np
1429 >>> def statistic(x, y, axis):
1430 ... return np.mean(x, axis=axis) - np.mean(y, axis=axis)
1432 After collecting our data, we calculate the observed value of the test
1433 statistic.
1435 >>> from scipy.stats import norm
1436 >>> rng = np.random.default_rng()
1437 >>> x = norm.rvs(size=5, random_state=rng)
1438 >>> y = norm.rvs(size=6, loc=3, random_state=rng)
1439 >>> statistic(x, y, 0)
1440 -3.5411688580987266
1442 Indeed, the test statistic is negative, suggesting that the true mean of
1443 the distribution underlying ``x`` is less than that of the distribution
1444 underlying ``y``. To determine the probability of this occurring by chance
1445 if the two samples were drawn from the same distribution, we perform
1446 a permutation test.
1448 >>> from scipy.stats import permutation_test
1449 >>> # because our statistic is vectorized, we pass `vectorized=True`
1450 >>> # `n_resamples=np.inf` indicates that an exact test is to be performed
1451 >>> res = permutation_test((x, y), statistic, vectorized=True,
1452 ... n_resamples=np.inf, alternative='less')
1453 >>> print(res.statistic)
1454 -3.5411688580987266
1455 >>> print(res.pvalue)
1456 0.004329004329004329
1458 The probability of obtaining a test statistic less than or equal to the
1459 observed value under the null hypothesis is 0.4329%. This is less than our
1460 chosen threshold of 5%, so we consider this to be significant evidence
1461 against the null hypothesis in favor of the alternative.
1463 Because the size of the samples above was small, `permutation_test` could
1464 perform an exact test. For larger samples, we resort to a randomized
1465 permutation test.
1467 >>> x = norm.rvs(size=100, random_state=rng)
1468 >>> y = norm.rvs(size=120, loc=0.3, random_state=rng)
1469 >>> res = permutation_test((x, y), statistic, n_resamples=100000,
1470 ... vectorized=True, alternative='less',
1471 ... random_state=rng)
1472 >>> print(res.statistic)
1473 -0.5230459671240913
1474 >>> print(res.pvalue)
1475 0.00016999830001699983
1477 The approximate probability of obtaining a test statistic less than or
1478 equal to the observed value under the null hypothesis is about 0.017%. This is
1479 again less than our chosen threshold of 5%, so again we have significant
1480 evidence to reject the null hypothesis in favor of the alternative.
1482 For large samples and number of permutations, the result is comparable to
1483 that of the corresponding asymptotic test, the independent sample t-test.
1485 >>> from scipy.stats import ttest_ind
1486 >>> res_asymptotic = ttest_ind(x, y, alternative='less')
1487 >>> print(res_asymptotic.pvalue)
1488 0.00012688101537979522
1490 The permutation distribution of the test statistic is provided for
1491 further investigation.
1493 >>> import matplotlib.pyplot as plt
1494 >>> plt.hist(res.null_distribution, bins=50)
1495 >>> plt.title("Permutation distribution of test statistic")
1496 >>> plt.xlabel("Value of Statistic")
1497 >>> plt.ylabel("Frequency")
1498 >>> plt.show()
1500 Inspection of the null distribution is essential if the statistic suffers
1501 from inaccuracy due to limited machine precision. Consider the following
1502 case:
1504 >>> from scipy.stats import pearsonr
1505 >>> x = [1, 2, 4, 3]
1506 >>> y = [2, 4, 6, 8]
1507 >>> def statistic(x, y):
1508 ... return pearsonr(x, y).statistic
1509 >>> res = permutation_test((x, y), statistic, vectorized=False,
1510 ... permutation_type='pairings',
1511 ... alternative='greater')
1512 >>> r, pvalue, null = res.statistic, res.pvalue, res.null_distribution
1514 In this case, some elements of the null distribution differ from the
1515 observed value of the correlation coefficient ``r`` due to numerical noise.
1516 We manually inspect the elements of the null distribution that are nearly
1517 the same as the observed value of the test statistic.
1519 >>> r
1520 0.8
1521 >>> unique = np.unique(null)
1522 >>> unique
1523 array([-1. , -0.8, -0.8, -0.6, -0.4, -0.2, -0.2, 0. , 0.2, 0.2, 0.4,
1524 0.6, 0.8, 0.8, 1. ]) # may vary
1525 >>> unique[np.isclose(r, unique)].tolist()
1526 [0.7999999999999999, 0.8]
1528 If `permutation_test` were to perform the comparison naively, the
1529 elements of the null distribution with value ``0.7999999999999999`` would
1530 not be considered as extreme or more extreme than the observed value of the
1531 statistic, so the calculated p-value would be too small.
1533 >>> incorrect_pvalue = np.count_nonzero(null >= r) / len(null)
1534 >>> incorrect_pvalue
1535 0.1111111111111111 # may vary
1537 Instead, `permutation_test` treats elements of the null distribution that
1538 are within ``max(1e-14, abs(r)*1e-14)`` of the observed value of the
1539 statistic ``r`` as equal to ``r``.
1541 >>> correct_pvalue = np.count_nonzero(null >= r - 1e-14) / len(null)
1542 >>> correct_pvalue
1543 0.16666666666666666
1544 >>> res.pvalue == correct_pvalue
1545 True
1547 This method of comparison is expected to be accurate in most practical
1548 situations, but the user is advised to assess this by inspecting the
1549 elements of the null distribution that are close to the observed value
1550 of the statistic. Also, consider the use of statistics that can be
1551 calculated using exact arithmetic (e.g. integer statistics).
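For example (an illustrative sketch; ``rank_sum`` is a hypothetical helper
suited to ``permutation_type='independent'``), a rank-based statistic is
integer-valued, so theoretically equal elements of the null distribution
are numerically identical as well:

>>> import numpy as np
>>> def rank_sum(x, y):
...     # sum of the ordinal ranks of ``x`` within the pooled data;
...     # an integer, immune to floating-point rounding
...     pooled = np.concatenate([x, y])
...     ranks = np.argsort(np.argsort(pooled)) + 1
...     return ranks[:len(x)].sum()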
1553 """
1554 args = _permutation_test_iv(data, statistic, permutation_type, vectorized,
1555 n_resamples, batch, alternative, axis,
1556 random_state)
1557 (data, statistic, permutation_type, vectorized, n_resamples, batch,
1558 alternative, axis, random_state) = args
1560 observed = statistic(*data, axis=-1)
1562 null_calculators = {"pairings": _calculate_null_pairings,
1563 "samples": _calculate_null_samples,
1564 "independent": _calculate_null_both}
1565 null_calculator_args = (data, statistic, n_resamples,
1566 batch, random_state)
1567 calculate_null = null_calculators[permutation_type]
1568 null_distribution, n_resamples, exact_test = (
1569 calculate_null(*null_calculator_args))
1571 # See References [2] and [3]: for a randomized test, the observed statistic is counted as a member of the null distribution, adding 1 to the numerator and denominator of the p-value
1572 adjustment = 0 if exact_test else 1
1574 # relative tolerance for detecting numerically distinct but
1575 # theoretically equal values in the null distribution
1576 eps = 1e-14
1577 gamma = np.maximum(eps, np.abs(eps * observed))
1579 def less(null_distribution, observed):
1580 cmps = null_distribution <= observed + gamma
1581 pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment)
1582 return pvalues
1584 def greater(null_distribution, observed):
1585 cmps = null_distribution >= observed - gamma
1586 pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment)
1587 return pvalues
1589 def two_sided(null_distribution, observed):
1590 pvalues_less = less(null_distribution, observed)
1591 pvalues_greater = greater(null_distribution, observed)
1592 pvalues = np.minimum(pvalues_less, pvalues_greater) * 2
1593 return pvalues
1595 compare = {"less": less,
1596 "greater": greater,
1597 "two-sided": two_sided}
1599 pvalues = compare[alternative](null_distribution, observed)
1600 pvalues = np.clip(pvalues, 0, 1)  # the two-sided doubling can exceed 1
1602 return PermutationTestResult(observed, pvalues, null_distribution)