import warnings
import numpy as np
from itertools import combinations, permutations, product
import inspect

from scipy._lib._util import check_random_state
from scipy.special import ndtr, ndtri, comb, factorial
from scipy._lib._util import rng_integers
from dataclasses import make_dataclass
from ._common import ConfidenceInterval
from ._axis_nan_policy import _broadcast_concatenate, _broadcast_arrays
from ._warnings_errors import DegenerateDataWarning

__all__ = ['bootstrap', 'monte_carlo_test', 'permutation_test']


def _vectorize_statistic(statistic):
    """Vectorize an n-sample statistic"""
    # This is a little cleaner than np.nditer at the expense of some data
    # copying: concatenate samples together, then use np.apply_along_axis
    def stat_nd(*data, axis=0):
        lengths = [sample.shape[axis] for sample in data]
        split_indices = np.cumsum(lengths)[:-1]
        z = _broadcast_concatenate(data, axis)

        # move working axis to position 0 so that new dimensions in the output
        # of `statistic` are _prepended_. ("This axis is removed, and replaced
        # with new dimensions...")
        z = np.moveaxis(z, axis, 0)

        def stat_1d(z):
            data = np.split(z, split_indices)
            return statistic(*data)

        return np.apply_along_axis(stat_1d, 0, z)[()]
    return stat_nd
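
# Editor's sketch (illustrative, not part of the module): `_vectorize_statistic`
# turns a 1-D-only statistic into one that accepts an `axis` keyword and
# broadcasts over the remaining dimensions. Assuming a difference of means:
#
#     mean_diff = _vectorize_statistic(lambda x, y: np.mean(x) - np.mean(y))
#     x = np.zeros((10, 3))    # 10 observations in each of 3 slices
#     y = np.ones((7, 3))      # sample lengths may differ along `axis`
#     mean_diff(x, y, axis=0)  # -> array([-1., -1., -1.])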


def _jackknife_resample(sample, batch=None):
    """Jackknife resample the sample. Only one-sample stats for now."""
    n = sample.shape[-1]
    batch_nominal = batch or n

    for k in range(0, n, batch_nominal):
        # col_start:col_end are the observations to remove
        batch_actual = min(batch_nominal, n-k)

        # jackknife - each row leaves out one observation
        j = np.ones((batch_actual, n), dtype=bool)
        np.fill_diagonal(j[:, k:k+batch_actual], False)
        i = np.arange(n)
        i = np.broadcast_to(i, (batch_actual, n))
        i = i[j].reshape((batch_actual, n-1))

        resamples = sample[..., i]
        yield resamples
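
# Editor's sketch: each batch yielded above has shape
# `sample.shape[:-1] + (batch_actual, n-1)`; row r of the new axis is the
# sample with observation k+r removed. For example:
#
#     sample = np.array([1., 2., 3., 4.])
#     batch, = _jackknife_resample(sample)  # one batch since batch=None
#     batch.shape                           # -> (4, 3)
#     batch[0]                              # -> array([2., 3., 4.])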


def _bootstrap_resample(sample, n_resamples=None, random_state=None):
    """Bootstrap resample the sample."""
    n = sample.shape[-1]

    # bootstrap - each row is a random resample of original observations
    i = rng_integers(random_state, 0, n, (n_resamples, n))

    resamples = sample[..., i]
    return resamples
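
# Editor's sketch: the result has shape `sample.shape[:-1] + (n_resamples, n)`;
# each slice along the new axis is one resample drawn with replacement:
#
#     rng = np.random.default_rng()
#     _bootstrap_resample(np.arange(5.), n_resamples=2, random_state=rng).shape
#     # -> (2, 5)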


def _percentile_of_score(a, score, axis):
    """Vectorized, simplified `scipy.stats.percentileofscore`.
    Uses logic of the 'mean' value of percentileofscore's kind parameter.

    Unlike `stats.percentileofscore`, the percentile returned is a fraction
    in [0, 1].
    """
    B = a.shape[axis]
    return ((a < score).sum(axis=axis) + (a <= score).sum(axis=axis)) / (2 * B)
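
# Editor's note - a small worked example of the 'mean' rule above: with
# a = [0.1, 0.2, 0.3, 0.4] and score = 0.3, two values are strictly below
# the score and three are at or below it, so the result is
# (2 + 3) / (2 * 4) = 0.625.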


def _percentile_along_axis(theta_hat_b, alpha):
    """`np.percentile` with different percentile for each slice."""
    # the difference between _percentile_along_axis and np.percentile is that
    # np.percentile gets _all_ the qs for each axis slice, whereas
    # _percentile_along_axis gets the q corresponding with each axis slice
    shape = theta_hat_b.shape[:-1]
    alpha = np.broadcast_to(alpha, shape)
    percentiles = np.zeros_like(alpha, dtype=np.float64)
    for indices, alpha_i in np.ndenumerate(alpha):
        if np.isnan(alpha_i):
            # e.g. when bootstrap distribution has only one unique element
            msg = (
                "The BCa confidence interval cannot be calculated."
                " This problem is known to occur when the distribution"
                " is degenerate or the statistic is np.min."
            )
            warnings.warn(DegenerateDataWarning(msg))
            percentiles[indices] = np.nan
        else:
            theta_hat_b_i = theta_hat_b[indices]
            percentiles[indices] = np.percentile(theta_hat_b_i, alpha_i)
    return percentiles[()]  # return scalar instead of 0d array
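
# Editor's sketch: unlike `np.percentile`, each slice gets its own `q`:
#
#     theta = np.arange(200.).reshape(2, 100)
#     _percentile_along_axis(theta, np.array([10, 90]))
#     # -> array([  9.9, 189.1]): the 10th percentile of row 0 and the
#     #    90th percentile of row 1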


def _bca_interval(data, statistic, axis, alpha, theta_hat_b, batch):
    """Bias-corrected and accelerated interval."""
    # closely follows [1] 14.3 and 15.4 (Eq. 15.36)

    # calculate z0_hat
    theta_hat = np.asarray(statistic(*data, axis=axis))[..., None]
    percentile = _percentile_of_score(theta_hat_b, theta_hat, axis=-1)
    z0_hat = ndtri(percentile)

    # calculate a_hat
    theta_hat_ji = []  # j is for sample of data, i is for jackknife resample
    for j, sample in enumerate(data):
        # _jackknife_resample will add an axis prior to the last axis that
        # corresponds with the different jackknife resamples. Do the same for
        # each sample of the data to ensure broadcastability. We need to
        # create a copy of the list containing the samples anyway, so do this
        # in the loop to simplify the code. This is not the bottleneck...
        samples = [np.expand_dims(sample, -2) for sample in data]
        theta_hat_i = []
        for jackknife_sample in _jackknife_resample(sample, batch):
            samples[j] = jackknife_sample
            broadcasted = _broadcast_arrays(samples, axis=-1)
            theta_hat_i.append(statistic(*broadcasted, axis=-1))
        theta_hat_ji.append(theta_hat_i)

    theta_hat_ji = [np.concatenate(theta_hat_i, axis=-1)
                    for theta_hat_i in theta_hat_ji]

    n_j = [theta_hat_i.shape[-1] for theta_hat_i in theta_hat_ji]

    theta_hat_j_dot = [theta_hat_i.mean(axis=-1, keepdims=True)
                       for theta_hat_i in theta_hat_ji]

    U_ji = [(n - 1) * (theta_hat_dot - theta_hat_i)
            for theta_hat_dot, theta_hat_i, n
            in zip(theta_hat_j_dot, theta_hat_ji, n_j)]

    nums = [(U_i**3).sum(axis=-1)/n**3 for U_i, n in zip(U_ji, n_j)]
    dens = [(U_i**2).sum(axis=-1)/n**2 for U_i, n in zip(U_ji, n_j)]
    a_hat = 1/6 * sum(nums) / sum(dens)**(3/2)

    # calculate alpha_1, alpha_2
    z_alpha = ndtri(alpha)
    z_1alpha = -z_alpha
    num1 = z0_hat + z_alpha
    alpha_1 = ndtr(z0_hat + num1/(1 - a_hat*num1))
    num2 = z0_hat + z_1alpha
    alpha_2 = ndtr(z0_hat + num2/(1 - a_hat*num2))
    return alpha_1, alpha_2, a_hat  # return a_hat for testing
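
# Editor's note - the endpoints computed above are the BCa-adjusted percentile
# levels of Efron & Tibshirani [1], Sec. 14.3: with bias correction `z0_hat`,
# acceleration `a_hat`, and Phi the standard normal CDF (`ndtr`),
#
#     alpha_1 = Phi(z0_hat + (z0_hat + z_alpha)/(1 - a_hat*(z0_hat + z_alpha)))
#
# and likewise for `alpha_2` with z_{1-alpha} = -z_alpha in place of z_alpha.
# With z0_hat = 0 and a_hat = 0, these reduce to the ordinary percentile
# interval.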


def _bootstrap_iv(data, statistic, vectorized, paired, axis, confidence_level,
                  n_resamples, batch, method, bootstrap_result, random_state):
    """Input validation and standardization for `bootstrap`."""

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic = _vectorize_statistic(statistic)

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    n_samples = 0
    try:
        n_samples = len(data)
    except TypeError:
        raise ValueError("`data` must be a sequence of samples.")

    if n_samples == 0:
        raise ValueError("`data` must contain at least one sample.")

    data_iv = []
    for sample in data:
        sample = np.atleast_1d(sample)
        if sample.shape[axis_int] <= 1:
            raise ValueError("each sample in `data` must contain two or more "
                             "observations along `axis`.")
        sample = np.moveaxis(sample, axis_int, -1)
        data_iv.append(sample)

    if paired not in {True, False}:
        raise ValueError("`paired` must be `True` or `False`.")

    if paired:
        n = data_iv[0].shape[-1]
        for sample in data_iv[1:]:
            if sample.shape[-1] != n:
                message = ("When `paired is True`, all samples must have the "
                           "same length along `axis`")
                raise ValueError(message)

        # to generate the bootstrap distribution for paired-sample statistics,
        # resample the indices of the observations
        def statistic(i, axis=-1, data=data_iv, unpaired_statistic=statistic):
            data = [sample[..., i] for sample in data]
            return unpaired_statistic(*data, axis=axis)

        data_iv = [np.arange(n)]

    confidence_level_float = float(confidence_level)

    n_resamples_int = int(n_resamples)
    if n_resamples != n_resamples_int or n_resamples_int < 0:
        raise ValueError("`n_resamples` must be a non-negative integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    methods = {'percentile', 'basic', 'bca'}
    method = method.lower()
    if method not in methods:
        raise ValueError(f"`method` must be in {methods}")

    message = "`bootstrap_result` must have attribute `bootstrap_distribution`"
    if (bootstrap_result is not None
            and not hasattr(bootstrap_result, "bootstrap_distribution")):
        raise ValueError(message)

    message = ("Either `bootstrap_result.bootstrap_distribution.size` or "
               "`n_resamples` must be positive.")
    if ((not bootstrap_result or
         not bootstrap_result.bootstrap_distribution.size)
            and n_resamples_int == 0):
        raise ValueError(message)

    random_state = check_random_state(random_state)

    return (data_iv, statistic, vectorized, paired, axis_int,
            confidence_level_float, n_resamples_int, batch_iv,
            method, bootstrap_result, random_state)


fields = ['confidence_interval', 'bootstrap_distribution', 'standard_error']
BootstrapResult = make_dataclass("BootstrapResult", fields)


def bootstrap(data, statistic, *, n_resamples=9999, batch=None,
              vectorized=None, paired=False, axis=0, confidence_level=0.95,
              method='BCa', bootstrap_result=None, random_state=None):
    r"""
    Compute a two-sided bootstrap confidence interval of a statistic.

    When `method` is ``'percentile'``, a bootstrap confidence interval is
    computed according to the following procedure.

    1. Resample the data: for each sample in `data` and for each of
       `n_resamples`, take a random sample of the original sample
       (with replacement) of the same size as the original sample.

    2. Compute the bootstrap distribution of the statistic: for each set of
       resamples, compute the test statistic.

    3. Determine the confidence interval: find the interval of the bootstrap
       distribution that is

       - symmetric about the median and
       - contains `confidence_level` of the resampled statistic values.

    While the ``'percentile'`` method is the most intuitive, it is rarely
    used in practice. Two more common methods are available, ``'basic'``
    ('reverse percentile') and ``'BCa'`` ('bias-corrected and accelerated');
    they differ in how step 3 is performed.

    If the samples in `data` are taken at random from their respective
    distributions :math:`n` times, the confidence interval returned by
    `bootstrap` will contain the true value of the statistic for those
    distributions approximately `confidence_level`:math:`\, \times \, n` times.

    Parameters
    ----------
    data : sequence of array-like
        Each element of data is a sample from an underlying distribution.
    statistic : callable
        Statistic for which the confidence interval is to be calculated.
        `statistic` must be a callable that accepts ``len(data)`` samples
        as separate arguments and returns the resulting statistic.
        If `vectorized` is set ``True``,
        `statistic` must also accept a keyword argument `axis` and be
        vectorized to compute the statistic along the provided `axis`.
    n_resamples : int, default: ``9999``
        The number of resamples performed to form the bootstrap distribution
        of the statistic.
    batch : int, optional
        The number of resamples to process in each vectorized call to
        `statistic`. Memory usage is O(`batch`*``n``), where ``n`` is the
        sample size. Default is ``None``, in which case ``batch = n_resamples``
        (or ``batch = max(n_resamples, n)`` for ``method='BCa'``).
    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of
        a vectorized statistic typically reduces computation time.
    paired : bool, default: ``False``
        Whether the statistic treats corresponding elements of the samples
        in `data` as paired.
    axis : int, default: ``0``
        The axis of the samples in `data` along which the `statistic` is
        calculated.
    confidence_level : float, default: ``0.95``
        The confidence level of the confidence interval.
    method : {'percentile', 'basic', 'bca'}, default: ``'BCa'``
        Whether to return the 'percentile' bootstrap confidence interval
        (``'percentile'``), the 'basic' (AKA 'reverse') bootstrap confidence
        interval (``'basic'``), or the bias-corrected and accelerated bootstrap
        confidence interval (``'BCa'``).
    bootstrap_result : BootstrapResult, optional
        Provide the result object returned by a previous call to `bootstrap`
        to include the previous bootstrap distribution in the new bootstrap
        distribution. This can be used, for example, to change
        `confidence_level`, change `method`, or see the effect of performing
        additional resampling without repeating computations.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        Pseudorandom number generator state used to generate resamples.

        If `random_state` is ``None`` (or `np.random`), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Returns
    -------
    res : BootstrapResult
        An object with attributes:

        confidence_interval : ConfidenceInterval
            The bootstrap confidence interval as an instance of
            `collections.namedtuple` with attributes `low` and `high`.
        bootstrap_distribution : ndarray
            The bootstrap distribution, that is, the value of `statistic` for
            each resample. The last dimension corresponds with the resamples
            (e.g. ``res.bootstrap_distribution.shape[-1] == n_resamples``).
        standard_error : float or ndarray
            The bootstrap standard error, that is, the sample standard
            deviation of the bootstrap distribution.

    Warns
    -----
    `~scipy.stats.DegenerateDataWarning`
        Generated when ``method='BCa'`` and the bootstrap distribution is
        degenerate (e.g. all elements are identical).

    Notes
    -----
    Elements of the confidence interval may be NaN for ``method='BCa'`` if
    the bootstrap distribution is degenerate (e.g. all elements are identical).
    In this case, consider using another `method` or inspecting `data` for
    indications that other analysis may be more appropriate (e.g. all
    observations are identical).

    References
    ----------
    .. [1] B. Efron and R. J. Tibshirani, An Introduction to the Bootstrap,
       Chapman & Hall/CRC, Boca Raton, FL, USA (1993)
    .. [2] Nathaniel E. Helwig, "Bootstrap Confidence Intervals",
       http://users.stat.umn.edu/~helwig/notes/bootci-Notes.pdf
    .. [3] Bootstrapping (statistics), Wikipedia,
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Examples
    --------
    Suppose we have sampled data from an unknown distribution.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> from scipy.stats import norm
    >>> dist = norm(loc=2, scale=4)  # our "unknown" distribution
    >>> data = dist.rvs(size=100, random_state=rng)

    We are interested in the standard deviation of the distribution.

    >>> std_true = dist.std()      # the true value of the statistic
    >>> print(std_true)
    4.0
    >>> std_sample = np.std(data)  # the sample statistic
    >>> print(std_sample)
    3.9460644295563863

    The bootstrap is used to approximate the variability we would expect if we
    were to repeatedly sample from the unknown distribution and calculate the
    statistic of the sample each time. It does this by repeatedly resampling
    values *from the original sample* with replacement and calculating the
    statistic of each resample. This results in a "bootstrap distribution" of
    the statistic.

    >>> import matplotlib.pyplot as plt
    >>> from scipy.stats import bootstrap
    >>> data = (data,)  # samples must be in a sequence
    >>> res = bootstrap(data, np.std, confidence_level=0.9,
    ...                 random_state=rng)
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.bootstrap_distribution, bins=25)
    >>> ax.set_title('Bootstrap Distribution')
    >>> ax.set_xlabel('statistic value')
    >>> ax.set_ylabel('frequency')
    >>> plt.show()

    The standard error quantifies this variability. It is calculated as the
    standard deviation of the bootstrap distribution.

    >>> res.standard_error
    0.24427002125829136
    >>> res.standard_error == np.std(res.bootstrap_distribution, ddof=1)
    True

    The bootstrap distribution of the statistic is often approximately normal
    with scale equal to the standard error.

    >>> x = np.linspace(3, 5)
    >>> pdf = norm.pdf(x, loc=std_sample, scale=res.standard_error)
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.bootstrap_distribution, bins=25, density=True)
    >>> ax.plot(x, pdf)
    >>> ax.set_title('Normal Approximation of the Bootstrap Distribution')
    >>> ax.set_xlabel('statistic value')
    >>> ax.set_ylabel('pdf')
    >>> plt.show()

    This suggests that we could construct a 90% confidence interval on the
    statistic based on quantiles of this normal distribution.

    >>> norm.interval(0.9, loc=std_sample, scale=res.standard_error)
    (3.5442759991341726, 4.3478528599786)

    Due to the central limit theorem, this normal approximation is accurate
    for a variety of statistics and distributions underlying the samples;
    however, the approximation is not reliable in all cases. Because
    `bootstrap` is designed to work with arbitrary underlying distributions
    and statistics, it uses more advanced techniques to generate an accurate
    confidence interval.

    >>> print(res.confidence_interval)
    ConfidenceInterval(low=3.57655333533867, high=4.382043696342881)

    If we sample from the original distribution 1000 times and form a bootstrap
    confidence interval for each sample, the confidence interval
    contains the true value of the statistic approximately 90% of the time.

    >>> n_trials = 1000
    >>> ci_contains_true_std = 0
    >>> for i in range(n_trials):
    ...     data = (dist.rvs(size=100, random_state=rng),)
    ...     ci = bootstrap(data, np.std, confidence_level=0.9, n_resamples=1000,
    ...                    random_state=rng).confidence_interval
    ...     if ci[0] < std_true < ci[1]:
    ...         ci_contains_true_std += 1
    >>> print(ci_contains_true_std)
    875

    Rather than writing a loop, we can also determine the confidence intervals
    for all 1000 samples at once.

    >>> data = (dist.rvs(size=(n_trials, 100), random_state=rng),)
    >>> res = bootstrap(data, np.std, axis=-1, confidence_level=0.9,
    ...                 n_resamples=1000, random_state=rng)
    >>> ci_l, ci_u = res.confidence_interval

    Here, `ci_l` and `ci_u` contain the confidence interval for each of the
    ``n_trials = 1000`` samples.

    >>> print(ci_l[995:])
    [3.77729695 3.75090233 3.45829131 3.34078217 3.48072829]
    >>> print(ci_u[995:])
    [4.88316666 4.86924034 4.32032996 4.2822427  4.59360598]

    And again, approximately 90% contain the true value, ``std_true = 4``.

    >>> print(np.sum((ci_l < std_true) & (std_true < ci_u)))
    900

    `bootstrap` can also be used to estimate confidence intervals of
    multi-sample statistics, including those calculated by hypothesis
    tests. `scipy.stats.mood` performs Mood's test for equal scale parameters,
    and it returns two outputs: a statistic and a p-value. To get a
    confidence interval for the test statistic, we first wrap
    `scipy.stats.mood` in a function that accepts two sample arguments,
    accepts an `axis` keyword argument, and returns only the statistic.

    >>> from scipy.stats import mood
    >>> def my_statistic(sample1, sample2, axis):
    ...     statistic, _ = mood(sample1, sample2, axis=axis)
    ...     return statistic

    Here, we use the 'basic' method with the default 95% confidence level.

    >>> sample1 = norm.rvs(scale=1, size=100, random_state=rng)
    >>> sample2 = norm.rvs(scale=2, size=100, random_state=rng)
    >>> data = (sample1, sample2)
    >>> res = bootstrap(data, my_statistic, method='basic', random_state=rng)
    >>> print(mood(sample1, sample2)[0])  # element 0 is the statistic
    -5.521109549096542
    >>> print(res.confidence_interval)
    ConfidenceInterval(low=-7.255994487314675, high=-4.016202624747605)

    The bootstrap estimate of the standard error is also available.

    >>> print(res.standard_error)
    0.8344963846318795

    Paired-sample statistics work, too. For example, consider the Pearson
    correlation coefficient.

    >>> from scipy.stats import pearsonr
    >>> n = 100
    >>> x = np.linspace(0, 10, n)
    >>> y = x + rng.uniform(size=n)
    >>> print(pearsonr(x, y)[0])  # element 0 is the statistic
    0.9962357936065914

    We wrap `pearsonr` so that it returns only the statistic.

    >>> def my_statistic(x, y):
    ...     return pearsonr(x, y)[0]

    We call `bootstrap` using ``paired=True``.
    Also, since ``my_statistic`` isn't vectorized to calculate the statistic
    along a given axis, we pass in ``vectorized=False``.

    >>> res = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                 random_state=rng)
    >>> print(res.confidence_interval)
    ConfidenceInterval(low=0.9950085825848624, high=0.9971212407917498)

    The result object can be passed back into `bootstrap` to perform additional
    resampling:

    >>> len(res.bootstrap_distribution)
    9999
    >>> res = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                 n_resamples=1001, random_state=rng,
    ...                 bootstrap_result=res)
    >>> len(res.bootstrap_distribution)
    11000

    or to change the confidence interval options:

    >>> res2 = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                  n_resamples=0, random_state=rng, bootstrap_result=res,
    ...                  method='percentile', confidence_level=0.9)
    >>> np.testing.assert_equal(res2.bootstrap_distribution,
    ...                         res.bootstrap_distribution)
    >>> res2.confidence_interval
    ConfidenceInterval(low=0.9950035351407804, high=0.9971170323404578)

    without repeating computation of the original bootstrap distribution.

    """
    # Input validation
    args = _bootstrap_iv(data, statistic, vectorized, paired, axis,
                         confidence_level, n_resamples, batch, method,
                         bootstrap_result, random_state)
    data, statistic, vectorized, paired, axis, confidence_level = args[:6]
    n_resamples, batch, method, bootstrap_result, random_state = args[6:]

    theta_hat_b = ([] if bootstrap_result is None
                   else [bootstrap_result.bootstrap_distribution])

    batch_nominal = batch or n_resamples or 1

    for k in range(0, n_resamples, batch_nominal):
        batch_actual = min(batch_nominal, n_resamples-k)
        # Generate resamples
        resampled_data = []
        for sample in data:
            resample = _bootstrap_resample(sample, n_resamples=batch_actual,
                                           random_state=random_state)
            resampled_data.append(resample)

        # Compute bootstrap distribution of statistic
        theta_hat_b.append(statistic(*resampled_data, axis=-1))
    theta_hat_b = np.concatenate(theta_hat_b, axis=-1)

    # Calculate percentile interval
    alpha = (1 - confidence_level)/2
    if method == 'bca':
        interval = _bca_interval(data, statistic, axis=-1, alpha=alpha,
                                 theta_hat_b=theta_hat_b, batch=batch)[:2]
        percentile_fun = _percentile_along_axis
    else:
        interval = alpha, 1-alpha

        def percentile_fun(a, q):
            return np.percentile(a=a, q=q, axis=-1)

    # Calculate confidence interval of statistic
    ci_l = percentile_fun(theta_hat_b, interval[0]*100)
    ci_u = percentile_fun(theta_hat_b, interval[1]*100)
    if method == 'basic':  # see [3]
        theta_hat = statistic(*data, axis=-1)
        ci_l, ci_u = 2*theta_hat - ci_u, 2*theta_hat - ci_l

    return BootstrapResult(confidence_interval=ConfidenceInterval(ci_l, ci_u),
                           bootstrap_distribution=theta_hat_b,
                           standard_error=np.std(theta_hat_b, ddof=1, axis=-1))


def _monte_carlo_test_iv(sample, rvs, statistic, vectorized, n_resamples,
                         batch, alternative, axis):
    """Input validation for `monte_carlo_test`."""

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if not callable(rvs):
        raise TypeError("`rvs` must be callable.")

    if not callable(statistic):
        raise TypeError("`statistic` must be callable.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic_vectorized = _vectorize_statistic(statistic)
    else:
        statistic_vectorized = statistic

    sample = np.atleast_1d(sample)
    sample = np.moveaxis(sample, axis, -1)

    n_resamples_int = int(n_resamples)
    if n_resamples != n_resamples_int or n_resamples_int <= 0:
        raise ValueError("`n_resamples` must be a positive integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    alternatives = {'two-sided', 'greater', 'less'}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f"`alternative` must be in {alternatives}")

    return (sample, rvs, statistic_vectorized, vectorized, n_resamples_int,
            batch_iv, alternative, axis_int)


fields = ['statistic', 'pvalue', 'null_distribution']
MonteCarloTestResult = make_dataclass("MonteCarloTestResult", fields)


def monte_carlo_test(sample, rvs, statistic, *, vectorized=None,
                     n_resamples=9999, batch=None, alternative="two-sided",
                     axis=0):
    r"""
    Monte Carlo test that a sample is drawn from a given distribution.

    The null hypothesis is that the provided `sample` was drawn at random from
    the distribution for which `rvs` generates random variates. The value of
    the `statistic` for the given sample is compared against a Monte Carlo null
    distribution: the value of the statistic for each of `n_resamples`
    samples generated by `rvs`. This gives the p-value, the probability of
    observing such an extreme value of the test statistic under the null
    hypothesis.

    Parameters
    ----------
    sample : array-like
        An array of observations.
    rvs : callable
        Generates random variates from the distribution against which `sample`
        will be tested. `rvs` must be a callable that accepts keyword argument
        ``size`` (e.g. ``rvs(size=(m, n))``) and returns an N-d array sample
        of that shape.
    statistic : callable
        Statistic for which the p-value of the hypothesis test is to be
        calculated. `statistic` must be a callable that accepts a sample
        (e.g. ``statistic(sample)``) and returns the resulting statistic.
        If `vectorized` is set ``True``, `statistic` must also accept a keyword
        argument `axis` and be vectorized to compute the statistic along the
        provided `axis` of the sample array.
    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of
        a vectorized statistic typically reduces computation time.
    n_resamples : int, default: 9999
        Number of samples drawn from `rvs` used to approximate the Monte
        Carlo null distribution.
    batch : int, optional
        The number of Monte Carlo samples to process in each call to
        `statistic`. Memory usage is O(`batch`*``sample.shape[axis]``).
        Default is ``None``, in which case `batch` equals `n_resamples`.
    alternative : {'two-sided', 'less', 'greater'}
        The alternative hypothesis for which the p-value is calculated.
        For each alternative, the p-value is defined as follows.

        - ``'greater'`` : the percentage of the null distribution that is
          greater than or equal to the observed value of the test statistic.
        - ``'less'`` : the percentage of the null distribution that is
          less than or equal to the observed value of the test statistic.
        - ``'two-sided'`` : twice the smaller of the p-values above.

    axis : int, default: 0
        The axis of `sample` over which to calculate the statistic.

    Returns
    -------
    statistic : float or ndarray
        The observed test statistic of the sample.
    pvalue : float or ndarray
        The p-value for the given alternative.
    null_distribution : ndarray
        The values of the test statistic generated under the null hypothesis.

    References
    ----------

    .. [1] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be
       Zero: Calculating Exact P-values When Permutations Are Randomly Drawn."
       Statistical Applications in Genetics and Molecular Biology 9.1 (2010).

    Examples
    --------

    Suppose we wish to test whether a small sample has been drawn from a normal
    distribution. We decide that we will use the skew of the sample as a
    test statistic, and we will consider a p-value of 0.05 to be statistically
    significant.

    >>> import numpy as np
    >>> from scipy import stats
    >>> def statistic(x, axis):
    ...     return stats.skew(x, axis)

    After collecting our data, we calculate the observed value of the test
    statistic.

    >>> rng = np.random.default_rng()
    >>> x = stats.skewnorm.rvs(a=1, size=50, random_state=rng)
    >>> statistic(x, axis=0)
    0.12457412450240658

    To determine the probability of observing such an extreme value of the
    skewness by chance if the sample were drawn from the normal distribution,
    we can perform a Monte Carlo hypothesis test. The test will draw many
    samples at random from their normal distribution, calculate the skewness
    of each sample, and compare our original skewness against this
    distribution to determine an approximate p-value.

    >>> from scipy.stats import monte_carlo_test
    >>> # because our statistic is vectorized, we pass `vectorized=True`
    >>> rvs = lambda size: stats.norm.rvs(size=size, random_state=rng)
    >>> res = monte_carlo_test(x, rvs, statistic, vectorized=True)
    >>> print(res.statistic)
    0.12457412450240658
    >>> print(res.pvalue)
    0.7012

    The probability of obtaining a test statistic less than or equal to the
    observed value under the null hypothesis is ~70%. This is greater than
    our chosen threshold of 5%, so we cannot consider this to be significant
    evidence against the null hypothesis.

    Note that this p-value essentially matches that of
    `scipy.stats.skewtest`, which relies on an asymptotic distribution of a
    test statistic based on the sample skewness.

    >>> stats.skewtest(x).pvalue
    0.6892046027110614

    This asymptotic approximation is not valid for small sample sizes, but
    `monte_carlo_test` can be used with samples of any size.

    >>> x = stats.skewnorm.rvs(a=1, size=7, random_state=rng)
    >>> # stats.skewtest(x) would produce an error due to small sample
    >>> res = monte_carlo_test(x, rvs, statistic, vectorized=True)

    The Monte Carlo distribution of the test statistic is provided for
    further investigation.

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.null_distribution, bins=50)
    >>> ax.set_title("Monte Carlo distribution of test statistic")
    >>> ax.set_xlabel("Value of Statistic")
    >>> ax.set_ylabel("Frequency")
    >>> plt.show()

    """
    args = _monte_carlo_test_iv(sample, rvs, statistic, vectorized,
                                n_resamples, batch, alternative, axis)
    (sample, rvs, statistic, vectorized,
     n_resamples, batch, alternative, axis) = args

    # Some statistics return plain floats; ensure they're at least np.float64
    observed = np.asarray(statistic(sample, axis=-1))[()]

    n_observations = sample.shape[-1]
    batch_nominal = batch or n_resamples
    null_distribution = []
    for k in range(0, n_resamples, batch_nominal):
        batch_actual = min(batch_nominal, n_resamples-k)
        resamples = rvs(size=(batch_actual, n_observations))
        null_distribution.append(statistic(resamples, axis=-1))
    null_distribution = np.concatenate(null_distribution)
    null_distribution = null_distribution.reshape([-1] + [1]*observed.ndim)

    def less(null_distribution, observed):
        cmps = null_distribution <= observed
        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
        return pvalues

    def greater(null_distribution, observed):
        cmps = null_distribution >= observed
        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
        return pvalues

    def two_sided(null_distribution, observed):
        pvalues_less = less(null_distribution, observed)
        pvalues_greater = greater(null_distribution, observed)
        pvalues = np.minimum(pvalues_less, pvalues_greater) * 2
        return pvalues

    compare = {"less": less,
               "greater": greater,
               "two-sided": two_sided}

    pvalues = compare[alternative](null_distribution, observed)
    pvalues = np.clip(pvalues, 0, 1)

    return MonteCarloTestResult(observed, pvalues, null_distribution)


attributes = ('statistic', 'pvalue', 'null_distribution')
PermutationTestResult = make_dataclass('PermutationTestResult', attributes)


def _all_partitions_concatenated(ns):
    """
    Generate all partitions of indices of groups of given sizes, concatenated

    `ns` is an iterable of ints.
    """
    def all_partitions(z, n):
        for c in combinations(z, n):
            x0 = set(c)
            x1 = z - x0
            yield [x0, x1]

    def all_partitions_n(z, ns):
        if len(ns) == 0:
            yield [z]
            return
        for c in all_partitions(z, ns[0]):
            for d in all_partitions_n(c[1], ns[1:]):
                yield c[0:1] + d

    z = set(range(np.sum(ns)))
    for partitioning in all_partitions_n(z, ns[:]):
        x = np.concatenate([list(partition)
                            for partition in partitioning]).astype(int)
        yield x
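
# Editor's sketch: for ns = [2, 1], one index array is yielded per distinct
# partition of {0, 1, 2} into groups of sizes 2 and 1 (iteration order may
# vary, since it depends on set ordering):
#
#     list(_all_partitions_concatenated([2, 1]))
#     # -> [array([0, 1, 2]), array([0, 2, 1]), array([1, 2, 0])]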


def _batch_generator(iterable, batch):
    """A generator that yields batches of elements from an iterable"""
    iterator = iter(iterable)
    if batch <= 0:
        raise ValueError("`batch` must be positive.")
    z = [item for i, item in zip(range(batch), iterator)]
    while z:  # we don't want StopIteration without yielding an empty list
        yield z
        z = [item for i, item in zip(range(batch), iterator)]
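
# Editor's sketch: batches are lists and the final batch may be short:
#
#     list(_batch_generator(range(5), batch=2))
#     # -> [[0, 1], [2, 3], [4]]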


def _pairings_permutations_gen(n_permutations, n_samples, n_obs_sample, batch,
                               random_state):
    # Returns a generator that yields arrays of size
    # `(batch, n_samples, n_obs_sample)`.
    # Each row is an independent permutation of indices 0 to `n_obs_sample`.
    batch = min(batch, n_permutations)

    if hasattr(random_state, 'permuted'):
        def batched_perm_generator():
            indices = np.arange(n_obs_sample)
            indices = np.tile(indices, (batch, n_samples, 1))
            for k in range(0, n_permutations, batch):
                batch_actual = min(batch, n_permutations-k)
                # Don't permute in place, otherwise results depend on `batch`
                permuted_indices = random_state.permuted(indices, axis=-1)
                yield permuted_indices[:batch_actual]
    else:  # RandomState and early Generators don't have `permuted`
        def batched_perm_generator():
            for k in range(0, n_permutations, batch):
                batch_actual = min(batch, n_permutations-k)
                size = (batch_actual, n_samples, n_obs_sample)
                x = random_state.random(size=size)
                yield np.argsort(x, axis=-1)[:batch_actual]

    return batched_perm_generator()


def _calculate_null_both(data, statistic, n_permutations, batch,
                         random_state=None):
    """
    Calculate null distribution for independent sample tests.
    """
    n_samples = len(data)

    # compute number of permutations
    # (distinct partitions of data into samples of these sizes)
    n_obs_i = [sample.shape[-1] for sample in data]  # observations per sample
    n_obs_ic = np.cumsum(n_obs_i)
    n_obs = n_obs_ic[-1]  # total number of observations
    n_max = np.prod([comb(n_obs_ic[i], n_obs_ic[i-1])
                     for i in range(n_samples-1, 0, -1)])

    # perm_generator is an iterator that produces permutations of indices
    # from 0 to n_obs. We'll concatenate the samples, use these indices to
    # permute the data, then split the samples apart again.
    if n_permutations >= n_max:
        exact_test = True
        n_permutations = n_max
        perm_generator = _all_partitions_concatenated(n_obs_i)
    else:
        exact_test = False
        # Neither RandomState.permutation nor Generator.permutation
        # can permute axis-slices independently. If this feature is
        # added in the future, batches of the desired size should be
        # generated in a single call.
        perm_generator = (random_state.permutation(n_obs)
                          for i in range(n_permutations))

    batch = batch or int(n_permutations)
    null_distribution = []

    # First, concatenate all the samples. In batches, permute samples with
    # indices produced by the `perm_generator`, split them into new samples of
    # the original sizes, compute the statistic for each batch, and add these
    # statistic values to the null distribution.
    data = np.concatenate(data, axis=-1)
    for indices in _batch_generator(perm_generator, batch=batch):
        indices = np.array(indices)

        # `indices` is 2D: each row is a permutation of the indices.
        # We use it to index `data` along its last axis, which corresponds
        # with observations.
        # After indexing, the second to last axis of `data_batch` corresponds
        # with permutations, and the last axis corresponds with observations.
        data_batch = data[..., indices]

        # Move the permutation axis to the front: we'll concatenate a list
        # of batched statistic values along this zeroth axis to form the
        # null distribution.
        data_batch = np.moveaxis(data_batch, -2, 0)
        data_batch = np.split(data_batch, n_obs_ic[:-1], axis=-1)
        null_distribution.append(statistic(*data_batch, axis=-1))
    null_distribution = np.concatenate(null_distribution, axis=0)

    return null_distribution, n_permutations, exact_test


def _calculate_null_pairings(data, statistic, n_permutations, batch,
                             random_state=None):
    """
    Calculate null distribution for association tests.
    """
    n_samples = len(data)

    # compute number of permutations (factorial(n) permutations of each sample)
    n_obs_sample = data[0].shape[-1]  # observations per sample; same for each
    n_max = factorial(n_obs_sample)**n_samples

    # `perm_generator` is an iterator that produces a list of permutations of
    # indices from 0 to n_obs_sample, one for each sample.
    if n_permutations >= n_max:
        exact_test = True
        n_permutations = n_max
        batch = batch or int(n_permutations)
        # cartesian product of the sets of all permutations of indices
        perm_generator = product(*(permutations(range(n_obs_sample))
                                   for i in range(n_samples)))
        batched_perm_generator = _batch_generator(perm_generator, batch=batch)
    else:
        exact_test = False
        batch = batch or int(n_permutations)
        # Separate random permutations of indices for each sample.
        # Again, it would be nice if RandomState/Generator.permutation
        # could permute each axis-slice separately.
        args = n_permutations, n_samples, n_obs_sample, batch, random_state
        batched_perm_generator = _pairings_permutations_gen(*args)

    null_distribution = []

    for indices in batched_perm_generator:
        indices = np.array(indices)

        # `indices` is 3D: the zeroth axis is for permutations, the next is
        # for samples, and the last is for observations. Swap the first two
        # to make the zeroth axis correspond with samples, as it does for
        # `data`.
        indices = np.swapaxes(indices, 0, 1)

        # When we're done, `data_batch` will be a list of length `n_samples`.
        # Each element will be a batch of random permutations of one sample.
        # The zeroth axis of each batch will correspond with permutations,
        # and the last will correspond with observations. (This makes it
        # easy to pass into `statistic`.)
        data_batch = [None]*n_samples
        for i in range(n_samples):
            data_batch[i] = data[i][..., indices[i]]
            data_batch[i] = np.moveaxis(data_batch[i], -2, 0)

        null_distribution.append(statistic(*data_batch, axis=-1))
    null_distribution = np.concatenate(null_distribution, axis=0)

    return null_distribution, n_permutations, exact_test


def _calculate_null_samples(data, statistic, n_permutations, batch,
                            random_state=None):
    """
    Calculate null distribution for paired-sample tests.
    """
    n_samples = len(data)

    # By convention, the meaning of the "samples" permutations type for
    # data with only one sample is to flip the sign of the observations.
    # Achieve this by adding a second sample - the negative of the original.
    if n_samples == 1:
        data = [data[0], -data[0]]

    # The "samples" permutation strategy is the same as the "pairings"
    # strategy except the roles of samples and observations are flipped.
    # So swap these axes, then we'll use the function for the "pairings"
    # strategy to do all the work!
    data = np.swapaxes(data, 0, -1)

    # (Of course, the user's statistic doesn't know what we've done here,
    # so we need to pass it what it's expecting.)
    def statistic_wrapped(*data, axis):
        data = np.swapaxes(data, 0, -1)
        if n_samples == 1:
            data = data[0:1]
        return statistic(*data, axis=axis)

    return _calculate_null_pairings(data, statistic_wrapped, n_permutations,
                                    batch, random_state)
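
# Editor's note - for a single sample `x`, the convention above turns the
# "samples" null distribution into a sign-flipping test: each permutation
# chooses, per observation, whether the statistic sees x[i] or -x[i],
# as in a one-sample/paired sign-flip permutation test.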


def _permutation_test_iv(data, statistic, permutation_type, vectorized,
                         n_resamples, batch, alternative, axis, random_state):
    """Input validation for `permutation_test`."""

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    permutation_types = {'samples', 'pairings', 'independent'}
    permutation_type = permutation_type.lower()
    if permutation_type not in permutation_types:
        raise ValueError(f"`permutation_type` must be in {permutation_types}.")

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic = _vectorize_statistic(statistic)

    message = "`data` must be a tuple containing at least two samples"
    try:
        if len(data) < 2 and permutation_type == 'independent':
            raise ValueError(message)
    except TypeError:
        raise TypeError(message)

    data = _broadcast_arrays(data, axis)
    data_iv = []
    for sample in data:
        sample = np.atleast_1d(sample)
        if sample.shape[axis] <= 1:
            raise ValueError("each sample in `data` must contain two or more "
                             "observations along `axis`.")
        sample = np.moveaxis(sample, axis_int, -1)
        data_iv.append(sample)

    n_resamples_int = (int(n_resamples) if not np.isinf(n_resamples)
                       else np.inf)
    if n_resamples != n_resamples_int or n_resamples_int <= 0:
        raise ValueError("`n_resamples` must be a positive integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    alternatives = {'two-sided', 'greater', 'less'}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f"`alternative` must be in {alternatives}")

    random_state = check_random_state(random_state)

    return (data_iv, statistic, permutation_type, vectorized, n_resamples_int,
            batch_iv, alternative, axis_int, random_state)


def permutation_test(data, statistic, *, permutation_type='independent',
                     vectorized=None, n_resamples=9999, batch=None,
                     alternative="two-sided", axis=0, random_state=None):
    r"""
    Performs a permutation test of a given statistic on provided data.

    For independent sample statistics, the null hypothesis is that the data are
    randomly sampled from the same distribution.
    For paired sample statistics, two null hypotheses can be tested:
    that the data are paired at random or that the data are assigned to samples
    at random.

    Parameters
    ----------
    data : iterable of array-like
        Contains the samples, each of which is an array of observations.
        Dimensions of sample arrays must be compatible for broadcasting except
        along `axis`.
    statistic : callable
        Statistic for which the p-value of the hypothesis test is to be
        calculated. `statistic` must be a callable that accepts samples
        as separate arguments (e.g. ``statistic(*data)``) and returns the
        resulting statistic.
        If `vectorized` is set ``True``, `statistic` must also accept a keyword
        argument `axis` and be vectorized to compute the statistic along the
        provided `axis` of the sample arrays.
    permutation_type : {'independent', 'samples', 'pairings'}, optional
        The type of permutations to be performed, in accordance with the
        null hypothesis. The first two permutation types are for paired sample
        statistics, in which all samples contain the same number of
        observations and observations with corresponding indices along `axis`
        are considered to be paired; the third is for independent sample
        statistics.

        - ``'samples'`` : observations are assigned to different samples
          but remain paired with the same observations from other samples.
          This permutation type is appropriate for paired sample hypothesis
          tests such as the Wilcoxon signed-rank test and the paired t-test.
        - ``'pairings'`` : observations are paired with different observations,
          but they remain within the same sample. This permutation type is
          appropriate for association/correlation tests with statistics such
          as Spearman's :math:`\rho`, Kendall's :math:`\tau`, and Pearson's
          :math:`r`.
        - ``'independent'`` (default) : observations are assigned to different
          samples. Samples may contain different numbers of observations. This
          permutation type is appropriate for independent sample hypothesis
          tests such as the Mann-Whitney :math:`U` test and the independent
          sample t-test.

        Please see the Notes section below for more detailed descriptions
        of the permutation types.

    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use
        of a vectorized statistic typically reduces computation time.
    n_resamples : int or np.inf, default: 9999
        Number of random permutations (resamples) used to approximate the null
        distribution. If greater than or equal to the number of distinct
        permutations, the exact null distribution will be computed.
        Note that the number of distinct permutations grows very rapidly with
        the sizes of samples, so exact tests are feasible only for very small
        data sets.
    batch : int, optional
        The number of permutations to process in each call to `statistic`.
        Memory usage is O(`batch`*``n``), where ``n`` is the total size
        of all samples, regardless of the value of `vectorized`. Default is
        ``None``, in which case ``batch`` is the number of permutations.
    alternative : {'two-sided', 'less', 'greater'}, optional
        The alternative hypothesis for which the p-value is calculated.
        For each alternative, the p-value is defined for exact tests as
        follows.

        - ``'greater'`` : the percentage of the null distribution that is
          greater than or equal to the observed value of the test statistic.
        - ``'less'`` : the percentage of the null distribution that is
          less than or equal to the observed value of the test statistic.
        - ``'two-sided'`` (default) : twice the smaller of the p-values above.

        Note that p-values for randomized tests are calculated according to the
        conservative (over-estimated) approximation suggested in [2]_ and [3]_
        rather than the unbiased estimator suggested in [4]_. That is, when
        calculating the proportion of the randomized null distribution that is
        as extreme as the observed value of the test statistic, the values in
        the numerator and denominator are both increased by one. An
        interpretation of this adjustment is that the observed value of the
        test statistic is always included as an element of the randomized
        null distribution.
        The convention used for two-sided p-values is not universal;
        the observed test statistic and null distribution are returned in
        case a different definition is preferred.

    axis : int, default: 0
        The axis of the (broadcasted) samples over which to calculate the
        statistic. If samples have a different number of dimensions,
        singleton dimensions are prepended to samples with fewer dimensions
        before `axis` is considered.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        Pseudorandom number generator state used to generate permutations.

        If `random_state` is ``None`` (default), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Returns
    -------
    statistic : float or ndarray
        The observed test statistic of the data.
    pvalue : float or ndarray
        The p-value for the given alternative.
    null_distribution : ndarray
        The values of the test statistic generated under the null hypothesis.

    Notes
    -----

    The three types of permutation tests supported by this function are
    described below.

    **Unpaired statistics** (``permutation_type='independent'``):

    The null hypothesis associated with this permutation type is that all
    observations are sampled from the same underlying distribution and that
    they have been assigned to one of the samples at random.

    Suppose ``data`` contains two samples; e.g. ``a, b = data``.
    When ``1 < n_resamples < binom(n, k)``, where

    * ``k`` is the number of observations in ``a``,
    * ``n`` is the total number of observations in ``a`` and ``b``, and
    * ``binom(n, k)`` is the binomial coefficient (``n`` choose ``k``),

    the data are pooled (concatenated), randomly assigned to either the first
    or second sample, and the statistic is calculated. This process is
    performed repeatedly, `n_resamples` times, generating a distribution of
    the statistic under the null hypothesis. The statistic of the original
    data is compared to this distribution to determine the p-value.

    When ``n_resamples >= binom(n, k)``, an exact test is performed: the data
    are *partitioned* between the samples in each distinct way exactly once,
    and the exact null distribution is formed.
    Note that for a given partitioning of the data between the samples,
    only one ordering/permutation of the data *within* each sample is
    considered. For statistics that do not depend on the order of the data
    within samples, this dramatically reduces computational cost without
    affecting the shape of the null distribution (because the frequency/count
    of each value is affected by the same factor).

    For ``a = [a1, a2, a3, a4]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``x = [b3, a1, a2, b2]`` and ``y = [a4, b1, a3]``.
    Because only one ordering/permutation of the data *within* each sample
    is considered in an exact test, a resampling like ``x = [b3, a1, b2, a2]``
    and ``y = [a4, a3, b1]`` would *not* be considered distinct from the
    example above.

    ``permutation_type='independent'`` does not support one-sample statistics,
    but it can be applied to statistics with more than two samples. In this
    case, if ``n`` is an array of the number of observations within each
    sample, the number of distinct partitions is::

        np.prod([binom(sum(n[i:]), sum(n[i+1:])) for i in range(len(n)-1)])

    **Paired statistics, permute pairings** (``permutation_type='pairings'``):

    The null hypothesis associated with this permutation type is that
    observations within each sample are drawn from the same underlying
    distribution and that pairings with elements of other samples are
    assigned at random.

    Suppose ``data`` contains only one sample; e.g. ``a, = data``, and we
    wish to consider all possible pairings of elements of ``a`` with elements
    of a second sample, ``b``. Let ``n`` be the number of observations in
    ``a``, which must also equal the number of observations in ``b``.

1316 When ``1 < n_resamples < factorial(n)``, the elements of ``a`` are 

1317 randomly permuted. The user-supplied statistic accepts one data argument, 

1318 say ``a_perm``, and calculates the statistic considering ``a_perm`` and 

1319 ``b``. This process is performed repeatedly, `n_resamples` times, 

1320 generating a distribution of the statistic under the null hypothesis. 

1321 The statistic of the original data is compared to this distribution to 

1322 determine the p-value. 

1323 

1324 When ``n_resamples >= factorial(n)``, an exact test is performed: 

1325 ``a`` is permuted in each distinct way exactly once. Therefore, the 

1326 `statistic` is computed for each unique pairing of samples between ``a`` 

1327 and ``b`` exactly once. 

1328 

1329 For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this 

1330 permutation type is ``a_perm = [a3, a1, a2]`` while ``b`` is left 

1331 in its original order. 

1332 

1333 ``permutation_type='pairings'`` supports ``data`` containing any number 

1334 of samples, each of which must contain the same number of observations. 

1335 All samples provided in ``data`` are permuted *independently*. Therefore, 

1336 if ``m`` is the number of samples and ``n`` is the number of observations 

1337 within each sample, then the number of permutations in an exact test is:: 

1338 

1339 factorial(n)**m 

1340 

1341 Note that if a two-sample statistic, for example, does not inherently 

1342 depend on the order in which observations are provided - only on the 

1343 *pairings* of observations - then only one of the two samples should be 

1344 provided in ``data``. This dramatically reduces computational cost without 

1345 affecting the shape of the null distribution (because the frequency/count 

1346 of each value is affected by the same factor). 

1347 
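To make this concrete, the exact null distribution of a pairings-type
test can be built by hand with `itertools.permutations`. A minimal
sketch; the samples and the correlation statistic are illustrative::

    import numpy as np
    from itertools import permutations

    a = np.array([1.0, 2.0, 3.0])
    b = np.array([2.0, 4.0, 5.0])
    # one statistic value per distinct ordering of `a`:
    # factorial(3) = 6 pairings with the fixed sample `b`
    null = [np.corrcoef(np.array(p), b)[0, 1] for p in permutations(a)]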

1348 **Paired statistics, permute samples** (``permutation_type='samples'``): 

1349 

1350 The null hypothesis associated with this permutation type is that 

1351 observations within each pair are drawn from the same underlying 

1352 distribution and that the sample to which they are assigned is random. 

1353 

1354 Suppose ``data`` contains two samples; e.g. ``a, b = data``. 

1355 Let ``n`` be the number of observations in ``a``, which must also equal 

1356 the number of observations in ``b``. 

1357 

1358 When ``1 < n_resamples < 2**n``, the elements of ``a`` and ``b`` are 

1359 randomly swapped between samples (maintaining their pairings) and the 

1360 statistic is calculated. This process is performed repeatedly, 

1361 `n_resamples` times, generating a distribution of the statistic under the 

1362 null hypothesis. The statistic of the original data is compared to this 

1363 distribution to determine the p-value. 

1364 

1365 When ``n_resamples >= 2**n``, an exact test is performed: the observations 

1366 are assigned to the two samples in each distinct way (while maintaining 

1367 pairings) exactly once. 

1368 

1369 For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this 

1370 permutation type is ``x = [b1, a2, b3]`` and ``y = [a1, b2, a3]``. 

1371 

1372 ``permutation_type='samples'`` supports ``data`` containing any number 

1373 of samples, each of which must contain the same number of observations. 

1374 If ``data`` contains more than one sample, paired observations within 

1375 ``data`` are exchanged between samples *independently*. Therefore, if ``m`` 

1376 is the number of samples and ``n`` is the number of observations within 

1377 each sample, then the number of permutations in an exact test is:: 

1378 

1379 factorial(m)**n 

1380 

1381 Several paired-sample statistical tests, such as the Wilcoxon signed rank 

1382 test and paired-sample t-test, can be performed considering only the 

1383 *difference* between two paired elements. Accordingly, if ``data`` contains 

1384 only one sample, then the null distribution is formed by independently 

1385 changing the *sign* of each observation. 

1386 
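For example, the sign-flipping null distribution for a one-sample,
samples-type test can be enumerated directly. A minimal sketch; ``d``
stands for illustrative paired differences::

    import numpy as np
    from itertools import product

    d = np.array([0.5, -1.2, 0.8])  # illustrative differences a - b
    # flip each observation's sign independently: 2**3 = 8 resamples
    signs = np.array(list(product([1, -1], repeat=len(d))))
    null = (signs * d).mean(axis=1)  # null distribution of the mean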

1387 .. warning:: 

1388 The p-value is calculated by counting the elements of the null 

1389 distribution that are as extreme as or more extreme than the observed 

1390 value of the statistic. Due to the use of finite precision arithmetic, 

1391 some statistic functions return numerically distinct values when the 

1392 theoretical values would be exactly equal. In some cases, this could 

1393 lead to a large error in the calculated p-value. `permutation_test` 

1394 guards against this by considering elements in the null distribution 

1395 that are "close" (within a factor of ``1+1e-14``) to the observed 

1396 value of the test statistic as equal to the observed value of the 

1397 test statistic. However, the user is advised to inspect the null 

1398 distribution to assess whether this method of comparison is 

1399 appropriate, and if not, calculate the p-value manually. See example 

1400 below. 

1401 

1402 References 

1403 ---------- 

1404 

1405 .. [1] R. A. Fisher. The Design of Experiments, 6th Ed (1951). 

1406 .. [2] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be 

1407 Zero: Calculating Exact P-values When Permutations Are Randomly Drawn." 

1408 Statistical Applications in Genetics and Molecular Biology 9.1 (2010). 

1409 .. [3] M. D. Ernst. "Permutation Methods: A Basis for Exact Inference". 

1410 Statistical Science (2004). 

1411 .. [4] B. Efron and R. J. Tibshirani. An Introduction to the Bootstrap 

1412 (1993). 

1413 

1414 Examples 

1415 -------- 

1416 

1417 Suppose we wish to test whether two samples are drawn from the same 

1418 distribution. Assume that the underlying distributions are unknown to us, 

1419 and that before observing the data, we hypothesized that the mean of the 

1420 first sample would be less than that of the second sample. We decide that 

1421 we will use the difference between the sample means as a test statistic, 

1422 and we will consider a p-value of 0.05 to be statistically significant. 

1423 

1424 For efficiency, we write the function defining the test statistic in a 

1425 vectorized fashion: the samples ``x`` and ``y`` can be ND arrays, and the 

1426 statistic will be calculated for each axis-slice along `axis`. 

1427 

1428 >>> import numpy as np 

1429 >>> def statistic(x, y, axis): 

1430 ... return np.mean(x, axis=axis) - np.mean(y, axis=axis) 

1431 

1432 After collecting our data, we calculate the observed value of the test 

1433 statistic. 

1434 

1435 >>> from scipy.stats import norm 

1436 >>> rng = np.random.default_rng() 

1437 >>> x = norm.rvs(size=5, random_state=rng) 

1438 >>> y = norm.rvs(size=6, loc=3, random_state=rng) 

1439 >>> statistic(x, y, 0) 

1440 -3.5411688580987266 

1441 

1442 Indeed, the test statistic is negative, suggesting that the true mean of 

1443 the distribution underlying ``x`` is less than that of the distribution 

1444 underlying ``y``. To determine the probability of this occurring by chance 

1445 if the two samples were drawn from the same distribution, we perform 

1446 a permutation test. 

1447 

1448 >>> from scipy.stats import permutation_test 

1449 >>> # because our statistic is vectorized, we pass `vectorized=True` 

1450 >>> # `n_resamples=np.inf` indicates that an exact test is to be performed 

1451 >>> res = permutation_test((x, y), statistic, vectorized=True, 

1452 ... n_resamples=np.inf, alternative='less') 

1453 >>> print(res.statistic) 

1454 -3.5411688580987266 

1455 >>> print(res.pvalue) 

1456 0.004329004329004329 

1457 

1458 The probability of obtaining a test statistic less than or equal to the 

1459 observed value under the null hypothesis is 0.4329%. This is less than our 

1460 chosen threshold of 5%, so we consider this to be significant evidence 

1461 against the null hypothesis in favor of the alternative. 

1462 

1463 Because the size of the samples above was small, `permutation_test` could 

1464 perform an exact test. For larger samples, we resort to a randomized 

1465 permutation test. 

1466 

1467 >>> x = norm.rvs(size=100, random_state=rng) 

1468 >>> y = norm.rvs(size=120, loc=0.3, random_state=rng) 

1469 >>> res = permutation_test((x, y), statistic, n_resamples=100000, 

1470 ... vectorized=True, alternative='less', 

1471 ... random_state=rng) 

1472 >>> print(res.statistic) 

1473 -0.5230459671240913 

1474 >>> print(res.pvalue) 

1475 0.00016999830001699983 

1476 

1477 The approximate probability of obtaining a test statistic less than or 

1478 equal to the observed value under the null hypothesis is about 0.017%. This is 

1479 again less than our chosen threshold of 5%, so again we have significant 

1480 evidence to reject the null hypothesis in favor of the alternative. 

1481 

1482 For large samples and numbers of permutations, the result is comparable to 

1483 that of the corresponding asymptotic test, the independent sample t-test. 

1484 

1485 >>> from scipy.stats import ttest_ind 

1486 >>> res_asymptotic = ttest_ind(x, y, alternative='less') 

1487 >>> print(res_asymptotic.pvalue) 

1488 0.00012688101537979522 

1489 

1490 The permutation distribution of the test statistic is provided for 

1491 further investigation. 

1492 

1493 >>> import matplotlib.pyplot as plt 

1494 >>> plt.hist(res.null_distribution, bins=50) 

1495 >>> plt.title("Permutation distribution of test statistic") 

1496 >>> plt.xlabel("Value of Statistic") 

1497 >>> plt.ylabel("Frequency") 

1498 >>> plt.show() 

1499 

1500 Inspection of the null distribution is essential if the statistic suffers 

1501 from inaccuracy due to limited machine precision. Consider the following 

1502 case: 

1503 

1504 >>> from scipy.stats import pearsonr 

1505 >>> x = [1, 2, 4, 3] 

1506 >>> y = [2, 4, 6, 8] 

1507 >>> def statistic(x, y): 

1508 ... return pearsonr(x, y).statistic 

1509 >>> res = permutation_test((x, y), statistic, vectorized=False, 

1510 ... permutation_type='pairings', 

1511 ... alternative='greater') 

1512 >>> r, pvalue, null = res.statistic, res.pvalue, res.null_distribution 

1513 

1514 In this case, some elements of the null distribution differ from the 

1515 observed value of the correlation coefficient ``r`` due to numerical noise. 

1516 We manually inspect the elements of the null distribution that are nearly 

1517 the same as the observed value of the test statistic. 

1518 

1519 >>> r 

1520 0.8 

1521 >>> unique = np.unique(null) 

1522 >>> unique 

1523 array([-1. , -0.8, -0.8, -0.6, -0.4, -0.2, -0.2, 0. , 0.2, 0.2, 0.4, 

1524 0.6, 0.8, 0.8, 1. ]) # may vary 

1525 >>> unique[np.isclose(r, unique)].tolist() 

1526 [0.7999999999999999, 0.8] 

1527 

1528 If `permutation_test` were to perform the comparison naively, the 

1529 elements of the null distribution with value ``0.7999999999999999`` would 

1530 not be considered as extreme as or more extreme than the observed value of the 

1531 statistic, so the calculated p-value would be too small. 

1532 

1533 >>> incorrect_pvalue = np.count_nonzero(null >= r) / len(null) 

1534 >>> incorrect_pvalue 

1535 0.1111111111111111 # may vary 

1536 

1537 Instead, `permutation_test` treats elements of the null distribution that 

1538 are within ``max(1e-14, abs(r)*1e-14)`` of the observed value of the 

1539 statistic ``r`` to be equal to ``r``. 

1540 

1541 >>> correct_pvalue = np.count_nonzero(null >= r - 1e-14) / len(null) 

1542 >>> correct_pvalue 

1543 0.16666666666666666 

1544 >>> res.pvalue == correct_pvalue 

1545 True 

1546 

1547 This method of comparison is expected to be accurate in most practical 

1548 situations, but the user is advised to assess this by inspecting the 

1549 elements of the null distribution that are close to the observed value 

1550 of the statistic. Also, consider the use of statistics that can be 

1551 calculated using exact arithmetic (e.g. integer statistics). 

1552 

1553 """ 

1554 args = _permutation_test_iv(data, statistic, permutation_type, vectorized, 

1555 n_resamples, batch, alternative, axis, 

1556 random_state) 

1557 (data, statistic, permutation_type, vectorized, n_resamples, batch, 

1558 alternative, axis, random_state) = args 

1559 

1560 observed = statistic(*data, axis=-1) 

1561 

1562 null_calculators = {"pairings": _calculate_null_pairings, 

1563 "samples": _calculate_null_samples, 

1564 "independent": _calculate_null_both} 

1565 null_calculator_args = (data, statistic, n_resamples, 

1566 batch, random_state) 

1567 calculate_null = null_calculators[permutation_type] 

1568 null_distribution, n_resamples, exact_test = ( 

1569 calculate_null(*null_calculator_args)) 

1570 

1571 # See References [2] and [3] 

1572 adjustment = 0 if exact_test else 1 
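    # (A randomized test counts the observed statistic itself as one member
    # of the null distribution, adding 1 to both the numerator and the
    # denominator of the p-value; this keeps randomized p-values from being
    # exactly zero. An exact test needs no such adjustment.)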

1573 

1574 # relative tolerance for detecting numerically distinct but 

1575 # theoretically equal values in the null distribution 

1576 eps = 1e-14 

1577 gamma = np.maximum(eps, np.abs(eps * observed)) 

1578 

1579 def less(null_distribution, observed): 

1580 cmps = null_distribution <= observed + gamma 

1581 pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment) 

1582 return pvalues 

1583 

1584 def greater(null_distribution, observed): 

1585 cmps = null_distribution >= observed - gamma 

1586 pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment) 

1587 return pvalues 

1588 
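    # two-sided p-value: double the smaller one-sided p-value; the result
    # can exceed 1, so it is clipped to [0, 1] below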

1589 def two_sided(null_distribution, observed): 

1590 pvalues_less = less(null_distribution, observed) 

1591 pvalues_greater = greater(null_distribution, observed) 

1592 pvalues = np.minimum(pvalues_less, pvalues_greater) * 2 

1593 return pvalues 

1594 

1595 compare = {"less": less, 

1596 "greater": greater, 

1597 "two-sided": two_sided} 

1598 

1599 pvalues = compare[alternative](null_distribution, observed) 

1600 pvalues = np.clip(pvalues, 0, 1) 

1601 

1602 return PermutationTestResult(observed, pvalues, null_distribution)