Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_resampling.py: 9%
403 statements
coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
import warnings
import numpy as np
from itertools import combinations, permutations, product
import inspect

from scipy._lib._util import check_random_state
from scipy.special import ndtr, ndtri, comb, factorial
from scipy._lib._util import rng_integers
from dataclasses import make_dataclass
from ._common import ConfidenceInterval
from ._axis_nan_policy import _broadcast_concatenate, _broadcast_arrays
from ._warnings_errors import DegenerateDataWarning

__all__ = ['bootstrap', 'monte_carlo_test', 'permutation_test']


def _vectorize_statistic(statistic):
    """Vectorize an n-sample statistic"""
    # This is a little cleaner than np.nditer at the expense of some data
    # copying: concatenate samples together, then use np.apply_along_axis
    def stat_nd(*data, axis=0):
        lengths = [sample.shape[axis] for sample in data]
        split_indices = np.cumsum(lengths)[:-1]
        z = _broadcast_concatenate(data, axis)

        # move working axis to position 0 so that new dimensions in the output
        # of `statistic` are _prepended_. ("This axis is removed, and replaced
        # with new dimensions...")
        z = np.moveaxis(z, axis, 0)

        def stat_1d(z):
            data = np.split(z, split_indices)
            return statistic(*data)

        return np.apply_along_axis(stat_1d, 0, z)[()]
    return stat_nd
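
# A minimal usage sketch (not part of the public API; assumes only NumPy
# semantics): the returned wrapper lets a 1-D-only statistic operate along
# an axis of an n-D array.
#
#     >>> vectorized_mean = _vectorize_statistic(lambda x: np.mean(x))
#     >>> vectorized_mean(np.array([[1., 2.], [3., 4.]]), axis=0)
#     array([2., 3.])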


def _jackknife_resample(sample, batch=None):
    """Jackknife resample the sample. Only one-sample stats for now."""
    n = sample.shape[-1]
    batch_nominal = batch or n

    for k in range(0, n, batch_nominal):
        # col_start:col_end are the observations to remove
        batch_actual = min(batch_nominal, n-k)

        # jackknife - each row leaves out one observation
        j = np.ones((batch_actual, n), dtype=bool)
        np.fill_diagonal(j[:, k:k+batch_actual], False)
        i = np.arange(n)
        i = np.broadcast_to(i, (batch_actual, n))
        i = i[j].reshape((batch_actual, n-1))

        resamples = sample[..., i]
        yield resamples
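
# A minimal usage sketch (not part of the public API): each yielded batch of
# jackknife resamples leaves one observation out per row.
#
#     >>> sample = np.array([1., 2., 3.])
#     >>> next(_jackknife_resample(sample))
#     array([[2., 3.],
#            [1., 3.],
#            [1., 2.]])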


def _bootstrap_resample(sample, n_resamples=None, random_state=None):
    """Bootstrap resample the sample."""
    n = sample.shape[-1]

    # bootstrap - each row is a random resample of original observations
    i = rng_integers(random_state, 0, n, (n_resamples, n))

    resamples = sample[..., i]
    return resamples
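
# A minimal usage sketch (not part of the public API): resamples are stacked
# along a new second-to-last axis.
#
#     >>> rng = np.random.default_rng()
#     >>> _bootstrap_resample(np.arange(5.), n_resamples=3,
#     ...                     random_state=rng).shape
#     (3, 5)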


def _percentile_of_score(a, score, axis):
    """Vectorized, simplified `scipy.stats.percentileofscore`.
    Uses logic of the 'mean' value of percentileofscore's kind parameter.

    Unlike `stats.percentileofscore`, the percentile returned is a fraction
    in [0, 1].
    """
    B = a.shape[axis]
    return ((a < score).sum(axis=axis) + (a <= score).sum(axis=axis)) / (2 * B)
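
# A minimal usage sketch (not part of the public API): values tied with
# `score` count half, per the 'mean' logic.
#
#     >>> _percentile_of_score(np.array([0., 1., 2., 3.]), 2, axis=-1)
#     0.625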


def _percentile_along_axis(theta_hat_b, alpha):
    """`np.percentile` with different percentile for each slice."""
    # the difference between _percentile_along_axis and np.percentile is that
    # np.percentile gets _all_ the qs for each axis slice, whereas
    # _percentile_along_axis gets the q corresponding with each axis slice
    shape = theta_hat_b.shape[:-1]
    alpha = np.broadcast_to(alpha, shape)
    percentiles = np.zeros_like(alpha, dtype=np.float64)
    for indices, alpha_i in np.ndenumerate(alpha):
        if np.isnan(alpha_i):
            # e.g. when bootstrap distribution has only one unique element
            msg = (
                "The BCa confidence interval cannot be calculated."
                " This problem is known to occur when the distribution"
                " is degenerate or the statistic is np.min."
            )
            warnings.warn(DegenerateDataWarning(msg))
            percentiles[indices] = np.nan
        else:
            theta_hat_b_i = theta_hat_b[indices]
            percentiles[indices] = np.percentile(theta_hat_b_i, alpha_i)
    return percentiles[()]  # return scalar instead of 0d array
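
# A minimal usage sketch (not part of the public API): one percentile per
# slice, with `alpha` given in percent as `np.percentile` expects.
#
#     >>> theta_hat_b = np.array([[0., 1., 2., 3., 4.],
#     ...                         [0., 10., 20., 30., 40.]])
#     >>> _percentile_along_axis(theta_hat_b, np.array([50., 25.]))
#     array([ 2., 10.])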


def _bca_interval(data, statistic, axis, alpha, theta_hat_b, batch):
    """Bias-corrected and accelerated interval."""
    # closely follows [1] 14.3 and 15.4 (Eq. 15.36)
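    # With Phi the standard normal CDF (`ndtr`) and Phi^{-1} its inverse
    # (`ndtri`), the endpoints computed below are
    #     alpha_1 = Phi(z0 + (z0 + z_alpha) / (1 - a*(z0 + z_alpha)))
    #     alpha_2 = Phi(z0 + (z0 + z_{1-alpha}) / (1 - a*(z0 + z_{1-alpha})))
    # where z0 is the bias correction and a is the acceleration.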

    # calculate z0_hat
    theta_hat = np.asarray(statistic(*data, axis=axis))[..., None]
    percentile = _percentile_of_score(theta_hat_b, theta_hat, axis=-1)
    z0_hat = ndtri(percentile)

    # calculate a_hat
    theta_hat_ji = []  # j is for sample of data, i is for jackknife resample
    for j, sample in enumerate(data):
        # _jackknife_resample will add an axis prior to the last axis that
        # corresponds with the different jackknife resamples. Do the same for
        # each sample of the data to ensure broadcastability. We need to
        # create a copy of the list containing the samples anyway, so do this
        # in the loop to simplify the code. This is not the bottleneck...
        samples = [np.expand_dims(sample, -2) for sample in data]
        theta_hat_i = []
        for jackknife_sample in _jackknife_resample(sample, batch):
            samples[j] = jackknife_sample
            broadcasted = _broadcast_arrays(samples, axis=-1)
            theta_hat_i.append(statistic(*broadcasted, axis=-1))
        theta_hat_ji.append(theta_hat_i)

    theta_hat_ji = [np.concatenate(theta_hat_i, axis=-1)
                    for theta_hat_i in theta_hat_ji]

    n_j = [theta_hat_i.shape[-1] for theta_hat_i in theta_hat_ji]

    theta_hat_j_dot = [theta_hat_i.mean(axis=-1, keepdims=True)
                       for theta_hat_i in theta_hat_ji]

    U_ji = [(n - 1) * (theta_hat_dot - theta_hat_i)
            for theta_hat_dot, theta_hat_i, n
            in zip(theta_hat_j_dot, theta_hat_ji, n_j)]

    nums = [(U_i**3).sum(axis=-1)/n**3 for U_i, n in zip(U_ji, n_j)]
    dens = [(U_i**2).sum(axis=-1)/n**2 for U_i, n in zip(U_ji, n_j)]
    a_hat = 1/6 * sum(nums) / sum(dens)**(3/2)

    # calculate alpha_1, alpha_2
    z_alpha = ndtri(alpha)
    z_1alpha = -z_alpha
    num1 = z0_hat + z_alpha
    alpha_1 = ndtr(z0_hat + num1/(1 - a_hat*num1))
    num2 = z0_hat + z_1alpha
    alpha_2 = ndtr(z0_hat + num2/(1 - a_hat*num2))
    return alpha_1, alpha_2, a_hat  # return a_hat for testing


def _bootstrap_iv(data, statistic, vectorized, paired, axis, confidence_level,
                  n_resamples, batch, method, bootstrap_result, random_state):
    """Input validation and standardization for `bootstrap`."""

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic = _vectorize_statistic(statistic)

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    n_samples = 0
    try:
        n_samples = len(data)
    except TypeError:
        raise ValueError("`data` must be a sequence of samples.")

    if n_samples == 0:
        raise ValueError("`data` must contain at least one sample.")

    data_iv = []
    for sample in data:
        sample = np.atleast_1d(sample)
        if sample.shape[axis_int] <= 1:
            raise ValueError("each sample in `data` must contain two or more "
                             "observations along `axis`.")
        sample = np.moveaxis(sample, axis_int, -1)
        data_iv.append(sample)

    if paired not in {True, False}:
        raise ValueError("`paired` must be `True` or `False`.")

    if paired:
        n = data_iv[0].shape[-1]
        for sample in data_iv[1:]:
            if sample.shape[-1] != n:
                message = ("When `paired is True`, all samples must have the "
                           "same length along `axis`")
                raise ValueError(message)

        # to generate the bootstrap distribution for paired-sample statistics,
        # resample the indices of the observations
        def statistic(i, axis=-1, data=data_iv, unpaired_statistic=statistic):
            data = [sample[..., i] for sample in data]
            return unpaired_statistic(*data, axis=axis)

        data_iv = [np.arange(n)]

    confidence_level_float = float(confidence_level)

    n_resamples_int = int(n_resamples)
    if n_resamples != n_resamples_int or n_resamples_int < 0:
        raise ValueError("`n_resamples` must be a non-negative integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    methods = {'percentile', 'basic', 'bca'}
    method = method.lower()
    if method not in methods:
        raise ValueError(f"`method` must be in {methods}")
    message = "`bootstrap_result` must have attribute `bootstrap_distribution`"
    if (bootstrap_result is not None
            and not hasattr(bootstrap_result, "bootstrap_distribution")):
        raise ValueError(message)

    message = ("Either `bootstrap_result.bootstrap_distribution.size` or "
               "`n_resamples` must be positive.")
    if ((not bootstrap_result or
         not bootstrap_result.bootstrap_distribution.size)
            and n_resamples_int == 0):
        raise ValueError(message)

    random_state = check_random_state(random_state)

    return (data_iv, statistic, vectorized, paired, axis_int,
            confidence_level_float, n_resamples_int, batch_iv,
            method, bootstrap_result, random_state)


fields = ['confidence_interval', 'bootstrap_distribution', 'standard_error']
BootstrapResult = make_dataclass("BootstrapResult", fields)


def bootstrap(data, statistic, *, n_resamples=9999, batch=None,
              vectorized=None, paired=False, axis=0, confidence_level=0.95,
              method='BCa', bootstrap_result=None, random_state=None):
    r"""
    Compute a two-sided bootstrap confidence interval of a statistic.

    When `method` is ``'percentile'``, a bootstrap confidence interval is
    computed according to the following procedure.

    1. Resample the data: for each sample in `data` and for each of
       `n_resamples`, take a random sample of the original sample
       (with replacement) of the same size as the original sample.

    2. Compute the bootstrap distribution of the statistic: for each set of
       resamples, compute the test statistic.

    3. Determine the confidence interval: find the interval of the bootstrap
       distribution that is

       - symmetric about the median and
       - contains `confidence_level` of the resampled statistic values.

    While the ``'percentile'`` method is the most intuitive, it is rarely
    used in practice. Two more common methods are available, ``'basic'``
    ('reverse percentile') and ``'BCa'`` ('bias-corrected and accelerated');
    they differ in how step 3 is performed.
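
    As an illustrative sketch (not part of the formal procedure), given a
    mock bootstrap distribution ``theta_hat_b``, step 3 of the
    ``'percentile'`` method amounts to:

    >>> import numpy as np
    >>> theta_hat_b = np.random.default_rng(0).standard_normal(9999)  # mock
    >>> alpha = (1 - 0.95) / 2  # for confidence_level=0.95
    >>> ci_l, ci_u = np.percentile(theta_hat_b, [100*alpha, 100*(1-alpha)])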

    If the samples in `data` are taken at random from their respective
    distributions :math:`n` times, the confidence interval returned by
    `bootstrap` will contain the true value of the statistic for those
    distributions approximately `confidence_level`:math:`\, \times \, n` times.

    Parameters
    ----------
    data : sequence of array-like
        Each element of data is a sample from an underlying distribution.
    statistic : callable
        Statistic for which the confidence interval is to be calculated.
        `statistic` must be a callable that accepts ``len(data)`` samples
        as separate arguments and returns the resulting statistic.
        If `vectorized` is set ``True``,
        `statistic` must also accept a keyword argument `axis` and be
        vectorized to compute the statistic along the provided `axis`.
    n_resamples : int, default: ``9999``
        The number of resamples performed to form the bootstrap distribution
        of the statistic.
    batch : int, optional
        The number of resamples to process in each vectorized call to
        `statistic`. Memory usage is O(`batch`*``n``), where ``n`` is the
        sample size. Default is ``None``, in which case ``batch = n_resamples``
        (or ``batch = max(n_resamples, n)`` for ``method='BCa'``).
    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of
        a vectorized statistic typically reduces computation time.
    paired : bool, default: ``False``
        Whether the statistic treats corresponding elements of the samples
        in `data` as paired.
    axis : int, default: ``0``
        The axis of the samples in `data` along which the `statistic` is
        calculated.
    confidence_level : float, default: ``0.95``
        The confidence level of the confidence interval.
    method : {'percentile', 'basic', 'bca'}, default: ``'BCa'``
        Whether to return the 'percentile' bootstrap confidence interval
        (``'percentile'``), the 'basic' (AKA 'reverse') bootstrap confidence
        interval (``'basic'``), or the bias-corrected and accelerated bootstrap
        confidence interval (``'BCa'``).
    bootstrap_result : BootstrapResult, optional
        Provide the result object returned by a previous call to `bootstrap`
        to include the previous bootstrap distribution in the new bootstrap
        distribution. This can be used, for example, to change
        `confidence_level`, change `method`, or see the effect of performing
        additional resampling without repeating computations.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        Pseudorandom number generator state used to generate resamples.

        If `random_state` is ``None`` (or `np.random`), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Returns
    -------
    res : BootstrapResult
        An object with attributes:

        confidence_interval : ConfidenceInterval
            The bootstrap confidence interval as an instance of
            `collections.namedtuple` with attributes `low` and `high`.
        bootstrap_distribution : ndarray
            The bootstrap distribution, that is, the value of `statistic` for
            each resample. The last dimension corresponds with the resamples
            (e.g. ``res.bootstrap_distribution.shape[-1] == n_resamples``).
        standard_error : float or ndarray
            The bootstrap standard error, that is, the sample standard
            deviation of the bootstrap distribution.

    Warns
    -----
    `~scipy.stats.DegenerateDataWarning`
        Generated when ``method='BCa'`` and the bootstrap distribution is
        degenerate (e.g. all elements are identical).

    Notes
    -----
    Elements of the confidence interval may be NaN for ``method='BCa'`` if
    the bootstrap distribution is degenerate (e.g. all elements are identical).
    In this case, consider using another `method` or inspecting `data` for
    indications that other analysis may be more appropriate (e.g. all
    observations are identical).

    References
    ----------
    .. [1] B. Efron and R. J. Tibshirani, An Introduction to the Bootstrap,
       Chapman & Hall/CRC, Boca Raton, FL, USA (1993)
    .. [2] Nathaniel E. Helwig, "Bootstrap Confidence Intervals",
       http://users.stat.umn.edu/~helwig/notes/bootci-Notes.pdf
    .. [3] Bootstrapping (statistics), Wikipedia,
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Examples
    --------
    Suppose we have sampled data from an unknown distribution.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> from scipy.stats import norm
    >>> dist = norm(loc=2, scale=4)  # our "unknown" distribution
    >>> data = dist.rvs(size=100, random_state=rng)

    We are interested in the standard deviation of the distribution.

    >>> std_true = dist.std()      # the true value of the statistic
    >>> print(std_true)
    4.0
    >>> std_sample = np.std(data)  # the sample statistic
    >>> print(std_sample)
    3.9460644295563863

    The bootstrap is used to approximate the variability we would expect if we
    were to repeatedly sample from the unknown distribution and calculate the
    statistic of the sample each time. It does this by repeatedly resampling
    values *from the original sample* with replacement and calculating the
    statistic of each resample. This results in a "bootstrap distribution" of
    the statistic.

    >>> import matplotlib.pyplot as plt
    >>> from scipy.stats import bootstrap
    >>> data = (data,)  # samples must be in a sequence
    >>> res = bootstrap(data, np.std, confidence_level=0.9,
    ...                 random_state=rng)
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.bootstrap_distribution, bins=25)
    >>> ax.set_title('Bootstrap Distribution')
    >>> ax.set_xlabel('statistic value')
    >>> ax.set_ylabel('frequency')
    >>> plt.show()

    The standard error quantifies this variability. It is calculated as the
    standard deviation of the bootstrap distribution.

    >>> res.standard_error
    0.24427002125829136
    >>> res.standard_error == np.std(res.bootstrap_distribution, ddof=1)
    True

    The bootstrap distribution of the statistic is often approximately normal
    with scale equal to the standard error.

    >>> x = np.linspace(3, 5)
    >>> pdf = norm.pdf(x, loc=std_sample, scale=res.standard_error)
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.bootstrap_distribution, bins=25, density=True)
    >>> ax.plot(x, pdf)
    >>> ax.set_title('Normal Approximation of the Bootstrap Distribution')
    >>> ax.set_xlabel('statistic value')
    >>> ax.set_ylabel('pdf')
    >>> plt.show()

    This suggests that we could construct a 90% confidence interval on the
    statistic based on quantiles of this normal distribution.

    >>> norm.interval(0.9, loc=std_sample, scale=res.standard_error)
    (3.5442759991341726, 4.3478528599786)

    Due to the central limit theorem, this normal approximation is accurate
    for a variety of statistics and distributions underlying the samples;
    however, the approximation is not reliable in all cases. Because
    `bootstrap` is designed to work with arbitrary underlying distributions
    and statistics, it uses more advanced techniques to generate an accurate
    confidence interval.

    >>> print(res.confidence_interval)
    ConfidenceInterval(low=3.57655333533867, high=4.382043696342881)

    If we sample from the original distribution 1000 times and form a bootstrap
    confidence interval for each sample, the confidence interval
    contains the true value of the statistic approximately 90% of the time.

    >>> n_trials = 1000
    >>> ci_contains_true_std = 0
    >>> for i in range(n_trials):
    ...     data = (dist.rvs(size=100, random_state=rng),)
    ...     ci = bootstrap(data, np.std, confidence_level=0.9,
    ...                    n_resamples=1000, random_state=rng).confidence_interval
    ...     if ci[0] < std_true < ci[1]:
    ...         ci_contains_true_std += 1
    >>> print(ci_contains_true_std)
    875

    Rather than writing a loop, we can also determine the confidence intervals
    for all 1000 samples at once.

    >>> data = (dist.rvs(size=(n_trials, 100), random_state=rng),)
    >>> res = bootstrap(data, np.std, axis=-1, confidence_level=0.9,
    ...                 n_resamples=1000, random_state=rng)
    >>> ci_l, ci_u = res.confidence_interval

    Here, `ci_l` and `ci_u` contain the confidence interval for each of the
    ``n_trials = 1000`` samples.

    >>> print(ci_l[995:])
    [3.77729695 3.75090233 3.45829131 3.34078217 3.48072829]
    >>> print(ci_u[995:])
    [4.88316666 4.86924034 4.32032996 4.2822427 4.59360598]

    And again, approximately 90% contain the true value, ``std_true = 4``.

    >>> print(np.sum((ci_l < std_true) & (std_true < ci_u)))
    900

    `bootstrap` can also be used to estimate confidence intervals of
    multi-sample statistics, including those calculated by hypothesis
    tests. `scipy.stats.mood` performs Mood's test for equal scale parameters,
    and it returns two outputs: a statistic and a p-value. To get a
    confidence interval for the test statistic, we first wrap
    `scipy.stats.mood` in a function that accepts two sample arguments,
    accepts an `axis` keyword argument, and returns only the statistic.

    >>> from scipy.stats import mood
    >>> def my_statistic(sample1, sample2, axis):
    ...     statistic, _ = mood(sample1, sample2, axis=axis)
    ...     return statistic

    Here, we use the 'basic' method with the default 95% confidence level.

    >>> sample1 = norm.rvs(scale=1, size=100, random_state=rng)
    >>> sample2 = norm.rvs(scale=2, size=100, random_state=rng)
    >>> data = (sample1, sample2)
    >>> res = bootstrap(data, my_statistic, method='basic', random_state=rng)
    >>> print(mood(sample1, sample2)[0])  # element 0 is the statistic
    -5.521109549096542
    >>> print(res.confidence_interval)
    ConfidenceInterval(low=-7.255994487314675, high=-4.016202624747605)

    The bootstrap estimate of the standard error is also available.

    >>> print(res.standard_error)
    0.8344963846318795

    Paired-sample statistics work, too. For example, consider the Pearson
    correlation coefficient.

    >>> from scipy.stats import pearsonr
    >>> n = 100
    >>> x = np.linspace(0, 10, n)
    >>> y = x + rng.uniform(size=n)
    >>> print(pearsonr(x, y)[0])  # element 0 is the statistic
    0.9962357936065914

    We wrap `pearsonr` so that it returns only the statistic.

    >>> def my_statistic(x, y):
    ...     return pearsonr(x, y)[0]

    We call `bootstrap` using ``paired=True``.
    Also, since ``my_statistic`` isn't vectorized to calculate the statistic
    along a given axis, we pass in ``vectorized=False``.

    >>> res = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                 random_state=rng)
    >>> print(res.confidence_interval)
    ConfidenceInterval(low=0.9950085825848624, high=0.9971212407917498)

    The result object can be passed back into `bootstrap` to perform additional
    resampling:

    >>> len(res.bootstrap_distribution)
    9999
    >>> res = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                 n_resamples=1001, random_state=rng,
    ...                 bootstrap_result=res)
    >>> len(res.bootstrap_distribution)
    11000

    or to change the confidence interval options:

    >>> res2 = bootstrap((x, y), my_statistic, vectorized=False, paired=True,
    ...                  n_resamples=0, random_state=rng, bootstrap_result=res,
    ...                  method='percentile', confidence_level=0.9)
    >>> np.testing.assert_equal(res2.bootstrap_distribution,
    ...                         res.bootstrap_distribution)
    >>> res2.confidence_interval
    ConfidenceInterval(low=0.9950035351407804, high=0.9971170323404578)

    without repeating computation of the original bootstrap distribution.

    """
    # Input validation
    args = _bootstrap_iv(data, statistic, vectorized, paired, axis,
                         confidence_level, n_resamples, batch, method,
                         bootstrap_result, random_state)
    data, statistic, vectorized, paired, axis, confidence_level = args[:6]
    n_resamples, batch, method, bootstrap_result, random_state = args[6:]

    theta_hat_b = ([] if bootstrap_result is None
                   else [bootstrap_result.bootstrap_distribution])

    batch_nominal = batch or n_resamples or 1

    for k in range(0, n_resamples, batch_nominal):
        batch_actual = min(batch_nominal, n_resamples-k)
        # Generate resamples
        resampled_data = []
        for sample in data:
            resample = _bootstrap_resample(sample, n_resamples=batch_actual,
                                           random_state=random_state)
            resampled_data.append(resample)

        # Compute bootstrap distribution of statistic
        theta_hat_b.append(statistic(*resampled_data, axis=-1))
    theta_hat_b = np.concatenate(theta_hat_b, axis=-1)

    # Calculate percentile interval
    alpha = (1 - confidence_level)/2
    if method == 'bca':
        interval = _bca_interval(data, statistic, axis=-1, alpha=alpha,
                                 theta_hat_b=theta_hat_b, batch=batch)[:2]
        percentile_fun = _percentile_along_axis
    else:
        interval = alpha, 1-alpha

        def percentile_fun(a, q):
            return np.percentile(a=a, q=q, axis=-1)

    # Calculate confidence interval of statistic
    ci_l = percentile_fun(theta_hat_b, interval[0]*100)
    ci_u = percentile_fun(theta_hat_b, interval[1]*100)
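    # The 'basic' (reverse percentile) interval below reflects the
    # percentile interval about the observed statistic:
    # (2*theta_hat - ci_u, 2*theta_hat - ci_l).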
    if method == 'basic':  # see [3]
        theta_hat = statistic(*data, axis=-1)
        ci_l, ci_u = 2*theta_hat - ci_u, 2*theta_hat - ci_l

    return BootstrapResult(confidence_interval=ConfidenceInterval(ci_l, ci_u),
                           bootstrap_distribution=theta_hat_b,
                           standard_error=np.std(theta_hat_b, ddof=1, axis=-1))


def _monte_carlo_test_iv(sample, rvs, statistic, vectorized, n_resamples,
                         batch, alternative, axis):
    """Input validation for `monte_carlo_test`."""

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if not callable(rvs):
        raise TypeError("`rvs` must be callable.")

    if not callable(statistic):
        raise TypeError("`statistic` must be callable.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic_vectorized = _vectorize_statistic(statistic)
    else:
        statistic_vectorized = statistic

    sample = np.atleast_1d(sample)
    sample = np.moveaxis(sample, axis, -1)

    n_resamples_int = int(n_resamples)
    if n_resamples != n_resamples_int or n_resamples_int <= 0:
        raise ValueError("`n_resamples` must be a positive integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    alternatives = {'two-sided', 'greater', 'less'}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f"`alternative` must be in {alternatives}")

    return (sample, rvs, statistic_vectorized, vectorized, n_resamples_int,
            batch_iv, alternative, axis_int)


fields = ['statistic', 'pvalue', 'null_distribution']
MonteCarloTestResult = make_dataclass("MonteCarloTestResult", fields)


def monte_carlo_test(sample, rvs, statistic, *, vectorized=None,
                     n_resamples=9999, batch=None, alternative="two-sided",
                     axis=0):
    r"""
    Monte Carlo test that a sample is drawn from a given distribution.

    The null hypothesis is that the provided `sample` was drawn at random from
    the distribution for which `rvs` generates random variates. The value of
    the `statistic` for the given sample is compared against a Monte Carlo null
    distribution: the value of the statistic for each of `n_resamples`
    samples generated by `rvs`. This gives the p-value, the probability of
    observing such an extreme value of the test statistic under the null
    hypothesis.

    Parameters
    ----------
    sample : array-like
        An array of observations.
    rvs : callable
        Generates random variates from the distribution against which `sample`
        will be tested. `rvs` must be a callable that accepts keyword argument
        ``size`` (e.g. ``rvs(size=(m, n))``) and returns an N-d array sample
        of that shape.
    statistic : callable
        Statistic for which the p-value of the hypothesis test is to be
        calculated. `statistic` must be a callable that accepts a sample
        (e.g. ``statistic(sample)``) and returns the resulting statistic.
        If `vectorized` is set ``True``, `statistic` must also accept a keyword
        argument `axis` and be vectorized to compute the statistic along the
        provided `axis` of the sample array.
    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of
        a vectorized statistic typically reduces computation time.
    n_resamples : int, default: 9999
        Number of random samples drawn from `rvs` used to approximate the
        Monte Carlo null distribution.
    batch : int, optional
        The number of Monte Carlo samples to process in each call to
        `statistic`. Memory usage is O(`batch`*``sample.shape[axis]``).
        Default is ``None``, in which case `batch` equals `n_resamples`.
    alternative : {'two-sided', 'less', 'greater'}
        The alternative hypothesis for which the p-value is calculated.
        For each alternative, the p-value is defined as follows.

        - ``'greater'`` : the percentage of the null distribution that is
          greater than or equal to the observed value of the test statistic.
        - ``'less'`` : the percentage of the null distribution that is
          less than or equal to the observed value of the test statistic.
        - ``'two-sided'`` : twice the smaller of the p-values above.

    axis : int, default: 0
        The axis of `sample` over which to calculate the statistic.

    Returns
    -------
    statistic : float or ndarray
        The observed test statistic of the sample.
    pvalue : float or ndarray
        The p-value for the given alternative.
    null_distribution : ndarray
        The values of the test statistic generated under the null hypothesis.

    References
    ----------

    .. [1] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be
       Zero: Calculating Exact P-values When Permutations Are Randomly Drawn."
       Statistical Applications in Genetics and Molecular Biology 9.1 (2010).

    Examples
    --------

    Suppose we wish to test whether a small sample has been drawn from a normal
    distribution. We decide that we will use the skew of the sample as a
    test statistic, and we will consider a p-value of 0.05 to be statistically
    significant.

    >>> import numpy as np
    >>> from scipy import stats
    >>> def statistic(x, axis):
    ...     return stats.skew(x, axis)

    After collecting our data, we calculate the observed value of the test
    statistic.

    >>> rng = np.random.default_rng()
    >>> x = stats.skewnorm.rvs(a=1, size=50, random_state=rng)
    >>> statistic(x, axis=0)
    0.12457412450240658

    To determine the probability of observing such an extreme value of the
    skewness by chance if the sample were drawn from the normal distribution,
    we can perform a Monte Carlo hypothesis test. The test will draw many
    samples at random from the normal distribution, calculate the skewness
    of each sample, and compare our original skewness against this
    distribution to determine an approximate p-value.

    >>> from scipy.stats import monte_carlo_test
    >>> # because our statistic is vectorized, we pass `vectorized=True`
    >>> rvs = lambda size: stats.norm.rvs(size=size, random_state=rng)
    >>> res = monte_carlo_test(x, rvs, statistic, vectorized=True)
    >>> print(res.statistic)
    0.12457412450240658
    >>> print(res.pvalue)
    0.7012

    The probability of obtaining a test statistic less than or equal to the
    observed value under the null hypothesis is ~70%. This is greater than
    our chosen threshold of 5%, so we cannot consider this to be significant
    evidence against the null hypothesis.

    Note that this p-value essentially matches that of
    `scipy.stats.skewtest`, which relies on an asymptotic distribution of a
    test statistic based on the sample skewness.

    >>> stats.skewtest(x).pvalue
    0.6892046027110614

    This asymptotic approximation is not valid for small sample sizes, but
    `monte_carlo_test` can be used with samples of any size.

    >>> x = stats.skewnorm.rvs(a=1, size=7, random_state=rng)
    >>> # stats.skewtest(x) would produce an error due to small sample
    >>> res = monte_carlo_test(x, rvs, statistic, vectorized=True)

    The Monte Carlo distribution of the test statistic is provided for
    further investigation.

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> ax.hist(res.null_distribution, bins=50)
    >>> ax.set_title("Monte Carlo distribution of test statistic")
    >>> ax.set_xlabel("Value of Statistic")
    >>> ax.set_ylabel("Frequency")
    >>> plt.show()

    """
    args = _monte_carlo_test_iv(sample, rvs, statistic, vectorized,
                                n_resamples, batch, alternative, axis)
    (sample, rvs, statistic, vectorized,
     n_resamples, batch, alternative, axis) = args

    # Some statistics return plain floats; ensure they're at least np.float64
    observed = np.asarray(statistic(sample, axis=-1))[()]

    n_observations = sample.shape[-1]
    batch_nominal = batch or n_resamples
    null_distribution = []
    for k in range(0, n_resamples, batch_nominal):
        batch_actual = min(batch_nominal, n_resamples-k)
        resamples = rvs(size=(batch_actual, n_observations))
        null_distribution.append(statistic(resamples, axis=-1))
    null_distribution = np.concatenate(null_distribution)
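    # Append singleton dimensions so the 1-D null distribution broadcasts
    # against `observed`, which may be an array.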
    null_distribution = null_distribution.reshape([-1] + [1]*observed.ndim)

    def less(null_distribution, observed):
        cmps = null_distribution <= observed
        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
        return pvalues

    def greater(null_distribution, observed):
        cmps = null_distribution >= observed
        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
        return pvalues

    def two_sided(null_distribution, observed):
        pvalues_less = less(null_distribution, observed)
        pvalues_greater = greater(null_distribution, observed)
        pvalues = np.minimum(pvalues_less, pvalues_greater) * 2
        return pvalues

    compare = {"less": less,
               "greater": greater,
               "two-sided": two_sided}

    pvalues = compare[alternative](null_distribution, observed)
    pvalues = np.clip(pvalues, 0, 1)

    return MonteCarloTestResult(observed, pvalues, null_distribution)


attributes = ('statistic', 'pvalue', 'null_distribution')
PermutationTestResult = make_dataclass('PermutationTestResult', attributes)


def _all_partitions_concatenated(ns):
    """
    Generate all partitions of indices of groups of given sizes, concatenated

    `ns` is an iterable of ints.
    """
    def all_partitions(z, n):
        for c in combinations(z, n):
            x0 = set(c)
            x1 = z - x0
            yield [x0, x1]

    def all_partitions_n(z, ns):
        if len(ns) == 0:
            yield [z]
            return
        for c in all_partitions(z, ns[0]):
            for d in all_partitions_n(c[1], ns[1:]):
                yield c[0:1] + d

    z = set(range(np.sum(ns)))
    for partitioning in all_partitions_n(z, ns[:]):
        x = np.concatenate([list(partition)
                            for partition in partitioning]).astype(int)
        yield x
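
# A minimal usage sketch (not part of the public API): for group sizes
# ``(2, 1)`` there are comb(3, 2) == 3 partitions; the first two indices of
# each yielded array form one group and the last index forms the other,
# e.g. array([0, 1, 2]), array([0, 2, 1]), array([1, 2, 0]) (element order
# within a group is arbitrary because groups are built from sets).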


def _batch_generator(iterable, batch):
    """A generator that yields batches of elements from an iterable"""
    iterator = iter(iterable)
    if batch <= 0:
        raise ValueError("`batch` must be positive.")
    z = [item for i, item in zip(range(batch), iterator)]
    while z:    # we don't want StopIteration without yielding an empty list
        yield z
        z = [item for i, item in zip(range(batch), iterator)]
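
# A minimal usage sketch (not part of the public API): the final batch may
# be short.
#
#     >>> list(_batch_generator(range(5), batch=2))
#     [[0, 1], [2, 3], [4]]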


def _pairings_permutations_gen(n_permutations, n_samples, n_obs_sample, batch,
                               random_state):
    # Returns a generator that yields arrays of size
    # `(batch, n_samples, n_obs_sample)`.
    # Each row is an independent permutation of indices 0 to `n_obs_sample`.
    batch = min(batch, n_permutations)

    if hasattr(random_state, 'permuted'):
        def batched_perm_generator():
            indices = np.arange(n_obs_sample)
            indices = np.tile(indices, (batch, n_samples, 1))
            for k in range(0, n_permutations, batch):
                batch_actual = min(batch, n_permutations-k)
                # Don't permute in place, otherwise results depend on `batch`
                permuted_indices = random_state.permuted(indices, axis=-1)
                yield permuted_indices[:batch_actual]
    else:  # RandomState and early Generators don't have `permuted`
        def batched_perm_generator():
            for k in range(0, n_permutations, batch):
                batch_actual = min(batch, n_permutations-k)
                size = (batch_actual, n_samples, n_obs_sample)
                x = random_state.random(size=size)
                yield np.argsort(x, axis=-1)[:batch_actual]

    return batched_perm_generator()
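
# A minimal usage sketch (not part of the public API): each yielded array
# has shape `(batch_actual, n_samples, n_obs_sample)`.
#
#     >>> rng = np.random.default_rng()
#     >>> gen = _pairings_permutations_gen(5, n_samples=2, n_obs_sample=3,
#     ...                                  batch=2, random_state=rng)
#     >>> [perm.shape for perm in gen]
#     [(2, 2, 3), (2, 2, 3), (1, 2, 3)]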


def _calculate_null_both(data, statistic, n_permutations, batch,
                         random_state=None):
    """
    Calculate null distribution for independent sample tests.
    """
    n_samples = len(data)

    # compute number of permutations
    # (distinct partitions of data into samples of these sizes)
    n_obs_i = [sample.shape[-1] for sample in data]  # observations per sample
    n_obs_ic = np.cumsum(n_obs_i)
    n_obs = n_obs_ic[-1]  # total number of observations
    n_max = np.prod([comb(n_obs_ic[i], n_obs_ic[i-1])
                     for i in range(n_samples-1, 0, -1)])

    # perm_generator is an iterator that produces permutations of indices
    # from 0 to n_obs. We'll concatenate the samples, use these indices to
    # permute the data, then split the samples apart again.
    if n_permutations >= n_max:
        exact_test = True
        n_permutations = n_max
        perm_generator = _all_partitions_concatenated(n_obs_i)
    else:
        exact_test = False
        # Neither RandomState.permutation nor Generator.permutation
        # can permute axis-slices independently. If this feature is
        # added in the future, batches of the desired size should be
        # generated in a single call.
        perm_generator = (random_state.permutation(n_obs)
                          for i in range(n_permutations))

    batch = batch or int(n_permutations)
    null_distribution = []

    # First, concatenate all the samples. In batches, permute samples with
    # indices produced by the `perm_generator`, split them into new samples of
    # the original sizes, compute the statistic for each batch, and add these
    # statistic values to the null distribution.
    data = np.concatenate(data, axis=-1)
    for indices in _batch_generator(perm_generator, batch=batch):
        indices = np.array(indices)

        # `indices` is 2D: each row is a permutation of the indices.
        # We use it to index `data` along its last axis, which corresponds
        # with observations.
        # After indexing, the second to last axis of `data_batch` corresponds
        # with permutations, and the last axis corresponds with observations.
        data_batch = data[..., indices]

        # Move the permutation axis to the front: we'll concatenate a list
        # of batched statistic values along this zeroth axis to form the
        # null distribution.
        data_batch = np.moveaxis(data_batch, -2, 0)
        data_batch = np.split(data_batch, n_obs_ic[:-1], axis=-1)
        null_distribution.append(statistic(*data_batch, axis=-1))
    null_distribution = np.concatenate(null_distribution, axis=0)

    return null_distribution, n_permutations, exact_test


def _calculate_null_pairings(data, statistic, n_permutations, batch,
                             random_state=None):
    """
    Calculate null distribution for association tests.
    """
    n_samples = len(data)

    # compute number of permutations (factorial(n) permutations of each sample)
    n_obs_sample = data[0].shape[-1]  # observations per sample; same for each
    n_max = factorial(n_obs_sample)**n_samples

    # `perm_generator` is an iterator that produces a list of permutations of
    # indices from 0 to n_obs_sample, one for each sample.
    if n_permutations >= n_max:
        exact_test = True
        n_permutations = n_max
        batch = batch or int(n_permutations)
        # cartesian product of the sets of all permutations of indices
        perm_generator = product(*(permutations(range(n_obs_sample))
                                   for i in range(n_samples)))
        batched_perm_generator = _batch_generator(perm_generator, batch=batch)
    else:
        exact_test = False
        batch = batch or int(n_permutations)
        # Separate random permutations of indices for each sample.
        # Again, it would be nice if RandomState/Generator.permutation
        # could permute each axis-slice separately.
        args = n_permutations, n_samples, n_obs_sample, batch, random_state
        batched_perm_generator = _pairings_permutations_gen(*args)

    null_distribution = []

    for indices in batched_perm_generator:
        indices = np.array(indices)

        # `indices` is 3D: the zeroth axis is for permutations, the next is
        # for samples, and the last is for observations. Swap the first two
        # to make the zeroth axis correspond with samples, as it does for
        # `data`.
        indices = np.swapaxes(indices, 0, 1)

        # When we're done, `data_batch` will be a list of length `n_samples`.
        # Each element will be a batch of random permutations of one sample.
        # The zeroth axis of each batch will correspond with permutations,
        # and the last will correspond with observations. (This makes it
        # easy to pass into `statistic`.)
        data_batch = [None]*n_samples
        for i in range(n_samples):
            data_batch[i] = data[i][..., indices[i]]
            data_batch[i] = np.moveaxis(data_batch[i], -2, 0)

        null_distribution.append(statistic(*data_batch, axis=-1))
    null_distribution = np.concatenate(null_distribution, axis=0)

    return null_distribution, n_permutations, exact_test


def _calculate_null_samples(data, statistic, n_permutations, batch,
                            random_state=None):
    """
    Calculate null distribution for paired-sample tests.
    """
    n_samples = len(data)

    # By convention, the meaning of the "samples" permutations type for
    # data with only one sample is to flip the sign of the observations.
    # Achieve this by adding a second sample - the negative of the original.
    if n_samples == 1:
        data = [data[0], -data[0]]

    # The "samples" permutation strategy is the same as the "pairings"
    # strategy except the roles of samples and observations are flipped.
    # So swap these axes, then we'll use the function for the "pairings"
    # strategy to do all the work!
    data = np.swapaxes(data, 0, -1)

    # (Of course, the user's statistic doesn't know what we've done here,
    # so we need to pass it what it's expecting.)
    def statistic_wrapped(*data, axis):
        data = np.swapaxes(data, 0, -1)
        if n_samples == 1:
            data = data[0:1]
        return statistic(*data, axis=axis)

    return _calculate_null_pairings(data, statistic_wrapped, n_permutations,
                                    batch, random_state)


def _permutation_test_iv(data, statistic, permutation_type, vectorized,
                         n_resamples, batch, alternative, axis, random_state):
    """Input validation for `permutation_test`."""

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError("`axis` must be an integer.")

    permutation_types = {'samples', 'pairings', 'independent'}
    permutation_type = permutation_type.lower()
    if permutation_type not in permutation_types:
        raise ValueError(f"`permutation_type` must be in {permutation_types}.")

    if vectorized not in {True, False, None}:
        raise ValueError("`vectorized` must be `True`, `False`, or `None`.")

    if vectorized is None:
        vectorized = 'axis' in inspect.signature(statistic).parameters

    if not vectorized:
        statistic = _vectorize_statistic(statistic)

    message = "`data` must be a tuple containing at least two samples"
    try:
        if len(data) < 2 and permutation_type == 'independent':
            raise ValueError(message)
    except TypeError:
        raise TypeError(message)

    data = _broadcast_arrays(data, axis)
    data_iv = []
    for sample in data:
        sample = np.atleast_1d(sample)
        if sample.shape[axis] <= 1:
            raise ValueError("each sample in `data` must contain two or more "
                             "observations along `axis`.")
        sample = np.moveaxis(sample, axis_int, -1)
        data_iv.append(sample)

    n_resamples_int = (int(n_resamples) if not np.isinf(n_resamples)
                       else np.inf)
    if n_resamples != n_resamples_int or n_resamples_int <= 0:
        raise ValueError("`n_resamples` must be a positive integer.")

    if batch is None:
        batch_iv = batch
    else:
        batch_iv = int(batch)
        if batch != batch_iv or batch_iv <= 0:
            raise ValueError("`batch` must be a positive integer or None.")

    alternatives = {'two-sided', 'greater', 'less'}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f"`alternative` must be in {alternatives}")

    random_state = check_random_state(random_state)

    return (data_iv, statistic, permutation_type, vectorized, n_resamples_int,
            batch_iv, alternative, axis_int, random_state)


def permutation_test(data, statistic, *, permutation_type='independent',
                     vectorized=None, n_resamples=9999, batch=None,
                     alternative="two-sided", axis=0, random_state=None):
    r"""
    Performs a permutation test of a given statistic on provided data.

    For independent sample statistics, the null hypothesis is that the data are
    randomly sampled from the same distribution.
    For paired sample statistics, two null hypotheses can be tested:
    that the data are paired at random or that the data are assigned to samples
    at random.

    Parameters
    ----------
    data : iterable of array-like
        Contains the samples, each of which is an array of observations.
        Dimensions of sample arrays must be compatible for broadcasting except
        along `axis`.
    statistic : callable
        Statistic for which the p-value of the hypothesis test is to be
        calculated. `statistic` must be a callable that accepts samples
        as separate arguments (e.g. ``statistic(*data)``) and returns the
        resulting statistic.
        If `vectorized` is set ``True``, `statistic` must also accept a keyword
        argument `axis` and be vectorized to compute the statistic along the
        provided `axis` of the sample arrays.
    permutation_type : {'independent', 'samples', 'pairings'}, optional
        The type of permutations to be performed, in accordance with the
        null hypothesis. The first two permutation types are for paired sample
        statistics, in which all samples contain the same number of
        observations and observations with corresponding indices along `axis`
        are considered to be paired; the third is for independent sample
        statistics.

        - ``'samples'`` : observations are assigned to different samples
          but remain paired with the same observations from other samples.
          This permutation type is appropriate for paired sample hypothesis
          tests such as the Wilcoxon signed-rank test and the paired t-test.
        - ``'pairings'`` : observations are paired with different observations,
          but they remain within the same sample. This permutation type is
          appropriate for association/correlation tests with statistics such
          as Spearman's :math:`\rho`, Kendall's :math:`\tau`, and Pearson's
          :math:`r`.
        - ``'independent'`` (default) : observations are assigned to different
          samples. Samples may contain different numbers of observations. This
          permutation type is appropriate for independent sample hypothesis
          tests such as the Mann-Whitney :math:`U` test and the independent
          sample t-test.

        Please see the Notes section below for more detailed descriptions
        of the permutation types.

    vectorized : bool, optional
        If `vectorized` is set ``False``, `statistic` will not be passed
        keyword argument `axis` and is expected to calculate the statistic
        only for 1D samples. If ``True``, `statistic` will be passed keyword
        argument `axis` and is expected to calculate the statistic along `axis`
        when passed an ND sample array. If ``None`` (default), `vectorized`
        will be set ``True`` if ``axis`` is a parameter of `statistic`. Use
        of a vectorized statistic typically reduces computation time.
    n_resamples : int or np.inf, default: 9999
        Number of random permutations (resamples) used to approximate the null
        distribution. If greater than or equal to the number of distinct
        permutations, the exact null distribution will be computed.
        Note that the number of distinct permutations grows very rapidly with
        the sizes of samples, so exact tests are feasible only for very small
        data sets.
    batch : int, optional
        The number of permutations to process in each call to `statistic`.
        Memory usage is O(`batch`*``n``), where ``n`` is the total size
        of all samples, regardless of the value of `vectorized`. Default is
        ``None``, in which case ``batch`` is the number of permutations.
    alternative : {'two-sided', 'less', 'greater'}, optional
        The alternative hypothesis for which the p-value is calculated.
        For each alternative, the p-value is defined for exact tests as
        follows.

        - ``'greater'`` : the percentage of the null distribution that is
          greater than or equal to the observed value of the test statistic.
        - ``'less'`` : the percentage of the null distribution that is
          less than or equal to the observed value of the test statistic.
        - ``'two-sided'`` (default) : twice the smaller of the p-values above.

        Note that p-values for randomized tests are calculated according to the
        conservative (over-estimated) approximation suggested in [2]_ and [3]_
        rather than the unbiased estimator suggested in [4]_. That is, when
        calculating the proportion of the randomized null distribution that is
        as extreme as the observed value of the test statistic, the values in
        the numerator and denominator are both increased by one. An
        interpretation of this adjustment is that the observed value of the
        test statistic is always included as an element of the randomized
        null distribution.
        The convention used for two-sided p-values is not universal;
        the observed test statistic and null distribution are returned in
        case a different definition is preferred.

    axis : int, default: 0
        The axis of the (broadcasted) samples over which to calculate the
        statistic. If samples have a different number of dimensions,
        singleton dimensions are prepended to samples with fewer dimensions
        before `axis` is considered.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        Pseudorandom number generator state used to generate permutations.

        If `random_state` is ``None`` (default), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Returns
    -------
    statistic : float or ndarray
        The observed test statistic of the data.
    pvalue : float or ndarray
        The p-value for the given alternative.
    null_distribution : ndarray
        The values of the test statistic generated under the null hypothesis.

    Notes
    -----

    The three types of permutation tests supported by this function are
    described below.

    **Unpaired statistics** (``permutation_type='independent'``):

    The null hypothesis associated with this permutation type is that all
    observations are sampled from the same underlying distribution and that
    they have been assigned to one of the samples at random.

    Suppose ``data`` contains two samples; e.g. ``a, b = data``.
    When ``1 < n_resamples < binom(n, k)``, where

    * ``k`` is the number of observations in ``a``,
    * ``n`` is the total number of observations in ``a`` and ``b``, and
    * ``binom(n, k)`` is the binomial coefficient (``n`` choose ``k``),
    the data are pooled (concatenated), randomly assigned to either the first
    or second sample, and the statistic is calculated. This process is
    performed repeatedly, ``n_resamples`` times, generating a distribution of
    the statistic under the null hypothesis. The statistic of the original
    data is compared to this distribution to determine the p-value.

    When ``n_resamples >= binom(n, k)``, an exact test is performed: the data
    are *partitioned* between the samples in each distinct way exactly once,
    and the exact null distribution is formed.
    Note that for a given partitioning of the data between the samples,
    only one ordering/permutation of the data *within* each sample is
    considered. For statistics that do not depend on the order of the data
    within samples, this dramatically reduces computational cost without
    affecting the shape of the null distribution (because the frequency/count
    of each value is affected by the same factor).

    For ``a = [a1, a2, a3, a4]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``x = [b3, a1, a2, b2]`` and ``y = [a4, b1, a3]``.
    Because only one ordering/permutation of the data *within* each sample
    is considered in an exact test, a resampling like ``x = [b3, a1, b2, a2]``
    and ``y = [a4, a3, b1]`` would *not* be considered distinct from the
    example above.

    ``permutation_type='independent'`` does not support one-sample statistics,
    but it can be applied to statistics with more than two samples. In this
    case, if ``n`` is an array of the number of observations within each
    sample, the number of distinct partitions is::
        np.prod([binom(sum(n[i:]), sum(n[i+1:])) for i in range(len(n)-1)])

    For example, for two samples with two observations each (``n = [2, 2]``),
    this gives ``binom(4, 2) = 6`` distinct partitions.

    **Paired statistics, permute pairings** (``permutation_type='pairings'``):

    The null hypothesis associated with this permutation type is that
    observations within each sample are drawn from the same underlying
    distribution and that pairings with elements of other samples are
    assigned at random.

    Suppose ``data`` contains only one sample; e.g. ``a, = data``, and we
    wish to consider all possible pairings of elements of ``a`` with elements
    of a second sample, ``b``. Let ``n`` be the number of observations in
    ``a``, which must also equal the number of observations in ``b``.

    When ``1 < n_resamples < factorial(n)``, the elements of ``a`` are
    randomly permuted. The user-supplied statistic accepts one data argument,
    say ``a_perm``, and calculates the statistic considering ``a_perm`` and
    ``b``. This process is performed repeatedly, ``n_resamples`` times,
    generating a distribution of the statistic under the null hypothesis.
    The statistic of the original data is compared to this distribution to
    determine the p-value.

    When ``n_resamples >= factorial(n)``, an exact test is performed:
    ``a`` is permuted in each distinct way exactly once. Therefore, the
    `statistic` is computed for each unique pairing of samples between ``a``
    and ``b`` exactly once.

    For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``a_perm = [a3, a1, a2]`` while ``b`` is left
    in its original order.

    ``permutation_type='pairings'`` supports ``data`` containing any number
    of samples, each of which must contain the same number of observations.
    All samples provided in ``data`` are permuted *independently*. Therefore,
    if ``m`` is the number of samples and ``n`` is the number of observations
    within each sample, then the number of permutations in an exact test is::

        factorial(n)**m

    Note that if a two-sample statistic, for example, does not inherently
    depend on the order in which observations are provided - only on the
    *pairings* of observations - then only one of the two samples should be
    provided in ``data``. This dramatically reduces computational cost without
    affecting the shape of the null distribution (because the frequency/count
    of each value is affected by the same factor).

    **Paired statistics, permute samples** (``permutation_type='samples'``):

    The null hypothesis associated with this permutation type is that
    observations within each pair are drawn from the same underlying
    distribution and that the sample to which they are assigned is random.

    Suppose ``data`` contains two samples; e.g. ``a, b = data``.
    Let ``n`` be the number of observations in ``a``, which must also equal
    the number of observations in ``b``.

    When ``1 < n_resamples < 2**n``, the elements of ``a`` and ``b`` are
    randomly swapped between samples (maintaining their pairings) and the
    statistic is calculated. This process is performed repeatedly,
    ``n_resamples`` times, generating a distribution of the statistic under
    the null hypothesis. The statistic of the original data is compared to
    this distribution to determine the p-value.

    When ``n_resamples >= 2**n``, an exact test is performed: the observations
    are assigned to the two samples in each distinct way (while maintaining
    pairings) exactly once.

    For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this
    permutation type is ``x = [b1, a2, b3]`` and ``y = [a1, b2, a3]``.

    ``permutation_type='samples'`` supports ``data`` containing any number
    of samples, each of which must contain the same number of observations.
    If ``data`` contains more than one sample, paired observations within
    ``data`` are exchanged between samples *independently*. Therefore, if ``m``
    is the number of samples and ``n`` is the number of observations within
    each sample, then the number of permutations in an exact test is::
1379 factorial(m)**n
1381 Several paired-sample statistical tests, such as the Wilcoxon signed rank
1382 test and paired-sample t-test, can be performed considering only the
1383 *difference* between two paired elements. Accordingly, if ``data`` contains
1384 only one sample, then the null distribution is formed by independently
1385 changing the *sign* of each observation.
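For example (a minimal sketch with made-up data), a paired comparison can
be carried out on the differences alone; `permutation_test` then forms the
null distribution by flipping signs:

>>> import numpy as np
>>> from scipy.stats import permutation_test
>>> rng = np.random.default_rng()
>>> before = rng.standard_normal(8)
>>> after = before + rng.standard_normal(8)
>>> def statistic(d, axis):
...     return np.mean(d, axis=axis)
>>> res = permutation_test((after - before,), statistic, vectorized=True,
...                        permutation_type='samples')  # 2**8 == 256, exact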
1387 .. warning::
1388 The p-value is calculated by counting the elements of the null
1389 distribution that are as extreme or more extreme than the observed
1390 value of the statistic. Due to the use of finite precision arithmetic,
1391 some statistic functions return numerically distinct values when the
1392 theoretical values would be exactly equal. In some cases, this could
1393 lead to a large error in the calculated p-value. `permutation_test`
1394 guards against this by considering elements in the null distribution
1395 that are "close" (within a factor of ``1+1e-14``) to the observed
1396 value of the test statistic as equal to the observed value of the
1397 test statistic. However, the user is advised to inspect the null
1398 distribution to assess whether this method of comparison is
1399 appropriate, and if not, calculate the p-value manually. See example
1400 below.
1402 References
1403 ----------
1405 .. [1] R. A. Fisher. The Design of Experiments, 6th Ed (1951).
1406 .. [2] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be
1407 Zero: Calculating Exact P-values When Permutations Are Randomly Drawn."
1408 Statistical Applications in Genetics and Molecular Biology 9.1 (2010).
1409 .. [3] M. D. Ernst. "Permutation Methods: A Basis for Exact Inference".
1410 Statistical Science (2004).
1411 .. [4] B. Efron and R. J. Tibshirani. An Introduction to the Bootstrap
1412 (1993).
1414 Examples
1415 --------
1417 Suppose we wish to test whether two samples are drawn from the same
1418 distribution. Assume that the underlying distributions are unknown to us,
1419 and that before observing the data, we hypothesized that the mean of the
1420 first sample would be less than that of the second sample. We decide that
1421 we will use the difference between the sample means as a test statistic,
1422 and we will consider a p-value of 0.05 to be statistically significant.
1424 For efficiency, we write the function defining the test statistic in a
1425 vectorized fashion: the samples ``x`` and ``y`` can be ND arrays, and the
1426 statistic will be calculated for each axis-slice along `axis`.
1428 >>> import numpy as np
1429 >>> def statistic(x, y, axis):
1430 ... return np.mean(x, axis=axis) - np.mean(y, axis=axis)
1432 After collecting our data, we calculate the observed value of the test
1433 statistic.
1435 >>> from scipy.stats import norm
1436 >>> rng = np.random.default_rng()
1437 >>> x = norm.rvs(size=5, random_state=rng)
1438 >>> y = norm.rvs(size=6, loc=3, random_state=rng)
1439 >>> statistic(x, y, 0)
1440 -3.5411688580987266
1442 Indeed, the test statistic is negative, suggesting that the true mean of
1443 the distribution underlying ``x`` is less than that of the distribution
1444 underlying ``y``. To determine the probability of this occurring by chance
1445 if the two samples were drawn from the same distribution, we perform
1446 a permutation test.
1448 >>> from scipy.stats import permutation_test
1449 >>> # because our statistic is vectorized, we pass `vectorized=True`
1450 >>> # `n_resamples=np.inf` indicates that an exact test is to be performed
1451 >>> res = permutation_test((x, y), statistic, vectorized=True,
1452 ... n_resamples=np.inf, alternative='less')
1453 >>> print(res.statistic)
1454 -3.5411688580987266
1455 >>> print(res.pvalue)
1456 0.004329004329004329
1458 The probability of obtaining a test statistic less than or equal to the
1459 observed value under the null hypothesis is 0.4329%. This is less than our
1460 chosen threshold of 5%, so we consider this to be significant evidence
1461 against the null hypothesis in favor of the alternative.
1463 Because the size of the samples above was small, `permutation_test` could
1464 perform an exact test. For larger samples, we resort to a randomized
1465 permutation test.
1467 >>> x = norm.rvs(size=100, random_state=rng)
1468 >>> y = norm.rvs(size=120, loc=0.3, random_state=rng)
1469 >>> res = permutation_test((x, y), statistic, n_resamples=100000,
1470 ... vectorized=True, alternative='less',
1471 ... random_state=rng)
1472 >>> print(res.statistic)
1473 -0.5230459671240913
1474 >>> print(res.pvalue)
1475 0.00016999830001699983
1477 The approximate probability of obtaining a test statistic less than or
1478 equal to the observed value under the null hypothesis is about 0.017%. This is
1479 again less than our chosen threshold of 5%, so again we have significant
1480 evidence to reject the null hypothesis in favor of the alternative.
1482 For large samples and number of permutations, the result is comparable to
1483 that of the corresponding asymptotic test, the independent sample t-test.
1485 >>> from scipy.stats import ttest_ind
1486 >>> res_asymptotic = ttest_ind(x, y, alternative='less')
1487 >>> print(res_asymptotic.pvalue)
1488 0.00012688101537979522
1490 The permutation distribution of the test statistic is provided for
1491 further investigation.
1493 >>> import matplotlib.pyplot as plt
1494 >>> plt.hist(res.null_distribution, bins=50)
1495 >>> plt.title("Permutation distribution of test statistic")
1496 >>> plt.xlabel("Value of Statistic")
1497 >>> plt.ylabel("Frequency")
1498 >>> plt.show()
1500 Inspection of the null distribution is essential if the statistic suffers
1501 from inaccuracy due to limited machine precision. Consider the following
1502 case:
1504 >>> from scipy.stats import pearsonr
1505 >>> x = [1, 2, 4, 3]
1506 >>> y = [2, 4, 6, 8]
1507 >>> def statistic(x, y):
1508 ... return pearsonr(x, y).statistic
1509 >>> res = permutation_test((x, y), statistic, vectorized=False,
1510 ... permutation_type='pairings',
1511 ... alternative='greater')
1512 >>> r, pvalue, null = res.statistic, res.pvalue, res.null_distribution
1514 In this case, some elements of the null distribution differ from the
1515 observed value of the correlation coefficient ``r`` due to numerical noise.
1516 We manually inspect the elements of the null distribution that are nearly
1517 the same as the observed value of the test statistic.
1519 >>> r
1520 0.8
1521 >>> unique = np.unique(null)
1522 >>> unique
1523 array([-1. , -0.8, -0.8, -0.6, -0.4, -0.2, -0.2, 0. , 0.2, 0.2, 0.4,
1524 0.6, 0.8, 0.8, 1. ]) # may vary
1525 >>> unique[np.isclose(r, unique)].tolist()
1526 [0.7999999999999999, 0.8]
1528 If `permutation_test` were to perform the comparison naively, the
1529 elements of the null distribution with value ``0.7999999999999999`` would
1530 not be considered as extreme or more extreme than the observed value of the
1531 statistic, so the calculated p-value would be too small.
1533 >>> incorrect_pvalue = np.count_nonzero(null >= r) / len(null)
1534 >>> incorrect_pvalue
1535 0.1111111111111111 # may vary
1537 Instead, `permutation_test` treats elements of the null distribution that
1538 are within ``max(1e-14, abs(r)*1e-14)`` of the observed value of the
1539 statistic ``r`` as equal to ``r``.
1541 >>> correct_pvalue = np.count_nonzero(null >= r - 1e-14) / len(null)
1542 >>> correct_pvalue
1543 0.16666666666666666
1544 >>> res.pvalue == correct_pvalue
1545 True
1547 This method of comparison is expected to be accurate in most practical
1548 situations, but the user is advised to assess this by inspecting the
1549 elements of the null distribution that are close to the observed value
1550 of the statistic. Also, consider the use of statistics that can be
1551 calculated using exact arithmetic (e.g. integer statistics).
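For example (an illustrative sketch; ``rank_sum`` is a hypothetical helper
suited to ``permutation_type='independent'``), a rank-based statistic is
integer-valued, so theoretically equal elements of the null distribution
are numerically identical as well:

>>> import numpy as np
>>> def rank_sum(x, y):
...     # sum of the ordinal ranks of ``x`` within the pooled data;
...     # an integer, immune to floating-point rounding
...     pooled = np.concatenate([x, y])
...     ranks = np.argsort(np.argsort(pooled)) + 1
...     return ranks[:len(x)].sum()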
1553 """
1554 args = _permutation_test_iv(data, statistic, permutation_type, vectorized,
1555 n_resamples, batch, alternative, axis,
1556 random_state)
1557 (data, statistic, permutation_type, vectorized, n_resamples, batch,
1558 alternative, axis, random_state) = args
1560 observed = statistic(*data, axis=-1)
1562 null_calculators = {"pairings": _calculate_null_pairings,
1563 "samples": _calculate_null_samples,
1564 "independent": _calculate_null_both}
1565 null_calculator_args = (data, statistic, n_resamples,
1566 batch, random_state)
1567 calculate_null = null_calculators[permutation_type]
1568 null_distribution, n_resamples, exact_test = (
1569 calculate_null(*null_calculator_args))
1571 # See References [2] and [3]: for a randomized test, the observed statistic is counted as a member of the null distribution, adding 1 to the numerator and denominator of the p-value
1572 adjustment = 0 if exact_test else 1
1574 # relative tolerance for detecting numerically distinct but
1575 # theoretically equal values in the null distribution
1576 eps = 1e-14
1577 gamma = np.maximum(eps, np.abs(eps * observed))
1579 def less(null_distribution, observed):
1580 cmps = null_distribution <= observed + gamma
1581 pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment)
1582 return pvalues
1584 def greater(null_distribution, observed):
1585 cmps = null_distribution >= observed - gamma
1586 pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment)
1587 return pvalues
1589 def two_sided(null_distribution, observed):
1590 pvalues_less = less(null_distribution, observed)
1591 pvalues_greater = greater(null_distribution, observed)
1592 pvalues = np.minimum(pvalues_less, pvalues_greater) * 2
1593 return pvalues
1595 compare = {"less": less,
1596 "greater": greater,
1597 "two-sided": two_sided}
1599 pvalues = compare[alternative](null_distribution, observed)
1600 pvalues = np.clip(pvalues, 0, 1)  # the two-sided doubling can exceed 1
1602 return PermutationTestResult(observed, pvalues, null_distribution)