from itertools import permutations
import numpy as np
import math
from ._continuous_distns import norm
import scipy.stats
from dataclasses import make_dataclass


PageTrendTestResult = make_dataclass("PageTrendTestResult",
                                     ("statistic", "pvalue", "method"))
def page_trend_test(data, ranked=False, predicted_ranks=None, method='auto'):
    r"""
    Perform Page's Test, a measure of trend in observations between treatments.

    Page's Test (also known as Page's :math:`L` test) is useful when:

    * there are :math:`n \geq 3` treatments,
    * :math:`m \geq 2` subjects are observed for each treatment, and
    * the observations are hypothesized to have a particular order.

    Specifically, the test considers the null hypothesis that

    .. math::

        m_1 = m_2 = m_3 = \cdots = m_n,

    where :math:`m_j` is the mean of the observed quantity under treatment
    :math:`j`, against the alternative hypothesis that

    .. math::

        m_1 \leq m_2 \leq m_3 \leq \cdots \leq m_n,

    where at least one inequality is strict.

    As noted by [4]_, Page's :math:`L` test has greater statistical power than
    the Friedman test against the alternative that there is a difference in
    trend, as Friedman's test only considers a difference in the means of the
    observations without considering their order. Whereas Spearman
    :math:`\rho` considers the correlation between the ranked observations of
    two variables (e.g. the airspeed velocity of a swallow vs. the weight of
    the coconut it carries), Page's :math:`L` is concerned with a trend in an
    observation (e.g. the airspeed velocity of a swallow) across several
    distinct treatments (e.g. carrying each of five coconuts of different
    weight) even as the observation is repeated with multiple subjects (e.g.
    one European swallow and one African swallow).
    Parameters
    ----------
    data : array-like
        An :math:`m \times n` array; the element in row :math:`i` and
        column :math:`j` is the observation corresponding with subject
        :math:`i` and treatment :math:`j`. By default, the columns are
        assumed to be arranged in order of increasing predicted mean.

    ranked : boolean, optional
        By default, `data` is assumed to be observations rather than ranks;
        it will be ranked with `scipy.stats.rankdata` along ``axis=1``. If
        `data` is provided in the form of ranks, pass ``ranked=True``.

    predicted_ranks : array-like, optional
        The predicted ranks of the column means. If not specified,
        the columns are assumed to be arranged in order of increasing
        predicted mean, so the default `predicted_ranks` are
        :math:`[1, 2, \dots, n-1, n]`.

    method : {'auto', 'asymptotic', 'exact'}, optional
        Selects the method used to calculate the *p*-value. The following
        options are available.

        * 'auto': selects between 'exact' and 'asymptotic' to
          achieve reasonably accurate results in reasonable time (default)
        * 'asymptotic': compares the standardized test statistic against
          the normal distribution
        * 'exact': computes the exact *p*-value by comparing the observed
          :math:`L` statistic against those realized by all possible
          permutations of ranks (under the null hypothesis that each
          permutation is equally likely)
    Returns
    -------
    res : PageTrendTestResult
        An object containing attributes:

        statistic : float
            Page's :math:`L` test statistic.
        pvalue : float
            The associated *p*-value.
        method : {'asymptotic', 'exact'}
            The method used to compute the *p*-value.

    See Also
    --------
    rankdata, friedmanchisquare, spearmanr
    Notes
    -----
    As noted in [1]_, "the :math:`n` 'treatments' could just as well represent
    :math:`n` objects or events or performances or persons or trials ranked."
    Similarly, the :math:`m` 'subjects' could equally stand for :math:`m`
    "groupings by ability or some other control variable, or judges doing
    the ranking, or random replications of some other sort."

    The procedure for calculating the :math:`L` statistic, adapted from
    [1]_, is:

    1. "Predetermine with careful logic the appropriate hypotheses
       concerning the predicted ordering of the experimental results.
       If no reasonable basis for ordering any treatments is known, the
       :math:`L` test is not appropriate."
    2. "As in other experiments, determine at what level of confidence
       you will reject the null hypothesis that there is no agreement of
       experimental results with the monotonic hypothesis."
    3. "Cast the experimental material into a two-way table of :math:`n`
       columns (treatments, objects ranked, conditions) and :math:`m`
       rows (subjects, replication groups, levels of control variables)."
    4. "When experimental observations are recorded, rank them across each
       row", e.g. ``ranks = scipy.stats.rankdata(data, axis=1)``.
    5. "Add the ranks in each column", e.g.
       ``colsums = np.sum(ranks, axis=0)``.
    6. "Multiply each sum of ranks by the predicted rank for that same
       column", e.g. ``products = predicted_ranks * colsums``.
    7. "Sum all such products", e.g. ``L = products.sum()``. Steps 4-7 are
       combined in the sketch below.

    [1]_ continues by suggesting the use of the standardized statistic

    .. math::

        \chi_L^2 = \frac{\left[12L-3mn(n+1)^2\right]^2}{mn^2(n^2-1)(n+1)}

    "which is distributed approximately as chi-square with 1 degree of
    freedom. The ordinary use of :math:`\chi^2` tables would be
    equivalent to a two-sided test of agreement. If a one-sided test
    is desired, *as will almost always be the case*, the probability
    discovered in the chi-square table should be *halved*."

    However, this standardized statistic does not distinguish between the
    observed values being well correlated with the predicted ranks and being
    *anti*-correlated with the predicted ranks. Instead, we follow [2]_
    and calculate the standardized statistic

    .. math::

        \Lambda = \frac{L - E_0}{\sqrt{V_0}},

    where :math:`E_0 = \frac{1}{4} mn(n+1)^2` and
    :math:`V_0 = \frac{1}{144} mn^2(n+1)(n^2-1)`, "which is asymptotically
    normal under the null hypothesis".

    The *p*-value for ``method='exact'`` is generated by comparing the observed
    value of :math:`L` against the :math:`L` values generated for all
    :math:`(n!)^m` possible permutations of ranks. The calculation is performed
    using the recursive method of [5]_.
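
    As an illustrative cross-check (a hypothetical brute-force helper, not
    the recursive implementation used here), the exact survival function
    can be computed by direct enumeration under the default predicted
    ranks; this is only feasible for very small tables, since there are
    :math:`(n!)^m` rank configurations::

        from itertools import permutations, product
        import numpy as np

        def exact_sf_bruteforce(L_obs, m, n):
            # default predicted ranks 1..n
            predicted = np.arange(1, n + 1)
            # all possible rank rows of length n
            rows = [np.array(p) for p in permutations(predicted)]
            # Page's L for every assignment of a rank row to each subject
            Ls = [sum((predicted * row).sum() for row in choice)
                  for choice in product(rows, repeat=m)]
            # probability that L under H0 is at least the observed L
            return np.mean(np.array(Ls) >= L_obs)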

    The *p*-values are not adjusted for the possibility of ties. When
    ties are present, the reported ``'exact'`` *p*-values may be somewhat
    larger (i.e. more conservative) than the true *p*-value [2]_. The
    ``'asymptotic'`` *p*-values, however, tend to be smaller (i.e. less
    conservative) than the ``'exact'`` *p*-values.

    References
    ----------
    .. [1] Ellis Batten Page, "Ordered hypotheses for multiple treatments:
           a significance test for linear ranks", *Journal of the American
           Statistical Association* 58(301), p. 216--230, 1963.

    .. [2] Markus Neuhauser, *Nonparametric Statistical Tests: A Computational
           Approach*, CRC Press, p. 150--152, 2012.

    .. [3] Statext LLC, "Page's L Trend Test - Easy Statistics", *Statext -
           Statistics Study*, https://www.statext.com/practice/PageTrendTest03.php,
           Accessed July 12, 2020.

    .. [4] "Page's Trend Test", *Wikipedia*, Wikimedia Foundation,
           https://en.wikipedia.org/wiki/Page%27s_trend_test,
           Accessed July 12, 2020.

    .. [5] Robert E. Odeh, "The exact distribution of Page's L-statistic in
           the two-way layout", *Communications in Statistics - Simulation and
           Computation*, 6(1), p. 49--61, 1977.

    Examples
    --------
    We use the example from [3]_: 10 students are asked to rate three
    teaching methods - tutorial, lecture, and seminar - on a scale of 1-5,
    with 1 being the lowest and 5 being the highest. We have decided that
    a confidence level of 99% is required to reject the null hypothesis in
    favor of our alternative: that the seminar will have the highest ratings
    and the tutorial will have the lowest. Initially, the data have been
    tabulated with each row representing an individual student's ratings of
    the three methods in the following order: tutorial, lecture, seminar.

    >>> table = [[3, 4, 3],
    ...          [2, 2, 4],
    ...          [3, 3, 5],
    ...          [1, 3, 2],
    ...          [2, 3, 2],
    ...          [2, 4, 5],
    ...          [1, 2, 4],
    ...          [3, 4, 4],
    ...          [2, 4, 5],
    ...          [1, 3, 4]]

    Because the tutorial is hypothesized to have the lowest ratings, the
    column corresponding with tutorial rankings should be first; the seminar
    is hypothesized to have the highest ratings, so its column should be last.
    Since the columns are already arranged in this order of increasing
    predicted mean, we can pass the table directly into `page_trend_test`.

    >>> from scipy.stats import page_trend_test
    >>> res = page_trend_test(table)
    >>> res
    PageTrendTestResult(statistic=133.5, pvalue=0.0018191161948127822,
                        method='exact')

    This *p*-value indicates that there is a 0.1819% chance that
    the :math:`L` statistic would reach such an extreme value under the null
    hypothesis. Because 0.1819% is less than 1%, we have evidence to reject
    the null hypothesis in favor of our alternative at a 99% confidence level.

    The value of the :math:`L` statistic is 133.5. To check this manually,
    we rank the data such that high scores correspond with high ranks, settling
    ties with an average rank:

    >>> from scipy.stats import rankdata
    >>> ranks = rankdata(table, axis=1)
    >>> ranks
    array([[1.5, 3. , 1.5],
           [1.5, 1.5, 3. ],
           [1.5, 1.5, 3. ],
           [1. , 3. , 2. ],
           [1.5, 3. , 1.5],
           [1. , 2. , 3. ],
           [1. , 2. , 3. ],
           [1. , 2.5, 2.5],
           [1. , 2. , 3. ],
           [1. , 2. , 3. ]])

    We add the ranks within each column, multiply the sums by the
    predicted ranks, and sum the products.

    >>> import numpy as np
    >>> m, n = ranks.shape
    >>> predicted_ranks = np.arange(1, n+1)
    >>> L = (predicted_ranks * np.sum(ranks, axis=0)).sum()
    >>> res.statistic == L
    True

    As presented in [3]_, the asymptotic approximation of the *p*-value is the
    survival function of the normal distribution evaluated at the standardized
    test statistic:

    >>> from scipy.stats import norm
    >>> E0 = (m*n*(n+1)**2)/4
    >>> V0 = (m*n**2*(n+1)*(n**2-1))/144
    >>> Lambda = (L-E0)/np.sqrt(V0)
    >>> p = norm.sf(Lambda)
    >>> p
    0.0012693433690751756

    This does not precisely match the *p*-value reported by `page_trend_test`
    above. The asymptotic distribution is not very accurate, nor conservative,
    for :math:`m \leq 12` and :math:`n \leq 8`, so `page_trend_test` chose to
    use ``method='exact'`` based on the dimensions of the table and the
    recommendations in Page's original paper [1]_. To override
    `page_trend_test`'s choice, provide the `method` argument.

    >>> res = page_trend_test(table, method="asymptotic")
    >>> res
    PageTrendTestResult(statistic=133.5, pvalue=0.0012693433690751756,
                        method='asymptotic')

    If the data are already ranked, we can pass in the ``ranks`` instead of
    the ``table`` to save computation time.

    >>> res = page_trend_test(ranks,        # ranks of data
    ...                       ranked=True,  # data is already ranked
    ...                       )
    >>> res
    PageTrendTestResult(statistic=133.5, pvalue=0.0018191161948127822,
                        method='exact')

    Suppose the raw data had been tabulated in an order different from the
    order of predicted means, say lecture, seminar, tutorial.

    >>> table = np.asarray(table)[:, [1, 2, 0]]

    Since the arrangement of this table is not consistent with the assumed
    ordering, we can either rearrange the table or provide the
    `predicted_ranks`. Remembering that the lecture is predicted
    to have the middle rank, the seminar the highest, and the tutorial the
    lowest, we pass:

    >>> res = page_trend_test(table,                      # data as tabulated
    ...                       predicted_ranks=[2, 3, 1],  # our predicted order
    ...                       )
    >>> res
    PageTrendTestResult(statistic=133.5, pvalue=0.0018191161948127822,
                        method='exact')

    """

    # Possible values of the method parameter and the corresponding function
    # used to evaluate the p-value
    methods = {"asymptotic": _l_p_asymptotic,
               "exact": _l_p_exact,
               "auto": None}
    if method not in methods:
        raise ValueError(f"`method` must be in {set(methods)}")

    if type(ranked) is not bool:
        raise TypeError("`ranked` must be boolean.")

    ranks = np.asarray(data)
    if ranks.ndim != 2:  # TODO: relax this to accept 3d arrays?
        raise ValueError("`data` must be a 2d array.")

    m, n = ranks.shape
    if m < 2 or n < 3:
        raise ValueError("Page's L is only appropriate for data with two "
                         "or more rows and three or more columns.")

    if np.any(np.isnan(data)):
        raise ValueError("`data` contains NaNs, which cannot be ranked "
                         "meaningfully")

    # rank the data if it's not already ranked
    if ranked:
        # Only a basic check on whether data is ranked. Checking that the data
        # is properly ranked could take as much time as ranking it.
        if not (ranks.min() >= 1 and ranks.max() <= ranks.shape[1]):
            raise ValueError("`data` is not properly ranked. Rank the data or "
                             "pass `ranked=False`.")
    else:
        ranks = scipy.stats.rankdata(data, axis=-1)

    # generate predicted ranks if not provided, ensure valid NumPy array
    if predicted_ranks is None:
        predicted_ranks = np.arange(1, n+1)
    else:
        predicted_ranks = np.asarray(predicted_ranks)
        if (predicted_ranks.ndim < 1 or
                (set(predicted_ranks) != set(range(1, n+1)) or
                 len(predicted_ranks) != n)):
            raise ValueError(f"`predicted_ranks` must include each integer "
                             f"from 1 to {n} (the number of columns in "
                             f"`data`) exactly once.")

    # Calculate the L statistic
    L = _l_vectorized(ranks, predicted_ranks)

    # Calculate the p-value
    if method == "auto":
        method = _choose_method(ranks)
    p_fun = methods[method]  # get the function corresponding with the method
    p = p_fun(L, m, n)

    page_result = PageTrendTestResult(statistic=L, pvalue=p, method=method)
    return page_result


def _choose_method(ranks):
    '''Choose method for computing p-value automatically'''
    m, n = ranks.shape
    if n > 8 or (m > 12 and n > 3) or m > 20:  # as in [1], [4]
        method = "asymptotic"
    else:
        method = "exact"
    return method


def _l_vectorized(ranks, predicted_ranks):
    '''Calculate Page's L statistic for each page of a 3d array'''
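    # Shapes (an illustrative note): for `ranks` of shape (..., m, n) and
    # `predicted_ranks` of shape (n,), `colsums` is (..., 1, n), `products`
    # broadcasts to (..., 1, n), and `Ls` is (..., 1) before being collapsed
    # to a scalar or raveled.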
    colsums = ranks.sum(axis=-2, keepdims=True)
    products = predicted_ranks * colsums
    Ls = products.sum(axis=-1)
    Ls = Ls[0] if Ls.size == 1 else Ls.ravel()
    return Ls


def _l_p_asymptotic(L, m, n):
    '''Calculate the p-value of Page's L from the asymptotic distribution'''
    # Using [1] as a reference, the asymptotic p-value would be calculated as:
    # chi_L = (12*L - 3*m*n*(n+1)**2)**2/(m*n**2*(n**2-1)*(n+1))
    # p = chi2.sf(chi_L, df=1, loc=0, scale=1)/2
    # but this is insensitive to the direction of the hypothesized ranking
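    # Algebraically, chi_L = Lambda**2 for the Lambda computed below, so the
    # halved chi-square p-value equals norm.sf(abs(Lambda)); keeping the sign
    # of Lambda makes the test one-sided in the hypothesized direction.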

    # See [2] page 151
    E0 = (m*n*(n+1)**2)/4
    V0 = (m*n**2*(n+1)*(n**2-1))/144
    Lambda = (L-E0)/np.sqrt(V0)
    # This is a one-sided "greater" test - calculate the probability that the
    # L statistic under H0 would be greater than the observed L statistic
    p = norm.sf(Lambda)
    return p


def _l_p_exact(L, m, n):
    '''Calculate the p-value of Page's L exactly'''
    # [1] uses m, n; [5] uses n, k.
    # Switch convention here because exact calculation code references [5].
    L, n, k = int(L), int(m), int(n)
    _pagel_state.set_k(k)
    return _pagel_state.sf(L, n)


class _PageL:
    '''Maintains state between `page_trend_test` executions'''

    def __init__(self):
        '''Lightweight initialization'''
        self.all_pmfs = {}

    def set_k(self, k):
        '''Calculate lower and upper limits of L for single row'''
        self.k = k
        # See [5] top of page 52
        self.a, self.b = (k*(k+1)*(k+2))//6, (k*(k+1)*(2*k+1))//6
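        # Sanity sketch: for a single row, L = sum(j * rank_j). By the
        # rearrangement inequality this is smallest when the ranks oppose
        # the predicted order, a = sum(j*(k+1-j)) = k*(k+1)*(k+2)/6, and
        # largest when they match it, b = sum(j**2) = k*(k+1)*(2*k+1)/6.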

    def sf(self, l, n):
        '''Survival function of Page's L statistic'''
        ps = [self.pmf(l, n) for l in range(l, n*self.b + 1)]
        return np.sum(ps)

    def p_l_k_1(self):
        '''Relative frequency of each L value over all possible single rows'''

        # See [5] Equation (6)
        ranks = range(1, self.k+1)
        # generate all possible rows of length k
        rank_perms = np.array(list(permutations(ranks)))
        # compute Page's L for all possible rows
        Ls = (ranks*rank_perms).sum(axis=1)
        # count occurrences of each L value
        counts = np.histogram(Ls, np.arange(self.a-0.5, self.b+1.5))[0]
        # factorial(k) is number of possible permutations
        return counts/math.factorial(self.k)

    def pmf(self, l, n):
        '''Recursive function to evaluate p(l, k, n); see [5] Equation 1'''

        if n not in self.all_pmfs:
            self.all_pmfs[n] = {}
        if self.k not in self.all_pmfs[n]:
            self.all_pmfs[n][self.k] = {}

        # Cache results to avoid repeating calculation. Initially this was
        # written with lru_cache, but this seems faster? Also, we could add
        # an option to save this for future lookup.
        if l in self.all_pmfs[n][self.k]:
            return self.all_pmfs[n][self.k][l]

        if n == 1:
            ps = self.p_l_k_1()  # [5] Equation 6
            ls = range(self.a, self.b+1)
            # not fast, but we'll only be here once
            self.all_pmfs[n][self.k] = {l: p for l, p in zip(ls, ps)}
            return self.all_pmfs[n][self.k][l]

        p = 0
        low = max(l-(n-1)*self.b, self.a)   # [5] Equation 2
        high = min(l-(n-1)*self.a, self.b)

        # [5] Equation 1
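        # (Sketch of the recursion: p(l, n) = sum over t of
        # p(t, 1) * p(l - t, n - 1), i.e. condition on the contribution t
        # of a single row to the total statistic l.)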
        for t in range(low, high+1):
            p1 = self.pmf(l-t, n-1)
            p2 = self.pmf(t, 1)
            p += p1*p2
        self.all_pmfs[n][self.k][l] = p
        return p


# Maintain state for faster repeat calls to page_trend_test w/ method='exact'
_pagel_state = _PageL()