Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_discrete_distns.py: 33%
642 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1#
2# Author: Travis Oliphant 2002-2011 with contributions from
3# SciPy Developers 2004-2011
4#
5from functools import partial
6import warnings
8from scipy import special
9from scipy.special import entr, logsumexp, betaln, gammaln as gamln, zeta
10from scipy._lib._util import _lazywhere, rng_integers
11from scipy.interpolate import interp1d
13from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh
15import numpy as np
17from ._distn_infrastructure import (rv_discrete, get_distribution_names,
18 _check_shape, _ShapeInfo)
19import scipy.stats._boost as _boost
20from ._biasedurn import (_PyFishersNCHypergeometric,
21 _PyWalleniusNCHypergeometric,
22 _PyStochasticLib3)
25def _isintegral(x):
26 return x == np.round(x)
29class binom_gen(rv_discrete):
30 r"""A binomial discrete random variable.
32 %(before_notes)s
34 Notes
35 -----
36 The probability mass function for `binom` is:
38 .. math::
40 f(k) = \binom{n}{k} p^k (1-p)^{n-k}
42 for :math:`k \in \{0, 1, \dots, n\}`, :math:`0 \leq p \leq 1`
44 `binom` takes :math:`n` and :math:`p` as shape parameters,
45 where :math:`p` is the probability of a single success
46 and :math:`1-p` is the probability of a single failure.
48 %(after_notes)s
50 %(example)s
52 See Also
53 --------
54 hypergeom, nbinom, nhypergeom
56 """
57 def _shape_info(self):
58 return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
59 _ShapeInfo("p", False, (0, 1), (True, True))]
61 def _rvs(self, n, p, size=None, random_state=None):
62 return random_state.binomial(n, p, size)
64 def _argcheck(self, n, p):
65 return (n >= 0) & _isintegral(n) & (p >= 0) & (p <= 1)
67 def _get_support(self, n, p):
68 return self.a, n
70 def _logpmf(self, x, n, p):
71 k = floor(x)
72 combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
73 return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)
75 def _pmf(self, x, n, p):
76 # binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)
77 return _boost._binom_pdf(x, n, p)
79 def _cdf(self, x, n, p):
80 k = floor(x)
81 return _boost._binom_cdf(k, n, p)
83 def _sf(self, x, n, p):
84 k = floor(x)
85 return _boost._binom_sf(k, n, p)
87 def _isf(self, x, n, p):
88 return _boost._binom_isf(x, n, p)
90 def _ppf(self, q, n, p):
91 return _boost._binom_ppf(q, n, p)
93 def _stats(self, n, p, moments='mv'):
94 mu = _boost._binom_mean(n, p)
95 var = _boost._binom_variance(n, p)
96 g1, g2 = None, None
97 if 's' in moments:
98 g1 = _boost._binom_skewness(n, p)
99 if 'k' in moments:
100 g2 = _boost._binom_kurtosis_excess(n, p)
101 return mu, var, g1, g2
103 def _entropy(self, n, p):
104 k = np.r_[0:n + 1]
105 vals = self._pmf(k, n, p)
106 return np.sum(entr(vals), axis=0)
109binom = binom_gen(name='binom')
112class bernoulli_gen(binom_gen):
113 r"""A Bernoulli discrete random variable.
115 %(before_notes)s
117 Notes
118 -----
119 The probability mass function for `bernoulli` is:
121 .. math::
123 f(k) = \begin{cases}1-p &\text{if } k = 0\\
124 p &\text{if } k = 1\end{cases}
126 for :math:`k` in :math:`\{0, 1\}`, :math:`0 \leq p \leq 1`
128 `bernoulli` takes :math:`p` as shape parameter,
129 where :math:`p` is the probability of a single success
130 and :math:`1-p` is the probability of a single failure.
132 %(after_notes)s
134 %(example)s
136 """
137 def _shape_info(self):
138 return [_ShapeInfo("p", False, (0, 1), (True, True))]
140 def _rvs(self, p, size=None, random_state=None):
141 return binom_gen._rvs(self, 1, p, size=size, random_state=random_state)
143 def _argcheck(self, p):
144 return (p >= 0) & (p <= 1)
146 def _get_support(self, p):
147 # Overrides binom_gen._get_support!x
148 return self.a, self.b
150 def _logpmf(self, x, p):
151 return binom._logpmf(x, 1, p)
153 def _pmf(self, x, p):
154 # bernoulli.pmf(k) = 1-p if k = 0
155 # = p if k = 1
156 return binom._pmf(x, 1, p)
158 def _cdf(self, x, p):
159 return binom._cdf(x, 1, p)
161 def _sf(self, x, p):
162 return binom._sf(x, 1, p)
164 def _isf(self, x, p):
165 return binom._isf(x, 1, p)
167 def _ppf(self, q, p):
168 return binom._ppf(q, 1, p)
170 def _stats(self, p):
171 return binom._stats(1, p)
173 def _entropy(self, p):
174 return entr(p) + entr(1-p)
177bernoulli = bernoulli_gen(b=1, name='bernoulli')
180class betabinom_gen(rv_discrete):
181 r"""A beta-binomial discrete random variable.
183 %(before_notes)s
185 Notes
186 -----
187 The beta-binomial distribution is a binomial distribution with a
188 probability of success `p` that follows a beta distribution.
190 The probability mass function for `betabinom` is:
192 .. math::
194 f(k) = \binom{n}{k} \frac{B(k + a, n - k + b)}{B(a, b)}
196 for :math:`k \in \{0, 1, \dots, n\}`, :math:`n \geq 0`, :math:`a > 0`,
197 :math:`b > 0`, where :math:`B(a, b)` is the beta function.
199 `betabinom` takes :math:`n`, :math:`a`, and :math:`b` as shape parameters.
201 References
202 ----------
203 .. [1] https://en.wikipedia.org/wiki/Beta-binomial_distribution
205 %(after_notes)s
207 .. versionadded:: 1.4.0
209 See Also
210 --------
211 beta, binom
213 %(example)s
215 """
216 def _shape_info(self):
217 return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
218 _ShapeInfo("a", False, (0, np.inf), (False, False)),
219 _ShapeInfo("b", False, (0, np.inf), (False, False))]
221 def _rvs(self, n, a, b, size=None, random_state=None):
222 p = random_state.beta(a, b, size)
223 return random_state.binomial(n, p, size)
225 def _get_support(self, n, a, b):
226 return 0, n
228 def _argcheck(self, n, a, b):
229 return (n >= 0) & _isintegral(n) & (a > 0) & (b > 0)
231 def _logpmf(self, x, n, a, b):
232 k = floor(x)
233 combiln = -log(n + 1) - betaln(n - k + 1, k + 1)
234 return combiln + betaln(k + a, n - k + b) - betaln(a, b)
236 def _pmf(self, x, n, a, b):
237 return exp(self._logpmf(x, n, a, b))
239 def _stats(self, n, a, b, moments='mv'):
240 e_p = a / (a + b)
241 e_q = 1 - e_p
242 mu = n * e_p
243 var = n * (a + b + n) * e_p * e_q / (a + b + 1)
244 g1, g2 = None, None
245 if 's' in moments:
246 g1 = 1.0 / sqrt(var)
247 g1 *= (a + b + 2 * n) * (b - a)
248 g1 /= (a + b + 2) * (a + b)
249 if 'k' in moments:
250 g2 = a + b
251 g2 *= (a + b - 1 + 6 * n)
252 g2 += 3 * a * b * (n - 2)
253 g2 += 6 * n ** 2
254 g2 -= 3 * e_p * b * n * (6 - n)
255 g2 -= 18 * e_p * e_q * n ** 2
256 g2 *= (a + b) ** 2 * (1 + a + b)
257 g2 /= (n * a * b * (a + b + 2) * (a + b + 3) * (a + b + n))
258 g2 -= 3
259 return mu, var, g1, g2
262betabinom = betabinom_gen(name='betabinom')
265class nbinom_gen(rv_discrete):
266 r"""A negative binomial discrete random variable.
268 %(before_notes)s
270 Notes
271 -----
272 Negative binomial distribution describes a sequence of i.i.d. Bernoulli
273 trials, repeated until a predefined, non-random number of successes occurs.
275 The probability mass function of the number of failures for `nbinom` is:
277 .. math::
279 f(k) = \binom{k+n-1}{n-1} p^n (1-p)^k
281 for :math:`k \ge 0`, :math:`0 < p \leq 1`
283 `nbinom` takes :math:`n` and :math:`p` as shape parameters where :math:`n`
284 is the number of successes, :math:`p` is the probability of a single
285 success, and :math:`1-p` is the probability of a single failure.
287 Another common parameterization of the negative binomial distribution is
288 in terms of the mean number of failures :math:`\mu` to achieve :math:`n`
289 successes. The mean :math:`\mu` is related to the probability of success
290 as
292 .. math::
294 p = \frac{n}{n + \mu}
296 The number of successes :math:`n` may also be specified in terms of a
297 "dispersion", "heterogeneity", or "aggregation" parameter :math:`\alpha`,
298 which relates the mean :math:`\mu` to the variance :math:`\sigma^2`,
299 e.g. :math:`\sigma^2 = \mu + \alpha \mu^2`. Regardless of the convention
300 used for :math:`\alpha`,
302 .. math::
304 p &= \frac{\mu}{\sigma^2} \\
305 n &= \frac{\mu^2}{\sigma^2 - \mu}
307 %(after_notes)s
309 %(example)s
311 See Also
312 --------
313 hypergeom, binom, nhypergeom
315 """
316 def _shape_info(self):
317 return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
318 _ShapeInfo("p", False, (0, 1), (True, True))]
320 def _rvs(self, n, p, size=None, random_state=None):
321 return random_state.negative_binomial(n, p, size)
323 def _argcheck(self, n, p):
324 return (n > 0) & (p > 0) & (p <= 1)
326 def _pmf(self, x, n, p):
327 # nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
328 return _boost._nbinom_pdf(x, n, p)
330 def _logpmf(self, x, n, p):
331 coeff = gamln(n+x) - gamln(x+1) - gamln(n)
332 return coeff + n*log(p) + special.xlog1py(x, -p)
334 def _cdf(self, x, n, p):
335 k = floor(x)
336 return _boost._nbinom_cdf(k, n, p)
338 def _logcdf(self, x, n, p):
339 k = floor(x)
340 cdf = self._cdf(k, n, p)
341 cond = cdf > 0.5
343 def f1(k, n, p):
344 return np.log1p(-special.betainc(k + 1, n, 1 - p))
346 # do calc in place
347 logcdf = cdf
348 with np.errstate(divide='ignore'):
349 logcdf[cond] = f1(k[cond], n[cond], p[cond])
350 logcdf[~cond] = np.log(cdf[~cond])
351 return logcdf
353 def _sf(self, x, n, p):
354 k = floor(x)
355 return _boost._nbinom_sf(k, n, p)
357 def _isf(self, x, n, p):
358 with warnings.catch_warnings():
359 # See gh-14901
360 message = "overflow encountered in _nbinom_isf"
361 warnings.filterwarnings('ignore', message=message)
362 return _boost._nbinom_isf(x, n, p)
364 def _ppf(self, q, n, p):
365 with warnings.catch_warnings():
366 message = "overflow encountered in _nbinom_ppf"
367 warnings.filterwarnings('ignore', message=message)
368 return _boost._nbinom_ppf(q, n, p)
370 def _stats(self, n, p):
371 return (
372 _boost._nbinom_mean(n, p),
373 _boost._nbinom_variance(n, p),
374 _boost._nbinom_skewness(n, p),
375 _boost._nbinom_kurtosis_excess(n, p),
376 )
379nbinom = nbinom_gen(name='nbinom')
382class geom_gen(rv_discrete):
383 r"""A geometric discrete random variable.
385 %(before_notes)s
387 Notes
388 -----
389 The probability mass function for `geom` is:
391 .. math::
393 f(k) = (1-p)^{k-1} p
395 for :math:`k \ge 1`, :math:`0 < p \leq 1`
397 `geom` takes :math:`p` as shape parameter,
398 where :math:`p` is the probability of a single success
399 and :math:`1-p` is the probability of a single failure.
401 %(after_notes)s
403 See Also
404 --------
405 planck
407 %(example)s
409 """
411 def _shape_info(self):
412 return [_ShapeInfo("p", False, (0, 1), (True, True))]
414 def _rvs(self, p, size=None, random_state=None):
415 return random_state.geometric(p, size=size)
417 def _argcheck(self, p):
418 return (p <= 1) & (p > 0)
420 def _pmf(self, k, p):
421 return np.power(1-p, k-1) * p
423 def _logpmf(self, k, p):
424 return special.xlog1py(k - 1, -p) + log(p)
426 def _cdf(self, x, p):
427 k = floor(x)
428 return -expm1(log1p(-p)*k)
430 def _sf(self, x, p):
431 return np.exp(self._logsf(x, p))
433 def _logsf(self, x, p):
434 k = floor(x)
435 return k*log1p(-p)
437 def _ppf(self, q, p):
438 vals = ceil(log1p(-q) / log1p(-p))
439 temp = self._cdf(vals-1, p)
440 return np.where((temp >= q) & (vals > 0), vals-1, vals)
442 def _stats(self, p):
443 mu = 1.0/p
444 qr = 1.0-p
445 var = qr / p / p
446 g1 = (2.0-p) / sqrt(qr)
447 g2 = np.polyval([1, -6, 6], p)/(1.0-p)
448 return mu, var, g1, g2
451geom = geom_gen(a=1, name='geom', longname="A geometric")
454class hypergeom_gen(rv_discrete):
455 r"""A hypergeometric discrete random variable.
457 The hypergeometric distribution models drawing objects from a bin.
458 `M` is the total number of objects, `n` is total number of Type I objects.
459 The random variate represents the number of Type I objects in `N` drawn
460 without replacement from the total population.
462 %(before_notes)s
464 Notes
465 -----
466 The symbols used to denote the shape parameters (`M`, `n`, and `N`) are not
467 universally accepted. See the Examples for a clarification of the
468 definitions used here.
470 The probability mass function is defined as,
472 .. math:: p(k, M, n, N) = \frac{\binom{n}{k} \binom{M - n}{N - k}}
473 {\binom{M}{N}}
475 for :math:`k \in [\max(0, N - M + n), \min(n, N)]`, where the binomial
476 coefficients are defined as,
478 .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
480 %(after_notes)s
482 Examples
483 --------
484 >>> import numpy as np
485 >>> from scipy.stats import hypergeom
486 >>> import matplotlib.pyplot as plt
488 Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
489 we want to know the probability of finding a given number of dogs if we
490 choose at random 12 of the 20 animals, we can initialize a frozen
491 distribution and plot the probability mass function:
493 >>> [M, n, N] = [20, 7, 12]
494 >>> rv = hypergeom(M, n, N)
495 >>> x = np.arange(0, n+1)
496 >>> pmf_dogs = rv.pmf(x)
498 >>> fig = plt.figure()
499 >>> ax = fig.add_subplot(111)
500 >>> ax.plot(x, pmf_dogs, 'bo')
501 >>> ax.vlines(x, 0, pmf_dogs, lw=2)
502 >>> ax.set_xlabel('# of dogs in our group of chosen animals')
503 >>> ax.set_ylabel('hypergeom PMF')
504 >>> plt.show()
506 Instead of using a frozen distribution we can also use `hypergeom`
507 methods directly. To for example obtain the cumulative distribution
508 function, use:
510 >>> prb = hypergeom.cdf(x, M, n, N)
512 And to generate random numbers:
514 >>> R = hypergeom.rvs(M, n, N, size=10)
516 See Also
517 --------
518 nhypergeom, binom, nbinom
520 """
521 def _shape_info(self):
522 return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
523 _ShapeInfo("n", True, (0, np.inf), (True, False)),
524 _ShapeInfo("N", True, (0, np.inf), (True, False))]
526 def _rvs(self, M, n, N, size=None, random_state=None):
527 return random_state.hypergeometric(n, M-n, N, size=size)
529 def _get_support(self, M, n, N):
530 return np.maximum(N-(M-n), 0), np.minimum(n, N)
532 def _argcheck(self, M, n, N):
533 cond = (M > 0) & (n >= 0) & (N >= 0)
534 cond &= (n <= M) & (N <= M)
535 cond &= _isintegral(M) & _isintegral(n) & _isintegral(N)
536 return cond
538 def _logpmf(self, k, M, n, N):
539 tot, good = M, n
540 bad = tot - good
541 result = (betaln(good+1, 1) + betaln(bad+1, 1) + betaln(tot-N+1, N+1) -
542 betaln(k+1, good-k+1) - betaln(N-k+1, bad-N+k+1) -
543 betaln(tot+1, 1))
544 return result
546 def _pmf(self, k, M, n, N):
547 return _boost._hypergeom_pdf(k, n, N, M)
549 def _cdf(self, k, M, n, N):
550 return _boost._hypergeom_cdf(k, n, N, M)
552 def _stats(self, M, n, N):
553 M, n, N = 1. * M, 1. * n, 1. * N
554 m = M - n
556 # Boost kurtosis_excess doesn't return the same as the value
557 # computed here.
558 g2 = M * (M + 1) - 6. * N * (M - N) - 6. * n * m
559 g2 *= (M - 1) * M * M
560 g2 += 6. * n * N * (M - N) * m * (5. * M - 6)
561 g2 /= n * N * (M - N) * m * (M - 2.) * (M - 3.)
562 return (
563 _boost._hypergeom_mean(n, N, M),
564 _boost._hypergeom_variance(n, N, M),
565 _boost._hypergeom_skewness(n, N, M),
566 g2,
567 )
569 def _entropy(self, M, n, N):
570 k = np.r_[N - (M - n):min(n, N) + 1]
571 vals = self.pmf(k, M, n, N)
572 return np.sum(entr(vals), axis=0)
574 def _sf(self, k, M, n, N):
575 return _boost._hypergeom_sf(k, n, N, M)
577 def _logsf(self, k, M, n, N):
578 res = []
579 for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
580 if (quant + 0.5) * (tot + 0.5) < (good - 0.5) * (draw - 0.5):
581 # Less terms to sum if we calculate log(1-cdf)
582 res.append(log1p(-exp(self.logcdf(quant, tot, good, draw))))
583 else:
584 # Integration over probability mass function using logsumexp
585 k2 = np.arange(quant + 1, draw + 1)
586 res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
587 return np.asarray(res)
589 def _logcdf(self, k, M, n, N):
590 res = []
591 for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
592 if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
593 # Less terms to sum if we calculate log(1-sf)
594 res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
595 else:
596 # Integration over probability mass function using logsumexp
597 k2 = np.arange(0, quant + 1)
598 res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
599 return np.asarray(res)
602hypergeom = hypergeom_gen(name='hypergeom')
605class nhypergeom_gen(rv_discrete):
606 r"""A negative hypergeometric discrete random variable.
608 Consider a box containing :math:`M` balls:, :math:`n` red and
609 :math:`M-n` blue. We randomly sample balls from the box, one
610 at a time and *without* replacement, until we have picked :math:`r`
611 blue balls. `nhypergeom` is the distribution of the number of
612 red balls :math:`k` we have picked.
614 %(before_notes)s
616 Notes
617 -----
618 The symbols used to denote the shape parameters (`M`, `n`, and `r`) are not
619 universally accepted. See the Examples for a clarification of the
620 definitions used here.
622 The probability mass function is defined as,
624 .. math:: f(k; M, n, r) = \frac{{{k+r-1}\choose{k}}{{M-r-k}\choose{n-k}}}
625 {{M \choose n}}
627 for :math:`k \in [0, n]`, :math:`n \in [0, M]`, :math:`r \in [0, M-n]`,
628 and the binomial coefficient is:
630 .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
632 It is equivalent to observing :math:`k` successes in :math:`k+r-1`
633 samples with :math:`k+r`'th sample being a failure. The former
634 can be modelled as a hypergeometric distribution. The probability
635 of the latter is simply the number of failures remaining
636 :math:`M-n-(r-1)` divided by the size of the remaining population
637 :math:`M-(k+r-1)`. This relationship can be shown as:
639 .. math:: NHG(k;M,n,r) = HG(k;M,n,k+r-1)\frac{(M-n-(r-1))}{(M-(k+r-1))}
641 where :math:`NHG` is probability mass function (PMF) of the
642 negative hypergeometric distribution and :math:`HG` is the
643 PMF of the hypergeometric distribution.
645 %(after_notes)s
647 Examples
648 --------
649 >>> import numpy as np
650 >>> from scipy.stats import nhypergeom
651 >>> import matplotlib.pyplot as plt
653 Suppose we have a collection of 20 animals, of which 7 are dogs.
654 Then if we want to know the probability of finding a given number
655 of dogs (successes) in a sample with exactly 12 animals that
656 aren't dogs (failures), we can initialize a frozen distribution
657 and plot the probability mass function:
659 >>> M, n, r = [20, 7, 12]
660 >>> rv = nhypergeom(M, n, r)
661 >>> x = np.arange(0, n+2)
662 >>> pmf_dogs = rv.pmf(x)
664 >>> fig = plt.figure()
665 >>> ax = fig.add_subplot(111)
666 >>> ax.plot(x, pmf_dogs, 'bo')
667 >>> ax.vlines(x, 0, pmf_dogs, lw=2)
668 >>> ax.set_xlabel('# of dogs in our group with given 12 failures')
669 >>> ax.set_ylabel('nhypergeom PMF')
670 >>> plt.show()
672 Instead of using a frozen distribution we can also use `nhypergeom`
673 methods directly. To for example obtain the probability mass
674 function, use:
676 >>> prb = nhypergeom.pmf(x, M, n, r)
678 And to generate random numbers:
680 >>> R = nhypergeom.rvs(M, n, r, size=10)
682 To verify the relationship between `hypergeom` and `nhypergeom`, use:
684 >>> from scipy.stats import hypergeom, nhypergeom
685 >>> M, n, r = 45, 13, 8
686 >>> k = 6
687 >>> nhypergeom.pmf(k, M, n, r)
688 0.06180776620271643
689 >>> hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
690 0.06180776620271644
692 See Also
693 --------
694 hypergeom, binom, nbinom
696 References
697 ----------
698 .. [1] Negative Hypergeometric Distribution on Wikipedia
699 https://en.wikipedia.org/wiki/Negative_hypergeometric_distribution
701 .. [2] Negative Hypergeometric Distribution from
702 http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Negativehypergeometric.pdf
704 """
706 def _shape_info(self):
707 return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
708 _ShapeInfo("n", True, (0, np.inf), (True, False)),
709 _ShapeInfo("r", True, (0, np.inf), (True, False))]
711 def _get_support(self, M, n, r):
712 return 0, n
714 def _argcheck(self, M, n, r):
715 cond = (n >= 0) & (n <= M) & (r >= 0) & (r <= M-n)
716 cond &= _isintegral(M) & _isintegral(n) & _isintegral(r)
717 return cond
719 def _rvs(self, M, n, r, size=None, random_state=None):
721 @_vectorize_rvs_over_shapes
722 def _rvs1(M, n, r, size, random_state):
723 # invert cdf by calculating all values in support, scalar M, n, r
724 a, b = self.support(M, n, r)
725 ks = np.arange(a, b+1)
726 cdf = self.cdf(ks, M, n, r)
727 ppf = interp1d(cdf, ks, kind='next', fill_value='extrapolate')
728 rvs = ppf(random_state.uniform(size=size)).astype(int)
729 if size is None:
730 return rvs.item()
731 return rvs
733 return _rvs1(M, n, r, size=size, random_state=random_state)
735 def _logpmf(self, k, M, n, r):
736 cond = ((r == 0) & (k == 0))
737 result = _lazywhere(~cond, (k, M, n, r),
738 lambda k, M, n, r:
739 (-betaln(k+1, r) + betaln(k+r, 1) -
740 betaln(n-k+1, M-r-n+1) + betaln(M-r-k+1, 1) +
741 betaln(n+1, M-n+1) - betaln(M+1, 1)),
742 fillvalue=0.0)
743 return result
745 def _pmf(self, k, M, n, r):
746 # same as the following but numerically more precise
747 # return comb(k+r-1, k) * comb(M-r-k, n-k) / comb(M, n)
748 return exp(self._logpmf(k, M, n, r))
750 def _stats(self, M, n, r):
751 # Promote the datatype to at least float
752 # mu = rn / (M-n+1)
753 M, n, r = 1.*M, 1.*n, 1.*r
754 mu = r*n / (M-n+1)
756 var = r*(M+1)*n / ((M-n+1)*(M-n+2)) * (1 - r / (M-n+1))
758 # The skew and kurtosis are mathematically
759 # intractable so return `None`. See [2]_.
760 g1, g2 = None, None
761 return mu, var, g1, g2
764nhypergeom = nhypergeom_gen(name='nhypergeom')
767# FIXME: Fails _cdfvec
768class logser_gen(rv_discrete):
769 r"""A Logarithmic (Log-Series, Series) discrete random variable.
771 %(before_notes)s
773 Notes
774 -----
775 The probability mass function for `logser` is:
777 .. math::
779 f(k) = - \frac{p^k}{k \log(1-p)}
781 for :math:`k \ge 1`, :math:`0 < p < 1`
783 `logser` takes :math:`p` as shape parameter,
784 where :math:`p` is the probability of a single success
785 and :math:`1-p` is the probability of a single failure.
787 %(after_notes)s
789 %(example)s
791 """
793 def _shape_info(self):
794 return [_ShapeInfo("p", False, (0, 1), (True, True))]
796 def _rvs(self, p, size=None, random_state=None):
797 # looks wrong for p>0.5, too few k=1
798 # trying to use generic is worse, no k=1 at all
799 return random_state.logseries(p, size=size)
801 def _argcheck(self, p):
802 return (p > 0) & (p < 1)
804 def _pmf(self, k, p):
805 # logser.pmf(k) = - p**k / (k*log(1-p))
806 return -np.power(p, k) * 1.0 / k / special.log1p(-p)
808 def _stats(self, p):
809 r = special.log1p(-p)
810 mu = p / (p - 1.0) / r
811 mu2p = -p / r / (p - 1.0)**2
812 var = mu2p - mu*mu
813 mu3p = -p / r * (1.0+p) / (1.0 - p)**3
814 mu3 = mu3p - 3*mu*mu2p + 2*mu**3
815 g1 = mu3 / np.power(var, 1.5)
817 mu4p = -p / r * (
818 1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
819 mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4
820 g2 = mu4 / var**2 - 3.0
821 return mu, var, g1, g2
824logser = logser_gen(a=1, name='logser', longname='A logarithmic')
827class poisson_gen(rv_discrete):
828 r"""A Poisson discrete random variable.
830 %(before_notes)s
832 Notes
833 -----
834 The probability mass function for `poisson` is:
836 .. math::
838 f(k) = \exp(-\mu) \frac{\mu^k}{k!}
840 for :math:`k \ge 0`.
842 `poisson` takes :math:`\mu \geq 0` as shape parameter.
843 When :math:`\mu = 0`, the ``pmf`` method
844 returns ``1.0`` at quantile :math:`k = 0`.
846 %(after_notes)s
848 %(example)s
850 """
852 def _shape_info(self):
853 return [_ShapeInfo("mu", False, (0, np.inf), (True, False))]
855 # Override rv_discrete._argcheck to allow mu=0.
856 def _argcheck(self, mu):
857 return mu >= 0
859 def _rvs(self, mu, size=None, random_state=None):
860 return random_state.poisson(mu, size)
862 def _logpmf(self, k, mu):
863 Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
864 return Pk
866 def _pmf(self, k, mu):
867 # poisson.pmf(k) = exp(-mu) * mu**k / k!
868 return exp(self._logpmf(k, mu))
870 def _cdf(self, x, mu):
871 k = floor(x)
872 return special.pdtr(k, mu)
874 def _sf(self, x, mu):
875 k = floor(x)
876 return special.pdtrc(k, mu)
878 def _ppf(self, q, mu):
879 vals = ceil(special.pdtrik(q, mu))
880 vals1 = np.maximum(vals - 1, 0)
881 temp = special.pdtr(vals1, mu)
882 return np.where(temp >= q, vals1, vals)
884 def _stats(self, mu):
885 var = mu
886 tmp = np.asarray(mu)
887 mu_nonzero = tmp > 0
888 g1 = _lazywhere(mu_nonzero, (tmp,), lambda x: sqrt(1.0/x), np.inf)
889 g2 = _lazywhere(mu_nonzero, (tmp,), lambda x: 1.0/x, np.inf)
890 return mu, var, g1, g2
893poisson = poisson_gen(name="poisson", longname='A Poisson')
896class planck_gen(rv_discrete):
897 r"""A Planck discrete exponential random variable.
899 %(before_notes)s
901 Notes
902 -----
903 The probability mass function for `planck` is:
905 .. math::
907 f(k) = (1-\exp(-\lambda)) \exp(-\lambda k)
909 for :math:`k \ge 0` and :math:`\lambda > 0`.
911 `planck` takes :math:`\lambda` as shape parameter. The Planck distribution
912 can be written as a geometric distribution (`geom`) with
913 :math:`p = 1 - \exp(-\lambda)` shifted by ``loc = -1``.
915 %(after_notes)s
917 See Also
918 --------
919 geom
921 %(example)s
923 """
924 def _shape_info(self):
925 return [_ShapeInfo("lambda", False, (0, np.inf), (False, False))]
927 def _argcheck(self, lambda_):
928 return lambda_ > 0
930 def _pmf(self, k, lambda_):
931 return -expm1(-lambda_)*exp(-lambda_*k)
933 def _cdf(self, x, lambda_):
934 k = floor(x)
935 return -expm1(-lambda_*(k+1))
937 def _sf(self, x, lambda_):
938 return exp(self._logsf(x, lambda_))
940 def _logsf(self, x, lambda_):
941 k = floor(x)
942 return -lambda_*(k+1)
944 def _ppf(self, q, lambda_):
945 vals = ceil(-1.0/lambda_ * log1p(-q)-1)
946 vals1 = (vals-1).clip(*(self._get_support(lambda_)))
947 temp = self._cdf(vals1, lambda_)
948 return np.where(temp >= q, vals1, vals)
950 def _rvs(self, lambda_, size=None, random_state=None):
951 # use relation to geometric distribution for sampling
952 p = -expm1(-lambda_)
953 return random_state.geometric(p, size=size) - 1.0
955 def _stats(self, lambda_):
956 mu = 1/expm1(lambda_)
957 var = exp(-lambda_)/(expm1(-lambda_))**2
958 g1 = 2*cosh(lambda_/2.0)
959 g2 = 4+2*cosh(lambda_)
960 return mu, var, g1, g2
962 def _entropy(self, lambda_):
963 C = -expm1(-lambda_)
964 return lambda_*exp(-lambda_)/C - log(C)
967planck = planck_gen(a=0, name='planck', longname='A discrete exponential ')
970class boltzmann_gen(rv_discrete):
971 r"""A Boltzmann (Truncated Discrete Exponential) random variable.
973 %(before_notes)s
975 Notes
976 -----
977 The probability mass function for `boltzmann` is:
979 .. math::
981 f(k) = (1-\exp(-\lambda)) \exp(-\lambda k) / (1-\exp(-\lambda N))
983 for :math:`k = 0,..., N-1`.
985 `boltzmann` takes :math:`\lambda > 0` and :math:`N > 0` as shape parameters.
987 %(after_notes)s
989 %(example)s
991 """
992 def _shape_info(self):
993 return [_ShapeInfo("lambda_", False, (0, np.inf), (False, False)),
994 _ShapeInfo("N", True, (0, np.inf), (False, False))]
996 def _argcheck(self, lambda_, N):
997 return (lambda_ > 0) & (N > 0) & _isintegral(N)
999 def _get_support(self, lambda_, N):
1000 return self.a, N - 1
1002 def _pmf(self, k, lambda_, N):
1003 # boltzmann.pmf(k) =
1004 # (1-exp(-lambda_)*exp(-lambda_*k)/(1-exp(-lambda_*N))
1005 fact = (1-exp(-lambda_))/(1-exp(-lambda_*N))
1006 return fact*exp(-lambda_*k)
1008 def _cdf(self, x, lambda_, N):
1009 k = floor(x)
1010 return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))
1012 def _ppf(self, q, lambda_, N):
1013 qnew = q*(1-exp(-lambda_*N))
1014 vals = ceil(-1.0/lambda_ * log(1-qnew)-1)
1015 vals1 = (vals-1).clip(0.0, np.inf)
1016 temp = self._cdf(vals1, lambda_, N)
1017 return np.where(temp >= q, vals1, vals)
1019 def _stats(self, lambda_, N):
1020 z = exp(-lambda_)
1021 zN = exp(-lambda_*N)
1022 mu = z/(1.0-z)-N*zN/(1-zN)
1023 var = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
1024 trm = (1-zN)/(1-z)
1025 trm2 = (z*trm**2 - N*N*zN)
1026 g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
1027 g1 = g1 / trm2**(1.5)
1028 g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
1029 g2 = g2 / trm2 / trm2
1030 return mu, var, g1, g2
1033boltzmann = boltzmann_gen(name='boltzmann', a=0,
1034 longname='A truncated discrete exponential ')
1037class randint_gen(rv_discrete):
1038 r"""A uniform discrete random variable.
1040 %(before_notes)s
1042 Notes
1043 -----
1044 The probability mass function for `randint` is:
1046 .. math::
1048 f(k) = \frac{1}{\texttt{high} - \texttt{low}}
1050 for :math:`k \in \{\texttt{low}, \dots, \texttt{high} - 1\}`.
1052 `randint` takes :math:`\texttt{low}` and :math:`\texttt{high}` as shape
1053 parameters.
1055 %(after_notes)s
1057 %(example)s
1059 """
1061 def _shape_info(self):
1062 return [_ShapeInfo("low", True, (-np.inf, np.inf), (False, False)),
1063 _ShapeInfo("high", True, (-np.inf, np.inf), (False, False))]
1065 def _argcheck(self, low, high):
1066 return (high > low) & _isintegral(low) & _isintegral(high)
1068 def _get_support(self, low, high):
1069 return low, high-1
1071 def _pmf(self, k, low, high):
1072 # randint.pmf(k) = 1./(high - low)
1073 p = np.ones_like(k) / (high - low)
1074 return np.where((k >= low) & (k < high), p, 0.)
1076 def _cdf(self, x, low, high):
1077 k = floor(x)
1078 return (k - low + 1.) / (high - low)
1080 def _ppf(self, q, low, high):
1081 vals = ceil(q * (high - low) + low) - 1
1082 vals1 = (vals - 1).clip(low, high)
1083 temp = self._cdf(vals1, low, high)
1084 return np.where(temp >= q, vals1, vals)
1086 def _stats(self, low, high):
1087 m2, m1 = np.asarray(high), np.asarray(low)
1088 mu = (m2 + m1 - 1.0) / 2
1089 d = m2 - m1
1090 var = (d*d - 1) / 12.0
1091 g1 = 0.0
1092 g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)
1093 return mu, var, g1, g2
1095 def _rvs(self, low, high, size=None, random_state=None):
1096 """An array of *size* random integers >= ``low`` and < ``high``."""
1097 if np.asarray(low).size == 1 and np.asarray(high).size == 1:
1098 # no need to vectorize in that case
1099 return rng_integers(random_state, low, high, size=size)
1101 if size is not None:
1102 # NumPy's RandomState.randint() doesn't broadcast its arguments.
1103 # Use `broadcast_to()` to extend the shapes of low and high
1104 # up to size. Then we can use the numpy.vectorize'd
1105 # randint without needing to pass it a `size` argument.
1106 low = np.broadcast_to(low, size)
1107 high = np.broadcast_to(high, size)
1108 randint = np.vectorize(partial(rng_integers, random_state),
1109 otypes=[np.int_])
1110 return randint(low, high)
1112 def _entropy(self, low, high):
1113 return log(high - low)
1116randint = randint_gen(name='randint', longname='A discrete uniform '
1117 '(random integer)')
1120# FIXME: problems sampling.
1121class zipf_gen(rv_discrete):
1122 r"""A Zipf (Zeta) discrete random variable.
1124 %(before_notes)s
1126 See Also
1127 --------
1128 zipfian
1130 Notes
1131 -----
1132 The probability mass function for `zipf` is:
1134 .. math::
1136 f(k, a) = \frac{1}{\zeta(a) k^a}
1138 for :math:`k \ge 1`, :math:`a > 1`.
1140 `zipf` takes :math:`a > 1` as shape parameter. :math:`\zeta` is the
1141 Riemann zeta function (`scipy.special.zeta`)
1143 The Zipf distribution is also known as the zeta distribution, which is
1144 a special case of the Zipfian distribution (`zipfian`).
1146 %(after_notes)s
1148 References
1149 ----------
1150 .. [1] "Zeta Distribution", Wikipedia,
1151 https://en.wikipedia.org/wiki/Zeta_distribution
1153 %(example)s
1155 Confirm that `zipf` is the large `n` limit of `zipfian`.
1157 >>> import numpy as np
1158 >>> from scipy.stats import zipfian
1159 >>> k = np.arange(11)
1160 >>> np.allclose(zipf.pmf(k, a), zipfian.pmf(k, a, n=10000000))
1161 True
1163 """
1165 def _shape_info(self):
1166 return [_ShapeInfo("a", False, (1, np.inf), (False, False))]
1168 def _rvs(self, a, size=None, random_state=None):
1169 return random_state.zipf(a, size=size)
1171 def _argcheck(self, a):
1172 return a > 1
1174 def _pmf(self, k, a):
1175 # zipf.pmf(k, a) = 1/(zeta(a) * k**a)
1176 Pk = 1.0 / special.zeta(a, 1) / k**a
1177 return Pk
1179 def _munp(self, n, a):
1180 return _lazywhere(
1181 a > n + 1, (a, n),
1182 lambda a, n: special.zeta(a - n, 1) / special.zeta(a, 1),
1183 np.inf)
1186zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')
1189def _gen_harmonic_gt1(n, a):
1190 """Generalized harmonic number, a > 1"""
1191 # See https://en.wikipedia.org/wiki/Harmonic_number; search for "hurwitz"
1192 return zeta(a, 1) - zeta(a, n+1)
1195def _gen_harmonic_leq1(n, a):
1196 """Generalized harmonic number, a <= 1"""
1197 if not np.size(n):
1198 return n
1199 n_max = np.max(n) # loop starts at maximum of all n
1200 out = np.zeros_like(a, dtype=float)
1201 # add terms of harmonic series; starting from smallest to avoid roundoff
1202 for i in np.arange(n_max, 0, -1, dtype=float):
1203 mask = i <= n # don't add terms after nth
1204 out[mask] += 1/i**a[mask]
1205 return out
1208def _gen_harmonic(n, a):
1209 """Generalized harmonic number"""
1210 n, a = np.broadcast_arrays(n, a)
1211 return _lazywhere(a > 1, (n, a),
1212 f=_gen_harmonic_gt1, f2=_gen_harmonic_leq1)
1215class zipfian_gen(rv_discrete):
1216 r"""A Zipfian discrete random variable.
1218 %(before_notes)s
1220 See Also
1221 --------
1222 zipf
1224 Notes
1225 -----
1226 The probability mass function for `zipfian` is:
1228 .. math::
1230 f(k, a, n) = \frac{1}{H_{n,a} k^a}
1232 for :math:`k \in \{1, 2, \dots, n-1, n\}`, :math:`a \ge 0`,
1233 :math:`n \in \{1, 2, 3, \dots\}`.
1235 `zipfian` takes :math:`a` and :math:`n` as shape parameters.
1236 :math:`H_{n,a}` is the :math:`n`:sup:`th` generalized harmonic
1237 number of order :math:`a`.
1239 The Zipfian distribution reduces to the Zipf (zeta) distribution as
1240 :math:`n \rightarrow \infty`.
1242 %(after_notes)s
1244 References
1245 ----------
1246 .. [1] "Zipf's Law", Wikipedia, https://en.wikipedia.org/wiki/Zipf's_law
1247 .. [2] Larry Leemis, "Zipf Distribution", Univariate Distribution
1248 Relationships. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
1250 %(example)s
1252 Confirm that `zipfian` reduces to `zipf` for large `n`, `a > 1`.
1254 >>> import numpy as np
1255 >>> from scipy.stats import zipf
1256 >>> k = np.arange(11)
1257 >>> np.allclose(zipfian.pmf(k, a=3.5, n=10000000), zipf.pmf(k, a=3.5))
1258 True
1260 """
1262 def _shape_info(self):
1263 return [_ShapeInfo("a", False, (0, np.inf), (True, False)),
1264 _ShapeInfo("n", True, (0, np.inf), (False, False))]
1266 def _argcheck(self, a, n):
1267 # we need np.asarray here because moment (maybe others) don't convert
1268 return (a >= 0) & (n > 0) & (n == np.asarray(n, dtype=int))
1270 def _get_support(self, a, n):
1271 return 1, n
1273 def _pmf(self, k, a, n):
1274 return 1.0 / _gen_harmonic(n, a) / k**a
1276 def _cdf(self, k, a, n):
1277 return _gen_harmonic(k, a) / _gen_harmonic(n, a)
1279 def _sf(self, k, a, n):
1280 k = k + 1 # # to match SciPy convention
1281 # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
1282 return ((k**a*(_gen_harmonic(n, a) - _gen_harmonic(k, a)) + 1)
1283 / (k**a*_gen_harmonic(n, a)))
1285 def _stats(self, a, n):
1286 # see # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
1287 Hna = _gen_harmonic(n, a)
1288 Hna1 = _gen_harmonic(n, a-1)
1289 Hna2 = _gen_harmonic(n, a-2)
1290 Hna3 = _gen_harmonic(n, a-3)
1291 Hna4 = _gen_harmonic(n, a-4)
1292 mu1 = Hna1/Hna
1293 mu2n = (Hna2*Hna - Hna1**2)
1294 mu2d = Hna**2
1295 mu2 = mu2n / mu2d
1296 g1 = (Hna3/Hna - 3*Hna1*Hna2/Hna**2 + 2*Hna1**3/Hna**3)/mu2**(3/2)
1297 g2 = (Hna**3*Hna4 - 4*Hna**2*Hna1*Hna3 + 6*Hna*Hna1**2*Hna2
1298 - 3*Hna1**4) / mu2n**2
1299 g2 -= 3
1300 return mu1, mu2, g1, g2
1303zipfian = zipfian_gen(a=1, name='zipfian', longname='A Zipfian')
1306class dlaplace_gen(rv_discrete):
1307 r"""A Laplacian discrete random variable.
1309 %(before_notes)s
1311 Notes
1312 -----
1313 The probability mass function for `dlaplace` is:
1315 .. math::
1317 f(k) = \tanh(a/2) \exp(-a |k|)
1319 for integers :math:`k` and :math:`a > 0`.
1321 `dlaplace` takes :math:`a` as shape parameter.
1323 %(after_notes)s
1325 %(example)s
1327 """
1329 def _shape_info(self):
1330 return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
1332 def _pmf(self, k, a):
1333 # dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
1334 return tanh(a/2.0) * exp(-a * abs(k))
1336 def _cdf(self, x, a):
1337 k = floor(x)
1338 f = lambda k, a: 1.0 - exp(-a * k) / (exp(a) + 1)
1339 f2 = lambda k, a: exp(a * (k+1)) / (exp(a) + 1)
1340 return _lazywhere(k >= 0, (k, a), f=f, f2=f2)
1342 def _ppf(self, q, a):
1343 const = 1 + exp(a)
1344 vals = ceil(np.where(q < 1.0 / (1 + exp(-a)),
1345 log(q*const) / a - 1,
1346 -log((1-q) * const) / a))
1347 vals1 = vals - 1
1348 return np.where(self._cdf(vals1, a) >= q, vals1, vals)
1350 def _stats(self, a):
1351 ea = exp(a)
1352 mu2 = 2.*ea/(ea-1.)**2
1353 mu4 = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
1354 return 0., mu2, 0., mu4/mu2**2 - 3.
1356 def _entropy(self, a):
1357 return a / sinh(a) - log(tanh(a/2.0))
1359 def _rvs(self, a, size=None, random_state=None):
1360 # The discrete Laplace is equivalent to the two-sided geometric
1361 # distribution with PMF:
1362 # f(k) = (1 - alpha)/(1 + alpha) * alpha^abs(k)
1363 # Reference:
1364 # https://www.sciencedirect.com/science/
1365 # article/abs/pii/S0378375804003519
1366 # Furthermore, the two-sided geometric distribution is
1367 # equivalent to the difference between two iid geometric
1368 # distributions.
1369 # Reference (page 179):
1370 # https://pdfs.semanticscholar.org/61b3/
1371 # b99f466815808fd0d03f5d2791eea8b541a1.pdf
1372 # Thus, we can leverage the following:
1373 # 1) alpha = e^-a
1374 # 2) probability_of_success = 1 - alpha (Bernoulli trial)
1375 probOfSuccess = -np.expm1(-np.asarray(a))
1376 x = random_state.geometric(probOfSuccess, size=size)
1377 y = random_state.geometric(probOfSuccess, size=size)
1378 return x - y
1381dlaplace = dlaplace_gen(a=-np.inf,
1382 name='dlaplace', longname='A discrete Laplacian')
1385class skellam_gen(rv_discrete):
1386 r"""A Skellam discrete random variable.
1388 %(before_notes)s
1390 Notes
1391 -----
1392 Probability distribution of the difference of two correlated or
1393 uncorrelated Poisson random variables.
1395 Let :math:`k_1` and :math:`k_2` be two Poisson-distributed r.v. with
1396 expected values :math:`\lambda_1` and :math:`\lambda_2`. Then,
1397 :math:`k_1 - k_2` follows a Skellam distribution with parameters
1398 :math:`\mu_1 = \lambda_1 - \rho \sqrt{\lambda_1 \lambda_2}` and
1399 :math:`\mu_2 = \lambda_2 - \rho \sqrt{\lambda_1 \lambda_2}`, where
1400 :math:`\rho` is the correlation coefficient between :math:`k_1` and
1401 :math:`k_2`. If the two Poisson-distributed r.v. are independent then
1402 :math:`\rho = 0`.
1404 Parameters :math:`\mu_1` and :math:`\mu_2` must be strictly positive.
1406 For details see: https://en.wikipedia.org/wiki/Skellam_distribution
1408 `skellam` takes :math:`\mu_1` and :math:`\mu_2` as shape parameters.
1410 %(after_notes)s
1412 %(example)s
1414 """
1415 def _shape_info(self):
1416 return [_ShapeInfo("mu1", False, (0, np.inf), (False, False)),
1417 _ShapeInfo("mu2", False, (0, np.inf), (False, False))]
1419 def _rvs(self, mu1, mu2, size=None, random_state=None):
1420 n = size
1421 return (random_state.poisson(mu1, n) -
1422 random_state.poisson(mu2, n))
1424 def _pmf(self, x, mu1, mu2):
1425 with warnings.catch_warnings():
1426 message = "overflow encountered in _ncx2_pdf"
1427 warnings.filterwarnings("ignore", message=message)
1428 px = np.where(x < 0,
1429 _boost._ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2,
1430 _boost._ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2)
1431 # ncx2.pdf() returns nan's for extremely low probabilities
1432 return px
1434 def _cdf(self, x, mu1, mu2):
1435 x = floor(x)
1436 px = np.where(x < 0,
1437 _boost._ncx2_cdf(2*mu2, -2*x, 2*mu1),
1438 1 - _boost._ncx2_cdf(2*mu1, 2*(x+1), 2*mu2))
1439 return px
1441 def _stats(self, mu1, mu2):
1442 mean = mu1 - mu2
1443 var = mu1 + mu2
1444 g1 = mean / sqrt((var)**3)
1445 g2 = 1 / var
1446 return mean, var, g1, g2
1449skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')
1452class yulesimon_gen(rv_discrete):
1453 r"""A Yule-Simon discrete random variable.
1455 %(before_notes)s
1457 Notes
1458 -----
1460 The probability mass function for the `yulesimon` is:
1462 .. math::
1464 f(k) = \alpha B(k, \alpha+1)
1466 for :math:`k=1,2,3,...`, where :math:`\alpha>0`.
1467 Here :math:`B` refers to the `scipy.special.beta` function.
1469 The sampling of random variates is based on pg 553, Section 6.3 of [1]_.
1470 Our notation maps to the referenced logic via :math:`\alpha=a-1`.
1472 For details see the wikipedia entry [2]_.
1474 References
1475 ----------
1476 .. [1] Devroye, Luc. "Non-uniform Random Variate Generation",
1477 (1986) Springer, New York.
1479 .. [2] https://en.wikipedia.org/wiki/Yule-Simon_distribution
1481 %(after_notes)s
1483 %(example)s
1485 """
1486 def _shape_info(self):
1487 return [_ShapeInfo("alpha", False, (0, np.inf), (False, False))]
1489 def _rvs(self, alpha, size=None, random_state=None):
1490 E1 = random_state.standard_exponential(size)
1491 E2 = random_state.standard_exponential(size)
1492 ans = ceil(-E1 / log1p(-exp(-E2 / alpha)))
1493 return ans
1495 def _pmf(self, x, alpha):
1496 return alpha * special.beta(x, alpha + 1)
1498 def _argcheck(self, alpha):
1499 return (alpha > 0)
1501 def _logpmf(self, x, alpha):
1502 return log(alpha) + special.betaln(x, alpha + 1)
1504 def _cdf(self, x, alpha):
1505 return 1 - x * special.beta(x, alpha + 1)
1507 def _sf(self, x, alpha):
1508 return x * special.beta(x, alpha + 1)
1510 def _logsf(self, x, alpha):
1511 return log(x) + special.betaln(x, alpha + 1)
1513 def _stats(self, alpha):
1514 mu = np.where(alpha <= 1, np.inf, alpha / (alpha - 1))
1515 mu2 = np.where(alpha > 2,
1516 alpha**2 / ((alpha - 2.0) * (alpha - 1)**2),
1517 np.inf)
1518 mu2 = np.where(alpha <= 1, np.nan, mu2)
1519 g1 = np.where(alpha > 3,
1520 sqrt(alpha - 2) * (alpha + 1)**2 / (alpha * (alpha - 3)),
1521 np.inf)
1522 g1 = np.where(alpha <= 2, np.nan, g1)
1523 g2 = np.where(alpha > 4,
1524 (alpha + 3) + (alpha**3 - 49 * alpha - 22) / (alpha *
1525 (alpha - 4) * (alpha - 3)), np.inf)
1526 g2 = np.where(alpha <= 2, np.nan, g2)
1527 return mu, mu2, g1, g2
1530yulesimon = yulesimon_gen(name='yulesimon', a=1)
1533def _vectorize_rvs_over_shapes(_rvs1):
1534 """Decorator that vectorizes _rvs method to work on ndarray shapes"""
1535 # _rvs1 must be a _function_ that accepts _scalar_ args as positional
1536 # arguments, `size` and `random_state` as keyword arguments.
1537 # _rvs1 must return a random variate array with shape `size`. If `size` is
1538 # None, _rvs1 must return a scalar.
1539 # When applied to _rvs1, this decorator broadcasts ndarray args
1540 # and loops over them, calling _rvs1 for each set of scalar args.
1541 # For usage example, see _nchypergeom_gen
1542 def _rvs(*args, size, random_state):
1543 _rvs1_size, _rvs1_indices = _check_shape(args[0].shape, size)
1545 size = np.array(size)
1546 _rvs1_size = np.array(_rvs1_size)
1547 _rvs1_indices = np.array(_rvs1_indices)
1549 if np.all(_rvs1_indices): # all args are scalars
1550 return _rvs1(*args, size, random_state)
1552 out = np.empty(size)
1554 # out.shape can mix dimensions associated with arg_shape and _rvs1_size
1555 # Sort them to arg_shape + _rvs1_size for easy indexing of dimensions
1556 # corresponding with the different sets of scalar args
1557 j0 = np.arange(out.ndim)
1558 j1 = np.hstack((j0[~_rvs1_indices], j0[_rvs1_indices]))
1559 out = np.moveaxis(out, j1, j0)
1561 for i in np.ndindex(*size[~_rvs1_indices]):
1562 # arg can be squeezed because singleton dimensions will be
1563 # associated with _rvs1_size, not arg_shape per _check_shape
1564 out[i] = _rvs1(*[np.squeeze(arg)[i] for arg in args],
1565 _rvs1_size, random_state)
1567 return np.moveaxis(out, j0, j1) # move axes back before returning
1568 return _rvs
1571class _nchypergeom_gen(rv_discrete):
1572 r"""A noncentral hypergeometric discrete random variable.
1574 For subclassing by nchypergeom_fisher_gen and nchypergeom_wallenius_gen.
1576 """
1578 rvs_name = None
1579 dist = None
1581 def _shape_info(self):
1582 return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
1583 _ShapeInfo("n", True, (0, np.inf), (True, False)),
1584 _ShapeInfo("N", True, (0, np.inf), (True, False)),
1585 _ShapeInfo("odds", False, (0, np.inf), (False, False))]
1587 def _get_support(self, M, n, N, odds):
1588 N, m1, n = M, n, N # follow Wikipedia notation
1589 m2 = N - m1
1590 x_min = np.maximum(0, n - m2)
1591 x_max = np.minimum(n, m1)
1592 return x_min, x_max
1594 def _argcheck(self, M, n, N, odds):
1595 M, n = np.asarray(M), np.asarray(n),
1596 N, odds = np.asarray(N), np.asarray(odds)
1597 cond1 = (M.astype(int) == M) & (M >= 0)
1598 cond2 = (n.astype(int) == n) & (n >= 0)
1599 cond3 = (N.astype(int) == N) & (N >= 0)
1600 cond4 = odds > 0
1601 cond5 = N <= M
1602 cond6 = n <= M
1603 return cond1 & cond2 & cond3 & cond4 & cond5 & cond6
1605 def _rvs(self, M, n, N, odds, size=None, random_state=None):
1607 @_vectorize_rvs_over_shapes
1608 def _rvs1(M, n, N, odds, size, random_state):
1609 length = np.prod(size)
1610 urn = _PyStochasticLib3()
1611 rv_gen = getattr(urn, self.rvs_name)
1612 rvs = rv_gen(N, n, M, odds, length, random_state)
1613 rvs = rvs.reshape(size)
1614 return rvs
1616 return _rvs1(M, n, N, odds, size=size, random_state=random_state)
1618 def _pmf(self, x, M, n, N, odds):
1620 x, M, n, N, odds = np.broadcast_arrays(x, M, n, N, odds)
1621 if x.size == 0: # np.vectorize doesn't work with zero size input
1622 return np.empty_like(x)
1624 @np.vectorize
1625 def _pmf1(x, M, n, N, odds):
1626 urn = self.dist(N, n, M, odds, 1e-12)
1627 return urn.probability(x)
1629 return _pmf1(x, M, n, N, odds)
1631 def _stats(self, M, n, N, odds, moments):
1633 @np.vectorize
1634 def _moments1(M, n, N, odds):
1635 urn = self.dist(N, n, M, odds, 1e-12)
1636 return urn.moments()
1638 m, v = (_moments1(M, n, N, odds) if ("m" in moments or "v" in moments)
1639 else (None, None))
1640 s, k = None, None
1641 return m, v, s, k
1644class nchypergeom_fisher_gen(_nchypergeom_gen):
1645 r"""A Fisher's noncentral hypergeometric discrete random variable.
1647 Fisher's noncentral hypergeometric distribution models drawing objects of
1648 two types from a bin. `M` is the total number of objects, `n` is the
1649 number of Type I objects, and `odds` is the odds ratio: the odds of
1650 selecting a Type I object rather than a Type II object when there is only
1651 one object of each type.
1652 The random variate represents the number of Type I objects drawn if we
1653 take a handful of objects from the bin at once and find out afterwards
1654 that we took `N` objects.
1656 %(before_notes)s
1658 See Also
1659 --------
1660 nchypergeom_wallenius, hypergeom, nhypergeom
1662 Notes
1663 -----
1664 Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
1665 with parameters `N`, `n`, and `M` (respectively) as defined above.
1667 The probability mass function is defined as
1669 .. math::
1671 p(x; M, n, N, \omega) =
1672 \frac{\binom{n}{x}\binom{M - n}{N-x}\omega^x}{P_0},
1674 for
1675 :math:`x \in [x_l, x_u]`,
1676 :math:`M \in {\mathbb N}`,
1677 :math:`n \in [0, M]`,
1678 :math:`N \in [0, M]`,
1679 :math:`\omega > 0`,
1680 where
1681 :math:`x_l = \max(0, N - (M - n))`,
1682 :math:`x_u = \min(N, n)`,
1684 .. math::
1686 P_0 = \sum_{y=x_l}^{x_u} \binom{n}{y}\binom{M - n}{N-y}\omega^y,
1688 and the binomial coefficients are defined as
1690 .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
1692 `nchypergeom_fisher` uses the BiasedUrn package by Agner Fog with
1693 permission for it to be distributed under SciPy's license.
1695 The symbols used to denote the shape parameters (`N`, `n`, and `M`) are not
1696 universally accepted; they are chosen for consistency with `hypergeom`.
1698 Note that Fisher's noncentral hypergeometric distribution is distinct
1699 from Wallenius' noncentral hypergeometric distribution, which models
1700 drawing a pre-determined `N` objects from a bin one by one.
1701 When the odds ratio is unity, however, both distributions reduce to the
1702 ordinary hypergeometric distribution.
1704 %(after_notes)s
1706 References
1707 ----------
1708 .. [1] Agner Fog, "Biased Urn Theory".
1709 https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf
1711 .. [2] "Fisher's noncentral hypergeometric distribution", Wikipedia,
1712 https://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution
1714 %(example)s
1716 """
1718 rvs_name = "rvs_fisher"
1719 dist = _PyFishersNCHypergeometric
1722nchypergeom_fisher = nchypergeom_fisher_gen(
1723 name='nchypergeom_fisher',
1724 longname="A Fisher's noncentral hypergeometric")
1727class nchypergeom_wallenius_gen(_nchypergeom_gen):
1728 r"""A Wallenius' noncentral hypergeometric discrete random variable.
1730 Wallenius' noncentral hypergeometric distribution models drawing objects of
1731 two types from a bin. `M` is the total number of objects, `n` is the
1732 number of Type I objects, and `odds` is the odds ratio: the odds of
1733 selecting a Type I object rather than a Type II object when there is only
1734 one object of each type.
1735 The random variate represents the number of Type I objects drawn if we
1736 draw a pre-determined `N` objects from a bin one by one.
1738 %(before_notes)s
1740 See Also
1741 --------
1742 nchypergeom_fisher, hypergeom, nhypergeom
1744 Notes
1745 -----
1746 Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
1747 with parameters `N`, `n`, and `M` (respectively) as defined above.
1749 The probability mass function is defined as
1751 .. math::
1753 p(x; N, n, M) = \binom{n}{x} \binom{M - n}{N-x}
1754 \int_0^1 \left(1-t^{\omega/D}\right)^x\left(1-t^{1/D}\right)^{N-x} dt
1756 for
1757 :math:`x \in [x_l, x_u]`,
1758 :math:`M \in {\mathbb N}`,
1759 :math:`n \in [0, M]`,
1760 :math:`N \in [0, M]`,
1761 :math:`\omega > 0`,
1762 where
1763 :math:`x_l = \max(0, N - (M - n))`,
1764 :math:`x_u = \min(N, n)`,
1766 .. math::
1768 D = \omega(n - x) + ((M - n)-(N-x)),
1770 and the binomial coefficients are defined as
1772 .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
1774 `nchypergeom_wallenius` uses the BiasedUrn package by Agner Fog with
1775 permission for it to be distributed under SciPy's license.
1777 The symbols used to denote the shape parameters (`N`, `n`, and `M`) are not
1778 universally accepted; they are chosen for consistency with `hypergeom`.
1780 Note that Wallenius' noncentral hypergeometric distribution is distinct
1781 from Fisher's noncentral hypergeometric distribution, which models
1782 take a handful of objects from the bin at once, finding out afterwards
1783 that `N` objects were taken.
1784 When the odds ratio is unity, however, both distributions reduce to the
1785 ordinary hypergeometric distribution.
1787 %(after_notes)s
1789 References
1790 ----------
1791 .. [1] Agner Fog, "Biased Urn Theory".
1792 https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf
1794 .. [2] "Wallenius' noncentral hypergeometric distribution", Wikipedia,
1795 https://en.wikipedia.org/wiki/Wallenius'_noncentral_hypergeometric_distribution
1797 %(example)s
1799 """
1801 rvs_name = "rvs_wallenius"
1802 dist = _PyWalleniusNCHypergeometric
1805nchypergeom_wallenius = nchypergeom_wallenius_gen(
1806 name='nchypergeom_wallenius',
1807 longname="A Wallenius' noncentral hypergeometric")
1810# Collect names of classes and objects in this module.
1811pairs = list(globals().copy().items())
1812_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)
1814__all__ = _distn_names + _distn_gen_names