Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_continuous_distns.py: 32%
3800 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1# -*- coding: utf-8 -*-
2#
3# Author: Travis Oliphant 2002-2011 with contributions from
4# SciPy Developers 2004-2011
5#
6import warnings
7from collections.abc import Iterable
8from functools import wraps, cached_property
9import ctypes
11import numpy as np
12from numpy.polynomial import Polynomial
13from scipy._lib.doccer import (extend_notes_in_docstring,
14 replace_notes_in_docstring,
15 inherit_docstring_from)
16from scipy._lib._ccallback import LowLevelCallable
17from scipy import optimize
18from scipy import integrate
19import scipy.special as sc
21import scipy.special._ufuncs as scu
22from scipy._lib._util import _lazyselect, _lazywhere
23from . import _stats
24from ._tukeylambda_stats import (tukeylambda_variance as _tlvar,
25 tukeylambda_kurtosis as _tlkurt)
26from ._distn_infrastructure import (
27 get_distribution_names, _kurtosis,
28 rv_continuous, _skew, _get_fixed_fit_value, _check_shape, _ShapeInfo)
29from ._ksstats import kolmogn, kolmognp, kolmogni
30from ._constants import (_XMIN, _EULER, _ZETA3, _SQRT_PI,
31 _SQRT_2_OVER_PI, _LOG_SQRT_2_OVER_PI)
32import scipy.stats._boost as _boost
33from scipy.optimize import root_scalar
34from scipy.stats._warnings_errors import FitError
35import scipy.stats as stats
38def _remove_optimizer_parameters(kwds):
39 """
40 Remove the optimizer-related keyword arguments 'loc', 'scale' and
41 'optimizer' from `kwds`. Then check that `kwds` is empty, and
42 raise `TypeError("Unknown arguments: %s." % kwds)` if it is not.
44 This function is used in the fit method of distributions that override
45 the default method and do not use the default optimization code.
47 `kwds` is modified in-place.
48 """
49 kwds.pop('loc', None)
50 kwds.pop('scale', None)
51 kwds.pop('optimizer', None)
52 kwds.pop('method', None)
53 if kwds:
54 raise TypeError("Unknown arguments: %s." % kwds)
57def _call_super_mom(fun):
58 # if fit method is overridden only for MLE and doesn't specify what to do
59 # if method == 'mm', this decorator calls generic implementation
60 @wraps(fun)
61 def wrapper(self, *args, **kwds):
62 method = kwds.get('method', 'mle').lower()
63 if method != 'mle':
64 return super(type(self), self).fit(*args, **kwds)
65 else:
66 return fun(self, *args, **kwds)
67 return wrapper
70def _get_left_bracket(fun, rbrack, lbrack=None):
71 # find left bracket for `root_scalar`. A guess for lbrack may be provided.
72 lbrack = lbrack or rbrack - 1
73 diff = rbrack - lbrack
75 # if there is no sign change in `fun` between the brackets, expand
76 # rbrack - lbrack until a sign change occurs
77 def interval_contains_root(lbrack, rbrack):
78 # return true if the signs disagree.
79 return np.sign(fun(lbrack)) != np.sign(fun(rbrack))
81 while not interval_contains_root(lbrack, rbrack):
82 diff *= 2
83 lbrack = rbrack - diff
85 msg = ("The solver could not find a bracket containing a "
86 "root to an MLE first order condition.")
87 if np.isinf(lbrack):
88 raise FitSolverError(msg)
90 return lbrack
class ksone_gen(rv_continuous):
    r"""Kolmogorov-Smirnov one-sided test statistic distribution.

    This is the distribution of the one-sided Kolmogorov-Smirnov (KS)
    statistics :math:`D_n^+` and :math:`D_n^-`
    for a finite sample size ``n >= 1`` (the shape parameter).

    %(before_notes)s

    See Also
    --------
    kstwobign, kstwo, kstest

    Notes
    -----
    :math:`D_n^+` and :math:`D_n^-` are given by

    .. math::

        D_n^+ &= \text{sup}_x (F_n(x) - F(x)),\\
        D_n^- &= \text{sup}_x (F(x) - F_n(x)),\\

    where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
    `ksone` describes the distribution under the null hypothesis of the KS test
    that the empirical CDF corresponds to :math:`n` i.i.d. random variates
    with CDF :math:`F`.

    %(after_notes)s

    References
    ----------
    .. [1] Birnbaum, Z. W. and Tingey, F.H. "One-sided confidence contours
       for probability distribution functions", The Annals of Mathematical
       Statistics, 22(4), pp 592-596 (1951).

    %(example)s

    """
    def _argcheck(self, n):
        # Valid shapes are whole numbers no smaller than one.
        large_enough = (n >= 1)
        is_whole = (n == np.round(n))
        return large_enough & is_whole

    def _shape_info(self):
        n_info = _ShapeInfo("n", True, (1, np.inf), (True, False))
        return [n_info]

    def _pdf(self, x, n):
        # The density is the negated value of `_smirnovp`.
        smirnov_p = scu._smirnovp(n, x)
        return -smirnov_p

    def _cdf(self, x, n):
        cdf_value = scu._smirnovc(n, x)
        return cdf_value

    def _sf(self, x, n):
        sf_value = sc.smirnov(n, x)
        return sf_value

    def _ppf(self, q, n):
        quantile = scu._smirnovci(n, q)
        return quantile

    def _isf(self, q, n):
        quantile = sc.smirnovi(n, q)
        return quantile


ksone = ksone_gen(a=0.0, b=1.0, name='ksone')
class kstwo_gen(rv_continuous):
    r"""Kolmogorov-Smirnov two-sided test statistic distribution.

    This is the distribution of the two-sided Kolmogorov-Smirnov (KS)
    statistic :math:`D_n` for a finite sample size ``n >= 1``
    (the shape parameter).

    %(before_notes)s

    See Also
    --------
    kstwobign, ksone, kstest

    Notes
    -----
    :math:`D_n` is given by

    .. math::

        D_n = \text{sup}_x |F_n(x) - F(x)|

    where :math:`F` is a (continuous) CDF and :math:`F_n` is an empirical CDF.
    `kstwo` describes the distribution under the null hypothesis of the KS test
    that the empirical CDF corresponds to :math:`n` i.i.d. random variates
    with CDF :math:`F`.

    %(after_notes)s

    References
    ----------
    .. [1] Simard, R., L'Ecuyer, P. "Computing the Two-Sided
       Kolmogorov-Smirnov Distribution",  Journal of Statistical Software,
       Vol 39, 11, 1-18 (2011).

    %(example)s

    """
    def _argcheck(self, n):
        # Valid shapes are whole numbers no smaller than one.
        large_enough = (n >= 1)
        is_whole = (n == np.round(n))
        return large_enough & is_whole

    def _shape_info(self):
        n_info = _ShapeInfo("n", True, (1, np.inf), (True, False))
        return [n_info]

    def _get_support(self, n):
        # The lower end of the support is 0.5/n; iterables are converted to
        # arrays first so that the division is element-wise.
        if isinstance(n, Iterable):
            n = np.asanyarray(n)
        lower = 0.5/n
        return (lower, 1.0)

    def _pdf(self, x, n):
        density = kolmognp(n, x)
        return density

    def _cdf(self, x, n):
        cdf_value = kolmogn(n, x)
        return cdf_value

    def _sf(self, x, n):
        sf_value = kolmogn(n, x, cdf=False)
        return sf_value

    def _ppf(self, q, n):
        quantile = kolmogni(n, q, cdf=True)
        return quantile

    def _isf(self, q, n):
        quantile = kolmogni(n, q, cdf=False)
        return quantile


# Use the pdf, (not the ppf) to compute moments
kstwo = kstwo_gen(momtype=0, a=0.0, b=1.0, name='kstwo')
class kstwobign_gen(rv_continuous):
    r"""Limiting distribution of scaled Kolmogorov-Smirnov two-sided test statistic.

    This is the asymptotic distribution of the two-sided Kolmogorov-Smirnov
    statistic :math:`\sqrt{n} D_n` that measures the maximum absolute
    distance of the theoretical (continuous) CDF from the empirical CDF.
    (see `kstest`).

    %(before_notes)s

    See Also
    --------
    ksone, kstwo, kstest

    Notes
    -----
    :math:`\sqrt{n} D_n` is given by

    .. math::

        D_n = \text{sup}_x |F_n(x) - F(x)|

    where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
    `kstwobign`  describes the asymptotic distribution (i.e. the limit of
    :math:`\sqrt{n} D_n`) under the null hypothesis of the KS test that the
    empirical CDF corresponds to i.i.d. random variates with CDF :math:`F`.

    %(after_notes)s

    References
    ----------
    .. [1] Feller, W. "On the Kolmogorov-Smirnov Limit Theorems for Empirical
       Distributions",  Ann. Math. Statist. Vol 19, 177-189 (1948).

    %(example)s

    """
    def _shape_info(self):
        # This distribution has no shape parameters.
        return []

    def _pdf(self, x):
        # The density is the negated value of `_kolmogp`.
        kolmog_p = scu._kolmogp(x)
        return -kolmog_p

    def _cdf(self, x):
        cdf_value = scu._kolmogc(x)
        return cdf_value

    def _sf(self, x):
        sf_value = sc.kolmogorov(x)
        return sf_value

    def _ppf(self, q):
        quantile = scu._kolmogci(q)
        return quantile

    def _isf(self, q):
        quantile = sc.kolmogi(q)
        return quantile


kstwobign = kstwobign_gen(a=0.0, name='kstwobign')
282## Normal distribution
284# loc = mu, scale = std
285# Keep these implementations out of the class definition so they can be reused
286# by other distributions.
287_norm_pdf_C = np.sqrt(2*np.pi)
288_norm_pdf_logC = np.log(_norm_pdf_C)
291def _norm_pdf(x):
292 return np.exp(-x**2/2.0) / _norm_pdf_C
295def _norm_logpdf(x):
296 return -x**2 / 2.0 - _norm_pdf_logC
299def _norm_cdf(x):
300 return sc.ndtr(x)
303def _norm_logcdf(x):
304 return sc.log_ndtr(x)
307def _norm_ppf(q):
308 return sc.ndtri(q)
311def _norm_sf(x):
312 return _norm_cdf(-x)
315def _norm_logsf(x):
316 return _norm_logcdf(-x)
319def _norm_isf(q):
320 return -_norm_ppf(q)
class norm_gen(rv_continuous):
    r"""A normal continuous random variable.

    The location (``loc``) keyword specifies the mean.
    The scale (``scale``) keyword specifies the standard deviation.

    %(before_notes)s

    Notes
    -----
    The probability density function for `norm` is:

    .. math::

        f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}

    for a real number :math:`x`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # This distribution has no shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        variates = random_state.standard_normal(size)
        return variates

    # The methods below delegate to the module-level `_norm_*` helpers.

    def _pdf(self, x):
        # norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
        density = _norm_pdf(x)
        return density

    def _logpdf(self, x):
        log_density = _norm_logpdf(x)
        return log_density

    def _cdf(self, x):
        cdf_value = _norm_cdf(x)
        return cdf_value

    def _logcdf(self, x):
        log_cdf_value = _norm_logcdf(x)
        return log_cdf_value

    def _sf(self, x):
        sf_value = _norm_sf(x)
        return sf_value

    def _logsf(self, x):
        log_sf_value = _norm_logsf(x)
        return log_sf_value

    def _ppf(self, q):
        quantile = _norm_ppf(q)
        return quantile

    def _isf(self, q):
        quantile = _norm_isf(q)
        return quantile

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        return 0.0, 1.0, 0.0, 0.0

    def _entropy(self):
        return 0.5*(np.log(2*np.pi)+1)

    @_call_super_mom
    @replace_notes_in_docstring(rv_continuous, notes="""\
        For the normal distribution, method of moments and maximum likelihood
        estimation give identical fits, and explicit formulas for the estimates
        are available.
        This function uses these explicit formulas for the maximum likelihood
        estimation of the normal distribution parameters, so the
        `optimizer` and `method` arguments are ignored.\n\n""")
    def fit(self, data, **kwds):
        # Fixed parameter values requested by the caller, if any.
        fixed_loc = kwds.pop('floc', None)
        fixed_scale = kwds.pop('fscale', None)

        # Whatever is left must be optimizer settings; anything else raises.
        _remove_optimizer_parameters(kwds)

        if not (fixed_loc is None or fixed_scale is None):
            # This check is for consistency with `rv_continuous.fit`.
            # Without this check, this function would just return the
            # parameters that were given.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        data = np.asarray(data)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        # Sample mean, unless the location was fixed.
        loc = data.mean() if fixed_loc is None else fixed_loc

        # Root mean squared deviation about `loc`, unless the scale was fixed.
        if fixed_scale is not None:
            scale = fixed_scale
        else:
            scale = np.sqrt(((data - loc)**2).mean())

        return loc, scale

    def _munp(self, n):
        """
        @returns Moments of standard normal distribution for integer n >= 0

        See eq. 16 of https://arxiv.org/abs/1209.4340v2
        """
        if n % 2 != 0:
            # Every odd moment is returned as zero.
            return 0.
        return sc.factorial2(n - 1)


norm = norm_gen(name='norm')
class alpha_gen(rv_continuous):
    r"""An alpha continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `alpha` ([1]_, [2]_) is:

    .. math::

        f(x, a) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}} *
                  \exp(-\frac{1}{2} (a-1/x)^2)

    where :math:`\Phi` is the normal CDF, :math:`x > 0`, and :math:`a > 0`.

    `alpha` takes ``a`` as a shape parameter.

    %(after_notes)s

    References
    ----------
    .. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
           Distributions, Volume 1", Second Edition, John Wiley and Sons,
           p. 173 (1994).
    .. [2] Anthony A. Salvia, "Reliability applications of the Alpha
           Distribution", IEEE Transactions on Reliability, Vol. R-34,
           No. 3, pp. 251-252 (1985).

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        a_info = _ShapeInfo("a", False, (0, np.inf), (False, False))
        return [a_info]

    def _pdf(self, x, a):
        # alpha.pdf(x, a) = 1/(x**2*Phi(a)*sqrt(2*pi)) * exp(-1/2 * (a-1/x)**2)
        inv_x_squared = 1.0/(x**2)
        return inv_x_squared/_norm_cdf(a)*_norm_pdf(a-1.0/x)

    def _logpdf(self, x, a):
        log_jacobian = -2*np.log(x)
        log_kernel = _norm_logpdf(a-1.0/x)
        log_normaliser = np.log(_norm_cdf(a))
        return log_jacobian + log_kernel - log_normaliser

    def _cdf(self, x, a):
        numerator = _norm_cdf(a-1.0/x)
        return numerator / _norm_cdf(a)

    def _ppf(self, q, a):
        shifted = a-sc.ndtri(q*_norm_cdf(a))
        return 1.0/np.asarray(shifted)

    def _stats(self, a):
        # mean, variance, skewness, excess kurtosis
        return [np.inf, np.inf, np.nan, np.nan]


alpha = alpha_gen(a=0.0, name='alpha')
class anglit_gen(rv_continuous):
    r"""An anglit continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `anglit` is:

    .. math::

        f(x) = \sin(2x + \pi/2) = \cos(2x)

    for :math:`-\pi/4 \le x \le \pi/4`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # This distribution has no shape parameters.
        return []

    def _pdf(self, x):
        # anglit.pdf(x) = sin(2*x + \pi/2) = cos(2*x)
        doubled = 2*x
        return np.cos(doubled)

    def _cdf(self, x):
        shifted = x+np.pi/4
        return np.sin(shifted)**2.0

    def _ppf(self, q):
        root = np.sqrt(q)
        return np.arcsin(root)-np.pi/4

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        pi_squared = np.pi*np.pi
        variance = pi_squared/16-0.5
        excess_kurtosis = -2*(np.pi**4 - 96)/(pi_squared-8)**2
        return 0.0, variance, 0.0, excess_kurtosis

    def _entropy(self):
        return 1-np.log(2)


anglit = anglit_gen(a=-np.pi/4, b=np.pi/4, name='anglit')
class arcsine_gen(rv_continuous):
    r"""An arcsine continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `arcsine` is:

    .. math::

        f(x) = \frac{1}{\pi \sqrt{x (1-x)}}

    for :math:`0 < x < 1`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # This distribution has no shape parameters.
        return []

    def _pdf(self, x):
        # arcsine.pdf(x) = 1/(pi*sqrt(x*(1-x)))
        # Division-by-zero warnings are silenced for this evaluation.
        with np.errstate(divide='ignore'):
            density = 1.0/np.pi/np.sqrt(x*(1-x))
        return density

    def _cdf(self, x):
        root = np.sqrt(x)
        return 2.0/np.pi*np.arcsin(root)

    def _ppf(self, q):
        angle = np.pi/2.0*q
        return np.sin(angle)**2.0

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        mean = 0.5
        variance = 1.0/8
        skewness = 0
        excess_kurtosis = -3.0/2.0
        return mean, variance, skewness, excess_kurtosis

    def _entropy(self):
        return -0.24156447527049044468


arcsine = arcsine_gen(a=0.0, b=1.0, name='arcsine')
class FitDataError(ValueError):
    """Raised when input data is inconsistent with fixed parameters."""
    # This exception is raised by, for example, beta_gen.fit when both floc
    # and fscale are fixed and there are values in the data not in the open
    # interval (floc, floc+fscale).
    def __init__(self, distr, lower, upper):
        # `distr`, `lower` and `upper` are interpolated with their repr().
        message = (
            "Invalid values in `data`.  Maximum likelihood "
            "estimation with {distr!r} requires that {lower!r} < "
            "(x - loc)/scale  < {upper!r} for each x in `data`.".format(
                distr=distr, lower=lower, upper=upper)
        )
        self.args = (message,)
class FitSolverError(FitError):
    """
    Raised when a solver fails to converge while fitting a distribution.
    """
    # This exception is raised by, for example, beta_gen.fit when
    # optimize.fsolve returns with ier != 1.
    def __init__(self, mesg):
        # Newlines are stripped from the solver message so that the combined
        # text stays on one line.
        prefix = "Solver for the MLE equations failed to converge: "
        single_line = mesg.replace('\n', '')
        self.args = (prefix + single_line,)
611def _beta_mle_a(a, b, n, s1):
612 # The zeros of this function give the MLE for `a`, with
613 # `b`, `n` and `s1` given. `s1` is the sum of the logs of
614 # the data. `n` is the number of data points.
615 psiab = sc.psi(a + b)
616 func = s1 - n * (-psiab + sc.psi(a))
617 return func
620def _beta_mle_ab(theta, n, s1, s2):
621 # Zeros of this function are critical points of
622 # the maximum likelihood function. Solving this system
623 # for theta (which contains a and b) gives the MLE for a and b
624 # given `n`, `s1` and `s2`. `s1` is the sum of the logs of the data,
625 # and `s2` is the sum of the logs of 1 - data. `n` is the number
626 # of data points.
627 a, b = theta
628 psiab = sc.psi(a + b)
629 func = [s1 - n * (-psiab + sc.psi(a)),
630 s2 - n * (-psiab + sc.psi(b))]
631 return func
class beta_gen(rv_continuous):
    r"""A beta continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `beta` is:

    .. math::

        f(x, a, b) = \frac{\Gamma(a+b) x^{a-1} (1-x)^{b-1}}
                          {\Gamma(a) \Gamma(b)}

    for :math:`0 <= x <= 1`, :math:`a > 0`, :math:`b > 0`, where
    :math:`\Gamma` is the gamma function (`scipy.special.gamma`).

    `beta` takes :math:`a` and :math:`b` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
        ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
        return [ia, ib]

    def _rvs(self, a, b, size=None, random_state=None):
        return random_state.beta(a, b, size)

    def _pdf(self, x, a, b):
        #                     gamma(a+b) * x**(a-1) * (1-x)**(b-1)
        # beta.pdf(x, a, b) = ------------------------------------
        #                              gamma(a)*gamma(b)
        return _boost._beta_pdf(x, a, b)

    def _logpdf(self, x, a, b):
        lPx = sc.xlog1py(b - 1.0, -x) + sc.xlogy(a - 1.0, x)
        lPx -= sc.betaln(a, b)
        return lPx

    def _cdf(self, x, a, b):
        return _boost._beta_cdf(x, a, b)

    def _sf(self, x, a, b):
        return _boost._beta_sf(x, a, b)

    def _isf(self, x, a, b):
        with warnings.catch_warnings():
            # See gh-14901
            message = "overflow encountered in _beta_isf"
            warnings.filterwarnings('ignore', message=message)
            return _boost._beta_isf(x, a, b)

    def _ppf(self, q, a, b):
        with warnings.catch_warnings():
            # Same overflow warning suppression as in `_isf`.
            message = "overflow encountered in _beta_ppf"
            warnings.filterwarnings('ignore', message=message)
            return _boost._beta_ppf(q, a, b)

    def _stats(self, a, b):
        # mean, variance, skewness, excess kurtosis
        return (
            _boost._beta_mean(a, b),
            _boost._beta_variance(a, b),
            _boost._beta_skewness(a, b),
            _boost._beta_kurtosis_excess(a, b))

    def _fitstart(self, data):
        # Starting values for `a` and `b`: solve for the shapes whose
        # skewness and kurtosis expressions (see `func`) match the values
        # computed from the data.
        g1 = _skew(data)
        g2 = _kurtosis(data)

        def func(x):
            a, b = x
            sk = 2*(b-a)*np.sqrt(a + b + 1) / (a + b + 2) / np.sqrt(a*b)
            ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
            ku /= a*b*(a+b+2)*(a+b+3)
            ku *= 6
            return [sk-g1, ku-g2]
        a, b = optimize.fsolve(func, (1.0, 1.0))
        return super()._fitstart(data, args=(a, b))

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        In the special case where `method="MLE"` and
        both `floc` and `fscale` are given, a
        `ValueError` is raised if any value `x` in `data` does not satisfy
        `floc < x < floc + fscale`.\n\n""")
    def fit(self, data, *args, **kwds):
        # Override rv_continuous.fit, so we can more efficiently handle the
        # case where floc and fscale are given.

        floc = kwds.get('floc', None)
        fscale = kwds.get('fscale', None)

        if floc is None or fscale is None:
            # do general fit
            return super().fit(data, *args, **kwds)

        # We already got these from kwds, so just pop them.
        kwds.pop('floc', None)
        kwds.pop('fscale', None)

        f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
        f1 = _get_fixed_fit_value(kwds, ['f1', 'fb', 'fix_b'])

        _remove_optimizer_parameters(kwds)

        if f0 is not None and f1 is not None:
            # This check is for consistency with `rv_continuous.fit`.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        # Special case: loc and scale are constrained, so we are fitting
        # just the shape parameters.  This can be done much more efficiently
        # than the method used in `rv_continuous.fit`.  (See the subsection
        # "Two unknown parameters" in the section "Maximum likelihood" of
        # the Wikipedia article on the Beta distribution for the formulas.)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        # Normalize the data to the interval [0, 1].
        data = (np.ravel(data) - floc) / fscale
        if np.any(data <= 0) or np.any(data >= 1):
            # The message produced by FitDataError states bounds on
            # (x - loc)/scale, i.e. on the normalized data tested just above,
            # so the bounds to report are 0 and 1 (not floc and
            # floc + fscale).
            raise FitDataError("beta", lower=0, upper=1)

        xbar = data.mean()

        if f0 is not None or f1 is not None:
            # One of the shape parameters is fixed.

            if f0 is not None:
                # The shape parameter a is fixed, so swap the parameters
                # and flip the data.  We always solve for `a`.  The result
                # will be swapped back before returning.
                b = f0
                data = 1 - data
                xbar = 1 - xbar
            else:
                b = f1

            # Initial guess for a.  Use the formula for the mean of the beta
            # distribution, E[x] = a / (a + b), to generate a reasonable
            # starting point based on the mean of the data and the given
            # value of b.
            a = b * xbar / (1 - xbar)

            # Compute the MLE for `a` by solving _beta_mle_a.
            theta, info, ier, mesg = optimize.fsolve(
                _beta_mle_a, a,
                args=(b, len(data), np.log(data).sum()),
                full_output=True
            )
            if ier != 1:
                raise FitSolverError(mesg=mesg)
            a = theta[0]

            if f0 is not None:
                # The shape parameter a was fixed, so swap back the
                # parameters.
                a, b = b, a

        else:
            # Neither of the shape parameters is fixed.

            # s1 and s2 are used in the extra arguments passed to _beta_mle_ab
            # by optimize.fsolve.
            s1 = np.log(data).sum()
            s2 = sc.log1p(-data).sum()

            # Use the "method of moments" to estimate the initial
            # guess for a and b.
            fac = xbar * (1 - xbar) / data.var(ddof=0) - 1
            a = xbar * fac
            b = (1 - xbar) * fac

            # Compute the MLE for a and b by solving _beta_mle_ab.
            theta, info, ier, mesg = optimize.fsolve(
                _beta_mle_ab, [a, b],
                args=(len(data), s1, s2),
                full_output=True
            )
            if ier != 1:
                raise FitSolverError(mesg=mesg)
            a, b = theta

        return a, b, floc, fscale


beta = beta_gen(a=0.0, b=1.0, name='beta')
828class betaprime_gen(rv_continuous):
829 r"""A beta prime continuous random variable.
831 %(before_notes)s
833 Notes
834 -----
835 The probability density function for `betaprime` is:
837 .. math::
839 f(x, a, b) = \frac{x^{a-1} (1+x)^{-a-b}}{\beta(a, b)}
841 for :math:`x >= 0`, :math:`a > 0`, :math:`b > 0`, where
842 :math:`\beta(a, b)` is the beta function (see `scipy.special.beta`).
844 `betaprime` takes ``a`` and ``b`` as shape parameters.
846 %(after_notes)s
848 %(example)s
850 """
851 _support_mask = rv_continuous._open_support_mask
853 def _shape_info(self):
854 ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
855 ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
856 return [ia, ib]
858 def _rvs(self, a, b, size=None, random_state=None):
859 u1 = gamma.rvs(a, size=size, random_state=random_state)
860 u2 = gamma.rvs(b, size=size, random_state=random_state)
861 return u1 / u2
863 def _pdf(self, x, a, b):
864 # betaprime.pdf(x, a, b) = x**(a-1) * (1+x)**(-a-b) / beta(a, b)
865 return np.exp(self._logpdf(x, a, b))
867 def _logpdf(self, x, a, b):
868 return sc.xlogy(a - 1.0, x) - sc.xlog1py(a + b, x) - sc.betaln(a, b)
870 def _cdf(self, x, a, b):
871 return sc.betainc(a, b, x/(1.+x))
873 def _munp(self, n, a, b):
874 if n == 1.0:
875 return np.where(b > 1,
876 a/(b-1.0),
877 np.inf)
878 elif n == 2.0:
879 return np.where(b > 2,
880 a*(a+1.0)/((b-2.0)*(b-1.0)),
881 np.inf)
882 elif n == 3.0:
883 return np.where(b > 3,
884 a*(a+1.0)*(a+2.0)/((b-3.0)*(b-2.0)*(b-1.0)),
885 np.inf)
886 elif n == 4.0:
887 return np.where(b > 4,
888 (a*(a + 1.0)*(a + 2.0)*(a + 3.0) /
889 ((b - 4.0)*(b - 3.0)*(b - 2.0)*(b - 1.0))),
890 np.inf)
891 else:
892 raise NotImplementedError
895betaprime = betaprime_gen(a=0.0, name='betaprime')
class bradford_gen(rv_continuous):
    r"""A Bradford continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `bradford` is:

    .. math::

        f(x, c) = \frac{c}{\log(1+c) (1+cx)}

    for :math:`0 <= x <= 1` and :math:`c > 0`.

    `bradford` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [c_info]

    def _pdf(self, x, c):
        # bradford.pdf(x, c) = c / (k * (1+c*x))
        unnormalised = c / (c*x + 1.0)
        return unnormalised / sc.log1p(c)

    def _cdf(self, x, c):
        numerator = sc.log1p(c*x)
        return numerator / sc.log1p(c)

    def _ppf(self, q, c):
        scaled_log = q * sc.log1p(c)
        return sc.expm1(scaled_log) / c

    def _stats(self, c, moments='mv'):
        # `lg` is log(1 + c); every moment below is expressed through it.
        lg = np.log(1.0+c)
        mean = (c-lg)/(c*lg)
        variance = ((c+2.0)*lg-2.0*c)/(2*c*lg*lg)

        # Skewness and kurtosis are only evaluated when requested.
        skewness = None
        if 's' in moments:
            skew_num = np.sqrt(2)*(12*c*c-9*c*lg*(c+2)+2*lg*lg*(c*(c+3)+3))
            skew_den = np.sqrt(c*(c*(lg-2)+2*lg))*(3*c*(lg-2)+6*lg)
            skewness = skew_num / skew_den

        kurtosis = None
        if 'k' in moments:
            kurt_num = (c**3*(lg-3)*(lg*(3*lg-16)+24)
                        + 12*lg*c*c*(lg-4)*(lg-3)
                        + 6*c*lg*lg*(3*lg-14) + 12*lg**3)
            kurt_den = 3*c*(c*(lg-2)+2*lg)**2
            kurtosis = kurt_num / kurt_den

        return mean, variance, skewness, kurtosis

    def _entropy(self, c):
        lg = np.log(1+c)
        return lg/2.0 - np.log(c/lg)


bradford = bradford_gen(a=0.0, b=1.0, name='bradford')
class burr_gen(rv_continuous):
    r"""A Burr (Type III) continuous random variable.

    %(before_notes)s

    See Also
    --------
    fisk : a special case of either `burr` or `burr12` with ``d=1``
    burr12 : Burr Type XII distribution
    mielke : Mielke Beta-Kappa / Dagum distribution

    Notes
    -----
    The probability density function for `burr` is:

    .. math::

        f(x; c, d) = c d \frac{x^{-c - 1}}
                              {{(1 + x^{-c})}^{d + 1}}

    for :math:`x >= 0` and :math:`c, d > 0`.

    `burr` takes ``c`` and ``d`` as shape parameters for :math:`c` and
    :math:`d`.

    This is the PDF corresponding to the third CDF given in Burr's list;
    specifically, it is equation (11) in Burr's paper [1]_. The distribution
    is also commonly referred to as the Dagum distribution [2]_. If the
    parameter :math:`c < 1` then the mean of the distribution does not
    exist and if :math:`c < 2` the variance does not exist [2]_.
    The PDF is finite at the left endpoint :math:`x = 0` if :math:`c * d >= 1`.

    %(after_notes)s

    References
    ----------
    .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
       Mathematical Statistics, 13(2), pp 215-232 (1942).
    .. [2] https://en.wikipedia.org/wiki/Dagum_distribution
    .. [3] Kleiber, Christian. "A guide to the Dagum distributions."
       Modeling Income Distributions and Lorenz Curves  pp 97-117 (2008).

    %(example)s

    """
    # Do not set _support_mask to rv_continuous._open_support_mask
    # Whether the left-hand endpoint is suitable for pdf evaluation is dependent
    # on the values of c and d: if c*d >= 1, the pdf is finite, otherwise infinite.

    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        d_info = _ShapeInfo("d", False, (0, np.inf), (False, False))
        return [c_info, d_info]

    def _pdf(self, x, c, d):
        # burr.pdf(x, c, d) = c * d * x**(-c-1) * (1+x**(-c))**(-d-1)
        def at_origin(x_, c_, d_):
            # Expression used where x == 0.
            return c_ * d_ * (x_**(c_*d_-1)) / (1 + x_**c_)

        def off_origin(x_, c_, d_):
            # Expression used everywhere else.
            return (c_ * d_ * (x_ ** (-c_ - 1.0)) /
                    ((1 + x_ ** (-c_)) ** (d_ + 1.0)))

        output = _lazywhere(x == 0, [x, c, d], at_origin, f2=off_origin)
        # Unwrap 0-d results.
        return output[()] if output.ndim == 0 else output

    def _logpdf(self, x, c, d):
        def at_origin(x_, c_, d_):
            # Expression used where x == 0.
            return (np.log(c_) + np.log(d_) + sc.xlogy(c_*d_ - 1, x_)
                    - (d_+1) * sc.log1p(x_**(c_)))

        def off_origin(x_, c_, d_):
            # Expression used everywhere else.
            return (np.log(c_) + np.log(d_)
                    + sc.xlogy(-c_ - 1, x_)
                    - sc.xlog1py(d_+1, x_**(-c_)))

        output = _lazywhere(x == 0, [x, c, d], at_origin, f2=off_origin)
        # Unwrap 0-d results.
        return output[()] if output.ndim == 0 else output

    def _cdf(self, x, c, d):
        base = 1 + x**(-c)
        return base**(-d)

    def _logcdf(self, x, c, d):
        log_base = sc.log1p(x**(-c))
        return log_base * (-d)

    def _sf(self, x, c, d):
        log_sf_value = self._logsf(x, c, d)
        return np.exp(log_sf_value)

    def _logsf(self, x, c, d):
        cdf_value = (1 + x**(-c))**(-d)
        return np.log1p(- cdf_value)

    def _ppf(self, q, c, d):
        inner = q**(-1.0/d) - 1
        return inner**(-1.0/c)

    def _stats(self, c, d):
        nc = np.arange(1, 5).reshape(4,1) / c
        #ek is the kth raw moment, e1 is the mean e2-e1**2 variance etc.
        e1, e2, e3, e4 = sc.beta(d + nc, 1. - nc) * d
        mu = np.where(c > 1.0, e1, np.nan)
        mu2_if_c = e2 - mu**2
        mu2 = np.where(c > 2.0, mu2_if_c, np.nan)

        def skew_from_moments(c, e1, e2, e3, mu2_if_c):
            return (e3 - 3*e2*e1 + 2*e1**3) / np.sqrt((mu2_if_c)**3)

        def kurt_from_moments(c, e1, e2, e3, e4, mu2_if_c):
            return (
                ((e4 - 4*e3*e1 + 6*e2*e1**2 - 3*e1**4) / mu2_if_c**2) - 3)

        # nan is reported for the skewness unless c > 3 and for the
        # kurtosis unless c > 4.
        g1 = _lazywhere(
            c > 3.0,
            (c, e1, e2, e3, mu2_if_c),
            skew_from_moments,
            fillvalue=np.nan)
        g2 = _lazywhere(
            c > 4.0,
            (c, e1, e2, e3, e4, mu2_if_c),
            kurt_from_moments,
            fillvalue=np.nan)
        if np.ndim(c) != 0:
            return mu, mu2, g1, g2
        return mu.item(), mu2.item(), g1.item(), g2.item()

    def _munp(self, n, c, d):
        def raw_moment(c_, d_, n_):
            ratio = 1. * n_ / c_
            return d_ * sc.beta(1.0 - ratio, d_ + ratio)

        n, c, d = np.asarray(n), np.asarray(c), np.asarray(d)
        # nan is returned unless c > n; the self-comparisons exclude nan
        # entries of `n` and `d`.
        defined = (c > n) & (n == n) & (d == d)
        return _lazywhere(defined, (c, d, n), raw_moment, np.nan)


burr = burr_gen(a=0.0, name='burr')
class burr12_gen(rv_continuous):
    r"""A Burr (Type XII) continuous random variable.

    %(before_notes)s

    See Also
    --------
    fisk : a special case of either `burr` or `burr12` with ``d=1``
    burr : Burr Type III distribution

    Notes
    -----
    The probability density function for `burr12` is:

    .. math::

        f(x; c, d) = c d \frac{x^{c-1}}
                              {(1 + x^c)^{d + 1}}

    for :math:`x >= 0` and :math:`c, d > 0`.

    `burr12` takes ``c`` and ``d`` as shape parameters for :math:`c`
    and :math:`d`.

    This is the PDF corresponding to the twelfth CDF given in Burr's list;
    specifically, it is equation (20) in Burr's paper [1]_.

    %(after_notes)s

    The Burr type 12 distribution is also sometimes referred to as
    the Singh-Maddala distribution from NIST [2]_.

    References
    ----------
    .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
       Mathematical Statistics, 13(2), pp 215-232 (1942).

    .. [2] https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/b12pdf.htm

    .. [3] "Burr distribution",
       https://en.wikipedia.org/wiki/Burr_distribution

    %(example)s

    """
    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        d_info = _ShapeInfo("d", False, (0, np.inf), (False, False))
        return [c_info, d_info]

    def _pdf(self, x, c, d):
        # burr12.pdf(x, c, d) = c * d * x**(c-1) * (1+x**(c))**(-d-1)
        log_density = self._logpdf(x, c, d)
        return np.exp(log_density)

    def _logpdf(self, x, c, d):
        log_constants = np.log(c) + np.log(d)
        return log_constants + sc.xlogy(c - 1, x) + sc.xlog1py(-d-1, x**c)

    def _cdf(self, x, c, d):
        # 1 - sf, evaluated through expm1 of the log survival function.
        log_sf_value = self._logsf(x, c, d)
        return -sc.expm1(log_sf_value)

    def _logcdf(self, x, c, d):
        sf_value = (1 + x**c)**(-d)
        return sc.log1p(-sf_value)

    def _sf(self, x, c, d):
        log_sf_value = self._logsf(x, c, d)
        return np.exp(log_sf_value)

    def _logsf(self, x, c, d):
        x_to_c = x**c
        return sc.xlog1py(-d, x_to_c)

    def _ppf(self, q, c, d):
        # The following is an implementation of
        #   ((1 - q)**(-1.0/d) - 1)**(1.0/c)
        # that does a better job handling small values of q.
        inner = sc.expm1(-1/d * sc.log1p(-q))
        return inner**(1/c)

    def _munp(self, n, c, d):
        ratio = 1. * n / c
        return d * sc.beta(1.0 + ratio, d - ratio)


burr12 = burr12_gen(a=0.0, name='burr12')
class fisk_gen(burr_gen):
    r"""A Fisk continuous random variable.

    The Fisk distribution is also known as the log-logistic distribution.

    %(before_notes)s

    See Also
    --------
    burr

    Notes
    -----
    The probability density function for `fisk` is:

    .. math::

        f(x, c) = \frac{c x^{c-1}}
                       {(1 + x^c)^2}

    for :math:`x >= 0` and :math:`c > 0`.

    Please note that the above expression can be transformed into the following
    one, which is also commonly used:

    .. math::

        f(x, c) = \frac{c x^{-c-1}}
                       {(1 + x^{-c})^2}

    `fisk` takes ``c`` as a shape parameter for :math:`c`.

    `fisk` is a special case of `burr` or `burr12` with ``d=1``.

    %(after_notes)s

    %(example)s

    """
    # Apart from `_shape_info` and `_entropy`, every method forwards to the
    # module-level `burr` instance with the second shape fixed at d = 1.0.

    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [c_info]

    def _pdf(self, x, c):
        # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
        density = burr._pdf(x, c, 1.0)
        return density

    def _cdf(self, x, c):
        cdf_value = burr._cdf(x, c, 1.0)
        return cdf_value

    def _sf(self, x, c):
        sf_value = burr._sf(x, c, 1.0)
        return sf_value

    def _logpdf(self, x, c):
        # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
        log_density = burr._logpdf(x, c, 1.0)
        return log_density

    def _logcdf(self, x, c):
        log_cdf_value = burr._logcdf(x, c, 1.0)
        return log_cdf_value

    def _logsf(self, x, c):
        log_sf_value = burr._logsf(x, c, 1.0)
        return log_sf_value

    def _ppf(self, x, c):
        quantile = burr._ppf(x, c, 1.0)
        return quantile

    def _munp(self, n, c):
        moment = burr._munp(n, c, 1.0)
        return moment

    def _stats(self, c):
        summary = burr._stats(c, 1.0)
        return summary

    def _entropy(self, c):
        return 2 - np.log(c)


fisk = fisk_gen(a=0.0, name='fisk')
class cauchy_gen(rv_continuous):
    r"""A Cauchy continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `cauchy` is

    .. math::

        f(x) = \frac{1}{\pi (1 + x^2)}

    for a real number :math:`x`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # cauchy.pdf(x) = 1 / (pi * (1 + x**2))
        return 1.0/np.pi/(1.0+x*x)

    def _cdf(self, x):
        # Mathematically equal to 0.5 + arctan(x)/pi.  That form cancels
        # catastrophically for large negative x (the result collapses to 0);
        # arctan2(1, -x) yields the same angle without the subtraction.
        return np.arctan2(1, -x)/np.pi

    def _ppf(self, q):
        return np.tan(np.pi*q-np.pi/2.0)

    def _sf(self, x):
        # Mirror image of `_cdf`: equal to 0.5 - arctan(x)/pi but accurate in
        # the far right tail.
        return np.arctan2(1, x)/np.pi

    def _isf(self, q):
        return np.tan(np.pi/2.0-np.pi*q)

    def _stats(self):
        # None of the moments of the Cauchy distribution are defined.
        return np.nan, np.nan, np.nan, np.nan

    def _entropy(self):
        return np.log(4*np.pi)

    def _fitstart(self, data, args=None):
        # Initialize ML guesses using quartiles instead of moments.
        p25, p50, p75 = np.percentile(data, [25, 50, 75])
        return p50, (p75 - p25)/2


cauchy = cauchy_gen(name='cauchy')
class chi_gen(rv_continuous):
    r"""A chi continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `chi` is:

    .. math::

        f(x, k) = \frac{1}{2^{k/2-1} \Gamma \left( k/2 \right)}
                   x^{k-1} \exp \left( -x^2/2 \right)

    for :math:`x >= 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
    in the implementation). :math:`\Gamma` is the gamma function
    (`scipy.special.gamma`).

    Special cases of `chi` are:

        - ``chi(1, loc, scale)`` is equivalent to `halfnorm`
        - ``chi(2, 0, scale)`` is equivalent to `rayleigh`
        - ``chi(3, 0, scale)`` is equivalent to `maxwell`

    `chi` takes ``df`` as a shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        df_info = _ShapeInfo("df", False, (0, np.inf), (False, False))
        return [df_info]

    def _rvs(self, df, size=None, random_state=None):
        # A chi variate is the square root of a chi-squared variate.
        squared = chi2.rvs(df, size=size, random_state=random_state)
        return np.sqrt(squared)

    def _pdf(self, x, df):
        #                   x**(df-1) * exp(-x**2/2)
        # chi.pdf(x, df) =  -------------------------
        #                   2**(df/2-1) * gamma(df/2)
        log_density = self._logpdf(x, df)
        return np.exp(log_density)

    def _logpdf(self, x, df):
        # Log of the normalising constant 1 / (2**(df/2-1) * gamma(df/2)).
        log_norm = np.log(2) - .5*np.log(2)*df - sc.gammaln(.5*df)
        return log_norm + sc.xlogy(df - 1., x) - .5*x**2

    def _cdf(self, x, df):
        half_sq = .5*x**2
        return sc.gammainc(.5*df, half_sq)

    def _sf(self, x, df):
        half_sq = .5*x**2
        return sc.gammaincc(.5*df, half_sq)

    def _ppf(self, q, df):
        half_sq = sc.gammaincinv(.5*df, q)
        return np.sqrt(2*half_sq)

    def _isf(self, q, df):
        half_sq = sc.gammainccinv(.5*df, q)
        return np.sqrt(2*half_sq)

    def _stats(self, df):
        mean = np.sqrt(2)*np.exp(sc.gammaln(df/2.0+0.5)-sc.gammaln(df/2.0))
        var = df - mean*mean
        skew = (2*mean**3.0 + mean*(1-2*df))/np.asarray(np.power(var, 1.5))
        kurt = 2*df*(1.0-df)-6*mean**4 + 4*mean**2 * (2*df-1)
        kurt = kurt / np.asarray(var**2.0)
        return mean, var, skew, kurt


chi = chi_gen(a=0.0, name='chi')
class chi2_gen(rv_continuous):
    r"""A chi-squared continuous random variable.

    For the noncentral chi-square distribution, see `ncx2`.

    %(before_notes)s

    See Also
    --------
    ncx2

    Notes
    -----
    The probability density function for `chi2` is:

    .. math::

        f(x, k) = \frac{1}{2^{k/2} \Gamma \left( k/2 \right)}
                   x^{k/2-1} \exp \left( -x/2 \right)

    for :math:`x > 0`  and :math:`k > 0` (degrees of freedom, denoted ``df``
    in the implementation).

    `chi2` takes ``df`` as a shape parameter.

    The chi-squared distribution is a special case of the gamma
    distribution, with gamma parameters ``a = df/2``, ``loc = 0`` and
    ``scale = 2``.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        df_info = _ShapeInfo("df", False, (0, np.inf), (False, False))
        return [df_info]

    def _rvs(self, df, size=None, random_state=None):
        # Delegate sampling to the NumPy random generator.
        return random_state.chisquare(df, size)

    def _pdf(self, x, df):
        # chi2.pdf(x, df) = 1 / (2*gamma(df/2)) * (x/2)**(df/2-1) * exp(-x/2)
        log_density = self._logpdf(x, df)
        return np.exp(log_density)

    def _logpdf(self, x, df):
        power_term = sc.xlogy(df/2.-1, x)
        log_gamma = sc.gammaln(df/2.)
        log_two_pow = (np.log(2)*df)/2.
        return power_term - x/2. - log_gamma - log_two_pow

    def _cdf(self, x, df):
        return sc.chdtr(df, x)

    def _sf(self, x, df):
        return sc.chdtrc(df, x)

    def _isf(self, p, df):
        return sc.chdtri(df, p)

    def _ppf(self, p, df):
        half_quantile = sc.gammaincinv(df/2, p)
        return 2*half_quantile

    def _stats(self, df):
        mean = df
        var = 2*df
        skew = 2*np.sqrt(2.0/df)
        kurt = 12.0/df
        return mean, var, skew, kurt


chi2 = chi2_gen(a=0.0, name='chi2')
class cosine_gen(rv_continuous):
    r"""A cosine continuous random variable.

    %(before_notes)s

    Notes
    -----
    The cosine distribution is an approximation to the normal distribution.
    The probability density function for `cosine` is:

    .. math::

        f(x) = \frac{1}{2\pi} (1+\cos(x))

    for :math:`-\pi \le x \le \pi`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # cosine.pdf(x) = 1/(2*pi) * (1+cos(x))
        return 1.0/2/np.pi*(1+np.cos(x))

    def _logpdf(self, x):
        cos_x = np.cos(x)

        def _log_density(c):
            return np.log1p(c) - np.log(2*np.pi)

        # Where cos(x) == -1 the density is zero, so the log is -inf.
        return _lazywhere(cos_x != -1, (cos_x,), _log_density,
                          fillvalue=-np.inf)

    def _cdf(self, x):
        return scu._cosine_cdf(x)

    def _sf(self, x):
        # By symmetry about zero, sf(x) == cdf(-x).
        return scu._cosine_cdf(-x)

    def _ppf(self, p):
        return scu._cosine_invcdf(p)

    def _isf(self, p):
        # By symmetry about zero, isf(p) == -ppf(p).
        return -scu._cosine_invcdf(p)

    def _stats(self):
        pi_sq = np.pi*np.pi
        var = pi_sq/3.0-2.0
        kurt = -6.0*(np.pi**4-90)/(5.0*(pi_sq-6)**2)
        return 0.0, var, 0.0, kurt

    def _entropy(self):
        return np.log(4*np.pi)-1.0


cosine = cosine_gen(a=-np.pi, b=np.pi, name='cosine')
class dgamma_gen(rv_continuous):
    r"""A double gamma continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `dgamma` is:

    .. math::

        f(x, a) = \frac{1}{2\Gamma(a)} |x|^{a-1} \exp(-|x|)

    for a real number :math:`x` and :math:`a > 0`. :math:`\Gamma` is the
    gamma function (`scipy.special.gamma`).

    `dgamma` takes ``a`` as a shape parameter for :math:`a`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (False, False))]

    def _rvs(self, a, size=None, random_state=None):
        # Draw a gamma variate and attach a random sign with probability 1/2.
        u = random_state.uniform(size=size)
        gm = gamma.rvs(a, size=size, random_state=random_state)
        return gm * np.where(u >= 0.5, 1, -1)

    def _pdf(self, x, a):
        # dgamma.pdf(x, a) = 1 / (2*gamma(a)) * abs(x)**(a-1) * exp(-abs(x))
        ax = abs(x)
        return 1.0/(2*sc.gamma(a))*ax**(a-1.0) * np.exp(-ax)

    def _logpdf(self, x, a):
        ax = abs(x)
        return sc.xlogy(a - 1.0, ax) - ax - np.log(2) - sc.gammaln(a)

    def _cdf(self, x, a):
        # For x <= 0 the CDF is half the upper incomplete gamma of |x|.
        # Evaluating it directly with `gammaincc` (rather than as
        # 0.5 - 0.5*gammainc) keeps full relative precision in the far
        # left tail, where the subtraction would round to exactly 0.
        ax = abs(x)
        return np.where(x > 0,
                        0.5 + 0.5*sc.gammainc(a, ax),
                        0.5*sc.gammaincc(a, ax))

    def _sf(self, x, a):
        # Mirror image of `_cdf`; accurate in the far right tail.
        ax = abs(x)
        return np.where(x > 0,
                        0.5*sc.gammaincc(a, ax),
                        0.5 + 0.5*sc.gammainc(a, ax))

    def _ppf(self, q, a):
        fac = sc.gammainccinv(a, 1-abs(2*q-1))
        return np.where(q > 0.5, fac, -fac)

    def _stats(self, a):
        mu2 = a*(a+1.0)
        return 0.0, mu2, 0.0, (a+2.0)*(a+3.0)/mu2-3.0


dgamma = dgamma_gen(name='dgamma')
class dweibull_gen(rv_continuous):
    r"""A double Weibull continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `dweibull` is given by

    .. math::

        f(x, c) = c / 2 |x|^{c-1} \exp(-|x|^c)

    for a real number :math:`x` and :math:`c > 0`.

    `dweibull` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

    def _rvs(self, c, size=None, random_state=None):
        # Draw a Weibull variate and attach a random sign with probability 1/2.
        u = random_state.uniform(size=size)
        w = weibull_min.rvs(c, size=size, random_state=random_state)
        return w * (np.where(u >= 0.5, 1, -1))

    def _pdf(self, x, c):
        # dweibull.pdf(x, c) = c / 2 * abs(x)**(c-1) * exp(-abs(x)**c)
        ax = abs(x)
        Px = c / 2.0 * ax**(c-1.0) * np.exp(-ax**c)
        return Px

    def _logpdf(self, x, c):
        ax = abs(x)
        return np.log(c) - np.log(2.0) + sc.xlogy(c - 1.0, ax) - ax**c

    def _cdf(self, x, c):
        Cx1 = 0.5 * np.exp(-abs(x)**c)
        return np.where(x > 0, 1 - Cx1, Cx1)

    def _sf(self, x, c):
        # Explicit survival function: the generic fallback ``1 - cdf``
        # rounds to exactly 0 in the far right tail.
        half_tail = 0.5 * np.exp(-abs(x)**c)
        return np.where(x > 0, half_tail, 1 - half_tail)

    def _ppf(self, q, c):
        fac = 2. * np.where(q <= 0.5, q, 1. - q)
        fac = np.power(-np.log(fac), 1.0 / c)
        return np.where(q > 0.5, fac, -fac)

    def _isf(self, q, c):
        # The distribution is symmetric about zero, so isf(q) == -ppf(q).
        # This avoids forming 1 - q, which loses tiny values of q.
        return -self._ppf(q, c)

    def _munp(self, n, c):
        return (1 - (n % 2)) * sc.gamma(1.0 + 1.0 * n / c)

    # since we know that all odd moments are zeros, return them at once.
    # returning Nones from _stats makes the public stats call _munp
    # so overall we're saving one or two gamma function evaluations here.
    def _stats(self, c):
        return 0, None, 0, None


dweibull = dweibull_gen(name='dweibull')
class expon_gen(rv_continuous):
    r"""An exponential continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `expon` is:

    .. math::

        f(x) = \exp(-x)

    for :math:`x \ge 0`.

    %(after_notes)s

    A common parameterization for `expon` is in terms of the rate parameter
    ``lambda``, such that ``pdf = lambda * exp(-lambda * x)``. This
    parameterization corresponds to using ``scale = 1 / lambda``.

    The exponential distribution is a special case of the gamma
    distributions, with gamma shape parameter ``a = 1``.

    %(example)s

    """
    def _shape_info(self):
        return []

    def _rvs(self, size=None, random_state=None):
        # Delegate sampling to the NumPy random generator.
        return random_state.standard_exponential(size)

    def _pdf(self, x):
        # expon.pdf(x) = exp(-x)
        return np.exp(-x)

    def _logpdf(self, x):
        return -x

    def _cdf(self, x):
        return -sc.expm1(-x)

    def _ppf(self, q):
        return -sc.log1p(-q)

    def _sf(self, x):
        return np.exp(-x)

    def _logsf(self, x):
        return -x

    def _isf(self, q):
        return -np.log(q)

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        return 1.0, 1.0, 2.0, 6.0

    def _entropy(self):
        return 1.0

    @_call_super_mom
    @replace_notes_in_docstring(rv_continuous, notes="""\
        When `method='MLE'`,
        this function uses explicit formulas for the maximum likelihood
        estimation of the exponential distribution parameters, so the
        `optimizer`, `loc` and `scale` keyword arguments are
        ignored.\n\n""")
    def fit(self, data, *args, **kwds):
        # No shape parameters exist, so positional guesses are meaningless.
        if args:
            raise TypeError("Too many arguments.")

        floc = kwds.pop('floc', None)
        fscale = kwds.pop('fscale', None)

        _remove_optimizer_parameters(kwds)

        if not (floc is None or fscale is None):
            # This check is for consistency with `rv_continuous.fit`.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        data = np.asarray(data)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        data_min = data.min()

        if floc is not None and data_min < floc:
            # There are values that are less than the specified loc.
            raise FitDataError("expon", lower=floc, upper=np.inf)

        # ML estimate of the location is the minimum of the data.
        loc = data_min if floc is None else floc

        # ML estimate of the scale is the shifted mean.
        scale = data.mean() - loc if fscale is None else fscale

        # We expect the return values to be floating point, so ensure it
        # by explicitly converting to float.
        return float(loc), float(scale)


expon = expon_gen(a=0.0, name='expon')
class exponnorm_gen(rv_continuous):
    r"""An exponentially modified Normal continuous random variable.

    Also known as the exponentially modified Gaussian distribution [1]_.

    %(before_notes)s

    Notes
    -----
    The probability density function for `exponnorm` is:

    .. math::

        f(x, K) = \frac{1}{2K} \exp\left(\frac{1}{2 K^2} - x / K \right)
                  \text{erfc}\left(-\frac{x - 1/K}{\sqrt{2}}\right)

    where :math:`x` is a real number and :math:`K > 0`.

    It can be thought of as the sum of a standard normal random variable
    and an independent exponentially distributed random variable with rate
    ``1/K``.

    %(after_notes)s

    An alternative parameterization of this distribution (for example, in
    the Wikipedia article [1]_) involves three parameters, :math:`\mu`,
    :math:`\lambda` and :math:`\sigma`.

    In the present parameterization this corresponds to having ``loc`` and
    ``scale`` equal to :math:`\mu` and :math:`\sigma`, respectively, and
    shape parameter :math:`K = 1/(\sigma\lambda)`.

    .. versionadded:: 0.16.0

    References
    ----------
    .. [1] Exponentially modified Gaussian distribution, Wikipedia,
           https://en.wikipedia.org/wiki/Exponentially_modified_Gaussian_distribution

    %(example)s

    """
    def _shape_info(self):
        k_info = _ShapeInfo("K", False, (0, np.inf), (False, False))
        return [k_info]

    def _rvs(self, K, size=None, random_state=None):
        # Sum of an exponential variate scaled by K and a standard normal.
        exp_part = random_state.standard_exponential(size) * K
        normal_part = random_state.standard_normal(size)
        return exp_part + normal_part

    def _pdf(self, x, K):
        log_density = self._logpdf(x, K)
        return np.exp(log_density)

    def _logpdf(self, x, K):
        rate = 1.0 / K
        exponent = rate * (0.5 * rate - x)
        return exponent + _norm_logcdf(x - rate) - np.log(K)

    def _cdf(self, x, K):
        rate = 1.0 / K
        exponent = rate * (0.5 * rate - x)
        # The product exp(exponent) * Phi(x - rate) is formed in log space.
        log_product = exponent + _norm_logcdf(x - rate)
        return _norm_cdf(x) - np.exp(log_product)

    def _sf(self, x, K):
        rate = 1.0 / K
        exponent = rate * (0.5 * rate - x)
        log_product = exponent + _norm_logcdf(x - rate)
        return _norm_cdf(-x) + np.exp(log_product)

    def _stats(self, K):
        k_sq = K * K
        var = 1.0 + k_sq
        skew = 2 * K**3 * var**(-1.5)
        kurt = 6.0 * k_sq * k_sq * var**(-2)
        return K, var, skew, kurt


exponnorm = exponnorm_gen(name='exponnorm')
class exponweib_gen(rv_continuous):
    r"""An exponentiated Weibull continuous random variable.

    %(before_notes)s

    See Also
    --------
    weibull_min, numpy.random.Generator.weibull

    Notes
    -----
    The probability density function for `exponweib` is:

    .. math::

        f(x, a, c) = a c [1-\exp(-x^c)]^{a-1} \exp(-x^c) x^{c-1}

    and its cumulative distribution function is:

    .. math::

        F(x, a, c) = [1-\exp(-x^c)]^a

    for :math:`x > 0`, :math:`a > 0`, :math:`c > 0`.

    `exponweib` takes :math:`a` and :math:`c` as shape parameters:

    * :math:`a` is the exponentiation parameter,
      with the special case :math:`a=1` corresponding to the
      (non-exponentiated) Weibull distribution `weibull_min`.
    * :math:`c` is the shape parameter of the non-exponentiated Weibull law.

    %(after_notes)s

    References
    ----------
    https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution

    %(example)s

    """
    def _shape_info(self):
        return [
            _ShapeInfo("a", False, (0, np.inf), (False, False)),
            _ShapeInfo("c", False, (0, np.inf), (False, False)),
        ]

    def _pdf(self, x, a, c):
        # exponweib.pdf(x, a, c) =
        #     a * c * (1-exp(-x**c))**(a-1) * exp(-x**c)*x**(c-1)
        log_density = self._logpdf(x, a, c)
        return np.exp(log_density)

    def _logpdf(self, x, a, c):
        neg_xc = -x**c
        # 1 - exp(-x**c), i.e. the plain Weibull CDF, via expm1.
        weibull_cdf = -sc.expm1(neg_xc)
        return (np.log(a) + np.log(c) + sc.xlogy(a - 1.0, weibull_cdf) +
                neg_xc + sc.xlogy(c - 1.0, x))

    def _cdf(self, x, a, c):
        weibull_cdf = -sc.expm1(-x**c)
        return weibull_cdf**a

    def _ppf(self, q, a, c):
        base = -sc.log1p(-q**(1.0/a))
        return base**np.asarray(1.0/c)


exponweib = exponweib_gen(a=0.0, name='exponweib')
class exponpow_gen(rv_continuous):
    r"""An exponential power continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `exponpow` is:

    .. math::

        f(x, b) = b x^{b-1} \exp(1 + x^b - \exp(x^b))

    for :math:`x \ge 0`, :math:`b > 0`.  Note that this is a different
    distribution from the exponential power distribution that is also known
    under the names "generalized normal" or "generalized Gaussian".

    `exponpow` takes ``b`` as a shape parameter for :math:`b`.

    %(after_notes)s

    References
    ----------
    http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Exponentialpower.pdf

    %(example)s

    """
    def _shape_info(self):
        b_info = _ShapeInfo("b", False, (0, np.inf), (False, False))
        return [b_info]

    def _pdf(self, x, b):
        # exponpow.pdf(x, b) = b * x**(b-1) * exp(1 + x**b - exp(x**b))
        log_density = self._logpdf(x, b)
        return np.exp(log_density)

    def _logpdf(self, x, b):
        x_pow_b = x**b
        return 1 + np.log(b) + sc.xlogy(b - 1.0, x) + x_pow_b - np.exp(x_pow_b)

    def _cdf(self, x, b):
        # 1 - exp(-(exp(x**b) - 1)), with both steps done through expm1.
        inner = sc.expm1(x**b)
        return -sc.expm1(-inner)

    def _sf(self, x, b):
        inner = sc.expm1(x**b)
        return np.exp(-inner)

    def _isf(self, x, b):
        inner = sc.log1p(-np.log(x))
        return inner**(1./b)

    def _ppf(self, q, b):
        inner = sc.log1p(-sc.log1p(-q))
        return pow(inner, 1.0/b)


exponpow = exponpow_gen(a=0.0, name='exponpow')
class fatiguelife_gen(rv_continuous):
    r"""A fatigue-life (Birnbaum-Saunders) continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `fatiguelife` is:

    .. math::

        f(x, c) = \frac{x+1}{2c\sqrt{2\pi x^3}} \exp(-\frac{(x-1)^2}{2x c^2})

    for :math:`x >= 0` and :math:`c > 0`.

    `fatiguelife` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    References
    ----------
    .. [1] "Birnbaum-Saunders distribution",
           https://en.wikipedia.org/wiki/Birnbaum-Saunders_distribution

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [c_info]

    def _rvs(self, c, size=None, random_state=None):
        # Transform a standard normal draw into a fatigue-life variate.
        normal_draw = random_state.standard_normal(size)
        half = 0.5*c*normal_draw
        half_sq = half*half
        return 1.0 + 2*half_sq + 2*half*np.sqrt(1 + half_sq)

    def _pdf(self, x, c):
        # fatiguelife.pdf(x, c) =
        #     (x+1) / (2*c*sqrt(2*pi*x**3)) * exp(-(x-1)**2/(2*x*c**2))
        log_density = self._logpdf(x, c)
        return np.exp(log_density)

    def _logpdf(self, x, c):
        log_numer = np.log(x+1)
        quad = (x-1)**2 / (2.0*x*c**2)
        log_two_c = np.log(2*c)
        log_root = 0.5*(np.log(2*np.pi) + 3*np.log(x))
        return log_numer - quad - log_two_c - log_root

    def _cdf(self, x, c):
        root = np.sqrt(x)
        return _norm_cdf(1.0 / c * (root - 1.0/root))

    def _ppf(self, q, c):
        scaled = c*sc.ndtri(q)
        return 0.25 * (scaled + np.sqrt(scaled**2 + 4))**2

    def _sf(self, x, c):
        root = np.sqrt(x)
        return _norm_sf(1.0 / c * (root - 1.0/root))

    def _isf(self, q, c):
        scaled = -c*sc.ndtri(q)
        return 0.25 * (scaled + np.sqrt(scaled**2 + 4))**2

    def _stats(self, c):
        # NB: the formula for kurtosis in wikipedia seems to have an error:
        # it's 40, not 41. At least it disagrees with the one from Wolfram
        # Alpha.  And the latter one, below, passes the tests, while the wiki
        # one doesn't So far I didn't have the guts to actually check the
        # coefficients from the expressions for the raw moments.
        c_sq = c*c
        mean = c_sq / 2.0 + 1.0
        den = 5.0 * c_sq + 4.0
        var = c_sq*den / 4.0
        skew = 4 * c * (11*c_sq + 6.0) / np.power(den, 1.5)
        kurt = 6 * c_sq * (93*c_sq + 40.0) / den**2.0
        return mean, var, skew, kurt


fatiguelife = fatiguelife_gen(a=0.0, name='fatiguelife')
class foldcauchy_gen(rv_continuous):
    r"""A folded Cauchy continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `foldcauchy` is:

    .. math::

        f(x, c) = \frac{1}{\pi (1+(x-c)^2)} + \frac{1}{\pi (1+(x+c)^2)}

    for :math:`x \ge 0` and :math:`c \ge 0`.

    `foldcauchy` takes ``c`` as a shape parameter for :math:`c`.

    %(example)s

    """
    def _argcheck(self, c):
        # c == 0 is a valid shape value for this distribution.
        return c >= 0

    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (True, False))
        return [c_info]

    def _rvs(self, c, size=None, random_state=None):
        # Fold a Cauchy variate located at c onto the non-negative axis.
        unfolded = cauchy.rvs(loc=c, size=size, random_state=random_state)
        return abs(unfolded)

    def _pdf(self, x, c):
        # foldcauchy.pdf(x, c) = 1/(pi*(1+(x-c)**2)) + 1/(pi*(1+(x+c)**2))
        left = 1.0/(1+(x-c)**2)
        right = 1.0/(1+(x+c)**2)
        return 1.0/np.pi*(left + right)

    def _cdf(self, x, c):
        angle_sum = np.arctan(x-c) + np.arctan(x+c)
        return 1.0/np.pi*angle_sum

    def _stats(self, c):
        return np.inf, np.inf, np.nan, np.nan


foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy')
class f_gen(rv_continuous):
    r"""An F continuous random variable.

    For the noncentral F distribution, see `ncf`.

    %(before_notes)s

    See Also
    --------
    ncf

    Notes
    -----
    The probability density function for `f` is:

    .. math::

        f(x, df_1, df_2) = \frac{df_2^{df_2/2} df_1^{df_1/2} x^{df_1 / 2-1}}
                                {(df_2+df_1 x)^{(df_1+df_2)/2}
                                 B(df_1/2, df_2/2)}

    for :math:`x > 0` and parameters :math:`df_1, df_2 > 0` .

    `f` takes ``dfn`` and ``dfd`` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [
            _ShapeInfo("dfn", False, (0, np.inf), (False, False)),
            _ShapeInfo("dfd", False, (0, np.inf), (False, False)),
        ]

    def _rvs(self, dfn, dfd, size=None, random_state=None):
        # Delegate sampling to the NumPy random generator.
        return random_state.f(dfn, dfd, size)

    def _pdf(self, x, dfn, dfd):
        #                      df2**(df2/2) * df1**(df1/2) * x**(df1/2-1)
        # F.pdf(x, df1, df2) = --------------------------------------------
        #                      (df2+df1*x)**((df1+df2)/2) * B(df1/2, df2/2)
        log_density = self._logpdf(x, dfn, dfd)
        return np.exp(log_density)

    def _logpdf(self, x, dfn, dfd):
        # Promote the degrees of freedom to floating point.
        n = 1.0 * dfn
        m = 1.0 * dfd
        log_numer = m/2 * np.log(m) + n/2 * np.log(n) + sc.xlogy(n/2 - 1, x)
        log_denom = ((n+m)/2) * np.log(m + n*x) + sc.betaln(n/2, m/2)
        return log_numer - log_denom

    def _cdf(self, x, dfn, dfd):
        return sc.fdtr(dfn, dfd, x)

    def _sf(self, x, dfn, dfd):
        return sc.fdtrc(dfn, dfd, x)

    def _ppf(self, q, dfn, dfd):
        return sc.fdtri(dfn, dfd, q)

    def _stats(self, dfn, dfd):
        v1, v2 = 1. * dfn, 1. * dfd
        v2_2, v2_4, v2_6, v2_8 = v2 - 2., v2 - 4., v2 - 6., v2 - 8.

        def _mean(v2, v2_2):
            return v2 / v2_2

        def _variance(v1, v2, v2_2, v2_4):
            return 2 * v2 * v2 * (v1 + v2_2) / (v1 * v2_2**2 * v2_4)

        def _skew_unscaled(v1, v2_2, v2_4, v2_6):
            return (2 * v1 + v2_2) / v2_6 * np.sqrt(v2_4 / (v1 * (v1 + v2_2)))

        def _kurt_unscaled(g1, v2_6, v2_8):
            return (8 + g1 * g1 * v2_6) / v2_8

        # Each statistic is evaluated only where `dfd` is large enough for
        # the lambda's branch to apply; elsewhere the fill value is used.
        mu = _lazywhere(v2 > 2, (v2, v2_2), _mean, np.inf)

        mu2 = _lazywhere(v2 > 4, (v1, v2, v2_2, v2_4), _variance, np.inf)

        g1 = _lazywhere(v2 > 6, (v1, v2_2, v2_4, v2_6), _skew_unscaled,
                        np.nan)
        g1 *= np.sqrt(8.)

        # The kurtosis expression consumes the already-scaled skewness.
        g2 = _lazywhere(v2 > 8, (g1, v2_6, v2_8), _kurt_unscaled, np.nan)
        g2 *= 3. / 2.

        return mu, mu2, g1, g2


f = f_gen(a=0.0, name='f')
2152## Folded Normal
2153## abs(Z) where (Z is normal with mu=L and std=S so that c=abs(L)/S)
2154##
2155## note: regress docs have scale parameter correct, but first parameter
2156## he gives is a shape parameter A = c * scale
2158## Half-normal is folded normal with shape-parameter c=0.
class foldnorm_gen(rv_continuous):
    r"""A folded normal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `foldnorm` is:

    .. math::

        f(x, c) = \sqrt{2/\pi} \cosh(c x) \exp(-\frac{x^2+c^2}{2})

    for :math:`x \ge 0` and :math:`c \ge 0`.

    `foldnorm` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, c):
        # c == 0 is a valid shape value for this distribution.
        return c >= 0

    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, np.inf), (True, False))]

    def _rvs(self, c, size=None, random_state=None):
        # Fold a unit-variance normal variate centred at c.
        return abs(random_state.standard_normal(size) + c)

    def _pdf(self, x, c):
        # foldnormal.pdf(x, c) = sqrt(2/pi) * cosh(c*x) * exp(-(x**2+c**2)/2)
        return _norm_pdf(x + c) + _norm_pdf(x-c)

    def _cdf(self, x, c):
        # Mathematically equal to Phi(x - c) + Phi(x + c) - 1.  Written with
        # erf, the "- 1" is absorbed analytically, so small x no longer
        # suffers from cancellation.
        sqrt_two = np.sqrt(2)
        return 0.5 * (sc.erf((x - c)/sqrt_two) + sc.erf((x + c)/sqrt_two))

    def _sf(self, x, c):
        # Explicit survival function: the generic fallback ``1 - cdf``
        # rounds to exactly 0 in the far right tail.
        return _norm_sf(x - c) + _norm_sf(x + c)

    def _stats(self, c):
        # Regina C. Elandt, Technometrics 3, 551 (1961)
        # https://www.jstor.org/stable/1266561
        #
        c2 = c*c
        expfac = np.exp(-0.5*c2) / np.sqrt(2.*np.pi)

        mu = 2.*expfac + c * sc.erf(c/np.sqrt(2))
        mu2 = c2 + 1 - mu*mu

        g1 = 2. * (mu*mu*mu - c2*mu - expfac)
        g1 /= np.power(mu2, 1.5)

        g2 = c2 * (c2 + 6.) + 3 + 8.*expfac*mu
        g2 += (2. * (c2 - 3.) - 3. * mu**2) * mu**2
        g2 = g2 / mu2**2.0 - 3.

        return mu, mu2, g1, g2


foldnorm = foldnorm_gen(a=0.0, name='foldnorm')
class weibull_min_gen(rv_continuous):
    r"""Weibull minimum continuous random variable.

    The Weibull Minimum Extreme Value distribution, from extreme value theory
    (Fisher-Gnedenko theorem), is also often simply called the Weibull
    distribution. It arises as the limiting distribution of the rescaled
    minimum of iid random variables.

    %(before_notes)s

    See Also
    --------
    weibull_max, numpy.random.Generator.weibull, exponweib

    Notes
    -----
    The probability density function for `weibull_min` is:

    .. math::

        f(x, c) = c x^{c-1} \exp(-x^c)

    for :math:`x > 0`, :math:`c > 0`.

    `weibull_min` takes ``c`` as a shape parameter for :math:`c`.
    (named :math:`k` in Wikipedia article and :math:`a` in
    ``numpy.random.weibull``).  Special shape values are :math:`c=1` and
    :math:`c=2` where Weibull distribution reduces to the `expon` and
    `rayleigh` distributions respectively.

    %(after_notes)s

    References
    ----------
    https://en.wikipedia.org/wiki/Weibull_distribution

    https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem

    %(example)s

    """
    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [c_info]

    def _pdf(self, x, c):
        # weibull_min.pdf(x, c) = c * x**(c-1) * exp(-x**c)
        return c*x**(c-1)*np.exp(-x**c)

    def _logpdf(self, x, c):
        return np.log(c) + sc.xlogy(c - 1, x) - x**c

    def _cdf(self, x, c):
        return -sc.expm1(-x**c)

    def _sf(self, x, c):
        return np.exp(-x**c)

    def _logsf(self, x, c):
        return -x**c

    def _ppf(self, q, c):
        return (-sc.log1p(-q))**(1.0/c)

    def _munp(self, n, c):
        return sc.gamma(1.0+n*1.0/c)

    def _entropy(self, c):
        return -_EULER / c - np.log(c) + _EULER + 1

    @extend_notes_in_docstring(rv_continuous, notes="""\
        If ``method='mm'``, parameters fixed by the user are respected, and the
        remaining parameters are used to match distribution and sample moments
        where possible. For example, if the user fixes the location with
        ``floc``, the parameters will only match the distribution skewness and
        variance to the sample skewness and variance; no attempt will be made
        to match the means or minimize a norm of the errors.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # `superfit=True` bypasses everything below and uses the generic fit.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        # this extracts fixed shape, location, and scale however they
        # are specified, and also leaves them in `kwds`
        data, fc, floc, fscale = _check_fit_input_parameters(self, data,
                                                             args, kwds)
        method = kwds.get("method", "mle").lower()
        use_mm = method == "mm"

        # See https://en.wikipedia.org/wiki/Weibull_distribution#Moments for
        # moment formulas.
        def population_skew(c):
            g1 = sc.gamma(1+1/c)
            g2 = sc.gamma(1+2/c)
            g3 = sc.gamma(1+3/c)
            numerator = 2 * g1**3 - 3*g1*g2 + g3
            denominator = (g2 - g1**2)**(3/2)
            return numerator/denominator

        # For c in [1e2, 3e4], population skewness appears to approach
        # asymptote near -1.139, but past c > 3e4, skewness begins to vary
        # wildly, and MoM won't provide a good guess. Get out early.
        sample_skew = stats.skew(data)
        max_c = 1e4
        min_skew = population_skew(max_c)
        if sample_skew < min_skew and not use_mm and fc is None and not args:
            return super().fit(data, *args, **kwds)

        # If method is method of moments, we don't need the user's guesses.
        # Otherwise, extract the guesses from args and kwds.
        if use_mm:
            c, loc, scale = None, None, None
        else:
            c = args[0] if len(args) else None
            loc = kwds.pop('loc', None)
            scale = kwds.pop('scale', None)

        if fc is None and c is None:  # not fixed and no guess: use MoM
            # Solve for c that matches sample distribution skewness to sample
            # skewness.
            # we start having numerical issues with `weibull_min` with
            # parameters outside this range - and not just in this method.
            # We could probably improve the situation by doing everything
            # in the log space, but that is for another time.
            solution = root_scalar(lambda c: population_skew(c) - sample_skew,
                                   bracket=[0.02, max_c], method='bisect')
            c = solution.root
        elif fc is not None:  # fixed: use it
            c = fc

        if fscale is None and scale is None:
            # Match the population variance to the sample variance.
            sample_var = np.var(data)
            scale = np.sqrt(sample_var
                            / (sc.gamma(1+2/c) - sc.gamma(1+1/c)**2))
        elif fscale is not None:
            scale = fscale

        if floc is None and loc is None:
            # Match the population mean to the sample mean.
            sample_mean = np.mean(data)
            loc = sample_mean - scale*sc.gamma(1 + 1/c)
        elif floc is not None:
            loc = floc

        if use_mm:
            return c, loc, scale

        # At this point, parameter "guesses" may equal the fixed parameters
        # in kwds. No harm in passing them as guesses, too.
        return super().fit(data, c, loc=loc, scale=scale, **kwds)


weibull_min = weibull_min_gen(a=0.0, name='weibull_min')
class truncweibull_min_gen(rv_continuous):
    r"""A doubly truncated Weibull minimum continuous random variable.

    %(before_notes)s

    See Also
    --------
    weibull_min, truncexpon

    Notes
    -----
    The probability density function for `truncweibull_min` is:

    .. math::

        f(x, a, b, c) = \frac{c x^{c-1} \exp(-x^c)}{\exp(-a^c) - \exp(-b^c)}

    for :math:`a < x <= b`, :math:`0 \le a < b` and :math:`c > 0`.

    `truncweibull_min` takes :math:`a`, :math:`b`, and :math:`c` as shape
    parameters.

    Notice that the truncation values, :math:`a` and :math:`b`, are defined in
    standardized form:

    .. math::

        a = (u_l - loc)/scale
        b = (u_r - loc)/scale

    where :math:`u_l` and :math:`u_r` are the specific left and right
    truncation values, respectively. In other words, the support of the
    distribution becomes :math:`(a*scale + loc) < x <= (b*scale + loc)` when
    :math:`loc` and/or :math:`scale` are provided.

    %(after_notes)s

    References
    ----------

    .. [1] Rinne, H. "The Weibull Distribution: A Handbook". CRC Press (2009).

    %(example)s

    """
    def _argcheck(self, c, a, b):
        return (a >= 0.) & (b > a) & (c > 0.)

    def _shape_info(self):
        return [
            _ShapeInfo("c", False, (0, np.inf), (False, False)),
            _ShapeInfo("a", False, (0, np.inf), (True, False)),
            _ShapeInfo("b", False, (0, np.inf), (False, False)),
        ]

    def _fitstart(self, data):
        # Arbitrary, but default a=b=c=1 is not valid
        return super()._fitstart(data, args=(1, 0, 1))

    def _get_support(self, c, a, b):
        # The truncation points are the support end points.
        return a, b

    def _pdf(self, x, c, a, b):
        # Probability mass of the untruncated Weibull on (a, b].
        mass = np.exp(-a**c) - np.exp(-b**c)
        return (c * x**(c-1) * np.exp(-x**c)) / mass

    def _logpdf(self, x, c, a, b):
        log_mass = np.log(np.exp(-a**c) - np.exp(-b**c))
        return np.log(c) + sc.xlogy(c - 1, x) - x**c - log_mass

    def _cdf(self, x, c, a, b):
        below = np.exp(-a**c) - np.exp(-x**c)
        mass = np.exp(-a**c) - np.exp(-b**c)
        return below / mass

    def _logcdf(self, x, c, a, b):
        log_below = np.log(np.exp(-a**c) - np.exp(-x**c))
        log_mass = np.log(np.exp(-a**c) - np.exp(-b**c))
        return log_below - log_mass

    def _sf(self, x, c, a, b):
        above = np.exp(-x**c) - np.exp(-b**c)
        mass = np.exp(-a**c) - np.exp(-b**c)
        return above / mass

    def _logsf(self, x, c, a, b):
        log_above = np.log(np.exp(-x**c) - np.exp(-b**c))
        log_mass = np.log(np.exp(-a**c) - np.exp(-b**c))
        return log_above - log_mass

    def _isf(self, q, c, a, b):
        mixed = (1 - q) * np.exp(-b**c) + q * np.exp(-a**c)
        return (-np.log(mixed))**(1/c)

    def _ppf(self, q, c, a, b):
        mixed = (1 - q) * np.exp(-a**c) + q * np.exp(-b**c)
        return (-np.log(mixed))**(1/c)

    def _munp(self, n, c, a, b):
        order = n/c + 1.
        gamma_fun = sc.gamma(order) * (
            sc.gammainc(order, b**c) - sc.gammainc(order, a**c)
            )
        mass = np.exp(-a**c) - np.exp(-b**c)
        return gamma_fun / mass


truncweibull_min = truncweibull_min_gen(name='truncweibull_min')
class weibull_max_gen(rv_continuous):
    r"""Weibull maximum continuous random variable.

    The Weibull Maximum Extreme Value distribution, from extreme value theory
    (Fisher-Gnedenko theorem), is the limiting distribution of rescaled
    maximum of iid random variables. This is the distribution of -X
    if X is from the `weibull_min` function.

    %(before_notes)s

    See Also
    --------
    weibull_min

    Notes
    -----
    The probability density function for `weibull_max` is:

    .. math::

        f(x, c) = c (-x)^{c-1} \exp(-(-x)^c)

    for :math:`x < 0`, :math:`c > 0`.

    `weibull_max` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    References
    ----------
    https://en.wikipedia.org/wiki/Weibull_distribution

    https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem

    %(example)s

    """
    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [shape_c]

    def _pdf(self, x, c):
        # weibull_max.pdf(x, c) = c * (-x)**(c-1) * exp(-(-x)**c)
        mirrored = -x
        return c * mirrored**(c - 1) * np.exp(-mirrored**c)

    def _logpdf(self, x, c):
        mirrored = -x
        return np.log(c) + sc.xlogy(c - 1, mirrored) - mirrored**c

    def _cdf(self, x, c):
        return np.exp(-(-x)**c)

    def _logcdf(self, x, c):
        return -(-x)**c

    def _sf(self, x, c):
        # 1 - exp(t) evaluated through expm1.
        return -sc.expm1(-(-x)**c)

    def _ppf(self, q, c):
        return -((-np.log(q))**(1.0/c))

    def _munp(self, n, c):
        # Moments of -X for X ~ weibull_min: same magnitude, sign flips for
        # odd orders.
        magnitude = sc.gamma(1.0 + n*1.0/c)
        sign = -1 if int(n) % 2 else 1
        return sign * magnitude

    def _entropy(self, c):
        return -_EULER / c - np.log(c) + _EULER + 1


weibull_max = weibull_max_gen(b=0.0, name='weibull_max')
class genlogistic_gen(rv_continuous):
    r"""A generalized logistic continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `genlogistic` is:

    .. math::

        f(x, c) = c \frac{\exp(-x)}
                         {(1 + \exp(-x))^{c+1}}

    for real :math:`x` and :math:`c > 0`.

    `genlogistic` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [shape_c]

    def _pdf(self, x, c):
        # genlogistic.pdf(x, c) = c * exp(-x) / (1 + exp(-x))**(c+1)
        log_density = self._logpdf(x, c)
        return np.exp(log_density)

    def _logpdf(self, x, c):
        # Two mathematically equivalent expressions for log(pdf(x, c)):
        #     log(pdf(x, c)) = log(c) - x - (c + 1)*log(1 + exp(-x))
        #                    = log(c) + c*x - (c + 1)*log(1 + exp(x))
        # The first is used for x >= 0 and the second for x < 0, so that the
        # exponential is always taken of a non-positive number.
        magnitude = np.abs(x)
        slope = -(c - 1) * (x < 0) - 1
        return np.log(c) + slope*magnitude - (c+1) * sc.log1p(np.exp(-magnitude))

    def _cdf(self, x, c):
        return (1+np.exp(-x))**(-c)

    def _ppf(self, q, c):
        return -np.log(q**(-1.0/c) - 1)

    def _stats(self, c):
        # Mean, variance, skewness and excess kurtosis in closed form using
        # the digamma and Hurwitz zeta functions.
        mean = _EULER + sc.psi(c)
        variance = np.pi*np.pi/6.0 + sc.zeta(2, c)
        skewness = (-2*sc.zeta(3, c) + 2*_ZETA3) / np.power(variance, 1.5)
        kurtosis = (np.pi**4/15.0 + 6*sc.zeta(4, c)) / variance**2.0
        return mean, variance, skewness, kurtosis


genlogistic = genlogistic_gen(name='genlogistic')
class genpareto_gen(rv_continuous):
    r"""A generalized Pareto continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `genpareto` is:

    .. math::

        f(x, c) = (1 + c x)^{-1 - 1/c}

    defined for :math:`x \ge 0` if :math:`c \ge 0`, and for
    :math:`0 \le x \le -1/c` if :math:`c < 0`.

    `genpareto` takes ``c`` as a shape parameter for :math:`c`.

    For :math:`c=0`, `genpareto` reduces to the exponential
    distribution, `expon`:

    .. math::

        f(x, 0) = \exp(-x)

    For :math:`c=-1`, `genpareto` is uniform on ``[0, 1]``:

    .. math::

        f(x, -1) = 1

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, c):
        # Any finite shape is admissible.
        return np.isfinite(c)

    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
        return [shape_c]

    def _get_support(self, c):
        c = np.asarray(c)
        # Upper endpoint is -1/c for negative shapes and unbounded otherwise.
        upper = _lazywhere(c < 0, (c,),
                           lambda shape: -1. / shape,
                           np.inf)
        # Both branches give self.a; np.where is used to give the lower
        # endpoint the same shape as `c`.
        lower = np.where(c >= 0, self.a, self.a)
        return lower, upper

    def _pdf(self, x, c):
        # genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
        log_density = self._logpdf(x, c)
        return np.exp(log_density)

    def _logpdf(self, x, c):
        # For c == 0 (or NaN x) fall back to the exponential limit, -x.
        usable = (x == x) & (c != 0)
        return _lazywhere(usable, (x, c),
                          lambda x, c: -sc.xlog1py(c + 1., c*x) / c,
                          -x)

    def _cdf(self, x, c):
        return -sc.inv_boxcox1p(-x, -c)

    def _sf(self, x, c):
        return sc.inv_boxcox(-x, -c)

    def _logsf(self, x, c):
        usable = (x == x) & (c != 0)
        return _lazywhere(usable, (x, c),
                          lambda x, c: -sc.log1p(c*x) / c,
                          -x)

    def _ppf(self, q, c):
        return -sc.boxcox1p(-q, -c)

    def _isf(self, q, c):
        return -sc.boxcox(q, -c)

    def _stats(self, c, moments='mv'):
        # Each statistic is only computed when requested through `moments`;
        # entries that were not requested are returned as None.
        m = v = s = k = None
        if 'm' in moments:
            m = _lazywhere(c < 1, (c,),
                           lambda xi: 1/(1 - xi),
                           np.inf)
        if 'v' in moments:
            v = _lazywhere(c < 1/2, (c,),
                           lambda xi: 1 / (1 - xi)**2 / (1 - 2*xi),
                           np.nan)
        if 's' in moments:
            s = _lazywhere(c < 1/3, (c,),
                           lambda xi: 2 * (1 + xi) * np.sqrt(1 - 2*xi) /
                                      (1 - 3*xi),
                           np.nan)
        if 'k' in moments:
            k = _lazywhere(c < 1/4, (c,),
                           lambda xi: 3 * (1 - 2*xi) * (2*xi**2 + xi + 3) /
                                      (1 - 3*xi) / (1 - 4*xi) - 3,
                           np.nan)
        return m, v, s, k

    def _munp(self, n, c):
        def _nonzero_shape_moment(order, shape):
            # Alternating binomial sum for the raw moment when shape != 0.
            total = 0.0
            indices = np.arange(0, order + 1)
            for idx, binom in zip(indices, sc.comb(order, indices)):
                total = total + binom * (-1) ** idx / (1.0 - shape * idx)
            return np.where(shape * order < 1,
                            total * (-1.0 / shape) ** order,
                            np.inf)

        # For c == 0 the distribution is exponential: moment is n!.
        return _lazywhere(c != 0, (c,),
                          lambda c: _nonzero_shape_moment(n, c),
                          sc.gamma(n + 1))

    def _entropy(self, c):
        return 1. + c


genpareto = genpareto_gen(a=0.0, name='genpareto')
class genexpon_gen(rv_continuous):
    r"""A generalized exponential continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `genexpon` is:

    .. math::

        f(x, a, b, c) = (a + b (1 - \exp(-c x)))
                        \exp(-a x - b x + \frac{b}{c}  (1-\exp(-c x)))

    for :math:`x \ge 0`, :math:`a, b, c > 0`.

    `genexpon` takes :math:`a`, :math:`b` and :math:`c` as shape parameters.

    %(after_notes)s

    References
    ----------
    H.K. Ryu, "An Extension of Marshall and Olkin's Bivariate Exponential
    Distribution", Journal of the American Statistical Association, 1993.

    N. Balakrishnan, "The Exponential Distribution: Theory, Methods and
    Applications", Asit P. Basu.

    %(example)s

    """
    def _shape_info(self):
        positive = (0, np.inf)
        open_ends = (False, False)
        return [
            _ShapeInfo("a", False, positive, open_ends),
            _ShapeInfo("b", False, positive, open_ends),
            _ShapeInfo("c", False, positive, open_ends),
        ]

    def _pdf(self, x, a, b, c):
        # genexpon.pdf(x, a, b, c) = (a + b * (1 - exp(-c*x))) * \
        #                            exp(-a*x - b*x + b/c * (1-exp(-c*x)))
        # `saturation` is 1 - exp(-c*x), computed through expm1.
        saturation = -sc.expm1(-c*x)
        hazard = a + b*saturation
        log_survival = (-a-b)*x + b*saturation/c
        return hazard*np.exp(log_survival)

    def _logpdf(self, x, a, b, c):
        saturation = -sc.expm1(-c*x)
        return np.log(a+b*saturation) + (-a-b)*x+b*saturation/c

    def _cdf(self, x, a, b, c):
        log_survival = (-a-b)*x + b*(-sc.expm1(-c*x))/c
        return -sc.expm1(log_survival)

    def _sf(self, x, a, b, c):
        log_survival = (-a-b)*x + b*(-sc.expm1(-c*x))/c
        return np.exp(log_survival)


genexpon = genexpon_gen(a=0.0, name='genexpon')
2793class genextreme_gen(rv_continuous):
2794 r"""A generalized extreme value continuous random variable.
2796 %(before_notes)s
2798 See Also
2799 --------
2800 gumbel_r
2802 Notes
2803 -----
2804 For :math:`c=0`, `genextreme` is equal to `gumbel_r` with
2805 probability density function
2807 .. math::
2809 f(x) = \exp(-\exp(-x)) \exp(-x),
2811 where :math:`-\infty < x < \infty`.
2813 For :math:`c \ne 0`, the probability density function for `genextreme` is:
2815 .. math::
2817 f(x, c) = \exp(-(1-c x)^{1/c}) (1-c x)^{1/c-1},
2819 where :math:`-\infty < x \le 1/c` if :math:`c > 0` and
2820 :math:`1/c \le x < \infty` if :math:`c < 0`.
2822 Note that several sources and software packages use the opposite
2823 convention for the sign of the shape parameter :math:`c`.
2825 `genextreme` takes ``c`` as a shape parameter for :math:`c`.
2827 %(after_notes)s
2829 %(example)s
2831 """
2832 def _argcheck(self, c):
2833 return np.isfinite(c)
2835 def _shape_info(self):
2836 return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
2838 def _get_support(self, c):
2839 _b = np.where(c > 0, 1.0 / np.maximum(c, _XMIN), np.inf)
2840 _a = np.where(c < 0, 1.0 / np.minimum(c, -_XMIN), -np.inf)
2841 return _a, _b
2843 def _loglogcdf(self, x, c):
2844 # Returns log(-log(cdf(x, c)))
2845 return _lazywhere((x == x) & (c != 0), (x, c),
2846 lambda x, c: sc.log1p(-c*x)/c, -x)
2848 def _pdf(self, x, c):
2849 # genextreme.pdf(x, c) =
2850 # exp(-exp(-x))*exp(-x), for c==0
2851 # exp(-(1-c*x)**(1/c))*(1-c*x)**(1/c-1), for x \le 1/c, c > 0
2852 return np.exp(self._logpdf(x, c))
2854 def _logpdf(self, x, c):
2855 cx = _lazywhere((x == x) & (c != 0), (x, c), lambda x, c: c*x, 0.0)
2856 logex2 = sc.log1p(-cx)
2857 logpex2 = self._loglogcdf(x, c)
2858 pex2 = np.exp(logpex2)
2859 # Handle special cases
2860 np.putmask(logpex2, (c == 0) & (x == -np.inf), 0.0)
2861 logpdf = _lazywhere(~((cx == 1) | (cx == -np.inf)),
2862 (pex2, logpex2, logex2),
2863 lambda pex2, lpex2, lex2: -pex2 + lpex2 - lex2,
2864 fillvalue=-np.inf)
2865 np.putmask(logpdf, (c == 1) & (x == 1), 0.0)
2866 return logpdf
2868 def _logcdf(self, x, c):
2869 return -np.exp(self._loglogcdf(x, c))
2871 def _cdf(self, x, c):
2872 return np.exp(self._logcdf(x, c))
2874 def _sf(self, x, c):
2875 return -sc.expm1(self._logcdf(x, c))
2877 def _ppf(self, q, c):
2878 x = -np.log(-np.log(q))
2879 return _lazywhere((x == x) & (c != 0), (x, c),
2880 lambda x, c: -sc.expm1(-c * x) / c, x)
2882 def _isf(self, q, c):
2883 x = -np.log(-sc.log1p(-q))
2884 return _lazywhere((x == x) & (c != 0), (x, c),
2885 lambda x, c: -sc.expm1(-c * x) / c, x)
2887 def _stats(self, c):
2888 g = lambda n: sc.gamma(n*c + 1)
2889 g1 = g(1)
2890 g2 = g(2)
2891 g3 = g(3)
2892 g4 = g(4)
2893 g2mg12 = np.where(abs(c) < 1e-7, (c*np.pi)**2.0/6.0, g2-g1**2.0)
2894 gam2k = np.where(abs(c) < 1e-7, np.pi**2.0/6.0,
2895 sc.expm1(sc.gammaln(2.0*c+1.0)-2*sc.gammaln(c + 1.0))/c**2.0)
2896 eps = 1e-14
2897 gamk = np.where(abs(c) < eps, -_EULER, sc.expm1(sc.gammaln(c + 1))/c)
2899 m = np.where(c < -1.0, np.nan, -gamk)
2900 v = np.where(c < -0.5, np.nan, g1**2.0*gam2k)
2902 # skewness
2903 sk1 = _lazywhere(c >= -1./3,
2904 (c, g1, g2, g3, g2mg12),
2905 lambda c, g1, g2, g3, g2gm12:
2906 np.sign(c)*(-g3 + (g2 + 2*g2mg12)*g1)/g2mg12**1.5,
2907 fillvalue=np.nan)
2908 sk = np.where(abs(c) <= eps**0.29, 12*np.sqrt(6)*_ZETA3/np.pi**3, sk1)
2910 # kurtosis
2911 ku1 = _lazywhere(c >= -1./4,
2912 (g1, g2, g3, g4, g2mg12),
2913 lambda g1, g2, g3, g4, g2mg12:
2914 (g4 + (-4*g3 + 3*(g2 + g2mg12)*g1)*g1)/g2mg12**2,
2915 fillvalue=np.nan)
2916 ku = np.where(abs(c) <= (eps)**0.23, 12.0/5.0, ku1-3.0)
2917 return m, v, sk, ku
2919 def _fitstart(self, data):
2920 # This is better than the default shape of (1,).
2921 g = _skew(data)
2922 if g < 0:
2923 a = 0.5
2924 else:
2925 a = -0.5
2926 return super()._fitstart(data, args=(a,))
2928 def _munp(self, n, c):
2929 k = np.arange(0, n+1)
2930 vals = 1.0/c**n * np.sum(
2931 sc.comb(n, k) * (-1)**k * sc.gamma(c*k + 1),
2932 axis=0)
2933 return np.where(c*n > -1, vals, np.inf)
2935 def _entropy(self, c):
2936 return _EULER*(1 - c) + 1
2939genextreme = genextreme_gen(name='genextreme')
def _digammainv(y):
    """Inverse of the digamma function (real positive arguments only).

    This function is used in the `fit` method of `gamma_gen`.
    The function uses either optimize.fsolve or optimize.newton
    to solve `sc.digamma(x) - y = 0`.  There is probably room for
    improvement, but currently it works over a wide range of y:

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> y = 64*rng.standard_normal(1000000)
    >>> y.min(), y.max()
    (-311.43592651416662, 351.77388222276869)
    >>> x = [_digammainv(t) for t in y]
    >>> np.abs(sc.digamma(x) - y).max()
    1.1368683772161603e-13

    """
    euler_gamma = 0.5772156649015328606065120

    def residual(x):
        return sc.digamma(x) - y

    # Pick a starting point suited to the range `y` falls in.
    if y > -0.125:
        guess = np.exp(y) + 0.5
        if y < 10:
            # Some experimentation shows that newton reliably converges
            # much faster than fsolve in this y range.  For larger y,
            # newton sometimes fails to converge.
            return optimize.newton(residual, guess, tol=1e-10)
    elif y > -3:
        guess = np.exp(y/2.332) + 0.08661
    else:
        guess = 1.0 / (-y - euler_gamma)

    root, _info, status, _mesg = optimize.fsolve(residual, guess, xtol=1e-11,
                                                 full_output=True)
    if status != 1:
        raise RuntimeError("_digammainv: fsolve failed, y = %r" % y)

    return root[0]
2983## Gamma (Use MATLAB and MATHEMATICA (b=theta=scale, a=alpha=shape) definition)
2985## gamma(a, loc, scale) with a an integer is the Erlang distribution
2986## gamma(1, loc, scale) is the Exponential distribution
2987## gamma(df/2, 0, 2) is the chi2 distribution with df degrees of freedom.
class gamma_gen(rv_continuous):
    r"""A gamma continuous random variable.

    %(before_notes)s

    See Also
    --------
    erlang, expon

    Notes
    -----
    The probability density function for `gamma` is:

    .. math::

        f(x, a) = \frac{x^{a-1} e^{-x}}{\Gamma(a)}

    for :math:`x \ge 0`, :math:`a > 0`. Here :math:`\Gamma(a)` refers to the
    gamma function.

    `gamma` takes ``a`` as a shape parameter for :math:`a`.

    When :math:`a` is an integer, `gamma` reduces to the Erlang
    distribution, and when :math:`a=1` to the exponential distribution.

    Gamma distributions are sometimes parameterized with two variables,
    with a probability density function of:

    .. math::

        f(x, \alpha, \beta) = \frac{\beta^\alpha x^{\alpha - 1} e^{-\beta x }}{\Gamma(\alpha)}

    Note that this parameterization is equivalent to the above, with
    ``scale = 1 / beta``.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        shape_a = _ShapeInfo("a", False, (0, np.inf), (False, False))
        return [shape_a]

    def _rvs(self, a, size=None, random_state=None):
        return random_state.standard_gamma(a, size)

    def _pdf(self, x, a):
        # gamma.pdf(x, a) = x**(a-1) * exp(-x) / gamma(a)
        log_density = self._logpdf(x, a)
        return np.exp(log_density)

    def _logpdf(self, x, a):
        return sc.xlogy(a-1.0, x) - x - sc.gammaln(a)

    def _cdf(self, x, a):
        return sc.gammainc(a, x)

    def _sf(self, x, a):
        return sc.gammaincc(a, x)

    def _ppf(self, q, a):
        return sc.gammaincinv(a, q)

    def _isf(self, q, a):
        return sc.gammainccinv(a, q)

    def _stats(self, a):
        # mean, variance, skewness, excess kurtosis
        skewness = 2.0/np.sqrt(a)
        kurtosis = 6.0/a
        return a, a, skewness, kurtosis

    def _entropy(self, a):
        return sc.psi(a)*(1-a) + a + sc.gammaln(a)

    def _fitstart(self, data):
        # The skewness of the gamma distribution is `2 / np.sqrt(a)`.
        # We invert that to estimate the shape `a` using the skewness
        # of the data.  The formula is regularized with 1e-8 in the
        # denominator to allow for degenerate data where the skewness
        # is close to 0.
        sample_skew = _skew(data)
        shape_guess = 4 / (1e-8 + sample_skew**2)
        return super()._fitstart(data, args=(shape_guess,))

    @extend_notes_in_docstring(rv_continuous, notes="""\
        When the location is fixed by using the argument `floc`
        and `method='MLE'`, this
        function uses explicit formulas or solves a simpler numerical
        problem than the full ML optimization problem.  So in that case,
        the `optimizer`, `loc` and `scale` arguments are ignored.
        \n\n""")
    def fit(self, data, *args, **kwds):
        floc = kwds.get('floc', None)
        method = kwds.get('method', 'mle')

        loc_is_free = floc is None
        if loc_is_free or method.lower() == 'mm':
            # loc is not fixed (or method of moments was requested).
            # Use the default fit method.
            return super().fit(data, *args, **kwds)

        # Special case from here on: loc is fixed.

        # We already have this value, so just pop it from kwds.
        kwds.pop('floc', None)

        f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
        fscale = kwds.pop('fscale', None)

        _remove_optimizer_parameters(kwds)

        shape_is_fixed = f0 is not None
        scale_is_fixed = fscale is not None

        if shape_is_fixed and scale_is_fixed:
            # This check is for consistency with `rv_continuous.fit`.
            # Without this check, this function would just return the
            # parameters that were given.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        # Fixed location is handled by shifting the data.
        data = np.asarray(data)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        if np.any(data <= floc):
            raise FitDataError("gamma", lower=floc, upper=np.inf)

        if floc != 0:
            # Don't do the subtraction in-place, because `data` might be a
            # view of the input array.
            data = data - floc

        # Three cases to handle:
        # * shape free, scale fixed
        # * shape fixed, scale free
        # * shape and scale both free

        if scale_is_fixed:
            # The MLE for the shape parameter `a` is the solution to:
            # sc.digamma(a) - np.log(data).mean() + np.log(fscale) = 0
            target = np.log(data).mean() - np.log(fscale)
            return _digammainv(target), floc, fscale

        sample_mean = data.mean()

        if shape_is_fixed:
            shape = f0
        else:
            # shape and scale are both free.
            # The MLE for the shape parameter `a` is the solution to:
            # np.log(a) - sc.digamma(a) - np.log(xbar) +
            #                             np.log(data).mean() = 0
            s = np.log(sample_mean) - np.log(data).mean()

            def score(a):
                return np.log(a) - sc.digamma(a) - s

            # Closed-form approximation used to bracket the root.
            approx = (3-s + np.sqrt((s-3)**2 + 24*s)) / (12*s)
            lower = approx*(1-0.4)
            upper = approx*(1+0.4)
            shape = optimize.brentq(score, lower, upper, disp=0)

        # The MLE for the scale parameter is just the data mean
        # divided by the shape parameter.
        return shape, floc, sample_mean / shape


gamma = gamma_gen(a=0.0, name='gamma')
class erlang_gen(gamma_gen):
    """An Erlang continuous random variable.

    %(before_notes)s

    See Also
    --------
    gamma

    Notes
    -----
    The Erlang distribution is a special case of the Gamma distribution, with
    the shape parameter `a` an integer.  Note that this restriction is not
    enforced by `erlang`. It will, however, generate a warning the first time
    a non-integer value is used for the shape parameter.

    Refer to `gamma` for examples.

    """

    def _argcheck(self, a):
        has_fraction = np.any(np.floor(a) != a)
        if has_fraction:
            # An Erlang distribution shouldn't really have a non-integer
            # shape parameter, so warn the user.
            warnings.warn(
                'The shape parameter of the erlang distribution '
                'has been given a non-integer value %r.' % (a,),
                RuntimeWarning)
        # Non-integer shapes are still accepted; only positivity is required.
        return a > 0

    def _shape_info(self):
        shape_a = _ShapeInfo("a", True, (1, np.inf), (True, False))
        return [shape_a]

    def _fitstart(self, data):
        # Override gamma_gen_fitstart so that an integer initial value is
        # used.  (Also regularize the division, to avoid issues when
        # _skew(data) is 0 or close to 0.)
        sample_skew = _skew(data)
        shape_guess = int(4.0 / (1e-8 + sample_skew**2))
        # Skip gamma_gen._fitstart and go to the next class in the MRO.
        return super(gamma_gen, self)._fitstart(data, args=(shape_guess,))

    # Trivial override of the fit method, so we can monkey-patch its
    # docstring.
    @extend_notes_in_docstring(rv_continuous, notes="""\
        The Erlang distribution is generally defined to have integer values
        for the shape parameter.  This is not enforced by the `erlang` class.
        When fitting the distribution, it will generally return a non-integer
        value for the shape parameter.  By using the keyword argument
        `f0=<integer>`, the fit method can be constrained to fit the data to
        a specific integer shape parameter.""")
    def fit(self, data, *args, **kwds):
        return super().fit(data, *args, **kwds)


erlang = erlang_gen(a=0.0, name='erlang')
class gengamma_gen(rv_continuous):
    r"""A generalized gamma continuous random variable.

    %(before_notes)s

    See Also
    --------
    gamma, invgamma, weibull_min

    Notes
    -----
    The probability density function for `gengamma` is ([1]_):

    .. math::

        f(x, a, c) = \frac{|c| x^{c a-1} \exp(-x^c)}{\Gamma(a)}

    for :math:`x \ge 0`, :math:`a > 0`, and :math:`c \ne 0`.
    :math:`\Gamma` is the gamma function (`scipy.special.gamma`).

    `gengamma` takes :math:`a` and :math:`c` as shape parameters.

    %(after_notes)s

    References
    ----------
    .. [1] E.W. Stacy, "A Generalization of the Gamma Distribution",
       Annals of Mathematical Statistics, Vol 33(3), pp. 1187--1192.

    %(example)s

    """
    def _argcheck(self, a, c):
        return (a > 0) & (c != 0)

    def _shape_info(self):
        ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
        ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
        return [ia, ic]

    def _pdf(self, x, a, c):
        return np.exp(self._logpdf(x, a, c))

    def _logpdf(self, x, a, c):
        # At x == 0 with c < 0 the log-density is -inf; everywhere else the
        # closed form is evaluated.  `a` is passed through _lazywhere along
        # with `x` and `c` so that it is filtered by the same condition;
        # closing over the unfiltered `a` would pair it with the wrong
        # elements of `x` whenever some points are masked out.
        return _lazywhere((x != 0) | (c > 0), (x, a, c),
                          lambda x, a, c: (np.log(abs(c))
                                           + sc.xlogy(c*a - 1, x)
                                           - x**c - sc.gammaln(a)),
                          fillvalue=-np.inf)

    def _cdf(self, x, a, c):
        xc = x**c
        val1 = sc.gammainc(a, xc)
        val2 = sc.gammaincc(a, xc)
        # x -> x**c is decreasing for c < 0, so lower and upper incomplete
        # gamma functions swap roles.
        return np.where(c > 0, val1, val2)

    def _rvs(self, a, c, size=None, random_state=None):
        r = random_state.standard_gamma(a, size=size)
        return r**(1./c)

    def _sf(self, x, a, c):
        xc = x**c
        val1 = sc.gammainc(a, xc)
        val2 = sc.gammaincc(a, xc)
        return np.where(c > 0, val2, val1)

    def _ppf(self, q, a, c):
        val1 = sc.gammaincinv(a, q)
        val2 = sc.gammainccinv(a, q)
        return np.where(c > 0, val1, val2)**(1.0/c)

    def _isf(self, q, a, c):
        val1 = sc.gammaincinv(a, q)
        val2 = sc.gammainccinv(a, q)
        return np.where(c > 0, val2, val1)**(1.0/c)

    def _munp(self, n, a, c):
        # Pochhammer symbol: sc.poch(a, n) = gamma(a+n)/gamma(a)
        return sc.poch(a, n*1.0/c)

    def _entropy(self, a, c):
        val = sc.psi(a)
        return a*(1-val) + 1.0/c*val + sc.gammaln(a) - np.log(abs(c))


gengamma = gengamma_gen(a=0.0, name='gengamma')
class genhalflogistic_gen(rv_continuous):
    r"""A generalized half-logistic continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `genhalflogistic` is:

    .. math::

        f(x, c) = \frac{2 (1 - c x)^{1/c - 1}}{[1 + (1 - c x)^{1/c}]^2}

    for :math:`0 \le x \le 1/c`, and :math:`c > 0`.

    `genhalflogistic` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [shape_c]

    def _get_support(self, c):
        # Upper endpoint depends on the shape: x <= 1/c.
        upper = 1.0/c
        return self.a, upper

    def _pdf(self, x, c):
        # genhalflogistic.pdf(x, c) =
        #    2 * (1-c*x)**(1/c-1) / (1+(1-c*x)**(1/c))**2
        inv_c = 1.0/c
        base = np.asarray(1-c*x)
        reduced_power = base**(inv_c-1)
        # base**(1/c), obtained by multiplying back one factor of `base`.
        full_power = reduced_power*base
        return 2*reduced_power / (1+full_power)**2

    def _cdf(self, x, c):
        inv_c = 1.0/c
        base = np.asarray(1-c*x)
        full_power = base**(inv_c)
        return (1.0-full_power) / (1+full_power)

    def _ppf(self, q, c):
        ratio = (1.0-q)/(1.0+q)
        return 1.0/c*(1-ratio**c)

    def _entropy(self, c):
        return 2 - (2*c+1)*np.log(2)


genhalflogistic = genhalflogistic_gen(a=0.0, name='genhalflogistic')
3352class genhyperbolic_gen(rv_continuous):
3353 r"""A generalized hyperbolic continuous random variable.
3355 %(before_notes)s
3357 See Also
3358 --------
3359 t, norminvgauss, geninvgauss, laplace, cauchy
3361 Notes
3362 -----
3363 The probability density function for `genhyperbolic` is:
3365 .. math::
3367 f(x, p, a, b) =
3368 \frac{(a^2 - b^2)^{p/2}}
3369 {\sqrt{2\pi}a^{p-0.5}
3370 K_p\Big(\sqrt{a^2 - b^2}\Big)}
3371 e^{bx} \times \frac{K_{p - 1/2}
3372 (a \sqrt{1 + x^2})}
3373 {(\sqrt{1 + x^2})^{1/2 - p}}
3375 for :math:`x, p \in ( - \infty; \infty)`,
3376 :math:`|b| < a` if :math:`p \ge 0`,
3377 :math:`|b| \le a` if :math:`p < 0`.
3378 :math:`K_{p}(.)` denotes the modified Bessel function of the second
3379 kind and order :math:`p` (`scipy.special.kn`)
3381 `genhyperbolic` takes ``p`` as a tail parameter,
3382 ``a`` as a shape parameter,
3383 ``b`` as a skewness parameter.
3385 %(after_notes)s
3387 The original parameterization of the Generalized Hyperbolic Distribution
3388 is found in [1]_ as follows
3390 .. math::
3392 f(x, \lambda, \alpha, \beta, \delta, \mu) =
3393 \frac{(\gamma/\delta)^\lambda}{\sqrt{2\pi}K_\lambda(\delta \gamma)}
3394 e^{\beta (x - \mu)} \times \frac{K_{\lambda - 1/2}
3395 (\alpha \sqrt{\delta^2 + (x - \mu)^2})}
3396 {(\sqrt{\delta^2 + (x - \mu)^2} / \alpha)^{1/2 - \lambda}}
3398 for :math:`x \in ( - \infty; \infty)`,
3399 :math:`\gamma := \sqrt{\alpha^2 - \beta^2}`,
3400 :math:`\lambda, \mu \in ( - \infty; \infty)`,
3401 :math:`\delta \ge 0, |\beta| < \alpha` if :math:`\lambda \ge 0`,
3402 :math:`\delta > 0, |\beta| \le \alpha` if :math:`\lambda < 0`.
3404 The location-scale-based parameterization implemented in
3405 SciPy is based on [2]_, where :math:`a = \alpha\delta`,
3406 :math:`b = \beta\delta`, :math:`p = \lambda`,
3407 :math:`scale=\delta` and :math:`loc=\mu`
3409 Moments are implemented based on [3]_ and [4]_.
3411 For the distributions that are a special case such as Student's t,
3412 it is not recommended to rely on the implementation of genhyperbolic.
3413 To avoid potential numerical problems and for performance reasons,
3414 the methods of the specific distributions should be used.
3416 References
3417 ----------
3418 .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions
3419 on Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
3420 pp. 151-157, 1978. https://www.jstor.org/stable/4615705
3422 .. [2] Eberlein E., Prause K. (2002) The Generalized Hyperbolic Model:
3423 Financial Derivatives and Risk Measures. In: Geman H., Madan D.,
3424 Pliska S.R., Vorst T. (eds) Mathematical Finance - Bachelier
3425 Congress 2000. Springer Finance. Springer, Berlin, Heidelberg.
3426 :doi:`10.1007/978-3-662-12429-1_12`
3428 .. [3] Scott, David J, Würtz, Diethelm, Dong, Christine and Tran,
3429 Thanh Tam, (2009), Moments of the generalized hyperbolic
3430 distribution, MPRA Paper, University Library of Munich, Germany,
3431 https://EconPapers.repec.org/RePEc:pra:mprapa:19081.
3433 .. [4] E. Eberlein and E. A. von Hammerstein. Generalized hyperbolic
3434 and inverse Gaussian distributions: Limiting cases and approximation
3435 of processes. FDM Preprint 80, April 2003. University of Freiburg.
3436 https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
3438 %(example)s
3440 """
3442 def _argcheck(self, p, a, b):
3443 return (np.logical_and(np.abs(b) < a, p >= 0)
3444 | np.logical_and(np.abs(b) <= a, p < 0))
3446 def _shape_info(self):
3447 ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
3448 ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
3449 ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
3450 return [ip, ia, ib]
3452 def _fitstart(self, data):
3453 # Arbitrary, but the default a=b=1 is not valid
3454 return super()._fitstart(data, args=(1, 1, 0.5))
3456 def _logpdf(self, x, p, a, b):
3457 # kve instead of kv works better for large values of p
3458 # and smaller values of sqrt(a^2 - b^2)
3459 @np.vectorize
3460 def _logpdf_single(x, p, a, b):
3461 return _stats.genhyperbolic_logpdf(x, p, a, b)
3463 return _logpdf_single(x, p, a, b)
3465 def _pdf(self, x, p, a, b):
3466 # kve instead of kv works better for large values of p
3467 # and smaller values of sqrt(a^2 - b^2)
3468 @np.vectorize
3469 def _pdf_single(x, p, a, b):
3470 return _stats.genhyperbolic_pdf(x, p, a, b)
3472 return _pdf_single(x, p, a, b)
3474 def _cdf(self, x, p, a, b):
3476 @np.vectorize
3477 def _cdf_single(x, p, a, b):
3478 user_data = np.array(
3479 [p, a, b], float
3480 ).ctypes.data_as(ctypes.c_void_p)
3481 llc = LowLevelCallable.from_cython(
3482 _stats, '_genhyperbolic_pdf', user_data
3483 )
3485 t1 = integrate.quad(llc, -np.inf, x)[0]
3487 if np.isnan(t1):
3488 msg = ("Infinite values encountered in scipy.special.kve. "
3489 "Values replaced by NaN to avoid incorrect results.")
3490 warnings.warn(msg, RuntimeWarning)
3492 return t1
3494 return _cdf_single(x, p, a, b)
3496 def _rvs(self, p, a, b, size=None, random_state=None):
3497 # note: X = b * V + sqrt(V) * X has a
3498 # generalized hyperbolic distribution
3499 # if X is standard normal and V is
3500 # geninvgauss(p = p, b = t2, loc = loc, scale = t3)
3501 t1 = np.float_power(a, 2) - np.float_power(b, 2)
3502 # b in the GIG
3503 t2 = np.float_power(t1, 0.5)
3504 # scale in the GIG
3505 t3 = np.float_power(t1, - 0.5)
3506 gig = geninvgauss.rvs(
3507 p=p,
3508 b=t2,
3509 scale=t3,
3510 size=size,
3511 random_state=random_state
3512 )
3513 normst = norm.rvs(size=size, random_state=random_state)
3515 return b * gig + np.sqrt(gig) * normst
3517 def _stats(self, p, a, b):
3518 # https://mpra.ub.uni-muenchen.de/19081/1/MPRA_paper_19081.pdf
3519 # https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
3520 # standardized moments
3521 p, a, b = np.broadcast_arrays(p, a, b)
3522 t1 = np.float_power(a, 2) - np.float_power(b, 2)
3523 t1 = np.float_power(t1, 0.5)
3524 t2 = np.float_power(1, 2) * np.float_power(t1, - 1)
3525 integers = np.linspace(0, 4, 5)
3526 # make integers perpendicular to existing dimensions
3527 integers = integers.reshape(integers.shape + (1,) * p.ndim)
3528 b0, b1, b2, b3, b4 = sc.kv(p + integers, t1)
3529 r1, r2, r3, r4 = [b / b0 for b in (b1, b2, b3, b4)]
3531 m = b * t2 * r1
3532 v = (
3533 t2 * r1 + np.float_power(b, 2) * np.float_power(t2, 2) *
3534 (r2 - np.float_power(r1, 2))
3535 )
3536 m3e = (
3537 np.float_power(b, 3) * np.float_power(t2, 3) *
3538 (r3 - 3 * b2 * b1 * np.float_power(b0, -2) +
3539 2 * np.float_power(r1, 3)) +
3540 3 * b * np.float_power(t2, 2) *
3541 (r2 - np.float_power(r1, 2))
3542 )
3543 s = m3e * np.float_power(v, - 3 / 2)
3544 m4e = (
3545 np.float_power(b, 4) * np.float_power(t2, 4) *
3546 (r4 - 4 * b3 * b1 * np.float_power(b0, - 2) +
3547 6 * b2 * np.float_power(b1, 2) * np.float_power(b0, - 3) -
3548 3 * np.float_power(r1, 4)) +
3549 np.float_power(b, 2) * np.float_power(t2, 3) *
3550 (6 * r3 - 12 * b2 * b1 * np.float_power(b0, - 2) +
3551 6 * np.float_power(r1, 3)) +
3552 3 * np.float_power(t2, 2) * r2
3553 )
3554 k = m4e * np.float_power(v, -2) - 3
3556 return m, v, s, k
# Module-level instance of `genhyperbolic_gen`, created with the name
# 'genhyperbolic'.
genhyperbolic = genhyperbolic_gen(name='genhyperbolic')
class gompertz_gen(rv_continuous):
    r"""A Gompertz (or truncated Gumbel) continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `gompertz` is:

    .. math::

        f(x, c) = c \exp(x) \exp(-c (e^x-1))

    for :math:`x \ge 0`, :math:`c > 0`.

    `gompertz` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

    def _pdf(self, x, c):
        # gompertz.pdf(x, c) = c * exp(x) * exp(-c*(exp(x)-1))
        return np.exp(self._logpdf(x, c))

    def _logpdf(self, x, c):
        return np.log(c) + x - c * sc.expm1(x)

    def _cdf(self, x, c):
        return -sc.expm1(-c * sc.expm1(x))

    def _ppf(self, q, c):
        return sc.log1p(-1.0 / c * sc.log1p(-q))

    def _sf(self, x, c):
        # sf(x, c) = exp(-c*(exp(x)-1)).  Evaluated directly because the
        # generic fallback 1 - cdf rounds to 0 once the tail probability
        # drops below double-precision epsilon.
        return np.exp(-c * sc.expm1(x))

    def _isf(self, p, c):
        # Inverse of `_sf`: exp(-c*(exp(x)-1)) = p  =>  x = log1p(-log(p)/c).
        # The generic fallback ppf(1 - p) returns inf for tiny p.
        return sc.log1p(-np.log(p) / c)

    def _entropy(self, c):
        return 1.0 - np.log(c) - np.exp(c)*sc.expn(1, c)


gompertz = gompertz_gen(a=0.0, name='gompertz')
3607def _average_with_log_weights(x, logweights):
3608 x = np.asarray(x)
3609 logweights = np.asarray(logweights)
3610 maxlogw = logweights.max()
3611 weights = np.exp(logweights - maxlogw)
3612 return np.average(x, weights=weights)
class gumbel_r_gen(rv_continuous):
    r"""A right-skewed Gumbel continuous random variable.

    %(before_notes)s

    See Also
    --------
    gumbel_l, gompertz, genextreme

    Notes
    -----
    The probability density function for `gumbel_r` is:

    .. math::

        f(x) = \exp(-(x + e^{-x}))

    The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
    distribution. It is also related to the extreme value distribution,
    log-Weibull and Gompertz distributions.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # no shape parameters
        return []

    def _pdf(self, x):
        # gumbel_r.pdf(x) = exp(-(x + exp(-x)))
        log_density = self._logpdf(x)
        return np.exp(log_density)

    def _logpdf(self, x):
        return -x - np.exp(-x)

    def _cdf(self, x):
        return np.exp(-np.exp(-x))

    def _logcdf(self, x):
        return -np.exp(-x)

    def _ppf(self, q):
        return -np.log(-np.log(q))

    def _sf(self, x):
        return -sc.expm1(-np.exp(-x))

    def _isf(self, p):
        return -np.log(-np.log1p(-p))

    def _stats(self):
        mean = _EULER
        variance = np.pi*np.pi/6.0
        skewness = 12*np.sqrt(6)/np.pi**3 * _ZETA3
        excess_kurtosis = 12.0/5
        return mean, variance, skewness, excess_kurtosis

    def _entropy(self):
        # https://en.wikipedia.org/wiki/Gumbel_distribution
        return _EULER + 1.

    @_call_super_mom
    @inherit_docstring_from(rv_continuous)
    def fit(self, data, *args, **kwds):
        data, floc, fscale = _check_fit_input_parameters(self, data,
                                                         args, kwds)
        n_obs = len(data)

        # Maximum likelihood: the scale is the root of `score` below and
        # the location then follows in closed form.  The equation for the
        # free-location case is taken from Statistical Distributions,
        # 3rd Edition, Evans, Hastings, and Peacock (2000), Page 101.

        def loc_given_scale(scale):
            return -scale * (sc.logsumexp(-data / scale) - np.log(n_obs))

        # With a fixed scale the location is available analytically.
        if fscale is not None:
            return loc_given_scale(fscale), fscale

        if floc is not None:
            # Fixed location.  The equation in Evans, Hastings, and Peacock
            # (2000) cannot be used here (it assumes the derivative of the
            # log-likelihood w.r.t. the location is zero), so the MLE
            # condition for the scale is derived directly.
            def score(scale):
                shifted = floc - data
                weighted = shifted * np.exp(shifted / scale) + data
                return weighted.sum() - n_obs * (floc + scale)
        else:
            # Both location and scale are free.
            def score(scale):
                tilted_mean = _average_with_log_weights(
                    data, logweights=-data / scale)
                return data.mean() - tilted_mean - scale

        # Initial bracket for `root_scalar`, centred on the user supplied
        # scale guess (default 1).
        guess = kwds.get('scale', 1)
        lower, upper = guess / 2, guess * 2

        def brackets_root(lo, hi):
            # True when `score` has different signs at the two ends.
            return np.sign(score(lo)) != np.sign(score(hi))

        # Widen the bracket until it contains a sign change, re-checking
        # after every expansion.
        while (not brackets_root(lower, upper)
               and (lower > 0 or upper < np.inf)):
            lower /= 2
            upper *= 2

        solution = optimize.root_scalar(score, bracket=(lower, upper),
                                        rtol=1e-14, xtol=1e-14)
        scale = solution.root
        loc = floc if floc is not None else loc_given_scale(scale)
        return loc, scale


gumbel_r = gumbel_r_gen(name='gumbel_r')
class gumbel_l_gen(rv_continuous):
    r"""A left-skewed Gumbel continuous random variable.

    %(before_notes)s

    See Also
    --------
    gumbel_r, gompertz, genextreme

    Notes
    -----
    The probability density function for `gumbel_l` is:

    .. math::

        f(x) = \exp(x - e^x)

    The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
    distribution. It is also related to the extreme value distribution,
    log-Weibull and Gompertz distributions.

    %(after_notes)s

    %(example)s

    """

    def _shape_info(self):
        # no shape parameters
        return []

    def _pdf(self, x):
        # gumbel_l.pdf(x) = exp(x - exp(x))
        log_density = self._logpdf(x)
        return np.exp(log_density)

    def _logpdf(self, x):
        return x - np.exp(x)

    def _cdf(self, x):
        return -sc.expm1(-np.exp(x))

    def _ppf(self, q):
        return np.log(-sc.log1p(-q))

    def _logsf(self, x):
        return -np.exp(x)

    def _sf(self, x):
        return np.exp(-np.exp(x))

    def _isf(self, x):
        return np.log(-np.log(x))

    def _stats(self):
        mean = -_EULER
        variance = np.pi*np.pi/6.0
        skewness = -12*np.sqrt(6)/np.pi**3 * _ZETA3
        excess_kurtosis = 12.0/5
        return mean, variance, skewness, excess_kurtosis

    def _entropy(self):
        return _EULER + 1.

    @_call_super_mom
    @inherit_docstring_from(rv_continuous)
    def fit(self, data, *args, **kwds):
        # `gumbel_l` is the mirror image of `gumbel_r`, so the fit is
        # delegated to `gumbel_r.fit` (which also performs the input
        # checks):
        #   1. negate the data, and a fixed location if one was given;
        #   2. negate the location that comes back; the scale is reused
        #      unchanged.
        fixed_loc = kwds.get('floc')
        if fixed_loc is not None:
            kwds['floc'] = -fixed_loc
        mirrored_loc, scale = gumbel_r.fit(-np.asarray(data), *args, **kwds)
        return -mirrored_loc, scale


gumbel_l = gumbel_l_gen(name='gumbel_l')
class halfcauchy_gen(rv_continuous):
    r"""A Half-Cauchy continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `halfcauchy` is:

    .. math::

        f(x) = \frac{2}{\pi (1 + x^2)}

    for :math:`x \ge 0`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # halfcauchy.pdf(x) = 2 / (pi * (1 + x**2))
        return 2.0/np.pi/(1.0+x*x)

    def _logpdf(self, x):
        return np.log(2.0/np.pi) - sc.log1p(x*x)

    def _cdf(self, x):
        return 2.0/np.pi*np.arctan(x)

    def _ppf(self, q):
        return np.tan(np.pi/2*q)

    def _sf(self, x):
        # 1 - (2/pi)*arctan(x) = (2/pi)*arctan(1/x) for x >= 0.  arctan2
        # evaluates arctan(1/x) without the cancellation that makes the
        # generic 1 - cdf fallback return 0 for large x.
        return 2.0/np.pi * np.arctan2(1, x)

    def _isf(self, p):
        # Inverse of `_sf`: x = 1 / tan(pi*p/2); accurate for small p where
        # ppf(1 - p) would lose all precision.
        return 1.0/np.tan(np.pi*p/2)

    def _stats(self):
        return np.inf, np.inf, np.nan, np.nan

    def _entropy(self):
        return np.log(2*np.pi)


halfcauchy = halfcauchy_gen(a=0.0, name='halfcauchy')
class halflogistic_gen(rv_continuous):
    r"""A half-logistic continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `halflogistic` is:

    .. math::

        f(x) = \frac{ 2 e^{-x} }{ (1+e^{-x})^2 }
             = \frac{1}{2} \text{sech}(x/2)^2

    for :math:`x \ge 0`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # halflogistic.pdf(x) = 2 * exp(-x) / (1+exp(-x))**2
        #                     = 1/2 * sech(x/2)**2
        return np.exp(self._logpdf(x))

    def _logpdf(self, x):
        return np.log(2) - x - 2. * sc.log1p(np.exp(-x))

    def _cdf(self, x):
        return np.tanh(x/2.0)

    def _ppf(self, q):
        return 2*np.arctanh(q)

    def _sf(self, x):
        # 1 - tanh(x/2) = 2 / (1 + exp(x)) = 2 * expit(-x); computed
        # directly so the upper tail does not cancel to 0.
        return 2 * sc.expit(-x)

    def _isf(self, q):
        # Inverse of `_sf`: x = log((2 - q) / q) = -logit(q / 2).
        # The logit form is accurate for small q; near q = 1 (x near 0)
        # the original arctanh form keeps full relative precision, so it
        # is retained there.
        q = np.asarray(q)
        # np.where evaluates both branches; silence the warnings raised by
        # the branch that is discarded.
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.where(q < 0.5,
                            -sc.logit(0.5 * q),
                            2 * np.arctanh(1 - q))

    def _munp(self, n):
        # closed forms for the first four raw moments, general formula
        # otherwise
        if n == 1:
            return 2*np.log(2)
        if n == 2:
            return np.pi*np.pi/3.0
        if n == 3:
            return 9*_ZETA3
        if n == 4:
            return 7*np.pi**4 / 15.0
        return 2*(1-pow(2.0, 1-n))*sc.gamma(n+1)*sc.zeta(n, 1)

    def _entropy(self):
        return 2-np.log(2)


halflogistic = halflogistic_gen(a=0.0, name='halflogistic')
class halfnorm_gen(rv_continuous):
    r"""A half-normal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `halfnorm` is:

    .. math::

        f(x) = \sqrt{2/\pi} \exp(-x^2 / 2)

    for :math:`x >= 0`.

    `halfnorm` is a special case of `chi` with ``df=1``.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _rvs(self, size=None, random_state=None):
        # absolute value of a standard normal draw
        return abs(random_state.standard_normal(size=size))

    def _pdf(self, x):
        # halfnorm.pdf(x) = sqrt(2/pi) * exp(-x**2/2)
        return np.sqrt(2.0/np.pi)*np.exp(-x*x/2.0)

    def _logpdf(self, x):
        return 0.5 * np.log(2.0/np.pi) - x*x/2.0

    def _cdf(self, x):
        return _norm_cdf(x)*2-1.0

    def _ppf(self, q):
        return sc.ndtri((1+q)/2.0)

    def _sf(self, x):
        # 1 - (2*Phi(x) - 1) = 2*Phi(-x).  Using the lower normal tail
        # directly avoids the cancellation that makes the generic 1 - cdf
        # fallback return 0 for large x.
        return 2 * sc.ndtr(-x)

    def _isf(self, p):
        # Inverse of `_sf`: 2*Phi(-x) = p  =>  x = -ndtri(p/2).
        return -sc.ndtri(p / 2)

    def _stats(self):
        return (np.sqrt(2.0/np.pi),
                1-2.0/np.pi,
                np.sqrt(2)*(4-np.pi)/(np.pi-2)**1.5,
                8*(np.pi-3)/(np.pi-2)**2)

    def _entropy(self):
        return 0.5*np.log(np.pi/2.0)+0.5


halfnorm = halfnorm_gen(a=0.0, name='halfnorm')
class hypsecant_gen(rv_continuous):
    r"""A hyperbolic secant continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `hypsecant` is:

    .. math::

        f(x) = \frac{1}{\pi} \text{sech}(x)

    for a real number :math:`x`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # hypsecant.pdf(x) = 1/pi * sech(x)
        return 1.0/(np.pi*np.cosh(x))

    def _cdf(self, x):
        return 2.0/np.pi*np.arctan(np.exp(x))

    def _ppf(self, q):
        return np.log(np.tan(np.pi*q/2.0))

    def _sf(self, x):
        # The density is symmetric about 0, so sf(x) = cdf(-x).  Evaluating
        # it directly keeps the upper tail from cancelling to 0 in 1 - cdf.
        return 2.0/np.pi*np.arctan(np.exp(-x))

    def _isf(self, q):
        # By the same symmetry, isf(q) = -ppf(q).
        return -np.log(np.tan(np.pi*q/2.0))

    def _stats(self):
        return 0, np.pi*np.pi/4, 0, 2

    def _entropy(self):
        return np.log(2*np.pi)


hypsecant = hypsecant_gen(name='hypsecant')
class gausshyper_gen(rv_continuous):
    r"""A Gauss hypergeometric continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `gausshyper` is:

    .. math::

        f(x, a, b, c, z) = C x^{a-1} (1-x)^{b-1} (1+zx)^{-c}

    for :math:`0 \le x \le 1`, :math:`a,b > 0`, :math:`c` a real number,
    :math:`z > -1`, and :math:`C = \frac{1}{B(a, b) F[2, 1](c, a; a+b; -z)}`.
    :math:`F[2, 1]` is the Gauss hypergeometric function
    `scipy.special.hyp2f1`.

    `gausshyper` takes :math:`a`, :math:`b`, :math:`c` and :math:`z` as shape
    parameters.

    %(after_notes)s

    References
    ----------
    .. [1] Armero, C., and M. J. Bayarri. "Prior Assessments for Prediction in
           Queues." *Journal of the Royal Statistical Society*. Series D (The
           Statistician) 43, no. 1 (1994): 139-53. doi:10.2307/2348939

    %(example)s

    """

    def _argcheck(self, a, b, c, z):
        # z > -1 per gh-10134
        # `c == c` is False only for NaN, i.e. any real c is accepted.
        return (a > 0) & (b > 0) & (c == c) & (z > -1)

    def _shape_info(self):
        ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
        ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
        ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
        iz = _ShapeInfo("z", False, (-1, np.inf), (False, False))
        return [ia, ib, ic, iz]

    def _pdf(self, x, a, b, c, z):
        # gausshyper.pdf(x, a, b, c, z) =
        #   C * x**(a-1) * (1-x)**(b-1) * (1+z*x)**(-c)
        # with 1/C = B(a, b) * hyp2f1(c, a, a+b, -z).  B(a, b) is taken
        # from sc.beta (as `_munp` does) rather than assembled from three
        # gamma values, which overflow to inf/inf = nan for large a, b.
        Cinv = sc.beta(a, b)*sc.hyp2f1(c, a, a+b, -z)
        return 1.0/Cinv * x**(a-1.0) * (1.0-x)**(b-1.0) / (1.0+z*x)**c

    def _munp(self, n, a, b, c, z):
        # n-th raw moment as a ratio of beta and hypergeometric functions
        fac = sc.beta(n+a, b) / sc.beta(a, b)
        num = sc.hyp2f1(c, a+n, a+b+n, -z)
        den = sc.hyp2f1(c, a, a+b, -z)
        return fac*num / den


gausshyper = gausshyper_gen(a=0.0, b=1.0, name='gausshyper')
class invgamma_gen(rv_continuous):
    r"""An inverted gamma continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `invgamma` is:

    .. math::

        f(x, a) = \frac{x^{-a-1}}{\Gamma(a)} \exp(-\frac{1}{x})

    for :math:`x >= 0`, :math:`a > 0`. :math:`\Gamma` is the gamma function
    (`scipy.special.gamma`).

    `invgamma` takes ``a`` as a shape parameter for :math:`a`.

    `invgamma` is a special case of `gengamma` with ``c=-1``, and it is a
    different parameterization of the scaled inverse chi-squared distribution.
    Specifically, if the scaled inverse chi-squared distribution is
    parameterized with degrees of freedom :math:`\nu` and scaling parameter
    :math:`\tau^2`, then it can be modeled using `invgamma` with
    ``a=`` :math:`\nu/2` and ``scale=`` :math:`\nu \tau^2/2`.

    %(after_notes)s

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        # The shape parameter is called ``a`` in every method signature and
        # in the docstring; the metadata previously reported it as "c".
        return [_ShapeInfo("a", False, (0, np.inf), (False, False))]

    def _pdf(self, x, a):
        # invgamma.pdf(x, a) = x**(-a-1) / gamma(a) * exp(-1/x)
        return np.exp(self._logpdf(x, a))

    def _logpdf(self, x, a):
        return -(a+1) * np.log(x) - sc.gammaln(a) - 1.0/x

    def _cdf(self, x, a):
        return sc.gammaincc(a, 1.0 / x)

    def _ppf(self, q, a):
        return 1.0 / sc.gammainccinv(a, q)

    def _sf(self, x, a):
        return sc.gammainc(a, 1.0 / x)

    def _isf(self, q, a):
        return 1.0 / sc.gammaincinv(a, q)

    def _stats(self, a, moments='mvsk'):
        # Mean and variance are reported as inf where a <= 1 and a <= 2
        # respectively; skewness and kurtosis as nan where a <= 3 and
        # a <= 4.
        m1 = _lazywhere(a > 1, (a,), lambda x: 1. / (x - 1.), np.inf)
        m2 = _lazywhere(a > 2, (a,), lambda x: 1. / (x - 1.)**2 / (x - 2.),
                        np.inf)

        # skewness / kurtosis are only evaluated when requested
        g1, g2 = None, None
        if 's' in moments:
            g1 = _lazywhere(
                a > 3, (a,),
                lambda x: 4. * np.sqrt(x - 2.) / (x - 3.), np.nan)
        if 'k' in moments:
            g2 = _lazywhere(
                a > 4, (a,),
                lambda x: 6. * (5. * x - 11.) / (x - 3.) / (x - 4.), np.nan)
        return m1, m2, g1, g2

    def _entropy(self, a):
        return a - (a+1.0) * sc.psi(a) + sc.gammaln(a)


invgamma = invgamma_gen(a=0.0, name='invgamma')
class invgauss_gen(rv_continuous):
    r"""An inverse Gaussian continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `invgauss` is:

    .. math::

        f(x, \mu) = \frac{1}{\sqrt{2 \pi x^3}}
                    \exp(-\frac{(x-\mu)^2}{2 x \mu^2})

    for :math:`x >= 0` and :math:`\mu > 0`.

    `invgauss` takes ``mu`` as a shape parameter for :math:`\mu`.

    %(after_notes)s

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        mu_info = _ShapeInfo("mu", False, (0, np.inf), (False, False))
        return [mu_info]

    def _rvs(self, mu, size=None, random_state=None):
        return random_state.wald(mu, 1.0, size=size)

    def _pdf(self, x, mu):
        # invgauss.pdf(x, mu) =
        #                  1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))
        normalizer = 1.0/np.sqrt(2*np.pi*x**3.0)
        kernel = np.exp(-1.0/(2*x)*((x-mu)/mu)**2)
        return normalizer*kernel

    def _logpdf(self, x, mu):
        return -0.5*np.log(2*np.pi) - 1.5*np.log(x) - ((x-mu)/mu)**2/(2*x)

    # The log-CDF / log-SF approach is adapted from the equations in
    # https://journal.r-project.org/archive/2016-1/giner-smyth.pdf,
    # not the R code. See gh-13616.

    def _logcdf(self, x, mu):
        inv_sqrt_x = 1 / np.sqrt(x)
        log_main = _norm_logcdf(inv_sqrt_x * ((x / mu) - 1))
        log_corr = 2 / mu + _norm_logcdf(-inv_sqrt_x * ((x / mu) + 1))
        return log_main + np.log1p(np.exp(log_corr - log_main))

    def _logsf(self, x, mu):
        inv_sqrt_x = 1 / np.sqrt(x)
        log_main = _norm_logsf(inv_sqrt_x * ((x / mu) - 1))
        log_corr = 2 / mu + _norm_logcdf(-inv_sqrt_x * (x + mu) / mu)
        return log_main + np.log1p(-np.exp(log_corr - log_main))

    def _sf(self, x, mu):
        log_sf = self._logsf(x, mu)
        return np.exp(log_sf)

    def _cdf(self, x, mu):
        log_cdf = self._logcdf(x, mu)
        return np.exp(log_cdf)

    def _ppf(self, x, mu):
        with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
            probs, mus = np.broadcast_arrays(x, mu)
            quantiles = _boost._invgauss_ppf(probs, mus, 1)
            # "wrong tail" - sometimes too inaccurate, so those entries are
            # recomputed from the complementary function
            upper_half = probs > 0.5
            quantiles[upper_half] = _boost._invgauss_isf(
                1-probs[upper_half], mus[upper_half], 1)
            # anything still NaN falls back to the generic inversion
            unresolved = np.isnan(quantiles)
            quantiles[unresolved] = super()._ppf(probs[unresolved],
                                                 mus[unresolved])
        return quantiles

    def _isf(self, x, mu):
        with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
            probs, mus = np.broadcast_arrays(x, mu)
            quantiles = _boost._invgauss_isf(probs, mus, 1)
            # "wrong tail" - sometimes too inaccurate, so those entries are
            # recomputed from the complementary function
            upper_half = probs > 0.5
            quantiles[upper_half] = _boost._invgauss_ppf(
                1-probs[upper_half], mus[upper_half], 1)
            # anything still NaN falls back to the generic inversion
            unresolved = np.isnan(quantiles)
            quantiles[unresolved] = super()._isf(probs[unresolved],
                                                 mus[unresolved])
        return quantiles

    def _stats(self, mu):
        mean = mu
        variance = mu**3.0
        skewness = 3*np.sqrt(mu)
        excess_kurtosis = 15*mu
        return mean, variance, skewness, excess_kurtosis

    @inherit_docstring_from(rv_continuous)
    def fit(self, data, *args, **kwds):
        method = kwds.get('method', 'mle')

        if type(self) is wald_gen or method.lower() == 'mm':
            return super().fit(data, *args, **kwds)

        data, fshape_s, floc, fscale = _check_fit_input_parameters(self, data,
                                                                   args, kwds)
        # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
        # and Peacock (2000), Page 121. Their shape parameter is equivalent
        # to SciPy's with the conversion `fshape_s = fshape / scale`.
        #
        # The MLE formulas are not used in 3 conditions:
        # - `loc` is not fixed
        # - `mu` is fixed
        #   These cases fall back on the superclass fit method.
        # - `loc` is fixed but translation results in negative data; this
        #   raises a `FitDataError`.
        if floc is None or fshape_s is not None:
            return super().fit(data, *args, **kwds)
        if np.any(data - floc < 0):
            raise FitDataError("invgauss", lower=0, upper=np.inf)

        shifted = data - floc
        sample_mean = np.mean(shifted)
        if fscale is None:
            fscale = len(shifted) / (np.sum(shifted ** -1 - sample_mean ** -1))
        fshape_s = sample_mean / fscale
        return fshape_s, floc, fscale


invgauss = invgauss_gen(a=0.0, name='invgauss')
class geninvgauss_gen(rv_continuous):
    r"""A Generalized Inverse Gaussian continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `geninvgauss` is:

    .. math::

        f(x, p, b) = x^{p-1} \exp(-b (x + 1/x) / 2) / (2 K_p(b))

    where `x > 0`, `p` is a real number and `b > 0`([1]_).
    :math:`K_p` is the modified Bessel function of second kind of order `p`
    (`scipy.special.kv`).

    %(after_notes)s

    The inverse Gaussian distribution `stats.invgauss(mu)` is a special case of
    `geninvgauss` with `p = -1/2`, `b = 1 / mu` and `scale = mu`.

    Generating random variates is challenging for this distribution. The
    implementation is based on [2]_.

    References
    ----------
    .. [1] O. Barndorff-Nielsen, P. Blaesild, C. Halgreen, "First hitting time
       models for the generalized inverse gaussian distribution",
       Stochastic Processes and their Applications 7, pp. 49--54, 1978.

    .. [2] W. Hoermann and J. Leydold, "Generating generalized inverse Gaussian
       random variates", Statistics and Computing, 24(4), p. 547--557, 2014.

    %(example)s

    """
    def _argcheck(self, p, b):
        # `p == p` is False only for NaN, so any non-NaN p is accepted;
        # b must be strictly positive.
        return (p == p) & (b > 0)

    def _shape_info(self):
        ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
        ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
        return [ip, ib]

    def _logpdf(self, x, p, b):
        # kve instead of kv works better for large values of b
        # warn if kve produces infinite values and replace by nan
        # otherwise c = -inf and the results are often incorrect
        # NOTE(review): the kve handling described above presumably lives in
        # `_stats.geninvgauss_logpdf`; this method only checks the result for
        # NaN and warns.
        @np.vectorize
        def logpdf_single(x, p, b):
            return _stats.geninvgauss_logpdf(x, p, b)

        z = logpdf_single(x, p, b)
        if np.isnan(z).any():
            msg = ("Infinite values encountered in scipy.special.kve(p, b). "
                   "Values replaced by NaN to avoid incorrect results.")
            warnings.warn(msg, RuntimeWarning)
        return z

    def _pdf(self, x, p, b):
        # relying on logpdf avoids overflow of x**(p-1) for large x and p
        return np.exp(self._logpdf(x, p, b))

    def _cdf(self, x, *args):
        # The CDF is obtained by numerically integrating the compiled pdf
        # callback from the lower end of the support up to x, one element
        # at a time.
        _a, _b = self._get_support(*args)

        @np.vectorize
        def _cdf_single(x, *args):
            p, b = args
            # pack the shape parameters into a C array handed to the
            # low-level callable as its user data
            user_data = np.array([p, b], float).ctypes.data_as(ctypes.c_void_p)
            llc = LowLevelCallable.from_cython(_stats, '_geninvgauss_pdf',
                                               user_data)

            # quad returns (value, error estimate); only the value is used
            return integrate.quad(llc, _a, x)[0]

        return _cdf_single(x, *args)

    def _logquasipdf(self, x, p, b):
        # log of the quasi-density (w/o normalizing constant) used in _rvs
        # evaluates to -inf wherever x <= 0
        return _lazywhere(x > 0, (x, p, b),
                          lambda x, p, b: (p - 1)*np.log(x) - b*(x + 1/x)/2,
                          -np.inf)

    def _rvs(self, p, b, size=None, random_state=None):
        # if p and b are scalar, use _rvs_scalar, otherwise need to create
        # output by iterating over parameters
        if np.isscalar(p) and np.isscalar(b):
            out = self._rvs_scalar(p, b, size, random_state)
        elif p.size == 1 and b.size == 1:
            out = self._rvs_scalar(p.item(), b.item(), size, random_state)
        else:
            # When this method is called, size will be a (possibly empty)
            # tuple of integers.  It will not be None; if `size=None` is passed
            # to `rvs()`, size will be the empty tuple ().

            p, b = np.broadcast_arrays(p, b)
            # p and b now have the same shape.

            # `shp` is the shape of the blocks of random variates that are
            # generated for each combination of parameters associated with
            # broadcasting p and b.
            # bc is a tuple the same length as size.  The values
            # in bc are bools.  If bc[j] is True, it means that
            # entire axis is filled in for a given combination of the
            # broadcast arguments.
            shp, bc = _check_shape(p.shape, size)

            # `numsamples` is the total number of variates to be generated
            # for each combination of the input arguments.
            numsamples = int(np.prod(shp))

            # `out` is the array to be returned.  It is filled in the
            # loop below.
            out = np.empty(size)

            it = np.nditer([p, b],
                           flags=['multi_index'],
                           op_flags=[['readonly'], ['readonly']])
            while not it.finished:
                # Convert the iterator's multi_index into an index into the
                # `out` array where the call to _rvs_scalar() will be stored.
                # Where bc is True, we use a full slice; otherwise we use the
                # index value from it.multi_index.  len(it.multi_index) might
                # be less than len(bc), and in that case we want to align these
                # two sequences to the right, so the loop variable j runs from
                # -len(size) to 0.  This doesn't cause an IndexError, as
                # bc[j] will be True in those cases where it.multi_index[j]
                # would cause an IndexError.
                idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
                            for j in range(-len(size), 0))
                out[idx] = self._rvs_scalar(it[0], it[1], numsamples,
                                            random_state).reshape(shp)
                it.iternext()

        # an empty `size` tuple means a single variate: return a scalar
        if size == ():
            out = out.item()
        return out

    def _rvs_scalar(self, p, b, numsamples, random_state):
        # following [2], the quasi-pdf is used instead of the pdf for the
        # generation of rvs
        invert_res = False
        # a falsy `numsamples` (e.g. None or an empty tuple) means one draw
        if not numsamples:
            numsamples = 1
        if p < 0:
            # note: if X is geninvgauss(p, b), then 1/X is geninvgauss(-p, b)
            p = -p
            invert_res = True
        m = self._mode(p, b)

        # determine method to be used following [2]
        ratio_unif = True
        if p >= 1 or b > 1:
            # ratio of uniforms with mode shift below
            mode_shift = True
        elif b >= min(0.5, 2 * np.sqrt(1 - p) / 3):
            # ratio of uniforms without mode shift below
            mode_shift = False
        else:
            # new algorithm in [2]
            ratio_unif = False

        # prepare sampling of rvs
        size1d = tuple(np.atleast_1d(numsamples))
        N = np.prod(size1d)  # number of rvs needed, reshape upon return
        x = np.zeros(N)
        # number of accepted variates stored in `x` so far
        simulated = 0

        if ratio_unif:
            # use ratio of uniforms method
            if mode_shift:
                a2 = -2 * (p + 1) / b - m
                a1 = 2 * m * (p - 1) / b - 1
                # find roots of x**3 + a2*x**2 + a1*x + m (Cardano's formula)
                p1 = a1 - a2**2 / 3
                q1 = 2 * a2**3 / 27 - a2 * a1 / 3 + m
                phi = np.arccos(-q1 * np.sqrt(-27 / p1**3) / 2)
                s1 = -np.sqrt(-4 * p1 / 3)
                root1 = s1 * np.cos(phi / 3 + np.pi / 3) - a2 / 3
                root2 = -s1 * np.cos(phi / 3) - a2 / 3
                # root3 = s1 * np.cos(phi / 3 - np.pi / 3) - a2 / 3

                # if g is the quasipdf, rescale: g(x) / g(m) which we can write
                # as exp(log(g(x)) - log(g(m))). This is important
                # since for large values of p and b, g cannot be evaluated.
                # denote the rescaled quasipdf by h
                lm = self._logquasipdf(m, p, b)
                d1 = self._logquasipdf(root1, p, b) - lm
                d2 = self._logquasipdf(root2, p, b) - lm
                # compute the bounding rectangle w.r.t. h. Note that
                # np.exp(0.5*d1) = np.sqrt(g(root1)/g(m)) = np.sqrt(h(root1))
                vmin = (root1 - m) * np.exp(0.5 * d1)
                vmax = (root2 - m) * np.exp(0.5 * d2)
                umax = 1  # umax = sqrt(h(m)) = 1

                logqpdf = lambda x: self._logquasipdf(x, p, b) - lm
                # candidates are shifted by the mode: rvs = v / u + c
                c = m
            else:
                # ratio of uniforms without mode shift
                # compute np.sqrt(quasipdf(m))
                umax = np.exp(0.5*self._logquasipdf(m, p, b))
                xplus = ((1 + p) + np.sqrt((1 + p)**2 + b**2))/b
                vmin = 0
                # compute xplus * np.sqrt(quasipdf(xplus))
                vmax = xplus * np.exp(0.5 * self._logquasipdf(xplus, p, b))
                c = 0
                logqpdf = lambda x: self._logquasipdf(x, p, b)

            # sanity checks on the bounding rectangle
            if vmin >= vmax:
                raise ValueError("vmin must be smaller than vmax.")
            if umax <= 0:
                raise ValueError("umax must be positive.")

            # `i` counts rejection rounds; it only feeds the failure check
            # at the bottom of the loop
            i = 1
            while simulated < N:
                k = N - simulated
                # simulate uniform rvs on [0, umax] and [vmin, vmax]
                u = umax * random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                v = vmin + (vmax - vmin) * v
                rvs = v / u + c
                # rewrite acceptance condition u**2 <= pdf(rvs) by taking logs
                accept = (2*np.log(u) <= logqpdf(rvs))
                num_accept = np.sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = rvs[accept]
                    simulated += num_accept

                # give up if nothing at all has been accepted once i*N
                # reaches 50000
                if (simulated == 0) and (i*N >= 50000):
                    msg = ("Not a single random variate could be generated "
                           "in {} attempts. Sampling does not appear to "
                           "work for the provided parameters.".format(i*N))
                    raise RuntimeError(msg)
                i += 1
        else:
            # use new algorithm in [2]
            # The hat function is built from three pieces, on (0, x0),
            # (x0, 2 / b) and (xs, infinity); k1, k2, k3 scale the pieces
            # and A1, A2, A3 are the areas below them.
            x0 = b / (1 - p)
            xs = np.max((x0, 2 / b))
            k1 = np.exp(self._logquasipdf(m, p, b))
            A1 = k1 * x0
            if x0 < 2 / b:
                k2 = np.exp(-b)
                if p > 0:
                    A2 = k2 * ((2 / b)**p - x0**p) / p
                else:
                    A2 = k2 * np.log(2 / b**2)
            else:
                # the middle piece is empty
                k2, A2 = 0, 0
            k3 = xs**(p - 1)
            A3 = 2 * k3 * np.exp(-xs * b / 2) / b
            A = A1 + A2 + A3

            # [2]: rejection constant is < 2.73; so expected runtime is finite
            while simulated < N:
                k = N - simulated
                h, rvs = np.zeros(k), np.zeros(k)
                # simulate uniform rvs on [x1, x2] and [0, y2]
                u = random_state.uniform(size=k)
                v = A * random_state.uniform(size=k)
                # select the piece of the hat each draw of v falls into
                cond1 = v <= A1
                cond2 = np.logical_not(cond1) & (v <= A1 + A2)
                cond3 = np.logical_not(cond1 | cond2)
                # subdomain (0, x0)
                rvs[cond1] = x0 * v[cond1] / A1
                h[cond1] = k1
                # subdomain (x0, 2 / b)
                if p > 0:
                    rvs[cond2] = (x0**p + (v[cond2] - A1) * p / k2)**(1 / p)
                else:
                    rvs[cond2] = b * np.exp((v[cond2] - A1) * np.exp(b))
                h[cond2] = k2 * rvs[cond2]**(p - 1)
                # subdomain (xs, infinity)
                z = np.exp(-xs * b / 2) - b * (v[cond3] - A1 - A2) / (2 * k3)
                rvs[cond3] = -2 / b * np.log(z)
                h[cond3] = k3 * np.exp(-rvs[cond3] * b / 2)
                # apply rejection method
                accept = (np.log(u * h) <= self._logquasipdf(rvs, p, b))
                num_accept = sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = rvs[accept]
                    simulated += num_accept

        rvs = np.reshape(x, size1d)
        # undo the sign flip of p applied at the top (1/X trick)
        if invert_res:
            rvs = 1 / rvs
        return rvs

    def _mode(self, p, b):
        # distinguish cases to avoid catastrophic cancellation (see [2])
        if p < 1:
            return b / (np.sqrt((p - 1)**2 + b**2) + 1 - p)
        else:
            return (np.sqrt((1 - p)**2 + b**2) - (1 - p)) / b

    def _munp(self, n, p, b):
        # n-th raw moment as the ratio kve(p + n, b) / kve(p, b); entries
        # where either value is infinite are reported as NaN with a warning
        num = sc.kve(p + n, b)
        denom = sc.kve(p, b)
        inf_vals = np.isinf(num) | np.isinf(denom)
        if inf_vals.any():
            msg = ("Infinite values encountered in the moment calculation "
                   "involving scipy.special.kve. Values replaced by NaN to "
                   "avoid incorrect results.")
            warnings.warn(msg, RuntimeWarning)
            m = np.full_like(num, np.nan, dtype=np.double)
            m[~inf_vals] = num[~inf_vals] / denom[~inf_vals]
        else:
            m = num / denom
        return m


geninvgauss = geninvgauss_gen(a=0.0, name="geninvgauss")
4595class norminvgauss_gen(rv_continuous):
4596 r"""A Normal Inverse Gaussian continuous random variable.
4598 %(before_notes)s
4600 Notes
4601 -----
4602 The probability density function for `norminvgauss` is:
4604 .. math::
4606 f(x, a, b) = \frac{a \, K_1(a \sqrt{1 + x^2})}{\pi \sqrt{1 + x^2}} \,
4607 \exp(\sqrt{a^2 - b^2} + b x)
4609 where :math:`x` is a real number, the parameter :math:`a` is the tail
4610 heaviness and :math:`b` is the asymmetry parameter satisfying
4611 :math:`a > 0` and :math:`|b| <= a`.
4612 :math:`K_1` is the modified Bessel function of second kind
4613 (`scipy.special.k1`).
4615 %(after_notes)s
4617 A normal inverse Gaussian random variable `Y` with parameters `a` and `b`
4618 can be expressed as a normal mean-variance mixture:
4619 `Y = b * V + sqrt(V) * X` where `X` is `norm(0,1)` and `V` is
4620 `invgauss(mu=1/sqrt(a**2 - b**2))`. This representation is used
4621 to generate random variates.
4623 Another common parametrization of the distribution (see Equation 2.1 in
4624 [2]_) is given by the following expression of the pdf:
4626 .. math::
4628 g(x, \alpha, \beta, \delta, \mu) =
4629 \frac{\alpha\delta K_1\left(\alpha\sqrt{\delta^2 + (x - \mu)^2}\right)}
4630 {\pi \sqrt{\delta^2 + (x - \mu)^2}} \,
4631 e^{\delta \sqrt{\alpha^2 - \beta^2} + \beta (x - \mu)}
4633 In SciPy, this corresponds to
4634 `a = alpha * delta, b = beta * delta, loc = mu, scale=delta`.
4636 References
4637 ----------
4638 .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions on
4639 Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
4640 pp. 151-157, 1978.
4642 .. [2] O. Barndorff-Nielsen, "Normal Inverse Gaussian Distributions and
4643 Stochastic Volatility Modelling", Scandinavian Journal of
4644 Statistics, Vol. 24, pp. 1-13, 1997.
4646 %(example)s
4648 """
4649 _support_mask = rv_continuous._open_support_mask
4651 def _argcheck(self, a, b):
4652 return (a > 0) & (np.absolute(b) < a)
4654 def _shape_info(self):
4655 ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
4656 ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
4657 return [ia, ib]
4659 def _fitstart(self, data):
4660 # Arbitrary, but the default a=b=1 is not valid
4661 return super()._fitstart(data, args=(1, 0.5))
4663 def _pdf(self, x, a, b):
4664 gamma = np.sqrt(a**2 - b**2)
4665 fac1 = a / np.pi * np.exp(gamma)
4666 sq = np.hypot(1, x) # reduce overflows
4667 return fac1 * sc.k1e(a * sq) * np.exp(b*x - a*sq) / sq
4669 def _sf(self, x, a, b):
4670 if np.isscalar(x):
4671 # If x is a scalar, then so are a and b.
4672 return integrate.quad(self._pdf, x, np.inf, args=(a, b))[0]
4673 else:
4674 result = []
4675 for (x0, a0, b0) in zip(x, a, b):
4676 result.append(integrate.quad(self._pdf, x0, np.inf,
4677 args=(a0, b0))[0])
4678 return np.array(result)
4680 def _isf(self, q, a, b):
4681 def _isf_scalar(q, a, b):
4683 def eq(x, a, b, q):
4684 # Solve eq(x, a, b, q) = 0 to obtain isf(x, a, b) = q.
4685 return self._sf(x, a, b) - q
4687 # Find a bracketing interval for the root.
4688 # Start at the mean, and grow the length of the interval
4689 # by 2 each iteration until there is a sign change in eq.
4690 xm = self.mean(a, b)
4691 em = eq(xm, a, b, q)
4692 if em == 0:
4693 # Unlikely, but might as well check.
4694 return xm
4695 if em > 0:
4696 delta = 1
4697 left = xm
4698 right = xm + delta
4699 while eq(right, a, b, q) > 0:
4700 delta = 2*delta
4701 right = xm + delta
4702 else:
4703 # em < 0
4704 delta = 1
4705 right = xm
4706 left = xm - delta
4707 while eq(left, a, b, q) < 0:
4708 delta = 2*delta
4709 left = xm - delta
4710 result = optimize.brentq(eq, left, right, args=(a, b, q),
4711 xtol=self.xtol)
4712 return result
4714 if np.isscalar(q):
4715 return _isf_scalar(q, a, b)
4716 else:
4717 result = []
4718 for (q0, a0, b0) in zip(q, a, b):
4719 result.append(_isf_scalar(q0, a0, b0))
4720 return np.array(result)
def _rvs(self, a, b, size=None, random_state=None):
    """Draw variates via the normal mean-variance mixture representation."""
    # note: Y = b * V + sqrt(V) * X is norminvgauss(a, b) if X is standard
    # normal and V is invgauss(mu=1/sqrt(a**2 - b**2))
    gamma = np.sqrt(a**2 - b**2)
    mixing = invgauss.rvs(mu=1/gamma, size=size, random_state=random_state)
    gaussian = norm.rvs(size=size, random_state=random_state)
    return b * mixing + np.sqrt(mixing) * gaussian
4730 def _stats(self, a, b):
4731 gamma = np.sqrt(a**2 - b**2)
4732 mean = b / gamma
4733 variance = a**2 / gamma**3
4734 skewness = 3.0 * b / (a * np.sqrt(gamma))
4735 kurtosis = 3.0 * (1 + 4 * b**2 / a**2) / gamma
4736 return mean, variance, skewness, kurtosis
# Module-level distribution object built from `norminvgauss_gen`.
norminvgauss = norminvgauss_gen(name="norminvgauss")
class invweibull_gen(rv_continuous):
    """An inverted Weibull continuous random variable.

    This distribution is also known as the Fréchet distribution or the
    type II extreme value distribution.

    %(before_notes)s

    Notes
    -----
    The probability density function for `invweibull` is:

    .. math::

        f(x, c) = c x^{-c-1} \\exp(-x^{-c})

    for :math:`x > 0`, :math:`c > 0`.

    `invweibull` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    References
    ----------
    F.R.S. de Gusmao, E.M.M Ortega and G.M. Cordeiro, "The generalized inverse
    Weibull distribution", Stat. Papers, vol. 52, pp. 591-619, 2011.

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [shape_c]

    def _pdf(self, x, c):
        # invweibull.pdf(x, c) = c * x**(-c-1) * exp(-x**(-c))
        power_part = np.power(x, -c - 1.0)
        exp_part = np.exp(-np.power(x, -c))
        return c * power_part * exp_part

    def _cdf(self, x, c):
        # cdf(x) = exp(-x**(-c))
        return np.exp(-np.power(x, -c))

    def _sf(self, x, c):
        # 1 - exp(-x**(-c)), written with expm1.
        return -np.expm1(-x**-c)

    def _ppf(self, q, c):
        neg_log_q = -np.log(q)
        return np.power(neg_log_q, -1.0/c)

    def _isf(self, p, c):
        # Inverse of `_sf`; log1p(-p) is log(1 - p).
        return (-np.log1p(-p))**(-1/c)

    def _munp(self, n, c):
        # Non-central moment of order n: Gamma(1 - n/c).
        return sc.gamma(1 - n / c)

    def _entropy(self, c):
        return 1+_EULER + _EULER / c - np.log(c)

    def _fitstart(self, data, args=None):
        # invweibull requires c > 1 for the first moment to exist, so use 2.0
        if args is None:
            args = (2.0,)
        return super()._fitstart(data, args=args)


invweibull = invweibull_gen(a=0, name='invweibull')
class johnsonsb_gen(rv_continuous):
    r"""A Johnson SB continuous random variable.

    %(before_notes)s

    See Also
    --------
    johnsonsu

    Notes
    -----
    The probability density function for `johnsonsb` is:

    .. math::

        f(x, a, b) = \frac{b}{x(1-x)}  \phi(a + b \log \frac{x}{1-x} )

    where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`
    and :math:`x \in [0,1]`.  :math:`\phi` is the pdf of the normal
    distribution.

    `johnsonsb` takes :math:`a` and :math:`b` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _argcheck(self, a, b):
        # `a == a` is False only for NaN, so NaN values of `a` are rejected.
        b_positive = b > 0
        a_not_nan = a == a
        return b_positive & a_not_nan

    def _shape_info(self):
        return [
            _ShapeInfo("a", False, (-np.inf, np.inf), (False, False)),
            _ShapeInfo("b", False, (0, np.inf), (False, False)),
        ]

    def _pdf(self, x, a, b):
        # johnsonsb.pdf(x, a, b) = b / (x*(1-x)) * phi(a + b * log(x/(1-x)))
        z = a + b*np.log(x/(1.0-x))
        density_z = _norm_pdf(z)
        return b*1.0/(x*(1-x))*density_z

    def _cdf(self, x, a, b):
        z = a + b*np.log(x/(1.0-x))
        return _norm_cdf(z)

    def _ppf(self, q, a, b):
        # Invert z = a + b*logit(x): x = 1 / (1 + exp(-(z - a)/b)).
        z = _norm_ppf(q)
        return 1.0 / (1 + np.exp(-1.0 / b * (z - a)))


johnsonsb = johnsonsb_gen(a=0.0, b=1.0, name='johnsonsb')
class johnsonsu_gen(rv_continuous):
    r"""A Johnson SU continuous random variable.

    %(before_notes)s

    See Also
    --------
    johnsonsb

    Notes
    -----
    The probability density function for `johnsonsu` is:

    .. math::

        f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
                     \phi(a + b \log(x + \sqrt{x^2 + 1}))

    where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
    :math:`\phi` is the pdf of the normal distribution.

    `johnsonsu` takes :math:`a` and :math:`b` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, a, b):
        # `a == a` is False only for NaN, so NaN values of `a` are rejected.
        b_positive = b > 0
        a_not_nan = a == a
        return b_positive & a_not_nan

    def _shape_info(self):
        return [
            _ShapeInfo("a", False, (-np.inf, np.inf), (False, False)),
            _ShapeInfo("b", False, (0, np.inf), (False, False)),
        ]

    def _pdf(self, x, a, b):
        # johnsonsu.pdf(x, a, b) = b / sqrt(x**2 + 1) *
        #                          phi(a + b * log(x + sqrt(x**2 + 1)))
        x_sq = x*x
        density_z = _norm_pdf(a + b * np.log(x + np.sqrt(x_sq+1)))
        return b*1.0/np.sqrt(x_sq+1.0)*density_z

    def _cdf(self, x, a, b):
        z = a + b * np.log(x + np.sqrt(x*x + 1))
        return _norm_cdf(z)

    def _ppf(self, q, a, b):
        # log(x + sqrt(x**2 + 1)) is arcsinh(x), hence the sinh inverse.
        z = _norm_ppf(q)
        return np.sinh((z - a) / b)


johnsonsu = johnsonsu_gen(name='johnsonsu')
class laplace_gen(rv_continuous):
    r"""A Laplace continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `laplace` is

    .. math::

        f(x) = \frac{1}{2} \exp(-|x|)

    for a real number :math:`x`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # No shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        return random_state.laplace(0, 1, size=size)

    def _pdf(self, x):
        # laplace.pdf(x) = 1/2 * exp(-abs(x))
        return 0.5*np.exp(-abs(x))

    def _cdf(self, x):
        # np.where evaluates both branches for every element, so the unused
        # branch may overflow; silence that warning.
        with np.errstate(over='ignore'):
            upper = 1.0 - 0.5*np.exp(-x)
            lower = 0.5*np.exp(x)
            return np.where(x > 0, upper, lower)

    def _sf(self, x):
        # By symmetry...
        return self._cdf(-x)

    def _ppf(self, q):
        upper = -np.log(2*(1-q))
        lower = np.log(2*q)
        return np.where(q > 0.5, upper, lower)

    def _isf(self, q):
        # By symmetry...
        return -self._ppf(q)

    def _stats(self):
        # mean, variance, skewness, kurtosis
        return 0, 2, 0, 3

    def _entropy(self):
        return np.log(2)+1

    @_call_super_mom
    @replace_notes_in_docstring(rv_continuous, notes="""\
        This function uses explicit formulas for the maximum likelihood
        estimation of the Laplace distribution parameters, so the keyword
        arguments `loc`, `scale`, and `optimizer` are ignored.\n\n""")
    def fit(self, data, *args, **kwds):
        checked = _check_fit_input_parameters(self, data, args, kwds)
        data, floc, fscale = checked

        # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
        # and Peacock (2000), Page 124

        # Location estimate: the sample median, unless fixed by the caller.
        if floc is None:
            floc = np.median(data)

        # Scale estimate: mean absolute deviation about the location.
        if fscale is None:
            fscale = (np.sum(np.abs(data - floc))) / len(data)

        return floc, fscale


laplace = laplace_gen(name='laplace')
class laplace_asymmetric_gen(rv_continuous):
    r"""An asymmetric Laplace continuous random variable.

    %(before_notes)s

    See Also
    --------
    laplace : Laplace distribution

    Notes
    -----
    The probability density function for `laplace_asymmetric` is

    .. math::

       f(x, \kappa) &= \frac{1}{\kappa+\kappa^{-1}}\exp(-x\kappa),\quad x\ge0\\
                    &= \frac{1}{\kappa+\kappa^{-1}}\exp(x/\kappa),\quad x<0\\

    for :math:`-\infty < x < \infty`, :math:`\kappa > 0`.

    `laplace_asymmetric` takes ``kappa`` as a shape parameter for
    :math:`\kappa`. For :math:`\kappa = 1`, it is identical to a
    Laplace distribution.

    %(after_notes)s

    References
    ----------
    .. [1] "Asymmetric Laplace distribution", Wikipedia
            https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution

    .. [2] Kozubowski TJ and Podgórski K. A Multivariate and
           Asymmetric Generalization of Laplace Distribution,
           Computational Statistics 15, 531--540 (2000).
           :doi:`10.1007/PL00022717`

    %(example)s

    """
    def _shape_info(self):
        shape_kappa = _ShapeInfo("kappa", False, (0, np.inf), (False, False))
        return [shape_kappa]

    def _pdf(self, x, kappa):
        return np.exp(self._logpdf(x, kappa))

    def _logpdf(self, x, kappa):
        inv_kappa = 1/kappa
        # Slope of the log-density: -kappa on x >= 0, 1/kappa on x < 0.
        slope = np.where(x >= 0, -kappa, inv_kappa)
        log_density = x * slope
        log_density -= np.log(kappa+inv_kappa)
        return log_density

    def _cdf(self, x, kappa):
        inv_kappa = 1/kappa
        total = kappa+inv_kappa
        right = 1 - np.exp(-x*kappa)*(inv_kappa/total)
        left = np.exp(x*inv_kappa)*(kappa/total)
        return np.where(x >= 0, right, left)

    def _sf(self, x, kappa):
        inv_kappa = 1/kappa
        total = kappa+inv_kappa
        right = np.exp(-x*kappa)*(inv_kappa/total)
        left = 1 - np.exp(x*inv_kappa)*(kappa/total)
        return np.where(x >= 0, right, left)

    def _ppf(self, q, kappa):
        inv_kappa = 1/kappa
        total = kappa+inv_kappa
        # kappa/total is the cdf at x = 0, the switch point between branches.
        right = -np.log((1 - q)*total*kappa)*inv_kappa
        left = np.log(q*total/kappa)*kappa
        return np.where(q >= kappa/total, right, left)

    def _isf(self, q, kappa):
        inv_kappa = 1/kappa
        total = kappa+inv_kappa
        # inv_kappa/total is the sf at x = 0, the switch point between
        # branches.
        right = -np.log(q*total*kappa)*inv_kappa
        left = np.log((1 - q)*total/kappa)*kappa
        return np.where(q <= inv_kappa/total, right, left)

    def _stats(self, kappa):
        inv_kappa = 1/kappa
        mean = inv_kappa - kappa
        variance = inv_kappa*inv_kappa + kappa*kappa
        kappa4_plus_1 = 1+np.power(kappa, 4)
        skew = 2.0*(1-np.power(kappa, 6))/np.power(kappa4_plus_1, 1.5)
        kurt = 6.0*(1+np.power(kappa, 8))/np.power(kappa4_plus_1, 2)
        return mean, variance, skew, kurt

    def _entropy(self, kappa):
        return 1 + np.log(kappa+1/kappa)


laplace_asymmetric = laplace_asymmetric_gen(name='laplace_asymmetric')
5087def _check_fit_input_parameters(dist, data, args, kwds):
5088 data = np.asarray(data)
5089 floc = kwds.get('floc', None)
5090 fscale = kwds.get('fscale', None)
5092 num_shapes = len(dist.shapes.split(",")) if dist.shapes else 0
5093 fshape_keys = []
5094 fshapes = []
5096 # user has many options for fixing the shape, so here we standardize it
5097 # into 'f' + the number of the shape.
5098 # Adapted from `_reduce_func` in `_distn_infrastructure.py`:
5099 if dist.shapes:
5100 shapes = dist.shapes.replace(',', ' ').split()
5101 for j, s in enumerate(shapes):
5102 key = 'f' + str(j)
5103 names = [key, 'f' + s, 'fix_' + s]
5104 val = _get_fixed_fit_value(kwds, names)
5105 fshape_keys.append(key)
5106 fshapes.append(val)
5107 if val is not None:
5108 kwds[key] = val
5110 # determine if there are any unknown arguments in kwds
5111 known_keys = {'loc', 'scale', 'optimizer', 'method',
5112 'floc', 'fscale', *fshape_keys}
5113 unknown_keys = set(kwds).difference(known_keys)
5114 if unknown_keys:
5115 raise TypeError(f"Unknown keyword arguments: {unknown_keys}.")
5117 if len(args) > num_shapes:
5118 raise TypeError("Too many positional arguments.")
5120 if None not in {floc, fscale, *fshapes}:
5121 # This check is for consistency with `rv_continuous.fit`.
5122 # Without this check, this function would just return the
5123 # parameters that were given.
5124 raise RuntimeError("All parameters fixed. There is nothing to "
5125 "optimize.")
5127 if not np.isfinite(data).all():
5128 raise ValueError("The data contains non-finite values.")
5130 return (data, *fshapes, floc, fscale)
class levy_gen(rv_continuous):
    r"""A Levy continuous random variable.

    %(before_notes)s

    See Also
    --------
    levy_stable, levy_l

    Notes
    -----
    The probability density function for `levy` is:

    .. math::

        f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp\left(-\frac{1}{2x}\right)

    for :math:`x >= 0`.

    This is the same as the Levy-stable distribution with :math:`a=1/2` and
    :math:`b=1`.

    %(after_notes)s

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import levy
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1, 1)

    Calculate the first four moments:

    >>> mean, var, skew, kurt = levy.stats(moments='mvsk')

    Display the probability density function (``pdf``):

    >>> # `levy` is very heavy-tailed.
    >>> # To show a nice plot, let's cut off the upper 40 percent.
    >>> a, b = levy.ppf(0), levy.ppf(0.6)
    >>> x = np.linspace(a, b, 100)
    >>> ax.plot(x, levy.pdf(x),
    ...        'r-', lw=5, alpha=0.6, label='levy pdf')

    Alternatively, the distribution object can be called (as a function)
    to fix the shape, location and scale parameters. This returns a "frozen"
    RV object holding the given parameters fixed.

    Freeze the distribution and display the frozen ``pdf``:

    >>> rv = levy()
    >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

    Check accuracy of ``cdf`` and ``ppf``:

    >>> vals = levy.ppf([0.001, 0.5, 0.999])
    >>> np.allclose([0.001, 0.5, 0.999], levy.cdf(vals))
    True

    Generate random numbers:

    >>> r = levy.rvs(size=1000)

    And compare the histogram:

    >>> # manual binning to ignore the tail
    >>> bins = np.concatenate((np.linspace(a, b, 20), [np.max(r)]))
    >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
    >>> ax.set_xlim([x[0], x[-1]])
    >>> ax.legend(loc='best', frameon=False)
    >>> plt.show()

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        # No shape parameters.
        return []

    def _pdf(self, x):
        # levy.pdf(x) = 1 / (x * sqrt(2*pi*x)) * exp(-1/(2*x))
        exp_part = np.exp(-1/(2*x))
        return 1 / np.sqrt(2*np.pi*x) / x * exp_part

    def _cdf(self, x):
        # Equivalent to 2*norm.sf(np.sqrt(1/x))
        arg = np.sqrt(0.5 / x)
        return sc.erfc(arg)

    def _sf(self, x):
        arg = np.sqrt(0.5 / x)
        return sc.erf(arg)

    def _ppf(self, q):
        # Equivalent to 1.0/(norm.isf(q/2)**2) or 0.5/(erfcinv(q)**2)
        z = -sc.ndtri(q/2)
        return 1.0 / (z * z)

    def _isf(self, p):
        # Inverse of `_sf`: erf(sqrt(0.5/x)) = p.
        return 1/(2*sc.erfinv(p)**2)

    def _stats(self):
        # Mean and variance are infinite; skewness and kurtosis are
        # reported as NaN.
        return np.inf, np.inf, np.nan, np.nan


levy = levy_gen(a=0.0, name="levy")
class levy_l_gen(rv_continuous):
    r"""A left-skewed Levy continuous random variable.

    %(before_notes)s

    See Also
    --------
    levy, levy_stable

    Notes
    -----
    The probability density function for `levy_l` is:

    .. math::
        f(x) = \frac{1}{|x| \sqrt{2\pi |x|}} \exp{ \left(-\frac{1}{2|x|} \right)}

    for :math:`x <= 0`.

    This is the same as the Levy-stable distribution with :math:`a=1/2` and
    :math:`b=-1`.

    %(after_notes)s

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import levy_l
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1, 1)

    Calculate the first four moments:

    >>> mean, var, skew, kurt = levy_l.stats(moments='mvsk')

    Display the probability density function (``pdf``):

    >>> # `levy_l` is very heavy-tailed.
    >>> # To show a nice plot, let's cut off the lower 40 percent.
    >>> a, b = levy_l.ppf(0.4), levy_l.ppf(1)
    >>> x = np.linspace(a, b, 100)
    >>> ax.plot(x, levy_l.pdf(x),
    ...        'r-', lw=5, alpha=0.6, label='levy_l pdf')

    Alternatively, the distribution object can be called (as a function)
    to fix the shape, location and scale parameters. This returns a "frozen"
    RV object holding the given parameters fixed.

    Freeze the distribution and display the frozen ``pdf``:

    >>> rv = levy_l()
    >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

    Check accuracy of ``cdf`` and ``ppf``:

    >>> vals = levy_l.ppf([0.001, 0.5, 0.999])
    >>> np.allclose([0.001, 0.5, 0.999], levy_l.cdf(vals))
    True

    Generate random numbers:

    >>> r = levy_l.rvs(size=1000)

    And compare the histogram:

    >>> # manual binning to ignore the tail
    >>> bins = np.concatenate(([np.min(r)], np.linspace(a, b, 20)))
    >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
    >>> ax.set_xlim([x[0], x[-1]])
    >>> ax.legend(loc='best', frameon=False)
    >>> plt.show()

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        # No shape parameters.
        return []

    def _pdf(self, x):
        # levy_l.pdf(x) = 1 / (abs(x) * sqrt(2*pi*abs(x))) * exp(-1/(2*abs(x)))
        magnitude = abs(x)
        exp_part = np.exp(-1/(2*magnitude))
        return 1/np.sqrt(2*np.pi*magnitude)/magnitude*exp_part

    def _cdf(self, x):
        magnitude = abs(x)
        return 2 * _norm_cdf(1 / np.sqrt(magnitude)) - 1

    def _sf(self, x):
        magnitude = abs(x)
        return 2 * _norm_sf(1 / np.sqrt(magnitude))

    def _ppf(self, q):
        # Inverse of `_cdf`: 2*Phi(1/sqrt(|x|)) - 1 = q, with x <= 0.
        z = _norm_ppf((q + 1.0) / 2)
        return -1.0 / (z * z)

    def _isf(self, p):
        # Inverse of `_sf`: 2*norm.sf(1/sqrt(|x|)) = p, with x <= 0.
        return -1/_norm_isf(p/2)**2

    def _stats(self):
        # Mean and variance are reported as infinite; skewness and
        # kurtosis as NaN.
        return np.inf, np.inf, np.nan, np.nan


levy_l = levy_l_gen(b=0.0, name="levy_l")
class logistic_gen(rv_continuous):
    r"""A logistic (or Sech-squared) continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `logistic` is:

    .. math::

        f(x) = \frac{\exp(-x)}
                    {(1+\exp(-x))^2}

    `logistic` is a special case of `genlogistic` with ``c=1``.

    Remark that the survival function (``logistic.sf``) is equal to the
    Fermi-Dirac distribution describing fermionic statistics.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # No shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        return random_state.logistic(size=size)

    def _pdf(self, x):
        # logistic.pdf(x) = exp(-x) / (1+exp(-x))**2
        return np.exp(self._logpdf(x))

    def _logpdf(self, x):
        # The density is symmetric, so evaluate at -|x| to keep the
        # argument of exp non-positive.
        y = -np.abs(x)
        return y - 2. * sc.log1p(np.exp(y))

    def _cdf(self, x):
        return sc.expit(x)

    def _logcdf(self, x):
        return sc.log_expit(x)

    def _ppf(self, q):
        return sc.logit(q)

    def _sf(self, x):
        return sc.expit(-x)

    def _logsf(self, x):
        return sc.log_expit(-x)

    def _isf(self, q):
        return -sc.logit(q)

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        return 0, np.pi*np.pi/3.0, 0, 6.0/5.0

    def _entropy(self):
        # https://en.wikipedia.org/wiki/Logistic_distribution
        return 2.0

    @_call_super_mom
    @inherit_docstring_from(rv_continuous)
    def fit(self, data, *args, **kwds):
        # `superfit=True` bypasses the analytical equations below and goes
        # straight to the generic implementation.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        data, floc, fscale = _check_fit_input_parameters(self, data,
                                                         args, kwds)
        n = len(data)

        # rv_continuous provided guesses
        loc, scale = self._fitstart(data)
        # these are trumped by user-provided guesses
        loc, scale = kwds.get('loc', loc), kwds.get('scale', scale)

        # the maximum likelihood estimators `a` and `b` of the location and
        # scale parameters are roots of the two equations described in `func`.
        # Source: Statistical Distributions, 3rd Edition. Evans, Hastings, and
        # Peacock (2000), Page 130
        def dl_dloc(loc, scale=fscale):
            c = (data - loc) / scale
            return np.sum(sc.expit(c)) - n/2

        def dl_dscale(scale, loc=floc):
            c = (data - loc) / scale
            return np.sum(c*np.tanh(c/2)) - n

        def func(params):
            loc, scale = params
            return dl_dloc(loc, scale), dl_dscale(scale, loc)

        # `dl_dscale` is an even function of `scale` and `dl_dloc` an odd
        # one, so whenever `-scale` solves the equations `scale` does too.
        # The solver may land on the negative root; a scale must be
        # positive, so the solved scale is returned as its absolute value.
        if fscale is not None and floc is None:
            res = optimize.root(dl_dloc, (loc,))
            loc = res.x[0]
            scale = fscale
        elif floc is not None and fscale is None:
            res = optimize.root(dl_dscale, (scale,))
            scale = abs(res.x[0])
            loc = floc
        else:
            res = optimize.root(func, (loc, scale))
            loc, scale = res.x
            scale = abs(scale)

        # Fall back on the generic fit if the root finder did not converge.
        return ((loc, scale) if res.success
                else super().fit(data, *args, **kwds))


logistic = logistic_gen(name='logistic')
class loggamma_gen(rv_continuous):
    r"""A log gamma continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `loggamma` is:

    .. math::

        f(x, c) = \frac{\exp(c x - \exp(x))}
                       {\Gamma(c)}

    for all :math:`x, c > 0`. Here, :math:`\Gamma` is the
    gamma function (`scipy.special.gamma`).

    `loggamma` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """

    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [shape_c]

    def _rvs(self, c, size=None, random_state=None):
        # Use the property of the gamma distribution Gamma(c)
        #    Gamma(c) ~ Gamma(c + 1)*U**(1/c),
        # where U is uniform on [0, 1]. (See, e.g.,
        # G. Marsaglia and W.W. Tsang, "A simple method for generating gamma
        # variables", https://doi.org/10.1145/358407.358414)
        # So
        #    log(Gamma(c)) ~ log(Gamma(c + 1)) + log(U)/c
        # Generating a sample with this formulation is a bit slower
        # than the more obvious log(Gamma(c)), but it avoids loss
        # of precision when c << 1.
        log_gamma_part = np.log(random_state.gamma(c + 1, size=size))
        log_uniform_part = np.log(random_state.uniform(size=size))/c
        return log_gamma_part + log_uniform_part

    def _pdf(self, x, c):
        # loggamma.pdf(x, c) = exp(c*x-exp(x)) / gamma(c)
        log_density = c*x-np.exp(x)-sc.gammaln(c)
        return np.exp(log_density)

    def _logpdf(self, x, c):
        return c*x - np.exp(x) - sc.gammaln(c)

    def _cdf(self, x, c):
        # exp(X) is gamma distributed, so use the regularized lower
        # incomplete gamma function at exp(x).
        return sc.gammainc(c, np.exp(x))

    def _ppf(self, q, c):
        gamma_quantile = sc.gammaincinv(c, q)
        return np.log(gamma_quantile)

    def _sf(self, x, c):
        return sc.gammaincc(c, np.exp(x))

    def _isf(self, q, c):
        gamma_quantile = sc.gammainccinv(c, q)
        return np.log(gamma_quantile)

    def _stats(self, c):
        # See, for example, "A Statistical Study of Log-Gamma Distribution", by
        # Ping Shing Chan (thesis, McMaster University, 1993).
        mu = sc.digamma(c)
        mu2 = sc.polygamma(1, c)
        g1 = sc.polygamma(2, c) / np.power(mu2, 1.5)
        g2 = sc.polygamma(3, c) / (mu2*mu2)
        return mu, mu2, g1, g2


loggamma = loggamma_gen(name='loggamma')
class loglaplace_gen(rv_continuous):
    r"""A log-Laplace continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `loglaplace` is:

    .. math::

        f(x, c) = \begin{cases}\frac{c}{2} x^{ c-1}  &\text{for } 0 < x < 1\\
                               \frac{c}{2} x^{-c-1}  &\text{for } x \ge 1
                  \end{cases}

    for :math:`c > 0`.

    `loglaplace` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    References
    ----------
    T.J. Kozubowski and K. Podgorski, "A log-Laplace growth rate model",
    The Mathematical Scientist, vol. 28, pp. 49-60, 2003.

    %(example)s

    """
    def _shape_info(self):
        shape_c = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [shape_c]

    def _pdf(self, x, c):
        # loglaplace.pdf(x, c) = c / 2 * x**(c-1),   for 0 < x < 1
        #                      = c / 2 * x**(-c-1),  for x >= 1
        half_c = c/2.0
        # Signed exponent: +c below 1, -c from 1 upwards.
        signed_c = np.where(x < 1, c, -c)
        return half_c*x**(signed_c-1)

    def _cdf(self, x, c):
        below_one = 0.5*x**c
        from_one = 1-0.5*x**(-c)
        return np.where(x < 1, below_one, from_one)

    def _ppf(self, q, c):
        lower_half = (2.0*q)**(1.0/c)
        upper_half = (2*(1.0-q))**(-1.0/c)
        return np.where(q < 0.5, lower_half, upper_half)

    def _munp(self, n, c):
        # Non-central moment of order n.
        c_sq = c**2
        return c_sq / (c_sq - n**2)

    def _entropy(self, c):
        return np.log(2.0/c) + 1.0


loglaplace = loglaplace_gen(a=0.0, name='loglaplace')
5583def _lognorm_logpdf(x, s):
5584 return _lazywhere(x != 0, (x, s),
5585 lambda x, s: -np.log(x)**2 / (2*s**2) - np.log(s*x*np.sqrt(2*np.pi)),
5586 -np.inf)
class lognorm_gen(rv_continuous):
    r"""A lognormal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `lognorm` is:

    .. math::

        f(x, s) = \frac{1}{s x \sqrt{2\pi}}
                  \exp\left(-\frac{\log^2(x)}{2s^2}\right)

    for :math:`x > 0`, :math:`s > 0`.

    `lognorm` takes ``s`` as a shape parameter for :math:`s`.

    %(after_notes)s

    Suppose a normally distributed random variable ``X`` has  mean ``mu`` and
    standard deviation ``sigma``. Then ``Y = exp(X)`` is lognormally
    distributed with ``s = sigma`` and ``scale = exp(mu)``.

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        shape_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
        return [shape_s]

    def _rvs(self, s, size=None, random_state=None):
        normal_draws = random_state.standard_normal(size)
        return np.exp(s * normal_draws)

    def _pdf(self, x, s):
        # lognorm.pdf(x, s) = 1 / (s*x*sqrt(2*pi)) * exp(-1/2*(log(x)/s)**2)
        return np.exp(self._logpdf(x, s))

    def _logpdf(self, x, s):
        return _lognorm_logpdf(x, s)

    # The cdf/sf family is the normal one evaluated at z = log(x) / s.
    def _cdf(self, x, s):
        z = np.log(x) / s
        return _norm_cdf(z)

    def _logcdf(self, x, s):
        z = np.log(x) / s
        return _norm_logcdf(z)

    def _ppf(self, q, s):
        return np.exp(s * _norm_ppf(q))

    def _sf(self, x, s):
        z = np.log(x) / s
        return _norm_sf(z)

    def _logsf(self, x, s):
        z = np.log(x) / s
        return _norm_logsf(z)

    def _stats(self, s):
        p = np.exp(s*s)
        mean = np.sqrt(p)
        variance = p*(p-1)
        skew = np.sqrt((p-1))*(2+p)
        # Polynomial p**4 + 2*p**3 + 3*p**2 - 6
        kurt = np.polyval([1, 2, 3, 0, -6.0], p)
        return mean, variance, skew, kurt

    def _entropy(self, s):
        return 0.5 * (1 + np.log(2*np.pi) + 2 * np.log(s))

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        When `method='MLE'` and
        the location parameter is fixed by using the `floc` argument,
        this function uses explicit formulas for the maximum likelihood
        estimation of the log-normal shape and scale parameters, so the
        `optimizer`, `loc` and `scale` keyword arguments are ignored.
        \n\n""")
    def fit(self, data, *args, **kwds):
        floc = kwds.get('floc', None)
        if floc is None:
            # fall back on the default fit method.
            return super().fit(data, *args, **kwds)

        # The shape may be fixed under any of three spellings; the first
        # truthy one wins.
        f0 = (kwds.get('f0', None) or kwds.get('fs', None) or
              kwds.get('fix_s', None))
        fscale = kwds.get('fscale', None)

        if len(args) > 1:
            raise TypeError("Too many input arguments.")
        # Drop every recognised keyword; anything left over is unknown.
        recognised = ('f0', 'fs', 'fix_s', 'floc', 'fscale', 'loc', 'scale',
                      'optimizer', 'method')
        for name in recognised:
            kwds.pop(name, None)
        if kwds:
            raise TypeError("Unknown arguments: %s." % kwds)

        # Special case: loc is fixed.  Use the maximum likelihood formulas
        # instead of the numerical solver.

        if f0 is not None and fscale is not None:
            # This check is for consistency with `rv_continuous.fit`.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        data = np.asarray(data)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        floc = float(floc)
        if floc != 0:
            # Shifting the data by floc. Don't do the subtraction in-place,
            # because `data` might be a view of the input array.
            data = data - floc
        if np.any(data <= 0):
            raise FitDataError("lognorm", lower=floc, upper=np.inf)
        lndata = np.log(data)

        # Three cases to handle:
        # * shape and scale both free
        # * shape fixed, scale free
        # * shape free, scale fixed

        if fscale is not None:
            # scale is fixed, shape is free
            scale = float(fscale)
            shape = np.sqrt(((lndata - np.log(scale))**2).mean())
        else:
            # scale is free.
            scale = np.exp(lndata.mean())
            # shape is either free (estimate it) or fixed (use it as given).
            shape = lndata.std() if f0 is None else float(f0)

        return shape, floc, scale


lognorm = lognorm_gen(a=0.0, name='lognorm')
class gibrat_gen(rv_continuous):
    r"""A Gibrat continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `gibrat` is:

    .. math::

        f(x) = \frac{1}{x \sqrt{2\pi}} \exp(-\frac{1}{2} (\log(x))^2)

    `gibrat` is a special case of `lognorm` with ``s=1``.

    %(after_notes)s

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        # No shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        normal_draws = random_state.standard_normal(size)
        return np.exp(normal_draws)

    def _pdf(self, x):
        # gibrat.pdf(x) = 1/(x*sqrt(2*pi)) * exp(-1/2*(log(x))**2)
        return np.exp(self._logpdf(x))

    def _logpdf(self, x):
        # Lognormal log-density with the shape pinned at 1.
        return _lognorm_logpdf(x, 1.0)

    def _cdf(self, x):
        log_x = np.log(x)
        return _norm_cdf(log_x)

    def _ppf(self, q):
        z = _norm_ppf(q)
        return np.exp(z)

    def _stats(self):
        # Same expressions as `lognorm_gen._stats` with p = exp(1).
        p = np.e
        mean = np.sqrt(p)
        variance = p * (p - 1)
        skew = np.sqrt((p - 1)) * (2 + p)
        kurt = np.polyval([1, 2, 3, 0, -6.0], p)
        return mean, variance, skew, kurt

    def _entropy(self):
        return 0.5 * np.log(2 * np.pi) + 0.5
# deprecation of gilbrat, see #15911
deprmsg = ("`gilbrat` is a misspelling of the correct name for the `gibrat` "
           "distribution, and will be removed in SciPy 1.11.")


class gilbrat_gen(gibrat_gen):
    # override __call__ protocol from rv_generic to also
    # deprecate instantiation of frozen distributions
    r"""

    .. deprecated:: 1.9.0
        `gilbrat` is deprecated, use `gibrat` instead!
        `gilbrat` is a misspelling of the correct name for the `gibrat`
        distribution, and will be removed in SciPy 1.11.

    """
    def __call__(self, *args, **kwds):
        # Warn, then freeze exactly as a plain call would.
        # align with warning text from np.deprecated that's used for methods
        warning_text = ("`gilbrat` is deprecated, use `gibrat` instead!\n"
                        + deprmsg)
        warnings.warn(warning_text, DeprecationWarning, stacklevel=2)
        return self.freeze(*args, **kwds)
# `gibrat` is the correctly spelled distribution object; `gilbrat` is the
# deprecated alias built from the warning subclass `gilbrat_gen`.
gibrat = gibrat_gen(a=0.0, name='gibrat')
gilbrat = gilbrat_gen(a=0.0, name='gilbrat')


# since the deprecated class gets instantiated upon import (and we only want
# to warn upon use), add the deprecation to each (documented) class method,
# c.f.
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gilbrat.html
_gibrat_method_names = [
    "cdf", "entropy", "expect", "fit", "interval", "isf", "logcdf", "logpdf",
    "logsf", "mean", "median", "moment", "pdf", "ppf", "rvs", "sf", "stats",
    "std", "var"
]
# Replace each listed bound method on the `gilbrat` instance with a wrapper
# produced by `np.deprecate`, passing the old name, the new name and
# `deprmsg` as the message.
for m in _gibrat_method_names:
    wrapper = np.deprecate(getattr(gilbrat, m), f"gilbrat.{m}", f"gibrat.{m}",
                           deprmsg)
    setattr(gilbrat, m, wrapper)
class maxwell_gen(rv_continuous):
    r"""A Maxwell continuous random variable.

    %(before_notes)s

    Notes
    -----
    A special case of a `chi` distribution,  with ``df=3``, ``loc=0.0``,
    and given ``scale = a``, where ``a`` is the parameter used in the
    Mathworld description [1]_.

    The probability density function for `maxwell` is:

    .. math::

        f(x) = \sqrt{2/\pi}x^2 \exp(-x^2/2)

    for :math:`x >= 0`.

    %(after_notes)s

    References
    ----------
    .. [1] http://mathworld.wolfram.com/MaxwellDistribution.html

    %(example)s
    """
    def _shape_info(self):
        # No shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        # Sample from the chi distribution with 3 degrees of freedom.
        return chi.rvs(3.0, size=size, random_state=random_state)

    def _pdf(self, x):
        # maxwell.pdf(x) = sqrt(2/pi)x**2 * exp(-x**2/2)
        gaussian_part = np.exp(-x*x/2.0)
        return _SQRT_2_OVER_PI*x*x*gaussian_part

    def _logpdf(self, x):
        # Allow x=0 without 'divide by zero' warnings
        with np.errstate(divide='ignore'):
            return _LOG_SQRT_2_OVER_PI + 2*np.log(x) - 0.5*x*x

    def _cdf(self, x):
        half_sq = x*x/2.0
        return sc.gammainc(1.5, half_sq)

    def _ppf(self, q):
        # Inverse of `_cdf`: solve gammainc(1.5, x**2/2) = q for x.
        half_sq = sc.gammaincinv(1.5, q)
        return np.sqrt(2*half_sq)

    def _stats(self):
        val = 3*np.pi-8
        mean = 2*np.sqrt(2.0/np.pi)
        variance = 3-8/np.pi
        skew = np.sqrt(2)*(32-10*np.pi)/val**1.5
        kurt = (-12*np.pi*np.pi + 160*np.pi - 384) / val**2.0
        return (mean, variance, skew, kurt)

    def _entropy(self):
        return _EULER + 0.5*np.log(2*np.pi)-0.5


maxwell = maxwell_gen(a=0.0, name='maxwell')
class mielke_gen(rv_continuous):
    r"""A Mielke Beta-Kappa / Dagum continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `mielke` is:

    .. math::

        f(x, k, s) = \frac{k x^{k-1}}{(1+x^s)^{1+k/s}}

    for :math:`x > 0` and :math:`k, s > 0`. The distribution is sometimes
    called Dagum distribution ([2]_). It was already defined in [3]_, called
    a Burr Type III distribution (`burr` with parameters ``c=s`` and
    ``d=k/s``).

    `mielke` takes ``k`` and ``s`` as shape parameters.

    %(after_notes)s

    References
    ----------
    .. [1] Mielke, P.W., 1973 "Another Family of Distributions for Describing
           and Analyzing Precipitation Data." J. Appl. Meteor., 12, 275-280
    .. [2] Dagum, C., 1977 "A new model for personal income distribution."
           Economie Appliquee, 33, 327-367.
    .. [3] Burr, I. W. "Cumulative frequency functions", Annals of
           Mathematical Statistics, 13(2), pp 215-232 (1942).

    %(example)s

    """
    def _shape_info(self):
        # Both shape parameters are real-valued on the open interval (0, inf).
        return [_ShapeInfo(shape_name, False, (0, np.inf), (False, False))
                for shape_name in ("k", "s")]

    def _pdf(self, x, k, s):
        # k*x**(k-1) / (1 + x**s)**(1 + k/s)
        numerator = k*x**(k-1.0)
        denominator = (1.0+x**s)**(1.0+k*1.0/s)
        return numerator / denominator

    def _logpdf(self, x, k, s):
        # Allow x=0 without 'divide by zero' warnings.
        with np.errstate(divide='ignore'):
            log_numerator = np.log(k) + np.log(x)*(k - 1)
            return log_numerator - np.log1p(x**s)*(1 + k/s)

    def _cdf(self, x, k, s):
        # x**k / (1 + x**s)**(k/s)
        denominator = (1.0+x**s)**(k*1.0/s)
        return x**k / denominator

    def _ppf(self, q, k, s):
        # Closed-form inverse of `_cdf`.
        q_pow = q**(s*1.0/k)
        return (q_pow/(1.0-q_pow))**(1.0/s)

    def _munp(self, n, k, s):
        def finite_moment(order, k, s):
            # n-th moment is defined for -k < n < s
            return (sc.gamma((k+order)/s)*sc.gamma(1-order/s)
                    / sc.gamma(k/s))

        # Orders at or beyond `s` are reported as infinite.
        return _lazywhere(n < s, (n, k, s), finite_moment, np.inf)


mielke = mielke_gen(a=0.0, name='mielke')
class kappa4_gen(rv_continuous):
    r"""Kappa 4 parameter distribution.

    %(before_notes)s

    Notes
    -----
    The probability density function for kappa4 is:

    .. math::

        f(x, h, k) = (1 - k x)^{1/k - 1} (1 - h (1 - k x)^{1/k})^{1/h-1}

    if :math:`h` and :math:`k` are not equal to 0.

    If :math:`h` or :math:`k` are zero then the pdf can be simplified:

    h = 0 and k != 0::

        kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
                              exp(-(1.0 - k*x)**(1.0/k))

    h != 0 and k = 0::

        kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)

    h = 0 and k = 0::

        kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x))

    kappa4 takes :math:`h` and :math:`k` as shape parameters.

    The kappa4 distribution returns other distributions when certain
    :math:`h` and :math:`k` values are used.

    +------+-------------+----------------+------------------+
    | h    | k=0.0       | k=1.0          | -inf<=k<=inf     |
    +======+=============+================+==================+
    | -1.0 | Logistic    |                | Generalized      |
    |      |             |                | Logistic(1)      |
    |      |             |                |                  |
    |      | logistic(x) |                |                  |
    +------+-------------+----------------+------------------+
    |  0.0 | Gumbel      | Reverse        | Generalized      |
    |      |             | Exponential(2) | Extreme Value    |
    |      |             |                |                  |
    |      | gumbel_r(x) |                | genextreme(x, k) |
    +------+-------------+----------------+------------------+
    |  1.0 | Exponential | Uniform        | Generalized      |
    |      |             |                | Pareto           |
    |      |             |                |                  |
    |      | expon(x)    | uniform(x)     | genpareto(x, -k) |
    +------+-------------+----------------+------------------+

    (1) There are at least five generalized logistic distributions.
        Four are described here:
        https://en.wikipedia.org/wiki/Generalized_logistic_distribution
        The "fifth" one is the one kappa4 should match which currently
        isn't implemented in scipy:
        https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution
        https://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html
    (2) This distribution is currently not in scipy.

    References
    ----------
    J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect
    to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate
    Faculty of the Louisiana State University and Agricultural and Mechanical
    College, (August, 2004),
    https://digitalcommons.lsu.edu/gradschool_dissertations/3672

    J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res.
    Develop. 38 (3), 251-258 (1994).

    B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao
    Site in the Chi River Basin, Thailand", Journal of Water Resource and
    Protection, vol. 4, 866-869, (2012).
    :doi:`10.4236/jwarp.2012.410101`

    C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A
    Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March
    2000).
    http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, h, k):
        # Every (h, k) pair is accepted: return an all-True array with the
        # broadcast shape of the two shape parameters.
        shape = np.broadcast_arrays(h, k)[0].shape
        return np.full(shape, fill_value=True)

    def _shape_info(self):
        ih = _ShapeInfo("h", False, (-np.inf, np.inf), (False, False))
        ik = _ShapeInfo("k", False, (-np.inf, np.inf), (False, False))
        return [ih, ik]

    def _get_support(self, h, k):
        # The support endpoints depend on the signs of `h` and `k`; the six
        # sign combinations below are shared by the lower (`_a`) and upper
        # (`_b`) endpoint selections.
        condlist = [np.logical_and(h > 0, k > 0),
                    np.logical_and(h > 0, k == 0),
                    np.logical_and(h > 0, k < 0),
                    np.logical_and(h <= 0, k > 0),
                    np.logical_and(h <= 0, k == 0),
                    np.logical_and(h <= 0, k < 0)]

        # Helpers for the lower endpoint.
        def f0(h, k):
            return (1.0 - np.float_power(h, -k))/k

        def f1(h, k):
            return np.log(h)

        def f3(h, k):
            a = np.empty(np.shape(h))
            a[:] = -np.inf
            return a

        def f5(h, k):
            return 1.0/k

        _a = _lazyselect(condlist,
                         [f0, f1, f0, f3, f3, f5],
                         [h, k],
                         default=np.nan)

        # Helpers for the upper endpoint. NOTE: `f0` and `f1` are rebound
        # here, so the definitions above must stay before the `_a` selection.
        def f0(h, k):
            return 1.0/k

        def f1(h, k):
            a = np.empty(np.shape(h))
            a[:] = np.inf
            return a

        _b = _lazyselect(condlist,
                         [f0, f1, f1, f0, f1, f1],
                         [h, k],
                         default=np.nan)
        return _a, _b

    def _pdf(self, x, h, k):
        # kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
        #                       (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1)
        return np.exp(self._logpdf(x, h, k))

    def _logpdf(self, x, h, k):
        # One branch per combination of zero / non-zero `h` and `k`, in the
        # same order as the helper functions f0..f3 below.
        condlist = [np.logical_and(h != 0, k != 0),
                    np.logical_and(h == 0, k != 0),
                    np.logical_and(h != 0, k == 0),
                    np.logical_and(h == 0, k == 0)]

        def f0(x, h, k):
            '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*(
                      1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0)
               logpdf = ...
            '''
            return (sc.xlog1py(1.0/k - 1.0, -k*x) +
                    sc.xlog1py(1.0/h - 1.0, -h*(1.0 - k*x)**(1.0/k)))

        def f1(x, h, k):
            '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*np.exp(-(
                      1.0 - k*x)**(1.0/k))
               logpdf = ...
            '''
            return sc.xlog1py(1.0/k - 1.0, -k*x) - (1.0 - k*x)**(1.0/k)

        def f2(x, h, k):
            '''pdf = np.exp(-x)*(1.0 - h*np.exp(-x))**(1.0/h - 1.0)
               logpdf = ...
            '''
            return -x + sc.xlog1py(1.0/h - 1.0, -h*np.exp(-x))

        def f3(x, h, k):
            '''pdf = np.exp(-x-np.exp(-x))
               logpdf = ...
            '''
            return -x - np.exp(-x)

        return _lazyselect(condlist,
                           [f0, f1, f2, f3],
                           [x, h, k],
                           default=np.nan)

    def _cdf(self, x, h, k):
        return np.exp(self._logcdf(x, h, k))

    def _logcdf(self, x, h, k):
        # Same four zero / non-zero cases as in `_logpdf`.
        condlist = [np.logical_and(h != 0, k != 0),
                    np.logical_and(h == 0, k != 0),
                    np.logical_and(h != 0, k == 0),
                    np.logical_and(h == 0, k == 0)]

        def f0(x, h, k):
            '''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h)
               logcdf = ...
            '''
            return (1.0/h)*sc.log1p(-h*(1.0 - k*x)**(1.0/k))

        def f1(x, h, k):
            '''cdf = np.exp(-(1.0 - k*x)**(1.0/k))
               logcdf = ...
            '''
            return -(1.0 - k*x)**(1.0/k)

        def f2(x, h, k):
            '''cdf = (1.0 - h*np.exp(-x))**(1.0/h)
               logcdf = ...
            '''
            return (1.0/h)*sc.log1p(-h*np.exp(-x))

        def f3(x, h, k):
            '''cdf = np.exp(-np.exp(-x))
               logcdf = ...
            '''
            return -np.exp(-x)

        return _lazyselect(condlist,
                           [f0, f1, f2, f3],
                           [x, h, k],
                           default=np.nan)

    def _ppf(self, q, h, k):
        # Closed-form inverse of the CDF for each of the four zero /
        # non-zero cases of `h` and `k`.
        condlist = [np.logical_and(h != 0, k != 0),
                    np.logical_and(h == 0, k != 0),
                    np.logical_and(h != 0, k == 0),
                    np.logical_and(h == 0, k == 0)]

        def f0(q, h, k):
            return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k)

        def f1(q, h, k):
            return 1.0/k*(1.0 - (-np.log(q))**k)

        def f2(q, h, k):
            '''ppf = -np.log((1.0 - (q**h))/h)
            '''
            return -sc.log1p(-(q**h)) + np.log(h)

        def f3(q, h, k):
            return -np.log(-np.log(q))

        return _lazyselect(condlist,
                           [f0, f1, f2, f3],
                           [q, h, k],
                           default=np.nan)

    def _get_stats_info(self, h, k):
        # Returns the threshold `maxr` that `_stats` and `_mom1_sc` compare
        # moment orders against; 5 is used when neither condition applies.
        condlist = [
            np.logical_and(h < 0, k >= 0),
            k < 0,
        ]

        def f0(h, k):
            return (-1.0/h*k).astype(int)

        def f1(h, k):
            return (-1.0/k).astype(int)

        return _lazyselect(condlist, [f0, f1], [h, k], default=5)

    def _stats(self, h, k):
        # For orders 1..4: `None` when the order is below `maxr` (for any
        # element), otherwise NaN.
        # NOTE(review): `None` presumably tells the base class to compute the
        # moment generically - confirm against `rv_continuous`.
        maxr = self._get_stats_info(h, k)
        outputs = [None if np.any(r < maxr) else np.nan for r in range(1, 5)]
        return outputs[:]

    def _mom1_sc(self, m, *args):
        # Moment of order `m` by numerical integration of `_mom_integ1` over
        # [0, 1]; NaN when `m` reaches the threshold from `_get_stats_info`.
        maxr = self._get_stats_info(args[0], args[1])
        if m >= maxr:
            return np.nan
        return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]


kappa4 = kappa4_gen(name='kappa4')
class kappa3_gen(rv_continuous):
    r"""Kappa 3 parameter distribution.

    %(before_notes)s

    Notes
    -----
    The probability density function for `kappa3` is:

    .. math::

        f(x, a) = a (a + x^a)^{-(a + 1)/a}

    for :math:`x > 0` and :math:`a > 0`.

    `kappa3` takes ``a`` as a shape parameter for :math:`a`.

    References
    ----------
    P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum
    Likelihood and Likelihood Ratio Tests", Methods in Weather Research,
    701-707, (September, 1973),
    :doi:`10.1175/1520-0493(1973)101<0701:TKDMLE>2.3.CO;2`

    B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the
    Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2,
    415-419 (2012), :doi:`10.4236/ojs.2012.24050`

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # Single real-valued shape parameter on the open interval (0, inf).
        a_info = _ShapeInfo("a", False, (0, np.inf), (False, False))
        return [a_info]

    def _pdf(self, x, a):
        # kappa3.pdf(x, a) = a*(a + x**a)**(-(a + 1)/a),     for x > 0
        base = a + x**a
        return a*base**(-1.0/a-1)

    def _cdf(self, x, a):
        # x * (a + x**a)**(-1/a)
        base = a + x**a
        return x*base**(-1.0/a)

    def _ppf(self, q, a):
        # Closed-form inverse of `_cdf`.
        denominator = q**-a - 1.0
        return (a/denominator)**(1.0/a)

    def _stats(self, a):
        # For each order 1..4: `None` when the order is below `a` (for any
        # element of `a`), NaN otherwise.
        results = []
        for order in range(1, 5):
            results.append(None if np.any(order < a) else np.nan)
        return results

    def _mom1_sc(self, m, *args):
        # Only integrate when the order `m` is strictly below the shape `a`.
        if not np.any(m >= args[0]):
            integral = integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)
            return integral[0]
        return np.nan


kappa3 = kappa3_gen(a=0.0, name='kappa3')
class moyal_gen(rv_continuous):
    r"""A Moyal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `moyal` is:

    .. math::

        f(x) = \exp(-(x + \exp(-x))/2) / \sqrt{2\pi}

    for a real number :math:`x`.

    %(after_notes)s

    This distribution has utility in high-energy physics and radiation
    detection. It describes the energy loss of a charged relativistic
    particle due to ionization of the medium [1]_. It also provides an
    approximation for the Landau distribution. For an in depth description
    see [2]_. For additional description, see [3]_.

    References
    ----------
    .. [1] J.E. Moyal, "XXX. Theory of ionization fluctuations",
           The London, Edinburgh, and Dublin Philosophical Magazine
           and Journal of Science, vol 46, 263-280, (1955).
           :doi:`10.1080/14786440308521076` (gated)
    .. [2] G. Cordeiro et al., "The beta Moyal: a useful skew distribution",
           International Journal of Research and Reviews in Applied Sciences,
           vol 10, 171-192, (2012).
           http://www.arpapress.com/Volumes/Vol10Issue2/IJRRAS_10_2_02.pdf
    .. [3] C. Walck, "Handbook on Statistical Distributions for
           Experimentalists; International Report SUF-PFY/96-01", Chapter 26,
           University of Stockholm: Stockholm, Sweden, (2007).
           http://www.stat.rice.edu/~dobelman/textfiles/DistributionsHandbook.pdf

    .. versionadded:: 1.1.0

    %(example)s

    """
    def _shape_info(self):
        # `moyal` has no shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        # Negative log of a gamma(a=0.5, scale=2) variate.
        gamma_draws = gamma.rvs(a=0.5, scale=2, size=size,
                                random_state=random_state)
        return -np.log(gamma_draws)

    def _pdf(self, x):
        unnormalized = np.exp(-0.5 * (x + np.exp(-x)))
        return unnormalized / np.sqrt(2*np.pi)

    def _cdf(self, x):
        z = np.exp(-0.5 * x) / np.sqrt(2)
        return sc.erfc(z)

    def _sf(self, x):
        z = np.exp(-0.5 * x) / np.sqrt(2)
        return sc.erf(z)

    def _ppf(self, x):
        inv = sc.erfcinv(x)
        return -np.log(2 * inv**2)

    def _stats(self):
        # Closed-form mean, variance, skewness and excess kurtosis.
        return (np.log(2) + np.euler_gamma,
                np.pi**2 / 2,
                28 * np.sqrt(2) * sc.zeta(3) / np.pi**3,
                4.)

    def _munp(self, n):
        # Closed forms for the first four raw moments, written in terms of
        # the mean `log(2) + euler_gamma`.
        if n == 1.0:
            return np.log(2) + np.euler_gamma
        if n == 2.0:
            return np.pi**2 / 2 + (np.log(2) + np.euler_gamma)**2
        if n == 3.0:
            mean = np.log(2)+np.euler_gamma
            return 1.5 * np.pi**2 * mean + mean**3 + 14 * sc.zeta(3)
        if n == 4.0:
            mean = np.log(2) + np.euler_gamma
            return (4 * 14 * sc.zeta(3) * mean
                    + 3 * np.pi**2 * mean**2
                    + mean**4
                    + 7 * np.pi**4 / 4)
        # Higher orders fall back to the generic numerical integration.
        return self._mom1_sc(n)


moyal = moyal_gen(name="moyal")
class nakagami_gen(rv_continuous):
    r"""A Nakagami continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `nakagami` is:

    .. math::

        f(x, \nu) = \frac{2 \nu^\nu}{\Gamma(\nu)} x^{2\nu-1} \exp(-\nu x^2)

    for :math:`x >= 0`, :math:`\nu > 0`. The distribution was introduced in
    [2]_, see also [1]_ for further information.

    `nakagami` takes ``nu`` as a shape parameter for :math:`\nu`.

    %(after_notes)s

    References
    ----------
    .. [1] "Nakagami distribution", Wikipedia
           https://en.wikipedia.org/wiki/Nakagami_distribution
    .. [2] M. Nakagami, "The m-distribution - A general formula of intensity
           distribution of rapid fading", Statistical methods in radio wave
           propagation, Pergamon Press, 1960, 3-36.
           :doi:`10.1016/B978-0-08-009306-2.50005-4`

    %(example)s

    """
    def _shape_info(self):
        # Single real-valued shape parameter on the open interval (0, inf).
        nu_info = _ShapeInfo("nu", False, (0, np.inf), (False, False))
        return [nu_info]

    def _pdf(self, x, nu):
        # Evaluate through the log-density.
        log_density = self._logpdf(x, nu)
        return np.exp(log_density)

    def _logpdf(self, x, nu):
        # nakagami.pdf(x, nu) = 2 * nu**nu / gamma(nu) *
        #                       x**(2*nu-1) * exp(-nu*x**2)
        log_norm = np.log(2) + sc.xlogy(nu, nu) - sc.gammaln(nu)
        return log_norm + sc.xlogy(2*nu - 1, x) - nu*x**2

    def _cdf(self, x, nu):
        # Regularized lower incomplete gamma function at nu*x**2.
        return sc.gammainc(nu, nu*x*x)

    def _ppf(self, q, nu):
        gamma_quantile = sc.gammaincinv(nu, q)
        return np.sqrt(1.0/nu*gamma_quantile)

    def _sf(self, x, nu):
        # Regularized upper incomplete gamma function at nu*x**2.
        return sc.gammaincc(nu, nu*x*x)

    def _isf(self, p, nu):
        gamma_quantile = sc.gammainccinv(nu, p)
        return np.sqrt(1/nu * gamma_quantile)

    def _stats(self, nu):
        # Mean, variance, skewness and excess kurtosis in closed form.
        mu = sc.gamma(nu+0.5)/sc.gamma(nu)/np.sqrt(nu)
        mu2 = 1.0-mu*mu
        g1 = mu * (1 - 4*nu*mu2) / 2.0 / nu / np.power(mu2, 1.5)
        g2_numerator = -6*mu**4*nu + (8*nu-2)*mu**2-2*nu + 1
        g2 = g2_numerator / (nu*mu2**2.0)
        return mu, mu2, g1, g2

    def _rvs(self, nu, size=None, random_state=None):
        # this relationship can be found in [1] or by a direct calculation
        gamma_draws = random_state.standard_gamma(nu, size=size)
        return np.sqrt(gamma_draws / nu)

    def _fitstart(self, data, args=None):
        # Default every shape guess to 1.0 when none is supplied.
        shape_guess = (1.0,) * self.numargs if args is None else args
        # Analytical justified estimates
        # see: https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous_nakagami.html
        loc = np.min(data)
        shifted = data - loc
        scale = np.sqrt(np.sum(shifted**2) / len(data))
        return shape_guess + (loc, scale)


nakagami = nakagami_gen(a=0.0, name="nakagami")
# The function name ncx2 is an abbreviation for noncentral chi squared.
def _ncx2_log_pdf(x, df, nc):
    # We use (xs**2 + ns**2)/2 = (xs - ns)**2/2  + xs*ns, and include the
    # factor of exp(-xs*ns) into the ive function to improve numerical
    # stability at large values of xs. See also `rice.pdf`.
    bessel_order = df/2.0 - 1.0
    sqrt_x = np.sqrt(x)
    sqrt_nc = np.sqrt(nc)
    log_main = (sc.xlogy(bessel_order/2.0, x/nc)
                - 0.5*(sqrt_x - sqrt_nc)**2)
    scaled_bessel = sc.ive(bessel_order, sqrt_x*sqrt_nc) / 2.0

    def add_log_bessel(main, bessel):
        return main + np.log(bessel)

    # Return log_main + np.log(scaled_bessel) avoiding np.log(0): where the
    # scaled Bessel term is not positive the log-density is -inf.
    return _lazywhere(
        scaled_bessel > 0,
        (log_main, scaled_bessel),
        f=add_log_bessel,
        fillvalue=-np.inf)
class ncx2_gen(rv_continuous):
    r"""A non-central chi-squared continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `ncx2` is:

    .. math::

        f(x, k, \lambda) = \frac{1}{2} \exp(-(\lambda+x)/2)
            (x/\lambda)^{(k-2)/4}  I_{(k-2)/2}(\sqrt{\lambda x})

    for :math:`x >= 0`, :math:`k > 0` and :math:`\lambda \ge 0`.
    :math:`k` specifies the degrees of freedom (denoted ``df`` in the
    implementation) and :math:`\lambda` is the non-centrality parameter
    (denoted ``nc`` in the implementation). :math:`I_\nu` denotes the
    modified Bessel function of the first kind of order :math:`\nu`
    (`scipy.special.iv`).

    `ncx2` takes ``df`` and ``nc`` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    @staticmethod
    def _noncentral_mask(x, nc):
        # Boolean mask, shaped like `x`, that is True where `nc` is non-zero.
        # Elements with `nc == 0` are routed to the central `chi2` formulas.
        return np.ones_like(x, dtype=bool) & (nc != 0)

    def _argcheck(self, df, nc):
        positive_finite_df = (df > 0) & np.isfinite(df)
        return positive_finite_df & (nc >= 0)

    def _shape_info(self):
        # `df` lies in the open interval (0, inf); `nc` may also equal 0.
        return [_ShapeInfo("df", False, (0, np.inf), (False, False)),
                _ShapeInfo("nc", False, (0, np.inf), (True, False))]

    def _rvs(self, df, nc, size=None, random_state=None):
        return random_state.noncentral_chisquare(df, nc, size)

    def _logpdf(self, x, df, nc):
        def central(x, df, _):
            return chi2._logpdf(x, df)

        mask = self._noncentral_mask(x, nc)
        return _lazywhere(mask, (x, df, nc), f=_ncx2_log_pdf, f2=central)

    def _pdf(self, x, df, nc):
        def central(x, df, _):
            return chi2._pdf(x, df)

        mask = self._noncentral_mask(x, nc)
        with warnings.catch_warnings():
            # Silence overflow warnings raised by the Boost ufunc.
            warnings.filterwarnings(
                "ignore", message="overflow encountered in _ncx2_pdf")
            return _lazywhere(mask, (x, df, nc), f=_boost._ncx2_pdf,
                              f2=central)

    def _cdf(self, x, df, nc):
        def central(x, df, _):
            return chi2._cdf(x, df)

        mask = self._noncentral_mask(x, nc)
        return _lazywhere(mask, (x, df, nc), f=_boost._ncx2_cdf, f2=central)

    def _ppf(self, q, df, nc):
        def central(x, df, _):
            return chi2._ppf(x, df)

        mask = self._noncentral_mask(q, nc)
        with warnings.catch_warnings():
            # Silence overflow warnings raised by the Boost ufunc.
            warnings.filterwarnings(
                "ignore", message="overflow encountered in _ncx2_ppf")
            return _lazywhere(mask, (q, df, nc), f=_boost._ncx2_ppf,
                              f2=central)

    def _sf(self, x, df, nc):
        def central(x, df, _):
            return chi2._sf(x, df)

        mask = self._noncentral_mask(x, nc)
        return _lazywhere(mask, (x, df, nc), f=_boost._ncx2_sf, f2=central)

    def _isf(self, x, df, nc):
        def central(x, df, _):
            return chi2._isf(x, df)

        mask = self._noncentral_mask(x, nc)
        with warnings.catch_warnings():
            # Silence overflow warnings raised by the Boost ufunc.
            warnings.filterwarnings(
                "ignore", message="overflow encountered in _ncx2_isf")
            return _lazywhere(mask, (x, df, nc), f=_boost._ncx2_isf,
                              f2=central)

    def _stats(self, df, nc):
        # Mean, variance, skewness and excess kurtosis from Boost.
        mean = _boost._ncx2_mean(df, nc)
        variance = _boost._ncx2_variance(df, nc)
        skewness = _boost._ncx2_skewness(df, nc)
        kurtosis_excess = _boost._ncx2_kurtosis_excess(df, nc)
        return (mean, variance, skewness, kurtosis_excess)


ncx2 = ncx2_gen(a=0.0, name='ncx2')
class ncf_gen(rv_continuous):
    r"""A non-central F distribution continuous random variable.

    %(before_notes)s

    See Also
    --------
    scipy.stats.f : Fisher distribution

    Notes
    -----
    The probability density function for `ncf` is:

    .. math::

        f(x, n_1, n_2, \lambda) =
            \exp\left(\frac{\lambda}{2} +
                      \lambda n_1 \frac{x}{2(n_1 x + n_2)}
                \right)
            n_1^{n_1/2} n_2^{n_2/2} x^{n_1/2 - 1} \\
            (n_2 + n_1 x)^{-(n_1 + n_2)/2}
            \gamma(n_1/2) \gamma(1 + n_2/2) \\
            \frac{L^{\frac{n_1}{2}-1}_{n_2/2}
                \left(-\lambda n_1 \frac{x}{2(n_1 x + n_2)}\right)}
            {B(n_1/2, n_2/2)
                \gamma\left(\frac{n_1 + n_2}{2}\right)}

    for :math:`n_1, n_2 > 0`, :math:`\lambda \ge 0`.  Here :math:`n_1` is the
    degrees of freedom in the numerator, :math:`n_2` the degrees of freedom in
    the denominator, :math:`\lambda` the non-centrality parameter,
    :math:`\gamma` is the logarithm of the Gamma function, :math:`L_n^k` is a
    generalized Laguerre polynomial and :math:`B` is the beta function.

    `ncf` takes ``df1``, ``df2`` and ``nc`` as shape parameters. If ``nc=0``,
    the distribution becomes equivalent to the Fisher distribution.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, df1, df2, nc):
        # Both degrees of freedom must be positive; `nc` may be zero.
        return (df1 > 0) & (df2 > 0) & (nc >= 0)

    def _shape_info(self):
        idf1 = _ShapeInfo("df1", False, (0, np.inf), (False, False))
        idf2 = _ShapeInfo("df2", False, (0, np.inf), (False, False))
        inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
        return [idf1, idf2, inc]

    def _rvs(self, dfn, dfd, nc, size=None, random_state=None):
        # Delegates sampling to the generator's `noncentral_f` method.
        return random_state.noncentral_f(dfn, dfd, nc, size)

    def _pdf(self, x, dfn, dfd, nc):
        # ncf.pdf(x, df1, df2, nc) = exp(nc/2 + nc*df1*x/(2*(df1*x+df2))) *
        #             df1**(df1/2) * df2**(df2/2) * x**(df1/2-1) *
        #             (df2+df1*x)**(-(df1+df2)/2) *
        #             gamma(df1/2)*gamma(1+df2/2) *
        #             L^{v1/2-1}^{v2/2}(-nc*v1*x/(2*(v1*x+v2))) /
        #             (B(v1/2, v2/2) * gamma((v1+v2)/2))
        return _boost._ncf_pdf(x, dfn, dfd, nc)

    # The CDF, PPF, SF and ISF are thin wrappers around the corresponding
    # `_boost` functions.
    def _cdf(self, x, dfn, dfd, nc):
        return _boost._ncf_cdf(x, dfn, dfd, nc)

    def _ppf(self, q, dfn, dfd, nc):
        return _boost._ncf_ppf(q, dfn, dfd, nc)

    def _sf(self, x, dfn, dfd, nc):
        return _boost._ncf_sf(x, dfn, dfd, nc)

    def _isf(self, x, dfn, dfd, nc):
        return _boost._ncf_isf(x, dfn, dfd, nc)

    def _munp(self, n, dfn, dfd, nc):
        # Raw moment of order `n`:
        #   (dfn/dfd)**n * exp(-nc/2) * Gamma(n + dfn/2) * Gamma(dfd/2 - n)
        #   / Gamma(dfd/2) * hyp1f1(n + dfn/2, dfn/2, nc/2)
        # The gamma-function factors are combined in log space (`term`)
        # before being exponentiated.
        val = (dfn * 1.0/dfd)**n
        term = sc.gammaln(n+0.5*dfn) + sc.gammaln(0.5*dfd-n) - sc.gammaln(dfd*0.5)
        val *= np.exp(-nc / 2.0+term)
        val *= sc.hyp1f1(n+0.5*dfn, 0.5*dfn, 0.5*nc)
        return val

    def _stats(self, dfn, dfd, nc, moments='mv'):
        # Mean and variance are always computed; skewness and excess kurtosis
        # only when requested through `moments` ('s' / 'k'), else None.
        mu = _boost._ncf_mean(dfn, dfd, nc)
        mu2 = _boost._ncf_variance(dfn, dfd, nc)
        g1 = _boost._ncf_skewness(dfn, dfd, nc) if 's' in moments else None
        g2 = _boost._ncf_kurtosis_excess(
            dfn, dfd, nc) if 'k' in moments else None
        return mu, mu2, g1, g2


ncf = ncf_gen(a=0.0, name='ncf')
class t_gen(rv_continuous):
    r"""A Student's t continuous random variable.

    For the noncentral t distribution, see `nct`.

    %(before_notes)s

    See Also
    --------
    nct

    Notes
    -----
    The probability density function for `t` is:

    .. math::

        f(x, \nu) = \frac{\Gamma((\nu+1)/2)}
                        {\sqrt{\pi \nu} \Gamma(\nu/2)}
                    (1+x^2/\nu)^{-(\nu+1)/2}

    where :math:`x` is a real number and the degrees of freedom parameter
    :math:`\nu` (denoted ``df`` in the implementation) satisfies
    :math:`\nu > 0`. :math:`\Gamma` is the gamma function
    (`scipy.special.gamma`).

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # Single real-valued shape parameter on the open interval (0, inf).
        df_info = _ShapeInfo("df", False, (0, np.inf), (False, False))
        return [df_info]

    def _rvs(self, df, size=None, random_state=None):
        return random_state.standard_t(df, size=size)

    def _pdf(self, x, df):
        def normal_pdf(x, df):
            # Infinite degrees of freedom: standard normal density.
            return norm._pdf(x)

        def student_pdf(x, df):
            gamma_ratio = np.exp(sc.gammaln((df+1)/2)-sc.gammaln(df/2))
            return gamma_ratio / (
                np.sqrt(df*np.pi)*(1+(x**2)/df)**((df+1)/2))

        return _lazywhere(df == np.inf, (x, df),
                          f=normal_pdf, f2=student_pdf)

    def _logpdf(self, x, df):
        def normal_logpdf(x, df):
            # Infinite degrees of freedom: standard normal log-density.
            return norm._logpdf(x)

        def student_logpdf(x, df):
            log_gamma_ratio = sc.gammaln((df+1)/2) - sc.gammaln(df/2)
            return log_gamma_ratio - (0.5*np.log(df*np.pi)
                                      + (df+1)/2*np.log(1+(x**2)/df))

        return _lazywhere(df == np.inf, (x, df),
                          f=normal_logpdf, f2=student_logpdf)

    def _cdf(self, x, df):
        return sc.stdtr(df, x)

    def _sf(self, x, df):
        # The distribution is symmetric about 0, so sf(x) = cdf(-x).
        return sc.stdtr(df, -x)

    def _ppf(self, q, df):
        return sc.stdtrit(df, q)

    def _isf(self, q, df):
        # By symmetry, isf(q) = -ppf(q).
        return -sc.stdtrit(df, q)

    def _stats(self, df):
        # infinite df -> normal distribution (0.0, 1.0, 0.0, 0.0)
        infinite_df = np.isposinf(df)

        def all_inf(df):
            return np.broadcast_to(np.inf, df.shape)

        mu = np.where(df > 1, 0.0, np.inf)

        # Variance: infinite for 1 < df <= 2, df/(df-2) for finite df > 2,
        # 1 for infinite df, NaN otherwise.
        variance_conds = ((df > 1) & (df <= 2),
                          (df > 2) & np.isfinite(df),
                          infinite_df)
        variance_funcs = (all_inf,
                          lambda df: df / (df-2.0),
                          lambda df: np.broadcast_to(1, df.shape))
        mu2 = _lazyselect(variance_conds, variance_funcs, (df,), np.nan)

        g1 = np.where(df > 3, 0.0, np.nan)

        # Excess kurtosis: infinite for 2 < df <= 4, 6/(df-4) for finite
        # df > 4, 0 for infinite df, NaN otherwise.
        kurtosis_conds = ((df > 2) & (df <= 4),
                          (df > 4) & np.isfinite(df),
                          infinite_df)
        kurtosis_funcs = (all_inf,
                          lambda df: 6.0 / (df-4.0),
                          lambda df: np.broadcast_to(0, df.shape))
        g2 = _lazyselect(kurtosis_conds, kurtosis_funcs, (df,), np.nan)

        return mu, mu2, g1, g2

    def _entropy(self, df):
        # Infinite degrees of freedom: entropy of the standard normal.
        if df == np.inf:
            return norm._entropy()

        half_df = df/2
        half_df_plus_one = (df + 1)/2
        digamma_term = half_df_plus_one*(sc.digamma(half_df_plus_one)
                                         - sc.digamma(half_df))
        return digamma_term + np.log(np.sqrt(df)*sc.beta(half_df, 0.5))


t = t_gen(name='t')
class nct_gen(rv_continuous):
    r"""A non-central Student's t continuous random variable.

    %(before_notes)s

    Notes
    -----
    If :math:`Y` is a standard normal random variable and :math:`V` is
    an independent chi-square random variable (`chi2`) with :math:`k` degrees
    of freedom, then

    .. math::

        X = \frac{Y + c}{\sqrt{V/k}}

    has a non-central Student's t distribution on the real line.
    The degrees of freedom parameter :math:`k` (denoted ``df`` in the
    implementation) satisfies :math:`k > 0` and the noncentrality parameter
    :math:`c` (denoted ``nc`` in the implementation) is a real number.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, df, nc):
        # `nc == nc` is False only for NaN, so this rejects NaN `nc` while
        # allowing any other real value.
        return (df > 0) & (nc == nc)

    def _shape_info(self):
        idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
        inc = _ShapeInfo("nc", False, (-np.inf, np.inf), (False, False))
        return [idf, inc]

    def _rvs(self, df, nc, size=None, random_state=None):
        # Sample via the construction in the class Notes: a normal variate
        # with mean `nc` divided by sqrt(chi2(df) / df).
        n = norm.rvs(loc=nc, size=size, random_state=random_state)
        c2 = chi2.rvs(df, size=size, random_state=random_state)
        return n * np.sqrt(df) / np.sqrt(c2)

    def _pdf(self, x, df, nc):
        # Boost version has accuracy issues in left tail; see gh-16591
        # `* 1.0` promotes the shape parameters to floating point.
        n = df*1.0
        nc = nc*1.0
        x2 = x*x
        ncx2 = nc*nc*x2
        fac1 = n + x2
        # Log of the common prefactor; exponentiated into `Px` below.
        trm1 = (n/2.*np.log(n) + sc.gammaln(n+1)
                - (n*np.log(2) + nc*nc/2 + (n/2)*np.log(fac1)
                   + sc.gammaln(n/2)))
        Px = np.exp(trm1)
        valF = ncx2 / (2*fac1)
        # NOTE: `trm1` is reused here for the first of the two
        # hypergeometric terms that multiply the prefactor.
        trm1 = (np.sqrt(2)*nc*x*sc.hyp1f1(n/2+1, 1.5, valF)
                / np.asarray(fac1*sc.gamma((n+1)/2)))
        trm2 = (sc.hyp1f1((n+1)/2, 0.5, valF)
                / np.asarray(np.sqrt(fac1)*sc.gamma(n/2+1)))
        Px *= trm1+trm2
        # Clip away negative values so the returned density is never
        # negative.
        return np.clip(Px, 0, None)

    def _cdf(self, x, df, nc):
        # Clip the Boost result into the valid probability range [0, 1].
        return np.clip(_boost._nct_cdf(x, df, nc), 0, 1)

    def _ppf(self, q, df, nc):
        return _boost._nct_ppf(q, df, nc)

    def _sf(self, x, df, nc):
        # Clip the Boost result into the valid probability range [0, 1].
        return np.clip(_boost._nct_sf(x, df, nc), 0, 1)

    def _isf(self, x, df, nc):
        return _boost._nct_isf(x, df, nc)

    def _stats(self, df, nc, moments='mv'):
        # Mean and variance are always computed; skewness and kurtosis only
        # when requested through `moments` ('s' / 'k'), else None.
        mu = _boost._nct_mean(df, nc)
        mu2 = _boost._nct_variance(df, nc)
        g1 = _boost._nct_skewness(df, nc) if 's' in moments else None
        # NOTE(review): 3 is subtracted from the value returned by
        # `_boost._nct_kurtosis_excess`; presumably the wrapped routine
        # returns the non-excess kurtosis - confirm against the Boost build.
        g2 = _boost._nct_kurtosis_excess(df, nc)-3 if 'k' in moments else None
        return mu, mu2, g1, g2


nct = nct_gen(name="nct")
class pareto_gen(rv_continuous):
    r"""A Pareto continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `pareto` is:

    .. math::

        f(x, b) = \frac{b}{x^{b+1}}

    for :math:`x \ge 1`, :math:`b > 0`.

    `pareto` takes ``b`` as a shape parameter for :math:`b`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("b", False, (0, np.inf), (False, False))]

    def _pdf(self, x, b):
        # pareto.pdf(x, b) = b / x**(b+1)
        return b * x**(-b-1)

    def _cdf(self, x, b):
        return 1 - x**(-b)

    def _ppf(self, q, b):
        return pow(1-q, -1.0/b)

    def _sf(self, x, b):
        return x**(-b)

    def _stats(self, b, moments='mv'):
        # Each requested statistic is only finite/defined above a threshold
        # on `b`. Start from an array filled with inf (mean, variance) or
        # NaN (skewness, kurtosis) and overwrite the entries where the
        # threshold is exceeded with the closed-form value.
        mu, mu2, g1, g2 = None, None, None, None
        if 'm' in moments:
            mask = b > 1
            bt = np.extract(mask, b)
            mu = np.full(np.shape(b), fill_value=np.inf)
            np.place(mu, mask, bt / (bt-1.0))
        if 'v' in moments:
            mask = b > 2
            bt = np.extract(mask, b)
            mu2 = np.full(np.shape(b), fill_value=np.inf)
            np.place(mu2, mask, bt / (bt-2.0) / (bt-1.0)**2)
        if 's' in moments:
            mask = b > 3
            bt = np.extract(mask, b)
            g1 = np.full(np.shape(b), fill_value=np.nan)
            vals = 2 * (bt + 1.0) * np.sqrt(bt - 2.0) / ((bt - 3.0) * np.sqrt(bt))
            np.place(g1, mask, vals)
        if 'k' in moments:
            mask = b > 4
            bt = np.extract(mask, b)
            g2 = np.full(np.shape(b), fill_value=np.nan)
            vals = (6.0*np.polyval([1.0, 1.0, -6, -2], bt) /
                    np.polyval([1.0, -7.0, 12.0, 0.0], bt))
            np.place(g2, mask, vals)
        return mu, mu2, g1, g2

    def _entropy(self, c):
        # NOTE: `c` is the shape parameter that the rest of the class
        # calls `b`.
        return 1 + 1.0/c - np.log(c)

    @_call_super_mom
    @inherit_docstring_from(rv_continuous)
    def fit(self, data, *args, **kwds):
        # NOTE(review): `_call_super_mom` is defined outside this block;
        # presumably it routes method-of-moments requests to the generic
        # implementation - confirm.
        parameters = _check_fit_input_parameters(self, data, args, kwds)
        data, fshape, floc, fscale = parameters

        # ensure that any fixed parameters don't violate constraints of the
        # distribution before continuing.
        if floc is not None and np.min(data) - floc < (fscale or 0):
            raise FitDataError("pareto", lower=1, upper=np.inf)

        ndata = data.shape[0]

        def get_shape(scale, location):
            # The first-order necessary condition on `shape` can be solved in
            # closed form
            return ndata / np.sum(np.log((data - location) / scale))

        # Chained comparison: true only when both `floc` and `fscale` are
        # None, i.e. neither location nor scale is fixed.
        if floc is fscale is None:
            # The support of the distribution is `(x - loc)/scale > 0`.
            # The method of Lagrange multipliers turns this constraint
            # into an equation that can be solved numerically.
            # See gh-12545 for details.

            def dL_dScale(shape, scale):
                # The partial derivative of the log-likelihood function w.r.t.
                # the scale.
                return ndata * shape / scale

            def dL_dLocation(shape, location):
                # The partial derivative of the log-likelihood function w.r.t.
                # the location.
                return (shape + 1) * np.sum(1 / (data - location))

            def fun_to_solve(scale):
                # optimize the scale by setting the partial derivatives
                # w.r.t. to location and scale equal and solving.
                location = np.min(data) - scale
                shape = fshape or get_shape(scale, location)
                return dL_dLocation(shape, location) - dL_dScale(shape, scale)

            def interval_contains_root(lbrack, rbrack):
                # return true if the signs disagree.
                return (np.sign(fun_to_solve(lbrack)) !=
                        np.sign(fun_to_solve(rbrack)))

            # set brackets for `root_scalar` to use when optimizing over the
            # scale such that a root is likely between them. Use user supplied
            # guess or default 1.
            brack_start = kwds.get('scale', 1)
            lbrack, rbrack = brack_start / 2, brack_start * 2
            # if a root is not between the brackets, iteratively expand them
            # until they include a sign change, checking after each bracket is
            # modified.
            while (not interval_contains_root(lbrack, rbrack)
                   and (lbrack > 0 or rbrack < np.inf)):
                lbrack /= 2
                rbrack *= 2
            res = root_scalar(fun_to_solve, bracket=[lbrack, rbrack])
            if res.converged:
                scale = res.root
                loc = np.min(data) - scale
                shape = fshape or get_shape(scale, loc)

                # The Pareto distribution requires that its parameters satisfy
                # the condition `fscale + floc <= min(data)`. However, to
                # avoid numerical issues, we require that `fscale + floc`
                # is strictly less than `min(data)`. If this condition
                # is not satisfied, reduce the scale with `np.nextafter` to
                # ensure that data does not fall outside of the support.
                if not (scale + loc) < np.min(data):
                    scale = np.min(data) - loc
                    scale = np.nextafter(scale, 0)
                return shape, loc, scale
            else:
                # Root finding did not converge: fall back to the generic
                # fit. Note that only `kwds` (not `args`) are forwarded.
                return super().fit(data, **kwds)
        elif floc is None:
            # Scale is fixed, location is free.
            loc = np.min(data) - fscale
        else:
            loc = floc
        # Source: Evans, Hastings, and Peacock (2000), Statistical
        # Distributions, 3rd. Ed., John Wiley and Sons. Page 149.
        scale = fscale or np.min(data) - loc
        shape = fshape or get_shape(scale, loc)
        return shape, loc, scale


pareto = pareto_gen(a=1.0, name="pareto")
class lomax_gen(rv_continuous):
    r"""A Lomax (Pareto of the second kind) continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `lomax` is:

    .. math::

        f(x, c) = \frac{c}{(1+x)^{c+1}}

    for :math:`x \ge 0`, :math:`c > 0`.

    `lomax` takes ``c`` as a shape parameter for :math:`c`.

    `lomax` is a special case of `pareto` with ``loc=-1.0``.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        # single shape parameter `c` on the open interval (0, inf)
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [c_info]

    def _pdf(self, x, c):
        # lomax.pdf(x, c) = c / (1+x)**(c+1)
        base = 1.0 + x
        exponent = c + 1.0
        return c*1.0/base**exponent

    def _logpdf(self, x, c):
        log_base = sc.log1p(x)
        return np.log(c) - (c+1)*log_base

    def _cdf(self, x, c):
        # 1 - (1+x)**(-c), written with expm1/log1p
        log_sf = -c*sc.log1p(x)
        return -sc.expm1(log_sf)

    def _sf(self, x, c):
        log_sf = -c*sc.log1p(x)
        return np.exp(log_sf)

    def _logsf(self, x, c):
        log_base = sc.log1p(x)
        return -c*log_base

    def _ppf(self, q, c):
        scaled = -sc.log1p(-q)/c
        return sc.expm1(scaled)

    def _stats(self, c):
        # Reuse the moments of `pareto` shifted by ``loc=-1.0``.
        mean, var, skewness, kurt = pareto.stats(c, loc=-1.0, moments='mvsk')
        return mean, var, skewness, kurt

    def _entropy(self, c):
        inv_c = 1.0/c
        return 1+inv_c-np.log(c)


lomax = lomax_gen(a=0.0, name="lomax")
class pearson3_gen(rv_continuous):
    r"""A pearson type III continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `pearson3` is:

    .. math::

        f(x, \kappa) = \frac{|\beta|}{\Gamma(\alpha)}
                       (\beta (x - \zeta))^{\alpha - 1}
                       \exp(-\beta (x - \zeta))

    where:

    .. math::

            \beta = \frac{2}{\kappa}

            \alpha = \beta^2 = \frac{4}{\kappa^2}

            \zeta = -\frac{\alpha}{\beta} = -\beta

    :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
    Pass the skew :math:`\kappa` into `pearson3` as the shape parameter
    ``skew``.

    %(after_notes)s

    %(example)s

    References
    ----------
    R.W. Vogel and D.E. McMartin, "Probability Plot Goodness-of-Fit and
    Skewness Estimation Procedures for the Pearson Type 3 Distribution", Water
    Resources Research, Vol.27, 3149-3158 (1991).

    L.R. Salvosa, "Tables of Pearson's Type III Function", Ann. Math. Statist.,
    Vol.1, 191-198 (1930).

    "Using Modern Computing Tools to Fit the Pearson Type III Distribution to
    Aviation Loads Data", Office of Aviation Research (2003).

    """
    def _preprocess(self, x, skew):
        # Shared set-up for `_logpdf`, `_cdf`, `_ppf` and `_rvs`.
        #
        # Returns ``(ans, x, transx, mask, invmask, beta, alpha, zeta)``:
        # `ans` is a writable output array with the broadcast shape of `x`
        # and `skew`; `mask` marks the entries where the normal
        # approximation is used and `invmask` is its complement; `beta`,
        # `alpha`, `zeta` and `transx` are computed only for the `invmask`
        # entries.
        #
        # The real 'loc' and 'scale' are handled in the calling pdf(...). The
        # local variables 'loc' and 'scale' within pearson3._pdf are set to
        # the defaults just to keep them as part of the equations for
        # documentation.
        loc = 0.0
        scale = 1.0

        # If skew is small, return _norm_pdf. The divide between pearson3
        # and norm was found by brute force and is approximately a skew of
        # 0.000016.  No one, I hope, would actually use a skew value even
        # close to this small.
        norm2pearson_transition = 0.000016

        ans, x, skew = np.broadcast_arrays(1.0, x, skew)
        # broadcast results are views; copy so that `ans` can be written to
        ans = ans.copy()

        # mask is True where skew is small enough to use the normal approx.
        mask = np.absolute(skew) < norm2pearson_transition
        invmask = ~mask

        beta = 2.0 / (skew[invmask] * scale)
        alpha = (scale * beta)**2
        zeta = loc - alpha / beta

        transx = beta * (x[invmask] - zeta)
        return ans, x, transx, mask, invmask, beta, alpha, zeta

    def _argcheck(self, skew):
        # The _argcheck function in rv_continuous only allows positive
        # arguments.  The skew argument for pearson3 can be zero (which is
        # handled through the normal approximation in `_preprocess`) or
        # negative, so accept every finite skew.
        return np.isfinite(skew)

    def _shape_info(self):
        return [_ShapeInfo("skew", False, (-np.inf, np.inf), (False, False))]

    def _stats(self, skew):
        # mean, variance, skewness and excess kurtosis
        m = 0.0
        v = 1.0
        s = skew
        k = 1.5*skew**2
        return m, v, s, k

    def _pdf(self, x, skew):
        # pearson3.pdf(x, skew) = abs(beta) / gamma(alpha) *
        #     (beta * (x - zeta))**(alpha - 1) * exp(-beta*(x - zeta))
        # Do the calculation in _logpdf since helps to limit
        # overflow/underflow problems
        ans = np.exp(self._logpdf(x, skew))
        # NaNs coming out of the log-density are reported as zero density.
        if ans.ndim == 0:
            if np.isnan(ans):
                return 0.0
            return ans
        ans[np.isnan(ans)] = 0.0
        return ans

    def _logpdf(self, x, skew):
        #   PEARSON3 logpdf                           GAMMA logpdf
        #   np.log(abs(beta))
        # + (alpha - 1)*np.log(beta*(x - zeta))       + (a - 1)*np.log(x)
        # - beta*(x - zeta)                           - x
        # - sc.gammaln(alpha)                         - sc.gammaln(a)
        ans, x, transx, mask, invmask, beta, alpha, _ = (
            self._preprocess(x, skew))

        ans[mask] = np.log(_norm_pdf(x[mask]))
        # use logpdf instead of _logpdf to fix issue mentioned in gh-12640
        # (_logpdf does not return correct result for alpha = 1)
        ans[invmask] = np.log(abs(beta)) + gamma.logpdf(transx, alpha)
        return ans

    def _cdf(self, x, skew):
        ans, x, transx, mask, invmask, _, alpha, _ = (
            self._preprocess(x, skew))

        ans[mask] = _norm_cdf(x[mask])

        skew = np.broadcast_to(skew, invmask.shape)
        # The `*a` masks index the full-size arrays; the `*b` masks index
        # the arrays that were already reduced with `invmask`.
        invmask1a = np.logical_and(invmask, skew > 0)
        invmask1b = skew[invmask] > 0
        # use cdf instead of _cdf to fix issue mentioned in gh-12640
        # (_cdf produces NaNs for inputs outside support)
        ans[invmask1a] = gamma.cdf(transx[invmask1b], alpha[invmask1b])

        # The gamma._cdf approach wasn't working with negative skew.
        # Note that multiplying the skew by -1 reflects about x=0.
        # So instead of evaluating the CDF with negative skew at x,
        # evaluate the SF with positive skew at -x.
        invmask2a = np.logical_and(invmask, skew < 0)
        invmask2b = skew[invmask] < 0
        # gamma._sf produces NaNs when transx < 0, so use gamma.sf
        ans[invmask2a] = gamma.sf(transx[invmask2b], alpha[invmask2b])

        return ans

    def _rvs(self, skew, size=None, random_state=None):
        skew = np.broadcast_to(skew, size)
        ans, _, _, mask, invmask, beta, alpha, zeta = (
            self._preprocess([0], skew))

        nsmall = mask.sum()
        nbig = mask.size - nsmall
        ans[mask] = random_state.standard_normal(nsmall)
        ans[invmask] = random_state.standard_gamma(alpha, nbig)/beta + zeta

        if size == ():
            ans = ans[0]
        return ans

    def _ppf(self, q, skew):
        ans, q, _, mask, invmask, beta, alpha, zeta = (
            self._preprocess(q, skew))
        ans[mask] = _norm_ppf(q[mask])
        # boolean indexing returns a copy, so `q` can be modified in place
        q = q[invmask]
        q[beta < 0] = 1 - q[beta < 0]  # for negative skew; see gh-17050
        ans[invmask] = sc.gammaincinv(alpha, q)/beta + zeta
        return ans

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        Note that method of moments (`method='MM'`) is not
        available for this distribution.\n\n""")
    def fit(self, data, *args, **kwds):
        if kwds.get("method", None) == 'MM':
            raise NotImplementedError("Fit `method='MM'` is not available for "
                                      "the Pearson3 distribution. Please try "
                                      "the default `method='MLE'`.")
        else:
            # Zero-argument `super()` always resolves relative to this
            # class.  `super(type(self), self)` would resolve back to this
            # very method for any subclass and recurse without end.
            return super().fit(data, *args, **kwds)


pearson3 = pearson3_gen(name="pearson3")
class powerlaw_gen(rv_continuous):
    r"""A power-function continuous random variable.

    %(before_notes)s

    See Also
    --------
    pareto

    Notes
    -----
    The probability density function for `powerlaw` is:

    .. math::

        f(x, a) = a x^{a-1}

    for :math:`0 \le x \le 1`, :math:`a > 0`.

    `powerlaw` takes ``a`` as a shape parameter for :math:`a`.

    %(after_notes)s

    For example, the support of `powerlaw` can be adjusted from the default
    interval ``[0, 1]`` to the interval ``[c, c+d]`` by setting ``loc=c`` and
    ``scale=d``. For a power-law distribution with infinite support, see
    `pareto`.

    `powerlaw` is a special case of `beta` with ``b=1``.

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (False, False))]

    def _pdf(self, x, a):
        # powerlaw.pdf(x, a) = a * x**(a-1)
        return a*x**(a-1.0)

    def _logpdf(self, x, a):
        return np.log(a) + sc.xlogy(a - 1, x)

    def _cdf(self, x, a):
        return x**(a*1.0)

    def _logcdf(self, x, a):
        return a*np.log(x)

    def _ppf(self, q, a):
        return pow(q, 1.0/a)

    def _stats(self, a):
        # mean, variance, skewness and excess kurtosis
        return (a / (a + 1.0),
                a / (a + 2.0) / (a + 1.0) ** 2,
                -2.0 * ((a - 1.0) / (a + 3.0)) * np.sqrt((a + 2.0) / a),
                6 * np.polyval([1, -1, -6, 2], a) / (a * (a + 3.0) * (a + 4)))

    def _entropy(self, a):
        return 1 - 1.0/a - np.log(a)

    def _support_mask(self, x, a):
        # Exclude x == 0 from the support when a < 1.
        return (super()._support_mask(x, a)
                & ((x != 0) | (a >= 1)))

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        Notes specifically for ``powerlaw.fit``: If the location is a free
        parameter and the value returned for the shape parameter is less than
        one, the true maximum likelihood approaches infinity. This causes
        numerical difficulties, and the resulting estimates are approximate.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # Summary of the strategy:
        #
        # 1) If the scale and location are fixed, return the shape according
        #    to a formula.
        #
        # 2) If the scale is fixed, there are two possibilities for the other
        #    parameters - one corresponding with shape less than one, and
        #    another with shape greater than one. Calculate both, and return
        #    whichever has the better log-likelihood.
        #
        # At this point, the scale is known to be free.
        #
        # 3) If the location is fixed, return the scale and shape according to
        #    formulas (or, if the shape is fixed, the fixed shape).
        #
        # At this point, the location and scale are both free. There are
        # separate equations depending on whether the shape is less than one or
        # greater than one.
        #
        # 4a) If the shape is less than one, there are formulas for shape,
        #     location, and scale.
        # 4b) If the shape is greater than one, there are formulas for shape
        #     and scale, but there is a condition for location to be solved
        #     numerically.
        #
        # If the shape is fixed and less than one, we use 4a.
        # If the shape is fixed and greater than one, we use 4b.
        # If the shape is also free, we calculate fits using both 4a and 4b
        # and choose the one that results a better log-likelihood.
        #
        # In many cases, the use of `np.nextafter` is used to avoid numerical
        # issues.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        if len(np.unique(data)) == 1:
            return super().fit(data, *args, **kwds)

        data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
                                                                 args, kwds)
        penalized_nllf_args = [data, (self._fitstart(data),)]
        penalized_nllf = self._reduce_func(penalized_nllf_args, {})[1]

        # ensure that any fixed parameters don't violate constraints of the
        # distribution before continuing. The support of the distribution
        # is `0 < (x - loc)/scale < 1`.
        if floc is not None:
            if not data.min() > floc:
                raise FitDataError('powerlaw', 0, 1)
            if fscale is not None and not data.max() <= floc + fscale:
                raise FitDataError('powerlaw', 0, 1)

        if fscale is not None:
            if fscale <= 0:
                raise ValueError("Negative or zero `fscale` is outside the "
                                 "range allowed by the distribution.")
            # `np.ptp` is used rather than the `ndarray.ptp` method, which
            # was removed in NumPy 2.0.
            if fscale <= np.ptp(data):
                msg = "`fscale` must be greater than the range of data."
                raise ValueError(msg)

        def get_shape(data, loc, scale):
            # The first-order necessary condition on `shape` can be solved in
            # closed form. It can be used no matter the assumption of the
            # value of the shape.
            N = len(data)
            return - N / (np.sum(np.log(data - loc)) - N*np.log(scale))

        def get_scale(data, loc):
            # analytical solution for `scale` based on the location.
            # It can be used no matter the assumption of the value of the
            # shape.
            return data.max() - loc

        # 1) The location and scale are both fixed. Analytically determine the
        # shape.
        if fscale is not None and floc is not None:
            return get_shape(data, floc, fscale), floc, fscale

        # 2) The scale is fixed. There are two possibilities for the other
        # parameters. Choose the option with better log-likelihood.
        if fscale is not None:
            # using `data.min()` as the optimal location
            loc_lt1 = np.nextafter(data.min(), -np.inf)
            shape_lt1 = fshape or get_shape(data, loc_lt1, fscale)
            ll_lt1 = penalized_nllf((shape_lt1, loc_lt1, fscale), data)

            # using `data.max() - scale` as the optimal location
            loc_gt1 = np.nextafter(data.max() - fscale, np.inf)
            shape_gt1 = fshape or get_shape(data, loc_gt1, fscale)
            ll_gt1 = penalized_nllf((shape_gt1, loc_gt1, fscale), data)

            # the candidate with the smaller `penalized_nllf` value is kept
            if ll_lt1 < ll_gt1:
                return shape_lt1, loc_lt1, fscale
            else:
                return shape_gt1, loc_gt1, fscale

        # 3) The location is fixed. Return the analytical scale and the
        # analytical (or fixed) shape.
        if floc is not None:
            scale = get_scale(data, floc)
            shape = fshape or get_shape(data, floc, scale)
            return shape, floc, scale

        # 4) Location and scale are both free
        # 4a) Use formulas that assume `shape <= 1`.

        def fit_loc_scale_w_shape_lt_1():
            loc = np.nextafter(data.min(), -np.inf)
            if np.abs(loc) < np.finfo(loc.dtype).tiny:
                loc = np.sign(loc) * np.finfo(loc.dtype).tiny
            scale = np.nextafter(get_scale(data, loc), np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return shape, loc, scale

        # 4b) Fit under the assumption that `shape > 1`. The support
        # of the distribution is `(x - loc)/scale <= 1`. The method of Lagrange
        # multipliers turns this constraint into the condition that
        # dL_dScale - dL_dLocation must be zero, which is solved numerically.
        # (Alternatively, substitute the constraint into the objective
        # function before deriving the likelihood equation for location.)

        def dL_dScale(data, shape, scale):
            # The partial derivative of the log-likelihood function w.r.t.
            # the scale.
            return -data.shape[0] * shape / scale

        def dL_dLocation(data, shape, loc):
            # The partial derivative of the log-likelihood function w.r.t.
            # the location.
            return (shape - 1) * np.sum(1 / (loc - data))  # -1/(data-loc)

        def dL_dLocation_star(loc):
            # The derivative of the log-likelihood function w.r.t.
            # the location, given optimal shape and scale
            scale = np.nextafter(get_scale(data, loc), -np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return dL_dLocation(data, shape, loc)

        def fun_to_solve(loc):
            # optimize the location by setting the partial derivatives
            # w.r.t. to location and scale equal and solving.
            scale = np.nextafter(get_scale(data, loc), -np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return (dL_dScale(data, shape, scale)
                    - dL_dLocation(data, shape, loc))

        def fit_loc_scale_w_shape_gt_1():
            # set brackets for `root_scalar` to use when optimizing over the
            # location such that a root is likely between them.
            rbrack = np.nextafter(data.min(), -np.inf)

            # if the sign of `dL_dLocation_star` is positive at rbrack,
            # we're not going to find the root we're looking for
            delta = (data.min() - rbrack)
            while dL_dLocation_star(rbrack) > 0:
                rbrack = data.min() - delta
                delta *= 2

            def interval_contains_root(lbrack, rbrack):
                # Check if the interval (lbrack, rbrack) contains the root.
                return (np.sign(fun_to_solve(lbrack))
                        != np.sign(fun_to_solve(rbrack)))

            lbrack = rbrack - 1

            # if the sign doesn't change between the brackets, move the left
            # bracket until it does. (The right bracket remains fixed at the
            # maximum permissible value.)
            i = 1.0
            while (not interval_contains_root(lbrack, rbrack)
                   and lbrack != -np.inf):
                lbrack = (data.min() - i)
                i *= 2

            root = optimize.root_scalar(fun_to_solve, bracket=(lbrack, rbrack))

            loc = np.nextafter(root.root, -np.inf)
            scale = np.nextafter(get_scale(data, loc), np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return shape, loc, scale

        # Shape is fixed - choose 4a or 4b accordingly.
        if fshape is not None and fshape <= 1:
            return fit_loc_scale_w_shape_lt_1()
        elif fshape is not None and fshape > 1:
            return fit_loc_scale_w_shape_gt_1()

        # Shape is free
        fit_shape_lt1 = fit_loc_scale_w_shape_lt_1()
        ll_lt1 = self.nnlf(fit_shape_lt1, data)

        fit_shape_gt1 = fit_loc_scale_w_shape_gt_1()
        ll_gt1 = self.nnlf(fit_shape_gt1, data)

        # Accept a candidate only when its shape is consistent with the
        # assumption it was derived under; otherwise use the generic fit.
        if ll_lt1 <= ll_gt1 and fit_shape_lt1[0] <= 1:
            return fit_shape_lt1
        elif ll_lt1 > ll_gt1 and fit_shape_gt1[0] > 1:
            return fit_shape_gt1
        else:
            return super().fit(data, *args, **kwds)


powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw")
class powerlognorm_gen(rv_continuous):
    r"""A power log-normal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `powerlognorm` is:

    .. math::

        f(x, c, s) = \frac{c}{x s} \phi(\log(x)/s)
                     (\Phi(-\log(x)/s))^{c-1}

    where :math:`\phi` is the normal pdf, and :math:`\Phi` is the normal cdf,
    and :math:`x > 0`, :math:`s, c > 0`.

    `powerlognorm` takes :math:`c` and :math:`s` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
        i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
        return [ic, i_s]

    def _pdf(self, x, c, s):
        # powerlognorm.pdf(x, c, s) = c / (x*s) * phi(log(x)/s) *
        #                                         (Phi(-log(x)/s))**(c-1),
        return (c/(x*s) * _norm_pdf(np.log(x)/s) *
                pow(_norm_cdf(-np.log(x)/s), c*1.0-1.0))

    def _cdf(self, x, c, s):
        return 1.0 - pow(_norm_cdf(-np.log(x)/s), c*1.0)

    def _ppf(self, q, c, s):
        return np.exp(-s * _norm_ppf(pow(1.0 - q, 1.0 / c)))

    # The survival function is Phi(-log(x)/s)**c.  Evaluating it directly
    # (rather than as ``1 - cdf``) keeps precision in the far upper tail,
    # where ``1 - cdf`` rounds to exactly zero.

    def _logsf(self, x, c, s):
        return c * _norm_logcdf(-np.log(x)/s)

    def _sf(self, x, c, s):
        return np.exp(self._logsf(x, c, s))

    def _isf(self, q, c, s):
        # Inverse of `_sf`; avoids forming ``1 - q``, which loses tiny `q`.
        return np.exp(-s * _norm_ppf(pow(q, 1.0 / c)))


powerlognorm = powerlognorm_gen(a=0.0, name="powerlognorm")
class powernorm_gen(rv_continuous):
    r"""A power normal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `powernorm` is:

    .. math::

        f(x, c) = c \phi(x) (\Phi(-x))^{c-1}

    where :math:`\phi` is the normal pdf, :math:`\Phi` is the normal cdf,
    :math:`x` is any real, and :math:`c > 0`.

    `powernorm` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

    def _pdf(self, x, c):
        # powernorm.pdf(x, c) = c * phi(x) * (Phi(-x))**(c-1)
        return c*_norm_pdf(x) * (_norm_cdf(-x)**(c-1.0))

    def _logpdf(self, x, c):
        return np.log(c) + _norm_logpdf(x) + (c-1)*_norm_logcdf(-x)

    def _cdf(self, x, c):
        return 1.0-_norm_cdf(-x)**(c*1.0)

    def _ppf(self, q, c):
        return -_norm_ppf(pow(1.0 - q, 1.0 / c))

    # The survival function is Phi(-x)**c.  Evaluating it directly (rather
    # than as ``1 - cdf``) keeps precision in the far upper tail, where
    # ``1 - cdf`` rounds to exactly zero.

    def _logsf(self, x, c):
        return c * _norm_logcdf(-x)

    def _sf(self, x, c):
        return np.exp(self._logsf(x, c))

    def _isf(self, q, c):
        # Inverse of `_sf`; avoids forming ``1 - q``, which loses tiny `q`.
        return -_norm_ppf(pow(q, 1.0 / c))


powernorm = powernorm_gen(name='powernorm')
class rdist_gen(rv_continuous):
    r"""An R-distributed (symmetric beta) continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `rdist` is:

    .. math::

        f(x, c) = \frac{(1-x^2)^{c/2-1}}{B(1/2, c/2)}

    for :math:`-1 \le x \le 1`, :math:`c > 0`. `rdist` is also called the
    symmetric beta distribution: if B has a `beta` distribution with
    parameters (c/2, c/2), then X = 2*B - 1 follows a R-distribution with
    parameter c.

    `rdist` takes ``c`` as a shape parameter for :math:`c`.

    This distribution includes the following distribution kernels as
    special cases::

        c = 2:  uniform
        c = 3:  `semicircular`
        c = 4:  Epanechnikov (parabolic)
        c = 6:  quartic (biweight)
        c = 8:  triweight

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
        return [c_info]

    # Every method below maps x in [-1, 1] to (x + 1)/2 in [0, 1] and
    # delegates to the `beta` distribution with both shapes equal to c/2.
    def _pdf(self, x, c):
        log_density = self._logpdf(x, c)
        return np.exp(log_density)

    def _logpdf(self, x, c):
        half_c = c/2
        unit_x = (x + 1)/2
        # the -log(2) term comes from the change of variable
        return -np.log(2) + beta._logpdf(unit_x, half_c, half_c)

    def _cdf(self, x, c):
        half_c = c/2
        unit_x = (x + 1)/2
        return beta._cdf(unit_x, half_c, half_c)

    def _ppf(self, q, c):
        half_c = c/2
        unit_quantile = beta._ppf(q, half_c, half_c)
        return 2*unit_quantile - 1

    def _rvs(self, c, size=None, random_state=None):
        half_c = c/2
        unit_sample = random_state.beta(half_c, half_c, size)
        return 2 * unit_sample - 1

    def _munp(self, n, c):
        # (1 - n % 2) zeroes out the odd moments
        even_flag = 1 - (n % 2)
        top = even_flag * sc.beta((n + 1.0) / 2, c / 2.0)
        bottom = sc.beta(1. / 2, c / 2.)
        return top / bottom


rdist = rdist_gen(a=-1.0, b=1.0, name="rdist")
class rayleigh_gen(rv_continuous):
    r"""A Rayleigh continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `rayleigh` is:

    .. math::

        f(x) = x \exp(-x^2/2)

    for :math:`x \ge 0`.

    `rayleigh` is a special case of `chi` with ``df=2``.

    %(after_notes)s

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        return []

    def _rvs(self, size=None, random_state=None):
        return chi.rvs(2, size=size, random_state=random_state)

    def _pdf(self, r):
        # rayleigh.pdf(r) = r * exp(-r**2/2)
        return np.exp(self._logpdf(r))

    def _logpdf(self, r):
        return np.log(r) - 0.5 * r * r

    def _cdf(self, r):
        return -sc.expm1(-0.5 * r**2)

    def _ppf(self, q):
        return np.sqrt(-2 * sc.log1p(-q))

    def _sf(self, r):
        return np.exp(self._logsf(r))

    def _logsf(self, r):
        return -0.5 * r * r

    def _isf(self, q):
        return np.sqrt(-2 * np.log(q))

    def _stats(self):
        # mean, variance, skewness and excess kurtosis
        val = 4 - np.pi
        return (np.sqrt(np.pi/2),
                val/2,
                2*(np.pi-3)*np.sqrt(np.pi)/val**1.5,
                6*np.pi/val-16/val**2)

    def _entropy(self):
        return _EULER/2.0 + 1 - 0.5*np.log(2)

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        Notes specifically for ``rayleigh.fit``: If the location is fixed with
        the `floc` parameter, this method uses an analytical formula to find
        the scale.  Otherwise, this function uses a numerical root finder on
        the first order conditions of the log-likelihood function to find the
        MLE.  The root finder brackets the solution using the data alone, so
        the `loc` and `scale` guesses and any other parameters for the
        optimizer are ignored.\n\n""")
    def fit(self, data, *args, **kwds):
        # Returns the tuple ``(loc, scale)``.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)
        data, floc, fscale = _check_fit_input_parameters(self, data,
                                                         args, kwds)

        def scale_mle(loc):
            # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
            # and Peacock (2000), Page 175
            return (np.sum((data - loc) ** 2) / (2 * len(data))) ** .5

        def loc_mle(loc):
            # This implicit equation for `loc` is used when
            # both `loc` and `scale` are free.
            xm = data - loc
            s1 = xm.sum()
            s2 = (xm**2).sum()
            s3 = (1/xm).sum()
            return s1 - s2/(2*len(data))*s3

        def loc_mle_scale_fixed(loc, scale=fscale):
            # This implicit equation for `loc` is used when
            # `scale` is fixed but `loc` is not.
            xm = data - loc
            return xm.sum() - scale**2 * (1/xm).sum()

        if floc is not None:
            # `loc` is fixed, analytically determine `scale`.
            if np.any(data - floc <= 0):
                # Every observation must lie strictly above `floc`, i.e.
                # 0 < (x - loc)/scale, so the reported lower bound is 0.
                raise FitDataError("rayleigh", lower=0, upper=np.inf)
            else:
                return floc, scale_mle(floc)

        # `loc` is free: solve the relevant first-order condition with a
        # bracketing root finder.  The right bracket sits just below the
        # smallest observation so that all of `data - loc` stay positive.
        fun = loc_mle if fscale is None else loc_mle_scale_fixed
        rbrack = np.nextafter(np.min(data), -np.inf)
        lbrack = _get_left_bracket(fun, rbrack)
        res = optimize.root_scalar(fun, bracket=(lbrack, rbrack))
        if not res.converged:
            raise FitSolverError(res.flag)
        loc = res.root
        scale = fscale or scale_mle(loc)
        return loc, scale


rayleigh = rayleigh_gen(a=0.0, name="rayleigh")
class reciprocal_gen(rv_continuous):
    r"""A loguniform or reciprocal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for this class is:

    .. math::

        f(x, a, b) = \frac{1}{x \log(b/a)}

    for :math:`a \le x \le b`, :math:`b > a > 0`. This class takes
    :math:`a` and :math:`b` as shape parameters.

    %(after_notes)s

    %(example)s

    This doesn't show the equal probability of ``0.01``, ``0.1`` and
    ``1``. This is best when the x-axis is log-scaled:

    >>> import numpy as np
    >>> fig, ax = plt.subplots(1, 1)
    >>> ax.hist(np.log10(r))
    >>> ax.set_ylabel("Frequency")
    >>> ax.set_xlabel("Value of random variable")
    >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
    >>> ticks = ["$10^{{ {} }}$".format(i) for i in [-2, -1, 0]]
    >>> ax.set_xticklabels(ticks)  # doctest: +SKIP
    >>> plt.show()

    This random variable will be log-uniform regardless of the base chosen for
    ``a`` and ``b``. Let's specify with base ``2`` instead:

    >>> rvs = %(name)s(2**-2, 2**0).rvs(size=1000)

    Values of ``1/4``, ``1/2`` and ``1`` are equally likely with this random
    variable.  Here's the histogram:

    >>> fig, ax = plt.subplots(1, 1)
    >>> ax.hist(np.log2(rvs))
    >>> ax.set_ylabel("Frequency")
    >>> ax.set_xlabel("Value of random variable")
    >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
    >>> ticks = ["$2^{{ {} }}$".format(i) for i in [-2, -1, 0]]
    >>> ax.set_xticklabels(ticks)  # doctest: +SKIP
    >>> plt.show()

    """
    def _argcheck(self, a, b):
        # valid shapes satisfy 0 < a < b
        lower_positive = a > 0
        ordered = b > a
        return lower_positive & ordered

    def _shape_info(self):
        infos = [_ShapeInfo(shape_name, False, (0, np.inf), (False, False))
                 for shape_name in ("a", "b")]
        return infos

    def _fitstart(self, data):
        # Reasonable, since support is [a, b]
        shape_guess = (np.min(data), np.max(data))
        return super()._fitstart(data, args=shape_guess)

    def _get_support(self, a, b):
        # the shape parameters are the end points of the support
        return a, b

    def _pdf(self, x, a, b):
        # reciprocal.pdf(x, a, b) = 1 / (x*log(b/a))
        log_ratio = np.log(b * 1.0 / a)
        return 1.0 / (x * log_ratio)

    def _logpdf(self, x, a, b):
        log_ratio = np.log(b * 1.0 / a)
        return -np.log(x) - np.log(log_ratio)

    def _cdf(self, x, a, b):
        log_ratio = np.log(b * 1.0 / a)
        return (np.log(x)-np.log(a)) / log_ratio

    def _ppf(self, q, a, b):
        ratio = b*1.0/a
        return a*pow(ratio, q)

    def _munp(self, n, a, b):
        log_ratio = np.log(b*1.0/a)
        power_gap = pow(b*1.0, n) - pow(a*1.0, n)
        return 1.0/log_ratio / n * power_gap

    def _entropy(self, a, b):
        log_ratio = np.log(b*1.0/a)
        return 0.5*np.log(a*b)+np.log(log_ratio)

    fit_note = """\
        `loguniform`/`reciprocal` is over-parameterized. `fit` automatically
         fixes `scale` to 1 unless `fscale` is provided by the user.\n\n"""

    @extend_notes_in_docstring(rv_continuous, notes=fit_note)
    def fit(self, data, *args, **kwds):
        # Fix the scale at 1 unless the caller supplied `fscale`.
        kwds.setdefault('fscale', 1)
        return super().fit(data, *args, **kwds)


loguniform = reciprocal_gen(name="loguniform")
reciprocal = reciprocal_gen(name="reciprocal")
class rice_gen(rv_continuous):
    r"""A Rice continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `rice` is:

    .. math::

        f(x, b) = x \exp(- \frac{x^2 + b^2}{2}) I_0(x b)

    for :math:`x >= 0`, :math:`b > 0`. :math:`I_0` is the modified Bessel
    function of order zero (`scipy.special.i0`).

    `rice` takes ``b`` as a shape parameter for :math:`b`.

    %(after_notes)s

    The Rice distribution describes the length, :math:`r`, of a 2-D vector with
    components :math:`(U+u, V+v)`, where :math:`U, V` are constant, :math:`u,
    v` are independent Gaussian random variables with standard deviation
    :math:`s`.  Let :math:`R = \sqrt{U^2 + V^2}`. Then the pdf of :math:`r` is
    ``rice.pdf(x, R/s, scale=s)``.

    %(example)s

    """
    def _argcheck(self, b):
        # b == 0 is accepted
        return b >= 0

    def _shape_info(self):
        b_info = _ShapeInfo("b", False, (0, np.inf), (True, False))
        return [b_info]

    def _rvs(self, b, size=None, random_state=None):
        # https://en.wikipedia.org/wiki/Rice_distribution
        # Length of a 2-D normal vector whose components both have mean
        # b/sqrt(2).
        noise = random_state.standard_normal(size=(2,) + size)
        components = b/np.sqrt(2) + noise
        return np.sqrt((components*components).sum(axis=0))

    def _cdf(self, x, b):
        x_sq = np.square(x)
        b_sq = np.square(b)
        return sc.chndtr(x_sq, 2, b_sq)

    def _ppf(self, q, b):
        b_sq = np.square(b)
        return np.sqrt(sc.chndtrix(q, 2, b_sq))

    def _pdf(self, x, b):
        # rice.pdf(x, b) = x * exp(-(x**2+b**2)/2) * I[0](x*b)
        #
        # We use (x**2 + b**2)/2 = ((x-b)**2)/2 + xb.
        # The factor of np.exp(-xb) is then included in the i0e function
        # in place of the modified Bessel function, i0, improving
        # numerical stability for large values of xb.
        gap = x-b
        return x * np.exp(-gap*gap/2.0) * sc.i0e(x*b)

    def _munp(self, n, b):
        half_n = n/2.0
        gamma_arg = 1 + half_n
        half_b_sq = b*b/2.0
        return (2.0**(half_n) * np.exp(-half_b_sq) * sc.gamma(gamma_arg) *
                sc.hyp1f1(gamma_arg, 1, half_b_sq))


rice = rice_gen(a=0.0, name="rice")
class recipinvgauss_gen(rv_continuous):
    r"""A reciprocal inverse Gaussian continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `recipinvgauss` is:

    .. math::

        f(x, \mu) = \frac{1}{\sqrt{2\pi x}}
                    \exp\left(\frac{-(1-\mu x)^2}{2\mu^2x}\right)

    for :math:`x \ge 0`.

    `recipinvgauss` takes ``mu`` as a shape parameter for :math:`\mu`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        mu_info = _ShapeInfo("mu", False, (0, np.inf), (False, False))
        return [mu_info]

    def _pdf(self, x, mu):
        # recipinvgauss.pdf(x, mu) =
        #                     1/sqrt(2*pi*x) * exp(-(1-mu*x)**2/(2*x*mu**2))
        log_density = self._logpdf(x, mu)
        return np.exp(log_density)

    def _logpdf(self, x, mu):
        def log_density(x, mu):
            # only evaluated where x > 0
            return (-(1 - mu*x)**2.0 / (2*x*mu**2.0)
                    - 0.5*np.log(2*np.pi*x))

        # -inf is reported wherever x <= 0
        return _lazywhere(x > 0, (x, mu), log_density, fillvalue=-np.inf)

    def _cdf(self, x, mu):
        inv_mu = 1.0/mu
        minus_term = inv_mu - x
        plus_term = inv_mu + x
        inv_sqrt_x = 1.0/np.sqrt(x)
        first = _norm_cdf(-inv_sqrt_x*minus_term)
        second = np.exp(2.0/mu)*_norm_cdf(-inv_sqrt_x*plus_term)
        return first - second

    def _sf(self, x, mu):
        inv_mu = 1.0/mu
        minus_term = inv_mu - x
        plus_term = inv_mu + x
        inv_sqrt_x = 1.0/np.sqrt(x)
        first = _norm_cdf(inv_sqrt_x*minus_term)
        second = np.exp(2.0/mu)*_norm_cdf(-inv_sqrt_x*plus_term)
        return first + second

    def _rvs(self, mu, size=None, random_state=None):
        # reciprocal of a Wald variate
        wald_sample = random_state.wald(mu, 1.0, size=size)
        return 1.0/wald_sample


recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss')
class semicircular_gen(rv_continuous):
    r"""A semicircular continuous random variable.

    %(before_notes)s

    See Also
    --------
    rdist

    Notes
    -----
    The probability density function for `semicircular` is:

    .. math::

        f(x) = \frac{2}{\pi} \sqrt{1-x^2}

    for :math:`-1 \le x \le 1`.

    The distribution is a special case of `rdist` with `c = 3`.

    %(after_notes)s

    References
    ----------
    .. [1] "Wigner semicircle distribution",
           https://en.wikipedia.org/wiki/Wigner_semicircle_distribution

    %(example)s

    """
    def _shape_info(self):
        # No shape parameters.
        return []

    def _pdf(self, x):
        # semicircular.pdf(x) = 2/pi * sqrt(1 - x**2)
        half_chord = np.sqrt(1 - x*x)
        return 2.0/np.pi*half_chord

    def _logpdf(self, x):
        # log(2/pi) + log(1 - x**2)/2, with log1p used for the second term
        log_norm = np.log(2/np.pi)
        return log_norm + 0.5*sc.log1p(-x*x)

    def _cdf(self, x):
        segment = x*np.sqrt(1 - x*x) + np.arcsin(x)
        return 0.5 + 1.0/np.pi*segment

    def _ppf(self, q):
        # Delegate to `rdist` with c = 3 (see the class Notes).
        return rdist._ppf(q, 3)

    def _rvs(self, size=None, random_state=None):
        # Generate points uniformly distributed over the area under the pdf
        # (a semi-circle) from a random radius and a random angle, and keep
        # the horizontal coordinate.
        radius = np.sqrt(random_state.uniform(size=size))
        cos_angle = np.cos(np.pi * random_state.uniform(size=size))
        return radius * cos_angle

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        return 0, 0.25, 0, -1.0

    def _entropy(self):
        return 0.64472988584940017414


semicircular = semicircular_gen(a=-1.0, b=1.0, name="semicircular")
class skewcauchy_gen(rv_continuous):
    r"""A skewed Cauchy random variable.

    %(before_notes)s

    See Also
    --------
    cauchy : Cauchy distribution

    Notes
    -----

    The probability density function for `skewcauchy` is:

    .. math::

        f(x) = \frac{1}{\pi \left(\frac{x^2}{\left(a\, \text{sign}(x) + 1
                                                   \right)^2} + 1 \right)}

    for a real number :math:`x` and skewness parameter :math:`-1 < a < 1`.

    When :math:`a=0`, the distribution reduces to the usual Cauchy
    distribution.

    %(after_notes)s

    References
    ----------
    .. [1] "Skewed generalized *t* distribution", Wikipedia
       https://en.wikipedia.org/wiki/Skewed_generalized_t_distribution#Skewed_Cauchy_distribution

    %(example)s

    """
    def _argcheck(self, a):
        # The skewness parameter must lie strictly inside (-1, 1).
        return np.abs(a) < 1

    def _shape_info(self):
        return [_ShapeInfo("a", False, (-1.0, 1.0), (False, False))]

    def _pdf(self, x, a):
        # The effective scale is (1 - a) left of the origin and (1 + a)
        # right of it.
        side_scale = a * np.sign(x) + 1
        return 1 / (np.pi * (x**2 / side_scale**2 + 1))

    def _cdf(self, x, a):
        left_scale = 1 - a
        right_scale = 1 + a
        left_branch = (left_scale / 2
                       + left_scale / np.pi * np.arctan(x / left_scale))
        right_branch = (left_scale / 2
                        + right_scale / np.pi * np.arctan(x / right_scale))
        return np.where(x <= 0, left_branch, right_branch)

    def _ppf(self, x, a):
        # Probabilities below cdf(0) map to the left branch.
        on_left = x < self._cdf(0, a)
        left_scale = 1 - a
        right_scale = 1 + a
        centered = x - left_scale / 2
        left_branch = np.tan(np.pi / left_scale * centered) * left_scale
        right_branch = np.tan(np.pi / right_scale * centered) * right_scale
        return np.where(on_left, left_branch, right_branch)

    def _stats(self, a, moments='mvsk'):
        # All four statistics are reported as nan.
        return np.nan, np.nan, np.nan, np.nan

    def _fitstart(self, data):
        # Start from zero skewness; estimate location and scale from the
        # median and the quartiles of the data.
        q1, median, q3 = np.percentile(data, [25, 50, 75])
        return 0.0, median, (q3 - q1)/2


skewcauchy = skewcauchy_gen(name='skewcauchy')
class skew_norm_gen(rv_continuous):
    r"""A skew-normal random variable.

    %(before_notes)s

    Notes
    -----
    The pdf is::

        skewnorm.pdf(x, a) = 2 * norm.pdf(x) * norm.cdf(a*x)

    `skewnorm` takes a real number :math:`a` as a skewness parameter.
    When ``a = 0`` the distribution is identical to a normal distribution
    (`norm`). `rvs` implements the method of [1]_.

    %(after_notes)s

    %(example)s

    References
    ----------
    .. [1] A. Azzalini and A. Capitanio (1999). Statistical applications of
        the multivariate skew-normal distribution. J. Roy. Statist. Soc.,
        B 61, 579-602. :arxiv:`0911.2093`

    """
    def _argcheck(self, a):
        return np.isfinite(a)

    def _shape_info(self):
        return [_ShapeInfo("a", False, (-np.inf, np.inf), (False, False))]

    def _pdf(self, x, a):
        # Where a == 0 the plain normal density is used; elsewhere the
        # skewed form 2 * norm.pdf(x) * norm.cdf(a*x).
        def symmetric_density(x, a):
            return _norm_pdf(x)

        def skewed_density(x, a):
            return 2.*_norm_pdf(x)*_norm_cdf(a*x)

        return _lazywhere(a == 0, (x, a), symmetric_density,
                          f2=skewed_density)

    def _cdf(self, x, a):
        result = _boost._skewnorm_cdf(x, 0, 1, a)
        # for some reason, a isn't broadcasted if some of x are invalid
        a = np.broadcast_to(a, result.shape)
        # Boost is not accurate in left tail when a > 0; recompute those
        # entries with the generic implementation of the parent class.
        use_generic = (a > 0) & (result < 1e-6)
        result[use_generic] = super()._cdf(x[use_generic], a[use_generic])
        return np.clip(result, 0, 1)

    def _ppf(self, x, a):
        return _boost._skewnorm_ppf(x, 0, 1, a)

    def _sf(self, x, a):
        # Boost's SF is implemented this way. Use whatever customizations
        # we made in the _cdf.
        return self._cdf(-x, -a)

    def _isf(self, x, a):
        return _boost._skewnorm_isf(x, 0, 1, a)

    def _rvs(self, a, size=None, random_state=None):
        # Two independent standard normal draws are combined with weight
        # delta = a/sqrt(1 + a**2); the sign of the result is flipped
        # wherever the first draw is negative.
        first = random_state.normal(size=size)
        second = random_state.normal(size=size)
        delta = a/np.sqrt(1 + a**2)
        combined = delta*first + second*np.sqrt(1 - delta**2)
        return np.where(first >= 0, combined, -combined)

    def _stats(self, a, moments='mvsk'):
        # Only the statistics requested in `moments` are filled in; the
        # others stay None.
        const = np.sqrt(2/np.pi) * a/np.sqrt(1 + a**2)
        mean = variance = skewness = kurtosis = None

        if 'm' in moments:
            mean = const
        if 'v' in moments:
            variance = 1 - const**2
        if 's' in moments:
            skewness = ((4 - np.pi)/2) * (const/np.sqrt(1 - const**2))**3
        if 'k' in moments:
            kurtosis = (2*(np.pi - 3)) * (const**4/(1 - const**2)**2)

        return [mean, variance, skewness, kurtosis]

    # For odd order, each noncentral moment of the skew-normal distribution
    # with location 0 and scale 1 can be expressed as a polynomial in delta,
    # where delta = a/sqrt(1 + a**2) and `a` is the skew-normal shape
    # parameter.  The dictionary _skewnorm_odd_moments defines those
    # polynomials for orders up to 19.  The dict is implemented as a cached
    # property to reduce the impact of the creation of the dict on import
    # time.
    @cached_property
    def _skewnorm_odd_moments(self):
        return {
            1: Polynomial([1]),
            3: Polynomial([3, -1]),
            5: Polynomial([15, -10, 3]),
            7: Polynomial([105, -105, 63, -15]),
            9: Polynomial([945, -1260, 1134, -540, 105]),
            11: Polynomial([10395, -17325, 20790, -14850, 5775, -945]),
            13: Polynomial([135135, -270270, 405405, -386100, 225225, -73710,
                            10395]),
            15: Polynomial([2027025, -4729725, 8513505, -10135125, 7882875,
                            -3869775, 1091475, -135135]),
            17: Polynomial([34459425, -91891800, 192972780, -275675400,
                            268017750, -175429800, 74220300, -18378360,
                            2027025]),
            19: Polynomial([654729075, -1964187225, 4714049340, -7856748900,
                            9166207050, -7499623950, 4230557100, -1571349780,
                            346621275, -34459425]),
        }

    def _munp(self, order, a):
        if not order & 1:
            # For even order, the moment is just (order-1)!!, where !! is
            # the notation for the double factorial; for an odd integer m,
            # m!! is m*(m-2)*...*3*1.
            # We could use special.factorial2, but we know the argument is
            # odd, so avoid the overhead of that function and compute the
            # result directly here.
            return sc.gamma((order + 1)/2) * 2**(order/2) / _SQRT_PI

        if order > 19:
            raise NotImplementedError("skewnorm noncentral moments not "
                                      "implemented for odd orders greater "
                                      "than 19.")
        # Use the precomputed polynomials that were derived from the
        # moment generating function.
        delta = a/np.sqrt(1 + a**2)
        return (delta * self._skewnorm_odd_moments[order](delta**2)
                * _SQRT_2_OVER_PI)

    @extend_notes_in_docstring(rv_continuous, notes="""\
        If ``method='mm'``, parameters fixed by the user are respected, and the
        remaining parameters are used to match distribution and sample moments
        where possible. For example, if the user fixes the location with
        ``floc``, the parameters will only match the distribution skewness and
        variance to the sample skewness and variance; no attempt will be made
        to match the means or minimize a norm of the errors.
        Note that the maximum possible skewness magnitude of a
        `scipy.stats.skewnorm` distribution is approximately 0.9952717; if the
        magnitude of the data's sample skewness exceeds this, the returned
        shape parameter ``a`` will be infinite.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # this extracts fixed shape, location, and scale however they
        # are specified, and also leaves them in `kwds`
        data, fa, floc, fscale = _check_fit_input_parameters(self, data,
                                                             args, kwds)
        method = kwds.get("method", "mle").lower()
        use_mm = method == "mm"

        # See https://en.wikipedia.org/wiki/Skew_normal_distribution for
        # moment formulas.
        def skew_d(d):  # skewness in terms of delta
            return (4-np.pi)/2 * ((d * np.sqrt(2 / np.pi))**3
                                  / (1 - 2*d**2 / np.pi)**(3/2))

        # If skewness of data is greater than max possible population
        # skewness, MoM won't provide a good guess. Get out early.
        sample_skew = stats.skew(data)
        max_skew = skew_d(1)
        if (abs(sample_skew) >= max_skew and not use_mm
                and fa is None and not args):
            return super().fit(data, *args, **kwds)

        # If method is method of moments, we don't need the user's guesses.
        # Otherwise, extract the guesses from args and kwds.
        if use_mm:
            a = loc = scale = None
        else:
            a = args[0] if args else None
            loc = kwds.pop('loc', None)
            scale = kwds.pop('scale', None)

        if fa is None and a is None:  # not fixed and no guess: use MoM
            # Solve for the delta whose distribution skewness matches the
            # (clipped) sample skewness, then convert delta to `a`.
            sample_skew = np.clip(sample_skew, -max_skew, max_skew)
            delta = root_scalar(lambda d: skew_d(d) - sample_skew,
                                bracket=[-1, 1]).root
            with np.errstate(divide='ignore'):
                a = (np.sqrt(np.divide(delta**2, (1-delta**2)))
                     * np.sign(sample_skew))
        else:
            if fa is not None:
                a = fa
            delta = a / np.sqrt(1 + a**2)

        if fscale is not None:
            scale = fscale
        elif scale is None:
            # match the sample variance
            variance = np.var(data)
            scale = np.sqrt(variance / (1 - 2*delta**2/np.pi))

        if floc is not None:
            loc = floc
        elif loc is None:
            # match the sample mean
            mean = np.mean(data)
            loc = mean - scale*delta*np.sqrt(2/np.pi)

        if use_mm:
            return a, loc, scale

        # At this point, parameter "guesses" may equal the fixed parameters
        # in kwds. No harm in passing them as guesses, too.
        return super().fit(data, a, loc=loc, scale=scale, **kwds)


skewnorm = skew_norm_gen(name='skewnorm')
class trapezoid_gen(rv_continuous):
    r"""A trapezoidal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The trapezoidal distribution can be represented with an up-sloping line
    from ``loc`` to ``(loc + c*scale)``, then constant to ``(loc + d*scale)``
    and then downsloping from ``(loc + d*scale)`` to ``(loc+scale)``.  This
    defines the trapezoid base from ``loc`` to ``(loc+scale)`` and the flat
    top from ``c`` to ``d`` proportional to the position along the base
    with ``0 <= c <= d <= 1``.  When ``c=d``, this is equivalent to `triang`
    with the same values for `loc`, `scale` and `c`.
    The method of [1]_ is used for computing moments.

    `trapezoid` takes :math:`c` and :math:`d` as shape parameters.

    %(after_notes)s

    The standard form is in the range [0, 1] with the flat top running
    from c to d.
    The location parameter shifts the start to `loc`.
    The scale parameter changes the width from 1 to `scale`.

    %(example)s

    References
    ----------
    .. [1] Kacker, R.N. and Lawrence, J.F. (2007). Trapezoidal and triangular
       distributions for Type B evaluation of standard uncertainty.
       Metrologia 44, 117-127. :doi:`10.1088/0026-1394/44/2/003`


    """
    def _argcheck(self, c, d):
        # Both shapes must lie in [0, 1], with c not exceeding d.
        c_in_unit = (c >= 0) & (c <= 1)
        d_in_unit = (d >= 0) & (d <= 1)
        return c_in_unit & d_in_unit & (d >= c)

    def _shape_info(self):
        info_c = _ShapeInfo("c", False, (0, 1.0), (True, True))
        info_d = _ShapeInfo("d", False, (0, 1.0), (True, True))
        return [info_c, info_d]

    def _pdf(self, x, c, d):
        # Height of the flat top.
        u = 2 / (d-c+1)

        def rising(x, c, d, u):
            return u * x / c

        def plateau(x, c, d, u):
            return u

        def falling(x, c, d, u):
            return u * (1-x) / (1-d)

        regions = [x < c, (c <= x) & (x <= d), x > d]
        return _lazyselect(regions, [rising, plateau, falling], (x, c, d, u))

    def _cdf(self, x, c, d):
        def rising(x, c, d):
            return x**2 / c / (d-c+1)

        def plateau(x, c, d):
            return (c + 2 * (x-c)) / (d-c+1)

        def falling(x, c, d):
            return 1-((1-x) ** 2 / (d-c+1) / (1-d))

        regions = [x < c, (c <= x) & (x <= d), x > d]
        return _lazyselect(regions, [rising, plateau, falling], (x, c, d))

    def _ppf(self, q, c, d):
        # Probabilities at the two corners of the flat top decide which of
        # the three inverse branches applies.
        q_at_c = self._cdf(c, c, d)
        q_at_d = self._cdf(d, c, d)
        from_rising = np.sqrt(q * c * (1 + d - c))
        from_plateau = 0.5 * q * (1 + d - c) + 0.5 * c
        from_falling = 1 - np.sqrt((1 - q) * (d - c + 1) * (1 - d))
        return np.select([q < q_at_c, q <= q_at_d, q > q_at_d],
                         [from_rising, from_plateau, from_falling])

    def _munp(self, n, c, d):
        # Using the parameterization from Kacker, 2007, with
        # a=bottom left, c=top left, d=top right, b=bottom right, then
        #     E[X^n] = h/(n+1)/(n+2) [(b^{n+2}-d^{n+2})/(b-d)
        #                             - ((c^{n+2} - a^{n+2})/(c-a)]
        # with h = 2/((b-a) - (d-c)). The corresponding parameterization
        # in scipy, has a'=loc, c'=loc+c*scale, d'=loc+d*scale, b'=loc+scale,
        # which for standard form reduces to a'=0, b'=1, c'=c, d'=d.
        # Substituting into E[X^n] gives the bd' term as (1 - d^{n+2})/(1 - d)
        # and the ac' term as c^{n+1} for the standard form. The bd' term has
        # numerical difficulties near d=1, so replace (1 - d^{n+2})/(1-d)
        # with expm1((n+2)*log(d))/(d-1).
        # Testing with n=18 for c=(1e-30,1-eps) shows that this is stable.
        # We still require an explicit test for d=1 to prevent divide by zero,
        # and now a test for d=0 to prevent log(0).
        ac_term = c**(n+1)
        bd_term = _lazyselect(
            [d == 0.0, (0.0 < d) & (d < 1.0), d == 1.0],
            [lambda d: 1.0,
             lambda d: np.expm1((n+2) * np.log(d)) / (d-1.0),
             lambda d: n+2],
            [d])
        return 2.0 / (1.0+d-c) * (bd_term - ac_term) / ((n+1) * (n+2))

    def _entropy(self, c, d):
        # Using the parameterization from Wikipedia (van Dorp, 2003)
        # with a=bottom left, c=top left, d=top right, b=bottom right
        # gives a'=loc, b'=loc+c*scale, c'=loc+d*scale, d'=loc+scale,
        # which for loc=0, scale=1 is a'=0, b'=c, c'=d, d'=1.
        # Substituting into the entropy formula from Wikipedia gives
        # the following result.
        return 0.5 * (1.0-d+c) / (1.0+d-c) + np.log(0.5 * (1.0+d-c))


trapezoid = trapezoid_gen(a=0.0, b=1.0, name="trapezoid")
# Note: alias kept for backwards compatibility. Rename was done
# because trapz is a slur in colloquial English (see gh-12924).
trapz = trapezoid_gen(a=0.0, b=1.0, name="trapz")
if trapz.__doc__:
    trapz.__doc__ = "trapz is an alias for `trapezoid`"
class triang_gen(rv_continuous):
    r"""A triangular continuous random variable.

    %(before_notes)s

    Notes
    -----
    The triangular distribution can be represented with an up-sloping line from
    ``loc`` to ``(loc + c*scale)`` and then downsloping for ``(loc + c*scale)``
    to ``(loc + scale)``.

    `triang` takes ``c`` as a shape parameter for :math:`0 \le c \le 1`.

    %(after_notes)s

    The standard form is in the range [0, 1] with c the mode.
    The location parameter shifts the start to `loc`.
    The scale parameter changes the width from 1 to `scale`.

    %(example)s

    """
    def _rvs(self, c, size=None, random_state=None):
        # left end 0, mode c, right end 1
        return random_state.triangular(0, c, 1, size)

    def _argcheck(self, c):
        return (c >= 0) & (c <= 1)

    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, 1.0), (True, True))]

    def _pdf(self, x, c):
        # Branches, in order:
        # 0: edge case where c=0
        # 1: generalised case for x < c, don't use x <= c, as it doesn't cope
        #    with c = 0.
        # 2: generalised case for x >= c, but doesn't cope with c = 1
        # 3: edge case where c=1
        def mode_at_zero(x, c):
            return 2 - 2 * x

        def left_of_mode(x, c):
            return 2 * x / c

        def right_of_mode(x, c):
            return 2 * (1 - x) / (1 - c)

        def mode_at_one(x, c):
            return 2 * x

        conditions = [c == 0, x < c, (x >= c) & (c != 1), c == 1]
        branches = [mode_at_zero, left_of_mode, right_of_mode, mode_at_one]
        return _lazyselect(conditions, branches, (x, c))

    def _cdf(self, x, c):
        # Same four branches as in `_pdf`.
        def mode_at_zero(x, c):
            return 2*x - x*x

        def left_of_mode(x, c):
            return x * x / c

        def right_of_mode(x, c):
            return (x*x - 2*x + c) / (c-1)

        def mode_at_one(x, c):
            return x * x

        conditions = [c == 0, x < c, (x >= c) & (c != 1), c == 1]
        branches = [mode_at_zero, left_of_mode, right_of_mode, mode_at_one]
        return _lazyselect(conditions, branches, (x, c))

    def _ppf(self, q, c):
        # The cdf at the mode equals c, so q < c selects the rising side.
        rising = np.sqrt(c * q)
        falling = 1-np.sqrt((1-c) * (1-q))
        return np.where(q < c, rising, falling)

    def _stats(self, c):
        # mean, variance, skewness, excess kurtosis
        quad = 1.0-c+c*c
        mean = (c+1.0)/3.0
        variance = quad/18
        skewness = (np.sqrt(2)*(2*c-1)*(c+1)*(c-2)
                    / (5*np.power(quad, 1.5)))
        return mean, variance, skewness, -3.0/5.0

    def _entropy(self, c):
        # Does not depend on c.
        return 0.5-np.log(2)


triang = triang_gen(a=0.0, b=1.0, name="triang")
class truncexpon_gen(rv_continuous):
    r"""A truncated exponential continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `truncexpon` is:

    .. math::

        f(x, b) = \frac{\exp(-x)}{1 - \exp(-b)}

    for :math:`0 <= x <= b`.

    `truncexpon` takes ``b`` as a shape parameter for :math:`b`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("b", False, (0, np.inf), (False, False))]

    def _get_support(self, b):
        # The upper end of the support is the shape parameter itself.
        return self.a, b

    def _pdf(self, x, b):
        # truncexpon.pdf(x, b) = exp(-x) / (1-exp(-b))
        return np.exp(-x)/(-sc.expm1(-b))

    def _logpdf(self, x, b):
        return -x - np.log(-sc.expm1(-b))

    def _cdf(self, x, b):
        return sc.expm1(-x)/sc.expm1(-b)

    def _ppf(self, q, b):
        return -sc.log1p(q*sc.expm1(-b))

    def _munp(self, n, b):
        # wrong answer with formula, same as in continuous.pdf
        # return sc.gamma(n+1)-sc.gammainc(1+n, b)
        if n == 1:
            return (1-(b+1)*np.exp(-b))/(-sc.expm1(-b))
        elif n == 2:
            return 2*(1-0.5*(b*b+2*b+2)*np.exp(-b))/(-sc.expm1(-b))
        else:
            # return generic for higher moments
            # return rv_continuous._mom1_sc(self, n, b)
            return self._mom1_sc(n, b)

    def _entropy(self, b):
        # With Z = 1 - exp(-b) the entropy is
        #     h = log(Z) + E[X] = log(Z) + (1 - (b + 1)*exp(-b)) / Z.
        # This is algebraically identical to
        #     log(exp(b) - 1) + (1 + exp(b)*(b - 1)) / (1 - exp(b)),
        # but it only involves exp(-b), so it does not overflow to nan for
        # large b (the limit is 1, the entropy of the untruncated
        # exponential distribution).
        norm = -sc.expm1(-b)
        return np.log(norm) + (1 - (b + 1)*np.exp(-b))/norm


truncexpon = truncexpon_gen(a=0.0, name='truncexpon')
8604# logsumexp trick for log(p + q) with only log(p) and log(q)
8605def _log_sum(log_p, log_q):
8606 return sc.logsumexp([log_p, log_q], axis=0)
8609# same as above, but using -exp(x) = exp(x + πi)
8610def _log_diff(log_p, log_q):
8611 return sc.logsumexp([log_p, log_q+np.pi*1j], axis=0)
8614def _log_gauss_mass(a, b):
8615 """Log of Gaussian probability mass within an interval"""
8616 a, b = np.atleast_1d(a), np.atleast_1d(b)
8617 a, b = np.broadcast_arrays(a, b)
8619 # Calculations in right tail are inaccurate, so we'll exploit the
8620 # symmetry and work only in the left tail
8621 case_left = b <= 0
8622 case_right = a > 0
8623 case_central = ~(case_left | case_right)
8625 def mass_case_left(a, b):
8626 return _log_diff(sc.log_ndtr(b), sc.log_ndtr(a))
8628 def mass_case_right(a, b):
8629 return mass_case_left(-b, -a)
8631 def mass_case_central(a, b):
8632 # Previously, this was implemented as:
8633 # left_mass = mass_case_left(a, 0)
8634 # right_mass = mass_case_right(0, b)
8635 # return _log_sum(left_mass, right_mass)
8636 # Catastrophic cancellation occurs as np.exp(log_mass) approaches 1.
8637 # Correct for this with an alternative formulation.
8638 # We're not concerned with underflow here: if only one term
8639 # underflows, it was insignificant; if both terms underflow,
8640 # the result can't accurately be represented in logspace anyway
8641 # because sc.log1p(x) ~ x for small x.
8642 return sc.log1p(-sc.ndtr(a) - sc.ndtr(-b))
8644 # _lazyselect not working; don't care to debug it
8645 out = np.full_like(a, fill_value=np.nan, dtype=np.complex128)
8646 if a[case_left].size:
8647 out[case_left] = mass_case_left(a[case_left], b[case_left])
8648 if a[case_right].size:
8649 out[case_right] = mass_case_right(a[case_right], b[case_right])
8650 if a[case_central].size:
8651 out[case_central] = mass_case_central(a[case_central], b[case_central])
8652 return np.real(out) # discard ~0j
class truncnorm_gen(rv_continuous):
    r"""A truncated normal continuous random variable.

    %(before_notes)s

    Notes
    -----
    This distribution is the normal distribution centered on ``loc`` (default
    0), with standard deviation ``scale`` (default 1), and clipped at ``a``,
    ``b`` standard deviations to the left, right (respectively) from ``loc``.
    If ``myclip_a`` and ``myclip_b`` are clip values in the sample space (as
    opposed to the number of standard deviations) then they can be converted
    to the required form according to::

        a, b = (myclip_a - loc) / scale, (myclip_b - loc) / scale

    %(example)s

    """

    def _argcheck(self, a, b):
        return a < b

    def _shape_info(self):
        ia = _ShapeInfo("a", False, (-np.inf, np.inf), (True, False))
        ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, True))
        return [ia, ib]

    def _fitstart(self, data):
        # Reasonable, since support is [a, b]
        return super()._fitstart(data, args=(np.min(data), np.max(data)))

    def _get_support(self, a, b):
        return a, b

    def _pdf(self, x, a, b):
        return np.exp(self._logpdf(x, a, b))

    def _logpdf(self, x, a, b):
        # Standard normal log-density, renormalized by the log of the
        # Gaussian mass of [a, b].
        return _norm_logpdf(x) - _log_gauss_mass(a, b)

    def _cdf(self, x, a, b):
        return np.exp(self._logcdf(x, a, b))

    def _logcdf(self, x, a, b):
        x, a, b = np.broadcast_arrays(x, a, b)
        logcdf = _log_gauss_mass(a, x) - _log_gauss_mass(a, b)
        i = logcdf > -0.1  # avoid catastrophic cancellation
        if np.any(i):
            # Where the cdf is close to 1, derive it from the complement.
            logcdf[i] = np.log1p(-np.exp(self._logsf(x[i], a[i], b[i])))
        return logcdf

    def _sf(self, x, a, b):
        return np.exp(self._logsf(x, a, b))

    def _logsf(self, x, a, b):
        x, a, b = np.broadcast_arrays(x, a, b)
        logsf = _log_gauss_mass(x, b) - _log_gauss_mass(a, b)
        i = logsf > -0.1  # avoid catastrophic cancellation
        if np.any(i):
            # Where the sf is close to 1, derive it from the complement.
            logsf[i] = np.log1p(-np.exp(self._logcdf(x[i], a[i], b[i])))
        return logsf

    def _ppf(self, q, a, b):
        q, a, b = np.broadcast_arrays(q, a, b)

        # The branch is picked per element from the sign of the lower
        # truncation point.
        case_left = a < 0
        case_right = ~case_left

        def ppf_left(q, a, b):
            log_Phi_x = _log_sum(sc.log_ndtr(a),
                                 np.log(q) + _log_gauss_mass(a, b))
            return sc.ndtri_exp(log_Phi_x)

        def ppf_right(q, a, b):
            log_Phi_x = _log_sum(sc.log_ndtr(-b),
                                 np.log1p(-q) + _log_gauss_mass(a, b))
            return -sc.ndtri_exp(log_Phi_x)

        out = np.empty_like(q)

        q_left = q[case_left]
        q_right = q[case_right]

        if q_left.size:
            out[case_left] = ppf_left(q_left, a[case_left], b[case_left])
        if q_right.size:
            out[case_right] = ppf_right(q_right, a[case_right], b[case_right])

        return out

    def _isf(self, q, a, b):
        # Mostly copy-paste of _ppf, but I think this is simpler than combining
        q, a, b = np.broadcast_arrays(q, a, b)

        # Here the branch follows the sign of the upper truncation point.
        case_left = b < 0
        case_right = ~case_left

        def isf_left(q, a, b):
            log_Phi_x = _log_diff(sc.log_ndtr(b),
                                  np.log(q) + _log_gauss_mass(a, b))
            return sc.ndtri_exp(np.real(log_Phi_x))

        def isf_right(q, a, b):
            log_Phi_x = _log_diff(sc.log_ndtr(-a),
                                  np.log1p(-q) + _log_gauss_mass(a, b))
            return -sc.ndtri_exp(np.real(log_Phi_x))

        out = np.empty_like(q)

        q_left = q[case_left]
        q_right = q[case_right]

        if q_left.size:
            out[case_left] = isf_left(q_left, a[case_left], b[case_left])
        if q_right.size:
            out[case_right] = isf_right(q_right, a[case_right], b[case_right])

        return out

    def _munp(self, n, a, b):
        def n_th_moment(n, a, b):
            """
            Returns n-th moment. Defined only if n >= 0.
            Function cannot broadcast due to the loop over n
            """
            # Evaluate the density at both truncation points.  The points
            # are passed as an ndarray (as `_stats` does) because the
            # density code applies array arithmetic to its first argument.
            pA, pB = self._pdf(np.asarray([a, b]), a, b)
            probs = [pA, -pB]
            moments = [0, 1]
            for k in range(1, n+1):
                # a or b might be infinite, and the corresponding pdf value
                # is 0 in that case, but nan is returned for the
                # multiplication.  However, as b->infinity,  pdf(b)*b**k -> 0.
                # So it is safe to use _lazywhere to avoid the nan.
                vals = _lazywhere(probs, [probs, [a, b]],
                                  lambda x, y: x * y**(k-1), fillvalue=0)
                # m_k = boundary terms + (k-1) * m_{k-2}
                mk = np.sum(vals) + (k-1) * moments[-2]
                moments.append(mk)
            return moments[-1]

        # nan is returned for negative orders and for nan shape parameters
        # (a == a is False only for nan).
        return _lazywhere((n >= 0) & (a == a) & (b == b), (n, a, b),
                          np.vectorize(n_th_moment, otypes=[np.float64]),
                          np.nan)

    def _stats(self, a, b, moments='mv'):
        pA, pB = self.pdf(np.array([a, b]), a, b)

        def _truncnorm_stats_scalar(a, b, pA, pB, moments):
            m1 = pA - pB
            mu = m1
            # use _lazywhere to avoid nan (See detailed comment in _munp)
            probs = [pA, -pB]
            vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y,
                              fillvalue=0)
            m2 = 1 + np.sum(vals)
            vals = _lazywhere(probs, [probs, [a-mu, b-mu]], lambda x, y: x*y,
                              fillvalue=0)
            # mu2 = m2 - mu**2, but not as numerically stable as:
            # mu2 = (a-mu)*pA - (b-mu)*pB + 1
            mu2 = 1 + np.sum(vals)
            vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y**2,
                              fillvalue=0)
            m3 = 2*m1 + np.sum(vals)
            vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y**3,
                              fillvalue=0)
            m4 = 3*m2 + np.sum(vals)

            # Convert raw moments to central moments, then to skewness (g1)
            # and excess kurtosis (g2).
            mu3 = m3 + m1 * (-3*m2 + 2*m1**2)
            g1 = mu3 / np.power(mu2, 1.5)
            mu4 = m4 + m1*(-4*m3 + 3*m1*(2*m2 - m1**2))
            g2 = mu4 / mu2**2 - 3
            return mu, mu2, g1, g2

        _truncnorm_stats = np.vectorize(_truncnorm_stats_scalar,
                                        excluded=('moments',))
        return _truncnorm_stats(a, b, pA, pB, moments)


truncnorm = truncnorm_gen(name='truncnorm', momtype=1)
class truncpareto_gen(rv_continuous):
    r"""An upper truncated Pareto continuous random variable.

    %(before_notes)s

    See Also
    --------
    pareto : Pareto distribution

    Notes
    -----
    The probability density function for `truncpareto` is:

    .. math::

        f(x, b, c) = \frac{b}{1 - c^{-b}} \frac{1}{x^{b+1}}

    for :math:`b > 0`, :math:`c > 1` and :math:`1 \le x \le c`.

    `truncpareto` takes `b` and `c` as shape parameters for :math:`b` and
    :math:`c`.

    Notice that the upper truncation value :math:`c` is defined in
    standardized form so that random values of an unscaled, unshifted variable
    are within the range ``[1, c]``.
    If ``u_r`` is the upper bound to a scaled and/or shifted variable,
    then ``c = (u_r - loc) / scale``. In other words, the support of the
    distribution becomes ``(scale + loc) <= x <= (c*scale + loc)`` when
    `scale` and/or `loc` are provided.

    %(after_notes)s

    References
    ----------
    .. [1] Burroughs, S. M., and Tebbens S. F.
        "Upper-truncated power laws in natural systems."
        Pure and Applied Geophysics 158.4 (2001): 741-757.

    %(example)s

    """

    def _shape_info(self):
        ib = _ShapeInfo("b", False, (0.0, np.inf), (False, False))
        ic = _ShapeInfo("c", False, (1.0, np.inf), (False, False))
        return [ib, ic]

    def _argcheck(self, b, c):
        return (b > 0) & (c > 1)

    def _get_support(self, b, c):
        # The upper end of the support is the truncation point c.
        return self.a, c

    def _pdf(self, x, b, c):
        return b * x**-(b+1) / (1 - c**-b)

    def _logpdf(self, x, b, c):
        return np.log(b) - np.log1p(-c**-b) - (b+1)*np.log(x)

    def _cdf(self, x, b, c):
        return (1 - x**-b) / (1 - c**-b)

    def _logcdf(self, x, b, c):
        return np.log1p(-x**-b) - np.log1p(-c**-b)

    def _ppf(self, q, b, c):
        return pow(1 - (1 - c**-b)*q, -1/b)

    def _sf(self, x, b, c):
        return (x**-b - c**-b) / (1 - c**-b)

    def _logsf(self, x, b, c):
        return np.log(x**-b - c**-b) - np.log1p(-c**-b)

    def _isf(self, q, b, c):
        return pow(c**-b + (1 - c**-b)*q, -1/b)

    def _entropy(self, b, c):
        return -(np.log(b/(1 - c**-b))
                 + (b+1)*(np.log(c)/(c**b - 1) - 1/b))

    def _munp(self, n, b, c):
        """Return the ``n``-th noncentral moment, elementwise in ``b``, ``c``.

        The generic expression ``b/(b-n) * (c**b - c**n)/(c**b - 1)`` has a
        removable singularity at ``b == n``, where the moment equals
        ``b*log(c) / (1 - c**-b)``.  The choice between the two is made per
        element, so array-valued shape parameters are supported, including
        arrays in which only some entries of ``b`` equal ``n``.
        """
        b, c = np.broadcast_arrays(np.asarray(b, dtype=float),
                                   np.asarray(c, dtype=float))
        at_singularity = (b == n)
        # Evaluate the generic formula away from b == n so that no 0/0 is
        # produced; those entries are replaced by the limit below.
        b_generic = np.where(at_singularity, n + 1.0, b)
        generic = (b_generic / (b_generic - n)
                   * (c**b_generic - c**n) / (c**b_generic - 1))
        limit = b*np.log(c) / (1 - c**-b)
        # `[()]` turns a 0-d result back into a scalar.
        return np.where(at_singularity, limit, generic)[()]

    def _fitstart(self, data):
        # Start from a plain Pareto fit and place the truncation point at
        # the standardized sample maximum.
        b, loc, scale = pareto.fit(data)
        c = (max(data) - loc)/scale
        return b, c, loc, scale


truncpareto = truncpareto_gen(a=1.0, name='truncpareto')
class tukeylambda_gen(rv_continuous):
    r"""A Tukey-Lambda continuous random variable.

    %(before_notes)s

    Notes
    -----
    A flexible distribution, able to represent and interpolate between the
    following distributions:

    - Cauchy                (:math:`\lambda = -1`)
    - logistic              (:math:`\lambda = 0`)
    - approx Normal         (:math:`\lambda = 0.14`)
    - uniform from -1 to 1  (:math:`\lambda = 1`)

    `tukeylambda` takes a real number :math:`\lambda` (denoted ``lam``
    in the implementation) as a shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, lam):
        return np.isfinite(lam)

    def _shape_info(self):
        return [_ShapeInfo("lam", False, (-np.inf, np.inf), (False, False))]

    def _pdf(self, x, lam):
        # The density is the reciprocal of F**(lam-1) + (1-F)**(lam-1),
        # with F the cdf at x.  For lam > 0 it is set to zero wherever
        # |x| >= 1/lam.
        cdf_at_x = np.asarray(sc.tklmbda(x, lam))
        power = lam-1.0
        reciprocal = cdf_at_x**power + (np.asarray(1-cdf_at_x))**power
        density = 1.0/np.asarray(reciprocal)
        keep = (lam <= 0) | (abs(x) < 1.0/np.asarray(lam))
        return np.where(keep, density, 0.0)

    def _cdf(self, x, lam):
        return sc.tklmbda(x, lam)

    def _ppf(self, q, lam):
        # Difference of two Box-Cox transforms.
        return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam)

    def _stats(self, lam):
        # mean and skewness are zero; variance and kurtosis come from the
        # dedicated helper functions.
        return 0, _tlvar(lam), 0, _tlkurt(lam)

    def _entropy(self, lam):
        # Integrate log(p**(lam-1) + (1-p)**(lam-1)) over p in (0, 1).
        def integrand(p):
            return np.log(pow(p, lam-1)+pow(1-p, lam-1))

        value, _abserr = integrate.quad(integrand, 0, 1)
        return value


tukeylambda = tukeylambda_gen(name='tukeylambda')
class FitUniformFixedScaleDataError(FitDataError):
    """Error for uniform fits whose fixed scale is narrower than the data.

    The exception arguments consist of one message that reports the
    peak-to-peak range of the data (`ptp`) and the fixed scale (`fscale`).
    """
    def __init__(self, ptp, fscale):
        message = (
            "Invalid values in `data`. Maximum likelihood estimation with "
            "the uniform distribution and fixed scale requires that "
            "data.ptp() <= fscale, but data.ptp() = %r and fscale = %r."
        ) % (ptp, fscale)
        self.args = (message,)
class uniform_gen(rv_continuous):
    r"""A uniform continuous random variable.

    In the standard form, the distribution is uniform on ``[0, 1]``. Using
    the parameters ``loc`` and ``scale``, one obtains the uniform distribution
    on ``[loc, loc + scale]``.

    %(before_notes)s

    %(example)s

    """
    def _shape_info(self):
        # The uniform distribution has no shape parameters.
        return []

    def _rvs(self, size=None, random_state=None):
        return random_state.uniform(0.0, 1.0, size)

    def _pdf(self, x):
        # `x == x` is True for every non-NaN entry, so this yields 1.0 there
        # (and 0.0 for NaN) while preserving the shape of `x`.
        return 1.0*(x == x)

    def _cdf(self, x):
        return x

    def _ppf(self, q):
        return q

    def _stats(self):
        # mean, variance, skewness, excess kurtosis
        return 0.5, 1.0/12, 0, -1.2

    def _entropy(self):
        return 0.0

    @_call_super_mom
    def fit(self, data, *args, **kwds):
        """
        Maximum likelihood estimate for the location and scale parameters.

        `uniform.fit` uses only the following parameters. Because exact
        formulas are used, the parameters related to optimization that are
        available in the `fit` method of other distributions are ignored
        here. The only positional argument accepted is `data`.

        Parameters
        ----------
        data : array_like
            Data to use in calculating the maximum likelihood estimate.
        floc : float, optional
            Hold the location parameter fixed to the specified value.
        fscale : float, optional
            Hold the scale parameter fixed to the specified value.

        Returns
        -------
        loc, scale : float
            Maximum likelihood estimates for the location and scale.

        Notes
        -----
        An error is raised if `floc` is given and any values in `data` are
        less than `floc`, or if `fscale` is given and `fscale` is less
        than ``data.max() - data.min()``. An error is also raised if both
        `floc` and `fscale` are given.

        Examples
        --------
        >>> import numpy as np
        >>> from scipy.stats import uniform

        We'll fit the uniform distribution to `x`:

        >>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])

        For a uniform distribution MLE, the location is the minimum of the
        data, and the scale is the maximum minus the minimum.

        >>> loc, scale = uniform.fit(x)
        >>> loc
        2.0
        >>> scale
        11.0

        If we know the data comes from a uniform distribution where the support
        starts at 0, we can use `floc=0`:

        >>> loc, scale = uniform.fit(x, floc=0)
        >>> loc
        0.0
        >>> scale
        13.0

        Alternatively, if we know the length of the support is 12, we can use
        `fscale=12`:

        >>> loc, scale = uniform.fit(x, fscale=12)
        >>> loc
        1.5
        >>> scale
        12.0

        In that last example, the support interval is [1.5, 13.5]. This
        solution is not unique. For example, the distribution with ``loc=2``
        and ``scale=12`` has the same likelihood as the one above. When
        `fscale` is given and it is larger than ``data.max() - data.min()``,
        the parameters returned by the `fit` method center the support over
        the interval ``[data.min(), data.max()]``.

        """
        if len(args) > 0:
            raise TypeError("Too many arguments.")

        floc = kwds.pop('floc', None)
        fscale = kwds.pop('fscale', None)

        _remove_optimizer_parameters(kwds)

        if floc is not None and fscale is not None:
            # This check is for consistency with `rv_continuous.fit`.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        data = np.asarray(data)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        # MLE for the uniform distribution
        # --------------------------------
        # The PDF is
        #
        #     f(x, loc, scale) = {1/scale  for loc <= x <= loc + scale
        #                        {0        otherwise}
        #
        # The likelihood function is
        #     L(x, loc, scale) = (1/scale)**n
        # where n is len(x), assuming loc <= x <= loc + scale for all x.
        # The log-likelihood is
        #     l(x, loc, scale) = -n*log(scale)
        # The log-likelihood is maximized by making scale as small as possible,
        # while keeping loc <= x <= loc + scale. So if neither loc nor scale
        # are fixed, the log-likelihood is maximized by choosing
        #     loc = x.min()
        #     scale = np.ptp(x)
        # If loc is fixed, it must be less than or equal to x.min(), and then
        # the scale is
        #     scale = x.max() - loc
        # If scale is fixed, it must not be less than np.ptp(x). If scale is
        # greater than np.ptp(x), the solution is not unique. Note that the
        # likelihood does not depend on loc, except for the requirement that
        # loc <= x <= loc + scale. All choices of loc for which
        #     x.max() - scale <= loc <= x.min()
        # have the same log-likelihood. In this case, we choose loc such that
        # the support is centered over the interval [data.min(), data.max()]:
        #     loc = x.min() - 0.5*(scale - np.ptp(x))

        # NOTE: `np.ptp(data)` is used rather than the `data.ptp()` method,
        # because the ndarray method was removed in NumPy 2.0 while the
        # function form works on every NumPy version.
        if fscale is None:
            # scale is not fixed.
            if floc is None:
                # loc is not fixed, scale is not fixed.
                loc = data.min()
                scale = np.ptp(data)
            else:
                # loc is fixed, scale is not fixed.
                loc = floc
                scale = data.max() - loc
                if data.min() < loc:
                    raise FitDataError("uniform", lower=loc, upper=loc + scale)
        else:
            # loc is not fixed, scale is fixed.
            ptp = np.ptp(data)
            if ptp > fscale:
                raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
            # If ptp < fscale, the ML estimate is not unique; see the comments
            # above. We choose the distribution for which the support is
            # centered over the interval [data.min(), data.max()].
            loc = data.min() - 0.5*(fscale - ptp)
            scale = fscale

        # We expect the return values to be floating point, so ensure it
        # by explicitly converting to float.
        return float(loc), float(scale)
# Module-level `uniform_gen` instance, constructed with a=0.0 and b=1.0.
uniform = uniform_gen(a=0.0, b=1.0, name='uniform')
class vonmises_gen(rv_continuous):
    r"""A Von Mises continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `vonmises` and `vonmises_line` is:

    .. math::

        f(x, \kappa) = \frac{ \exp(\kappa \cos(x)) }{ 2 \pi I_0(\kappa) }

    for :math:`-\pi \le x \le \pi`, :math:`\kappa > 0`. :math:`I_0` is the
    modified Bessel function of order zero (`scipy.special.i0`).

    `vonmises` is a circular distribution which does not restrict the
    distribution to a fixed interval. Currently, there is no circular
    distribution framework in scipy. The ``cdf`` is implemented such that
    ``cdf(x + 2*np.pi) == cdf(x) + 1``.

    `vonmises_line` is the same distribution, defined on :math:`[-\pi, \pi]`
    on the real line. This is a regular (i.e. non-circular) distribution.

    `vonmises` and `vonmises_line` take ``kappa`` as a shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        kappa_info = _ShapeInfo("kappa", False, (0, np.inf), (False, False))
        return [kappa_info]

    def _rvs(self, kappa, size=None, random_state=None):
        return random_state.vonmises(0.0, kappa, size=size)

    @inherit_docstring_from(rv_continuous)
    def rvs(self, *args, **kwds):
        # Draw with the generic machinery, then fold the samples back onto
        # an interval of width 2*pi starting at -pi.
        samples = super().rvs(*args, **kwds)
        shifted = samples + np.pi
        return np.mod(shifted, 2*np.pi) - np.pi

    def _pdf(self, x, kappa):
        # exp(kappa*cos(x)) / (2*pi*I0(kappa)), rewritten with cosm1 and the
        # exponentially scaled Bessel function i0e:
        #   exp(kappa*cosm1(x)) / (2*pi*i0e(kappa))
        numerator = np.exp(kappa*sc.cosm1(x))
        denominator = 2*np.pi*sc.i0e(kappa)
        return numerator / denominator

    def _logpdf(self, x, kappa):
        # log of exp(kappa*cosm1(x)) / (2*pi*i0e(kappa))
        return kappa * sc.cosm1(x) - np.log(2*np.pi) - np.log(sc.i0e(kappa))

    def _cdf(self, x, kappa):
        return _stats.von_mises_cdf(kappa, x)

    def _stats_skip(self, kappa):
        return 0, None, 0, None

    def _entropy(self, kappa):
        # -kappa*I1(kappa)/I0(kappa) + log(2*pi*I0(kappa)), expressed with
        # the scaled functions i1e/i0e:
        #   -kappa*i1e/i0e + log(2*pi*i0e) + kappa
        scaled_i0 = sc.i0e(kappa)
        ratio_term = -kappa * sc.i1e(kappa) / scaled_i0
        return ratio_term + np.log(2 * np.pi * scaled_i0) + kappa

    @extend_notes_in_docstring(rv_continuous, notes="""\
        The default limits of integration are endpoints of the interval
        of width ``2*pi`` centered at `loc` (e.g. ``[-pi, pi]`` when
        ``loc=0``).\n\n""")
    def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
               conditional=False, **kwds):
        # Fill in missing integration limits relative to `loc`.
        lower_offset, upper_offset = -np.pi, np.pi
        lb = loc + lower_offset if lb is None else lb
        ub = loc + upper_offset if ub is None else ub

        return super().expect(func, args, loc,
                              scale, lb, ub, conditional, **kwds)
# Two module-level instances of `vonmises_gen`: `vonmises` is built without
# explicit `a`/`b`, `vonmises_line` is built with a=-pi and b=pi.
vonmises = vonmises_gen(name='vonmises')
vonmises_line = vonmises_gen(a=-np.pi, b=np.pi, name='vonmises_line')
class wald_gen(invgauss_gen):
    r"""A Wald continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `wald` is:

    .. math::

        f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp(- \frac{ (x-1)^2 }{ 2x })

    for :math:`x >= 0`.

    `wald` is a special case of `invgauss` with ``mu=1``.

    %(after_notes)s

    %(example)s
    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        # No shape parameters: `mu` is pinned to 1.0 in every method below.
        return []

    def _rvs(self, size=None, random_state=None):
        return random_state.wald(1.0, 1.0, size=size)

    # Each of the following methods delegates to the corresponding private
    # method of the module-level `invgauss` instance with the shape fixed
    # at 1.0.

    def _pdf(self, x):
        # wald.pdf(x) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x))
        return invgauss._pdf(x, 1.0)

    def _cdf(self, x):
        return invgauss._cdf(x, 1.0)

    def _sf(self, x):
        return invgauss._sf(x, 1.0)

    def _ppf(self, x):
        return invgauss._ppf(x, 1.0)

    def _isf(self, x):
        return invgauss._isf(x, 1.0)

    def _logpdf(self, x):
        return invgauss._logpdf(x, 1.0)

    def _logcdf(self, x):
        return invgauss._logcdf(x, 1.0)

    def _logsf(self, x):
        return invgauss._logsf(x, 1.0)

    def _stats(self):
        # Returned as a fixed 4-tuple of constants.
        return 1.0, 1.0, 3.0, 15.0
# Module-level `wald_gen` instance, constructed with a=0.0.
wald = wald_gen(a=0.0, name="wald")
class wrapcauchy_gen(rv_continuous):
    r"""A wrapped Cauchy continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `wrapcauchy` is:

    .. math::

        f(x, c) = \frac{1-c^2}{2\pi (1+c^2 - 2c \cos(x))}

    for :math:`0 \le x \le 2\pi`, :math:`0 < c < 1`.

    `wrapcauchy` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, c):
        # Valid shapes lie strictly between 0 and 1.
        return (c > 0) & (c < 1)

    def _shape_info(self):
        c_info = _ShapeInfo("c", False, (0, 1), (False, False))
        return [c_info]

    def _pdf(self, x, c):
        # wrapcauchy.pdf(x, c) = (1-c**2) / (2*pi*(1+c**2-2*c*cos(x)))
        numerator = 1.0-c*c
        denominator = 2*np.pi*(1+c*c-2*c*np.cos(x))
        return numerator/denominator

    def _cdf(self, x, c):

        def lower_branch(x, cr):
            # CDF for 0 <= x < pi
            return 1/np.pi * np.arctan(cr*np.tan(x/2))

        def upper_branch(x, cr):
            # CDF for pi <= x <= 2*pi
            return 1 - 1/np.pi * np.arctan(cr*np.tan((2*np.pi - x)/2))

        cr = (1 + c)/(1 - c)
        in_lower_half = x < np.pi
        return _lazywhere(in_lower_half, (x, cr),
                          f=lower_branch, f2=upper_branch)

    def _ppf(self, q, c):
        ratio = (1.0-c)/(1.0+c)
        # Candidate quantiles for the lower and upper halves of [0, 1].
        below_median = 2*np.arctan(ratio*np.tan(np.pi*q))
        above_median = 2*np.pi-2*np.arctan(ratio*np.tan(np.pi*(1-q)))
        return np.where(q < 1.0/2, below_median, above_median)

    def _entropy(self, c):
        return np.log(2*np.pi*(1-c*c))

    def _fitstart(self, data):
        # Initial guess: shape 0.5, location at the data minimum, and
        # scale equal to peak-to-peak/(2*pi).
        shape_guess = 0.5
        loc_guess = np.min(data)
        scale_guess = np.ptp(data)/(2*np.pi)
        return shape_guess, loc_guess, scale_guess
# Module-level `wrapcauchy_gen` instance, constructed with a=0.0 and b=2*pi.
wrapcauchy = wrapcauchy_gen(a=0.0, b=2*np.pi, name='wrapcauchy')
class gennorm_gen(rv_continuous):
    r"""A generalized normal continuous random variable.

    %(before_notes)s

    See Also
    --------
    laplace : Laplace distribution
    norm : normal distribution

    Notes
    -----
    The probability density function for `gennorm` is [1]_:

    .. math::

        f(x, \beta) = \frac{\beta}{2 \Gamma(1/\beta)} \exp(-|x|^\beta),

    where :math:`x` is a real number, :math:`\beta > 0` and
    :math:`\Gamma` is the gamma function (`scipy.special.gamma`).

    `gennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
    For :math:`\beta = 1`, it is identical to a Laplace distribution.
    For :math:`\beta = 2`, it is identical to a normal distribution
    (with ``scale=1/sqrt(2)``).

    References
    ----------

    .. [1] "Generalized normal distribution, Version 1",
           https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1

    .. [2] Nardon, Martina, and Paolo Pianca. "Simulation techniques for
           generalized Gaussian densities." Journal of Statistical
           Computation and Simulation 79.11 (2009): 1317-1329

    .. [3] Wicklin, Rick. "Simulate data from a generalized Gaussian
           distribution" in The DO Loop blog, September 21, 2016,
           https://blogs.sas.com/content/iml/2016/09/21/simulate-generalized-gaussian-sas.html

    %(example)s

    """
    def _shape_info(self):
        beta_info = _ShapeInfo("beta", False, (0, np.inf), (False, False))
        return [beta_info]

    def _pdf(self, x, beta):
        log_density = self._logpdf(x, beta)
        return np.exp(log_density)

    def _logpdf(self, x, beta):
        return np.log(0.5*beta) - sc.gammaln(1.0/beta) - abs(x)**beta

    def _cdf(self, x, beta):
        half_sign = 0.5 * np.sign(x)
        upper_gamma = sc.gammaincc(1.0/beta, abs(x)**beta)
        # (0.5 + half_sign) is evaluated first to avoid numerical
        # cancellation.
        return (0.5 + half_sign) - half_sign * upper_gamma

    def _ppf(self, x, beta):
        side = np.sign(x - 0.5)
        # (1. + side) is evaluated first to avoid numerical cancellation.
        tail_prob = (1.0 + side) - 2.0*side*x
        return side * sc.gammainccinv(1.0/beta, tail_prob)**(1.0/beta)

    def _sf(self, x, beta):
        # Obtained by evaluating the CDF at the negated argument.
        return self._cdf(-x, beta)

    def _isf(self, x, beta):
        return -self._ppf(x, beta)

    def _stats(self, beta):
        lg1, lg3, lg5 = sc.gammaln([1.0/beta, 3.0/beta, 5.0/beta])
        variance = np.exp(lg3 - lg1)
        excess_kurtosis = np.exp(lg5 + lg1 - 2.0*lg3) - 3.
        return 0., variance, 0., excess_kurtosis

    def _entropy(self, beta):
        return 1. / beta - np.log(.5 * beta) + sc.gammaln(1. / beta)

    def _rvs(self, beta, size=None, random_state=None):
        # see [2]_ for the algorithm
        # see [3]_ for reference implementation in SAS
        gamma_draws = random_state.gamma(1/beta, size=size)
        # wrap in an array so that boolean-mask assignment is supported
        magnitudes = np.asarray(gamma_draws ** (1/beta))
        negate = random_state.random(size=magnitudes.shape) < 0.5
        magnitudes[negate] = -magnitudes[negate]
        return magnitudes
# Module-level `gennorm_gen` instance.
gennorm = gennorm_gen(name='gennorm')
class halfgennorm_gen(rv_continuous):
    r"""The upper half of a generalized normal continuous random variable.

    %(before_notes)s

    See Also
    --------
    gennorm : generalized normal distribution
    expon : exponential distribution
    halfnorm : half normal distribution

    Notes
    -----
    The probability density function for `halfgennorm` is:

    .. math::

        f(x, \beta) = \frac{\beta}{\Gamma(1/\beta)} \exp(-|x|^\beta)

    for :math:`x, \beta > 0`. :math:`\Gamma` is the gamma function
    (`scipy.special.gamma`).

    `halfgennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
    For :math:`\beta = 1`, it is identical to an exponential distribution.
    For :math:`\beta = 2`, it is identical to a half normal distribution
    (with ``scale=1/sqrt(2)``).

    References
    ----------

    .. [1] "Generalized normal distribution, Version 1",
           https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1

    %(example)s

    """
    def _shape_info(self):
        beta_info = _ShapeInfo("beta", False, (0, np.inf), (False, False))
        return [beta_info]

    def _pdf(self, x, beta):
        #                                 beta
        # halfgennorm.pdf(x, beta) =  -------------  exp(-|x|**beta)
        #                             gamma(1/beta)
        log_density = self._logpdf(x, beta)
        return np.exp(log_density)

    def _logpdf(self, x, beta):
        return np.log(beta) - sc.gammaln(1.0/beta) - x**beta

    def _cdf(self, x, beta):
        gamma_shape = 1.0/beta
        return sc.gammainc(gamma_shape, x**beta)

    def _ppf(self, x, beta):
        gamma_shape = 1.0/beta
        return sc.gammaincinv(gamma_shape, x)**(1.0/beta)

    def _sf(self, x, beta):
        gamma_shape = 1.0/beta
        return sc.gammaincc(gamma_shape, x**beta)

    def _isf(self, x, beta):
        gamma_shape = 1.0/beta
        return sc.gammainccinv(gamma_shape, x)**(1.0/beta)

    def _entropy(self, beta):
        return 1.0/beta - np.log(beta) + sc.gammaln(1.0/beta)
# Module-level `halfgennorm_gen` instance, constructed with a=0.
halfgennorm = halfgennorm_gen(a=0, name='halfgennorm')
class crystalball_gen(rv_continuous):
    r"""
    Crystalball distribution

    %(before_notes)s

    Notes
    -----
    The probability density function for `crystalball` is:

    .. math::

        f(x, \beta, m) =  \begin{cases}
                            N \exp(-x^2 / 2),  &\text{for } x > -\beta\\
                            N A (B - x)^{-m}  &\text{for } x \le -\beta
                          \end{cases}

    where :math:`A = (m / |\beta|)^m  \exp(-\beta^2 / 2)`,
    :math:`B = m/|\beta| - |\beta|` and :math:`N` is a normalisation constant.

    `crystalball` takes :math:`\beta > 0` and :math:`m > 1` as shape
    parameters.  :math:`\beta` defines the point where the pdf changes
    from a power-law to a Gaussian distribution.  :math:`m` is the power
    of the power-law tail.

    References
    ----------
    .. [1] "Crystal Ball Function",
           https://en.wikipedia.org/wiki/Crystal_Ball_function

    %(after_notes)s

    .. versionadded:: 0.19.0

    %(example)s
    """
    def _argcheck(self, beta, m):
        """
        Shape parameter bounds are m > 1 and beta > 0.
        """
        return (m > 1) & (beta > 0)

    def _shape_info(self):
        ibeta = _ShapeInfo("beta", False, (0, np.inf), (False, False))
        im = _ShapeInfo("m", False, (1, np.inf), (False, False))
        return [ibeta, im]

    def _fitstart(self, data):
        # Arbitrary, but the default m=1 is not valid
        return super()._fitstart(data, args=(1, 1.5))

    def _pdf(self, x, beta, m):
        """
        Return PDF of the crystalball function.

                                            --
                                           | exp(-x**2 / 2),  for x > -beta
        crystalball.pdf(x, beta, m) =  N * |
                                           | A * (B - x)**(-m), for x <= -beta
                                            --
        """
        # Normalisation constant; the same expression is recomputed in
        # `_logpdf`, `_cdf`, `_ppf` and `_munp`.
        N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
                   _norm_pdf_C * _norm_cdf(beta))

        def rhs(x, beta, m):
            # Gaussian part, used where x > -beta.
            return np.exp(-x**2 / 2)

        def lhs(x, beta, m):
            # Power-law part A * (B - x)**(-m), used where x <= -beta.
            return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
                    (m/beta - beta - x)**(-m))

        return N * _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)

    def _logpdf(self, x, beta, m):
        """
        Return the log of the PDF of the crystalball function.
        """
        N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
                   _norm_pdf_C * _norm_cdf(beta))

        def rhs(x, beta, m):
            # log of the Gaussian part.
            return -x**2/2

        def lhs(x, beta, m):
            # log of the power-law part: log(A) - m*log(B - x).
            return m*np.log(m/beta) - beta**2/2 - m*np.log(m/beta - beta - x)

        return np.log(N) + _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)

    def _cdf(self, x, beta, m):
        """
        Return CDF of the crystalball function
        """
        N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
                   _norm_pdf_C * _norm_cdf(beta))

        def rhs(x, beta, m):
            # For x > -beta: a term independent of x plus the Gaussian
            # contribution accumulated between -beta and x.
            return ((m/beta) * np.exp(-beta**2 / 2.0) / (m-1) +
                    _norm_pdf_C * (_norm_cdf(x) - _norm_cdf(-beta)))

        def lhs(x, beta, m):
            # For x <= -beta: power-law expression with exponent (-m+1).
            return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
                    (m/beta - beta - x)**(-m+1) / (m-1))

        return N * _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)

    def _ppf(self, p, beta, m):
        N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
                   _norm_pdf_C * _norm_cdf(beta))
        # Probability threshold that selects between the two inverse
        # branches below.
        pbeta = N * (m/beta) * np.exp(-beta**2/2) / (m - 1)

        def ppf_less(p, beta, m):
            # Branch used where p < pbeta; N is recomputed locally because
            # the helper only receives (p, beta, m).
            eb2 = np.exp(-beta**2/2)
            C = (m/beta) * eb2 / (m-1)
            N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
            return (m/beta - beta -
                    ((m - 1)*(m/beta)**(-m)/eb2*p/N)**(1/(1-m)))

        def ppf_greater(p, beta, m):
            # Branch used where p >= pbeta.
            eb2 = np.exp(-beta**2/2)
            C = (m/beta) * eb2 / (m-1)
            N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
            return _norm_ppf(_norm_cdf(-beta) + (1/_norm_pdf_C)*(p/N - C))

        return _lazywhere(p < pbeta, (p, beta, m), f=ppf_less, f2=ppf_greater)

    def _munp(self, n, beta, m):
        """
        Returns the n-th non-central moment of the crystalball function.
        """
        N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
                   _norm_pdf_C * _norm_cdf(beta))

        def n_th_moment(n, beta, m):
            """
            Returns n-th moment. Defined only if n+1 < m
            Function cannot broadcast due to the loop over n
            """
            A = (m/beta)**m * np.exp(-beta**2 / 2.0)
            B = m/beta - beta
            rhs = (2**((n-1)/2.0) * sc.gamma((n+1)/2) *
                   (1.0 + (-1)**n * sc.gammainc((n+1)/2, beta**2 / 2)))
            lhs = np.zeros(rhs.shape)
            # Accumulate the binomial-sum terms for k = 0..n.
            for k in range(n + 1):
                lhs += (sc.binom(n, k) * B**(n-k) * (-1)**k / (m - k - 1) *
                        (m/beta)**(-m + k + 1))
            return A * lhs + rhs

        # Where n + 1 < m does not hold, np.inf is used as the fill value.
        return N * _lazywhere(n + 1 < m, (n, beta, m),
                              np.vectorize(n_th_moment, otypes=[np.float64]),
                              np.inf)
# Module-level `crystalball_gen` instance.
crystalball = crystalball_gen(name='crystalball', longname="A Crystalball Function")
def _argus_phi(chi):
    """
    Helper for the argus distribution; evaluates
    ``gammainc(1.5, chi**2/2) / 2``. It is used by the pdf, sf and moment
    computations.

    For all x > 0 the identity
        gammainc(1.5, x**2/2) = 2 * (_norm_cdf(x) - x * _norm_pdf(x) - 0.5)
    holds. It follows from the fact that the cdf of Gamma(1.5) can be
    written as erf(sqrt(x)) - 2*sqrt(x)*exp(-x)/sqrt(Pi).
    The gammainc form is preferred over the usual definition because it is
    more precise for small chi.
    """
    half_chi_squared = chi**2/2
    regularized_gamma = sc.gammainc(1.5, half_chi_squared)
    return regularized_gamma / 2
class argus_gen(rv_continuous):
    r"""
    Argus distribution

    %(before_notes)s

    Notes
    -----
    The probability density function for `argus` is:

    .. math::

        f(x, \chi) = \frac{\chi^3}{\sqrt{2\pi} \Psi(\chi)} x \sqrt{1-x^2}
                     \exp(-\chi^2 (1 - x^2)/2)

    for :math:`0 < x < 1` and :math:`\chi > 0`, where

    .. math::

        \Psi(\chi) = \Phi(\chi) - \chi \phi(\chi) - 1/2

    with :math:`\Phi` and :math:`\phi` being the CDF and PDF of a standard
    normal distribution, respectively.

    `argus` takes :math:`\chi` as a shape parameter.

    %(after_notes)s

    References
    ----------
    .. [1] "ARGUS distribution",
           https://en.wikipedia.org/wiki/ARGUS_distribution

    .. versionadded:: 0.19.0

    %(example)s
    """
    def _shape_info(self):
        return [_ShapeInfo("chi", False, (0, np.inf), (False, False))]

    def _logpdf(self, x, chi):
        # for x = 0 or 1, logpdf returns -np.inf
        with np.errstate(divide='ignore'):
            y = 1.0 - x*x
            # A collects the terms that do not depend on x.
            A = 3*np.log(chi) - _norm_pdf_logC - np.log(_argus_phi(chi))
            return A + np.log(x) + 0.5*np.log1p(-x*x) - chi**2 * y / 2

    def _pdf(self, x, chi):
        return np.exp(self._logpdf(x, chi))

    def _cdf(self, x, chi):
        return 1.0 - self._sf(x, chi)

    def _sf(self, x, chi):
        return _argus_phi(chi * np.sqrt(1 - x**2)) / _argus_phi(chi)

    def _rvs(self, chi, size=None, random_state=None):
        chi = np.asarray(chi)
        if chi.size == 1:
            # Single shape value: sample everything in one call.
            out = self._rvs_scalar(chi, numsamples=size,
                                   random_state=random_state)
        else:
            # Several shape values: visit each one and fill the matching
            # slice of the output array.
            shp, bc = _check_shape(chi.shape, size)
            numsamples = int(np.prod(shp))
            out = np.empty(size)
            it = np.nditer([chi],
                           flags=['multi_index'],
                           op_flags=[['readonly']])
            while not it.finished:
                idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
                            for j in range(-len(size), 0))
                r = self._rvs_scalar(it[0], numsamples=numsamples,
                                     random_state=random_state)
                out[idx] = r.reshape(shp)
                it.iternext()

        if size == ():
            # Unwrap the 0-d result.
            out = out[()]
        return out

    def _rvs_scalar(self, chi, numsamples=None, random_state=None):
        # if chi <= 1.8:
        # use rejection method, see Devroye:
        # Non-Uniform Random Variate Generation, 1986, section II.3.2.
        # write: PDF f(x) = c * g(x) * h(x), where
        # h is [0,1]-valued and g is a density
        # we use two ways to write f
        #
        # Case 1:
        # write g(x) = 3*x*sqrt(1-x**2), h(x) = exp(-chi**2 (1-x**2) / 2)
        # If X has a distribution with density g its ppf G_inv is given by:
        # G_inv(u) = np.sqrt(1 - u**(2/3))
        #
        # Case 2:
        # g(x) = chi**2 * x * exp(-chi**2 * (1-x**2)/2) / (1 - exp(-chi**2 /2))
        # h(x) = sqrt(1 - x**2), 0 <= x <= 1
        # one can show that
        # G_inv(u) = np.sqrt(2*np.log(u*(np.exp(chi**2/2)-1)+1))/chi
        #          = np.sqrt(1 + 2*np.log(np.exp(-chi**2/2)*(1-u)+u)/chi**2)
        # the latter expression is used for precision with small chi
        #
        # In both cases, the inverse cdf of g can be written analytically, and
        # we can apply the rejection method:
        #
        # REPEAT
        #    Generate U uniformly distributed on [0, 1]
        #    Generate X with density g (e.g. via inverse transform sampling:
        #    X = G_inv(V) with V uniformly distributed on [0, 1])
        # UNTIL U <= h(X)
        # RETURN X
        #
        # We use case 1 for chi <= 0.5 as it maintains precision for small chi
        # and case 2 for 0.5 < chi <= 1.8 due to its speed for moderate chi.
        #
        # if chi > 1.8:
        # use relation to the Gamma distribution: if X is ARGUS with parameter
        # chi, then Y = chi**2 * (1 - X**2) / 2 has density proportional to
        # sqrt(u) * exp(-u) on [0, chi**2 / 2], i.e. a Gamma(3/2) distribution
        # conditioned on [0, chi**2 / 2]. Therefore, to sample X from the
        # ARGUS distribution, we sample Y from the gamma distribution, keeping
        # only samples on [0, chi**2 / 2], and apply the inverse
        # transformation X = (1 - 2*Y/chi**2)**(1/2). Since we only
        # look at chi > 1.8, gamma(1.5).cdf(chi**2/2) is large enough such
        # that Y falls in the interval [0, chi**2 / 2] with a high probability:
        # stats.gamma(1.5).cdf(1.8**2/2) = 0.644...
        #
        # The points to switch between the different methods are determined
        # by a comparison of the runtime of the different methods. However,
        # the runtime is platform-dependent. The implemented values should
        # ensure a good overall performance and are supported by an analysis
        # of the rejection constants of different methods.

        size1d = tuple(np.atleast_1d(numsamples))
        N = int(np.prod(size1d))
        x = np.zeros(N)
        # Number of accepted samples written into `x` so far.
        simulated = 0
        chi2 = chi * chi
        if chi <= 0.5:
            d = -chi2 / 2
            while simulated < N:
                # Only request as many candidates as are still missing.
                k = N - simulated
                u = random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                z = v**(2/3)
                # acceptance condition: u <= h(G_inv(v)). This simplifies to
                accept = (np.log(u) <= d * z)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    # we still need to transform z=v**(2/3) to X = G_inv(v)
                    rvs = np.sqrt(1 - z[accept])
                    x[simulated:(simulated + num_accept)] = rvs
                    simulated += num_accept
        elif chi <= 1.8:
            echi = np.exp(-chi2 / 2)
            while simulated < N:
                k = N - simulated
                u = random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                z = 2 * np.log(echi * (1 - v) + v) / chi2
                # as in case one, simplify u <= h(G_inv(v)) and then transform
                # z to the target distribution X = G_inv(v)
                accept = (u**2 + z <= 0)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    rvs = np.sqrt(1 + z[accept])
                    x[simulated:(simulated + num_accept)] = rvs
                    simulated += num_accept
        else:
            # conditional Gamma for chi > 1.8
            while simulated < N:
                k = N - simulated
                g = random_state.standard_gamma(1.5, size=k)
                accept = (g <= chi2 / 2)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = g[accept]
                    simulated += num_accept
            # Map the accepted gamma draws Y back to X = sqrt(1 - 2*Y/chi**2).
            x = np.sqrt(1 - 2 * x / chi2)

        return np.reshape(x, size1d)

    def _stats(self, chi):
        # need to ensure that dtype is float
        # otherwise the mask below does not work for integers
        chi = np.asarray(chi, dtype=float)
        phi = _argus_phi(chi)
        m = np.sqrt(np.pi/8) * chi * sc.ive(1, chi**2/4) / phi
        # compute second moment, use Taylor expansion for small chi (<= 0.1)
        mu2 = np.empty_like(chi)
        mask = chi > 0.1
        c = chi[mask]
        mu2[mask] = 1 - 3 / c**2 + c * _norm_pdf(c) / phi[mask]
        c = chi[~mask]
        coef = [-358/65690625, 0, -94/1010625, 0, 2/2625, 0, 6/175, 0, 0.4]
        mu2[~mask] = np.polyval(coef, c)
        # Skewness and kurtosis are returned as None.
        return m, mu2 - m**2, None, None
# Module-level `argus_gen` instance, constructed with a=0.0 and b=1.0.
argus = argus_gen(name='argus', longname="An Argus Function", a=0.0, b=1.0)
9925class rv_histogram(rv_continuous):
9926 """
9927 Generates a distribution given by a histogram.
9928 This is useful to generate a template distribution from a binned
9929 datasample.
9931 As a subclass of the `rv_continuous` class, `rv_histogram` inherits from it
9932 a collection of generic methods (see `rv_continuous` for the full list),
9933 and implements them based on the properties of the provided binned
9934 datasample.
9936 Parameters
9937 ----------
9938 histogram : tuple of array_like
9939 Tuple containing two array_like objects.
9940 The first containing the content of n bins,
9941 the second containing the (n+1) bin boundaries.
9942 In particular, the return value of `numpy.histogram` is accepted.
9944 density : bool, optional
9945 If False, assumes the histogram is proportional to counts per bin;
9946 otherwise, assumes it is proportional to a density.
9947 For constant bin widths, these are equivalent, but the distinction
9948 is important when bin widths vary (see Notes).
9949 If None (default), sets ``density=True`` for backwards compatibility,
9950 but warns if the bin widths are variable. Set `density` explicitly
9951 to silence the warning.
9953 .. versionadded:: 1.10.0
9955 Notes
9956 -----
9957 When a histogram has unequal bin widths, there is a distinction between
9958 histograms that are proportional to counts per bin and histograms that are
9959 proportional to probability density over a bin. If `numpy.histogram` is
9960 called with its default ``density=False``, the resulting histogram is the
9961 number of counts per bin, so ``density=False`` should be passed to
9962 `rv_histogram`. If `numpy.histogram` is called with ``density=True``, the
9963 resulting histogram is in terms of probability density, so ``density=True``
9964 should be passed to `rv_histogram`. To avoid warnings, always pass
9965 ``density`` explicitly when the input histogram has unequal bin widths.
9967 There are no additional shape parameters except for the loc and scale.
9968 The pdf is defined as a stepwise function from the provided histogram.
9969 The cdf is a linear interpolation of the pdf.
9971 .. versionadded:: 0.19.0
9973 Examples
9974 --------
9976 Create a scipy.stats distribution from a numpy histogram
9978 >>> import scipy.stats
9979 >>> import numpy as np
9980 >>> data = scipy.stats.norm.rvs(size=100000, loc=0, scale=1.5, random_state=123)
9981 >>> hist = np.histogram(data, bins=100)
9982 >>> hist_dist = scipy.stats.rv_histogram(hist, density=False)
9984 Behaves like an ordinary scipy rv_continuous distribution
9986 >>> hist_dist.pdf(1.0)
9987 0.20538577847618705
9988 >>> hist_dist.cdf(2.0)
9989 0.90818568543056499
9991 PDF is zero above (below) the highest (lowest) bin of the histogram,
9992 defined by the max (min) of the original dataset
9994 >>> hist_dist.pdf(np.max(data))
9995 0.0
9996 >>> hist_dist.cdf(np.max(data))
9997 1.0
9998 >>> hist_dist.pdf(np.min(data))
9999 7.7591907244498314e-05
10000 >>> hist_dist.cdf(np.min(data))
10001 0.0
10003 PDF and CDF follow the histogram
10005 >>> import matplotlib.pyplot as plt
10006 >>> X = np.linspace(-5.0, 5.0, 100)
10007 >>> fig, ax = plt.subplots()
10008 >>> ax.set_title("PDF from Template")
10009 >>> ax.hist(data, density=True, bins=100)
10010 >>> ax.plot(X, hist_dist.pdf(X), label='PDF')
10011 >>> ax.plot(X, hist_dist.cdf(X), label='CDF')
10012 >>> ax.legend()
10013 >>> fig.show()
10015 """
10016 _support_mask = rv_continuous._support_mask
def __init__(self, histogram, *args, density=None, **kwargs):
    """
    Create a new distribution using the given histogram

    Parameters
    ----------
    histogram : tuple of array_like
        Tuple containing two array_like objects.
        The first containing the content of n bins,
        the second containing the (n+1) bin boundaries.
        In particular, the return value of np.histogram is accepted.
    density : bool, optional
        If False, assumes the histogram is proportional to counts per bin;
        otherwise, assumes it is proportional to a density.
        For constant bin widths, these are equivalent.
        If None (default), sets ``density=True`` for backward
        compatibility, but warns if the bin widths are variable. Set
        `density` explicitly to silence the warning.
    *args, **kwargs
        Forwarded to the parent class constructor. The keywords ``a`` and
        ``b`` are always overwritten with the first and last bin
        boundary.

    Raises
    ------
    ValueError
        If `histogram` does not have length 2, or if the number of bin
        boundaries is not one more than the number of bin contents.

    Warns
    -----
    RuntimeWarning
        If `density` is None and the bin widths are not constant.
    """
    # Keep the raw constructor inputs so that they can be handed back by
    # `_updated_ctor_param`.
    self._histogram = histogram
    self._density = density
    if len(histogram) != 2:
        raise ValueError("Expected length 2 for parameter histogram")
    self._hpdf = np.asarray(histogram[0])
    self._hbins = np.asarray(histogram[1])
    if len(self._hpdf) + 1 != len(self._hbins):
        raise ValueError("Number of elements in histogram content "
                         "and histogram boundaries do not match, "
                         "expected n and n+1.")
    self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
    bins_vary = not np.allclose(self._hbin_widths, self._hbin_widths[0])
    if density is None and bins_vary:
        # The two literals are concatenated; the trailing space keeps the
        # two sentences of the warning apart.
        message = ("Bin widths are not constant. Assuming `density=True`. "
                   "Specify `density` explicitly to silence this warning.")
        warnings.warn(message, RuntimeWarning, stacklevel=2)
        density = True
    elif not density:
        # Contents are proportional to counts: convert to a density by
        # dividing each bin by its width.
        self._hpdf = self._hpdf / self._hbin_widths

    # Normalize so that the piecewise-constant density integrates to one.
    self._hpdf = self._hpdf / float(np.sum(self._hpdf * self._hbin_widths))
    self._hcdf = np.cumsum(self._hpdf * self._hbin_widths)
    # Pad with zeros: the extra entries are the density outside the bins
    # and the CDF value at the first bin boundary.
    self._hpdf = np.hstack([0.0, self._hpdf, 0.0])
    self._hcdf = np.hstack([0.0, self._hcdf])
    # Set support
    kwargs['a'] = self.a = self._hbins[0]
    kwargs['b'] = self.b = self._hbins[-1]
    super().__init__(*args, **kwargs)
10066 def _pdf(self, x):
10067 """
10068 PDF of the histogram
10069 """
10070 return self._hpdf[np.searchsorted(self._hbins, x, side='right')]
10072 def _cdf(self, x):
10073 """
10074 CDF calculated from the histogram
10075 """
10076 return np.interp(x, self._hbins, self._hcdf)
10078 def _ppf(self, x):
10079 """
10080 Percentile function calculated from the histogram
10081 """
10082 return np.interp(x, self._hcdf, self._hbins)
10084 def _munp(self, n):
10085 """Compute the n-th non-central moment."""
10086 integrals = (self._hbins[1:]**(n+1) - self._hbins[:-1]**(n+1)) / (n+1)
10087 return np.sum(self._hpdf[1:-1] * integrals)
10089 def _entropy(self):
10090 """Compute entropy of distribution"""
10091 res = _lazywhere(self._hpdf[1:-1] > 0.0,
10092 (self._hpdf[1:-1],),
10093 np.log,
10094 0.0)
10095 return -np.sum(self._hpdf[1:-1] * res * self._hbin_widths)
def _updated_ctor_param(self):
    """
    Return the parent's constructor parameters, extended with the
    histogram and density setting this instance was created with
    """
    ctor_params = super()._updated_ctor_param()
    ctor_params.update(histogram=self._histogram, density=self._density)
    return ctor_params
class studentized_range_gen(rv_continuous):
    r"""A studentized range continuous random variable.

    %(before_notes)s

    See Also
    --------
    t: Student's t distribution

    Notes
    -----
    The probability density function for `studentized_range` is:

    .. math::

         f(x; k, \nu) = \frac{k(k-1)\nu^{\nu/2}}{\Gamma(\nu/2)
                        2^{\nu/2-1}} \int_{0}^{\infty} \int_{-\infty}^{\infty}
                        s^{\nu} e^{-\nu s^2/2} \phi(z) \phi(sx + z)
                        [\Phi(sx + z) - \Phi(z)]^{k-2} \,dz \,ds

    for :math:`x ≥ 0`, :math:`k > 1`, and :math:`\nu > 0`.

    `studentized_range` takes ``k`` for :math:`k` and ``df`` for :math:`\nu`
    as shape parameters.

    When :math:`\nu` is 100,000 or greater, an asymptotic approximation
    (infinite degrees of freedom) is used to compute the cumulative
    distribution function [4]_ and probability density function.

    %(after_notes)s

    References
    ----------

    .. [1] "Studentized range distribution",
           https://en.wikipedia.org/wiki/Studentized_range_distribution
    .. [2] Batista, Ben Dêivide, et al. "Externally Studentized Normal Midrange
           Distribution." Ciência e Agrotecnologia, vol. 41, no. 4, 2017, pp.
           378-389., doi:10.1590/1413-70542017414047716.
    .. [3] Harter, H. Leon. "Tables of Range and Studentized Range." The Annals
           of Mathematical Statistics, vol. 31, no. 4, 1960, pp. 1122-1147.
           JSTOR, www.jstor.org/stable/2237810. Accessed 18 Feb. 2021.
    .. [4] Lund, R. E., and J. R. Lund. "Algorithm AS 190: Probabilities and
           Upper Quantiles for the Studentized Range." Journal of the Royal
           Statistical Society. Series C (Applied Statistics), vol. 32, no. 2,
           1983, pp. 204-210. JSTOR, www.jstor.org/stable/2347300. Accessed 18
           Feb. 2021.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import studentized_range
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1, 1)

    Calculate the first four moments:

    >>> k, df = 3, 10
    >>> mean, var, skew, kurt = studentized_range.stats(k, df, moments='mvsk')

    Display the probability density function (``pdf``):

    >>> x = np.linspace(studentized_range.ppf(0.01, k, df),
    ...                 studentized_range.ppf(0.99, k, df), 100)
    >>> ax.plot(x, studentized_range.pdf(x, k, df),
    ...         'r-', lw=5, alpha=0.6, label='studentized_range pdf')

    Alternatively, the distribution object can be called (as a function)
    to fix the shape, location and scale parameters. This returns a "frozen"
    RV object holding the given parameters fixed.

    Freeze the distribution and display the frozen ``pdf``:

    >>> rv = studentized_range(k, df)
    >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

    Check accuracy of ``cdf`` and ``ppf``:

    >>> vals = studentized_range.ppf([0.001, 0.5, 0.999], k, df)
    >>> np.allclose([0.001, 0.5, 0.999], studentized_range.cdf(vals, k, df))
    True

    Rather than using (``studentized_range.rvs``) to generate random variates,
    which is very slow for this distribution, we can approximate the inverse
    CDF using an interpolator, and then perform inverse transform sampling
    with this approximate inverse CDF.

    This distribution has an infinite but thin right tail, so we focus our
    attention on the leftmost 99.9 percent.

    >>> a, b = studentized_range.ppf([0, .999], k, df)
    >>> float(a), float(b)
    (0.0, 7.41058083802274)

    >>> from scipy.interpolate import interp1d
    >>> rng = np.random.default_rng()
    >>> xs = np.linspace(a, b, 50)
    >>> cdf = studentized_range.cdf(xs, k, df)
    >>> # Create an interpolant of the inverse CDF
    >>> ppf = interp1d(cdf, xs, fill_value='extrapolate')
    >>> # Perform inverse transform sampling using the interpolant
    >>> r = ppf(rng.uniform(size=1000))

    And compare the histogram:

    >>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    >>> ax.legend(loc='best', frameon=False)
    >>> plt.show()

    """

    # Degrees of freedom at and above which `_pdf` and `_cdf` switch to the
    # single-integral asymptotic (infinite degrees of freedom) form.
    _ASYMPTOTIC_DF = 100000

    def _argcheck(self, k, df):
        return (k > 1) & (df > 0)

    def _shape_info(self):
        ik = _ShapeInfo("k", False, (1, np.inf), (False, False))
        idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
        return [ik, idf]

    def _fitstart(self, data):
        # Default is k=1, but that is not a valid value of the parameter.
        return super()._fitstart(data, args=(2, 1))

    @staticmethod
    def _nquad_lowlevel(cython_symbol, params, ranges):
        """Integrate the `_stats` integrand `cython_symbol` over `ranges`.

        `params` is packed into a float array whose data pointer is handed
        to the integrand as user data. Returns only the integral estimate
        (the first element of the `integrate.nquad` result).
        """
        usr_data = np.array(params, float).ctypes.data_as(ctypes.c_void_p)
        llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
        opts = dict(epsabs=1e-11, epsrel=1e-12)
        return integrate.nquad(llc, ranges=ranges, opts=opts)[0]

    def _munp(self, K, k, df):
        """Compute the K-th non-central moment by numerical integration."""
        _a, _b = self._get_support()

        # K, k and df are packed together into one numpy array for each
        # evaluation, so the moment is computed one scalar triple at a time.
        def _single_moment(K, k, df):
            log_const = _stats._studentized_range_pdf_logconst(k, df)
            ranges = [(-np.inf, np.inf), (0, np.inf), (_a, _b)]
            return self._nquad_lowlevel('_studentized_range_moment',
                                        [K, k, df, log_const], ranges)

        ufunc = np.frompyfunc(_single_moment, 3, 1)
        return np.float64(ufunc(K, k, df))

    def _pdf(self, x, k, df):
        """Evaluate the density at `x` by numerical integration."""

        def _single_pdf(q, k, df):
            # The infinite form of the PDF is derived from the infinite
            # CDF.
            if df < self._ASYMPTOTIC_DF:
                log_const = _stats._studentized_range_pdf_logconst(k, df)
                return self._nquad_lowlevel(
                    '_studentized_range_pdf', [q, k, df, log_const],
                    [(-np.inf, np.inf), (0, np.inf)])

            return self._nquad_lowlevel(
                '_studentized_range_pdf_asymptotic', [q, k],
                [(-np.inf, np.inf)])

        ufunc = np.frompyfunc(_single_pdf, 3, 1)
        return np.float64(ufunc(x, k, df))

    def _cdf(self, x, k, df):
        """Evaluate the CDF at `x` by numerical integration."""

        def _single_cdf(q, k, df):
            # "When the degrees of freedom V are infinite the probability
            # integral takes [on a] simpler form," and a single asymptotic
            # integral is evaluated rather than the standard double integral.
            # (Lund, Lund, page 205)
            if df < self._ASYMPTOTIC_DF:
                log_const = _stats._studentized_range_cdf_logconst(k, df)
                return self._nquad_lowlevel(
                    '_studentized_range_cdf', [q, k, df, log_const],
                    [(-np.inf, np.inf), (0, np.inf)])

            return self._nquad_lowlevel(
                '_studentized_range_cdf_asymptotic', [q, k],
                [(-np.inf, np.inf)])

        ufunc = np.frompyfunc(_single_cdf, 3, 1)

        # clip p-values to ensure they are in [0, 1].
        return np.clip(np.float64(ufunc(x, k, df)), 0, 1)
# Module-level instance of the distribution; `a=0` and `b=np.inf` are
# passed to the constructor as the bounds of the support.
studentized_range = studentized_range_gen(name='studentized_range', a=0,
                                          b=np.inf)


# Collect names of classes and objects in this module.
# The snapshot is taken from a copy of `globals()`, so it holds the names
# bound up to this point and is not affected by the assignments below.
pairs = list(globals().copy().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous)

# 'rv_histogram' is appended explicitly to the collected names.
__all__ = _distn_names + _distn_gen_names + ['rv_histogram']