Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_distn_infrastructure.py: 24%
1563 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1#
2# Author: Travis Oliphant 2002-2011 with contributions from
3# SciPy Developers 2004-2011
4#
5from scipy._lib._util import getfullargspec_no_self as _getfullargspec
7import sys
8import keyword
9import re
10import types
11import warnings
12from itertools import zip_longest
14from scipy._lib import doccer
15from ._distr_params import distcont, distdiscrete
16from scipy._lib._util import check_random_state
18from scipy.special import comb, entr
20# for root finding for continuous distribution ppf, and max likelihood
21# estimation
22from scipy import optimize
24# for functions of continuous distributions (e.g. moments, entropy, cdf)
25from scipy import integrate
27# to approximate the pdf of a continuous distribution given its cdf
28from scipy._lib._finite_differences import _derivative
30# for scipy.stats.entropy. Attempts to import just that function or file
31# have cause import problems
32from scipy import stats
34from numpy import (arange, putmask, ravel, ones, shape, ndarray, zeros, floor,
35 logical_and, log, sqrt, place, argmax, vectorize, asarray,
36 nan, inf, isinf, NINF, empty)
38import numpy as np
39from ._constants import _XMAX
40from scipy.stats._warnings_errors import FitError
42# These are the docstring parts used for substitution in specific
43# distribution docstrings
45docheaders = {'methods': """\nMethods\n-------\n""",
46 'notes': """\nNotes\n-----\n""",
47 'examples': """\nExamples\n--------\n"""}
49_doc_rvs = """\
50rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)
51 Random variates.
52"""
53_doc_pdf = """\
54pdf(x, %(shapes)s, loc=0, scale=1)
55 Probability density function.
56"""
57_doc_logpdf = """\
58logpdf(x, %(shapes)s, loc=0, scale=1)
59 Log of the probability density function.
60"""
61_doc_pmf = """\
62pmf(k, %(shapes)s, loc=0, scale=1)
63 Probability mass function.
64"""
65_doc_logpmf = """\
66logpmf(k, %(shapes)s, loc=0, scale=1)
67 Log of the probability mass function.
68"""
69_doc_cdf = """\
70cdf(x, %(shapes)s, loc=0, scale=1)
71 Cumulative distribution function.
72"""
73_doc_logcdf = """\
74logcdf(x, %(shapes)s, loc=0, scale=1)
75 Log of the cumulative distribution function.
76"""
77_doc_sf = """\
78sf(x, %(shapes)s, loc=0, scale=1)
79 Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
80"""
81_doc_logsf = """\
82logsf(x, %(shapes)s, loc=0, scale=1)
83 Log of the survival function.
84"""
85_doc_ppf = """\
86ppf(q, %(shapes)s, loc=0, scale=1)
87 Percent point function (inverse of ``cdf`` --- percentiles).
88"""
89_doc_isf = """\
90isf(q, %(shapes)s, loc=0, scale=1)
91 Inverse survival function (inverse of ``sf``).
92"""
93_doc_moment = """\
94moment(order, %(shapes)s, loc=0, scale=1)
95 Non-central moment of the specified order.
96"""
97_doc_stats = """\
98stats(%(shapes)s, loc=0, scale=1, moments='mv')
99 Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
100"""
101_doc_entropy = """\
102entropy(%(shapes)s, loc=0, scale=1)
103 (Differential) entropy of the RV.
104"""
105_doc_fit = """\
106fit(data)
107 Parameter estimates for generic data.
108 See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
109 keyword arguments.
110"""
111_doc_expect = """\
112expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
113 Expected value of a function (of one argument) with respect to the distribution.
114"""
115_doc_expect_discrete = """\
116expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)
117 Expected value of a function (of one argument) with respect to the distribution.
118"""
119_doc_median = """\
120median(%(shapes)s, loc=0, scale=1)
121 Median of the distribution.
122"""
123_doc_mean = """\
124mean(%(shapes)s, loc=0, scale=1)
125 Mean of the distribution.
126"""
127_doc_var = """\
128var(%(shapes)s, loc=0, scale=1)
129 Variance of the distribution.
130"""
131_doc_std = """\
132std(%(shapes)s, loc=0, scale=1)
133 Standard deviation of the distribution.
134"""
135_doc_interval = """\
136interval(confidence, %(shapes)s, loc=0, scale=1)
137 Confidence interval with equal areas around the median.
138"""
139_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
140 _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
141 _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
142 _doc_stats, _doc_entropy, _doc_fit,
143 _doc_expect, _doc_median,
144 _doc_mean, _doc_var, _doc_std, _doc_interval])
146_doc_default_longsummary = """\
147As an instance of the `rv_continuous` class, `%(name)s` object inherits from it
148a collection of generic methods (see below for the full list),
149and completes them with details specific for this particular distribution.
150"""
152_doc_default_frozen_note = """
153Alternatively, the object may be called (as a function) to fix the shape,
154location, and scale parameters returning a "frozen" continuous RV object:
156rv = %(name)s(%(shapes)s, loc=0, scale=1)
157 - Frozen RV object with the same methods but holding the given shape,
158 location, and scale fixed.
159"""
160_doc_default_example = """\
161Examples
162--------
163>>> import numpy as np
164>>> from scipy.stats import %(name)s
165>>> import matplotlib.pyplot as plt
166>>> fig, ax = plt.subplots(1, 1)
168Calculate the first four moments:
170%(set_vals_stmt)s
171>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
173Display the probability density function (``pdf``):
175>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
176... %(name)s.ppf(0.99, %(shapes)s), 100)
177>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
178... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')
180Alternatively, the distribution object can be called (as a function)
181to fix the shape, location and scale parameters. This returns a "frozen"
182RV object holding the given parameters fixed.
184Freeze the distribution and display the frozen ``pdf``:
186>>> rv = %(name)s(%(shapes)s)
187>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
189Check accuracy of ``cdf`` and ``ppf``:
191>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
192>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
193True
195Generate random numbers:
197>>> r = %(name)s.rvs(%(shapes)s, size=1000)
199And compare the histogram:
201>>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
202>>> ax.set_xlim([x[0], x[-1]])
203>>> ax.legend(loc='best', frameon=False)
204>>> plt.show()
206"""
208_doc_default_locscale = """\
209The probability density above is defined in the "standardized" form. To shift
210and/or scale the distribution use the ``loc`` and ``scale`` parameters.
211Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically
212equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with
213``y = (x - loc) / scale``. Note that shifting the location of a distribution
214does not make it a "noncentral" distribution; noncentral generalizations of
215some distributions are available in separate classes.
216"""
218_doc_default = ''.join([_doc_default_longsummary,
219 _doc_allmethods,
220 '\n',
221 _doc_default_example])
223_doc_default_before_notes = ''.join([_doc_default_longsummary,
224 _doc_allmethods])
226docdict = {
227 'rvs': _doc_rvs,
228 'pdf': _doc_pdf,
229 'logpdf': _doc_logpdf,
230 'cdf': _doc_cdf,
231 'logcdf': _doc_logcdf,
232 'sf': _doc_sf,
233 'logsf': _doc_logsf,
234 'ppf': _doc_ppf,
235 'isf': _doc_isf,
236 'stats': _doc_stats,
237 'entropy': _doc_entropy,
238 'fit': _doc_fit,
239 'moment': _doc_moment,
240 'expect': _doc_expect,
241 'interval': _doc_interval,
242 'mean': _doc_mean,
243 'std': _doc_std,
244 'var': _doc_var,
245 'median': _doc_median,
246 'allmethods': _doc_allmethods,
247 'longsummary': _doc_default_longsummary,
248 'frozennote': _doc_default_frozen_note,
249 'example': _doc_default_example,
250 'default': _doc_default,
251 'before_notes': _doc_default_before_notes,
252 'after_notes': _doc_default_locscale
253}
255# Reuse common content between continuous and discrete docs, change some
256# minor bits.
257docdict_discrete = docdict.copy()
259docdict_discrete['pmf'] = _doc_pmf
260docdict_discrete['logpmf'] = _doc_logpmf
261docdict_discrete['expect'] = _doc_expect_discrete
262_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
263 'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',
264 'mean', 'var', 'std', 'interval']
265for obj in _doc_disc_methods:
266 docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')
268_doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']
269for obj in _doc_disc_methods_err_varname:
270 docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')
272docdict_discrete.pop('pdf')
273docdict_discrete.pop('logpdf')
275_doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])
276docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods
278docdict_discrete['longsummary'] = _doc_default_longsummary.replace(
279 'rv_continuous', 'rv_discrete')
281_doc_default_frozen_note = """
282Alternatively, the object may be called (as a function) to fix the shape and
283location parameters returning a "frozen" discrete RV object:
285rv = %(name)s(%(shapes)s, loc=0)
286 - Frozen RV object with the same methods but holding the given shape and
287 location fixed.
288"""
289docdict_discrete['frozennote'] = _doc_default_frozen_note
291_doc_default_discrete_example = """\
292Examples
293--------
294>>> import numpy as np
295>>> from scipy.stats import %(name)s
296>>> import matplotlib.pyplot as plt
297>>> fig, ax = plt.subplots(1, 1)
299Calculate the first four moments:
301%(set_vals_stmt)s
302>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
304Display the probability mass function (``pmf``):
306>>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
307... %(name)s.ppf(0.99, %(shapes)s))
308>>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
309>>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)
311Alternatively, the distribution object can be called (as a function)
312to fix the shape and location. This returns a "frozen" RV object holding
313the given parameters fixed.
315Freeze the distribution and display the frozen ``pmf``:
317>>> rv = %(name)s(%(shapes)s)
318>>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
319... label='frozen pmf')
320>>> ax.legend(loc='best', frameon=False)
321>>> plt.show()
323Check accuracy of ``cdf`` and ``ppf``:
325>>> prob = %(name)s.cdf(x, %(shapes)s)
326>>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
327True
329Generate random numbers:
331>>> r = %(name)s.rvs(%(shapes)s, size=1000)
332"""
335_doc_default_discrete_locscale = """\
336The probability mass function above is defined in the "standardized" form.
337To shift distribution use the ``loc`` parameter.
338Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically
339equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.
340"""
342docdict_discrete['example'] = _doc_default_discrete_example
343docdict_discrete['after_notes'] = _doc_default_discrete_locscale
345_doc_default_before_notes = ''.join([docdict_discrete['longsummary'],
346 docdict_discrete['allmethods']])
347docdict_discrete['before_notes'] = _doc_default_before_notes
349_doc_default_disc = ''.join([docdict_discrete['longsummary'],
350 docdict_discrete['allmethods'],
351 docdict_discrete['frozennote'],
352 docdict_discrete['example']])
353docdict_discrete['default'] = _doc_default_disc
355# clean up all the separate docstring elements, we do not need them anymore
356for obj in [s for s in dir() if s.startswith('_doc_')]:
357 exec('del ' + obj)
358del obj
361def _moment(data, n, mu=None):
362 if mu is None:
363 mu = data.mean()
364 return ((data - mu)**n).mean()
367def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):
368 if (n == 0):
369 return 1.0
370 elif (n == 1):
371 if mu is None:
372 val = moment_func(1, *args)
373 else:
374 val = mu
375 elif (n == 2):
376 if mu2 is None or mu is None:
377 val = moment_func(2, *args)
378 else:
379 val = mu2 + mu*mu
380 elif (n == 3):
381 if g1 is None or mu2 is None or mu is None:
382 val = moment_func(3, *args)
383 else:
384 mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment
385 val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment
386 elif (n == 4):
387 if g1 is None or g2 is None or mu2 is None or mu is None:
388 val = moment_func(4, *args)
389 else:
390 mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
391 mu3 = g1*np.power(mu2, 1.5) # 3rd central moment
392 val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu
393 else:
394 val = moment_func(n, *args)
396 return val
399def _skew(data):
400 """
401 skew is third central moment / variance**(1.5)
402 """
403 data = np.ravel(data)
404 mu = data.mean()
405 m2 = ((data - mu)**2).mean()
406 m3 = ((data - mu)**3).mean()
407 return m3 / np.power(m2, 1.5)
410def _kurtosis(data):
411 """kurtosis is fourth central moment / variance**2 - 3."""
412 data = np.ravel(data)
413 mu = data.mean()
414 m2 = ((data - mu)**2).mean()
415 m4 = ((data - mu)**4).mean()
416 return m4 / m2**2 - 3
419def _fit_determine_optimizer(optimizer):
420 if not callable(optimizer) and isinstance(optimizer, str):
421 if not optimizer.startswith('fmin_'):
422 optimizer = "fmin_"+optimizer
423 if optimizer == 'fmin_':
424 optimizer = 'fmin'
425 try:
426 optimizer = getattr(optimize, optimizer)
427 except AttributeError as e:
428 raise ValueError("%s is not a valid optimizer" % optimizer) from e
429 return optimizer
432# Frozen RV class
433class rv_frozen:
435 def __init__(self, dist, *args, **kwds):
436 self.args = args
437 self.kwds = kwds
439 # create a new instance
440 self.dist = dist.__class__(**dist._updated_ctor_param())
442 shapes, _, _ = self.dist._parse_args(*args, **kwds)
443 self.a, self.b = self.dist._get_support(*shapes)
445 @property
446 def random_state(self):
447 return self.dist._random_state
449 @random_state.setter
450 def random_state(self, seed):
451 self.dist._random_state = check_random_state(seed)
453 def cdf(self, x):
454 return self.dist.cdf(x, *self.args, **self.kwds)
456 def logcdf(self, x):
457 return self.dist.logcdf(x, *self.args, **self.kwds)
459 def ppf(self, q):
460 return self.dist.ppf(q, *self.args, **self.kwds)
462 def isf(self, q):
463 return self.dist.isf(q, *self.args, **self.kwds)
465 def rvs(self, size=None, random_state=None):
466 kwds = self.kwds.copy()
467 kwds.update({'size': size, 'random_state': random_state})
468 return self.dist.rvs(*self.args, **kwds)
470 def sf(self, x):
471 return self.dist.sf(x, *self.args, **self.kwds)
473 def logsf(self, x):
474 return self.dist.logsf(x, *self.args, **self.kwds)
476 def stats(self, moments='mv'):
477 kwds = self.kwds.copy()
478 kwds.update({'moments': moments})
479 return self.dist.stats(*self.args, **kwds)
481 def median(self):
482 return self.dist.median(*self.args, **self.kwds)
484 def mean(self):
485 return self.dist.mean(*self.args, **self.kwds)
487 def var(self):
488 return self.dist.var(*self.args, **self.kwds)
490 def std(self):
491 return self.dist.std(*self.args, **self.kwds)
493 def moment(self, order=None, **kwds):
494 return self.dist.moment(order, *self.args, **self.kwds, **kwds)
496 def entropy(self):
497 return self.dist.entropy(*self.args, **self.kwds)
499 def interval(self, confidence=None, **kwds):
500 return self.dist.interval(confidence, *self.args, **self.kwds, **kwds)
502 def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
503 # expect method only accepts shape parameters as positional args
504 # hence convert self.args, self.kwds, also loc/scale
505 # See the .expect method docstrings for the meaning of
506 # other parameters.
507 a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
508 if isinstance(self.dist, rv_discrete):
509 return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)
510 else:
511 return self.dist.expect(func, a, loc, scale, lb, ub,
512 conditional, **kwds)
514 def support(self):
515 return self.dist.support(*self.args, **self.kwds)
518class rv_discrete_frozen(rv_frozen):
520 def pmf(self, k):
521 return self.dist.pmf(k, *self.args, **self.kwds)
523 def logpmf(self, k): # No error
524 return self.dist.logpmf(k, *self.args, **self.kwds)
527class rv_continuous_frozen(rv_frozen):
529 def pdf(self, x):
530 return self.dist.pdf(x, *self.args, **self.kwds)
532 def logpdf(self, x):
533 return self.dist.logpdf(x, *self.args, **self.kwds)
536def argsreduce(cond, *args):
537 """Clean arguments to:
539 1. Ensure all arguments are iterable (arrays of dimension at least one
540 2. If cond != True and size > 1, ravel(args[i]) where ravel(condition) is
541 True, in 1D.
543 Return list of processed arguments.
545 Examples
546 --------
547 >>> import numpy as np
548 >>> rng = np.random.default_rng()
549 >>> A = rng.random((4, 5))
550 >>> B = 2
551 >>> C = rng.random((1, 5))
552 >>> cond = np.ones(A.shape)
553 >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
554 >>> A1.shape
555 (4, 5)
556 >>> B1.shape
557 (1,)
558 >>> C1.shape
559 (1, 5)
560 >>> cond[2,:] = 0
561 >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
562 >>> A1.shape
563 (15,)
564 >>> B1.shape
565 (1,)
566 >>> C1.shape
567 (15,)
569 """
570 # some distributions assume arguments are iterable.
571 newargs = np.atleast_1d(*args)
573 # np.atleast_1d returns an array if only one argument, or a list of arrays
574 # if more than one argument.
575 if not isinstance(newargs, list):
576 newargs = [newargs, ]
578 if np.all(cond):
579 # broadcast arrays with cond
580 *newargs, cond = np.broadcast_arrays(*newargs, cond)
581 return [arg.ravel() for arg in newargs]
583 s = cond.shape
584 # np.extract returns flattened arrays, which are not broadcastable together
585 # unless they are either the same size or size == 1.
586 return [(arg if np.size(arg) == 1
587 else np.extract(cond, np.broadcast_to(arg, s)))
588 for arg in newargs]
591parse_arg_template = """
592def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):
593 return (%(shape_arg_str)s), %(locscale_out)s
595def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):
596 return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)
598def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):
599 return (%(shape_arg_str)s), %(locscale_out)s, moments
600"""
603class rv_generic:
604 """Class which encapsulates common functionality between rv_discrete
605 and rv_continuous.
607 """
608 def __init__(self, seed=None):
609 super().__init__()
611 # figure out if _stats signature has 'moments' keyword
612 sig = _getfullargspec(self._stats)
613 self._stats_has_moments = ((sig.varkw is not None) or
614 ('moments' in sig.args) or
615 ('moments' in sig.kwonlyargs))
616 self._random_state = check_random_state(seed)
618 @property
619 def random_state(self):
620 """Get or set the generator object for generating random variates.
622 If `random_state` is None (or `np.random`), the
623 `numpy.random.RandomState` singleton is used.
624 If `random_state` is an int, a new ``RandomState`` instance is used,
625 seeded with `random_state`.
626 If `random_state` is already a ``Generator`` or ``RandomState``
627 instance, that instance is used.
629 """
630 return self._random_state
632 @random_state.setter
633 def random_state(self, seed):
634 self._random_state = check_random_state(seed)
636 def __setstate__(self, state):
637 try:
638 self.__dict__.update(state)
639 # attaches the dynamically created methods on each instance.
640 # if a subclass overrides rv_generic.__setstate__, or implements
641 # it's own _attach_methods, then it must make sure that
642 # _attach_argparser_methods is called.
643 self._attach_methods()
644 except ValueError:
645 # reconstitute an old pickle scipy<1.6, that contains
646 # (_ctor_param, random_state) as state
647 self._ctor_param = state[0]
648 self._random_state = state[1]
649 self.__init__()
651 def _attach_methods(self):
652 """Attaches dynamically created methods to the rv_* instance.
654 This method must be overridden by subclasses, and must itself call
655 _attach_argparser_methods. This method is called in __init__ in
656 subclasses, and in __setstate__
657 """
658 raise NotImplementedError
660 def _attach_argparser_methods(self):
661 """
662 Generates the argument-parsing functions dynamically and attaches
663 them to the instance.
665 Should be called from `_attach_methods`, typically in __init__ and
666 during unpickling (__setstate__)
667 """
668 ns = {}
669 exec(self._parse_arg_template, ns)
670 # NB: attach to the instance, not class
671 for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:
672 setattr(self, name, types.MethodType(ns[name], self))
674 def _construct_argparser(
675 self, meths_to_inspect, locscale_in, locscale_out):
676 """Construct the parser string for the shape arguments.
678 This method should be called in __init__ of a class for each
679 distribution. It creates the `_parse_arg_template` attribute that is
680 then used by `_attach_argparser_methods` to dynamically create and
681 attach the `_parse_args`, `_parse_args_stats`, `_parse_args_rvs`
682 methods to the instance.
684 If self.shapes is a non-empty string, interprets it as a
685 comma-separated list of shape parameters.
687 Otherwise inspects the call signatures of `meths_to_inspect`
688 and constructs the argument-parsing functions from these.
689 In this case also sets `shapes` and `numargs`.
690 """
692 if self.shapes:
693 # sanitize the user-supplied shapes
694 if not isinstance(self.shapes, str):
695 raise TypeError('shapes must be a string.')
697 shapes = self.shapes.replace(',', ' ').split()
699 for field in shapes:
700 if keyword.iskeyword(field):
701 raise SyntaxError('keywords cannot be used as shapes.')
702 if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):
703 raise SyntaxError(
704 'shapes must be valid python identifiers')
705 else:
706 # find out the call signatures (_pdf, _cdf etc), deduce shape
707 # arguments. Generic methods only have 'self, x', any further args
708 # are shapes.
709 shapes_list = []
710 for meth in meths_to_inspect:
711 shapes_args = _getfullargspec(meth) # NB does not contain self
712 args = shapes_args.args[1:] # peel off 'x', too
714 if args:
715 shapes_list.append(args)
717 # *args or **kwargs are not allowed w/automatic shapes
718 if shapes_args.varargs is not None:
719 raise TypeError(
720 '*args are not allowed w/out explicit shapes')
721 if shapes_args.varkw is not None:
722 raise TypeError(
723 '**kwds are not allowed w/out explicit shapes')
724 if shapes_args.kwonlyargs:
725 raise TypeError(
726 'kwonly args are not allowed w/out explicit shapes')
727 if shapes_args.defaults is not None:
728 raise TypeError('defaults are not allowed for shapes')
730 if shapes_list:
731 shapes = shapes_list[0]
733 # make sure the signatures are consistent
734 for item in shapes_list:
735 if item != shapes:
736 raise TypeError('Shape arguments are inconsistent.')
737 else:
738 shapes = []
740 # have the arguments, construct the method from template
741 shapes_str = ', '.join(shapes) + ', ' if shapes else '' # NB: not None
742 dct = dict(shape_arg_str=shapes_str,
743 locscale_in=locscale_in,
744 locscale_out=locscale_out,
745 )
747 # this string is used by _attach_argparser_methods
748 self._parse_arg_template = parse_arg_template % dct
750 self.shapes = ', '.join(shapes) if shapes else None
751 if not hasattr(self, 'numargs'):
752 # allows more general subclassing with *args
753 self.numargs = len(shapes)
755 def _construct_doc(self, docdict, shapes_vals=None):
756 """Construct the instance docstring with string substitutions."""
757 tempdict = docdict.copy()
758 tempdict['name'] = self.name or 'distname'
759 tempdict['shapes'] = self.shapes or ''
761 if shapes_vals is None:
762 shapes_vals = ()
763 vals = ', '.join('%.3g' % val for val in shapes_vals)
764 tempdict['vals'] = vals
766 tempdict['shapes_'] = self.shapes or ''
767 if self.shapes and self.numargs == 1:
768 tempdict['shapes_'] += ','
770 if self.shapes:
771 tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals)
772 else:
773 tempdict['set_vals_stmt'] = ''
775 if self.shapes is None:
776 # remove shapes from call parameters if there are none
777 for item in ['default', 'before_notes']:
778 tempdict[item] = tempdict[item].replace(
779 "\n%(shapes)s : array_like\n shape parameters", "")
780 for i in range(2):
781 if self.shapes is None:
782 # necessary because we use %(shapes)s in two forms (w w/o ", ")
783 self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
784 try:
785 self.__doc__ = doccer.docformat(self.__doc__, tempdict)
786 except TypeError as e:
787 raise Exception("Unable to construct docstring for "
788 "distribution \"%s\": %s" %
789 (self.name, repr(e))) from e
791 # correct for empty shapes
792 self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')
794 def _construct_default_doc(self, longname=None, extradoc=None,
795 docdict=None, discrete='continuous'):
796 """Construct instance docstring from the default template."""
797 if longname is None:
798 longname = 'A'
799 if extradoc is None:
800 extradoc = ''
801 if extradoc.startswith('\n\n'):
802 extradoc = extradoc[2:]
803 self.__doc__ = ''.join(['%s %s random variable.' % (longname, discrete),
804 '\n\n%(before_notes)s\n', docheaders['notes'],
805 extradoc, '\n%(example)s'])
806 self._construct_doc(docdict)
808 def freeze(self, *args, **kwds):
809 """Freeze the distribution for the given arguments.
811 Parameters
812 ----------
813 arg1, arg2, arg3,... : array_like
814 The shape parameter(s) for the distribution. Should include all
815 the non-optional arguments, may include ``loc`` and ``scale``.
817 Returns
818 -------
819 rv_frozen : rv_frozen instance
820 The frozen distribution.
822 """
823 if isinstance(self, rv_continuous):
824 return rv_continuous_frozen(self, *args, **kwds)
825 else:
826 return rv_discrete_frozen(self, *args, **kwds)
828 def __call__(self, *args, **kwds):
829 return self.freeze(*args, **kwds)
830 __call__.__doc__ = freeze.__doc__
832 # The actual calculation functions (no basic checking need be done)
833 # If these are defined, the others won't be looked at.
834 # Otherwise, the other set can be defined.
835 def _stats(self, *args, **kwds):
836 return None, None, None, None
838 # Noncentral moments (also known as the moment about the origin).
839 # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime".
840 # The primed mu is a widely used notation for the noncentral moment.
841 def _munp(self, n, *args):
842 # Silence floating point warnings from integration.
843 with np.errstate(all='ignore'):
844 vals = self.generic_moment(n, *args)
845 return vals
847 def _argcheck_rvs(self, *args, **kwargs):
848 # Handle broadcasting and size validation of the rvs method.
849 # Subclasses should not have to override this method.
850 # The rule is that if `size` is not None, then `size` gives the
851 # shape of the result (integer values of `size` are treated as
852 # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)
853 #
854 # `args` is expected to contain the shape parameters (if any), the
855 # location and the scale in a flat tuple (e.g. if there are two
856 # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).
857 # The only keyword argument expected is 'size'.
858 size = kwargs.get('size', None)
859 all_bcast = np.broadcast_arrays(*args)
861 def squeeze_left(a):
862 while a.ndim > 0 and a.shape[0] == 1:
863 a = a[0]
864 return a
866 # Eliminate trivial leading dimensions. In the convention
867 # used by numpy's random variate generators, trivial leading
868 # dimensions are effectively ignored. In other words, when `size`
869 # is given, trivial leading dimensions of the broadcast parameters
870 # in excess of the number of dimensions in size are ignored, e.g.
871 # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)
872 # array([ 1.00104267, 3.00422496, 4.99799278])
873 # If `size` is not given, the exact broadcast shape is preserved:
874 # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])
875 # array([[[[ 1.00862899, 3.00061431, 4.99867122]]]])
876 #
877 all_bcast = [squeeze_left(a) for a in all_bcast]
878 bcast_shape = all_bcast[0].shape
879 bcast_ndim = all_bcast[0].ndim
881 if size is None:
882 size_ = bcast_shape
883 else:
884 size_ = tuple(np.atleast_1d(size))
886 # Check compatibility of size_ with the broadcast shape of all
887 # the parameters. This check is intended to be consistent with
888 # how the numpy random variate generators (e.g. np.random.normal,
889 # np.random.beta) handle their arguments. The rule is that, if size
890 # is given, it determines the shape of the output. Broadcasting
891 # can't change the output size.
893 # This is the standard broadcasting convention of extending the
894 # shape with fewer dimensions with enough dimensions of length 1
895 # so that the two shapes have the same number of dimensions.
896 ndiff = bcast_ndim - len(size_)
897 if ndiff < 0:
898 bcast_shape = (1,)*(-ndiff) + bcast_shape
899 elif ndiff > 0:
900 size_ = (1,)*ndiff + size_
902 # This compatibility test is not standard. In "regular" broadcasting,
903 # two shapes are compatible if for each dimension, the lengths are the
904 # same or one of the lengths is 1. Here, the length of a dimension in
905 # size_ must not be less than the corresponding length in bcast_shape.
906 ok = all([bcdim == 1 or bcdim == szdim
907 for (bcdim, szdim) in zip(bcast_shape, size_)])
908 if not ok:
909 raise ValueError("size does not match the broadcast shape of "
910 "the parameters. %s, %s, %s" % (size, size_,
911 bcast_shape))
913 param_bcast = all_bcast[:-2]
914 loc_bcast = all_bcast[-2]
915 scale_bcast = all_bcast[-1]
917 return param_bcast, loc_bcast, scale_bcast, size_
919 # These are the methods you must define (standard form functions)
920 # NB: generic _pdf, _logpdf, _cdf are different for
921 # rv_continuous and rv_discrete hence are defined in there
922 def _argcheck(self, *args):
923 """Default check for correct values on args and keywords.
925 Returns condition array of 1's where arguments are correct and
926 0's where they are not.
928 """
929 cond = 1
930 for arg in args:
931 cond = logical_and(cond, (asarray(arg) > 0))
932 return cond
934 def _get_support(self, *args, **kwargs):
935 """Return the support of the (unscaled, unshifted) distribution.
937 *Must* be overridden by distributions which have support dependent
938 upon the shape parameters of the distribution. Any such override
939 *must not* set or change any of the class members, as these members
940 are shared amongst all instances of the distribution.
942 Parameters
943 ----------
944 arg1, arg2, ... : array_like
945 The shape parameter(s) for the distribution (see docstring of the
946 instance object for more information).
948 Returns
949 -------
950 a, b : numeric (float, or int or +/-np.inf)
951 end-points of the distribution's support for the specified
952 shape parameters.
953 """
954 return self.a, self.b
956 def _support_mask(self, x, *args):
957 a, b = self._get_support(*args)
958 with np.errstate(invalid='ignore'):
959 return (a <= x) & (x <= b)
961 def _open_support_mask(self, x, *args):
962 a, b = self._get_support(*args)
963 with np.errstate(invalid='ignore'):
964 return (a < x) & (x < b)
966 def _rvs(self, *args, size=None, random_state=None):
967 # This method must handle size being a tuple, and it must
968 # properly broadcast *args and size. size might be
969 # an empty tuple, which means a scalar random variate is to be
970 # generated.
972 # Use basic inverse cdf algorithm for RV generation as default.
973 U = random_state.uniform(size=size)
974 Y = self._ppf(U, *args)
975 return Y
977 def _logcdf(self, x, *args):
978 with np.errstate(divide='ignore'):
979 return log(self._cdf(x, *args))
981 def _sf(self, x, *args):
982 return 1.0-self._cdf(x, *args)
984 def _logsf(self, x, *args):
985 with np.errstate(divide='ignore'):
986 return log(self._sf(x, *args))
988 def _ppf(self, q, *args):
989 return self._ppfvec(q, *args)
991 def _isf(self, q, *args):
992 return self._ppf(1.0-q, *args) # use correct _ppf for subclasses
994 # These are actually called, and should not be overwritten if you
995 # want to keep error checking.
996 def rvs(self, *args, **kwds):
997 """Random variates of given type.
999 Parameters
1000 ----------
1001 arg1, arg2, arg3,... : array_like
1002 The shape parameter(s) for the distribution (see docstring of the
1003 instance object for more information).
1004 loc : array_like, optional
1005 Location parameter (default=0).
1006 scale : array_like, optional
1007 Scale parameter (default=1).
1008 size : int or tuple of ints, optional
1009 Defining number of random variates (default is 1).
1010 random_state : {None, int, `numpy.random.Generator`,
1011 `numpy.random.RandomState`}, optional
1013 If `random_state` is None (or `np.random`), the
1014 `numpy.random.RandomState` singleton is used.
1015 If `random_state` is an int, a new ``RandomState`` instance is
1016 used, seeded with `random_state`.
1017 If `random_state` is already a ``Generator`` or ``RandomState``
1018 instance, that instance is used.
1020 Returns
1021 -------
1022 rvs : ndarray or scalar
1023 Random variates of given `size`.
1025 """
1026 discrete = kwds.pop('discrete', None)
1027 rndm = kwds.pop('random_state', None)
1028 args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
1029 cond = logical_and(self._argcheck(*args), (scale >= 0))
1030 if not np.all(cond):
1031 message = ("Domain error in arguments. The `scale` parameter must "
1032 "be positive for all distributions, and many "
1033 "distributions have restrictions on shape parameters. "
1034 f"Please see the `scipy.stats.{self.name}` "
1035 "documentation for details.")
1036 raise ValueError(message)
1038 if np.all(scale == 0):
1039 return loc*ones(size, 'd')
1041 # extra gymnastics needed for a custom random_state
1042 if rndm is not None:
1043 random_state_saved = self._random_state
1044 random_state = check_random_state(rndm)
1045 else:
1046 random_state = self._random_state
1048 vals = self._rvs(*args, size=size, random_state=random_state)
1050 vals = vals * scale + loc
1052 # do not forget to restore the _random_state
1053 if rndm is not None:
1054 self._random_state = random_state_saved
1056 # Cast to int if discrete
1057 if discrete and not isinstance(self, rv_sample):
1058 if size == ():
1059 vals = int(vals)
1060 else:
1061 vals = vals.astype(np.int64)
1063 return vals
1065 def stats(self, *args, **kwds):
1066 """Some statistics of the given RV.
1068 Parameters
1069 ----------
1070 arg1, arg2, arg3,... : array_like
1071 The shape parameter(s) for the distribution (see docstring of the
1072 instance object for more information)
1073 loc : array_like, optional
1074 location parameter (default=0)
1075 scale : array_like, optional (continuous RVs only)
1076 scale parameter (default=1)
1077 moments : str, optional
1078 composed of letters ['mvsk'] defining which moments to compute:
1079 'm' = mean,
1080 'v' = variance,
1081 's' = (Fisher's) skew,
1082 'k' = (Fisher's) kurtosis.
1083 (default is 'mv')
1085 Returns
1086 -------
1087 stats : sequence
1088 of requested moments.
1090 """
1091 args, loc, scale, moments = self._parse_args_stats(*args, **kwds)
1092 # scale = 1 by construction for discrete RVs
1093 loc, scale = map(asarray, (loc, scale))
1094 args = tuple(map(asarray, args))
1095 cond = self._argcheck(*args) & (scale > 0) & (loc == loc)
1096 output = []
1097 default = np.full(shape(cond), fill_value=self.badvalue)
1099 # Use only entries that are valid in calculation
1100 if np.any(cond):
1101 goodargs = argsreduce(cond, *(args+(scale, loc)))
1102 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
1104 if self._stats_has_moments:
1105 mu, mu2, g1, g2 = self._stats(*goodargs,
1106 **{'moments': moments})
1107 else:
1108 mu, mu2, g1, g2 = self._stats(*goodargs)
1110 if 'm' in moments:
1111 if mu is None:
1112 mu = self._munp(1, *goodargs)
1113 out0 = default.copy()
1114 place(out0, cond, mu * scale + loc)
1115 output.append(out0)
1117 if 'v' in moments:
1118 if mu2 is None:
1119 mu2p = self._munp(2, *goodargs)
1120 if mu is None:
1121 mu = self._munp(1, *goodargs)
1122 # if mean is inf then var is also inf
1123 with np.errstate(invalid='ignore'):
1124 mu2 = np.where(~np.isinf(mu), mu2p - mu**2, np.inf)
1125 out0 = default.copy()
1126 place(out0, cond, mu2 * scale * scale)
1127 output.append(out0)
1129 if 's' in moments:
1130 if g1 is None:
1131 mu3p = self._munp(3, *goodargs)
1132 if mu is None:
1133 mu = self._munp(1, *goodargs)
1134 if mu2 is None:
1135 mu2p = self._munp(2, *goodargs)
1136 mu2 = mu2p - mu * mu
1137 with np.errstate(invalid='ignore'):
1138 mu3 = (-mu*mu - 3*mu2)*mu + mu3p
1139 g1 = mu3 / np.power(mu2, 1.5)
1140 out0 = default.copy()
1141 place(out0, cond, g1)
1142 output.append(out0)
1144 if 'k' in moments:
1145 if g2 is None:
1146 mu4p = self._munp(4, *goodargs)
1147 if mu is None:
1148 mu = self._munp(1, *goodargs)
1149 if mu2 is None:
1150 mu2p = self._munp(2, *goodargs)
1151 mu2 = mu2p - mu * mu
1152 if g1 is None:
1153 mu3 = None
1154 else:
1155 # (mu2**1.5) breaks down for nan and inf
1156 mu3 = g1 * np.power(mu2, 1.5)
1157 if mu3 is None:
1158 mu3p = self._munp(3, *goodargs)
1159 with np.errstate(invalid='ignore'):
1160 mu3 = (-mu * mu - 3 * mu2) * mu + mu3p
1161 with np.errstate(invalid='ignore'):
1162 mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p
1163 g2 = mu4 / mu2**2.0 - 3.0
1164 out0 = default.copy()
1165 place(out0, cond, g2)
1166 output.append(out0)
1167 else: # no valid args
1168 output = [default.copy() for _ in moments]
1170 output = [out[()] for out in output]
1171 if len(output) == 1:
1172 return output[0]
1173 else:
1174 return tuple(output)
1176 def entropy(self, *args, **kwds):
1177 """Differential entropy of the RV.
1179 Parameters
1180 ----------
1181 arg1, arg2, arg3,... : array_like
1182 The shape parameter(s) for the distribution (see docstring of the
1183 instance object for more information).
1184 loc : array_like, optional
1185 Location parameter (default=0).
1186 scale : array_like, optional (continuous distributions only).
1187 Scale parameter (default=1).
1189 Notes
1190 -----
1191 Entropy is defined base `e`:
1193 >>> import numpy as np
1194 >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5)))
1195 >>> np.allclose(drv.entropy(), np.log(2.0))
1196 True
1198 """
1199 args, loc, scale = self._parse_args(*args, **kwds)
1200 # NB: for discrete distributions scale=1 by construction in _parse_args
1201 loc, scale = map(asarray, (loc, scale))
1202 args = tuple(map(asarray, args))
1203 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
1204 output = zeros(shape(cond0), 'd')
1205 place(output, (1-cond0), self.badvalue)
1206 goodargs = argsreduce(cond0, scale, *args)
1207 goodscale = goodargs[0]
1208 goodargs = goodargs[1:]
1209 place(output, cond0, self.vecentropy(*goodargs) + log(goodscale))
1210 return output[()]
1212 def moment(self, order=None, *args, **kwds):
1213 """non-central moment of distribution of specified order.
1215 .. deprecated:: 1.9.0
1216 Parameter `n` is replaced by parameter `order` to avoid name
1217 collisions with the shape parameter `n` of several distributions.
1218 Parameter `n` will be removed in SciPy 1.11.0.
1220 Parameters
1221 ----------
1222 order : int, order >= 1
1223 Order of moment.
1224 arg1, arg2, arg3,... : float
1225 The shape parameter(s) for the distribution (see docstring of the
1226 instance object for more information).
1227 loc : array_like, optional
1228 location parameter (default=0)
1229 scale : array_like, optional
1230 scale parameter (default=1)
1232 """
1233 # This function was originally written with parameter `n`, but `n`
1234 # is also the name of many distribution shape parameters.
1235 # This block allows the function to accept both `n` and its
1236 # replacement `order` during a deprecation period; it can be removed
1237 # in the second release after 1.9.0.
1238 # The logic to provide a DeprecationWarning only when `n` is passed
1239 # as a keyword, accept the new keyword `order`, and otherwise be
1240 # backward-compatible deserves explanation. We need to look out for
1241 # the following:
1242 # * Does the distribution have a shape named `n`?
1243 # * Is `order` provided? It doesn't matter whether it is provided as a
1244 # positional or keyword argument; it will be used as the order of the
1245 # moment rather than a distribution shape parameter because:
1246 # - The first positional argument of `moment` has always been the
1247 # order of the moment.
1248 # - The keyword `order` is new, so it's unambiguous that it refers to
1249 # the order of the moment.
1250 # * Is `n` provided as a keyword argument? It _does_ matter whether it
1251 # is provided as a positional or keyword argument.
1252 # - The first positional argument of `moment` has always been the
1253 # order of moment, but
1254 # - if `n` is provided as a keyword argument, its meaning depends
1255 # on whether the distribution accepts `n` as a shape parameter.
1256 has_shape_n = (self.shapes is not None
1257 and "n" in (self.shapes.split(", ")))
1258 got_order = order is not None
1259 got_keyword_n = kwds.get("n", None) is not None
1261 # These lead to the following cases.
1262 # Case A: If the distribution _does_ accept `n` as a shape
1263 # 1. If both `order` and `n` are provided, this is now OK:
1264 # it is unambiguous that `order` is the order of the moment and `n`
1265 # is the shape parameter. Previously, this would have caused an
1266 # error because `n` was provided both as a keyword argument and
1267 # as the first positional argument. I don't think it is credible for
1268 # users to rely on this error in their code, though, so I don't see
1269 # this as a backward compatibility break.
1270 # 2. If only `n` is provided (as a keyword argument), this would have
1271 # been an error in the past because `n` would have been treated as
1272 # the order of the moment while the shape parameter would be
1273 # missing. It is still the same type of error, but for a different
1274 # reason: now, `n` is treated as the shape parameter while the
1275 # order of the moment is missing.
1276 # 3. If only `order` is provided, no special treament is needed.
1277 # Clearly this value is intended to be the order of the moment,
1278 # and the rest of the function will determine whether `n` is
1279 # available as a shape parameter in `args`.
1280 # 4. If neither `n` nor `order` is provided, this would have been an
1281 # error (order of the moment is not provided) and it is still an
1282 # error for the same reason.
1284 # Case B: the distribution does _not_ accept `n` as a shape
1285 # 1. If both `order` and `n` are provided, this was an error, and it
1286 # still is an error: two values for same parameter.
1287 # 2. If only `n` is provided (as a keyword argument), this was OK and
1288 # is still OK, but there shold now be a `DeprecationWarning`. The
1289 # value of `n` should be removed from `kwds` and stored in `order`.
1290 # 3. If only `order` is provided, there was no problem before providing
1291 # only the first argument of `moment`, and there is no problem with
1292 # that now.
1293 # 4. If neither `n` nor `order` is provided, this would have been an
1294 # error (order of the moment is not provided), and it is still an
1295 # error for the same reason.
1296 if not got_order and ((not got_keyword_n) # A4 and B4
1297 or (got_keyword_n and has_shape_n)): # A2
1298 message = ("moment() missing 1 required "
1299 "positional argument: `order`")
1300 raise TypeError(message)
1302 if got_keyword_n and not has_shape_n:
1303 if got_order: # B1
1304 # this will change to "moment got unexpected argument n"
1305 message = "moment() got multiple values for first argument"
1306 raise TypeError(message)
1307 else: # B2
1308 message = ("Use of keyword argument 'n' for method 'moment is"
1309 " deprecated and will be removed in SciPy 1.11.0. "
1310 "Use first positional argument or keyword argument"
1311 " 'order' instead.")
1312 order = kwds.pop("n")
1313 warnings.warn(message, DeprecationWarning, stacklevel=2)
1314 n = order
1315 # No special treatment of A1, A3, or B3 is needed because the order
1316 # of the moment is now in variable `n` and the shape parameter, if
1317 # needed, will be fished out of `args` or `kwds` by _parse_args
1318 # A3 might still cause an error if the shape parameter called `n`
1319 # is not found in `args`.
1321 shapes, loc, scale = self._parse_args(*args, **kwds)
1322 args = np.broadcast_arrays(*(*shapes, loc, scale))
1323 *shapes, loc, scale = args
1325 i0 = np.logical_and(self._argcheck(*shapes), scale > 0)
1326 i1 = np.logical_and(i0, loc == 0)
1327 i2 = np.logical_and(i0, loc != 0)
1329 args = argsreduce(i0, *shapes, loc, scale)
1330 *shapes, loc, scale = args
1332 if (floor(n) != n):
1333 raise ValueError("Moment must be an integer.")
1334 if (n < 0):
1335 raise ValueError("Moment must be positive.")
1336 mu, mu2, g1, g2 = None, None, None, None
1337 if (n > 0) and (n < 5):
1338 if self._stats_has_moments:
1339 mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'mvsk'}[n]}
1340 else:
1341 mdict = {}
1342 mu, mu2, g1, g2 = self._stats(*shapes, **mdict)
1343 val = np.empty(loc.shape) # val needs to be indexed by loc
1344 val[...] = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, shapes)
1346 # Convert to transformed X = L + S*Y
1347 # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n)
1348 result = zeros(i0.shape)
1349 place(result, ~i0, self.badvalue)
1351 if i1.any():
1352 res1 = scale[loc == 0]**n * val[loc == 0]
1353 place(result, i1, res1)
1355 if i2.any():
1356 mom = [mu, mu2, g1, g2]
1357 arrs = [i for i in mom if i is not None]
1358 idx = [i for i in range(4) if mom[i] is not None]
1359 if any(idx):
1360 arrs = argsreduce(loc != 0, *arrs)
1361 j = 0
1362 for i in idx:
1363 mom[i] = arrs[j]
1364 j += 1
1365 mu, mu2, g1, g2 = mom
1366 args = argsreduce(loc != 0, *shapes, loc, scale, val)
1367 *shapes, loc, scale, val = args
1369 res2 = zeros(loc.shape, dtype='d')
1370 fac = scale / loc
1371 for k in range(n):
1372 valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp,
1373 shapes)
1374 res2 += comb(n, k, exact=True)*fac**k * valk
1375 res2 += fac**n * val
1376 res2 *= loc**n
1377 place(result, i2, res2)
1379 return result[()]
1381 def median(self, *args, **kwds):
1382 """Median of the distribution.
1384 Parameters
1385 ----------
1386 arg1, arg2, arg3,... : array_like
1387 The shape parameter(s) for the distribution (see docstring of the
1388 instance object for more information)
1389 loc : array_like, optional
1390 Location parameter, Default is 0.
1391 scale : array_like, optional
1392 Scale parameter, Default is 1.
1394 Returns
1395 -------
1396 median : float
1397 The median of the distribution.
1399 See Also
1400 --------
1401 rv_discrete.ppf
1402 Inverse of the CDF
1404 """
1405 return self.ppf(0.5, *args, **kwds)
1407 def mean(self, *args, **kwds):
1408 """Mean of the distribution.
1410 Parameters
1411 ----------
1412 arg1, arg2, arg3,... : array_like
1413 The shape parameter(s) for the distribution (see docstring of the
1414 instance object for more information)
1415 loc : array_like, optional
1416 location parameter (default=0)
1417 scale : array_like, optional
1418 scale parameter (default=1)
1420 Returns
1421 -------
1422 mean : float
1423 the mean of the distribution
1425 """
1426 kwds['moments'] = 'm'
1427 res = self.stats(*args, **kwds)
1428 if isinstance(res, ndarray) and res.ndim == 0:
1429 return res[()]
1430 return res
1432 def var(self, *args, **kwds):
1433 """Variance of the distribution.
1435 Parameters
1436 ----------
1437 arg1, arg2, arg3,... : array_like
1438 The shape parameter(s) for the distribution (see docstring of the
1439 instance object for more information)
1440 loc : array_like, optional
1441 location parameter (default=0)
1442 scale : array_like, optional
1443 scale parameter (default=1)
1445 Returns
1446 -------
1447 var : float
1448 the variance of the distribution
1450 """
1451 kwds['moments'] = 'v'
1452 res = self.stats(*args, **kwds)
1453 if isinstance(res, ndarray) and res.ndim == 0:
1454 return res[()]
1455 return res
1457 def std(self, *args, **kwds):
1458 """Standard deviation of the distribution.
1460 Parameters
1461 ----------
1462 arg1, arg2, arg3,... : array_like
1463 The shape parameter(s) for the distribution (see docstring of the
1464 instance object for more information)
1465 loc : array_like, optional
1466 location parameter (default=0)
1467 scale : array_like, optional
1468 scale parameter (default=1)
1470 Returns
1471 -------
1472 std : float
1473 standard deviation of the distribution
1475 """
1476 kwds['moments'] = 'v'
1477 res = sqrt(self.stats(*args, **kwds))
1478 return res
1480 def interval(self, confidence=None, *args, **kwds):
1481 """Confidence interval with equal areas around the median.
1483 .. deprecated:: 1.9.0
1484 Parameter `alpha` is replaced by parameter `confidence` to avoid
1485 name collisions with the shape parameter `alpha` of some
1486 distributions. Parameter `alpha` will be removed in SciPy 1.11.0.
1488 Parameters
1489 ----------
1490 confidence : array_like of float
1491 Probability that an rv will be drawn from the returned range.
1492 Each value should be in the range [0, 1].
1493 arg1, arg2, ... : array_like
1494 The shape parameter(s) for the distribution (see docstring of the
1495 instance object for more information).
1496 loc : array_like, optional
1497 location parameter, Default is 0.
1498 scale : array_like, optional
1499 scale parameter, Default is 1.
1501 Returns
1502 -------
1503 a, b : ndarray of float
1504 end-points of range that contain ``100 * alpha %`` of the rv's
1505 possible values.
1507 Notes
1508 -----
1509 This is implemented as ``ppf([p_tail, 1-p_tail])``, where
1510 ``ppf`` is the inverse cumulative distribution function and
1511 ``p_tail = (1-confidence)/2``. Suppose ``[c, d]`` is the support of a
1512 discrete distribution; then ``ppf([0, 1]) == (c-1, d)``. Therefore,
1513 when ``confidence=1`` and the distribution is discrete, the left end
1514 of the interval will be beyond the support of the distribution.
1515 For discrete distributions, the interval will limit the probability
1516 in each tail to be less than or equal to ``p_tail`` (usually
1517 strictly less).
1519 """
1520 # This function was originally written with parameter `alpha`, but
1521 # `alpha` is also the name of a shape parameter of two distributions.
1522 # This block allows the function to accept both `alpha` and its
1523 # replacement `confidence` during a deprecation period; it can be
1524 # removed in the second release after 1.9.0.
1525 # See description of logic in `moment` method.
1526 has_shape_alpha = (self.shapes is not None
1527 and "alpha" in (self.shapes.split(", ")))
1528 got_confidence = confidence is not None
1529 got_keyword_alpha = kwds.get("alpha", None) is not None
1531 if not got_confidence and ((not got_keyword_alpha)
1532 or (got_keyword_alpha and has_shape_alpha)):
1533 message = ("interval() missing 1 required positional argument: "
1534 "`confidence`")
1535 raise TypeError(message)
1537 if got_keyword_alpha and not has_shape_alpha:
1538 if got_confidence:
1539 # this will change to "interval got unexpected argument alpha"
1540 message = "interval() got multiple values for first argument"
1541 raise TypeError(message)
1542 else:
1543 message = ("Use of keyword argument 'alpha' for method "
1544 "'interval' is deprecated and wil be removed in "
1545 "SciPy 1.11.0. Use first positional argument or "
1546 "keyword argument 'confidence' instead.")
1547 confidence = kwds.pop("alpha")
1548 warnings.warn(message, DeprecationWarning, stacklevel=2)
1549 alpha = confidence
1551 alpha = asarray(alpha)
1552 if np.any((alpha > 1) | (alpha < 0)):
1553 raise ValueError("alpha must be between 0 and 1 inclusive")
1554 q1 = (1.0-alpha)/2
1555 q2 = (1.0+alpha)/2
1556 a = self.ppf(q1, *args, **kwds)
1557 b = self.ppf(q2, *args, **kwds)
1558 return a, b
1560 def support(self, *args, **kwargs):
1561 """Support of the distribution.
1563 Parameters
1564 ----------
1565 arg1, arg2, ... : array_like
1566 The shape parameter(s) for the distribution (see docstring of the
1567 instance object for more information).
1568 loc : array_like, optional
1569 location parameter, Default is 0.
1570 scale : array_like, optional
1571 scale parameter, Default is 1.
1573 Returns
1574 -------
1575 a, b : array_like
1576 end-points of the distribution's support.
1578 """
1579 args, loc, scale = self._parse_args(*args, **kwargs)
1580 arrs = np.broadcast_arrays(*args, loc, scale)
1581 args, loc, scale = arrs[:-2], arrs[-2], arrs[-1]
1582 cond = self._argcheck(*args) & (scale > 0)
1583 _a, _b = self._get_support(*args)
1584 if cond.all():
1585 return _a * scale + loc, _b * scale + loc
1586 elif cond.ndim == 0:
1587 return self.badvalue, self.badvalue
1588 # promote bounds to at least float to fill in the badvalue
1589 _a, _b = np.asarray(_a).astype('d'), np.asarray(_b).astype('d')
1590 out_a, out_b = _a * scale + loc, _b * scale + loc
1591 place(out_a, 1-cond, self.badvalue)
1592 place(out_b, 1-cond, self.badvalue)
1593 return out_a, out_b
1595 def nnlf(self, theta, x):
1596 """Negative loglikelihood function.
1597 Notes
1598 -----
1599 This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the
1600 parameters (including loc and scale).
1601 """
1602 loc, scale, args = self._unpack_loc_scale(theta)
1603 if not self._argcheck(*args) or scale <= 0:
1604 return inf
1605 x = (asarray(x)-loc) / scale
1606 n_log_scale = len(x) * log(scale)
1607 if np.any(~self._support_mask(x, *args)):
1608 return inf
1609 return self._nnlf(x, *args) + n_log_scale
1611 def _nnlf(self, x, *args):
1612 return -np.sum(self._logpxf(x, *args), axis=0)
1614 def _nlff_and_penalty(self, x, args, log_fitfun):
1615 # negative log fit function
1616 cond0 = ~self._support_mask(x, *args)
1617 n_bad = np.count_nonzero(cond0, axis=0)
1618 if n_bad > 0:
1619 x = argsreduce(~cond0, x)[0]
1620 logff = log_fitfun(x, *args)
1621 finite_logff = np.isfinite(logff)
1622 n_bad += np.sum(~finite_logff, axis=0)
1623 if n_bad > 0:
1624 penalty = n_bad * log(_XMAX) * 100
1625 return -np.sum(logff[finite_logff], axis=0) + penalty
1626 return -np.sum(logff, axis=0)
1628 def _penalized_nnlf(self, theta, x):
1629 """Penalized negative loglikelihood function.
1630 i.e., - sum (log pdf(x, theta), axis=0) + penalty
1631 where theta are the parameters (including loc and scale)
1632 """
1633 loc, scale, args = self._unpack_loc_scale(theta)
1634 if not self._argcheck(*args) or scale <= 0:
1635 return inf
1636 x = asarray((x-loc) / scale)
1637 n_log_scale = len(x) * log(scale)
1638 return self._nlff_and_penalty(x, args, self._logpxf) + n_log_scale
1640 def _penalized_nlpsf(self, theta, x):
1641 """Penalized negative log product spacing function.
1642 i.e., - sum (log (diff (cdf (x, theta))), axis=0) + penalty
1643 where theta are the parameters (including loc and scale)
1644 Follows reference [1] of scipy.stats.fit
1645 """
1646 loc, scale, args = self._unpack_loc_scale(theta)
1647 if not self._argcheck(*args) or scale <= 0:
1648 return inf
1649 x = (np.sort(x) - loc)/scale
1651 def log_psf(x, *args):
1652 x, lj = np.unique(x, return_counts=True) # fast for sorted x
1653 cdf_data = self._cdf(x, *args) if x.size else []
1654 if not (x.size and 1 - cdf_data[-1] <= 0):
1655 cdf = np.concatenate(([0], cdf_data, [1]))
1656 lj = np.concatenate((lj, [1]))
1657 else:
1658 cdf = np.concatenate(([0], cdf_data))
1659 # here we could use logcdf w/ logsumexp trick to take differences,
1660 # but in the context of the method, it seems unlikely to matter
1661 return lj * np.log(np.diff(cdf) / lj)
1663 return self._nlff_and_penalty(x, args, log_psf)
1666class _ShapeInfo:
1667 def __init__(self, name, integrality=False, domain=(-np.inf, np.inf),
1668 inclusive=(True, True)):
1669 self.name = name
1670 self.integrality = integrality
1672 domain = list(domain)
1673 if np.isfinite(domain[0]) and not inclusive[0]:
1674 domain[0] = np.nextafter(domain[0], np.inf)
1675 if np.isfinite(domain[1]) and not inclusive[1]:
1676 domain[1] = np.nextafter(domain[1], -np.inf)
1677 self.domain = domain
1680def _get_fixed_fit_value(kwds, names):
1681 """
1682 Given names such as `['f0', 'fa', 'fix_a']`, check that there is
1683 at most one non-None value in `kwds` associaed with those names.
1684 Return that value, or None if none of the names occur in `kwds`.
1685 As a side effect, all occurrences of those names in `kwds` are
1686 removed.
1687 """
1688 vals = [(name, kwds.pop(name)) for name in names if name in kwds]
1689 if len(vals) > 1:
1690 repeated = [name for name, val in vals]
1691 raise ValueError("fit method got multiple keyword arguments to "
1692 "specify the same fixed parameter: " +
1693 ', '.join(repeated))
1694 return vals[0][1] if vals else None
1697# continuous random variables: implement maybe later
1698#
1699# hf --- Hazard Function (PDF / SF)
1700# chf --- Cumulative hazard function (-log(SF))
1701# psf --- Probability sparsity function (reciprocal of the pdf) in
1702# units of percent-point-function (as a function of q).
1703# Also, the derivative of the percent-point function.
1706class rv_continuous(rv_generic):
1707 """A generic continuous random variable class meant for subclassing.
1709 `rv_continuous` is a base class to construct specific distribution classes
1710 and instances for continuous random variables. It cannot be used
1711 directly as a distribution.
1713 Parameters
1714 ----------
1715 momtype : int, optional
1716 The type of generic moment calculation to use: 0 for pdf, 1 (default)
1717 for ppf.
1718 a : float, optional
1719 Lower bound of the support of the distribution, default is minus
1720 infinity.
1721 b : float, optional
1722 Upper bound of the support of the distribution, default is plus
1723 infinity.
1724 xtol : float, optional
1725 The tolerance for fixed point calculation for generic ppf.
1726 badvalue : float, optional
1727 The value in a result arrays that indicates a value that for which
1728 some argument restriction is violated, default is np.nan.
1729 name : str, optional
1730 The name of the instance. This string is used to construct the default
1731 example for distributions.
1732 longname : str, optional
1733 This string is used as part of the first line of the docstring returned
1734 when a subclass has no docstring of its own. Note: `longname` exists
1735 for backwards compatibility, do not use for new subclasses.
1736 shapes : str, optional
1737 The shape of the distribution. For example ``"m, n"`` for a
1738 distribution that takes two integers as the two shape arguments for all
1739 its methods. If not provided, shape parameters will be inferred from
1740 the signature of the private methods, ``_pdf`` and ``_cdf`` of the
1741 instance.
1742 extradoc : str, optional, deprecated
1743 This string is used as the last part of the docstring returned when a
1744 subclass has no docstring of its own. Note: `extradoc` exists for
1745 backwards compatibility and will be removed in SciPy 1.11.0, do not
1746 use for new subclasses.
1747 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
1748 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
1749 singleton is used.
1750 If `seed` is an int, a new ``RandomState`` instance is used,
1751 seeded with `seed`.
1752 If `seed` is already a ``Generator`` or ``RandomState`` instance then
1753 that instance is used.
1755 Methods
1756 -------
1757 rvs
1758 pdf
1759 logpdf
1760 cdf
1761 logcdf
1762 sf
1763 logsf
1764 ppf
1765 isf
1766 moment
1767 stats
1768 entropy
1769 expect
1770 median
1771 mean
1772 std
1773 var
1774 interval
1775 __call__
1776 fit
1777 fit_loc_scale
1778 nnlf
1779 support
1781 Notes
1782 -----
1783 Public methods of an instance of a distribution class (e.g., ``pdf``,
1784 ``cdf``) check their arguments and pass valid arguments to private,
1785 computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid
1786 if it is within the support of the distribution.
1787 Whether a shape parameter is valid is decided by an ``_argcheck`` method
1788 (which defaults to checking that its arguments are strictly positive.)
1790 **Subclassing**
1792 New random variables can be defined by subclassing the `rv_continuous` class
1793 and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized
1794 to location 0 and scale 1).
1796 If positive argument checking is not correct for your RV
1797 then you will also need to re-define the ``_argcheck`` method.
1799 For most of the scipy.stats distributions, the support interval doesn't
1800 depend on the shape parameters. ``x`` being in the support interval is
1801 equivalent to ``self.a <= x <= self.b``. If either of the endpoints of
1802 the support do depend on the shape parameters, then
1803 i) the distribution must implement the ``_get_support`` method; and
1804 ii) those dependent endpoints must be omitted from the distribution's
1805 call to the ``rv_continuous`` initializer.
1807 Correct, but potentially slow defaults exist for the remaining
1808 methods but for speed and/or accuracy you can over-ride::
1810 _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf
1812 The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``,
1813 applied to a uniform random variate. In order to generate random variates
1814 efficiently, either the default ``_ppf`` needs to be overwritten (e.g.
1815 if the inverse cdf can expressed in an explicit form) or a sampling
1816 method needs to be implemented in a custom ``_rvs`` method.
1818 If possible, you should override ``_isf``, ``_sf`` or ``_logsf``.
1819 The main reason would be to improve numerical accuracy: for example,
1820 the survival function ``_sf`` is computed as ``1 - _cdf`` which can
1821 result in loss of precision if ``_cdf(x)`` is close to one.
1823 **Methods that can be overwritten by subclasses**
1824 ::
1826 _rvs
1827 _pdf
1828 _cdf
1829 _sf
1830 _ppf
1831 _isf
1832 _stats
1833 _munp
1834 _entropy
1835 _argcheck
1836 _get_support
1838 There are additional (internal and private) generic methods that can
1839 be useful for cross-checking and for debugging, but might work in all
1840 cases when directly called.
1842 A note on ``shapes``: subclasses need not specify them explicitly. In this
1843 case, `shapes` will be automatically deduced from the signatures of the
1844 overridden methods (`pdf`, `cdf` etc).
1845 If, for some reason, you prefer to avoid relying on introspection, you can
1846 specify ``shapes`` explicitly as an argument to the instance constructor.
1849 **Frozen Distributions**
1851 Normally, you must provide shape parameters (and, optionally, location and
1852 scale parameters to each call of a method of a distribution.
1854 Alternatively, the object may be called (as a function) to fix the shape,
1855 location, and scale parameters returning a "frozen" continuous RV object:
1857 rv = generic(<shape(s)>, loc=0, scale=1)
1858 `rv_frozen` object with the same methods but holding the given shape,
1859 location, and scale fixed
1861 **Statistics**
1863 Statistics are computed using numerical integration by default.
1864 For speed you can redefine this using ``_stats``:
1866 - take shape parameters and return mu, mu2, g1, g2
1867 - If you can't compute one of these, return it as None
1868 - Can also be defined with a keyword argument ``moments``, which is a
1869 string composed of "m", "v", "s", and/or "k".
1870 Only the components appearing in string should be computed and
1871 returned in the order "m", "v", "s", or "k" with missing values
1872 returned as None.
1874 Alternatively, you can override ``_munp``, which takes ``n`` and shape
1875 parameters and returns the n-th non-central moment of the distribution.
1877 Examples
1878 --------
1879 To create a new Gaussian distribution, we would do the following:
1881 >>> from scipy.stats import rv_continuous
1882 >>> class gaussian_gen(rv_continuous):
1883 ... "Gaussian distribution"
1884 ... def _pdf(self, x):
1885 ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi)
1886 >>> gaussian = gaussian_gen(name='gaussian')
1888 ``scipy.stats`` distributions are *instances*, so here we subclass
1889 `rv_continuous` and create an instance. With this, we now have
1890 a fully functional distribution with all relevant methods automagically
1891 generated by the framework.
1893 Note that above we defined a standard normal distribution, with zero mean
1894 and unit variance. Shifting and scaling of the distribution can be done
1895 by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)``
1896 essentially computes ``y = (x - loc) / scale`` and
1897 ``gaussian._pdf(y) / scale``.
1899 """
1900 def __init__(self, momtype=1, a=None, b=None, xtol=1e-14,
1901 badvalue=None, name=None, longname=None,
1902 shapes=None, extradoc=None, seed=None):
1904 super().__init__(seed)
1906 if extradoc is not None:
1907 warnings.warn("extradoc is deprecated and will be removed in "
1908 "SciPy 1.11.0", DeprecationWarning)
1910 # save the ctor parameters, cf generic freeze
1911 self._ctor_param = dict(
1912 momtype=momtype, a=a, b=b, xtol=xtol,
1913 badvalue=badvalue, name=name, longname=longname,
1914 shapes=shapes, extradoc=extradoc, seed=seed)
1916 if badvalue is None:
1917 badvalue = nan
1918 if name is None:
1919 name = 'Distribution'
1920 self.badvalue = badvalue
1921 self.name = name
1922 self.a = a
1923 self.b = b
1924 if a is None:
1925 self.a = -inf
1926 if b is None:
1927 self.b = inf
1928 self.xtol = xtol
1929 self.moment_type = momtype
1930 self.shapes = shapes
1931 self.extradoc = extradoc
1933 self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],
1934 locscale_in='loc=0, scale=1',
1935 locscale_out='loc, scale')
1936 self._attach_methods()
1938 if longname is None:
1939 if name[0] in ['aeiouAEIOU']:
1940 hstr = "An "
1941 else:
1942 hstr = "A "
1943 longname = hstr + name
1945 if sys.flags.optimize < 2:
1946 # Skip adding docstrings if interpreter is run with -OO
1947 if self.__doc__ is None:
1948 self._construct_default_doc(longname=longname,
1949 extradoc=extradoc,
1950 docdict=docdict,
1951 discrete='continuous')
1952 else:
1953 dct = dict(distcont)
1954 self._construct_doc(docdict, dct.get(self.name))
1956 def __getstate__(self):
1957 dct = self.__dict__.copy()
1959 # these methods will be remade in __setstate__
1960 # _random_state attribute is taken care of by rv_generic
1961 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
1962 "_cdfvec", "_ppfvec", "vecentropy", "generic_moment"]
1963 [dct.pop(attr, None) for attr in attrs]
1964 return dct
1966 def _attach_methods(self):
1967 """
1968 Attaches dynamically created methods to the rv_continuous instance.
1969 """
1970 # _attach_methods is responsible for calling _attach_argparser_methods
1971 self._attach_argparser_methods()
1973 # nin correction
1974 self._ppfvec = vectorize(self._ppf_single, otypes='d')
1975 self._ppfvec.nin = self.numargs + 1
1976 self.vecentropy = vectorize(self._entropy, otypes='d')
1977 self._cdfvec = vectorize(self._cdf_single, otypes='d')
1978 self._cdfvec.nin = self.numargs + 1
1980 if self.moment_type == 0:
1981 self.generic_moment = vectorize(self._mom0_sc, otypes='d')
1982 else:
1983 self.generic_moment = vectorize(self._mom1_sc, otypes='d')
1984 # Because of the *args argument of _mom0_sc, vectorize cannot count the
1985 # number of arguments correctly.
1986 self.generic_moment.nin = self.numargs + 1
1988 def _updated_ctor_param(self):
1989 """Return the current version of _ctor_param, possibly updated by user.
1991 Used by freezing.
1992 Keep this in sync with the signature of __init__.
1993 """
1994 dct = self._ctor_param.copy()
1995 dct['a'] = self.a
1996 dct['b'] = self.b
1997 dct['xtol'] = self.xtol
1998 dct['badvalue'] = self.badvalue
1999 dct['name'] = self.name
2000 dct['shapes'] = self.shapes
2001 dct['extradoc'] = self.extradoc
2002 return dct
2004 def _ppf_to_solve(self, x, q, *args):
2005 return self.cdf(*(x, )+args)-q
2007 def _ppf_single(self, q, *args):
2008 factor = 10.
2009 left, right = self._get_support(*args)
2011 if np.isinf(left):
2012 left = min(-factor, right)
2013 while self._ppf_to_solve(left, q, *args) > 0.:
2014 left, right = left * factor, left
2015 # left is now such that cdf(left) <= q
2016 # if right has changed, then cdf(right) > q
2018 if np.isinf(right):
2019 right = max(factor, left)
2020 while self._ppf_to_solve(right, q, *args) < 0.:
2021 left, right = right, right * factor
2022 # right is now such that cdf(right) >= q
2024 return optimize.brentq(self._ppf_to_solve,
2025 left, right, args=(q,)+args, xtol=self.xtol)
2027 # moment from definition
2028 def _mom_integ0(self, x, m, *args):
2029 return x**m * self.pdf(x, *args)
2031 def _mom0_sc(self, m, *args):
2032 _a, _b = self._get_support(*args)
2033 return integrate.quad(self._mom_integ0, _a, _b,
2034 args=(m,)+args)[0]
2036 # moment calculated using ppf
2037 def _mom_integ1(self, q, m, *args):
2038 return (self.ppf(q, *args))**m
2040 def _mom1_sc(self, m, *args):
2041 return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
2043 def _pdf(self, x, *args):
2044 return _derivative(self._cdf, x, dx=1e-5, args=args, order=5)
2046 # Could also define any of these
2047 def _logpdf(self, x, *args):
2048 p = self._pdf(x, *args)
2049 with np.errstate(divide='ignore'):
2050 return log(p)
2052 def _logpxf(self, x, *args):
2053 # continuous distributions have PDF, discrete have PMF, but sometimes
2054 # the distinction doesn't matter. This lets us use `_logpxf` for both
2055 # discrete and continuous distributions.
2056 return self._logpdf(x, *args)
2058 def _cdf_single(self, x, *args):
2059 _a, _b = self._get_support(*args)
2060 return integrate.quad(self._pdf, _a, x, args=args)[0]
2062 def _cdf(self, x, *args):
2063 return self._cdfvec(x, *args)
2065 # generic _argcheck, _logcdf, _sf, _logsf, _ppf, _isf, _rvs are defined
2066 # in rv_generic
2068 def pdf(self, x, *args, **kwds):
2069 """Probability density function at x of the given RV.
2071 Parameters
2072 ----------
2073 x : array_like
2074 quantiles
2075 arg1, arg2, arg3,... : array_like
2076 The shape parameter(s) for the distribution (see docstring of the
2077 instance object for more information)
2078 loc : array_like, optional
2079 location parameter (default=0)
2080 scale : array_like, optional
2081 scale parameter (default=1)
2083 Returns
2084 -------
2085 pdf : ndarray
2086 Probability density function evaluated at x
2088 """
2089 args, loc, scale = self._parse_args(*args, **kwds)
2090 x, loc, scale = map(asarray, (x, loc, scale))
2091 args = tuple(map(asarray, args))
2092 dtyp = np.promote_types(x.dtype, np.float64)
2093 x = np.asarray((x - loc)/scale, dtype=dtyp)
2094 cond0 = self._argcheck(*args) & (scale > 0)
2095 cond1 = self._support_mask(x, *args) & (scale > 0)
2096 cond = cond0 & cond1
2097 output = zeros(shape(cond), dtyp)
2098 putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
2099 if np.any(cond):
2100 goodargs = argsreduce(cond, *((x,)+args+(scale,)))
2101 scale, goodargs = goodargs[-1], goodargs[:-1]
2102 place(output, cond, self._pdf(*goodargs) / scale)
2103 if output.ndim == 0:
2104 return output[()]
2105 return output
2107 def logpdf(self, x, *args, **kwds):
2108 """Log of the probability density function at x of the given RV.
2110 This uses a more numerically accurate calculation if available.
2112 Parameters
2113 ----------
2114 x : array_like
2115 quantiles
2116 arg1, arg2, arg3,... : array_like
2117 The shape parameter(s) for the distribution (see docstring of the
2118 instance object for more information)
2119 loc : array_like, optional
2120 location parameter (default=0)
2121 scale : array_like, optional
2122 scale parameter (default=1)
2124 Returns
2125 -------
2126 logpdf : array_like
2127 Log of the probability density function evaluated at x
2129 """
2130 args, loc, scale = self._parse_args(*args, **kwds)
2131 x, loc, scale = map(asarray, (x, loc, scale))
2132 args = tuple(map(asarray, args))
2133 dtyp = np.promote_types(x.dtype, np.float64)
2134 x = np.asarray((x - loc)/scale, dtype=dtyp)
2135 cond0 = self._argcheck(*args) & (scale > 0)
2136 cond1 = self._support_mask(x, *args) & (scale > 0)
2137 cond = cond0 & cond1
2138 output = empty(shape(cond), dtyp)
2139 output.fill(NINF)
2140 putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
2141 if np.any(cond):
2142 goodargs = argsreduce(cond, *((x,)+args+(scale,)))
2143 scale, goodargs = goodargs[-1], goodargs[:-1]
2144 place(output, cond, self._logpdf(*goodargs) - log(scale))
2145 if output.ndim == 0:
2146 return output[()]
2147 return output
2149 def cdf(self, x, *args, **kwds):
2150 """
2151 Cumulative distribution function of the given RV.
2153 Parameters
2154 ----------
2155 x : array_like
2156 quantiles
2157 arg1, arg2, arg3,... : array_like
2158 The shape parameter(s) for the distribution (see docstring of the
2159 instance object for more information)
2160 loc : array_like, optional
2161 location parameter (default=0)
2162 scale : array_like, optional
2163 scale parameter (default=1)
2165 Returns
2166 -------
2167 cdf : ndarray
2168 Cumulative distribution function evaluated at `x`
2170 """
2171 args, loc, scale = self._parse_args(*args, **kwds)
2172 x, loc, scale = map(asarray, (x, loc, scale))
2173 args = tuple(map(asarray, args))
2174 _a, _b = self._get_support(*args)
2175 dtyp = np.promote_types(x.dtype, np.float64)
2176 x = np.asarray((x - loc)/scale, dtype=dtyp)
2177 cond0 = self._argcheck(*args) & (scale > 0)
2178 cond1 = self._open_support_mask(x, *args) & (scale > 0)
2179 cond2 = (x >= np.asarray(_b)) & cond0
2180 cond = cond0 & cond1
2181 output = zeros(shape(cond), dtyp)
2182 place(output, (1-cond0)+np.isnan(x), self.badvalue)
2183 place(output, cond2, 1.0)
2184 if np.any(cond): # call only if at least 1 entry
2185 goodargs = argsreduce(cond, *((x,)+args))
2186 place(output, cond, self._cdf(*goodargs))
2187 if output.ndim == 0:
2188 return output[()]
2189 return output
2191 def logcdf(self, x, *args, **kwds):
2192 """Log of the cumulative distribution function at x of the given RV.
2194 Parameters
2195 ----------
2196 x : array_like
2197 quantiles
2198 arg1, arg2, arg3,... : array_like
2199 The shape parameter(s) for the distribution (see docstring of the
2200 instance object for more information)
2201 loc : array_like, optional
2202 location parameter (default=0)
2203 scale : array_like, optional
2204 scale parameter (default=1)
2206 Returns
2207 -------
2208 logcdf : array_like
2209 Log of the cumulative distribution function evaluated at x
2211 """
2212 args, loc, scale = self._parse_args(*args, **kwds)
2213 x, loc, scale = map(asarray, (x, loc, scale))
2214 args = tuple(map(asarray, args))
2215 _a, _b = self._get_support(*args)
2216 dtyp = np.promote_types(x.dtype, np.float64)
2217 x = np.asarray((x - loc)/scale, dtype=dtyp)
2218 cond0 = self._argcheck(*args) & (scale > 0)
2219 cond1 = self._open_support_mask(x, *args) & (scale > 0)
2220 cond2 = (x >= _b) & cond0
2221 cond = cond0 & cond1
2222 output = empty(shape(cond), dtyp)
2223 output.fill(NINF)
2224 place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue)
2225 place(output, cond2, 0.0)
2226 if np.any(cond): # call only if at least 1 entry
2227 goodargs = argsreduce(cond, *((x,)+args))
2228 place(output, cond, self._logcdf(*goodargs))
2229 if output.ndim == 0:
2230 return output[()]
2231 return output
2233 def sf(self, x, *args, **kwds):
2234 """Survival function (1 - `cdf`) at x of the given RV.
2236 Parameters
2237 ----------
2238 x : array_like
2239 quantiles
2240 arg1, arg2, arg3,... : array_like
2241 The shape parameter(s) for the distribution (see docstring of the
2242 instance object for more information)
2243 loc : array_like, optional
2244 location parameter (default=0)
2245 scale : array_like, optional
2246 scale parameter (default=1)
2248 Returns
2249 -------
2250 sf : array_like
2251 Survival function evaluated at x
2253 """
2254 args, loc, scale = self._parse_args(*args, **kwds)
2255 x, loc, scale = map(asarray, (x, loc, scale))
2256 args = tuple(map(asarray, args))
2257 _a, _b = self._get_support(*args)
2258 dtyp = np.promote_types(x.dtype, np.float64)
2259 x = np.asarray((x - loc)/scale, dtype=dtyp)
2260 cond0 = self._argcheck(*args) & (scale > 0)
2261 cond1 = self._open_support_mask(x, *args) & (scale > 0)
2262 cond2 = cond0 & (x <= _a)
2263 cond = cond0 & cond1
2264 output = zeros(shape(cond), dtyp)
2265 place(output, (1-cond0)+np.isnan(x), self.badvalue)
2266 place(output, cond2, 1.0)
2267 if np.any(cond):
2268 goodargs = argsreduce(cond, *((x,)+args))
2269 place(output, cond, self._sf(*goodargs))
2270 if output.ndim == 0:
2271 return output[()]
2272 return output
2274 def logsf(self, x, *args, **kwds):
2275 """Log of the survival function of the given RV.
2277 Returns the log of the "survival function," defined as (1 - `cdf`),
2278 evaluated at `x`.
2280 Parameters
2281 ----------
2282 x : array_like
2283 quantiles
2284 arg1, arg2, arg3,... : array_like
2285 The shape parameter(s) for the distribution (see docstring of the
2286 instance object for more information)
2287 loc : array_like, optional
2288 location parameter (default=0)
2289 scale : array_like, optional
2290 scale parameter (default=1)
2292 Returns
2293 -------
2294 logsf : ndarray
2295 Log of the survival function evaluated at `x`.
2297 """
2298 args, loc, scale = self._parse_args(*args, **kwds)
2299 x, loc, scale = map(asarray, (x, loc, scale))
2300 args = tuple(map(asarray, args))
2301 _a, _b = self._get_support(*args)
2302 dtyp = np.promote_types(x.dtype, np.float64)
2303 x = np.asarray((x - loc)/scale, dtype=dtyp)
2304 cond0 = self._argcheck(*args) & (scale > 0)
2305 cond1 = self._open_support_mask(x, *args) & (scale > 0)
2306 cond2 = cond0 & (x <= _a)
2307 cond = cond0 & cond1
2308 output = empty(shape(cond), dtyp)
2309 output.fill(NINF)
2310 place(output, (1-cond0)+np.isnan(x), self.badvalue)
2311 place(output, cond2, 0.0)
2312 if np.any(cond):
2313 goodargs = argsreduce(cond, *((x,)+args))
2314 place(output, cond, self._logsf(*goodargs))
2315 if output.ndim == 0:
2316 return output[()]
2317 return output
2319 def ppf(self, q, *args, **kwds):
2320 """Percent point function (inverse of `cdf`) at q of the given RV.
2322 Parameters
2323 ----------
2324 q : array_like
2325 lower tail probability
2326 arg1, arg2, arg3,... : array_like
2327 The shape parameter(s) for the distribution (see docstring of the
2328 instance object for more information)
2329 loc : array_like, optional
2330 location parameter (default=0)
2331 scale : array_like, optional
2332 scale parameter (default=1)
2334 Returns
2335 -------
2336 x : array_like
2337 quantile corresponding to the lower tail probability q.
2339 """
2340 args, loc, scale = self._parse_args(*args, **kwds)
2341 q, loc, scale = map(asarray, (q, loc, scale))
2342 args = tuple(map(asarray, args))
2343 _a, _b = self._get_support(*args)
2344 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
2345 cond1 = (0 < q) & (q < 1)
2346 cond2 = cond0 & (q == 0)
2347 cond3 = cond0 & (q == 1)
2348 cond = cond0 & cond1
2349 output = np.full(shape(cond), fill_value=self.badvalue)
2351 lower_bound = _a * scale + loc
2352 upper_bound = _b * scale + loc
2353 place(output, cond2, argsreduce(cond2, lower_bound)[0])
2354 place(output, cond3, argsreduce(cond3, upper_bound)[0])
2356 if np.any(cond): # call only if at least 1 entry
2357 goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
2358 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
2359 place(output, cond, self._ppf(*goodargs) * scale + loc)
2360 if output.ndim == 0:
2361 return output[()]
2362 return output
2364 def isf(self, q, *args, **kwds):
2365 """Inverse survival function (inverse of `sf`) at q of the given RV.
2367 Parameters
2368 ----------
2369 q : array_like
2370 upper tail probability
2371 arg1, arg2, arg3,... : array_like
2372 The shape parameter(s) for the distribution (see docstring of the
2373 instance object for more information)
2374 loc : array_like, optional
2375 location parameter (default=0)
2376 scale : array_like, optional
2377 scale parameter (default=1)
2379 Returns
2380 -------
2381 x : ndarray or scalar
2382 Quantile corresponding to the upper tail probability q.
2384 """
2385 args, loc, scale = self._parse_args(*args, **kwds)
2386 q, loc, scale = map(asarray, (q, loc, scale))
2387 args = tuple(map(asarray, args))
2388 _a, _b = self._get_support(*args)
2389 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
2390 cond1 = (0 < q) & (q < 1)
2391 cond2 = cond0 & (q == 1)
2392 cond3 = cond0 & (q == 0)
2393 cond = cond0 & cond1
2394 output = np.full(shape(cond), fill_value=self.badvalue)
2396 lower_bound = _a * scale + loc
2397 upper_bound = _b * scale + loc
2398 place(output, cond2, argsreduce(cond2, lower_bound)[0])
2399 place(output, cond3, argsreduce(cond3, upper_bound)[0])
2401 if np.any(cond):
2402 goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
2403 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
2404 place(output, cond, self._isf(*goodargs) * scale + loc)
2405 if output.ndim == 0:
2406 return output[()]
2407 return output
2409 def _unpack_loc_scale(self, theta):
2410 try:
2411 loc = theta[-2]
2412 scale = theta[-1]
2413 args = tuple(theta[:-2])
2414 except IndexError as e:
2415 raise ValueError("Not enough input arguments.") from e
2416 return loc, scale, args
2418 def _fitstart(self, data, args=None):
2419 """Starting point for fit (shape arguments + loc + scale)."""
2420 if args is None:
2421 args = (1.0,)*self.numargs
2422 loc, scale = self._fit_loc_scale_support(data, *args)
2423 return args + (loc, scale)
2425 def _reduce_func(self, args, kwds, data=None):
2426 """
2427 Return the (possibly reduced) function to optimize in order to find MLE
2428 estimates for the .fit method.
2429 """
2430 # Convert fixed shape parameters to the standard numeric form: e.g. for
2431 # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
2432 # for `f0`, `fa` or 'fix_a'. The following converts the latter two
2433 # into the first (numeric) form.
2434 shapes = []
2435 if self.shapes:
2436 shapes = self.shapes.replace(',', ' ').split()
2437 for j, s in enumerate(shapes):
2438 key = 'f' + str(j)
2439 names = [key, 'f' + s, 'fix_' + s]
2440 val = _get_fixed_fit_value(kwds, names)
2441 if val is not None:
2442 kwds[key] = val
2444 args = list(args)
2445 Nargs = len(args)
2446 fixedn = []
2447 names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
2448 x0 = []
2449 for n, key in enumerate(names):
2450 if key in kwds:
2451 fixedn.append(n)
2452 args[n] = kwds.pop(key)
2453 else:
2454 x0.append(args[n])
2456 methods = {"mle", "mm"}
2457 method = kwds.pop('method', "mle").lower()
2458 if method == "mm":
2459 n_params = len(shapes) + 2 - len(fixedn)
2460 exponents = (np.arange(1, n_params+1))[:, np.newaxis]
2461 data_moments = np.sum(data[None, :]**exponents/len(data), axis=1)
2463 def objective(theta, x):
2464 return self._moment_error(theta, x, data_moments)
2465 elif method == "mle":
2466 objective = self._penalized_nnlf
2467 else:
2468 raise ValueError("Method '{0}' not available; must be one of {1}"
2469 .format(method, methods))
2471 if len(fixedn) == 0:
2472 func = objective
2473 restore = None
2474 else:
2475 if len(fixedn) == Nargs:
2476 raise ValueError(
2477 "All parameters fixed. There is nothing to optimize.")
2479 def restore(args, theta):
2480 # Replace with theta for all numbers not in fixedn
2481 # This allows the non-fixed values to vary, but
2482 # we still call self.nnlf with all parameters.
2483 i = 0
2484 for n in range(Nargs):
2485 if n not in fixedn:
2486 args[n] = theta[i]
2487 i += 1
2488 return args
2490 def func(theta, x):
2491 newtheta = restore(args[:], theta)
2492 return objective(newtheta, x)
2494 return x0, func, restore, args
2496 def _moment_error(self, theta, x, data_moments):
2497 loc, scale, args = self._unpack_loc_scale(theta)
2498 if not self._argcheck(*args) or scale <= 0:
2499 return inf
2501 dist_moments = np.array([self.moment(i+1, *args, loc=loc, scale=scale)
2502 for i in range(len(data_moments))])
2503 if np.any(np.isnan(dist_moments)):
2504 raise ValueError("Method of moments encountered a non-finite "
2505 "distribution moment and cannot continue. "
2506 "Consider trying method='MLE'.")
2508 return (((data_moments - dist_moments) /
2509 np.maximum(np.abs(data_moments), 1e-8))**2).sum()
2511 def fit(self, data, *args, **kwds):
2512 """
2513 Return estimates of shape (if applicable), location, and scale
2514 parameters from data. The default estimation method is Maximum
2515 Likelihood Estimation (MLE), but Method of Moments (MM)
2516 is also available.
2518 Starting estimates for
2519 the fit are given by input arguments; for any arguments not provided
2520 with starting estimates, ``self._fitstart(data)`` is called to generate
2521 such.
2523 One can hold some parameters fixed to specific values by passing in
2524 keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
2525 and ``floc`` and ``fscale`` (for location and scale parameters,
2526 respectively).
2528 Parameters
2529 ----------
2530 data : array_like
2531 Data to use in estimating the distribution parameters.
2532 arg1, arg2, arg3,... : floats, optional
2533 Starting value(s) for any shape-characterizing arguments (those not
2534 provided will be determined by a call to ``_fitstart(data)``).
2535 No default value.
2536 **kwds : floats, optional
2537 - `loc`: initial guess of the distribution's location parameter.
2538 - `scale`: initial guess of the distribution's scale parameter.
2540 Special keyword arguments are recognized as holding certain
2541 parameters fixed:
2543 - f0...fn : hold respective shape parameters fixed.
2544 Alternatively, shape parameters to fix can be specified by name.
2545 For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a``
2546 are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
2547 equivalent to ``f1``.
2549 - floc : hold location parameter fixed to specified value.
2551 - fscale : hold scale parameter fixed to specified value.
2553 - optimizer : The optimizer to use.
2554 The optimizer must take ``func``,
2555 and starting position as the first two arguments,
2556 plus ``args`` (for extra arguments to pass to the
2557 function to be optimized) and ``disp=0`` to suppress
2558 output as keyword arguments.
2560 - method : The method to use. The default is "MLE" (Maximum
2561 Likelihood Estimate); "MM" (Method of Moments)
2562 is also available.
2564 Raises
2565 ------
2566 TypeError, ValueError
2567 If an input is invalid
2568 `~scipy.stats.FitError`
2569 If fitting fails or the fit produced would be invalid
2571 Returns
2572 -------
2573 parameter_tuple : tuple of floats
2574 Estimates for any shape parameters (if applicable),
2575 followed by those for location and scale.
2576 For most random variables, shape statistics
2577 will be returned, but there are exceptions (e.g. ``norm``).
2579 Notes
2580 -----
2581 With ``method="MLE"`` (default), the fit is computed by minimizing
2582 the negative log-likelihood function. A large, finite penalty
2583 (rather than infinite negative log-likelihood) is applied for
2584 observations beyond the support of the distribution.
2586 With ``method="MM"``, the fit is computed by minimizing the L2 norm
2587 of the relative errors between the first *k* raw (about zero) data
2588 moments and the corresponding distribution moments, where *k* is the
2589 number of non-fixed parameters.
2590 More precisely, the objective function is::
2592 (((data_moments - dist_moments)
2593 / np.maximum(np.abs(data_moments), 1e-8))**2).sum()
2595 where the constant ``1e-8`` avoids division by zero in case of
2596 vanishing data moments. Typically, this error norm can be reduced to
2597 zero.
2598 Note that the standard method of moments can produce parameters for
2599 which some data are outside the support of the fitted distribution;
2600 this implementation does nothing to prevent this.
2602 For either method,
2603 the returned answer is not guaranteed to be globally optimal; it
2604 may only be locally optimal, or the optimization may fail altogether.
2605 If the data contain any of ``np.nan``, ``np.inf``, or ``-np.inf``,
2606 the `fit` method will raise a ``RuntimeError``.
2608 Examples
2609 --------
2611 Generate some data to fit: draw random variates from the `beta`
2612 distribution
2614 >>> from scipy.stats import beta
2615 >>> a, b = 1., 2.
2616 >>> x = beta.rvs(a, b, size=1000)
2618 Now we can fit all four parameters (``a``, ``b``, ``loc``
2619 and ``scale``):
2621 >>> a1, b1, loc1, scale1 = beta.fit(x)
2623 We can also use some prior knowledge about the dataset: let's keep
2624 ``loc`` and ``scale`` fixed:
2626 >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)
2627 >>> loc1, scale1
2628 (0, 1)
2630 We can also keep shape parameters fixed by using ``f``-keywords. To
2631 keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
2632 equivalently, ``fa=1``:
2634 >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)
2635 >>> a1
2636 1
2638 Not all distributions return estimates for the shape parameters.
2639 ``norm`` for example just returns estimates for location and scale:
2641 >>> from scipy.stats import norm
2642 >>> x = norm.rvs(a, b, size=1000, random_state=123)
2643 >>> loc1, scale1 = norm.fit(x)
2644 >>> loc1, scale1
2645 (0.92087172783841631, 2.0015750750324668)
2646 """
2647 data = np.asarray(data)
2648 method = kwds.get('method', "mle").lower()
2650 # memory for method of moments
2651 Narg = len(args)
2652 if Narg > self.numargs:
2653 raise TypeError("Too many input arguments.")
2655 if not np.isfinite(data).all():
2656 raise ValueError("The data contains non-finite values.")
2658 start = [None]*2
2659 if (Narg < self.numargs) or not ('loc' in kwds and
2660 'scale' in kwds):
2661 # get distribution specific starting locations
2662 start = self._fitstart(data)
2663 args += start[Narg:-2]
2664 loc = kwds.pop('loc', start[-2])
2665 scale = kwds.pop('scale', start[-1])
2666 args += (loc, scale)
2667 x0, func, restore, args = self._reduce_func(args, kwds, data=data)
2668 optimizer = kwds.pop('optimizer', optimize.fmin)
2669 # convert string to function in scipy.optimize
2670 optimizer = _fit_determine_optimizer(optimizer)
2671 # by now kwds must be empty, since everybody took what they needed
2672 if kwds:
2673 raise TypeError("Unknown arguments: %s." % kwds)
2675 # In some cases, method of moments can be done with fsolve/root
2676 # instead of an optimizer, but sometimes no solution exists,
2677 # especially when the user fixes parameters. Minimizing the sum
2678 # of squares of the error generalizes to these cases.
2679 vals = optimizer(func, x0, args=(ravel(data),), disp=0)
2680 obj = func(vals, data)
2682 if restore is not None:
2683 vals = restore(args, vals)
2684 vals = tuple(vals)
2686 loc, scale, shapes = self._unpack_loc_scale(vals)
2687 if not (np.all(self._argcheck(*shapes)) and scale > 0):
2688 raise FitError("Optimization converged to parameters that are "
2689 "outside the range allowed by the distribution.")
2691 if method == 'mm':
2692 if not np.isfinite(obj):
2693 raise FitError("Optimization failed: either a data moment "
2694 "or fitted distribution moment is "
2695 "non-finite.")
2697 return vals
2699 def _fit_loc_scale_support(self, data, *args):
2700 """Estimate loc and scale parameters from data accounting for support.
2702 Parameters
2703 ----------
2704 data : array_like
2705 Data to fit.
2706 arg1, arg2, arg3,... : array_like
2707 The shape parameter(s) for the distribution (see docstring of the
2708 instance object for more information).
2710 Returns
2711 -------
2712 Lhat : float
2713 Estimated location parameter for the data.
2714 Shat : float
2715 Estimated scale parameter for the data.
2717 """
2718 data = np.asarray(data)
2720 # Estimate location and scale according to the method of moments.
2721 loc_hat, scale_hat = self.fit_loc_scale(data, *args)
2723 # Compute the support according to the shape parameters.
2724 self._argcheck(*args)
2725 _a, _b = self._get_support(*args)
2726 a, b = _a, _b
2727 support_width = b - a
2729 # If the support is empty then return the moment-based estimates.
2730 if support_width <= 0:
2731 return loc_hat, scale_hat
2733 # Compute the proposed support according to the loc and scale
2734 # estimates.
2735 a_hat = loc_hat + a * scale_hat
2736 b_hat = loc_hat + b * scale_hat
2738 # Use the moment-based estimates if they are compatible with the data.
2739 data_a = np.min(data)
2740 data_b = np.max(data)
2741 if a_hat < data_a and data_b < b_hat:
2742 return loc_hat, scale_hat
2744 # Otherwise find other estimates that are compatible with the data.
2745 data_width = data_b - data_a
2746 rel_margin = 0.1
2747 margin = data_width * rel_margin
2749 # For a finite interval, both the location and scale
2750 # should have interesting values.
2751 if support_width < np.inf:
2752 loc_hat = (data_a - a) - margin
2753 scale_hat = (data_width + 2 * margin) / support_width
2754 return loc_hat, scale_hat
2756 # For a one-sided interval, use only an interesting location parameter.
2757 if a > -np.inf:
2758 return (data_a - a) - margin, 1
2759 elif b < np.inf:
2760 return (data_b - b) + margin, 1
2761 else:
2762 raise RuntimeError
2764 def fit_loc_scale(self, data, *args):
2765 """
2766 Estimate loc and scale parameters from data using 1st and 2nd moments.
2768 Parameters
2769 ----------
2770 data : array_like
2771 Data to fit.
2772 arg1, arg2, arg3,... : array_like
2773 The shape parameter(s) for the distribution (see docstring of the
2774 instance object for more information).
2776 Returns
2777 -------
2778 Lhat : float
2779 Estimated location parameter for the data.
2780 Shat : float
2781 Estimated scale parameter for the data.
2783 """
2784 mu, mu2 = self.stats(*args, **{'moments': 'mv'})
2785 tmp = asarray(data)
2786 muhat = tmp.mean()
2787 mu2hat = tmp.var()
2788 Shat = sqrt(mu2hat / mu2)
2789 Lhat = muhat - Shat*mu
2790 if not np.isfinite(Lhat):
2791 Lhat = 0
2792 if not (np.isfinite(Shat) and (0 < Shat)):
2793 Shat = 1
2794 return Lhat, Shat
2796 def _entropy(self, *args):
2797 def integ(x):
2798 val = self._pdf(x, *args)
2799 return entr(val)
2801 # upper limit is often inf, so suppress warnings when integrating
2802 _a, _b = self._get_support(*args)
2803 with np.errstate(over='ignore'):
2804 h = integrate.quad(integ, _a, _b)[0]
2806 if not np.isnan(h):
2807 return h
2808 else:
2809 # try with different limits if integration problems
2810 low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
2811 if np.isinf(_b):
2812 upper = upp
2813 else:
2814 upper = _b
2815 if np.isinf(_a):
2816 lower = low
2817 else:
2818 lower = _a
2819 return integrate.quad(integ, lower, upper)[0]
2821 def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
2822 conditional=False, **kwds):
2823 """Calculate expected value of a function with respect to the
2824 distribution by numerical integration.
2826 The expected value of a function ``f(x)`` with respect to a
2827 distribution ``dist`` is defined as::
2829 ub
2830 E[f(x)] = Integral(f(x) * dist.pdf(x)),
2831 lb
2833 where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)``
2834 distribution. If the bounds ``lb`` and ``ub`` correspond to the
2835 support of the distribution, e.g. ``[-inf, inf]`` in the default
2836 case, then the integral is the unrestricted expectation of ``f(x)``.
2837 Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0``
2838 outside a finite interval in which case the expectation is
2839 calculated within the finite range ``[lb, ub]``.
2841 Parameters
2842 ----------
2843 func : callable, optional
2844 Function for which integral is calculated. Takes only one argument.
2845 The default is the identity mapping f(x) = x.
2846 args : tuple, optional
2847 Shape parameters of the distribution.
2848 loc : float, optional
2849 Location parameter (default=0).
2850 scale : float, optional
2851 Scale parameter (default=1).
2852 lb, ub : scalar, optional
2853 Lower and upper bound for integration. Default is set to the
2854 support of the distribution.
2855 conditional : bool, optional
2856 If True, the integral is corrected by the conditional probability
2857 of the integration interval. The return value is the expectation
2858 of the function, conditional on being in the given interval.
2859 Default is False.
2861 Additional keyword arguments are passed to the integration routine.
2863 Returns
2864 -------
2865 expect : float
2866 The calculated expected value.
2868 Notes
2869 -----
2870 The integration behavior of this function is inherited from
2871 `scipy.integrate.quad`. Neither this function nor
2872 `scipy.integrate.quad` can verify whether the integral exists or is
2873 finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and
2874 ``cauchy(0).expect()`` returns ``0.0``.
2876 Likewise, the accuracy of results is not verified by the function.
2877 `scipy.integrate.quad` is typically reliable for integrals that are
2878 numerically favorable, but it is not guaranteed to converge
2879 to a correct value for all possible intervals and integrands. This
2880 function is provided for convenience; for critical applications,
2881 check results against other integration methods.
2883 The function is not vectorized.
2885 Examples
2886 --------
2888 To understand the effect of the bounds of integration consider
2890 >>> from scipy.stats import expon
2891 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0)
2892 0.6321205588285578
2894 This is close to
2896 >>> expon(1).cdf(2.0) - expon(1).cdf(0.0)
2897 0.6321205588285577
2899 If ``conditional=True``
2901 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True)
2902 1.0000000000000002
2904 The slight deviation from 1 is due to numerical integration.
2906 The integrand can be treated as a complex-valued function
2907 by passing ``complex_func=True`` to `scipy.integrate.quad` .
2909 >>> import numpy as np
2910 >>> from scipy.stats import vonmises
2911 >>> res = vonmises(loc=2, kappa=1).expect(lambda x: np.exp(1j*x),
2912 ... complex_func=True)
2913 >>> res
2914 (-0.18576377217422957+0.40590124735052263j)
2916 >>> np.angle(res) # location of the (circular) distribution
2917 2.0
2919 """
2920 lockwds = {'loc': loc,
2921 'scale': scale}
2922 self._argcheck(*args)
2923 _a, _b = self._get_support(*args)
2924 if func is None:
2925 def fun(x, *args):
2926 return x * self.pdf(x, *args, **lockwds)
2927 else:
2928 def fun(x, *args):
2929 return func(x) * self.pdf(x, *args, **lockwds)
2930 if lb is None:
2931 lb = loc + _a * scale
2932 if ub is None:
2933 ub = loc + _b * scale
2935 cdf_bounds = self.cdf([lb, ub], *args, **lockwds)
2936 invfac = cdf_bounds[1] - cdf_bounds[0]
2938 kwds['args'] = args
2940 # split interval to help integrator w/ infinite support; see gh-8928
2941 alpha = 0.05 # split body from tails at probability mass `alpha`
2942 inner_bounds = np.array([alpha, 1-alpha])
2943 cdf_inner_bounds = cdf_bounds[0] + invfac * inner_bounds
2944 c, d = loc + self._ppf(cdf_inner_bounds, *args) * scale
2946 # Do not silence warnings from integration.
2947 lbc = integrate.quad(fun, lb, c, **kwds)[0]
2948 cd = integrate.quad(fun, c, d, **kwds)[0]
2949 dub = integrate.quad(fun, d, ub, **kwds)[0]
2950 vals = (lbc + cd + dub)
2952 if conditional:
2953 vals /= invfac
2954 return np.array(vals)[()] # make it a numpy scalar like other methods
2956 def _param_info(self):
2957 shape_info = self._shape_info()
2958 loc_info = _ShapeInfo("loc", False, (-np.inf, np.inf), (False, False))
2959 scale_info = _ShapeInfo("scale", False, (0, np.inf), (False, False))
2960 param_info = shape_info + [loc_info, scale_info]
2961 return param_info
2963# Helpers for the discrete distributions
2964def _drv2_moment(self, n, *args):
2965 """Non-central moment of discrete distribution."""
2966 def fun(x):
2967 return np.power(x, n) * self._pmf(x, *args)
2969 _a, _b = self._get_support(*args)
2970 return _expect(fun, _a, _b, self.ppf(0.5, *args), self.inc)
2973def _drv2_ppfsingle(self, q, *args): # Use basic bisection algorithm
2974 _a, _b = self._get_support(*args)
2975 b = _b
2976 a = _a
2977 if isinf(b): # Be sure ending point is > q
2978 b = int(max(100*q, 10))
2979 while 1:
2980 if b >= _b:
2981 qb = 1.0
2982 break
2983 qb = self._cdf(b, *args)
2984 if (qb < q):
2985 b += 10
2986 else:
2987 break
2988 else:
2989 qb = 1.0
2990 if isinf(a): # be sure starting point < q
2991 a = int(min(-100*q, -10))
2992 while 1:
2993 if a <= _a:
2994 qb = 0.0
2995 break
2996 qa = self._cdf(a, *args)
2997 if (qa > q):
2998 a -= 10
2999 else:
3000 break
3001 else:
3002 qa = self._cdf(a, *args)
3004 while 1:
3005 if (qa == q):
3006 return a
3007 if (qb == q):
3008 return b
3009 if b <= a+1:
3010 if qa > q:
3011 return a
3012 else:
3013 return b
3014 c = int((a+b)/2.0)
3015 qc = self._cdf(c, *args)
3016 if (qc < q):
3017 if a != c:
3018 a = c
3019 else:
3020 raise RuntimeError('updating stopped, endless loop')
3021 qa = qc
3022 elif (qc > q):
3023 if b != c:
3024 b = c
3025 else:
3026 raise RuntimeError('updating stopped, endless loop')
3027 qb = qc
3028 else:
3029 return c
3032# Must over-ride one of _pmf or _cdf or pass in
3033# x_k, p(x_k) lists in initialization
3036class rv_discrete(rv_generic):
3037 """A generic discrete random variable class meant for subclassing.
3039 `rv_discrete` is a base class to construct specific distribution classes
3040 and instances for discrete random variables. It can also be used
3041 to construct an arbitrary distribution defined by a list of support
3042 points and corresponding probabilities.
3044 Parameters
3045 ----------
3046 a : float, optional
3047 Lower bound of the support of the distribution, default: 0
3048 b : float, optional
3049 Upper bound of the support of the distribution, default: plus infinity
3050 moment_tol : float, optional
3051 The tolerance for the generic calculation of moments.
3052 values : tuple of two array_like, optional
3053 ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero
3054 probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk``
3055 and ``pk`` must have the same shape.
3056 inc : integer, optional
3057 Increment for the support of the distribution.
3058 Default is 1. (other values have not been tested)
3059 badvalue : float, optional
3060 The value in a result arrays that indicates a value that for which
3061 some argument restriction is violated, default is np.nan.
3062 name : str, optional
3063 The name of the instance. This string is used to construct the default
3064 example for distributions.
3065 longname : str, optional
3066 This string is used as part of the first line of the docstring returned
3067 when a subclass has no docstring of its own. Note: `longname` exists
3068 for backwards compatibility, do not use for new subclasses.
3069 shapes : str, optional
3070 The shape of the distribution. For example "m, n" for a distribution
3071 that takes two integers as the two shape arguments for all its methods
3072 If not provided, shape parameters will be inferred from
3073 the signatures of the private methods, ``_pmf`` and ``_cdf`` of
3074 the instance.
3075 extradoc : str, optional, deprecated
3076 This string is used as the last part of the docstring returned when a
3077 subclass has no docstring of its own. Note: `extradoc` exists for
3078 backwards compatibility and will be removed in SciPy 1.11.0, do not
3079 use for new subclasses.
3080 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
3081 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
3082 singleton is used.
3083 If `seed` is an int, a new ``RandomState`` instance is used,
3084 seeded with `seed`.
3085 If `seed` is already a ``Generator`` or ``RandomState`` instance then
3086 that instance is used.
3088 Methods
3089 -------
3090 rvs
3091 pmf
3092 logpmf
3093 cdf
3094 logcdf
3095 sf
3096 logsf
3097 ppf
3098 isf
3099 moment
3100 stats
3101 entropy
3102 expect
3103 median
3104 mean
3105 std
3106 var
3107 interval
3108 __call__
3109 support
3111 Notes
3112 -----
3113 This class is similar to `rv_continuous`. Whether a shape parameter is
3114 valid is decided by an ``_argcheck`` method (which defaults to checking
3115 that its arguments are strictly positive.)
3116 The main differences are:
3118 - the support of the distribution is a set of integers
3119 - instead of the probability density function, ``pdf`` (and the
3120 corresponding private ``_pdf``), this class defines the
3121 *probability mass function*, `pmf` (and the corresponding
3122 private ``_pmf``.)
3123 - scale parameter is not defined.
3125 To create a new discrete distribution, we would do the following:
3127 >>> from scipy.stats import rv_discrete
3128 >>> class poisson_gen(rv_discrete):
3129 ... "Poisson distribution"
3130 ... def _pmf(self, k, mu):
3131 ... return exp(-mu) * mu**k / factorial(k)
3133 and create an instance::
3135 >>> poisson = poisson_gen(name="poisson")
3137 Note that above we defined the Poisson distribution in the standard form.
3138 Shifting the distribution can be done by providing the ``loc`` parameter
3139 to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)``
3140 delegates the work to ``poisson._pmf(x-loc, mu)``.
3142 **Discrete distributions from a list of probabilities**
3144 Alternatively, you can construct an arbitrary discrete rv defined
3145 on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the
3146 ``values`` keyword argument to the `rv_discrete` constructor.
3148 Examples
3149 --------
3150 Custom made discrete distribution:
3152 >>> import numpy as np
3153 >>> from scipy import stats
3154 >>> xk = np.arange(7)
3155 >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)
3156 >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))
3157 >>>
3158 >>> import matplotlib.pyplot as plt
3159 >>> fig, ax = plt.subplots(1, 1)
3160 >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
3161 >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
3162 >>> plt.show()
3164 Random number generation:
3166 >>> R = custm.rvs(size=100)
3168 """
3169 def __new__(cls, a=0, b=inf, name=None, badvalue=None,
3170 moment_tol=1e-8, values=None, inc=1, longname=None,
3171 shapes=None, extradoc=None, seed=None):
3173 if values is not None:
3174 # dispatch to a subclass
3175 return super(rv_discrete, cls).__new__(rv_sample)
3176 else:
3177 # business as usual
3178 return super(rv_discrete, cls).__new__(cls)
3180 def __init__(self, a=0, b=inf, name=None, badvalue=None,
3181 moment_tol=1e-8, values=None, inc=1, longname=None,
3182 shapes=None, extradoc=None, seed=None):
3184 super().__init__(seed)
3186 if extradoc is not None:
3187 warnings.warn("extradoc is deprecated and will be removed in "
3188 "SciPy 1.11.0", DeprecationWarning)
3190 # cf generic freeze
3191 self._ctor_param = dict(
3192 a=a, b=b, name=name, badvalue=badvalue,
3193 moment_tol=moment_tol, values=values, inc=inc,
3194 longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)
3196 if badvalue is None:
3197 badvalue = nan
3198 self.badvalue = badvalue
3199 self.a = a
3200 self.b = b
3201 self.moment_tol = moment_tol
3202 self.inc = inc
3203 self.shapes = shapes
3205 if values is not None:
3206 raise ValueError("rv_discrete.__init__(..., values != None, ...)")
3208 self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],
3209 locscale_in='loc=0',
3210 # scale=1 for discrete RVs
3211 locscale_out='loc, 1')
3212 self._attach_methods()
3213 self._construct_docstrings(name, longname, extradoc)
3215 def __getstate__(self):
3216 dct = self.__dict__.copy()
3217 # these methods will be remade in __setstate__
3218 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
3219 "_cdfvec", "_ppfvec", "generic_moment"]
3220 [dct.pop(attr, None) for attr in attrs]
3221 return dct
3223 def _attach_methods(self):
3224 """Attaches dynamically created methods to the rv_discrete instance."""
3225 self._cdfvec = vectorize(self._cdf_single, otypes='d')
3226 self.vecentropy = vectorize(self._entropy)
3228 # _attach_methods is responsible for calling _attach_argparser_methods
3229 self._attach_argparser_methods()
3231 # nin correction needs to be after we know numargs
3232 # correct nin for generic moment vectorization
3233 _vec_generic_moment = vectorize(_drv2_moment, otypes='d')
3234 _vec_generic_moment.nin = self.numargs + 2
3235 self.generic_moment = types.MethodType(_vec_generic_moment, self)
3237 # correct nin for ppf vectorization
3238 _vppf = vectorize(_drv2_ppfsingle, otypes='d')
3239 _vppf.nin = self.numargs + 2
3240 self._ppfvec = types.MethodType(_vppf, self)
3242 # now that self.numargs is defined, we can adjust nin
3243 self._cdfvec.nin = self.numargs + 1
3245 def _construct_docstrings(self, name, longname, extradoc):
3246 if name is None:
3247 name = 'Distribution'
3248 self.name = name
3249 self.extradoc = extradoc
3251 # generate docstring for subclass instances
3252 if longname is None:
3253 if name[0] in ['aeiouAEIOU']:
3254 hstr = "An "
3255 else:
3256 hstr = "A "
3257 longname = hstr + name
3259 if sys.flags.optimize < 2:
3260 # Skip adding docstrings if interpreter is run with -OO
3261 if self.__doc__ is None:
3262 self._construct_default_doc(longname=longname,
3263 extradoc=extradoc,
3264 docdict=docdict_discrete,
3265 discrete='discrete')
3266 else:
3267 dct = dict(distdiscrete)
3268 self._construct_doc(docdict_discrete, dct.get(self.name))
3270 # discrete RV do not have the scale parameter, remove it
3271 self.__doc__ = self.__doc__.replace(
3272 '\n scale : array_like, '
3273 'optional\n scale parameter (default=1)', '')
3275 def _updated_ctor_param(self):
3276 """Return the current version of _ctor_param, possibly updated by user.
3278 Used by freezing.
3279 Keep this in sync with the signature of __init__.
3280 """
3281 dct = self._ctor_param.copy()
3282 dct['a'] = self.a
3283 dct['b'] = self.b
3284 dct['badvalue'] = self.badvalue
3285 dct['moment_tol'] = self.moment_tol
3286 dct['inc'] = self.inc
3287 dct['name'] = self.name
3288 dct['shapes'] = self.shapes
3289 dct['extradoc'] = self.extradoc
3290 return dct
3292 def _nonzero(self, k, *args):
3293 return floor(k) == k
3295 def _pmf(self, k, *args):
3296 return self._cdf(k, *args) - self._cdf(k-1, *args)
3298 def _logpmf(self, k, *args):
3299 return log(self._pmf(k, *args))
3301 def _logpxf(self, k, *args):
3302 # continuous distributions have PDF, discrete have PMF, but sometimes
3303 # the distinction doesn't matter. This lets us use `_logpxf` for both
3304 # discrete and continuous distributions.
3305 return self._logpmf(k, *args)
3307 def _unpack_loc_scale(self, theta):
3308 try:
3309 loc = theta[-1]
3310 scale = 1
3311 args = tuple(theta[:-1])
3312 except IndexError as e:
3313 raise ValueError("Not enough input arguments.") from e
3314 return loc, scale, args
3316 def _cdf_single(self, k, *args):
3317 _a, _b = self._get_support(*args)
3318 m = arange(int(_a), k+1)
3319 return np.sum(self._pmf(m, *args), axis=0)
3321 def _cdf(self, x, *args):
3322 k = floor(x)
3323 return self._cdfvec(k, *args)
3325 # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic
3327 def rvs(self, *args, **kwargs):
3328 """Random variates of given type.
3330 Parameters
3331 ----------
3332 arg1, arg2, arg3,... : array_like
3333 The shape parameter(s) for the distribution (see docstring of the
3334 instance object for more information).
3335 loc : array_like, optional
3336 Location parameter (default=0).
3337 size : int or tuple of ints, optional
3338 Defining number of random variates (Default is 1). Note that `size`
3339 has to be given as keyword, not as positional argument.
3340 random_state : {None, int, `numpy.random.Generator`,
3341 `numpy.random.RandomState`}, optional
3343 If `random_state` is None (or `np.random`), the
3344 `numpy.random.RandomState` singleton is used.
3345 If `random_state` is an int, a new ``RandomState`` instance is
3346 used, seeded with `random_state`.
3347 If `random_state` is already a ``Generator`` or ``RandomState``
3348 instance, that instance is used.
3350 Returns
3351 -------
3352 rvs : ndarray or scalar
3353 Random variates of given `size`.
3355 """
3356 kwargs['discrete'] = True
3357 return super().rvs(*args, **kwargs)
3359 def pmf(self, k, *args, **kwds):
3360 """Probability mass function at k of the given RV.
3362 Parameters
3363 ----------
3364 k : array_like
3365 Quantiles.
3366 arg1, arg2, arg3,... : array_like
3367 The shape parameter(s) for the distribution (see docstring of the
3368 instance object for more information)
3369 loc : array_like, optional
3370 Location parameter (default=0).
3372 Returns
3373 -------
3374 pmf : array_like
3375 Probability mass function evaluated at k
3377 """
3378 args, loc, _ = self._parse_args(*args, **kwds)
3379 k, loc = map(asarray, (k, loc))
3380 args = tuple(map(asarray, args))
3381 _a, _b = self._get_support(*args)
3382 k = asarray((k-loc))
3383 cond0 = self._argcheck(*args)
3384 cond1 = (k >= _a) & (k <= _b)
3385 if not isinstance(self, rv_sample):
3386 cond1 = cond1 & self._nonzero(k, *args)
3387 cond = cond0 & cond1
3388 output = zeros(shape(cond), 'd')
3389 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3390 if np.any(cond):
3391 goodargs = argsreduce(cond, *((k,)+args))
3392 place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
3393 if output.ndim == 0:
3394 return output[()]
3395 return output
3397 def logpmf(self, k, *args, **kwds):
3398 """Log of the probability mass function at k of the given RV.
3400 Parameters
3401 ----------
3402 k : array_like
3403 Quantiles.
3404 arg1, arg2, arg3,... : array_like
3405 The shape parameter(s) for the distribution (see docstring of the
3406 instance object for more information).
3407 loc : array_like, optional
3408 Location parameter. Default is 0.
3410 Returns
3411 -------
3412 logpmf : array_like
3413 Log of the probability mass function evaluated at k.
3415 """
3416 args, loc, _ = self._parse_args(*args, **kwds)
3417 k, loc = map(asarray, (k, loc))
3418 args = tuple(map(asarray, args))
3419 _a, _b = self._get_support(*args)
3420 k = asarray((k-loc))
3421 cond0 = self._argcheck(*args)
3422 cond1 = (k >= _a) & (k <= _b)
3423 if not isinstance(self, rv_sample):
3424 cond1 = cond1 & self._nonzero(k, *args)
3425 cond = cond0 & cond1
3426 output = empty(shape(cond), 'd')
3427 output.fill(NINF)
3428 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3429 if np.any(cond):
3430 goodargs = argsreduce(cond, *((k,)+args))
3431 place(output, cond, self._logpmf(*goodargs))
3432 if output.ndim == 0:
3433 return output[()]
3434 return output
3436 def cdf(self, k, *args, **kwds):
3437 """Cumulative distribution function of the given RV.
3439 Parameters
3440 ----------
3441 k : array_like, int
3442 Quantiles.
3443 arg1, arg2, arg3,... : array_like
3444 The shape parameter(s) for the distribution (see docstring of the
3445 instance object for more information).
3446 loc : array_like, optional
3447 Location parameter (default=0).
3449 Returns
3450 -------
3451 cdf : ndarray
3452 Cumulative distribution function evaluated at `k`.
3454 """
3455 args, loc, _ = self._parse_args(*args, **kwds)
3456 k, loc = map(asarray, (k, loc))
3457 args = tuple(map(asarray, args))
3458 _a, _b = self._get_support(*args)
3459 k = asarray((k-loc))
3460 cond0 = self._argcheck(*args)
3461 cond1 = (k >= _a) & (k < _b)
3462 cond2 = (k >= _b)
3463 cond3 = np.isneginf(k)
3464 cond = cond0 & cond1 & np.isfinite(k)
3466 output = zeros(shape(cond), 'd')
3467 place(output, cond2*(cond0 == cond0), 1.0)
3468 place(output, cond3*(cond0 == cond0), 0.0)
3469 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3471 if np.any(cond):
3472 goodargs = argsreduce(cond, *((k,)+args))
3473 place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
3474 if output.ndim == 0:
3475 return output[()]
3476 return output
3478 def logcdf(self, k, *args, **kwds):
3479 """Log of the cumulative distribution function at k of the given RV.
3481 Parameters
3482 ----------
3483 k : array_like, int
3484 Quantiles.
3485 arg1, arg2, arg3,... : array_like
3486 The shape parameter(s) for the distribution (see docstring of the
3487 instance object for more information).
3488 loc : array_like, optional
3489 Location parameter (default=0).
3491 Returns
3492 -------
3493 logcdf : array_like
3494 Log of the cumulative distribution function evaluated at k.
3496 """
3497 args, loc, _ = self._parse_args(*args, **kwds)
3498 k, loc = map(asarray, (k, loc))
3499 args = tuple(map(asarray, args))
3500 _a, _b = self._get_support(*args)
3501 k = asarray((k-loc))
3502 cond0 = self._argcheck(*args)
3503 cond1 = (k >= _a) & (k < _b)
3504 cond2 = (k >= _b)
3505 cond = cond0 & cond1
3506 output = empty(shape(cond), 'd')
3507 output.fill(NINF)
3508 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3509 place(output, cond2*(cond0 == cond0), 0.0)
3511 if np.any(cond):
3512 goodargs = argsreduce(cond, *((k,)+args))
3513 place(output, cond, self._logcdf(*goodargs))
3514 if output.ndim == 0:
3515 return output[()]
3516 return output
3518 def sf(self, k, *args, **kwds):
3519 """Survival function (1 - `cdf`) at k of the given RV.
3521 Parameters
3522 ----------
3523 k : array_like
3524 Quantiles.
3525 arg1, arg2, arg3,... : array_like
3526 The shape parameter(s) for the distribution (see docstring of the
3527 instance object for more information).
3528 loc : array_like, optional
3529 Location parameter (default=0).
3531 Returns
3532 -------
3533 sf : array_like
3534 Survival function evaluated at k.
3536 """
3537 args, loc, _ = self._parse_args(*args, **kwds)
3538 k, loc = map(asarray, (k, loc))
3539 args = tuple(map(asarray, args))
3540 _a, _b = self._get_support(*args)
3541 k = asarray(k-loc)
3542 cond0 = self._argcheck(*args)
3543 cond1 = (k >= _a) & (k < _b)
3544 cond2 = ((k < _a) | np.isneginf(k)) & cond0
3545 cond = cond0 & cond1 & np.isfinite(k)
3546 output = zeros(shape(cond), 'd')
3547 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3548 place(output, cond2, 1.0)
3549 if np.any(cond):
3550 goodargs = argsreduce(cond, *((k,)+args))
3551 place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
3552 if output.ndim == 0:
3553 return output[()]
3554 return output
3556 def logsf(self, k, *args, **kwds):
3557 """Log of the survival function of the given RV.
3559 Returns the log of the "survival function," defined as 1 - `cdf`,
3560 evaluated at `k`.
3562 Parameters
3563 ----------
3564 k : array_like
3565 Quantiles.
3566 arg1, arg2, arg3,... : array_like
3567 The shape parameter(s) for the distribution (see docstring of the
3568 instance object for more information).
3569 loc : array_like, optional
3570 Location parameter (default=0).
3572 Returns
3573 -------
3574 logsf : ndarray
3575 Log of the survival function evaluated at `k`.
3577 """
3578 args, loc, _ = self._parse_args(*args, **kwds)
3579 k, loc = map(asarray, (k, loc))
3580 args = tuple(map(asarray, args))
3581 _a, _b = self._get_support(*args)
3582 k = asarray(k-loc)
3583 cond0 = self._argcheck(*args)
3584 cond1 = (k >= _a) & (k < _b)
3585 cond2 = (k < _a) & cond0
3586 cond = cond0 & cond1
3587 output = empty(shape(cond), 'd')
3588 output.fill(NINF)
3589 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3590 place(output, cond2, 0.0)
3591 if np.any(cond):
3592 goodargs = argsreduce(cond, *((k,)+args))
3593 place(output, cond, self._logsf(*goodargs))
3594 if output.ndim == 0:
3595 return output[()]
3596 return output
3598 def ppf(self, q, *args, **kwds):
3599 """Percent point function (inverse of `cdf`) at q of the given RV.
3601 Parameters
3602 ----------
3603 q : array_like
3604 Lower tail probability.
3605 arg1, arg2, arg3,... : array_like
3606 The shape parameter(s) for the distribution (see docstring of the
3607 instance object for more information).
3608 loc : array_like, optional
3609 Location parameter (default=0).
3611 Returns
3612 -------
3613 k : array_like
3614 Quantile corresponding to the lower tail probability, q.
3616 """
3617 args, loc, _ = self._parse_args(*args, **kwds)
3618 q, loc = map(asarray, (q, loc))
3619 args = tuple(map(asarray, args))
3620 _a, _b = self._get_support(*args)
3621 cond0 = self._argcheck(*args) & (loc == loc)
3622 cond1 = (q > 0) & (q < 1)
3623 cond2 = (q == 1) & cond0
3624 cond = cond0 & cond1
3625 output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
3626 # output type 'd' to handle nin and inf
3627 place(output, (q == 0)*(cond == cond), _a-1 + loc)
3628 place(output, cond2, _b + loc)
3629 if np.any(cond):
3630 goodargs = argsreduce(cond, *((q,)+args+(loc,)))
3631 loc, goodargs = goodargs[-1], goodargs[:-1]
3632 place(output, cond, self._ppf(*goodargs) + loc)
3634 if output.ndim == 0:
3635 return output[()]
3636 return output
3638 def isf(self, q, *args, **kwds):
3639 """Inverse survival function (inverse of `sf`) at q of the given RV.
3641 Parameters
3642 ----------
3643 q : array_like
3644 Upper tail probability.
3645 arg1, arg2, arg3,... : array_like
3646 The shape parameter(s) for the distribution (see docstring of the
3647 instance object for more information).
3648 loc : array_like, optional
3649 Location parameter (default=0).
3651 Returns
3652 -------
3653 k : ndarray or scalar
3654 Quantile corresponding to the upper tail probability, q.
3656 """
3657 args, loc, _ = self._parse_args(*args, **kwds)
3658 q, loc = map(asarray, (q, loc))
3659 args = tuple(map(asarray, args))
3660 _a, _b = self._get_support(*args)
3661 cond0 = self._argcheck(*args) & (loc == loc)
3662 cond1 = (q > 0) & (q < 1)
3663 cond2 = (q == 1) & cond0
3664 cond3 = (q == 0) & cond0
3665 cond = cond0 & cond1
3667 # same problem as with ppf; copied from ppf and changed
3668 output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
3669 # output type 'd' to handle nin and inf
3670 lower_bound = _a - 1 + loc
3671 upper_bound = _b + loc
3672 place(output, cond2*(cond == cond), lower_bound)
3673 place(output, cond3*(cond == cond), upper_bound)
3675 # call place only if at least 1 valid argument
3676 if np.any(cond):
3677 goodargs = argsreduce(cond, *((q,)+args+(loc,)))
3678 loc, goodargs = goodargs[-1], goodargs[:-1]
3679 # PB same as ticket 766
3680 place(output, cond, self._isf(*goodargs) + loc)
3682 if output.ndim == 0:
3683 return output[()]
3684 return output
3686 def _entropy(self, *args):
3687 if hasattr(self, 'pk'):
3688 return stats.entropy(self.pk)
3689 else:
3690 _a, _b = self._get_support(*args)
3691 return _expect(lambda x: entr(self.pmf(x, *args)),
3692 _a, _b, self.ppf(0.5, *args), self.inc)
3694 def expect(self, func=None, args=(), loc=0, lb=None, ub=None,
3695 conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):
3696 """
3697 Calculate expected value of a function with respect to the distribution
3698 for discrete distribution by numerical summation.
3700 Parameters
3701 ----------
3702 func : callable, optional
3703 Function for which the expectation value is calculated.
3704 Takes only one argument.
3705 The default is the identity mapping f(k) = k.
3706 args : tuple, optional
3707 Shape parameters of the distribution.
3708 loc : float, optional
3709 Location parameter.
3710 Default is 0.
3711 lb, ub : int, optional
3712 Lower and upper bound for the summation, default is set to the
3713 support of the distribution, inclusive (``lb <= k <= ub``).
3714 conditional : bool, optional
3715 If true then the expectation is corrected by the conditional
3716 probability of the summation interval. The return value is the
3717 expectation of the function, `func`, conditional on being in
3718 the given interval (k such that ``lb <= k <= ub``).
3719 Default is False.
3720 maxcount : int, optional
3721 Maximal number of terms to evaluate (to avoid an endless loop for
3722 an infinite sum). Default is 1000.
3723 tolerance : float, optional
3724 Absolute tolerance for the summation. Default is 1e-10.
3725 chunksize : int, optional
3726 Iterate over the support of a distributions in chunks of this size.
3727 Default is 32.
3729 Returns
3730 -------
3731 expect : float
3732 Expected value.
3734 Notes
3735 -----
3736 For heavy-tailed distributions, the expected value may or
3737 may not exist,
3738 depending on the function, `func`. If it does exist, but the
3739 sum converges
3740 slowly, the accuracy of the result may be rather low. For instance, for
3741 ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.
3742 increasing `maxcount` and/or `chunksize` may improve the result,
3743 but may also make zipf very slow.
3745 The function is not vectorized.
3747 """
3748 if func is None:
3749 def fun(x):
3750 # loc and args from outer scope
3751 return (x+loc)*self._pmf(x, *args)
3752 else:
3753 def fun(x):
3754 # loc and args from outer scope
3755 return func(x+loc)*self._pmf(x, *args)
3756 # used pmf because _pmf does not check support in randint and there
3757 # might be problems(?) with correct self.a, self.b at this stage maybe
3758 # not anymore, seems to work now with _pmf
3760 _a, _b = self._get_support(*args)
3761 if lb is None:
3762 lb = _a
3763 else:
3764 lb = lb - loc # convert bound for standardized distribution
3765 if ub is None:
3766 ub = _b
3767 else:
3768 ub = ub - loc # convert bound for standardized distribution
3769 if conditional:
3770 invfac = self.sf(lb-1, *args) - self.sf(ub, *args)
3771 else:
3772 invfac = 1.0
3774 if isinstance(self, rv_sample):
3775 res = self._expect(fun, lb, ub)
3776 return res / invfac
3778 # iterate over the support, starting from the median
3779 x0 = self.ppf(0.5, *args)
3780 res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)
3781 return res / invfac
3783 def _param_info(self):
3784 shape_info = self._shape_info()
3785 loc_info = _ShapeInfo("loc", True, (-np.inf, np.inf), (False, False))
3786 param_info = shape_info + [loc_info]
3787 return param_info
3790def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,
3791 chunksize=32):
3792 """Helper for computing the expectation value of `fun`."""
3793 # short-circuit if the support size is small enough
3794 if (ub - lb) <= chunksize:
3795 supp = np.arange(lb, ub+1, inc)
3796 vals = fun(supp)
3797 return np.sum(vals)
3799 # otherwise, iterate starting from x0
3800 if x0 < lb:
3801 x0 = lb
3802 if x0 > ub:
3803 x0 = ub
3805 count, tot = 0, 0.
3806 # iterate over [x0, ub] inclusive
3807 for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):
3808 count += x.size
3809 delta = np.sum(fun(x))
3810 tot += delta
3811 if abs(delta) < tolerance * x.size:
3812 break
3813 if count > maxcount:
3814 warnings.warn('expect(): sum did not converge', RuntimeWarning)
3815 return tot
3817 # iterate over [lb, x0)
3818 for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):
3819 count += x.size
3820 delta = np.sum(fun(x))
3821 tot += delta
3822 if abs(delta) < tolerance * x.size:
3823 break
3824 if count > maxcount:
3825 warnings.warn('expect(): sum did not converge', RuntimeWarning)
3826 break
3828 return tot
3831def _iter_chunked(x0, x1, chunksize=4, inc=1):
3832 """Iterate from x0 to x1 in chunks of chunksize and steps inc.
3834 x0 must be finite, x1 need not be. In the latter case, the iterator is
3835 infinite.
3836 Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards
3837 (make sure to set inc < 0.)
3839 >>> [x for x in _iter_chunked(2, 5, inc=2)]
3840 [array([2, 4])]
3841 >>> [x for x in _iter_chunked(2, 11, inc=2)]
3842 [array([2, 4, 6, 8]), array([10])]
3843 >>> [x for x in _iter_chunked(2, -5, inc=-2)]
3844 [array([ 2, 0, -2, -4])]
3845 >>> [x for x in _iter_chunked(2, -9, inc=-2)]
3846 [array([ 2, 0, -2, -4]), array([-6, -8])]
3848 """
3849 if inc == 0:
3850 raise ValueError('Cannot increment by zero.')
3851 if chunksize <= 0:
3852 raise ValueError('Chunk size must be positive; got %s.' % chunksize)
3854 s = 1 if inc > 0 else -1
3855 stepsize = abs(chunksize * inc)
3857 x = x0
3858 while (x - x1) * inc < 0:
3859 delta = min(stepsize, abs(x - x1))
3860 step = delta * s
3861 supp = np.arange(x, x + step, inc)
3862 x += step
3863 yield supp
3866class rv_sample(rv_discrete):
3867 """A 'sample' discrete distribution defined by the support and values.
3869 The ctor ignores most of the arguments, only needs the `values` argument.
3870 """
3871 def __init__(self, a=0, b=inf, name=None, badvalue=None,
3872 moment_tol=1e-8, values=None, inc=1, longname=None,
3873 shapes=None, extradoc=None, seed=None):
3875 super(rv_discrete, self).__init__(seed)
3877 if extradoc is not None:
3878 warnings.warn("extradoc is deprecated and will be removed in "
3879 "SciPy 1.11.0", DeprecationWarning)
3881 if values is None:
3882 raise ValueError("rv_sample.__init__(..., values=None,...)")
3884 # cf generic freeze
3885 self._ctor_param = dict(
3886 a=a, b=b, name=name, badvalue=badvalue,
3887 moment_tol=moment_tol, values=values, inc=inc,
3888 longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)
3890 if badvalue is None:
3891 badvalue = nan
3892 self.badvalue = badvalue
3893 self.moment_tol = moment_tol
3894 self.inc = inc
3895 self.shapes = shapes
3896 self.vecentropy = self._entropy
3898 xk, pk = values
3900 if np.shape(xk) != np.shape(pk):
3901 raise ValueError("xk and pk must have the same shape.")
3902 if np.less(pk, 0.0).any():
3903 raise ValueError("All elements of pk must be non-negative.")
3904 if not np.allclose(np.sum(pk), 1):
3905 raise ValueError("The sum of provided pk is not 1.")
3907 indx = np.argsort(np.ravel(xk))
3908 self.xk = np.take(np.ravel(xk), indx, 0)
3909 self.pk = np.take(np.ravel(pk), indx, 0)
3910 self.a = self.xk[0]
3911 self.b = self.xk[-1]
3913 self.qvals = np.cumsum(self.pk, axis=0)
3915 self.shapes = ' ' # bypass inspection
3917 self._construct_argparser(meths_to_inspect=[self._pmf],
3918 locscale_in='loc=0',
3919 # scale=1 for discrete RVs
3920 locscale_out='loc, 1')
3922 self._attach_methods()
3924 self._construct_docstrings(name, longname, extradoc)
3926 def __getstate__(self):
3927 dct = self.__dict__.copy()
3929 # these methods will be remade in rv_generic.__setstate__,
3930 # which calls rv_generic._attach_methods
3931 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs"]
3932 [dct.pop(attr, None) for attr in attrs]
3934 return dct
3936 def _attach_methods(self):
3937 """Attaches dynamically created argparser methods."""
3938 self._attach_argparser_methods()
3940 def _get_support(self, *args):
3941 """Return the support of the (unscaled, unshifted) distribution.
3943 Parameters
3944 ----------
3945 arg1, arg2, ... : array_like
3946 The shape parameter(s) for the distribution (see docstring of the
3947 instance object for more information).
3949 Returns
3950 -------
3951 a, b : numeric (float, or int or +/-np.inf)
3952 end-points of the distribution's support.
3953 """
3954 return self.a, self.b
3956 def _pmf(self, x):
3957 return np.select([x == k for k in self.xk],
3958 [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)
3960 def _cdf(self, x):
3961 xx, xxk = np.broadcast_arrays(x[:, None], self.xk)
3962 indx = np.argmax(xxk > xx, axis=-1) - 1
3963 return self.qvals[indx]
3965 def _ppf(self, q):
3966 qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)
3967 indx = argmax(sqq >= qq, axis=-1)
3968 return self.xk[indx]
3970 def _rvs(self, size=None, random_state=None):
3971 # Need to define it explicitly, otherwise .rvs() with size=None
3972 # fails due to explicit broadcasting in _ppf
3973 U = random_state.uniform(size=size)
3974 if size is None:
3975 U = np.array(U, ndmin=1)
3976 Y = self._ppf(U)[0]
3977 else:
3978 Y = self._ppf(U)
3979 return Y
3981 def _entropy(self):
3982 return stats.entropy(self.pk)
3984 def generic_moment(self, n):
3985 n = asarray(n)
3986 return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)
3988 def _expect(self, fun, lb, ub, *args, **kwds):
3989 # ignore all args, just do a brute force summation
3990 supp = self.xk[(lb <= self.xk) & (self.xk <= ub)]
3991 vals = fun(supp)
3992 return np.sum(vals)
3995def _check_shape(argshape, size):
3996 """
3997 This is a utility function used by `_rvs()` in the class geninvgauss_gen.
3998 It compares the tuple argshape to the tuple size.
4000 Parameters
4001 ----------
4002 argshape : tuple of integers
4003 Shape of the arguments.
4004 size : tuple of integers or integer
4005 Size argument of rvs().
4007 Returns
4008 -------
4009 The function returns two tuples, scalar_shape and bc.
4011 scalar_shape : tuple
4012 Shape to which the 1-d array of random variates returned by
4013 _rvs_scalar() is converted when it is copied into the
4014 output array of _rvs().
4016 bc : tuple of booleans
4017 bc is an tuple the same length as size. bc[j] is True if the data
4018 associated with that index is generated in one call of _rvs_scalar().
4020 """
4021 scalar_shape = []
4022 bc = []
4023 for argdim, sizedim in zip_longest(argshape[::-1], size[::-1],
4024 fillvalue=1):
4025 if sizedim > argdim or (argdim == sizedim == 1):
4026 scalar_shape.append(sizedim)
4027 bc.append(True)
4028 else:
4029 bc.append(False)
4030 return tuple(scalar_shape[::-1]), tuple(bc[::-1])
4033def get_distribution_names(namespace_pairs, rv_base_class):
4034 """Collect names of statistical distributions and their generators.
4036 Parameters
4037 ----------
4038 namespace_pairs : sequence
4039 A snapshot of (name, value) pairs in the namespace of a module.
4040 rv_base_class : class
4041 The base class of random variable generator classes in a module.
4043 Returns
4044 -------
4045 distn_names : list of strings
4046 Names of the statistical distributions.
4047 distn_gen_names : list of strings
4048 Names of the generators of the statistical distributions.
4049 Note that these are not simply the names of the statistical
4050 distributions, with a _gen suffix added.
4052 """
4053 distn_names = []
4054 distn_gen_names = []
4055 for name, value in namespace_pairs:
4056 if name.startswith('_'):
4057 continue
4058 if name.endswith('_gen') and issubclass(value, rv_base_class):
4059 distn_gen_names.append(name)
4060 if isinstance(value, rv_base_class):
4061 distn_names.append(name)
4062 return distn_names, distn_gen_names