Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_multivariate.py: 28%
1497 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1#
2# Author: Joris Vankerschaver 2013
3#
4import math
5import numpy as np
6from numpy import asarray_chkfinite, asarray
7from numpy.lib import NumpyVersion
8import scipy.linalg
9from scipy._lib import doccer
10from scipy.special import gammaln, psi, multigammaln, xlogy, entr, betaln
11from scipy._lib._util import check_random_state
12from scipy.linalg.blas import drot
13from scipy.linalg._misc import LinAlgError
14from scipy.linalg.lapack import get_lapack_funcs
16from ._discrete_distns import binom
17from . import _mvn, _covariance, _rcont
19__all__ = ['multivariate_normal',
20 'matrix_normal',
21 'dirichlet',
22 'wishart',
23 'invwishart',
24 'multinomial',
25 'special_ortho_group',
26 'ortho_group',
27 'random_correlation',
28 'unitary_group',
29 'multivariate_t',
30 'multivariate_hypergeom',
31 'random_table',
32 'uniform_direction']
34_LOG_2PI = np.log(2 * np.pi)
35_LOG_2 = np.log(2)
36_LOG_PI = np.log(np.pi)
39_doc_random_state = """\
40seed : {None, int, np.random.RandomState, np.random.Generator}, optional
41 Used for drawing random variates.
42 If `seed` is `None`, the `~np.random.RandomState` singleton is used.
43 If `seed` is an int, a new ``RandomState`` instance is used, seeded
44 with seed.
45 If `seed` is already a ``RandomState`` or ``Generator`` instance,
46 then that object is used.
47 Default is `None`.
48"""
51def _squeeze_output(out):
52 """
53 Remove single-dimensional entries from array and convert to scalar,
54 if necessary.
55 """
56 out = out.squeeze()
57 if out.ndim == 0:
58 out = out[()]
59 return out
62def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
63 """Determine which eigenvalues are "small" given the spectrum.
65 This is for compatibility across various linear algebra functions
66 that should agree about whether or not a Hermitian matrix is numerically
67 singular and what is its numerical matrix rank.
68 This is designed to be compatible with scipy.linalg.pinvh.
70 Parameters
71 ----------
72 spectrum : 1d ndarray
73 Array of eigenvalues of a Hermitian matrix.
74 cond, rcond : float, optional
75 Cutoff for small eigenvalues.
76 Singular values smaller than rcond * largest_eigenvalue are
77 considered zero.
78 If None or -1, suitable machine precision is used.
80 Returns
81 -------
82 eps : float
83 Magnitude cutoff for numerical negligibility.
85 """
86 if rcond is not None:
87 cond = rcond
88 if cond in [None, -1]:
89 t = spectrum.dtype.char.lower()
90 factor = {'f': 1E3, 'd': 1E6}
91 cond = factor[t] * np.finfo(t).eps
92 eps = cond * np.max(abs(spectrum))
93 return eps
96def _pinv_1d(v, eps=1e-5):
97 """A helper function for computing the pseudoinverse.
99 Parameters
100 ----------
101 v : iterable of numbers
102 This may be thought of as a vector of eigenvalues or singular values.
103 eps : float
104 Values with magnitude no greater than eps are considered negligible.
106 Returns
107 -------
108 v_pinv : 1d float ndarray
109 A vector of pseudo-inverted numbers.
111 """
112 return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
115class _PSD:
116 """
117 Compute coordinated functions of a symmetric positive semidefinite matrix.
119 This class addresses two issues. Firstly it allows the pseudoinverse,
120 the logarithm of the pseudo-determinant, and the rank of the matrix
121 to be computed using one call to eigh instead of three.
122 Secondly it allows these functions to be computed in a way
123 that gives mutually compatible results.
124 All of the functions are computed with a common understanding as to
125 which of the eigenvalues are to be considered negligibly small.
126 The functions are designed to coordinate with scipy.linalg.pinvh()
127 but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().
129 Parameters
130 ----------
131 M : array_like
132 Symmetric positive semidefinite matrix (2-D).
133 cond, rcond : float, optional
134 Cutoff for small eigenvalues.
135 Singular values smaller than rcond * largest_eigenvalue are
136 considered zero.
137 If None or -1, suitable machine precision is used.
138 lower : bool, optional
139 Whether the pertinent array data is taken from the lower
140 or upper triangle of M. (Default: lower)
141 check_finite : bool, optional
142 Whether to check that the input matrices contain only finite
143 numbers. Disabling may give a performance gain, but may result
144 in problems (crashes, non-termination) if the inputs do contain
145 infinities or NaNs.
146 allow_singular : bool, optional
147 Whether to allow a singular matrix. (Default: True)
149 Notes
150 -----
151 The arguments are similar to those of scipy.linalg.pinvh().
153 """
155 def __init__(self, M, cond=None, rcond=None, lower=True,
156 check_finite=True, allow_singular=True):
157 self._M = np.asarray(M)
159 # Compute the symmetric eigendecomposition.
160 # Note that eigh takes care of array conversion, chkfinite,
161 # and assertion that the matrix is square.
162 s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)
164 eps = _eigvalsh_to_eps(s, cond, rcond)
165 if np.min(s) < -eps:
166 msg = "The input matrix must be symmetric positive semidefinite."
167 raise ValueError(msg)
168 d = s[s > eps]
169 if len(d) < len(s) and not allow_singular:
170 msg = ("When `allow_singular is False`, the input matrix must be "
171 "symmetric positive definite.")
172 raise np.linalg.LinAlgError(msg)
173 s_pinv = _pinv_1d(s, eps)
174 U = np.multiply(u, np.sqrt(s_pinv))
176 # Save the eigenvector basis, and tolerance for testing support
177 self.eps = 1e3*eps
178 self.V = u[:, s <= eps]
180 # Initialize the eagerly precomputed attributes.
181 self.rank = len(d)
182 self.U = U
183 self.log_pdet = np.sum(np.log(d))
185 # Initialize attributes to be lazily computed.
186 self._pinv = None
188 def _support_mask(self, x):
189 """
190 Check whether x lies in the support of the distribution.
191 """
192 residual = np.linalg.norm(x @ self.V, axis=-1)
193 in_support = residual < self.eps
194 return in_support
196 @property
197 def pinv(self):
198 if self._pinv is None:
199 self._pinv = np.dot(self.U, self.U.T)
200 return self._pinv
203class multi_rv_generic:
204 """
205 Class which encapsulates common functionality between all multivariate
206 distributions.
207 """
208 def __init__(self, seed=None):
209 super().__init__()
210 self._random_state = check_random_state(seed)
212 @property
213 def random_state(self):
214 """ Get or set the Generator object for generating random variates.
216 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
217 singleton is used.
218 If `seed` is an int, a new ``RandomState`` instance is used,
219 seeded with `seed`.
220 If `seed` is already a ``Generator`` or ``RandomState`` instance then
221 that instance is used.
223 """
224 return self._random_state
226 @random_state.setter
227 def random_state(self, seed):
228 self._random_state = check_random_state(seed)
230 def _get_random_state(self, random_state):
231 if random_state is not None:
232 return check_random_state(random_state)
233 else:
234 return self._random_state
237class multi_rv_frozen:
238 """
239 Class which encapsulates common functionality between all frozen
240 multivariate distributions.
241 """
242 @property
243 def random_state(self):
244 return self._dist._random_state
246 @random_state.setter
247 def random_state(self, seed):
248 self._dist._random_state = check_random_state(seed)
251_mvn_doc_default_callparams = """\
252mean : array_like, default: ``[0]``
253 Mean of the distribution.
254cov : array_like or `Covariance`, default: ``[1]``
255 Symmetric positive (semi)definite covariance matrix of the distribution.
256allow_singular : bool, default: ``False``
257 Whether to allow a singular covariance matrix. This is ignored if `cov` is
258 a `Covariance` object.
259"""
261_mvn_doc_callparams_note = """\
262Setting the parameter `mean` to `None` is equivalent to having `mean`
263be the zero-vector. The parameter `cov` can be a scalar, in which case
264the covariance matrix is the identity times that value, a vector of
265diagonal entries for the covariance matrix, a two-dimensional array_like,
266or a `Covariance` object.
267"""
269_mvn_doc_frozen_callparams = ""
271_mvn_doc_frozen_callparams_note = """\
272See class definition for a detailed description of parameters."""
274mvn_docdict_params = {
275 '_mvn_doc_default_callparams': _mvn_doc_default_callparams,
276 '_mvn_doc_callparams_note': _mvn_doc_callparams_note,
277 '_doc_random_state': _doc_random_state
278}
280mvn_docdict_noparams = {
281 '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams,
282 '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note,
283 '_doc_random_state': _doc_random_state
284}
287class multivariate_normal_gen(multi_rv_generic):
288 r"""A multivariate normal random variable.
290 The `mean` keyword specifies the mean. The `cov` keyword specifies the
291 covariance matrix.
293 Methods
294 -------
295 pdf(x, mean=None, cov=1, allow_singular=False)
296 Probability density function.
297 logpdf(x, mean=None, cov=1, allow_singular=False)
298 Log of the probability density function.
299 cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5, lower_limit=None) # noqa
300 Cumulative distribution function.
301 logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)
302 Log of the cumulative distribution function.
303 rvs(mean=None, cov=1, size=1, random_state=None)
304 Draw random samples from a multivariate normal distribution.
305 entropy()
306 Compute the differential entropy of the multivariate normal.
308 Parameters
309 ----------
310 %(_mvn_doc_default_callparams)s
311 %(_doc_random_state)s
313 Notes
314 -----
315 %(_mvn_doc_callparams_note)s
317 The covariance matrix `cov` may be an instance of a subclass of
318 `Covariance`, e.g. `scipy.stats.CovViaPrecision`. If so, `allow_singular`
319 is ignored.
321 Otherwise, `cov` must be a symmetric positive semidefinite
322 matrix when `allow_singular` is True; it must be (strictly) positive
323 definite when `allow_singular` is False.
324 Symmetry is not checked; only the lower triangular portion is used.
325 The determinant and inverse of `cov` are computed
326 as the pseudo-determinant and pseudo-inverse, respectively, so
327 that `cov` does not need to have full rank.
329 The probability density function for `multivariate_normal` is
331 .. math::
333 f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}}
334 \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),
336 where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
337 :math:`k` the rank of :math:`\Sigma`. In case of singular :math:`\Sigma`,
338 SciPy extends this definition according to [1]_.
340 .. versionadded:: 0.14.0
342 References
343 ----------
344 .. [1] Multivariate Normal Distribution - Degenerate Case, Wikipedia,
345 https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case
347 Examples
348 --------
349 >>> import numpy as np
350 >>> import matplotlib.pyplot as plt
351 >>> from scipy.stats import multivariate_normal
353 >>> x = np.linspace(0, 5, 10, endpoint=False)
354 >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
355 array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,
356 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])
357 >>> fig1 = plt.figure()
358 >>> ax = fig1.add_subplot(111)
359 >>> ax.plot(x, y)
360 >>> plt.show()
362 Alternatively, the object may be called (as a function) to fix the mean
363 and covariance parameters, returning a "frozen" multivariate normal
364 random variable:
366 >>> rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
367 >>> # Frozen object with the same methods but holding the given
368 >>> # mean and covariance fixed.
370 The input quantiles can be any shape of array, as long as the last
371 axis labels the components. This allows us for instance to
372 display the frozen pdf for a non-isotropic random variable in 2D as
373 follows:
375 >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
376 >>> pos = np.dstack((x, y))
377 >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
378 >>> fig2 = plt.figure()
379 >>> ax2 = fig2.add_subplot(111)
380 >>> ax2.contourf(x, y, rv.pdf(pos))
382 """
384 def __init__(self, seed=None):
385 super().__init__(seed)
386 self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params)
388 def __call__(self, mean=None, cov=1, allow_singular=False, seed=None):
389 """Create a frozen multivariate normal distribution.
391 See `multivariate_normal_frozen` for more information.
392 """
393 return multivariate_normal_frozen(mean, cov,
394 allow_singular=allow_singular,
395 seed=seed)
397 def _process_parameters(self, mean, cov, allow_singular=True):
398 """
399 Infer dimensionality from mean or covariance matrix, ensure that
400 mean and covariance are full vector resp. matrix.
401 """
402 if isinstance(cov, _covariance.Covariance):
403 return self._process_parameters_Covariance(mean, cov)
404 else:
405 # Before `Covariance` classes were introduced,
406 # `multivariate_normal` accepted plain arrays as `cov` and used the
407 # following input validation. To avoid disturbing the behavior of
408 # `multivariate_normal` when plain arrays are used, we use the
409 # original input validation here.
410 dim, mean, cov = self._process_parameters_psd(None, mean, cov)
411 # After input validation, some methods then processed the arrays
412 # with a `_PSD` object and used that to perform computation.
413 # To avoid branching statements in each method depending on whether
414 # `cov` is an array or `Covariance` object, we always process the
415 # array with `_PSD`, and then use wrapper that satisfies the
416 # `Covariance` interface, `CovViaPSD`.
417 psd = _PSD(cov, allow_singular=allow_singular)
418 cov_object = _covariance.CovViaPSD(psd)
419 return dim, mean, cov_object
421 def _process_parameters_Covariance(self, mean, cov):
422 dim = cov.shape[-1]
423 mean = np.array([0.]) if mean is None else mean
424 message = (f"`cov` represents a covariance matrix in {dim} dimensions,"
425 f"and so `mean` must be broadcastable to shape {(dim,)}")
426 try:
427 mean = np.broadcast_to(mean, dim)
428 except ValueError as e:
429 raise ValueError(message) from e
430 return dim, mean, cov
432 def _process_parameters_psd(self, dim, mean, cov):
433 # Try to infer dimensionality
434 if dim is None:
435 if mean is None:
436 if cov is None:
437 dim = 1
438 else:
439 cov = np.asarray(cov, dtype=float)
440 if cov.ndim < 2:
441 dim = 1
442 else:
443 dim = cov.shape[0]
444 else:
445 mean = np.asarray(mean, dtype=float)
446 dim = mean.size
447 else:
448 if not np.isscalar(dim):
449 raise ValueError("Dimension of random variable must be "
450 "a scalar.")
452 # Check input sizes and return full arrays for mean and cov if
453 # necessary
454 if mean is None:
455 mean = np.zeros(dim)
456 mean = np.asarray(mean, dtype=float)
458 if cov is None:
459 cov = 1.0
460 cov = np.asarray(cov, dtype=float)
462 if dim == 1:
463 mean = mean.reshape(1)
464 cov = cov.reshape(1, 1)
466 if mean.ndim != 1 or mean.shape[0] != dim:
467 raise ValueError("Array 'mean' must be a vector of length %d." %
468 dim)
469 if cov.ndim == 0:
470 cov = cov * np.eye(dim)
471 elif cov.ndim == 1:
472 cov = np.diag(cov)
473 elif cov.ndim == 2 and cov.shape != (dim, dim):
474 rows, cols = cov.shape
475 if rows != cols:
476 msg = ("Array 'cov' must be square if it is two dimensional,"
477 " but cov.shape = %s." % str(cov.shape))
478 else:
479 msg = ("Dimension mismatch: array 'cov' is of shape %s,"
480 " but 'mean' is a vector of length %d.")
481 msg = msg % (str(cov.shape), len(mean))
482 raise ValueError(msg)
483 elif cov.ndim > 2:
484 raise ValueError("Array 'cov' must be at most two-dimensional,"
485 " but cov.ndim = %d" % cov.ndim)
487 return dim, mean, cov
489 def _process_quantiles(self, x, dim):
490 """
491 Adjust quantiles array so that last axis labels the components of
492 each data point.
493 """
494 x = np.asarray(x, dtype=float)
496 if x.ndim == 0:
497 x = x[np.newaxis]
498 elif x.ndim == 1:
499 if dim == 1:
500 x = x[:, np.newaxis]
501 else:
502 x = x[np.newaxis, :]
504 return x
506 def _logpdf(self, x, mean, cov_object):
507 """Log of the multivariate normal probability density function.
509 Parameters
510 ----------
511 x : ndarray
512 Points at which to evaluate the log of the probability
513 density function
514 mean : ndarray
515 Mean of the distribution
516 cov_object : Covariance
517 An object representing the Covariance matrix
519 Notes
520 -----
521 As this function does no argument checking, it should not be
522 called directly; use 'logpdf' instead.
524 """
525 log_det_cov, rank = cov_object.log_pdet, cov_object.rank
526 dev = x - mean
527 if dev.ndim > 1:
528 log_det_cov = log_det_cov[..., np.newaxis]
529 rank = rank[..., np.newaxis]
530 maha = np.sum(np.square(cov_object.whiten(dev)), axis=-1)
531 return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
533 def logpdf(self, x, mean=None, cov=1, allow_singular=False):
534 """Log of the multivariate normal probability density function.
536 Parameters
537 ----------
538 x : array_like
539 Quantiles, with the last axis of `x` denoting the components.
540 %(_mvn_doc_default_callparams)s
542 Returns
543 -------
544 pdf : ndarray or scalar
545 Log of the probability density function evaluated at `x`
547 Notes
548 -----
549 %(_mvn_doc_callparams_note)s
551 """
552 params = self._process_parameters(mean, cov, allow_singular)
553 dim, mean, cov_object = params
554 x = self._process_quantiles(x, dim)
555 out = self._logpdf(x, mean, cov_object)
556 if np.any(cov_object.rank < dim):
557 out_of_bounds = ~cov_object._support_mask(x-mean)
558 out[out_of_bounds] = -np.inf
559 return _squeeze_output(out)
561 def pdf(self, x, mean=None, cov=1, allow_singular=False):
562 """Multivariate normal probability density function.
564 Parameters
565 ----------
566 x : array_like
567 Quantiles, with the last axis of `x` denoting the components.
568 %(_mvn_doc_default_callparams)s
570 Returns
571 -------
572 pdf : ndarray or scalar
573 Probability density function evaluated at `x`
575 Notes
576 -----
577 %(_mvn_doc_callparams_note)s
579 """
580 params = self._process_parameters(mean, cov, allow_singular)
581 dim, mean, cov_object = params
582 x = self._process_quantiles(x, dim)
583 out = np.exp(self._logpdf(x, mean, cov_object))
584 if np.any((cov_object.rank < dim)):
585 out_of_bounds = ~cov_object._support_mask(x-mean)
586 out[out_of_bounds] = 0.0
587 return _squeeze_output(out)
589 def _cdf(self, x, mean, cov, maxpts, abseps, releps, lower_limit):
590 """Multivariate normal cumulative distribution function.
592 Parameters
593 ----------
594 x : ndarray
595 Points at which to evaluate the cumulative distribution function.
596 mean : ndarray
597 Mean of the distribution
598 cov : array_like
599 Covariance matrix of the distribution
600 maxpts : integer
601 The maximum number of points to use for integration
602 abseps : float
603 Absolute error tolerance
604 releps : float
605 Relative error tolerance
606 lower_limit : array_like, optional
607 Lower limit of integration of the cumulative distribution function.
608 Default is negative infinity. Must be broadcastable with `x`.
610 Notes
611 -----
612 As this function does no argument checking, it should not be
613 called directly; use 'cdf' instead.
616 .. versionadded:: 1.0.0
618 """
619 lower = (np.full(mean.shape, -np.inf)
620 if lower_limit is None else lower_limit)
621 # In 2d, _mvn.mvnun accepts input in which `lower` bound elements
622 # are greater than `x`. Not so in other dimensions. Fix this by
623 # ensuring that lower bounds are indeed lower when passed, then
624 # set signs of resulting CDF manually.
625 b, a = np.broadcast_arrays(x, lower)
626 i_swap = b < a
627 signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative
628 a, b = a.copy(), b.copy()
629 a[i_swap], b[i_swap] = b[i_swap], a[i_swap]
630 n = x.shape[-1]
631 limits = np.concatenate((a, b), axis=-1)
633 # mvnun expects 1-d arguments, so process points sequentially
634 def func1d(limits):
635 return _mvn.mvnun(limits[:n], limits[n:], mean, cov,
636 maxpts, abseps, releps)[0]
638 out = np.apply_along_axis(func1d, -1, limits) * signs
639 return _squeeze_output(out)
641 def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
642 abseps=1e-5, releps=1e-5, *, lower_limit=None):
643 """Log of the multivariate normal cumulative distribution function.
645 Parameters
646 ----------
647 x : array_like
648 Quantiles, with the last axis of `x` denoting the components.
649 %(_mvn_doc_default_callparams)s
650 maxpts : integer, optional
651 The maximum number of points to use for integration
652 (default `1000000*dim`)
653 abseps : float, optional
654 Absolute error tolerance (default 1e-5)
655 releps : float, optional
656 Relative error tolerance (default 1e-5)
657 lower_limit : array_like, optional
658 Lower limit of integration of the cumulative distribution function.
659 Default is negative infinity. Must be broadcastable with `x`.
661 Returns
662 -------
663 cdf : ndarray or scalar
664 Log of the cumulative distribution function evaluated at `x`
666 Notes
667 -----
668 %(_mvn_doc_callparams_note)s
670 .. versionadded:: 1.0.0
672 """
673 params = self._process_parameters(mean, cov, allow_singular)
674 dim, mean, cov_object = params
675 cov = cov_object.covariance
676 x = self._process_quantiles(x, dim)
677 if not maxpts:
678 maxpts = 1000000 * dim
679 cdf = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit)
680 # the log of a negative real is complex, and cdf can be negative
681 # if lower limit is greater than upper limit
682 cdf = cdf + 0j if np.any(cdf < 0) else cdf
683 out = np.log(cdf)
684 return out
686 def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
687 abseps=1e-5, releps=1e-5, *, lower_limit=None):
688 """Multivariate normal cumulative distribution function.
690 Parameters
691 ----------
692 x : array_like
693 Quantiles, with the last axis of `x` denoting the components.
694 %(_mvn_doc_default_callparams)s
695 maxpts : integer, optional
696 The maximum number of points to use for integration
697 (default `1000000*dim`)
698 abseps : float, optional
699 Absolute error tolerance (default 1e-5)
700 releps : float, optional
701 Relative error tolerance (default 1e-5)
702 lower_limit : array_like, optional
703 Lower limit of integration of the cumulative distribution function.
704 Default is negative infinity. Must be broadcastable with `x`.
706 Returns
707 -------
708 cdf : ndarray or scalar
709 Cumulative distribution function evaluated at `x`
711 Notes
712 -----
713 %(_mvn_doc_callparams_note)s
715 .. versionadded:: 1.0.0
717 """
718 params = self._process_parameters(mean, cov, allow_singular)
719 dim, mean, cov_object = params
720 cov = cov_object.covariance
721 x = self._process_quantiles(x, dim)
722 if not maxpts:
723 maxpts = 1000000 * dim
724 out = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit)
725 return out
727 def rvs(self, mean=None, cov=1, size=1, random_state=None):
728 """Draw random samples from a multivariate normal distribution.
730 Parameters
731 ----------
732 %(_mvn_doc_default_callparams)s
733 size : integer, optional
734 Number of samples to draw (default 1).
735 %(_doc_random_state)s
737 Returns
738 -------
739 rvs : ndarray or scalar
740 Random variates of size (`size`, `N`), where `N` is the
741 dimension of the random variable.
743 Notes
744 -----
745 %(_mvn_doc_callparams_note)s
747 """
748 dim, mean, cov_object = self._process_parameters(mean, cov)
749 random_state = self._get_random_state(random_state)
751 if isinstance(cov_object, _covariance.CovViaPSD):
752 cov = cov_object.covariance
753 out = random_state.multivariate_normal(mean, cov, size)
754 out = _squeeze_output(out)
755 else:
756 size = size or tuple()
757 if not np.iterable(size):
758 size = (size,)
759 shape = tuple(size) + (cov_object.shape[-1],)
760 x = random_state.normal(size=shape)
761 out = mean + cov_object.colorize(x)
762 return out
764 def entropy(self, mean=None, cov=1):
765 """Compute the differential entropy of the multivariate normal.
767 Parameters
768 ----------
769 %(_mvn_doc_default_callparams)s
771 Returns
772 -------
773 h : scalar
774 Entropy of the multivariate normal distribution
776 Notes
777 -----
778 %(_mvn_doc_callparams_note)s
780 """
781 dim, mean, cov_object = self._process_parameters(mean, cov)
782 return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet)
785multivariate_normal = multivariate_normal_gen()
788class multivariate_normal_frozen(multi_rv_frozen):
789 def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,
790 maxpts=None, abseps=1e-5, releps=1e-5):
791 """Create a frozen multivariate normal distribution.
793 Parameters
794 ----------
795 mean : array_like, default: ``[0]``
796 Mean of the distribution.
797 cov : array_like, default: ``[1]``
798 Symmetric positive (semi)definite covariance matrix of the
799 distribution.
800 allow_singular : bool, default: ``False``
801 Whether to allow a singular covariance matrix.
802 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
803 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
804 singleton is used.
805 If `seed` is an int, a new ``RandomState`` instance is used,
806 seeded with `seed`.
807 If `seed` is already a ``Generator`` or ``RandomState`` instance
808 then that instance is used.
809 maxpts : integer, optional
810 The maximum number of points to use for integration of the
811 cumulative distribution function (default `1000000*dim`)
812 abseps : float, optional
813 Absolute error tolerance for the cumulative distribution function
814 (default 1e-5)
815 releps : float, optional
816 Relative error tolerance for the cumulative distribution function
817 (default 1e-5)
819 Examples
820 --------
821 When called with the default parameters, this will create a 1D random
822 variable with mean 0 and covariance 1:
824 >>> from scipy.stats import multivariate_normal
825 >>> r = multivariate_normal()
826 >>> r.mean
827 array([ 0.])
828 >>> r.cov
829 array([[1.]])
831 """
832 self._dist = multivariate_normal_gen(seed)
833 self.dim, self.mean, self.cov_object = (
834 self._dist._process_parameters(mean, cov, allow_singular))
835 self.allow_singular = allow_singular or self.cov_object._allow_singular
836 if not maxpts:
837 maxpts = 1000000 * self.dim
838 self.maxpts = maxpts
839 self.abseps = abseps
840 self.releps = releps
842 @property
843 def cov(self):
844 return self.cov_object.covariance
846 def logpdf(self, x):
847 x = self._dist._process_quantiles(x, self.dim)
848 out = self._dist._logpdf(x, self.mean, self.cov_object)
849 if np.any(self.cov_object.rank < self.dim):
850 out_of_bounds = ~self.cov_object._support_mask(x-self.mean)
851 out[out_of_bounds] = -np.inf
852 return _squeeze_output(out)
854 def pdf(self, x):
855 return np.exp(self.logpdf(x))
857 def logcdf(self, x, *, lower_limit=None):
858 cdf = self.cdf(x, lower_limit=lower_limit)
859 # the log of a negative real is complex, and cdf can be negative
860 # if lower limit is greater than upper limit
861 cdf = cdf + 0j if np.any(cdf < 0) else cdf
862 out = np.log(cdf)
863 return out
865 def cdf(self, x, *, lower_limit=None):
866 x = self._dist._process_quantiles(x, self.dim)
867 out = self._dist._cdf(x, self.mean, self.cov_object.covariance,
868 self.maxpts, self.abseps, self.releps,
869 lower_limit)
870 return _squeeze_output(out)
872 def rvs(self, size=1, random_state=None):
873 return self._dist.rvs(self.mean, self.cov_object, size, random_state)
875 def entropy(self):
876 """Computes the differential entropy of the multivariate normal.
878 Returns
879 -------
880 h : scalar
881 Entropy of the multivariate normal distribution
883 """
884 log_pdet = self.cov_object.log_pdet
885 rank = self.cov_object.rank
886 return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)
889# Set frozen generator docstrings from corresponding docstrings in
890# multivariate_normal_gen and fill in default strings in class docstrings
891for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']:
892 method = multivariate_normal_gen.__dict__[name]
893 method_frozen = multivariate_normal_frozen.__dict__[name]
894 method_frozen.__doc__ = doccer.docformat(method.__doc__,
895 mvn_docdict_noparams)
896 method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)
898_matnorm_doc_default_callparams = """\
899mean : array_like, optional
900 Mean of the distribution (default: `None`)
901rowcov : array_like, optional
902 Among-row covariance matrix of the distribution (default: `1`)
903colcov : array_like, optional
904 Among-column covariance matrix of the distribution (default: `1`)
905"""
907_matnorm_doc_callparams_note = """\
908If `mean` is set to `None` then a matrix of zeros is used for the mean.
909The dimensions of this matrix are inferred from the shape of `rowcov` and
910`colcov`, if these are provided, or set to `1` if ambiguous.
912`rowcov` and `colcov` can be two-dimensional array_likes specifying the
913covariance matrices directly. Alternatively, a one-dimensional array will
914be be interpreted as the entries of a diagonal matrix, and a scalar or
915zero-dimensional array will be interpreted as this value times the
916identity matrix.
917"""
919_matnorm_doc_frozen_callparams = ""
921_matnorm_doc_frozen_callparams_note = """\
922See class definition for a detailed description of parameters."""
924matnorm_docdict_params = {
925 '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams,
926 '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note,
927 '_doc_random_state': _doc_random_state
928}
930matnorm_docdict_noparams = {
931 '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams,
932 '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note,
933 '_doc_random_state': _doc_random_state
934}
937class matrix_normal_gen(multi_rv_generic):
938 r"""A matrix normal random variable.
940 The `mean` keyword specifies the mean. The `rowcov` keyword specifies the
941 among-row covariance matrix. The 'colcov' keyword specifies the
942 among-column covariance matrix.
944 Methods
945 -------
946 pdf(X, mean=None, rowcov=1, colcov=1)
947 Probability density function.
948 logpdf(X, mean=None, rowcov=1, colcov=1)
949 Log of the probability density function.
950 rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)
951 Draw random samples.
953 Parameters
954 ----------
955 %(_matnorm_doc_default_callparams)s
956 %(_doc_random_state)s
958 Notes
959 -----
960 %(_matnorm_doc_callparams_note)s
962 The covariance matrices specified by `rowcov` and `colcov` must be
963 (symmetric) positive definite. If the samples in `X` are
964 :math:`m \times n`, then `rowcov` must be :math:`m \times m` and
965 `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`.
967 The probability density function for `matrix_normal` is
969 .. math::
971 f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}}
972 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1}
973 (X-M)^T \right] \right),
975 where :math:`M` is the mean, :math:`U` the among-row covariance matrix,
976 :math:`V` the among-column covariance matrix.
978 The `allow_singular` behaviour of the `multivariate_normal`
979 distribution is not currently supported. Covariance matrices must be
980 full rank.
982 The `matrix_normal` distribution is closely related to the
983 `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)`
984 (the vector formed by concatenating the columns of :math:`X`) has a
985 multivariate normal distribution with mean :math:`\mathrm{Vec}(M)`
986 and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker
987 product). Sampling and pdf evaluation are
988 :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but
989 :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal,
990 making this equivalent form algorithmically inefficient.
992 .. versionadded:: 0.17.0
994 Examples
995 --------
997 >>> import numpy as np
998 >>> from scipy.stats import matrix_normal
1000 >>> M = np.arange(6).reshape(3,2); M
1001 array([[0, 1],
1002 [2, 3],
1003 [4, 5]])
1004 >>> U = np.diag([1,2,3]); U
1005 array([[1, 0, 0],
1006 [0, 2, 0],
1007 [0, 0, 3]])
1008 >>> V = 0.3*np.identity(2); V
1009 array([[ 0.3, 0. ],
1010 [ 0. , 0.3]])
1011 >>> X = M + 0.1; X
1012 array([[ 0.1, 1.1],
1013 [ 2.1, 3.1],
1014 [ 4.1, 5.1]])
1015 >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)
1016 0.023410202050005054
1018 >>> # Equivalent multivariate normal
1019 >>> from scipy.stats import multivariate_normal
1020 >>> vectorised_X = X.T.flatten()
1021 >>> equiv_mean = M.T.flatten()
1022 >>> equiv_cov = np.kron(V,U)
1023 >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov)
1024 0.023410202050005054
1026 Alternatively, the object may be called (as a function) to fix the mean
1027 and covariance parameters, returning a "frozen" matrix normal
1028 random variable:
1030 >>> rv = matrix_normal(mean=None, rowcov=1, colcov=1)
1031 >>> # Frozen object with the same methods but holding the given
1032 >>> # mean and covariance fixed.
1034 """
1036 def __init__(self, seed=None):
1037 super().__init__(seed)
1038 self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params)
1040 def __call__(self, mean=None, rowcov=1, colcov=1, seed=None):
1041 """Create a frozen matrix normal distribution.
1043 See `matrix_normal_frozen` for more information.
1045 """
1046 return matrix_normal_frozen(mean, rowcov, colcov, seed=seed)
1048 def _process_parameters(self, mean, rowcov, colcov):
1049 """
1050 Infer dimensionality from mean or covariance matrices. Handle
1051 defaults. Ensure compatible dimensions.
1052 """
1054 # Process mean
1055 if mean is not None:
1056 mean = np.asarray(mean, dtype=float)
1057 meanshape = mean.shape
1058 if len(meanshape) != 2:
1059 raise ValueError("Array `mean` must be two dimensional.")
1060 if np.any(meanshape == 0):
1061 raise ValueError("Array `mean` has invalid shape.")
1063 # Process among-row covariance
1064 rowcov = np.asarray(rowcov, dtype=float)
1065 if rowcov.ndim == 0:
1066 if mean is not None:
1067 rowcov = rowcov * np.identity(meanshape[0])
1068 else:
1069 rowcov = rowcov * np.identity(1)
1070 elif rowcov.ndim == 1:
1071 rowcov = np.diag(rowcov)
1072 rowshape = rowcov.shape
1073 if len(rowshape) != 2:
1074 raise ValueError("`rowcov` must be a scalar or a 2D array.")
1075 if rowshape[0] != rowshape[1]:
1076 raise ValueError("Array `rowcov` must be square.")
1077 if rowshape[0] == 0:
1078 raise ValueError("Array `rowcov` has invalid shape.")
1079 numrows = rowshape[0]
1081 # Process among-column covariance
1082 colcov = np.asarray(colcov, dtype=float)
1083 if colcov.ndim == 0:
1084 if mean is not None:
1085 colcov = colcov * np.identity(meanshape[1])
1086 else:
1087 colcov = colcov * np.identity(1)
1088 elif colcov.ndim == 1:
1089 colcov = np.diag(colcov)
1090 colshape = colcov.shape
1091 if len(colshape) != 2:
1092 raise ValueError("`colcov` must be a scalar or a 2D array.")
1093 if colshape[0] != colshape[1]:
1094 raise ValueError("Array `colcov` must be square.")
1095 if colshape[0] == 0:
1096 raise ValueError("Array `colcov` has invalid shape.")
1097 numcols = colshape[0]
1099 # Ensure mean and covariances compatible
1100 if mean is not None:
1101 if meanshape[0] != numrows:
1102 raise ValueError("Arrays `mean` and `rowcov` must have the "
1103 "same number of rows.")
1104 if meanshape[1] != numcols:
1105 raise ValueError("Arrays `mean` and `colcov` must have the "
1106 "same number of columns.")
1107 else:
1108 mean = np.zeros((numrows, numcols))
1110 dims = (numrows, numcols)
1112 return dims, mean, rowcov, colcov
1114 def _process_quantiles(self, X, dims):
1115 """
1116 Adjust quantiles array so that last two axes labels the components of
1117 each data point.
1118 """
1119 X = np.asarray(X, dtype=float)
1120 if X.ndim == 2:
1121 X = X[np.newaxis, :]
1122 if X.shape[-2:] != dims:
1123 raise ValueError("The shape of array `X` is not compatible "
1124 "with the distribution parameters.")
1125 return X
1127 def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov,
1128 col_prec_rt, log_det_colcov):
1129 """Log of the matrix normal probability density function.
1131 Parameters
1132 ----------
1133 dims : tuple
1134 Dimensions of the matrix variates
1135 X : ndarray
1136 Points at which to evaluate the log of the probability
1137 density function
1138 mean : ndarray
1139 Mean of the distribution
1140 row_prec_rt : ndarray
1141 A decomposition such that np.dot(row_prec_rt, row_prec_rt.T)
1142 is the inverse of the among-row covariance matrix
1143 log_det_rowcov : float
1144 Logarithm of the determinant of the among-row covariance matrix
1145 col_prec_rt : ndarray
1146 A decomposition such that np.dot(col_prec_rt, col_prec_rt.T)
1147 is the inverse of the among-column covariance matrix
1148 log_det_colcov : float
1149 Logarithm of the determinant of the among-column covariance matrix
1151 Notes
1152 -----
1153 As this function does no argument checking, it should not be
1154 called directly; use 'logpdf' instead.
1156 """
1157 numrows, numcols = dims
1158 roll_dev = np.moveaxis(X-mean, -1, 0)
1159 scale_dev = np.tensordot(col_prec_rt.T,
1160 np.dot(roll_dev, row_prec_rt), 1)
1161 maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0)
1162 return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov
1163 + numrows*log_det_colcov + maha)
1165 def logpdf(self, X, mean=None, rowcov=1, colcov=1):
1166 """Log of the matrix normal probability density function.
1168 Parameters
1169 ----------
1170 X : array_like
1171 Quantiles, with the last two axes of `X` denoting the components.
1172 %(_matnorm_doc_default_callparams)s
1174 Returns
1175 -------
1176 logpdf : ndarray
1177 Log of the probability density function evaluated at `X`
1179 Notes
1180 -----
1181 %(_matnorm_doc_callparams_note)s
1183 """
1184 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
1185 colcov)
1186 X = self._process_quantiles(X, dims)
1187 rowpsd = _PSD(rowcov, allow_singular=False)
1188 colpsd = _PSD(colcov, allow_singular=False)
1189 out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U,
1190 colpsd.log_pdet)
1191 return _squeeze_output(out)
1193 def pdf(self, X, mean=None, rowcov=1, colcov=1):
1194 """Matrix normal probability density function.
1196 Parameters
1197 ----------
1198 X : array_like
1199 Quantiles, with the last two axes of `X` denoting the components.
1200 %(_matnorm_doc_default_callparams)s
1202 Returns
1203 -------
1204 pdf : ndarray
1205 Probability density function evaluated at `X`
1207 Notes
1208 -----
1209 %(_matnorm_doc_callparams_note)s
1211 """
1212 return np.exp(self.logpdf(X, mean, rowcov, colcov))
1214 def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None):
1215 """Draw random samples from a matrix normal distribution.
1217 Parameters
1218 ----------
1219 %(_matnorm_doc_default_callparams)s
1220 size : integer, optional
1221 Number of samples to draw (default 1).
1222 %(_doc_random_state)s
1224 Returns
1225 -------
1226 rvs : ndarray or scalar
1227 Random variates of size (`size`, `dims`), where `dims` is the
1228 dimension of the random matrices.
1230 Notes
1231 -----
1232 %(_matnorm_doc_callparams_note)s
1234 """
1235 size = int(size)
1236 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
1237 colcov)
1238 rowchol = scipy.linalg.cholesky(rowcov, lower=True)
1239 colchol = scipy.linalg.cholesky(colcov, lower=True)
1240 random_state = self._get_random_state(random_state)
1241 # We aren't generating standard normal variates with size=(size,
1242 # dims[0], dims[1]) directly to ensure random variates remain backwards
1243 # compatible. See https://github.com/scipy/scipy/pull/12312 for more
1244 # details.
1245 std_norm = random_state.standard_normal(
1246 size=(dims[1], size, dims[0])
1247 ).transpose(1, 2, 0)
1248 out = mean + np.einsum('jp,ipq,kq->ijk',
1249 rowchol, std_norm, colchol,
1250 optimize=True)
1251 if size == 1:
1252 out = out.reshape(mean.shape)
1253 return out
1256matrix_normal = matrix_normal_gen()
1259class matrix_normal_frozen(multi_rv_frozen):
1260 """
1261 Create a frozen matrix normal distribution.
1263 Parameters
1264 ----------
1265 %(_matnorm_doc_default_callparams)s
1266 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
1267 If `seed` is `None` the `~np.random.RandomState` singleton is used.
1268 If `seed` is an int, a new ``RandomState`` instance is used, seeded
1269 with seed.
1270 If `seed` is already a ``RandomState`` or ``Generator`` instance,
1271 then that object is used.
1272 Default is `None`.
1274 Examples
1275 --------
1276 >>> import numpy as np
1277 >>> from scipy.stats import matrix_normal
1279 >>> distn = matrix_normal(mean=np.zeros((3,3)))
1280 >>> X = distn.rvs(); X
1281 array([[-0.02976962, 0.93339138, -0.09663178],
1282 [ 0.67405524, 0.28250467, -0.93308929],
1283 [-0.31144782, 0.74535536, 1.30412916]])
1284 >>> distn.pdf(X)
1285 2.5160642368346784e-05
1286 >>> distn.logpdf(X)
1287 -10.590229595124615
1288 """
1290 def __init__(self, mean=None, rowcov=1, colcov=1, seed=None):
1291 self._dist = matrix_normal_gen(seed)
1292 self.dims, self.mean, self.rowcov, self.colcov = \
1293 self._dist._process_parameters(mean, rowcov, colcov)
1294 self.rowpsd = _PSD(self.rowcov, allow_singular=False)
1295 self.colpsd = _PSD(self.colcov, allow_singular=False)
1297 def logpdf(self, X):
1298 X = self._dist._process_quantiles(X, self.dims)
1299 out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U,
1300 self.rowpsd.log_pdet, self.colpsd.U,
1301 self.colpsd.log_pdet)
1302 return _squeeze_output(out)
1304 def pdf(self, X):
1305 return np.exp(self.logpdf(X))
1307 def rvs(self, size=1, random_state=None):
1308 return self._dist.rvs(self.mean, self.rowcov, self.colcov, size,
1309 random_state)
1312# Set frozen generator docstrings from corresponding docstrings in
1313# matrix_normal_gen and fill in default strings in class docstrings
1314for name in ['logpdf', 'pdf', 'rvs']:
1315 method = matrix_normal_gen.__dict__[name]
1316 method_frozen = matrix_normal_frozen.__dict__[name]
1317 method_frozen.__doc__ = doccer.docformat(method.__doc__,
1318 matnorm_docdict_noparams)
1319 method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params)
1321_dirichlet_doc_default_callparams = """\
1322alpha : array_like
1323 The concentration parameters. The number of entries determines the
1324 dimensionality of the distribution.
1325"""
1326_dirichlet_doc_frozen_callparams = ""
1328_dirichlet_doc_frozen_callparams_note = """\
1329See class definition for a detailed description of parameters."""
1331dirichlet_docdict_params = {
1332 '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
1333 '_doc_random_state': _doc_random_state
1334}
1336dirichlet_docdict_noparams = {
1337 '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
1338 '_doc_random_state': _doc_random_state
1339}
1342def _dirichlet_check_parameters(alpha):
1343 alpha = np.asarray(alpha)
1344 if np.min(alpha) <= 0:
1345 raise ValueError("All parameters must be greater than 0")
1346 elif alpha.ndim != 1:
1347 raise ValueError("Parameter vector 'a' must be one dimensional, "
1348 "but a.shape = %s." % (alpha.shape, ))
1349 return alpha
1352def _dirichlet_check_input(alpha, x):
1353 x = np.asarray(x)
1355 if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
1356 raise ValueError("Vector 'x' must have either the same number "
1357 "of entries as, or one entry fewer than, "
1358 "parameter vector 'a', but alpha.shape = %s "
1359 "and x.shape = %s." % (alpha.shape, x.shape))
1361 if x.shape[0] != alpha.shape[0]:
1362 xk = np.array([1 - np.sum(x, 0)])
1363 if xk.ndim == 1:
1364 x = np.append(x, xk)
1365 elif xk.ndim == 2:
1366 x = np.vstack((x, xk))
1367 else:
1368 raise ValueError("The input must be one dimensional or a two "
1369 "dimensional matrix containing the entries.")
1371 if np.min(x) < 0:
1372 raise ValueError("Each entry in 'x' must be greater than or equal "
1373 "to zero.")
1375 if np.max(x) > 1:
1376 raise ValueError("Each entry in 'x' must be smaller or equal one.")
1378 # Check x_i > 0 or alpha_i > 1
1379 xeq0 = (x == 0)
1380 alphalt1 = (alpha < 1)
1381 if x.shape != alpha.shape:
1382 alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape)
1383 chk = np.logical_and(xeq0, alphalt1)
1385 if np.sum(chk):
1386 raise ValueError("Each entry in 'x' must be greater than zero if its "
1387 "alpha is less than one.")
1389 if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
1390 raise ValueError("The input vector 'x' must lie within the normal "
1391 "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0))
1393 return x
1396def _lnB(alpha):
1397 r"""Internal helper function to compute the log of the useful quotient.
1399 .. math::
1401 B(\alpha) = \frac{\prod_{i=1}{K}\Gamma(\alpha_i)}
1402 {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)}
1404 Parameters
1405 ----------
1406 %(_dirichlet_doc_default_callparams)s
1408 Returns
1409 -------
1410 B : scalar
1411 Helper quotient, internal use only
1413 """
1414 return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
1417class dirichlet_gen(multi_rv_generic):
1418 r"""A Dirichlet random variable.
1420 The ``alpha`` keyword specifies the concentration parameters of the
1421 distribution.
1423 .. versionadded:: 0.15.0
1425 Methods
1426 -------
1427 pdf(x, alpha)
1428 Probability density function.
1429 logpdf(x, alpha)
1430 Log of the probability density function.
1431 rvs(alpha, size=1, random_state=None)
1432 Draw random samples from a Dirichlet distribution.
1433 mean(alpha)
1434 The mean of the Dirichlet distribution
1435 var(alpha)
1436 The variance of the Dirichlet distribution
1437 entropy(alpha)
1438 Compute the differential entropy of the Dirichlet distribution.
1440 Parameters
1441 ----------
1442 %(_dirichlet_doc_default_callparams)s
1443 %(_doc_random_state)s
1445 Notes
1446 -----
1447 Each :math:`\alpha` entry must be positive. The distribution has only
1448 support on the simplex defined by
1450 .. math::
1451 \sum_{i=1}^{K} x_i = 1
1453 where :math:`0 < x_i < 1`.
1455 If the quantiles don't lie within the simplex, a ValueError is raised.
1457 The probability density function for `dirichlet` is
1459 .. math::
1461 f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
1463 where
1465 .. math::
1467 \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}
1468 {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
1470 and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the
1471 concentration parameters and :math:`K` is the dimension of the space
1472 where :math:`x` takes values.
1474 Note that the `dirichlet` interface is somewhat inconsistent.
1475 The array returned by the rvs function is transposed
1476 with respect to the format expected by the pdf and logpdf.
1478 Examples
1479 --------
1480 >>> import numpy as np
1481 >>> from scipy.stats import dirichlet
1483 Generate a dirichlet random variable
1485 >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles
1486 >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters
1487 >>> dirichlet.pdf(quantiles, alpha)
1488 0.2843831684937255
1490 The same PDF but following a log scale
1492 >>> dirichlet.logpdf(quantiles, alpha)
1493 -1.2574327653159187
1495 Once we specify the dirichlet distribution
1496 we can then calculate quantities of interest
1498 >>> dirichlet.mean(alpha) # get the mean of the distribution
1499 array([0.01960784, 0.24509804, 0.73529412])
1500 >>> dirichlet.var(alpha) # get variance
1501 array([0.00089829, 0.00864603, 0.00909517])
1502 >>> dirichlet.entropy(alpha) # calculate the differential entropy
1503 -4.3280162474082715
1505 We can also return random samples from the distribution
1507 >>> dirichlet.rvs(alpha, size=1, random_state=1)
1508 array([[0.00766178, 0.24670518, 0.74563305]])
1509 >>> dirichlet.rvs(alpha, size=2, random_state=2)
1510 array([[0.01639427, 0.1292273 , 0.85437844],
1511 [0.00156917, 0.19033695, 0.80809388]])
1513 Alternatively, the object may be called (as a function) to fix
1514 concentration parameters, returning a "frozen" Dirichlet
1515 random variable:
1517 >>> rv = dirichlet(alpha)
1518 >>> # Frozen object with the same methods but holding the given
1519 >>> # concentration parameters fixed.
1521 """
1523 def __init__(self, seed=None):
1524 super().__init__(seed)
1525 self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
1527 def __call__(self, alpha, seed=None):
1528 return dirichlet_frozen(alpha, seed=seed)
1530 def _logpdf(self, x, alpha):
1531 """Log of the Dirichlet probability density function.
1533 Parameters
1534 ----------
1535 x : ndarray
1536 Points at which to evaluate the log of the probability
1537 density function
1538 %(_dirichlet_doc_default_callparams)s
1540 Notes
1541 -----
1542 As this function does no argument checking, it should not be
1543 called directly; use 'logpdf' instead.
1545 """
1546 lnB = _lnB(alpha)
1547 return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0)
1549 def logpdf(self, x, alpha):
1550 """Log of the Dirichlet probability density function.
1552 Parameters
1553 ----------
1554 x : array_like
1555 Quantiles, with the last axis of `x` denoting the components.
1556 %(_dirichlet_doc_default_callparams)s
1558 Returns
1559 -------
1560 pdf : ndarray or scalar
1561 Log of the probability density function evaluated at `x`.
1563 """
1564 alpha = _dirichlet_check_parameters(alpha)
1565 x = _dirichlet_check_input(alpha, x)
1567 out = self._logpdf(x, alpha)
1568 return _squeeze_output(out)
1570 def pdf(self, x, alpha):
1571 """The Dirichlet probability density function.
1573 Parameters
1574 ----------
1575 x : array_like
1576 Quantiles, with the last axis of `x` denoting the components.
1577 %(_dirichlet_doc_default_callparams)s
1579 Returns
1580 -------
1581 pdf : ndarray or scalar
1582 The probability density function evaluated at `x`.
1584 """
1585 alpha = _dirichlet_check_parameters(alpha)
1586 x = _dirichlet_check_input(alpha, x)
1588 out = np.exp(self._logpdf(x, alpha))
1589 return _squeeze_output(out)
1591 def mean(self, alpha):
1592 """Mean of the Dirichlet distribution.
1594 Parameters
1595 ----------
1596 %(_dirichlet_doc_default_callparams)s
1598 Returns
1599 -------
1600 mu : ndarray or scalar
1601 Mean of the Dirichlet distribution.
1603 """
1604 alpha = _dirichlet_check_parameters(alpha)
1606 out = alpha / (np.sum(alpha))
1607 return _squeeze_output(out)
1609 def var(self, alpha):
1610 """Variance of the Dirichlet distribution.
1612 Parameters
1613 ----------
1614 %(_dirichlet_doc_default_callparams)s
1616 Returns
1617 -------
1618 v : ndarray or scalar
1619 Variance of the Dirichlet distribution.
1621 """
1623 alpha = _dirichlet_check_parameters(alpha)
1625 alpha0 = np.sum(alpha)
1626 out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
1627 return _squeeze_output(out)
1629 def entropy(self, alpha):
1630 """
1631 Differential entropy of the Dirichlet distribution.
1633 Parameters
1634 ----------
1635 %(_dirichlet_doc_default_callparams)s
1637 Returns
1638 -------
1639 h : scalar
1640 Entropy of the Dirichlet distribution
1642 """
1644 alpha = _dirichlet_check_parameters(alpha)
1646 alpha0 = np.sum(alpha)
1647 lnB = _lnB(alpha)
1648 K = alpha.shape[0]
1650 out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
1651 (alpha - 1) * scipy.special.psi(alpha))
1652 return _squeeze_output(out)
1654 def rvs(self, alpha, size=1, random_state=None):
1655 """
1656 Draw random samples from a Dirichlet distribution.
1658 Parameters
1659 ----------
1660 %(_dirichlet_doc_default_callparams)s
1661 size : int, optional
1662 Number of samples to draw (default 1).
1663 %(_doc_random_state)s
1665 Returns
1666 -------
1667 rvs : ndarray or scalar
1668 Random variates of size (`size`, `N`), where `N` is the
1669 dimension of the random variable.
1671 """
1672 alpha = _dirichlet_check_parameters(alpha)
1673 random_state = self._get_random_state(random_state)
1674 return random_state.dirichlet(alpha, size=size)
1677dirichlet = dirichlet_gen()
1680class dirichlet_frozen(multi_rv_frozen):
1681 def __init__(self, alpha, seed=None):
1682 self.alpha = _dirichlet_check_parameters(alpha)
1683 self._dist = dirichlet_gen(seed)
1685 def logpdf(self, x):
1686 return self._dist.logpdf(x, self.alpha)
1688 def pdf(self, x):
1689 return self._dist.pdf(x, self.alpha)
1691 def mean(self):
1692 return self._dist.mean(self.alpha)
1694 def var(self):
1695 return self._dist.var(self.alpha)
1697 def entropy(self):
1698 return self._dist.entropy(self.alpha)
1700 def rvs(self, size=1, random_state=None):
1701 return self._dist.rvs(self.alpha, size, random_state)
1704# Set frozen generator docstrings from corresponding docstrings in
1705# multivariate_normal_gen and fill in default strings in class docstrings
1706for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:
1707 method = dirichlet_gen.__dict__[name]
1708 method_frozen = dirichlet_frozen.__dict__[name]
1709 method_frozen.__doc__ = doccer.docformat(
1710 method.__doc__, dirichlet_docdict_noparams)
1711 method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
1714_wishart_doc_default_callparams = """\
1715df : int
1716 Degrees of freedom, must be greater than or equal to dimension of the
1717 scale matrix
1718scale : array_like
1719 Symmetric positive definite scale matrix of the distribution
1720"""
1722_wishart_doc_callparams_note = ""
1724_wishart_doc_frozen_callparams = ""
1726_wishart_doc_frozen_callparams_note = """\
1727See class definition for a detailed description of parameters."""
1729wishart_docdict_params = {
1730 '_doc_default_callparams': _wishart_doc_default_callparams,
1731 '_doc_callparams_note': _wishart_doc_callparams_note,
1732 '_doc_random_state': _doc_random_state
1733}
1735wishart_docdict_noparams = {
1736 '_doc_default_callparams': _wishart_doc_frozen_callparams,
1737 '_doc_callparams_note': _wishart_doc_frozen_callparams_note,
1738 '_doc_random_state': _doc_random_state
1739}
1742class wishart_gen(multi_rv_generic):
1743 r"""A Wishart random variable.
1745 The `df` keyword specifies the degrees of freedom. The `scale` keyword
1746 specifies the scale matrix, which must be symmetric and positive definite.
1747 In this context, the scale matrix is often interpreted in terms of a
1748 multivariate normal precision matrix (the inverse of the covariance
1749 matrix). These arguments must satisfy the relationship
1750 ``df > scale.ndim - 1``, but see notes on using the `rvs` method with
1751 ``df < scale.ndim``.
1753 Methods
1754 -------
1755 pdf(x, df, scale)
1756 Probability density function.
1757 logpdf(x, df, scale)
1758 Log of the probability density function.
1759 rvs(df, scale, size=1, random_state=None)
1760 Draw random samples from a Wishart distribution.
1761 entropy()
1762 Compute the differential entropy of the Wishart distribution.
1764 Parameters
1765 ----------
1766 %(_doc_default_callparams)s
1767 %(_doc_random_state)s
1769 Raises
1770 ------
1771 scipy.linalg.LinAlgError
1772 If the scale matrix `scale` is not positive definite.
1774 See Also
1775 --------
1776 invwishart, chi2
1778 Notes
1779 -----
1780 %(_doc_callparams_note)s
1782 The scale matrix `scale` must be a symmetric positive definite
1783 matrix. Singular matrices, including the symmetric positive semi-definite
1784 case, are not supported. Symmetry is not checked; only the lower triangular
1785 portion is used.
1787 The Wishart distribution is often denoted
1789 .. math::
1791 W_p(\nu, \Sigma)
1793 where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the
1794 :math:`p \times p` scale matrix.
1796 The probability density function for `wishart` has support over positive
1797 definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then
1798 its PDF is given by:
1800 .. math::
1802 f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} }
1803 |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )}
1804 \exp\left( -tr(\Sigma^{-1} S) / 2 \right)
1806 If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then
1807 :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart).
1809 If the scale matrix is 1-dimensional and equal to one, then the Wishart
1810 distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)`
1811 distribution.
1813 The algorithm [2]_ implemented by the `rvs` method may
1814 produce numerically singular matrices with :math:`p - 1 < \nu < p`; the
1815 user may wish to check for this condition and generate replacement samples
1816 as necessary.
1819 .. versionadded:: 0.16.0
1821 References
1822 ----------
1823 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
1824 Wiley, 1983.
1825 .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate
1826 Generator", Applied Statistics, vol. 21, pp. 341-345, 1972.
1828 Examples
1829 --------
1830 >>> import numpy as np
1831 >>> import matplotlib.pyplot as plt
1832 >>> from scipy.stats import wishart, chi2
1833 >>> x = np.linspace(1e-5, 8, 100)
1834 >>> w = wishart.pdf(x, df=3, scale=1); w[:5]
1835 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
1836 >>> c = chi2.pdf(x, 3); c[:5]
1837 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
1838 >>> plt.plot(x, w)
1839 >>> plt.show()
1841 The input quantiles can be any shape of array, as long as the last
1842 axis labels the components.
1844 Alternatively, the object may be called (as a function) to fix the degrees
1845 of freedom and scale parameters, returning a "frozen" Wishart random
1846 variable:
1848 >>> rv = wishart(df=1, scale=1)
1849 >>> # Frozen object with the same methods but holding the given
1850 >>> # degrees of freedom and scale fixed.
1852 """
1854 def __init__(self, seed=None):
1855 super().__init__(seed)
1856 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
1858 def __call__(self, df=None, scale=None, seed=None):
1859 """Create a frozen Wishart distribution.
1861 See `wishart_frozen` for more information.
1862 """
1863 return wishart_frozen(df, scale, seed)
1865 def _process_parameters(self, df, scale):
1866 if scale is None:
1867 scale = 1.0
1868 scale = np.asarray(scale, dtype=float)
1870 if scale.ndim == 0:
1871 scale = scale[np.newaxis, np.newaxis]
1872 elif scale.ndim == 1:
1873 scale = np.diag(scale)
1874 elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]:
1875 raise ValueError("Array 'scale' must be square if it is two"
1876 " dimensional, but scale.scale = %s."
1877 % str(scale.shape))
1878 elif scale.ndim > 2:
1879 raise ValueError("Array 'scale' must be at most two-dimensional,"
1880 " but scale.ndim = %d" % scale.ndim)
1882 dim = scale.shape[0]
1884 if df is None:
1885 df = dim
1886 elif not np.isscalar(df):
1887 raise ValueError("Degrees of freedom must be a scalar.")
1888 elif df <= dim - 1:
1889 raise ValueError("Degrees of freedom must be greater than the "
1890 "dimension of scale matrix minus 1.")
1892 return dim, df, scale
1894 def _process_quantiles(self, x, dim):
1895 """
1896 Adjust quantiles array so that last axis labels the components of
1897 each data point.
1898 """
1899 x = np.asarray(x, dtype=float)
1901 if x.ndim == 0:
1902 x = x * np.eye(dim)[:, :, np.newaxis]
1903 if x.ndim == 1:
1904 if dim == 1:
1905 x = x[np.newaxis, np.newaxis, :]
1906 else:
1907 x = np.diag(x)[:, :, np.newaxis]
1908 elif x.ndim == 2:
1909 if not x.shape[0] == x.shape[1]:
1910 raise ValueError("Quantiles must be square if they are two"
1911 " dimensional, but x.shape = %s."
1912 % str(x.shape))
1913 x = x[:, :, np.newaxis]
1914 elif x.ndim == 3:
1915 if not x.shape[0] == x.shape[1]:
1916 raise ValueError("Quantiles must be square in the first two"
1917 " dimensions if they are three dimensional"
1918 ", but x.shape = %s." % str(x.shape))
1919 elif x.ndim > 3:
1920 raise ValueError("Quantiles must be at most two-dimensional with"
1921 " an additional dimension for multiple"
1922 "components, but x.ndim = %d" % x.ndim)
1924 # Now we have 3-dim array; should have shape [dim, dim, *]
1925 if not x.shape[0:2] == (dim, dim):
1926 raise ValueError('Quantiles have incompatible dimensions: should'
1927 ' be %s, got %s.' % ((dim, dim), x.shape[0:2]))
1929 return x
1931 def _process_size(self, size):
1932 size = np.asarray(size)
1934 if size.ndim == 0:
1935 size = size[np.newaxis]
1936 elif size.ndim > 1:
1937 raise ValueError('Size must be an integer or tuple of integers;'
1938 ' thus must have dimension <= 1.'
1939 ' Got size.ndim = %s' % str(tuple(size)))
1940 n = size.prod()
1941 shape = tuple(size)
1943 return n, shape
1945 def _logpdf(self, x, dim, df, scale, log_det_scale, C):
1946 """Log of the Wishart probability density function.
1948 Parameters
1949 ----------
1950 x : ndarray
1951 Points at which to evaluate the log of the probability
1952 density function
1953 dim : int
1954 Dimension of the scale matrix
1955 df : int
1956 Degrees of freedom
1957 scale : ndarray
1958 Scale matrix
1959 log_det_scale : float
1960 Logarithm of the determinant of the scale matrix
1961 C : ndarray
1962 Cholesky factorization of the scale matrix, lower triagular.
1964 Notes
1965 -----
1966 As this function does no argument checking, it should not be
1967 called directly; use 'logpdf' instead.
1969 """
1970 # log determinant of x
1971 # Note: x has components along the last axis, so that x.T has
1972 # components alone the 0-th axis. Then since det(A) = det(A'), this
1973 # gives us a 1-dim vector of determinants
1975 # Retrieve tr(scale^{-1} x)
1976 log_det_x = np.empty(x.shape[-1])
1977 scale_inv_x = np.empty(x.shape)
1978 tr_scale_inv_x = np.empty(x.shape[-1])
1979 for i in range(x.shape[-1]):
1980 _, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
1981 scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])
1982 tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()
1984 # Log PDF
1985 out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -
1986 (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +
1987 multigammaln(0.5*df, dim)))
1989 return out
1991 def logpdf(self, x, df, scale):
1992 """Log of the Wishart probability density function.
1994 Parameters
1995 ----------
1996 x : array_like
1997 Quantiles, with the last axis of `x` denoting the components.
1998 Each quantile must be a symmetric positive definite matrix.
1999 %(_doc_default_callparams)s
2001 Returns
2002 -------
2003 pdf : ndarray
2004 Log of the probability density function evaluated at `x`
2006 Notes
2007 -----
2008 %(_doc_callparams_note)s
2010 """
2011 dim, df, scale = self._process_parameters(df, scale)
2012 x = self._process_quantiles(x, dim)
2014 # Cholesky decomposition of scale, get log(det(scale))
2015 C, log_det_scale = self._cholesky_logdet(scale)
2017 out = self._logpdf(x, dim, df, scale, log_det_scale, C)
2018 return _squeeze_output(out)
2020 def pdf(self, x, df, scale):
2021 """Wishart probability density function.
2023 Parameters
2024 ----------
2025 x : array_like
2026 Quantiles, with the last axis of `x` denoting the components.
2027 Each quantile must be a symmetric positive definite matrix.
2028 %(_doc_default_callparams)s
2030 Returns
2031 -------
2032 pdf : ndarray
2033 Probability density function evaluated at `x`
2035 Notes
2036 -----
2037 %(_doc_callparams_note)s
2039 """
2040 return np.exp(self.logpdf(x, df, scale))
2042 def _mean(self, dim, df, scale):
2043 """Mean of the Wishart distribution.
2045 Parameters
2046 ----------
2047 dim : int
2048 Dimension of the scale matrix
2049 %(_doc_default_callparams)s
2051 Notes
2052 -----
2053 As this function does no argument checking, it should not be
2054 called directly; use 'mean' instead.
2056 """
2057 return df * scale
2059 def mean(self, df, scale):
2060 """Mean of the Wishart distribution.
2062 Parameters
2063 ----------
2064 %(_doc_default_callparams)s
2066 Returns
2067 -------
2068 mean : float
2069 The mean of the distribution
2070 """
2071 dim, df, scale = self._process_parameters(df, scale)
2072 out = self._mean(dim, df, scale)
2073 return _squeeze_output(out)
2075 def _mode(self, dim, df, scale):
2076 """Mode of the Wishart distribution.
2078 Parameters
2079 ----------
2080 dim : int
2081 Dimension of the scale matrix
2082 %(_doc_default_callparams)s
2084 Notes
2085 -----
2086 As this function does no argument checking, it should not be
2087 called directly; use 'mode' instead.
2089 """
2090 if df >= dim + 1:
2091 out = (df-dim-1) * scale
2092 else:
2093 out = None
2094 return out
2096 def mode(self, df, scale):
2097 """Mode of the Wishart distribution
2099 Only valid if the degrees of freedom are greater than the dimension of
2100 the scale matrix.
2102 Parameters
2103 ----------
2104 %(_doc_default_callparams)s
2106 Returns
2107 -------
2108 mode : float or None
2109 The Mode of the distribution
2110 """
2111 dim, df, scale = self._process_parameters(df, scale)
2112 out = self._mode(dim, df, scale)
2113 return _squeeze_output(out) if out is not None else out
2115 def _var(self, dim, df, scale):
2116 """Variance of the Wishart distribution.
2118 Parameters
2119 ----------
2120 dim : int
2121 Dimension of the scale matrix
2122 %(_doc_default_callparams)s
2124 Notes
2125 -----
2126 As this function does no argument checking, it should not be
2127 called directly; use 'var' instead.
2129 """
2130 var = scale**2
2131 diag = scale.diagonal() # 1 x dim array
2132 var += np.outer(diag, diag)
2133 var *= df
2134 return var
2136 def var(self, df, scale):
2137 """Variance of the Wishart distribution.
2139 Parameters
2140 ----------
2141 %(_doc_default_callparams)s
2143 Returns
2144 -------
2145 var : float
2146 The variance of the distribution
2147 """
2148 dim, df, scale = self._process_parameters(df, scale)
2149 out = self._var(dim, df, scale)
2150 return _squeeze_output(out)
2152 def _standard_rvs(self, n, shape, dim, df, random_state):
2153 """
2154 Parameters
2155 ----------
2156 n : integer
2157 Number of variates to generate
2158 shape : iterable
2159 Shape of the variates to generate
2160 dim : int
2161 Dimension of the scale matrix
2162 df : int
2163 Degrees of freedom
2164 random_state : {None, int, `numpy.random.Generator`,
2165 `numpy.random.RandomState`}, optional
2167 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
2168 singleton is used.
2169 If `seed` is an int, a new ``RandomState`` instance is used,
2170 seeded with `seed`.
2171 If `seed` is already a ``Generator`` or ``RandomState`` instance
2172 then that instance is used.
2174 Notes
2175 -----
2176 As this function does no argument checking, it should not be
2177 called directly; use 'rvs' instead.
2179 """
2180 # Random normal variates for off-diagonal elements
2181 n_tril = dim * (dim-1) // 2
2182 covariances = random_state.normal(
2183 size=n*n_tril).reshape(shape+(n_tril,))
2185 # Random chi-square variates for diagonal elements
2186 variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5
2187 for i in range(dim)]].reshape((dim,) +
2188 shape[::-1]).T)
2190 # Create the A matri(ces) - lower triangular
2191 A = np.zeros(shape + (dim, dim))
2193 # Input the covariances
2194 size_idx = tuple([slice(None, None, None)]*len(shape))
2195 tril_idx = np.tril_indices(dim, k=-1)
2196 A[size_idx + tril_idx] = covariances
2198 # Input the variances
2199 diag_idx = np.diag_indices(dim)
2200 A[size_idx + diag_idx] = variances
2202 return A
2204 def _rvs(self, n, shape, dim, df, C, random_state):
2205 """Draw random samples from a Wishart distribution.
2207 Parameters
2208 ----------
2209 n : integer
2210 Number of variates to generate
2211 shape : iterable
2212 Shape of the variates to generate
2213 dim : int
2214 Dimension of the scale matrix
2215 df : int
2216 Degrees of freedom
2217 C : ndarray
2218 Cholesky factorization of the scale matrix, lower triangular.
2219 %(_doc_random_state)s
2221 Notes
2222 -----
2223 As this function does no argument checking, it should not be
2224 called directly; use 'rvs' instead.
2226 """
2227 random_state = self._get_random_state(random_state)
2228 # Calculate the matrices A, which are actually lower triangular
2229 # Cholesky factorizations of a matrix B such that B ~ W(df, I)
2230 A = self._standard_rvs(n, shape, dim, df, random_state)
2232 # Calculate SA = C A A' C', where SA ~ W(df, scale)
2233 # Note: this is the product of a (lower) (lower) (lower)' (lower)'
2234 # or, denoting B = AA', it is C B C' where C is the lower
2235 # triangular Cholesky factorization of the scale matrix.
2236 # this appears to conflict with the instructions in [1]_, which
2237 # suggest that it should be D' B D where D is the lower
2238 # triangular factorization of the scale matrix. However, it is
2239 # meant to refer to the Bartlett (1933) representation of a
2240 # Wishart random variate as L A A' L' where L is lower triangular
2241 # so it appears that understanding D' to be upper triangular
2242 # is either a typo in or misreading of [1]_.
2243 for index in np.ndindex(shape):
2244 CA = np.dot(C, A[index])
2245 A[index] = np.dot(CA, CA.T)
2247 return A
2249 def rvs(self, df, scale, size=1, random_state=None):
2250 """Draw random samples from a Wishart distribution.
2252 Parameters
2253 ----------
2254 %(_doc_default_callparams)s
2255 size : integer or iterable of integers, optional
2256 Number of samples to draw (default 1).
2257 %(_doc_random_state)s
2259 Returns
2260 -------
2261 rvs : ndarray
2262 Random variates of shape (`size`) + (`dim`, `dim), where `dim` is
2263 the dimension of the scale matrix.
2265 Notes
2266 -----
2267 %(_doc_callparams_note)s
2269 """
2270 n, shape = self._process_size(size)
2271 dim, df, scale = self._process_parameters(df, scale)
2273 # Cholesky decomposition of scale
2274 C = scipy.linalg.cholesky(scale, lower=True)
2276 out = self._rvs(n, shape, dim, df, C, random_state)
2278 return _squeeze_output(out)
2280 def _entropy(self, dim, df, log_det_scale):
2281 """Compute the differential entropy of the Wishart.
2283 Parameters
2284 ----------
2285 dim : int
2286 Dimension of the scale matrix
2287 df : int
2288 Degrees of freedom
2289 log_det_scale : float
2290 Logarithm of the determinant of the scale matrix
2292 Notes
2293 -----
2294 As this function does no argument checking, it should not be
2295 called directly; use 'entropy' instead.
2297 """
2298 return (
2299 0.5 * (dim+1) * log_det_scale +
2300 0.5 * dim * (dim+1) * _LOG_2 +
2301 multigammaln(0.5*df, dim) -
2302 0.5 * (df - dim - 1) * np.sum(
2303 [psi(0.5*(df + 1 - (i+1))) for i in range(dim)]
2304 ) +
2305 0.5 * df * dim
2306 )
2308 def entropy(self, df, scale):
2309 """Compute the differential entropy of the Wishart.
2311 Parameters
2312 ----------
2313 %(_doc_default_callparams)s
2315 Returns
2316 -------
2317 h : scalar
2318 Entropy of the Wishart distribution
2320 Notes
2321 -----
2322 %(_doc_callparams_note)s
2324 """
2325 dim, df, scale = self._process_parameters(df, scale)
2326 _, log_det_scale = self._cholesky_logdet(scale)
2327 return self._entropy(dim, df, log_det_scale)
2329 def _cholesky_logdet(self, scale):
2330 """Compute Cholesky decomposition and determine (log(det(scale)).
2332 Parameters
2333 ----------
2334 scale : ndarray
2335 Scale matrix.
2337 Returns
2338 -------
2339 c_decomp : ndarray
2340 The Cholesky decomposition of `scale`.
2341 logdet : scalar
2342 The log of the determinant of `scale`.
2344 Notes
2345 -----
2346 This computation of ``logdet`` is equivalent to
2347 ``np.linalg.slogdet(scale)``. It is ~2x faster though.
2349 """
2350 c_decomp = scipy.linalg.cholesky(scale, lower=True)
2351 logdet = 2 * np.sum(np.log(c_decomp.diagonal()))
2352 return c_decomp, logdet
2355wishart = wishart_gen()
2358class wishart_frozen(multi_rv_frozen):
2359 """Create a frozen Wishart distribution.
2361 Parameters
2362 ----------
2363 df : array_like
2364 Degrees of freedom of the distribution
2365 scale : array_like
2366 Scale matrix of the distribution
2367 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
2368 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
2369 singleton is used.
2370 If `seed` is an int, a new ``RandomState`` instance is used,
2371 seeded with `seed`.
2372 If `seed` is already a ``Generator`` or ``RandomState`` instance then
2373 that instance is used.
2375 """
2376 def __init__(self, df, scale, seed=None):
2377 self._dist = wishart_gen(seed)
2378 self.dim, self.df, self.scale = self._dist._process_parameters(
2379 df, scale)
2380 self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale)
2382 def logpdf(self, x):
2383 x = self._dist._process_quantiles(x, self.dim)
2385 out = self._dist._logpdf(x, self.dim, self.df, self.scale,
2386 self.log_det_scale, self.C)
2387 return _squeeze_output(out)
2389 def pdf(self, x):
2390 return np.exp(self.logpdf(x))
2392 def mean(self):
2393 out = self._dist._mean(self.dim, self.df, self.scale)
2394 return _squeeze_output(out)
2396 def mode(self):
2397 out = self._dist._mode(self.dim, self.df, self.scale)
2398 return _squeeze_output(out) if out is not None else out
2400 def var(self):
2401 out = self._dist._var(self.dim, self.df, self.scale)
2402 return _squeeze_output(out)
2404 def rvs(self, size=1, random_state=None):
2405 n, shape = self._dist._process_size(size)
2406 out = self._dist._rvs(n, shape, self.dim, self.df,
2407 self.C, random_state)
2408 return _squeeze_output(out)
2410 def entropy(self):
2411 return self._dist._entropy(self.dim, self.df, self.log_det_scale)
2414# Set frozen generator docstrings from corresponding docstrings in
2415# Wishart and fill in default strings in class docstrings
2416for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']:
2417 method = wishart_gen.__dict__[name]
2418 method_frozen = wishart_frozen.__dict__[name]
2419 method_frozen.__doc__ = doccer.docformat(
2420 method.__doc__, wishart_docdict_noparams)
2421 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
2424def _cho_inv_batch(a, check_finite=True):
2425 """
2426 Invert the matrices a_i, using a Cholesky factorization of A, where
2427 a_i resides in the last two dimensions of a and the other indices describe
2428 the index i.
2430 Overwrites the data in a.
2432 Parameters
2433 ----------
2434 a : array
2435 Array of matrices to invert, where the matrices themselves are stored
2436 in the last two dimensions.
2437 check_finite : bool, optional
2438 Whether to check that the input matrices contain only finite numbers.
2439 Disabling may give a performance gain, but may result in problems
2440 (crashes, non-termination) if the inputs do contain infinities or NaNs.
2442 Returns
2443 -------
2444 x : array
2445 Array of inverses of the matrices ``a_i``.
2447 See Also
2448 --------
2449 scipy.linalg.cholesky : Cholesky factorization of a matrix
2451 """
2452 if check_finite:
2453 a1 = asarray_chkfinite(a)
2454 else:
2455 a1 = asarray(a)
2456 if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]:
2457 raise ValueError('expected square matrix in last two dimensions')
2459 potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,))
2461 triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1)
2462 for index in np.ndindex(a1.shape[:-2]):
2464 # Cholesky decomposition
2465 a1[index], info = potrf(a1[index], lower=True, overwrite_a=False,
2466 clean=False)
2467 if info > 0:
2468 raise LinAlgError("%d-th leading minor not positive definite"
2469 % info)
2470 if info < 0:
2471 raise ValueError('illegal value in %d-th argument of internal'
2472 ' potrf' % -info)
2473 # Inversion
2474 a1[index], info = potri(a1[index], lower=True, overwrite_c=False)
2475 if info > 0:
2476 raise LinAlgError("the inverse could not be computed")
2477 if info < 0:
2478 raise ValueError('illegal value in %d-th argument of internal'
2479 ' potrf' % -info)
2481 # Make symmetric (dpotri only fills in the lower triangle)
2482 a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows]
2484 return a1
2487class invwishart_gen(wishart_gen):
2488 r"""An inverse Wishart random variable.
2490 The `df` keyword specifies the degrees of freedom. The `scale` keyword
2491 specifies the scale matrix, which must be symmetric and positive definite.
2492 In this context, the scale matrix is often interpreted in terms of a
2493 multivariate normal covariance matrix.
2495 Methods
2496 -------
2497 pdf(x, df, scale)
2498 Probability density function.
2499 logpdf(x, df, scale)
2500 Log of the probability density function.
2501 rvs(df, scale, size=1, random_state=None)
2502 Draw random samples from an inverse Wishart distribution.
2504 Parameters
2505 ----------
2506 %(_doc_default_callparams)s
2507 %(_doc_random_state)s
2509 Raises
2510 ------
2511 scipy.linalg.LinAlgError
2512 If the scale matrix `scale` is not positive definite.
2514 See Also
2515 --------
2516 wishart
2518 Notes
2519 -----
2520 %(_doc_callparams_note)s
2522 The scale matrix `scale` must be a symmetric positive definite
2523 matrix. Singular matrices, including the symmetric positive semi-definite
2524 case, are not supported. Symmetry is not checked; only the lower triangular
2525 portion is used.
2527 The inverse Wishart distribution is often denoted
2529 .. math::
2531 W_p^{-1}(\nu, \Psi)
2533 where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the
2534 :math:`p \times p` scale matrix.
2536 The probability density function for `invwishart` has support over positive
2537 definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`,
2538 then its PDF is given by:
2540 .. math::
2542 f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} }
2543 |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)}
2544 \exp\left( -tr(\Sigma S^{-1}) / 2 \right)
2546 If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then
2547 :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart).
2549 If the scale matrix is 1-dimensional and equal to one, then the inverse
2550 Wishart distribution :math:`W_1(\nu, 1)` collapses to the
2551 inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}`
2552 and scale = :math:`\frac{1}{2}`.
2554 .. versionadded:: 0.16.0
2556 References
2557 ----------
2558 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
2559 Wiley, 1983.
2560 .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications
2561 in Statistics - Simulation and Computation, vol. 14.2, pp.511-514,
2562 1985.
2564 Examples
2565 --------
2566 >>> import numpy as np
2567 >>> import matplotlib.pyplot as plt
2568 >>> from scipy.stats import invwishart, invgamma
2569 >>> x = np.linspace(0.01, 1, 100)
2570 >>> iw = invwishart.pdf(x, df=6, scale=1)
2571 >>> iw[:3]
2572 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
2573 >>> ig = invgamma.pdf(x, 6/2., scale=1./2)
2574 >>> ig[:3]
2575 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
2576 >>> plt.plot(x, iw)
2577 >>> plt.show()
2579 The input quantiles can be any shape of array, as long as the last
2580 axis labels the components.
2582 Alternatively, the object may be called (as a function) to fix the degrees
2583 of freedom and scale parameters, returning a "frozen" inverse Wishart
2584 random variable:
2586 >>> rv = invwishart(df=1, scale=1)
2587 >>> # Frozen object with the same methods but holding the given
2588 >>> # degrees of freedom and scale fixed.
2590 """
2592 def __init__(self, seed=None):
2593 super().__init__(seed)
2594 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
2596 def __call__(self, df=None, scale=None, seed=None):
2597 """Create a frozen inverse Wishart distribution.
2599 See `invwishart_frozen` for more information.
2601 """
2602 return invwishart_frozen(df, scale, seed)
2604 def _logpdf(self, x, dim, df, scale, log_det_scale):
2605 """Log of the inverse Wishart probability density function.
2607 Parameters
2608 ----------
2609 x : ndarray
2610 Points at which to evaluate the log of the probability
2611 density function.
2612 dim : int
2613 Dimension of the scale matrix
2614 df : int
2615 Degrees of freedom
2616 scale : ndarray
2617 Scale matrix
2618 log_det_scale : float
2619 Logarithm of the determinant of the scale matrix
2621 Notes
2622 -----
2623 As this function does no argument checking, it should not be
2624 called directly; use 'logpdf' instead.
2626 """
2627 log_det_x = np.empty(x.shape[-1])
2628 x_inv = np.copy(x).T
2629 if dim > 1:
2630 _cho_inv_batch(x_inv) # works in-place
2631 else:
2632 x_inv = 1./x_inv
2633 tr_scale_x_inv = np.empty(x.shape[-1])
2635 for i in range(x.shape[-1]):
2636 C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True)
2638 log_det_x[i] = 2 * np.sum(np.log(C.diagonal()))
2640 tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace()
2642 # Log PDF
2643 out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -
2644 (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -
2645 multigammaln(0.5*df, dim))
2647 return out
2649 def logpdf(self, x, df, scale):
2650 """Log of the inverse Wishart probability density function.
2652 Parameters
2653 ----------
2654 x : array_like
2655 Quantiles, with the last axis of `x` denoting the components.
2656 Each quantile must be a symmetric positive definite matrix.
2657 %(_doc_default_callparams)s
2659 Returns
2660 -------
2661 pdf : ndarray
2662 Log of the probability density function evaluated at `x`
2664 Notes
2665 -----
2666 %(_doc_callparams_note)s
2668 """
2669 dim, df, scale = self._process_parameters(df, scale)
2670 x = self._process_quantiles(x, dim)
2671 _, log_det_scale = self._cholesky_logdet(scale)
2672 out = self._logpdf(x, dim, df, scale, log_det_scale)
2673 return _squeeze_output(out)
2675 def pdf(self, x, df, scale):
2676 """Inverse Wishart probability density function.
2678 Parameters
2679 ----------
2680 x : array_like
2681 Quantiles, with the last axis of `x` denoting the components.
2682 Each quantile must be a symmetric positive definite matrix.
2683 %(_doc_default_callparams)s
2685 Returns
2686 -------
2687 pdf : ndarray
2688 Probability density function evaluated at `x`
2690 Notes
2691 -----
2692 %(_doc_callparams_note)s
2694 """
2695 return np.exp(self.logpdf(x, df, scale))
2697 def _mean(self, dim, df, scale):
2698 """Mean of the inverse Wishart distribution.
2700 Parameters
2701 ----------
2702 dim : int
2703 Dimension of the scale matrix
2704 %(_doc_default_callparams)s
2706 Notes
2707 -----
2708 As this function does no argument checking, it should not be
2709 called directly; use 'mean' instead.
2711 """
2712 if df > dim + 1:
2713 out = scale / (df - dim - 1)
2714 else:
2715 out = None
2716 return out
2718 def mean(self, df, scale):
2719 """Mean of the inverse Wishart distribution.
2721 Only valid if the degrees of freedom are greater than the dimension of
2722 the scale matrix plus one.
2724 Parameters
2725 ----------
2726 %(_doc_default_callparams)s
2728 Returns
2729 -------
2730 mean : float or None
2731 The mean of the distribution
2733 """
2734 dim, df, scale = self._process_parameters(df, scale)
2735 out = self._mean(dim, df, scale)
2736 return _squeeze_output(out) if out is not None else out
2738 def _mode(self, dim, df, scale):
2739 """Mode of the inverse Wishart distribution.
2741 Parameters
2742 ----------
2743 dim : int
2744 Dimension of the scale matrix
2745 %(_doc_default_callparams)s
2747 Notes
2748 -----
2749 As this function does no argument checking, it should not be
2750 called directly; use 'mode' instead.
2752 """
2753 return scale / (df + dim + 1)
2755 def mode(self, df, scale):
2756 """Mode of the inverse Wishart distribution.
2758 Parameters
2759 ----------
2760 %(_doc_default_callparams)s
2762 Returns
2763 -------
2764 mode : float
2765 The Mode of the distribution
2767 """
2768 dim, df, scale = self._process_parameters(df, scale)
2769 out = self._mode(dim, df, scale)
2770 return _squeeze_output(out)
2772 def _var(self, dim, df, scale):
2773 """Variance of the inverse Wishart distribution.
2775 Parameters
2776 ----------
2777 dim : int
2778 Dimension of the scale matrix
2779 %(_doc_default_callparams)s
2781 Notes
2782 -----
2783 As this function does no argument checking, it should not be
2784 called directly; use 'var' instead.
2786 """
2787 if df > dim + 3:
2788 var = (df - dim + 1) * scale**2
2789 diag = scale.diagonal() # 1 x dim array
2790 var += (df - dim - 1) * np.outer(diag, diag)
2791 var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3)
2792 else:
2793 var = None
2794 return var
2796 def var(self, df, scale):
2797 """Variance of the inverse Wishart distribution.
2799 Only valid if the degrees of freedom are greater than the dimension of
2800 the scale matrix plus three.
2802 Parameters
2803 ----------
2804 %(_doc_default_callparams)s
2806 Returns
2807 -------
2808 var : float
2809 The variance of the distribution
2810 """
2811 dim, df, scale = self._process_parameters(df, scale)
2812 out = self._var(dim, df, scale)
2813 return _squeeze_output(out) if out is not None else out
2815 def _rvs(self, n, shape, dim, df, C, random_state):
2816 """Draw random samples from an inverse Wishart distribution.
2818 Parameters
2819 ----------
2820 n : integer
2821 Number of variates to generate
2822 shape : iterable
2823 Shape of the variates to generate
2824 dim : int
2825 Dimension of the scale matrix
2826 df : int
2827 Degrees of freedom
2828 C : ndarray
2829 Cholesky factorization of the scale matrix, lower triagular.
2830 %(_doc_random_state)s
2832 Notes
2833 -----
2834 As this function does no argument checking, it should not be
2835 called directly; use 'rvs' instead.
2837 """
2838 random_state = self._get_random_state(random_state)
2839 # Get random draws A such that A ~ W(df, I)
2840 A = super()._standard_rvs(n, shape, dim, df, random_state)
2842 # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)
2843 eye = np.eye(dim)
2844 trtrs = get_lapack_funcs(('trtrs'), (A,))
2846 for index in np.ndindex(A.shape[:-2]):
2847 # Calculate CA
2848 CA = np.dot(C, A[index])
2849 # Get (C A)^{-1} via triangular solver
2850 if dim > 1:
2851 CA, info = trtrs(CA, eye, lower=True)
2852 if info > 0:
2853 raise LinAlgError("Singular matrix.")
2854 if info < 0:
2855 raise ValueError('Illegal value in %d-th argument of'
2856 ' internal trtrs' % -info)
2857 else:
2858 CA = 1. / CA
2859 # Get SA
2860 A[index] = np.dot(CA.T, CA)
2862 return A
2864 def rvs(self, df, scale, size=1, random_state=None):
2865 """Draw random samples from an inverse Wishart distribution.
2867 Parameters
2868 ----------
2869 %(_doc_default_callparams)s
2870 size : integer or iterable of integers, optional
2871 Number of samples to draw (default 1).
2872 %(_doc_random_state)s
2874 Returns
2875 -------
2876 rvs : ndarray
2877 Random variates of shape (`size`) + (`dim`, `dim), where `dim` is
2878 the dimension of the scale matrix.
2880 Notes
2881 -----
2882 %(_doc_callparams_note)s
2884 """
2885 n, shape = self._process_size(size)
2886 dim, df, scale = self._process_parameters(df, scale)
2888 # Invert the scale
2889 eye = np.eye(dim)
2890 L, lower = scipy.linalg.cho_factor(scale, lower=True)
2891 inv_scale = scipy.linalg.cho_solve((L, lower), eye)
2892 # Cholesky decomposition of inverted scale
2893 C = scipy.linalg.cholesky(inv_scale, lower=True)
2895 out = self._rvs(n, shape, dim, df, C, random_state)
2897 return _squeeze_output(out)
2899 def entropy(self):
2900 # Need to find reference for inverse Wishart entropy
2901 raise AttributeError
2904invwishart = invwishart_gen()
2907class invwishart_frozen(multi_rv_frozen):
2908 def __init__(self, df, scale, seed=None):
2909 """Create a frozen inverse Wishart distribution.
2911 Parameters
2912 ----------
2913 df : array_like
2914 Degrees of freedom of the distribution
2915 scale : array_like
2916 Scale matrix of the distribution
2917 seed : {None, int, `numpy.random.Generator`}, optional
2918 If `seed` is None the `numpy.random.Generator` singleton is used.
2919 If `seed` is an int, a new ``Generator`` instance is used,
2920 seeded with `seed`.
2921 If `seed` is already a ``Generator`` instance then that instance is
2922 used.
2924 """
2925 self._dist = invwishart_gen(seed)
2926 self.dim, self.df, self.scale = self._dist._process_parameters(
2927 df, scale
2928 )
2930 # Get the determinant via Cholesky factorization
2931 C, lower = scipy.linalg.cho_factor(self.scale, lower=True)
2932 self.log_det_scale = 2 * np.sum(np.log(C.diagonal()))
2934 # Get the inverse using the Cholesky factorization
2935 eye = np.eye(self.dim)
2936 self.inv_scale = scipy.linalg.cho_solve((C, lower), eye)
2938 # Get the Cholesky factorization of the inverse scale
2939 self.C = scipy.linalg.cholesky(self.inv_scale, lower=True)
2941 def logpdf(self, x):
2942 x = self._dist._process_quantiles(x, self.dim)
2943 out = self._dist._logpdf(x, self.dim, self.df, self.scale,
2944 self.log_det_scale)
2945 return _squeeze_output(out)
2947 def pdf(self, x):
2948 return np.exp(self.logpdf(x))
2950 def mean(self):
2951 out = self._dist._mean(self.dim, self.df, self.scale)
2952 return _squeeze_output(out) if out is not None else out
2954 def mode(self):
2955 out = self._dist._mode(self.dim, self.df, self.scale)
2956 return _squeeze_output(out)
2958 def var(self):
2959 out = self._dist._var(self.dim, self.df, self.scale)
2960 return _squeeze_output(out) if out is not None else out
2962 def rvs(self, size=1, random_state=None):
2963 n, shape = self._dist._process_size(size)
2965 out = self._dist._rvs(n, shape, self.dim, self.df,
2966 self.C, random_state)
2968 return _squeeze_output(out)
2970 def entropy(self):
2971 # Need to find reference for inverse Wishart entropy
2972 raise AttributeError
2975# Set frozen generator docstrings from corresponding docstrings in
2976# inverse Wishart and fill in default strings in class docstrings
2977for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']:
2978 method = invwishart_gen.__dict__[name]
2979 method_frozen = wishart_frozen.__dict__[name]
2980 method_frozen.__doc__ = doccer.docformat(
2981 method.__doc__, wishart_docdict_noparams)
2982 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
2984_multinomial_doc_default_callparams = """\
2985n : int
2986 Number of trials
2987p : array_like
2988 Probability of a trial falling into each category; should sum to 1
2989"""
2991_multinomial_doc_callparams_note = """\
2992`n` should be a positive integer. Each element of `p` should be in the
2993interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to
29941, the last element of the `p` array is not used and is replaced with the
2995remaining probability left over from the earlier elements.
2996"""
2998_multinomial_doc_frozen_callparams = ""
3000_multinomial_doc_frozen_callparams_note = """\
3001See class definition for a detailed description of parameters."""
3003multinomial_docdict_params = {
3004 '_doc_default_callparams': _multinomial_doc_default_callparams,
3005 '_doc_callparams_note': _multinomial_doc_callparams_note,
3006 '_doc_random_state': _doc_random_state
3007}
3009multinomial_docdict_noparams = {
3010 '_doc_default_callparams': _multinomial_doc_frozen_callparams,
3011 '_doc_callparams_note': _multinomial_doc_frozen_callparams_note,
3012 '_doc_random_state': _doc_random_state
3013}
3016class multinomial_gen(multi_rv_generic):
3017 r"""A multinomial random variable.
3019 Methods
3020 -------
3021 pmf(x, n, p)
3022 Probability mass function.
3023 logpmf(x, n, p)
3024 Log of the probability mass function.
3025 rvs(n, p, size=1, random_state=None)
3026 Draw random samples from a multinomial distribution.
3027 entropy(n, p)
3028 Compute the entropy of the multinomial distribution.
3029 cov(n, p)
3030 Compute the covariance matrix of the multinomial distribution.
3032 Parameters
3033 ----------
3034 %(_doc_default_callparams)s
3035 %(_doc_random_state)s
3037 Notes
3038 -----
3039 %(_doc_callparams_note)s
3041 The probability mass function for `multinomial` is
3043 .. math::
3045 f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k},
3047 supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a
3048 nonnegative integer and their sum is :math:`n`.
3050 .. versionadded:: 0.19.0
3052 Examples
3053 --------
3055 >>> from scipy.stats import multinomial
3056 >>> rv = multinomial(8, [0.3, 0.2, 0.5])
3057 >>> rv.pmf([1, 3, 4])
3058 0.042000000000000072
3060 The multinomial distribution for :math:`k=2` is identical to the
3061 corresponding binomial distribution (tiny numerical differences
3062 notwithstanding):
3064 >>> from scipy.stats import binom
3065 >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])
3066 0.29030399999999973
3067 >>> binom.pmf(3, 7, 0.4)
3068 0.29030400000000012
3070 The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support
3071 broadcasting, under the convention that the vector parameters (``x`` and
3072 ``p``) are interpreted as if each row along the last axis is a single
3073 object. For instance:
3075 >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])
3076 array([0.2268945, 0.25412184])
3078 Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``,
3079 but following the rules mentioned above they behave as if the rows
3080 ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single
3081 object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and
3082 ``p.shape = ()``. To obtain the individual elements without broadcasting,
3083 we would do this:
3085 >>> multinomial.pmf([3, 4], n=7, p=[.3, .7])
3086 0.2268945
3087 >>> multinomial.pmf([3, 5], 8, p=[.3, .7])
3088 0.25412184
3090 This broadcasting also works for ``cov``, where the output objects are
3091 square matrices of size ``p.shape[-1]``. For example:
3093 >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]])
3094 array([[[ 0.84, -0.84],
3095 [-0.84, 0.84]],
3096 [[ 1.2 , -1.2 ],
3097 [-1.2 , 1.2 ]]])
3099 In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and
3100 following the rules above, these broadcast as if ``p.shape == (2,)``.
3101 Thus the result should also be of shape ``(2,)``, but since each output is
3102 a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``,
3103 where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and
3104 ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``.
3106 Alternatively, the object may be called (as a function) to fix the `n` and
3107 `p` parameters, returning a "frozen" multinomial random variable:
3109 >>> rv = multinomial(n=7, p=[.3, .7])
3110 >>> # Frozen object with the same methods but holding the given
3111 >>> # degrees of freedom and scale fixed.
3113 See also
3114 --------
3115 scipy.stats.binom : The binomial distribution.
3116 numpy.random.Generator.multinomial : Sampling from the multinomial distribution.
3117 scipy.stats.multivariate_hypergeom :
3118 The multivariate hypergeometric distribution.
3119 """ # noqa: E501
3121 def __init__(self, seed=None):
3122 super().__init__(seed)
3123 self.__doc__ = \
3124 doccer.docformat(self.__doc__, multinomial_docdict_params)
3126 def __call__(self, n, p, seed=None):
3127 """Create a frozen multinomial distribution.
3129 See `multinomial_frozen` for more information.
3130 """
3131 return multinomial_frozen(n, p, seed)
3133 def _process_parameters(self, n, p, eps=1e-15):
3134 """Returns: n_, p_, npcond.
3136 n_ and p_ are arrays of the correct shape; npcond is a boolean array
3137 flagging values out of the domain.
3138 """
3139 p = np.array(p, dtype=np.float64, copy=True)
3140 p_adjusted = 1. - p[..., :-1].sum(axis=-1)
3141 i_adjusted = np.abs(p_adjusted) > eps
3142 p[i_adjusted, -1] = p_adjusted[i_adjusted]
3144 # true for bad p
3145 pcond = np.any(p < 0, axis=-1)
3146 pcond |= np.any(p > 1, axis=-1)
3148 n = np.array(n, dtype=np.int_, copy=True)
3150 # true for bad n
3151 ncond = n <= 0
3153 return n, p, ncond | pcond
3155 def _process_quantiles(self, x, n, p):
3156 """Returns: x_, xcond.
3158 x_ is an int array; xcond is a boolean array flagging values out of the
3159 domain.
3160 """
3161 xx = np.asarray(x, dtype=np.int_)
3163 if xx.ndim == 0:
3164 raise ValueError("x must be an array.")
3166 if xx.size != 0 and not xx.shape[-1] == p.shape[-1]:
3167 raise ValueError("Size of each quantile should be size of p: "
3168 "received %d, but expected %d." %
3169 (xx.shape[-1], p.shape[-1]))
3171 # true for x out of the domain
3172 cond = np.any(xx != x, axis=-1)
3173 cond |= np.any(xx < 0, axis=-1)
3174 cond = cond | (np.sum(xx, axis=-1) != n)
3176 return xx, cond
3178 def _checkresult(self, result, cond, bad_value):
3179 result = np.asarray(result)
3181 if cond.ndim != 0:
3182 result[cond] = bad_value
3183 elif cond:
3184 if result.ndim == 0:
3185 return bad_value
3186 result[...] = bad_value
3187 return result
3189 def _logpmf(self, x, n, p):
3190 return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1)
3192 def logpmf(self, x, n, p):
3193 """Log of the Multinomial probability mass function.
3195 Parameters
3196 ----------
3197 x : array_like
3198 Quantiles, with the last axis of `x` denoting the components.
3199 %(_doc_default_callparams)s
3201 Returns
3202 -------
3203 logpmf : ndarray or scalar
3204 Log of the probability mass function evaluated at `x`
3206 Notes
3207 -----
3208 %(_doc_callparams_note)s
3209 """
3210 n, p, npcond = self._process_parameters(n, p)
3211 x, xcond = self._process_quantiles(x, n, p)
3213 result = self._logpmf(x, n, p)
3215 # replace values for which x was out of the domain; broadcast
3216 # xcond to the right shape
3217 xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_)
3218 result = self._checkresult(result, xcond_, np.NINF)
3220 # replace values bad for n or p; broadcast npcond to the right shape
3221 npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_)
3222 return self._checkresult(result, npcond_, np.NAN)
3224 def pmf(self, x, n, p):
3225 """Multinomial probability mass function.
3227 Parameters
3228 ----------
3229 x : array_like
3230 Quantiles, with the last axis of `x` denoting the components.
3231 %(_doc_default_callparams)s
3233 Returns
3234 -------
3235 pmf : ndarray or scalar
3236 Probability density function evaluated at `x`
3238 Notes
3239 -----
3240 %(_doc_callparams_note)s
3241 """
3242 return np.exp(self.logpmf(x, n, p))
3244 def mean(self, n, p):
3245 """Mean of the Multinomial distribution.
3247 Parameters
3248 ----------
3249 %(_doc_default_callparams)s
3251 Returns
3252 -------
3253 mean : float
3254 The mean of the distribution
3255 """
3256 n, p, npcond = self._process_parameters(n, p)
3257 result = n[..., np.newaxis]*p
3258 return self._checkresult(result, npcond, np.NAN)
3260 def cov(self, n, p):
3261 """Covariance matrix of the multinomial distribution.
3263 Parameters
3264 ----------
3265 %(_doc_default_callparams)s
3267 Returns
3268 -------
3269 cov : ndarray
3270 The covariance matrix of the distribution
3271 """
3272 n, p, npcond = self._process_parameters(n, p)
3274 nn = n[..., np.newaxis, np.newaxis]
3275 result = nn * np.einsum('...j,...k->...jk', -p, p)
3277 # change the diagonal
3278 for i in range(p.shape[-1]):
3279 result[..., i, i] += n*p[..., i]
3281 return self._checkresult(result, npcond, np.nan)
3283 def entropy(self, n, p):
3284 r"""Compute the entropy of the multinomial distribution.
3286 The entropy is computed using this expression:
3288 .. math::
3290 f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i +
3291 \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x!
3293 Parameters
3294 ----------
3295 %(_doc_default_callparams)s
3297 Returns
3298 -------
3299 h : scalar
3300 Entropy of the Multinomial distribution
3302 Notes
3303 -----
3304 %(_doc_callparams_note)s
3305 """
3306 n, p, npcond = self._process_parameters(n, p)
3308 x = np.r_[1:np.max(n)+1]
3310 term1 = n*np.sum(entr(p), axis=-1)
3311 term1 -= gammaln(n+1)
3313 n = n[..., np.newaxis]
3314 new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1
3315 x.shape += (1,)*new_axes_needed
3317 term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1),
3318 axis=(-1, -1-new_axes_needed))
3320 return self._checkresult(term1 + term2, npcond, np.nan)
3322 def rvs(self, n, p, size=None, random_state=None):
3323 """Draw random samples from a Multinomial distribution.
3325 Parameters
3326 ----------
3327 %(_doc_default_callparams)s
3328 size : integer or iterable of integers, optional
3329 Number of samples to draw (default 1).
3330 %(_doc_random_state)s
3332 Returns
3333 -------
3334 rvs : ndarray or scalar
3335 Random variates of shape (`size`, `len(p)`)
3337 Notes
3338 -----
3339 %(_doc_callparams_note)s
3340 """
3341 n, p, npcond = self._process_parameters(n, p)
3342 random_state = self._get_random_state(random_state)
3343 return random_state.multinomial(n, p, size)
3346multinomial = multinomial_gen()
3349class multinomial_frozen(multi_rv_frozen):
3350 r"""Create a frozen Multinomial distribution.
3352 Parameters
3353 ----------
3354 n : int
3355 number of trials
3356 p: array_like
3357 probability of a trial falling into each category; should sum to 1
3358 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
3359 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
3360 singleton is used.
3361 If `seed` is an int, a new ``RandomState`` instance is used,
3362 seeded with `seed`.
3363 If `seed` is already a ``Generator`` or ``RandomState`` instance then
3364 that instance is used.
3365 """
3366 def __init__(self, n, p, seed=None):
3367 self._dist = multinomial_gen(seed)
3368 self.n, self.p, self.npcond = self._dist._process_parameters(n, p)
3370 # monkey patch self._dist
3371 def _process_parameters(n, p):
3372 return self.n, self.p, self.npcond
3374 self._dist._process_parameters = _process_parameters
3376 def logpmf(self, x):
3377 return self._dist.logpmf(x, self.n, self.p)
3379 def pmf(self, x):
3380 return self._dist.pmf(x, self.n, self.p)
3382 def mean(self):
3383 return self._dist.mean(self.n, self.p)
3385 def cov(self):
3386 return self._dist.cov(self.n, self.p)
3388 def entropy(self):
3389 return self._dist.entropy(self.n, self.p)
3391 def rvs(self, size=1, random_state=None):
3392 return self._dist.rvs(self.n, self.p, size, random_state)
3395# Set frozen generator docstrings from corresponding docstrings in
3396# multinomial and fill in default strings in class docstrings
3397for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']:
3398 method = multinomial_gen.__dict__[name]
3399 method_frozen = multinomial_frozen.__dict__[name]
3400 method_frozen.__doc__ = doccer.docformat(
3401 method.__doc__, multinomial_docdict_noparams)
3402 method.__doc__ = doccer.docformat(method.__doc__,
3403 multinomial_docdict_params)
3406class special_ortho_group_gen(multi_rv_generic):
3407 r"""A Special Orthogonal matrix (SO(N)) random variable.
3409 Return a random rotation matrix, drawn from the Haar distribution
3410 (the only uniform distribution on SO(N)) with a determinant of +1.
3412 The `dim` keyword specifies the dimension N.
3414 Methods
3415 -------
3416 rvs(dim=None, size=1, random_state=None)
3417 Draw random samples from SO(N).
3419 Parameters
3420 ----------
3421 dim : scalar
3422 Dimension of matrices
3423 seed : {None, int, np.random.RandomState, np.random.Generator}, optional
3424 Used for drawing random variates.
3425 If `seed` is `None`, the `~np.random.RandomState` singleton is used.
3426 If `seed` is an int, a new ``RandomState`` instance is used, seeded
3427 with seed.
3428 If `seed` is already a ``RandomState`` or ``Generator`` instance,
3429 then that object is used.
3430 Default is `None`.
3432 Notes
3433 -----
3434 This class is wrapping the random_rot code from the MDP Toolkit,
3435 https://github.com/mdp-toolkit/mdp-toolkit
3437 Return a random rotation matrix, drawn from the Haar distribution
3438 (the only uniform distribution on SO(N)).
3439 The algorithm is described in the paper
3440 Stewart, G.W., "The efficient generation of random orthogonal
3441 matrices with an application to condition estimators", SIAM Journal
3442 on Numerical Analysis, 17(3), pp. 403-409, 1980.
3443 For more information see
3444 https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization
3446 See also the similar `ortho_group`. For a random rotation in three
3447 dimensions, see `scipy.spatial.transform.Rotation.random`.
3449 Examples
3450 --------
3451 >>> import numpy as np
3452 >>> from scipy.stats import special_ortho_group
3453 >>> x = special_ortho_group.rvs(3)
3455 >>> np.dot(x, x.T)
3456 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
3457 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
3458 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
3460 >>> import scipy.linalg
3461 >>> scipy.linalg.det(x)
3462 1.0
3464 This generates one random matrix from SO(3). It is orthogonal and
3465 has a determinant of 1.
3467 Alternatively, the object may be called (as a function) to fix the `dim`
3468 parameter, returning a "frozen" special_ortho_group random variable:
3470 >>> rv = special_ortho_group(5)
3471 >>> # Frozen object with the same methods but holding the
3472 >>> # dimension parameter fixed.
3474 See Also
3475 --------
3476 ortho_group, scipy.spatial.transform.Rotation.random
3478 """
3480 def __init__(self, seed=None):
3481 super().__init__(seed)
3482 self.__doc__ = doccer.docformat(self.__doc__)
3484 def __call__(self, dim=None, seed=None):
3485 """Create a frozen SO(N) distribution.
3487 See `special_ortho_group_frozen` for more information.
3488 """
3489 return special_ortho_group_frozen(dim, seed=seed)
3491 def _process_parameters(self, dim):
3492 """Dimension N must be specified; it cannot be inferred."""
3493 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
3494 raise ValueError("""Dimension of rotation must be specified,
3495 and must be a scalar greater than 1.""")
3497 return dim
3499 def rvs(self, dim, size=1, random_state=None):
3500 """Draw random samples from SO(N).
3502 Parameters
3503 ----------
3504 dim : integer
3505 Dimension of rotation space (N).
3506 size : integer, optional
3507 Number of samples to draw (default 1).
3509 Returns
3510 -------
3511 rvs : ndarray or scalar
3512 Random size N-dimensional matrices, dimension (size, dim, dim)
3514 """
3515 random_state = self._get_random_state(random_state)
3517 size = int(size)
3518 size = (size,) if size > 1 else ()
3520 dim = self._process_parameters(dim)
3522 # H represents a (dim, dim) matrix, while D represents the diagonal of
3523 # a (dim, dim) diagonal matrix. The algorithm that follows is
3524 # broadcasted on the leading shape in `size` to vectorize along
3525 # samples.
3526 H = np.empty(size + (dim, dim))
3527 H[..., :, :] = np.eye(dim)
3528 D = np.empty(size + (dim,))
3530 for n in range(dim-1):
3532 # x is a vector with length dim-n, xrow and xcol are views of it as
3533 # a row vector and column vector respectively. It's important they
3534 # are views and not copies because we are going to modify x
3535 # in-place.
3536 x = random_state.normal(size=size + (dim-n,))
3537 xrow = x[..., None, :]
3538 xcol = x[..., :, None]
3540 # This is the squared norm of x, without vectorization it would be
3541 # dot(x, x), to have proper broadcasting we use matmul and squeeze
3542 # out (convert to scalar) the resulting 1x1 matrix
3543 norm2 = np.matmul(xrow, xcol).squeeze((-2, -1))
3545 x0 = x[..., 0].copy()
3546 D[..., n] = np.where(x0 != 0, np.sign(x0), 1)
3547 x[..., 0] += D[..., n]*np.sqrt(norm2)
3549 # In renormalizing x we have to append an additional axis with
3550 # [..., None] to broadcast the scalar against the vector x
3551 x /= np.sqrt((norm2 - x0**2 + x[..., 0]**2) / 2.)[..., None]
3553 # Householder transformation, without vectorization the RHS can be
3554 # written as outer(H @ x, x) (apart from the slicing)
3555 H[..., :, n:] -= np.matmul(H[..., :, n:], xcol) * xrow
3557 D[..., -1] = (-1)**(dim-1)*D[..., :-1].prod(axis=-1)
3559 # Without vectorization this could be written as H = diag(D) @ H,
3560 # left-multiplication by a diagonal matrix amounts to multiplying each
3561 # row of H by an element of the diagonal, so we add a dummy axis for
3562 # the column index
3563 H *= D[..., :, None]
3564 return H
3567special_ortho_group = special_ortho_group_gen()
3570class special_ortho_group_frozen(multi_rv_frozen):
3571 def __init__(self, dim=None, seed=None):
3572 """Create a frozen SO(N) distribution.
3574 Parameters
3575 ----------
3576 dim : scalar
3577 Dimension of matrices
3578 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
3579 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
3580 singleton is used.
3581 If `seed` is an int, a new ``RandomState`` instance is used,
3582 seeded with `seed`.
3583 If `seed` is already a ``Generator`` or ``RandomState`` instance
3584 then that instance is used.
3586 Examples
3587 --------
3588 >>> from scipy.stats import special_ortho_group
3589 >>> g = special_ortho_group(5)
3590 >>> x = g.rvs()
3592 """
3593 self._dist = special_ortho_group_gen(seed)
3594 self.dim = self._dist._process_parameters(dim)
3596 def rvs(self, size=1, random_state=None):
3597 return self._dist.rvs(self.dim, size, random_state)
3600class ortho_group_gen(multi_rv_generic):
3601 r"""An Orthogonal matrix (O(N)) random variable.
3603 Return a random orthogonal matrix, drawn from the O(N) Haar
3604 distribution (the only uniform distribution on O(N)).
3606 The `dim` keyword specifies the dimension N.
3608 Methods
3609 -------
3610 rvs(dim=None, size=1, random_state=None)
3611 Draw random samples from O(N).
3613 Parameters
3614 ----------
3615 dim : scalar
3616 Dimension of matrices
3617 seed : {None, int, np.random.RandomState, np.random.Generator}, optional
3618 Used for drawing random variates.
3619 If `seed` is `None`, the `~np.random.RandomState` singleton is used.
3620 If `seed` is an int, a new ``RandomState`` instance is used, seeded
3621 with seed.
3622 If `seed` is already a ``RandomState`` or ``Generator`` instance,
3623 then that object is used.
3624 Default is `None`.
3626 Notes
3627 -----
3628 This class is closely related to `special_ortho_group`.
3630 Some care is taken to avoid numerical error, as per the paper by Mezzadri.
3632 References
3633 ----------
3634 .. [1] F. Mezzadri, "How to generate random matrices from the classical
3635 compact groups", :arXiv:`math-ph/0609050v2`.
3637 Examples
3638 --------
3639 >>> import numpy as np
3640 >>> from scipy.stats import ortho_group
3641 >>> x = ortho_group.rvs(3)
3643 >>> np.dot(x, x.T)
3644 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
3645 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
3646 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
3648 >>> import scipy.linalg
3649 >>> np.fabs(scipy.linalg.det(x))
3650 1.0
3652 This generates one random matrix from O(3). It is orthogonal and
3653 has a determinant of +1 or -1.
3655 Alternatively, the object may be called (as a function) to fix the `dim`
3656 parameter, returning a "frozen" ortho_group random variable:
3658 >>> rv = ortho_group(5)
3659 >>> # Frozen object with the same methods but holding the
3660 >>> # dimension parameter fixed.
3662 See Also
3663 --------
3664 special_ortho_group
3665 """
3667 def __init__(self, seed=None):
3668 super().__init__(seed)
3669 self.__doc__ = doccer.docformat(self.__doc__)
3671 def __call__(self, dim=None, seed=None):
3672 """Create a frozen O(N) distribution.
3674 See `ortho_group_frozen` for more information.
3675 """
3676 return ortho_group_frozen(dim, seed=seed)
3678 def _process_parameters(self, dim):
3679 """Dimension N must be specified; it cannot be inferred."""
3680 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
3681 raise ValueError("Dimension of rotation must be specified,"
3682 "and must be a scalar greater than 1.")
3684 return dim
3686 def rvs(self, dim, size=1, random_state=None):
3687 """Draw random samples from O(N).
3689 Parameters
3690 ----------
3691 dim : integer
3692 Dimension of rotation space (N).
3693 size : integer, optional
3694 Number of samples to draw (default 1).
3696 Returns
3697 -------
3698 rvs : ndarray or scalar
3699 Random size N-dimensional matrices, dimension (size, dim, dim)
3701 """
3702 random_state = self._get_random_state(random_state)
3704 size = int(size)
3705 if size > 1 and NumpyVersion(np.__version__) < '1.22.0':
3706 return np.array([self.rvs(dim, size=1, random_state=random_state)
3707 for i in range(size)])
3709 dim = self._process_parameters(dim)
3711 size = (size,) if size > 1 else ()
3712 z = random_state.normal(size=size + (dim, dim))
3713 q, r = np.linalg.qr(z)
3714 # The last two dimensions are the rows and columns of R matrices.
3715 # Extract the diagonals. Note that this eliminates a dimension.
3716 d = r.diagonal(offset=0, axis1=-2, axis2=-1)
3717 # Add back a dimension for proper broadcasting: we're dividing
3718 # each row of each R matrix by the diagonal of the R matrix.
3719 q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
3720 return q
3723ortho_group = ortho_group_gen()
3726class ortho_group_frozen(multi_rv_frozen):
3727 def __init__(self, dim=None, seed=None):
3728 """Create a frozen O(N) distribution.
3730 Parameters
3731 ----------
3732 dim : scalar
3733 Dimension of matrices
3734 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
3735 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
3736 singleton is used.
3737 If `seed` is an int, a new ``RandomState`` instance is used,
3738 seeded with `seed`.
3739 If `seed` is already a ``Generator`` or ``RandomState`` instance
3740 then that instance is used.
3742 Examples
3743 --------
3744 >>> from scipy.stats import ortho_group
3745 >>> g = ortho_group(5)
3746 >>> x = g.rvs()
3748 """
3749 self._dist = ortho_group_gen(seed)
3750 self.dim = self._dist._process_parameters(dim)
3752 def rvs(self, size=1, random_state=None):
3753 return self._dist.rvs(self.dim, size, random_state)
3756class random_correlation_gen(multi_rv_generic):
3757 r"""A random correlation matrix.
3759 Return a random correlation matrix, given a vector of eigenvalues.
3761 The `eigs` keyword specifies the eigenvalues of the correlation matrix,
3762 and implies the dimension.
3764 Methods
3765 -------
3766 rvs(eigs=None, random_state=None)
3767 Draw random correlation matrices, all with eigenvalues eigs.
3769 Parameters
3770 ----------
3771 eigs : 1d ndarray
3772 Eigenvalues of correlation matrix
3773 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
3774 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
3775 singleton is used.
3776 If `seed` is an int, a new ``RandomState`` instance is used,
3777 seeded with `seed`.
3778 If `seed` is already a ``Generator`` or ``RandomState`` instance
3779 then that instance is used.
3780 tol : float, optional
3781 Tolerance for input parameter checks
3782 diag_tol : float, optional
3783 Tolerance for deviation of the diagonal of the resulting
3784 matrix. Default: 1e-7
3786 Raises
3787 ------
3788 RuntimeError
3789 Floating point error prevented generating a valid correlation
3790 matrix.
3792 Returns
3793 -------
3794 rvs : ndarray or scalar
3795 Random size N-dimensional matrices, dimension (size, dim, dim),
3796 each having eigenvalues eigs.
3798 Notes
3799 -----
3801 Generates a random correlation matrix following a numerically stable
3802 algorithm spelled out by Davies & Higham. This algorithm uses a single O(N)
3803 similarity transformation to construct a symmetric positive semi-definite
3804 matrix, and applies a series of Givens rotations to scale it to have ones
3805 on the diagonal.
3807 References
3808 ----------
3810 .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation
3811 of correlation matrices and their factors", BIT 2000, Vol. 40,
3812 No. 4, pp. 640 651
3814 Examples
3815 --------
3816 >>> import numpy as np
3817 >>> from scipy.stats import random_correlation
3818 >>> rng = np.random.default_rng()
3819 >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng)
3820 >>> x
3821 array([[ 1. , -0.02423399, 0.03130519, 0.4946965 ],
3822 [-0.02423399, 1. , 0.20334736, 0.04039817],
3823 [ 0.03130519, 0.20334736, 1. , 0.02694275],
3824 [ 0.4946965 , 0.04039817, 0.02694275, 1. ]])
3825 >>> import scipy.linalg
3826 >>> e, v = scipy.linalg.eigh(x)
3827 >>> e
3828 array([ 0.5, 0.8, 1.2, 1.5])
3830 """
3832 def __init__(self, seed=None):
3833 super().__init__(seed)
3834 self.__doc__ = doccer.docformat(self.__doc__)
3836 def __call__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
3837 """Create a frozen random correlation matrix.
3839 See `random_correlation_frozen` for more information.
3840 """
3841 return random_correlation_frozen(eigs, seed=seed, tol=tol,
3842 diag_tol=diag_tol)
3844 def _process_parameters(self, eigs, tol):
3845 eigs = np.asarray(eigs, dtype=float)
3846 dim = eigs.size
3848 if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1:
3849 raise ValueError("Array 'eigs' must be a vector of length "
3850 "greater than 1.")
3852 if np.fabs(np.sum(eigs) - dim) > tol:
3853 raise ValueError("Sum of eigenvalues must equal dimensionality.")
3855 for x in eigs:
3856 if x < -tol:
3857 raise ValueError("All eigenvalues must be non-negative.")
3859 return dim, eigs
3861 def _givens_to_1(self, aii, ajj, aij):
3862 """Computes a 2x2 Givens matrix to put 1's on the diagonal.
3864 The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ].
3866 The output matrix g is a 2x2 anti-symmetric matrix of the form
3867 [ c s ; -s c ]; the elements c and s are returned.
3869 Applying the output matrix to the input matrix (as b=g.T M g)
3870 results in a matrix with bii=1, provided tr(M) - det(M) >= 1
3871 and floating point issues do not occur. Otherwise, some other
3872 valid rotation is returned. When tr(M)==2, also bjj=1.
3874 """
3875 aiid = aii - 1.
3876 ajjd = ajj - 1.
3878 if ajjd == 0:
3879 # ajj==1, so swap aii and ajj to avoid division by zero
3880 return 0., 1.
3882 dd = math.sqrt(max(aij**2 - aiid*ajjd, 0))
3884 # The choice of t should be chosen to avoid cancellation [1]
3885 t = (aij + math.copysign(dd, aij)) / ajjd
3886 c = 1. / math.sqrt(1. + t*t)
3887 if c == 0:
3888 # Underflow
3889 s = 1.0
3890 else:
3891 s = c*t
3892 return c, s
3894 def _to_corr(self, m):
3895 """
3896 Given a psd matrix m, rotate to put one's on the diagonal, turning it
3897 into a correlation matrix. This also requires the trace equal the
3898 dimensionality. Note: modifies input matrix
3899 """
3900 # Check requirements for in-place Givens
3901 if not (m.flags.c_contiguous and m.dtype == np.float64 and
3902 m.shape[0] == m.shape[1]):
3903 raise ValueError()
3905 d = m.shape[0]
3906 for i in range(d-1):
3907 if m[i, i] == 1:
3908 continue
3909 elif m[i, i] > 1:
3910 for j in range(i+1, d):
3911 if m[j, j] < 1:
3912 break
3913 else:
3914 for j in range(i+1, d):
3915 if m[j, j] > 1:
3916 break
3918 c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j])
3920 # Use BLAS to apply Givens rotations in-place. Equivalent to:
3921 # g = np.eye(d)
3922 # g[i, i] = g[j,j] = c
3923 # g[j, i] = -s; g[i, j] = s
3924 # m = np.dot(g.T, np.dot(m, g))
3925 mv = m.ravel()
3926 drot(mv, mv, c, -s, n=d,
3927 offx=i*d, incx=1, offy=j*d, incy=1,
3928 overwrite_x=True, overwrite_y=True)
3929 drot(mv, mv, c, -s, n=d,
3930 offx=i, incx=d, offy=j, incy=d,
3931 overwrite_x=True, overwrite_y=True)
3933 return m
3935 def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7):
3936 """Draw random correlation matrices.
3938 Parameters
3939 ----------
3940 eigs : 1d ndarray
3941 Eigenvalues of correlation matrix
3942 tol : float, optional
3943 Tolerance for input parameter checks
3944 diag_tol : float, optional
3945 Tolerance for deviation of the diagonal of the resulting
3946 matrix. Default: 1e-7
3948 Raises
3949 ------
3950 RuntimeError
3951 Floating point error prevented generating a valid correlation
3952 matrix.
3954 Returns
3955 -------
3956 rvs : ndarray or scalar
3957 Random size N-dimensional matrices, dimension (size, dim, dim),
3958 each having eigenvalues eigs.
3960 """
3961 dim, eigs = self._process_parameters(eigs, tol=tol)
3963 random_state = self._get_random_state(random_state)
3965 m = ortho_group.rvs(dim, random_state=random_state)
3966 m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m
3967 m = self._to_corr(m) # Carefully rotate to unit diagonal
3969 # Check diagonal
3970 if abs(m.diagonal() - 1).max() > diag_tol:
3971 raise RuntimeError("Failed to generate a valid correlation matrix")
3973 return m
3976random_correlation = random_correlation_gen()
3979class random_correlation_frozen(multi_rv_frozen):
3980 def __init__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
3981 """Create a frozen random correlation matrix distribution.
3983 Parameters
3984 ----------
3985 eigs : 1d ndarray
3986 Eigenvalues of correlation matrix
3987 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
3988 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
3989 singleton is used.
3990 If `seed` is an int, a new ``RandomState`` instance is used,
3991 seeded with `seed`.
3992 If `seed` is already a ``Generator`` or ``RandomState`` instance
3993 then that instance is used.
3994 tol : float, optional
3995 Tolerance for input parameter checks
3996 diag_tol : float, optional
3997 Tolerance for deviation of the diagonal of the resulting
3998 matrix. Default: 1e-7
4000 Raises
4001 ------
4002 RuntimeError
4003 Floating point error prevented generating a valid correlation
4004 matrix.
4006 Returns
4007 -------
4008 rvs : ndarray or scalar
4009 Random size N-dimensional matrices, dimension (size, dim, dim),
4010 each having eigenvalues eigs.
4011 """
4013 self._dist = random_correlation_gen(seed)
4014 self.tol = tol
4015 self.diag_tol = diag_tol
4016 _, self.eigs = self._dist._process_parameters(eigs, tol=self.tol)
4018 def rvs(self, random_state=None):
4019 return self._dist.rvs(self.eigs, random_state=random_state,
4020 tol=self.tol, diag_tol=self.diag_tol)
4023class unitary_group_gen(multi_rv_generic):
4024 r"""A matrix-valued U(N) random variable.
4026 Return a random unitary matrix.
4028 The `dim` keyword specifies the dimension N.
4030 Methods
4031 -------
4032 rvs(dim=None, size=1, random_state=None)
4033 Draw random samples from U(N).
4035 Parameters
4036 ----------
4037 dim : scalar
4038 Dimension of matrices
4039 seed : {None, int, np.random.RandomState, np.random.Generator}, optional
4040 Used for drawing random variates.
4041 If `seed` is `None`, the `~np.random.RandomState` singleton is used.
4042 If `seed` is an int, a new ``RandomState`` instance is used, seeded
4043 with seed.
4044 If `seed` is already a ``RandomState`` or ``Generator`` instance,
4045 then that object is used.
4046 Default is `None`.
4048 Notes
4049 -----
4050 This class is similar to `ortho_group`.
4052 References
4053 ----------
4054 .. [1] F. Mezzadri, "How to generate random matrices from the classical
4055 compact groups", :arXiv:`math-ph/0609050v2`.
4057 Examples
4058 --------
4059 >>> import numpy as np
4060 >>> from scipy.stats import unitary_group
4061 >>> x = unitary_group.rvs(3)
4063 >>> np.dot(x, x.conj().T)
4064 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
4065 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
4066 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
4068 This generates one random matrix from U(3). The dot product confirms that
4069 it is unitary up to machine precision.
4071 Alternatively, the object may be called (as a function) to fix the `dim`
4072 parameter, return a "frozen" unitary_group random variable:
4074 >>> rv = unitary_group(5)
4076 See Also
4077 --------
4078 ortho_group
4080 """
4082 def __init__(self, seed=None):
4083 super().__init__(seed)
4084 self.__doc__ = doccer.docformat(self.__doc__)
4086 def __call__(self, dim=None, seed=None):
4087 """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
4089 See `unitary_group_frozen` for more information.
4090 """
4091 return unitary_group_frozen(dim, seed=seed)
4093 def _process_parameters(self, dim):
4094 """Dimension N must be specified; it cannot be inferred."""
4095 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
4096 raise ValueError("Dimension of rotation must be specified,"
4097 "and must be a scalar greater than 1.")
4099 return dim
4101 def rvs(self, dim, size=1, random_state=None):
4102 """Draw random samples from U(N).
4104 Parameters
4105 ----------
4106 dim : integer
4107 Dimension of space (N).
4108 size : integer, optional
4109 Number of samples to draw (default 1).
4111 Returns
4112 -------
4113 rvs : ndarray or scalar
4114 Random size N-dimensional matrices, dimension (size, dim, dim)
4116 """
4117 random_state = self._get_random_state(random_state)
4119 size = int(size)
4120 if size > 1 and NumpyVersion(np.__version__) < '1.22.0':
4121 return np.array([self.rvs(dim, size=1, random_state=random_state)
4122 for i in range(size)])
4124 dim = self._process_parameters(dim)
4126 size = (size,) if size > 1 else ()
4127 z = 1/math.sqrt(2)*(random_state.normal(size=size + (dim, dim)) +
4128 1j*random_state.normal(size=size + (dim, dim)))
4129 q, r = np.linalg.qr(z)
4130 # The last two dimensions are the rows and columns of R matrices.
4131 # Extract the diagonals. Note that this eliminates a dimension.
4132 d = r.diagonal(offset=0, axis1=-2, axis2=-1)
4133 # Add back a dimension for proper broadcasting: we're dividing
4134 # each row of each R matrix by the diagonal of the R matrix.
4135 q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
4136 return q
4139unitary_group = unitary_group_gen()
4142class unitary_group_frozen(multi_rv_frozen):
4143 def __init__(self, dim=None, seed=None):
4144 """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
4146 Parameters
4147 ----------
4148 dim : scalar
4149 Dimension of matrices
4150 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
4151 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
4152 singleton is used.
4153 If `seed` is an int, a new ``RandomState`` instance is used,
4154 seeded with `seed`.
4155 If `seed` is already a ``Generator`` or ``RandomState`` instance
4156 then that instance is used.
4158 Examples
4159 --------
4160 >>> from scipy.stats import unitary_group
4161 >>> x = unitary_group(3)
4162 >>> x.rvs()
4164 """
4165 self._dist = unitary_group_gen(seed)
4166 self.dim = self._dist._process_parameters(dim)
4168 def rvs(self, size=1, random_state=None):
4169 return self._dist.rvs(self.dim, size, random_state)
4172_mvt_doc_default_callparams = """\
4173loc : array_like, optional
4174 Location of the distribution. (default ``0``)
4175shape : array_like, optional
4176 Positive semidefinite matrix of the distribution. (default ``1``)
4177df : float, optional
4178 Degrees of freedom of the distribution; must be greater than zero.
4179 If ``np.inf`` then results are multivariate normal. The default is ``1``.
4180allow_singular : bool, optional
4181 Whether to allow a singular matrix. (default ``False``)
4182"""
4184_mvt_doc_callparams_note = """\
4185Setting the parameter `loc` to ``None`` is equivalent to having `loc`
4186be the zero-vector. The parameter `shape` can be a scalar, in which case
4187the shape matrix is the identity times that value, a vector of
4188diagonal entries for the shape matrix, or a two-dimensional array_like.
4189"""
4191_mvt_doc_frozen_callparams_note = """\
4192See class definition for a detailed description of parameters."""
4194mvt_docdict_params = {
4195 '_mvt_doc_default_callparams': _mvt_doc_default_callparams,
4196 '_mvt_doc_callparams_note': _mvt_doc_callparams_note,
4197 '_doc_random_state': _doc_random_state
4198}
4200mvt_docdict_noparams = {
4201 '_mvt_doc_default_callparams': "",
4202 '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note,
4203 '_doc_random_state': _doc_random_state
4204}
4207class multivariate_t_gen(multi_rv_generic):
4208 r"""A multivariate t-distributed random variable.
4210 The `loc` parameter specifies the location. The `shape` parameter specifies
4211 the positive semidefinite shape matrix. The `df` parameter specifies the
4212 degrees of freedom.
4214 In addition to calling the methods below, the object itself may be called
4215 as a function to fix the location, shape matrix, and degrees of freedom
4216 parameters, returning a "frozen" multivariate t-distribution random.
4218 Methods
4219 -------
4220 pdf(x, loc=None, shape=1, df=1, allow_singular=False)
4221 Probability density function.
4222 logpdf(x, loc=None, shape=1, df=1, allow_singular=False)
4223 Log of the probability density function.
4224 rvs(loc=None, shape=1, df=1, size=1, random_state=None)
4225 Draw random samples from a multivariate t-distribution.
4227 Parameters
4228 ----------
4229 %(_mvt_doc_default_callparams)s
4230 %(_doc_random_state)s
4232 Notes
4233 -----
4234 %(_mvt_doc_callparams_note)s
4235 The matrix `shape` must be a (symmetric) positive semidefinite matrix. The
4236 determinant and inverse of `shape` are computed as the pseudo-determinant
4237 and pseudo-inverse, respectively, so that `shape` does not need to have
4238 full rank.
4240 The probability density function for `multivariate_t` is
4242 .. math::
4244 f(x) = \frac{\Gamma(\nu + p)/2}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}}
4245 \left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top}
4246 \boldsymbol{\Sigma}^{-1}
4247 (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2},
4249 where :math:`p` is the dimension of :math:`\mathbf{x}`,
4250 :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location,
4251 :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape
4252 matrix, and :math:`\nu` is the degrees of freedom.
4254 .. versionadded:: 1.6.0
4256 Examples
4257 --------
4258 The object may be called (as a function) to fix the `loc`, `shape`,
4259 `df`, and `allow_singular` parameters, returning a "frozen"
4260 multivariate_t random variable:
4262 >>> import numpy as np
4263 >>> from scipy.stats import multivariate_t
4264 >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2)
4265 >>> # Frozen object with the same methods but holding the given location,
4266 >>> # scale, and degrees of freedom fixed.
4268 Create a contour plot of the PDF.
4270 >>> import matplotlib.pyplot as plt
4271 >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01]
4272 >>> pos = np.dstack((x, y))
4273 >>> fig, ax = plt.subplots(1, 1)
4274 >>> ax.set_aspect('equal')
4275 >>> plt.contourf(x, y, rv.pdf(pos))
4277 """
4279 def __init__(self, seed=None):
4280 """Initialize a multivariate t-distributed random variable.
4282 Parameters
4283 ----------
4284 seed : Random state.
4286 """
4287 super().__init__(seed)
4288 self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params)
4289 self._random_state = check_random_state(seed)
4291 def __call__(self, loc=None, shape=1, df=1, allow_singular=False,
4292 seed=None):
4293 """Create a frozen multivariate t-distribution.
4295 See `multivariate_t_frozen` for parameters.
4296 """
4297 if df == np.inf:
4298 return multivariate_normal_frozen(mean=loc, cov=shape,
4299 allow_singular=allow_singular,
4300 seed=seed)
4301 return multivariate_t_frozen(loc=loc, shape=shape, df=df,
4302 allow_singular=allow_singular, seed=seed)
4304 def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False):
4305 """Multivariate t-distribution probability density function.
4307 Parameters
4308 ----------
4309 x : array_like
4310 Points at which to evaluate the probability density function.
4311 %(_mvt_doc_default_callparams)s
4313 Returns
4314 -------
4315 pdf : Probability density function evaluated at `x`.
4317 Examples
4318 --------
4319 >>> from scipy.stats import multivariate_t
4320 >>> x = [0.4, 5]
4321 >>> loc = [0, 1]
4322 >>> shape = [[1, 0.1], [0.1, 1]]
4323 >>> df = 7
4324 >>> multivariate_t.pdf(x, loc, shape, df)
4325 array([0.00075713])
4327 """
4328 dim, loc, shape, df = self._process_parameters(loc, shape, df)
4329 x = self._process_quantiles(x, dim)
4330 shape_info = _PSD(shape, allow_singular=allow_singular)
4331 logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df,
4332 dim, shape_info.rank)
4333 return np.exp(logpdf)
4335 def logpdf(self, x, loc=None, shape=1, df=1):
4336 """Log of the multivariate t-distribution probability density function.
4338 Parameters
4339 ----------
4340 x : array_like
4341 Points at which to evaluate the log of the probability density
4342 function.
4343 %(_mvt_doc_default_callparams)s
4345 Returns
4346 -------
4347 logpdf : Log of the probability density function evaluated at `x`.
4349 Examples
4350 --------
4351 >>> from scipy.stats import multivariate_t
4352 >>> x = [0.4, 5]
4353 >>> loc = [0, 1]
4354 >>> shape = [[1, 0.1], [0.1, 1]]
4355 >>> df = 7
4356 >>> multivariate_t.logpdf(x, loc, shape, df)
4357 array([-7.1859802])
4359 See Also
4360 --------
4361 pdf : Probability density function.
4363 """
4364 dim, loc, shape, df = self._process_parameters(loc, shape, df)
4365 x = self._process_quantiles(x, dim)
4366 shape_info = _PSD(shape)
4367 return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim,
4368 shape_info.rank)
4370 def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank):
4371 """Utility method `pdf`, `logpdf` for parameters.
4373 Parameters
4374 ----------
4375 x : ndarray
4376 Points at which to evaluate the log of the probability density
4377 function.
4378 loc : ndarray
4379 Location of the distribution.
4380 prec_U : ndarray
4381 A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse
4382 of the shape matrix.
4383 log_pdet : float
4384 Logarithm of the determinant of the shape matrix.
4385 df : float
4386 Degrees of freedom of the distribution.
4387 dim : int
4388 Dimension of the quantiles x.
4389 rank : int
4390 Rank of the shape matrix.
4392 Notes
4393 -----
4394 As this function does no argument checking, it should not be called
4395 directly; use 'logpdf' instead.
4397 """
4398 if df == np.inf:
4399 return multivariate_normal._logpdf(x, loc, prec_U, log_pdet, rank)
4401 dev = x - loc
4402 maha = np.square(np.dot(dev, prec_U)).sum(axis=-1)
4404 t = 0.5 * (df + dim)
4405 A = gammaln(t)
4406 B = gammaln(0.5 * df)
4407 C = dim/2. * np.log(df * np.pi)
4408 D = 0.5 * log_pdet
4409 E = -t * np.log(1 + (1./df) * maha)
4411 return _squeeze_output(A - B - C - D + E)
4413 def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None):
4414 """Draw random samples from a multivariate t-distribution.
4416 Parameters
4417 ----------
4418 %(_mvt_doc_default_callparams)s
4419 size : integer, optional
4420 Number of samples to draw (default 1).
4421 %(_doc_random_state)s
4423 Returns
4424 -------
4425 rvs : ndarray or scalar
4426 Random variates of size (`size`, `P`), where `P` is the
4427 dimension of the random variable.
4429 Examples
4430 --------
4431 >>> from scipy.stats import multivariate_t
4432 >>> x = [0.4, 5]
4433 >>> loc = [0, 1]
4434 >>> shape = [[1, 0.1], [0.1, 1]]
4435 >>> df = 7
4436 >>> multivariate_t.rvs(loc, shape, df)
4437 array([[0.93477495, 3.00408716]])
4439 """
4440 # For implementation details, see equation (3):
4441 #
4442 # Hofert, "On Sampling from the Multivariatet Distribution", 2013
4443 # http://rjournal.github.io/archive/2013-2/hofert.pdf
4444 #
4445 dim, loc, shape, df = self._process_parameters(loc, shape, df)
4446 if random_state is not None:
4447 rng = check_random_state(random_state)
4448 else:
4449 rng = self._random_state
4451 if np.isinf(df):
4452 x = np.ones(size)
4453 else:
4454 x = rng.chisquare(df, size=size) / df
4456 z = rng.multivariate_normal(np.zeros(dim), shape, size=size)
4457 samples = loc + z / np.sqrt(x)[..., None]
4458 return _squeeze_output(samples)
4460 def _process_quantiles(self, x, dim):
4461 """
4462 Adjust quantiles array so that last axis labels the components of
4463 each data point.
4464 """
4465 x = np.asarray(x, dtype=float)
4466 if x.ndim == 0:
4467 x = x[np.newaxis]
4468 elif x.ndim == 1:
4469 if dim == 1:
4470 x = x[:, np.newaxis]
4471 else:
4472 x = x[np.newaxis, :]
4473 return x
4475 def _process_parameters(self, loc, shape, df):
4476 """
4477 Infer dimensionality from location array and shape matrix, handle
4478 defaults, and ensure compatible dimensions.
4479 """
4480 if loc is None and shape is None:
4481 loc = np.asarray(0, dtype=float)
4482 shape = np.asarray(1, dtype=float)
4483 dim = 1
4484 elif loc is None:
4485 shape = np.asarray(shape, dtype=float)
4486 if shape.ndim < 2:
4487 dim = 1
4488 else:
4489 dim = shape.shape[0]
4490 loc = np.zeros(dim)
4491 elif shape is None:
4492 loc = np.asarray(loc, dtype=float)
4493 dim = loc.size
4494 shape = np.eye(dim)
4495 else:
4496 shape = np.asarray(shape, dtype=float)
4497 loc = np.asarray(loc, dtype=float)
4498 dim = loc.size
4500 if dim == 1:
4501 loc = loc.reshape(1)
4502 shape = shape.reshape(1, 1)
4504 if loc.ndim != 1 or loc.shape[0] != dim:
4505 raise ValueError("Array 'loc' must be a vector of length %d." %
4506 dim)
4507 if shape.ndim == 0:
4508 shape = shape * np.eye(dim)
4509 elif shape.ndim == 1:
4510 shape = np.diag(shape)
4511 elif shape.ndim == 2 and shape.shape != (dim, dim):
4512 rows, cols = shape.shape
4513 if rows != cols:
4514 msg = ("Array 'cov' must be square if it is two dimensional,"
4515 " but cov.shape = %s." % str(shape.shape))
4516 else:
4517 msg = ("Dimension mismatch: array 'cov' is of shape %s,"
4518 " but 'loc' is a vector of length %d.")
4519 msg = msg % (str(shape.shape), len(loc))
4520 raise ValueError(msg)
4521 elif shape.ndim > 2:
4522 raise ValueError("Array 'cov' must be at most two-dimensional,"
4523 " but cov.ndim = %d" % shape.ndim)
4525 # Process degrees of freedom.
4526 if df is None:
4527 df = 1
4528 elif df <= 0:
4529 raise ValueError("'df' must be greater than zero.")
4530 elif np.isnan(df):
4531 raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.")
4533 return dim, loc, shape, df
4536class multivariate_t_frozen(multi_rv_frozen):
4538 def __init__(self, loc=None, shape=1, df=1, allow_singular=False,
4539 seed=None):
4540 """Create a frozen multivariate t distribution.
4542 Parameters
4543 ----------
4544 %(_mvt_doc_default_callparams)s
4546 Examples
4547 --------
4548 >>> import numpy as np
4549 >>> loc = np.zeros(3)
4550 >>> shape = np.eye(3)
4551 >>> df = 10
4552 >>> dist = multivariate_t(loc, shape, df)
4553 >>> dist.rvs()
4554 array([[ 0.81412036, -1.53612361, 0.42199647]])
4555 >>> dist.pdf([1, 1, 1])
4556 array([0.01237803])
4558 """
4559 self._dist = multivariate_t_gen(seed)
4560 dim, loc, shape, df = self._dist._process_parameters(loc, shape, df)
4561 self.dim, self.loc, self.shape, self.df = dim, loc, shape, df
4562 self.shape_info = _PSD(shape, allow_singular=allow_singular)
4564 def logpdf(self, x):
4565 x = self._dist._process_quantiles(x, self.dim)
4566 U = self.shape_info.U
4567 log_pdet = self.shape_info.log_pdet
4568 return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim,
4569 self.shape_info.rank)
4571 def pdf(self, x):
4572 return np.exp(self.logpdf(x))
4574 def rvs(self, size=1, random_state=None):
4575 return self._dist.rvs(loc=self.loc,
4576 shape=self.shape,
4577 df=self.df,
4578 size=size,
4579 random_state=random_state)
4582multivariate_t = multivariate_t_gen()
4585# Set frozen generator docstrings from corresponding docstrings in
4586# matrix_normal_gen and fill in default strings in class docstrings
4587for name in ['logpdf', 'pdf', 'rvs']:
4588 method = multivariate_t_gen.__dict__[name]
4589 method_frozen = multivariate_t_frozen.__dict__[name]
4590 method_frozen.__doc__ = doccer.docformat(method.__doc__,
4591 mvt_docdict_noparams)
4592 method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params)
4595_mhg_doc_default_callparams = """\
4596m : array_like
4597 The number of each type of object in the population.
4598 That is, :math:`m[i]` is the number of objects of
4599 type :math:`i`.
4600n : array_like
4601 The number of samples taken from the population.
4602"""
4604_mhg_doc_callparams_note = """\
4605`m` must be an array of positive integers. If the quantile
4606:math:`i` contains values out of the range :math:`[0, m_i]`
4607where :math:`m_i` is the number of objects of type :math:`i`
4608in the population or if the parameters are inconsistent with one
4609another (e.g. ``x.sum() != n``), methods return the appropriate
4610value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative
4611values, the result will contain ``nan`` there.
4612"""
4614_mhg_doc_frozen_callparams = ""
4616_mhg_doc_frozen_callparams_note = """\
4617See class definition for a detailed description of parameters."""
4619mhg_docdict_params = {
4620 '_doc_default_callparams': _mhg_doc_default_callparams,
4621 '_doc_callparams_note': _mhg_doc_callparams_note,
4622 '_doc_random_state': _doc_random_state
4623}
4625mhg_docdict_noparams = {
4626 '_doc_default_callparams': _mhg_doc_frozen_callparams,
4627 '_doc_callparams_note': _mhg_doc_frozen_callparams_note,
4628 '_doc_random_state': _doc_random_state
4629}
4632class multivariate_hypergeom_gen(multi_rv_generic):
4633 r"""A multivariate hypergeometric random variable.
4635 Methods
4636 -------
4637 pmf(x, m, n)
4638 Probability mass function.
4639 logpmf(x, m, n)
4640 Log of the probability mass function.
4641 rvs(m, n, size=1, random_state=None)
4642 Draw random samples from a multivariate hypergeometric
4643 distribution.
4644 mean(m, n)
4645 Mean of the multivariate hypergeometric distribution.
4646 var(m, n)
4647 Variance of the multivariate hypergeometric distribution.
4648 cov(m, n)
4649 Compute the covariance matrix of the multivariate
4650 hypergeometric distribution.
4652 Parameters
4653 ----------
4654 %(_doc_default_callparams)s
4655 %(_doc_random_state)s
4657 Notes
4658 -----
4659 %(_doc_callparams_note)s
4661 The probability mass function for `multivariate_hypergeom` is
4663 .. math::
4665 P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1}
4666 \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad
4667 (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with }
4668 \sum_{i=1}^k x_i = n
4670 where :math:`m_i` are the number of objects of type :math:`i`, :math:`M`
4671 is the total number of objects in the population (sum of all the
4672 :math:`m_i`), and :math:`n` is the size of the sample to be taken
4673 from the population.
4675 .. versionadded:: 1.6.0
4677 Examples
4678 --------
4679 To evaluate the probability mass function of the multivariate
4680 hypergeometric distribution, with a dichotomous population of size
4681 :math:`10` and :math:`20`, at a sample of size :math:`12` with
4682 :math:`8` objects of the first type and :math:`4` objects of the
4683 second type, use:
4685 >>> from scipy.stats import multivariate_hypergeom
4686 >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12)
4687 0.0025207176631464523
4689 The `multivariate_hypergeom` distribution is identical to the
4690 corresponding `hypergeom` distribution (tiny numerical differences
4691 notwithstanding) when only two types (good and bad) of objects
4692 are present in the population as in the example above. Consider
4693 another example for a comparison with the hypergeometric distribution:
4695 >>> from scipy.stats import hypergeom
4696 >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4)
4697 0.4395604395604395
4698 >>> hypergeom.pmf(k=3, M=15, n=4, N=10)
4699 0.43956043956044005
4701 The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs``
4702 support broadcasting, under the convention that the vector parameters
4703 (``x``, ``m``, and ``n``) are interpreted as if each row along the last
4704 axis is a single object. For instance, we can combine the previous two
4705 calls to `multivariate_hypergeom` as
4707 >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]],
4708 ... n=[12, 4])
4709 array([0.00252072, 0.43956044])
4711 This broadcasting also works for ``cov``, where the output objects are
4712 square matrices of size ``m.shape[-1]``. For example:
4714 >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12])
4715 array([[[ 1.05, -1.05],
4716 [-1.05, 1.05]],
4717 [[ 1.56, -1.56],
4718 [-1.56, 1.56]]])
4720 That is, ``result[0]`` is equal to
4721 ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal
4722 to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``.
4724 Alternatively, the object may be called (as a function) to fix the `m`
4725 and `n` parameters, returning a "frozen" multivariate hypergeometric
4726 random variable.
4728 >>> rv = multivariate_hypergeom(m=[10, 20], n=12)
4729 >>> rv.pmf(x=[8, 4])
4730 0.0025207176631464523
4732 See Also
4733 --------
4734 scipy.stats.hypergeom : The hypergeometric distribution.
4735 scipy.stats.multinomial : The multinomial distribution.
4737 References
4738 ----------
4739 .. [1] The Multivariate Hypergeometric Distribution,
4740 http://www.randomservices.org/random/urn/MultiHypergeometric.html
4741 .. [2] Thomas J. Sargent and John Stachurski, 2020,
4742 Multivariate Hypergeometric Distribution
4743 https://python.quantecon.org/_downloads/pdf/multi_hyper.pdf
4744 """
4745 def __init__(self, seed=None):
4746 super().__init__(seed)
4747 self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params)
4749 def __call__(self, m, n, seed=None):
4750 """Create a frozen multivariate_hypergeom distribution.
4752 See `multivariate_hypergeom_frozen` for more information.
4753 """
4754 return multivariate_hypergeom_frozen(m, n, seed=seed)
4756 def _process_parameters(self, m, n):
4757 m = np.asarray(m)
4758 n = np.asarray(n)
4759 if m.size == 0:
4760 m = m.astype(int)
4761 if n.size == 0:
4762 n = n.astype(int)
4763 if not np.issubdtype(m.dtype, np.integer):
4764 raise TypeError("'m' must an array of integers.")
4765 if not np.issubdtype(n.dtype, np.integer):
4766 raise TypeError("'n' must an array of integers.")
4767 if m.ndim == 0:
4768 raise ValueError("'m' must be an array with"
4769 " at least one dimension.")
4771 # check for empty arrays
4772 if m.size != 0:
4773 n = n[..., np.newaxis]
4775 m, n = np.broadcast_arrays(m, n)
4777 # check for empty arrays
4778 if m.size != 0:
4779 n = n[..., 0]
4781 mcond = m < 0
4783 M = m.sum(axis=-1)
4785 ncond = (n < 0) | (n > M)
4786 return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond
4788 def _process_quantiles(self, x, M, m, n):
4789 x = np.asarray(x)
4790 if not np.issubdtype(x.dtype, np.integer):
4791 raise TypeError("'x' must an array of integers.")
4792 if x.ndim == 0:
4793 raise ValueError("'x' must be an array with"
4794 " at least one dimension.")
4795 if not x.shape[-1] == m.shape[-1]:
4796 raise ValueError(f"Size of each quantile must be size of 'm': "
4797 f"received {x.shape[-1]}, "
4798 f"but expected {m.shape[-1]}.")
4800 # check for empty arrays
4801 if m.size != 0:
4802 n = n[..., np.newaxis]
4803 M = M[..., np.newaxis]
4805 x, m, n, M = np.broadcast_arrays(x, m, n, M)
4807 # check for empty arrays
4808 if m.size != 0:
4809 n, M = n[..., 0], M[..., 0]
4811 xcond = (x < 0) | (x > m)
4812 return (x, M, m, n, xcond,
4813 np.any(xcond, axis=-1) | (x.sum(axis=-1) != n))
4815 def _checkresult(self, result, cond, bad_value):
4816 result = np.asarray(result)
4817 if cond.ndim != 0:
4818 result[cond] = bad_value
4819 elif cond:
4820 return bad_value
4821 if result.ndim == 0:
4822 return result[()]
4823 return result
4825 def _logpmf(self, x, M, m, n, mxcond, ncond):
4826 # This equation of the pmf comes from the relation,
4827 # n combine r = beta(n+1, 1) / beta(r+1, n-r+1)
4828 num = np.zeros_like(m, dtype=np.float_)
4829 den = np.zeros_like(n, dtype=np.float_)
4830 m, x = m[~mxcond], x[~mxcond]
4831 M, n = M[~ncond], n[~ncond]
4832 num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1))
4833 den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1))
4834 num[mxcond] = np.nan
4835 den[ncond] = np.nan
4836 num = num.sum(axis=-1)
4837 return num - den
4839 def logpmf(self, x, m, n):
4840 """Log of the multivariate hypergeometric probability mass function.
4842 Parameters
4843 ----------
4844 x : array_like
4845 Quantiles, with the last axis of `x` denoting the components.
4846 %(_doc_default_callparams)s
4848 Returns
4849 -------
4850 logpmf : ndarray or scalar
4851 Log of the probability mass function evaluated at `x`
4853 Notes
4854 -----
4855 %(_doc_callparams_note)s
4856 """
4857 M, m, n, mcond, ncond, mncond = self._process_parameters(m, n)
4858 (x, M, m, n, xcond,
4859 xcond_reduced) = self._process_quantiles(x, M, m, n)
4860 mxcond = mcond | xcond
4861 ncond = ncond | np.zeros(n.shape, dtype=np.bool_)
4863 result = self._logpmf(x, M, m, n, mxcond, ncond)
4865 # replace values for which x was out of the domain; broadcast
4866 # xcond to the right shape
4867 xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_)
4868 result = self._checkresult(result, xcond_, np.NINF)
4870 # replace values bad for n or m; broadcast
4871 # mncond to the right shape
4872 mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_)
4873 return self._checkresult(result, mncond_, np.nan)
4875 def pmf(self, x, m, n):
4876 """Multivariate hypergeometric probability mass function.
4878 Parameters
4879 ----------
4880 x : array_like
4881 Quantiles, with the last axis of `x` denoting the components.
4882 %(_doc_default_callparams)s
4884 Returns
4885 -------
4886 pmf : ndarray or scalar
4887 Probability density function evaluated at `x`
4889 Notes
4890 -----
4891 %(_doc_callparams_note)s
4892 """
4893 out = np.exp(self.logpmf(x, m, n))
4894 return out
4896 def mean(self, m, n):
4897 """Mean of the multivariate hypergeometric distribution.
4899 Parameters
4900 ----------
4901 %(_doc_default_callparams)s
4903 Returns
4904 -------
4905 mean : array_like or scalar
4906 The mean of the distribution
4907 """
4908 M, m, n, _, _, mncond = self._process_parameters(m, n)
4909 # check for empty arrays
4910 if m.size != 0:
4911 M, n = M[..., np.newaxis], n[..., np.newaxis]
4912 cond = (M == 0)
4913 M = np.ma.masked_array(M, mask=cond)
4914 mu = n*(m/M)
4915 if m.size != 0:
4916 mncond = (mncond[..., np.newaxis] |
4917 np.zeros(mu.shape, dtype=np.bool_))
4918 return self._checkresult(mu, mncond, np.nan)
4920 def var(self, m, n):
4921 """Variance of the multivariate hypergeometric distribution.
4923 Parameters
4924 ----------
4925 %(_doc_default_callparams)s
4927 Returns
4928 -------
4929 array_like
4930 The variances of the components of the distribution. This is
4931 the diagonal of the covariance matrix of the distribution
4932 """
4933 M, m, n, _, _, mncond = self._process_parameters(m, n)
4934 # check for empty arrays
4935 if m.size != 0:
4936 M, n = M[..., np.newaxis], n[..., np.newaxis]
4937 cond = (M == 0) & (M-1 == 0)
4938 M = np.ma.masked_array(M, mask=cond)
4939 output = n * m/M * (M-m)/M * (M-n)/(M-1)
4940 if m.size != 0:
4941 mncond = (mncond[..., np.newaxis] |
4942 np.zeros(output.shape, dtype=np.bool_))
4943 return self._checkresult(output, mncond, np.nan)
4945 def cov(self, m, n):
4946 """Covariance matrix of the multivariate hypergeometric distribution.
4948 Parameters
4949 ----------
4950 %(_doc_default_callparams)s
4952 Returns
4953 -------
4954 cov : array_like
4955 The covariance matrix of the distribution
4956 """
4957 # see [1]_ for the formula and [2]_ for implementation
4958 # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2)
4959 M, m, n, _, _, mncond = self._process_parameters(m, n)
4960 # check for empty arrays
4961 if m.size != 0:
4962 M = M[..., np.newaxis, np.newaxis]
4963 n = n[..., np.newaxis, np.newaxis]
4964 cond = (M == 0) & (M-1 == 0)
4965 M = np.ma.masked_array(M, mask=cond)
4966 output = (-n * (M-n)/(M-1) *
4967 np.einsum("...i,...j->...ij", m, m) / (M**2))
4968 # check for empty arrays
4969 if m.size != 0:
4970 M, n = M[..., 0, 0], n[..., 0, 0]
4971 cond = cond[..., 0, 0]
4972 dim = m.shape[-1]
4973 # diagonal entries need to be computed differently
4974 for i in range(dim):
4975 output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i]))
4976 output[..., i, i] = output[..., i, i] / (M-1)
4977 output[..., i, i] = output[..., i, i] / (M**2)
4978 if m.size != 0:
4979 mncond = (mncond[..., np.newaxis, np.newaxis] |
4980 np.zeros(output.shape, dtype=np.bool_))
4981 return self._checkresult(output, mncond, np.nan)
4983 def rvs(self, m, n, size=None, random_state=None):
4984 """Draw random samples from a multivariate hypergeometric distribution.
4986 Parameters
4987 ----------
4988 %(_doc_default_callparams)s
4989 size : integer or iterable of integers, optional
4990 Number of samples to draw. Default is ``None``, in which case a
4991 single variate is returned as an array with shape ``m.shape``.
4992 %(_doc_random_state)s
4994 Returns
4995 -------
4996 rvs : array_like
4997 Random variates of shape ``size`` or ``m.shape``
4998 (if ``size=None``).
5000 Notes
5001 -----
5002 %(_doc_callparams_note)s
5004 Also note that NumPy's `multivariate_hypergeometric` sampler is not
5005 used as it doesn't support broadcasting.
5006 """
5007 M, m, n, _, _, _ = self._process_parameters(m, n)
5009 random_state = self._get_random_state(random_state)
5011 if size is not None and isinstance(size, int):
5012 size = (size, )
5014 if size is None:
5015 rvs = np.empty(m.shape, dtype=m.dtype)
5016 else:
5017 rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype)
5018 rem = M
5020 # This sampler has been taken from numpy gh-13794
5021 # https://github.com/numpy/numpy/pull/13794
5022 for c in range(m.shape[-1] - 1):
5023 rem = rem - m[..., c]
5024 n0mask = n == 0
5025 rvs[..., c] = (~n0mask *
5026 random_state.hypergeometric(m[..., c],
5027 rem + n0mask,
5028 n + n0mask,
5029 size=size))
5030 n = n - rvs[..., c]
5031 rvs[..., m.shape[-1] - 1] = n
5033 return rvs
5036multivariate_hypergeom = multivariate_hypergeom_gen()
5039class multivariate_hypergeom_frozen(multi_rv_frozen):
5040 def __init__(self, m, n, seed=None):
5041 self._dist = multivariate_hypergeom_gen(seed)
5042 (self.M, self.m, self.n,
5043 self.mcond, self.ncond,
5044 self.mncond) = self._dist._process_parameters(m, n)
5046 # monkey patch self._dist
5047 def _process_parameters(m, n):
5048 return (self.M, self.m, self.n,
5049 self.mcond, self.ncond,
5050 self.mncond)
5051 self._dist._process_parameters = _process_parameters
5053 def logpmf(self, x):
5054 return self._dist.logpmf(x, self.m, self.n)
5056 def pmf(self, x):
5057 return self._dist.pmf(x, self.m, self.n)
5059 def mean(self):
5060 return self._dist.mean(self.m, self.n)
5062 def var(self):
5063 return self._dist.var(self.m, self.n)
5065 def cov(self):
5066 return self._dist.cov(self.m, self.n)
5068 def rvs(self, size=1, random_state=None):
5069 return self._dist.rvs(self.m, self.n,
5070 size=size,
5071 random_state=random_state)
5074# Set frozen generator docstrings from corresponding docstrings in
5075# multivariate_hypergeom and fill in default strings in class docstrings
5076for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']:
5077 method = multivariate_hypergeom_gen.__dict__[name]
5078 method_frozen = multivariate_hypergeom_frozen.__dict__[name]
5079 method_frozen.__doc__ = doccer.docformat(
5080 method.__doc__, mhg_docdict_noparams)
5081 method.__doc__ = doccer.docformat(method.__doc__,
5082 mhg_docdict_params)
5085class random_table_gen(multi_rv_generic):
5086 r"""Contingency tables from independent samples with fixed marginal sums.
5088 This is the distribution of random tables with given row and column vector
5089 sums. This distribution represents the set of random tables under the null
5090 hypothesis that rows and columns are independent. It is used in hypothesis
5091 tests of independence.
5093 Because of assumed independence, the expected frequency of each table
5094 element can be computed from the row and column sums, so that the
5095 distribution is completely determined by these two vectors.
5097 Methods
5098 -------
5099 logpmf(x)
5100 Log-probability of table `x` to occur in the distribution.
5101 pmf(x)
5102 Probability of table `x` to occur in the distribution.
5103 mean(row, col)
5104 Mean table.
5105 rvs(row, col, size=None, method=None, random_state=None)
5106 Draw random tables with given row and column vector sums.
5108 Parameters
5109 ----------
5110 %(_doc_row_col)s
5111 %(_doc_random_state)s
5113 Notes
5114 -----
5115 %(_doc_row_col_note)s
5117 Random elements from the distribution are generated either with Boyett's
5118 [1]_ or Patefield's algorithm [2]_. Boyett's algorithm has
5119 O(N) time and space complexity, where N is the total sum of entries in the
5120 table. Patefield's algorithm has O(K x log(N)) time complexity, where K is
5121 the number of cells in the table and requires only a small constant work
5122 space. By default, the `rvs` method selects the fastest algorithm based on
5123 the input, but you can specify the algorithm with the keyword `method`.
5124 Allowed values are "boyett" and "patefield".
5126 .. versionadded:: 1.10.0
5128 Examples
5129 --------
5130 >>> from scipy.stats import random_table
5132 >>> row = [1, 5]
5133 >>> col = [2, 3, 1]
5134 >>> random_table.mean(row, col)
5135 array([[0.33333333, 0.5 , 0.16666667],
5136 [1.66666667, 2.5 , 0.83333333]])
5138 Alternatively, the object may be called (as a function) to fix the row
5139 and column vector sums, returning a "frozen" distribution.
5141 >>> dist = random_table(row, col)
5142 >>> dist.rvs(random_state=123)
5143 array([[1., 0., 0.],
5144 [1., 3., 1.]])
5146 References
5147 ----------
5148 .. [1] J. Boyett, AS 144 Appl. Statist. 28 (1979) 329-332
5149 .. [2] W.M. Patefield, AS 159 Appl. Statist. 30 (1981) 91-97
5150 """
5152 def __init__(self, seed=None):
5153 super().__init__(seed)
5155 def __call__(self, row, col, *, seed=None):
5156 """Create a frozen distribution of tables with given marginals.
5158 See `random_table_frozen` for more information.
5159 """
5160 return random_table_frozen(row, col, seed=seed)
5162 def logpmf(self, x, row, col):
5163 """Log-probability of table to occur in the distribution.
5165 Parameters
5166 ----------
5167 %(_doc_x)s
5168 %(_doc_row_col)s
5170 Returns
5171 -------
5172 logpmf : ndarray or scalar
5173 Log of the probability mass function evaluated at `x`.
5175 Notes
5176 -----
5177 %(_doc_row_col_note)s
5179 If row and column marginals of `x` do not match `row` and `col`,
5180 negative infinity is returned.
5182 Examples
5183 --------
5184 >>> from scipy.stats import random_table
5185 >>> import numpy as np
5187 >>> x = [[1, 5, 1], [2, 3, 1]]
5188 >>> row = np.sum(x, axis=1)
5189 >>> col = np.sum(x, axis=0)
5190 >>> random_table.logpmf(x, row, col)
5191 -1.6306401200847027
5193 Alternatively, the object may be called (as a function) to fix the row
5194 and column vector sums, returning a "frozen" distribution.
5196 >>> d = random_table(row, col)
5197 >>> d.logpmf(x)
5198 -1.6306401200847027
5199 """
5200 r, c, n = self._process_parameters(row, col)
5201 x = np.asarray(x)
5203 if x.ndim < 2:
5204 raise ValueError("`x` must be at least two-dimensional")
5206 dtype_is_int = np.issubdtype(x.dtype, np.integer)
5207 with np.errstate(invalid='ignore'):
5208 if not dtype_is_int and not np.all(x.astype(int) == x):
5209 raise ValueError("`x` must contain only integral values")
5211 # x does not contain NaN if we arrive here
5212 if np.any(x < 0):
5213 raise ValueError("`x` must contain only non-negative values")
5215 r2 = np.sum(x, axis=-1)
5216 c2 = np.sum(x, axis=-2)
5218 if r2.shape[-1] != len(r):
5219 raise ValueError("shape of `x` must agree with `row`")
5221 if c2.shape[-1] != len(c):
5222 raise ValueError("shape of `x` must agree with `col`")
5224 res = np.empty(x.shape[:-2])
5226 mask = np.all(r2 == r, axis=-1) & np.all(c2 == c, axis=-1)
5228 def lnfac(x):
5229 return gammaln(x + 1)
5231 res[mask] = (np.sum(lnfac(r), axis=-1) + np.sum(lnfac(c), axis=-1)
5232 - lnfac(n) - np.sum(lnfac(x[mask]), axis=(-1, -2)))
5233 res[~mask] = -np.inf
5235 return res[()]
5237 def pmf(self, x, row, col):
5238 """Probability of table to occur in the distribution.
5240 Parameters
5241 ----------
5242 %(_doc_x)s
5243 %(_doc_row_col)s
5245 Returns
5246 -------
5247 pmf : ndarray or scalar
5248 Probability mass function evaluated at `x`.
5250 Notes
5251 -----
5252 %(_doc_row_col_note)s
5254 If row and column marginals of `x` do not match `row` and `col`,
5255 zero is returned.
5257 Examples
5258 --------
5259 >>> from scipy.stats import random_table
5260 >>> import numpy as np
5262 >>> x = [[1, 5, 1], [2, 3, 1]]
5263 >>> row = np.sum(x, axis=1)
5264 >>> col = np.sum(x, axis=0)
5265 >>> random_table.pmf(x, row, col)
5266 0.19580419580419592
5268 Alternatively, the object may be called (as a function) to fix the row
5269 and column vector sums, returning a "frozen" distribution.
5271 >>> d = random_table(row, col)
5272 >>> d.pmf(x)
5273 0.19580419580419592
5274 """
5275 return np.exp(self.logpmf(x, row, col))
5277 def mean(self, row, col):
5278 """Mean of distribution of conditional tables.
5279 %(_doc_mean_params)s
5281 Returns
5282 -------
5283 mean: ndarray
5284 Mean of the distribution.
5286 Notes
5287 -----
5288 %(_doc_row_col_note)s
5290 Examples
5291 --------
5292 >>> from scipy.stats import random_table
5294 >>> row = [1, 5]
5295 >>> col = [2, 3, 1]
5296 >>> random_table.mean(row, col)
5297 array([[0.33333333, 0.5 , 0.16666667],
5298 [1.66666667, 2.5 , 0.83333333]])
5300 Alternatively, the object may be called (as a function) to fix the row
5301 and column vector sums, returning a "frozen" distribution.
5303 >>> d = random_table(row, col)
5304 >>> d.mean()
5305 array([[0.33333333, 0.5 , 0.16666667],
5306 [1.66666667, 2.5 , 0.83333333]])
5307 """
5308 r, c, n = self._process_parameters(row, col)
5309 return np.outer(r, c) / n
5311 def rvs(self, row, col, *, size=None, method=None, random_state=None):
5312 """Draw random tables with fixed column and row marginals.
5314 Parameters
5315 ----------
5316 %(_doc_row_col)s
5317 size : integer, optional
5318 Number of samples to draw (default 1).
5319 method : str, optional
5320 Which method to use, "boyett" or "patefield". If None (default),
5321 selects the fastest method for this input.
5322 %(_doc_random_state)s
5324 Returns
5325 -------
5326 rvs : ndarray
5327 Random 2D tables of shape (`size`, `len(row)`, `len(col)`).
5329 Notes
5330 -----
5331 %(_doc_row_col_note)s
5333 Examples
5334 --------
5335 >>> from scipy.stats import random_table
5337 >>> row = [1, 5]
5338 >>> col = [2, 3, 1]
5339 >>> random_table.rvs(row, col, random_state=123)
5340 array([[1., 0., 0.],
5341 [1., 3., 1.]])
5343 Alternatively, the object may be called (as a function) to fix the row
5344 and column vector sums, returning a "frozen" distribution.
5346 >>> d = random_table(row, col)
5347 >>> d.rvs(random_state=123)
5348 array([[1., 0., 0.],
5349 [1., 3., 1.]])
5350 """
5351 r, c, n = self._process_parameters(row, col)
5352 size, shape = self._process_size_shape(size, r, c)
5354 random_state = self._get_random_state(random_state)
5355 meth = self._process_rvs_method(method, r, c, n)
5357 return meth(r, c, n, size, random_state).reshape(shape)
5359 @staticmethod
5360 def _process_parameters(row, col):
5361 """
5362 Check that row and column vectors are one-dimensional, that they do
5363 not contain negative or non-integer entries, and that the sums over
5364 both vectors are equal.
5365 """
5366 r = np.array(row, dtype=np.int64, copy=True)
5367 c = np.array(col, dtype=np.int64, copy=True)
5369 if np.ndim(r) != 1:
5370 raise ValueError("`row` must be one-dimensional")
5371 if np.ndim(c) != 1:
5372 raise ValueError("`col` must be one-dimensional")
5374 if np.any(r < 0):
5375 raise ValueError("each element of `row` must be non-negative")
5376 if np.any(c < 0):
5377 raise ValueError("each element of `col` must be non-negative")
5379 n = np.sum(r)
5380 if n != np.sum(c):
5381 raise ValueError("sums over `row` and `col` must be equal")
5383 if not np.all(r == np.asarray(row)):
5384 raise ValueError("each element of `row` must be an integer")
5385 if not np.all(c == np.asarray(col)):
5386 raise ValueError("each element of `col` must be an integer")
5388 return r, c, n
5390 @staticmethod
5391 def _process_size_shape(size, r, c):
5392 """
5393 Compute the number of samples to be drawn and the shape of the output
5394 """
5395 shape = (len(r), len(c))
5397 if size is None:
5398 return 1, shape
5400 size = np.atleast_1d(size)
5401 if not np.issubdtype(size.dtype, np.integer) or np.any(size < 0):
5402 raise ValueError("`size` must be a non-negative integer or `None`")
5404 return np.prod(size), tuple(size) + shape
5406 @classmethod
5407 def _process_rvs_method(cls, method, r, c, n):
5408 known_methods = {
5409 None: cls._rvs_select(r, c, n),
5410 "boyett": cls._rvs_boyett,
5411 "patefield": cls._rvs_patefield,
5412 }
5413 try:
5414 return known_methods[method]
5415 except KeyError:
5416 raise ValueError(f"'{method}' not recognized, "
5417 f"must be one of {set(known_methods)}")
5419 @classmethod
5420 def _rvs_select(cls, r, c, n):
5421 fac = 1.0 # benchmarks show that this value is about 1
5422 k = len(r) * len(c) # number of cells
5423 # n + 1 guards against failure if n == 0
5424 if n > fac * np.log(n + 1) * k:
5425 return cls._rvs_patefield
5426 return cls._rvs_boyett
5428 @staticmethod
5429 def _rvs_boyett(row, col, ntot, size, random_state):
5430 return _rcont.rvs_rcont1(row, col, ntot, size, random_state)
5432 @staticmethod
5433 def _rvs_patefield(row, col, ntot, size, random_state):
5434 return _rcont.rvs_rcont2(row, col, ntot, size, random_state)
5437random_table = random_table_gen()
5440class random_table_frozen(multi_rv_frozen):
5441 def __init__(self, row, col, *, seed=None):
5442 self._dist = random_table_gen(seed)
5443 self._params = self._dist._process_parameters(row, col)
5445 # monkey patch self._dist
5446 def _process_parameters(r, c):
5447 return self._params
5448 self._dist._process_parameters = _process_parameters
5450 def logpmf(self, x):
5451 return self._dist.logpmf(x, None, None)
5453 def pmf(self, x):
5454 return self._dist.pmf(x, None, None)
5456 def mean(self):
5457 return self._dist.mean(None, None)
5459 def rvs(self, size=None, method=None, random_state=None):
5460 # optimisations are possible here
5461 return self._dist.rvs(None, None, size=size, method=method,
5462 random_state=random_state)
5465_ctab_doc_row_col = """\
5466row : array_like
5467 Sum of table entries in each row.
5468col : array_like
5469 Sum of table entries in each column."""
5471_ctab_doc_x = """\
5472x : array-like
5473 Two-dimensional table of non-negative integers, or a
5474 multi-dimensional array with the last two dimensions
5475 corresponding with the tables."""
5477_ctab_doc_row_col_note = """\
5478The row and column vectors must be one-dimensional, not empty,
5479and each sum up to the same value. They cannot contain negative
5480or noninteger entries."""
5482_ctab_doc_mean_params = f"""
5483Parameters
5484----------
5485{_ctab_doc_row_col}"""
5487_ctab_doc_row_col_note_frozen = """\
5488See class definition for a detailed description of parameters."""
5490_ctab_docdict = {
5491 "_doc_random_state": _doc_random_state,
5492 "_doc_row_col": _ctab_doc_row_col,
5493 "_doc_x": _ctab_doc_x,
5494 "_doc_mean_params": _ctab_doc_mean_params,
5495 "_doc_row_col_note": _ctab_doc_row_col_note,
5496}
5498_ctab_docdict_frozen = _ctab_docdict.copy()
5499_ctab_docdict_frozen.update({
5500 "_doc_row_col": "",
5501 "_doc_mean_params": "",
5502 "_doc_row_col_note": _ctab_doc_row_col_note_frozen,
5503})
5506def _docfill(obj, docdict, template=None):
5507 obj.__doc__ = doccer.docformat(template or obj.__doc__, docdict)
5510# Set frozen generator docstrings from corresponding docstrings in
5511# random_table and fill in default strings in class docstrings
5512_docfill(random_table_gen, _ctab_docdict)
5513for name in ['logpmf', 'pmf', 'mean', 'rvs']:
5514 method = random_table_gen.__dict__[name]
5515 method_frozen = random_table_frozen.__dict__[name]
5516 _docfill(method_frozen, _ctab_docdict_frozen, method.__doc__)
5517 _docfill(method, _ctab_docdict)
5520class uniform_direction_gen(multi_rv_generic):
5521 r"""A vector-valued uniform direction.
5523 Return a random direction (unit vector). The `dim` keyword specifies
5524 the dimensionality of the space.
5526 Methods
5527 -------
5528 rvs(dim=None, size=1, random_state=None)
5529 Draw random directions.
5531 Parameters
5532 ----------
5533 dim : scalar
5534 Dimension of directions.
5535 seed : {None, int, `numpy.random.Generator`,
5536 `numpy.random.RandomState`}, optional
5538 Used for drawing random variates.
5539 If `seed` is `None`, the `~np.random.RandomState` singleton is used.
5540 If `seed` is an int, a new ``RandomState`` instance is used, seeded
5541 with seed.
5542 If `seed` is already a ``RandomState`` or ``Generator`` instance,
5543 then that object is used.
5544 Default is `None`.
5546 Notes
5547 -----
5548 This distribution generates unit vectors uniformly distributed on
5549 the surface of a hypersphere. These can be interpreted as random
5550 directions.
5551 For example, if `dim` is 3, 3D vectors from the surface of :math:`S^2`
5552 will be sampled.
5554 References
5555 ----------
5556 .. [1] Marsaglia, G. (1972). "Choosing a Point from the Surface of a
5557 Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
5559 Examples
5560 --------
5561 >>> import numpy as np
5562 >>> from scipy.stats import uniform_direction
5563 >>> x = uniform_direction.rvs(3)
5564 >>> np.linalg.norm(x)
5565 1.
5567 This generates one random direction, a vector on the surface of
5568 :math:`S^2`.
5570 Alternatively, the object may be called (as a function) to return a frozen
5571 distribution with fixed `dim` parameter. Here,
5572 we create a `uniform_direction` with ``dim=3`` and draw 5 observations.
5573 The samples are then arranged in an array of shape 5x3.
5575 >>> rng = np.random.default_rng()
5576 >>> uniform_sphere_dist = uniform_direction(3)
5577 >>> unit_vectors = uniform_sphere_dist.rvs(5, random_state=rng)
5578 >>> unit_vectors
5579 array([[ 0.56688642, -0.1332634 , -0.81294566],
5580 [-0.427126 , -0.74779278, 0.50830044],
5581 [ 0.3793989 , 0.92346629, 0.05715323],
5582 [ 0.36428383, -0.92449076, -0.11231259],
5583 [-0.27733285, 0.94410968, -0.17816678]])
5584 """
5586 def __init__(self, seed=None):
5587 super().__init__(seed)
5588 self.__doc__ = doccer.docformat(self.__doc__)
5590 def __call__(self, dim=None, seed=None):
5591 """Create a frozen n-dimensional uniform direction distribution.
5593 See `uniform_direction` for more information.
5594 """
5595 return uniform_direction_frozen(dim, seed=seed)
5597 def _process_parameters(self, dim):
5598 """Dimension N must be specified; it cannot be inferred."""
5599 if dim is None or not np.isscalar(dim) or dim < 1 or dim != int(dim):
5600 raise ValueError("Dimension of vector must be specified, "
5601 "and must be an integer greater than 0.")
5603 return int(dim)
5605 def rvs(self, dim, size=None, random_state=None):
5606 """Draw random samples from S(N-1).
5608 Parameters
5609 ----------
5610 dim : integer
5611 Dimension of space (N).
5612 size : int or tuple of ints, optional
5613 Given a shape of, for example, (m,n,k), m*n*k samples are
5614 generated, and packed in an m-by-n-by-k arrangement.
5615 Because each sample is N-dimensional, the output shape
5616 is (m,n,k,N). If no shape is specified, a single (N-D)
5617 sample is returned.
5618 random_state : {None, int, `numpy.random.Generator`,
5619 `numpy.random.RandomState`}, optional
5621 Pseudorandom number generator state used to generate resamples.
5623 If `random_state` is ``None`` (or `np.random`), the
5624 `numpy.random.RandomState` singleton is used.
5625 If `random_state` is an int, a new ``RandomState`` instance is
5626 used, seeded with `random_state`.
5627 If `random_state` is already a ``Generator`` or ``RandomState``
5628 instance then that instance is used.
5630 Returns
5631 -------
5632 rvs : ndarray
5633 Random direction vectors
5635 """
5636 random_state = self._get_random_state(random_state)
5637 if size is None:
5638 size = np.array([], dtype=int)
5639 size = np.atleast_1d(size)
5641 dim = self._process_parameters(dim)
5643 samples = _sample_uniform_direction(dim, size, random_state)
5644 return samples
5647uniform_direction = uniform_direction_gen()
5650class uniform_direction_frozen(multi_rv_frozen):
5651 def __init__(self, dim=None, seed=None):
5652 """Create a frozen n-dimensional uniform direction distribution.
5654 Parameters
5655 ----------
5656 dim : int
5657 Dimension of matrices
5658 seed : {None, int, `numpy.random.Generator`,
5659 `numpy.random.RandomState`}, optional
5661 If `seed` is None (or `np.random`), the `numpy.random.RandomState`
5662 singleton is used.
5663 If `seed` is an int, a new ``RandomState`` instance is used,
5664 seeded with `seed`.
5665 If `seed` is already a ``Generator`` or ``RandomState`` instance
5666 then that instance is used.
5668 Examples
5669 --------
5670 >>> from scipy.stats import uniform_direction
5671 >>> x = uniform_direction(3)
5672 >>> x.rvs()
5674 """
5675 self._dist = uniform_direction_gen(seed)
5676 self.dim = self._dist._process_parameters(dim)
5678 def rvs(self, size=None, random_state=None):
5679 return self._dist.rvs(self.dim, size, random_state)
5682def _sample_uniform_direction(dim, size, random_state):
5683 """
5684 Private method to generate uniform directions
5685 Reference: Marsaglia, G. (1972). "Choosing a Point from the Surface of a
5686 Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
5687 """
5688 samples_shape = np.append(size, dim)
5689 samples = random_state.standard_normal(samples_shape)
5690 samples /= np.linalg.norm(samples, axis=-1, keepdims=True)
5691 return samples