Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_distn

2# Author: Travis Oliphant 2002-2011 with contributions from

3# SciPy Developers 2004-2011

5from scipy._lib._util import getfullargspec_no_self as _getfullargspec

7import sys

8import keyword

9import re

10import types

11import warnings

12from itertools import zip_longest

14from scipy._lib import doccer

15from ._distr_params import distcont, distdiscrete

16from scipy._lib._util import check_random_state

18from scipy.special import comb, entr

20# for root finding for continuous distribution ppf, and max likelihood

21# estimation

22from scipy import optimize

24# for functions of continuous distributions (e.g. moments, entropy, cdf)

25from scipy import integrate

27# to approximate the pdf of a continuous distribution given its cdf

28from scipy._lib._finite_differences import _derivative

30# for scipy.stats.entropy. Attempts to import just that function or file

31# have cause import problems

32from scipy import stats

34from numpy import (arange, putmask, ravel, ones, shape, ndarray, zeros, floor,

35 logical_and, log, sqrt, place, argmax, vectorize, asarray,

36 nan, inf, isinf, NINF, empty)

38import numpy as np

39from ._constants import _XMAX

40from scipy.stats._warnings_errors import FitError

42# These are the docstring parts used for substitution in specific

43# distribution docstrings

45docheaders = {'methods': """\nMethods\n-------\n""",

46 'notes': """\nNotes\n-----\n""",

47 'examples': """\nExamples\n--------\n"""}

49_doc_rvs = """\

50rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)

51 Random variates.

52"""

53_doc_pdf = """\

54pdf(x, %(shapes)s, loc=0, scale=1)

55 Probability density function.

56"""

57_doc_logpdf = """\

58logpdf(x, %(shapes)s, loc=0, scale=1)

59 Log of the probability density function.

60"""

61_doc_pmf = """\

62pmf(k, %(shapes)s, loc=0, scale=1)

63 Probability mass function.

64"""

65_doc_logpmf = """\

66logpmf(k, %(shapes)s, loc=0, scale=1)

67 Log of the probability mass function.

68"""

69_doc_cdf = """\

70cdf(x, %(shapes)s, loc=0, scale=1)

71 Cumulative distribution function.

72"""

73_doc_logcdf = """\

74logcdf(x, %(shapes)s, loc=0, scale=1)

75 Log of the cumulative distribution function.

76"""

77_doc_sf = """\

78sf(x, %(shapes)s, loc=0, scale=1)

79 Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).

80"""

81_doc_logsf = """\

82logsf(x, %(shapes)s, loc=0, scale=1)

83 Log of the survival function.

84"""

85_doc_ppf = """\

86ppf(q, %(shapes)s, loc=0, scale=1)

87 Percent point function (inverse of ``cdf`` --- percentiles).

88"""

89_doc_isf = """\

90isf(q, %(shapes)s, loc=0, scale=1)

91 Inverse survival function (inverse of ``sf``).

92"""

93_doc_moment = """\

94moment(order, %(shapes)s, loc=0, scale=1)

95 Non-central moment of the specified order.

96"""

97_doc_stats = """\

98stats(%(shapes)s, loc=0, scale=1, moments='mv')

99 Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').

100"""

101_doc_entropy = """\

102entropy(%(shapes)s, loc=0, scale=1)

103 (Differential) entropy of the RV.

104"""

105_doc_fit = """\

106fit(data)

107 Parameter estimates for generic data.

108 See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the

109 keyword arguments.

110"""

111_doc_expect = """\

112expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)

113 Expected value of a function (of one argument) with respect to the distribution.

114"""

115_doc_expect_discrete = """\

116expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)

117 Expected value of a function (of one argument) with respect to the distribution.

118"""

119_doc_median = """\

120median(%(shapes)s, loc=0, scale=1)

121 Median of the distribution.

122"""

123_doc_mean = """\

124mean(%(shapes)s, loc=0, scale=1)

125 Mean of the distribution.

126"""

127_doc_var = """\

128var(%(shapes)s, loc=0, scale=1)

129 Variance of the distribution.

130"""

131_doc_std = """\

132std(%(shapes)s, loc=0, scale=1)

133 Standard deviation of the distribution.

134"""

135_doc_interval = """\

136interval(confidence, %(shapes)s, loc=0, scale=1)

137 Confidence interval with equal areas around the median.

138"""

139_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,

140 _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,

141 _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,

142 _doc_stats, _doc_entropy, _doc_fit,

143 _doc_expect, _doc_median,

144 _doc_mean, _doc_var, _doc_std, _doc_interval])

145

146_doc_default_longsummary = """\

147As an instance of the `rv_continuous` class, `%(name)s` object inherits from it

148a collection of generic methods (see below for the full list),

149and completes them with details specific for this particular distribution.

150"""

151

152_doc_default_frozen_note = """

153Alternatively, the object may be called (as a function) to fix the shape,

154location, and scale parameters returning a "frozen" continuous RV object:

155

156rv = %(name)s(%(shapes)s, loc=0, scale=1)

157 - Frozen RV object with the same methods but holding the given shape,

158 location, and scale fixed.

159"""

160_doc_default_example = """\

161Examples

162--------

163>>> import numpy as np

164>>> from scipy.stats import %(name)s

165>>> import matplotlib.pyplot as plt

166>>> fig, ax = plt.subplots(1, 1)

167

168Calculate the first four moments:

169

170%(set_vals_stmt)s

171>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

172

173Display the probability density function (``pdf``):

174

175>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),

176... %(name)s.ppf(0.99, %(shapes)s), 100)

177>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),

178... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')

179

180Alternatively, the distribution object can be called (as a function)

181to fix the shape, location and scale parameters. This returns a "frozen"

182RV object holding the given parameters fixed.

183

184Freeze the distribution and display the frozen ``pdf``:

185

186>>> rv = %(name)s(%(shapes)s)

187>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

188

189Check accuracy of ``cdf`` and ``ppf``:

190

191>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)

192>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))

193True

194

195Generate random numbers:

196

197>>> r = %(name)s.rvs(%(shapes)s, size=1000)

198

199And compare the histogram:

200

201>>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)

202>>> ax.set_xlim([x[0], x[-1]])

203>>> ax.legend(loc='best', frameon=False)

204>>> plt.show()

205

206"""

207

208_doc_default_locscale = """\

209The probability density above is defined in the "standardized" form. To shift

210and/or scale the distribution use the ``loc`` and ``scale`` parameters.

211Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically

212equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with

213``y = (x - loc) / scale``. Note that shifting the location of a distribution

214does not make it a "noncentral" distribution; noncentral generalizations of

215some distributions are available in separate classes.

216"""

217

218_doc_default = ''.join([_doc_default_longsummary,

219 _doc_allmethods,

220 '\n',

221 _doc_default_example])

222

223_doc_default_before_notes = ''.join([_doc_default_longsummary,

224 _doc_allmethods])

225

226docdict = {

227 'rvs': _doc_rvs,

228 'pdf': _doc_pdf,

229 'logpdf': _doc_logpdf,

230 'cdf': _doc_cdf,

231 'logcdf': _doc_logcdf,

232 'sf': _doc_sf,

233 'logsf': _doc_logsf,

234 'ppf': _doc_ppf,

235 'isf': _doc_isf,

236 'stats': _doc_stats,

237 'entropy': _doc_entropy,

238 'fit': _doc_fit,

239 'moment': _doc_moment,

240 'expect': _doc_expect,

241 'interval': _doc_interval,

242 'mean': _doc_mean,

243 'std': _doc_std,

244 'var': _doc_var,

245 'median': _doc_median,

246 'allmethods': _doc_allmethods,

247 'longsummary': _doc_default_longsummary,

248 'frozennote': _doc_default_frozen_note,

249 'example': _doc_default_example,

250 'default': _doc_default,

251 'before_notes': _doc_default_before_notes,

252 'after_notes': _doc_default_locscale

253}

254

255# Reuse common content between continuous and discrete docs, change some

256# minor bits.

257docdict_discrete = docdict.copy()

258

259docdict_discrete['pmf'] = _doc_pmf

260docdict_discrete['logpmf'] = _doc_logpmf

261docdict_discrete['expect'] = _doc_expect_discrete

262_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',

263 'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',

264 'mean', 'var', 'std', 'interval']

265for obj in _doc_disc_methods:

266 docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')

267

268_doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']

269for obj in _doc_disc_methods_err_varname:

270 docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')

271

272docdict_discrete.pop('pdf')

273docdict_discrete.pop('logpdf')

274

275_doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])

276docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods

277

278docdict_discrete['longsummary'] = _doc_default_longsummary.replace(

279 'rv_continuous', 'rv_discrete')

280

281_doc_default_frozen_note = """

282Alternatively, the object may be called (as a function) to fix the shape and

283location parameters returning a "frozen" discrete RV object:

284

285rv = %(name)s(%(shapes)s, loc=0)

286 - Frozen RV object with the same methods but holding the given shape and

287 location fixed.

288"""

289docdict_discrete['frozennote'] = _doc_default_frozen_note

290

291_doc_default_discrete_example = """\

292Examples

293--------

294>>> import numpy as np

295>>> from scipy.stats import %(name)s

296>>> import matplotlib.pyplot as plt

297>>> fig, ax = plt.subplots(1, 1)

298

299Calculate the first four moments:

300

301%(set_vals_stmt)s

302>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

303

304Display the probability mass function (``pmf``):

305

306>>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),

307... %(name)s.ppf(0.99, %(shapes)s))

308>>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')

309>>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)

310

311Alternatively, the distribution object can be called (as a function)

312to fix the shape and location. This returns a "frozen" RV object holding

313the given parameters fixed.

314

315Freeze the distribution and display the frozen ``pmf``:

316

317>>> rv = %(name)s(%(shapes)s)

318>>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,

319... label='frozen pmf')

320>>> ax.legend(loc='best', frameon=False)

321>>> plt.show()

322

323Check accuracy of ``cdf`` and ``ppf``:

324

325>>> prob = %(name)s.cdf(x, %(shapes)s)

326>>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))

327True

328

329Generate random numbers:

330

331>>> r = %(name)s.rvs(%(shapes)s, size=1000)

332"""

333

334

335_doc_default_discrete_locscale = """\

336The probability mass function above is defined in the "standardized" form.

337To shift distribution use the ``loc`` parameter.

338Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically

339equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.

340"""

341

342docdict_discrete['example'] = _doc_default_discrete_example

343docdict_discrete['after_notes'] = _doc_default_discrete_locscale

344

345_doc_default_before_notes = ''.join([docdict_discrete['longsummary'],

346 docdict_discrete['allmethods']])

347docdict_discrete['before_notes'] = _doc_default_before_notes

348

349_doc_default_disc = ''.join([docdict_discrete['longsummary'],

350 docdict_discrete['allmethods'],

351 docdict_discrete['frozennote'],

352 docdict_discrete['example']])

353docdict_discrete['default'] = _doc_default_disc

354

355# clean up all the separate docstring elements, we do not need them anymore

356for obj in [s for s in dir() if s.startswith('_doc_')]:

357 exec('del ' + obj)

358del obj

359

360

361def _moment(data, n, mu=None):

362 if mu is None:

363 mu = data.mean()

364 return ((data - mu)**n).mean()

365

366

367def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):

368 if (n == 0):

369 return 1.0

370 elif (n == 1):

371 if mu is None:

372 val = moment_func(1, *args)

373 else:

374 val = mu

375 elif (n == 2):

376 if mu2 is None or mu is None:

377 val = moment_func(2, *args)

378 else:

379 val = mu2 + mu*mu

380 elif (n == 3):

381 if g1 is None or mu2 is None or mu is None:

382 val = moment_func(3, *args)

383 else:

384 mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment

385 val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment

386 elif (n == 4):

387 if g1 is None or g2 is None or mu2 is None or mu is None:

388 val = moment_func(4, *args)

389 else:

390 mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment

391 mu3 = g1*np.power(mu2, 1.5) # 3rd central moment

392 val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu

393 else:

394 val = moment_func(n, *args)

395

396 return val

397

398

399def _skew(data):

400 """

401 skew is third central moment / variance**(1.5)

402 """

403 data = np.ravel(data)

404 mu = data.mean()

405 m2 = ((data - mu)**2).mean()

406 m3 = ((data - mu)**3).mean()

407 return m3 / np.power(m2, 1.5)

408

409

410def _kurtosis(data):

411 """kurtosis is fourth central moment / variance**2 - 3."""

412 data = np.ravel(data)

413 mu = data.mean()

414 m2 = ((data - mu)**2).mean()

415 m4 = ((data - mu)**4).mean()

416 return m4 / m2**2 - 3

417

418

419def _fit_determine_optimizer(optimizer):

420 if not callable(optimizer) and isinstance(optimizer, str):

421 if not optimizer.startswith('fmin_'):

422 optimizer = "fmin_"+optimizer

423 if optimizer == 'fmin_':

424 optimizer = 'fmin'

425 try:

426 optimizer = getattr(optimize, optimizer)

427 except AttributeError as e:

428 raise ValueError("%s is not a valid optimizer" % optimizer) from e

429 return optimizer

430

431

432# Frozen RV class

433class rv_frozen:

434

435 def __init__(self, dist, *args, **kwds):

436 self.args = args

437 self.kwds = kwds

438

439 # create a new instance

440 self.dist = dist.__class__(**dist._updated_ctor_param())

441

442 shapes, _, _ = self.dist._parse_args(*args, **kwds)

443 self.a, self.b = self.dist._get_support(*shapes)

444

445 @property

446 def random_state(self):

447 return self.dist._random_state

448

449 @random_state.setter

450 def random_state(self, seed):

451 self.dist._random_state = check_random_state(seed)

452

453 def cdf(self, x):

454 return self.dist.cdf(x, *self.args, **self.kwds)

455

456 def logcdf(self, x):

457 return self.dist.logcdf(x, *self.args, **self.kwds)

458

459 def ppf(self, q):

460 return self.dist.ppf(q, *self.args, **self.kwds)

461

462 def isf(self, q):

463 return self.dist.isf(q, *self.args, **self.kwds)

464

465 def rvs(self, size=None, random_state=None):

466 kwds = self.kwds.copy()

467 kwds.update({'size': size, 'random_state': random_state})

468 return self.dist.rvs(*self.args, **kwds)

469

470 def sf(self, x):

471 return self.dist.sf(x, *self.args, **self.kwds)

472

473 def logsf(self, x):

474 return self.dist.logsf(x, *self.args, **self.kwds)

475

476 def stats(self, moments='mv'):

477 kwds = self.kwds.copy()

478 kwds.update({'moments': moments})

479 return self.dist.stats(*self.args, **kwds)

480

481 def median(self):

482 return self.dist.median(*self.args, **self.kwds)

483

484 def mean(self):

485 return self.dist.mean(*self.args, **self.kwds)

486

487 def var(self):

488 return self.dist.var(*self.args, **self.kwds)

489

490 def std(self):

491 return self.dist.std(*self.args, **self.kwds)

492

493 def moment(self, order=None, **kwds):

494 return self.dist.moment(order, *self.args, **self.kwds, **kwds)

495

496 def entropy(self):

497 return self.dist.entropy(*self.args, **self.kwds)

498

499 def interval(self, confidence=None, **kwds):

500 return self.dist.interval(confidence, *self.args, **self.kwds, **kwds)

501

502 def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):

503 # expect method only accepts shape parameters as positional args

504 # hence convert self.args, self.kwds, also loc/scale

505 # See the .expect method docstrings for the meaning of

506 # other parameters.

507 a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)

508 if isinstance(self.dist, rv_discrete):

509 return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)

510 else:

511 return self.dist.expect(func, a, loc, scale, lb, ub,

512 conditional, **kwds)

513

514 def support(self):

515 return self.dist.support(*self.args, **self.kwds)

516

517

518class rv_discrete_frozen(rv_frozen):

519

520 def pmf(self, k):

521 return self.dist.pmf(k, *self.args, **self.kwds)

522

523 def logpmf(self, k): # No error

524 return self.dist.logpmf(k, *self.args, **self.kwds)

525

526

527class rv_continuous_frozen(rv_frozen):

528

529 def pdf(self, x):

530 return self.dist.pdf(x, *self.args, **self.kwds)

531

532 def logpdf(self, x):

533 return self.dist.logpdf(x, *self.args, **self.kwds)

534

535

536def argsreduce(cond, *args):

537 """Clean arguments to:

538

539 1. Ensure all arguments are iterable (arrays of dimension at least one

540 2. If cond != True and size > 1, ravel(args[i]) where ravel(condition) is

541 True, in 1D.

542

543 Return list of processed arguments.

544

545 Examples

546 --------

547 >>> import numpy as np

548 >>> rng = np.random.default_rng()

549 >>> A = rng.random((4, 5))

550 >>> B = 2

551 >>> C = rng.random((1, 5))

552 >>> cond = np.ones(A.shape)

553 >>> [A1, B1, C1] = argsreduce(cond, A, B, C)

554 >>> A1.shape

555 (4, 5)

556 >>> B1.shape

557 (1,)

558 >>> C1.shape

559 (1, 5)

560 >>> cond[2,:] = 0

561 >>> [A1, B1, C1] = argsreduce(cond, A, B, C)

562 >>> A1.shape

563 (15,)

564 >>> B1.shape

565 (1,)

566 >>> C1.shape

567 (15,)

568

569 """

570 # some distributions assume arguments are iterable.

571 newargs = np.atleast_1d(*args)

572

573 # np.atleast_1d returns an array if only one argument, or a list of arrays

574 # if more than one argument.

575 if not isinstance(newargs, list):

576 newargs = [newargs, ]

577

578 if np.all(cond):

579 # broadcast arrays with cond

580 *newargs, cond = np.broadcast_arrays(*newargs, cond)

581 return [arg.ravel() for arg in newargs]

582

583 s = cond.shape

584 # np.extract returns flattened arrays, which are not broadcastable together

585 # unless they are either the same size or size == 1.

586 return [(arg if np.size(arg) == 1

587 else np.extract(cond, np.broadcast_to(arg, s)))

588 for arg in newargs]

589

590

591parse_arg_template = """

592def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):

593 return (%(shape_arg_str)s), %(locscale_out)s

594

595def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):

596 return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)

597

598def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):

599 return (%(shape_arg_str)s), %(locscale_out)s, moments

600"""

601

602

603class rv_generic:

604 """Class which encapsulates common functionality between rv_discrete

605 and rv_continuous.

606

607 """

608 def __init__(self, seed=None):

609 super().__init__()

610

611 # figure out if _stats signature has 'moments' keyword

612 sig = _getfullargspec(self._stats)

613 self._stats_has_moments = ((sig.varkw is not None) or

614 ('moments' in sig.args) or

615 ('moments' in sig.kwonlyargs))

616 self._random_state = check_random_state(seed)

617

618 @property

619 def random_state(self):

620 """Get or set the generator object for generating random variates.

621

622 If `random_state` is None (or `np.random`), the

623 `numpy.random.RandomState` singleton is used.

624 If `random_state` is an int, a new ``RandomState`` instance is used,

625 seeded with `random_state`.

626 If `random_state` is already a ``Generator`` or ``RandomState``

627 instance, that instance is used.

628

629 """

630 return self._random_state

631

632 @random_state.setter

633 def random_state(self, seed):

634 self._random_state = check_random_state(seed)

635

636 def __setstate__(self, state):

637 try:

638 self.__dict__.update(state)

639 # attaches the dynamically created methods on each instance.

640 # if a subclass overrides rv_generic.__setstate__, or implements

641 # it's own _attach_methods, then it must make sure that

642 # _attach_argparser_methods is called.

643 self._attach_methods()

644 except ValueError:

645 # reconstitute an old pickle scipy<1.6, that contains

646 # (_ctor_param, random_state) as state

647 self._ctor_param = state[0]

648 self._random_state = state[1]

649 self.__init__()

650

651 def _attach_methods(self):

652 """Attaches dynamically created methods to the rv_* instance.

653

654 This method must be overridden by subclasses, and must itself call

655 _attach_argparser_methods. This method is called in __init__ in

656 subclasses, and in __setstate__

657 """

658 raise NotImplementedError

659

660 def _attach_argparser_methods(self):

661 """

662 Generates the argument-parsing functions dynamically and attaches

663 them to the instance.

664

665 Should be called from `_attach_methods`, typically in __init__ and

666 during unpickling (__setstate__)

667 """

668 ns = {}

669 exec(self._parse_arg_template, ns)

670 # NB: attach to the instance, not class

671 for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:

672 setattr(self, name, types.MethodType(ns[name], self))

673

674 def _construct_argparser(

675 self, meths_to_inspect, locscale_in, locscale_out):

676 """Construct the parser string for the shape arguments.

677

678 This method should be called in __init__ of a class for each

679 distribution. It creates the `_parse_arg_template` attribute that is

680 then used by `_attach_argparser_methods` to dynamically create and

681 attach the `_parse_args`, `_parse_args_stats`, `_parse_args_rvs`

682 methods to the instance.

683

684 If self.shapes is a non-empty string, interprets it as a

685 comma-separated list of shape parameters.

686

687 Otherwise inspects the call signatures of `meths_to_inspect`

688 and constructs the argument-parsing functions from these.

689 In this case also sets `shapes` and `numargs`.

690 """

691

692 if self.shapes:

693 # sanitize the user-supplied shapes

694 if not isinstance(self.shapes, str):

695 raise TypeError('shapes must be a string.')

696

697 shapes = self.shapes.replace(',', ' ').split()

698

699 for field in shapes:

700 if keyword.iskeyword(field):

701 raise SyntaxError('keywords cannot be used as shapes.')

702 if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):

703 raise SyntaxError(

704 'shapes must be valid python identifiers')

705 else:

706 # find out the call signatures (_pdf, _cdf etc), deduce shape

707 # arguments. Generic methods only have 'self, x', any further args

708 # are shapes.

709 shapes_list = []

710 for meth in meths_to_inspect:

711 shapes_args = _getfullargspec(meth) # NB does not contain self

712 args = shapes_args.args[1:] # peel off 'x', too

713

714 if args:

715 shapes_list.append(args)

716

717 # *args or **kwargs are not allowed w/automatic shapes

718 if shapes_args.varargs is not None:

719 raise TypeError(

720 '*args are not allowed w/out explicit shapes')

721 if shapes_args.varkw is not None:

722 raise TypeError(

723 '**kwds are not allowed w/out explicit shapes')

724 if shapes_args.kwonlyargs:

725 raise TypeError(

726 'kwonly args are not allowed w/out explicit shapes')

727 if shapes_args.defaults is not None:

728 raise TypeError('defaults are not allowed for shapes')

729

730 if shapes_list:

731 shapes = shapes_list[0]

732

733 # make sure the signatures are consistent

734 for item in shapes_list:

735 if item != shapes:

736 raise TypeError('Shape arguments are inconsistent.')

737 else:

738 shapes = []

739

740 # have the arguments, construct the method from template

741 shapes_str = ', '.join(shapes) + ', ' if shapes else '' # NB: not None

742 dct = dict(shape_arg_str=shapes_str,

743 locscale_in=locscale_in,

744 locscale_out=locscale_out,

745 )

746

747 # this string is used by _attach_argparser_methods

748 self._parse_arg_template = parse_arg_template % dct

749

750 self.shapes = ', '.join(shapes) if shapes else None

751 if not hasattr(self, 'numargs'):

752 # allows more general subclassing with *args

753 self.numargs = len(shapes)

754

755 def _construct_doc(self, docdict, shapes_vals=None):

756 """Construct the instance docstring with string substitutions."""

757 tempdict = docdict.copy()

758 tempdict['name'] = self.name or 'distname'

759 tempdict['shapes'] = self.shapes or ''

760

761 if shapes_vals is None:

762 shapes_vals = ()

763 vals = ', '.join('%.3g' % val for val in shapes_vals)

764 tempdict['vals'] = vals

765

766 tempdict['shapes_'] = self.shapes or ''

767 if self.shapes and self.numargs == 1:

768 tempdict['shapes_'] += ','

769

770 if self.shapes:

771 tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals)

772 else:

773 tempdict['set_vals_stmt'] = ''

774

775 if self.shapes is None:

776 # remove shapes from call parameters if there are none

777 for item in ['default', 'before_notes']:

778 tempdict[item] = tempdict[item].replace(

779 "\n%(shapes)s : array_like\n shape parameters", "")

780 for i in range(2):

781 if self.shapes is None:

782 # necessary because we use %(shapes)s in two forms (w w/o ", ")

783 self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")

784 try:

785 self.__doc__ = doccer.docformat(self.__doc__, tempdict)

786 except TypeError as e:

787 raise Exception("Unable to construct docstring for "

788 "distribution \"%s\": %s" %

789 (self.name, repr(e))) from e

790

791 # correct for empty shapes

792 self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')

793

794 def _construct_default_doc(self, longname=None, extradoc=None,

795 docdict=None, discrete='continuous'):

796 """Construct instance docstring from the default template."""

797 if longname is None:

798 longname = 'A'

799 if extradoc is None:

800 extradoc = ''

801 if extradoc.startswith('\n\n'):

802 extradoc = extradoc[2:]

803 self.__doc__ = ''.join(['%s %s random variable.' % (longname, discrete),

804 '\n\n%(before_notes)s\n', docheaders['notes'],

805 extradoc, '\n%(example)s'])

806 self._construct_doc(docdict)

807

808 def freeze(self, *args, **kwds):

809 """Freeze the distribution for the given arguments.

810

811 Parameters

812 ----------

813 arg1, arg2, arg3,... : array_like

814 The shape parameter(s) for the distribution. Should include all

815 the non-optional arguments, may include ``loc`` and ``scale``.

816

817 Returns

818 -------

819 rv_frozen : rv_frozen instance

820 The frozen distribution.

821

822 """

823 if isinstance(self, rv_continuous):

824 return rv_continuous_frozen(self, *args, **kwds)

825 else:

826 return rv_discrete_frozen(self, *args, **kwds)

827

828 def __call__(self, *args, **kwds):

829 return self.freeze(*args, **kwds)

830 __call__.__doc__ = freeze.__doc__

831

832 # The actual calculation functions (no basic checking need be done)

833 # If these are defined, the others won't be looked at.

834 # Otherwise, the other set can be defined.

835 def _stats(self, *args, **kwds):

836 return None, None, None, None

837

838 # Noncentral moments (also known as the moment about the origin).

839 # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime".

840 # The primed mu is a widely used notation for the noncentral moment.

841 def _munp(self, n, *args):

842 # Silence floating point warnings from integration.

843 with np.errstate(all='ignore'):

844 vals = self.generic_moment(n, *args)

845 return vals

846

847 def _argcheck_rvs(self, *args, **kwargs):

848 # Handle broadcasting and size validation of the rvs method.

849 # Subclasses should not have to override this method.

850 # The rule is that if `size` is not None, then `size` gives the

851 # shape of the result (integer values of `size` are treated as

852 # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)

853 #

854 # `args` is expected to contain the shape parameters (if any), the

855 # location and the scale in a flat tuple (e.g. if there are two

856 # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).

857 # The only keyword argument expected is 'size'.

858 size = kwargs.get('size', None)

859 all_bcast = np.broadcast_arrays(*args)

860

861 def squeeze_left(a):

862 while a.ndim > 0 and a.shape[0] == 1:

863 a = a[0]

864 return a

865

866 # Eliminate trivial leading dimensions. In the convention

867 # used by numpy's random variate generators, trivial leading

868 # dimensions are effectively ignored. In other words, when `size`

869 # is given, trivial leading dimensions of the broadcast parameters

870 # in excess of the number of dimensions in size are ignored, e.g.

871 # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)

872 # array([ 1.00104267, 3.00422496, 4.99799278])

873 # If `size` is not given, the exact broadcast shape is preserved:

874 # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])

875 # array([[[[ 1.00862899, 3.00061431, 4.99867122]]]])

876 #

877 all_bcast = [squeeze_left(a) for a in all_bcast]

878 bcast_shape = all_bcast[0].shape

879 bcast_ndim = all_bcast[0].ndim

880

881 if size is None:

882 size_ = bcast_shape

883 else:

884 size_ = tuple(np.atleast_1d(size))

885

886 # Check compatibility of size_ with the broadcast shape of all

887 # the parameters. This check is intended to be consistent with

888 # how the numpy random variate generators (e.g. np.random.normal,

889 # np.random.beta) handle their arguments. The rule is that, if size

890 # is given, it determines the shape of the output. Broadcasting

891 # can't change the output size.

892

893 # This is the standard broadcasting convention of extending the

894 # shape with fewer dimensions with enough dimensions of length 1

895 # so that the two shapes have the same number of dimensions.

896 ndiff = bcast_ndim - len(size_)

897 if ndiff < 0:

898 bcast_shape = (1,)*(-ndiff) + bcast_shape

899 elif ndiff > 0:

900 size_ = (1,)*ndiff + size_

901

902 # This compatibility test is not standard. In "regular" broadcasting,

903 # two shapes are compatible if for each dimension, the lengths are the

904 # same or one of the lengths is 1. Here, the length of a dimension in

905 # size_ must not be less than the corresponding length in bcast_shape.

906 ok = all([bcdim == 1 or bcdim == szdim

907 for (bcdim, szdim) in zip(bcast_shape, size_)])

908 if not ok:

909 raise ValueError("size does not match the broadcast shape of "

910 "the parameters. %s, %s, %s" % (size, size_,

911 bcast_shape))

912

913 param_bcast = all_bcast[:-2]

914 loc_bcast = all_bcast[-2]

915 scale_bcast = all_bcast[-1]

916

917 return param_bcast, loc_bcast, scale_bcast, size_

918

919 # These are the methods you must define (standard form functions)

920 # NB: generic _pdf, _logpdf, _cdf are different for

921 # rv_continuous and rv_discrete hence are defined in there

922 def _argcheck(self, *args):

923 """Default check for correct values on args and keywords.

924

925 Returns condition array of 1's where arguments are correct and

926 0's where they are not.

927

928 """

929 cond = 1

930 for arg in args:

931 cond = logical_and(cond, (asarray(arg) > 0))

932 return cond

933

934 def _get_support(self, *args, **kwargs):

935 """Return the support of the (unscaled, unshifted) distribution.

936

937 *Must* be overridden by distributions which have support dependent

938 upon the shape parameters of the distribution. Any such override

939 *must not* set or change any of the class members, as these members

940 are shared amongst all instances of the distribution.

941

942 Parameters

943 ----------

944 arg1, arg2, ... : array_like

945 The shape parameter(s) for the distribution (see docstring of the

946 instance object for more information).

947

948 Returns

949 -------

950 a, b : numeric (float, or int or +/-np.inf)

951 end-points of the distribution's support for the specified

952 shape parameters.

953 """

954 return self.a, self.b

955

956 def _support_mask(self, x, *args):

957 a, b = self._get_support(*args)

958 with np.errstate(invalid='ignore'):

959 return (a <= x) & (x <= b)

960

961 def _open_support_mask(self, x, *args):

962 a, b = self._get_support(*args)

963 with np.errstate(invalid='ignore'):

964 return (a < x) & (x < b)

965

966 def _rvs(self, *args, size=None, random_state=None):

967 # This method must handle size being a tuple, and it must

968 # properly broadcast *args and size. size might be

969 # an empty tuple, which means a scalar random variate is to be

970 # generated.

971

972 # Use basic inverse cdf algorithm for RV generation as default.

973 U = random_state.uniform(size=size)

974 Y = self._ppf(U, *args)

975 return Y

976

977 def _logcdf(self, x, *args):

978 with np.errstate(divide='ignore'):

979 return log(self._cdf(x, *args))

980

981 def _sf(self, x, *args):

982 return 1.0-self._cdf(x, *args)

983

984 def _logsf(self, x, *args):

985 with np.errstate(divide='ignore'):

986 return log(self._sf(x, *args))

987

988 def _ppf(self, q, *args):

989 return self._ppfvec(q, *args)

990

991 def _isf(self, q, *args):

992 return self._ppf(1.0-q, *args) # use correct _ppf for subclasses

993

994 # These are actually called, and should not be overwritten if you

995 # want to keep error checking.

996 def rvs(self, *args, **kwds):

997 """Random variates of given type.

998

999 Parameters

1000 ----------

1001 arg1, arg2, arg3,... : array_like

1002 The shape parameter(s) for the distribution (see docstring of the

1003 instance object for more information).

1004 loc : array_like, optional

1005 Location parameter (default=0).

1006 scale : array_like, optional

1007 Scale parameter (default=1).

1008 size : int or tuple of ints, optional

1009 Defining number of random variates (default is 1).

1010 random_state : {None, int, `numpy.random.Generator`,

1011 `numpy.random.RandomState`}, optional

1012

1013 If `random_state` is None (or `np.random`), the

1014 `numpy.random.RandomState` singleton is used.

1015 If `random_state` is an int, a new ``RandomState`` instance is

1016 used, seeded with `random_state`.

1017 If `random_state` is already a ``Generator`` or ``RandomState``

1018 instance, that instance is used.

1019

1020 Returns

1021 -------

1022 rvs : ndarray or scalar

1023 Random variates of given `size`.

1024

1025 """

1026 discrete = kwds.pop('discrete', None)

1027 rndm = kwds.pop('random_state', None)

1028 args, loc, scale, size = self._parse_args_rvs(*args, **kwds)

1029 cond = logical_and(self._argcheck(*args), (scale >= 0))

1030 if not np.all(cond):

1031 message = ("Domain error in arguments. The `scale` parameter must "

1032 "be positive for all distributions, and many "

1033 "distributions have restrictions on shape parameters. "

1034 f"Please see the `scipy.stats.{self.name}` "

1035 "documentation for details.")

1036 raise ValueError(message)

1037

1038 if np.all(scale == 0):

1039 return loc*ones(size, 'd')

1040

1041 # extra gymnastics needed for a custom random_state

1042 if rndm is not None:

1043 random_state_saved = self._random_state

1044 random_state = check_random_state(rndm)

1045 else:

1046 random_state = self._random_state

1047

1048 vals = self._rvs(*args, size=size, random_state=random_state)

1049

1050 vals = vals * scale + loc

1051

1052 # do not forget to restore the _random_state

1053 if rndm is not None:

1054 self._random_state = random_state_saved

1055

1056 # Cast to int if discrete

1057 if discrete and not isinstance(self, rv_sample):

1058 if size == ():

1059 vals = int(vals)

1060 else:

1061 vals = vals.astype(np.int64)

1062

1063 return vals

1064

1065 def stats(self, *args, **kwds):

1066 """Some statistics of the given RV.

1067

1068 Parameters

1069 ----------

1070 arg1, arg2, arg3,... : array_like

1071 The shape parameter(s) for the distribution (see docstring of the

1072 instance object for more information)

1073 loc : array_like, optional

1074 location parameter (default=0)

1075 scale : array_like, optional (continuous RVs only)

1076 scale parameter (default=1)

1077 moments : str, optional

1078 composed of letters ['mvsk'] defining which moments to compute:

1079 'm' = mean,

1080 'v' = variance,

1081 's' = (Fisher's) skew,

1082 'k' = (Fisher's) kurtosis.

1083 (default is 'mv')

1084

1085 Returns

1086 -------

1087 stats : sequence

1088 of requested moments.

1089

1090 """

1091 args, loc, scale, moments = self._parse_args_stats(*args, **kwds)

1092 # scale = 1 by construction for discrete RVs

1093 loc, scale = map(asarray, (loc, scale))

1094 args = tuple(map(asarray, args))

1095 cond = self._argcheck(*args) & (scale > 0) & (loc == loc)

1096 output = []

1097 default = np.full(shape(cond), fill_value=self.badvalue)

1098

1099 # Use only entries that are valid in calculation

1100 if np.any(cond):

1101 goodargs = argsreduce(cond, *(args+(scale, loc)))

1102 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]

1103

1104 if self._stats_has_moments:

1105 mu, mu2, g1, g2 = self._stats(*goodargs,

1106 **{'moments': moments})

1107 else:

1108 mu, mu2, g1, g2 = self._stats(*goodargs)

1109

1110 if 'm' in moments:

1111 if mu is None:

1112 mu = self._munp(1, *goodargs)

1113 out0 = default.copy()

1114 place(out0, cond, mu * scale + loc)

1115 output.append(out0)

1116

1117 if 'v' in moments:

1118 if mu2 is None:

1119 mu2p = self._munp(2, *goodargs)

1120 if mu is None:

1121 mu = self._munp(1, *goodargs)

1122 # if mean is inf then var is also inf

1123 with np.errstate(invalid='ignore'):

1124 mu2 = np.where(~np.isinf(mu), mu2p - mu**2, np.inf)

1125 out0 = default.copy()

1126 place(out0, cond, mu2 * scale * scale)

1127 output.append(out0)

1128

1129 if 's' in moments:

1130 if g1 is None:

1131 mu3p = self._munp(3, *goodargs)

1132 if mu is None:

1133 mu = self._munp(1, *goodargs)

1134 if mu2 is None:

1135 mu2p = self._munp(2, *goodargs)

1136 mu2 = mu2p - mu * mu

1137 with np.errstate(invalid='ignore'):

1138 mu3 = (-mu*mu - 3*mu2)*mu + mu3p

1139 g1 = mu3 / np.power(mu2, 1.5)

1140 out0 = default.copy()

1141 place(out0, cond, g1)

1142 output.append(out0)

1143

1144 if 'k' in moments:

1145 if g2 is None:

1146 mu4p = self._munp(4, *goodargs)

1147 if mu is None:

1148 mu = self._munp(1, *goodargs)

1149 if mu2 is None:

1150 mu2p = self._munp(2, *goodargs)

1151 mu2 = mu2p - mu * mu

1152 if g1 is None:

1153 mu3 = None

1154 else:

1155 # (mu2**1.5) breaks down for nan and inf

1156 mu3 = g1 * np.power(mu2, 1.5)

1157 if mu3 is None:

1158 mu3p = self._munp(3, *goodargs)

1159 with np.errstate(invalid='ignore'):

1160 mu3 = (-mu * mu - 3 * mu2) * mu + mu3p

1161 with np.errstate(invalid='ignore'):

1162 mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p

1163 g2 = mu4 / mu2**2.0 - 3.0

1164 out0 = default.copy()

1165 place(out0, cond, g2)

1166 output.append(out0)

1167 else: # no valid args

1168 output = [default.copy() for _ in moments]

1169

1170 output = [out[()] for out in output]

1171 if len(output) == 1:

1172 return output[0]

1173 else:

1174 return tuple(output)

1175

1176 def entropy(self, *args, **kwds):

1177 """Differential entropy of the RV.

1178

1179 Parameters

1180 ----------

1181 arg1, arg2, arg3,... : array_like

1182 The shape parameter(s) for the distribution (see docstring of the

1183 instance object for more information).

1184 loc : array_like, optional

1185 Location parameter (default=0).

1186 scale : array_like, optional (continuous distributions only).

1187 Scale parameter (default=1).

1188

1189 Notes

1190 -----

1191 Entropy is defined base `e`:

1192

1193 >>> import numpy as np

1194 >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5)))

1195 >>> np.allclose(drv.entropy(), np.log(2.0))

1196 True

1197

1198 """

1199 args, loc, scale = self._parse_args(*args, **kwds)

1200 # NB: for discrete distributions scale=1 by construction in _parse_args

1201 loc, scale = map(asarray, (loc, scale))

1202 args = tuple(map(asarray, args))

1203 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)

1204 output = zeros(shape(cond0), 'd')

1205 place(output, (1-cond0), self.badvalue)

1206 goodargs = argsreduce(cond0, scale, *args)

1207 goodscale = goodargs[0]

1208 goodargs = goodargs[1:]

1209 place(output, cond0, self.vecentropy(*goodargs) + log(goodscale))

1210 return output[()]

1211

1212 def moment(self, order=None, *args, **kwds):

1213 """non-central moment of distribution of specified order.

1214

1215 .. deprecated:: 1.9.0

1216 Parameter `n` is replaced by parameter `order` to avoid name

1217 collisions with the shape parameter `n` of several distributions.

1218 Parameter `n` will be removed in SciPy 1.11.0.

1219

1220 Parameters

1221 ----------

1222 order : int, order >= 1

1223 Order of moment.

1224 arg1, arg2, arg3,... : float

1225 The shape parameter(s) for the distribution (see docstring of the

1226 instance object for more information).

1227 loc : array_like, optional

1228 location parameter (default=0)

1229 scale : array_like, optional

1230 scale parameter (default=1)

1231

1232 """

1233 # This function was originally written with parameter `n`, but `n`

1234 # is also the name of many distribution shape parameters.

1235 # This block allows the function to accept both `n` and its

1236 # replacement `order` during a deprecation period; it can be removed

1237 # in the second release after 1.9.0.

1238 # The logic to provide a DeprecationWarning only when `n` is passed

1239 # as a keyword, accept the new keyword `order`, and otherwise be

1240 # backward-compatible deserves explanation. We need to look out for

1241 # the following:

1242 # * Does the distribution have a shape named `n`?

1243 # * Is `order` provided? It doesn't matter whether it is provided as a

1244 # positional or keyword argument; it will be used as the order of the

1245 # moment rather than a distribution shape parameter because:

1246 # - The first positional argument of `moment` has always been the

1247 # order of the moment.

1248 # - The keyword `order` is new, so it's unambiguous that it refers to

1249 # the order of the moment.

1250 # * Is `n` provided as a keyword argument? It _does_ matter whether it

1251 # is provided as a positional or keyword argument.

1252 # - The first positional argument of `moment` has always been the

1253 # order of moment, but

1254 # - if `n` is provided as a keyword argument, its meaning depends

1255 # on whether the distribution accepts `n` as a shape parameter.

1256 has_shape_n = (self.shapes is not None

1257 and "n" in (self.shapes.split(", ")))

1258 got_order = order is not None

1259 got_keyword_n = kwds.get("n", None) is not None

1260

1261 # These lead to the following cases.

1262 # Case A: If the distribution _does_ accept `n` as a shape

1263 # 1. If both `order` and `n` are provided, this is now OK:

1264 # it is unambiguous that `order` is the order of the moment and `n`

1265 # is the shape parameter. Previously, this would have caused an

1266 # error because `n` was provided both as a keyword argument and

1267 # as the first positional argument. I don't think it is credible for

1268 # users to rely on this error in their code, though, so I don't see

1269 # this as a backward compatibility break.

1270 # 2. If only `n` is provided (as a keyword argument), this would have

1271 # been an error in the past because `n` would have been treated as

1272 # the order of the moment while the shape parameter would be

1273 # missing. It is still the same type of error, but for a different

1274 # reason: now, `n` is treated as the shape parameter while the

1275 # order of the moment is missing.

1276 # 3. If only `order` is provided, no special treament is needed.

1277 # Clearly this value is intended to be the order of the moment,

1278 # and the rest of the function will determine whether `n` is

1279 # available as a shape parameter in `args`.

1280 # 4. If neither `n` nor `order` is provided, this would have been an

1281 # error (order of the moment is not provided) and it is still an

1282 # error for the same reason.

1283

1284 # Case B: the distribution does _not_ accept `n` as a shape

1285 # 1. If both `order` and `n` are provided, this was an error, and it

1286 # still is an error: two values for same parameter.

1287 # 2. If only `n` is provided (as a keyword argument), this was OK and

1288 # is still OK, but there shold now be a `DeprecationWarning`. The

1289 # value of `n` should be removed from `kwds` and stored in `order`.

1290 # 3. If only `order` is provided, there was no problem before providing

1291 # only the first argument of `moment`, and there is no problem with

1292 # that now.

1293 # 4. If neither `n` nor `order` is provided, this would have been an

1294 # error (order of the moment is not provided), and it is still an

1295 # error for the same reason.

1296 if not got_order and ((not got_keyword_n) # A4 and B4

1297 or (got_keyword_n and has_shape_n)): # A2

1298 message = ("moment() missing 1 required "

1299 "positional argument: `order`")

1300 raise TypeError(message)

1301

1302 if got_keyword_n and not has_shape_n:

1303 if got_order: # B1

1304 # this will change to "moment got unexpected argument n"

1305 message = "moment() got multiple values for first argument"

1306 raise TypeError(message)

1307 else: # B2

1308 message = ("Use of keyword argument 'n' for method 'moment is"

1309 " deprecated and will be removed in SciPy 1.11.0. "

1310 "Use first positional argument or keyword argument"

1311 " 'order' instead.")

1312 order = kwds.pop("n")

1313 warnings.warn(message, DeprecationWarning, stacklevel=2)

1314 n = order

1315 # No special treatment of A1, A3, or B3 is needed because the order

1316 # of the moment is now in variable `n` and the shape parameter, if

1317 # needed, will be fished out of `args` or `kwds` by _parse_args

1318 # A3 might still cause an error if the shape parameter called `n`

1319 # is not found in `args`.

1320

1321 shapes, loc, scale = self._parse_args(*args, **kwds)

1322 args = np.broadcast_arrays(*(*shapes, loc, scale))

1323 *shapes, loc, scale = args

1324

1325 i0 = np.logical_and(self._argcheck(*shapes), scale > 0)

1326 i1 = np.logical_and(i0, loc == 0)

1327 i2 = np.logical_and(i0, loc != 0)

1328

1329 args = argsreduce(i0, *shapes, loc, scale)

1330 *shapes, loc, scale = args

1331

1332 if (floor(n) != n):

1333 raise ValueError("Moment must be an integer.")

1334 if (n < 0):

1335 raise ValueError("Moment must be positive.")

1336 mu, mu2, g1, g2 = None, None, None, None

1337 if (n > 0) and (n < 5):

1338 if self._stats_has_moments:

1339 mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'mvsk'}[n]}

1340 else:

1341 mdict = {}

1342 mu, mu2, g1, g2 = self._stats(*shapes, **mdict)

1343 val = np.empty(loc.shape) # val needs to be indexed by loc

1344 val[...] = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, shapes)

1345

1346 # Convert to transformed X = L + S*Y

1347 # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n)

1348 result = zeros(i0.shape)

1349 place(result, ~i0, self.badvalue)

1350

1351 if i1.any():

1352 res1 = scale[loc == 0]**n * val[loc == 0]

1353 place(result, i1, res1)

1354

1355 if i2.any():

1356 mom = [mu, mu2, g1, g2]

1357 arrs = [i for i in mom if i is not None]

1358 idx = [i for i in range(4) if mom[i] is not None]

1359 if any(idx):

1360 arrs = argsreduce(loc != 0, *arrs)

1361 j = 0

1362 for i in idx:

1363 mom[i] = arrs[j]

1364 j += 1

1365 mu, mu2, g1, g2 = mom

1366 args = argsreduce(loc != 0, *shapes, loc, scale, val)

1367 *shapes, loc, scale, val = args

1368

1369 res2 = zeros(loc.shape, dtype='d')

1370 fac = scale / loc

1371 for k in range(n):

1372 valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp,

1373 shapes)

1374 res2 += comb(n, k, exact=True)*fac**k * valk

1375 res2 += fac**n * val

1376 res2 *= loc**n

1377 place(result, i2, res2)

1378

1379 return result[()]

1380

1381 def median(self, *args, **kwds):

1382 """Median of the distribution.

1383

1384 Parameters

1385 ----------

1386 arg1, arg2, arg3,... : array_like

1387 The shape parameter(s) for the distribution (see docstring of the

1388 instance object for more information)

1389 loc : array_like, optional

1390 Location parameter, Default is 0.

1391 scale : array_like, optional

1392 Scale parameter, Default is 1.

1393

1394 Returns

1395 -------

1396 median : float

1397 The median of the distribution.

1398

1399 See Also

1400 --------

1401 rv_discrete.ppf

1402 Inverse of the CDF

1403

1404 """

1405 return self.ppf(0.5, *args, **kwds)

1406

1407 def mean(self, *args, **kwds):

1408 """Mean of the distribution.

1409

1410 Parameters

1411 ----------

1412 arg1, arg2, arg3,... : array_like

1413 The shape parameter(s) for the distribution (see docstring of the

1414 instance object for more information)

1415 loc : array_like, optional

1416 location parameter (default=0)

1417 scale : array_like, optional

1418 scale parameter (default=1)

1419

1420 Returns

1421 -------

1422 mean : float

1423 the mean of the distribution

1424

1425 """

1426 kwds['moments'] = 'm'

1427 res = self.stats(*args, **kwds)

1428 if isinstance(res, ndarray) and res.ndim == 0:

1429 return res[()]

1430 return res

1431

1432 def var(self, *args, **kwds):

1433 """Variance of the distribution.

1434

1435 Parameters

1436 ----------

1437 arg1, arg2, arg3,... : array_like

1438 The shape parameter(s) for the distribution (see docstring of the

1439 instance object for more information)

1440 loc : array_like, optional

1441 location parameter (default=0)

1442 scale : array_like, optional

1443 scale parameter (default=1)

1444

1445 Returns

1446 -------

1447 var : float

1448 the variance of the distribution

1449

1450 """

1451 kwds['moments'] = 'v'

1452 res = self.stats(*args, **kwds)

1453 if isinstance(res, ndarray) and res.ndim == 0:

1454 return res[()]

1455 return res

1456

1457 def std(self, *args, **kwds):

1458 """Standard deviation of the distribution.

1459

1460 Parameters

1461 ----------

1462 arg1, arg2, arg3,... : array_like

1463 The shape parameter(s) for the distribution (see docstring of the

1464 instance object for more information)

1465 loc : array_like, optional

1466 location parameter (default=0)

1467 scale : array_like, optional

1468 scale parameter (default=1)

1469

1470 Returns

1471 -------

1472 std : float

1473 standard deviation of the distribution

1474

1475 """

1476 kwds['moments'] = 'v'

1477 res = sqrt(self.stats(*args, **kwds))

1478 return res

1479

1480 def interval(self, confidence=None, *args, **kwds):

1481 """Confidence interval with equal areas around the median.

1482

1483 .. deprecated:: 1.9.0

1484 Parameter `alpha` is replaced by parameter `confidence` to avoid

1485 name collisions with the shape parameter `alpha` of some

1486 distributions. Parameter `alpha` will be removed in SciPy 1.11.0.

1487

1488 Parameters

1489 ----------

1490 confidence : array_like of float

1491 Probability that an rv will be drawn from the returned range.

1492 Each value should be in the range [0, 1].

1493 arg1, arg2, ... : array_like

1494 The shape parameter(s) for the distribution (see docstring of the

1495 instance object for more information).

1496 loc : array_like, optional

1497 location parameter, Default is 0.

1498 scale : array_like, optional

1499 scale parameter, Default is 1.

1500

1501 Returns

1502 -------

1503 a, b : ndarray of float

1504 end-points of range that contain ``100 * alpha %`` of the rv's

1505 possible values.

1506

1507 Notes

1508 -----

1509 This is implemented as ``ppf([p_tail, 1-p_tail])``, where

1510 ``ppf`` is the inverse cumulative distribution function and

1511 ``p_tail = (1-confidence)/2``. Suppose ``[c, d]`` is the support of a

1512 discrete distribution; then ``ppf([0, 1]) == (c-1, d)``. Therefore,

1513 when ``confidence=1`` and the distribution is discrete, the left end

1514 of the interval will be beyond the support of the distribution.

1515 For discrete distributions, the interval will limit the probability

1516 in each tail to be less than or equal to ``p_tail`` (usually

1517 strictly less).

1518

1519 """

1520 # This function was originally written with parameter `alpha`, but

1521 # `alpha` is also the name of a shape parameter of two distributions.

1522 # This block allows the function to accept both `alpha` and its

1523 # replacement `confidence` during a deprecation period; it can be

1524 # removed in the second release after 1.9.0.

1525 # See description of logic in `moment` method.

1526 has_shape_alpha = (self.shapes is not None

1527 and "alpha" in (self.shapes.split(", ")))

1528 got_confidence = confidence is not None

1529 got_keyword_alpha = kwds.get("alpha", None) is not None

1530

1531 if not got_confidence and ((not got_keyword_alpha)

1532 or (got_keyword_alpha and has_shape_alpha)):

1533 message = ("interval() missing 1 required positional argument: "

1534 "`confidence`")

1535 raise TypeError(message)

1536

1537 if got_keyword_alpha and not has_shape_alpha:

1538 if got_confidence:

1539 # this will change to "interval got unexpected argument alpha"

1540 message = "interval() got multiple values for first argument"

1541 raise TypeError(message)

1542 else:

1543 message = ("Use of keyword argument 'alpha' for method "

1544 "'interval' is deprecated and wil be removed in "

1545 "SciPy 1.11.0. Use first positional argument or "

1546 "keyword argument 'confidence' instead.")

1547 confidence = kwds.pop("alpha")

1548 warnings.warn(message, DeprecationWarning, stacklevel=2)

1549 alpha = confidence

1550

1551 alpha = asarray(alpha)

1552 if np.any((alpha > 1) | (alpha < 0)):

1553 raise ValueError("alpha must be between 0 and 1 inclusive")

1554 q1 = (1.0-alpha)/2

1555 q2 = (1.0+alpha)/2

1556 a = self.ppf(q1, *args, **kwds)

1557 b = self.ppf(q2, *args, **kwds)

1558 return a, b

1559

1560 def support(self, *args, **kwargs):

1561 """Support of the distribution.

1562

1563 Parameters

1564 ----------

1565 arg1, arg2, ... : array_like

1566 The shape parameter(s) for the distribution (see docstring of the

1567 instance object for more information).

1568 loc : array_like, optional

1569 location parameter, Default is 0.

1570 scale : array_like, optional

1571 scale parameter, Default is 1.

1572

1573 Returns

1574 -------

1575 a, b : array_like

1576 end-points of the distribution's support.

1577

1578 """

1579 args, loc, scale = self._parse_args(*args, **kwargs)

1580 arrs = np.broadcast_arrays(*args, loc, scale)

1581 args, loc, scale = arrs[:-2], arrs[-2], arrs[-1]

1582 cond = self._argcheck(*args) & (scale > 0)

1583 _a, _b = self._get_support(*args)

1584 if cond.all():

1585 return _a * scale + loc, _b * scale + loc

1586 elif cond.ndim == 0:

1587 return self.badvalue, self.badvalue

1588 # promote bounds to at least float to fill in the badvalue

1589 _a, _b = np.asarray(_a).astype('d'), np.asarray(_b).astype('d')

1590 out_a, out_b = _a * scale + loc, _b * scale + loc

1591 place(out_a, 1-cond, self.badvalue)

1592 place(out_b, 1-cond, self.badvalue)

1593 return out_a, out_b

1594

1595 def nnlf(self, theta, x):

1596 """Negative loglikelihood function.

1597 Notes

1598 -----

1599 This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the

1600 parameters (including loc and scale).

1601 """

1602 loc, scale, args = self._unpack_loc_scale(theta)

1603 if not self._argcheck(*args) or scale <= 0:

1604 return inf

1605 x = (asarray(x)-loc) / scale

1606 n_log_scale = len(x) * log(scale)

1607 if np.any(~self._support_mask(x, *args)):

1608 return inf

1609 return self._nnlf(x, *args) + n_log_scale

1610

1611 def _nnlf(self, x, *args):

1612 return -np.sum(self._logpxf(x, *args), axis=0)

1613

1614 def _nlff_and_penalty(self, x, args, log_fitfun):

1615 # negative log fit function

1616 cond0 = ~self._support_mask(x, *args)

1617 n_bad = np.count_nonzero(cond0, axis=0)

1618 if n_bad > 0:

1619 x = argsreduce(~cond0, x)[0]

1620 logff = log_fitfun(x, *args)

1621 finite_logff = np.isfinite(logff)

1622 n_bad += np.sum(~finite_logff, axis=0)

1623 if n_bad > 0:

1624 penalty = n_bad * log(_XMAX) * 100

1625 return -np.sum(logff[finite_logff], axis=0) + penalty

1626 return -np.sum(logff, axis=0)

1627

1628 def _penalized_nnlf(self, theta, x):

1629 """Penalized negative loglikelihood function.

1630 i.e., - sum (log pdf(x, theta), axis=0) + penalty

1631 where theta are the parameters (including loc and scale)

1632 """

1633 loc, scale, args = self._unpack_loc_scale(theta)

1634 if not self._argcheck(*args) or scale <= 0:

1635 return inf

1636 x = asarray((x-loc) / scale)

1637 n_log_scale = len(x) * log(scale)

1638 return self._nlff_and_penalty(x, args, self._logpxf) + n_log_scale

1639

1640 def _penalized_nlpsf(self, theta, x):

1641 """Penalized negative log product spacing function.

1642 i.e., - sum (log (diff (cdf (x, theta))), axis=0) + penalty

1643 where theta are the parameters (including loc and scale)

1644 Follows reference [1] of scipy.stats.fit

1645 """

1646 loc, scale, args = self._unpack_loc_scale(theta)

1647 if not self._argcheck(*args) or scale <= 0:

1648 return inf

1649 x = (np.sort(x) - loc)/scale

1650

1651 def log_psf(x, *args):

1652 x, lj = np.unique(x, return_counts=True) # fast for sorted x

1653 cdf_data = self._cdf(x, *args) if x.size else []

1654 if not (x.size and 1 - cdf_data[-1] <= 0):

1655 cdf = np.concatenate(([0], cdf_data, [1]))

1656 lj = np.concatenate((lj, [1]))

1657 else:

1658 cdf = np.concatenate(([0], cdf_data))

1659 # here we could use logcdf w/ logsumexp trick to take differences,

1660 # but in the context of the method, it seems unlikely to matter

1661 return lj * np.log(np.diff(cdf) / lj)

1662

1663 return self._nlff_and_penalty(x, args, log_psf)

1664

1665

1666class _ShapeInfo:

1667 def __init__(self, name, integrality=False, domain=(-np.inf, np.inf),

1668 inclusive=(True, True)):

1669 self.name = name

1670 self.integrality = integrality

1671

1672 domain = list(domain)

1673 if np.isfinite(domain[0]) and not inclusive[0]:

1674 domain[0] = np.nextafter(domain[0], np.inf)

1675 if np.isfinite(domain[1]) and not inclusive[1]:

1676 domain[1] = np.nextafter(domain[1], -np.inf)

1677 self.domain = domain

1678

1679

1680def _get_fixed_fit_value(kwds, names):

1681 """

1682 Given names such as `['f0', 'fa', 'fix_a']`, check that there is

1683 at most one non-None value in `kwds` associaed with those names.

1684 Return that value, or None if none of the names occur in `kwds`.

1685 As a side effect, all occurrences of those names in `kwds` are

1686 removed.

1687 """

1688 vals = [(name, kwds.pop(name)) for name in names if name in kwds]

1689 if len(vals) > 1:

1690 repeated = [name for name, val in vals]

1691 raise ValueError("fit method got multiple keyword arguments to "

1692 "specify the same fixed parameter: " +

1693 ', '.join(repeated))

1694 return vals[0][1] if vals else None

1695

1696

1697# continuous random variables: implement maybe later

1698#

1699# hf --- Hazard Function (PDF / SF)

1700# chf --- Cumulative hazard function (-log(SF))

1701# psf --- Probability sparsity function (reciprocal of the pdf) in

1702# units of percent-point-function (as a function of q).

1703# Also, the derivative of the percent-point function.

1704

1705

1706class rv_continuous(rv_generic):

1707 """A generic continuous random variable class meant for subclassing.

1708

1709 `rv_continuous` is a base class to construct specific distribution classes

1710 and instances for continuous random variables. It cannot be used

1711 directly as a distribution.

1712

1713 Parameters

1714 ----------

1715 momtype : int, optional

1716 The type of generic moment calculation to use: 0 for pdf, 1 (default)

1717 for ppf.

1718 a : float, optional

1719 Lower bound of the support of the distribution, default is minus

1720 infinity.

1721 b : float, optional

1722 Upper bound of the support of the distribution, default is plus

1723 infinity.

1724 xtol : float, optional

1725 The tolerance for fixed point calculation for generic ppf.

1726 badvalue : float, optional

1727 The value in a result arrays that indicates a value that for which

1728 some argument restriction is violated, default is np.nan.

1729 name : str, optional

1730 The name of the instance. This string is used to construct the default

1731 example for distributions.

1732 longname : str, optional

1733 This string is used as part of the first line of the docstring returned

1734 when a subclass has no docstring of its own. Note: `longname` exists

1735 for backwards compatibility, do not use for new subclasses.

1736 shapes : str, optional

1737 The shape of the distribution. For example ``"m, n"`` for a

1738 distribution that takes two integers as the two shape arguments for all

1739 its methods. If not provided, shape parameters will be inferred from

1740 the signature of the private methods, ``_pdf`` and ``_cdf`` of the

1741 instance.

1742 extradoc : str, optional, deprecated

1743 This string is used as the last part of the docstring returned when a

1744 subclass has no docstring of its own. Note: `extradoc` exists for

1745 backwards compatibility and will be removed in SciPy 1.11.0, do not

1746 use for new subclasses.

1747 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

1748 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

1749 singleton is used.

1750 If `seed` is an int, a new ``RandomState`` instance is used,

1751 seeded with `seed`.

1752 If `seed` is already a ``Generator`` or ``RandomState`` instance then

1753 that instance is used.

1754

1755 Methods

1756 -------

1757 rvs

1758 pdf

1759 logpdf

1760 cdf

1761 logcdf

1762 sf

1763 logsf

1764 ppf

1765 isf

1766 moment

1767 stats

1768 entropy

1769 expect

1770 median

1771 mean

1772 std

1773 var

1774 interval

1775 __call__

1776 fit

1777 fit_loc_scale

1778 nnlf

1779 support

1780

1781 Notes

1782 -----

1783 Public methods of an instance of a distribution class (e.g., ``pdf``,

1784 ``cdf``) check their arguments and pass valid arguments to private,

1785 computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid

1786 if it is within the support of the distribution.

1787 Whether a shape parameter is valid is decided by an ``_argcheck`` method

1788 (which defaults to checking that its arguments are strictly positive.)

1789

1790 **Subclassing**

1791

1792 New random variables can be defined by subclassing the `rv_continuous` class

1793 and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized

1794 to location 0 and scale 1).

1795

1796 If positive argument checking is not correct for your RV

1797 then you will also need to re-define the ``_argcheck`` method.

1798

1799 For most of the scipy.stats distributions, the support interval doesn't

1800 depend on the shape parameters. ``x`` being in the support interval is

1801 equivalent to ``self.a <= x <= self.b``. If either of the endpoints of

1802 the support do depend on the shape parameters, then

1803 i) the distribution must implement the ``_get_support`` method; and

1804 ii) those dependent endpoints must be omitted from the distribution's

1805 call to the ``rv_continuous`` initializer.

1806

1807 Correct, but potentially slow defaults exist for the remaining

1808 methods but for speed and/or accuracy you can over-ride::

1809

1810 _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf

1811

1812 The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``,

1813 applied to a uniform random variate. In order to generate random variates

1814 efficiently, either the default ``_ppf`` needs to be overwritten (e.g.

1815 if the inverse cdf can expressed in an explicit form) or a sampling

1816 method needs to be implemented in a custom ``_rvs`` method.

1817

1818 If possible, you should override ``_isf``, ``_sf`` or ``_logsf``.

1819 The main reason would be to improve numerical accuracy: for example,

1820 the survival function ``_sf`` is computed as ``1 - _cdf`` which can

1821 result in loss of precision if ``_cdf(x)`` is close to one.

1822

1823 **Methods that can be overwritten by subclasses**

1824 ::

1825

1826 _rvs

1827 _pdf

1828 _cdf

1829 _sf

1830 _ppf

1831 _isf

1832 _stats

1833 _munp

1834 _entropy

1835 _argcheck

1836 _get_support

1837

1838 There are additional (internal and private) generic methods that can

1839 be useful for cross-checking and for debugging, but might work in all

1840 cases when directly called.

1841

1842 A note on ``shapes``: subclasses need not specify them explicitly. In this

1843 case, `shapes` will be automatically deduced from the signatures of the

1844 overridden methods (`pdf`, `cdf` etc).

1845 If, for some reason, you prefer to avoid relying on introspection, you can

1846 specify ``shapes`` explicitly as an argument to the instance constructor.

1847

1848

1849 **Frozen Distributions**

1850

1851 Normally, you must provide shape parameters (and, optionally, location and

1852 scale parameters to each call of a method of a distribution.

1853

1854 Alternatively, the object may be called (as a function) to fix the shape,

1855 location, and scale parameters returning a "frozen" continuous RV object:

1856

1857 rv = generic(<shape(s)>, loc=0, scale=1)

1858 `rv_frozen` object with the same methods but holding the given shape,

1859 location, and scale fixed

1860

1861 **Statistics**

1862

1863 Statistics are computed using numerical integration by default.

1864 For speed you can redefine this using ``_stats``:

1865

1866 - take shape parameters and return mu, mu2, g1, g2

1867 - If you can't compute one of these, return it as None

1868 - Can also be defined with a keyword argument ``moments``, which is a

1869 string composed of "m", "v", "s", and/or "k".

1870 Only the components appearing in string should be computed and

1871 returned in the order "m", "v", "s", or "k" with missing values

1872 returned as None.

1873

1874 Alternatively, you can override ``_munp``, which takes ``n`` and shape

1875 parameters and returns the n-th non-central moment of the distribution.

1876

1877 Examples

1878 --------

1879 To create a new Gaussian distribution, we would do the following:

1880

1881 >>> from scipy.stats import rv_continuous

1882 >>> class gaussian_gen(rv_continuous):

1883 ... "Gaussian distribution"

1884 ... def _pdf(self, x):

1885 ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi)

1886 >>> gaussian = gaussian_gen(name='gaussian')

1887

1888 ``scipy.stats`` distributions are *instances*, so here we subclass

1889 `rv_continuous` and create an instance. With this, we now have

1890 a fully functional distribution with all relevant methods automagically

1891 generated by the framework.

1892

1893 Note that above we defined a standard normal distribution, with zero mean

1894 and unit variance. Shifting and scaling of the distribution can be done

1895 by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)``

1896 essentially computes ``y = (x - loc) / scale`` and

1897 ``gaussian._pdf(y) / scale``.

1898

1899 """

1900 def __init__(self, momtype=1, a=None, b=None, xtol=1e-14,

1901 badvalue=None, name=None, longname=None,

1902 shapes=None, extradoc=None, seed=None):

1903

1904 super().__init__(seed)

1905

1906 if extradoc is not None:

1907 warnings.warn("extradoc is deprecated and will be removed in "

1908 "SciPy 1.11.0", DeprecationWarning)

1909

1910 # save the ctor parameters, cf generic freeze

1911 self._ctor_param = dict(

1912 momtype=momtype, a=a, b=b, xtol=xtol,

1913 badvalue=badvalue, name=name, longname=longname,

1914 shapes=shapes, extradoc=extradoc, seed=seed)

1915

1916 if badvalue is None:

1917 badvalue = nan

1918 if name is None:

1919 name = 'Distribution'

1920 self.badvalue = badvalue

1921 self.name = name

1922 self.a = a

1923 self.b = b

1924 if a is None:

1925 self.a = -inf

1926 if b is None:

1927 self.b = inf

1928 self.xtol = xtol

1929 self.moment_type = momtype

1930 self.shapes = shapes

1931 self.extradoc = extradoc

1932

1933 self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],

1934 locscale_in='loc=0, scale=1',

1935 locscale_out='loc, scale')

1936 self._attach_methods()

1937

1938 if longname is None:

1939 if name[0] in ['aeiouAEIOU']:

1940 hstr = "An "

1941 else:

1942 hstr = "A "

1943 longname = hstr + name

1944

1945 if sys.flags.optimize < 2:

1946 # Skip adding docstrings if interpreter is run with -OO

1947 if self.__doc__ is None:

1948 self._construct_default_doc(longname=longname,

1949 extradoc=extradoc,

1950 docdict=docdict,

1951 discrete='continuous')

1952 else:

1953 dct = dict(distcont)

1954 self._construct_doc(docdict, dct.get(self.name))

1955

1956 def __getstate__(self):

1957 dct = self.__dict__.copy()

1958

1959 # these methods will be remade in __setstate__

1960 # _random_state attribute is taken care of by rv_generic

1961 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",

1962 "_cdfvec", "_ppfvec", "vecentropy", "generic_moment"]

1963 [dct.pop(attr, None) for attr in attrs]

1964 return dct

1965

1966 def _attach_methods(self):

1967 """

1968 Attaches dynamically created methods to the rv_continuous instance.

1969 """

1970 # _attach_methods is responsible for calling _attach_argparser_methods

1971 self._attach_argparser_methods()

1972

1973 # nin correction

1974 self._ppfvec = vectorize(self._ppf_single, otypes='d')

1975 self._ppfvec.nin = self.numargs + 1

1976 self.vecentropy = vectorize(self._entropy, otypes='d')

1977 self._cdfvec = vectorize(self._cdf_single, otypes='d')

1978 self._cdfvec.nin = self.numargs + 1

1979

1980 if self.moment_type == 0:

1981 self.generic_moment = vectorize(self._mom0_sc, otypes='d')

1982 else:

1983 self.generic_moment = vectorize(self._mom1_sc, otypes='d')

1984 # Because of the *args argument of _mom0_sc, vectorize cannot count the

1985 # number of arguments correctly.

1986 self.generic_moment.nin = self.numargs + 1

1987

1988 def _updated_ctor_param(self):

1989 """Return the current version of _ctor_param, possibly updated by user.

1990

1991 Used by freezing.

1992 Keep this in sync with the signature of __init__.

1993 """

1994 dct = self._ctor_param.copy()

1995 dct['a'] = self.a

1996 dct['b'] = self.b

1997 dct['xtol'] = self.xtol

1998 dct['badvalue'] = self.badvalue

1999 dct['name'] = self.name

2000 dct['shapes'] = self.shapes

2001 dct['extradoc'] = self.extradoc

2002 return dct

2003

2004 def _ppf_to_solve(self, x, q, *args):

2005 return self.cdf(*(x, )+args)-q

2006

2007 def _ppf_single(self, q, *args):

2008 factor = 10.

2009 left, right = self._get_support(*args)

2010

2011 if np.isinf(left):

2012 left = min(-factor, right)

2013 while self._ppf_to_solve(left, q, *args) > 0.:

2014 left, right = left * factor, left

2015 # left is now such that cdf(left) <= q

2016 # if right has changed, then cdf(right) > q

2017

2018 if np.isinf(right):

2019 right = max(factor, left)

2020 while self._ppf_to_solve(right, q, *args) < 0.:

2021 left, right = right, right * factor

2022 # right is now such that cdf(right) >= q

2023

2024 return optimize.brentq(self._ppf_to_solve,

2025 left, right, args=(q,)+args, xtol=self.xtol)

2026

2027 # moment from definition

2028 def _mom_integ0(self, x, m, *args):

2029 return x**m * self.pdf(x, *args)

2030

2031 def _mom0_sc(self, m, *args):

2032 _a, _b = self._get_support(*args)

2033 return integrate.quad(self._mom_integ0, _a, _b,

2034 args=(m,)+args)[0]

2035

2036 # moment calculated using ppf

2037 def _mom_integ1(self, q, m, *args):

2038 return (self.ppf(q, *args))**m

2039

2040 def _mom1_sc(self, m, *args):

2041 return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]

2042

2043 def _pdf(self, x, *args):

2044 return _derivative(self._cdf, x, dx=1e-5, args=args, order=5)

2045

2046 # Could also define any of these

2047 def _logpdf(self, x, *args):

2048 p = self._pdf(x, *args)

2049 with np.errstate(divide='ignore'):

2050 return log(p)

2051

2052 def _logpxf(self, x, *args):

2053 # continuous distributions have PDF, discrete have PMF, but sometimes

2054 # the distinction doesn't matter. This lets us use `_logpxf` for both

2055 # discrete and continuous distributions.

2056 return self._logpdf(x, *args)

2057

2058 def _cdf_single(self, x, *args):

2059 _a, _b = self._get_support(*args)

2060 return integrate.quad(self._pdf, _a, x, args=args)[0]

2061

2062 def _cdf(self, x, *args):

2063 return self._cdfvec(x, *args)

2064

2065 # generic _argcheck, _logcdf, _sf, _logsf, _ppf, _isf, _rvs are defined

2066 # in rv_generic

2067

2068 def pdf(self, x, *args, **kwds):

2069 """Probability density function at x of the given RV.

2070

2071 Parameters

2072 ----------

2073 x : array_like

2074 quantiles

2075 arg1, arg2, arg3,... : array_like

2076 The shape parameter(s) for the distribution (see docstring of the

2077 instance object for more information)

2078 loc : array_like, optional

2079 location parameter (default=0)

2080 scale : array_like, optional

2081 scale parameter (default=1)

2082

2083 Returns

2084 -------

2085 pdf : ndarray

2086 Probability density function evaluated at x

2087

2088 """

2089 args, loc, scale = self._parse_args(*args, **kwds)

2090 x, loc, scale = map(asarray, (x, loc, scale))

2091 args = tuple(map(asarray, args))

2092 dtyp = np.promote_types(x.dtype, np.float64)

2093 x = np.asarray((x - loc)/scale, dtype=dtyp)

2094 cond0 = self._argcheck(*args) & (scale > 0)

2095 cond1 = self._support_mask(x, *args) & (scale > 0)

2096 cond = cond0 & cond1

2097 output = zeros(shape(cond), dtyp)

2098 putmask(output, (1-cond0)+np.isnan(x), self.badvalue)

2099 if np.any(cond):

2100 goodargs = argsreduce(cond, *((x,)+args+(scale,)))

2101 scale, goodargs = goodargs[-1], goodargs[:-1]

2102 place(output, cond, self._pdf(*goodargs) / scale)

2103 if output.ndim == 0:

2104 return output[()]

2105 return output

2106

2107 def logpdf(self, x, *args, **kwds):

2108 """Log of the probability density function at x of the given RV.

2109

2110 This uses a more numerically accurate calculation if available.

2111

2112 Parameters

2113 ----------

2114 x : array_like

2115 quantiles

2116 arg1, arg2, arg3,... : array_like

2117 The shape parameter(s) for the distribution (see docstring of the

2118 instance object for more information)

2119 loc : array_like, optional

2120 location parameter (default=0)

2121 scale : array_like, optional

2122 scale parameter (default=1)

2123

2124 Returns

2125 -------

2126 logpdf : array_like

2127 Log of the probability density function evaluated at x

2128

2129 """

2130 args, loc, scale = self._parse_args(*args, **kwds)

2131 x, loc, scale = map(asarray, (x, loc, scale))

2132 args = tuple(map(asarray, args))

2133 dtyp = np.promote_types(x.dtype, np.float64)

2134 x = np.asarray((x - loc)/scale, dtype=dtyp)

2135 cond0 = self._argcheck(*args) & (scale > 0)

2136 cond1 = self._support_mask(x, *args) & (scale > 0)

2137 cond = cond0 & cond1

2138 output = empty(shape(cond), dtyp)

2139 output.fill(NINF)

2140 putmask(output, (1-cond0)+np.isnan(x), self.badvalue)

2141 if np.any(cond):

2142 goodargs = argsreduce(cond, *((x,)+args+(scale,)))

2143 scale, goodargs = goodargs[-1], goodargs[:-1]

2144 place(output, cond, self._logpdf(*goodargs) - log(scale))

2145 if output.ndim == 0:

2146 return output[()]

2147 return output

2148

2149 def cdf(self, x, *args, **kwds):

2150 """

2151 Cumulative distribution function of the given RV.

2152

2153 Parameters

2154 ----------

2155 x : array_like

2156 quantiles

2157 arg1, arg2, arg3,... : array_like

2158 The shape parameter(s) for the distribution (see docstring of the

2159 instance object for more information)

2160 loc : array_like, optional

2161 location parameter (default=0)

2162 scale : array_like, optional

2163 scale parameter (default=1)

2164

2165 Returns

2166 -------

2167 cdf : ndarray

2168 Cumulative distribution function evaluated at `x`

2169

2170 """

2171 args, loc, scale = self._parse_args(*args, **kwds)

2172 x, loc, scale = map(asarray, (x, loc, scale))

2173 args = tuple(map(asarray, args))

2174 _a, _b = self._get_support(*args)

2175 dtyp = np.promote_types(x.dtype, np.float64)

2176 x = np.asarray((x - loc)/scale, dtype=dtyp)

2177 cond0 = self._argcheck(*args) & (scale > 0)

2178 cond1 = self._open_support_mask(x, *args) & (scale > 0)

2179 cond2 = (x >= np.asarray(_b)) & cond0

2180 cond = cond0 & cond1

2181 output = zeros(shape(cond), dtyp)

2182 place(output, (1-cond0)+np.isnan(x), self.badvalue)

2183 place(output, cond2, 1.0)

2184 if np.any(cond): # call only if at least 1 entry

2185 goodargs = argsreduce(cond, *((x,)+args))

2186 place(output, cond, self._cdf(*goodargs))

2187 if output.ndim == 0:

2188 return output[()]

2189 return output

2190

2191 def logcdf(self, x, *args, **kwds):

2192 """Log of the cumulative distribution function at x of the given RV.

2193

2194 Parameters

2195 ----------

2196 x : array_like

2197 quantiles

2198 arg1, arg2, arg3,... : array_like

2199 The shape parameter(s) for the distribution (see docstring of the

2200 instance object for more information)

2201 loc : array_like, optional

2202 location parameter (default=0)

2203 scale : array_like, optional

2204 scale parameter (default=1)

2205

2206 Returns

2207 -------

2208 logcdf : array_like

2209 Log of the cumulative distribution function evaluated at x

2210

2211 """

2212 args, loc, scale = self._parse_args(*args, **kwds)

2213 x, loc, scale = map(asarray, (x, loc, scale))

2214 args = tuple(map(asarray, args))

2215 _a, _b = self._get_support(*args)

2216 dtyp = np.promote_types(x.dtype, np.float64)

2217 x = np.asarray((x - loc)/scale, dtype=dtyp)

2218 cond0 = self._argcheck(*args) & (scale > 0)

2219 cond1 = self._open_support_mask(x, *args) & (scale > 0)

2220 cond2 = (x >= _b) & cond0

2221 cond = cond0 & cond1

2222 output = empty(shape(cond), dtyp)

2223 output.fill(NINF)

2224 place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue)

2225 place(output, cond2, 0.0)

2226 if np.any(cond): # call only if at least 1 entry

2227 goodargs = argsreduce(cond, *((x,)+args))

2228 place(output, cond, self._logcdf(*goodargs))

2229 if output.ndim == 0:

2230 return output[()]

2231 return output

2232

2233 def sf(self, x, *args, **kwds):

2234 """Survival function (1 - `cdf`) at x of the given RV.

2235

2236 Parameters

2237 ----------

2238 x : array_like

2239 quantiles

2240 arg1, arg2, arg3,... : array_like

2241 The shape parameter(s) for the distribution (see docstring of the

2242 instance object for more information)

2243 loc : array_like, optional

2244 location parameter (default=0)

2245 scale : array_like, optional

2246 scale parameter (default=1)

2247

2248 Returns

2249 -------

2250 sf : array_like

2251 Survival function evaluated at x

2252

2253 """

2254 args, loc, scale = self._parse_args(*args, **kwds)

2255 x, loc, scale = map(asarray, (x, loc, scale))

2256 args = tuple(map(asarray, args))

2257 _a, _b = self._get_support(*args)

2258 dtyp = np.promote_types(x.dtype, np.float64)

2259 x = np.asarray((x - loc)/scale, dtype=dtyp)

2260 cond0 = self._argcheck(*args) & (scale > 0)

2261 cond1 = self._open_support_mask(x, *args) & (scale > 0)

2262 cond2 = cond0 & (x <= _a)

2263 cond = cond0 & cond1

2264 output = zeros(shape(cond), dtyp)

2265 place(output, (1-cond0)+np.isnan(x), self.badvalue)

2266 place(output, cond2, 1.0)

2267 if np.any(cond):

2268 goodargs = argsreduce(cond, *((x,)+args))

2269 place(output, cond, self._sf(*goodargs))

2270 if output.ndim == 0:

2271 return output[()]

2272 return output

2273

2274 def logsf(self, x, *args, **kwds):

2275 """Log of the survival function of the given RV.

2276

2277 Returns the log of the "survival function," defined as (1 - `cdf`),

2278 evaluated at `x`.

2279

2280 Parameters

2281 ----------

2282 x : array_like

2283 quantiles

2284 arg1, arg2, arg3,... : array_like

2285 The shape parameter(s) for the distribution (see docstring of the

2286 instance object for more information)

2287 loc : array_like, optional

2288 location parameter (default=0)

2289 scale : array_like, optional

2290 scale parameter (default=1)

2291

2292 Returns

2293 -------

2294 logsf : ndarray

2295 Log of the survival function evaluated at `x`.

2296

2297 """

2298 args, loc, scale = self._parse_args(*args, **kwds)

2299 x, loc, scale = map(asarray, (x, loc, scale))

2300 args = tuple(map(asarray, args))

2301 _a, _b = self._get_support(*args)

2302 dtyp = np.promote_types(x.dtype, np.float64)

2303 x = np.asarray((x - loc)/scale, dtype=dtyp)

2304 cond0 = self._argcheck(*args) & (scale > 0)

2305 cond1 = self._open_support_mask(x, *args) & (scale > 0)

2306 cond2 = cond0 & (x <= _a)

2307 cond = cond0 & cond1

2308 output = empty(shape(cond), dtyp)

2309 output.fill(NINF)

2310 place(output, (1-cond0)+np.isnan(x), self.badvalue)

2311 place(output, cond2, 0.0)

2312 if np.any(cond):

2313 goodargs = argsreduce(cond, *((x,)+args))

2314 place(output, cond, self._logsf(*goodargs))

2315 if output.ndim == 0:

2316 return output[()]

2317 return output

2318

2319 def ppf(self, q, *args, **kwds):

2320 """Percent point function (inverse of `cdf`) at q of the given RV.

2321

2322 Parameters

2323 ----------

2324 q : array_like

2325 lower tail probability

2326 arg1, arg2, arg3,... : array_like

2327 The shape parameter(s) for the distribution (see docstring of the

2328 instance object for more information)

2329 loc : array_like, optional

2330 location parameter (default=0)

2331 scale : array_like, optional

2332 scale parameter (default=1)

2333

2334 Returns

2335 -------

2336 x : array_like

2337 quantile corresponding to the lower tail probability q.

2338

2339 """

2340 args, loc, scale = self._parse_args(*args, **kwds)

2341 q, loc, scale = map(asarray, (q, loc, scale))

2342 args = tuple(map(asarray, args))

2343 _a, _b = self._get_support(*args)

2344 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)

2345 cond1 = (0 < q) & (q < 1)

2346 cond2 = cond0 & (q == 0)

2347 cond3 = cond0 & (q == 1)

2348 cond = cond0 & cond1

2349 output = np.full(shape(cond), fill_value=self.badvalue)

2350

2351 lower_bound = _a * scale + loc

2352 upper_bound = _b * scale + loc

2353 place(output, cond2, argsreduce(cond2, lower_bound)[0])

2354 place(output, cond3, argsreduce(cond3, upper_bound)[0])

2355

2356 if np.any(cond): # call only if at least 1 entry

2357 goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))

2358 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]

2359 place(output, cond, self._ppf(*goodargs) * scale + loc)

2360 if output.ndim == 0:

2361 return output[()]

2362 return output

2363

2364 def isf(self, q, *args, **kwds):

2365 """Inverse survival function (inverse of `sf`) at q of the given RV.

2366

2367 Parameters

2368 ----------

2369 q : array_like

2370 upper tail probability

2371 arg1, arg2, arg3,... : array_like

2372 The shape parameter(s) for the distribution (see docstring of the

2373 instance object for more information)

2374 loc : array_like, optional

2375 location parameter (default=0)

2376 scale : array_like, optional

2377 scale parameter (default=1)

2378

2379 Returns

2380 -------

2381 x : ndarray or scalar

2382 Quantile corresponding to the upper tail probability q.

2383

2384 """

2385 args, loc, scale = self._parse_args(*args, **kwds)

2386 q, loc, scale = map(asarray, (q, loc, scale))

2387 args = tuple(map(asarray, args))

2388 _a, _b = self._get_support(*args)

2389 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)

2390 cond1 = (0 < q) & (q < 1)

2391 cond2 = cond0 & (q == 1)

2392 cond3 = cond0 & (q == 0)

2393 cond = cond0 & cond1

2394 output = np.full(shape(cond), fill_value=self.badvalue)

2395

2396 lower_bound = _a * scale + loc

2397 upper_bound = _b * scale + loc

2398 place(output, cond2, argsreduce(cond2, lower_bound)[0])

2399 place(output, cond3, argsreduce(cond3, upper_bound)[0])

2400

2401 if np.any(cond):

2402 goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))

2403 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]

2404 place(output, cond, self._isf(*goodargs) * scale + loc)

2405 if output.ndim == 0:

2406 return output[()]

2407 return output

2408

2409 def _unpack_loc_scale(self, theta):

2410 try:

2411 loc = theta[-2]

2412 scale = theta[-1]

2413 args = tuple(theta[:-2])

2414 except IndexError as e:

2415 raise ValueError("Not enough input arguments.") from e

2416 return loc, scale, args

2417

2418 def _fitstart(self, data, args=None):

2419 """Starting point for fit (shape arguments + loc + scale)."""

2420 if args is None:

2421 args = (1.0,)*self.numargs

2422 loc, scale = self._fit_loc_scale_support(data, *args)

2423 return args + (loc, scale)

2424

2425 def _reduce_func(self, args, kwds, data=None):

2426 """

2427 Return the (possibly reduced) function to optimize in order to find MLE

2428 estimates for the .fit method.

2429 """

2430 # Convert fixed shape parameters to the standard numeric form: e.g. for

2431 # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value

2432 # for `f0`, `fa` or 'fix_a'. The following converts the latter two

2433 # into the first (numeric) form.

2434 shapes = []

2435 if self.shapes:

2436 shapes = self.shapes.replace(',', ' ').split()

2437 for j, s in enumerate(shapes):

2438 key = 'f' + str(j)

2439 names = [key, 'f' + s, 'fix_' + s]

2440 val = _get_fixed_fit_value(kwds, names)

2441 if val is not None:

2442 kwds[key] = val

2443

2444 args = list(args)

2445 Nargs = len(args)

2446 fixedn = []

2447 names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']

2448 x0 = []

2449 for n, key in enumerate(names):

2450 if key in kwds:

2451 fixedn.append(n)

2452 args[n] = kwds.pop(key)

2453 else:

2454 x0.append(args[n])

2455

2456 methods = {"mle", "mm"}

2457 method = kwds.pop('method', "mle").lower()

2458 if method == "mm":

2459 n_params = len(shapes) + 2 - len(fixedn)

2460 exponents = (np.arange(1, n_params+1))[:, np.newaxis]

2461 data_moments = np.sum(data[None, :]**exponents/len(data), axis=1)

2462

2463 def objective(theta, x):

2464 return self._moment_error(theta, x, data_moments)

2465 elif method == "mle":

2466 objective = self._penalized_nnlf

2467 else:

2468 raise ValueError("Method '{0}' not available; must be one of {1}"

2469 .format(method, methods))

2470

2471 if len(fixedn) == 0:

2472 func = objective

2473 restore = None

2474 else:

2475 if len(fixedn) == Nargs:

2476 raise ValueError(

2477 "All parameters fixed. There is nothing to optimize.")

2478

2479 def restore(args, theta):

2480 # Replace with theta for all numbers not in fixedn

2481 # This allows the non-fixed values to vary, but

2482 # we still call self.nnlf with all parameters.

2483 i = 0

2484 for n in range(Nargs):

2485 if n not in fixedn:

2486 args[n] = theta[i]

2487 i += 1

2488 return args

2489

2490 def func(theta, x):

2491 newtheta = restore(args[:], theta)

2492 return objective(newtheta, x)

2493

2494 return x0, func, restore, args

2495

2496 def _moment_error(self, theta, x, data_moments):

2497 loc, scale, args = self._unpack_loc_scale(theta)

2498 if not self._argcheck(*args) or scale <= 0:

2499 return inf

2500

2501 dist_moments = np.array([self.moment(i+1, *args, loc=loc, scale=scale)

2502 for i in range(len(data_moments))])

2503 if np.any(np.isnan(dist_moments)):

2504 raise ValueError("Method of moments encountered a non-finite "

2505 "distribution moment and cannot continue. "

2506 "Consider trying method='MLE'.")

2507

2508 return (((data_moments - dist_moments) /

2509 np.maximum(np.abs(data_moments), 1e-8))**2).sum()

2510

2511 def fit(self, data, *args, **kwds):

2512 """

2513 Return estimates of shape (if applicable), location, and scale

2514 parameters from data. The default estimation method is Maximum

2515 Likelihood Estimation (MLE), but Method of Moments (MM)

2516 is also available.

2517

2518 Starting estimates for

2519 the fit are given by input arguments; for any arguments not provided

2520 with starting estimates, ``self._fitstart(data)`` is called to generate

2521 such.

2522

2523 One can hold some parameters fixed to specific values by passing in

2524 keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)

2525 and ``floc`` and ``fscale`` (for location and scale parameters,

2526 respectively).

2527

2528 Parameters

2529 ----------

2530 data : array_like

2531 Data to use in estimating the distribution parameters.

2532 arg1, arg2, arg3,... : floats, optional

2533 Starting value(s) for any shape-characterizing arguments (those not

2534 provided will be determined by a call to ``_fitstart(data)``).

2535 No default value.

2536 **kwds : floats, optional

2537 - `loc`: initial guess of the distribution's location parameter.

2538 - `scale`: initial guess of the distribution's scale parameter.

2539

2540 Special keyword arguments are recognized as holding certain

2541 parameters fixed:

2542

2543 - f0...fn : hold respective shape parameters fixed.

2544 Alternatively, shape parameters to fix can be specified by name.

2545 For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a``

2546 are equivalent to ``f0``, and ``fb`` and ``fix_b`` are

2547 equivalent to ``f1``.

2548

2549 - floc : hold location parameter fixed to specified value.

2550

2551 - fscale : hold scale parameter fixed to specified value.

2552

2553 - optimizer : The optimizer to use.

2554 The optimizer must take ``func``,

2555 and starting position as the first two arguments,

2556 plus ``args`` (for extra arguments to pass to the

2557 function to be optimized) and ``disp=0`` to suppress

2558 output as keyword arguments.

2559

2560 - method : The method to use. The default is "MLE" (Maximum

2561 Likelihood Estimate); "MM" (Method of Moments)

2562 is also available.

2563

2564 Raises

2565 ------

2566 TypeError, ValueError

2567 If an input is invalid

2568 `~scipy.stats.FitError`

2569 If fitting fails or the fit produced would be invalid

2570

2571 Returns

2572 -------

2573 parameter_tuple : tuple of floats

2574 Estimates for any shape parameters (if applicable),

2575 followed by those for location and scale.

2576 For most random variables, shape statistics

2577 will be returned, but there are exceptions (e.g. ``norm``).

2578

2579 Notes

2580 -----

2581 With ``method="MLE"`` (default), the fit is computed by minimizing

2582 the negative log-likelihood function. A large, finite penalty

2583 (rather than infinite negative log-likelihood) is applied for

2584 observations beyond the support of the distribution.

2585

2586 With ``method="MM"``, the fit is computed by minimizing the L2 norm

2587 of the relative errors between the first *k* raw (about zero) data

2588 moments and the corresponding distribution moments, where *k* is the

2589 number of non-fixed parameters.

2590 More precisely, the objective function is::

2591

2592 (((data_moments - dist_moments)

2593 / np.maximum(np.abs(data_moments), 1e-8))**2).sum()

2594

2595 where the constant ``1e-8`` avoids division by zero in case of

2596 vanishing data moments. Typically, this error norm can be reduced to

2597 zero.

2598 Note that the standard method of moments can produce parameters for

2599 which some data are outside the support of the fitted distribution;

2600 this implementation does nothing to prevent this.

2601

2602 For either method,

2603 the returned answer is not guaranteed to be globally optimal; it

2604 may only be locally optimal, or the optimization may fail altogether.

2605 If the data contain any of ``np.nan``, ``np.inf``, or ``-np.inf``,

2606 the `fit` method will raise a ``RuntimeError``.

2607

2608 Examples

2609 --------

2610

2611 Generate some data to fit: draw random variates from the `beta`

2612 distribution

2613

2614 >>> from scipy.stats import beta

2615 >>> a, b = 1., 2.

2616 >>> x = beta.rvs(a, b, size=1000)

2617

2618 Now we can fit all four parameters (``a``, ``b``, ``loc``

2619 and ``scale``):

2620

2621 >>> a1, b1, loc1, scale1 = beta.fit(x)

2622

2623 We can also use some prior knowledge about the dataset: let's keep

2624 ``loc`` and ``scale`` fixed:

2625

2626 >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)

2627 >>> loc1, scale1

2628 (0, 1)

2629

2630 We can also keep shape parameters fixed by using ``f``-keywords. To

2631 keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,

2632 equivalently, ``fa=1``:

2633

2634 >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)

2635 >>> a1

2636 1

2637

2638 Not all distributions return estimates for the shape parameters.

2639 ``norm`` for example just returns estimates for location and scale:

2640

2641 >>> from scipy.stats import norm

2642 >>> x = norm.rvs(a, b, size=1000, random_state=123)

2643 >>> loc1, scale1 = norm.fit(x)

2644 >>> loc1, scale1

2645 (0.92087172783841631, 2.0015750750324668)

2646 """

2647 data = np.asarray(data)

2648 method = kwds.get('method', "mle").lower()

2649

2650 # memory for method of moments

2651 Narg = len(args)

2652 if Narg > self.numargs:

2653 raise TypeError("Too many input arguments.")

2654

2655 if not np.isfinite(data).all():

2656 raise ValueError("The data contains non-finite values.")

2657

2658 start = [None]*2

2659 if (Narg < self.numargs) or not ('loc' in kwds and

2660 'scale' in kwds):

2661 # get distribution specific starting locations

2662 start = self._fitstart(data)

2663 args += start[Narg:-2]

2664 loc = kwds.pop('loc', start[-2])

2665 scale = kwds.pop('scale', start[-1])

2666 args += (loc, scale)

2667 x0, func, restore, args = self._reduce_func(args, kwds, data=data)

2668 optimizer = kwds.pop('optimizer', optimize.fmin)

2669 # convert string to function in scipy.optimize

2670 optimizer = _fit_determine_optimizer(optimizer)

2671 # by now kwds must be empty, since everybody took what they needed

2672 if kwds:

2673 raise TypeError("Unknown arguments: %s." % kwds)

2674

2675 # In some cases, method of moments can be done with fsolve/root

2676 # instead of an optimizer, but sometimes no solution exists,

2677 # especially when the user fixes parameters. Minimizing the sum

2678 # of squares of the error generalizes to these cases.

2679 vals = optimizer(func, x0, args=(ravel(data),), disp=0)

2680 obj = func(vals, data)

2681

2682 if restore is not None:

2683 vals = restore(args, vals)

2684 vals = tuple(vals)

2685

2686 loc, scale, shapes = self._unpack_loc_scale(vals)

2687 if not (np.all(self._argcheck(*shapes)) and scale > 0):

2688 raise FitError("Optimization converged to parameters that are "

2689 "outside the range allowed by the distribution.")

2690

2691 if method == 'mm':

2692 if not np.isfinite(obj):

2693 raise FitError("Optimization failed: either a data moment "

2694 "or fitted distribution moment is "

2695 "non-finite.")

2696

2697 return vals

2698

2699 def _fit_loc_scale_support(self, data, *args):

2700 """Estimate loc and scale parameters from data accounting for support.

2701

2702 Parameters

2703 ----------

2704 data : array_like

2705 Data to fit.

2706 arg1, arg2, arg3,... : array_like

2707 The shape parameter(s) for the distribution (see docstring of the

2708 instance object for more information).

2709

2710 Returns

2711 -------

2712 Lhat : float

2713 Estimated location parameter for the data.

2714 Shat : float

2715 Estimated scale parameter for the data.

2716

2717 """

2718 data = np.asarray(data)

2719

2720 # Estimate location and scale according to the method of moments.

2721 loc_hat, scale_hat = self.fit_loc_scale(data, *args)

2722

2723 # Compute the support according to the shape parameters.

2724 self._argcheck(*args)

2725 _a, _b = self._get_support(*args)

2726 a, b = _a, _b

2727 support_width = b - a

2728

2729 # If the support is empty then return the moment-based estimates.

2730 if support_width <= 0:

2731 return loc_hat, scale_hat

2732

2733 # Compute the proposed support according to the loc and scale

2734 # estimates.

2735 a_hat = loc_hat + a * scale_hat

2736 b_hat = loc_hat + b * scale_hat

2737

2738 # Use the moment-based estimates if they are compatible with the data.

2739 data_a = np.min(data)

2740 data_b = np.max(data)

2741 if a_hat < data_a and data_b < b_hat:

2742 return loc_hat, scale_hat

2743

2744 # Otherwise find other estimates that are compatible with the data.

2745 data_width = data_b - data_a

2746 rel_margin = 0.1

2747 margin = data_width * rel_margin

2748

2749 # For a finite interval, both the location and scale

2750 # should have interesting values.

2751 if support_width < np.inf:

2752 loc_hat = (data_a - a) - margin

2753 scale_hat = (data_width + 2 * margin) / support_width

2754 return loc_hat, scale_hat

2755

2756 # For a one-sided interval, use only an interesting location parameter.

2757 if a > -np.inf:

2758 return (data_a - a) - margin, 1

2759 elif b < np.inf:

2760 return (data_b - b) + margin, 1

2761 else:

2762 raise RuntimeError

2763

2764 def fit_loc_scale(self, data, *args):

2765 """

2766 Estimate loc and scale parameters from data using 1st and 2nd moments.

2767

2768 Parameters

2769 ----------

2770 data : array_like

2771 Data to fit.

2772 arg1, arg2, arg3,... : array_like

2773 The shape parameter(s) for the distribution (see docstring of the

2774 instance object for more information).

2775

2776 Returns

2777 -------

2778 Lhat : float

2779 Estimated location parameter for the data.

2780 Shat : float

2781 Estimated scale parameter for the data.

2782

2783 """

2784 mu, mu2 = self.stats(*args, **{'moments': 'mv'})

2785 tmp = asarray(data)

2786 muhat = tmp.mean()

2787 mu2hat = tmp.var()

2788 Shat = sqrt(mu2hat / mu2)

2789 Lhat = muhat - Shat*mu

2790 if not np.isfinite(Lhat):

2791 Lhat = 0

2792 if not (np.isfinite(Shat) and (0 < Shat)):

2793 Shat = 1

2794 return Lhat, Shat

2795

2796 def _entropy(self, *args):

2797 def integ(x):

2798 val = self._pdf(x, *args)

2799 return entr(val)

2800

2801 # upper limit is often inf, so suppress warnings when integrating

2802 _a, _b = self._get_support(*args)

2803 with np.errstate(over='ignore'):

2804 h = integrate.quad(integ, _a, _b)[0]

2805

2806 if not np.isnan(h):

2807 return h

2808 else:

2809 # try with different limits if integration problems

2810 low, upp = self.ppf([1e-10, 1. - 1e-10], *args)

2811 if np.isinf(_b):

2812 upper = upp

2813 else:

2814 upper = _b

2815 if np.isinf(_a):

2816 lower = low

2817 else:

2818 lower = _a

2819 return integrate.quad(integ, lower, upper)[0]

2820

2821 def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,

2822 conditional=False, **kwds):

2823 """Calculate expected value of a function with respect to the

2824 distribution by numerical integration.

2825

2826 The expected value of a function ``f(x)`` with respect to a

2827 distribution ``dist`` is defined as::

2828

2829 ub

2830 E[f(x)] = Integral(f(x) * dist.pdf(x)),

2831 lb

2832

2833 where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)``

2834 distribution. If the bounds ``lb`` and ``ub`` correspond to the

2835 support of the distribution, e.g. ``[-inf, inf]`` in the default

2836 case, then the integral is the unrestricted expectation of ``f(x)``.

2837 Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0``

2838 outside a finite interval in which case the expectation is

2839 calculated within the finite range ``[lb, ub]``.

2840

2841 Parameters

2842 ----------

2843 func : callable, optional

2844 Function for which integral is calculated. Takes only one argument.

2845 The default is the identity mapping f(x) = x.

2846 args : tuple, optional

2847 Shape parameters of the distribution.

2848 loc : float, optional

2849 Location parameter (default=0).

2850 scale : float, optional

2851 Scale parameter (default=1).

2852 lb, ub : scalar, optional

2853 Lower and upper bound for integration. Default is set to the

2854 support of the distribution.

2855 conditional : bool, optional

2856 If True, the integral is corrected by the conditional probability

2857 of the integration interval. The return value is the expectation

2858 of the function, conditional on being in the given interval.

2859 Default is False.

2860

2861 Additional keyword arguments are passed to the integration routine.

2862

2863 Returns

2864 -------

2865 expect : float

2866 The calculated expected value.

2867

2868 Notes

2869 -----

2870 The integration behavior of this function is inherited from

2871 `scipy.integrate.quad`. Neither this function nor

2872 `scipy.integrate.quad` can verify whether the integral exists or is

2873 finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and

2874 ``cauchy(0).expect()`` returns ``0.0``.

2875

2876 Likewise, the accuracy of results is not verified by the function.

2877 `scipy.integrate.quad` is typically reliable for integrals that are

2878 numerically favorable, but it is not guaranteed to converge

2879 to a correct value for all possible intervals and integrands. This

2880 function is provided for convenience; for critical applications,

2881 check results against other integration methods.

2882

2883 The function is not vectorized.

2884

2885 Examples

2886 --------

2887

2888 To understand the effect of the bounds of integration consider

2889

2890 >>> from scipy.stats import expon

2891 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0)

2892 0.6321205588285578

2893

2894 This is close to

2895

2896 >>> expon(1).cdf(2.0) - expon(1).cdf(0.0)

2897 0.6321205588285577

2898

2899 If ``conditional=True``

2900

2901 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True)

2902 1.0000000000000002

2903

2904 The slight deviation from 1 is due to numerical integration.

2905

2906 The integrand can be treated as a complex-valued function

2907 by passing ``complex_func=True`` to `scipy.integrate.quad` .

2908

2909 >>> import numpy as np

2910 >>> from scipy.stats import vonmises

2911 >>> res = vonmises(loc=2, kappa=1).expect(lambda x: np.exp(1j*x),

2912 ... complex_func=True)

2913 >>> res

2914 (-0.18576377217422957+0.40590124735052263j)

2915

2916 >>> np.angle(res) # location of the (circular) distribution

2917 2.0

2918

2919 """

2920 lockwds = {'loc': loc,

2921 'scale': scale}

2922 self._argcheck(*args)

2923 _a, _b = self._get_support(*args)

2924 if func is None:

2925 def fun(x, *args):

2926 return x * self.pdf(x, *args, **lockwds)

2927 else:

2928 def fun(x, *args):

2929 return func(x) * self.pdf(x, *args, **lockwds)

2930 if lb is None:

2931 lb = loc + _a * scale

2932 if ub is None:

2933 ub = loc + _b * scale

2934

2935 cdf_bounds = self.cdf([lb, ub], *args, **lockwds)

2936 invfac = cdf_bounds[1] - cdf_bounds[0]

2937

2938 kwds['args'] = args

2939

2940 # split interval to help integrator w/ infinite support; see gh-8928

2941 alpha = 0.05 # split body from tails at probability mass `alpha`

2942 inner_bounds = np.array([alpha, 1-alpha])

2943 cdf_inner_bounds = cdf_bounds[0] + invfac * inner_bounds

2944 c, d = loc + self._ppf(cdf_inner_bounds, *args) * scale

2945

2946 # Do not silence warnings from integration.

2947 lbc = integrate.quad(fun, lb, c, **kwds)[0]

2948 cd = integrate.quad(fun, c, d, **kwds)[0]

2949 dub = integrate.quad(fun, d, ub, **kwds)[0]

2950 vals = (lbc + cd + dub)

2951

2952 if conditional:

2953 vals /= invfac

2954 return np.array(vals)[()] # make it a numpy scalar like other methods

2955

2956 def _param_info(self):

2957 shape_info = self._shape_info()

2958 loc_info = _ShapeInfo("loc", False, (-np.inf, np.inf), (False, False))

2959 scale_info = _ShapeInfo("scale", False, (0, np.inf), (False, False))

2960 param_info = shape_info + [loc_info, scale_info]

2961 return param_info

2962

2963# Helpers for the discrete distributions

2964def _drv2_moment(self, n, *args):

2965 """Non-central moment of discrete distribution."""

2966 def fun(x):

2967 return np.power(x, n) * self._pmf(x, *args)

2968

2969 _a, _b = self._get_support(*args)

2970 return _expect(fun, _a, _b, self.ppf(0.5, *args), self.inc)

2971

2972

2973def _drv2_ppfsingle(self, q, *args): # Use basic bisection algorithm

2974 _a, _b = self._get_support(*args)

2975 b = _b

2976 a = _a

2977 if isinf(b): # Be sure ending point is > q

2978 b = int(max(100*q, 10))

2979 while 1:

2980 if b >= _b:

2981 qb = 1.0

2982 break

2983 qb = self._cdf(b, *args)

2984 if (qb < q):

2985 b += 10

2986 else:

2987 break

2988 else:

2989 qb = 1.0

2990 if isinf(a): # be sure starting point < q

2991 a = int(min(-100*q, -10))

2992 while 1:

2993 if a <= _a:

2994 qb = 0.0

2995 break

2996 qa = self._cdf(a, *args)

2997 if (qa > q):

2998 a -= 10

2999 else:

3000 break

3001 else:

3002 qa = self._cdf(a, *args)

3003

3004 while 1:

3005 if (qa == q):

3006 return a

3007 if (qb == q):

3008 return b

3009 if b <= a+1:

3010 if qa > q:

3011 return a

3012 else:

3013 return b

3014 c = int((a+b)/2.0)

3015 qc = self._cdf(c, *args)

3016 if (qc < q):

3017 if a != c:

3018 a = c

3019 else:

3020 raise RuntimeError('updating stopped, endless loop')

3021 qa = qc

3022 elif (qc > q):

3023 if b != c:

3024 b = c

3025 else:

3026 raise RuntimeError('updating stopped, endless loop')

3027 qb = qc

3028 else:

3029 return c

3030

3031

3032# Must over-ride one of _pmf or _cdf or pass in

3033# x_k, p(x_k) lists in initialization

3034

3035

3036class rv_discrete(rv_generic):

3037 """A generic discrete random variable class meant for subclassing.

3038

3039 `rv_discrete` is a base class to construct specific distribution classes

3040 and instances for discrete random variables. It can also be used

3041 to construct an arbitrary distribution defined by a list of support

3042 points and corresponding probabilities.

3043

3044 Parameters

3045 ----------

3046 a : float, optional

3047 Lower bound of the support of the distribution, default: 0

3048 b : float, optional

3049 Upper bound of the support of the distribution, default: plus infinity

3050 moment_tol : float, optional

3051 The tolerance for the generic calculation of moments.

3052 values : tuple of two array_like, optional

3053 ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero

3054 probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk``

3055 and ``pk`` must have the same shape.

3056 inc : integer, optional

3057 Increment for the support of the distribution.

3058 Default is 1. (other values have not been tested)

3059 badvalue : float, optional

3060 The value in a result arrays that indicates a value that for which

3061 some argument restriction is violated, default is np.nan.

3062 name : str, optional

3063 The name of the instance. This string is used to construct the default

3064 example for distributions.

3065 longname : str, optional

3066 This string is used as part of the first line of the docstring returned

3067 when a subclass has no docstring of its own. Note: `longname` exists

3068 for backwards compatibility, do not use for new subclasses.

3069 shapes : str, optional

3070 The shape of the distribution. For example "m, n" for a distribution

3071 that takes two integers as the two shape arguments for all its methods

3072 If not provided, shape parameters will be inferred from

3073 the signatures of the private methods, ``_pmf`` and ``_cdf`` of

3074 the instance.

3075 extradoc : str, optional, deprecated

3076 This string is used as the last part of the docstring returned when a

3077 subclass has no docstring of its own. Note: `extradoc` exists for

3078 backwards compatibility and will be removed in SciPy 1.11.0, do not

3079 use for new subclasses.

3080 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

3081 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

3082 singleton is used.

3083 If `seed` is an int, a new ``RandomState`` instance is used,

3084 seeded with `seed`.

3085 If `seed` is already a ``Generator`` or ``RandomState`` instance then

3086 that instance is used.

3087

3088 Methods

3089 -------

3090 rvs

3091 pmf

3092 logpmf

3093 cdf

3094 logcdf

3095 sf

3096 logsf

3097 ppf

3098 isf

3099 moment

3100 stats

3101 entropy

3102 expect

3103 median

3104 mean

3105 std

3106 var

3107 interval

3108 __call__

3109 support

3110

3111 Notes

3112 -----

3113 This class is similar to `rv_continuous`. Whether a shape parameter is

3114 valid is decided by an ``_argcheck`` method (which defaults to checking

3115 that its arguments are strictly positive.)

3116 The main differences are:

3117

3118 - the support of the distribution is a set of integers

3119 - instead of the probability density function, ``pdf`` (and the

3120 corresponding private ``_pdf``), this class defines the

3121 *probability mass function*, `pmf` (and the corresponding

3122 private ``_pmf``.)

3123 - scale parameter is not defined.

3124

3125 To create a new discrete distribution, we would do the following:

3126

3127 >>> from scipy.stats import rv_discrete

3128 >>> class poisson_gen(rv_discrete):

3129 ... "Poisson distribution"

3130 ... def _pmf(self, k, mu):

3131 ... return exp(-mu) * mu**k / factorial(k)

3132

3133 and create an instance::

3134

3135 >>> poisson = poisson_gen(name="poisson")

3136

3137 Note that above we defined the Poisson distribution in the standard form.

3138 Shifting the distribution can be done by providing the ``loc`` parameter

3139 to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)``

3140 delegates the work to ``poisson._pmf(x-loc, mu)``.

3141

3142 **Discrete distributions from a list of probabilities**

3143

3144 Alternatively, you can construct an arbitrary discrete rv defined

3145 on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the

3146 ``values`` keyword argument to the `rv_discrete` constructor.

3147

3148 Examples

3149 --------

3150 Custom made discrete distribution:

3151

3152 >>> import numpy as np

3153 >>> from scipy import stats

3154 >>> xk = np.arange(7)

3155 >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)

3156 >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))

3157 >>>

3158 >>> import matplotlib.pyplot as plt

3159 >>> fig, ax = plt.subplots(1, 1)

3160 >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')

3161 >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)

3162 >>> plt.show()

3163

3164 Random number generation:

3165

3166 >>> R = custm.rvs(size=100)

3167

3168 """

3169 def __new__(cls, a=0, b=inf, name=None, badvalue=None,

3170 moment_tol=1e-8, values=None, inc=1, longname=None,

3171 shapes=None, extradoc=None, seed=None):

3172

3173 if values is not None:

3174 # dispatch to a subclass

3175 return super(rv_discrete, cls).__new__(rv_sample)

3176 else:

3177 # business as usual

3178 return super(rv_discrete, cls).__new__(cls)

3179

3180 def __init__(self, a=0, b=inf, name=None, badvalue=None,

3181 moment_tol=1e-8, values=None, inc=1, longname=None,

3182 shapes=None, extradoc=None, seed=None):

3183

3184 super().__init__(seed)

3185

3186 if extradoc is not None:

3187 warnings.warn("extradoc is deprecated and will be removed in "

3188 "SciPy 1.11.0", DeprecationWarning)

3189

3190 # cf generic freeze

3191 self._ctor_param = dict(

3192 a=a, b=b, name=name, badvalue=badvalue,

3193 moment_tol=moment_tol, values=values, inc=inc,

3194 longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)

3195

3196 if badvalue is None:

3197 badvalue = nan

3198 self.badvalue = badvalue

3199 self.a = a

3200 self.b = b

3201 self.moment_tol = moment_tol

3202 self.inc = inc

3203 self.shapes = shapes

3204

3205 if values is not None:

3206 raise ValueError("rv_discrete.__init__(..., values != None, ...)")

3207

3208 self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],

3209 locscale_in='loc=0',

3210 # scale=1 for discrete RVs

3211 locscale_out='loc, 1')

3212 self._attach_methods()

3213 self._construct_docstrings(name, longname, extradoc)

3214

3215 def __getstate__(self):

3216 dct = self.__dict__.copy()

3217 # these methods will be remade in __setstate__

3218 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",

3219 "_cdfvec", "_ppfvec", "generic_moment"]

3220 [dct.pop(attr, None) for attr in attrs]

3221 return dct

3222

3223 def _attach_methods(self):

3224 """Attaches dynamically created methods to the rv_discrete instance."""

3225 self._cdfvec = vectorize(self._cdf_single, otypes='d')

3226 self.vecentropy = vectorize(self._entropy)

3227

3228 # _attach_methods is responsible for calling _attach_argparser_methods

3229 self._attach_argparser_methods()

3230

3231 # nin correction needs to be after we know numargs

3232 # correct nin for generic moment vectorization

3233 _vec_generic_moment = vectorize(_drv2_moment, otypes='d')

3234 _vec_generic_moment.nin = self.numargs + 2

3235 self.generic_moment = types.MethodType(_vec_generic_moment, self)

3236

3237 # correct nin for ppf vectorization

3238 _vppf = vectorize(_drv2_ppfsingle, otypes='d')

3239 _vppf.nin = self.numargs + 2

3240 self._ppfvec = types.MethodType(_vppf, self)

3241

3242 # now that self.numargs is defined, we can adjust nin

3243 self._cdfvec.nin = self.numargs + 1

3244

3245 def _construct_docstrings(self, name, longname, extradoc):

3246 if name is None:

3247 name = 'Distribution'

3248 self.name = name

3249 self.extradoc = extradoc

3250

3251 # generate docstring for subclass instances

3252 if longname is None:

3253 if name[0] in ['aeiouAEIOU']:

3254 hstr = "An "

3255 else:

3256 hstr = "A "

3257 longname = hstr + name

3258

3259 if sys.flags.optimize < 2:

3260 # Skip adding docstrings if interpreter is run with -OO

3261 if self.__doc__ is None:

3262 self._construct_default_doc(longname=longname,

3263 extradoc=extradoc,

3264 docdict=docdict_discrete,

3265 discrete='discrete')

3266 else:

3267 dct = dict(distdiscrete)

3268 self._construct_doc(docdict_discrete, dct.get(self.name))

3269

3270 # discrete RV do not have the scale parameter, remove it

3271 self.__doc__ = self.__doc__.replace(

3272 '\n scale : array_like, '

3273 'optional\n scale parameter (default=1)', '')

3274

3275 def _updated_ctor_param(self):

3276 """Return the current version of _ctor_param, possibly updated by user.

3277

3278 Used by freezing.

3279 Keep this in sync with the signature of __init__.

3280 """

3281 dct = self._ctor_param.copy()

3282 dct['a'] = self.a

3283 dct['b'] = self.b

3284 dct['badvalue'] = self.badvalue

3285 dct['moment_tol'] = self.moment_tol

3286 dct['inc'] = self.inc

3287 dct['name'] = self.name

3288 dct['shapes'] = self.shapes

3289 dct['extradoc'] = self.extradoc

3290 return dct

3291

3292 def _nonzero(self, k, *args):

3293 return floor(k) == k

3294

3295 def _pmf(self, k, *args):

3296 return self._cdf(k, *args) - self._cdf(k-1, *args)

3297

3298 def _logpmf(self, k, *args):

3299 return log(self._pmf(k, *args))

3300

3301 def _logpxf(self, k, *args):

3302 # continuous distributions have PDF, discrete have PMF, but sometimes

3303 # the distinction doesn't matter. This lets us use `_logpxf` for both

3304 # discrete and continuous distributions.

3305 return self._logpmf(k, *args)

3306

3307 def _unpack_loc_scale(self, theta):

3308 try:

3309 loc = theta[-1]

3310 scale = 1

3311 args = tuple(theta[:-1])

3312 except IndexError as e:

3313 raise ValueError("Not enough input arguments.") from e

3314 return loc, scale, args

3315

3316 def _cdf_single(self, k, *args):

3317 _a, _b = self._get_support(*args)

3318 m = arange(int(_a), k+1)

3319 return np.sum(self._pmf(m, *args), axis=0)

3320

3321 def _cdf(self, x, *args):

3322 k = floor(x)

3323 return self._cdfvec(k, *args)

3324

3325 # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic

3326

3327 def rvs(self, *args, **kwargs):

3328 """Random variates of given type.

3329

3330 Parameters

3331 ----------

3332 arg1, arg2, arg3,... : array_like

3333 The shape parameter(s) for the distribution (see docstring of the

3334 instance object for more information).

3335 loc : array_like, optional

3336 Location parameter (default=0).

3337 size : int or tuple of ints, optional

3338 Defining number of random variates (Default is 1). Note that `size`

3339 has to be given as keyword, not as positional argument.

3340 random_state : {None, int, `numpy.random.Generator`,

3341 `numpy.random.RandomState`}, optional

3342

3343 If `random_state` is None (or `np.random`), the

3344 `numpy.random.RandomState` singleton is used.

3345 If `random_state` is an int, a new ``RandomState`` instance is

3346 used, seeded with `random_state`.

3347 If `random_state` is already a ``Generator`` or ``RandomState``

3348 instance, that instance is used.

3349

3350 Returns

3351 -------

3352 rvs : ndarray or scalar

3353 Random variates of given `size`.

3354

3355 """

3356 kwargs['discrete'] = True

3357 return super().rvs(*args, **kwargs)

3358

3359 def pmf(self, k, *args, **kwds):

3360 """Probability mass function at k of the given RV.

3361

3362 Parameters

3363 ----------

3364 k : array_like

3365 Quantiles.

3366 arg1, arg2, arg3,... : array_like

3367 The shape parameter(s) for the distribution (see docstring of the

3368 instance object for more information)

3369 loc : array_like, optional

3370 Location parameter (default=0).

3371

3372 Returns

3373 -------

3374 pmf : array_like

3375 Probability mass function evaluated at k

3376

3377 """

3378 args, loc, _ = self._parse_args(*args, **kwds)

3379 k, loc = map(asarray, (k, loc))

3380 args = tuple(map(asarray, args))

3381 _a, _b = self._get_support(*args)

3382 k = asarray((k-loc))

3383 cond0 = self._argcheck(*args)

3384 cond1 = (k >= _a) & (k <= _b)

3385 if not isinstance(self, rv_sample):

3386 cond1 = cond1 & self._nonzero(k, *args)

3387 cond = cond0 & cond1

3388 output = zeros(shape(cond), 'd')

3389 place(output, (1-cond0) + np.isnan(k), self.badvalue)

3390 if np.any(cond):

3391 goodargs = argsreduce(cond, *((k,)+args))

3392 place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))

3393 if output.ndim == 0:

3394 return output[()]

3395 return output

3396

3397 def logpmf(self, k, *args, **kwds):

3398 """Log of the probability mass function at k of the given RV.

3399

3400 Parameters

3401 ----------

3402 k : array_like

3403 Quantiles.

3404 arg1, arg2, arg3,... : array_like

3405 The shape parameter(s) for the distribution (see docstring of the

3406 instance object for more information).

3407 loc : array_like, optional

3408 Location parameter. Default is 0.

3409

3410 Returns

3411 -------

3412 logpmf : array_like

3413 Log of the probability mass function evaluated at k.

3414

3415 """

3416 args, loc, _ = self._parse_args(*args, **kwds)

3417 k, loc = map(asarray, (k, loc))

3418 args = tuple(map(asarray, args))

3419 _a, _b = self._get_support(*args)

3420 k = asarray((k-loc))

3421 cond0 = self._argcheck(*args)

3422 cond1 = (k >= _a) & (k <= _b)

3423 if not isinstance(self, rv_sample):

3424 cond1 = cond1 & self._nonzero(k, *args)

3425 cond = cond0 & cond1

3426 output = empty(shape(cond), 'd')

3427 output.fill(NINF)

3428 place(output, (1-cond0) + np.isnan(k), self.badvalue)

3429 if np.any(cond):

3430 goodargs = argsreduce(cond, *((k,)+args))

3431 place(output, cond, self._logpmf(*goodargs))

3432 if output.ndim == 0:

3433 return output[()]

3434 return output

3435

3436 def cdf(self, k, *args, **kwds):

3437 """Cumulative distribution function of the given RV.

3438

3439 Parameters

3440 ----------

3441 k : array_like, int

3442 Quantiles.

3443 arg1, arg2, arg3,... : array_like

3444 The shape parameter(s) for the distribution (see docstring of the

3445 instance object for more information).

3446 loc : array_like, optional

3447 Location parameter (default=0).

3448

3449 Returns

3450 -------

3451 cdf : ndarray

3452 Cumulative distribution function evaluated at `k`.

3453

3454 """

3455 args, loc, _ = self._parse_args(*args, **kwds)

3456 k, loc = map(asarray, (k, loc))

3457 args = tuple(map(asarray, args))

3458 _a, _b = self._get_support(*args)

3459 k = asarray((k-loc))

3460 cond0 = self._argcheck(*args)

3461 cond1 = (k >= _a) & (k < _b)

3462 cond2 = (k >= _b)

3463 cond3 = np.isneginf(k)

3464 cond = cond0 & cond1 & np.isfinite(k)

3465

3466 output = zeros(shape(cond), 'd')

3467 place(output, cond2*(cond0 == cond0), 1.0)

3468 place(output, cond3*(cond0 == cond0), 0.0)

3469 place(output, (1-cond0) + np.isnan(k), self.badvalue)

3470

3471 if np.any(cond):

3472 goodargs = argsreduce(cond, *((k,)+args))

3473 place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))

3474 if output.ndim == 0:

3475 return output[()]

3476 return output

3477

3478 def logcdf(self, k, *args, **kwds):

3479 """Log of the cumulative distribution function at k of the given RV.

3480

3481 Parameters

3482 ----------

3483 k : array_like, int

3484 Quantiles.

3485 arg1, arg2, arg3,... : array_like

3486 The shape parameter(s) for the distribution (see docstring of the

3487 instance object for more information).

3488 loc : array_like, optional

3489 Location parameter (default=0).

3490

3491 Returns

3492 -------

3493 logcdf : array_like

3494 Log of the cumulative distribution function evaluated at k.

3495

3496 """

3497 args, loc, _ = self._parse_args(*args, **kwds)

3498 k, loc = map(asarray, (k, loc))

3499 args = tuple(map(asarray, args))

3500 _a, _b = self._get_support(*args)

3501 k = asarray((k-loc))

3502 cond0 = self._argcheck(*args)

3503 cond1 = (k >= _a) & (k < _b)

3504 cond2 = (k >= _b)

3505 cond = cond0 & cond1

3506 output = empty(shape(cond), 'd')

3507 output.fill(NINF)

3508 place(output, (1-cond0) + np.isnan(k), self.badvalue)

3509 place(output, cond2*(cond0 == cond0), 0.0)

3510

3511 if np.any(cond):

3512 goodargs = argsreduce(cond, *((k,)+args))

3513 place(output, cond, self._logcdf(*goodargs))

3514 if output.ndim == 0:

3515 return output[()]

3516 return output

3517

3518 def sf(self, k, *args, **kwds):

3519 """Survival function (1 - `cdf`) at k of the given RV.

3520

3521 Parameters

3522 ----------

3523 k : array_like

3524 Quantiles.

3525 arg1, arg2, arg3,... : array_like

3526 The shape parameter(s) for the distribution (see docstring of the

3527 instance object for more information).

3528 loc : array_like, optional

3529 Location parameter (default=0).

3530

3531 Returns

3532 -------

3533 sf : array_like

3534 Survival function evaluated at k.

3535

3536 """

3537 args, loc, _ = self._parse_args(*args, **kwds)

3538 k, loc = map(asarray, (k, loc))

3539 args = tuple(map(asarray, args))

3540 _a, _b = self._get_support(*args)

3541 k = asarray(k-loc)

3542 cond0 = self._argcheck(*args)

3543 cond1 = (k >= _a) & (k < _b)

3544 cond2 = ((k < _a) | np.isneginf(k)) & cond0

3545 cond = cond0 & cond1 & np.isfinite(k)

3546 output = zeros(shape(cond), 'd')

3547 place(output, (1-cond0) + np.isnan(k), self.badvalue)

3548 place(output, cond2, 1.0)

3549 if np.any(cond):

3550 goodargs = argsreduce(cond, *((k,)+args))

3551 place(output, cond, np.clip(self._sf(*goodargs), 0, 1))

3552 if output.ndim == 0:

3553 return output[()]

3554 return output

3555

3556 def logsf(self, k, *args, **kwds):

3557 """Log of the survival function of the given RV.

3558

3559 Returns the log of the "survival function," defined as 1 - `cdf`,

3560 evaluated at `k`.

3561

3562 Parameters

3563 ----------

3564 k : array_like

3565 Quantiles.

3566 arg1, arg2, arg3,... : array_like

3567 The shape parameter(s) for the distribution (see docstring of the

3568 instance object for more information).

3569 loc : array_like, optional

3570 Location parameter (default=0).

3571

3572 Returns

3573 -------

3574 logsf : ndarray

3575 Log of the survival function evaluated at `k`.

3576

3577 """

3578 args, loc, _ = self._parse_args(*args, **kwds)

3579 k, loc = map(asarray, (k, loc))

3580 args = tuple(map(asarray, args))

3581 _a, _b = self._get_support(*args)

3582 k = asarray(k-loc)

3583 cond0 = self._argcheck(*args)

3584 cond1 = (k >= _a) & (k < _b)

3585 cond2 = (k < _a) & cond0

3586 cond = cond0 & cond1

3587 output = empty(shape(cond), 'd')

3588 output.fill(NINF)

3589 place(output, (1-cond0) + np.isnan(k), self.badvalue)

3590 place(output, cond2, 0.0)

3591 if np.any(cond):

3592 goodargs = argsreduce(cond, *((k,)+args))

3593 place(output, cond, self._logsf(*goodargs))

3594 if output.ndim == 0:

3595 return output[()]

3596 return output

3597

3598 def ppf(self, q, *args, **kwds):

3599 """Percent point function (inverse of `cdf`) at q of the given RV.

3600

3601 Parameters

3602 ----------

3603 q : array_like

3604 Lower tail probability.

3605 arg1, arg2, arg3,... : array_like

3606 The shape parameter(s) for the distribution (see docstring of the

3607 instance object for more information).

3608 loc : array_like, optional

3609 Location parameter (default=0).

3610

3611 Returns

3612 -------

3613 k : array_like

3614 Quantile corresponding to the lower tail probability, q.

3615

3616 """

3617 args, loc, _ = self._parse_args(*args, **kwds)

3618 q, loc = map(asarray, (q, loc))

3619 args = tuple(map(asarray, args))

3620 _a, _b = self._get_support(*args)

3621 cond0 = self._argcheck(*args) & (loc == loc)

3622 cond1 = (q > 0) & (q < 1)

3623 cond2 = (q == 1) & cond0

3624 cond = cond0 & cond1

3625 output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')

3626 # output type 'd' to handle nin and inf

3627 place(output, (q == 0)*(cond == cond), _a-1 + loc)

3628 place(output, cond2, _b + loc)

3629 if np.any(cond):

3630 goodargs = argsreduce(cond, *((q,)+args+(loc,)))

3631 loc, goodargs = goodargs[-1], goodargs[:-1]

3632 place(output, cond, self._ppf(*goodargs) + loc)

3633

3634 if output.ndim == 0:

3635 return output[()]

3636 return output

3637

3638 def isf(self, q, *args, **kwds):

3639 """Inverse survival function (inverse of `sf`) at q of the given RV.

3640

3641 Parameters

3642 ----------

3643 q : array_like

3644 Upper tail probability.

3645 arg1, arg2, arg3,... : array_like

3646 The shape parameter(s) for the distribution (see docstring of the

3647 instance object for more information).

3648 loc : array_like, optional

3649 Location parameter (default=0).

3650

3651 Returns

3652 -------

3653 k : ndarray or scalar

3654 Quantile corresponding to the upper tail probability, q.

3655

3656 """

3657 args, loc, _ = self._parse_args(*args, **kwds)

3658 q, loc = map(asarray, (q, loc))

3659 args = tuple(map(asarray, args))

3660 _a, _b = self._get_support(*args)

3661 cond0 = self._argcheck(*args) & (loc == loc)

3662 cond1 = (q > 0) & (q < 1)

3663 cond2 = (q == 1) & cond0

3664 cond3 = (q == 0) & cond0

3665 cond = cond0 & cond1

3666

3667 # same problem as with ppf; copied from ppf and changed

3668 output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')

3669 # output type 'd' to handle nin and inf

3670 lower_bound = _a - 1 + loc

3671 upper_bound = _b + loc

3672 place(output, cond2*(cond == cond), lower_bound)

3673 place(output, cond3*(cond == cond), upper_bound)

3674

3675 # call place only if at least 1 valid argument

3676 if np.any(cond):

3677 goodargs = argsreduce(cond, *((q,)+args+(loc,)))

3678 loc, goodargs = goodargs[-1], goodargs[:-1]

3679 # PB same as ticket 766

3680 place(output, cond, self._isf(*goodargs) + loc)

3681

3682 if output.ndim == 0:

3683 return output[()]

3684 return output

3685

3686 def _entropy(self, *args):

3687 if hasattr(self, 'pk'):

3688 return stats.entropy(self.pk)

3689 else:

3690 _a, _b = self._get_support(*args)

3691 return _expect(lambda x: entr(self.pmf(x, *args)),

3692 _a, _b, self.ppf(0.5, *args), self.inc)

3693

3694 def expect(self, func=None, args=(), loc=0, lb=None, ub=None,

3695 conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):

3696 """

3697 Calculate expected value of a function with respect to the distribution

3698 for discrete distribution by numerical summation.

3699

3700 Parameters

3701 ----------

3702 func : callable, optional

3703 Function for which the expectation value is calculated.

3704 Takes only one argument.

3705 The default is the identity mapping f(k) = k.

3706 args : tuple, optional

3707 Shape parameters of the distribution.

3708 loc : float, optional

3709 Location parameter.

3710 Default is 0.

3711 lb, ub : int, optional

3712 Lower and upper bound for the summation, default is set to the

3713 support of the distribution, inclusive (``lb <= k <= ub``).

3714 conditional : bool, optional

3715 If true then the expectation is corrected by the conditional

3716 probability of the summation interval. The return value is the

3717 expectation of the function, `func`, conditional on being in

3718 the given interval (k such that ``lb <= k <= ub``).

3719 Default is False.

3720 maxcount : int, optional

3721 Maximal number of terms to evaluate (to avoid an endless loop for

3722 an infinite sum). Default is 1000.

3723 tolerance : float, optional

3724 Absolute tolerance for the summation. Default is 1e-10.

3725 chunksize : int, optional

3726 Iterate over the support of a distributions in chunks of this size.

3727 Default is 32.

3728

3729 Returns

3730 -------

3731 expect : float

3732 Expected value.

3733

3734 Notes

3735 -----

3736 For heavy-tailed distributions, the expected value may or

3737 may not exist,

3738 depending on the function, `func`. If it does exist, but the

3739 sum converges

3740 slowly, the accuracy of the result may be rather low. For instance, for

3741 ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.

3742 increasing `maxcount` and/or `chunksize` may improve the result,

3743 but may also make zipf very slow.

3744

3745 The function is not vectorized.

3746

3747 """

3748 if func is None:

3749 def fun(x):

3750 # loc and args from outer scope

3751 return (x+loc)*self._pmf(x, *args)

3752 else:

3753 def fun(x):

3754 # loc and args from outer scope

3755 return func(x+loc)*self._pmf(x, *args)

3756 # used pmf because _pmf does not check support in randint and there

3757 # might be problems(?) with correct self.a, self.b at this stage maybe

3758 # not anymore, seems to work now with _pmf

3759

3760 _a, _b = self._get_support(*args)

3761 if lb is None:

3762 lb = _a

3763 else:

3764 lb = lb - loc # convert bound for standardized distribution

3765 if ub is None:

3766 ub = _b

3767 else:

3768 ub = ub - loc # convert bound for standardized distribution

3769 if conditional:

3770 invfac = self.sf(lb-1, *args) - self.sf(ub, *args)

3771 else:

3772 invfac = 1.0

3773

3774 if isinstance(self, rv_sample):

3775 res = self._expect(fun, lb, ub)

3776 return res / invfac

3777

3778 # iterate over the support, starting from the median

3779 x0 = self.ppf(0.5, *args)

3780 res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)

3781 return res / invfac

3782

3783 def _param_info(self):

3784 shape_info = self._shape_info()

3785 loc_info = _ShapeInfo("loc", True, (-np.inf, np.inf), (False, False))

3786 param_info = shape_info + [loc_info]

3787 return param_info

3788

3789

3790def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,

3791 chunksize=32):

3792 """Helper for computing the expectation value of `fun`."""

3793 # short-circuit if the support size is small enough

3794 if (ub - lb) <= chunksize:

3795 supp = np.arange(lb, ub+1, inc)

3796 vals = fun(supp)

3797 return np.sum(vals)

3798

3799 # otherwise, iterate starting from x0

3800 if x0 < lb:

3801 x0 = lb

3802 if x0 > ub:

3803 x0 = ub

3804

3805 count, tot = 0, 0.

3806 # iterate over [x0, ub] inclusive

3807 for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):

3808 count += x.size

3809 delta = np.sum(fun(x))

3810 tot += delta

3811 if abs(delta) < tolerance * x.size:

3812 break

3813 if count > maxcount:

3814 warnings.warn('expect(): sum did not converge', RuntimeWarning)

3815 return tot

3816

3817 # iterate over [lb, x0)

3818 for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):

3819 count += x.size

3820 delta = np.sum(fun(x))

3821 tot += delta

3822 if abs(delta) < tolerance * x.size:

3823 break

3824 if count > maxcount:

3825 warnings.warn('expect(): sum did not converge', RuntimeWarning)

3826 break

3827

3828 return tot

3829

3830

3831def _iter_chunked(x0, x1, chunksize=4, inc=1):

3832 """Iterate from x0 to x1 in chunks of chunksize and steps inc.

3833

3834 x0 must be finite, x1 need not be. In the latter case, the iterator is

3835 infinite.

3836 Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards

3837 (make sure to set inc < 0.)

3838

3839 >>> [x for x in _iter_chunked(2, 5, inc=2)]

3840 [array([2, 4])]

3841 >>> [x for x in _iter_chunked(2, 11, inc=2)]

3842 [array([2, 4, 6, 8]), array([10])]

3843 >>> [x for x in _iter_chunked(2, -5, inc=-2)]

3844 [array([ 2, 0, -2, -4])]

3845 >>> [x for x in _iter_chunked(2, -9, inc=-2)]

3846 [array([ 2, 0, -2, -4]), array([-6, -8])]

3847

3848 """

3849 if inc == 0:

3850 raise ValueError('Cannot increment by zero.')

3851 if chunksize <= 0:

3852 raise ValueError('Chunk size must be positive; got %s.' % chunksize)

3853

3854 s = 1 if inc > 0 else -1

3855 stepsize = abs(chunksize * inc)

3856

3857 x = x0

3858 while (x - x1) * inc < 0:

3859 delta = min(stepsize, abs(x - x1))

3860 step = delta * s

3861 supp = np.arange(x, x + step, inc)

3862 x += step

3863 yield supp

3864

3865

3866class rv_sample(rv_discrete):

3867 """A 'sample' discrete distribution defined by the support and values.

3868

3869 The ctor ignores most of the arguments, only needs the `values` argument.

3870 """

3871 def __init__(self, a=0, b=inf, name=None, badvalue=None,

3872 moment_tol=1e-8, values=None, inc=1, longname=None,

3873 shapes=None, extradoc=None, seed=None):

3874

3875 super(rv_discrete, self).__init__(seed)

3876

3877 if extradoc is not None:

3878 warnings.warn("extradoc is deprecated and will be removed in "

3879 "SciPy 1.11.0", DeprecationWarning)

3880

3881 if values is None:

3882 raise ValueError("rv_sample.__init__(..., values=None,...)")

3883

3884 # cf generic freeze

3885 self._ctor_param = dict(

3886 a=a, b=b, name=name, badvalue=badvalue,

3887 moment_tol=moment_tol, values=values, inc=inc,

3888 longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)

3889

3890 if badvalue is None:

3891 badvalue = nan

3892 self.badvalue = badvalue

3893 self.moment_tol = moment_tol

3894 self.inc = inc

3895 self.shapes = shapes

3896 self.vecentropy = self._entropy

3897

3898 xk, pk = values

3899

3900 if np.shape(xk) != np.shape(pk):

3901 raise ValueError("xk and pk must have the same shape.")

3902 if np.less(pk, 0.0).any():

3903 raise ValueError("All elements of pk must be non-negative.")

3904 if not np.allclose(np.sum(pk), 1):

3905 raise ValueError("The sum of provided pk is not 1.")

3906

3907 indx = np.argsort(np.ravel(xk))

3908 self.xk = np.take(np.ravel(xk), indx, 0)

3909 self.pk = np.take(np.ravel(pk), indx, 0)

3910 self.a = self.xk[0]

3911 self.b = self.xk[-1]

3912

3913 self.qvals = np.cumsum(self.pk, axis=0)

3914

3915 self.shapes = ' ' # bypass inspection

3916

3917 self._construct_argparser(meths_to_inspect=[self._pmf],

3918 locscale_in='loc=0',

3919 # scale=1 for discrete RVs

3920 locscale_out='loc, 1')

3921

3922 self._attach_methods()

3923

3924 self._construct_docstrings(name, longname, extradoc)

3925

3926 def __getstate__(self):

3927 dct = self.__dict__.copy()

3928

3929 # these methods will be remade in rv_generic.__setstate__,

3930 # which calls rv_generic._attach_methods

3931 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs"]

3932 [dct.pop(attr, None) for attr in attrs]

3933

3934 return dct

3935

3936 def _attach_methods(self):

3937 """Attaches dynamically created argparser methods."""

3938 self._attach_argparser_methods()

3939

3940 def _get_support(self, *args):

3941 """Return the support of the (unscaled, unshifted) distribution.

3942

3943 Parameters

3944 ----------

3945 arg1, arg2, ... : array_like

3946 The shape parameter(s) for the distribution (see docstring of the

3947 instance object for more information).

3948

3949 Returns

3950 -------

3951 a, b : numeric (float, or int or +/-np.inf)

3952 end-points of the distribution's support.

3953 """

3954 return self.a, self.b

3955

3956 def _pmf(self, x):

3957 return np.select([x == k for k in self.xk],

3958 [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)

3959

3960 def _cdf(self, x):

3961 xx, xxk = np.broadcast_arrays(x[:, None], self.xk)

3962 indx = np.argmax(xxk > xx, axis=-1) - 1

3963 return self.qvals[indx]

3964

3965 def _ppf(self, q):

3966 qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)

3967 indx = argmax(sqq >= qq, axis=-1)

3968 return self.xk[indx]

3969

3970 def _rvs(self, size=None, random_state=None):

3971 # Need to define it explicitly, otherwise .rvs() with size=None

3972 # fails due to explicit broadcasting in _ppf

3973 U = random_state.uniform(size=size)

3974 if size is None:

3975 U = np.array(U, ndmin=1)

3976 Y = self._ppf(U)[0]

3977 else:

3978 Y = self._ppf(U)

3979 return Y

3980

3981 def _entropy(self):

3982 return stats.entropy(self.pk)

3983

3984 def generic_moment(self, n):

3985 n = asarray(n)

3986 return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)

3987

3988 def _expect(self, fun, lb, ub, *args, **kwds):

3989 # ignore all args, just do a brute force summation

3990 supp = self.xk[(lb <= self.xk) & (self.xk <= ub)]

3991 vals = fun(supp)

3992 return np.sum(vals)

3993

3994

3995def _check_shape(argshape, size):

3996 """

3997 This is a utility function used by `_rvs()` in the class geninvgauss_gen.

3998 It compares the tuple argshape to the tuple size.

3999

4000 Parameters

4001 ----------

4002 argshape : tuple of integers

4003 Shape of the arguments.

4004 size : tuple of integers or integer

4005 Size argument of rvs().

4006

4007 Returns

4008 -------

4009 The function returns two tuples, scalar_shape and bc.

4010

4011 scalar_shape : tuple

4012 Shape to which the 1-d array of random variates returned by

4013 _rvs_scalar() is converted when it is copied into the

4014 output array of _rvs().

4015

4016 bc : tuple of booleans

4017 bc is an tuple the same length as size. bc[j] is True if the data

4018 associated with that index is generated in one call of _rvs_scalar().

4019

4020 """

4021 scalar_shape = []

4022 bc = []

4023 for argdim, sizedim in zip_longest(argshape[::-1], size[::-1],

4024 fillvalue=1):

4025 if sizedim > argdim or (argdim == sizedim == 1):

4026 scalar_shape.append(sizedim)

4027 bc.append(True)

4028 else:

4029 bc.append(False)

4030 return tuple(scalar_shape[::-1]), tuple(bc[::-1])

4031

4032

4033def get_distribution_names(namespace_pairs, rv_base_class):

4034 """Collect names of statistical distributions and their generators.

4035

4036 Parameters

4037 ----------

4038 namespace_pairs : sequence

4039 A snapshot of (name, value) pairs in the namespace of a module.

4040 rv_base_class : class

4041 The base class of random variable generator classes in a module.

4042

4043 Returns

4044 -------

4045 distn_names : list of strings

4046 Names of the statistical distributions.

4047 distn_gen_names : list of strings

4048 Names of the generators of the statistical distributions.

4049 Note that these are not simply the names of the statistical

4050 distributions, with a _gen suffix added.

4051

4052 """

4053 distn_names = []

4054 distn_gen_names = []

4055 for name, value in namespace_pairs:

4056 if name.startswith('_'):

4057 continue

4058 if name.endswith('_gen') and issubclass(value, rv_base_class):

4059 distn_gen_names.append(name)

4060 if isinstance(value, rv_base_class):

4061 distn_names.append(name)

4062 return distn_names, distn_gen_names

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_distn_infrastructure.py: 24%

1563 statements