Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_distn_infrastructure.py: 24%

1563 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1# 

2# Author: Travis Oliphant 2002-2011 with contributions from 

3# SciPy Developers 2004-2011 

4# 

5from scipy._lib._util import getfullargspec_no_self as _getfullargspec 

6 

7import sys 

8import keyword 

9import re 

10import types 

11import warnings 

12from itertools import zip_longest 

13 

14from scipy._lib import doccer 

15from ._distr_params import distcont, distdiscrete 

16from scipy._lib._util import check_random_state 

17 

18from scipy.special import comb, entr 

19 

20# for root finding for continuous distribution ppf, and max likelihood 

21# estimation 

22from scipy import optimize 

23 

24# for functions of continuous distributions (e.g. moments, entropy, cdf) 

25from scipy import integrate 

26 

27# to approximate the pdf of a continuous distribution given its cdf 

28from scipy._lib._finite_differences import _derivative 

29 

30# for scipy.stats.entropy. Attempts to import just that function or file 

31# have cause import problems 

32from scipy import stats 

33 

34from numpy import (arange, putmask, ravel, ones, shape, ndarray, zeros, floor, 

35 logical_and, log, sqrt, place, argmax, vectorize, asarray, 

36 nan, inf, isinf, NINF, empty) 

37 

38import numpy as np 

39from ._constants import _XMAX 

40from scipy.stats._warnings_errors import FitError 

41 

42# These are the docstring parts used for substitution in specific 

43# distribution docstrings 

44 

45docheaders = {'methods': """\nMethods\n-------\n""", 

46 'notes': """\nNotes\n-----\n""", 

47 'examples': """\nExamples\n--------\n"""} 

48 

49_doc_rvs = """\ 

50rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None) 

51 Random variates. 

52""" 

53_doc_pdf = """\ 

54pdf(x, %(shapes)s, loc=0, scale=1) 

55 Probability density function. 

56""" 

57_doc_logpdf = """\ 

58logpdf(x, %(shapes)s, loc=0, scale=1) 

59 Log of the probability density function. 

60""" 

61_doc_pmf = """\ 

62pmf(k, %(shapes)s, loc=0, scale=1) 

63 Probability mass function. 

64""" 

65_doc_logpmf = """\ 

66logpmf(k, %(shapes)s, loc=0, scale=1) 

67 Log of the probability mass function. 

68""" 

69_doc_cdf = """\ 

70cdf(x, %(shapes)s, loc=0, scale=1) 

71 Cumulative distribution function. 

72""" 

73_doc_logcdf = """\ 

74logcdf(x, %(shapes)s, loc=0, scale=1) 

75 Log of the cumulative distribution function. 

76""" 

77_doc_sf = """\ 

78sf(x, %(shapes)s, loc=0, scale=1) 

79 Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate). 

80""" 

81_doc_logsf = """\ 

82logsf(x, %(shapes)s, loc=0, scale=1) 

83 Log of the survival function. 

84""" 

85_doc_ppf = """\ 

86ppf(q, %(shapes)s, loc=0, scale=1) 

87 Percent point function (inverse of ``cdf`` --- percentiles). 

88""" 

89_doc_isf = """\ 

90isf(q, %(shapes)s, loc=0, scale=1) 

91 Inverse survival function (inverse of ``sf``). 

92""" 

93_doc_moment = """\ 

94moment(order, %(shapes)s, loc=0, scale=1) 

95 Non-central moment of the specified order. 

96""" 

97_doc_stats = """\ 

98stats(%(shapes)s, loc=0, scale=1, moments='mv') 

99 Mean('m'), variance('v'), skew('s'), and/or kurtosis('k'). 

100""" 

101_doc_entropy = """\ 

102entropy(%(shapes)s, loc=0, scale=1) 

103 (Differential) entropy of the RV. 

104""" 

105_doc_fit = """\ 

106fit(data) 

107 Parameter estimates for generic data. 

108 See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the 

109 keyword arguments. 

110""" 

111_doc_expect = """\ 

112expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds) 

113 Expected value of a function (of one argument) with respect to the distribution. 

114""" 

115_doc_expect_discrete = """\ 

116expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False) 

117 Expected value of a function (of one argument) with respect to the distribution. 

118""" 

119_doc_median = """\ 

120median(%(shapes)s, loc=0, scale=1) 

121 Median of the distribution. 

122""" 

123_doc_mean = """\ 

124mean(%(shapes)s, loc=0, scale=1) 

125 Mean of the distribution. 

126""" 

127_doc_var = """\ 

128var(%(shapes)s, loc=0, scale=1) 

129 Variance of the distribution. 

130""" 

131_doc_std = """\ 

132std(%(shapes)s, loc=0, scale=1) 

133 Standard deviation of the distribution. 

134""" 

135_doc_interval = """\ 

136interval(confidence, %(shapes)s, loc=0, scale=1) 

137 Confidence interval with equal areas around the median. 

138""" 

139_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf, 

140 _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf, 

141 _doc_logsf, _doc_ppf, _doc_isf, _doc_moment, 

142 _doc_stats, _doc_entropy, _doc_fit, 

143 _doc_expect, _doc_median, 

144 _doc_mean, _doc_var, _doc_std, _doc_interval]) 

145 

146_doc_default_longsummary = """\ 

147As an instance of the `rv_continuous` class, `%(name)s` object inherits from it 

148a collection of generic methods (see below for the full list), 

149and completes them with details specific for this particular distribution. 

150""" 

151 

152_doc_default_frozen_note = """ 

153Alternatively, the object may be called (as a function) to fix the shape, 

154location, and scale parameters returning a "frozen" continuous RV object: 

155 

156rv = %(name)s(%(shapes)s, loc=0, scale=1) 

157 - Frozen RV object with the same methods but holding the given shape, 

158 location, and scale fixed. 

159""" 

160_doc_default_example = """\ 

161Examples 

162-------- 

163>>> import numpy as np 

164>>> from scipy.stats import %(name)s 

165>>> import matplotlib.pyplot as plt 

166>>> fig, ax = plt.subplots(1, 1) 

167 

168Calculate the first four moments: 

169 

170%(set_vals_stmt)s 

171>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk') 

172 

173Display the probability density function (``pdf``): 

174 

175>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s), 

176... %(name)s.ppf(0.99, %(shapes)s), 100) 

177>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s), 

178... 'r-', lw=5, alpha=0.6, label='%(name)s pdf') 

179 

180Alternatively, the distribution object can be called (as a function) 

181to fix the shape, location and scale parameters. This returns a "frozen" 

182RV object holding the given parameters fixed. 

183 

184Freeze the distribution and display the frozen ``pdf``: 

185 

186>>> rv = %(name)s(%(shapes)s) 

187>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf') 

188 

189Check accuracy of ``cdf`` and ``ppf``: 

190 

191>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s) 

192>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s)) 

193True 

194 

195Generate random numbers: 

196 

197>>> r = %(name)s.rvs(%(shapes)s, size=1000) 

198 

199And compare the histogram: 

200 

201>>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2) 

202>>> ax.set_xlim([x[0], x[-1]]) 

203>>> ax.legend(loc='best', frameon=False) 

204>>> plt.show() 

205 

206""" 

207 

208_doc_default_locscale = """\ 

209The probability density above is defined in the "standardized" form. To shift 

210and/or scale the distribution use the ``loc`` and ``scale`` parameters. 

211Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically 

212equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with 

213``y = (x - loc) / scale``. Note that shifting the location of a distribution 

214does not make it a "noncentral" distribution; noncentral generalizations of 

215some distributions are available in separate classes. 

216""" 

217 

218_doc_default = ''.join([_doc_default_longsummary, 

219 _doc_allmethods, 

220 '\n', 

221 _doc_default_example]) 

222 

223_doc_default_before_notes = ''.join([_doc_default_longsummary, 

224 _doc_allmethods]) 

225 

226docdict = { 

227 'rvs': _doc_rvs, 

228 'pdf': _doc_pdf, 

229 'logpdf': _doc_logpdf, 

230 'cdf': _doc_cdf, 

231 'logcdf': _doc_logcdf, 

232 'sf': _doc_sf, 

233 'logsf': _doc_logsf, 

234 'ppf': _doc_ppf, 

235 'isf': _doc_isf, 

236 'stats': _doc_stats, 

237 'entropy': _doc_entropy, 

238 'fit': _doc_fit, 

239 'moment': _doc_moment, 

240 'expect': _doc_expect, 

241 'interval': _doc_interval, 

242 'mean': _doc_mean, 

243 'std': _doc_std, 

244 'var': _doc_var, 

245 'median': _doc_median, 

246 'allmethods': _doc_allmethods, 

247 'longsummary': _doc_default_longsummary, 

248 'frozennote': _doc_default_frozen_note, 

249 'example': _doc_default_example, 

250 'default': _doc_default, 

251 'before_notes': _doc_default_before_notes, 

252 'after_notes': _doc_default_locscale 

253} 

254 

255# Reuse common content between continuous and discrete docs, change some 

256# minor bits. 

257docdict_discrete = docdict.copy() 

258 

259docdict_discrete['pmf'] = _doc_pmf 

260docdict_discrete['logpmf'] = _doc_logpmf 

261docdict_discrete['expect'] = _doc_expect_discrete 

262_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf', 

263 'ppf', 'isf', 'stats', 'entropy', 'expect', 'median', 

264 'mean', 'var', 'std', 'interval'] 

265for obj in _doc_disc_methods: 

266 docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '') 

267 

268_doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf'] 

269for obj in _doc_disc_methods_err_varname: 

270 docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ') 

271 

272docdict_discrete.pop('pdf') 

273docdict_discrete.pop('logpdf') 

274 

275_doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods]) 

276docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods 

277 

278docdict_discrete['longsummary'] = _doc_default_longsummary.replace( 

279 'rv_continuous', 'rv_discrete') 

280 

281_doc_default_frozen_note = """ 

282Alternatively, the object may be called (as a function) to fix the shape and 

283location parameters returning a "frozen" discrete RV object: 

284 

285rv = %(name)s(%(shapes)s, loc=0) 

286 - Frozen RV object with the same methods but holding the given shape and 

287 location fixed. 

288""" 

289docdict_discrete['frozennote'] = _doc_default_frozen_note 

290 

291_doc_default_discrete_example = """\ 

292Examples 

293-------- 

294>>> import numpy as np 

295>>> from scipy.stats import %(name)s 

296>>> import matplotlib.pyplot as plt 

297>>> fig, ax = plt.subplots(1, 1) 

298 

299Calculate the first four moments: 

300 

301%(set_vals_stmt)s 

302>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk') 

303 

304Display the probability mass function (``pmf``): 

305 

306>>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s), 

307... %(name)s.ppf(0.99, %(shapes)s)) 

308>>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf') 

309>>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5) 

310 

311Alternatively, the distribution object can be called (as a function) 

312to fix the shape and location. This returns a "frozen" RV object holding 

313the given parameters fixed. 

314 

315Freeze the distribution and display the frozen ``pmf``: 

316 

317>>> rv = %(name)s(%(shapes)s) 

318>>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1, 

319... label='frozen pmf') 

320>>> ax.legend(loc='best', frameon=False) 

321>>> plt.show() 

322 

323Check accuracy of ``cdf`` and ``ppf``: 

324 

325>>> prob = %(name)s.cdf(x, %(shapes)s) 

326>>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s)) 

327True 

328 

329Generate random numbers: 

330 

331>>> r = %(name)s.rvs(%(shapes)s, size=1000) 

332""" 

333 

334 

335_doc_default_discrete_locscale = """\ 

336The probability mass function above is defined in the "standardized" form. 

337To shift distribution use the ``loc`` parameter. 

338Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically 

339equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``. 

340""" 

341 

342docdict_discrete['example'] = _doc_default_discrete_example 

343docdict_discrete['after_notes'] = _doc_default_discrete_locscale 

344 

345_doc_default_before_notes = ''.join([docdict_discrete['longsummary'], 

346 docdict_discrete['allmethods']]) 

347docdict_discrete['before_notes'] = _doc_default_before_notes 

348 

349_doc_default_disc = ''.join([docdict_discrete['longsummary'], 

350 docdict_discrete['allmethods'], 

351 docdict_discrete['frozennote'], 

352 docdict_discrete['example']]) 

353docdict_discrete['default'] = _doc_default_disc 

354 

355# clean up all the separate docstring elements, we do not need them anymore 

356for obj in [s for s in dir() if s.startswith('_doc_')]: 

357 exec('del ' + obj) 

358del obj 

359 

360 

361def _moment(data, n, mu=None): 

362 if mu is None: 

363 mu = data.mean() 

364 return ((data - mu)**n).mean() 

365 

366 

367def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args): 

368 if (n == 0): 

369 return 1.0 

370 elif (n == 1): 

371 if mu is None: 

372 val = moment_func(1, *args) 

373 else: 

374 val = mu 

375 elif (n == 2): 

376 if mu2 is None or mu is None: 

377 val = moment_func(2, *args) 

378 else: 

379 val = mu2 + mu*mu 

380 elif (n == 3): 

381 if g1 is None or mu2 is None or mu is None: 

382 val = moment_func(3, *args) 

383 else: 

384 mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment 

385 val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment 

386 elif (n == 4): 

387 if g1 is None or g2 is None or mu2 is None or mu is None: 

388 val = moment_func(4, *args) 

389 else: 

390 mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment 

391 mu3 = g1*np.power(mu2, 1.5) # 3rd central moment 

392 val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu 

393 else: 

394 val = moment_func(n, *args) 

395 

396 return val 

397 

398 

399def _skew(data): 

400 """ 

401 skew is third central moment / variance**(1.5) 

402 """ 

403 data = np.ravel(data) 

404 mu = data.mean() 

405 m2 = ((data - mu)**2).mean() 

406 m3 = ((data - mu)**3).mean() 

407 return m3 / np.power(m2, 1.5) 

408 

409 

410def _kurtosis(data): 

411 """kurtosis is fourth central moment / variance**2 - 3.""" 

412 data = np.ravel(data) 

413 mu = data.mean() 

414 m2 = ((data - mu)**2).mean() 

415 m4 = ((data - mu)**4).mean() 

416 return m4 / m2**2 - 3 

417 

418 

419def _fit_determine_optimizer(optimizer): 

420 if not callable(optimizer) and isinstance(optimizer, str): 

421 if not optimizer.startswith('fmin_'): 

422 optimizer = "fmin_"+optimizer 

423 if optimizer == 'fmin_': 

424 optimizer = 'fmin' 

425 try: 

426 optimizer = getattr(optimize, optimizer) 

427 except AttributeError as e: 

428 raise ValueError("%s is not a valid optimizer" % optimizer) from e 

429 return optimizer 

430 

431 

432# Frozen RV class 

433class rv_frozen: 

434 

435 def __init__(self, dist, *args, **kwds): 

436 self.args = args 

437 self.kwds = kwds 

438 

439 # create a new instance 

440 self.dist = dist.__class__(**dist._updated_ctor_param()) 

441 

442 shapes, _, _ = self.dist._parse_args(*args, **kwds) 

443 self.a, self.b = self.dist._get_support(*shapes) 

444 

445 @property 

446 def random_state(self): 

447 return self.dist._random_state 

448 

449 @random_state.setter 

450 def random_state(self, seed): 

451 self.dist._random_state = check_random_state(seed) 

452 

453 def cdf(self, x): 

454 return self.dist.cdf(x, *self.args, **self.kwds) 

455 

456 def logcdf(self, x): 

457 return self.dist.logcdf(x, *self.args, **self.kwds) 

458 

459 def ppf(self, q): 

460 return self.dist.ppf(q, *self.args, **self.kwds) 

461 

462 def isf(self, q): 

463 return self.dist.isf(q, *self.args, **self.kwds) 

464 

465 def rvs(self, size=None, random_state=None): 

466 kwds = self.kwds.copy() 

467 kwds.update({'size': size, 'random_state': random_state}) 

468 return self.dist.rvs(*self.args, **kwds) 

469 

470 def sf(self, x): 

471 return self.dist.sf(x, *self.args, **self.kwds) 

472 

473 def logsf(self, x): 

474 return self.dist.logsf(x, *self.args, **self.kwds) 

475 

476 def stats(self, moments='mv'): 

477 kwds = self.kwds.copy() 

478 kwds.update({'moments': moments}) 

479 return self.dist.stats(*self.args, **kwds) 

480 

481 def median(self): 

482 return self.dist.median(*self.args, **self.kwds) 

483 

484 def mean(self): 

485 return self.dist.mean(*self.args, **self.kwds) 

486 

487 def var(self): 

488 return self.dist.var(*self.args, **self.kwds) 

489 

490 def std(self): 

491 return self.dist.std(*self.args, **self.kwds) 

492 

493 def moment(self, order=None, **kwds): 

494 return self.dist.moment(order, *self.args, **self.kwds, **kwds) 

495 

496 def entropy(self): 

497 return self.dist.entropy(*self.args, **self.kwds) 

498 

499 def interval(self, confidence=None, **kwds): 

500 return self.dist.interval(confidence, *self.args, **self.kwds, **kwds) 

501 

502 def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds): 

503 # expect method only accepts shape parameters as positional args 

504 # hence convert self.args, self.kwds, also loc/scale 

505 # See the .expect method docstrings for the meaning of 

506 # other parameters. 

507 a, loc, scale = self.dist._parse_args(*self.args, **self.kwds) 

508 if isinstance(self.dist, rv_discrete): 

509 return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds) 

510 else: 

511 return self.dist.expect(func, a, loc, scale, lb, ub, 

512 conditional, **kwds) 

513 

514 def support(self): 

515 return self.dist.support(*self.args, **self.kwds) 

516 

517 

518class rv_discrete_frozen(rv_frozen): 

519 

520 def pmf(self, k): 

521 return self.dist.pmf(k, *self.args, **self.kwds) 

522 

523 def logpmf(self, k): # No error 

524 return self.dist.logpmf(k, *self.args, **self.kwds) 

525 

526 

527class rv_continuous_frozen(rv_frozen): 

528 

529 def pdf(self, x): 

530 return self.dist.pdf(x, *self.args, **self.kwds) 

531 

532 def logpdf(self, x): 

533 return self.dist.logpdf(x, *self.args, **self.kwds) 

534 

535 

536def argsreduce(cond, *args): 

537 """Clean arguments to: 

538 

539 1. Ensure all arguments are iterable (arrays of dimension at least one 

540 2. If cond != True and size > 1, ravel(args[i]) where ravel(condition) is 

541 True, in 1D. 

542 

543 Return list of processed arguments. 

544 

545 Examples 

546 -------- 

547 >>> import numpy as np 

548 >>> rng = np.random.default_rng() 

549 >>> A = rng.random((4, 5)) 

550 >>> B = 2 

551 >>> C = rng.random((1, 5)) 

552 >>> cond = np.ones(A.shape) 

553 >>> [A1, B1, C1] = argsreduce(cond, A, B, C) 

554 >>> A1.shape 

555 (4, 5) 

556 >>> B1.shape 

557 (1,) 

558 >>> C1.shape 

559 (1, 5) 

560 >>> cond[2,:] = 0 

561 >>> [A1, B1, C1] = argsreduce(cond, A, B, C) 

562 >>> A1.shape 

563 (15,) 

564 >>> B1.shape 

565 (1,) 

566 >>> C1.shape 

567 (15,) 

568 

569 """ 

570 # some distributions assume arguments are iterable. 

571 newargs = np.atleast_1d(*args) 

572 

573 # np.atleast_1d returns an array if only one argument, or a list of arrays 

574 # if more than one argument. 

575 if not isinstance(newargs, list): 

576 newargs = [newargs, ] 

577 

578 if np.all(cond): 

579 # broadcast arrays with cond 

580 *newargs, cond = np.broadcast_arrays(*newargs, cond) 

581 return [arg.ravel() for arg in newargs] 

582 

583 s = cond.shape 

584 # np.extract returns flattened arrays, which are not broadcastable together 

585 # unless they are either the same size or size == 1. 

586 return [(arg if np.size(arg) == 1 

587 else np.extract(cond, np.broadcast_to(arg, s))) 

588 for arg in newargs] 

589 

590 

591parse_arg_template = """ 

592def _parse_args(self, %(shape_arg_str)s %(locscale_in)s): 

593 return (%(shape_arg_str)s), %(locscale_out)s 

594 

595def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None): 

596 return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size) 

597 

598def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'): 

599 return (%(shape_arg_str)s), %(locscale_out)s, moments 

600""" 

601 

602 

603class rv_generic: 

604 """Class which encapsulates common functionality between rv_discrete 

605 and rv_continuous. 

606 

607 """ 

608 def __init__(self, seed=None): 

609 super().__init__() 

610 

611 # figure out if _stats signature has 'moments' keyword 

612 sig = _getfullargspec(self._stats) 

613 self._stats_has_moments = ((sig.varkw is not None) or 

614 ('moments' in sig.args) or 

615 ('moments' in sig.kwonlyargs)) 

616 self._random_state = check_random_state(seed) 

617 

618 @property 

619 def random_state(self): 

620 """Get or set the generator object for generating random variates. 

621 

622 If `random_state` is None (or `np.random`), the 

623 `numpy.random.RandomState` singleton is used. 

624 If `random_state` is an int, a new ``RandomState`` instance is used, 

625 seeded with `random_state`. 

626 If `random_state` is already a ``Generator`` or ``RandomState`` 

627 instance, that instance is used. 

628 

629 """ 

630 return self._random_state 

631 

632 @random_state.setter 

633 def random_state(self, seed): 

634 self._random_state = check_random_state(seed) 

635 

636 def __setstate__(self, state): 

637 try: 

638 self.__dict__.update(state) 

639 # attaches the dynamically created methods on each instance. 

640 # if a subclass overrides rv_generic.__setstate__, or implements 

641 # it's own _attach_methods, then it must make sure that 

642 # _attach_argparser_methods is called. 

643 self._attach_methods() 

644 except ValueError: 

645 # reconstitute an old pickle scipy<1.6, that contains 

646 # (_ctor_param, random_state) as state 

647 self._ctor_param = state[0] 

648 self._random_state = state[1] 

649 self.__init__() 

650 

651 def _attach_methods(self): 

652 """Attaches dynamically created methods to the rv_* instance. 

653 

654 This method must be overridden by subclasses, and must itself call 

655 _attach_argparser_methods. This method is called in __init__ in 

656 subclasses, and in __setstate__ 

657 """ 

658 raise NotImplementedError 

659 

660 def _attach_argparser_methods(self): 

661 """ 

662 Generates the argument-parsing functions dynamically and attaches 

663 them to the instance. 

664 

665 Should be called from `_attach_methods`, typically in __init__ and 

666 during unpickling (__setstate__) 

667 """ 

668 ns = {} 

669 exec(self._parse_arg_template, ns) 

670 # NB: attach to the instance, not class 

671 for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']: 

672 setattr(self, name, types.MethodType(ns[name], self)) 

673 

674 def _construct_argparser( 

675 self, meths_to_inspect, locscale_in, locscale_out): 

676 """Construct the parser string for the shape arguments. 

677 

678 This method should be called in __init__ of a class for each 

679 distribution. It creates the `_parse_arg_template` attribute that is 

680 then used by `_attach_argparser_methods` to dynamically create and 

681 attach the `_parse_args`, `_parse_args_stats`, `_parse_args_rvs` 

682 methods to the instance. 

683 

684 If self.shapes is a non-empty string, interprets it as a 

685 comma-separated list of shape parameters. 

686 

687 Otherwise inspects the call signatures of `meths_to_inspect` 

688 and constructs the argument-parsing functions from these. 

689 In this case also sets `shapes` and `numargs`. 

690 """ 

691 

692 if self.shapes: 

693 # sanitize the user-supplied shapes 

694 if not isinstance(self.shapes, str): 

695 raise TypeError('shapes must be a string.') 

696 

697 shapes = self.shapes.replace(',', ' ').split() 

698 

699 for field in shapes: 

700 if keyword.iskeyword(field): 

701 raise SyntaxError('keywords cannot be used as shapes.') 

702 if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field): 

703 raise SyntaxError( 

704 'shapes must be valid python identifiers') 

705 else: 

706 # find out the call signatures (_pdf, _cdf etc), deduce shape 

707 # arguments. Generic methods only have 'self, x', any further args 

708 # are shapes. 

709 shapes_list = [] 

710 for meth in meths_to_inspect: 

711 shapes_args = _getfullargspec(meth) # NB does not contain self 

712 args = shapes_args.args[1:] # peel off 'x', too 

713 

714 if args: 

715 shapes_list.append(args) 

716 

717 # *args or **kwargs are not allowed w/automatic shapes 

718 if shapes_args.varargs is not None: 

719 raise TypeError( 

720 '*args are not allowed w/out explicit shapes') 

721 if shapes_args.varkw is not None: 

722 raise TypeError( 

723 '**kwds are not allowed w/out explicit shapes') 

724 if shapes_args.kwonlyargs: 

725 raise TypeError( 

726 'kwonly args are not allowed w/out explicit shapes') 

727 if shapes_args.defaults is not None: 

728 raise TypeError('defaults are not allowed for shapes') 

729 

730 if shapes_list: 

731 shapes = shapes_list[0] 

732 

733 # make sure the signatures are consistent 

734 for item in shapes_list: 

735 if item != shapes: 

736 raise TypeError('Shape arguments are inconsistent.') 

737 else: 

738 shapes = [] 

739 

740 # have the arguments, construct the method from template 

741 shapes_str = ', '.join(shapes) + ', ' if shapes else '' # NB: not None 

742 dct = dict(shape_arg_str=shapes_str, 

743 locscale_in=locscale_in, 

744 locscale_out=locscale_out, 

745 ) 

746 

747 # this string is used by _attach_argparser_methods 

748 self._parse_arg_template = parse_arg_template % dct 

749 

750 self.shapes = ', '.join(shapes) if shapes else None 

751 if not hasattr(self, 'numargs'): 

752 # allows more general subclassing with *args 

753 self.numargs = len(shapes) 

754 

755 def _construct_doc(self, docdict, shapes_vals=None): 

756 """Construct the instance docstring with string substitutions.""" 

757 tempdict = docdict.copy() 

758 tempdict['name'] = self.name or 'distname' 

759 tempdict['shapes'] = self.shapes or '' 

760 

761 if shapes_vals is None: 

762 shapes_vals = () 

763 vals = ', '.join('%.3g' % val for val in shapes_vals) 

764 tempdict['vals'] = vals 

765 

766 tempdict['shapes_'] = self.shapes or '' 

767 if self.shapes and self.numargs == 1: 

768 tempdict['shapes_'] += ',' 

769 

770 if self.shapes: 

771 tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals) 

772 else: 

773 tempdict['set_vals_stmt'] = '' 

774 

775 if self.shapes is None: 

776 # remove shapes from call parameters if there are none 

777 for item in ['default', 'before_notes']: 

778 tempdict[item] = tempdict[item].replace( 

779 "\n%(shapes)s : array_like\n shape parameters", "") 

780 for i in range(2): 

781 if self.shapes is None: 

782 # necessary because we use %(shapes)s in two forms (w w/o ", ") 

783 self.__doc__ = self.__doc__.replace("%(shapes)s, ", "") 

784 try: 

785 self.__doc__ = doccer.docformat(self.__doc__, tempdict) 

786 except TypeError as e: 

787 raise Exception("Unable to construct docstring for " 

788 "distribution \"%s\": %s" % 

789 (self.name, repr(e))) from e 

790 

791 # correct for empty shapes 

792 self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')') 

793 

794 def _construct_default_doc(self, longname=None, extradoc=None, 

795 docdict=None, discrete='continuous'): 

796 """Construct instance docstring from the default template.""" 

797 if longname is None: 

798 longname = 'A' 

799 if extradoc is None: 

800 extradoc = '' 

801 if extradoc.startswith('\n\n'): 

802 extradoc = extradoc[2:] 

803 self.__doc__ = ''.join(['%s %s random variable.' % (longname, discrete), 

804 '\n\n%(before_notes)s\n', docheaders['notes'], 

805 extradoc, '\n%(example)s']) 

806 self._construct_doc(docdict) 

807 

808 def freeze(self, *args, **kwds): 

809 """Freeze the distribution for the given arguments. 

810 

811 Parameters 

812 ---------- 

813 arg1, arg2, arg3,... : array_like 

814 The shape parameter(s) for the distribution. Should include all 

815 the non-optional arguments, may include ``loc`` and ``scale``. 

816 

817 Returns 

818 ------- 

819 rv_frozen : rv_frozen instance 

820 The frozen distribution. 

821 

822 """ 

823 if isinstance(self, rv_continuous): 

824 return rv_continuous_frozen(self, *args, **kwds) 

825 else: 

826 return rv_discrete_frozen(self, *args, **kwds) 

827 

828 def __call__(self, *args, **kwds): 

829 return self.freeze(*args, **kwds) 

830 __call__.__doc__ = freeze.__doc__ 

831 

832 # The actual calculation functions (no basic checking need be done) 

833 # If these are defined, the others won't be looked at. 

834 # Otherwise, the other set can be defined. 

835 def _stats(self, *args, **kwds): 

836 return None, None, None, None 

837 

838 # Noncentral moments (also known as the moment about the origin). 

839 # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime". 

840 # The primed mu is a widely used notation for the noncentral moment. 

841 def _munp(self, n, *args): 

842 # Silence floating point warnings from integration. 

843 with np.errstate(all='ignore'): 

844 vals = self.generic_moment(n, *args) 

845 return vals 

846 

847 def _argcheck_rvs(self, *args, **kwargs): 

848 # Handle broadcasting and size validation of the rvs method. 

849 # Subclasses should not have to override this method. 

850 # The rule is that if `size` is not None, then `size` gives the 

851 # shape of the result (integer values of `size` are treated as 

852 # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.) 

853 # 

854 # `args` is expected to contain the shape parameters (if any), the 

855 # location and the scale in a flat tuple (e.g. if there are two 

856 # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`). 

857 # The only keyword argument expected is 'size'. 

858 size = kwargs.get('size', None) 

859 all_bcast = np.broadcast_arrays(*args) 

860 

861 def squeeze_left(a): 

862 while a.ndim > 0 and a.shape[0] == 1: 

863 a = a[0] 

864 return a 

865 

866 # Eliminate trivial leading dimensions. In the convention 

867 # used by numpy's random variate generators, trivial leading 

868 # dimensions are effectively ignored. In other words, when `size` 

869 # is given, trivial leading dimensions of the broadcast parameters 

870 # in excess of the number of dimensions in size are ignored, e.g. 

871 # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3) 

872 # array([ 1.00104267, 3.00422496, 4.99799278]) 

873 # If `size` is not given, the exact broadcast shape is preserved: 

874 # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]]) 

875 # array([[[[ 1.00862899, 3.00061431, 4.99867122]]]]) 

876 # 

877 all_bcast = [squeeze_left(a) for a in all_bcast] 

878 bcast_shape = all_bcast[0].shape 

879 bcast_ndim = all_bcast[0].ndim 

880 

881 if size is None: 

882 size_ = bcast_shape 

883 else: 

884 size_ = tuple(np.atleast_1d(size)) 

885 

886 # Check compatibility of size_ with the broadcast shape of all 

887 # the parameters. This check is intended to be consistent with 

888 # how the numpy random variate generators (e.g. np.random.normal, 

889 # np.random.beta) handle their arguments. The rule is that, if size 

890 # is given, it determines the shape of the output. Broadcasting 

891 # can't change the output size. 

892 

893 # This is the standard broadcasting convention of extending the 

894 # shape with fewer dimensions with enough dimensions of length 1 

895 # so that the two shapes have the same number of dimensions. 

896 ndiff = bcast_ndim - len(size_) 

897 if ndiff < 0: 

898 bcast_shape = (1,)*(-ndiff) + bcast_shape 

899 elif ndiff > 0: 

900 size_ = (1,)*ndiff + size_ 

901 

902 # This compatibility test is not standard. In "regular" broadcasting, 

903 # two shapes are compatible if for each dimension, the lengths are the 

904 # same or one of the lengths is 1. Here, the length of a dimension in 

905 # size_ must not be less than the corresponding length in bcast_shape. 

906 ok = all([bcdim == 1 or bcdim == szdim 

907 for (bcdim, szdim) in zip(bcast_shape, size_)]) 

908 if not ok: 

909 raise ValueError("size does not match the broadcast shape of " 

910 "the parameters. %s, %s, %s" % (size, size_, 

911 bcast_shape)) 

912 

913 param_bcast = all_bcast[:-2] 

914 loc_bcast = all_bcast[-2] 

915 scale_bcast = all_bcast[-1] 

916 

917 return param_bcast, loc_bcast, scale_bcast, size_ 

918 

919 # These are the methods you must define (standard form functions) 

920 # NB: generic _pdf, _logpdf, _cdf are different for 

921 # rv_continuous and rv_discrete hence are defined in there 

922 def _argcheck(self, *args): 

923 """Default check for correct values on args and keywords. 

924 

925 Returns condition array of 1's where arguments are correct and 

926 0's where they are not. 

927 

928 """ 

929 cond = 1 

930 for arg in args: 

931 cond = logical_and(cond, (asarray(arg) > 0)) 

932 return cond 

933 

934 def _get_support(self, *args, **kwargs): 

935 """Return the support of the (unscaled, unshifted) distribution. 

936 

937 *Must* be overridden by distributions which have support dependent 

938 upon the shape parameters of the distribution. Any such override 

939 *must not* set or change any of the class members, as these members 

940 are shared amongst all instances of the distribution. 

941 

942 Parameters 

943 ---------- 

944 arg1, arg2, ... : array_like 

945 The shape parameter(s) for the distribution (see docstring of the 

946 instance object for more information). 

947 

948 Returns 

949 ------- 

950 a, b : numeric (float, or int or +/-np.inf) 

951 end-points of the distribution's support for the specified 

952 shape parameters. 

953 """ 

954 return self.a, self.b 

955 

956 def _support_mask(self, x, *args): 

957 a, b = self._get_support(*args) 

958 with np.errstate(invalid='ignore'): 

959 return (a <= x) & (x <= b) 

960 

961 def _open_support_mask(self, x, *args): 

962 a, b = self._get_support(*args) 

963 with np.errstate(invalid='ignore'): 

964 return (a < x) & (x < b) 

965 

966 def _rvs(self, *args, size=None, random_state=None): 

967 # This method must handle size being a tuple, and it must 

968 # properly broadcast *args and size. size might be 

969 # an empty tuple, which means a scalar random variate is to be 

970 # generated. 

971 

972 # Use basic inverse cdf algorithm for RV generation as default. 

973 U = random_state.uniform(size=size) 

974 Y = self._ppf(U, *args) 

975 return Y 

976 

977 def _logcdf(self, x, *args): 

978 with np.errstate(divide='ignore'): 

979 return log(self._cdf(x, *args)) 

980 

981 def _sf(self, x, *args): 

982 return 1.0-self._cdf(x, *args) 

983 

984 def _logsf(self, x, *args): 

985 with np.errstate(divide='ignore'): 

986 return log(self._sf(x, *args)) 

987 

988 def _ppf(self, q, *args): 

989 return self._ppfvec(q, *args) 

990 

991 def _isf(self, q, *args): 

992 return self._ppf(1.0-q, *args) # use correct _ppf for subclasses 

993 

994 # These are actually called, and should not be overwritten if you 

995 # want to keep error checking. 

996 def rvs(self, *args, **kwds): 

997 """Random variates of given type. 

998 

999 Parameters 

1000 ---------- 

1001 arg1, arg2, arg3,... : array_like 

1002 The shape parameter(s) for the distribution (see docstring of the 

1003 instance object for more information). 

1004 loc : array_like, optional 

1005 Location parameter (default=0). 

1006 scale : array_like, optional 

1007 Scale parameter (default=1). 

1008 size : int or tuple of ints, optional 

1009 Defining number of random variates (default is 1). 

1010 random_state : {None, int, `numpy.random.Generator`, 

1011 `numpy.random.RandomState`}, optional 

1012 

1013 If `random_state` is None (or `np.random`), the 

1014 `numpy.random.RandomState` singleton is used. 

1015 If `random_state` is an int, a new ``RandomState`` instance is 

1016 used, seeded with `random_state`. 

1017 If `random_state` is already a ``Generator`` or ``RandomState`` 

1018 instance, that instance is used. 

1019 

1020 Returns 

1021 ------- 

1022 rvs : ndarray or scalar 

1023 Random variates of given `size`. 

1024 

1025 """ 

1026 discrete = kwds.pop('discrete', None) 

1027 rndm = kwds.pop('random_state', None) 

1028 args, loc, scale, size = self._parse_args_rvs(*args, **kwds) 

1029 cond = logical_and(self._argcheck(*args), (scale >= 0)) 

1030 if not np.all(cond): 

1031 message = ("Domain error in arguments. The `scale` parameter must " 

1032 "be positive for all distributions, and many " 

1033 "distributions have restrictions on shape parameters. " 

1034 f"Please see the `scipy.stats.{self.name}` " 

1035 "documentation for details.") 

1036 raise ValueError(message) 

1037 

1038 if np.all(scale == 0): 

1039 return loc*ones(size, 'd') 

1040 

1041 # extra gymnastics needed for a custom random_state 

1042 if rndm is not None: 

1043 random_state_saved = self._random_state 

1044 random_state = check_random_state(rndm) 

1045 else: 

1046 random_state = self._random_state 

1047 

1048 vals = self._rvs(*args, size=size, random_state=random_state) 

1049 

1050 vals = vals * scale + loc 

1051 

1052 # do not forget to restore the _random_state 

1053 if rndm is not None: 

1054 self._random_state = random_state_saved 

1055 

1056 # Cast to int if discrete 

1057 if discrete and not isinstance(self, rv_sample): 

1058 if size == (): 

1059 vals = int(vals) 

1060 else: 

1061 vals = vals.astype(np.int64) 

1062 

1063 return vals 

1064 

1065 def stats(self, *args, **kwds): 

1066 """Some statistics of the given RV. 

1067 

1068 Parameters 

1069 ---------- 

1070 arg1, arg2, arg3,... : array_like 

1071 The shape parameter(s) for the distribution (see docstring of the 

1072 instance object for more information) 

1073 loc : array_like, optional 

1074 location parameter (default=0) 

1075 scale : array_like, optional (continuous RVs only) 

1076 scale parameter (default=1) 

1077 moments : str, optional 

1078 composed of letters ['mvsk'] defining which moments to compute: 

1079 'm' = mean, 

1080 'v' = variance, 

1081 's' = (Fisher's) skew, 

1082 'k' = (Fisher's) kurtosis. 

1083 (default is 'mv') 

1084 

1085 Returns 

1086 ------- 

1087 stats : sequence 

1088 of requested moments. 

1089 

1090 """ 

1091 args, loc, scale, moments = self._parse_args_stats(*args, **kwds) 

1092 # scale = 1 by construction for discrete RVs 

1093 loc, scale = map(asarray, (loc, scale)) 

1094 args = tuple(map(asarray, args)) 

1095 cond = self._argcheck(*args) & (scale > 0) & (loc == loc) 

1096 output = [] 

1097 default = np.full(shape(cond), fill_value=self.badvalue) 

1098 

1099 # Use only entries that are valid in calculation 

1100 if np.any(cond): 

1101 goodargs = argsreduce(cond, *(args+(scale, loc))) 

1102 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2] 

1103 

1104 if self._stats_has_moments: 

1105 mu, mu2, g1, g2 = self._stats(*goodargs, 

1106 **{'moments': moments}) 

1107 else: 

1108 mu, mu2, g1, g2 = self._stats(*goodargs) 

1109 

1110 if 'm' in moments: 

1111 if mu is None: 

1112 mu = self._munp(1, *goodargs) 

1113 out0 = default.copy() 

1114 place(out0, cond, mu * scale + loc) 

1115 output.append(out0) 

1116 

1117 if 'v' in moments: 

1118 if mu2 is None: 

1119 mu2p = self._munp(2, *goodargs) 

1120 if mu is None: 

1121 mu = self._munp(1, *goodargs) 

1122 # if mean is inf then var is also inf 

1123 with np.errstate(invalid='ignore'): 

1124 mu2 = np.where(~np.isinf(mu), mu2p - mu**2, np.inf) 

1125 out0 = default.copy() 

1126 place(out0, cond, mu2 * scale * scale) 

1127 output.append(out0) 

1128 

1129 if 's' in moments: 

1130 if g1 is None: 

1131 mu3p = self._munp(3, *goodargs) 

1132 if mu is None: 

1133 mu = self._munp(1, *goodargs) 

1134 if mu2 is None: 

1135 mu2p = self._munp(2, *goodargs) 

1136 mu2 = mu2p - mu * mu 

1137 with np.errstate(invalid='ignore'): 

1138 mu3 = (-mu*mu - 3*mu2)*mu + mu3p 

1139 g1 = mu3 / np.power(mu2, 1.5) 

1140 out0 = default.copy() 

1141 place(out0, cond, g1) 

1142 output.append(out0) 

1143 

1144 if 'k' in moments: 

1145 if g2 is None: 

1146 mu4p = self._munp(4, *goodargs) 

1147 if mu is None: 

1148 mu = self._munp(1, *goodargs) 

1149 if mu2 is None: 

1150 mu2p = self._munp(2, *goodargs) 

1151 mu2 = mu2p - mu * mu 

1152 if g1 is None: 

1153 mu3 = None 

1154 else: 

1155 # (mu2**1.5) breaks down for nan and inf 

1156 mu3 = g1 * np.power(mu2, 1.5) 

1157 if mu3 is None: 

1158 mu3p = self._munp(3, *goodargs) 

1159 with np.errstate(invalid='ignore'): 

1160 mu3 = (-mu * mu - 3 * mu2) * mu + mu3p 

1161 with np.errstate(invalid='ignore'): 

1162 mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p 

1163 g2 = mu4 / mu2**2.0 - 3.0 

1164 out0 = default.copy() 

1165 place(out0, cond, g2) 

1166 output.append(out0) 

1167 else: # no valid args 

1168 output = [default.copy() for _ in moments] 

1169 

1170 output = [out[()] for out in output] 

1171 if len(output) == 1: 

1172 return output[0] 

1173 else: 

1174 return tuple(output) 

1175 

1176 def entropy(self, *args, **kwds): 

1177 """Differential entropy of the RV. 

1178 

1179 Parameters 

1180 ---------- 

1181 arg1, arg2, arg3,... : array_like 

1182 The shape parameter(s) for the distribution (see docstring of the 

1183 instance object for more information). 

1184 loc : array_like, optional 

1185 Location parameter (default=0). 

1186 scale : array_like, optional (continuous distributions only). 

1187 Scale parameter (default=1). 

1188 

1189 Notes 

1190 ----- 

1191 Entropy is defined base `e`: 

1192 

1193 >>> import numpy as np 

1194 >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5))) 

1195 >>> np.allclose(drv.entropy(), np.log(2.0)) 

1196 True 

1197 

1198 """ 

1199 args, loc, scale = self._parse_args(*args, **kwds) 

1200 # NB: for discrete distributions scale=1 by construction in _parse_args 

1201 loc, scale = map(asarray, (loc, scale)) 

1202 args = tuple(map(asarray, args)) 

1203 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc) 

1204 output = zeros(shape(cond0), 'd') 

1205 place(output, (1-cond0), self.badvalue) 

1206 goodargs = argsreduce(cond0, scale, *args) 

1207 goodscale = goodargs[0] 

1208 goodargs = goodargs[1:] 

1209 place(output, cond0, self.vecentropy(*goodargs) + log(goodscale)) 

1210 return output[()] 

1211 

1212 def moment(self, order=None, *args, **kwds): 

1213 """non-central moment of distribution of specified order. 

1214 

1215 .. deprecated:: 1.9.0 

1216 Parameter `n` is replaced by parameter `order` to avoid name 

1217 collisions with the shape parameter `n` of several distributions. 

1218 Parameter `n` will be removed in SciPy 1.11.0. 

1219 

1220 Parameters 

1221 ---------- 

1222 order : int, order >= 1 

1223 Order of moment. 

1224 arg1, arg2, arg3,... : float 

1225 The shape parameter(s) for the distribution (see docstring of the 

1226 instance object for more information). 

1227 loc : array_like, optional 

1228 location parameter (default=0) 

1229 scale : array_like, optional 

1230 scale parameter (default=1) 

1231 

1232 """ 

1233 # This function was originally written with parameter `n`, but `n` 

1234 # is also the name of many distribution shape parameters. 

1235 # This block allows the function to accept both `n` and its 

1236 # replacement `order` during a deprecation period; it can be removed 

1237 # in the second release after 1.9.0. 

1238 # The logic to provide a DeprecationWarning only when `n` is passed 

1239 # as a keyword, accept the new keyword `order`, and otherwise be 

1240 # backward-compatible deserves explanation. We need to look out for 

1241 # the following: 

1242 # * Does the distribution have a shape named `n`? 

1243 # * Is `order` provided? It doesn't matter whether it is provided as a 

1244 # positional or keyword argument; it will be used as the order of the 

1245 # moment rather than a distribution shape parameter because: 

1246 # - The first positional argument of `moment` has always been the 

1247 # order of the moment. 

1248 # - The keyword `order` is new, so it's unambiguous that it refers to 

1249 # the order of the moment. 

1250 # * Is `n` provided as a keyword argument? It _does_ matter whether it 

1251 # is provided as a positional or keyword argument. 

1252 # - The first positional argument of `moment` has always been the 

1253 # order of moment, but 

1254 # - if `n` is provided as a keyword argument, its meaning depends 

1255 # on whether the distribution accepts `n` as a shape parameter. 

1256 has_shape_n = (self.shapes is not None 

1257 and "n" in (self.shapes.split(", "))) 

1258 got_order = order is not None 

1259 got_keyword_n = kwds.get("n", None) is not None 

1260 

1261 # These lead to the following cases. 

1262 # Case A: If the distribution _does_ accept `n` as a shape 

1263 # 1. If both `order` and `n` are provided, this is now OK: 

1264 # it is unambiguous that `order` is the order of the moment and `n` 

1265 # is the shape parameter. Previously, this would have caused an 

1266 # error because `n` was provided both as a keyword argument and 

1267 # as the first positional argument. I don't think it is credible for 

1268 # users to rely on this error in their code, though, so I don't see 

1269 # this as a backward compatibility break. 

1270 # 2. If only `n` is provided (as a keyword argument), this would have 

1271 # been an error in the past because `n` would have been treated as 

1272 # the order of the moment while the shape parameter would be 

1273 # missing. It is still the same type of error, but for a different 

1274 # reason: now, `n` is treated as the shape parameter while the 

1275 # order of the moment is missing. 

1276 # 3. If only `order` is provided, no special treament is needed. 

1277 # Clearly this value is intended to be the order of the moment, 

1278 # and the rest of the function will determine whether `n` is 

1279 # available as a shape parameter in `args`. 

1280 # 4. If neither `n` nor `order` is provided, this would have been an 

1281 # error (order of the moment is not provided) and it is still an 

1282 # error for the same reason. 

1283 

1284 # Case B: the distribution does _not_ accept `n` as a shape 

1285 # 1. If both `order` and `n` are provided, this was an error, and it 

1286 # still is an error: two values for same parameter. 

1287 # 2. If only `n` is provided (as a keyword argument), this was OK and 

1288 # is still OK, but there shold now be a `DeprecationWarning`. The 

1289 # value of `n` should be removed from `kwds` and stored in `order`. 

1290 # 3. If only `order` is provided, there was no problem before providing 

1291 # only the first argument of `moment`, and there is no problem with 

1292 # that now. 

1293 # 4. If neither `n` nor `order` is provided, this would have been an 

1294 # error (order of the moment is not provided), and it is still an 

1295 # error for the same reason. 

1296 if not got_order and ((not got_keyword_n) # A4 and B4 

1297 or (got_keyword_n and has_shape_n)): # A2 

1298 message = ("moment() missing 1 required " 

1299 "positional argument: `order`") 

1300 raise TypeError(message) 

1301 

1302 if got_keyword_n and not has_shape_n: 

1303 if got_order: # B1 

1304 # this will change to "moment got unexpected argument n" 

1305 message = "moment() got multiple values for first argument" 

1306 raise TypeError(message) 

1307 else: # B2 

1308 message = ("Use of keyword argument 'n' for method 'moment is" 

1309 " deprecated and will be removed in SciPy 1.11.0. " 

1310 "Use first positional argument or keyword argument" 

1311 " 'order' instead.") 

1312 order = kwds.pop("n") 

1313 warnings.warn(message, DeprecationWarning, stacklevel=2) 

1314 n = order 

1315 # No special treatment of A1, A3, or B3 is needed because the order 

1316 # of the moment is now in variable `n` and the shape parameter, if 

1317 # needed, will be fished out of `args` or `kwds` by _parse_args 

1318 # A3 might still cause an error if the shape parameter called `n` 

1319 # is not found in `args`. 

1320 

1321 shapes, loc, scale = self._parse_args(*args, **kwds) 

1322 args = np.broadcast_arrays(*(*shapes, loc, scale)) 

1323 *shapes, loc, scale = args 

1324 

1325 i0 = np.logical_and(self._argcheck(*shapes), scale > 0) 

1326 i1 = np.logical_and(i0, loc == 0) 

1327 i2 = np.logical_and(i0, loc != 0) 

1328 

1329 args = argsreduce(i0, *shapes, loc, scale) 

1330 *shapes, loc, scale = args 

1331 

1332 if (floor(n) != n): 

1333 raise ValueError("Moment must be an integer.") 

1334 if (n < 0): 

1335 raise ValueError("Moment must be positive.") 

1336 mu, mu2, g1, g2 = None, None, None, None 

1337 if (n > 0) and (n < 5): 

1338 if self._stats_has_moments: 

1339 mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'mvsk'}[n]} 

1340 else: 

1341 mdict = {} 

1342 mu, mu2, g1, g2 = self._stats(*shapes, **mdict) 

1343 val = np.empty(loc.shape) # val needs to be indexed by loc 

1344 val[...] = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, shapes) 

1345 

1346 # Convert to transformed X = L + S*Y 

1347 # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n) 

1348 result = zeros(i0.shape) 

1349 place(result, ~i0, self.badvalue) 

1350 

1351 if i1.any(): 

1352 res1 = scale[loc == 0]**n * val[loc == 0] 

1353 place(result, i1, res1) 

1354 

1355 if i2.any(): 

1356 mom = [mu, mu2, g1, g2] 

1357 arrs = [i for i in mom if i is not None] 

1358 idx = [i for i in range(4) if mom[i] is not None] 

1359 if any(idx): 

1360 arrs = argsreduce(loc != 0, *arrs) 

1361 j = 0 

1362 for i in idx: 

1363 mom[i] = arrs[j] 

1364 j += 1 

1365 mu, mu2, g1, g2 = mom 

1366 args = argsreduce(loc != 0, *shapes, loc, scale, val) 

1367 *shapes, loc, scale, val = args 

1368 

1369 res2 = zeros(loc.shape, dtype='d') 

1370 fac = scale / loc 

1371 for k in range(n): 

1372 valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, 

1373 shapes) 

1374 res2 += comb(n, k, exact=True)*fac**k * valk 

1375 res2 += fac**n * val 

1376 res2 *= loc**n 

1377 place(result, i2, res2) 

1378 

1379 return result[()] 

1380 

1381 def median(self, *args, **kwds): 

1382 """Median of the distribution. 

1383 

1384 Parameters 

1385 ---------- 

1386 arg1, arg2, arg3,... : array_like 

1387 The shape parameter(s) for the distribution (see docstring of the 

1388 instance object for more information) 

1389 loc : array_like, optional 

1390 Location parameter, Default is 0. 

1391 scale : array_like, optional 

1392 Scale parameter, Default is 1. 

1393 

1394 Returns 

1395 ------- 

1396 median : float 

1397 The median of the distribution. 

1398 

1399 See Also 

1400 -------- 

1401 rv_discrete.ppf 

1402 Inverse of the CDF 

1403 

1404 """ 

1405 return self.ppf(0.5, *args, **kwds) 

1406 

1407 def mean(self, *args, **kwds): 

1408 """Mean of the distribution. 

1409 

1410 Parameters 

1411 ---------- 

1412 arg1, arg2, arg3,... : array_like 

1413 The shape parameter(s) for the distribution (see docstring of the 

1414 instance object for more information) 

1415 loc : array_like, optional 

1416 location parameter (default=0) 

1417 scale : array_like, optional 

1418 scale parameter (default=1) 

1419 

1420 Returns 

1421 ------- 

1422 mean : float 

1423 the mean of the distribution 

1424 

1425 """ 

1426 kwds['moments'] = 'm' 

1427 res = self.stats(*args, **kwds) 

1428 if isinstance(res, ndarray) and res.ndim == 0: 

1429 return res[()] 

1430 return res 

1431 

1432 def var(self, *args, **kwds): 

1433 """Variance of the distribution. 

1434 

1435 Parameters 

1436 ---------- 

1437 arg1, arg2, arg3,... : array_like 

1438 The shape parameter(s) for the distribution (see docstring of the 

1439 instance object for more information) 

1440 loc : array_like, optional 

1441 location parameter (default=0) 

1442 scale : array_like, optional 

1443 scale parameter (default=1) 

1444 

1445 Returns 

1446 ------- 

1447 var : float 

1448 the variance of the distribution 

1449 

1450 """ 

1451 kwds['moments'] = 'v' 

1452 res = self.stats(*args, **kwds) 

1453 if isinstance(res, ndarray) and res.ndim == 0: 

1454 return res[()] 

1455 return res 

1456 

1457 def std(self, *args, **kwds): 

1458 """Standard deviation of the distribution. 

1459 

1460 Parameters 

1461 ---------- 

1462 arg1, arg2, arg3,... : array_like 

1463 The shape parameter(s) for the distribution (see docstring of the 

1464 instance object for more information) 

1465 loc : array_like, optional 

1466 location parameter (default=0) 

1467 scale : array_like, optional 

1468 scale parameter (default=1) 

1469 

1470 Returns 

1471 ------- 

1472 std : float 

1473 standard deviation of the distribution 

1474 

1475 """ 

1476 kwds['moments'] = 'v' 

1477 res = sqrt(self.stats(*args, **kwds)) 

1478 return res 

1479 

1480 def interval(self, confidence=None, *args, **kwds): 

1481 """Confidence interval with equal areas around the median. 

1482 

1483 .. deprecated:: 1.9.0 

1484 Parameter `alpha` is replaced by parameter `confidence` to avoid 

1485 name collisions with the shape parameter `alpha` of some 

1486 distributions. Parameter `alpha` will be removed in SciPy 1.11.0. 

1487 

1488 Parameters 

1489 ---------- 

1490 confidence : array_like of float 

1491 Probability that an rv will be drawn from the returned range. 

1492 Each value should be in the range [0, 1]. 

1493 arg1, arg2, ... : array_like 

1494 The shape parameter(s) for the distribution (see docstring of the 

1495 instance object for more information). 

1496 loc : array_like, optional 

1497 location parameter, Default is 0. 

1498 scale : array_like, optional 

1499 scale parameter, Default is 1. 

1500 

1501 Returns 

1502 ------- 

1503 a, b : ndarray of float 

1504 end-points of range that contain ``100 * alpha %`` of the rv's 

1505 possible values. 

1506 

1507 Notes 

1508 ----- 

1509 This is implemented as ``ppf([p_tail, 1-p_tail])``, where 

1510 ``ppf`` is the inverse cumulative distribution function and 

1511 ``p_tail = (1-confidence)/2``. Suppose ``[c, d]`` is the support of a 

1512 discrete distribution; then ``ppf([0, 1]) == (c-1, d)``. Therefore, 

1513 when ``confidence=1`` and the distribution is discrete, the left end 

1514 of the interval will be beyond the support of the distribution. 

1515 For discrete distributions, the interval will limit the probability 

1516 in each tail to be less than or equal to ``p_tail`` (usually 

1517 strictly less). 

1518 

1519 """ 

1520 # This function was originally written with parameter `alpha`, but 

1521 # `alpha` is also the name of a shape parameter of two distributions. 

1522 # This block allows the function to accept both `alpha` and its 

1523 # replacement `confidence` during a deprecation period; it can be 

1524 # removed in the second release after 1.9.0. 

1525 # See description of logic in `moment` method. 

1526 has_shape_alpha = (self.shapes is not None 

1527 and "alpha" in (self.shapes.split(", "))) 

1528 got_confidence = confidence is not None 

1529 got_keyword_alpha = kwds.get("alpha", None) is not None 

1530 

1531 if not got_confidence and ((not got_keyword_alpha) 

1532 or (got_keyword_alpha and has_shape_alpha)): 

1533 message = ("interval() missing 1 required positional argument: " 

1534 "`confidence`") 

1535 raise TypeError(message) 

1536 

1537 if got_keyword_alpha and not has_shape_alpha: 

1538 if got_confidence: 

1539 # this will change to "interval got unexpected argument alpha" 

1540 message = "interval() got multiple values for first argument" 

1541 raise TypeError(message) 

1542 else: 

1543 message = ("Use of keyword argument 'alpha' for method " 

1544 "'interval' is deprecated and wil be removed in " 

1545 "SciPy 1.11.0. Use first positional argument or " 

1546 "keyword argument 'confidence' instead.") 

1547 confidence = kwds.pop("alpha") 

1548 warnings.warn(message, DeprecationWarning, stacklevel=2) 

1549 alpha = confidence 

1550 

1551 alpha = asarray(alpha) 

1552 if np.any((alpha > 1) | (alpha < 0)): 

1553 raise ValueError("alpha must be between 0 and 1 inclusive") 

1554 q1 = (1.0-alpha)/2 

1555 q2 = (1.0+alpha)/2 

1556 a = self.ppf(q1, *args, **kwds) 

1557 b = self.ppf(q2, *args, **kwds) 

1558 return a, b 

1559 

1560 def support(self, *args, **kwargs): 

1561 """Support of the distribution. 

1562 

1563 Parameters 

1564 ---------- 

1565 arg1, arg2, ... : array_like 

1566 The shape parameter(s) for the distribution (see docstring of the 

1567 instance object for more information). 

1568 loc : array_like, optional 

1569 location parameter, Default is 0. 

1570 scale : array_like, optional 

1571 scale parameter, Default is 1. 

1572 

1573 Returns 

1574 ------- 

1575 a, b : array_like 

1576 end-points of the distribution's support. 

1577 

1578 """ 

1579 args, loc, scale = self._parse_args(*args, **kwargs) 

1580 arrs = np.broadcast_arrays(*args, loc, scale) 

1581 args, loc, scale = arrs[:-2], arrs[-2], arrs[-1] 

1582 cond = self._argcheck(*args) & (scale > 0) 

1583 _a, _b = self._get_support(*args) 

1584 if cond.all(): 

1585 return _a * scale + loc, _b * scale + loc 

1586 elif cond.ndim == 0: 

1587 return self.badvalue, self.badvalue 

1588 # promote bounds to at least float to fill in the badvalue 

1589 _a, _b = np.asarray(_a).astype('d'), np.asarray(_b).astype('d') 

1590 out_a, out_b = _a * scale + loc, _b * scale + loc 

1591 place(out_a, 1-cond, self.badvalue) 

1592 place(out_b, 1-cond, self.badvalue) 

1593 return out_a, out_b 

1594 

1595 def nnlf(self, theta, x): 

1596 """Negative loglikelihood function. 

1597 Notes 

1598 ----- 

1599 This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the 

1600 parameters (including loc and scale). 

1601 """ 

1602 loc, scale, args = self._unpack_loc_scale(theta) 

1603 if not self._argcheck(*args) or scale <= 0: 

1604 return inf 

1605 x = (asarray(x)-loc) / scale 

1606 n_log_scale = len(x) * log(scale) 

1607 if np.any(~self._support_mask(x, *args)): 

1608 return inf 

1609 return self._nnlf(x, *args) + n_log_scale 

1610 

1611 def _nnlf(self, x, *args): 

1612 return -np.sum(self._logpxf(x, *args), axis=0) 

1613 

1614 def _nlff_and_penalty(self, x, args, log_fitfun): 

1615 # negative log fit function 

1616 cond0 = ~self._support_mask(x, *args) 

1617 n_bad = np.count_nonzero(cond0, axis=0) 

1618 if n_bad > 0: 

1619 x = argsreduce(~cond0, x)[0] 

1620 logff = log_fitfun(x, *args) 

1621 finite_logff = np.isfinite(logff) 

1622 n_bad += np.sum(~finite_logff, axis=0) 

1623 if n_bad > 0: 

1624 penalty = n_bad * log(_XMAX) * 100 

1625 return -np.sum(logff[finite_logff], axis=0) + penalty 

1626 return -np.sum(logff, axis=0) 

1627 

1628 def _penalized_nnlf(self, theta, x): 

1629 """Penalized negative loglikelihood function. 

1630 i.e., - sum (log pdf(x, theta), axis=0) + penalty 

1631 where theta are the parameters (including loc and scale) 

1632 """ 

1633 loc, scale, args = self._unpack_loc_scale(theta) 

1634 if not self._argcheck(*args) or scale <= 0: 

1635 return inf 

1636 x = asarray((x-loc) / scale) 

1637 n_log_scale = len(x) * log(scale) 

1638 return self._nlff_and_penalty(x, args, self._logpxf) + n_log_scale 

1639 

1640 def _penalized_nlpsf(self, theta, x): 

1641 """Penalized negative log product spacing function. 

1642 i.e., - sum (log (diff (cdf (x, theta))), axis=0) + penalty 

1643 where theta are the parameters (including loc and scale) 

1644 Follows reference [1] of scipy.stats.fit 

1645 """ 

1646 loc, scale, args = self._unpack_loc_scale(theta) 

1647 if not self._argcheck(*args) or scale <= 0: 

1648 return inf 

1649 x = (np.sort(x) - loc)/scale 

1650 

1651 def log_psf(x, *args): 

1652 x, lj = np.unique(x, return_counts=True) # fast for sorted x 

1653 cdf_data = self._cdf(x, *args) if x.size else [] 

1654 if not (x.size and 1 - cdf_data[-1] <= 0): 

1655 cdf = np.concatenate(([0], cdf_data, [1])) 

1656 lj = np.concatenate((lj, [1])) 

1657 else: 

1658 cdf = np.concatenate(([0], cdf_data)) 

1659 # here we could use logcdf w/ logsumexp trick to take differences, 

1660 # but in the context of the method, it seems unlikely to matter 

1661 return lj * np.log(np.diff(cdf) / lj) 

1662 

1663 return self._nlff_and_penalty(x, args, log_psf) 

1664 

1665 

1666class _ShapeInfo: 

1667 def __init__(self, name, integrality=False, domain=(-np.inf, np.inf), 

1668 inclusive=(True, True)): 

1669 self.name = name 

1670 self.integrality = integrality 

1671 

1672 domain = list(domain) 

1673 if np.isfinite(domain[0]) and not inclusive[0]: 

1674 domain[0] = np.nextafter(domain[0], np.inf) 

1675 if np.isfinite(domain[1]) and not inclusive[1]: 

1676 domain[1] = np.nextafter(domain[1], -np.inf) 

1677 self.domain = domain 

1678 

1679 

1680def _get_fixed_fit_value(kwds, names): 

1681 """ 

1682 Given names such as `['f0', 'fa', 'fix_a']`, check that there is 

1683 at most one non-None value in `kwds` associaed with those names. 

1684 Return that value, or None if none of the names occur in `kwds`. 

1685 As a side effect, all occurrences of those names in `kwds` are 

1686 removed. 

1687 """ 

1688 vals = [(name, kwds.pop(name)) for name in names if name in kwds] 

1689 if len(vals) > 1: 

1690 repeated = [name for name, val in vals] 

1691 raise ValueError("fit method got multiple keyword arguments to " 

1692 "specify the same fixed parameter: " + 

1693 ', '.join(repeated)) 

1694 return vals[0][1] if vals else None 

1695 

1696 

1697# continuous random variables: implement maybe later 

1698# 

1699# hf --- Hazard Function (PDF / SF) 

1700# chf --- Cumulative hazard function (-log(SF)) 

1701# psf --- Probability sparsity function (reciprocal of the pdf) in 

1702# units of percent-point-function (as a function of q). 

1703# Also, the derivative of the percent-point function. 

1704 

1705 

1706class rv_continuous(rv_generic): 

1707 """A generic continuous random variable class meant for subclassing. 

1708 

1709 `rv_continuous` is a base class to construct specific distribution classes 

1710 and instances for continuous random variables. It cannot be used 

1711 directly as a distribution. 

1712 

1713 Parameters 

1714 ---------- 

1715 momtype : int, optional 

1716 The type of generic moment calculation to use: 0 for pdf, 1 (default) 

1717 for ppf. 

1718 a : float, optional 

1719 Lower bound of the support of the distribution, default is minus 

1720 infinity. 

1721 b : float, optional 

1722 Upper bound of the support of the distribution, default is plus 

1723 infinity. 

1724 xtol : float, optional 

1725 The tolerance for fixed point calculation for generic ppf. 

1726 badvalue : float, optional 

1727 The value in a result arrays that indicates a value that for which 

1728 some argument restriction is violated, default is np.nan. 

1729 name : str, optional 

1730 The name of the instance. This string is used to construct the default 

1731 example for distributions. 

1732 longname : str, optional 

1733 This string is used as part of the first line of the docstring returned 

1734 when a subclass has no docstring of its own. Note: `longname` exists 

1735 for backwards compatibility, do not use for new subclasses. 

1736 shapes : str, optional 

1737 The shape of the distribution. For example ``"m, n"`` for a 

1738 distribution that takes two integers as the two shape arguments for all 

1739 its methods. If not provided, shape parameters will be inferred from 

1740 the signature of the private methods, ``_pdf`` and ``_cdf`` of the 

1741 instance. 

1742 extradoc : str, optional, deprecated 

1743 This string is used as the last part of the docstring returned when a 

1744 subclass has no docstring of its own. Note: `extradoc` exists for 

1745 backwards compatibility and will be removed in SciPy 1.11.0, do not 

1746 use for new subclasses. 

1747 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

1748 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

1749 singleton is used. 

1750 If `seed` is an int, a new ``RandomState`` instance is used, 

1751 seeded with `seed`. 

1752 If `seed` is already a ``Generator`` or ``RandomState`` instance then 

1753 that instance is used. 

1754 

1755 Methods 

1756 ------- 

1757 rvs 

1758 pdf 

1759 logpdf 

1760 cdf 

1761 logcdf 

1762 sf 

1763 logsf 

1764 ppf 

1765 isf 

1766 moment 

1767 stats 

1768 entropy 

1769 expect 

1770 median 

1771 mean 

1772 std 

1773 var 

1774 interval 

1775 __call__ 

1776 fit 

1777 fit_loc_scale 

1778 nnlf 

1779 support 

1780 

1781 Notes 

1782 ----- 

1783 Public methods of an instance of a distribution class (e.g., ``pdf``, 

1784 ``cdf``) check their arguments and pass valid arguments to private, 

1785 computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid 

1786 if it is within the support of the distribution. 

1787 Whether a shape parameter is valid is decided by an ``_argcheck`` method 

1788 (which defaults to checking that its arguments are strictly positive.) 

1789 

1790 **Subclassing** 

1791 

1792 New random variables can be defined by subclassing the `rv_continuous` class 

1793 and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized 

1794 to location 0 and scale 1). 

1795 

1796 If positive argument checking is not correct for your RV 

1797 then you will also need to re-define the ``_argcheck`` method. 

1798 

1799 For most of the scipy.stats distributions, the support interval doesn't 

1800 depend on the shape parameters. ``x`` being in the support interval is 

1801 equivalent to ``self.a <= x <= self.b``. If either of the endpoints of 

1802 the support do depend on the shape parameters, then 

1803 i) the distribution must implement the ``_get_support`` method; and 

1804 ii) those dependent endpoints must be omitted from the distribution's 

1805 call to the ``rv_continuous`` initializer. 

1806 

1807 Correct, but potentially slow defaults exist for the remaining 

1808 methods but for speed and/or accuracy you can over-ride:: 

1809 

1810 _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf 

1811 

1812 The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``, 

1813 applied to a uniform random variate. In order to generate random variates 

1814 efficiently, either the default ``_ppf`` needs to be overwritten (e.g. 

1815 if the inverse cdf can expressed in an explicit form) or a sampling 

1816 method needs to be implemented in a custom ``_rvs`` method. 

1817 

1818 If possible, you should override ``_isf``, ``_sf`` or ``_logsf``. 

1819 The main reason would be to improve numerical accuracy: for example, 

1820 the survival function ``_sf`` is computed as ``1 - _cdf`` which can 

1821 result in loss of precision if ``_cdf(x)`` is close to one. 

1822 

1823 **Methods that can be overwritten by subclasses** 

1824 :: 

1825 

1826 _rvs 

1827 _pdf 

1828 _cdf 

1829 _sf 

1830 _ppf 

1831 _isf 

1832 _stats 

1833 _munp 

1834 _entropy 

1835 _argcheck 

1836 _get_support 

1837 

1838 There are additional (internal and private) generic methods that can 

1839 be useful for cross-checking and for debugging, but might work in all 

1840 cases when directly called. 

1841 

1842 A note on ``shapes``: subclasses need not specify them explicitly. In this 

1843 case, `shapes` will be automatically deduced from the signatures of the 

1844 overridden methods (`pdf`, `cdf` etc). 

1845 If, for some reason, you prefer to avoid relying on introspection, you can 

1846 specify ``shapes`` explicitly as an argument to the instance constructor. 

1847 

1848 

1849 **Frozen Distributions** 

1850 

1851 Normally, you must provide shape parameters (and, optionally, location and 

1852 scale parameters to each call of a method of a distribution. 

1853 

1854 Alternatively, the object may be called (as a function) to fix the shape, 

1855 location, and scale parameters returning a "frozen" continuous RV object: 

1856 

1857 rv = generic(<shape(s)>, loc=0, scale=1) 

1858 `rv_frozen` object with the same methods but holding the given shape, 

1859 location, and scale fixed 

1860 

1861 **Statistics** 

1862 

1863 Statistics are computed using numerical integration by default. 

1864 For speed you can redefine this using ``_stats``: 

1865 

1866 - take shape parameters and return mu, mu2, g1, g2 

1867 - If you can't compute one of these, return it as None 

1868 - Can also be defined with a keyword argument ``moments``, which is a 

1869 string composed of "m", "v", "s", and/or "k". 

1870 Only the components appearing in string should be computed and 

1871 returned in the order "m", "v", "s", or "k" with missing values 

1872 returned as None. 

1873 

1874 Alternatively, you can override ``_munp``, which takes ``n`` and shape 

1875 parameters and returns the n-th non-central moment of the distribution. 

1876 

1877 Examples 

1878 -------- 

1879 To create a new Gaussian distribution, we would do the following: 

1880 

1881 >>> from scipy.stats import rv_continuous 

1882 >>> class gaussian_gen(rv_continuous): 

1883 ... "Gaussian distribution" 

1884 ... def _pdf(self, x): 

1885 ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi) 

1886 >>> gaussian = gaussian_gen(name='gaussian') 

1887 

1888 ``scipy.stats`` distributions are *instances*, so here we subclass 

1889 `rv_continuous` and create an instance. With this, we now have 

1890 a fully functional distribution with all relevant methods automagically 

1891 generated by the framework. 

1892 

1893 Note that above we defined a standard normal distribution, with zero mean 

1894 and unit variance. Shifting and scaling of the distribution can be done 

1895 by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)`` 

1896 essentially computes ``y = (x - loc) / scale`` and 

1897 ``gaussian._pdf(y) / scale``. 

1898 

1899 """ 

1900 def __init__(self, momtype=1, a=None, b=None, xtol=1e-14, 

1901 badvalue=None, name=None, longname=None, 

1902 shapes=None, extradoc=None, seed=None): 

1903 

1904 super().__init__(seed) 

1905 

1906 if extradoc is not None: 

1907 warnings.warn("extradoc is deprecated and will be removed in " 

1908 "SciPy 1.11.0", DeprecationWarning) 

1909 

1910 # save the ctor parameters, cf generic freeze 

1911 self._ctor_param = dict( 

1912 momtype=momtype, a=a, b=b, xtol=xtol, 

1913 badvalue=badvalue, name=name, longname=longname, 

1914 shapes=shapes, extradoc=extradoc, seed=seed) 

1915 

1916 if badvalue is None: 

1917 badvalue = nan 

1918 if name is None: 

1919 name = 'Distribution' 

1920 self.badvalue = badvalue 

1921 self.name = name 

1922 self.a = a 

1923 self.b = b 

1924 if a is None: 

1925 self.a = -inf 

1926 if b is None: 

1927 self.b = inf 

1928 self.xtol = xtol 

1929 self.moment_type = momtype 

1930 self.shapes = shapes 

1931 self.extradoc = extradoc 

1932 

1933 self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf], 

1934 locscale_in='loc=0, scale=1', 

1935 locscale_out='loc, scale') 

1936 self._attach_methods() 

1937 

1938 if longname is None: 

1939 if name[0] in ['aeiouAEIOU']: 

1940 hstr = "An " 

1941 else: 

1942 hstr = "A " 

1943 longname = hstr + name 

1944 

1945 if sys.flags.optimize < 2: 

1946 # Skip adding docstrings if interpreter is run with -OO 

1947 if self.__doc__ is None: 

1948 self._construct_default_doc(longname=longname, 

1949 extradoc=extradoc, 

1950 docdict=docdict, 

1951 discrete='continuous') 

1952 else: 

1953 dct = dict(distcont) 

1954 self._construct_doc(docdict, dct.get(self.name)) 

1955 

1956 def __getstate__(self): 

1957 dct = self.__dict__.copy() 

1958 

1959 # these methods will be remade in __setstate__ 

1960 # _random_state attribute is taken care of by rv_generic 

1961 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs", 

1962 "_cdfvec", "_ppfvec", "vecentropy", "generic_moment"] 

1963 [dct.pop(attr, None) for attr in attrs] 

1964 return dct 

1965 

1966 def _attach_methods(self): 

1967 """ 

1968 Attaches dynamically created methods to the rv_continuous instance. 

1969 """ 

1970 # _attach_methods is responsible for calling _attach_argparser_methods 

1971 self._attach_argparser_methods() 

1972 

1973 # nin correction 

1974 self._ppfvec = vectorize(self._ppf_single, otypes='d') 

1975 self._ppfvec.nin = self.numargs + 1 

1976 self.vecentropy = vectorize(self._entropy, otypes='d') 

1977 self._cdfvec = vectorize(self._cdf_single, otypes='d') 

1978 self._cdfvec.nin = self.numargs + 1 

1979 

1980 if self.moment_type == 0: 

1981 self.generic_moment = vectorize(self._mom0_sc, otypes='d') 

1982 else: 

1983 self.generic_moment = vectorize(self._mom1_sc, otypes='d') 

1984 # Because of the *args argument of _mom0_sc, vectorize cannot count the 

1985 # number of arguments correctly. 

1986 self.generic_moment.nin = self.numargs + 1 

1987 

1988 def _updated_ctor_param(self): 

1989 """Return the current version of _ctor_param, possibly updated by user. 

1990 

1991 Used by freezing. 

1992 Keep this in sync with the signature of __init__. 

1993 """ 

1994 dct = self._ctor_param.copy() 

1995 dct['a'] = self.a 

1996 dct['b'] = self.b 

1997 dct['xtol'] = self.xtol 

1998 dct['badvalue'] = self.badvalue 

1999 dct['name'] = self.name 

2000 dct['shapes'] = self.shapes 

2001 dct['extradoc'] = self.extradoc 

2002 return dct 

2003 

2004 def _ppf_to_solve(self, x, q, *args): 

2005 return self.cdf(*(x, )+args)-q 

2006 

2007 def _ppf_single(self, q, *args): 

2008 factor = 10. 

2009 left, right = self._get_support(*args) 

2010 

2011 if np.isinf(left): 

2012 left = min(-factor, right) 

2013 while self._ppf_to_solve(left, q, *args) > 0.: 

2014 left, right = left * factor, left 

2015 # left is now such that cdf(left) <= q 

2016 # if right has changed, then cdf(right) > q 

2017 

2018 if np.isinf(right): 

2019 right = max(factor, left) 

2020 while self._ppf_to_solve(right, q, *args) < 0.: 

2021 left, right = right, right * factor 

2022 # right is now such that cdf(right) >= q 

2023 

2024 return optimize.brentq(self._ppf_to_solve, 

2025 left, right, args=(q,)+args, xtol=self.xtol) 

2026 

2027 # moment from definition 

2028 def _mom_integ0(self, x, m, *args): 

2029 return x**m * self.pdf(x, *args) 

2030 

2031 def _mom0_sc(self, m, *args): 

2032 _a, _b = self._get_support(*args) 

2033 return integrate.quad(self._mom_integ0, _a, _b, 

2034 args=(m,)+args)[0] 

2035 

2036 # moment calculated using ppf 

2037 def _mom_integ1(self, q, m, *args): 

2038 return (self.ppf(q, *args))**m 

2039 

2040 def _mom1_sc(self, m, *args): 

2041 return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0] 

2042 

2043 def _pdf(self, x, *args): 

2044 return _derivative(self._cdf, x, dx=1e-5, args=args, order=5) 

2045 

2046 # Could also define any of these 

2047 def _logpdf(self, x, *args): 

2048 p = self._pdf(x, *args) 

2049 with np.errstate(divide='ignore'): 

2050 return log(p) 

2051 

2052 def _logpxf(self, x, *args): 

2053 # continuous distributions have PDF, discrete have PMF, but sometimes 

2054 # the distinction doesn't matter. This lets us use `_logpxf` for both 

2055 # discrete and continuous distributions. 

2056 return self._logpdf(x, *args) 

2057 

2058 def _cdf_single(self, x, *args): 

2059 _a, _b = self._get_support(*args) 

2060 return integrate.quad(self._pdf, _a, x, args=args)[0] 

2061 

2062 def _cdf(self, x, *args): 

2063 return self._cdfvec(x, *args) 

2064 

2065 # generic _argcheck, _logcdf, _sf, _logsf, _ppf, _isf, _rvs are defined 

2066 # in rv_generic 

2067 

2068 def pdf(self, x, *args, **kwds): 

2069 """Probability density function at x of the given RV. 

2070 

2071 Parameters 

2072 ---------- 

2073 x : array_like 

2074 quantiles 

2075 arg1, arg2, arg3,... : array_like 

2076 The shape parameter(s) for the distribution (see docstring of the 

2077 instance object for more information) 

2078 loc : array_like, optional 

2079 location parameter (default=0) 

2080 scale : array_like, optional 

2081 scale parameter (default=1) 

2082 

2083 Returns 

2084 ------- 

2085 pdf : ndarray 

2086 Probability density function evaluated at x 

2087 

2088 """ 

2089 args, loc, scale = self._parse_args(*args, **kwds) 

2090 x, loc, scale = map(asarray, (x, loc, scale)) 

2091 args = tuple(map(asarray, args)) 

2092 dtyp = np.promote_types(x.dtype, np.float64) 

2093 x = np.asarray((x - loc)/scale, dtype=dtyp) 

2094 cond0 = self._argcheck(*args) & (scale > 0) 

2095 cond1 = self._support_mask(x, *args) & (scale > 0) 

2096 cond = cond0 & cond1 

2097 output = zeros(shape(cond), dtyp) 

2098 putmask(output, (1-cond0)+np.isnan(x), self.badvalue) 

2099 if np.any(cond): 

2100 goodargs = argsreduce(cond, *((x,)+args+(scale,))) 

2101 scale, goodargs = goodargs[-1], goodargs[:-1] 

2102 place(output, cond, self._pdf(*goodargs) / scale) 

2103 if output.ndim == 0: 

2104 return output[()] 

2105 return output 

2106 

2107 def logpdf(self, x, *args, **kwds): 

2108 """Log of the probability density function at x of the given RV. 

2109 

2110 This uses a more numerically accurate calculation if available. 

2111 

2112 Parameters 

2113 ---------- 

2114 x : array_like 

2115 quantiles 

2116 arg1, arg2, arg3,... : array_like 

2117 The shape parameter(s) for the distribution (see docstring of the 

2118 instance object for more information) 

2119 loc : array_like, optional 

2120 location parameter (default=0) 

2121 scale : array_like, optional 

2122 scale parameter (default=1) 

2123 

2124 Returns 

2125 ------- 

2126 logpdf : array_like 

2127 Log of the probability density function evaluated at x 

2128 

2129 """ 

2130 args, loc, scale = self._parse_args(*args, **kwds) 

2131 x, loc, scale = map(asarray, (x, loc, scale)) 

2132 args = tuple(map(asarray, args)) 

2133 dtyp = np.promote_types(x.dtype, np.float64) 

2134 x = np.asarray((x - loc)/scale, dtype=dtyp) 

2135 cond0 = self._argcheck(*args) & (scale > 0) 

2136 cond1 = self._support_mask(x, *args) & (scale > 0) 

2137 cond = cond0 & cond1 

2138 output = empty(shape(cond), dtyp) 

2139 output.fill(NINF) 

2140 putmask(output, (1-cond0)+np.isnan(x), self.badvalue) 

2141 if np.any(cond): 

2142 goodargs = argsreduce(cond, *((x,)+args+(scale,))) 

2143 scale, goodargs = goodargs[-1], goodargs[:-1] 

2144 place(output, cond, self._logpdf(*goodargs) - log(scale)) 

2145 if output.ndim == 0: 

2146 return output[()] 

2147 return output 

2148 

2149 def cdf(self, x, *args, **kwds): 

2150 """ 

2151 Cumulative distribution function of the given RV. 

2152 

2153 Parameters 

2154 ---------- 

2155 x : array_like 

2156 quantiles 

2157 arg1, arg2, arg3,... : array_like 

2158 The shape parameter(s) for the distribution (see docstring of the 

2159 instance object for more information) 

2160 loc : array_like, optional 

2161 location parameter (default=0) 

2162 scale : array_like, optional 

2163 scale parameter (default=1) 

2164 

2165 Returns 

2166 ------- 

2167 cdf : ndarray 

2168 Cumulative distribution function evaluated at `x` 

2169 

2170 """ 

2171 args, loc, scale = self._parse_args(*args, **kwds) 

2172 x, loc, scale = map(asarray, (x, loc, scale)) 

2173 args = tuple(map(asarray, args)) 

2174 _a, _b = self._get_support(*args) 

2175 dtyp = np.promote_types(x.dtype, np.float64) 

2176 x = np.asarray((x - loc)/scale, dtype=dtyp) 

2177 cond0 = self._argcheck(*args) & (scale > 0) 

2178 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

2179 cond2 = (x >= np.asarray(_b)) & cond0 

2180 cond = cond0 & cond1 

2181 output = zeros(shape(cond), dtyp) 

2182 place(output, (1-cond0)+np.isnan(x), self.badvalue) 

2183 place(output, cond2, 1.0) 

2184 if np.any(cond): # call only if at least 1 entry 

2185 goodargs = argsreduce(cond, *((x,)+args)) 

2186 place(output, cond, self._cdf(*goodargs)) 

2187 if output.ndim == 0: 

2188 return output[()] 

2189 return output 

2190 

2191 def logcdf(self, x, *args, **kwds): 

2192 """Log of the cumulative distribution function at x of the given RV. 

2193 

2194 Parameters 

2195 ---------- 

2196 x : array_like 

2197 quantiles 

2198 arg1, arg2, arg3,... : array_like 

2199 The shape parameter(s) for the distribution (see docstring of the 

2200 instance object for more information) 

2201 loc : array_like, optional 

2202 location parameter (default=0) 

2203 scale : array_like, optional 

2204 scale parameter (default=1) 

2205 

2206 Returns 

2207 ------- 

2208 logcdf : array_like 

2209 Log of the cumulative distribution function evaluated at x 

2210 

2211 """ 

2212 args, loc, scale = self._parse_args(*args, **kwds) 

2213 x, loc, scale = map(asarray, (x, loc, scale)) 

2214 args = tuple(map(asarray, args)) 

2215 _a, _b = self._get_support(*args) 

2216 dtyp = np.promote_types(x.dtype, np.float64) 

2217 x = np.asarray((x - loc)/scale, dtype=dtyp) 

2218 cond0 = self._argcheck(*args) & (scale > 0) 

2219 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

2220 cond2 = (x >= _b) & cond0 

2221 cond = cond0 & cond1 

2222 output = empty(shape(cond), dtyp) 

2223 output.fill(NINF) 

2224 place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue) 

2225 place(output, cond2, 0.0) 

2226 if np.any(cond): # call only if at least 1 entry 

2227 goodargs = argsreduce(cond, *((x,)+args)) 

2228 place(output, cond, self._logcdf(*goodargs)) 

2229 if output.ndim == 0: 

2230 return output[()] 

2231 return output 

2232 

2233 def sf(self, x, *args, **kwds): 

2234 """Survival function (1 - `cdf`) at x of the given RV. 

2235 

2236 Parameters 

2237 ---------- 

2238 x : array_like 

2239 quantiles 

2240 arg1, arg2, arg3,... : array_like 

2241 The shape parameter(s) for the distribution (see docstring of the 

2242 instance object for more information) 

2243 loc : array_like, optional 

2244 location parameter (default=0) 

2245 scale : array_like, optional 

2246 scale parameter (default=1) 

2247 

2248 Returns 

2249 ------- 

2250 sf : array_like 

2251 Survival function evaluated at x 

2252 

2253 """ 

2254 args, loc, scale = self._parse_args(*args, **kwds) 

2255 x, loc, scale = map(asarray, (x, loc, scale)) 

2256 args = tuple(map(asarray, args)) 

2257 _a, _b = self._get_support(*args) 

2258 dtyp = np.promote_types(x.dtype, np.float64) 

2259 x = np.asarray((x - loc)/scale, dtype=dtyp) 

2260 cond0 = self._argcheck(*args) & (scale > 0) 

2261 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

2262 cond2 = cond0 & (x <= _a) 

2263 cond = cond0 & cond1 

2264 output = zeros(shape(cond), dtyp) 

2265 place(output, (1-cond0)+np.isnan(x), self.badvalue) 

2266 place(output, cond2, 1.0) 

2267 if np.any(cond): 

2268 goodargs = argsreduce(cond, *((x,)+args)) 

2269 place(output, cond, self._sf(*goodargs)) 

2270 if output.ndim == 0: 

2271 return output[()] 

2272 return output 

2273 

2274 def logsf(self, x, *args, **kwds): 

2275 """Log of the survival function of the given RV. 

2276 

2277 Returns the log of the "survival function," defined as (1 - `cdf`), 

2278 evaluated at `x`. 

2279 

2280 Parameters 

2281 ---------- 

2282 x : array_like 

2283 quantiles 

2284 arg1, arg2, arg3,... : array_like 

2285 The shape parameter(s) for the distribution (see docstring of the 

2286 instance object for more information) 

2287 loc : array_like, optional 

2288 location parameter (default=0) 

2289 scale : array_like, optional 

2290 scale parameter (default=1) 

2291 

2292 Returns 

2293 ------- 

2294 logsf : ndarray 

2295 Log of the survival function evaluated at `x`. 

2296 

2297 """ 

2298 args, loc, scale = self._parse_args(*args, **kwds) 

2299 x, loc, scale = map(asarray, (x, loc, scale)) 

2300 args = tuple(map(asarray, args)) 

2301 _a, _b = self._get_support(*args) 

2302 dtyp = np.promote_types(x.dtype, np.float64) 

2303 x = np.asarray((x - loc)/scale, dtype=dtyp) 

2304 cond0 = self._argcheck(*args) & (scale > 0) 

2305 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

2306 cond2 = cond0 & (x <= _a) 

2307 cond = cond0 & cond1 

2308 output = empty(shape(cond), dtyp) 

2309 output.fill(NINF) 

2310 place(output, (1-cond0)+np.isnan(x), self.badvalue) 

2311 place(output, cond2, 0.0) 

2312 if np.any(cond): 

2313 goodargs = argsreduce(cond, *((x,)+args)) 

2314 place(output, cond, self._logsf(*goodargs)) 

2315 if output.ndim == 0: 

2316 return output[()] 

2317 return output 

2318 

2319 def ppf(self, q, *args, **kwds): 

2320 """Percent point function (inverse of `cdf`) at q of the given RV. 

2321 

2322 Parameters 

2323 ---------- 

2324 q : array_like 

2325 lower tail probability 

2326 arg1, arg2, arg3,... : array_like 

2327 The shape parameter(s) for the distribution (see docstring of the 

2328 instance object for more information) 

2329 loc : array_like, optional 

2330 location parameter (default=0) 

2331 scale : array_like, optional 

2332 scale parameter (default=1) 

2333 

2334 Returns 

2335 ------- 

2336 x : array_like 

2337 quantile corresponding to the lower tail probability q. 

2338 

2339 """ 

2340 args, loc, scale = self._parse_args(*args, **kwds) 

2341 q, loc, scale = map(asarray, (q, loc, scale)) 

2342 args = tuple(map(asarray, args)) 

2343 _a, _b = self._get_support(*args) 

2344 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc) 

2345 cond1 = (0 < q) & (q < 1) 

2346 cond2 = cond0 & (q == 0) 

2347 cond3 = cond0 & (q == 1) 

2348 cond = cond0 & cond1 

2349 output = np.full(shape(cond), fill_value=self.badvalue) 

2350 

2351 lower_bound = _a * scale + loc 

2352 upper_bound = _b * scale + loc 

2353 place(output, cond2, argsreduce(cond2, lower_bound)[0]) 

2354 place(output, cond3, argsreduce(cond3, upper_bound)[0]) 

2355 

2356 if np.any(cond): # call only if at least 1 entry 

2357 goodargs = argsreduce(cond, *((q,)+args+(scale, loc))) 

2358 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2] 

2359 place(output, cond, self._ppf(*goodargs) * scale + loc) 

2360 if output.ndim == 0: 

2361 return output[()] 

2362 return output 

2363 

2364 def isf(self, q, *args, **kwds): 

2365 """Inverse survival function (inverse of `sf`) at q of the given RV. 

2366 

2367 Parameters 

2368 ---------- 

2369 q : array_like 

2370 upper tail probability 

2371 arg1, arg2, arg3,... : array_like 

2372 The shape parameter(s) for the distribution (see docstring of the 

2373 instance object for more information) 

2374 loc : array_like, optional 

2375 location parameter (default=0) 

2376 scale : array_like, optional 

2377 scale parameter (default=1) 

2378 

2379 Returns 

2380 ------- 

2381 x : ndarray or scalar 

2382 Quantile corresponding to the upper tail probability q. 

2383 

2384 """ 

2385 args, loc, scale = self._parse_args(*args, **kwds) 

2386 q, loc, scale = map(asarray, (q, loc, scale)) 

2387 args = tuple(map(asarray, args)) 

2388 _a, _b = self._get_support(*args) 

2389 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc) 

2390 cond1 = (0 < q) & (q < 1) 

2391 cond2 = cond0 & (q == 1) 

2392 cond3 = cond0 & (q == 0) 

2393 cond = cond0 & cond1 

2394 output = np.full(shape(cond), fill_value=self.badvalue) 

2395 

2396 lower_bound = _a * scale + loc 

2397 upper_bound = _b * scale + loc 

2398 place(output, cond2, argsreduce(cond2, lower_bound)[0]) 

2399 place(output, cond3, argsreduce(cond3, upper_bound)[0]) 

2400 

2401 if np.any(cond): 

2402 goodargs = argsreduce(cond, *((q,)+args+(scale, loc))) 

2403 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2] 

2404 place(output, cond, self._isf(*goodargs) * scale + loc) 

2405 if output.ndim == 0: 

2406 return output[()] 

2407 return output 

2408 

2409 def _unpack_loc_scale(self, theta): 

2410 try: 

2411 loc = theta[-2] 

2412 scale = theta[-1] 

2413 args = tuple(theta[:-2]) 

2414 except IndexError as e: 

2415 raise ValueError("Not enough input arguments.") from e 

2416 return loc, scale, args 

2417 

2418 def _fitstart(self, data, args=None): 

2419 """Starting point for fit (shape arguments + loc + scale).""" 

2420 if args is None: 

2421 args = (1.0,)*self.numargs 

2422 loc, scale = self._fit_loc_scale_support(data, *args) 

2423 return args + (loc, scale) 

2424 

2425 def _reduce_func(self, args, kwds, data=None): 

2426 """ 

2427 Return the (possibly reduced) function to optimize in order to find MLE 

2428 estimates for the .fit method. 

2429 """ 

2430 # Convert fixed shape parameters to the standard numeric form: e.g. for 

2431 # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value 

2432 # for `f0`, `fa` or 'fix_a'. The following converts the latter two 

2433 # into the first (numeric) form. 

2434 shapes = [] 

2435 if self.shapes: 

2436 shapes = self.shapes.replace(',', ' ').split() 

2437 for j, s in enumerate(shapes): 

2438 key = 'f' + str(j) 

2439 names = [key, 'f' + s, 'fix_' + s] 

2440 val = _get_fixed_fit_value(kwds, names) 

2441 if val is not None: 

2442 kwds[key] = val 

2443 

2444 args = list(args) 

2445 Nargs = len(args) 

2446 fixedn = [] 

2447 names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale'] 

2448 x0 = [] 

2449 for n, key in enumerate(names): 

2450 if key in kwds: 

2451 fixedn.append(n) 

2452 args[n] = kwds.pop(key) 

2453 else: 

2454 x0.append(args[n]) 

2455 

2456 methods = {"mle", "mm"} 

2457 method = kwds.pop('method', "mle").lower() 

2458 if method == "mm": 

2459 n_params = len(shapes) + 2 - len(fixedn) 

2460 exponents = (np.arange(1, n_params+1))[:, np.newaxis] 

2461 data_moments = np.sum(data[None, :]**exponents/len(data), axis=1) 

2462 

2463 def objective(theta, x): 

2464 return self._moment_error(theta, x, data_moments) 

2465 elif method == "mle": 

2466 objective = self._penalized_nnlf 

2467 else: 

2468 raise ValueError("Method '{0}' not available; must be one of {1}" 

2469 .format(method, methods)) 

2470 

2471 if len(fixedn) == 0: 

2472 func = objective 

2473 restore = None 

2474 else: 

2475 if len(fixedn) == Nargs: 

2476 raise ValueError( 

2477 "All parameters fixed. There is nothing to optimize.") 

2478 

2479 def restore(args, theta): 

2480 # Replace with theta for all numbers not in fixedn 

2481 # This allows the non-fixed values to vary, but 

2482 # we still call self.nnlf with all parameters. 

2483 i = 0 

2484 for n in range(Nargs): 

2485 if n not in fixedn: 

2486 args[n] = theta[i] 

2487 i += 1 

2488 return args 

2489 

2490 def func(theta, x): 

2491 newtheta = restore(args[:], theta) 

2492 return objective(newtheta, x) 

2493 

2494 return x0, func, restore, args 

2495 

2496 def _moment_error(self, theta, x, data_moments): 

2497 loc, scale, args = self._unpack_loc_scale(theta) 

2498 if not self._argcheck(*args) or scale <= 0: 

2499 return inf 

2500 

2501 dist_moments = np.array([self.moment(i+1, *args, loc=loc, scale=scale) 

2502 for i in range(len(data_moments))]) 

2503 if np.any(np.isnan(dist_moments)): 

2504 raise ValueError("Method of moments encountered a non-finite " 

2505 "distribution moment and cannot continue. " 

2506 "Consider trying method='MLE'.") 

2507 

2508 return (((data_moments - dist_moments) / 

2509 np.maximum(np.abs(data_moments), 1e-8))**2).sum() 

2510 

2511 def fit(self, data, *args, **kwds): 

2512 """ 

2513 Return estimates of shape (if applicable), location, and scale 

2514 parameters from data. The default estimation method is Maximum 

2515 Likelihood Estimation (MLE), but Method of Moments (MM) 

2516 is also available. 

2517 

2518 Starting estimates for 

2519 the fit are given by input arguments; for any arguments not provided 

2520 with starting estimates, ``self._fitstart(data)`` is called to generate 

2521 such. 

2522 

2523 One can hold some parameters fixed to specific values by passing in 

2524 keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters) 

2525 and ``floc`` and ``fscale`` (for location and scale parameters, 

2526 respectively). 

2527 

2528 Parameters 

2529 ---------- 

2530 data : array_like 

2531 Data to use in estimating the distribution parameters. 

2532 arg1, arg2, arg3,... : floats, optional 

2533 Starting value(s) for any shape-characterizing arguments (those not 

2534 provided will be determined by a call to ``_fitstart(data)``). 

2535 No default value. 

2536 **kwds : floats, optional 

2537 - `loc`: initial guess of the distribution's location parameter. 

2538 - `scale`: initial guess of the distribution's scale parameter. 

2539 

2540 Special keyword arguments are recognized as holding certain 

2541 parameters fixed: 

2542 

2543 - f0...fn : hold respective shape parameters fixed. 

2544 Alternatively, shape parameters to fix can be specified by name. 

2545 For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a`` 

2546 are equivalent to ``f0``, and ``fb`` and ``fix_b`` are 

2547 equivalent to ``f1``. 

2548 

2549 - floc : hold location parameter fixed to specified value. 

2550 

2551 - fscale : hold scale parameter fixed to specified value. 

2552 

2553 - optimizer : The optimizer to use. 

2554 The optimizer must take ``func``, 

2555 and starting position as the first two arguments, 

2556 plus ``args`` (for extra arguments to pass to the 

2557 function to be optimized) and ``disp=0`` to suppress 

2558 output as keyword arguments. 

2559 

2560 - method : The method to use. The default is "MLE" (Maximum 

2561 Likelihood Estimate); "MM" (Method of Moments) 

2562 is also available. 

2563 

2564 Raises 

2565 ------ 

2566 TypeError, ValueError 

2567 If an input is invalid 

2568 `~scipy.stats.FitError` 

2569 If fitting fails or the fit produced would be invalid 

2570 

2571 Returns 

2572 ------- 

2573 parameter_tuple : tuple of floats 

2574 Estimates for any shape parameters (if applicable), 

2575 followed by those for location and scale. 

2576 For most random variables, shape statistics 

2577 will be returned, but there are exceptions (e.g. ``norm``). 

2578 

2579 Notes 

2580 ----- 

2581 With ``method="MLE"`` (default), the fit is computed by minimizing 

2582 the negative log-likelihood function. A large, finite penalty 

2583 (rather than infinite negative log-likelihood) is applied for 

2584 observations beyond the support of the distribution. 

2585 

2586 With ``method="MM"``, the fit is computed by minimizing the L2 norm 

2587 of the relative errors between the first *k* raw (about zero) data 

2588 moments and the corresponding distribution moments, where *k* is the 

2589 number of non-fixed parameters. 

2590 More precisely, the objective function is:: 

2591 

2592 (((data_moments - dist_moments) 

2593 / np.maximum(np.abs(data_moments), 1e-8))**2).sum() 

2594 

2595 where the constant ``1e-8`` avoids division by zero in case of 

2596 vanishing data moments. Typically, this error norm can be reduced to 

2597 zero. 

2598 Note that the standard method of moments can produce parameters for 

2599 which some data are outside the support of the fitted distribution; 

2600 this implementation does nothing to prevent this. 

2601 

2602 For either method, 

2603 the returned answer is not guaranteed to be globally optimal; it 

2604 may only be locally optimal, or the optimization may fail altogether. 

2605 If the data contain any of ``np.nan``, ``np.inf``, or ``-np.inf``, 

2606 the `fit` method will raise a ``RuntimeError``. 

2607 

2608 Examples 

2609 -------- 

2610 

2611 Generate some data to fit: draw random variates from the `beta` 

2612 distribution 

2613 

2614 >>> from scipy.stats import beta 

2615 >>> a, b = 1., 2. 

2616 >>> x = beta.rvs(a, b, size=1000) 

2617 

2618 Now we can fit all four parameters (``a``, ``b``, ``loc`` 

2619 and ``scale``): 

2620 

2621 >>> a1, b1, loc1, scale1 = beta.fit(x) 

2622 

2623 We can also use some prior knowledge about the dataset: let's keep 

2624 ``loc`` and ``scale`` fixed: 

2625 

2626 >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1) 

2627 >>> loc1, scale1 

2628 (0, 1) 

2629 

2630 We can also keep shape parameters fixed by using ``f``-keywords. To 

2631 keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or, 

2632 equivalently, ``fa=1``: 

2633 

2634 >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1) 

2635 >>> a1 

2636 1 

2637 

2638 Not all distributions return estimates for the shape parameters. 

2639 ``norm`` for example just returns estimates for location and scale: 

2640 

2641 >>> from scipy.stats import norm 

2642 >>> x = norm.rvs(a, b, size=1000, random_state=123) 

2643 >>> loc1, scale1 = norm.fit(x) 

2644 >>> loc1, scale1 

2645 (0.92087172783841631, 2.0015750750324668) 

2646 """ 

2647 data = np.asarray(data) 

2648 method = kwds.get('method', "mle").lower() 

2649 

2650 # memory for method of moments 

2651 Narg = len(args) 

2652 if Narg > self.numargs: 

2653 raise TypeError("Too many input arguments.") 

2654 

2655 if not np.isfinite(data).all(): 

2656 raise ValueError("The data contains non-finite values.") 

2657 

2658 start = [None]*2 

2659 if (Narg < self.numargs) or not ('loc' in kwds and 

2660 'scale' in kwds): 

2661 # get distribution specific starting locations 

2662 start = self._fitstart(data) 

2663 args += start[Narg:-2] 

2664 loc = kwds.pop('loc', start[-2]) 

2665 scale = kwds.pop('scale', start[-1]) 

2666 args += (loc, scale) 

2667 x0, func, restore, args = self._reduce_func(args, kwds, data=data) 

2668 optimizer = kwds.pop('optimizer', optimize.fmin) 

2669 # convert string to function in scipy.optimize 

2670 optimizer = _fit_determine_optimizer(optimizer) 

2671 # by now kwds must be empty, since everybody took what they needed 

2672 if kwds: 

2673 raise TypeError("Unknown arguments: %s." % kwds) 

2674 

2675 # In some cases, method of moments can be done with fsolve/root 

2676 # instead of an optimizer, but sometimes no solution exists, 

2677 # especially when the user fixes parameters. Minimizing the sum 

2678 # of squares of the error generalizes to these cases. 

2679 vals = optimizer(func, x0, args=(ravel(data),), disp=0) 

2680 obj = func(vals, data) 

2681 

2682 if restore is not None: 

2683 vals = restore(args, vals) 

2684 vals = tuple(vals) 

2685 

2686 loc, scale, shapes = self._unpack_loc_scale(vals) 

2687 if not (np.all(self._argcheck(*shapes)) and scale > 0): 

2688 raise FitError("Optimization converged to parameters that are " 

2689 "outside the range allowed by the distribution.") 

2690 

2691 if method == 'mm': 

2692 if not np.isfinite(obj): 

2693 raise FitError("Optimization failed: either a data moment " 

2694 "or fitted distribution moment is " 

2695 "non-finite.") 

2696 

2697 return vals 

2698 

2699 def _fit_loc_scale_support(self, data, *args): 

2700 """Estimate loc and scale parameters from data accounting for support. 

2701 

2702 Parameters 

2703 ---------- 

2704 data : array_like 

2705 Data to fit. 

2706 arg1, arg2, arg3,... : array_like 

2707 The shape parameter(s) for the distribution (see docstring of the 

2708 instance object for more information). 

2709 

2710 Returns 

2711 ------- 

2712 Lhat : float 

2713 Estimated location parameter for the data. 

2714 Shat : float 

2715 Estimated scale parameter for the data. 

2716 

2717 """ 

2718 data = np.asarray(data) 

2719 

2720 # Estimate location and scale according to the method of moments. 

2721 loc_hat, scale_hat = self.fit_loc_scale(data, *args) 

2722 

2723 # Compute the support according to the shape parameters. 

2724 self._argcheck(*args) 

2725 _a, _b = self._get_support(*args) 

2726 a, b = _a, _b 

2727 support_width = b - a 

2728 

2729 # If the support is empty then return the moment-based estimates. 

2730 if support_width <= 0: 

2731 return loc_hat, scale_hat 

2732 

2733 # Compute the proposed support according to the loc and scale 

2734 # estimates. 

2735 a_hat = loc_hat + a * scale_hat 

2736 b_hat = loc_hat + b * scale_hat 

2737 

2738 # Use the moment-based estimates if they are compatible with the data. 

2739 data_a = np.min(data) 

2740 data_b = np.max(data) 

2741 if a_hat < data_a and data_b < b_hat: 

2742 return loc_hat, scale_hat 

2743 

2744 # Otherwise find other estimates that are compatible with the data. 

2745 data_width = data_b - data_a 

2746 rel_margin = 0.1 

2747 margin = data_width * rel_margin 

2748 

2749 # For a finite interval, both the location and scale 

2750 # should have interesting values. 

2751 if support_width < np.inf: 

2752 loc_hat = (data_a - a) - margin 

2753 scale_hat = (data_width + 2 * margin) / support_width 

2754 return loc_hat, scale_hat 

2755 

2756 # For a one-sided interval, use only an interesting location parameter. 

2757 if a > -np.inf: 

2758 return (data_a - a) - margin, 1 

2759 elif b < np.inf: 

2760 return (data_b - b) + margin, 1 

2761 else: 

2762 raise RuntimeError 

2763 

2764 def fit_loc_scale(self, data, *args): 

2765 """ 

2766 Estimate loc and scale parameters from data using 1st and 2nd moments. 

2767 

2768 Parameters 

2769 ---------- 

2770 data : array_like 

2771 Data to fit. 

2772 arg1, arg2, arg3,... : array_like 

2773 The shape parameter(s) for the distribution (see docstring of the 

2774 instance object for more information). 

2775 

2776 Returns 

2777 ------- 

2778 Lhat : float 

2779 Estimated location parameter for the data. 

2780 Shat : float 

2781 Estimated scale parameter for the data. 

2782 

2783 """ 

2784 mu, mu2 = self.stats(*args, **{'moments': 'mv'}) 

2785 tmp = asarray(data) 

2786 muhat = tmp.mean() 

2787 mu2hat = tmp.var() 

2788 Shat = sqrt(mu2hat / mu2) 

2789 Lhat = muhat - Shat*mu 

2790 if not np.isfinite(Lhat): 

2791 Lhat = 0 

2792 if not (np.isfinite(Shat) and (0 < Shat)): 

2793 Shat = 1 

2794 return Lhat, Shat 

2795 

2796 def _entropy(self, *args): 

2797 def integ(x): 

2798 val = self._pdf(x, *args) 

2799 return entr(val) 

2800 

2801 # upper limit is often inf, so suppress warnings when integrating 

2802 _a, _b = self._get_support(*args) 

2803 with np.errstate(over='ignore'): 

2804 h = integrate.quad(integ, _a, _b)[0] 

2805 

2806 if not np.isnan(h): 

2807 return h 

2808 else: 

2809 # try with different limits if integration problems 

2810 low, upp = self.ppf([1e-10, 1. - 1e-10], *args) 

2811 if np.isinf(_b): 

2812 upper = upp 

2813 else: 

2814 upper = _b 

2815 if np.isinf(_a): 

2816 lower = low 

2817 else: 

2818 lower = _a 

2819 return integrate.quad(integ, lower, upper)[0] 

2820 

2821 def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None, 

2822 conditional=False, **kwds): 

2823 """Calculate expected value of a function with respect to the 

2824 distribution by numerical integration. 

2825 

2826 The expected value of a function ``f(x)`` with respect to a 

2827 distribution ``dist`` is defined as:: 

2828 

2829 ub 

2830 E[f(x)] = Integral(f(x) * dist.pdf(x)), 

2831 lb 

2832 

2833 where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)`` 

2834 distribution. If the bounds ``lb`` and ``ub`` correspond to the 

2835 support of the distribution, e.g. ``[-inf, inf]`` in the default 

2836 case, then the integral is the unrestricted expectation of ``f(x)``. 

2837 Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0`` 

2838 outside a finite interval in which case the expectation is 

2839 calculated within the finite range ``[lb, ub]``. 

2840 

2841 Parameters 

2842 ---------- 

2843 func : callable, optional 

2844 Function for which integral is calculated. Takes only one argument. 

2845 The default is the identity mapping f(x) = x. 

2846 args : tuple, optional 

2847 Shape parameters of the distribution. 

2848 loc : float, optional 

2849 Location parameter (default=0). 

2850 scale : float, optional 

2851 Scale parameter (default=1). 

2852 lb, ub : scalar, optional 

2853 Lower and upper bound for integration. Default is set to the 

2854 support of the distribution. 

2855 conditional : bool, optional 

2856 If True, the integral is corrected by the conditional probability 

2857 of the integration interval. The return value is the expectation 

2858 of the function, conditional on being in the given interval. 

2859 Default is False. 

2860 

2861 Additional keyword arguments are passed to the integration routine. 

2862 

2863 Returns 

2864 ------- 

2865 expect : float 

2866 The calculated expected value. 

2867 

2868 Notes 

2869 ----- 

2870 The integration behavior of this function is inherited from 

2871 `scipy.integrate.quad`. Neither this function nor 

2872 `scipy.integrate.quad` can verify whether the integral exists or is 

2873 finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and 

2874 ``cauchy(0).expect()`` returns ``0.0``. 

2875 

2876 Likewise, the accuracy of results is not verified by the function. 

2877 `scipy.integrate.quad` is typically reliable for integrals that are 

2878 numerically favorable, but it is not guaranteed to converge 

2879 to a correct value for all possible intervals and integrands. This 

2880 function is provided for convenience; for critical applications, 

2881 check results against other integration methods. 

2882 

2883 The function is not vectorized. 

2884 

2885 Examples 

2886 -------- 

2887 

2888 To understand the effect of the bounds of integration consider 

2889 

2890 >>> from scipy.stats import expon 

2891 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0) 

2892 0.6321205588285578 

2893 

2894 This is close to 

2895 

2896 >>> expon(1).cdf(2.0) - expon(1).cdf(0.0) 

2897 0.6321205588285577 

2898 

2899 If ``conditional=True`` 

2900 

2901 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True) 

2902 1.0000000000000002 

2903 

2904 The slight deviation from 1 is due to numerical integration. 

2905 

2906 The integrand can be treated as a complex-valued function 

2907 by passing ``complex_func=True`` to `scipy.integrate.quad` . 

2908 

2909 >>> import numpy as np 

2910 >>> from scipy.stats import vonmises 

2911 >>> res = vonmises(loc=2, kappa=1).expect(lambda x: np.exp(1j*x), 

2912 ... complex_func=True) 

2913 >>> res 

2914 (-0.18576377217422957+0.40590124735052263j) 

2915 

2916 >>> np.angle(res) # location of the (circular) distribution 

2917 2.0 

2918 

2919 """ 

2920 lockwds = {'loc': loc, 

2921 'scale': scale} 

2922 self._argcheck(*args) 

2923 _a, _b = self._get_support(*args) 

2924 if func is None: 

2925 def fun(x, *args): 

2926 return x * self.pdf(x, *args, **lockwds) 

2927 else: 

2928 def fun(x, *args): 

2929 return func(x) * self.pdf(x, *args, **lockwds) 

2930 if lb is None: 

2931 lb = loc + _a * scale 

2932 if ub is None: 

2933 ub = loc + _b * scale 

2934 

2935 cdf_bounds = self.cdf([lb, ub], *args, **lockwds) 

2936 invfac = cdf_bounds[1] - cdf_bounds[0] 

2937 

2938 kwds['args'] = args 

2939 

2940 # split interval to help integrator w/ infinite support; see gh-8928 

2941 alpha = 0.05 # split body from tails at probability mass `alpha` 

2942 inner_bounds = np.array([alpha, 1-alpha]) 

2943 cdf_inner_bounds = cdf_bounds[0] + invfac * inner_bounds 

2944 c, d = loc + self._ppf(cdf_inner_bounds, *args) * scale 

2945 

2946 # Do not silence warnings from integration. 

2947 lbc = integrate.quad(fun, lb, c, **kwds)[0] 

2948 cd = integrate.quad(fun, c, d, **kwds)[0] 

2949 dub = integrate.quad(fun, d, ub, **kwds)[0] 

2950 vals = (lbc + cd + dub) 

2951 

2952 if conditional: 

2953 vals /= invfac 

2954 return np.array(vals)[()] # make it a numpy scalar like other methods 

2955 

2956 def _param_info(self): 

2957 shape_info = self._shape_info() 

2958 loc_info = _ShapeInfo("loc", False, (-np.inf, np.inf), (False, False)) 

2959 scale_info = _ShapeInfo("scale", False, (0, np.inf), (False, False)) 

2960 param_info = shape_info + [loc_info, scale_info] 

2961 return param_info 

2962 

2963# Helpers for the discrete distributions 

2964def _drv2_moment(self, n, *args): 

2965 """Non-central moment of discrete distribution.""" 

2966 def fun(x): 

2967 return np.power(x, n) * self._pmf(x, *args) 

2968 

2969 _a, _b = self._get_support(*args) 

2970 return _expect(fun, _a, _b, self.ppf(0.5, *args), self.inc) 

2971 

2972 

2973def _drv2_ppfsingle(self, q, *args): # Use basic bisection algorithm 

2974 _a, _b = self._get_support(*args) 

2975 b = _b 

2976 a = _a 

2977 if isinf(b): # Be sure ending point is > q 

2978 b = int(max(100*q, 10)) 

2979 while 1: 

2980 if b >= _b: 

2981 qb = 1.0 

2982 break 

2983 qb = self._cdf(b, *args) 

2984 if (qb < q): 

2985 b += 10 

2986 else: 

2987 break 

2988 else: 

2989 qb = 1.0 

2990 if isinf(a): # be sure starting point < q 

2991 a = int(min(-100*q, -10)) 

2992 while 1: 

2993 if a <= _a: 

2994 qb = 0.0 

2995 break 

2996 qa = self._cdf(a, *args) 

2997 if (qa > q): 

2998 a -= 10 

2999 else: 

3000 break 

3001 else: 

3002 qa = self._cdf(a, *args) 

3003 

3004 while 1: 

3005 if (qa == q): 

3006 return a 

3007 if (qb == q): 

3008 return b 

3009 if b <= a+1: 

3010 if qa > q: 

3011 return a 

3012 else: 

3013 return b 

3014 c = int((a+b)/2.0) 

3015 qc = self._cdf(c, *args) 

3016 if (qc < q): 

3017 if a != c: 

3018 a = c 

3019 else: 

3020 raise RuntimeError('updating stopped, endless loop') 

3021 qa = qc 

3022 elif (qc > q): 

3023 if b != c: 

3024 b = c 

3025 else: 

3026 raise RuntimeError('updating stopped, endless loop') 

3027 qb = qc 

3028 else: 

3029 return c 

3030 

3031 

3032# Must over-ride one of _pmf or _cdf or pass in 

3033# x_k, p(x_k) lists in initialization 

3034 

3035 

3036class rv_discrete(rv_generic): 

3037 """A generic discrete random variable class meant for subclassing. 

3038 

3039 `rv_discrete` is a base class to construct specific distribution classes 

3040 and instances for discrete random variables. It can also be used 

3041 to construct an arbitrary distribution defined by a list of support 

3042 points and corresponding probabilities. 

3043 

3044 Parameters 

3045 ---------- 

3046 a : float, optional 

3047 Lower bound of the support of the distribution, default: 0 

3048 b : float, optional 

3049 Upper bound of the support of the distribution, default: plus infinity 

3050 moment_tol : float, optional 

3051 The tolerance for the generic calculation of moments. 

3052 values : tuple of two array_like, optional 

3053 ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero 

3054 probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk`` 

3055 and ``pk`` must have the same shape. 

3056 inc : integer, optional 

3057 Increment for the support of the distribution. 

3058 Default is 1. (other values have not been tested) 

3059 badvalue : float, optional 

3060 The value in a result arrays that indicates a value that for which 

3061 some argument restriction is violated, default is np.nan. 

3062 name : str, optional 

3063 The name of the instance. This string is used to construct the default 

3064 example for distributions. 

3065 longname : str, optional 

3066 This string is used as part of the first line of the docstring returned 

3067 when a subclass has no docstring of its own. Note: `longname` exists 

3068 for backwards compatibility, do not use for new subclasses. 

3069 shapes : str, optional 

3070 The shape of the distribution. For example "m, n" for a distribution 

3071 that takes two integers as the two shape arguments for all its methods 

3072 If not provided, shape parameters will be inferred from 

3073 the signatures of the private methods, ``_pmf`` and ``_cdf`` of 

3074 the instance. 

3075 extradoc : str, optional, deprecated 

3076 This string is used as the last part of the docstring returned when a 

3077 subclass has no docstring of its own. Note: `extradoc` exists for 

3078 backwards compatibility and will be removed in SciPy 1.11.0, do not 

3079 use for new subclasses. 

3080 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

3081 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

3082 singleton is used. 

3083 If `seed` is an int, a new ``RandomState`` instance is used, 

3084 seeded with `seed`. 

3085 If `seed` is already a ``Generator`` or ``RandomState`` instance then 

3086 that instance is used. 

3087 

3088 Methods 

3089 ------- 

3090 rvs 

3091 pmf 

3092 logpmf 

3093 cdf 

3094 logcdf 

3095 sf 

3096 logsf 

3097 ppf 

3098 isf 

3099 moment 

3100 stats 

3101 entropy 

3102 expect 

3103 median 

3104 mean 

3105 std 

3106 var 

3107 interval 

3108 __call__ 

3109 support 

3110 

3111 Notes 

3112 ----- 

3113 This class is similar to `rv_continuous`. Whether a shape parameter is 

3114 valid is decided by an ``_argcheck`` method (which defaults to checking 

3115 that its arguments are strictly positive.) 

3116 The main differences are: 

3117 

3118 - the support of the distribution is a set of integers 

3119 - instead of the probability density function, ``pdf`` (and the 

3120 corresponding private ``_pdf``), this class defines the 

3121 *probability mass function*, `pmf` (and the corresponding 

3122 private ``_pmf``.) 

3123 - scale parameter is not defined. 

3124 

3125 To create a new discrete distribution, we would do the following: 

3126 

3127 >>> from scipy.stats import rv_discrete 

3128 >>> class poisson_gen(rv_discrete): 

3129 ... "Poisson distribution" 

3130 ... def _pmf(self, k, mu): 

3131 ... return exp(-mu) * mu**k / factorial(k) 

3132 

3133 and create an instance:: 

3134 

3135 >>> poisson = poisson_gen(name="poisson") 

3136 

3137 Note that above we defined the Poisson distribution in the standard form. 

3138 Shifting the distribution can be done by providing the ``loc`` parameter 

3139 to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)`` 

3140 delegates the work to ``poisson._pmf(x-loc, mu)``. 

3141 

3142 **Discrete distributions from a list of probabilities** 

3143 

3144 Alternatively, you can construct an arbitrary discrete rv defined 

3145 on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the 

3146 ``values`` keyword argument to the `rv_discrete` constructor. 

3147 

3148 Examples 

3149 -------- 

3150 Custom made discrete distribution: 

3151 

3152 >>> import numpy as np 

3153 >>> from scipy import stats 

3154 >>> xk = np.arange(7) 

3155 >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2) 

3156 >>> custm = stats.rv_discrete(name='custm', values=(xk, pk)) 

3157 >>> 

3158 >>> import matplotlib.pyplot as plt 

3159 >>> fig, ax = plt.subplots(1, 1) 

3160 >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r') 

3161 >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4) 

3162 >>> plt.show() 

3163 

3164 Random number generation: 

3165 

3166 >>> R = custm.rvs(size=100) 

3167 

3168 """ 

3169 def __new__(cls, a=0, b=inf, name=None, badvalue=None, 

3170 moment_tol=1e-8, values=None, inc=1, longname=None, 

3171 shapes=None, extradoc=None, seed=None): 

3172 

3173 if values is not None: 

3174 # dispatch to a subclass 

3175 return super(rv_discrete, cls).__new__(rv_sample) 

3176 else: 

3177 # business as usual 

3178 return super(rv_discrete, cls).__new__(cls) 

3179 

3180 def __init__(self, a=0, b=inf, name=None, badvalue=None, 

3181 moment_tol=1e-8, values=None, inc=1, longname=None, 

3182 shapes=None, extradoc=None, seed=None): 

3183 

3184 super().__init__(seed) 

3185 

3186 if extradoc is not None: 

3187 warnings.warn("extradoc is deprecated and will be removed in " 

3188 "SciPy 1.11.0", DeprecationWarning) 

3189 

3190 # cf generic freeze 

3191 self._ctor_param = dict( 

3192 a=a, b=b, name=name, badvalue=badvalue, 

3193 moment_tol=moment_tol, values=values, inc=inc, 

3194 longname=longname, shapes=shapes, extradoc=extradoc, seed=seed) 

3195 

3196 if badvalue is None: 

3197 badvalue = nan 

3198 self.badvalue = badvalue 

3199 self.a = a 

3200 self.b = b 

3201 self.moment_tol = moment_tol 

3202 self.inc = inc 

3203 self.shapes = shapes 

3204 

3205 if values is not None: 

3206 raise ValueError("rv_discrete.__init__(..., values != None, ...)") 

3207 

3208 self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf], 

3209 locscale_in='loc=0', 

3210 # scale=1 for discrete RVs 

3211 locscale_out='loc, 1') 

3212 self._attach_methods() 

3213 self._construct_docstrings(name, longname, extradoc) 

3214 

3215 def __getstate__(self): 

3216 dct = self.__dict__.copy() 

3217 # these methods will be remade in __setstate__ 

3218 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs", 

3219 "_cdfvec", "_ppfvec", "generic_moment"] 

3220 [dct.pop(attr, None) for attr in attrs] 

3221 return dct 

3222 

3223 def _attach_methods(self): 

3224 """Attaches dynamically created methods to the rv_discrete instance.""" 

3225 self._cdfvec = vectorize(self._cdf_single, otypes='d') 

3226 self.vecentropy = vectorize(self._entropy) 

3227 

3228 # _attach_methods is responsible for calling _attach_argparser_methods 

3229 self._attach_argparser_methods() 

3230 

3231 # nin correction needs to be after we know numargs 

3232 # correct nin for generic moment vectorization 

3233 _vec_generic_moment = vectorize(_drv2_moment, otypes='d') 

3234 _vec_generic_moment.nin = self.numargs + 2 

3235 self.generic_moment = types.MethodType(_vec_generic_moment, self) 

3236 

3237 # correct nin for ppf vectorization 

3238 _vppf = vectorize(_drv2_ppfsingle, otypes='d') 

3239 _vppf.nin = self.numargs + 2 

3240 self._ppfvec = types.MethodType(_vppf, self) 

3241 

3242 # now that self.numargs is defined, we can adjust nin 

3243 self._cdfvec.nin = self.numargs + 1 

3244 

3245 def _construct_docstrings(self, name, longname, extradoc): 

3246 if name is None: 

3247 name = 'Distribution' 

3248 self.name = name 

3249 self.extradoc = extradoc 

3250 

3251 # generate docstring for subclass instances 

3252 if longname is None: 

3253 if name[0] in ['aeiouAEIOU']: 

3254 hstr = "An " 

3255 else: 

3256 hstr = "A " 

3257 longname = hstr + name 

3258 

3259 if sys.flags.optimize < 2: 

3260 # Skip adding docstrings if interpreter is run with -OO 

3261 if self.__doc__ is None: 

3262 self._construct_default_doc(longname=longname, 

3263 extradoc=extradoc, 

3264 docdict=docdict_discrete, 

3265 discrete='discrete') 

3266 else: 

3267 dct = dict(distdiscrete) 

3268 self._construct_doc(docdict_discrete, dct.get(self.name)) 

3269 

3270 # discrete RV do not have the scale parameter, remove it 

3271 self.__doc__ = self.__doc__.replace( 

3272 '\n scale : array_like, ' 

3273 'optional\n scale parameter (default=1)', '') 

3274 

3275 def _updated_ctor_param(self): 

3276 """Return the current version of _ctor_param, possibly updated by user. 

3277 

3278 Used by freezing. 

3279 Keep this in sync with the signature of __init__. 

3280 """ 

3281 dct = self._ctor_param.copy() 

3282 dct['a'] = self.a 

3283 dct['b'] = self.b 

3284 dct['badvalue'] = self.badvalue 

3285 dct['moment_tol'] = self.moment_tol 

3286 dct['inc'] = self.inc 

3287 dct['name'] = self.name 

3288 dct['shapes'] = self.shapes 

3289 dct['extradoc'] = self.extradoc 

3290 return dct 

3291 

3292 def _nonzero(self, k, *args): 

3293 return floor(k) == k 

3294 

3295 def _pmf(self, k, *args): 

3296 return self._cdf(k, *args) - self._cdf(k-1, *args) 

3297 

3298 def _logpmf(self, k, *args): 

3299 return log(self._pmf(k, *args)) 

3300 

3301 def _logpxf(self, k, *args): 

3302 # continuous distributions have PDF, discrete have PMF, but sometimes 

3303 # the distinction doesn't matter. This lets us use `_logpxf` for both 

3304 # discrete and continuous distributions. 

3305 return self._logpmf(k, *args) 

3306 

3307 def _unpack_loc_scale(self, theta): 

3308 try: 

3309 loc = theta[-1] 

3310 scale = 1 

3311 args = tuple(theta[:-1]) 

3312 except IndexError as e: 

3313 raise ValueError("Not enough input arguments.") from e 

3314 return loc, scale, args 

3315 

3316 def _cdf_single(self, k, *args): 

3317 _a, _b = self._get_support(*args) 

3318 m = arange(int(_a), k+1) 

3319 return np.sum(self._pmf(m, *args), axis=0) 

3320 

3321 def _cdf(self, x, *args): 

3322 k = floor(x) 

3323 return self._cdfvec(k, *args) 

3324 

3325 # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic 

3326 

3327 def rvs(self, *args, **kwargs): 

3328 """Random variates of given type. 

3329 

3330 Parameters 

3331 ---------- 

3332 arg1, arg2, arg3,... : array_like 

3333 The shape parameter(s) for the distribution (see docstring of the 

3334 instance object for more information). 

3335 loc : array_like, optional 

3336 Location parameter (default=0). 

3337 size : int or tuple of ints, optional 

3338 Defining number of random variates (Default is 1). Note that `size` 

3339 has to be given as keyword, not as positional argument. 

3340 random_state : {None, int, `numpy.random.Generator`, 

3341 `numpy.random.RandomState`}, optional 

3342 

3343 If `random_state` is None (or `np.random`), the 

3344 `numpy.random.RandomState` singleton is used. 

3345 If `random_state` is an int, a new ``RandomState`` instance is 

3346 used, seeded with `random_state`. 

3347 If `random_state` is already a ``Generator`` or ``RandomState`` 

3348 instance, that instance is used. 

3349 

3350 Returns 

3351 ------- 

3352 rvs : ndarray or scalar 

3353 Random variates of given `size`. 

3354 

3355 """ 

3356 kwargs['discrete'] = True 

3357 return super().rvs(*args, **kwargs) 

3358 

3359 def pmf(self, k, *args, **kwds): 

3360 """Probability mass function at k of the given RV. 

3361 

3362 Parameters 

3363 ---------- 

3364 k : array_like 

3365 Quantiles. 

3366 arg1, arg2, arg3,... : array_like 

3367 The shape parameter(s) for the distribution (see docstring of the 

3368 instance object for more information) 

3369 loc : array_like, optional 

3370 Location parameter (default=0). 

3371 

3372 Returns 

3373 ------- 

3374 pmf : array_like 

3375 Probability mass function evaluated at k 

3376 

3377 """ 

3378 args, loc, _ = self._parse_args(*args, **kwds) 

3379 k, loc = map(asarray, (k, loc)) 

3380 args = tuple(map(asarray, args)) 

3381 _a, _b = self._get_support(*args) 

3382 k = asarray((k-loc)) 

3383 cond0 = self._argcheck(*args) 

3384 cond1 = (k >= _a) & (k <= _b) 

3385 if not isinstance(self, rv_sample): 

3386 cond1 = cond1 & self._nonzero(k, *args) 

3387 cond = cond0 & cond1 

3388 output = zeros(shape(cond), 'd') 

3389 place(output, (1-cond0) + np.isnan(k), self.badvalue) 

3390 if np.any(cond): 

3391 goodargs = argsreduce(cond, *((k,)+args)) 

3392 place(output, cond, np.clip(self._pmf(*goodargs), 0, 1)) 

3393 if output.ndim == 0: 

3394 return output[()] 

3395 return output 

3396 

3397 def logpmf(self, k, *args, **kwds): 

3398 """Log of the probability mass function at k of the given RV. 

3399 

3400 Parameters 

3401 ---------- 

3402 k : array_like 

3403 Quantiles. 

3404 arg1, arg2, arg3,... : array_like 

3405 The shape parameter(s) for the distribution (see docstring of the 

3406 instance object for more information). 

3407 loc : array_like, optional 

3408 Location parameter. Default is 0. 

3409 

3410 Returns 

3411 ------- 

3412 logpmf : array_like 

3413 Log of the probability mass function evaluated at k. 

3414 

3415 """ 

3416 args, loc, _ = self._parse_args(*args, **kwds) 

3417 k, loc = map(asarray, (k, loc)) 

3418 args = tuple(map(asarray, args)) 

3419 _a, _b = self._get_support(*args) 

3420 k = asarray((k-loc)) 

3421 cond0 = self._argcheck(*args) 

3422 cond1 = (k >= _a) & (k <= _b) 

3423 if not isinstance(self, rv_sample): 

3424 cond1 = cond1 & self._nonzero(k, *args) 

3425 cond = cond0 & cond1 

3426 output = empty(shape(cond), 'd') 

3427 output.fill(NINF) 

3428 place(output, (1-cond0) + np.isnan(k), self.badvalue) 

3429 if np.any(cond): 

3430 goodargs = argsreduce(cond, *((k,)+args)) 

3431 place(output, cond, self._logpmf(*goodargs)) 

3432 if output.ndim == 0: 

3433 return output[()] 

3434 return output 

3435 

3436 def cdf(self, k, *args, **kwds): 

3437 """Cumulative distribution function of the given RV. 

3438 

3439 Parameters 

3440 ---------- 

3441 k : array_like, int 

3442 Quantiles. 

3443 arg1, arg2, arg3,... : array_like 

3444 The shape parameter(s) for the distribution (see docstring of the 

3445 instance object for more information). 

3446 loc : array_like, optional 

3447 Location parameter (default=0). 

3448 

3449 Returns 

3450 ------- 

3451 cdf : ndarray 

3452 Cumulative distribution function evaluated at `k`. 

3453 

3454 """ 

3455 args, loc, _ = self._parse_args(*args, **kwds) 

3456 k, loc = map(asarray, (k, loc)) 

3457 args = tuple(map(asarray, args)) 

3458 _a, _b = self._get_support(*args) 

3459 k = asarray((k-loc)) 

3460 cond0 = self._argcheck(*args) 

3461 cond1 = (k >= _a) & (k < _b) 

3462 cond2 = (k >= _b) 

3463 cond3 = np.isneginf(k) 

3464 cond = cond0 & cond1 & np.isfinite(k) 

3465 

3466 output = zeros(shape(cond), 'd') 

3467 place(output, cond2*(cond0 == cond0), 1.0) 

3468 place(output, cond3*(cond0 == cond0), 0.0) 

3469 place(output, (1-cond0) + np.isnan(k), self.badvalue) 

3470 

3471 if np.any(cond): 

3472 goodargs = argsreduce(cond, *((k,)+args)) 

3473 place(output, cond, np.clip(self._cdf(*goodargs), 0, 1)) 

3474 if output.ndim == 0: 

3475 return output[()] 

3476 return output 

3477 

3478 def logcdf(self, k, *args, **kwds): 

3479 """Log of the cumulative distribution function at k of the given RV. 

3480 

3481 Parameters 

3482 ---------- 

3483 k : array_like, int 

3484 Quantiles. 

3485 arg1, arg2, arg3,... : array_like 

3486 The shape parameter(s) for the distribution (see docstring of the 

3487 instance object for more information). 

3488 loc : array_like, optional 

3489 Location parameter (default=0). 

3490 

3491 Returns 

3492 ------- 

3493 logcdf : array_like 

3494 Log of the cumulative distribution function evaluated at k. 

3495 

3496 """ 

3497 args, loc, _ = self._parse_args(*args, **kwds) 

3498 k, loc = map(asarray, (k, loc)) 

3499 args = tuple(map(asarray, args)) 

3500 _a, _b = self._get_support(*args) 

3501 k = asarray((k-loc)) 

3502 cond0 = self._argcheck(*args) 

3503 cond1 = (k >= _a) & (k < _b) 

3504 cond2 = (k >= _b) 

3505 cond = cond0 & cond1 

3506 output = empty(shape(cond), 'd') 

3507 output.fill(NINF) 

3508 place(output, (1-cond0) + np.isnan(k), self.badvalue) 

3509 place(output, cond2*(cond0 == cond0), 0.0) 

3510 

3511 if np.any(cond): 

3512 goodargs = argsreduce(cond, *((k,)+args)) 

3513 place(output, cond, self._logcdf(*goodargs)) 

3514 if output.ndim == 0: 

3515 return output[()] 

3516 return output 

3517 

3518 def sf(self, k, *args, **kwds): 

3519 """Survival function (1 - `cdf`) at k of the given RV. 

3520 

3521 Parameters 

3522 ---------- 

3523 k : array_like 

3524 Quantiles. 

3525 arg1, arg2, arg3,... : array_like 

3526 The shape parameter(s) for the distribution (see docstring of the 

3527 instance object for more information). 

3528 loc : array_like, optional 

3529 Location parameter (default=0). 

3530 

3531 Returns 

3532 ------- 

3533 sf : array_like 

3534 Survival function evaluated at k. 

3535 

3536 """ 

3537 args, loc, _ = self._parse_args(*args, **kwds) 

3538 k, loc = map(asarray, (k, loc)) 

3539 args = tuple(map(asarray, args)) 

3540 _a, _b = self._get_support(*args) 

3541 k = asarray(k-loc) 

3542 cond0 = self._argcheck(*args) 

3543 cond1 = (k >= _a) & (k < _b) 

3544 cond2 = ((k < _a) | np.isneginf(k)) & cond0 

3545 cond = cond0 & cond1 & np.isfinite(k) 

3546 output = zeros(shape(cond), 'd') 

3547 place(output, (1-cond0) + np.isnan(k), self.badvalue) 

3548 place(output, cond2, 1.0) 

3549 if np.any(cond): 

3550 goodargs = argsreduce(cond, *((k,)+args)) 

3551 place(output, cond, np.clip(self._sf(*goodargs), 0, 1)) 

3552 if output.ndim == 0: 

3553 return output[()] 

3554 return output 

3555 

3556 def logsf(self, k, *args, **kwds): 

3557 """Log of the survival function of the given RV. 

3558 

3559 Returns the log of the "survival function," defined as 1 - `cdf`, 

3560 evaluated at `k`. 

3561 

3562 Parameters 

3563 ---------- 

3564 k : array_like 

3565 Quantiles. 

3566 arg1, arg2, arg3,... : array_like 

3567 The shape parameter(s) for the distribution (see docstring of the 

3568 instance object for more information). 

3569 loc : array_like, optional 

3570 Location parameter (default=0). 

3571 

3572 Returns 

3573 ------- 

3574 logsf : ndarray 

3575 Log of the survival function evaluated at `k`. 

3576 

3577 """ 

3578 args, loc, _ = self._parse_args(*args, **kwds) 

3579 k, loc = map(asarray, (k, loc)) 

3580 args = tuple(map(asarray, args)) 

3581 _a, _b = self._get_support(*args) 

3582 k = asarray(k-loc) 

3583 cond0 = self._argcheck(*args) 

3584 cond1 = (k >= _a) & (k < _b) 

3585 cond2 = (k < _a) & cond0 

3586 cond = cond0 & cond1 

3587 output = empty(shape(cond), 'd') 

3588 output.fill(NINF) 

3589 place(output, (1-cond0) + np.isnan(k), self.badvalue) 

3590 place(output, cond2, 0.0) 

3591 if np.any(cond): 

3592 goodargs = argsreduce(cond, *((k,)+args)) 

3593 place(output, cond, self._logsf(*goodargs)) 

3594 if output.ndim == 0: 

3595 return output[()] 

3596 return output 

3597 

3598 def ppf(self, q, *args, **kwds): 

3599 """Percent point function (inverse of `cdf`) at q of the given RV. 

3600 

3601 Parameters 

3602 ---------- 

3603 q : array_like 

3604 Lower tail probability. 

3605 arg1, arg2, arg3,... : array_like 

3606 The shape parameter(s) for the distribution (see docstring of the 

3607 instance object for more information). 

3608 loc : array_like, optional 

3609 Location parameter (default=0). 

3610 

3611 Returns 

3612 ------- 

3613 k : array_like 

3614 Quantile corresponding to the lower tail probability, q. 

3615 

3616 """ 

3617 args, loc, _ = self._parse_args(*args, **kwds) 

3618 q, loc = map(asarray, (q, loc)) 

3619 args = tuple(map(asarray, args)) 

3620 _a, _b = self._get_support(*args) 

3621 cond0 = self._argcheck(*args) & (loc == loc) 

3622 cond1 = (q > 0) & (q < 1) 

3623 cond2 = (q == 1) & cond0 

3624 cond = cond0 & cond1 

3625 output = np.full(shape(cond), fill_value=self.badvalue, dtype='d') 

3626 # output type 'd' to handle nin and inf 

3627 place(output, (q == 0)*(cond == cond), _a-1 + loc) 

3628 place(output, cond2, _b + loc) 

3629 if np.any(cond): 

3630 goodargs = argsreduce(cond, *((q,)+args+(loc,))) 

3631 loc, goodargs = goodargs[-1], goodargs[:-1] 

3632 place(output, cond, self._ppf(*goodargs) + loc) 

3633 

3634 if output.ndim == 0: 

3635 return output[()] 

3636 return output 

3637 

3638 def isf(self, q, *args, **kwds): 

3639 """Inverse survival function (inverse of `sf`) at q of the given RV. 

3640 

3641 Parameters 

3642 ---------- 

3643 q : array_like 

3644 Upper tail probability. 

3645 arg1, arg2, arg3,... : array_like 

3646 The shape parameter(s) for the distribution (see docstring of the 

3647 instance object for more information). 

3648 loc : array_like, optional 

3649 Location parameter (default=0). 

3650 

3651 Returns 

3652 ------- 

3653 k : ndarray or scalar 

3654 Quantile corresponding to the upper tail probability, q. 

3655 

3656 """ 

3657 args, loc, _ = self._parse_args(*args, **kwds) 

3658 q, loc = map(asarray, (q, loc)) 

3659 args = tuple(map(asarray, args)) 

3660 _a, _b = self._get_support(*args) 

3661 cond0 = self._argcheck(*args) & (loc == loc) 

3662 cond1 = (q > 0) & (q < 1) 

3663 cond2 = (q == 1) & cond0 

3664 cond3 = (q == 0) & cond0 

3665 cond = cond0 & cond1 

3666 

3667 # same problem as with ppf; copied from ppf and changed 

3668 output = np.full(shape(cond), fill_value=self.badvalue, dtype='d') 

3669 # output type 'd' to handle nin and inf 

3670 lower_bound = _a - 1 + loc 

3671 upper_bound = _b + loc 

3672 place(output, cond2*(cond == cond), lower_bound) 

3673 place(output, cond3*(cond == cond), upper_bound) 

3674 

3675 # call place only if at least 1 valid argument 

3676 if np.any(cond): 

3677 goodargs = argsreduce(cond, *((q,)+args+(loc,))) 

3678 loc, goodargs = goodargs[-1], goodargs[:-1] 

3679 # PB same as ticket 766 

3680 place(output, cond, self._isf(*goodargs) + loc) 

3681 

3682 if output.ndim == 0: 

3683 return output[()] 

3684 return output 

3685 

3686 def _entropy(self, *args): 

3687 if hasattr(self, 'pk'): 

3688 return stats.entropy(self.pk) 

3689 else: 

3690 _a, _b = self._get_support(*args) 

3691 return _expect(lambda x: entr(self.pmf(x, *args)), 

3692 _a, _b, self.ppf(0.5, *args), self.inc) 

3693 

3694 def expect(self, func=None, args=(), loc=0, lb=None, ub=None, 

3695 conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32): 

3696 """ 

3697 Calculate expected value of a function with respect to the distribution 

3698 for discrete distribution by numerical summation. 

3699 

3700 Parameters 

3701 ---------- 

3702 func : callable, optional 

3703 Function for which the expectation value is calculated. 

3704 Takes only one argument. 

3705 The default is the identity mapping f(k) = k. 

3706 args : tuple, optional 

3707 Shape parameters of the distribution. 

3708 loc : float, optional 

3709 Location parameter. 

3710 Default is 0. 

3711 lb, ub : int, optional 

3712 Lower and upper bound for the summation, default is set to the 

3713 support of the distribution, inclusive (``lb <= k <= ub``). 

3714 conditional : bool, optional 

3715 If true then the expectation is corrected by the conditional 

3716 probability of the summation interval. The return value is the 

3717 expectation of the function, `func`, conditional on being in 

3718 the given interval (k such that ``lb <= k <= ub``). 

3719 Default is False. 

3720 maxcount : int, optional 

3721 Maximal number of terms to evaluate (to avoid an endless loop for 

3722 an infinite sum). Default is 1000. 

3723 tolerance : float, optional 

3724 Absolute tolerance for the summation. Default is 1e-10. 

3725 chunksize : int, optional 

3726 Iterate over the support of a distributions in chunks of this size. 

3727 Default is 32. 

3728 

3729 Returns 

3730 ------- 

3731 expect : float 

3732 Expected value. 

3733 

3734 Notes 

3735 ----- 

3736 For heavy-tailed distributions, the expected value may or 

3737 may not exist, 

3738 depending on the function, `func`. If it does exist, but the 

3739 sum converges 

3740 slowly, the accuracy of the result may be rather low. For instance, for 

3741 ``zipf(4)``, accuracy for mean, variance in example is only 1e-5. 

3742 increasing `maxcount` and/or `chunksize` may improve the result, 

3743 but may also make zipf very slow. 

3744 

3745 The function is not vectorized. 

3746 

3747 """ 

3748 if func is None: 

3749 def fun(x): 

3750 # loc and args from outer scope 

3751 return (x+loc)*self._pmf(x, *args) 

3752 else: 

3753 def fun(x): 

3754 # loc and args from outer scope 

3755 return func(x+loc)*self._pmf(x, *args) 

3756 # used pmf because _pmf does not check support in randint and there 

3757 # might be problems(?) with correct self.a, self.b at this stage maybe 

3758 # not anymore, seems to work now with _pmf 

3759 

3760 _a, _b = self._get_support(*args) 

3761 if lb is None: 

3762 lb = _a 

3763 else: 

3764 lb = lb - loc # convert bound for standardized distribution 

3765 if ub is None: 

3766 ub = _b 

3767 else: 

3768 ub = ub - loc # convert bound for standardized distribution 

3769 if conditional: 

3770 invfac = self.sf(lb-1, *args) - self.sf(ub, *args) 

3771 else: 

3772 invfac = 1.0 

3773 

3774 if isinstance(self, rv_sample): 

3775 res = self._expect(fun, lb, ub) 

3776 return res / invfac 

3777 

3778 # iterate over the support, starting from the median 

3779 x0 = self.ppf(0.5, *args) 

3780 res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize) 

3781 return res / invfac 

3782 

3783 def _param_info(self): 

3784 shape_info = self._shape_info() 

3785 loc_info = _ShapeInfo("loc", True, (-np.inf, np.inf), (False, False)) 

3786 param_info = shape_info + [loc_info] 

3787 return param_info 

3788 

3789 

3790def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10, 

3791 chunksize=32): 

3792 """Helper for computing the expectation value of `fun`.""" 

3793 # short-circuit if the support size is small enough 

3794 if (ub - lb) <= chunksize: 

3795 supp = np.arange(lb, ub+1, inc) 

3796 vals = fun(supp) 

3797 return np.sum(vals) 

3798 

3799 # otherwise, iterate starting from x0 

3800 if x0 < lb: 

3801 x0 = lb 

3802 if x0 > ub: 

3803 x0 = ub 

3804 

3805 count, tot = 0, 0. 

3806 # iterate over [x0, ub] inclusive 

3807 for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc): 

3808 count += x.size 

3809 delta = np.sum(fun(x)) 

3810 tot += delta 

3811 if abs(delta) < tolerance * x.size: 

3812 break 

3813 if count > maxcount: 

3814 warnings.warn('expect(): sum did not converge', RuntimeWarning) 

3815 return tot 

3816 

3817 # iterate over [lb, x0) 

3818 for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc): 

3819 count += x.size 

3820 delta = np.sum(fun(x)) 

3821 tot += delta 

3822 if abs(delta) < tolerance * x.size: 

3823 break 

3824 if count > maxcount: 

3825 warnings.warn('expect(): sum did not converge', RuntimeWarning) 

3826 break 

3827 

3828 return tot 

3829 

3830 

3831def _iter_chunked(x0, x1, chunksize=4, inc=1): 

3832 """Iterate from x0 to x1 in chunks of chunksize and steps inc. 

3833 

3834 x0 must be finite, x1 need not be. In the latter case, the iterator is 

3835 infinite. 

3836 Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards 

3837 (make sure to set inc < 0.) 

3838 

3839 >>> [x for x in _iter_chunked(2, 5, inc=2)] 

3840 [array([2, 4])] 

3841 >>> [x for x in _iter_chunked(2, 11, inc=2)] 

3842 [array([2, 4, 6, 8]), array([10])] 

3843 >>> [x for x in _iter_chunked(2, -5, inc=-2)] 

3844 [array([ 2, 0, -2, -4])] 

3845 >>> [x for x in _iter_chunked(2, -9, inc=-2)] 

3846 [array([ 2, 0, -2, -4]), array([-6, -8])] 

3847 

3848 """ 

3849 if inc == 0: 

3850 raise ValueError('Cannot increment by zero.') 

3851 if chunksize <= 0: 

3852 raise ValueError('Chunk size must be positive; got %s.' % chunksize) 

3853 

3854 s = 1 if inc > 0 else -1 

3855 stepsize = abs(chunksize * inc) 

3856 

3857 x = x0 

3858 while (x - x1) * inc < 0: 

3859 delta = min(stepsize, abs(x - x1)) 

3860 step = delta * s 

3861 supp = np.arange(x, x + step, inc) 

3862 x += step 

3863 yield supp 

3864 

3865 

3866class rv_sample(rv_discrete): 

3867 """A 'sample' discrete distribution defined by the support and values. 

3868 

3869 The ctor ignores most of the arguments, only needs the `values` argument. 

3870 """ 

3871 def __init__(self, a=0, b=inf, name=None, badvalue=None, 

3872 moment_tol=1e-8, values=None, inc=1, longname=None, 

3873 shapes=None, extradoc=None, seed=None): 

3874 

3875 super(rv_discrete, self).__init__(seed) 

3876 

3877 if extradoc is not None: 

3878 warnings.warn("extradoc is deprecated and will be removed in " 

3879 "SciPy 1.11.0", DeprecationWarning) 

3880 

3881 if values is None: 

3882 raise ValueError("rv_sample.__init__(..., values=None,...)") 

3883 

3884 # cf generic freeze 

3885 self._ctor_param = dict( 

3886 a=a, b=b, name=name, badvalue=badvalue, 

3887 moment_tol=moment_tol, values=values, inc=inc, 

3888 longname=longname, shapes=shapes, extradoc=extradoc, seed=seed) 

3889 

3890 if badvalue is None: 

3891 badvalue = nan 

3892 self.badvalue = badvalue 

3893 self.moment_tol = moment_tol 

3894 self.inc = inc 

3895 self.shapes = shapes 

3896 self.vecentropy = self._entropy 

3897 

3898 xk, pk = values 

3899 

3900 if np.shape(xk) != np.shape(pk): 

3901 raise ValueError("xk and pk must have the same shape.") 

3902 if np.less(pk, 0.0).any(): 

3903 raise ValueError("All elements of pk must be non-negative.") 

3904 if not np.allclose(np.sum(pk), 1): 

3905 raise ValueError("The sum of provided pk is not 1.") 

3906 

3907 indx = np.argsort(np.ravel(xk)) 

3908 self.xk = np.take(np.ravel(xk), indx, 0) 

3909 self.pk = np.take(np.ravel(pk), indx, 0) 

3910 self.a = self.xk[0] 

3911 self.b = self.xk[-1] 

3912 

3913 self.qvals = np.cumsum(self.pk, axis=0) 

3914 

3915 self.shapes = ' ' # bypass inspection 

3916 

3917 self._construct_argparser(meths_to_inspect=[self._pmf], 

3918 locscale_in='loc=0', 

3919 # scale=1 for discrete RVs 

3920 locscale_out='loc, 1') 

3921 

3922 self._attach_methods() 

3923 

3924 self._construct_docstrings(name, longname, extradoc) 

3925 

3926 def __getstate__(self): 

3927 dct = self.__dict__.copy() 

3928 

3929 # these methods will be remade in rv_generic.__setstate__, 

3930 # which calls rv_generic._attach_methods 

3931 attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs"] 

3932 [dct.pop(attr, None) for attr in attrs] 

3933 

3934 return dct 

3935 

3936 def _attach_methods(self): 

3937 """Attaches dynamically created argparser methods.""" 

3938 self._attach_argparser_methods() 

3939 

3940 def _get_support(self, *args): 

3941 """Return the support of the (unscaled, unshifted) distribution. 

3942 

3943 Parameters 

3944 ---------- 

3945 arg1, arg2, ... : array_like 

3946 The shape parameter(s) for the distribution (see docstring of the 

3947 instance object for more information). 

3948 

3949 Returns 

3950 ------- 

3951 a, b : numeric (float, or int or +/-np.inf) 

3952 end-points of the distribution's support. 

3953 """ 

3954 return self.a, self.b 

3955 

3956 def _pmf(self, x): 

3957 return np.select([x == k for k in self.xk], 

3958 [np.broadcast_arrays(p, x)[0] for p in self.pk], 0) 

3959 

3960 def _cdf(self, x): 

3961 xx, xxk = np.broadcast_arrays(x[:, None], self.xk) 

3962 indx = np.argmax(xxk > xx, axis=-1) - 1 

3963 return self.qvals[indx] 

3964 

3965 def _ppf(self, q): 

3966 qq, sqq = np.broadcast_arrays(q[..., None], self.qvals) 

3967 indx = argmax(sqq >= qq, axis=-1) 

3968 return self.xk[indx] 

3969 

3970 def _rvs(self, size=None, random_state=None): 

3971 # Need to define it explicitly, otherwise .rvs() with size=None 

3972 # fails due to explicit broadcasting in _ppf 

3973 U = random_state.uniform(size=size) 

3974 if size is None: 

3975 U = np.array(U, ndmin=1) 

3976 Y = self._ppf(U)[0] 

3977 else: 

3978 Y = self._ppf(U) 

3979 return Y 

3980 

3981 def _entropy(self): 

3982 return stats.entropy(self.pk) 

3983 

3984 def generic_moment(self, n): 

3985 n = asarray(n) 

3986 return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0) 

3987 

3988 def _expect(self, fun, lb, ub, *args, **kwds): 

3989 # ignore all args, just do a brute force summation 

3990 supp = self.xk[(lb <= self.xk) & (self.xk <= ub)] 

3991 vals = fun(supp) 

3992 return np.sum(vals) 

3993 

3994 

3995def _check_shape(argshape, size): 

3996 """ 

3997 This is a utility function used by `_rvs()` in the class geninvgauss_gen. 

3998 It compares the tuple argshape to the tuple size. 

3999 

4000 Parameters 

4001 ---------- 

4002 argshape : tuple of integers 

4003 Shape of the arguments. 

4004 size : tuple of integers or integer 

4005 Size argument of rvs(). 

4006 

4007 Returns 

4008 ------- 

4009 The function returns two tuples, scalar_shape and bc. 

4010 

4011 scalar_shape : tuple 

4012 Shape to which the 1-d array of random variates returned by 

4013 _rvs_scalar() is converted when it is copied into the 

4014 output array of _rvs(). 

4015 

4016 bc : tuple of booleans 

4017 bc is an tuple the same length as size. bc[j] is True if the data 

4018 associated with that index is generated in one call of _rvs_scalar(). 

4019 

4020 """ 

4021 scalar_shape = [] 

4022 bc = [] 

4023 for argdim, sizedim in zip_longest(argshape[::-1], size[::-1], 

4024 fillvalue=1): 

4025 if sizedim > argdim or (argdim == sizedim == 1): 

4026 scalar_shape.append(sizedim) 

4027 bc.append(True) 

4028 else: 

4029 bc.append(False) 

4030 return tuple(scalar_shape[::-1]), tuple(bc[::-1]) 

4031 

4032 

4033def get_distribution_names(namespace_pairs, rv_base_class): 

4034 """Collect names of statistical distributions and their generators. 

4035 

4036 Parameters 

4037 ---------- 

4038 namespace_pairs : sequence 

4039 A snapshot of (name, value) pairs in the namespace of a module. 

4040 rv_base_class : class 

4041 The base class of random variable generator classes in a module. 

4042 

4043 Returns 

4044 ------- 

4045 distn_names : list of strings 

4046 Names of the statistical distributions. 

4047 distn_gen_names : list of strings 

4048 Names of the generators of the statistical distributions. 

4049 Note that these are not simply the names of the statistical 

4050 distributions, with a _gen suffix added. 

4051 

4052 """ 

4053 distn_names = [] 

4054 distn_gen_names = [] 

4055 for name, value in namespace_pairs: 

4056 if name.startswith('_'): 

4057 continue 

4058 if name.endswith('_gen') and issubclass(value, rv_base_class): 

4059 distn_gen_names.append(name) 

4060 if isinstance(value, rv_base_class): 

4061 distn_names.append(name) 

4062 return distn_names, distn_gen_names