Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_qmc.py: 14%

547 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1"""Quasi-Monte Carlo engines and helpers.""" 

2from __future__ import annotations 

3 

4import copy 

5import math 

6import numbers 

7import os 

8import warnings 

9from abc import ABC, abstractmethod 

10from functools import partial 

11from typing import ( 

12 Callable, 

13 ClassVar, 

14 Dict, 

15 List, 

16 Literal, 

17 Optional, 

18 overload, 

19 Tuple, 

20 TYPE_CHECKING, 

21) 

22 

23import numpy as np 

24 

25if TYPE_CHECKING: 

26 import numpy.typing as npt 

27 from scipy._lib._util import ( 

28 DecimalNumber, GeneratorType, IntNumber, SeedType 

29 ) 

30 

31import scipy.stats as stats 

32from scipy._lib._util import rng_integers 

33from scipy.spatial import distance, Voronoi 

34from scipy.special import gammainc 

35from ._sobol import ( 

36 _initialize_v, _cscramble, _fill_p_cumulative, _draw, _fast_forward, 

37 _categorize, _MAXDIM 

38) 

39from ._qmc_cy import ( 

40 _cy_wrapper_centered_discrepancy, 

41 _cy_wrapper_wrap_around_discrepancy, 

42 _cy_wrapper_mixture_discrepancy, 

43 _cy_wrapper_l2_star_discrepancy, 

44 _cy_wrapper_update_discrepancy, 

45 _cy_van_der_corput_scrambled, 

46 _cy_van_der_corput, 

47) 

48 

49 

50__all__ = ['scale', 'discrepancy', 'update_discrepancy', 

51 'QMCEngine', 'Sobol', 'Halton', 'LatinHypercube', 'PoissonDisk', 

52 'MultinomialQMC', 'MultivariateNormalQMC'] 

53 

54 

55@overload 

56def check_random_state(seed: Optional[IntNumber] = ...) -> np.random.Generator: 

57 ... 

58 

59@overload 

60def check_random_state(seed: GeneratorType) -> GeneratorType: 

61 ... 

62 

63 

64# Based on scipy._lib._util.check_random_state 

65def check_random_state(seed=None): 

66 """Turn `seed` into a `numpy.random.Generator` instance. 

67 

68 Parameters 

69 ---------- 

70 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional # noqa 

71 If `seed` is an int or None, a new `numpy.random.Generator` is 

72 created using ``np.random.default_rng(seed)``. 

73 If `seed` is already a ``Generator`` or ``RandomState`` instance, then 

74 the provided instance is used. 

75 

76 Returns 

77 ------- 

78 seed : {`numpy.random.Generator`, `numpy.random.RandomState`} 

79 Random number generator. 

80 

81 """ 

82 if seed is None or isinstance(seed, (numbers.Integral, np.integer)): 

83 return np.random.default_rng(seed) 

84 elif isinstance(seed, (np.random.RandomState, np.random.Generator)): 

85 return seed 

86 else: 

87 raise ValueError(f'{seed!r} cannot be used to seed a' 

88 ' numpy.random.Generator instance') 

89 

90 

91def scale( 

92 sample: npt.ArrayLike, 

93 l_bounds: npt.ArrayLike, 

94 u_bounds: npt.ArrayLike, 

95 *, 

96 reverse: bool = False 

97) -> np.ndarray: 

98 r"""Sample scaling from unit hypercube to different bounds. 

99 

100 To convert a sample from :math:`[0, 1)` to :math:`[a, b), b>a`, 

101 with :math:`a` the lower bounds and :math:`b` the upper bounds. 

102 The following transformation is used: 

103 

104 .. math:: 

105 

106 (b - a) \cdot \text{sample} + a 

107 

108 Parameters 

109 ---------- 

110 sample : array_like (n, d) 

111 Sample to scale. 

112 l_bounds, u_bounds : array_like (d,) 

113 Lower and upper bounds (resp. :math:`a`, :math:`b`) of transformed 

114 data. If `reverse` is True, range of the original data to transform 

115 to the unit hypercube. 

116 reverse : bool, optional 

117 Reverse the transformation from different bounds to the unit hypercube. 

118 Default is False. 

119 

120 Returns 

121 ------- 

122 sample : array_like (n, d) 

123 Scaled sample. 

124 

125 Examples 

126 -------- 

127 Transform 3 samples in the unit hypercube to bounds: 

128 

129 >>> from scipy.stats import qmc 

130 >>> l_bounds = [-2, 0] 

131 >>> u_bounds = [6, 5] 

132 >>> sample = [[0.5 , 0.75], 

133 ... [0.5 , 0.5], 

134 ... [0.75, 0.25]] 

135 >>> sample_scaled = qmc.scale(sample, l_bounds, u_bounds) 

136 >>> sample_scaled 

137 array([[2. , 3.75], 

138 [2. , 2.5 ], 

139 [4. , 1.25]]) 

140 

141 And convert back to the unit hypercube: 

142 

143 >>> sample_ = qmc.scale(sample_scaled, l_bounds, u_bounds, reverse=True) 

144 >>> sample_ 

145 array([[0.5 , 0.75], 

146 [0.5 , 0.5 ], 

147 [0.75, 0.25]]) 

148 

149 """ 

150 sample = np.asarray(sample) 

151 

152 # Checking bounds and sample 

153 if not sample.ndim == 2: 

154 raise ValueError('Sample is not a 2D array') 

155 

156 lower, upper = _validate_bounds( 

157 l_bounds=l_bounds, u_bounds=u_bounds, d=sample.shape[1] 

158 ) 

159 

160 if not reverse: 

161 # Checking that sample is within the hypercube 

162 if (sample.max() > 1.) or (sample.min() < 0.): 

163 raise ValueError('Sample is not in unit hypercube') 

164 

165 return sample * (upper - lower) + lower 

166 else: 

167 # Checking that sample is within the bounds 

168 if not (np.all(sample >= lower) and np.all(sample <= upper)): 

169 raise ValueError('Sample is out of bounds') 

170 

171 return (sample - lower) / (upper - lower) 

172 

173 

174def discrepancy( 

175 sample: npt.ArrayLike, 

176 *, 

177 iterative: bool = False, 

178 method: Literal["CD", "WD", "MD", "L2-star"] = "CD", 

179 workers: IntNumber = 1) -> float: 

180 """Discrepancy of a given sample. 

181 

182 Parameters 

183 ---------- 

184 sample : array_like (n, d) 

185 The sample to compute the discrepancy from. 

186 iterative : bool, optional 

187 Must be False if not using it for updating the discrepancy. 

188 Default is False. Refer to the notes for more details. 

189 method : str, optional 

190 Type of discrepancy, can be ``CD``, ``WD``, ``MD`` or ``L2-star``. 

191 Refer to the notes for more details. Default is ``CD``. 

192 workers : int, optional 

193 Number of workers to use for parallel processing. If -1 is given all 

194 CPU threads are used. Default is 1. 

195 

196 Returns 

197 ------- 

198 discrepancy : float 

199 Discrepancy. 

200 

201 Notes 

202 ----- 

203 The discrepancy is a uniformity criterion used to assess the space filling 

204 of a number of samples in a hypercube. A discrepancy quantifies the 

205 distance between the continuous uniform distribution on a hypercube and the 

206 discrete uniform distribution on :math:`n` distinct sample points. 

207 

208 The lower the value is, the better the coverage of the parameter space is. 

209 

210 For a collection of subsets of the hypercube, the discrepancy is the 

211 difference between the fraction of sample points in one of those 

212 subsets and the volume of that subset. There are different definitions of 

213 discrepancy corresponding to different collections of subsets. Some 

214 versions take a root mean square difference over subsets instead of 

215 a maximum. 

216 

217 A measure of uniformity is reasonable if it satisfies the following 

218 criteria [1]_: 

219 

220 1. It is invariant under permuting factors and/or runs. 

221 2. It is invariant under rotation of the coordinates. 

222 3. It can measure not only uniformity of the sample over the hypercube, 

223 but also the projection uniformity of the sample over non-empty 

224 subset of lower dimension hypercubes. 

225 4. There is some reasonable geometric meaning. 

226 5. It is easy to compute. 

227 6. It satisfies the Koksma-Hlawka-like inequality. 

228 7. It is consistent with other criteria in experimental design. 

229 

230 Four methods are available: 

231 

232 * ``CD``: Centered Discrepancy - subspace involves a corner of the 

233 hypercube 

234 * ``WD``: Wrap-around Discrepancy - subspace can wrap around bounds 

235 * ``MD``: Mixture Discrepancy - mix between CD/WD covering more criteria 

236 * ``L2-star``: L2-star discrepancy - like CD BUT variant to rotation 

237 

238 See [2]_ for precise definitions of each method. 

239 

240 Lastly, using ``iterative=True``, it is possible to compute the 

241 discrepancy as if we had :math:`n+1` samples. This is useful if we want 

242 to add a point to a sampling and check the candidate which would give the 

243 lowest discrepancy. Then you could just update the discrepancy with 

244 each candidate using `update_discrepancy`. This method is faster than 

245 computing the discrepancy for a large number of candidates. 

246 

247 References 

248 ---------- 

249 .. [1] Fang et al. "Design and modeling for computer experiments". 

250 Computer Science and Data Analysis Series, 2006. 

251 .. [2] Zhou Y.-D. et al. "Mixture discrepancy for quasi-random point sets." 

252 Journal of Complexity, 29 (3-4) , pp. 283-301, 2013. 

253 .. [3] T. T. Warnock. "Computational investigations of low discrepancy 

254 point sets." Applications of Number Theory to Numerical 

255 Analysis, Academic Press, pp. 319-343, 1972. 

256 

257 Examples 

258 -------- 

259 Calculate the quality of the sample using the discrepancy: 

260 

261 >>> import numpy as np 

262 >>> from scipy.stats import qmc 

263 >>> space = np.array([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]]) 

264 >>> l_bounds = [0.5, 0.5] 

265 >>> u_bounds = [6.5, 6.5] 

266 >>> space = qmc.scale(space, l_bounds, u_bounds, reverse=True) 

267 >>> space 

268 array([[0.08333333, 0.41666667], 

269 [0.25 , 0.91666667], 

270 [0.41666667, 0.25 ], 

271 [0.58333333, 0.75 ], 

272 [0.75 , 0.08333333], 

273 [0.91666667, 0.58333333]]) 

274 >>> qmc.discrepancy(space) 

275 0.008142039609053464 

276 

277 We can also compute iteratively the ``CD`` discrepancy by using 

278 ``iterative=True``. 

279 

280 >>> disc_init = qmc.discrepancy(space[:-1], iterative=True) 

281 >>> disc_init 

282 0.04769081147119336 

283 >>> qmc.update_discrepancy(space[-1], space[:-1], disc_init) 

284 0.008142039609053513 

285 

286 """ 

287 sample = np.asarray(sample, dtype=np.float64, order="C") 

288 

289 # Checking that sample is within the hypercube and 2D 

290 if not sample.ndim == 2: 

291 raise ValueError("Sample is not a 2D array") 

292 

293 if (sample.max() > 1.) or (sample.min() < 0.): 

294 raise ValueError("Sample is not in unit hypercube") 

295 

296 workers = _validate_workers(workers) 

297 

298 methods = { 

299 "CD": _cy_wrapper_centered_discrepancy, 

300 "WD": _cy_wrapper_wrap_around_discrepancy, 

301 "MD": _cy_wrapper_mixture_discrepancy, 

302 "L2-star": _cy_wrapper_l2_star_discrepancy, 

303 } 

304 

305 if method in methods: 

306 return methods[method](sample, iterative, workers=workers) 

307 else: 

308 raise ValueError(f"{method!r} is not a valid method. It must be one of" 

309 f" {set(methods)!r}") 

310 

311 

312def update_discrepancy( 

313 x_new: npt.ArrayLike, 

314 sample: npt.ArrayLike, 

315 initial_disc: DecimalNumber) -> float: 

316 """Update the centered discrepancy with a new sample. 

317 

318 Parameters 

319 ---------- 

320 x_new : array_like (1, d) 

321 The new sample to add in `sample`. 

322 sample : array_like (n, d) 

323 The initial sample. 

324 initial_disc : float 

325 Centered discrepancy of the `sample`. 

326 

327 Returns 

328 ------- 

329 discrepancy : float 

330 Centered discrepancy of the sample composed of `x_new` and `sample`. 

331 

332 Examples 

333 -------- 

334 We can also compute iteratively the discrepancy by using 

335 ``iterative=True``. 

336 

337 >>> import numpy as np 

338 >>> from scipy.stats import qmc 

339 >>> space = np.array([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]]) 

340 >>> l_bounds = [0.5, 0.5] 

341 >>> u_bounds = [6.5, 6.5] 

342 >>> space = qmc.scale(space, l_bounds, u_bounds, reverse=True) 

343 >>> disc_init = qmc.discrepancy(space[:-1], iterative=True) 

344 >>> disc_init 

345 0.04769081147119336 

346 >>> qmc.update_discrepancy(space[-1], space[:-1], disc_init) 

347 0.008142039609053513 

348 

349 """ 

350 sample = np.asarray(sample, dtype=np.float64, order="C") 

351 x_new = np.asarray(x_new, dtype=np.float64, order="C") 

352 

353 # Checking that sample is within the hypercube and 2D 

354 if not sample.ndim == 2: 

355 raise ValueError('Sample is not a 2D array') 

356 

357 if (sample.max() > 1.) or (sample.min() < 0.): 

358 raise ValueError('Sample is not in unit hypercube') 

359 

360 # Checking that x_new is within the hypercube and 1D 

361 if not x_new.ndim == 1: 

362 raise ValueError('x_new is not a 1D array') 

363 

364 if not (np.all(x_new >= 0) and np.all(x_new <= 1)): 

365 raise ValueError('x_new is not in unit hypercube') 

366 

367 if x_new.shape[0] != sample.shape[1]: 

368 raise ValueError("x_new and sample must be broadcastable") 

369 

370 return _cy_wrapper_update_discrepancy(x_new, sample, initial_disc) 

371 

372 

373def _perturb_discrepancy(sample: np.ndarray, i1: int, i2: int, k: int, 

374 disc: float): 

375 """Centered discrepancy after an elementary perturbation of a LHS. 

376 

377 An elementary perturbation consists of an exchange of coordinates between 

378 two points: ``sample[i1, k] <-> sample[i2, k]``. By construction, 

379 this operation conserves the LHS properties. 

380 

381 Parameters 

382 ---------- 

383 sample : array_like (n, d) 

384 The sample (before permutation) to compute the discrepancy from. 

385 i1 : int 

386 The first line of the elementary permutation. 

387 i2 : int 

388 The second line of the elementary permutation. 

389 k : int 

390 The column of the elementary permutation. 

391 disc : float 

392 Centered discrepancy of the design before permutation. 

393 

394 Returns 

395 ------- 

396 discrepancy : float 

397 Centered discrepancy of the design after permutation. 

398 

399 References 

400 ---------- 

401 .. [1] Jin et al. "An efficient algorithm for constructing optimal design 

402 of computer experiments", Journal of Statistical Planning and 

403 Inference, 2005. 

404 

405 """ 

406 n = sample.shape[0] 

407 

408 z_ij = sample - 0.5 

409 

410 # Eq (19) 

411 c_i1j = (1. / n ** 2. 

412 * np.prod(0.5 * (2. + abs(z_ij[i1, :]) 

413 + abs(z_ij) - abs(z_ij[i1, :] - z_ij)), axis=1)) 

414 c_i2j = (1. / n ** 2. 

415 * np.prod(0.5 * (2. + abs(z_ij[i2, :]) 

416 + abs(z_ij) - abs(z_ij[i2, :] - z_ij)), axis=1)) 

417 

418 # Eq (20) 

419 c_i1i1 = (1. / n ** 2 * np.prod(1 + abs(z_ij[i1, :])) 

420 - 2. / n * np.prod(1. + 0.5 * abs(z_ij[i1, :]) 

421 - 0.5 * z_ij[i1, :] ** 2)) 

422 c_i2i2 = (1. / n ** 2 * np.prod(1 + abs(z_ij[i2, :])) 

423 - 2. / n * np.prod(1. + 0.5 * abs(z_ij[i2, :]) 

424 - 0.5 * z_ij[i2, :] ** 2)) 

425 

426 # Eq (22), typo in the article in the denominator i2 -> i1 

427 num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k]) 

428 - abs(z_ij[i2, k] - z_ij[:, k])) 

429 denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k]) 

430 - abs(z_ij[i1, k] - z_ij[:, k])) 

431 gamma = num / denum 

432 

433 # Eq (23) 

434 c_p_i1j = gamma * c_i1j 

435 # Eq (24) 

436 c_p_i2j = c_i2j / gamma 

437 

438 alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k])) 

439 beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k])) 

440 

441 g_i1 = np.prod(1. + abs(z_ij[i1, :])) 

442 g_i2 = np.prod(1. + abs(z_ij[i2, :])) 

443 h_i1 = np.prod(1. + 0.5 * abs(z_ij[i1, :]) - 0.5 * (z_ij[i1, :] ** 2)) 

444 h_i2 = np.prod(1. + 0.5 * abs(z_ij[i2, :]) - 0.5 * (z_ij[i2, :] ** 2)) 

445 

446 # Eq (25), typo in the article g is missing 

447 c_p_i1i1 = ((g_i1 * alpha) / (n ** 2) - 2. * alpha * beta * h_i1 / n) 

448 # Eq (26), typo in the article n ** 2 

449 c_p_i2i2 = ((g_i2 / ((n ** 2) * alpha)) - (2. * h_i2 / (n * alpha * beta))) 

450 

451 # Eq (26) 

452 sum_ = c_p_i1j - c_i1j + c_p_i2j - c_i2j 

453 

454 mask = np.ones(n, dtype=bool) 

455 mask[[i1, i2]] = False 

456 sum_ = sum(sum_[mask]) 

457 

458 disc_ep = (disc + c_p_i1i1 - c_i1i1 + c_p_i2i2 - c_i2i2 + 2 * sum_) 

459 

460 return disc_ep 

461 

462 

463def primes_from_2_to(n: int) -> np.ndarray: 

464 """Prime numbers from 2 to *n*. 

465 

466 Parameters 

467 ---------- 

468 n : int 

469 Sup bound with ``n >= 6``. 

470 

471 Returns 

472 ------- 

473 primes : list(int) 

474 Primes in ``2 <= p < n``. 

475 

476 Notes 

477 ----- 

478 Taken from [1]_ by P.T. Roy, written consent given on 23.04.2021 

479 by the original author, Bruno Astrolino, for free use in SciPy under 

480 the 3-clause BSD. 

481 

482 References 

483 ---------- 

484 .. [1] `StackOverflow <https://stackoverflow.com/questions/2068372>`_. 

485 

486 """ 

487 sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool) 

488 for i in range(1, int(n ** 0.5) // 3 + 1): 

489 k = 3 * i + 1 | 1 

490 sieve[k * k // 3::2 * k] = False 

491 sieve[k * (k - 2 * (i & 1) + 4) // 3::2 * k] = False 

492 return np.r_[2, 3, ((3 * np.nonzero(sieve)[0][1:] + 1) | 1)] 

493 

494 

495def n_primes(n: IntNumber) -> List[int]: 

496 """List of the n-first prime numbers. 

497 

498 Parameters 

499 ---------- 

500 n : int 

501 Number of prime numbers wanted. 

502 

503 Returns 

504 ------- 

505 primes : list(int) 

506 List of primes. 

507 

508 """ 

509 primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 

510 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 

511 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 

512 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 

513 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 

514 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 

515 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 

516 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 

517 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 

518 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 

519 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 

520 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 

521 953, 967, 971, 977, 983, 991, 997][:n] # type: ignore[misc] 

522 

523 if len(primes) < n: 

524 big_number = 2000 

525 while 'Not enough primes': 

526 primes = primes_from_2_to(big_number)[:n] # type: ignore 

527 if len(primes) == n: 

528 break 

529 big_number += 1000 

530 

531 return primes 

532 

533 

534def van_der_corput( 

535 n: IntNumber, 

536 base: IntNumber = 2, 

537 *, 

538 start_index: IntNumber = 0, 

539 scramble: bool = False, 

540 seed: SeedType = None, 

541 workers: IntNumber = 1) -> np.ndarray: 

542 """Van der Corput sequence. 

543 

544 Pseudo-random number generator based on a b-adic expansion. 

545 

546 Scrambling uses permutations of the remainders (see [1]_). Multiple 

547 permutations are applied to construct a point. The sequence of 

548 permutations has to be the same for all points of the sequence. 

549 

550 Parameters 

551 ---------- 

552 n : int 

553 Number of element of the sequence. 

554 base : int, optional 

555 Base of the sequence. Default is 2. 

556 start_index : int, optional 

557 Index to start the sequence from. Default is 0. 

558 scramble : bool, optional 

559 If True, use Owen scrambling. Otherwise no scrambling is done. 

560 Default is True. 

561 seed : {None, int, `numpy.random.Generator`}, optional 

562 If `seed` is an int or None, a new `numpy.random.Generator` is 

563 created using ``np.random.default_rng(seed)``. 

564 If `seed` is already a ``Generator`` instance, then the provided 

565 instance is used. 

566 workers : int, optional 

567 Number of workers to use for parallel processing. If -1 is 

568 given all CPU threads are used. Default is 1. 

569 

570 Returns 

571 ------- 

572 sequence : list (n,) 

573 Sequence of Van der Corput. 

574 

575 References 

576 ---------- 

577 .. [1] A. B. Owen. "A randomized Halton algorithm in R", 

578 :arxiv:`1706.02808`, 2017. 

579 

580 """ 

581 if base < 2: 

582 raise ValueError("'base' must be at least 2") 

583 

584 if scramble: 

585 rng = check_random_state(seed) 

586 # In Algorithm 1 of Owen 2017, a permutation of `np.arange(base)` is 

587 # created for each positive integer `k` such that `1 - base**-k < 1` 

588 # using floating-point arithmetic. For double precision floats, the 

589 # condition `1 - base**-k < 1` can also be written as `base**-k > 

590 # 2**-54`, which makes it more apparent how many permutations we need 

591 # to create. 

592 count = math.ceil(54 / math.log2(base)) - 1 

593 permutations = np.repeat(np.arange(base)[None], count, axis=0) 

594 for perm in permutations: 

595 rng.shuffle(perm) 

596 

597 return _cy_van_der_corput_scrambled(n, base, start_index, 

598 permutations, workers) 

599 

600 else: 

601 return _cy_van_der_corput(n, base, start_index, workers) 

602 

603 

604class QMCEngine(ABC): 

605 """A generic Quasi-Monte Carlo sampler class meant for subclassing. 

606 

607 QMCEngine is a base class to construct a specific Quasi-Monte Carlo 

608 sampler. It cannot be used directly as a sampler. 

609 

610 Parameters 

611 ---------- 

612 d : int 

613 Dimension of the parameter space. 

614 optimization : {None, "random-cd", "lloyd"}, optional 

615 Whether to use an optimization scheme to improve the quality after 

616 sampling. Note that this is a post-processing step that does not 

617 guarantee that all properties of the sample will be conserved. 

618 Default is None. 

619 

620 * ``random-cd``: random permutations of coordinates to lower the 

621 centered discrepancy. The best sample based on the centered 

622 discrepancy is constantly updated. Centered discrepancy-based 

623 sampling shows better space-filling robustness toward 2D and 3D 

624 subprojections compared to using other discrepancy measures. 

625 * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm. 

626 The process converges to equally spaced samples. 

627 

628 .. versionadded:: 1.10.0 

629 seed : {None, int, `numpy.random.Generator`}, optional 

630 If `seed` is an int or None, a new `numpy.random.Generator` is 

631 created using ``np.random.default_rng(seed)``. 

632 If `seed` is already a ``Generator`` instance, then the provided 

633 instance is used. 

634 

635 Notes 

636 ----- 

637 By convention samples are distributed over the half-open interval 

638 ``[0, 1)``. Instances of the class can access the attributes: ``d`` for 

639 the dimension; and ``rng`` for the random number generator (used for the 

640 ``seed``). 

641 

642 **Subclassing** 

643 

644 When subclassing `QMCEngine` to create a new sampler, ``__init__`` and 

645 ``random`` must be redefined. 

646 

647 * ``__init__(d, seed=None)``: at least fix the dimension. If the sampler 

648 does not take advantage of a ``seed`` (deterministic methods like 

649 Halton), this parameter can be omitted. 

650 * ``_random(n, *, workers=1)``: draw ``n`` from the engine. ``workers`` 

651 is used for parallelism. See `Halton` for example. 

652 

653 Optionally, two other methods can be overwritten by subclasses: 

654 

655 * ``reset``: Reset the engine to its original state. 

656 * ``fast_forward``: If the sequence is deterministic (like Halton 

657 sequence), then ``fast_forward(n)`` is skipping the ``n`` first draw. 

658 

659 Examples 

660 -------- 

661 To create a random sampler based on ``np.random.random``, we would do the 

662 following: 

663 

664 >>> from scipy.stats import qmc 

665 >>> class RandomEngine(qmc.QMCEngine): 

666 ... def __init__(self, d, seed=None): 

667 ... super().__init__(d=d, seed=seed) 

668 ... 

669 ... 

670 ... def _random(self, n=1, *, workers=1): 

671 ... return self.rng.random((n, self.d)) 

672 ... 

673 ... 

674 ... def reset(self): 

675 ... super().__init__(d=self.d, seed=self.rng_seed) 

676 ... return self 

677 ... 

678 ... 

679 ... def fast_forward(self, n): 

680 ... self.random(n) 

681 ... return self 

682 

683 After subclassing `QMCEngine` to define the sampling strategy we want to 

684 use, we can create an instance to sample from. 

685 

686 >>> engine = RandomEngine(2) 

687 >>> engine.random(5) 

688 array([[0.22733602, 0.31675834], # random 

689 [0.79736546, 0.67625467], 

690 [0.39110955, 0.33281393], 

691 [0.59830875, 0.18673419], 

692 [0.67275604, 0.94180287]]) 

693 

694 We can also reset the state of the generator and resample again. 

695 

696 >>> _ = engine.reset() 

697 >>> engine.random(5) 

698 array([[0.22733602, 0.31675834], # random 

699 [0.79736546, 0.67625467], 

700 [0.39110955, 0.33281393], 

701 [0.59830875, 0.18673419], 

702 [0.67275604, 0.94180287]]) 

703 

704 """ 

705 

706 @abstractmethod 

707 def __init__( 

708 self, 

709 d: IntNumber, 

710 *, 

711 optimization: Optional[Literal["random-cd", "lloyd"]] = None, 

712 seed: SeedType = None 

713 ) -> None: 

714 if not np.issubdtype(type(d), np.integer) or d < 0: 

715 raise ValueError('d must be a non-negative integer value') 

716 

717 self.d = d 

718 self.rng = check_random_state(seed) 

719 self.rng_seed = copy.deepcopy(seed) 

720 self.num_generated = 0 

721 

722 config = { 

723 # random-cd 

724 "n_nochange": 100, 

725 "n_iters": 10_000, 

726 "rng": self.rng, 

727 

728 # lloyd 

729 "tol": 1e-5, 

730 "maxiter": 10, 

731 "qhull_options": None, 

732 } 

733 self.optimization_method = _select_optimizer(optimization, config) 

734 

735 @abstractmethod 

736 def _random( 

737 self, n: IntNumber = 1, *, workers: IntNumber = 1 

738 ) -> np.ndarray: 

739 ... 

740 

741 def random( 

742 self, n: IntNumber = 1, *, workers: IntNumber = 1 

743 ) -> np.ndarray: 

744 """Draw `n` in the half-open interval ``[0, 1)``. 

745 

746 Parameters 

747 ---------- 

748 n : int, optional 

749 Number of samples to generate in the parameter space. 

750 Default is 1. 

751 workers : int, optional 

752 Only supported with `Halton`. 

753 Number of workers to use for parallel processing. If -1 is 

754 given all CPU threads are used. Default is 1. It becomes faster 

755 than one worker for `n` greater than :math:`10^3`. 

756 

757 Returns 

758 ------- 

759 sample : array_like (n, d) 

760 QMC sample. 

761 

762 """ 

763 sample = self._random(n, workers=workers) 

764 if self.optimization_method is not None: 

765 sample = self.optimization_method(sample) 

766 

767 self.num_generated += n 

768 return sample 

769 

770 def integers( 

771 self, 

772 l_bounds: npt.ArrayLike, 

773 *, 

774 u_bounds: Optional[npt.ArrayLike] = None, 

775 n: IntNumber = 1, 

776 endpoint: bool = False, 

777 workers: IntNumber = 1 

778 ) -> np.ndarray: 

779 r""" 

780 Draw `n` integers from `l_bounds` (inclusive) to `u_bounds` 

781 (exclusive), or if endpoint=True, `l_bounds` (inclusive) to 

782 `u_bounds` (inclusive). 

783 

784 Parameters 

785 ---------- 

786 l_bounds : int or array-like of ints 

787 Lowest (signed) integers to be drawn (unless ``u_bounds=None``, 

788 in which case this parameter is 0 and this value is used for 

789 `u_bounds`). 

790 u_bounds : int or array-like of ints, optional 

791 If provided, one above the largest (signed) integer to be drawn 

792 (see above for behavior if ``u_bounds=None``). 

793 If array-like, must contain integer values. 

794 n : int, optional 

795 Number of samples to generate in the parameter space. 

796 Default is 1. 

797 endpoint : bool, optional 

798 If true, sample from the interval ``[l_bounds, u_bounds]`` instead 

799 of the default ``[l_bounds, u_bounds)``. Defaults is False. 

800 workers : int, optional 

801 Number of workers to use for parallel processing. If -1 is 

802 given all CPU threads are used. Only supported when using `Halton` 

803 Default is 1. 

804 

805 Returns 

806 ------- 

807 sample : array_like (n, d) 

808 QMC sample. 

809 

810 Notes 

811 ----- 

812 It is safe to just use the same ``[0, 1)`` to integer mapping 

813 with QMC that you would use with MC. You still get unbiasedness, 

814 a strong law of large numbers, an asymptotically infinite variance 

815 reduction and a finite sample variance bound. 

816 

817 To convert a sample from :math:`[0, 1)` to :math:`[a, b), b>a`, 

818 with :math:`a` the lower bounds and :math:`b` the upper bounds, 

819 the following transformation is used: 

820 

821 .. math:: 

822 

823 \text{floor}((b - a) \cdot \text{sample} + a) 

824 

825 """ 

826 if u_bounds is None: 

827 u_bounds = l_bounds 

828 l_bounds = 0 

829 

830 u_bounds = np.atleast_1d(u_bounds) 

831 l_bounds = np.atleast_1d(l_bounds) 

832 

833 if endpoint: 

834 u_bounds = u_bounds + 1 

835 

836 if (not np.issubdtype(l_bounds.dtype, np.integer) or 

837 not np.issubdtype(u_bounds.dtype, np.integer)): 

838 message = ("'u_bounds' and 'l_bounds' must be integers or" 

839 " array-like of integers") 

840 raise ValueError(message) 

841 

842 if isinstance(self, Halton): 

843 sample = self.random(n=n, workers=workers) 

844 else: 

845 sample = self.random(n=n) 

846 

847 sample = scale(sample, l_bounds=l_bounds, u_bounds=u_bounds) 

848 sample = np.floor(sample).astype(np.int64) 

849 

850 return sample 

851 

852 def reset(self) -> QMCEngine: 

853 """Reset the engine to base state. 

854 

855 Returns 

856 ------- 

857 engine : QMCEngine 

858 Engine reset to its base state. 

859 

860 """ 

861 seed = copy.deepcopy(self.rng_seed) 

862 self.rng = check_random_state(seed) 

863 self.num_generated = 0 

864 return self 

865 

866 def fast_forward(self, n: IntNumber) -> QMCEngine: 

867 """Fast-forward the sequence by `n` positions. 

868 

869 Parameters 

870 ---------- 

871 n : int 

872 Number of points to skip in the sequence. 

873 

874 Returns 

875 ------- 

876 engine : QMCEngine 

877 Engine reset to its base state. 

878 

879 """ 

880 self.random(n=n) 

881 return self 

882 

883 

884class Halton(QMCEngine): 

885 """Halton sequence. 

886 

887 Pseudo-random number generator that generalize the Van der Corput sequence 

888 for multiple dimensions. The Halton sequence uses the base-two Van der 

889 Corput sequence for the first dimension, base-three for its second and 

890 base-:math:`n` for its n-dimension. 

891 

892 Parameters 

893 ---------- 

894 d : int 

895 Dimension of the parameter space. 

896 scramble : bool, optional 

897 If True, use Owen scrambling. Otherwise no scrambling is done. 

898 Default is True. 

899 optimization : {None, "random-cd", "lloyd"}, optional 

900 Whether to use an optimization scheme to improve the quality after 

901 sampling. Note that this is a post-processing step that does not 

902 guarantee that all properties of the sample will be conserved. 

903 Default is None. 

904 

905 * ``random-cd``: random permutations of coordinates to lower the 

906 centered discrepancy. The best sample based on the centered 

907 discrepancy is constantly updated. Centered discrepancy-based 

908 sampling shows better space-filling robustness toward 2D and 3D 

909 subprojections compared to using other discrepancy measures. 

910 * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm. 

911 The process converges to equally spaced samples. 

912 

913 .. versionadded:: 1.10.0 

914 seed : {None, int, `numpy.random.Generator`}, optional 

915 If `seed` is an int or None, a new `numpy.random.Generator` is 

916 created using ``np.random.default_rng(seed)``. 

917 If `seed` is already a ``Generator`` instance, then the provided 

918 instance is used. 

919 

920 Notes 

921 ----- 

922 The Halton sequence has severe striping artifacts for even modestly 

923 large dimensions. These can be ameliorated by scrambling. Scrambling 

924 also supports replication-based error estimates and extends 

925 applicabiltiy to unbounded integrands. 

926 

927 References 

928 ---------- 

929 .. [1] Halton, "On the efficiency of certain quasi-random sequences of 

930 points in evaluating multi-dimensional integrals", Numerische 

931 Mathematik, 1960. 

932 .. [2] A. B. Owen. "A randomized Halton algorithm in R", 

933 :arxiv:`1706.02808`, 2017. 

934 

935 Examples 

936 -------- 

937 Generate samples from a low discrepancy sequence of Halton. 

938 

939 >>> from scipy.stats import qmc 

940 >>> sampler = qmc.Halton(d=2, scramble=False) 

941 >>> sample = sampler.random(n=5) 

942 >>> sample 

943 array([[0. , 0. ], 

944 [0.5 , 0.33333333], 

945 [0.25 , 0.66666667], 

946 [0.75 , 0.11111111], 

947 [0.125 , 0.44444444]]) 

948 

949 Compute the quality of the sample using the discrepancy criterion. 

950 

951 >>> qmc.discrepancy(sample) 

952 0.088893711419753 

953 

954 If some wants to continue an existing design, extra points can be obtained 

955 by calling again `random`. Alternatively, you can skip some points like: 

956 

957 >>> _ = sampler.fast_forward(5) 

958 >>> sample_continued = sampler.random(n=5) 

959 >>> sample_continued 

960 array([[0.3125 , 0.37037037], 

961 [0.8125 , 0.7037037 ], 

962 [0.1875 , 0.14814815], 

963 [0.6875 , 0.48148148], 

964 [0.4375 , 0.81481481]]) 

965 

966 Finally, samples can be scaled to bounds. 

967 

968 >>> l_bounds = [0, 2] 

969 >>> u_bounds = [10, 5] 

970 >>> qmc.scale(sample_continued, l_bounds, u_bounds) 

971 array([[3.125 , 3.11111111], 

972 [8.125 , 4.11111111], 

973 [1.875 , 2.44444444], 

974 [6.875 , 3.44444444], 

975 [4.375 , 4.44444444]]) 

976 

977 """ 

978 

979 def __init__( 

980 self, d: IntNumber, *, scramble: bool = True, 

981 optimization: Optional[Literal["random-cd", "lloyd"]] = None, 

982 seed: SeedType = None 

983 ) -> None: 

984 # Used in `scipy.integrate.qmc_quad` 

985 self._init_quad = {'d': d, 'scramble': True, 

986 'optimization': optimization} 

987 super().__init__(d=d, optimization=optimization, seed=seed) 

988 self.seed = seed 

989 self.base = n_primes(d) 

990 self.scramble = scramble 

991 

992 def _random( 

993 self, n: IntNumber = 1, *, workers: IntNumber = 1 

994 ) -> np.ndarray: 

995 """Draw `n` in the half-open interval ``[0, 1)``. 

996 

997 Parameters 

998 ---------- 

999 n : int, optional 

1000 Number of samples to generate in the parameter space. Default is 1. 

1001 workers : int, optional 

1002 Number of workers to use for parallel processing. If -1 is 

1003 given all CPU threads are used. Default is 1. It becomes faster 

1004 than one worker for `n` greater than :math:`10^3`. 

1005 

1006 Returns 

1007 ------- 

1008 sample : array_like (n, d) 

1009 QMC sample. 

1010 

1011 """ 

1012 workers = _validate_workers(workers) 

1013 # Generate a sample using a Van der Corput sequence per dimension. 

1014 # important to have ``type(bdim) == int`` for performance reason 

1015 sample = [van_der_corput(n, int(bdim), start_index=self.num_generated, 

1016 scramble=self.scramble, 

1017 seed=copy.deepcopy(self.seed), 

1018 workers=workers) 

1019 for bdim in self.base] 

1020 

1021 return np.array(sample).T.reshape(n, self.d) 

1022 

1023 

1024class LatinHypercube(QMCEngine): 

1025 r"""Latin hypercube sampling (LHS). 

1026 

1027 A Latin hypercube sample [1]_ generates :math:`n` points in 

1028 :math:`[0,1)^{d}`. Each univariate marginal distribution is stratified, 

1029 placing exactly one point in :math:`[j/n, (j+1)/n)` for 

1030 :math:`j=0,1,...,n-1`. They are still applicable when :math:`n << d`. 

1031 

1032 Parameters 

1033 ---------- 

1034 d : int 

1035 Dimension of the parameter space. 

1036 centered : bool, optional 

1037 Center samples within cells of a multi-dimensional grid. 

1038 Default is False. 

1039 

1040 .. deprecated:: 1.10.0 

1041 `centered` is deprecated as of SciPy 1.10.0 and will be removed in 

1042 1.12.0. Use `scramble` instead. ``centered=True`` corresponds to 

1043 ``scramble=False``. 

1044 

1045 scramble : bool, optional 

1046 When False, center samples within cells of a multi-dimensional grid. 

1047 Otherwise, samples are randomly placed within cells of the grid. 

1048 

1049 .. note:: 

1050 Setting ``scramble=False`` does not ensure deterministic output. 

1051 For that, use the `seed` parameter. 

1052 

1053 Default is True. 

1054 

1055 .. versionadded:: 1.10.0 

1056 

1057 optimization : {None, "random-cd", "lloyd"}, optional 

1058 Whether to use an optimization scheme to improve the quality after 

1059 sampling. Note that this is a post-processing step that does not 

1060 guarantee that all properties of the sample will be conserved. 

1061 Default is None. 

1062 

1063 * ``random-cd``: random permutations of coordinates to lower the 

1064 centered discrepancy. The best sample based on the centered 

1065 discrepancy is constantly updated. Centered discrepancy-based 

1066 sampling shows better space-filling robustness toward 2D and 3D 

1067 subprojections compared to using other discrepancy measures. 

1068 * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm. 

1069 The process converges to equally spaced samples. 

1070 

1071 .. versionadded:: 1.8.0 

1072 .. versionchanged:: 1.10.0 

1073 Add ``lloyd``. 

1074 

1075 strength : {1, 2}, optional 

1076 Strength of the LHS. ``strength=1`` produces a plain LHS while 

1077 ``strength=2`` produces an orthogonal array based LHS of strength 2 

1078 [7]_, [8]_. In that case, only ``n=p**2`` points can be sampled, 

1079 with ``p`` a prime number. It also constrains ``d <= p + 1``. 

1080 Default is 1. 

1081 

1082 .. versionadded:: 1.8.0 

1083 

1084 seed : {None, int, `numpy.random.Generator`}, optional 

1085 If `seed` is an int or None, a new `numpy.random.Generator` is 

1086 created using ``np.random.default_rng(seed)``. 

1087 If `seed` is already a ``Generator`` instance, then the provided 

1088 instance is used. 

1089 

1090 Notes 

1091 ----- 

1092 

1093 When LHS is used for integrating a function :math:`f` over :math:`n`, 

1094 LHS is extremely effective on integrands that are nearly additive [2]_. 

1095 With a LHS of :math:`n` points, the variance of the integral is always 

1096 lower than plain MC on :math:`n-1` points [3]_. There is a central limit 

1097 theorem for LHS on the mean and variance of the integral [4]_, but not 

1098 necessarily for optimized LHS due to the randomization. 

1099 

1100 :math:`A` is called an orthogonal array of strength :math:`t` if in each 

1101 n-row-by-t-column submatrix of :math:`A`: all :math:`p^t` possible 

1102 distinct rows occur the same number of times. The elements of :math:`A` 

1103 are in the set :math:`\{0, 1, ..., p-1\}`, also called symbols. 

1104 The constraint that :math:`p` must be a prime number is to allow modular 

1105 arithmetic. Increasing strength adds some symmetry to the sub-projections 

1106 of a sample. With strength 2, samples are symmetric along the diagonals of 

1107 2D sub-projections. This may be undesirable, but on the other hand, the 

1108 sample dispersion is improved. 

1109 

1110 Strength 1 (plain LHS) brings an advantage over strength 0 (MC) and 

1111 strength 2 is a useful increment over strength 1. Going to strength 3 is 

1112 a smaller increment and scrambled QMC like Sobol', Halton are more 

1113 performant [7]_. 

1114 

1115 To create a LHS of strength 2, the orthogonal array :math:`A` is 

1116 randomized by applying a random, bijective map of the set of symbols onto 

1117 itself. For example, in column 0, all 0s might become 2; in column 1, 

1118 all 0s might become 1, etc. 

1119 Then, for each column :math:`i` and symbol :math:`j`, we add a plain, 

1120 one-dimensional LHS of size :math:`p` to the subarray where 

1121 :math:`A^i = j`. The resulting matrix is finally divided by :math:`p`. 

1122 

1123 References 

1124 ---------- 

1125 .. [1] Mckay et al., "A Comparison of Three Methods for Selecting Values 

1126 of Input Variables in the Analysis of Output from a Computer Code." 

1127 Technometrics, 1979. 

1128 .. [2] M. Stein, "Large sample properties of simulations using Latin 

1129 hypercube sampling." Technometrics 29, no. 2: 143-151, 1987. 

1130 .. [3] A. B. Owen, "Monte Carlo variance of scrambled net quadrature." 

1131 SIAM Journal on Numerical Analysis 34, no. 5: 1884-1910, 1997 

1132 .. [4] Loh, W.-L. "On Latin hypercube sampling." The annals of statistics 

1133 24, no. 5: 2058-2080, 1996. 

1134 .. [5] Fang et al. "Design and modeling for computer experiments". 

1135 Computer Science and Data Analysis Series, 2006. 

1136 .. [6] Damblin et al., "Numerical studies of space filling designs: 

1137 optimization of Latin Hypercube Samples and subprojection properties." 

1138 Journal of Simulation, 2013. 

1139 .. [7] A. B. Owen , "Orthogonal arrays for computer experiments, 

1140 integration and visualization." Statistica Sinica, 1992. 

1141 .. [8] B. Tang, "Orthogonal Array-Based Latin Hypercubes." 

1142 Journal of the American Statistical Association, 1993. 

1143 

1144 Examples 

1145 -------- 

1146 Generate samples from a Latin hypercube generator. 

1147 

1148 >>> from scipy.stats import qmc 

1149 >>> sampler = qmc.LatinHypercube(d=2) 

1150 >>> sample = sampler.random(n=5) 

1151 >>> sample 

1152 array([[0.1545328 , 0.53664833], # random 

1153 [0.84052691, 0.06474907], 

1154 [0.52177809, 0.93343721], 

1155 [0.68033825, 0.36265316], 

1156 [0.26544879, 0.61163943]]) 

1157 

1158 Compute the quality of the sample using the discrepancy criterion. 

1159 

1160 >>> qmc.discrepancy(sample) 

1161 0.0196... # random 

1162 

1163 Samples can be scaled to bounds. 

1164 

1165 >>> l_bounds = [0, 2] 

1166 >>> u_bounds = [10, 5] 

1167 >>> qmc.scale(sample, l_bounds, u_bounds) 

1168 array([[1.54532796, 3.609945 ], # random 

1169 [8.40526909, 2.1942472 ], 

1170 [5.2177809 , 4.80031164], 

1171 [6.80338249, 3.08795949], 

1172 [2.65448791, 3.83491828]]) 

1173 

1174 Use the `optimization` keyword argument to produce a LHS with 

1175 lower discrepancy at higher computational cost. 

1176 

1177 >>> sampler = qmc.LatinHypercube(d=2, optimization="random-cd") 

1178 >>> sample = sampler.random(n=5) 

1179 >>> qmc.discrepancy(sample) 

1180 0.0176... # random 

1181 

1182 Use the `strength` keyword argument to produce an orthogonal array based 

1183 LHS of strength 2. In this case, the number of sample points must be the 

1184 square of a prime number. 

1185 

1186 >>> sampler = qmc.LatinHypercube(d=2, strength=2) 

1187 >>> sample = sampler.random(n=9) 

1188 >>> qmc.discrepancy(sample) 

1189 0.00526... # random 

1190 

1191 Options could be combined to produce an optimized centered 

1192 orthogonal array based LHS. After optimization, the result would not 

1193 be guaranteed to be of strength 2. 

1194 

1195 """ 

1196 

1197 def __init__( 

1198 self, d: IntNumber, *, centered: bool = False, 

1199 scramble: bool = True, 

1200 strength: int = 1, 

1201 optimization: Optional[Literal["random-cd", "lloyd"]] = None, 

1202 seed: SeedType = None 

1203 ) -> None: 

1204 if centered: 

1205 scramble = False 

1206 warnings.warn( 

1207 "'centered' is deprecated and will be removed in SciPy 1.12." 

1208 " Please use 'scramble' instead. 'centered=True' corresponds" 

1209 " to 'scramble=False'.", 

1210 stacklevel=2 

1211 ) 

1212 

1213 # Used in `scipy.integrate.qmc_quad` 

1214 self._init_quad = {'d': d, 'scramble': True, 'strength': strength, 

1215 'optimization': optimization} 

1216 super().__init__(d=d, seed=seed, optimization=optimization) 

1217 self.scramble = scramble 

1218 

1219 lhs_method_strength = { 

1220 1: self._random_lhs, 

1221 2: self._random_oa_lhs 

1222 } 

1223 

1224 try: 

1225 self.lhs_method: Callable = lhs_method_strength[strength] 

1226 except KeyError as exc: 

1227 message = (f"{strength!r} is not a valid strength. It must be one" 

1228 f" of {set(lhs_method_strength)!r}") 

1229 raise ValueError(message) from exc 

1230 

1231 def _random( 

1232 self, n: IntNumber = 1, *, workers: IntNumber = 1 

1233 ) -> np.ndarray: 

1234 lhs = self.lhs_method(n) 

1235 return lhs 

1236 

1237 def _random_lhs(self, n: IntNumber = 1) -> np.ndarray: 

1238 """Base LHS algorithm.""" 

1239 if not self.scramble: 

1240 samples: np.ndarray | float = 0.5 

1241 else: 

1242 samples = self.rng.uniform(size=(n, self.d)) 

1243 

1244 perms = np.tile(np.arange(1, n + 1), 

1245 (self.d, 1)) # type: ignore[arg-type] 

1246 for i in range(self.d): 

1247 self.rng.shuffle(perms[i, :]) 

1248 perms = perms.T 

1249 

1250 samples = (perms - samples) / n 

1251 return samples 

1252 

1253 def _random_oa_lhs(self, n: IntNumber = 4) -> np.ndarray: 

1254 """Orthogonal array based LHS of strength 2.""" 

1255 p = np.sqrt(n).astype(int) 

1256 n_row = p**2 

1257 n_col = p + 1 

1258 

1259 primes = primes_from_2_to(p + 1) 

1260 if p not in primes or n != n_row: 

1261 raise ValueError( 

1262 "n is not the square of a prime number. Close" 

1263 f" values are {primes[-2:]**2}" 

1264 ) 

1265 if self.d > p + 1: 

1266 raise ValueError("n is too small for d. Must be n > (d-1)**2") 

1267 

1268 oa_sample = np.zeros(shape=(n_row, n_col), dtype=int) 

1269 

1270 # OA of strength 2 

1271 arrays = np.tile(np.arange(p), (2, 1)) 

1272 oa_sample[:, :2] = np.stack(np.meshgrid(*arrays), 

1273 axis=-1).reshape(-1, 2) 

1274 for p_ in range(1, p): 

1275 oa_sample[:, 2+p_-1] = np.mod(oa_sample[:, 0] 

1276 + p_*oa_sample[:, 1], p) 

1277 

1278 # scramble the OA 

1279 oa_sample_ = np.empty(shape=(n_row, n_col), dtype=int) 

1280 for j in range(n_col): 

1281 perms = self.rng.permutation(p) 

1282 oa_sample_[:, j] = perms[oa_sample[:, j]] 

1283 

1284 # following is making a scrambled OA into an OA-LHS 

1285 oa_lhs_sample = np.zeros(shape=(n_row, n_col)) 

1286 lhs_engine = LatinHypercube(d=1, scramble=self.scramble, strength=1, 

1287 seed=self.rng) # type: QMCEngine 

1288 for j in range(n_col): 

1289 for k in range(p): 

1290 idx = oa_sample[:, j] == k 

1291 lhs = lhs_engine.random(p).flatten() 

1292 oa_lhs_sample[:, j][idx] = lhs + oa_sample[:, j][idx] 

1293 

1294 lhs_engine = lhs_engine.reset() 

1295 

1296 oa_lhs_sample /= p 

1297 

1298 return oa_lhs_sample[:, :self.d] # type: ignore 

1299 

1300 

1301class Sobol(QMCEngine): 

1302 """Engine for generating (scrambled) Sobol' sequences. 

1303 

1304 Sobol' sequences are low-discrepancy, quasi-random numbers. Points 

1305 can be drawn using two methods: 

1306 

1307 * `random_base2`: safely draw :math:`n=2^m` points. This method 

1308 guarantees the balance properties of the sequence. 

1309 * `random`: draw an arbitrary number of points from the 

1310 sequence. See warning below. 

1311 

1312 Parameters 

1313 ---------- 

1314 d : int 

1315 Dimensionality of the sequence. Max dimensionality is 21201. 

1316 scramble : bool, optional 

1317 If True, use LMS+shift scrambling. Otherwise, no scrambling is done. 

1318 Default is True. 

1319 bits : int, optional 

1320 Number of bits of the generator. Control the maximum number of points 

1321 that can be generated, which is ``2**bits``. Maximal value is 64. 

1322 It does not correspond to the return type, which is always 

1323 ``np.float64`` to prevent points from repeating themselves. 

1324 Default is None, which for backward compatibility, corresponds to 30. 

1325 

1326 .. versionadded:: 1.9.0 

1327 optimization : {None, "random-cd", "lloyd"}, optional 

1328 Whether to use an optimization scheme to improve the quality after 

1329 sampling. Note that this is a post-processing step that does not 

1330 guarantee that all properties of the sample will be conserved. 

1331 Default is None. 

1332 

1333 * ``random-cd``: random permutations of coordinates to lower the 

1334 centered discrepancy. The best sample based on the centered 

1335 discrepancy is constantly updated. Centered discrepancy-based 

1336 sampling shows better space-filling robustness toward 2D and 3D 

1337 subprojections compared to using other discrepancy measures. 

1338 * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm. 

1339 The process converges to equally spaced samples. 

1340 

1341 .. versionadded:: 1.10.0 

1342 seed : {None, int, `numpy.random.Generator`}, optional 

1343 If `seed` is an int or None, a new `numpy.random.Generator` is 

1344 created using ``np.random.default_rng(seed)``. 

1345 If `seed` is already a ``Generator`` instance, then the provided 

1346 instance is used. 

1347 

1348 Notes 

1349 ----- 

1350 Sobol' sequences [1]_ provide :math:`n=2^m` low discrepancy points in 

1351 :math:`[0,1)^{d}`. Scrambling them [3]_ makes them suitable for singular 

1352 integrands, provides a means of error estimation, and can improve their 

1353 rate of convergence. The scrambling strategy which is implemented is a 

1354 (left) linear matrix scramble (LMS) followed by a digital random shift 

1355 (LMS+shift) [2]_. 

1356 

1357 There are many versions of Sobol' sequences depending on their 

1358 'direction numbers'. This code uses direction numbers from [4]_. Hence, 

1359 the maximum number of dimension is 21201. The direction numbers have been 

1360 precomputed with search criterion 6 and can be retrieved at 

1361 https://web.maths.unsw.edu.au/~fkuo/sobol/. 

1362 

1363 .. warning:: 

1364 

1365 Sobol' sequences are a quadrature rule and they lose their balance 

1366 properties if one uses a sample size that is not a power of 2, or skips 

1367 the first point, or thins the sequence [5]_. 

1368 

1369 If :math:`n=2^m` points are not enough then one should take :math:`2^M` 

1370 points for :math:`M>m`. When scrambling, the number R of independent 

1371 replicates does not have to be a power of 2. 

1372 

1373 Sobol' sequences are generated to some number :math:`B` of bits. 

1374 After :math:`2^B` points have been generated, the sequence would 

1375 repeat. Hence, an error is raised. 

1376 The number of bits can be controlled with the parameter `bits`. 

1377 

1378 References 

1379 ---------- 

1380 .. [1] I. M. Sobol', "The distribution of points in a cube and the accurate 

1381 evaluation of integrals." Zh. Vychisl. Mat. i Mat. Phys., 7:784-802, 

1382 1967. 

1383 .. [2] J. Matousek, "On the L2-discrepancy for anchored boxes." 

1384 J. of Complexity 14, 527-556, 1998. 

1385 .. [3] Art B. Owen, "Scrambling Sobol and Niederreiter-Xing points." 

1386 Journal of Complexity, 14(4):466-489, December 1998. 

1387 .. [4] S. Joe and F. Y. Kuo, "Constructing sobol sequences with better 

1388 two-dimensional projections." SIAM Journal on Scientific Computing, 

1389 30(5):2635-2654, 2008. 

1390 .. [5] Art B. Owen, "On dropping the first Sobol' point." 

1391 :arxiv:`2008.08051`, 2020. 

1392 

1393 Examples 

1394 -------- 

1395 Generate samples from a low discrepancy sequence of Sobol'. 

1396 

1397 >>> from scipy.stats import qmc 

1398 >>> sampler = qmc.Sobol(d=2, scramble=False) 

1399 >>> sample = sampler.random_base2(m=3) 

1400 >>> sample 

1401 array([[0. , 0. ], 

1402 [0.5 , 0.5 ], 

1403 [0.75 , 0.25 ], 

1404 [0.25 , 0.75 ], 

1405 [0.375, 0.375], 

1406 [0.875, 0.875], 

1407 [0.625, 0.125], 

1408 [0.125, 0.625]]) 

1409 

1410 Compute the quality of the sample using the discrepancy criterion. 

1411 

1412 >>> qmc.discrepancy(sample) 

1413 0.013882107204860938 

1414 

1415 To continue an existing design, extra points can be obtained 

1416 by calling again `random_base2`. Alternatively, you can skip some 

1417 points like: 

1418 

1419 >>> _ = sampler.reset() 

1420 >>> _ = sampler.fast_forward(4) 

1421 >>> sample_continued = sampler.random_base2(m=2) 

1422 >>> sample_continued 

1423 array([[0.375, 0.375], 

1424 [0.875, 0.875], 

1425 [0.625, 0.125], 

1426 [0.125, 0.625]]) 

1427 

1428 Finally, samples can be scaled to bounds. 

1429 

1430 >>> l_bounds = [0, 2] 

1431 >>> u_bounds = [10, 5] 

1432 >>> qmc.scale(sample_continued, l_bounds, u_bounds) 

1433 array([[3.75 , 3.125], 

1434 [8.75 , 4.625], 

1435 [6.25 , 2.375], 

1436 [1.25 , 3.875]]) 

1437 

1438 """ 

1439 

1440 MAXDIM: ClassVar[int] = _MAXDIM 

1441 

1442 def __init__( 

1443 self, d: IntNumber, *, scramble: bool = True, 

1444 bits: Optional[IntNumber] = None, seed: SeedType = None, 

1445 optimization: Optional[Literal["random-cd", "lloyd"]] = None 

1446 ) -> None: 

1447 # Used in `scipy.integrate.qmc_quad` 

1448 self._init_quad = {'d': d, 'scramble': True, 'bits': bits, 

1449 'optimization': optimization} 

1450 

1451 super().__init__(d=d, optimization=optimization, seed=seed) 

1452 if d > self.MAXDIM: 

1453 raise ValueError( 

1454 f"Maximum supported dimensionality is {self.MAXDIM}." 

1455 ) 

1456 

1457 self.bits = bits 

1458 self.dtype_i: type 

1459 

1460 if self.bits is None: 

1461 self.bits = 30 

1462 

1463 if self.bits <= 32: 

1464 self.dtype_i = np.uint32 

1465 elif 32 < self.bits <= 64: 

1466 self.dtype_i = np.uint64 

1467 else: 

1468 raise ValueError("Maximum supported 'bits' is 64") 

1469 

1470 self.maxn = 2**self.bits 

1471 

1472 # v is d x maxbit matrix 

1473 self._sv: np.ndarray = np.zeros((d, self.bits), dtype=self.dtype_i) 

1474 _initialize_v(self._sv, dim=d, bits=self.bits) 

1475 

1476 if not scramble: 

1477 self._shift: np.ndarray = np.zeros(d, dtype=self.dtype_i) 

1478 else: 

1479 # scramble self._shift and self._sv 

1480 self._scramble() 

1481 

1482 self._quasi = self._shift.copy() 

1483 

1484 # normalization constant with the largest possible number 

1485 # calculate in Python to not overflow int with 2**64 

1486 self._scale = 1.0 / 2 ** self.bits 

1487 

1488 self._first_point = (self._quasi * self._scale).reshape(1, -1) 

1489 # explicit casting to float64 

1490 self._first_point = self._first_point.astype(np.float64) 

1491 

1492 def _scramble(self) -> None: 

1493 """Scramble the sequence using LMS+shift.""" 

1494 # Generate shift vector 

1495 self._shift = np.dot( 

1496 rng_integers(self.rng, 2, size=(self.d, self.bits), 

1497 dtype=self.dtype_i), 

1498 2 ** np.arange(self.bits, dtype=self.dtype_i), 

1499 ) 

1500 # Generate lower triangular matrices (stacked across dimensions) 

1501 ltm = np.tril(rng_integers(self.rng, 2, 

1502 size=(self.d, self.bits, self.bits), 

1503 dtype=self.dtype_i)) 

1504 _cscramble( 

1505 dim=self.d, bits=self.bits, # type: ignore[arg-type] 

1506 ltm=ltm, sv=self._sv 

1507 ) 

1508 

1509 def _random( 

1510 self, n: IntNumber = 1, *, workers: IntNumber = 1 

1511 ) -> np.ndarray: 

1512 """Draw next point(s) in the Sobol' sequence. 

1513 

1514 Parameters 

1515 ---------- 

1516 n : int, optional 

1517 Number of samples to generate in the parameter space. Default is 1. 

1518 

1519 Returns 

1520 ------- 

1521 sample : array_like (n, d) 

1522 Sobol' sample. 

1523 

1524 """ 

1525 sample: np.ndarray = np.empty((n, self.d), dtype=np.float64) 

1526 

1527 if n == 0: 

1528 return sample 

1529 

1530 total_n = self.num_generated + n 

1531 if total_n > self.maxn: 

1532 msg = ( 

1533 f"At most 2**{self.bits}={self.maxn} distinct points can be " 

1534 f"generated. {self.num_generated} points have been previously " 

1535 f"generated, then: n={self.num_generated}+{n}={total_n}. " 

1536 ) 

1537 if self.bits != 64: 

1538 msg += "Consider increasing `bits`." 

1539 raise ValueError(msg) 

1540 

1541 if self.num_generated == 0: 

1542 # verify n is 2**n 

1543 if not (n & (n - 1) == 0): 

1544 warnings.warn("The balance properties of Sobol' points require" 

1545 " n to be a power of 2.", stacklevel=2) 

1546 

1547 if n == 1: 

1548 sample = self._first_point 

1549 else: 

1550 _draw( 

1551 n=n - 1, num_gen=self.num_generated, dim=self.d, 

1552 scale=self._scale, sv=self._sv, quasi=self._quasi, 

1553 sample=sample 

1554 ) 

1555 sample = np.concatenate( 

1556 [self._first_point, sample] 

1557 )[:n] # type: ignore[misc] 

1558 else: 

1559 _draw( 

1560 n=n, num_gen=self.num_generated - 1, dim=self.d, 

1561 scale=self._scale, sv=self._sv, quasi=self._quasi, 

1562 sample=sample 

1563 ) 

1564 

1565 return sample 

1566 

1567 def random_base2(self, m: IntNumber) -> np.ndarray: 

1568 """Draw point(s) from the Sobol' sequence. 

1569 

1570 This function draws :math:`n=2^m` points in the parameter space 

1571 ensuring the balance properties of the sequence. 

1572 

1573 Parameters 

1574 ---------- 

1575 m : int 

1576 Logarithm in base 2 of the number of samples; i.e., n = 2^m. 

1577 

1578 Returns 

1579 ------- 

1580 sample : array_like (n, d) 

1581 Sobol' sample. 

1582 

1583 """ 

1584 n = 2 ** m 

1585 

1586 total_n = self.num_generated + n 

1587 if not (total_n & (total_n - 1) == 0): 

1588 raise ValueError("The balance properties of Sobol' points require " 

1589 "n to be a power of 2. {0} points have been " 

1590 "previously generated, then: n={0}+2**{1}={2}. " 

1591 "If you still want to do this, the function " 

1592 "'Sobol.random()' can be used." 

1593 .format(self.num_generated, m, total_n)) 

1594 

1595 return self.random(n) 

1596 

1597 def reset(self) -> Sobol: 

1598 """Reset the engine to base state. 

1599 

1600 Returns 

1601 ------- 

1602 engine : Sobol 

1603 Engine reset to its base state. 

1604 

1605 """ 

1606 super().reset() 

1607 self._quasi = self._shift.copy() 

1608 return self 

1609 

1610 def fast_forward(self, n: IntNumber) -> Sobol: 

1611 """Fast-forward the sequence by `n` positions. 

1612 

1613 Parameters 

1614 ---------- 

1615 n : int 

1616 Number of points to skip in the sequence. 

1617 

1618 Returns 

1619 ------- 

1620 engine : Sobol 

1621 The fast-forwarded engine. 

1622 

1623 """ 

1624 if self.num_generated == 0: 

1625 _fast_forward( 

1626 n=n - 1, num_gen=self.num_generated, dim=self.d, 

1627 sv=self._sv, quasi=self._quasi 

1628 ) 

1629 else: 

1630 _fast_forward( 

1631 n=n, num_gen=self.num_generated - 1, dim=self.d, 

1632 sv=self._sv, quasi=self._quasi 

1633 ) 

1634 self.num_generated += n 

1635 return self 

1636 

1637 

1638class PoissonDisk(QMCEngine): 

1639 """Poisson disk sampling. 

1640 

1641 Parameters 

1642 ---------- 

1643 d : int 

1644 Dimension of the parameter space. 

1645 radius : float 

1646 Minimal distance to keep between points when sampling new candidates. 

1647 hypersphere : {"volume", "surface"}, optional 

1648 Sampling strategy to generate potential candidates to be added in the 

1649 final sample. Default is "volume". 

1650 

1651 * ``volume``: original Bridson algorithm as described in [1]_. 

1652 New candidates are sampled *within* the hypersphere. 

1653 * ``surface``: only sample the surface of the hypersphere. 

1654 ncandidates : int 

1655 Number of candidates to sample per iteration. More candidates result 

1656 in a denser sampling as more candidates can be accepted per iteration. 

1657 optimization : {None, "random-cd", "lloyd"}, optional 

1658 Whether to use an optimization scheme to improve the quality after 

1659 sampling. Note that this is a post-processing step that does not 

1660 guarantee that all properties of the sample will be conserved. 

1661 Default is None. 

1662 

1663 * ``random-cd``: random permutations of coordinates to lower the 

1664 centered discrepancy. The best sample based on the centered 

1665 discrepancy is constantly updated. Centered discrepancy-based 

1666 sampling shows better space-filling robustness toward 2D and 3D 

1667 subprojections compared to using other discrepancy measures. 

1668 * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm. 

1669 The process converges to equally spaced samples. 

1670 

1671 .. versionadded:: 1.10.0 

1672 seed : {None, int, `numpy.random.Generator`}, optional 

1673 If `seed` is an int or None, a new `numpy.random.Generator` is 

1674 created using ``np.random.default_rng(seed)``. 

1675 If `seed` is already a ``Generator`` instance, then the provided 

1676 instance is used. 

1677 

1678 Notes 

1679 ----- 

1680 Poisson disk sampling is an iterative sampling strategy. Starting from 

1681 a seed sample, `ncandidates` are sampled in the hypersphere 

1682 surrounding the seed. Candidates bellow a certain `radius` or outside the 

1683 domain are rejected. New samples are added in a pool of sample seed. The 

1684 process stops when the pool is empty or when the number of required 

1685 samples is reached. 

1686 

1687 The maximum number of point that a sample can contain is directly linked 

1688 to the `radius`. As the dimension of the space increases, a higher radius 

1689 spreads the points further and help overcome the curse of dimensionality. 

1690 See the :ref:`quasi monte carlo tutorial <quasi-monte-carlo>` for more 

1691 details. 

1692 

1693 .. warning:: 

1694 

1695 The algorithm is more suitable for low dimensions and sampling size 

1696 due to its iterative nature and memory requirements. 

1697 Selecting a small radius with a high dimension would 

1698 mean that the space could contain more samples than using lower 

1699 dimension or a bigger radius. 

1700 

1701 Some code taken from [2]_, written consent given on 31.03.2021 

1702 by the original author, Shamis, for free use in SciPy under 

1703 the 3-clause BSD. 

1704 

1705 References 

1706 ---------- 

1707 .. [1] Robert Bridson, "Fast Poisson Disk Sampling in Arbitrary 

1708 Dimensions." SIGGRAPH, 2007. 

1709 .. [2] `StackOverflow <https://stackoverflow.com/questions/66047540>`__. 

1710 

1711 Examples 

1712 -------- 

1713 Generate a 2D sample using a `radius` of 0.2. 

1714 

1715 >>> import numpy as np 

1716 >>> import matplotlib.pyplot as plt 

1717 >>> from matplotlib.collections import PatchCollection 

1718 >>> from scipy.stats import qmc 

1719 >>> 

1720 >>> rng = np.random.default_rng() 

1721 >>> radius = 0.2 

1722 >>> engine = qmc.PoissonDisk(d=2, radius=radius, seed=rng) 

1723 >>> sample = engine.random(20) 

1724 

1725 Visualizing the 2D sample and showing that no points are closer than 

1726 `radius`. ``radius/2`` is used to visualize non-intersecting circles. 

1727 If two samples are exactly at `radius` from each other, then their circle 

1728 of radius ``radius/2`` will touch. 

1729 

1730 >>> fig, ax = plt.subplots() 

1731 >>> _ = ax.scatter(sample[:, 0], sample[:, 1]) 

1732 >>> circles = [plt.Circle((xi, yi), radius=radius/2, fill=False) 

1733 ... for xi, yi in sample] 

1734 >>> collection = PatchCollection(circles, match_original=True) 

1735 >>> ax.add_collection(collection) 

1736 >>> _ = ax.set(aspect='equal', xlabel=r'$x_1$', ylabel=r'$x_2$', 

1737 ... xlim=[0, 1], ylim=[0, 1]) 

1738 >>> plt.show() 

1739 

1740 Such visualization can be seen as circle packing: how many circle can 

1741 we put in the space. It is a np-hard problem. The method `fill_space` 

1742 can be used to add samples until no more samples can be added. This is 

1743 a hard problem and parameters may need to be adjusted manually. Beware of 

1744 the dimension: as the dimensionality increases, the number of samples 

1745 required to fill the space increases exponentially 

1746 (curse-of-dimensionality). 

1747 

1748 """ 

1749 

1750 def __init__( 

1751 self, 

1752 d: IntNumber, 

1753 *, 

1754 radius: DecimalNumber = 0.05, 

1755 hypersphere: Literal["volume", "surface"] = "volume", 

1756 ncandidates: IntNumber = 30, 

1757 optimization: Optional[Literal["random-cd", "lloyd"]] = None, 

1758 seed: SeedType = None 

1759 ) -> None: 

1760 # Used in `scipy.integrate.qmc_quad` 

1761 self._init_quad = {'d': d, 'radius': radius, 

1762 'hypersphere': hypersphere, 

1763 'ncandidates': ncandidates, 

1764 'optimization': optimization} 

1765 super().__init__(d=d, optimization=optimization, seed=seed) 

1766 

1767 hypersphere_sample = { 

1768 "volume": self._hypersphere_volume_sample, 

1769 "surface": self._hypersphere_surface_sample 

1770 } 

1771 

1772 try: 

1773 self.hypersphere_method = hypersphere_sample[hypersphere] 

1774 except KeyError as exc: 

1775 message = ( 

1776 f"{hypersphere!r} is not a valid hypersphere sampling" 

1777 f" method. It must be one of {set(hypersphere_sample)!r}") 

1778 raise ValueError(message) from exc 

1779 

1780 # size of the sphere from which the samples are drawn relative to the 

1781 # size of a disk (radius) 

1782 # for the surface sampler, all new points are almost exactly 1 radius 

1783 # away from at least one existing sample +eps to avoid rejection 

1784 self.radius_factor = 2 if hypersphere == "volume" else 1.001 

1785 self.radius = radius 

1786 self.radius_squared = self.radius**2 

1787 

1788 # sample to generate per iteration in the hypersphere around center 

1789 self.ncandidates = ncandidates 

1790 

1791 with np.errstate(divide='ignore'): 

1792 self.cell_size = self.radius / np.sqrt(self.d) 

1793 self.grid_size = ( 

1794 np.ceil(np.ones(self.d) / self.cell_size) 

1795 ).astype(int) 

1796 

1797 self._initialize_grid_pool() 

1798 

1799 def _initialize_grid_pool(self): 

1800 """Sampling pool and sample grid.""" 

1801 self.sample_pool = [] 

1802 # Positions of cells 

1803 # n-dim value for each grid cell 

1804 self.sample_grid = np.empty( 

1805 np.append(self.grid_size, self.d), 

1806 dtype=np.float32 

1807 ) 

1808 # Initialise empty cells with NaNs 

1809 self.sample_grid.fill(np.nan) 

1810 

1811 def _random( 

1812 self, n: IntNumber = 1, *, workers: IntNumber = 1 

1813 ) -> np.ndarray: 

1814 """Draw `n` in the interval ``[0, 1]``. 

1815 

1816 Note that it can return fewer samples if the space is full. 

1817 See the note section of the class. 

1818 

1819 Parameters 

1820 ---------- 

1821 n : int, optional 

1822 Number of samples to generate in the parameter space. Default is 1. 

1823 

1824 Returns 

1825 ------- 

1826 sample : array_like (n, d) 

1827 QMC sample. 

1828 

1829 """ 

1830 if n == 0 or self.d == 0: 

1831 return np.empty((n, self.d)) 

1832 

1833 def in_limits(sample: np.ndarray) -> bool: 

1834 return (sample.max() <= 1.) and (sample.min() >= 0.) 

1835 

1836 def in_neighborhood(candidate: np.ndarray, n: int = 2) -> bool: 

1837 """ 

1838 Check if there are samples closer than ``radius_squared`` to the 

1839 `candidate` sample. 

1840 """ 

1841 indices = (candidate / self.cell_size).astype(int) 

1842 ind_min = np.maximum(indices - n, np.zeros(self.d, dtype=int)) 

1843 ind_max = np.minimum(indices + n + 1, self.grid_size) 

1844 

1845 # Check if the center cell is empty 

1846 if not np.isnan(self.sample_grid[tuple(indices)][0]): 

1847 return True 

1848 

1849 a = [slice(ind_min[i], ind_max[i]) for i in range(self.d)] 

1850 

1851 # guards against: invalid value encountered in less as we are 

1852 # comparing with nan and returns False. Which is wanted. 

1853 with np.errstate(invalid='ignore'): 

1854 if np.any( 

1855 np.sum( 

1856 np.square(candidate - self.sample_grid[tuple(a)]), 

1857 axis=self.d 

1858 ) < self.radius_squared 

1859 ): 

1860 return True 

1861 

1862 return False 

1863 

1864 def add_sample(candidate: np.ndarray) -> None: 

1865 self.sample_pool.append(candidate) 

1866 indices = (candidate / self.cell_size).astype(int) 

1867 self.sample_grid[tuple(indices)] = candidate 

1868 curr_sample.append(candidate) 

1869 

1870 curr_sample: List[np.ndarray] = [] 

1871 

1872 if len(self.sample_pool) == 0: 

1873 # the pool is being initialized with a single random sample 

1874 add_sample(self.rng.random(self.d)) 

1875 num_drawn = 1 

1876 else: 

1877 num_drawn = 0 

1878 

1879 # exhaust sample pool to have up to n sample 

1880 while len(self.sample_pool) and num_drawn < n: 

1881 # select a sample from the available pool 

1882 idx_center = rng_integers(self.rng, len(self.sample_pool)) 

1883 center = self.sample_pool[idx_center] 

1884 del self.sample_pool[idx_center] 

1885 

1886 # generate candidates around the center sample 

1887 candidates = self.hypersphere_method( 

1888 center, self.radius * self.radius_factor, self.ncandidates 

1889 ) 

1890 

1891 # keep candidates that satisfy some conditions 

1892 for candidate in candidates: 

1893 if in_limits(candidate) and not in_neighborhood(candidate): 

1894 add_sample(candidate) 

1895 

1896 num_drawn += 1 

1897 if num_drawn >= n: 

1898 break 

1899 

1900 self.num_generated += num_drawn 

1901 return np.array(curr_sample) 

1902 

1903 def fill_space(self) -> np.ndarray: 

1904 """Draw ``n`` samples in the interval ``[0, 1]``. 

1905 

1906 Unlike `random`, this method will try to add points until 

1907 the space is full. Depending on ``candidates`` (and to a lesser extent 

1908 other parameters), some empty areas can still be present in the sample. 

1909 

1910 .. warning:: 

1911 

1912 This can be extremely slow in high dimensions or if the 

1913 ``radius`` is very small-with respect to the dimensionality. 

1914 

1915 Returns 

1916 ------- 

1917 sample : array_like (n, d) 

1918 QMC sample. 

1919 

1920 """ 

1921 return self.random(np.inf) # type: ignore[arg-type] 

1922 

1923 def reset(self) -> PoissonDisk: 

1924 """Reset the engine to base state. 

1925 

1926 Returns 

1927 ------- 

1928 engine : PoissonDisk 

1929 Engine reset to its base state. 

1930 

1931 """ 

1932 super().reset() 

1933 self._initialize_grid_pool() 

1934 return self 

1935 

1936 def _hypersphere_volume_sample( 

1937 self, center: np.ndarray, radius: DecimalNumber, 

1938 candidates: IntNumber = 1 

1939 ) -> np.ndarray: 

1940 """Uniform sampling within hypersphere.""" 

1941 # should remove samples within r/2 

1942 x = self.rng.standard_normal(size=(candidates, self.d)) 

1943 ssq = np.sum(x**2, axis=1) 

1944 fr = radius * gammainc(self.d/2, ssq/2)**(1/self.d) / np.sqrt(ssq) 

1945 fr_tiled = np.tile( 

1946 fr.reshape(-1, 1), (1, self.d) # type: ignore[arg-type] 

1947 ) 

1948 p = center + np.multiply(x, fr_tiled) 

1949 return p 

1950 

1951 def _hypersphere_surface_sample( 

1952 self, center: np.ndarray, radius: DecimalNumber, 

1953 candidates: IntNumber = 1 

1954 ) -> np.ndarray: 

1955 """Uniform sampling on the hypersphere's surface.""" 

1956 vec = self.rng.standard_normal(size=(candidates, self.d)) 

1957 vec /= np.linalg.norm(vec, axis=1)[:, None] 

1958 p = center + np.multiply(vec, radius) 

1959 return p 

1960 

1961 

1962class MultivariateNormalQMC: 

1963 r"""QMC sampling from a multivariate Normal :math:`N(\mu, \Sigma)`. 

1964 

1965 Parameters 

1966 ---------- 

1967 mean : array_like (d,) 

1968 The mean vector. Where ``d`` is the dimension. 

1969 cov : array_like (d, d), optional 

1970 The covariance matrix. If omitted, use `cov_root` instead. 

1971 If both `cov` and `cov_root` are omitted, use the identity matrix. 

1972 cov_root : array_like (d, d'), optional 

1973 A root decomposition of the covariance matrix, where ``d'`` may be less 

1974 than ``d`` if the covariance is not full rank. If omitted, use `cov`. 

1975 inv_transform : bool, optional 

1976 If True, use inverse transform instead of Box-Muller. Default is True. 

1977 engine : QMCEngine, optional 

1978 Quasi-Monte Carlo engine sampler. If None, `Sobol` is used. 

1979 seed : {None, int, `numpy.random.Generator`}, optional 

1980 Used only if `engine` is None. 

1981 If `seed` is an int or None, a new `numpy.random.Generator` is 

1982 created using ``np.random.default_rng(seed)``. 

1983 If `seed` is already a ``Generator`` instance, then the provided 

1984 instance is used. 

1985 

1986 Examples 

1987 -------- 

1988 >>> import matplotlib.pyplot as plt 

1989 >>> from scipy.stats import qmc 

1990 >>> dist = qmc.MultivariateNormalQMC(mean=[0, 5], cov=[[1, 0], [0, 1]]) 

1991 >>> sample = dist.random(512) 

1992 >>> _ = plt.scatter(sample[:, 0], sample[:, 1]) 

1993 >>> plt.show() 

1994 

1995 """ 

1996 

1997 def __init__( 

1998 self, mean: npt.ArrayLike, cov: Optional[npt.ArrayLike] = None, *, 

1999 cov_root: Optional[npt.ArrayLike] = None, 

2000 inv_transform: bool = True, 

2001 engine: Optional[QMCEngine] = None, 

2002 seed: SeedType = None 

2003 ) -> None: 

2004 mean = np.array(mean, copy=False, ndmin=1) 

2005 d = mean.shape[0] 

2006 if cov is not None: 

2007 # covariance matrix provided 

2008 cov = np.array(cov, copy=False, ndmin=2) 

2009 # check for square/symmetric cov matrix and mean vector has the 

2010 # same d 

2011 if not mean.shape[0] == cov.shape[0]: 

2012 raise ValueError("Dimension mismatch between mean and " 

2013 "covariance.") 

2014 if not np.allclose(cov, cov.transpose()): 

2015 raise ValueError("Covariance matrix is not symmetric.") 

2016 # compute Cholesky decomp; if it fails, do the eigen decomposition 

2017 try: 

2018 cov_root = np.linalg.cholesky(cov).transpose() 

2019 except np.linalg.LinAlgError: 

2020 eigval, eigvec = np.linalg.eigh(cov) 

2021 if not np.all(eigval >= -1.0e-8): 

2022 raise ValueError("Covariance matrix not PSD.") 

2023 eigval = np.clip(eigval, 0.0, None) 

2024 cov_root = (eigvec * np.sqrt(eigval)).transpose() 

2025 elif cov_root is not None: 

2026 # root decomposition provided 

2027 cov_root = np.atleast_2d(cov_root) 

2028 if not mean.shape[0] == cov_root.shape[0]: 

2029 raise ValueError("Dimension mismatch between mean and " 

2030 "covariance.") 

2031 else: 

2032 # corresponds to identity covariance matrix 

2033 cov_root = None 

2034 

2035 self._inv_transform = inv_transform 

2036 

2037 if not inv_transform: 

2038 # to apply Box-Muller, we need an even number of dimensions 

2039 engine_dim = 2 * math.ceil(d / 2) 

2040 else: 

2041 engine_dim = d 

2042 if engine is None: 

2043 self.engine = Sobol( 

2044 d=engine_dim, scramble=True, bits=30, seed=seed 

2045 ) # type: QMCEngine 

2046 elif isinstance(engine, QMCEngine): 

2047 if engine.d != engine_dim: 

2048 raise ValueError("Dimension of `engine` must be consistent" 

2049 " with dimensions of mean and covariance." 

2050 " If `inv_transform` is False, it must be" 

2051 " an even number.") 

2052 self.engine = engine 

2053 else: 

2054 raise ValueError("`engine` must be an instance of " 

2055 "`scipy.stats.qmc.QMCEngine` or `None`.") 

2056 

2057 self._mean = mean 

2058 self._corr_matrix = cov_root 

2059 

2060 self._d = d 

2061 

2062 def random(self, n: IntNumber = 1) -> np.ndarray: 

2063 """Draw `n` QMC samples from the multivariate Normal. 

2064 

2065 Parameters 

2066 ---------- 

2067 n : int, optional 

2068 Number of samples to generate in the parameter space. Default is 1. 

2069 

2070 Returns 

2071 ------- 

2072 sample : array_like (n, d) 

2073 Sample. 

2074 

2075 """ 

2076 base_samples = self._standard_normal_samples(n) 

2077 return self._correlate(base_samples) 

2078 

2079 def _correlate(self, base_samples: np.ndarray) -> np.ndarray: 

2080 if self._corr_matrix is not None: 

2081 return base_samples @ self._corr_matrix + self._mean 

2082 else: 

2083 # avoid multiplying with identity here 

2084 return base_samples + self._mean 

2085 

2086 def _standard_normal_samples(self, n: IntNumber = 1) -> np.ndarray: 

2087 """Draw `n` QMC samples from the standard Normal :math:`N(0, I_d)`. 

2088 

2089 Parameters 

2090 ---------- 

2091 n : int, optional 

2092 Number of samples to generate in the parameter space. Default is 1. 

2093 

2094 Returns 

2095 ------- 

2096 sample : array_like (n, d) 

2097 Sample. 

2098 

2099 """ 

2100 # get base samples 

2101 samples = self.engine.random(n) 

2102 if self._inv_transform: 

2103 # apply inverse transform 

2104 # (values to close to 0/1 result in inf values) 

2105 return stats.norm.ppf(0.5 + (1 - 1e-10) * (samples - 0.5)) # type: ignore[attr-defined] 

2106 else: 

2107 # apply Box-Muller transform (note: indexes starting from 1) 

2108 even = np.arange(0, samples.shape[-1], 2) 

2109 Rs = np.sqrt(-2 * np.log(samples[:, even])) 

2110 thetas = 2 * math.pi * samples[:, 1 + even] 

2111 cos = np.cos(thetas) 

2112 sin = np.sin(thetas) 

2113 transf_samples = np.stack([Rs * cos, Rs * sin], 

2114 -1).reshape(n, -1) 

2115 # make sure we only return the number of dimension requested 

2116 return transf_samples[:, : self._d] 

2117 

2118 

2119class MultinomialQMC: 

2120 r"""QMC sampling from a multinomial distribution. 

2121 

2122 Parameters 

2123 ---------- 

2124 pvals : array_like (k,) 

2125 Vector of probabilities of size ``k``, where ``k`` is the number 

2126 of categories. Elements must be non-negative and sum to 1. 

2127 n_trials : int 

2128 Number of trials. 

2129 engine : QMCEngine, optional 

2130 Quasi-Monte Carlo engine sampler. If None, `Sobol` is used. 

2131 seed : {None, int, `numpy.random.Generator`}, optional 

2132 Used only if `engine` is None. 

2133 If `seed` is an int or None, a new `numpy.random.Generator` is 

2134 created using ``np.random.default_rng(seed)``. 

2135 If `seed` is already a ``Generator`` instance, then the provided 

2136 instance is used. 

2137 

2138 Examples 

2139 -------- 

2140 Let's define 3 categories and for a given sample, the sum of the trials 

2141 of each category is 8. The number of trials per category is determined 

2142 by the `pvals` associated to each category. 

2143 Then, we sample this distribution 64 times. 

2144 

2145 >>> import matplotlib.pyplot as plt 

2146 >>> from scipy.stats import qmc 

2147 >>> dist = qmc.MultinomialQMC( 

2148 ... pvals=[0.2, 0.4, 0.4], n_trials=10, engine=qmc.Halton(d=1) 

2149 ... ) 

2150 >>> sample = dist.random(64) 

2151 

2152 We can plot the sample and verify that the median of number of trials 

2153 for each category is following the `pvals`. That would be 

2154 ``pvals * n_trials = [2, 4, 4]``. 

2155 

2156 >>> fig, ax = plt.subplots() 

2157 >>> ax.yaxis.get_major_locator().set_params(integer=True) 

2158 >>> _ = ax.boxplot(sample) 

2159 >>> ax.set(xlabel="Categories", ylabel="Trials") 

2160 >>> plt.show() 

2161 

2162 """ 

2163 

2164 def __init__( 

2165 self, pvals: npt.ArrayLike, n_trials: IntNumber, 

2166 *, engine: Optional[QMCEngine] = None, 

2167 seed: SeedType = None 

2168 ) -> None: 

2169 self.pvals = np.array(pvals, copy=False, ndmin=1) 

2170 if np.min(pvals) < 0: 

2171 raise ValueError('Elements of pvals must be non-negative.') 

2172 if not np.isclose(np.sum(pvals), 1): 

2173 raise ValueError('Elements of pvals must sum to 1.') 

2174 self.n_trials = n_trials 

2175 if engine is None: 

2176 self.engine = Sobol( 

2177 d=1, scramble=True, bits=30, seed=seed 

2178 ) # type: QMCEngine 

2179 elif isinstance(engine, QMCEngine): 

2180 if engine.d != 1: 

2181 raise ValueError("Dimension of `engine` must be 1.") 

2182 self.engine = engine 

2183 else: 

2184 raise ValueError("`engine` must be an instance of " 

2185 "`scipy.stats.qmc.QMCEngine` or `None`.") 

2186 

2187 def random(self, n: IntNumber = 1) -> np.ndarray: 

2188 """Draw `n` QMC samples from the multinomial distribution. 

2189 

2190 Parameters 

2191 ---------- 

2192 n : int, optional 

2193 Number of samples to generate in the parameter space. Default is 1. 

2194 

2195 Returns 

2196 ------- 

2197 samples : array_like (n, pvals) 

2198 Sample. 

2199 

2200 """ 

2201 sample = np.empty((n, len(self.pvals))) 

2202 for i in range(n): 

2203 base_draws = self.engine.random(self.n_trials).ravel() 

2204 p_cumulative = np.empty_like(self.pvals, dtype=float) 

2205 _fill_p_cumulative(np.array(self.pvals, dtype=float), p_cumulative) 

2206 sample_ = np.zeros_like(self.pvals, dtype=int) 

2207 _categorize(base_draws, p_cumulative, sample_) 

2208 sample[i] = sample_ 

2209 return sample 

2210 

2211 

2212def _select_optimizer( 

2213 optimization: Optional[Literal["random-cd", "lloyd"]], config: Dict 

2214) -> Optional[Callable]: 

2215 """A factory for optimization methods.""" 

2216 optimization_method: Dict[str, Callable] = { 

2217 "random-cd": _random_cd, 

2218 "lloyd": _lloyd_centroidal_voronoi_tessellation 

2219 } 

2220 

2221 optimizer: Optional[partial] 

2222 if optimization is not None: 

2223 try: 

2224 optimization = optimization.lower() # type: ignore[assignment] 

2225 optimizer_ = optimization_method[optimization] 

2226 except KeyError as exc: 

2227 message = (f"{optimization!r} is not a valid optimization" 

2228 f" method. It must be one of" 

2229 f" {set(optimization_method)!r}") 

2230 raise ValueError(message) from exc 

2231 

2232 # config 

2233 optimizer = partial(optimizer_, **config) 

2234 else: 

2235 optimizer = None 

2236 

2237 return optimizer 

2238 

2239 

2240def _random_cd( 

2241 best_sample: np.ndarray, n_iters: int, n_nochange: int, rng: GeneratorType, 

2242 **kwargs: Dict 

2243) -> np.ndarray: 

2244 """Optimal LHS on CD. 

2245 

2246 Create a base LHS and do random permutations of coordinates to 

2247 lower the centered discrepancy. 

2248 Because it starts with a normal LHS, it also works with the 

2249 `centered` keyword argument. 

2250 

2251 Two stopping criterion are used to stop the algorithm: at most, 

2252 `n_iters` iterations are performed; or if there is no improvement 

2253 for `n_nochange` consecutive iterations. 

2254 """ 

2255 del kwargs # only use keywords which are defined, needed by factory 

2256 

2257 n, d = best_sample.shape 

2258 

2259 if d == 0 or n == 0: 

2260 return np.empty((n, d)) 

2261 

2262 best_disc = discrepancy(best_sample) 

2263 

2264 if n == 1: 

2265 return best_sample 

2266 

2267 bounds = ([0, d - 1], 

2268 [0, n - 1], 

2269 [0, n - 1]) 

2270 

2271 n_nochange_ = 0 

2272 n_iters_ = 0 

2273 while n_nochange_ < n_nochange and n_iters_ < n_iters: 

2274 n_iters_ += 1 

2275 

2276 col = rng_integers(rng, *bounds[0], endpoint=True) # type: ignore[misc] 

2277 row_1 = rng_integers(rng, *bounds[1], endpoint=True) # type: ignore[misc] 

2278 row_2 = rng_integers(rng, *bounds[2], endpoint=True) # type: ignore[misc] 

2279 disc = _perturb_discrepancy(best_sample, 

2280 row_1, row_2, col, 

2281 best_disc) 

2282 if disc < best_disc: 

2283 best_sample[row_1, col], best_sample[row_2, col] = ( 

2284 best_sample[row_2, col], best_sample[row_1, col]) 

2285 

2286 best_disc = disc 

2287 n_nochange_ = 0 

2288 else: 

2289 n_nochange_ += 1 

2290 

2291 return best_sample 

2292 

2293 

2294def _l1_norm(sample: np.ndarray) -> float: 

2295 return distance.pdist(sample, 'cityblock').min() 

2296 

2297 

2298def _lloyd_iteration( 

2299 sample: np.ndarray, 

2300 decay: float, 

2301 qhull_options: str 

2302) -> np.ndarray: 

2303 """Lloyd-Max algorithm iteration. 

2304 

2305 Based on the implementation of Stéfan van der Walt: 

2306 

2307 https://github.com/stefanv/lloyd 

2308 

2309 which is: 

2310 

2311 Copyright (c) 2021-04-21 Stéfan van der Walt 

2312 https://github.com/stefanv/lloyd 

2313 MIT License 

2314 

2315 Parameters 

2316 ---------- 

2317 sample : array_like (n, d) 

2318 The sample to iterate on. 

2319 decay : float 

2320 Relaxation decay. A positive value would move the samples toward 

2321 their centroid, and negative value would move them away. 

2322 1 would move the samples to their centroid. 

2323 qhull_options : str 

2324 Additional options to pass to Qhull. See Qhull manual 

2325 for details. (Default: "Qbb Qc Qz Qj Qx" for ndim > 4 and 

2326 "Qbb Qc Qz Qj" otherwise.) 

2327 

2328 Returns 

2329 ------- 

2330 sample : array_like (n, d) 

2331 The sample after an iteration of Lloyd's algorithm. 

2332 

2333 """ 

2334 new_sample = np.empty_like(sample) 

2335 

2336 voronoi = Voronoi(sample, qhull_options=qhull_options) 

2337 

2338 for ii, idx in enumerate(voronoi.point_region): 

2339 # the region is a series of indices into self.voronoi.vertices 

2340 # remove samples at infinity, designated by index -1 

2341 region = [i for i in voronoi.regions[idx] if i != -1] 

2342 

2343 # get the vertices for this region 

2344 verts = voronoi.vertices[region] 

2345 

2346 # clipping would be wrong, we need to intersect 

2347 # verts = np.clip(verts, 0, 1) 

2348 

2349 # move samples towards centroids: 

2350 # Centroid in n-D is the mean for uniformly distributed nodes 

2351 # of a geometry. 

2352 centroid = np.mean(verts, axis=0) 

2353 new_sample[ii] = sample[ii] + (centroid - sample[ii]) * decay 

2354 

2355 # only update sample to centroid within the region 

2356 is_valid = np.all(np.logical_and(new_sample >= 0, new_sample <= 1), axis=1) 

2357 sample[is_valid] = new_sample[is_valid] 

2358 

2359 return sample 

2360 

2361 

2362def _lloyd_centroidal_voronoi_tessellation( 

2363 sample: npt.ArrayLike, 

2364 *, 

2365 tol: DecimalNumber = 1e-5, 

2366 maxiter: IntNumber = 10, 

2367 qhull_options: Optional[str] = None, 

2368 **kwargs: Dict 

2369) -> np.ndarray: 

2370 """Approximate Centroidal Voronoi Tessellation. 

2371 

2372 Perturb samples in N-dimensions using Lloyd-Max algorithm. 

2373 

2374 Parameters 

2375 ---------- 

2376 sample : array_like (n, d) 

2377 The sample to iterate on. With ``n`` the number of samples and ``d`` 

2378 the dimension. Samples must be in :math:`[0, 1]^d`, with ``d>=2``. 

2379 tol : float, optional 

2380 Tolerance for termination. If the min of the L1-norm over the samples 

2381 changes less than `tol`, it stops the algorithm. Default is 1e-5. 

2382 maxiter : int, optional 

2383 Maximum number of iterations. It will stop the algorithm even if 

2384 `tol` is above the threshold. 

2385 Too many iterations tend to cluster the samples as a hypersphere. 

2386 Default is 10. 

2387 qhull_options : str, optional 

2388 Additional options to pass to Qhull. See Qhull manual 

2389 for details. (Default: "Qbb Qc Qz Qj Qx" for ndim > 4 and 

2390 "Qbb Qc Qz Qj" otherwise.) 

2391 

2392 Returns 

2393 ------- 

2394 sample : array_like (n, d) 

2395 The sample after being processed by Lloyd-Max algorithm. 

2396 

2397 Notes 

2398 ----- 

2399 Lloyd-Max algorithm is an iterative process with the purpose of improving 

2400 the dispersion of samples. For given sample: (i) compute a Voronoi 

2401 Tessellation; (ii) find the centroid of each Voronoi cell; (iii) move the 

2402 samples toward the centroid of their respective cell. See [1]_, [2]_. 

2403 

2404 A relaxation factor is used to control how fast samples can move at each 

2405 iteration. This factor is starting at 2 and ending at 1 after `maxiter` 

2406 following an exponential decay. 

2407 

2408 The process converges to equally spaced samples. It implies that measures 

2409 like the discrepancy could suffer from too many iterations. On the other 

2410 hand, L1 and L2 distances should improve. This is especially true with 

2411 QMC methods which tend to favor the discrepancy over other criteria. 

2412 

2413 .. note:: 

2414 

2415 The current implementation does not intersect the Voronoi Tessellation 

2416 with the boundaries. This implies that for a low number of samples, 

2417 empirically below 20, no Voronoi cell is touching the boundaries. 

2418 Hence, samples cannot be moved close to the boundaries. 

2419 

2420 Further improvements could consider the samples at infinity so that 

2421 all boundaries are segments of some Voronoi cells. This would fix 

2422 the computation of the centroid position. 

2423 

2424 .. warning:: 

2425 

2426 The Voronoi Tessellation step is expensive and quickly becomes 

2427 intractable with dimensions as low as 10 even for a sample 

2428 of size as low as 1000. 

2429 

2430 .. versionadded:: 1.9.0 

2431 

2432 References 

2433 ---------- 

2434 .. [1] Lloyd. "Least Squares Quantization in PCM". 

2435 IEEE Transactions on Information Theory, 1982. 

2436 .. [2] Max J. "Quantizing for minimum distortion". 

2437 IEEE Transactions on Information Theory, 1960. 

2438 

2439 Examples 

2440 -------- 

2441 >>> import numpy as np 

2442 >>> from scipy.spatial import distance 

2443 >>> rng = np.random.default_rng() 

2444 >>> sample = rng.random((128, 2)) 

2445 

2446 .. note:: 

2447 

2448 The samples need to be in :math:`[0, 1]^d`. `scipy.stats.qmc.scale` 

2449 can be used to scale the samples from their 

2450 original bounds to :math:`[0, 1]^d`. And back to their original bounds. 

2451 

2452 Compute the quality of the sample using the L1 criterion. 

2453 

2454 >>> def l1_norm(sample): 

2455 ... return distance.pdist(sample, 'cityblock').min() 

2456 

2457 >>> l1_norm(sample) 

2458 0.00161... # random 

2459 

2460 Now process the sample using Lloyd's algorithm and check the improvement 

2461 on the L1. The value should increase. 

2462 

2463 >>> sample = _lloyd_centroidal_voronoi_tessellation(sample) 

2464 >>> l1_norm(sample) 

2465 0.0278... # random 

2466 

2467 """ 

2468 del kwargs # only use keywords which are defined, needed by factory 

2469 

2470 sample = np.asarray(sample).copy() 

2471 

2472 if not sample.ndim == 2: 

2473 raise ValueError('`sample` is not a 2D array') 

2474 

2475 if not sample.shape[1] >= 2: 

2476 raise ValueError('`sample` dimension is not >= 2') 

2477 

2478 # Checking that sample is within the hypercube 

2479 if (sample.max() > 1.) or (sample.min() < 0.): 

2480 raise ValueError('`sample` is not in unit hypercube') 

2481 

2482 if qhull_options is None: 

2483 qhull_options = 'Qbb Qc Qz QJ' 

2484 

2485 if sample.shape[1] >= 5: 

2486 qhull_options += ' Qx' 

2487 

2488 # Fit an exponential to be 2 at 0 and 1 at `maxiter`. 

2489 # The decay is used for relaxation. 

2490 # analytical solution for y=exp(-maxiter/x) - 0.1 

2491 root = -maxiter / np.log(0.1) 

2492 decay = [np.exp(-x / root)+0.9 for x in range(maxiter)] 

2493 

2494 l1_old = _l1_norm(sample=sample) 

2495 for i in range(maxiter): 

2496 sample = _lloyd_iteration( 

2497 sample=sample, decay=decay[i], 

2498 qhull_options=qhull_options, 

2499 ) 

2500 

2501 l1_new = _l1_norm(sample=sample) 

2502 

2503 if abs(l1_new - l1_old) < tol: 

2504 break 

2505 else: 

2506 l1_old = l1_new 

2507 

2508 return sample 

2509 

2510 

2511def _validate_workers(workers: IntNumber = 1) -> IntNumber: 

2512 """Validate `workers` based on platform and value. 

2513 

2514 Parameters 

2515 ---------- 

2516 workers : int, optional 

2517 Number of workers to use for parallel processing. If -1 is 

2518 given all CPU threads are used. Default is 1. 

2519 

2520 Returns 

2521 ------- 

2522 Workers : int 

2523 Number of CPU used by the algorithm 

2524 

2525 """ 

2526 workers = int(workers) 

2527 if workers == -1: 

2528 workers = os.cpu_count() # type: ignore[assignment] 

2529 if workers is None: 

2530 raise NotImplementedError( 

2531 "Cannot determine the number of cpus using os.cpu_count(), " 

2532 "cannot use -1 for the number of workers" 

2533 ) 

2534 elif workers <= 0: 

2535 raise ValueError(f"Invalid number of workers: {workers}, must be -1 " 

2536 "or > 0") 

2537 

2538 return workers 

2539 

2540 

2541def _validate_bounds( 

2542 l_bounds: npt.ArrayLike, u_bounds: npt.ArrayLike, d: int 

2543) -> Tuple[np.ndarray, ...]: 

2544 """Bounds input validation. 

2545 

2546 Parameters 

2547 ---------- 

2548 l_bounds, u_bounds : array_like (d,) 

2549 Lower and upper bounds. 

2550 d : int 

2551 Dimension to use for broadcasting. 

2552 

2553 Returns 

2554 ------- 

2555 l_bounds, u_bounds : array_like (d,) 

2556 Lower and upper bounds. 

2557 

2558 """ 

2559 try: 

2560 lower = np.broadcast_to(l_bounds, d) 

2561 upper = np.broadcast_to(u_bounds, d) 

2562 except ValueError as exc: 

2563 msg = ("'l_bounds' and 'u_bounds' must be broadcastable and respect" 

2564 " the sample dimension") 

2565 raise ValueError(msg) from exc 

2566 

2567 if not np.all(lower < upper): 

2568 raise ValueError("Bounds are not consistent 'l_bounds' < 'u_bounds'") 

2569 

2570 return lower, upper