Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/conjecture/providers.py: 45%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

335 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import abc 

12import contextlib 

13import math 

14import sys 

15import warnings 

16from collections.abc import Iterable 

17from contextlib import AbstractContextManager, contextmanager 

18from functools import cached_property 

19from random import Random 

20from sys import float_info 

21from typing import ( 

22 TYPE_CHECKING, 

23 Any, 

24 ClassVar, 

25 Literal, 

26 Optional, 

27 TypeAlias, 

28 TypedDict, 

29 TypeVar, 

30) 

31 

32from sortedcontainers import SortedSet 

33 

34from hypothesis.errors import HypothesisWarning 

35from hypothesis.internal.cache import LRUCache 

36from hypothesis.internal.compat import WINDOWS, int_from_bytes 

37from hypothesis.internal.conjecture.choice import ( 

38 ChoiceConstraintsT, 

39 ChoiceT, 

40 ChoiceTypeT, 

41 FloatConstraints, 

42 choice_constraints_key, 

43 choice_permitted, 

44) 

45from hypothesis.internal.conjecture.floats import lex_to_float 

46from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes 

47from hypothesis.internal.conjecture.utils import ( 

48 INT_SIZES, 

49 INT_SIZES_SAMPLER, 

50 Sampler, 

51 many, 

52) 

53from hypothesis.internal.constants_ast import ( 

54 Constants, 

55 constants_from_module, 

56 is_local_module_file, 

57) 

58from hypothesis.internal.floats import ( 

59 SIGNALING_NAN, 

60 float_to_int, 

61 make_float_clamper, 

62 next_down, 

63 next_up, 

64) 

65from hypothesis.internal.intervalsets import IntervalSet 

66from hypothesis.internal.observability import InfoObservationType, TestCaseObservation 

67 

68if TYPE_CHECKING: 

69 from hypothesis.internal.conjecture.data import ConjectureData 

70 from hypothesis.internal.constants_ast import ConstantT 

71 

72T = TypeVar("T") 

73LifetimeT: TypeAlias = Literal["test_case", "test_function"] 

74COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" 

75 

76 

77#: Registered Hypothesis backends. This is a dictionary where keys are the name 

78#: to be used in |settings.backend|. The value of a key can be either: 

79#: 

80#: * A string corresponding to an importable absolute path of a 

81#: |PrimitiveProvider| subclass 

82#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the 

83#: class) 

84#: 

85#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass 

86#: when the backend is requested by a test's |settings.backend| value. 

87#: 

88#: For example, the default Hypothesis backend is registered as: 

89#: 

90#: .. code-block:: python 

91#: 

92#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS 

93#: 

94#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider" 

95#: # or 

96#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider 

97#: 

98#: And can be used with: 

99#: 

100#: .. code-block:: python 

101#: 

102#: from hypothesis import given, settings, strategies as st 

103#: 

104#: @given(st.integers()) 

105#: @settings(backend="hypothesis") 

106#: def f(n): 

107#: pass 

108#: 

109#: Though, as ``backend="hypothesis"`` is the default setting, the above would 

110#: typically not have any effect. 

111#: 

112#: For third-party backend authors, we strongly encourage ensuring that 

113#: ``import hypothesis`` does not automatically import the expensive parts of 

114#: your package, by: 

115#: 

116#: - setting a string path here, instead of a provider class 

117#: - ensuring the registered hypothesis plugin path references a path which just 

118#: sets AVAILABLE_PROVIDERS and does not import your package 

119AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = { 

120 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider", 

121 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider", 

122} 

123# cache the choice_permitted constants for a particular set of constraints. 

124CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]] 

125CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]] 

126CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024) 

127 

128 

129_constants_integers = ( 

130 # powers of 2 

131 [2**n for n in range(16, 66)] 

132 # powers of 10 

133 + [10**n for n in range(5, 20)] 

134 # factorials 

135 + [math.factorial(n) for n in range(9, 21)] 

136 # a few primorial numbers https://en.wikipedia.org/wiki/Primorial 

137 + [ 

138 510510, 

139 6469693230, 

140 304250263527210, 

141 32589158477190044730, 

142 ] 

143) 

144_constants_integers.extend( 

145 [n - 1 for n in _constants_integers] + [n + 1 for n in _constants_integers] 

146) 

147_constants_integers.extend([-x for x in _constants_integers]) 

148 

149# arbitrary cutoffs to keep our list bounded 

150assert all(50_000 <= abs(n) <= 2**66 for n in _constants_integers) 

151 

152_constant_floats = ( 

153 [ 

154 0.5, 

155 1.1, 

156 1.5, 

157 1.9, 

158 1.0 / 3, 

159 10e6, 

160 10e-6, 

161 1.175494351e-38, 

162 next_up(0.0), 

163 float_info.min, 

164 float_info.max, 

165 3.402823466e38, 

166 9007199254740992.0, 

167 1 - 10e-6, 

168 2 + 10e-6, 

169 1.192092896e-07, 

170 2.2204460492503131e-016, 

171 ] 

172 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32 

173 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64 

174) 

175_constant_floats.extend([-x for x in _constant_floats]) 

176assert all(isinstance(f, float) for f in _constant_floats) 

177 

178_constant_strings = { 

179 # strings which can be interpreted as code / logic 

180 "undefined", 

181 "null", 

182 "NULL", 

183 "nil", 

184 "NIL", 

185 "true", 

186 "false", 

187 "True", 

188 "False", 

189 "TRUE", 

190 "FALSE", 

191 "None", 

192 "none", 

193 "if", 

194 "then", 

195 "else", 

196 "__dict__", 

197 "__proto__", # javascript 

198 # strings which can be interpreted as a number 

199 "0", 

200 "1e100", 

201 "0..0", 

202 "0/0", 

203 "1/0", 

204 "+0.0", 

205 "Infinity", 

206 "-Infinity", 

207 "Inf", 

208 "INF", 

209 "NaN", 

210 "9" * 30, 

211 # common ascii characters 

212 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~", 

213 # common unicode characters 

214 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±", 

215 # characters which increase in length when lowercased 

216 "Ⱥ", 

217 "Ⱦ", 

218 # ligatures 

219 "æœÆŒffʤʨß", 

220 # emoticons 

221 "(╯°□°)╯︵ ┻━┻)", 

222 # emojis 

223 "😍", 

224 "🇺🇸", 

225 # emoji modifiers 

226 "🏻", # U+1F3FB Light Skin Tone, 

227 "👍🏻", # 👍 followed by U+1F3FB 

228 # RTL text 

229 "الكل في المجمو عة", 

230 # Ogham text, which contains the only character in the Space Separators 

231 # unicode category (Zs) that isn't visually blank:  . # noqa: RUF003 

232 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜", 

233 # thai consonant + spacing vowel combinations, which have unusual visual combining behavior 

234 "กา", 

235 "ก ำกำ", 

236 # readable variations on text (bolt/italic/script) 

237 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", 

238 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", 

239 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", 

240 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", 

241 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", 

242 # upsidown text 

243 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", 

244 # reserved strings in windows 

245 "NUL", 

246 "COM1", 

247 "LPT1", 

248 # scunthorpe problem 

249 "Scunthorpe", 

250 # zalgo text 

251 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", 

252 # 

253 # examples from https://faultlore.com/blah/text-hates-you/ 

254 "मनीष منش", 

255 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله", 

256 "lorem لا بسم الله ipsum 你好1234你好", 

257 # unicode charaacters causing unconditional line breaks, as defined by UAX #14: 

258 # https://www.unicode.org/reports/tr14/. 

259 # 

260 # We've seen multiple bugs caused by assuming `str.splitlines` is equivalent to 

261 # splitting over "\n", while it actually splits over all line breaks! 

262 # 

263 # We intersperse the line breaks with normal characters to increase the likelihood 

264 # of triggering such a bug. 

265 ( 

266 "a" 

267 "\u000a" # line feed (class: LF) 

268 "b" 

269 "\u000d" # carriage return (class: CR) 

270 "c" 

271 "\u0085" # next line (class: NL) 

272 "d" 

273 "\u000b" # line tabulation (class: BK) 

274 "e" 

275 "\u000c" # form feed (class: BK) 

276 "f" 

277 "\u2028" # line separator (class: BK) 

278 "g" 

279 "\u2029" # paragraph separator (class: BK) 

280 "h" 

281 "\u000d\u000a" # CR+LF 

282 "i" 

283 ), 

284} 

285 

286 

287# we don't actually care what order the constants are sorted in, just that the 

288# ordering is deterministic. 

289GLOBAL_CONSTANTS = Constants( 

290 integers=SortedSet(_constants_integers), 

291 floats=SortedSet(_constant_floats, key=float_to_int), 

292 bytes=SortedSet(), 

293 strings=SortedSet(_constant_strings), 

294) 

295 

296_local_constants = Constants( 

297 integers=SortedSet(), 

298 floats=SortedSet(key=float_to_int), 

299 bytes=SortedSet(), 

300 strings=SortedSet(), 

301) 

302# modules that we've already seen and processed for local constants. These are 

303# are all modules, not necessarily local ones. This lets us quickly see which 

304# modules are new without an expensive path.resolve() or is_local_module_file 

305# cache lookup. 

306# We track by module object when hashable, falling back to the module name 

307# (str key in sys.modules) for unhashable entries like SimpleNamespace. 

308_seen_modules: set = set() 

309_sys_modules_len: int | None = None 

310 

311 

312def _get_local_constants() -> Constants: 

313 global _sys_modules_len, _local_constants 

314 

315 if sys.platform == "emscripten": # pragma: no cover 

316 # pyodide builds bundle the stdlib in a nonstandard location, like 

317 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of 

318 # the stdlib as local code and slowing down on emscripten, instead return 

319 # that nothing is local. 

320 # 

321 # pyodide may provide some way to distinguish stdlib/third-party/local 

322 # code. I haven't looked into it. If they do, we should correctly implement 

323 # ModuleLocation for pyodide instead of this. 

324 return _local_constants 

325 

326 count_constants = len(_local_constants) 

327 # We call this function once per HypothesisProvider instance, i.e. once per 

328 # input, so it needs to be performant. The logic here is more complicated 

329 # than necessary because of this. 

330 # 

331 # First, we check whether there are any new modules with a very cheap length 

332 # check. This check can be fooled if a module is added while another module is 

333 # removed, but the more correct check against tuple(sys.modules.keys()) is 

334 # substantially more expensive. Such a new module would eventually be discovered 

335 # if / when the length changes again in the future. 

336 # 

337 # If the length has changed, we find just modules we haven't seen before. Of 

338 # those, we find the ones which correspond to local modules, and extract their 

339 # constants. 

340 

341 # careful: store sys.modules length when we first check to avoid race conditions 

342 # with other threads loading a module before we set _sys_modules_len. 

343 if (sys_modules_len := len(sys.modules)) != _sys_modules_len: 

344 new_modules = [] 

345 for name, module in list(sys.modules.items()): 

346 try: 

347 seen = module in _seen_modules 

348 except TypeError: 

349 # unhashable module (e.g. SimpleNamespace); fall back to name 

350 seen = name in _seen_modules 

351 if not seen: 

352 new_modules.append((name, module)) 

353 # Repeated SortedSet unions are expensive. Do the initial unions on a 

354 # set(), then do a one-time union with _local_constants after. 

355 new_constants = Constants() 

356 for name, module in new_modules: 

357 if ( 

358 module_file := getattr(module, "__file__", None) 

359 ) is not None and is_local_module_file(module_file): 

360 new_constants |= constants_from_module(module) 

361 try: 

362 _seen_modules.add(module) 

363 except TypeError: 

364 _seen_modules.add(name) 

365 _local_constants |= new_constants 

366 _sys_modules_len = sys_modules_len 

367 

368 # if we add any new constant, invalidate the constant cache for permitted values. 

369 # A more efficient approach would be invalidating just the keys with this 

370 # choice_type. 

371 if len(_local_constants) > count_constants: 

372 CONSTANTS_CACHE.cache.clear() 

373 

374 return _local_constants 

375 

376 

377@contextmanager 

378def with_register_backend(name, provider_cls): 

379 try: 

380 AVAILABLE_PROVIDERS[name] = provider_cls 

381 yield 

382 finally: 

383 del AVAILABLE_PROVIDERS[name] 

384 

385 

386class _BackendInfoMsg(TypedDict): 

387 type: InfoObservationType 

388 title: str 

389 content: str | dict[str, Any] 

390 

391 

392# TODO_DOCS: link to choice sequence explanation page 

393 

394 

395class PrimitiveProvider(abc.ABC): 

396 """ 

397 |PrimitiveProvider| is the implementation interface of a 

398 :ref:`Hypothesis backend <alternative-backends>`. 

399 

400 A |PrimitiveProvider| is required to implement the following five 

401 ``draw_*`` methods: 

402 

403 * |PrimitiveProvider.draw_integer| 

404 * |PrimitiveProvider.draw_boolean| 

405 * |PrimitiveProvider.draw_float| 

406 * |PrimitiveProvider.draw_string| 

407 * |PrimitiveProvider.draw_bytes| 

408 

409 Each strategy in Hypothesis generates values by drawing a series of choices 

410 from these five methods. By overriding them, a |PrimitiveProvider| can control 

411 the distribution of inputs generated by Hypothesis. 

412 

413 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider| 

414 which uses an SMT solver to generate inputs that uncover new branches. 

415 

416 Once you implement a |PrimitiveProvider|, you can make it available for use 

417 through |AVAILABLE_PROVIDERS|. 

418 """ 

419 

420 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function`` 

421 #: or ``test_case``. 

422 #: 

423 #: If ``test_function`` (the default), a single provider instance will be 

424 #: instantiated and used for the entirety of each test function (i.e., roughly 

425 #: one provider per |@given| annotation). This can be useful for tracking state 

426 #: over the entirety of a test function. 

427 #: 

428 #: If ``test_case``, a new provider instance will be instantiated and used for 

429 #: each input Hypothesis generates. 

430 #: 

431 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will 

432 #: be ``None`` for a lifetime of ``test_function``, and an instance of 

433 #: ``ConjectureData`` for a lifetime of ``test_case``. 

434 #: 

435 #: Third-party providers likely want to set a lifetime of ``test_function``. 

436 lifetime: ClassVar[LifetimeT] = "test_function" 

437 

438 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values 

439 #: which record operations performed on them in order to discover new paths. 

440 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting 

441 #: with symbolic choices returned by the provider in any way that would force 

442 #: the solver to narrow the range of possible values for that symbolic. 

443 #: 

444 #: Setting this to ``True`` disables some hypothesis features and optimizations. 

445 #: Only set this to ``True`` if it is necessary for your backend. 

446 avoid_realization: ClassVar[bool] = False 

447 

448 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a 

449 #: callback via |add_observability_callback|, enabling observability during 

450 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation| 

451 #: will never be called by Hypothesis. 

452 #: 

453 #: The opt-in behavior of observability is because enabling observability 

454 #: might increase runtime or memory usage. 

455 add_observability_callback: ClassVar[bool] = False 

456 

457 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: 

458 self._cd = conjecturedata 

459 

460 @abc.abstractmethod 

461 def draw_boolean( 

462 self, 

463 p: float = 0.5, 

464 ) -> bool: 

465 """ 

466 Draw a boolean choice. 

467 

468 Parameters 

469 ---------- 

470 p: float 

471 The probability of returning ``True``. Between 0 and 1 inclusive. 

472 

473 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by 

474 Hypothesis, and may be ignored by the backend. 

475 

476 If ``0``, the provider must return ``False``. If ``1``, the provider 

477 must return ``True``. 

478 """ 

479 raise NotImplementedError 

480 

481 @abc.abstractmethod 

482 def draw_integer( 

483 self, 

484 min_value: int | None = None, 

485 max_value: int | None = None, 

486 *, 

487 weights: dict[int, float] | None = None, 

488 shrink_towards: int = 0, 

489 ) -> int: 

490 """ 

491 Draw an integer choice. 

492 

493 Parameters 

494 ---------- 

495 min_value : int | None 

496 (Inclusive) lower bound on the integer value. If ``None``, there is 

497 no lower bound. 

498 max_value : int | None 

499 (Inclusive) upper bound on the integer value. If ``None``, there is 

500 no upper bound. 

501 weights: dict[int, float] | None 

502 Maps keys in the range [``min_value``, ``max_value``] to the probability 

503 of returning that key. 

504 shrink_towards: int 

505 The integer to shrink towards. This is not used during generation and 

506 can be ignored by backends. 

507 """ 

508 raise NotImplementedError 

509 

510 @abc.abstractmethod 

511 def draw_float( 

512 self, 

513 *, 

514 min_value: float = -math.inf, 

515 max_value: float = math.inf, 

516 allow_nan: bool = True, 

517 smallest_nonzero_magnitude: float, 

518 ) -> float: 

519 """ 

520 Draw a float choice. 

521 

522 Parameters 

523 ---------- 

524 min_value : float 

525 (Inclusive) lower bound on the float value. 

526 max_value : float 

527 (Inclusive) upper bound on the float value. 

528 allow_nan : bool 

529 If ``False``, it is invalid to return ``math.nan``. 

530 smallest_nonzero_magnitude : float 

531 The smallest allowed nonzero magnitude. ``draw_float`` should not 

532 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``. 

533 """ 

534 raise NotImplementedError 

535 

536 @abc.abstractmethod 

537 def draw_string( 

538 self, 

539 intervals: IntervalSet, 

540 *, 

541 min_size: int = 0, 

542 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

543 ) -> str: 

544 """ 

545 Draw a string choice. 

546 

547 Parameters 

548 ---------- 

549 intervals : IntervalSet 

550 The set of codepoints to sample from. 

551 min_size : int 

552 (Inclusive) lower bound on the string length. 

553 max_size : int 

554 (Inclusive) upper bound on the string length. 

555 """ 

556 raise NotImplementedError 

557 

558 @abc.abstractmethod 

559 def draw_bytes( 

560 self, 

561 min_size: int = 0, 

562 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

563 ) -> bytes: 

564 """ 

565 Draw a bytes choice. 

566 

567 Parameters 

568 ---------- 

569 min_size : int 

570 (Inclusive) lower bound on the bytes length. 

571 max_size : int 

572 (Inclusive) upper bound on the bytes length. 

573 """ 

574 raise NotImplementedError 

575 

576 def per_test_case_context_manager(self) -> AbstractContextManager: 

577 """ 

578 Returns a context manager which will be entered each time Hypothesis 

579 starts generating and executing one test case, and exited when that test 

580 case finishes generating and executing, including if any exception is 

581 thrown. 

582 

583 In the lifecycle of a Hypothesis test, this is called before 

584 generating strategy values for each test case. This is just before any 

585 :ref:`custom executor <custom-function-execution>` is called. 

586 

587 Even if not returning a custom context manager, |PrimitiveProvider| 

588 subclasses are welcome to override this method to know when Hypothesis 

589 starts and ends the execution of a single test case. 

590 """ 

591 return contextlib.nullcontext() 

592 

593 def realize(self, value: T, *, for_failure: bool = False) -> T: 

594 """ 

595 Called whenever hypothesis requires a concrete (non-symbolic) value from 

596 a potentially symbolic value. Hypothesis will not check that ``value`` is 

597 symbolic before calling ``realize``, so you should handle the case where 

598 ``value`` is non-symbolic. 

599 

600 The returned value should be non-symbolic. If you cannot provide a value, 

601 raise |BackendCannotProceed| with a value of ``"discard_test_case"``. 

602 

603 If ``for_failure`` is ``True``, the value is associated with a failing example. 

604 In this case, the backend should spend substantially more effort when 

605 attempting to realize the value, since it is important to avoid discarding 

606 failing examples. Backends may still raise |BackendCannotProceed| when 

607 ``for_failure`` is ``True``, if realization is truly impossible or if 

608 realization takes significantly longer than expected (say, 5 minutes). 

609 """ 

610 return value 

611 

612 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None: 

613 """ 

614 Called when Hypothesis has discovered a choice sequence which the provider 

615 may wish to enqueue to replay under its own instrumentation when we next 

616 ask to generate a test case, rather than generating one from scratch. 

617 

618 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus 

619 of high-code-coverage inputs discovered by 

620 `HypoFuzz <https://hypofuzz.com/>`_. 

621 """ 

622 return None 

623 

624 def observe_test_case(self) -> dict[str, Any]: 

625 """Called at the end of the test case when :ref:`observability 

626 <observability>` is enabled. 

627 

628 The return value should be a non-symbolic json-encodable dictionary, 

629 and will be included in observations as ``observation["metadata"]["backend"]``. 

630 """ 

631 return {} 

632 

633 def observe_information_messages( 

634 self, *, lifetime: LifetimeT 

635 ) -> Iterable[_BackendInfoMsg]: 

636 """Called at the end of each test case and again at end of the test function. 

637 

638 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}`` 

639 dictionaries to be delivered as individual information messages. Hypothesis 

640 adds the ``run_start`` timestamp and ``property`` name for you. 

641 """ 

642 assert lifetime in ("test_case", "test_function") 

643 yield from [] 

644 

645 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027 

646 """ 

647 Called at the end of each test case which uses this provider, with the same 

648 ``observation["type"] == "test_case"`` observation that is passed to 

649 other callbacks added via |add_observability_callback|. This method is not 

650 called with ``observation["type"] in {"info", "alert", "error"}`` 

651 observations. 

652 

653 .. important:: 

654 

655 For |PrimitiveProvider.on_observation| to be called by Hypothesis, 

656 |PrimitiveProvider.add_observability_callback| must be set to ``True``. 

657 

658 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling 

659 observability might increase runtime or memory usage. 

660 

661 Calls to this method are guaranteed to alternate with calls to 

662 |PrimitiveProvider.per_test_case_context_manager|. For example: 

663 

664 .. code-block:: python 

665 

666 # test function starts 

667 per_test_case_context_manager() 

668 on_observation() 

669 per_test_case_context_manager() 

670 on_observation() 

671 ... 

672 # test function ends 

673 

674 Note that |PrimitiveProvider.on_observation| will not be called for test 

675 cases which did not use this provider during generation, for example 

676 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched 

677 to the standard Hypothesis backend after this backend raised too many 

678 |BackendCannotProceed| exceptions. 

679 """ 

680 

681 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop 

682 """Marks the beginning of a semantically meaningful span of choices. 

683 

684 Spans are a depth-first tree structure. A span is opened by a call to 

685 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end| 

686 closes the most recently opened span. So the following sequence of calls: 

687 

688 .. code-block:: python 

689 

690 span_start(label=1) 

691 n1 = draw_integer() 

692 span_start(label=2) 

693 b1 = draw_boolean() 

694 n2 = draw_integer() 

695 span_end() 

696 f1 = draw_float() 

697 span_end() 

698 

699 produces the following two spans of choices: 

700 

701 .. code-block:: 

702 

703 1: [n1, b1, n2, f1] 

704 2: [b1, n2] 

705 

706 Hypothesis uses spans to denote "semantically meaningful" sequences of 

707 choices. For instance, Hypothesis opens a span for the sequence of choices 

708 made while drawing from each strategy. Not every span corresponds to a 

709 strategy; the generation of e.g. each element in |st.lists| is also marked 

710 with a span, among others. 

711 

712 ``label`` is an opaque integer, which has no defined semantics. 

713 The only guarantee made by Hypothesis is that all spans with the same 

714 "meaning" will share the same ``label``. So all spans from the same 

715 strategy will share the same label, as will e.g. the spans for |st.lists| 

716 elements. 

717 

718 Providers can track calls to |PrimitiveProvider.span_start| and 

719 |PrimitiveProvider.span_end| to learn something about the semantics of 

720 the test's choice sequence. For instance, a provider could track the depth 

721 of the span tree, or the number of unique labels, which says something about 

722 the complexity of the choices being generated. Or a provider could track 

723 the span tree across test cases in order to determine what strategies are 

724 being used in what contexts. 

725 

726 It is possible for Hypothesis to start and immediately stop a span, 

727 without calling a ``draw_*`` method in between. These spans contain zero 

728 choices. 

729 

730 Hypothesis will always balance the number of calls to 

731 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call 

732 to |PrimitiveProvider.span_start| will always be followed by a call to 

733 |PrimitiveProvider.span_end| before the end of the test case. 

734 

735 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()`` 

736 internally. 

737 """ 

738 

739 def span_end(self, discard: bool, /) -> None: # noqa: B027 

740 """Marks the end of a semantically meaningful span of choices. 

741 

742 ``discard`` is ``True`` when the draw was filtered out or otherwise marked 

743 as unlikely to contribute to the input data as seen by the user's test. 

744 Note however that side effects can make this determination unsound. 

745 

746 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()`` 

747 internally. 

748 """ 

749 

750 

751class HypothesisProvider(PrimitiveProvider): 

752 lifetime = "test_case" 

753 

754 def __init__(self, conjecturedata: Optional["ConjectureData"], /): 

755 super().__init__(conjecturedata) 

756 self._random = None if self._cd is None else self._cd._random 

757 

758 @cached_property 

759 def _local_constants(self): 

760 # defer computation of local constants until/if we need it 

761 return _get_local_constants() 

762 

763 def _maybe_draw_constant( 

764 self, 

765 choice_type: ChoiceTypeT, 

766 constraints: ChoiceConstraintsT, 

767 *, 

768 p: float = 0.05, 

769 ) -> Optional["ConstantT"]: 

770 assert self._random is not None 

771 assert choice_type != "boolean" 

772 # check whether we even want a constant before spending time computing 

773 # and caching the allowed constants. 

774 if self._random.random() > p: 

775 return None 

776 

777 # note: this property access results in computation being done 

778 assert self._local_constants is not None 

779 

780 key = (choice_type, choice_constraints_key(choice_type, constraints)) 

781 if key not in CONSTANTS_CACHE: 

782 CONSTANTS_CACHE[key] = ( 

783 tuple( 

784 choice 

785 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type) 

786 if choice_permitted(choice, constraints) 

787 ), 

788 tuple( 

789 choice 

790 for choice in self._local_constants.set_for_type(choice_type) 

791 if choice_permitted(choice, constraints) 

792 ), 

793 ) 

794 

795 # split constants into two pools, so we still have a good chance to draw 

796 # global constants even if there are many local constants. 

797 global_constants, local_constants = CONSTANTS_CACHE[key] 

798 constants_lists = ([global_constants] if global_constants else []) + ( 

799 [local_constants] if local_constants else [] 

800 ) 

801 if not constants_lists: 

802 return None 

803 

804 # At this point, we've decided to use a constant. Now we select which pool 

805 # to draw that constant from. 

806 # 

807 # Note that this approach has a different probability distribution than 

808 # attempting a random.random for both global_constants and local_constants. 

809 constants = self._random.choice(constants_lists) 

810 return self._random.choice(constants) 

811 

812 def draw_boolean( 

813 self, 

814 p: float = 0.5, 

815 ) -> bool: 

816 assert self._random is not None 

817 

818 if p <= 0: 

819 return False 

820 if p >= 1: 

821 return True 

822 

823 return self._random.random() < p 

824 

825 def draw_integer( 

826 self, 

827 min_value: int | None = None, 

828 max_value: int | None = None, 

829 *, 

830 weights: dict[int, float] | None = None, 

831 shrink_towards: int = 0, 

832 ) -> int: 

833 assert self._cd is not None 

834 if ( 

835 constant := self._maybe_draw_constant( 

836 "integer", 

837 { 

838 "min_value": min_value, 

839 "max_value": max_value, 

840 "weights": weights, 

841 "shrink_towards": shrink_towards, 

842 }, 

843 ) 

844 ) is not None: 

845 assert isinstance(constant, int) 

846 return constant 

847 

848 center = 0 

849 if min_value is not None: 

850 center = max(min_value, center) 

851 if max_value is not None: 

852 center = min(max_value, center) 

853 

854 if weights is not None: 

855 assert min_value is not None 

856 assert max_value is not None 

857 

858 # format of weights is a mapping of ints to p, where sum(p) < 1. 

859 # The remaining probability mass is uniformly distributed over 

860 # *all* ints (not just the unmapped ones; this is somewhat undesirable, 

861 # but simplifies things). 

862 # 

863 # We assert that sum(p) is strictly less than 1 because it simplifies 

864 # handling forced values when we can force into the unmapped probability 

865 # mass. We should eventually remove this restriction. 

866 sampler = Sampler( 

867 [1 - sum(weights.values()), *weights.values()], observe=False 

868 ) 

869 # if we're forcing, it's easiest to force into the unmapped probability 

870 # mass and then force the drawn value after. 

871 idx = sampler.sample(self._cd) 

872 

873 if idx == 0: 

874 return self._draw_bounded_integer(min_value, max_value) 

875 # implicit reliance on dicts being sorted for determinism 

876 return list(weights)[idx - 1] 

877 

878 if min_value is None and max_value is None: 

879 return self._draw_unbounded_integer() 

880 

881 if min_value is None: 

882 assert max_value is not None 

883 probe = max_value + 1 

884 while max_value < probe: 

885 probe = center + self._draw_unbounded_integer() 

886 return probe 

887 

888 if max_value is None: 

889 assert min_value is not None 

890 probe = min_value - 1 

891 while probe < min_value: 

892 probe = center + self._draw_unbounded_integer() 

893 return probe 

894 

895 return self._draw_bounded_integer(min_value, max_value) 

896 

897 def draw_float( 

898 self, 

899 *, 

900 min_value: float = -math.inf, 

901 max_value: float = math.inf, 

902 allow_nan: bool = True, 

903 smallest_nonzero_magnitude: float, 

904 ) -> float: 

905 assert self._random is not None 

906 

907 constraints: FloatConstraints = { 

908 "min_value": min_value, 

909 "max_value": max_value, 

910 "allow_nan": allow_nan, 

911 "smallest_nonzero_magnitude": smallest_nonzero_magnitude, 

912 } 

913 if ( 

914 constant := self._maybe_draw_constant("float", constraints, p=0.15) 

915 ) is not None: 

916 assert isinstance(constant, float) 

917 return constant 

918 

919 # on top of the probability to draw a constant float, we independently 

920 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values. 

921 weird_floats = [ 

922 f 

923 for f in [ 

924 0.0, 

925 -0.0, 

926 math.inf, 

927 -math.inf, 

928 math.nan, 

929 -math.nan, 

930 SIGNALING_NAN, 

931 -SIGNALING_NAN, 

932 min_value, 

933 next_up(min_value), 

934 min_value + 1, 

935 max_value - 1, 

936 next_down(max_value), 

937 max_value, 

938 ] 

939 if choice_permitted(f, constraints) 

940 ] 

941 

942 if weird_floats and self._random.random() < 0.05: 

943 return self._random.choice(weird_floats) 

944 

945 clamper = make_float_clamper( 

946 min_value, 

947 max_value, 

948 smallest_nonzero_magnitude=smallest_nonzero_magnitude, 

949 allow_nan=allow_nan, 

950 ) 

951 

952 result = self._draw_float() 

953 if allow_nan and math.isnan(result): 

954 clamped = result # pragma: no cover 

955 else: 

956 clamped = clamper(result) 

957 if float_to_int(clamped) != float_to_int(result) and not ( 

958 math.isnan(result) and allow_nan 

959 ): 

960 result = clamped 

961 return result 

962 

963 def draw_string( 

964 self, 

965 intervals: IntervalSet, 

966 *, 

967 min_size: int = 0, 

968 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

969 ) -> str: 

970 assert self._cd is not None 

971 assert self._random is not None 

972 

973 if len(intervals) == 0: 

974 return "" 

975 

976 if ( 

977 constant := self._maybe_draw_constant( 

978 "string", 

979 {"intervals": intervals, "min_size": min_size, "max_size": max_size}, 

980 ) 

981 ) is not None: 

982 assert isinstance(constant, str) 

983 return constant 

984 

985 average_size = min( 

986 max(min_size * 2, min_size + 5), 

987 0.5 * (min_size + max_size), 

988 ) 

989 

990 chars = [] 

991 elements = many( 

992 self._cd, 

993 min_size=min_size, 

994 max_size=max_size, 

995 average_size=average_size, 

996 observe=False, 

997 ) 

998 while elements.more(): 

999 if len(intervals) > 256: 

1000 if self.draw_boolean(0.2): 

1001 i = self._random.randint(256, len(intervals) - 1) 

1002 else: 

1003 i = self._random.randint(0, 255) 

1004 else: 

1005 i = self._random.randint(0, len(intervals) - 1) 

1006 

1007 chars.append(intervals.char_in_shrink_order(i)) 

1008 

1009 return "".join(chars) 

1010 

1011 def draw_bytes( 

1012 self, 

1013 min_size: int = 0, 

1014 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

1015 ) -> bytes: 

1016 assert self._cd is not None 

1017 assert self._random is not None 

1018 

1019 if ( 

1020 constant := self._maybe_draw_constant( 

1021 "bytes", {"min_size": min_size, "max_size": max_size} 

1022 ) 

1023 ) is not None: 

1024 assert isinstance(constant, bytes) 

1025 return constant 

1026 

1027 buf = bytearray() 

1028 average_size = min( 

1029 max(min_size * 2, min_size + 5), 

1030 0.5 * (min_size + max_size), 

1031 ) 

1032 elements = many( 

1033 self._cd, 

1034 min_size=min_size, 

1035 max_size=max_size, 

1036 average_size=average_size, 

1037 observe=False, 

1038 ) 

1039 while elements.more(): 

1040 buf += self._random.randbytes(1) 

1041 

1042 return bytes(buf) 

1043 

1044 def _draw_float(self) -> float: 

1045 assert self._random is not None 

1046 

1047 f = lex_to_float(self._random.getrandbits(64)) 

1048 sign = 1 if self._random.getrandbits(1) else -1 

1049 return sign * f 

1050 

1051 def _draw_unbounded_integer(self) -> int: 

1052 assert self._cd is not None 

1053 assert self._random is not None 

1054 

1055 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)] 

1056 

1057 r = self._random.getrandbits(size) 

1058 sign = r & 1 

1059 r >>= 1 

1060 if sign: 

1061 r = -r 

1062 return r 

1063 

1064 def _draw_bounded_integer( 

1065 self, 

1066 lower: int, 

1067 upper: int, 

1068 *, 

1069 vary_size: bool = True, 

1070 ) -> int: 

1071 assert lower <= upper 

1072 assert self._cd is not None 

1073 assert self._random is not None 

1074 

1075 if lower == upper: 

1076 return lower 

1077 

1078 bits = (upper - lower).bit_length() 

1079 if bits > 24 and vary_size and self._random.random() < 7 / 8: 

1080 # For large ranges, we combine the uniform random distribution 

1081 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our 

1082 # choice of unicode characters is uniform but the 32bit distribution is not. 

1083 idx = INT_SIZES_SAMPLER.sample(self._cd) 

1084 cap_bits = min(bits, INT_SIZES[idx]) 

1085 upper = min(upper, lower + 2**cap_bits - 1) 

1086 return self._random.randint(lower, upper) 

1087 

1088 return self._random.randint(lower, upper) 

1089 

1090 

1091# Masks for masking off the first byte of an n-bit buffer. 

1092# The appropriate mask is stored at position n % 8. 

1093BYTE_MASKS = [(1 << n) - 1 for n in range(8)] 

1094BYTE_MASKS[0] = 255 

1095 

1096 

1097class BytestringProvider(PrimitiveProvider): 

1098 lifetime = "test_case" 

1099 

1100 def __init__( 

1101 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes 

1102 ): 

1103 super().__init__(conjecturedata) 

1104 self.bytestring = bytestring 

1105 self.index = 0 

1106 self.drawn = bytearray() 

1107 

1108 def _draw_bits(self, n): 

1109 if n == 0: # pragma: no cover 

1110 return 0 

1111 n_bytes = bits_to_bytes(n) 

1112 if self.index + n_bytes > len(self.bytestring): 

1113 self._cd.mark_overrun() 

1114 buf = bytearray(self.bytestring[self.index : self.index + n_bytes]) 

1115 self.index += n_bytes 

1116 

1117 buf[0] &= BYTE_MASKS[n % 8] 

1118 buf = bytes(buf) 

1119 self.drawn += buf 

1120 return int_from_bytes(buf) 

1121 

1122 def draw_boolean( 

1123 self, 

1124 p: float = 0.5, 

1125 ) -> bool: 

1126 if p <= 0: 

1127 return False 

1128 if p >= 1: 

1129 return True 

1130 

1131 # always use one byte for booleans to maintain constant draw size. 

1132 # If a probability requires more than 8 bits to represent precisely, 

1133 # the result will be slightly biased, but not badly. 

1134 bits = 8 

1135 size = 2**bits 

1136 # always leave at least one value that can be true, even for very small 

1137 # p. 

1138 falsey = max(1, math.floor(size * (1 - p))) 

1139 n = self._draw_bits(bits) 

1140 return n >= falsey 

1141 

1142 def draw_integer( 

1143 self, 

1144 min_value: int | None = None, 

1145 max_value: int | None = None, 

1146 *, 

1147 weights: dict[int, float] | None = None, 

1148 shrink_towards: int = 0, 

1149 ) -> int: 

1150 assert self._cd is not None 

1151 

1152 # we explicitly ignore integer weights for now, as they are likely net 

1153 # negative on fuzzer performance. 

1154 

1155 if min_value is None and max_value is None: 

1156 min_value = -(2**127) 

1157 max_value = 2**127 - 1 

1158 elif min_value is None: 

1159 assert max_value is not None 

1160 min_value = max_value - 2**64 

1161 elif max_value is None: 

1162 assert min_value is not None 

1163 max_value = min_value + 2**64 

1164 

1165 if min_value == max_value: 

1166 return min_value 

1167 

1168 bits = (max_value - min_value).bit_length() 

1169 value = self._draw_bits(bits) 

1170 while not (min_value <= value <= max_value): 

1171 value = self._draw_bits(bits) 

1172 return value 

1173 

1174 def draw_float( 

1175 self, 

1176 *, 

1177 min_value: float = -math.inf, 

1178 max_value: float = math.inf, 

1179 allow_nan: bool = True, 

1180 smallest_nonzero_magnitude: float, 

1181 ) -> float: 

1182 n = self._draw_bits(64) 

1183 sign = -1 if n >> 64 else 1 

1184 f = sign * lex_to_float(n & ((1 << 64) - 1)) 

1185 clamper = make_float_clamper( 

1186 min_value, 

1187 max_value, 

1188 smallest_nonzero_magnitude=smallest_nonzero_magnitude, 

1189 allow_nan=allow_nan, 

1190 ) 

1191 return clamper(f) 

1192 

1193 def _draw_collection(self, min_size, max_size, *, alphabet_size): 

1194 average_size = min( 

1195 max(min_size * 2, min_size + 5), 

1196 0.5 * (min_size + max_size), 

1197 ) 

1198 elements = many( 

1199 self._cd, 

1200 min_size=min_size, 

1201 max_size=max_size, 

1202 average_size=average_size, 

1203 observe=False, 

1204 ) 

1205 values = [] 

1206 while elements.more(): 

1207 values.append(self.draw_integer(0, alphabet_size - 1)) 

1208 return values 

1209 

1210 def draw_string( 

1211 self, 

1212 intervals: IntervalSet, 

1213 *, 

1214 min_size: int = 0, 

1215 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

1216 ) -> str: 

1217 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals)) 

1218 return "".join(chr(intervals[v]) for v in values) 

1219 

1220 def draw_bytes( 

1221 self, 

1222 min_size: int = 0, 

1223 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

1224 ) -> bytes: 

1225 values = self._draw_collection(min_size, max_size, alphabet_size=2**8) 

1226 return bytes(values) 

1227 

1228 

1229class URandom(Random): 

1230 # we reimplement a Random instance instead of using SystemRandom, because 

1231 # os.urandom is not guaranteed to read from /dev/urandom. 

1232 

1233 @staticmethod 

1234 def _urandom(size: int) -> bytes: 

1235 # By default, we buffer more data from /dev/urandom than the actual `size` number 

1236 # of bytes we requested. This trips up anyone (and particularly Antithesis) 

1237 # hooking /dev/urandom reads to control randomness. 

1238 # 

1239 # buffering=0 disables this behavior. 

1240 with open("/dev/urandom", "rb", buffering=0) as f: 

1241 return f.read(size) 

1242 

1243 def getrandbits(self, k: int) -> int: 

1244 assert k >= 0 

1245 size = bits_to_bytes(k) 

1246 n = int_from_bytes(self._urandom(size)) 

1247 # trim excess bits 

1248 return n >> (size * 8 - k) 

1249 

1250 def random(self) -> float: 

1251 # adapted from random.SystemRandom.random 

1252 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53) 

1253 

1254 

1255class URandomProvider(HypothesisProvider): 

1256 # A provider which reads directly from /dev/urandom as its source of randomness. 

1257 # This provider exists to provide better Hypothesis integration with Antithesis 

1258 # (https://antithesis.com/), which interprets calls to /dev/urandom as the 

1259 # randomness to mutate. This effectively gives Antithesis control over 

1260 # the choices made by the URandomProvider. 

1261 # 

1262 # If you are not using Antithesis, you probably don't want to use this 

1263 # provider. 

1264 

1265 def __init__(self, conjecturedata: Optional["ConjectureData"], /): 

1266 super().__init__(conjecturedata) 

1267 if WINDOWS: # pragma: no cover 

1268 warnings.warn( 

1269 "/dev/urandom is not available on windows. Falling back to " 

1270 'standard PRNG generation (equivalent to backend="hypothesis").', 

1271 HypothesisWarning, 

1272 stacklevel=1, 

1273 ) 

1274 # don't overwrite the HypothesisProvider self._random attribute in 

1275 # this case 

1276 else: 

1277 self._random = URandom()