Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/conjecture/providers.py: 42%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

317 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import abc 

12import contextlib 

13import math 

14import sys 

15import warnings 

16from collections.abc import Iterable 

17from contextlib import AbstractContextManager 

18from functools import cached_property 

19from random import Random 

20from sys import float_info 

21from types import ModuleType 

22from typing import ( 

23 TYPE_CHECKING, 

24 Any, 

25 ClassVar, 

26 Literal, 

27 Optional, 

28 TypedDict, 

29 TypeVar, 

30 Union, 

31) 

32 

33from sortedcontainers import SortedSet 

34 

35from hypothesis.errors import HypothesisWarning 

36from hypothesis.internal.cache import LRUCache 

37from hypothesis.internal.compat import WINDOWS, int_from_bytes 

38from hypothesis.internal.conjecture.choice import ( 

39 ChoiceConstraintsT, 

40 ChoiceT, 

41 ChoiceTypeT, 

42 FloatConstraints, 

43 choice_constraints_key, 

44 choice_permitted, 

45) 

46from hypothesis.internal.conjecture.floats import lex_to_float 

47from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes 

48from hypothesis.internal.conjecture.utils import ( 

49 INT_SIZES, 

50 INT_SIZES_SAMPLER, 

51 Sampler, 

52 many, 

53) 

54from hypothesis.internal.constants_ast import ( 

55 Constants, 

56 constants_from_module, 

57 is_local_module_file, 

58) 

59from hypothesis.internal.floats import ( 

60 SIGNALING_NAN, 

61 float_to_int, 

62 make_float_clamper, 

63 next_down, 

64 next_up, 

65) 

66from hypothesis.internal.intervalsets import IntervalSet 

67from hypothesis.internal.observability import InfoObservationType, TestCaseObservation 

68 

69if TYPE_CHECKING: 

70 from typing import TypeAlias 

71 

72 from hypothesis.internal.conjecture.data import ConjectureData 

73 from hypothesis.internal.constants_ast import ConstantT 

74 

75T = TypeVar("T") 

76LifetimeT: "TypeAlias" = Literal["test_case", "test_function"] 

77COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" 

78 

79 

80#: Registered Hypothesis backends. This is a dictionary whose keys are the name 

81#: to be used in |settings.backend|, and whose values are a string of the absolute 

82#: importable path to a subclass of |PrimitiveProvider|, which Hypothesis will 

83#: instantiate when your backend is requested by a test's |settings.backend| value. 

84#: 

85#: For example, the default Hypothesis backend is registered as: 

86#: 

87#: .. code-block:: python 

88#: 

89#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS 

90#: 

91#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider" 

92#: 

93#: And can be used with: 

94#: 

95#: .. code-block:: python 

96#: 

97#: from hypothesis import given, settings, strategies as st 

98#: 

99#: @given(st.integers()) 

100#: @settings(backend="hypothesis") 

101#: def f(n): 

102#: pass 

103#: 

104#: Though, as ``backend="hypothesis"`` is the default setting, the above would 

105#: typically not have any effect. 

106#: 

107#: The purpose of mapping to an absolute importable path, rather than the actual 

108#: |PrimitiveProvider| class, is to avoid slowing down Hypothesis startup times 

109#: by only importing alternative backends when required. 

110AVAILABLE_PROVIDERS = { 

111 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider", 

112 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider", 

113} 

114# cache the choice_permitted constants for a particular set of constraints. 

115CacheKeyT: "TypeAlias" = tuple[ChoiceTypeT, tuple[Any, ...]] 

116CacheValueT: "TypeAlias" = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]] 

117CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024) 

118 

119_constant_floats = ( 

120 [ 

121 0.5, 

122 1.1, 

123 1.5, 

124 1.9, 

125 1.0 / 3, 

126 10e6, 

127 10e-6, 

128 1.175494351e-38, 

129 next_up(0.0), 

130 float_info.min, 

131 float_info.max, 

132 3.402823466e38, 

133 9007199254740992.0, 

134 1 - 10e-6, 

135 2 + 10e-6, 

136 1.192092896e-07, 

137 2.2204460492503131e-016, 

138 ] 

139 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32 

140 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64 

141) 

142_constant_floats.extend([-x for x in _constant_floats]) 

143assert all(isinstance(f, float) for f in _constant_floats) 

144 

145_constant_strings = { 

146 # strings which can be interpreted as code / logic 

147 "undefined", 

148 "null", 

149 "NULL", 

150 "nil", 

151 "NIL", 

152 "true", 

153 "false", 

154 "True", 

155 "False", 

156 "TRUE", 

157 "FALSE", 

158 "None", 

159 "none", 

160 "if", 

161 "then", 

162 "else", 

163 # strings which can be interpreted as a number 

164 "0", 

165 "1e100", 

166 "0..0", 

167 "0/0", 

168 "1/0", 

169 "+0.0", 

170 "Infinity", 

171 "-Infinity", 

172 "Inf", 

173 "INF", 

174 "NaN", 

175 "9" * 30, 

176 # common ascii characters 

177 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~", 

178 # common unicode characters 

179 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±", 

180 # characters which increase in length when lowercased 

181 "Ⱥ", 

182 "Ⱦ", 

183 # ligatures 

184 "æœÆŒffʤʨß" 

185 # emoticons 

186 "(╯°□°)╯︵ ┻━┻)", 

187 # emojis 

188 "😍", 

189 "🇺🇸", 

190 # emoji modifiers 

191 "🏻" # U+1F3FB Light Skin Tone, 

192 "👍🏻", # 👍 followed by U+1F3FB 

193 # RTL text 

194 "الكل في المجمو عة", 

195 # Ogham text, which contains the only character in the Space Separators 

196 # unicode category (Zs) that isn't visually blank:  . # noqa: RUF003 

197 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜", 

198 # readable variations on text (bolt/italic/script) 

199 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", 

200 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", 

201 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", 

202 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", 

203 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", 

204 # upsidown text 

205 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", 

206 # reserved strings in windows 

207 "NUL", 

208 "COM1", 

209 "LPT1", 

210 # scunthorpe problem 

211 "Scunthorpe", 

212 # zalgo text 

213 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", 

214 # 

215 # examples from https://faultlore.com/blah/text-hates-you/ 

216 "मनीष منش", 

217 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله", 

218 "lorem لا بسم الله ipsum 你好1234你好", 

219} 

220 

221 

222# we don't actually care what order the constants are sorted in, just that the 

223# ordering is deterministic. 

224GLOBAL_CONSTANTS = Constants( 

225 integers=SortedSet(), 

226 floats=SortedSet(_constant_floats, key=float_to_int), 

227 bytes=SortedSet(), 

228 strings=SortedSet(_constant_strings), 

229) 

230 

231_local_constants = Constants( 

232 integers=SortedSet(), 

233 floats=SortedSet(key=float_to_int), 

234 bytes=SortedSet(), 

235 strings=SortedSet(), 

236) 

237# modules that we've already seen and processed for local constants. These are 

238# are all modules, not necessarily local ones. This lets us quickly see which 

239# modules are new without an expensive path.resolve() or is_local_module_file 

240# cache lookup. 

241_seen_modules: set[ModuleType] = set() 

242_sys_modules_len: Optional[int] = None 

243 

244 

245def _get_local_constants() -> Constants: 

246 global _sys_modules_len, _local_constants 

247 

248 if sys.platform == "emscripten": # pragma: no cover 

249 # pyodide builds bundle the stdlib in a nonstandard location, like 

250 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of 

251 # the stdlib as local code and slowing down on emscripten, instead return 

252 # that nothing is local. 

253 # 

254 # pyodide may provide some way to distinguish stdlib/third-party/local 

255 # code. I haven't looked into it. If they do, we should correctly implement 

256 # ModuleLocation for pyodide instead of this. 

257 return _local_constants 

258 

259 count_constants = len(_local_constants) 

260 # We call this function once per HypothesisProvider instance, i.e. once per 

261 # input, so it needs to be performant. The logic here is more complicated 

262 # than necessary because of this. 

263 # 

264 # First, we check whether there are any new modules with a very cheap length 

265 # check. This check can be fooled if a module is added while another module is 

266 # removed, but the more correct check against tuple(sys.modules.keys()) is 

267 # substantially more expensive. Such a new module would eventually be discovered 

268 # if / when the length changes again in the future. 

269 # 

270 # If the length has changed, we find just modules we haven't seen before. Of 

271 # those, we find the ones which correspond to local modules, and extract their 

272 # constants. 

273 

274 # careful: store sys.modules length when we first check to avoid race conditions 

275 # with other threads loading a module before we set _sys_modules_len. 

276 if (sys_modules_len := len(sys.modules)) != _sys_modules_len: 

277 # set(_seen_modules) shouldn't typically be required, but I have run into 

278 # a "set changed size during iteration" error here when running 

279 # test_provider_conformance_crosshair. 

280 new_modules = set(sys.modules.values()) - set(_seen_modules) 

281 # Repeated SortedSet unions are expensive. Do the initial unions on a 

282 # set(), then do a one-time union with _local_constants after. 

283 new_constants = Constants() 

284 for module in new_modules: 

285 if ( 

286 module_file := getattr(module, "__file__", None) 

287 ) is not None and is_local_module_file(module_file): 

288 new_constants |= constants_from_module(module) 

289 _local_constants |= new_constants 

290 _seen_modules.update(new_modules) 

291 _sys_modules_len = sys_modules_len 

292 

293 # if we add any new constant, invalidate the constant cache for permitted values. 

294 # A more efficient approach would be invalidating just the keys with this 

295 # choice_type. 

296 if len(_local_constants) > count_constants: 

297 CONSTANTS_CACHE.cache.clear() 

298 

299 return _local_constants 

300 

301 

302class _BackendInfoMsg(TypedDict): 

303 type: InfoObservationType 

304 title: str 

305 content: Union[str, dict[str, Any]] 

306 

307 

308# TODO_DOCS: link to choice sequence explanation page 

309 

310 

311class PrimitiveProvider(abc.ABC): 

312 """ 

313 |PrimitiveProvider| is the implementation interface of a 

314 :ref:`Hypothesis backend <alternative-backends>`. 

315 

316 A |PrimitiveProvider| is required to implement the following five 

317 ``draw_*`` methods: 

318 

319 * |PrimitiveProvider.draw_integer| 

320 * |PrimitiveProvider.draw_boolean| 

321 * |PrimitiveProvider.draw_float| 

322 * |PrimitiveProvider.draw_string| 

323 * |PrimitiveProvider.draw_bytes| 

324 

325 Each strategy in Hypothesis generates values by drawing a series of choices 

326 from these five methods. By overriding them, a |PrimitiveProvider| can control 

327 the distribution of inputs generated by Hypothesis. 

328 

329 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider| 

330 which uses an SMT solver to generate inputs that uncover new branches. 

331 

332 Once you implement a |PrimitiveProvider|, you can make it available for use 

333 through |AVAILABLE_PROVIDERS|. 

334 """ 

335 

336 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function`` 

337 #: or ``test_case``. 

338 #: 

339 #: If ``test_function`` (the default), a single provider instance will be 

340 #: instantiated and used for the entirety of each test function (i.e., roughly 

341 #: one provider per |@given| annotation). This can be useful for tracking state 

342 #: over the entirety of a test function. 

343 #: 

344 #: If ``test_case``, a new provider instance will be instantiated and used for 

345 #: each input Hypothesis generates. 

346 #: 

347 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will 

348 #: be ``None`` for a lifetime of ``test_function``, and an instance of 

349 #: ``ConjectureData`` for a lifetime of ``test_case``. 

350 #: 

351 #: Third-party providers likely want to set a lifetime of ``test_function``. 

352 lifetime: ClassVar[LifetimeT] = "test_function" 

353 

354 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values 

355 #: which record operations performed on them in order to discover new paths. 

356 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting 

357 #: with symbolic choices returned by the provider in any way that would force 

358 #: the solver to narrow the range of possible values for that symbolic. 

359 #: 

360 #: Setting this to ``True`` disables some hypothesis features and optimizations. 

361 #: Only set this to ``True`` if it is necessary for your backend. 

362 avoid_realization: ClassVar[bool] = False 

363 

364 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a 

365 #: callback via |add_observability_callback|, enabling observability during 

366 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation| 

367 #: will never be called by Hypothesis. 

368 #: 

369 #: The opt-in behavior of observability is because enabling observability 

370 #: might increase runtime or memory usage. 

371 add_observability_callback: ClassVar[bool] = False 

372 

373 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: 

374 self._cd = conjecturedata 

375 

376 @abc.abstractmethod 

377 def draw_boolean( 

378 self, 

379 p: float = 0.5, 

380 ) -> bool: 

381 """ 

382 Draw a boolean choice. 

383 

384 Parameters 

385 ---------- 

386 p: float 

387 The probability of returning ``True``. Between 0 and 1 inclusive. 

388 

389 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by 

390 Hypothesis, and may be ignored by the backend. 

391 

392 If ``0``, the provider must return ``False``. If ``1``, the provider 

393 must return ``True``. 

394 """ 

395 raise NotImplementedError 

396 

397 @abc.abstractmethod 

398 def draw_integer( 

399 self, 

400 min_value: Optional[int] = None, 

401 max_value: Optional[int] = None, 

402 *, 

403 weights: Optional[dict[int, float]] = None, 

404 shrink_towards: int = 0, 

405 ) -> int: 

406 """ 

407 Draw an integer choice. 

408 

409 Parameters 

410 ---------- 

411 min_value : int | None 

412 (Inclusive) lower bound on the integer value. If ``None``, there is 

413 no lower bound. 

414 max_value : int | None 

415 (Inclusive) upper bound on the integer value. If ``None``, there is 

416 no upper bound. 

417 weights: dict[int, float] | None 

418 Maps keys in the range [``min_value``, ``max_value``] to the probability 

419 of returning that key. 

420 shrink_towards: int 

421 The integer to shrink towards. This is not used during generation and 

422 can be ignored by backends. 

423 """ 

424 raise NotImplementedError 

425 

426 @abc.abstractmethod 

427 def draw_float( 

428 self, 

429 *, 

430 min_value: float = -math.inf, 

431 max_value: float = math.inf, 

432 allow_nan: bool = True, 

433 smallest_nonzero_magnitude: float, 

434 ) -> float: 

435 """ 

436 Draw a float choice. 

437 

438 Parameters 

439 ---------- 

440 min_value : float 

441 (Inclusive) lower bound on the float value. 

442 max_value : float 

443 (Inclusive) upper bound on the float value. 

444 allow_nan : bool 

445 If ``False``, it is invalid to return ``math.nan``. 

446 smallest_nonzero_magnitude : float 

447 The smallest allowed nonzero magnitude. ``draw_float`` should not 

448 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``. 

449 """ 

450 raise NotImplementedError 

451 

452 @abc.abstractmethod 

453 def draw_string( 

454 self, 

455 intervals: IntervalSet, 

456 *, 

457 min_size: int = 0, 

458 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

459 ) -> str: 

460 """ 

461 Draw a string choice. 

462 

463 Parameters 

464 ---------- 

465 intervals : IntervalSet 

466 The set of codepoints to sample from. 

467 min_size : int 

468 (Inclusive) lower bound on the string length. 

469 max_size : int 

470 (Inclusive) upper bound on the string length. 

471 """ 

472 raise NotImplementedError 

473 

474 @abc.abstractmethod 

475 def draw_bytes( 

476 self, 

477 min_size: int = 0, 

478 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

479 ) -> bytes: 

480 """ 

481 Draw a bytes choice. 

482 

483 Parameters 

484 ---------- 

485 min_size : int 

486 (Inclusive) lower bound on the bytes length. 

487 max_size : int 

488 (Inclusive) upper bound on the bytes length. 

489 """ 

490 raise NotImplementedError 

491 

492 def per_test_case_context_manager(self) -> AbstractContextManager: 

493 """ 

494 Returns a context manager which will be entered each time Hypothesis 

495 starts generating and executing one test case, and exited when that test 

496 case finishes generating and executing, including if any exception is 

497 thrown. 

498 

499 In the lifecycle of a Hypothesis test, this is called before 

500 generating strategy values for each test case. This is just before any 

501 :ref:`custom executor <custom-function-execution>` is called. 

502 

503 Even if not returning a custom context manager, |PrimitiveProvider| 

504 subclasses are welcome to override this method to know when Hypothesis 

505 starts and ends the execution of a single test case. 

506 """ 

507 return contextlib.nullcontext() 

508 

509 def realize(self, value: T, *, for_failure: bool = False) -> T: 

510 """ 

511 Called whenever hypothesis requires a concrete (non-symbolic) value from 

512 a potentially symbolic value. Hypothesis will not check that ``value`` is 

513 symbolic before calling ``realize``, so you should handle the case where 

514 ``value`` is non-symbolic. 

515 

516 The returned value should be non-symbolic. If you cannot provide a value, 

517 raise |BackendCannotProceed| with a value of ``"discard_test_case"``. 

518 

519 If ``for_failure`` is ``True``, the value is associated with a failing example. 

520 In this case, the backend should spend substantially more effort when 

521 attempting to realize the value, since it is important to avoid discarding 

522 failing examples. Backends may still raise |BackendCannotProceed| when 

523 ``for_failure`` is ``True``, if realization is truly impossible or if 

524 realization takes significantly longer than expected (say, 5 minutes). 

525 """ 

526 return value 

527 

528 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None: 

529 """ 

530 Called when Hypothesis has discovered a choice sequence which the provider 

531 may wish to enqueue to replay under its own instrumentation when we next 

532 ask to generate a test case, rather than generating one from scratch. 

533 

534 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus 

535 of high-code-coverage inputs discovered by 

536 `HypoFuzz <https://hypofuzz.com/>`_. 

537 """ 

538 return None 

539 

540 def observe_test_case(self) -> dict[str, Any]: 

541 """Called at the end of the test case when :ref:`observability 

542 <observability>` is enabled. 

543 

544 The return value should be a non-symbolic json-encodable dictionary, 

545 and will be included in observations as ``observation["metadata"]["backend"]``. 

546 """ 

547 return {} 

548 

549 def observe_information_messages( 

550 self, *, lifetime: LifetimeT 

551 ) -> Iterable[_BackendInfoMsg]: 

552 """Called at the end of each test case and again at end of the test function. 

553 

554 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}`` 

555 dictionaries to be delivered as individual information messages. Hypothesis 

556 adds the ``run_start`` timestamp and ``property`` name for you. 

557 """ 

558 assert lifetime in ("test_case", "test_function") 

559 yield from [] 

560 

561 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027 

562 """ 

563 Called at the end of each test case which uses this provider, with the same 

564 ``observation["type"] == "test_case"`` observation that is passed to 

565 other callbacks added via |add_observability_callback|. This method is not 

566 called with ``observation["type"] in {"info", "alert", "error"}`` 

567 observations. 

568 

569 .. important:: 

570 

571 For |PrimitiveProvider.on_observation| to be called by Hypothesis, 

572 |PrimitiveProvider.add_observability_callback| must be set to ``True``. 

573 

574 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling 

575 observability might increase runtime or memory usage. 

576 

577 Calls to this method are guaranteed to alternate with calls to 

578 |PrimitiveProvider.per_test_case_context_manager|. For example: 

579 

580 .. code-block:: python 

581 

582 # test function starts 

583 per_test_case_context_manager() 

584 on_observation() 

585 per_test_case_context_manager() 

586 on_observation() 

587 ... 

588 # test function ends 

589 

590 Note that |PrimitiveProvider.on_observation| will not be called for test 

591 cases which did not use this provider during generation, for example 

592 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched 

593 to the standard Hypothesis backend after this backend raised too many 

594 |BackendCannotProceed| exceptions. 

595 """ 

596 

597 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop 

598 """Marks the beginning of a semantically meaningful span of choices. 

599 

600 Spans are a depth-first tree structure. A span is opened by a call to 

601 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end| 

602 closes the most recently opened span. So the following sequence of calls: 

603 

604 .. code-block:: python 

605 

606 span_start(label=1) 

607 n1 = draw_integer() 

608 span_start(label=2) 

609 b1 = draw_boolean() 

610 n2 = draw_integer() 

611 span_end() 

612 f1 = draw_float() 

613 span_end() 

614 

615 produces the following two spans of choices: 

616 

617 .. code-block:: 

618 

619 1: [n1, b1, n2, f1] 

620 2: [b1, n2] 

621 

622 Hypothesis uses spans to denote "semantically meaningful" sequences of 

623 choices. For instance, Hypothesis opens a span for the sequence of choices 

624 made while drawing from each strategy. Not every span corresponds to a 

625 strategy; the generation of e.g. each element in |st.lists| is also marked 

626 with a span, among others. 

627 

628 ``label`` is an opaque integer, which has no defined semantics. 

629 The only guarantee made by Hypothesis is that all spans with the same 

630 "meaning" will share the same ``label``. So all spans from the same 

631 strategy will share the same label, as will e.g. the spans for |st.lists| 

632 elements. 

633 

634 Providers can track calls to |PrimitiveProvider.span_start| and 

635 |PrimitiveProvider.span_end| to learn something about the semantics of 

636 the test's choice sequence. For instance, a provider could track the depth 

637 of the span tree, or the number of unique labels, which says something about 

638 the complexity of the choices being generated. Or a provider could track 

639 the span tree across test cases in order to determine what strategies are 

640 being used in what contexts. 

641 

642 It is possible for Hypothesis to start and immediately stop a span, 

643 without calling a ``draw_*`` method in between. These spans contain zero 

644 choices. 

645 

646 Hypothesis will always balance the number of calls to 

647 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call 

648 to |PrimitiveProvider.span_start| will always be followed by a call to 

649 |PrimitiveProvider.span_end| before the end of the test case. 

650 

651 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()`` 

652 internally. 

653 """ 

654 

655 def span_end(self, discard: bool, /) -> None: # noqa: B027 

656 """Marks the end of a semantically meaningful span of choices. 

657 

658 ``discard`` is ``True`` when the draw was filtered out or otherwise marked 

659 as unlikely to contribute to the input data as seen by the user's test. 

660 Note however that side effects can make this determination unsound. 

661 

662 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()`` 

663 internally. 

664 """ 

665 

666 

667class HypothesisProvider(PrimitiveProvider): 

668 lifetime = "test_case" 

669 

670 def __init__(self, conjecturedata: Optional["ConjectureData"], /): 

671 super().__init__(conjecturedata) 

672 self._random = None if self._cd is None else self._cd._random 

673 

674 @cached_property 

675 def _local_constants(self): 

676 # defer computation of local constants until/if we need it 

677 return _get_local_constants() 

678 

679 def _maybe_draw_constant( 

680 self, 

681 choice_type: ChoiceTypeT, 

682 constraints: ChoiceConstraintsT, 

683 *, 

684 p: float = 0.05, 

685 ) -> Optional["ConstantT"]: 

686 assert self._random is not None 

687 assert choice_type != "boolean" 

688 # check whether we even want a constant before spending time computing 

689 # and caching the allowed constants. 

690 if self._random.random() > p: 

691 return None 

692 

693 # note: this property access results in computation being done 

694 assert self._local_constants is not None 

695 

696 key = (choice_type, choice_constraints_key(choice_type, constraints)) 

697 if key not in CONSTANTS_CACHE: 

698 CONSTANTS_CACHE[key] = ( 

699 tuple( 

700 choice 

701 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type) 

702 if choice_permitted(choice, constraints) 

703 ), 

704 tuple( 

705 choice 

706 for choice in self._local_constants.set_for_type(choice_type) 

707 if choice_permitted(choice, constraints) 

708 ), 

709 ) 

710 

711 # split constants into two pools, so we still have a good chance to draw 

712 # global constants even if there are many local constants. 

713 (global_constants, local_constants) = CONSTANTS_CACHE[key] 

714 constants_lists = ([global_constants] if global_constants else []) + ( 

715 [local_constants] if local_constants else [] 

716 ) 

717 if not constants_lists: 

718 return None 

719 

720 # At this point, we've decided to use a constant. Now we select which pool 

721 # to draw that constant from. 

722 # 

723 # Note that this approach has a different probability distribution than 

724 # attempting a random.random for both global_constants and local_constants. 

725 constants = self._random.choice(constants_lists) 

726 return self._random.choice(constants) 

727 

728 def draw_boolean( 

729 self, 

730 p: float = 0.5, 

731 ) -> bool: 

732 assert self._random is not None 

733 

734 if p <= 0: 

735 return False 

736 if p >= 1: 

737 return True 

738 

739 return self._random.random() < p 

740 

741 def draw_integer( 

742 self, 

743 min_value: Optional[int] = None, 

744 max_value: Optional[int] = None, 

745 *, 

746 weights: Optional[dict[int, float]] = None, 

747 shrink_towards: int = 0, 

748 ) -> int: 

749 assert self._cd is not None 

750 if ( 

751 constant := self._maybe_draw_constant( 

752 "integer", 

753 { 

754 "min_value": min_value, 

755 "max_value": max_value, 

756 "weights": weights, 

757 "shrink_towards": shrink_towards, 

758 }, 

759 ) 

760 ) is not None: 

761 assert isinstance(constant, int) 

762 return constant 

763 

764 center = 0 

765 if min_value is not None: 

766 center = max(min_value, center) 

767 if max_value is not None: 

768 center = min(max_value, center) 

769 

770 if weights is not None: 

771 assert min_value is not None 

772 assert max_value is not None 

773 

774 # format of weights is a mapping of ints to p, where sum(p) < 1. 

775 # The remaining probability mass is uniformly distributed over 

776 # *all* ints (not just the unmapped ones; this is somewhat undesirable, 

777 # but simplifies things). 

778 # 

779 # We assert that sum(p) is strictly less than 1 because it simplifies 

780 # handling forced values when we can force into the unmapped probability 

781 # mass. We should eventually remove this restriction. 

782 sampler = Sampler( 

783 [1 - sum(weights.values()), *weights.values()], observe=False 

784 ) 

785 # if we're forcing, it's easiest to force into the unmapped probability 

786 # mass and then force the drawn value after. 

787 idx = sampler.sample(self._cd) 

788 

789 if idx == 0: 

790 return self._draw_bounded_integer(min_value, max_value) 

791 # implicit reliance on dicts being sorted for determinism 

792 return list(weights)[idx - 1] 

793 

794 if min_value is None and max_value is None: 

795 return self._draw_unbounded_integer() 

796 

797 if min_value is None: 

798 assert max_value is not None 

799 probe = max_value + 1 

800 while max_value < probe: 

801 probe = center + self._draw_unbounded_integer() 

802 return probe 

803 

804 if max_value is None: 

805 assert min_value is not None 

806 probe = min_value - 1 

807 while probe < min_value: 

808 probe = center + self._draw_unbounded_integer() 

809 return probe 

810 

811 return self._draw_bounded_integer(min_value, max_value) 

812 

813 def draw_float( 

814 self, 

815 *, 

816 min_value: float = -math.inf, 

817 max_value: float = math.inf, 

818 allow_nan: bool = True, 

819 smallest_nonzero_magnitude: float, 

820 ) -> float: 

821 assert self._random is not None 

822 

823 constraints: FloatConstraints = { 

824 "min_value": min_value, 

825 "max_value": max_value, 

826 "allow_nan": allow_nan, 

827 "smallest_nonzero_magnitude": smallest_nonzero_magnitude, 

828 } 

829 if ( 

830 constant := self._maybe_draw_constant("float", constraints, p=0.15) 

831 ) is not None: 

832 assert isinstance(constant, float) 

833 return constant 

834 

835 # on top of the probability to draw a constant float, we independently 

836 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values. 

837 weird_floats = [ 

838 f 

839 for f in [ 

840 0.0, 

841 -0.0, 

842 math.inf, 

843 -math.inf, 

844 math.nan, 

845 -math.nan, 

846 SIGNALING_NAN, 

847 -SIGNALING_NAN, 

848 min_value, 

849 next_up(min_value), 

850 min_value + 1, 

851 max_value - 1, 

852 next_down(max_value), 

853 max_value, 

854 ] 

855 if choice_permitted(f, constraints) 

856 ] 

857 

858 if weird_floats and self._random.random() < 0.05: 

859 return self._random.choice(weird_floats) 

860 

861 clamper = make_float_clamper( 

862 min_value, 

863 max_value, 

864 smallest_nonzero_magnitude=smallest_nonzero_magnitude, 

865 allow_nan=allow_nan, 

866 ) 

867 

868 result = self._draw_float() 

869 if allow_nan and math.isnan(result): 

870 clamped = result # pragma: no cover 

871 else: 

872 clamped = clamper(result) 

873 if float_to_int(clamped) != float_to_int(result) and not ( 

874 math.isnan(result) and allow_nan 

875 ): 

876 result = clamped 

877 return result 

878 

879 def draw_string( 

880 self, 

881 intervals: IntervalSet, 

882 *, 

883 min_size: int = 0, 

884 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

885 ) -> str: 

886 assert self._cd is not None 

887 assert self._random is not None 

888 

889 if len(intervals) == 0: 

890 return "" 

891 

892 if ( 

893 constant := self._maybe_draw_constant( 

894 "string", 

895 {"intervals": intervals, "min_size": min_size, "max_size": max_size}, 

896 ) 

897 ) is not None: 

898 assert isinstance(constant, str) 

899 return constant 

900 

901 average_size = min( 

902 max(min_size * 2, min_size + 5), 

903 0.5 * (min_size + max_size), 

904 ) 

905 

906 chars = [] 

907 elements = many( 

908 self._cd, 

909 min_size=min_size, 

910 max_size=max_size, 

911 average_size=average_size, 

912 observe=False, 

913 ) 

914 while elements.more(): 

915 if len(intervals) > 256: 

916 if self.draw_boolean(0.2): 

917 i = self._random.randint(256, len(intervals) - 1) 

918 else: 

919 i = self._random.randint(0, 255) 

920 else: 

921 i = self._random.randint(0, len(intervals) - 1) 

922 

923 chars.append(intervals.char_in_shrink_order(i)) 

924 

925 return "".join(chars) 

926 

927 def draw_bytes( 

928 self, 

929 min_size: int = 0, 

930 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

931 ) -> bytes: 

932 assert self._cd is not None 

933 assert self._random is not None 

934 

935 if ( 

936 constant := self._maybe_draw_constant( 

937 "bytes", {"min_size": min_size, "max_size": max_size} 

938 ) 

939 ) is not None: 

940 assert isinstance(constant, bytes) 

941 return constant 

942 

943 buf = bytearray() 

944 average_size = min( 

945 max(min_size * 2, min_size + 5), 

946 0.5 * (min_size + max_size), 

947 ) 

948 elements = many( 

949 self._cd, 

950 min_size=min_size, 

951 max_size=max_size, 

952 average_size=average_size, 

953 observe=False, 

954 ) 

955 while elements.more(): 

956 buf += self._random.randbytes(1) 

957 

958 return bytes(buf) 

959 

960 def _draw_float(self) -> float: 

961 assert self._random is not None 

962 

963 f = lex_to_float(self._random.getrandbits(64)) 

964 sign = 1 if self._random.getrandbits(1) else -1 

965 return sign * f 

966 

967 def _draw_unbounded_integer(self) -> int: 

968 assert self._cd is not None 

969 assert self._random is not None 

970 

971 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)] 

972 

973 r = self._random.getrandbits(size) 

974 sign = r & 1 

975 r >>= 1 

976 if sign: 

977 r = -r 

978 return r 

979 

980 def _draw_bounded_integer( 

981 self, 

982 lower: int, 

983 upper: int, 

984 *, 

985 vary_size: bool = True, 

986 ) -> int: 

987 assert lower <= upper 

988 assert self._cd is not None 

989 assert self._random is not None 

990 

991 if lower == upper: 

992 return lower 

993 

994 bits = (upper - lower).bit_length() 

995 if bits > 24 and vary_size and self._random.random() < 7 / 8: 

996 # For large ranges, we combine the uniform random distribution 

997 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our 

998 # choice of unicode characters is uniform but the 32bit distribution is not. 

999 idx = INT_SIZES_SAMPLER.sample(self._cd) 

1000 cap_bits = min(bits, INT_SIZES[idx]) 

1001 upper = min(upper, lower + 2**cap_bits - 1) 

1002 return self._random.randint(lower, upper) 

1003 

1004 return self._random.randint(lower, upper) 

1005 

1006 

1007# Masks for masking off the first byte of an n-bit buffer. 

1008# The appropriate mask is stored at position n % 8. 

1009BYTE_MASKS = [(1 << n) - 1 for n in range(8)] 

1010BYTE_MASKS[0] = 255 

1011 

1012 

1013class BytestringProvider(PrimitiveProvider): 

1014 lifetime = "test_case" 

1015 

1016 def __init__( 

1017 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes 

1018 ): 

1019 super().__init__(conjecturedata) 

1020 self.bytestring = bytestring 

1021 self.index = 0 

1022 self.drawn = bytearray() 

1023 

1024 def _draw_bits(self, n): 

1025 if n == 0: # pragma: no cover 

1026 return 0 

1027 n_bytes = bits_to_bytes(n) 

1028 if self.index + n_bytes > len(self.bytestring): 

1029 self._cd.mark_overrun() 

1030 buf = bytearray(self.bytestring[self.index : self.index + n_bytes]) 

1031 self.index += n_bytes 

1032 

1033 buf[0] &= BYTE_MASKS[n % 8] 

1034 buf = bytes(buf) 

1035 self.drawn += buf 

1036 return int_from_bytes(buf) 

1037 

1038 def draw_boolean( 

1039 self, 

1040 p: float = 0.5, 

1041 ) -> bool: 

1042 if p <= 0: 

1043 return False 

1044 if p >= 1: 

1045 return True 

1046 

1047 # always use one byte for booleans to maintain constant draw size. 

1048 # If a probability requires more than 8 bits to represent precisely, 

1049 # the result will be slightly biased, but not badly. 

1050 bits = 8 

1051 size = 2**bits 

1052 # always leave at least one value that can be true, even for very small 

1053 # p. 

1054 falsey = max(1, math.floor(size * (1 - p))) 

1055 n = self._draw_bits(bits) 

1056 return n >= falsey 

1057 

1058 def draw_integer( 

1059 self, 

1060 min_value: Optional[int] = None, 

1061 max_value: Optional[int] = None, 

1062 *, 

1063 weights: Optional[dict[int, float]] = None, 

1064 shrink_towards: int = 0, 

1065 ) -> int: 

1066 assert self._cd is not None 

1067 

1068 # we explicitly ignore integer weights for now, as they are likely net 

1069 # negative on fuzzer performance. 

1070 

1071 if min_value is None and max_value is None: 

1072 min_value = -(2**127) 

1073 max_value = 2**127 - 1 

1074 elif min_value is None: 

1075 assert max_value is not None 

1076 min_value = max_value - 2**64 

1077 elif max_value is None: 

1078 assert min_value is not None 

1079 max_value = min_value + 2**64 

1080 

1081 if min_value == max_value: 

1082 return min_value 

1083 

1084 bits = (max_value - min_value).bit_length() 

1085 value = self._draw_bits(bits) 

1086 while not (min_value <= value <= max_value): 

1087 value = self._draw_bits(bits) 

1088 return value 

1089 

1090 def draw_float( 

1091 self, 

1092 *, 

1093 min_value: float = -math.inf, 

1094 max_value: float = math.inf, 

1095 allow_nan: bool = True, 

1096 smallest_nonzero_magnitude: float, 

1097 ) -> float: 

1098 n = self._draw_bits(64) 

1099 sign = -1 if n >> 64 else 1 

1100 f = sign * lex_to_float(n & ((1 << 64) - 1)) 

1101 clamper = make_float_clamper( 

1102 min_value, 

1103 max_value, 

1104 smallest_nonzero_magnitude=smallest_nonzero_magnitude, 

1105 allow_nan=allow_nan, 

1106 ) 

1107 return clamper(f) 

1108 

1109 def _draw_collection(self, min_size, max_size, *, alphabet_size): 

1110 average_size = min( 

1111 max(min_size * 2, min_size + 5), 

1112 0.5 * (min_size + max_size), 

1113 ) 

1114 elements = many( 

1115 self._cd, 

1116 min_size=min_size, 

1117 max_size=max_size, 

1118 average_size=average_size, 

1119 observe=False, 

1120 ) 

1121 values = [] 

1122 while elements.more(): 

1123 values.append(self.draw_integer(0, alphabet_size - 1)) 

1124 return values 

1125 

1126 def draw_string( 

1127 self, 

1128 intervals: IntervalSet, 

1129 *, 

1130 min_size: int = 0, 

1131 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

1132 ) -> str: 

1133 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals)) 

1134 return "".join(chr(intervals[v]) for v in values) 

1135 

1136 def draw_bytes( 

1137 self, 

1138 min_size: int = 0, 

1139 max_size: int = COLLECTION_DEFAULT_MAX_SIZE, 

1140 ) -> bytes: 

1141 values = self._draw_collection(min_size, max_size, alphabet_size=2**8) 

1142 return bytes(values) 

1143 

1144 

1145class URandom(Random): 

1146 # we reimplement a Random instance instead of using SystemRandom, because 

1147 # os.urandom is not guaranteed to read from /dev/urandom. 

1148 

1149 @staticmethod 

1150 def _urandom(size: int) -> bytes: 

1151 with open("/dev/urandom", "rb") as f: 

1152 return f.read(size) 

1153 

1154 def getrandbits(self, k: int) -> int: 

1155 assert k >= 0 

1156 size = bits_to_bytes(k) 

1157 n = int_from_bytes(self._urandom(size)) 

1158 # trim excess bits 

1159 return n >> (size * 8 - k) 

1160 

1161 def random(self) -> float: 

1162 # adapted from random.SystemRandom.random 

1163 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53) 

1164 

1165 

1166class URandomProvider(HypothesisProvider): 

1167 # A provider which reads directly from /dev/urandom as its source of randomness. 

1168 # This provider exists to provide better Hypothesis integration with Antithesis 

1169 # (https://antithesis.com/), which interprets calls to /dev/urandom as the 

1170 # randomness to mutate. This effectively gives Antithesis control over 

1171 # the choices made by the URandomProvider. 

1172 # 

1173 # If you are not using Antithesis, you probably don't want to use this 

1174 # provider. 

1175 

1176 def __init__(self, conjecturedata: Optional["ConjectureData"], /): 

1177 super().__init__(conjecturedata) 

1178 if WINDOWS: # pragma: no cover 

1179 warnings.warn( 

1180 "/dev/urandom is not available on windows. Falling back to " 

1181 'standard PRNG generation (equivalent to backend="hypothesis").', 

1182 HypothesisWarning, 

1183 stacklevel=1, 

1184 ) 

1185 # don't overwrite the HypothesisProvider self._random attribute in 

1186 # this case 

1187 else: 

1188 self._random = URandom()