1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from types import ModuleType
22from typing import (
23 TYPE_CHECKING,
24 Any,
25 ClassVar,
26 Literal,
27 Optional,
28 TypedDict,
29 TypeVar,
30 Union,
31)
32
33from sortedcontainers import SortedSet
34
35from hypothesis.errors import HypothesisWarning
36from hypothesis.internal.cache import LRUCache
37from hypothesis.internal.compat import WINDOWS, int_from_bytes
38from hypothesis.internal.conjecture.choice import (
39 ChoiceConstraintsT,
40 ChoiceT,
41 ChoiceTypeT,
42 FloatConstraints,
43 choice_constraints_key,
44 choice_permitted,
45)
46from hypothesis.internal.conjecture.floats import lex_to_float
47from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
48from hypothesis.internal.conjecture.utils import (
49 INT_SIZES,
50 INT_SIZES_SAMPLER,
51 Sampler,
52 many,
53)
54from hypothesis.internal.constants_ast import (
55 Constants,
56 constants_from_module,
57 is_local_module_file,
58)
59from hypothesis.internal.floats import (
60 SIGNALING_NAN,
61 float_to_int,
62 make_float_clamper,
63 next_down,
64 next_up,
65)
66from hypothesis.internal.intervalsets import IntervalSet
67from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
68
69if TYPE_CHECKING:
70 from typing import TypeAlias
71
72 from hypothesis.internal.conjecture.data import ConjectureData
73 from hypothesis.internal.constants_ast import ConstantT
74
75T = TypeVar("T")
76LifetimeT: "TypeAlias" = Literal["test_case", "test_function"]
77COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
78
79
80#: Registered Hypothesis backends. This is a dictionary where keys are the name
81#: to be used in |settings.backend|. The value of a key can be either:
82#:
83#: * A string corresponding to an importable absolute path of a
84#: |PrimitiveProvider| subclass
85#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
86#: class)
87#:
88#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
89#: when the backend is requested by a test's |settings.backend| value.
90#:
91#: For example, the default Hypothesis backend is registered as:
92#:
93#: .. code-block:: python
94#:
95#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
96#:
97#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
98#: # or
99#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
100#:
101#: And can be used with:
102#:
103#: .. code-block:: python
104#:
105#: from hypothesis import given, settings, strategies as st
106#:
107#: @given(st.integers())
108#: @settings(backend="hypothesis")
109#: def f(n):
110#: pass
111#:
112#: Though, as ``backend="hypothesis"`` is the default setting, the above would
113#: typically not have any effect.
114#:
115#: For third-party backend authors, we strongly encourage ensuring that
116#: ``import hypothesis`` does not automatically import the expensive parts of
117#: your package, by:
118#:
119#: - setting a string path here, instead of a provider class
120#: - ensuring the registered hypothesis plugin path references a path which just
121#: sets AVAILABLE_PROVIDERS and does not import your package
122AVAILABLE_PROVIDERS: dict[str, Union[str, type["PrimitiveProvider"]]] = {
123 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
124 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
125}
126# cache the choice_permitted constants for a particular set of constraints.
127CacheKeyT: "TypeAlias" = tuple[ChoiceTypeT, tuple[Any, ...]]
128CacheValueT: "TypeAlias" = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
129CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
130
131_constant_floats = (
132 [
133 0.5,
134 1.1,
135 1.5,
136 1.9,
137 1.0 / 3,
138 10e6,
139 10e-6,
140 1.175494351e-38,
141 next_up(0.0),
142 float_info.min,
143 float_info.max,
144 3.402823466e38,
145 9007199254740992.0,
146 1 - 10e-6,
147 2 + 10e-6,
148 1.192092896e-07,
149 2.2204460492503131e-016,
150 ]
151 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
152 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
153)
154_constant_floats.extend([-x for x in _constant_floats])
155assert all(isinstance(f, float) for f in _constant_floats)
156
157_constant_strings = {
158 # strings which can be interpreted as code / logic
159 "undefined",
160 "null",
161 "NULL",
162 "nil",
163 "NIL",
164 "true",
165 "false",
166 "True",
167 "False",
168 "TRUE",
169 "FALSE",
170 "None",
171 "none",
172 "if",
173 "then",
174 "else",
175 # strings which can be interpreted as a number
176 "0",
177 "1e100",
178 "0..0",
179 "0/0",
180 "1/0",
181 "+0.0",
182 "Infinity",
183 "-Infinity",
184 "Inf",
185 "INF",
186 "NaN",
187 "9" * 30,
188 # common ascii characters
189 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
190 # common unicode characters
191 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
192 # characters which increase in length when lowercased
193 "Ⱥ",
194 "Ⱦ",
195 # ligatures
196 "æœÆŒffʤʨß"
197 # emoticons
198 "(╯°□°)╯︵ ┻━┻)",
199 # emojis
200 "😍",
201 "🇺🇸",
202 # emoji modifiers
203 "🏻" # U+1F3FB Light Skin Tone,
204 "👍🏻", # 👍 followed by U+1F3FB
205 # RTL text
206 "الكل في المجمو عة",
207 # Ogham text, which contains the only character in the Space Separators
208 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
209 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
210 # readable variations on text (bolt/italic/script)
211 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
212 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
213 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
214 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
215 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
216 # upsidown text
217 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
218 # reserved strings in windows
219 "NUL",
220 "COM1",
221 "LPT1",
222 # scunthorpe problem
223 "Scunthorpe",
224 # zalgo text
225 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
226 #
227 # examples from https://faultlore.com/blah/text-hates-you/
228 "मनीष منش",
229 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
230 "lorem لا بسم الله ipsum 你好1234你好",
231}
232
233
234# we don't actually care what order the constants are sorted in, just that the
235# ordering is deterministic.
236GLOBAL_CONSTANTS = Constants(
237 integers=SortedSet(),
238 floats=SortedSet(_constant_floats, key=float_to_int),
239 bytes=SortedSet(),
240 strings=SortedSet(_constant_strings),
241)
242
243_local_constants = Constants(
244 integers=SortedSet(),
245 floats=SortedSet(key=float_to_int),
246 bytes=SortedSet(),
247 strings=SortedSet(),
248)
249# modules that we've already seen and processed for local constants. These are
250# are all modules, not necessarily local ones. This lets us quickly see which
251# modules are new without an expensive path.resolve() or is_local_module_file
252# cache lookup.
253_seen_modules: set[ModuleType] = set()
254_sys_modules_len: Optional[int] = None
255
256
257def _get_local_constants() -> Constants:
258 global _sys_modules_len, _local_constants
259
260 if sys.platform == "emscripten": # pragma: no cover
261 # pyodide builds bundle the stdlib in a nonstandard location, like
262 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
263 # the stdlib as local code and slowing down on emscripten, instead return
264 # that nothing is local.
265 #
266 # pyodide may provide some way to distinguish stdlib/third-party/local
267 # code. I haven't looked into it. If they do, we should correctly implement
268 # ModuleLocation for pyodide instead of this.
269 return _local_constants
270
271 count_constants = len(_local_constants)
272 # We call this function once per HypothesisProvider instance, i.e. once per
273 # input, so it needs to be performant. The logic here is more complicated
274 # than necessary because of this.
275 #
276 # First, we check whether there are any new modules with a very cheap length
277 # check. This check can be fooled if a module is added while another module is
278 # removed, but the more correct check against tuple(sys.modules.keys()) is
279 # substantially more expensive. Such a new module would eventually be discovered
280 # if / when the length changes again in the future.
281 #
282 # If the length has changed, we find just modules we haven't seen before. Of
283 # those, we find the ones which correspond to local modules, and extract their
284 # constants.
285
286 # careful: store sys.modules length when we first check to avoid race conditions
287 # with other threads loading a module before we set _sys_modules_len.
288 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
289 # set(_seen_modules) shouldn't typically be required, but I have run into
290 # a "set changed size during iteration" error here when running
291 # test_provider_conformance_crosshair.
292 new_modules = set(sys.modules.values()) - set(_seen_modules)
293 # Repeated SortedSet unions are expensive. Do the initial unions on a
294 # set(), then do a one-time union with _local_constants after.
295 new_constants = Constants()
296 for module in new_modules:
297 if (
298 module_file := getattr(module, "__file__", None)
299 ) is not None and is_local_module_file(module_file):
300 new_constants |= constants_from_module(module)
301 _local_constants |= new_constants
302 _seen_modules.update(new_modules)
303 _sys_modules_len = sys_modules_len
304
305 # if we add any new constant, invalidate the constant cache for permitted values.
306 # A more efficient approach would be invalidating just the keys with this
307 # choice_type.
308 if len(_local_constants) > count_constants:
309 CONSTANTS_CACHE.cache.clear()
310
311 return _local_constants
312
313
314@contextmanager
315def with_register_backend(name, provider_cls):
316 try:
317 AVAILABLE_PROVIDERS[name] = provider_cls
318 yield
319 finally:
320 del AVAILABLE_PROVIDERS[name]
321
322
323class _BackendInfoMsg(TypedDict):
324 type: InfoObservationType
325 title: str
326 content: Union[str, dict[str, Any]]
327
328
329# TODO_DOCS: link to choice sequence explanation page
330
331
332class PrimitiveProvider(abc.ABC):
333 """
334 |PrimitiveProvider| is the implementation interface of a
335 :ref:`Hypothesis backend <alternative-backends>`.
336
337 A |PrimitiveProvider| is required to implement the following five
338 ``draw_*`` methods:
339
340 * |PrimitiveProvider.draw_integer|
341 * |PrimitiveProvider.draw_boolean|
342 * |PrimitiveProvider.draw_float|
343 * |PrimitiveProvider.draw_string|
344 * |PrimitiveProvider.draw_bytes|
345
346 Each strategy in Hypothesis generates values by drawing a series of choices
347 from these five methods. By overriding them, a |PrimitiveProvider| can control
348 the distribution of inputs generated by Hypothesis.
349
350 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
351 which uses an SMT solver to generate inputs that uncover new branches.
352
353 Once you implement a |PrimitiveProvider|, you can make it available for use
354 through |AVAILABLE_PROVIDERS|.
355 """
356
357 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
358 #: or ``test_case``.
359 #:
360 #: If ``test_function`` (the default), a single provider instance will be
361 #: instantiated and used for the entirety of each test function (i.e., roughly
362 #: one provider per |@given| annotation). This can be useful for tracking state
363 #: over the entirety of a test function.
364 #:
365 #: If ``test_case``, a new provider instance will be instantiated and used for
366 #: each input Hypothesis generates.
367 #:
368 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
369 #: be ``None`` for a lifetime of ``test_function``, and an instance of
370 #: ``ConjectureData`` for a lifetime of ``test_case``.
371 #:
372 #: Third-party providers likely want to set a lifetime of ``test_function``.
373 lifetime: ClassVar[LifetimeT] = "test_function"
374
375 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
376 #: which record operations performed on them in order to discover new paths.
377 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
378 #: with symbolic choices returned by the provider in any way that would force
379 #: the solver to narrow the range of possible values for that symbolic.
380 #:
381 #: Setting this to ``True`` disables some hypothesis features and optimizations.
382 #: Only set this to ``True`` if it is necessary for your backend.
383 avoid_realization: ClassVar[bool] = False
384
385 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
386 #: callback via |add_observability_callback|, enabling observability during
387 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
388 #: will never be called by Hypothesis.
389 #:
390 #: The opt-in behavior of observability is because enabling observability
391 #: might increase runtime or memory usage.
392 add_observability_callback: ClassVar[bool] = False
393
394 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
395 self._cd = conjecturedata
396
397 @abc.abstractmethod
398 def draw_boolean(
399 self,
400 p: float = 0.5,
401 ) -> bool:
402 """
403 Draw a boolean choice.
404
405 Parameters
406 ----------
407 p: float
408 The probability of returning ``True``. Between 0 and 1 inclusive.
409
410 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
411 Hypothesis, and may be ignored by the backend.
412
413 If ``0``, the provider must return ``False``. If ``1``, the provider
414 must return ``True``.
415 """
416 raise NotImplementedError
417
418 @abc.abstractmethod
419 def draw_integer(
420 self,
421 min_value: Optional[int] = None,
422 max_value: Optional[int] = None,
423 *,
424 weights: Optional[dict[int, float]] = None,
425 shrink_towards: int = 0,
426 ) -> int:
427 """
428 Draw an integer choice.
429
430 Parameters
431 ----------
432 min_value : int | None
433 (Inclusive) lower bound on the integer value. If ``None``, there is
434 no lower bound.
435 max_value : int | None
436 (Inclusive) upper bound on the integer value. If ``None``, there is
437 no upper bound.
438 weights: dict[int, float] | None
439 Maps keys in the range [``min_value``, ``max_value``] to the probability
440 of returning that key.
441 shrink_towards: int
442 The integer to shrink towards. This is not used during generation and
443 can be ignored by backends.
444 """
445 raise NotImplementedError
446
447 @abc.abstractmethod
448 def draw_float(
449 self,
450 *,
451 min_value: float = -math.inf,
452 max_value: float = math.inf,
453 allow_nan: bool = True,
454 smallest_nonzero_magnitude: float,
455 ) -> float:
456 """
457 Draw a float choice.
458
459 Parameters
460 ----------
461 min_value : float
462 (Inclusive) lower bound on the float value.
463 max_value : float
464 (Inclusive) upper bound on the float value.
465 allow_nan : bool
466 If ``False``, it is invalid to return ``math.nan``.
467 smallest_nonzero_magnitude : float
468 The smallest allowed nonzero magnitude. ``draw_float`` should not
469 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
470 """
471 raise NotImplementedError
472
473 @abc.abstractmethod
474 def draw_string(
475 self,
476 intervals: IntervalSet,
477 *,
478 min_size: int = 0,
479 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
480 ) -> str:
481 """
482 Draw a string choice.
483
484 Parameters
485 ----------
486 intervals : IntervalSet
487 The set of codepoints to sample from.
488 min_size : int
489 (Inclusive) lower bound on the string length.
490 max_size : int
491 (Inclusive) upper bound on the string length.
492 """
493 raise NotImplementedError
494
495 @abc.abstractmethod
496 def draw_bytes(
497 self,
498 min_size: int = 0,
499 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
500 ) -> bytes:
501 """
502 Draw a bytes choice.
503
504 Parameters
505 ----------
506 min_size : int
507 (Inclusive) lower bound on the bytes length.
508 max_size : int
509 (Inclusive) upper bound on the bytes length.
510 """
511 raise NotImplementedError
512
513 def per_test_case_context_manager(self) -> AbstractContextManager:
514 """
515 Returns a context manager which will be entered each time Hypothesis
516 starts generating and executing one test case, and exited when that test
517 case finishes generating and executing, including if any exception is
518 thrown.
519
520 In the lifecycle of a Hypothesis test, this is called before
521 generating strategy values for each test case. This is just before any
522 :ref:`custom executor <custom-function-execution>` is called.
523
524 Even if not returning a custom context manager, |PrimitiveProvider|
525 subclasses are welcome to override this method to know when Hypothesis
526 starts and ends the execution of a single test case.
527 """
528 return contextlib.nullcontext()
529
530 def realize(self, value: T, *, for_failure: bool = False) -> T:
531 """
532 Called whenever hypothesis requires a concrete (non-symbolic) value from
533 a potentially symbolic value. Hypothesis will not check that ``value`` is
534 symbolic before calling ``realize``, so you should handle the case where
535 ``value`` is non-symbolic.
536
537 The returned value should be non-symbolic. If you cannot provide a value,
538 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
539
540 If ``for_failure`` is ``True``, the value is associated with a failing example.
541 In this case, the backend should spend substantially more effort when
542 attempting to realize the value, since it is important to avoid discarding
543 failing examples. Backends may still raise |BackendCannotProceed| when
544 ``for_failure`` is ``True``, if realization is truly impossible or if
545 realization takes significantly longer than expected (say, 5 minutes).
546 """
547 return value
548
549 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
550 """
551 Called when Hypothesis has discovered a choice sequence which the provider
552 may wish to enqueue to replay under its own instrumentation when we next
553 ask to generate a test case, rather than generating one from scratch.
554
555 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
556 of high-code-coverage inputs discovered by
557 `HypoFuzz <https://hypofuzz.com/>`_.
558 """
559 return None
560
561 def observe_test_case(self) -> dict[str, Any]:
562 """Called at the end of the test case when :ref:`observability
563 <observability>` is enabled.
564
565 The return value should be a non-symbolic json-encodable dictionary,
566 and will be included in observations as ``observation["metadata"]["backend"]``.
567 """
568 return {}
569
570 def observe_information_messages(
571 self, *, lifetime: LifetimeT
572 ) -> Iterable[_BackendInfoMsg]:
573 """Called at the end of each test case and again at end of the test function.
574
575 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
576 dictionaries to be delivered as individual information messages. Hypothesis
577 adds the ``run_start`` timestamp and ``property`` name for you.
578 """
579 assert lifetime in ("test_case", "test_function")
580 yield from []
581
582 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
583 """
584 Called at the end of each test case which uses this provider, with the same
585 ``observation["type"] == "test_case"`` observation that is passed to
586 other callbacks added via |add_observability_callback|. This method is not
587 called with ``observation["type"] in {"info", "alert", "error"}``
588 observations.
589
590 .. important::
591
592 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
593 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
594
595 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
596 observability might increase runtime or memory usage.
597
598 Calls to this method are guaranteed to alternate with calls to
599 |PrimitiveProvider.per_test_case_context_manager|. For example:
600
601 .. code-block:: python
602
603 # test function starts
604 per_test_case_context_manager()
605 on_observation()
606 per_test_case_context_manager()
607 on_observation()
608 ...
609 # test function ends
610
611 Note that |PrimitiveProvider.on_observation| will not be called for test
612 cases which did not use this provider during generation, for example
613 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
614 to the standard Hypothesis backend after this backend raised too many
615 |BackendCannotProceed| exceptions.
616 """
617
618 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
619 """Marks the beginning of a semantically meaningful span of choices.
620
621 Spans are a depth-first tree structure. A span is opened by a call to
622 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
623 closes the most recently opened span. So the following sequence of calls:
624
625 .. code-block:: python
626
627 span_start(label=1)
628 n1 = draw_integer()
629 span_start(label=2)
630 b1 = draw_boolean()
631 n2 = draw_integer()
632 span_end()
633 f1 = draw_float()
634 span_end()
635
636 produces the following two spans of choices:
637
638 .. code-block::
639
640 1: [n1, b1, n2, f1]
641 2: [b1, n2]
642
643 Hypothesis uses spans to denote "semantically meaningful" sequences of
644 choices. For instance, Hypothesis opens a span for the sequence of choices
645 made while drawing from each strategy. Not every span corresponds to a
646 strategy; the generation of e.g. each element in |st.lists| is also marked
647 with a span, among others.
648
649 ``label`` is an opaque integer, which has no defined semantics.
650 The only guarantee made by Hypothesis is that all spans with the same
651 "meaning" will share the same ``label``. So all spans from the same
652 strategy will share the same label, as will e.g. the spans for |st.lists|
653 elements.
654
655 Providers can track calls to |PrimitiveProvider.span_start| and
656 |PrimitiveProvider.span_end| to learn something about the semantics of
657 the test's choice sequence. For instance, a provider could track the depth
658 of the span tree, or the number of unique labels, which says something about
659 the complexity of the choices being generated. Or a provider could track
660 the span tree across test cases in order to determine what strategies are
661 being used in what contexts.
662
663 It is possible for Hypothesis to start and immediately stop a span,
664 without calling a ``draw_*`` method in between. These spans contain zero
665 choices.
666
667 Hypothesis will always balance the number of calls to
668 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
669 to |PrimitiveProvider.span_start| will always be followed by a call to
670 |PrimitiveProvider.span_end| before the end of the test case.
671
672 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
673 internally.
674 """
675
676 def span_end(self, discard: bool, /) -> None: # noqa: B027
677 """Marks the end of a semantically meaningful span of choices.
678
679 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
680 as unlikely to contribute to the input data as seen by the user's test.
681 Note however that side effects can make this determination unsound.
682
683 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
684 internally.
685 """
686
687
688class HypothesisProvider(PrimitiveProvider):
689 lifetime = "test_case"
690
691 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
692 super().__init__(conjecturedata)
693 self._random = None if self._cd is None else self._cd._random
694
695 @cached_property
696 def _local_constants(self):
697 # defer computation of local constants until/if we need it
698 return _get_local_constants()
699
700 def _maybe_draw_constant(
701 self,
702 choice_type: ChoiceTypeT,
703 constraints: ChoiceConstraintsT,
704 *,
705 p: float = 0.05,
706 ) -> Optional["ConstantT"]:
707 assert self._random is not None
708 assert choice_type != "boolean"
709 # check whether we even want a constant before spending time computing
710 # and caching the allowed constants.
711 if self._random.random() > p:
712 return None
713
714 # note: this property access results in computation being done
715 assert self._local_constants is not None
716
717 key = (choice_type, choice_constraints_key(choice_type, constraints))
718 if key not in CONSTANTS_CACHE:
719 CONSTANTS_CACHE[key] = (
720 tuple(
721 choice
722 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
723 if choice_permitted(choice, constraints)
724 ),
725 tuple(
726 choice
727 for choice in self._local_constants.set_for_type(choice_type)
728 if choice_permitted(choice, constraints)
729 ),
730 )
731
732 # split constants into two pools, so we still have a good chance to draw
733 # global constants even if there are many local constants.
734 (global_constants, local_constants) = CONSTANTS_CACHE[key]
735 constants_lists = ([global_constants] if global_constants else []) + (
736 [local_constants] if local_constants else []
737 )
738 if not constants_lists:
739 return None
740
741 # At this point, we've decided to use a constant. Now we select which pool
742 # to draw that constant from.
743 #
744 # Note that this approach has a different probability distribution than
745 # attempting a random.random for both global_constants and local_constants.
746 constants = self._random.choice(constants_lists)
747 return self._random.choice(constants)
748
749 def draw_boolean(
750 self,
751 p: float = 0.5,
752 ) -> bool:
753 assert self._random is not None
754
755 if p <= 0:
756 return False
757 if p >= 1:
758 return True
759
760 return self._random.random() < p
761
762 def draw_integer(
763 self,
764 min_value: Optional[int] = None,
765 max_value: Optional[int] = None,
766 *,
767 weights: Optional[dict[int, float]] = None,
768 shrink_towards: int = 0,
769 ) -> int:
770 assert self._cd is not None
771 if (
772 constant := self._maybe_draw_constant(
773 "integer",
774 {
775 "min_value": min_value,
776 "max_value": max_value,
777 "weights": weights,
778 "shrink_towards": shrink_towards,
779 },
780 )
781 ) is not None:
782 assert isinstance(constant, int)
783 return constant
784
785 center = 0
786 if min_value is not None:
787 center = max(min_value, center)
788 if max_value is not None:
789 center = min(max_value, center)
790
791 if weights is not None:
792 assert min_value is not None
793 assert max_value is not None
794
795 # format of weights is a mapping of ints to p, where sum(p) < 1.
796 # The remaining probability mass is uniformly distributed over
797 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
798 # but simplifies things).
799 #
800 # We assert that sum(p) is strictly less than 1 because it simplifies
801 # handling forced values when we can force into the unmapped probability
802 # mass. We should eventually remove this restriction.
803 sampler = Sampler(
804 [1 - sum(weights.values()), *weights.values()], observe=False
805 )
806 # if we're forcing, it's easiest to force into the unmapped probability
807 # mass and then force the drawn value after.
808 idx = sampler.sample(self._cd)
809
810 if idx == 0:
811 return self._draw_bounded_integer(min_value, max_value)
812 # implicit reliance on dicts being sorted for determinism
813 return list(weights)[idx - 1]
814
815 if min_value is None and max_value is None:
816 return self._draw_unbounded_integer()
817
818 if min_value is None:
819 assert max_value is not None
820 probe = max_value + 1
821 while max_value < probe:
822 probe = center + self._draw_unbounded_integer()
823 return probe
824
825 if max_value is None:
826 assert min_value is not None
827 probe = min_value - 1
828 while probe < min_value:
829 probe = center + self._draw_unbounded_integer()
830 return probe
831
832 return self._draw_bounded_integer(min_value, max_value)
833
834 def draw_float(
835 self,
836 *,
837 min_value: float = -math.inf,
838 max_value: float = math.inf,
839 allow_nan: bool = True,
840 smallest_nonzero_magnitude: float,
841 ) -> float:
842 assert self._random is not None
843
844 constraints: FloatConstraints = {
845 "min_value": min_value,
846 "max_value": max_value,
847 "allow_nan": allow_nan,
848 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
849 }
850 if (
851 constant := self._maybe_draw_constant("float", constraints, p=0.15)
852 ) is not None:
853 assert isinstance(constant, float)
854 return constant
855
856 # on top of the probability to draw a constant float, we independently
857 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
858 weird_floats = [
859 f
860 for f in [
861 0.0,
862 -0.0,
863 math.inf,
864 -math.inf,
865 math.nan,
866 -math.nan,
867 SIGNALING_NAN,
868 -SIGNALING_NAN,
869 min_value,
870 next_up(min_value),
871 min_value + 1,
872 max_value - 1,
873 next_down(max_value),
874 max_value,
875 ]
876 if choice_permitted(f, constraints)
877 ]
878
879 if weird_floats and self._random.random() < 0.05:
880 return self._random.choice(weird_floats)
881
882 clamper = make_float_clamper(
883 min_value,
884 max_value,
885 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
886 allow_nan=allow_nan,
887 )
888
889 result = self._draw_float()
890 if allow_nan and math.isnan(result):
891 clamped = result # pragma: no cover
892 else:
893 clamped = clamper(result)
894 if float_to_int(clamped) != float_to_int(result) and not (
895 math.isnan(result) and allow_nan
896 ):
897 result = clamped
898 return result
899
900 def draw_string(
901 self,
902 intervals: IntervalSet,
903 *,
904 min_size: int = 0,
905 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
906 ) -> str:
907 assert self._cd is not None
908 assert self._random is not None
909
910 if len(intervals) == 0:
911 return ""
912
913 if (
914 constant := self._maybe_draw_constant(
915 "string",
916 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
917 )
918 ) is not None:
919 assert isinstance(constant, str)
920 return constant
921
922 average_size = min(
923 max(min_size * 2, min_size + 5),
924 0.5 * (min_size + max_size),
925 )
926
927 chars = []
928 elements = many(
929 self._cd,
930 min_size=min_size,
931 max_size=max_size,
932 average_size=average_size,
933 observe=False,
934 )
935 while elements.more():
936 if len(intervals) > 256:
937 if self.draw_boolean(0.2):
938 i = self._random.randint(256, len(intervals) - 1)
939 else:
940 i = self._random.randint(0, 255)
941 else:
942 i = self._random.randint(0, len(intervals) - 1)
943
944 chars.append(intervals.char_in_shrink_order(i))
945
946 return "".join(chars)
947
948 def draw_bytes(
949 self,
950 min_size: int = 0,
951 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
952 ) -> bytes:
953 assert self._cd is not None
954 assert self._random is not None
955
956 if (
957 constant := self._maybe_draw_constant(
958 "bytes", {"min_size": min_size, "max_size": max_size}
959 )
960 ) is not None:
961 assert isinstance(constant, bytes)
962 return constant
963
964 buf = bytearray()
965 average_size = min(
966 max(min_size * 2, min_size + 5),
967 0.5 * (min_size + max_size),
968 )
969 elements = many(
970 self._cd,
971 min_size=min_size,
972 max_size=max_size,
973 average_size=average_size,
974 observe=False,
975 )
976 while elements.more():
977 buf += self._random.randbytes(1)
978
979 return bytes(buf)
980
981 def _draw_float(self) -> float:
982 assert self._random is not None
983
984 f = lex_to_float(self._random.getrandbits(64))
985 sign = 1 if self._random.getrandbits(1) else -1
986 return sign * f
987
988 def _draw_unbounded_integer(self) -> int:
989 assert self._cd is not None
990 assert self._random is not None
991
992 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
993
994 r = self._random.getrandbits(size)
995 sign = r & 1
996 r >>= 1
997 if sign:
998 r = -r
999 return r
1000
1001 def _draw_bounded_integer(
1002 self,
1003 lower: int,
1004 upper: int,
1005 *,
1006 vary_size: bool = True,
1007 ) -> int:
1008 assert lower <= upper
1009 assert self._cd is not None
1010 assert self._random is not None
1011
1012 if lower == upper:
1013 return lower
1014
1015 bits = (upper - lower).bit_length()
1016 if bits > 24 and vary_size and self._random.random() < 7 / 8:
1017 # For large ranges, we combine the uniform random distribution
1018 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
1019 # choice of unicode characters is uniform but the 32bit distribution is not.
1020 idx = INT_SIZES_SAMPLER.sample(self._cd)
1021 cap_bits = min(bits, INT_SIZES[idx])
1022 upper = min(upper, lower + 2**cap_bits - 1)
1023 return self._random.randint(lower, upper)
1024
1025 return self._random.randint(lower, upper)
1026
1027
1028# Masks for masking off the first byte of an n-bit buffer.
1029# The appropriate mask is stored at position n % 8.
1030BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1031BYTE_MASKS[0] = 255
1032
1033
1034class BytestringProvider(PrimitiveProvider):
1035 lifetime = "test_case"
1036
1037 def __init__(
1038 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1039 ):
1040 super().__init__(conjecturedata)
1041 self.bytestring = bytestring
1042 self.index = 0
1043 self.drawn = bytearray()
1044
1045 def _draw_bits(self, n):
1046 if n == 0: # pragma: no cover
1047 return 0
1048 n_bytes = bits_to_bytes(n)
1049 if self.index + n_bytes > len(self.bytestring):
1050 self._cd.mark_overrun()
1051 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1052 self.index += n_bytes
1053
1054 buf[0] &= BYTE_MASKS[n % 8]
1055 buf = bytes(buf)
1056 self.drawn += buf
1057 return int_from_bytes(buf)
1058
1059 def draw_boolean(
1060 self,
1061 p: float = 0.5,
1062 ) -> bool:
1063 if p <= 0:
1064 return False
1065 if p >= 1:
1066 return True
1067
1068 # always use one byte for booleans to maintain constant draw size.
1069 # If a probability requires more than 8 bits to represent precisely,
1070 # the result will be slightly biased, but not badly.
1071 bits = 8
1072 size = 2**bits
1073 # always leave at least one value that can be true, even for very small
1074 # p.
1075 falsey = max(1, math.floor(size * (1 - p)))
1076 n = self._draw_bits(bits)
1077 return n >= falsey
1078
1079 def draw_integer(
1080 self,
1081 min_value: Optional[int] = None,
1082 max_value: Optional[int] = None,
1083 *,
1084 weights: Optional[dict[int, float]] = None,
1085 shrink_towards: int = 0,
1086 ) -> int:
1087 assert self._cd is not None
1088
1089 # we explicitly ignore integer weights for now, as they are likely net
1090 # negative on fuzzer performance.
1091
1092 if min_value is None and max_value is None:
1093 min_value = -(2**127)
1094 max_value = 2**127 - 1
1095 elif min_value is None:
1096 assert max_value is not None
1097 min_value = max_value - 2**64
1098 elif max_value is None:
1099 assert min_value is not None
1100 max_value = min_value + 2**64
1101
1102 if min_value == max_value:
1103 return min_value
1104
1105 bits = (max_value - min_value).bit_length()
1106 value = self._draw_bits(bits)
1107 while not (min_value <= value <= max_value):
1108 value = self._draw_bits(bits)
1109 return value
1110
1111 def draw_float(
1112 self,
1113 *,
1114 min_value: float = -math.inf,
1115 max_value: float = math.inf,
1116 allow_nan: bool = True,
1117 smallest_nonzero_magnitude: float,
1118 ) -> float:
1119 n = self._draw_bits(64)
1120 sign = -1 if n >> 64 else 1
1121 f = sign * lex_to_float(n & ((1 << 64) - 1))
1122 clamper = make_float_clamper(
1123 min_value,
1124 max_value,
1125 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1126 allow_nan=allow_nan,
1127 )
1128 return clamper(f)
1129
1130 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1131 average_size = min(
1132 max(min_size * 2, min_size + 5),
1133 0.5 * (min_size + max_size),
1134 )
1135 elements = many(
1136 self._cd,
1137 min_size=min_size,
1138 max_size=max_size,
1139 average_size=average_size,
1140 observe=False,
1141 )
1142 values = []
1143 while elements.more():
1144 values.append(self.draw_integer(0, alphabet_size - 1))
1145 return values
1146
1147 def draw_string(
1148 self,
1149 intervals: IntervalSet,
1150 *,
1151 min_size: int = 0,
1152 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1153 ) -> str:
1154 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1155 return "".join(chr(intervals[v]) for v in values)
1156
1157 def draw_bytes(
1158 self,
1159 min_size: int = 0,
1160 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1161 ) -> bytes:
1162 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1163 return bytes(values)
1164
1165
1166class URandom(Random):
1167 # we reimplement a Random instance instead of using SystemRandom, because
1168 # os.urandom is not guaranteed to read from /dev/urandom.
1169
1170 @staticmethod
1171 def _urandom(size: int) -> bytes:
1172 with open("/dev/urandom", "rb") as f:
1173 return f.read(size)
1174
1175 def getrandbits(self, k: int) -> int:
1176 assert k >= 0
1177 size = bits_to_bytes(k)
1178 n = int_from_bytes(self._urandom(size))
1179 # trim excess bits
1180 return n >> (size * 8 - k)
1181
1182 def random(self) -> float:
1183 # adapted from random.SystemRandom.random
1184 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1185
1186
1187class URandomProvider(HypothesisProvider):
1188 # A provider which reads directly from /dev/urandom as its source of randomness.
1189 # This provider exists to provide better Hypothesis integration with Antithesis
1190 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1191 # randomness to mutate. This effectively gives Antithesis control over
1192 # the choices made by the URandomProvider.
1193 #
1194 # If you are not using Antithesis, you probably don't want to use this
1195 # provider.
1196
1197 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1198 super().__init__(conjecturedata)
1199 if WINDOWS: # pragma: no cover
1200 warnings.warn(
1201 "/dev/urandom is not available on windows. Falling back to "
1202 'standard PRNG generation (equivalent to backend="hypothesis").',
1203 HypothesisWarning,
1204 stacklevel=1,
1205 )
1206 # don't overwrite the HypothesisProvider self._random attribute in
1207 # this case
1208 else:
1209 self._random = URandom()