1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from types import ModuleType
22from typing import (
23 TYPE_CHECKING,
24 Any,
25 ClassVar,
26 Literal,
27 Optional,
28 TypeAlias,
29 TypedDict,
30 TypeVar,
31)
32
33from sortedcontainers import SortedSet
34
35from hypothesis.errors import HypothesisWarning
36from hypothesis.internal.cache import LRUCache
37from hypothesis.internal.compat import WINDOWS, int_from_bytes
38from hypothesis.internal.conjecture.choice import (
39 ChoiceConstraintsT,
40 ChoiceT,
41 ChoiceTypeT,
42 FloatConstraints,
43 choice_constraints_key,
44 choice_permitted,
45)
46from hypothesis.internal.conjecture.floats import lex_to_float
47from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
48from hypothesis.internal.conjecture.utils import (
49 INT_SIZES,
50 INT_SIZES_SAMPLER,
51 Sampler,
52 many,
53)
54from hypothesis.internal.constants_ast import (
55 Constants,
56 constants_from_module,
57 is_local_module_file,
58)
59from hypothesis.internal.floats import (
60 SIGNALING_NAN,
61 float_to_int,
62 make_float_clamper,
63 next_down,
64 next_up,
65)
66from hypothesis.internal.intervalsets import IntervalSet
67from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
68
69if TYPE_CHECKING:
70 from hypothesis.internal.conjecture.data import ConjectureData
71 from hypothesis.internal.constants_ast import ConstantT
72
73T = TypeVar("T")
74LifetimeT: TypeAlias = Literal["test_case", "test_function"]
75COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
76
77
78#: Registered Hypothesis backends. This is a dictionary where keys are the name
79#: to be used in |settings.backend|. The value of a key can be either:
80#:
81#: * A string corresponding to an importable absolute path of a
82#: |PrimitiveProvider| subclass
83#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
84#: class)
85#:
86#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
87#: when the backend is requested by a test's |settings.backend| value.
88#:
89#: For example, the default Hypothesis backend is registered as:
90#:
91#: .. code-block:: python
92#:
93#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
94#:
95#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
96#: # or
97#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
98#:
99#: And can be used with:
100#:
101#: .. code-block:: python
102#:
103#: from hypothesis import given, settings, strategies as st
104#:
105#: @given(st.integers())
106#: @settings(backend="hypothesis")
107#: def f(n):
108#: pass
109#:
110#: Though, as ``backend="hypothesis"`` is the default setting, the above would
111#: typically not have any effect.
112#:
113#: For third-party backend authors, we strongly encourage ensuring that
114#: ``import hypothesis`` does not automatically import the expensive parts of
115#: your package, by:
116#:
117#: - setting a string path here, instead of a provider class
118#: - ensuring the registered hypothesis plugin path references a path which just
119#: sets AVAILABLE_PROVIDERS and does not import your package
120AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = {
121 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
122 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
123}
124# cache the choice_permitted constants for a particular set of constraints.
125CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]]
126CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
127CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
128
129
130_constants_integers = (
131 # powers of 2
132 [2**n for n in range(16, 66)]
133 # powers of 10
134 + [10**n for n in range(5, 20)]
135 # factorials
136 + [math.factorial(n) for n in range(9, 21)]
137 # a few primorial numbers https://en.wikipedia.org/wiki/Primorial
138 + [
139 510510,
140 6469693230,
141 304250263527210,
142 32589158477190044730,
143 ]
144)
145_constants_integers.extend(
146 [n - 1 for n in _constants_integers] + [n + 1 for n in _constants_integers]
147)
148_constants_integers.extend([-x for x in _constants_integers])
149
150# arbitrary cutoffs to keep our list bounded
151assert all(50_000 <= abs(n) <= 2**66 for n in _constants_integers)
152
153_constant_floats = (
154 [
155 0.5,
156 1.1,
157 1.5,
158 1.9,
159 1.0 / 3,
160 10e6,
161 10e-6,
162 1.175494351e-38,
163 next_up(0.0),
164 float_info.min,
165 float_info.max,
166 3.402823466e38,
167 9007199254740992.0,
168 1 - 10e-6,
169 2 + 10e-6,
170 1.192092896e-07,
171 2.2204460492503131e-016,
172 ]
173 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
174 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
175)
176_constant_floats.extend([-x for x in _constant_floats])
177assert all(isinstance(f, float) for f in _constant_floats)
178
179_constant_strings = {
180 # strings which can be interpreted as code / logic
181 "undefined",
182 "null",
183 "NULL",
184 "nil",
185 "NIL",
186 "true",
187 "false",
188 "True",
189 "False",
190 "TRUE",
191 "FALSE",
192 "None",
193 "none",
194 "if",
195 "then",
196 "else",
197 "__dict__",
198 "__proto__", # javascript
199 # strings which can be interpreted as a number
200 "0",
201 "1e100",
202 "0..0",
203 "0/0",
204 "1/0",
205 "+0.0",
206 "Infinity",
207 "-Infinity",
208 "Inf",
209 "INF",
210 "NaN",
211 "9" * 30,
212 # common ascii characters
213 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
214 # common unicode characters
215 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
216 # characters which increase in length when lowercased
217 "Ⱥ",
218 "Ⱦ",
219 # ligatures
220 "æœÆŒffʤʨß",
221 # emoticons
222 "(╯°□°)╯︵ ┻━┻)",
223 # emojis
224 "😍",
225 "🇺🇸",
226 # emoji modifiers
227 "🏻", # U+1F3FB Light Skin Tone,
228 "👍🏻", # 👍 followed by U+1F3FB
229 # RTL text
230 "الكل في المجمو عة",
231 # Ogham text, which contains the only character in the Space Separators
232 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
233 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
234 # readable variations on text (bolt/italic/script)
235 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
236 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
237 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
238 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
239 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
240 # upsidown text
241 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
242 # reserved strings in windows
243 "NUL",
244 "COM1",
245 "LPT1",
246 # scunthorpe problem
247 "Scunthorpe",
248 # zalgo text
249 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
250 #
251 # examples from https://faultlore.com/blah/text-hates-you/
252 "मनीष منش",
253 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
254 "lorem لا بسم الله ipsum 你好1234你好",
255}
256
257
258# we don't actually care what order the constants are sorted in, just that the
259# ordering is deterministic.
260GLOBAL_CONSTANTS = Constants(
261 integers=SortedSet(_constants_integers),
262 floats=SortedSet(_constant_floats, key=float_to_int),
263 bytes=SortedSet(),
264 strings=SortedSet(_constant_strings),
265)
266
267_local_constants = Constants(
268 integers=SortedSet(),
269 floats=SortedSet(key=float_to_int),
270 bytes=SortedSet(),
271 strings=SortedSet(),
272)
273# modules that we've already seen and processed for local constants. These are
274# are all modules, not necessarily local ones. This lets us quickly see which
275# modules are new without an expensive path.resolve() or is_local_module_file
276# cache lookup.
277_seen_modules: set[ModuleType] = set()
278_sys_modules_len: int | None = None
279
280
281def _get_local_constants() -> Constants:
282 global _sys_modules_len, _local_constants
283
284 if sys.platform == "emscripten": # pragma: no cover
285 # pyodide builds bundle the stdlib in a nonstandard location, like
286 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
287 # the stdlib as local code and slowing down on emscripten, instead return
288 # that nothing is local.
289 #
290 # pyodide may provide some way to distinguish stdlib/third-party/local
291 # code. I haven't looked into it. If they do, we should correctly implement
292 # ModuleLocation for pyodide instead of this.
293 return _local_constants
294
295 count_constants = len(_local_constants)
296 # We call this function once per HypothesisProvider instance, i.e. once per
297 # input, so it needs to be performant. The logic here is more complicated
298 # than necessary because of this.
299 #
300 # First, we check whether there are any new modules with a very cheap length
301 # check. This check can be fooled if a module is added while another module is
302 # removed, but the more correct check against tuple(sys.modules.keys()) is
303 # substantially more expensive. Such a new module would eventually be discovered
304 # if / when the length changes again in the future.
305 #
306 # If the length has changed, we find just modules we haven't seen before. Of
307 # those, we find the ones which correspond to local modules, and extract their
308 # constants.
309
310 # careful: store sys.modules length when we first check to avoid race conditions
311 # with other threads loading a module before we set _sys_modules_len.
312 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
313 # set(_seen_modules) shouldn't typically be required, but I have run into
314 # a "set changed size during iteration" error here when running
315 # test_provider_conformance_crosshair.
316 new_modules = set(sys.modules.values()) - set(_seen_modules)
317 # Repeated SortedSet unions are expensive. Do the initial unions on a
318 # set(), then do a one-time union with _local_constants after.
319 new_constants = Constants()
320 for module in new_modules:
321 if (
322 module_file := getattr(module, "__file__", None)
323 ) is not None and is_local_module_file(module_file):
324 new_constants |= constants_from_module(module)
325 _local_constants |= new_constants
326 _seen_modules.update(new_modules)
327 _sys_modules_len = sys_modules_len
328
329 # if we add any new constant, invalidate the constant cache for permitted values.
330 # A more efficient approach would be invalidating just the keys with this
331 # choice_type.
332 if len(_local_constants) > count_constants:
333 CONSTANTS_CACHE.cache.clear()
334
335 return _local_constants
336
337
338@contextmanager
339def with_register_backend(name, provider_cls):
340 try:
341 AVAILABLE_PROVIDERS[name] = provider_cls
342 yield
343 finally:
344 del AVAILABLE_PROVIDERS[name]
345
346
347class _BackendInfoMsg(TypedDict):
348 type: InfoObservationType
349 title: str
350 content: str | dict[str, Any]
351
352
353# TODO_DOCS: link to choice sequence explanation page
354
355
356class PrimitiveProvider(abc.ABC):
357 """
358 |PrimitiveProvider| is the implementation interface of a
359 :ref:`Hypothesis backend <alternative-backends>`.
360
361 A |PrimitiveProvider| is required to implement the following five
362 ``draw_*`` methods:
363
364 * |PrimitiveProvider.draw_integer|
365 * |PrimitiveProvider.draw_boolean|
366 * |PrimitiveProvider.draw_float|
367 * |PrimitiveProvider.draw_string|
368 * |PrimitiveProvider.draw_bytes|
369
370 Each strategy in Hypothesis generates values by drawing a series of choices
371 from these five methods. By overriding them, a |PrimitiveProvider| can control
372 the distribution of inputs generated by Hypothesis.
373
374 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
375 which uses an SMT solver to generate inputs that uncover new branches.
376
377 Once you implement a |PrimitiveProvider|, you can make it available for use
378 through |AVAILABLE_PROVIDERS|.
379 """
380
381 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
382 #: or ``test_case``.
383 #:
384 #: If ``test_function`` (the default), a single provider instance will be
385 #: instantiated and used for the entirety of each test function (i.e., roughly
386 #: one provider per |@given| annotation). This can be useful for tracking state
387 #: over the entirety of a test function.
388 #:
389 #: If ``test_case``, a new provider instance will be instantiated and used for
390 #: each input Hypothesis generates.
391 #:
392 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
393 #: be ``None`` for a lifetime of ``test_function``, and an instance of
394 #: ``ConjectureData`` for a lifetime of ``test_case``.
395 #:
396 #: Third-party providers likely want to set a lifetime of ``test_function``.
397 lifetime: ClassVar[LifetimeT] = "test_function"
398
399 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
400 #: which record operations performed on them in order to discover new paths.
401 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
402 #: with symbolic choices returned by the provider in any way that would force
403 #: the solver to narrow the range of possible values for that symbolic.
404 #:
405 #: Setting this to ``True`` disables some hypothesis features and optimizations.
406 #: Only set this to ``True`` if it is necessary for your backend.
407 avoid_realization: ClassVar[bool] = False
408
409 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
410 #: callback via |add_observability_callback|, enabling observability during
411 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
412 #: will never be called by Hypothesis.
413 #:
414 #: The opt-in behavior of observability is because enabling observability
415 #: might increase runtime or memory usage.
416 add_observability_callback: ClassVar[bool] = False
417
418 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
419 self._cd = conjecturedata
420
421 @abc.abstractmethod
422 def draw_boolean(
423 self,
424 p: float = 0.5,
425 ) -> bool:
426 """
427 Draw a boolean choice.
428
429 Parameters
430 ----------
431 p: float
432 The probability of returning ``True``. Between 0 and 1 inclusive.
433
434 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
435 Hypothesis, and may be ignored by the backend.
436
437 If ``0``, the provider must return ``False``. If ``1``, the provider
438 must return ``True``.
439 """
440 raise NotImplementedError
441
442 @abc.abstractmethod
443 def draw_integer(
444 self,
445 min_value: int | None = None,
446 max_value: int | None = None,
447 *,
448 weights: dict[int, float] | None = None,
449 shrink_towards: int = 0,
450 ) -> int:
451 """
452 Draw an integer choice.
453
454 Parameters
455 ----------
456 min_value : int | None
457 (Inclusive) lower bound on the integer value. If ``None``, there is
458 no lower bound.
459 max_value : int | None
460 (Inclusive) upper bound on the integer value. If ``None``, there is
461 no upper bound.
462 weights: dict[int, float] | None
463 Maps keys in the range [``min_value``, ``max_value``] to the probability
464 of returning that key.
465 shrink_towards: int
466 The integer to shrink towards. This is not used during generation and
467 can be ignored by backends.
468 """
469 raise NotImplementedError
470
471 @abc.abstractmethod
472 def draw_float(
473 self,
474 *,
475 min_value: float = -math.inf,
476 max_value: float = math.inf,
477 allow_nan: bool = True,
478 smallest_nonzero_magnitude: float,
479 ) -> float:
480 """
481 Draw a float choice.
482
483 Parameters
484 ----------
485 min_value : float
486 (Inclusive) lower bound on the float value.
487 max_value : float
488 (Inclusive) upper bound on the float value.
489 allow_nan : bool
490 If ``False``, it is invalid to return ``math.nan``.
491 smallest_nonzero_magnitude : float
492 The smallest allowed nonzero magnitude. ``draw_float`` should not
493 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
494 """
495 raise NotImplementedError
496
497 @abc.abstractmethod
498 def draw_string(
499 self,
500 intervals: IntervalSet,
501 *,
502 min_size: int = 0,
503 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
504 ) -> str:
505 """
506 Draw a string choice.
507
508 Parameters
509 ----------
510 intervals : IntervalSet
511 The set of codepoints to sample from.
512 min_size : int
513 (Inclusive) lower bound on the string length.
514 max_size : int
515 (Inclusive) upper bound on the string length.
516 """
517 raise NotImplementedError
518
519 @abc.abstractmethod
520 def draw_bytes(
521 self,
522 min_size: int = 0,
523 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
524 ) -> bytes:
525 """
526 Draw a bytes choice.
527
528 Parameters
529 ----------
530 min_size : int
531 (Inclusive) lower bound on the bytes length.
532 max_size : int
533 (Inclusive) upper bound on the bytes length.
534 """
535 raise NotImplementedError
536
537 def per_test_case_context_manager(self) -> AbstractContextManager:
538 """
539 Returns a context manager which will be entered each time Hypothesis
540 starts generating and executing one test case, and exited when that test
541 case finishes generating and executing, including if any exception is
542 thrown.
543
544 In the lifecycle of a Hypothesis test, this is called before
545 generating strategy values for each test case. This is just before any
546 :ref:`custom executor <custom-function-execution>` is called.
547
548 Even if not returning a custom context manager, |PrimitiveProvider|
549 subclasses are welcome to override this method to know when Hypothesis
550 starts and ends the execution of a single test case.
551 """
552 return contextlib.nullcontext()
553
554 def realize(self, value: T, *, for_failure: bool = False) -> T:
555 """
556 Called whenever hypothesis requires a concrete (non-symbolic) value from
557 a potentially symbolic value. Hypothesis will not check that ``value`` is
558 symbolic before calling ``realize``, so you should handle the case where
559 ``value`` is non-symbolic.
560
561 The returned value should be non-symbolic. If you cannot provide a value,
562 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
563
564 If ``for_failure`` is ``True``, the value is associated with a failing example.
565 In this case, the backend should spend substantially more effort when
566 attempting to realize the value, since it is important to avoid discarding
567 failing examples. Backends may still raise |BackendCannotProceed| when
568 ``for_failure`` is ``True``, if realization is truly impossible or if
569 realization takes significantly longer than expected (say, 5 minutes).
570 """
571 return value
572
573 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
574 """
575 Called when Hypothesis has discovered a choice sequence which the provider
576 may wish to enqueue to replay under its own instrumentation when we next
577 ask to generate a test case, rather than generating one from scratch.
578
579 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
580 of high-code-coverage inputs discovered by
581 `HypoFuzz <https://hypofuzz.com/>`_.
582 """
583 return None
584
585 def observe_test_case(self) -> dict[str, Any]:
586 """Called at the end of the test case when :ref:`observability
587 <observability>` is enabled.
588
589 The return value should be a non-symbolic json-encodable dictionary,
590 and will be included in observations as ``observation["metadata"]["backend"]``.
591 """
592 return {}
593
594 def observe_information_messages(
595 self, *, lifetime: LifetimeT
596 ) -> Iterable[_BackendInfoMsg]:
597 """Called at the end of each test case and again at end of the test function.
598
599 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
600 dictionaries to be delivered as individual information messages. Hypothesis
601 adds the ``run_start`` timestamp and ``property`` name for you.
602 """
603 assert lifetime in ("test_case", "test_function")
604 yield from []
605
606 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
607 """
608 Called at the end of each test case which uses this provider, with the same
609 ``observation["type"] == "test_case"`` observation that is passed to
610 other callbacks added via |add_observability_callback|. This method is not
611 called with ``observation["type"] in {"info", "alert", "error"}``
612 observations.
613
614 .. important::
615
616 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
617 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
618
619 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
620 observability might increase runtime or memory usage.
621
622 Calls to this method are guaranteed to alternate with calls to
623 |PrimitiveProvider.per_test_case_context_manager|. For example:
624
625 .. code-block:: python
626
627 # test function starts
628 per_test_case_context_manager()
629 on_observation()
630 per_test_case_context_manager()
631 on_observation()
632 ...
633 # test function ends
634
635 Note that |PrimitiveProvider.on_observation| will not be called for test
636 cases which did not use this provider during generation, for example
637 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
638 to the standard Hypothesis backend after this backend raised too many
639 |BackendCannotProceed| exceptions.
640 """
641
642 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
643 """Marks the beginning of a semantically meaningful span of choices.
644
645 Spans are a depth-first tree structure. A span is opened by a call to
646 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
647 closes the most recently opened span. So the following sequence of calls:
648
649 .. code-block:: python
650
651 span_start(label=1)
652 n1 = draw_integer()
653 span_start(label=2)
654 b1 = draw_boolean()
655 n2 = draw_integer()
656 span_end()
657 f1 = draw_float()
658 span_end()
659
660 produces the following two spans of choices:
661
662 .. code-block::
663
664 1: [n1, b1, n2, f1]
665 2: [b1, n2]
666
667 Hypothesis uses spans to denote "semantically meaningful" sequences of
668 choices. For instance, Hypothesis opens a span for the sequence of choices
669 made while drawing from each strategy. Not every span corresponds to a
670 strategy; the generation of e.g. each element in |st.lists| is also marked
671 with a span, among others.
672
673 ``label`` is an opaque integer, which has no defined semantics.
674 The only guarantee made by Hypothesis is that all spans with the same
675 "meaning" will share the same ``label``. So all spans from the same
676 strategy will share the same label, as will e.g. the spans for |st.lists|
677 elements.
678
679 Providers can track calls to |PrimitiveProvider.span_start| and
680 |PrimitiveProvider.span_end| to learn something about the semantics of
681 the test's choice sequence. For instance, a provider could track the depth
682 of the span tree, or the number of unique labels, which says something about
683 the complexity of the choices being generated. Or a provider could track
684 the span tree across test cases in order to determine what strategies are
685 being used in what contexts.
686
687 It is possible for Hypothesis to start and immediately stop a span,
688 without calling a ``draw_*`` method in between. These spans contain zero
689 choices.
690
691 Hypothesis will always balance the number of calls to
692 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
693 to |PrimitiveProvider.span_start| will always be followed by a call to
694 |PrimitiveProvider.span_end| before the end of the test case.
695
696 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
697 internally.
698 """
699
700 def span_end(self, discard: bool, /) -> None: # noqa: B027
701 """Marks the end of a semantically meaningful span of choices.
702
703 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
704 as unlikely to contribute to the input data as seen by the user's test.
705 Note however that side effects can make this determination unsound.
706
707 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
708 internally.
709 """
710
711
712class HypothesisProvider(PrimitiveProvider):
713 lifetime = "test_case"
714
715 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
716 super().__init__(conjecturedata)
717 self._random = None if self._cd is None else self._cd._random
718
719 @cached_property
720 def _local_constants(self):
721 # defer computation of local constants until/if we need it
722 return _get_local_constants()
723
724 def _maybe_draw_constant(
725 self,
726 choice_type: ChoiceTypeT,
727 constraints: ChoiceConstraintsT,
728 *,
729 p: float = 0.05,
730 ) -> Optional["ConstantT"]:
731 assert self._random is not None
732 assert choice_type != "boolean"
733 # check whether we even want a constant before spending time computing
734 # and caching the allowed constants.
735 if self._random.random() > p:
736 return None
737
738 # note: this property access results in computation being done
739 assert self._local_constants is not None
740
741 key = (choice_type, choice_constraints_key(choice_type, constraints))
742 if key not in CONSTANTS_CACHE:
743 CONSTANTS_CACHE[key] = (
744 tuple(
745 choice
746 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
747 if choice_permitted(choice, constraints)
748 ),
749 tuple(
750 choice
751 for choice in self._local_constants.set_for_type(choice_type)
752 if choice_permitted(choice, constraints)
753 ),
754 )
755
756 # split constants into two pools, so we still have a good chance to draw
757 # global constants even if there are many local constants.
758 global_constants, local_constants = CONSTANTS_CACHE[key]
759 constants_lists = ([global_constants] if global_constants else []) + (
760 [local_constants] if local_constants else []
761 )
762 if not constants_lists:
763 return None
764
765 # At this point, we've decided to use a constant. Now we select which pool
766 # to draw that constant from.
767 #
768 # Note that this approach has a different probability distribution than
769 # attempting a random.random for both global_constants and local_constants.
770 constants = self._random.choice(constants_lists)
771 return self._random.choice(constants)
772
773 def draw_boolean(
774 self,
775 p: float = 0.5,
776 ) -> bool:
777 assert self._random is not None
778
779 if p <= 0:
780 return False
781 if p >= 1:
782 return True
783
784 return self._random.random() < p
785
786 def draw_integer(
787 self,
788 min_value: int | None = None,
789 max_value: int | None = None,
790 *,
791 weights: dict[int, float] | None = None,
792 shrink_towards: int = 0,
793 ) -> int:
794 assert self._cd is not None
795 if (
796 constant := self._maybe_draw_constant(
797 "integer",
798 {
799 "min_value": min_value,
800 "max_value": max_value,
801 "weights": weights,
802 "shrink_towards": shrink_towards,
803 },
804 )
805 ) is not None:
806 assert isinstance(constant, int)
807 return constant
808
809 center = 0
810 if min_value is not None:
811 center = max(min_value, center)
812 if max_value is not None:
813 center = min(max_value, center)
814
815 if weights is not None:
816 assert min_value is not None
817 assert max_value is not None
818
819 # format of weights is a mapping of ints to p, where sum(p) < 1.
820 # The remaining probability mass is uniformly distributed over
821 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
822 # but simplifies things).
823 #
824 # We assert that sum(p) is strictly less than 1 because it simplifies
825 # handling forced values when we can force into the unmapped probability
826 # mass. We should eventually remove this restriction.
827 sampler = Sampler(
828 [1 - sum(weights.values()), *weights.values()], observe=False
829 )
830 # if we're forcing, it's easiest to force into the unmapped probability
831 # mass and then force the drawn value after.
832 idx = sampler.sample(self._cd)
833
834 if idx == 0:
835 return self._draw_bounded_integer(min_value, max_value)
836 # implicit reliance on dicts being sorted for determinism
837 return list(weights)[idx - 1]
838
839 if min_value is None and max_value is None:
840 return self._draw_unbounded_integer()
841
842 if min_value is None:
843 assert max_value is not None
844 probe = max_value + 1
845 while max_value < probe:
846 probe = center + self._draw_unbounded_integer()
847 return probe
848
849 if max_value is None:
850 assert min_value is not None
851 probe = min_value - 1
852 while probe < min_value:
853 probe = center + self._draw_unbounded_integer()
854 return probe
855
856 return self._draw_bounded_integer(min_value, max_value)
857
858 def draw_float(
859 self,
860 *,
861 min_value: float = -math.inf,
862 max_value: float = math.inf,
863 allow_nan: bool = True,
864 smallest_nonzero_magnitude: float,
865 ) -> float:
866 assert self._random is not None
867
868 constraints: FloatConstraints = {
869 "min_value": min_value,
870 "max_value": max_value,
871 "allow_nan": allow_nan,
872 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
873 }
874 if (
875 constant := self._maybe_draw_constant("float", constraints, p=0.15)
876 ) is not None:
877 assert isinstance(constant, float)
878 return constant
879
880 # on top of the probability to draw a constant float, we independently
881 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
882 weird_floats = [
883 f
884 for f in [
885 0.0,
886 -0.0,
887 math.inf,
888 -math.inf,
889 math.nan,
890 -math.nan,
891 SIGNALING_NAN,
892 -SIGNALING_NAN,
893 min_value,
894 next_up(min_value),
895 min_value + 1,
896 max_value - 1,
897 next_down(max_value),
898 max_value,
899 ]
900 if choice_permitted(f, constraints)
901 ]
902
903 if weird_floats and self._random.random() < 0.05:
904 return self._random.choice(weird_floats)
905
906 clamper = make_float_clamper(
907 min_value,
908 max_value,
909 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
910 allow_nan=allow_nan,
911 )
912
913 result = self._draw_float()
914 if allow_nan and math.isnan(result):
915 clamped = result # pragma: no cover
916 else:
917 clamped = clamper(result)
918 if float_to_int(clamped) != float_to_int(result) and not (
919 math.isnan(result) and allow_nan
920 ):
921 result = clamped
922 return result
923
924 def draw_string(
925 self,
926 intervals: IntervalSet,
927 *,
928 min_size: int = 0,
929 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
930 ) -> str:
931 assert self._cd is not None
932 assert self._random is not None
933
934 if len(intervals) == 0:
935 return ""
936
937 if (
938 constant := self._maybe_draw_constant(
939 "string",
940 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
941 )
942 ) is not None:
943 assert isinstance(constant, str)
944 return constant
945
946 average_size = min(
947 max(min_size * 2, min_size + 5),
948 0.5 * (min_size + max_size),
949 )
950
951 chars = []
952 elements = many(
953 self._cd,
954 min_size=min_size,
955 max_size=max_size,
956 average_size=average_size,
957 observe=False,
958 )
959 while elements.more():
960 if len(intervals) > 256:
961 if self.draw_boolean(0.2):
962 i = self._random.randint(256, len(intervals) - 1)
963 else:
964 i = self._random.randint(0, 255)
965 else:
966 i = self._random.randint(0, len(intervals) - 1)
967
968 chars.append(intervals.char_in_shrink_order(i))
969
970 return "".join(chars)
971
972 def draw_bytes(
973 self,
974 min_size: int = 0,
975 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
976 ) -> bytes:
977 assert self._cd is not None
978 assert self._random is not None
979
980 if (
981 constant := self._maybe_draw_constant(
982 "bytes", {"min_size": min_size, "max_size": max_size}
983 )
984 ) is not None:
985 assert isinstance(constant, bytes)
986 return constant
987
988 buf = bytearray()
989 average_size = min(
990 max(min_size * 2, min_size + 5),
991 0.5 * (min_size + max_size),
992 )
993 elements = many(
994 self._cd,
995 min_size=min_size,
996 max_size=max_size,
997 average_size=average_size,
998 observe=False,
999 )
1000 while elements.more():
1001 buf += self._random.randbytes(1)
1002
1003 return bytes(buf)
1004
1005 def _draw_float(self) -> float:
1006 assert self._random is not None
1007
1008 f = lex_to_float(self._random.getrandbits(64))
1009 sign = 1 if self._random.getrandbits(1) else -1
1010 return sign * f
1011
1012 def _draw_unbounded_integer(self) -> int:
1013 assert self._cd is not None
1014 assert self._random is not None
1015
1016 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
1017
1018 r = self._random.getrandbits(size)
1019 sign = r & 1
1020 r >>= 1
1021 if sign:
1022 r = -r
1023 return r
1024
1025 def _draw_bounded_integer(
1026 self,
1027 lower: int,
1028 upper: int,
1029 *,
1030 vary_size: bool = True,
1031 ) -> int:
1032 assert lower <= upper
1033 assert self._cd is not None
1034 assert self._random is not None
1035
1036 if lower == upper:
1037 return lower
1038
1039 bits = (upper - lower).bit_length()
1040 if bits > 24 and vary_size and self._random.random() < 7 / 8:
1041 # For large ranges, we combine the uniform random distribution
1042 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
1043 # choice of unicode characters is uniform but the 32bit distribution is not.
1044 idx = INT_SIZES_SAMPLER.sample(self._cd)
1045 cap_bits = min(bits, INT_SIZES[idx])
1046 upper = min(upper, lower + 2**cap_bits - 1)
1047 return self._random.randint(lower, upper)
1048
1049 return self._random.randint(lower, upper)
1050
1051
1052# Masks for masking off the first byte of an n-bit buffer.
1053# The appropriate mask is stored at position n % 8.
1054BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1055BYTE_MASKS[0] = 255
1056
1057
1058class BytestringProvider(PrimitiveProvider):
1059 lifetime = "test_case"
1060
1061 def __init__(
1062 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1063 ):
1064 super().__init__(conjecturedata)
1065 self.bytestring = bytestring
1066 self.index = 0
1067 self.drawn = bytearray()
1068
1069 def _draw_bits(self, n):
1070 if n == 0: # pragma: no cover
1071 return 0
1072 n_bytes = bits_to_bytes(n)
1073 if self.index + n_bytes > len(self.bytestring):
1074 self._cd.mark_overrun()
1075 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1076 self.index += n_bytes
1077
1078 buf[0] &= BYTE_MASKS[n % 8]
1079 buf = bytes(buf)
1080 self.drawn += buf
1081 return int_from_bytes(buf)
1082
1083 def draw_boolean(
1084 self,
1085 p: float = 0.5,
1086 ) -> bool:
1087 if p <= 0:
1088 return False
1089 if p >= 1:
1090 return True
1091
1092 # always use one byte for booleans to maintain constant draw size.
1093 # If a probability requires more than 8 bits to represent precisely,
1094 # the result will be slightly biased, but not badly.
1095 bits = 8
1096 size = 2**bits
1097 # always leave at least one value that can be true, even for very small
1098 # p.
1099 falsey = max(1, math.floor(size * (1 - p)))
1100 n = self._draw_bits(bits)
1101 return n >= falsey
1102
1103 def draw_integer(
1104 self,
1105 min_value: int | None = None,
1106 max_value: int | None = None,
1107 *,
1108 weights: dict[int, float] | None = None,
1109 shrink_towards: int = 0,
1110 ) -> int:
1111 assert self._cd is not None
1112
1113 # we explicitly ignore integer weights for now, as they are likely net
1114 # negative on fuzzer performance.
1115
1116 if min_value is None and max_value is None:
1117 min_value = -(2**127)
1118 max_value = 2**127 - 1
1119 elif min_value is None:
1120 assert max_value is not None
1121 min_value = max_value - 2**64
1122 elif max_value is None:
1123 assert min_value is not None
1124 max_value = min_value + 2**64
1125
1126 if min_value == max_value:
1127 return min_value
1128
1129 bits = (max_value - min_value).bit_length()
1130 value = self._draw_bits(bits)
1131 while not (min_value <= value <= max_value):
1132 value = self._draw_bits(bits)
1133 return value
1134
1135 def draw_float(
1136 self,
1137 *,
1138 min_value: float = -math.inf,
1139 max_value: float = math.inf,
1140 allow_nan: bool = True,
1141 smallest_nonzero_magnitude: float,
1142 ) -> float:
1143 n = self._draw_bits(64)
1144 sign = -1 if n >> 64 else 1
1145 f = sign * lex_to_float(n & ((1 << 64) - 1))
1146 clamper = make_float_clamper(
1147 min_value,
1148 max_value,
1149 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1150 allow_nan=allow_nan,
1151 )
1152 return clamper(f)
1153
1154 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1155 average_size = min(
1156 max(min_size * 2, min_size + 5),
1157 0.5 * (min_size + max_size),
1158 )
1159 elements = many(
1160 self._cd,
1161 min_size=min_size,
1162 max_size=max_size,
1163 average_size=average_size,
1164 observe=False,
1165 )
1166 values = []
1167 while elements.more():
1168 values.append(self.draw_integer(0, alphabet_size - 1))
1169 return values
1170
1171 def draw_string(
1172 self,
1173 intervals: IntervalSet,
1174 *,
1175 min_size: int = 0,
1176 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1177 ) -> str:
1178 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1179 return "".join(chr(intervals[v]) for v in values)
1180
1181 def draw_bytes(
1182 self,
1183 min_size: int = 0,
1184 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1185 ) -> bytes:
1186 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1187 return bytes(values)
1188
1189
1190class URandom(Random):
1191 # we reimplement a Random instance instead of using SystemRandom, because
1192 # os.urandom is not guaranteed to read from /dev/urandom.
1193
1194 @staticmethod
1195 def _urandom(size: int) -> bytes:
1196 with open("/dev/urandom", "rb") as f:
1197 return f.read(size)
1198
1199 def getrandbits(self, k: int) -> int:
1200 assert k >= 0
1201 size = bits_to_bytes(k)
1202 n = int_from_bytes(self._urandom(size))
1203 # trim excess bits
1204 return n >> (size * 8 - k)
1205
1206 def random(self) -> float:
1207 # adapted from random.SystemRandom.random
1208 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1209
1210
1211class URandomProvider(HypothesisProvider):
1212 # A provider which reads directly from /dev/urandom as its source of randomness.
1213 # This provider exists to provide better Hypothesis integration with Antithesis
1214 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1215 # randomness to mutate. This effectively gives Antithesis control over
1216 # the choices made by the URandomProvider.
1217 #
1218 # If you are not using Antithesis, you probably don't want to use this
1219 # provider.
1220
1221 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1222 super().__init__(conjecturedata)
1223 if WINDOWS: # pragma: no cover
1224 warnings.warn(
1225 "/dev/urandom is not available on windows. Falling back to "
1226 'standard PRNG generation (equivalent to backend="hypothesis").',
1227 HypothesisWarning,
1228 stacklevel=1,
1229 )
1230 # don't overwrite the HypothesisProvider self._random attribute in
1231 # this case
1232 else:
1233 self._random = URandom()