1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from types import ModuleType
22from typing import (
23 TYPE_CHECKING,
24 Any,
25 ClassVar,
26 Literal,
27 Optional,
28 TypedDict,
29 TypeVar,
30 Union,
31)
32
33from sortedcontainers import SortedSet
34
35from hypothesis.errors import HypothesisWarning
36from hypothesis.internal.cache import LRUCache
37from hypothesis.internal.compat import WINDOWS, int_from_bytes
38from hypothesis.internal.conjecture.choice import (
39 ChoiceConstraintsT,
40 ChoiceT,
41 ChoiceTypeT,
42 FloatConstraints,
43 choice_constraints_key,
44 choice_permitted,
45)
46from hypothesis.internal.conjecture.floats import lex_to_float
47from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
48from hypothesis.internal.conjecture.utils import (
49 INT_SIZES,
50 INT_SIZES_SAMPLER,
51 Sampler,
52 many,
53)
54from hypothesis.internal.constants_ast import (
55 Constants,
56 constants_from_module,
57 is_local_module_file,
58)
59from hypothesis.internal.floats import (
60 SIGNALING_NAN,
61 float_to_int,
62 make_float_clamper,
63 next_down,
64 next_up,
65)
66from hypothesis.internal.intervalsets import IntervalSet
67from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
68
69if TYPE_CHECKING:
70 from typing import TypeAlias
71
72 from hypothesis.internal.conjecture.data import ConjectureData
73 from hypothesis.internal.constants_ast import ConstantT
74
75T = TypeVar("T")
76LifetimeT: "TypeAlias" = Literal["test_case", "test_function"]
77COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
78
79
80#: Registered Hypothesis backends. This is a dictionary whose keys are the name
81#: to be used in |settings.backend|, and whose values are a string of the absolute
82#: importable path to a subclass of |PrimitiveProvider|, which Hypothesis will
83#: instantiate when your backend is requested by a test's |settings.backend| value.
84#:
85#: For example, the default Hypothesis backend is registered as:
86#:
87#: .. code-block:: python
88#:
89#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
90#:
91#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
92#:
93#: And can be used with:
94#:
95#: .. code-block:: python
96#:
97#: from hypothesis import given, settings, strategies as st
98#:
99#: @given(st.integers())
100#: @settings(backend="hypothesis")
101#: def f(n):
102#: pass
103#:
104#: Though, as ``backend="hypothesis"`` is the default setting, the above would
105#: typically not have any effect.
106#:
107#: The purpose of mapping to an absolute importable path, rather than the actual
108#: |PrimitiveProvider| class, is to avoid slowing down Hypothesis startup times
109#: by only importing alternative backends when required.
110AVAILABLE_PROVIDERS = {
111 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
112 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
113}
114# cache the choice_permitted constants for a particular set of constraints.
115CacheKeyT: "TypeAlias" = tuple[ChoiceTypeT, tuple[Any, ...]]
116CacheValueT: "TypeAlias" = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
117CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
118
119_constant_floats = (
120 [
121 0.5,
122 1.1,
123 1.5,
124 1.9,
125 1.0 / 3,
126 10e6,
127 10e-6,
128 1.175494351e-38,
129 next_up(0.0),
130 float_info.min,
131 float_info.max,
132 3.402823466e38,
133 9007199254740992.0,
134 1 - 10e-6,
135 2 + 10e-6,
136 1.192092896e-07,
137 2.2204460492503131e-016,
138 ]
139 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
140 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
141)
142_constant_floats.extend([-x for x in _constant_floats])
143assert all(isinstance(f, float) for f in _constant_floats)
144
145_constant_strings = {
146 # strings which can be interpreted as code / logic
147 "undefined",
148 "null",
149 "NULL",
150 "nil",
151 "NIL",
152 "true",
153 "false",
154 "True",
155 "False",
156 "TRUE",
157 "FALSE",
158 "None",
159 "none",
160 "if",
161 "then",
162 "else",
163 # strings which can be interpreted as a number
164 "0",
165 "1e100",
166 "0..0",
167 "0/0",
168 "1/0",
169 "+0.0",
170 "Infinity",
171 "-Infinity",
172 "Inf",
173 "INF",
174 "NaN",
175 "9" * 30,
176 # common ascii characters
177 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
178 # common unicode characters
179 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
180 # characters which increase in length when lowercased
181 "Ⱥ",
182 "Ⱦ",
183 # ligatures
184 "æœÆŒffʤʨß"
185 # emoticons
186 "(╯°□°)╯︵ ┻━┻)",
187 # emojis
188 "😍",
189 "🇺🇸",
190 # emoji modifiers
191 "🏻" # U+1F3FB Light Skin Tone,
192 "👍🏻", # 👍 followed by U+1F3FB
193 # RTL text
194 "الكل في المجمو عة",
195 # Ogham text, which contains the only character in the Space Separators
196 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
197 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
198 # readable variations on text (bolt/italic/script)
199 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
200 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
201 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
202 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
203 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
204 # upsidown text
205 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
206 # reserved strings in windows
207 "NUL",
208 "COM1",
209 "LPT1",
210 # scunthorpe problem
211 "Scunthorpe",
212 # zalgo text
213 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
214 #
215 # examples from https://faultlore.com/blah/text-hates-you/
216 "मनीष منش",
217 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
218 "lorem لا بسم الله ipsum 你好1234你好",
219}
220
221
222# we don't actually care what order the constants are sorted in, just that the
223# ordering is deterministic.
224GLOBAL_CONSTANTS = Constants(
225 integers=SortedSet(),
226 floats=SortedSet(_constant_floats, key=float_to_int),
227 bytes=SortedSet(),
228 strings=SortedSet(_constant_strings),
229)
230
231_local_constants = Constants(
232 integers=SortedSet(),
233 floats=SortedSet(key=float_to_int),
234 bytes=SortedSet(),
235 strings=SortedSet(),
236)
237# modules that we've already seen and processed for local constants. These are
238# are all modules, not necessarily local ones. This lets us quickly see which
239# modules are new without an expensive path.resolve() or is_local_module_file
240# cache lookup.
241_seen_modules: set[ModuleType] = set()
242_sys_modules_len: Optional[int] = None
243
244
245def _get_local_constants() -> Constants:
246 global _sys_modules_len, _local_constants
247
248 if sys.platform == "emscripten": # pragma: no cover
249 # pyodide builds bundle the stdlib in a nonstandard location, like
250 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
251 # the stdlib as local code and slowing down on emscripten, instead return
252 # that nothing is local.
253 #
254 # pyodide may provide some way to distinguish stdlib/third-party/local
255 # code. I haven't looked into it. If they do, we should correctly implement
256 # ModuleLocation for pyodide instead of this.
257 return _local_constants
258
259 count_constants = len(_local_constants)
260 # We call this function once per HypothesisProvider instance, i.e. once per
261 # input, so it needs to be performant. The logic here is more complicated
262 # than necessary because of this.
263 #
264 # First, we check whether there are any new modules with a very cheap length
265 # check. This check can be fooled if a module is added while another module is
266 # removed, but the more correct check against tuple(sys.modules.keys()) is
267 # substantially more expensive. Such a new module would eventually be discovered
268 # if / when the length changes again in the future.
269 #
270 # If the length has changed, we find just modules we haven't seen before. Of
271 # those, we find the ones which correspond to local modules, and extract their
272 # constants.
273
274 # careful: store sys.modules length when we first check to avoid race conditions
275 # with other threads loading a module before we set _sys_modules_len.
276 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
277 # set(_seen_modules) shouldn't typically be required, but I have run into
278 # a "set changed size during iteration" error here when running
279 # test_provider_conformance_crosshair.
280 new_modules = set(sys.modules.values()) - set(_seen_modules)
281 # Repeated SortedSet unions are expensive. Do the initial unions on a
282 # set(), then do a one-time union with _local_constants after.
283 new_constants = Constants()
284 for module in new_modules:
285 if (
286 module_file := getattr(module, "__file__", None)
287 ) is not None and is_local_module_file(module_file):
288 new_constants |= constants_from_module(module)
289 _local_constants |= new_constants
290 _seen_modules.update(new_modules)
291 _sys_modules_len = sys_modules_len
292
293 # if we add any new constant, invalidate the constant cache for permitted values.
294 # A more efficient approach would be invalidating just the keys with this
295 # choice_type.
296 if len(_local_constants) > count_constants:
297 CONSTANTS_CACHE.cache.clear()
298
299 return _local_constants
300
301
302class _BackendInfoMsg(TypedDict):
303 type: InfoObservationType
304 title: str
305 content: Union[str, dict[str, Any]]
306
307
308# TODO_DOCS: link to choice sequence explanation page
309
310
311class PrimitiveProvider(abc.ABC):
312 """
313 |PrimitiveProvider| is the implementation interface of a
314 :ref:`Hypothesis backend <alternative-backends>`.
315
316 A |PrimitiveProvider| is required to implement the following five
317 ``draw_*`` methods:
318
319 * |PrimitiveProvider.draw_integer|
320 * |PrimitiveProvider.draw_boolean|
321 * |PrimitiveProvider.draw_float|
322 * |PrimitiveProvider.draw_string|
323 * |PrimitiveProvider.draw_bytes|
324
325 Each strategy in Hypothesis generates values by drawing a series of choices
326 from these five methods. By overriding them, a |PrimitiveProvider| can control
327 the distribution of inputs generated by Hypothesis.
328
329 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
330 which uses an SMT solver to generate inputs that uncover new branches.
331
332 Once you implement a |PrimitiveProvider|, you can make it available for use
333 through |AVAILABLE_PROVIDERS|.
334 """
335
336 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
337 #: or ``test_case``.
338 #:
339 #: If ``test_function`` (the default), a single provider instance will be
340 #: instantiated and used for the entirety of each test function (i.e., roughly
341 #: one provider per |@given| annotation). This can be useful for tracking state
342 #: over the entirety of a test function.
343 #:
344 #: If ``test_case``, a new provider instance will be instantiated and used for
345 #: each input Hypothesis generates.
346 #:
347 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
348 #: be ``None`` for a lifetime of ``test_function``, and an instance of
349 #: ``ConjectureData`` for a lifetime of ``test_case``.
350 #:
351 #: Third-party providers likely want to set a lifetime of ``test_function``.
352 lifetime: ClassVar[LifetimeT] = "test_function"
353
354 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
355 #: which record operations performed on them in order to discover new paths.
356 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
357 #: with symbolic choices returned by the provider in any way that would force
358 #: the solver to narrow the range of possible values for that symbolic.
359 #:
360 #: Setting this to ``True`` disables some hypothesis features and optimizations.
361 #: Only set this to ``True`` if it is necessary for your backend.
362 avoid_realization: ClassVar[bool] = False
363
364 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
365 #: callback via |add_observability_callback|, enabling observability during
366 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
367 #: will never be called by Hypothesis.
368 #:
369 #: The opt-in behavior of observability is because enabling observability
370 #: might increase runtime or memory usage.
371 add_observability_callback: ClassVar[bool] = False
372
373 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
374 self._cd = conjecturedata
375
376 @abc.abstractmethod
377 def draw_boolean(
378 self,
379 p: float = 0.5,
380 ) -> bool:
381 """
382 Draw a boolean choice.
383
384 Parameters
385 ----------
386 p: float
387 The probability of returning ``True``. Between 0 and 1 inclusive.
388
389 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
390 Hypothesis, and may be ignored by the backend.
391
392 If ``0``, the provider must return ``False``. If ``1``, the provider
393 must return ``True``.
394 """
395 raise NotImplementedError
396
397 @abc.abstractmethod
398 def draw_integer(
399 self,
400 min_value: Optional[int] = None,
401 max_value: Optional[int] = None,
402 *,
403 weights: Optional[dict[int, float]] = None,
404 shrink_towards: int = 0,
405 ) -> int:
406 """
407 Draw an integer choice.
408
409 Parameters
410 ----------
411 min_value : int | None
412 (Inclusive) lower bound on the integer value. If ``None``, there is
413 no lower bound.
414 max_value : int | None
415 (Inclusive) upper bound on the integer value. If ``None``, there is
416 no upper bound.
417 weights: dict[int, float] | None
418 Maps keys in the range [``min_value``, ``max_value``] to the probability
419 of returning that key.
420 shrink_towards: int
421 The integer to shrink towards. This is not used during generation and
422 can be ignored by backends.
423 """
424 raise NotImplementedError
425
426 @abc.abstractmethod
427 def draw_float(
428 self,
429 *,
430 min_value: float = -math.inf,
431 max_value: float = math.inf,
432 allow_nan: bool = True,
433 smallest_nonzero_magnitude: float,
434 ) -> float:
435 """
436 Draw a float choice.
437
438 Parameters
439 ----------
440 min_value : float
441 (Inclusive) lower bound on the float value.
442 max_value : float
443 (Inclusive) upper bound on the float value.
444 allow_nan : bool
445 If ``False``, it is invalid to return ``math.nan``.
446 smallest_nonzero_magnitude : float
447 The smallest allowed nonzero magnitude. ``draw_float`` should not
448 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
449 """
450 raise NotImplementedError
451
452 @abc.abstractmethod
453 def draw_string(
454 self,
455 intervals: IntervalSet,
456 *,
457 min_size: int = 0,
458 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
459 ) -> str:
460 """
461 Draw a string choice.
462
463 Parameters
464 ----------
465 intervals : IntervalSet
466 The set of codepoints to sample from.
467 min_size : int
468 (Inclusive) lower bound on the string length.
469 max_size : int
470 (Inclusive) upper bound on the string length.
471 """
472 raise NotImplementedError
473
474 @abc.abstractmethod
475 def draw_bytes(
476 self,
477 min_size: int = 0,
478 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
479 ) -> bytes:
480 """
481 Draw a bytes choice.
482
483 Parameters
484 ----------
485 min_size : int
486 (Inclusive) lower bound on the bytes length.
487 max_size : int
488 (Inclusive) upper bound on the bytes length.
489 """
490 raise NotImplementedError
491
492 def per_test_case_context_manager(self) -> AbstractContextManager:
493 """
494 Returns a context manager which will be entered each time Hypothesis
495 starts generating and executing one test case, and exited when that test
496 case finishes generating and executing, including if any exception is
497 thrown.
498
499 In the lifecycle of a Hypothesis test, this is called before
500 generating strategy values for each test case. This is just before any
501 :ref:`custom executor <custom-function-execution>` is called.
502
503 Even if not returning a custom context manager, |PrimitiveProvider|
504 subclasses are welcome to override this method to know when Hypothesis
505 starts and ends the execution of a single test case.
506 """
507 return contextlib.nullcontext()
508
509 def realize(self, value: T, *, for_failure: bool = False) -> T:
510 """
511 Called whenever hypothesis requires a concrete (non-symbolic) value from
512 a potentially symbolic value. Hypothesis will not check that ``value`` is
513 symbolic before calling ``realize``, so you should handle the case where
514 ``value`` is non-symbolic.
515
516 The returned value should be non-symbolic. If you cannot provide a value,
517 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
518
519 If ``for_failure`` is ``True``, the value is associated with a failing example.
520 In this case, the backend should spend substantially more effort when
521 attempting to realize the value, since it is important to avoid discarding
522 failing examples. Backends may still raise |BackendCannotProceed| when
523 ``for_failure`` is ``True``, if realization is truly impossible or if
524 realization takes significantly longer than expected (say, 5 minutes).
525 """
526 return value
527
528 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
529 """
530 Called when Hypothesis has discovered a choice sequence which the provider
531 may wish to enqueue to replay under its own instrumentation when we next
532 ask to generate a test case, rather than generating one from scratch.
533
534 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
535 of high-code-coverage inputs discovered by
536 `HypoFuzz <https://hypofuzz.com/>`_.
537 """
538 return None
539
540 def observe_test_case(self) -> dict[str, Any]:
541 """Called at the end of the test case when :ref:`observability
542 <observability>` is enabled.
543
544 The return value should be a non-symbolic json-encodable dictionary,
545 and will be included in observations as ``observation["metadata"]["backend"]``.
546 """
547 return {}
548
549 def observe_information_messages(
550 self, *, lifetime: LifetimeT
551 ) -> Iterable[_BackendInfoMsg]:
552 """Called at the end of each test case and again at end of the test function.
553
554 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
555 dictionaries to be delivered as individual information messages. Hypothesis
556 adds the ``run_start`` timestamp and ``property`` name for you.
557 """
558 assert lifetime in ("test_case", "test_function")
559 yield from []
560
561 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
562 """
563 Called at the end of each test case which uses this provider, with the same
564 ``observation["type"] == "test_case"`` observation that is passed to
565 other callbacks added via |add_observability_callback|. This method is not
566 called with ``observation["type"] in {"info", "alert", "error"}``
567 observations.
568
569 .. important::
570
571 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
572 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
573
574 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
575 observability might increase runtime or memory usage.
576
577 Calls to this method are guaranteed to alternate with calls to
578 |PrimitiveProvider.per_test_case_context_manager|. For example:
579
580 .. code-block:: python
581
582 # test function starts
583 per_test_case_context_manager()
584 on_observation()
585 per_test_case_context_manager()
586 on_observation()
587 ...
588 # test function ends
589
590 Note that |PrimitiveProvider.on_observation| will not be called for test
591 cases which did not use this provider during generation, for example
592 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
593 to the standard Hypothesis backend after this backend raised too many
594 |BackendCannotProceed| exceptions.
595 """
596
597 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
598 """Marks the beginning of a semantically meaningful span of choices.
599
600 Spans are a depth-first tree structure. A span is opened by a call to
601 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
602 closes the most recently opened span. So the following sequence of calls:
603
604 .. code-block:: python
605
606 span_start(label=1)
607 n1 = draw_integer()
608 span_start(label=2)
609 b1 = draw_boolean()
610 n2 = draw_integer()
611 span_end()
612 f1 = draw_float()
613 span_end()
614
615 produces the following two spans of choices:
616
617 .. code-block::
618
619 1: [n1, b1, n2, f1]
620 2: [b1, n2]
621
622 Hypothesis uses spans to denote "semantically meaningful" sequences of
623 choices. For instance, Hypothesis opens a span for the sequence of choices
624 made while drawing from each strategy. Not every span corresponds to a
625 strategy; the generation of e.g. each element in |st.lists| is also marked
626 with a span, among others.
627
628 ``label`` is an opaque integer, which has no defined semantics.
629 The only guarantee made by Hypothesis is that all spans with the same
630 "meaning" will share the same ``label``. So all spans from the same
631 strategy will share the same label, as will e.g. the spans for |st.lists|
632 elements.
633
634 Providers can track calls to |PrimitiveProvider.span_start| and
635 |PrimitiveProvider.span_end| to learn something about the semantics of
636 the test's choice sequence. For instance, a provider could track the depth
637 of the span tree, or the number of unique labels, which says something about
638 the complexity of the choices being generated. Or a provider could track
639 the span tree across test cases in order to determine what strategies are
640 being used in what contexts.
641
642 It is possible for Hypothesis to start and immediately stop a span,
643 without calling a ``draw_*`` method in between. These spans contain zero
644 choices.
645
646 Hypothesis will always balance the number of calls to
647 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
648 to |PrimitiveProvider.span_start| will always be followed by a call to
649 |PrimitiveProvider.span_end| before the end of the test case.
650
651 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
652 internally.
653 """
654
655 def span_end(self, discard: bool, /) -> None: # noqa: B027
656 """Marks the end of a semantically meaningful span of choices.
657
658 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
659 as unlikely to contribute to the input data as seen by the user's test.
660 Note however that side effects can make this determination unsound.
661
662 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
663 internally.
664 """
665
666
667class HypothesisProvider(PrimitiveProvider):
668 lifetime = "test_case"
669
670 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
671 super().__init__(conjecturedata)
672 self._random = None if self._cd is None else self._cd._random
673
674 @cached_property
675 def _local_constants(self):
676 # defer computation of local constants until/if we need it
677 return _get_local_constants()
678
679 def _maybe_draw_constant(
680 self,
681 choice_type: ChoiceTypeT,
682 constraints: ChoiceConstraintsT,
683 *,
684 p: float = 0.05,
685 ) -> Optional["ConstantT"]:
686 assert self._random is not None
687 assert choice_type != "boolean"
688 # check whether we even want a constant before spending time computing
689 # and caching the allowed constants.
690 if self._random.random() > p:
691 return None
692
693 # note: this property access results in computation being done
694 assert self._local_constants is not None
695
696 key = (choice_type, choice_constraints_key(choice_type, constraints))
697 if key not in CONSTANTS_CACHE:
698 CONSTANTS_CACHE[key] = (
699 tuple(
700 choice
701 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
702 if choice_permitted(choice, constraints)
703 ),
704 tuple(
705 choice
706 for choice in self._local_constants.set_for_type(choice_type)
707 if choice_permitted(choice, constraints)
708 ),
709 )
710
711 # split constants into two pools, so we still have a good chance to draw
712 # global constants even if there are many local constants.
713 (global_constants, local_constants) = CONSTANTS_CACHE[key]
714 constants_lists = ([global_constants] if global_constants else []) + (
715 [local_constants] if local_constants else []
716 )
717 if not constants_lists:
718 return None
719
720 # At this point, we've decided to use a constant. Now we select which pool
721 # to draw that constant from.
722 #
723 # Note that this approach has a different probability distribution than
724 # attempting a random.random for both global_constants and local_constants.
725 constants = self._random.choice(constants_lists)
726 return self._random.choice(constants)
727
728 def draw_boolean(
729 self,
730 p: float = 0.5,
731 ) -> bool:
732 assert self._random is not None
733
734 if p <= 0:
735 return False
736 if p >= 1:
737 return True
738
739 return self._random.random() < p
740
741 def draw_integer(
742 self,
743 min_value: Optional[int] = None,
744 max_value: Optional[int] = None,
745 *,
746 weights: Optional[dict[int, float]] = None,
747 shrink_towards: int = 0,
748 ) -> int:
749 assert self._cd is not None
750 if (
751 constant := self._maybe_draw_constant(
752 "integer",
753 {
754 "min_value": min_value,
755 "max_value": max_value,
756 "weights": weights,
757 "shrink_towards": shrink_towards,
758 },
759 )
760 ) is not None:
761 assert isinstance(constant, int)
762 return constant
763
764 center = 0
765 if min_value is not None:
766 center = max(min_value, center)
767 if max_value is not None:
768 center = min(max_value, center)
769
770 if weights is not None:
771 assert min_value is not None
772 assert max_value is not None
773
774 # format of weights is a mapping of ints to p, where sum(p) < 1.
775 # The remaining probability mass is uniformly distributed over
776 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
777 # but simplifies things).
778 #
779 # We assert that sum(p) is strictly less than 1 because it simplifies
780 # handling forced values when we can force into the unmapped probability
781 # mass. We should eventually remove this restriction.
782 sampler = Sampler(
783 [1 - sum(weights.values()), *weights.values()], observe=False
784 )
785 # if we're forcing, it's easiest to force into the unmapped probability
786 # mass and then force the drawn value after.
787 idx = sampler.sample(self._cd)
788
789 if idx == 0:
790 return self._draw_bounded_integer(min_value, max_value)
791 # implicit reliance on dicts being sorted for determinism
792 return list(weights)[idx - 1]
793
794 if min_value is None and max_value is None:
795 return self._draw_unbounded_integer()
796
797 if min_value is None:
798 assert max_value is not None
799 probe = max_value + 1
800 while max_value < probe:
801 probe = center + self._draw_unbounded_integer()
802 return probe
803
804 if max_value is None:
805 assert min_value is not None
806 probe = min_value - 1
807 while probe < min_value:
808 probe = center + self._draw_unbounded_integer()
809 return probe
810
811 return self._draw_bounded_integer(min_value, max_value)
812
813 def draw_float(
814 self,
815 *,
816 min_value: float = -math.inf,
817 max_value: float = math.inf,
818 allow_nan: bool = True,
819 smallest_nonzero_magnitude: float,
820 ) -> float:
821 assert self._random is not None
822
823 constraints: FloatConstraints = {
824 "min_value": min_value,
825 "max_value": max_value,
826 "allow_nan": allow_nan,
827 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
828 }
829 if (
830 constant := self._maybe_draw_constant("float", constraints, p=0.15)
831 ) is not None:
832 assert isinstance(constant, float)
833 return constant
834
835 # on top of the probability to draw a constant float, we independently
836 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
837 weird_floats = [
838 f
839 for f in [
840 0.0,
841 -0.0,
842 math.inf,
843 -math.inf,
844 math.nan,
845 -math.nan,
846 SIGNALING_NAN,
847 -SIGNALING_NAN,
848 min_value,
849 next_up(min_value),
850 min_value + 1,
851 max_value - 1,
852 next_down(max_value),
853 max_value,
854 ]
855 if choice_permitted(f, constraints)
856 ]
857
858 if weird_floats and self._random.random() < 0.05:
859 return self._random.choice(weird_floats)
860
861 clamper = make_float_clamper(
862 min_value,
863 max_value,
864 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
865 allow_nan=allow_nan,
866 )
867
868 result = self._draw_float()
869 if allow_nan and math.isnan(result):
870 clamped = result # pragma: no cover
871 else:
872 clamped = clamper(result)
873 if float_to_int(clamped) != float_to_int(result) and not (
874 math.isnan(result) and allow_nan
875 ):
876 result = clamped
877 return result
878
879 def draw_string(
880 self,
881 intervals: IntervalSet,
882 *,
883 min_size: int = 0,
884 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
885 ) -> str:
886 assert self._cd is not None
887 assert self._random is not None
888
889 if len(intervals) == 0:
890 return ""
891
892 if (
893 constant := self._maybe_draw_constant(
894 "string",
895 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
896 )
897 ) is not None:
898 assert isinstance(constant, str)
899 return constant
900
901 average_size = min(
902 max(min_size * 2, min_size + 5),
903 0.5 * (min_size + max_size),
904 )
905
906 chars = []
907 elements = many(
908 self._cd,
909 min_size=min_size,
910 max_size=max_size,
911 average_size=average_size,
912 observe=False,
913 )
914 while elements.more():
915 if len(intervals) > 256:
916 if self.draw_boolean(0.2):
917 i = self._random.randint(256, len(intervals) - 1)
918 else:
919 i = self._random.randint(0, 255)
920 else:
921 i = self._random.randint(0, len(intervals) - 1)
922
923 chars.append(intervals.char_in_shrink_order(i))
924
925 return "".join(chars)
926
927 def draw_bytes(
928 self,
929 min_size: int = 0,
930 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
931 ) -> bytes:
932 assert self._cd is not None
933 assert self._random is not None
934
935 if (
936 constant := self._maybe_draw_constant(
937 "bytes", {"min_size": min_size, "max_size": max_size}
938 )
939 ) is not None:
940 assert isinstance(constant, bytes)
941 return constant
942
943 buf = bytearray()
944 average_size = min(
945 max(min_size * 2, min_size + 5),
946 0.5 * (min_size + max_size),
947 )
948 elements = many(
949 self._cd,
950 min_size=min_size,
951 max_size=max_size,
952 average_size=average_size,
953 observe=False,
954 )
955 while elements.more():
956 buf += self._random.randbytes(1)
957
958 return bytes(buf)
959
960 def _draw_float(self) -> float:
961 assert self._random is not None
962
963 f = lex_to_float(self._random.getrandbits(64))
964 sign = 1 if self._random.getrandbits(1) else -1
965 return sign * f
966
967 def _draw_unbounded_integer(self) -> int:
968 assert self._cd is not None
969 assert self._random is not None
970
971 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
972
973 r = self._random.getrandbits(size)
974 sign = r & 1
975 r >>= 1
976 if sign:
977 r = -r
978 return r
979
980 def _draw_bounded_integer(
981 self,
982 lower: int,
983 upper: int,
984 *,
985 vary_size: bool = True,
986 ) -> int:
987 assert lower <= upper
988 assert self._cd is not None
989 assert self._random is not None
990
991 if lower == upper:
992 return lower
993
994 bits = (upper - lower).bit_length()
995 if bits > 24 and vary_size and self._random.random() < 7 / 8:
996 # For large ranges, we combine the uniform random distribution
997 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
998 # choice of unicode characters is uniform but the 32bit distribution is not.
999 idx = INT_SIZES_SAMPLER.sample(self._cd)
1000 cap_bits = min(bits, INT_SIZES[idx])
1001 upper = min(upper, lower + 2**cap_bits - 1)
1002 return self._random.randint(lower, upper)
1003
1004 return self._random.randint(lower, upper)
1005
1006
1007# Masks for masking off the first byte of an n-bit buffer.
1008# The appropriate mask is stored at position n % 8.
1009BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1010BYTE_MASKS[0] = 255
1011
1012
1013class BytestringProvider(PrimitiveProvider):
1014 lifetime = "test_case"
1015
1016 def __init__(
1017 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1018 ):
1019 super().__init__(conjecturedata)
1020 self.bytestring = bytestring
1021 self.index = 0
1022 self.drawn = bytearray()
1023
1024 def _draw_bits(self, n):
1025 if n == 0: # pragma: no cover
1026 return 0
1027 n_bytes = bits_to_bytes(n)
1028 if self.index + n_bytes > len(self.bytestring):
1029 self._cd.mark_overrun()
1030 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1031 self.index += n_bytes
1032
1033 buf[0] &= BYTE_MASKS[n % 8]
1034 buf = bytes(buf)
1035 self.drawn += buf
1036 return int_from_bytes(buf)
1037
1038 def draw_boolean(
1039 self,
1040 p: float = 0.5,
1041 ) -> bool:
1042 if p <= 0:
1043 return False
1044 if p >= 1:
1045 return True
1046
1047 # always use one byte for booleans to maintain constant draw size.
1048 # If a probability requires more than 8 bits to represent precisely,
1049 # the result will be slightly biased, but not badly.
1050 bits = 8
1051 size = 2**bits
1052 # always leave at least one value that can be true, even for very small
1053 # p.
1054 falsey = max(1, math.floor(size * (1 - p)))
1055 n = self._draw_bits(bits)
1056 return n >= falsey
1057
1058 def draw_integer(
1059 self,
1060 min_value: Optional[int] = None,
1061 max_value: Optional[int] = None,
1062 *,
1063 weights: Optional[dict[int, float]] = None,
1064 shrink_towards: int = 0,
1065 ) -> int:
1066 assert self._cd is not None
1067
1068 # we explicitly ignore integer weights for now, as they are likely net
1069 # negative on fuzzer performance.
1070
1071 if min_value is None and max_value is None:
1072 min_value = -(2**127)
1073 max_value = 2**127 - 1
1074 elif min_value is None:
1075 assert max_value is not None
1076 min_value = max_value - 2**64
1077 elif max_value is None:
1078 assert min_value is not None
1079 max_value = min_value + 2**64
1080
1081 if min_value == max_value:
1082 return min_value
1083
1084 bits = (max_value - min_value).bit_length()
1085 value = self._draw_bits(bits)
1086 while not (min_value <= value <= max_value):
1087 value = self._draw_bits(bits)
1088 return value
1089
1090 def draw_float(
1091 self,
1092 *,
1093 min_value: float = -math.inf,
1094 max_value: float = math.inf,
1095 allow_nan: bool = True,
1096 smallest_nonzero_magnitude: float,
1097 ) -> float:
1098 n = self._draw_bits(64)
1099 sign = -1 if n >> 64 else 1
1100 f = sign * lex_to_float(n & ((1 << 64) - 1))
1101 clamper = make_float_clamper(
1102 min_value,
1103 max_value,
1104 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1105 allow_nan=allow_nan,
1106 )
1107 return clamper(f)
1108
1109 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1110 average_size = min(
1111 max(min_size * 2, min_size + 5),
1112 0.5 * (min_size + max_size),
1113 )
1114 elements = many(
1115 self._cd,
1116 min_size=min_size,
1117 max_size=max_size,
1118 average_size=average_size,
1119 observe=False,
1120 )
1121 values = []
1122 while elements.more():
1123 values.append(self.draw_integer(0, alphabet_size - 1))
1124 return values
1125
1126 def draw_string(
1127 self,
1128 intervals: IntervalSet,
1129 *,
1130 min_size: int = 0,
1131 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1132 ) -> str:
1133 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1134 return "".join(chr(intervals[v]) for v in values)
1135
1136 def draw_bytes(
1137 self,
1138 min_size: int = 0,
1139 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1140 ) -> bytes:
1141 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1142 return bytes(values)
1143
1144
1145class URandom(Random):
1146 # we reimplement a Random instance instead of using SystemRandom, because
1147 # os.urandom is not guaranteed to read from /dev/urandom.
1148
1149 @staticmethod
1150 def _urandom(size: int) -> bytes:
1151 with open("/dev/urandom", "rb") as f:
1152 return f.read(size)
1153
1154 def getrandbits(self, k: int) -> int:
1155 assert k >= 0
1156 size = bits_to_bytes(k)
1157 n = int_from_bytes(self._urandom(size))
1158 # trim excess bits
1159 return n >> (size * 8 - k)
1160
1161 def random(self) -> float:
1162 # adapted from random.SystemRandom.random
1163 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1164
1165
1166class URandomProvider(HypothesisProvider):
1167 # A provider which reads directly from /dev/urandom as its source of randomness.
1168 # This provider exists to provide better Hypothesis integration with Antithesis
1169 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1170 # randomness to mutate. This effectively gives Antithesis control over
1171 # the choices made by the URandomProvider.
1172 #
1173 # If you are not using Antithesis, you probably don't want to use this
1174 # provider.
1175
1176 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1177 super().__init__(conjecturedata)
1178 if WINDOWS: # pragma: no cover
1179 warnings.warn(
1180 "/dev/urandom is not available on windows. Falling back to "
1181 'standard PRNG generation (equivalent to backend="hypothesis").',
1182 HypothesisWarning,
1183 stacklevel=1,
1184 )
1185 # don't overwrite the HypothesisProvider self._random attribute in
1186 # this case
1187 else:
1188 self._random = URandom()