1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from types import ModuleType
22from typing import (
23 TYPE_CHECKING,
24 Any,
25 ClassVar,
26 Literal,
27 Optional,
28 TypedDict,
29 TypeVar,
30 Union,
31)
32
33from sortedcontainers import SortedSet
34
35from hypothesis.errors import HypothesisWarning
36from hypothesis.internal.cache import LRUCache
37from hypothesis.internal.compat import WINDOWS, int_from_bytes
38from hypothesis.internal.conjecture.choice import (
39 ChoiceConstraintsT,
40 ChoiceT,
41 ChoiceTypeT,
42 FloatConstraints,
43 choice_constraints_key,
44 choice_permitted,
45)
46from hypothesis.internal.conjecture.floats import lex_to_float
47from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
48from hypothesis.internal.conjecture.utils import (
49 INT_SIZES,
50 INT_SIZES_SAMPLER,
51 Sampler,
52 many,
53)
54from hypothesis.internal.constants_ast import (
55 Constants,
56 constants_from_module,
57 is_local_module_file,
58)
59from hypothesis.internal.floats import (
60 SIGNALING_NAN,
61 float_to_int,
62 make_float_clamper,
63 next_down,
64 next_up,
65)
66from hypothesis.internal.intervalsets import IntervalSet
67from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
68
69if TYPE_CHECKING:
70 from typing import TypeAlias
71
72 from hypothesis.internal.conjecture.data import ConjectureData
73 from hypothesis.internal.constants_ast import ConstantT
74
75T = TypeVar("T")
76LifetimeT: "TypeAlias" = Literal["test_case", "test_function"]
77COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
78
79
80#: Registered Hypothesis backends. This is a dictionary whose keys are the name
81#: to be used in |settings.backend|, and whose values are a string of the absolute
82#: importable path to a subclass of |PrimitiveProvider|, which Hypothesis will
83#: instantiate when your backend is requested by a test's |settings.backend| value.
84#:
85#: For example, the default Hypothesis backend is registered as:
86#:
87#: .. code-block:: python
88#:
89#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
90#:
91#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
92#:
93#: And can be used with:
94#:
95#: .. code-block:: python
96#:
97#: from hypothesis import given, settings, strategies as st
98#:
99#: @given(st.integers())
100#: @settings(backend="hypothesis")
101#: def f(n):
102#: pass
103#:
104#: Though, as ``backend="hypothesis"`` is the default setting, the above would
105#: typically not have any effect.
106#:
107#: The purpose of mapping to an absolute importable path, rather than the actual
108#: |PrimitiveProvider| class, is to avoid slowing down Hypothesis startup times
109#: by only importing alternative backends when required.
110AVAILABLE_PROVIDERS = {
111 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
112 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
113}
114# cache the choice_permitted constants for a particular set of constraints.
115CacheKeyT: "TypeAlias" = tuple[ChoiceTypeT, tuple[Any, ...]]
116CacheValueT: "TypeAlias" = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
117CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
118
119_constant_floats = (
120 [
121 0.5,
122 1.1,
123 1.5,
124 1.9,
125 1.0 / 3,
126 10e6,
127 10e-6,
128 1.175494351e-38,
129 next_up(0.0),
130 float_info.min,
131 float_info.max,
132 3.402823466e38,
133 9007199254740992.0,
134 1 - 10e-6,
135 2 + 10e-6,
136 1.192092896e-07,
137 2.2204460492503131e-016,
138 ]
139 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
140 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
141)
142_constant_floats.extend([-x for x in _constant_floats])
143assert all(isinstance(f, float) for f in _constant_floats)
144
145_constant_strings = {
146 # strings which can be interpreted as code / logic
147 "undefined",
148 "null",
149 "NULL",
150 "nil",
151 "NIL",
152 "true",
153 "false",
154 "True",
155 "False",
156 "TRUE",
157 "FALSE",
158 "None",
159 "none",
160 "if",
161 "then",
162 "else",
163 # strings which can be interpreted as a number
164 "0",
165 "1e100",
166 "0..0",
167 "0/0",
168 "1/0",
169 "+0.0",
170 "Infinity",
171 "-Infinity",
172 "Inf",
173 "INF",
174 "NaN",
175 "9" * 30,
176 # common ascii characters
177 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
178 # common unicode characters
179 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
180 # characters which increase in length when lowercased
181 "Ⱥ",
182 "Ⱦ",
183 # ligatures
184 "æœÆŒffʤʨß"
185 # emoticons
186 "(╯°□°)╯︵ ┻━┻)",
187 # emojis
188 "😍",
189 "🇺🇸",
190 # emoji modifiers
191 "🏻" # U+1F3FB Light Skin Tone,
192 "👍🏻", # 👍 followed by U+1F3FB
193 # RTL text
194 "الكل في المجمو عة",
195 # Ogham text, which contains the only character in the Space Separators
196 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
197 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
198 # readable variations on text (bolt/italic/script)
199 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
200 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
201 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
202 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
203 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
204 # upsidown text
205 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
206 # reserved strings in windows
207 "NUL",
208 "COM1",
209 "LPT1",
210 # scunthorpe problem
211 "Scunthorpe",
212 # zalgo text
213 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
214 #
215 # examples from https://faultlore.com/blah/text-hates-you/
216 "मनीष منش",
217 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
218 "lorem لا بسم الله ipsum 你好1234你好",
219}
220
221
222# we don't actually care what order the constants are sorted in, just that the
223# ordering is deterministic.
224GLOBAL_CONSTANTS = Constants(
225 integers=SortedSet(),
226 floats=SortedSet(_constant_floats, key=float_to_int),
227 bytes=SortedSet(),
228 strings=SortedSet(_constant_strings),
229)
230
231_local_constants = Constants(
232 integers=SortedSet(),
233 floats=SortedSet(key=float_to_int),
234 bytes=SortedSet(),
235 strings=SortedSet(),
236)
237# modules that we've already seen and processed for local constants. These are
238# are all modules, not necessarily local ones. This lets us quickly see which
239# modules are new without an expensive path.resolve() or is_local_module_file
240# cache lookup.
241_seen_modules: set[ModuleType] = set()
242_sys_modules_len: Optional[int] = None
243
244
245def _get_local_constants() -> Constants:
246 global _sys_modules_len, _local_constants
247
248 if sys.platform == "emscripten": # pragma: no cover
249 # pyodide builds bundle the stdlib in a nonstandard location, like
250 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
251 # the stdlib as local code and slowing down on emscripten, instead return
252 # that nothing is local.
253 #
254 # pyodide may provide some way to distinguish stdlib/third-party/local
255 # code. I haven't looked into it. If they do, we should correctly implement
256 # ModuleLocation for pyodide instead of this.
257 return _local_constants
258
259 count_constants = len(_local_constants)
260 # We call this function once per HypothesisProvider instance, i.e. once per
261 # input, so it needs to be performant. The logic here is more complicated
262 # than necessary because of this.
263 #
264 # First, we check whether there are any new modules with a very cheap length
265 # check. This check can be fooled if a module is added while another module is
266 # removed, but the more correct check against tuple(sys.modules.keys()) is
267 # substantially more expensive. Such a new module would eventually be discovered
268 # if / when the length changes again in the future.
269 #
270 # If the length has changed, we find just modules we haven't seen before. Of
271 # those, we find the ones which correspond to local modules, and extract their
272 # constants.
273
274 # careful: store sys.modules length when we first check to avoid race conditions
275 # with other threads loading a module before we set _sys_modules_len.
276 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
277 # set(_seen_modules) shouldn't typically be required, but I have run into
278 # a "set changed size during iteration" error here when running
279 # test_provider_conformance_crosshair.
280 new_modules = set(sys.modules.values()) - set(_seen_modules)
281 # Repeated SortedSet unions are expensive. Do the initial unions on a
282 # set(), then do a one-time union with _local_constants after.
283 new_constants = Constants()
284 for module in new_modules:
285 if (
286 module_file := getattr(module, "__file__", None)
287 ) is not None and is_local_module_file(module_file):
288 new_constants |= constants_from_module(module)
289 _local_constants |= new_constants
290 _seen_modules.update(new_modules)
291 _sys_modules_len = sys_modules_len
292
293 # if we add any new constant, invalidate the constant cache for permitted values.
294 # A more efficient approach would be invalidating just the keys with this
295 # choice_type.
296 if len(_local_constants) > count_constants:
297 CONSTANTS_CACHE.cache.clear()
298
299 return _local_constants
300
301
302class _BackendInfoMsg(TypedDict):
303 type: InfoObservationType
304 title: str
305 content: Union[str, dict[str, Any]]
306
307
308# TODO_DOCS: link to choice sequence explanation page
309
310
311class PrimitiveProvider(abc.ABC):
312 """
313 |PrimitiveProvider| is the implementation interface of a
314 :ref:`Hypothesis backend <alternative-backends>`.
315
316 A |PrimitiveProvider| is required to implement the following five
317 ``draw_*`` methods:
318
319 * |PrimitiveProvider.draw_integer|
320 * |PrimitiveProvider.draw_boolean|
321 * |PrimitiveProvider.draw_float|
322 * |PrimitiveProvider.draw_string|
323 * |PrimitiveProvider.draw_bytes|
324
325 Each strategy in Hypothesis generates values by drawing a series of choices
326 from these five methods. By overriding them, a |PrimitiveProvider| can control
327 the distribution of inputs generated by Hypothesis.
328
329 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
330 which uses an SMT solver to generate inputs that uncover new branches.
331
332 Once you implement a |PrimitiveProvider|, you can make it available for use
333 through |AVAILABLE_PROVIDERS|.
334 """
335
336 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
337 #: or ``test_case``.
338 #:
339 #: If ``test_function`` (the default), a single provider instance will be
340 #: instantiated and used for the entirety of each test function (i.e., roughly
341 #: one provider per |@given| annotation). This can be useful for tracking state
342 #: over the entirety of a test function.
343 #:
344 #: If ``test_case``, a new provider instance will be instantiated and used for
345 #: each input Hypothesis generates.
346 #:
347 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
348 #: be ``None`` for a lifetime of ``test_function``, and an instance of
349 #: ``ConjectureData`` for a lifetime of ``test_case``.
350 #:
351 #: Third-party providers likely want to set a lifetime of ``test_function``.
352 lifetime: ClassVar[LifetimeT] = "test_function"
353
354 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
355 #: which record operations performed on them in order to discover new paths.
356 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
357 #: with symbolic choices returned by the provider in any way that would force
358 #: the solver to narrow the range of possible values for that symbolic.
359 #:
360 #: Setting this to ``True`` disables some hypothesis features and optimizations.
361 #: Only set this to ``True`` if it is necessary for your backend.
362 avoid_realization: ClassVar[bool] = False
363
364 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
365 #: callback to |TESTCASE_CALLBACKS|, enabling observability during the lifetime
366 #: of this provider. If ``False``, |PrimitiveProvider.on_observation| will
367 #: never be called by Hypothesis.
368 #:
369 #: The opt-in behavior of observability is because enabling observability
370 #: might increase runtime or memory usage.
371 add_observability_callback: ClassVar[bool] = False
372
373 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
374 self._cd = conjecturedata
375
376 @abc.abstractmethod
377 def draw_boolean(
378 self,
379 p: float = 0.5,
380 ) -> bool:
381 """
382 Draw a boolean choice.
383
384 Parameters
385 ----------
386 p: float
387 The probability of returning ``True``. Between 0 and 1 inclusive.
388
389 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
390 Hypothesis, and may be ignored by the backend.
391
392 If ``0``, the provider must return ``False``. If ``1``, the provider
393 must return ``True``.
394 """
395 raise NotImplementedError
396
397 @abc.abstractmethod
398 def draw_integer(
399 self,
400 min_value: Optional[int] = None,
401 max_value: Optional[int] = None,
402 *,
403 weights: Optional[dict[int, float]] = None,
404 shrink_towards: int = 0,
405 ) -> int:
406 """
407 Draw an integer choice.
408
409 Parameters
410 ----------
411 min_value : int | None
412 (Inclusive) lower bound on the integer value. If ``None``, there is
413 no lower bound.
414 max_value : int | None
415 (Inclusive) upper bound on the integer value. If ``None``, there is
416 no upper bound.
417 weights: dict[int, float] | None
418 Maps keys in the range [``min_value``, ``max_value``] to the probability
419 of returning that key.
420 shrink_towards: int
421 The integer to shrink towards. This is not used during generation and
422 can be ignored by backends.
423 """
424 raise NotImplementedError
425
426 @abc.abstractmethod
427 def draw_float(
428 self,
429 *,
430 min_value: float = -math.inf,
431 max_value: float = math.inf,
432 allow_nan: bool = True,
433 smallest_nonzero_magnitude: float,
434 ) -> float:
435 """
436 Draw a float choice.
437
438 Parameters
439 ----------
440 min_value : float
441 (Inclusive) lower bound on the float value.
442 max_value : float
443 (Inclusive) upper bound on the float value.
444 allow_nan : bool
445 If ``False``, it is invalid to return ``math.nan``.
446 smallest_nonzero_magnitude : float
447 The smallest allowed nonzero magnitude. ``draw_float`` should not
448 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
449 """
450 raise NotImplementedError
451
452 @abc.abstractmethod
453 def draw_string(
454 self,
455 intervals: IntervalSet,
456 *,
457 min_size: int = 0,
458 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
459 ) -> str:
460 """
461 Draw a string choice.
462
463 Parameters
464 ----------
465 intervals : IntervalSet
466 The set of codepoints to sample from.
467 min_size : int
468 (Inclusive) lower bound on the string length.
469 max_size : int
470 (Inclusive) upper bound on the string length.
471 """
472 raise NotImplementedError
473
474 @abc.abstractmethod
475 def draw_bytes(
476 self,
477 min_size: int = 0,
478 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
479 ) -> bytes:
480 """
481 Draw a bytes choice.
482
483 Parameters
484 ----------
485 min_size : int
486 (Inclusive) lower bound on the bytes length.
487 max_size : int
488 (Inclusive) upper bound on the bytes length.
489 """
490 raise NotImplementedError
491
492 def per_test_case_context_manager(self) -> AbstractContextManager:
493 """
494 Returns a context manager which will be entered each time Hypothesis
495 starts generating and executing one test case, and exited when that test
496 case finishes generating and executing, including if any exception is
497 thrown.
498
499 In the lifecycle of a Hypothesis test, this is called before
500 generating strategy values for each test case. This is just before any
501 :ref:`custom executor <custom-function-execution>` is called.
502
503 Even if not returning a custom context manager, |PrimitiveProvider|
504 subclasses are welcome to override this method to know when Hypothesis
505 starts and ends the execution of a single test case.
506 """
507 return contextlib.nullcontext()
508
509 def realize(self, value: T, *, for_failure: bool = False) -> T:
510 """
511 Called whenever hypothesis requires a concrete (non-symbolic) value from
512 a potentially symbolic value. Hypothesis will not check that ``value`` is
513 symbolic before calling ``realize``, so you should handle the case where
514 ``value`` is non-symbolic.
515
516 The returned value should be non-symbolic. If you cannot provide a value,
517 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
518
519 If ``for_failure`` is ``True``, the value is associated with a failing example.
520 In this case, the backend should spend substantially more effort when
521 attempting to realize the value, since it is important to avoid discarding
522 failing examples. Backends may still raise |BackendCannotProceed| when
523 ``for_failure`` is ``True``, if realization is truly impossible or if
524 realization takes significantly longer than expected (say, 5 minutes).
525 """
526 return value
527
528 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
529 """
530 Called when Hypothesis has discovered a choice sequence which the provider
531 may wish to enqueue to replay under its own instrumentation when we next
532 ask to generate a test case, rather than generating one from scratch.
533
534 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
535 of high-code-coverage inputs discovered by
536 `HypoFuzz <https://hypofuzz.com/>`_.
537 """
538 return None
539
540 def observe_test_case(self) -> dict[str, Any]:
541 """Called at the end of the test case when :ref:`observability
542 <observability>` is enabled.
543
544 The return value should be a non-symbolic json-encodable dictionary,
545 and will be included in observations as ``observation["metadata"]["backend"]``.
546 """
547 return {}
548
549 def observe_information_messages(
550 self, *, lifetime: LifetimeT
551 ) -> Iterable[_BackendInfoMsg]:
552 """Called at the end of each test case and again at end of the test function.
553
554 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
555 dictionaries to be delivered as individual information messages. Hypothesis
556 adds the ``run_start`` timestamp and ``property`` name for you.
557 """
558 assert lifetime in ("test_case", "test_function")
559 yield from []
560
561 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
562 """
563 Called at the end of each test case which uses this provider, with the same
564 ``observation["type"] == "test_case"`` observation that is passed to
565 other callbacks in |TESTCASE_CALLBACKS|. This method is not called with
566 ``observation["type"] in {"info", "alert", "error"}`` observations.
567
568 .. important::
569
570 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
571 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
572
573 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
574 observability might increase runtime or memory usage.
575
576 Calls to this method are guaranteed to alternate with calls to
577 |PrimitiveProvider.per_test_case_context_manager|. For example:
578
579 .. code-block:: python
580
581 # test function starts
582 per_test_case_context_manager()
583 on_observation()
584 per_test_case_context_manager()
585 on_observation()
586 ...
587 # test function ends
588
589 Note that |PrimitiveProvider.on_observation| will not be called for test
590 cases which did not use this provider during generation, for example
591 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
592 to the standard Hypothesis backend after this backend raised too many
593 |BackendCannotProceed| exceptions.
594 """
595
596 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
597 """Marks the beginning of a semantically meaningful span of choices.
598
599 Spans are a depth-first tree structure. A span is opened by a call to
600 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
601 closes the most recently opened span. So the following sequence of calls:
602
603 .. code-block:: python
604
605 span_start(label=1)
606 n1 = draw_integer()
607 span_start(label=2)
608 b1 = draw_boolean()
609 n2 = draw_integer()
610 span_end()
611 f1 = draw_float()
612 span_end()
613
614 produces the following two spans of choices:
615
616 .. code-block::
617
618 1: [n1, b1, n2, f1]
619 2: [b1, n2]
620
621 Hypothesis uses spans to denote "semantically meaningful" sequences of
622 choices. For instance, Hypothesis opens a span for the sequence of choices
623 made while drawing from each strategy. Not every span corresponds to a
624 strategy; the generation of e.g. each element in |st.lists| is also marked
625 with a span, among others.
626
627 ``label`` is an opaque integer, which has no defined semantics.
628 The only guarantee made by Hypothesis is that all spans with the same
629 "meaning" will share the same ``label``. So all spans from the same
630 strategy will share the same label, as will e.g. the spans for |st.lists|
631 elements.
632
633 Providers can track calls to |PrimitiveProvider.span_start| and
634 |PrimitiveProvider.span_end| to learn something about the semantics of
635 the test's choice sequence. For instance, a provider could track the depth
636 of the span tree, or the number of unique labels, which says something about
637 the complexity of the choices being generated. Or a provider could track
638 the span tree across test cases in order to determine what strategies are
639 being used in what contexts.
640
641 It is possible for Hypothesis to start and immediately stop a span,
642 without calling a ``draw_*`` method in between. These spans contain zero
643 choices.
644
645 Hypothesis will always balance the number of calls to
646 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
647 to |PrimitiveProvider.span_start| will always be followed by a call to
648 |PrimitiveProvider.span_end| before the end of the test case.
649
650 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
651 internally.
652 """
653
654 def span_end(self, discard: bool, /) -> None: # noqa: B027, FBT001
655 """Marks the end of a semantically meaningful span of choices.
656
657 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
658 as unlikely to contribute to the input data as seen by the user's test.
659 Note however that side effects can make this determination unsound.
660
661 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
662 internally.
663 """
664
665
666class HypothesisProvider(PrimitiveProvider):
667 lifetime = "test_case"
668
669 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
670 super().__init__(conjecturedata)
671 self._random = None if self._cd is None else self._cd._random
672
673 @cached_property
674 def _local_constants(self):
675 # defer computation of local constants until/if we need it
676 return _get_local_constants()
677
678 def _maybe_draw_constant(
679 self,
680 choice_type: ChoiceTypeT,
681 constraints: ChoiceConstraintsT,
682 *,
683 p: float = 0.05,
684 ) -> Optional["ConstantT"]:
685 assert self._random is not None
686 assert choice_type != "boolean"
687 # check whether we even want a constant before spending time computing
688 # and caching the allowed constants.
689 if self._random.random() > p:
690 return None
691
692 # note: this property access results in computation being done
693 assert self._local_constants is not None
694
695 key = (choice_type, choice_constraints_key(choice_type, constraints))
696 if key not in CONSTANTS_CACHE:
697 CONSTANTS_CACHE[key] = (
698 tuple(
699 choice
700 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
701 if choice_permitted(choice, constraints)
702 ),
703 tuple(
704 choice
705 for choice in self._local_constants.set_for_type(choice_type)
706 if choice_permitted(choice, constraints)
707 ),
708 )
709
710 # split constants into two pools, so we still have a good chance to draw
711 # global constants even if there are many local constants.
712 (global_constants, local_constants) = CONSTANTS_CACHE[key]
713 constants_lists = ([global_constants] if global_constants else []) + (
714 [local_constants] if local_constants else []
715 )
716 if not constants_lists:
717 return None
718
719 # At this point, we've decided to use a constant. Now we select which pool
720 # to draw that constant from.
721 #
722 # Note that this approach has a different probability distribution than
723 # attempting a random.random for both global_constants and local_constants.
724 constants = self._random.choice(constants_lists)
725 return self._random.choice(constants)
726
727 def draw_boolean(
728 self,
729 p: float = 0.5,
730 ) -> bool:
731 assert self._random is not None
732
733 if p <= 0:
734 return False
735 if p >= 1:
736 return True
737
738 return self._random.random() < p
739
740 def draw_integer(
741 self,
742 min_value: Optional[int] = None,
743 max_value: Optional[int] = None,
744 *,
745 weights: Optional[dict[int, float]] = None,
746 shrink_towards: int = 0,
747 ) -> int:
748 assert self._cd is not None
749 if (
750 constant := self._maybe_draw_constant(
751 "integer",
752 {
753 "min_value": min_value,
754 "max_value": max_value,
755 "weights": weights,
756 "shrink_towards": shrink_towards,
757 },
758 )
759 ) is not None:
760 assert isinstance(constant, int)
761 return constant
762
763 center = 0
764 if min_value is not None:
765 center = max(min_value, center)
766 if max_value is not None:
767 center = min(max_value, center)
768
769 if weights is not None:
770 assert min_value is not None
771 assert max_value is not None
772
773 # format of weights is a mapping of ints to p, where sum(p) < 1.
774 # The remaining probability mass is uniformly distributed over
775 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
776 # but simplifies things).
777 #
778 # We assert that sum(p) is strictly less than 1 because it simplifies
779 # handling forced values when we can force into the unmapped probability
780 # mass. We should eventually remove this restriction.
781 sampler = Sampler(
782 [1 - sum(weights.values()), *weights.values()], observe=False
783 )
784 # if we're forcing, it's easiest to force into the unmapped probability
785 # mass and then force the drawn value after.
786 idx = sampler.sample(self._cd)
787
788 if idx == 0:
789 return self._draw_bounded_integer(min_value, max_value)
790 # implicit reliance on dicts being sorted for determinism
791 return list(weights)[idx - 1]
792
793 if min_value is None and max_value is None:
794 return self._draw_unbounded_integer()
795
796 if min_value is None:
797 assert max_value is not None
798 probe = max_value + 1
799 while max_value < probe:
800 probe = center + self._draw_unbounded_integer()
801 return probe
802
803 if max_value is None:
804 assert min_value is not None
805 probe = min_value - 1
806 while probe < min_value:
807 probe = center + self._draw_unbounded_integer()
808 return probe
809
810 return self._draw_bounded_integer(min_value, max_value)
811
812 def draw_float(
813 self,
814 *,
815 min_value: float = -math.inf,
816 max_value: float = math.inf,
817 allow_nan: bool = True,
818 smallest_nonzero_magnitude: float,
819 ) -> float:
820 assert self._random is not None
821
822 constraints: FloatConstraints = {
823 "min_value": min_value,
824 "max_value": max_value,
825 "allow_nan": allow_nan,
826 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
827 }
828 if (
829 constant := self._maybe_draw_constant("float", constraints, p=0.15)
830 ) is not None:
831 assert isinstance(constant, float)
832 return constant
833
834 # on top of the probability to draw a constant float, we independently
835 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
836 weird_floats = [
837 f
838 for f in [
839 0.0,
840 -0.0,
841 math.inf,
842 -math.inf,
843 math.nan,
844 -math.nan,
845 SIGNALING_NAN,
846 -SIGNALING_NAN,
847 min_value,
848 next_up(min_value),
849 min_value + 1,
850 max_value - 1,
851 next_down(max_value),
852 max_value,
853 ]
854 if choice_permitted(f, constraints)
855 ]
856
857 if weird_floats and self._random.random() < 0.05:
858 return self._random.choice(weird_floats)
859
860 clamper = make_float_clamper(
861 min_value,
862 max_value,
863 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
864 allow_nan=allow_nan,
865 )
866
867 result = self._draw_float()
868 if allow_nan and math.isnan(result):
869 clamped = result # pragma: no cover
870 else:
871 clamped = clamper(result)
872 if float_to_int(clamped) != float_to_int(result) and not (
873 math.isnan(result) and allow_nan
874 ):
875 result = clamped
876 return result
877
878 def draw_string(
879 self,
880 intervals: IntervalSet,
881 *,
882 min_size: int = 0,
883 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
884 ) -> str:
885 assert self._cd is not None
886 assert self._random is not None
887
888 if len(intervals) == 0:
889 return ""
890
891 if (
892 constant := self._maybe_draw_constant(
893 "string",
894 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
895 )
896 ) is not None:
897 assert isinstance(constant, str)
898 return constant
899
900 average_size = min(
901 max(min_size * 2, min_size + 5),
902 0.5 * (min_size + max_size),
903 )
904
905 chars = []
906 elements = many(
907 self._cd,
908 min_size=min_size,
909 max_size=max_size,
910 average_size=average_size,
911 observe=False,
912 )
913 while elements.more():
914 if len(intervals) > 256:
915 if self.draw_boolean(0.2):
916 i = self._random.randint(256, len(intervals) - 1)
917 else:
918 i = self._random.randint(0, 255)
919 else:
920 i = self._random.randint(0, len(intervals) - 1)
921
922 chars.append(intervals.char_in_shrink_order(i))
923
924 return "".join(chars)
925
926 def draw_bytes(
927 self,
928 min_size: int = 0,
929 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
930 ) -> bytes:
931 assert self._cd is not None
932 assert self._random is not None
933
934 if (
935 constant := self._maybe_draw_constant(
936 "bytes", {"min_size": min_size, "max_size": max_size}
937 )
938 ) is not None:
939 assert isinstance(constant, bytes)
940 return constant
941
942 buf = bytearray()
943 average_size = min(
944 max(min_size * 2, min_size + 5),
945 0.5 * (min_size + max_size),
946 )
947 elements = many(
948 self._cd,
949 min_size=min_size,
950 max_size=max_size,
951 average_size=average_size,
952 observe=False,
953 )
954 while elements.more():
955 buf += self._random.randbytes(1)
956
957 return bytes(buf)
958
959 def _draw_float(self) -> float:
960 assert self._random is not None
961
962 f = lex_to_float(self._random.getrandbits(64))
963 sign = 1 if self._random.getrandbits(1) else -1
964 return sign * f
965
966 def _draw_unbounded_integer(self) -> int:
967 assert self._cd is not None
968 assert self._random is not None
969
970 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
971
972 r = self._random.getrandbits(size)
973 sign = r & 1
974 r >>= 1
975 if sign:
976 r = -r
977 return r
978
979 def _draw_bounded_integer(
980 self,
981 lower: int,
982 upper: int,
983 *,
984 vary_size: bool = True,
985 ) -> int:
986 assert lower <= upper
987 assert self._cd is not None
988 assert self._random is not None
989
990 if lower == upper:
991 return lower
992
993 bits = (upper - lower).bit_length()
994 if bits > 24 and vary_size and self._random.random() < 7 / 8:
995 # For large ranges, we combine the uniform random distribution
996 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
997 # choice of unicode characters is uniform but the 32bit distribution is not.
998 idx = INT_SIZES_SAMPLER.sample(self._cd)
999 cap_bits = min(bits, INT_SIZES[idx])
1000 upper = min(upper, lower + 2**cap_bits - 1)
1001 return self._random.randint(lower, upper)
1002
1003 return self._random.randint(lower, upper)
1004
1005
1006# Masks for masking off the first byte of an n-bit buffer.
1007# The appropriate mask is stored at position n % 8.
1008BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1009BYTE_MASKS[0] = 255
1010
1011
1012class BytestringProvider(PrimitiveProvider):
1013 lifetime = "test_case"
1014
1015 def __init__(
1016 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1017 ):
1018 super().__init__(conjecturedata)
1019 self.bytestring = bytestring
1020 self.index = 0
1021 self.drawn = bytearray()
1022
1023 def _draw_bits(self, n):
1024 if n == 0: # pragma: no cover
1025 return 0
1026 n_bytes = bits_to_bytes(n)
1027 if self.index + n_bytes > len(self.bytestring):
1028 self._cd.mark_overrun()
1029 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1030 self.index += n_bytes
1031
1032 buf[0] &= BYTE_MASKS[n % 8]
1033 buf = bytes(buf)
1034 self.drawn += buf
1035 return int_from_bytes(buf)
1036
1037 def draw_boolean(
1038 self,
1039 p: float = 0.5,
1040 ) -> bool:
1041 if p <= 0:
1042 return False
1043 if p >= 1:
1044 return True
1045
1046 # always use one byte for booleans to maintain constant draw size.
1047 # If a probability requires more than 8 bits to represent precisely,
1048 # the result will be slightly biased, but not badly.
1049 bits = 8
1050 size = 2**bits
1051 # always leave at least one value that can be true, even for very small
1052 # p.
1053 falsey = max(1, math.floor(size * (1 - p)))
1054 n = self._draw_bits(bits)
1055 return n >= falsey
1056
1057 def draw_integer(
1058 self,
1059 min_value: Optional[int] = None,
1060 max_value: Optional[int] = None,
1061 *,
1062 weights: Optional[dict[int, float]] = None,
1063 shrink_towards: int = 0,
1064 ) -> int:
1065 assert self._cd is not None
1066
1067 # we explicitly ignore integer weights for now, as they are likely net
1068 # negative on fuzzer performance.
1069
1070 if min_value is None and max_value is None:
1071 min_value = -(2**127)
1072 max_value = 2**127 - 1
1073 elif min_value is None:
1074 assert max_value is not None
1075 min_value = max_value - 2**64
1076 elif max_value is None:
1077 assert min_value is not None
1078 max_value = min_value + 2**64
1079
1080 if min_value == max_value:
1081 return min_value
1082
1083 bits = (max_value - min_value).bit_length()
1084 value = self._draw_bits(bits)
1085 while not (min_value <= value <= max_value):
1086 value = self._draw_bits(bits)
1087 return value
1088
1089 def draw_float(
1090 self,
1091 *,
1092 min_value: float = -math.inf,
1093 max_value: float = math.inf,
1094 allow_nan: bool = True,
1095 smallest_nonzero_magnitude: float,
1096 ) -> float:
1097 n = self._draw_bits(64)
1098 sign = -1 if n >> 64 else 1
1099 f = sign * lex_to_float(n & ((1 << 64) - 1))
1100 clamper = make_float_clamper(
1101 min_value,
1102 max_value,
1103 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1104 allow_nan=allow_nan,
1105 )
1106 return clamper(f)
1107
1108 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1109 average_size = min(
1110 max(min_size * 2, min_size + 5),
1111 0.5 * (min_size + max_size),
1112 )
1113 elements = many(
1114 self._cd,
1115 min_size=min_size,
1116 max_size=max_size,
1117 average_size=average_size,
1118 observe=False,
1119 )
1120 values = []
1121 while elements.more():
1122 values.append(self.draw_integer(0, alphabet_size - 1))
1123 return values
1124
1125 def draw_string(
1126 self,
1127 intervals: IntervalSet,
1128 *,
1129 min_size: int = 0,
1130 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1131 ) -> str:
1132 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1133 return "".join(chr(intervals[v]) for v in values)
1134
1135 def draw_bytes(
1136 self,
1137 min_size: int = 0,
1138 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1139 ) -> bytes:
1140 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1141 return bytes(values)
1142
1143
1144class URandom(Random):
1145 # we reimplement a Random instance instead of using SystemRandom, because
1146 # os.urandom is not guaranteed to read from /dev/urandom.
1147
1148 @staticmethod
1149 def _urandom(size: int) -> bytes:
1150 with open("/dev/urandom", "rb") as f:
1151 return f.read(size)
1152
1153 def getrandbits(self, k: int) -> int:
1154 assert k >= 0
1155 size = bits_to_bytes(k)
1156 n = int_from_bytes(self._urandom(size))
1157 # trim excess bits
1158 return n >> (size * 8 - k)
1159
1160 def random(self) -> float:
1161 # adapted from random.SystemRandom.random
1162 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1163
1164
1165class URandomProvider(HypothesisProvider):
1166 # A provider which reads directly from /dev/urandom as its source of randomness.
1167 # This provider exists to provide better Hypothesis integration with Antithesis
1168 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1169 # randomness to mutate. This effectively gives Antithesis control over
1170 # the choices made by the URandomProvider.
1171 #
1172 # If you are not using Antithesis, you probably don't want to use this
1173 # provider.
1174
1175 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1176 super().__init__(conjecturedata)
1177 if WINDOWS: # pragma: no cover
1178 warnings.warn(
1179 "/dev/urandom is not available on windows. Falling back to "
1180 'standard PRNG generation (equivalent to backend="hypothesis").',
1181 HypothesisWarning,
1182 stacklevel=1,
1183 )
1184 # don't overwrite the HypothesisProvider self._random attribute in
1185 # this case
1186 else:
1187 self._random = URandom()