1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import ast
12import hashlib
13import inspect
14import math
15import sys
16from ast import Constant, Expr, NodeVisitor, UnaryOp, USub
17from collections.abc import Iterator, MutableSet
18from functools import lru_cache
19from itertools import chain
20from pathlib import Path
21from types import ModuleType
22from typing import TYPE_CHECKING, Optional, Union
23
24import hypothesis
25from hypothesis.configuration import storage_directory
26from hypothesis.internal.conjecture.choice import ChoiceTypeT
27from hypothesis.internal.escalation import is_hypothesis_file
28
29if TYPE_CHECKING:
30 from typing import TypeAlias
31
# The Python types of the literal constants handled in this module.
ConstantT: "TypeAlias" = Union[int, float, bytes, str]

# Alias for the builtin ``bytes`` type. ``Constants.__init__`` takes a keyword
# argument named ``bytes``, which shadows the builtin inside that method, so
# annotations there use this alias instead.
# unfortunate collision with builtin. I don't want to name the init arg bytes_.
bytesT = bytes
36
37
class Constants:
    """A collection of constants, bucketed by type.

    Values are held in four sets: ``integers``, ``floats``, ``bytes`` and
    ``strings``. The container supports ``in``, ``len()``, iteration, ``|``
    (union) and ``==``. Because ``__eq__`` is defined without ``__hash__``,
    instances are unhashable.
    """

    def __init__(
        self,
        *,
        integers: Optional[MutableSet[int]] = None,
        floats: Optional[MutableSet[float]] = None,
        bytes: Optional[MutableSet[bytes]] = None,
        strings: Optional[MutableSet[str]] = None,
    ):
        """Create the container.

        Every argument is keyword-only. A set that is passed in is stored
        as-is (it is not copied); an omitted bucket starts as a new empty set.
        """
        self.integers: MutableSet[int] = set() if integers is None else integers
        self.floats: MutableSet[float] = set() if floats is None else floats
        # ``bytes`` is the init argument here, hence the ``bytesT`` alias.
        self.bytes: MutableSet[bytesT] = set() if bytes is None else bytes
        self.strings: MutableSet[str] = set() if strings is None else strings

    def set_for_type(
        self, constant_type: Union[type[ConstantT], ChoiceTypeT]
    ) -> Union[MutableSet[int], MutableSet[float], MutableSet[bytes], MutableSet[str]]:
        """Return the bucket which stores constants of ``constant_type``.

        ``constant_type`` is either one of the types ``int``, ``float``,
        ``bytes``, ``str`` or one of the strings ``"integer"``, ``"float"``,
        ``"bytes"``, ``"string"``.

        Raises:
            ValueError: if ``constant_type`` is none of the above.
        """
        if constant_type is int or constant_type == "integer":
            return self.integers
        elif constant_type is float or constant_type == "float":
            return self.floats
        elif constant_type is bytes or constant_type == "bytes":
            return self.bytes
        elif constant_type is str or constant_type == "string":
            return self.strings
        raise ValueError(f"unknown constant_type {constant_type}")

    def add(self, constant: ConstantT) -> None:
        """Add ``constant`` to the bucket matching its exact type.

        The lookup uses ``type(constant)``, so instances of subclasses (for
        example ``bool``) raise ``ValueError`` via ``set_for_type``.
        """
        self.set_for_type(type(constant)).add(constant)  # type: ignore

    def __contains__(self, constant: ConstantT) -> bool:
        # Same exact-type dispatch as ``add``; an unsupported type raises
        # ValueError rather than returning False.
        return constant in self.set_for_type(type(constant))

    def __or__(self, other: "Constants") -> "Constants":
        """Return a new ``Constants`` holding the per-bucket union."""
        return Constants(
            integers=self.integers | other.integers,  # type: ignore
            floats=self.floats | other.floats,  # type: ignore
            bytes=self.bytes | other.bytes,  # type: ignore
            strings=self.strings | other.strings,  # type: ignore
        )

    def __iter__(self) -> Iterator[ConstantT]:
        # ``chain`` objects are already iterators, so no ``iter()`` is needed.
        return chain(self.integers, self.floats, self.bytes, self.strings)

    def __len__(self) -> int:
        return (
            len(self.integers) + len(self.floats) + len(self.bytes) + len(self.strings)
        )

    def __repr__(self) -> str:
        return f"Constants({self.integers=}, {self.floats=}, {self.bytes=}, {self.strings=})"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Constants):
            # Let Python try the reflected comparison on ``other`` before it
            # falls back to identity, instead of forcing the answer to False.
            return NotImplemented
        return (
            self.integers == other.integers
            and self.floats == other.floats
            and self.bytes == other.bytes
            and self.strings == other.strings
        )
99
100
class ConstantVisitor(NodeVisitor):
    """AST visitor which gathers literal constants into ``self.constants``.

    Docstring-like bare string expressions and the contents of f-strings are
    not visited, and ``_add_constant`` filters out values that are not worth
    keeping.
    """

    def __init__(self):
        super().__init__()
        self.constants = Constants()

    def _add_constant(self, value: object) -> None:
        """Record ``value`` unless one of the filtering rules rejects it."""
        # bool is a subclass of int, so it has to be rejected before the
        # numeric branches below get a chance to see it.
        if isinstance(value, bool):
            return

        if isinstance(value, str):
            # empty and whitespace-only strings are skipped, and long strings
            # are unlikely to be useful.
            rejected = value == "" or value.isspace() or len(value) > 20
        elif isinstance(value, bytes):
            # long bytes seem plausibly more likely to be useful than long
            # strings (e.g. AES-256 has a 32 byte key), but we still cap the
            # length at some point to avoid performance issues.
            rejected = value == b"" or len(value) > 50
        elif isinstance(value, float):
            # we already upweight inf.
            rejected = math.isinf(value)
        elif isinstance(value, int):
            # we already upweight small integers.
            rejected = -100 < value < 100
        else:
            # Any other kind of constant value is ignored.
            return  # pragma: no cover

        if not rejected:
            self.constants.add(value)

    def visit_UnaryOp(self, node: UnaryOp) -> None:
        # `a = -1` is parsed as a USub applied to the constant 1, so the
        # negative literal has to be reassembled here.
        operand = node.operand
        is_negative_literal = (
            isinstance(node.op, USub)
            and isinstance(operand, Constant)
            and isinstance(operand.value, (int, float))
            and not isinstance(operand.value, bool)
        )
        if not is_negative_literal:
            self.generic_visit(node)
            return

        # Deliberately no recursion into this node: that would also add the
        # positive variant of the literal.
        self._add_constant(-operand.value)

    def visit_Expr(self, node: Expr) -> None:
        # An expression statement consisting only of a string constant (such
        # as a docstring) is skipped entirely.
        inner = node.value
        is_bare_string = isinstance(inner, Constant) and isinstance(inner.value, str)
        if not is_bare_string:
            self.generic_visit(node)

    def visit_JoinedStr(self, node):
        # JoinedStr nodes are f-strings. Constants that appear *only* inside
        # f-strings are unlikely to be helpful, so there is no recursion here.
        return

    def visit_Constant(self, node):
        self._add_constant(node.value)
        self.generic_visit(node)
167
168
def _constants_from_source(source: Union[str, bytes]) -> Constants:
    """Parse ``source`` as Python code and return the constants collected
    from it by a ``ConstantVisitor``.

    Any exception raised by ``ast.parse`` or during the visit propagates to
    the caller.
    """
    module_ast = ast.parse(source)
    collector = ConstantVisitor()
    collector.visit(module_ast)
    return collector.constants
174
175
@lru_cache(4096)
def constants_from_module(module: ModuleType) -> Constants:
    """Return the constants found in the source file of ``module``.

    Results are cached on disk under ``storage_directory("constants")``, in a
    file named after the first 16 hex digits of the SHA-1 of the source bytes.
    If the source cannot be read or cannot be analysed, an empty ``Constants``
    is returned. Failing to write the cache file is ignored.
    """
    try:
        source_path = inspect.getsourcefile(module)
        # use type: ignore because we know this might error, in which case we
        # end up in the except branch below.
        raw_source = Path(source_path).read_bytes()  # type: ignore
    except Exception:
        return Constants()

    digest = hashlib.sha1(raw_source).hexdigest()
    cache_file = storage_directory("constants") / digest[:16]
    try:
        # The cache file is itself parsed with _constants_from_source.
        return _constants_from_source(cache_file.read_bytes())
    except Exception:
        # Either there is no cache entry yet, or it exists but could not be
        # read or parsed. In both cases, compute the constants from scratch.
        pass

    try:
        found = _constants_from_source(raw_source)
    except Exception:
        # A bunch of things can go wrong here.
        # * ast.parse may fail on the source code
        # * NodeVisitor may hit a RecursionError (see many related issues on
        #   e.g. libcst https://github.com/Instagram/LibCST/issues?q=recursion),
        #   or a MemoryError (`"[1, " * 200 + "]" * 200`)
        return Constants()

    try:
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        header = f"# file: {source_path}\n# hypothesis_version: {hypothesis.__version__}\n\n"
        # somewhat arbitrary sort order. The cache file doesn't *have* to be
        # stable... but it is aesthetically pleasing, and means we could rely
        # on it in the future!
        ordered = sorted(found, key=lambda v: (str(type(v)), v))
        cache_file.write_text(header + str(ordered), encoding="utf-8")
    except Exception:  # pragma: no cover
        pass

    return found
218
219
@lru_cache(4096)
def is_local_module_file(path: str) -> bool:
    """Return whether ``path`` should be treated as a local, non-test module
    file.

    The checks run in order and the first failing one returns ``False``, so
    the cheap checks placed before ``ModuleLocation.from_path`` skip that
    lookup entirely.
    """
    from hypothesis.internal.scrutineer import ModuleLocation

    # Skip expensive path lookup for stdlib modules.
    # This will cause false negatives if a user names their module the
    # same as a stdlib module.
    #
    # sys.stdlib_module_names is new in 3.10
    #
    # NOTE(review): this tests ``path`` itself for membership in a set of
    # module names - confirm that callers pass a value for which that can
    # actually match.
    if sys.version_info >= (3, 10) and path in sys.stdlib_module_names:
        return False

    # A path containing site-packages is extremely likely to be
    # ModuleLocation.SITE_PACKAGES. Skip the expensive path lookup here.
    if "/site-packages/" in path:
        return False

    if ModuleLocation.from_path(path) is not ModuleLocation.LOCAL:
        return False

    # normally, hypothesis is a third-party library and is not returned
    # by local_modules. However, if it is installed as an editable package
    # with pip install -e, then we will pick up on it. Just hardcode an
    # ignore here.
    if is_hypothesis_file(path):
        return False

    # avoid collecting constants from test files
    file_path = Path(path)
    directories = file_path.parts
    stem = file_path.stem
    looks_like_test = (
        "test" in directories
        or "tests" in directories
        or stem.startswith("test_")
        or stem.endswith("_test")
    )
    return not looks_like_test