Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/constants_ast.py: 38%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

128 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import ast 

12import hashlib 

13import inspect 

14import math 

15import sys 

16from ast import Constant, Expr, NodeVisitor, UnaryOp, USub 

17from collections.abc import Iterator, MutableSet 

18from functools import lru_cache 

19from itertools import chain 

20from pathlib import Path 

21from types import ModuleType 

22from typing import TYPE_CHECKING, Optional, Union 

23 

24import hypothesis 

25from hypothesis.configuration import storage_directory 

26from hypothesis.internal.conjecture.choice import ChoiceTypeT 

27from hypothesis.internal.escalation import is_hypothesis_file 

28 

if TYPE_CHECKING:
    # Only needed by type checkers; the annotation below refers to it as a
    # string, so nothing is imported at runtime.
    from typing import TypeAlias

# The Python types of the literal constants this module collects.
ConstantT: "TypeAlias" = Union[int, float, bytes, str]

# Alias for the builtin ``bytes``. ``Constants.__init__`` takes a keyword
# argument called ``bytes`` (which shadows the builtin inside that method), and
# naming that argument ``bytes_`` instead was not wanted.
bytesT = bytes

36 

37 

class Constants:
    """A collection of literal constants, kept in one set per supported type.

    Integers, floats, bytes and strings each live in their own set
    (``integers``, ``floats``, ``bytes`` and ``strings``). Sets passed to the
    constructor are stored as-is (not copied); any set left as ``None`` is
    replaced by a fresh empty ``set``.

    The class defines ``__eq__`` without ``__hash__``, so instances are not
    hashable.
    """

    def __init__(
        self,
        *,
        integers: Optional[MutableSet[int]] = None,
        floats: Optional[MutableSet[float]] = None,
        bytes: Optional[MutableSet[bytes]] = None,
        strings: Optional[MutableSet[str]] = None,
    ):
        # NB: the ``bytes`` argument shadows the builtin inside this method,
        # hence the module-level ``bytesT`` alias in the annotation below.
        self.integers: MutableSet[int] = set() if integers is None else integers
        self.floats: MutableSet[float] = set() if floats is None else floats
        self.bytes: MutableSet[bytesT] = set() if bytes is None else bytes
        self.strings: MutableSet[str] = set() if strings is None else strings

    def set_for_type(
        self, constant_type: Union[type[ConstantT], ChoiceTypeT]
    ) -> Union[MutableSet[int], MutableSet[float], MutableSet[bytes], MutableSet[str]]:
        """Return the set which stores constants of ``constant_type``.

        ``constant_type`` is either one of the Python types ``int``, ``float``,
        ``bytes``, ``str`` (compared by identity, so subclasses such as ``bool``
        do not match), or one of the names ``"integer"``, ``"float"``,
        ``"bytes"``, ``"string"``.

        Raises ``ValueError`` for anything else.
        """
        buckets = (
            (int, "integer", self.integers),
            (float, "float", self.floats),
            (bytes, "bytes", self.bytes),
            (str, "string", self.strings),
        )
        for python_type, type_name, bucket in buckets:
            if constant_type is python_type or constant_type == type_name:
                return bucket
        raise ValueError(f"unknown constant_type {constant_type}")

    def add(self, constant: ConstantT) -> None:
        """Add ``constant`` to the set matching its exact type.

        Raises ``ValueError`` (from ``set_for_type``) if ``type(constant)`` is
        not one of the supported types.
        """
        self.set_for_type(type(constant)).add(constant)  # type: ignore

    def __contains__(self, constant: ConstantT) -> bool:
        # Looks only at the set for the exact type of ``constant``; an
        # unsupported type raises ValueError (from ``set_for_type``).
        return constant in self.set_for_type(type(constant))

    def __or__(self, other: "Constants") -> "Constants":
        """Return a new ``Constants`` holding the per-type union of both operands."""
        return Constants(
            integers=self.integers | other.integers,  # type: ignore
            floats=self.floats | other.floats,  # type: ignore
            bytes=self.bytes | other.bytes,  # type: ignore
            strings=self.strings | other.strings,  # type: ignore
        )

    def __iter__(self) -> Iterator[ConstantT]:
        # Yields all integers, then floats, then bytes, then strings.
        return iter(chain(self.integers, self.floats, self.bytes, self.strings))

    def __len__(self) -> int:
        # Total number of constants across all four sets.
        return (
            len(self.integers) + len(self.floats) + len(self.bytes) + len(self.strings)
        )

    def __repr__(self) -> str:
        return f"Constants({self.integers=}, {self.floats=}, {self.bytes=}, {self.strings=})"

    def __eq__(self, other: object) -> bool:
        """Two ``Constants`` are equal when all four of their sets are equal."""
        if not isinstance(other, Constants):
            # Defer to the other operand rather than answering False outright.
            # ``==`` still evaluates to False (and ``!=`` to True) when the
            # other operand has no opinion either.
            return NotImplemented
        return (
            self.integers == other.integers
            and self.floats == other.floats
            and self.bytes == other.bytes
            and self.strings == other.strings
        )

99 

100 

class TooManyConstants(Exception):
    """Control-flow exception raised by ``ConstantVisitor`` when the number of
    constants collected from a module gets too large."""

105 

106 

class ConstantVisitor(NodeVisitor):
    """AST visitor which gathers the interesting literal constants of a tree
    into ``self.constants``.

    When ``limit`` is true, ``TooManyConstants`` is raised once
    ``CONSTANTS_LIMIT`` constants have been collected and another constant is
    encountered.
    """

    CONSTANTS_LIMIT: int = 1024

    def __init__(self, *, limit: bool):
        super().__init__()
        self.constants = Constants()
        self.limit = limit

    def _add_constant(self, value: object) -> None:
        """Record ``value`` unless it is filtered out as uninteresting.

        The limit is checked first, before any filtering of ``value``.
        """
        if self.limit and len(self.constants) >= self.CONSTANTS_LIMIT:
            raise TooManyConstants

        # bool is a subclass of int, so rule it out before the int branch.
        if isinstance(value, bool):
            return

        if isinstance(value, str):
            # empty and whitespace-only strings are skipped, and long strings
            # are unlikely to be useful.
            uninteresting = value == "" or value.isspace() or len(value) > 20
        elif isinstance(value, bytes):
            # long bytes seem plausibly more likely to be useful than long
            # strings (e.g. AES-256 has a 32 byte key), but we still want to
            # cap at some point to avoid performance issues.
            uninteresting = value == b"" or len(value) > 50
        elif isinstance(value, float):
            # we already upweight inf.
            uninteresting = math.isinf(value)
        elif isinstance(value, int):
            # we already upweight small integers.
            uninteresting = -100 < value < 100
        else:
            # Anything which is not an int, float, bytes or str (for example
            # None) is ignored.
            return  # pragma: no cover

        if not uninteresting:
            self.constants.add(value)

    def visit_UnaryOp(self, node: UnaryOp) -> None:
        # `a = -1` is actually a combination of a USub and the constant 1, so
        # a negated numeric literal is recorded as the negative value.
        operand = node.operand
        negated_number = (
            isinstance(node.op, USub)
            and isinstance(operand, Constant)
            and isinstance(operand.value, (int, float))
            and not isinstance(operand.value, bool)
        )
        if not negated_number:
            self.generic_visit(node)
            return

        # don't recurse on this node, to avoid also adding the positive variant
        self._add_constant(-operand.value)

    def visit_Expr(self, node: Expr) -> None:
        # An expression statement consisting solely of a string literal (such
        # as a docstring) is skipped entirely.
        inner = node.value
        bare_string = isinstance(inner, Constant) and isinstance(inner.value, str)
        if not bare_string:
            self.generic_visit(node)

    def visit_JoinedStr(self, node):
        """Do nothing for f-strings, and do not recurse into them.

        Constants that appear *only* in f-strings are unlikely to be helpful.
        """

    def visit_Constant(self, node):
        self._add_constant(node.value)
        self.generic_visit(node)

179 

180 

def _constants_from_source(source: Union[str, bytes], *, limit: bool) -> Constants:
    """Parse ``source`` and return the constants collected from its AST.

    If the visitor raises ``TooManyConstants``, an empty ``Constants`` is
    returned instead of the partial collection. Any other error (including
    from ``ast.parse``) propagates to the caller.
    """
    parsed = ast.parse(source)
    collector = ConstantVisitor(limit=limit)

    try:
        collector.visit(parsed)
    except TooManyConstants:
        # in the case of an incomplete collection, return nothing, to avoid
        # muddying caches etc.
        found = Constants()
    else:
        found = collector.constants

    return found

193 

194 

def _constants_file_str(constants: Constants) -> str:
    """Return the ``str()`` of a list of all ``constants``, sorted first by the
    string form of each value's type and then by the value itself."""

    def sort_key(value):
        return (str(type(value)), value)

    ordered = list(constants)
    ordered.sort(key=sort_key)
    return str(ordered)

197 

198 

@lru_cache(4096)
def constants_from_module(module: ModuleType, *, limit: bool = True) -> Constants:
    """Return the constants found in the source file of ``module``.

    Results are stored on disk under ``storage_directory("constants")``, in a
    file named after a hash of the module's source, and are read back from
    there when possible. Every failure along the way is swallowed: if the
    source cannot be located, read or processed, an empty ``Constants`` is
    returned.

    With ``limit`` true, source files larger than 512kb are skipped, and the
    limit is passed on to the constant collection itself.

    Calls are memoized by ``lru_cache``, so repeated calls with the same
    arguments return the same ``Constants`` object.
    """
    try:
        module_file = inspect.getsourcefile(module)
        # use type: ignore because we know this might error
        source_bytes = Path(module_file).read_bytes()  # type: ignore
    except Exception:
        return Constants()

    # Skip files over 512kb. For reference, the largest source file
    # in Hypothesis is strategies/_internal/core.py at 107kb at time
    # of writing.
    too_large = len(source_bytes) > 512 * 1024
    if limit and too_large:
        return Constants()

    digest = hashlib.sha1(source_bytes).hexdigest()[:16]
    # separate cache files for each limit param. see discussion in pull/4398
    suffix = "" if limit else "_nolimit"
    cache_p = storage_directory("constants") / (digest + suffix)

    try:
        cached_source = cache_p.read_bytes()
        return _constants_from_source(cached_source, limit=limit)
    except Exception:
        # if the cached location doesn't exist, or it does exist but there was
        # a problem reading it, fall back to standard computation of the constants
        pass

    try:
        constants = _constants_from_source(source_bytes, limit=limit)
    except Exception:
        # A bunch of things can go wrong here.
        # * ast.parse may fail on the source code
        # * NodeVisitor may hit a RecursionError (see many related issues on
        #   e.g. libcst https://github.com/Instagram/LibCST/issues?q=recursion),
        #   or a MemoryError (`"[1, " * 200 + "]" * 200`)
        return Constants()

    # Writing the cache is best-effort; a failure here never affects the result.
    try:
        cache_p.parent.mkdir(parents=True, exist_ok=True)
        header = (
            f"# file: {module_file}\n# hypothesis_version: {hypothesis.__version__}\n\n"
        )
        # somewhat arbitrary sort order. The cache file doesn't *have* to be
        # stable... but it is aesthetically pleasing, and means we could rely
        # on it in the future!
        listing = _constants_file_str(constants)
        cache_p.write_text(header + listing, encoding="utf-8")
    except Exception:  # pragma: no cover
        pass

    return constants

250 

251 

@lru_cache(4096)
def is_local_module_file(path: str) -> bool:
    """Return whether constants should be collected from the file at ``path``.

    This is true only if ``path`` passes the cheap stdlib and site-packages
    checks, is classified as ``ModuleLocation.LOCAL``, is not a Hypothesis
    file, and does not look like a test file. Results are memoized per path
    by ``lru_cache``.
    """
    from hypothesis.internal.scrutineer import ModuleLocation

    # Skip expensive path lookup for stdlib modules.
    # This will cause false negatives if a user names their module the
    # same as a stdlib module.
    #
    # sys.stdlib_module_names is new in 3.10
    #
    # NOTE(review): this tests the whole ``path`` string for membership in the
    # set of stdlib module *names* - confirm what callers pass here.
    if sys.version_info >= (3, 10) and path in sys.stdlib_module_names:
        return False

    # A path containing site-packages is extremely likely to be
    # ModuleLocation.SITE_PACKAGES. Skip the expensive path lookup here.
    if "/site-packages/" in path:
        return False

    if ModuleLocation.from_path(path) is not ModuleLocation.LOCAL:
        return False

    # normally, hypothesis is a third-party library and is not returned
    # by local_modules. However, if it is installed as an editable package
    # with pip install -e, then we will pick up on it. Just hardcode an
    # ignore here.
    if is_hypothesis_file(path):
        return False

    # avoid collecting constants from test files
    p = Path(path)
    in_test_directory = not {"test", "tests"}.isdisjoint(p.parts)
    named_like_test = p.stem.startswith("test_") or p.stem.endswith("_test")
    return not (in_test_directory or named_like_test)