Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/constants_ast.py: 38%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

126 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import ast 

12import hashlib 

13import inspect 

14import math 

15import sys 

16from ast import Constant, Expr, NodeVisitor, UnaryOp, USub 

17from collections.abc import Iterator, MutableSet 

18from functools import lru_cache 

19from itertools import chain 

20from pathlib import Path 

21from types import ModuleType 

22from typing import TypeAlias 

23 

24import hypothesis 

25from hypothesis.configuration import storage_directory 

26from hypothesis.internal.conjecture.choice import ChoiceTypeT 

27from hypothesis.internal.escalation import is_hypothesis_file 

28 

# The Python types of the literal values that Constants stores: one set per
# type (integers, floats, bytes, strings).
ConstantT: TypeAlias = int | float | bytes | str

# unfortunate collision with builtin. I don't want to name the init arg bytes_.
# Constants.__init__ takes a keyword argument called `bytes`, which shadows the
# builtin inside that method; this alias keeps the builtin type reachable there.
bytesT = bytes

33 

34 

class Constants:
    """A collection of literal constants, stored in one set per type.

    The four groups are ``integers``, ``floats``, ``bytes`` and ``strings``.
    Instances support ``in``, ``len()``, iteration (integers first, then
    floats, bytes and strings), union via ``|`` and equality via ``==``.
    """

    def __init__(
        self,
        *,
        integers: MutableSet[int] | None = None,
        floats: MutableSet[float] | None = None,
        bytes: MutableSet[bytes] | None = None,
        strings: MutableSet[str] | None = None,
    ):
        """Create a collection from the given sets.

        Sets that are passed in are stored as-is (not copied); any group that
        is omitted or ``None`` starts out as a new empty ``set``.
        """
        self.integers: MutableSet[int] = set() if integers is None else integers
        self.floats: MutableSet[float] = set() if floats is None else floats
        # `bytes` is the keyword argument here, hence the bytesT alias for the
        # builtin type in the annotation.
        self.bytes: MutableSet[bytesT] = set() if bytes is None else bytes
        self.strings: MutableSet[str] = set() if strings is None else strings

    def set_for_type(
        self, constant_type: type[ConstantT] | ChoiceTypeT
    ) -> MutableSet[int] | MutableSet[float] | MutableSet[bytes] | MutableSet[str]:
        """Return the set that holds constants of ``constant_type``.

        ``constant_type`` is either one of the exact types ``int``, ``float``,
        ``bytes``, ``str`` or one of the names ``"integer"``, ``"float"``,
        ``"bytes"``, ``"string"``.

        Raises:
            ValueError: if ``constant_type`` is anything else. Types are
                matched by identity, so subclasses such as ``bool`` are
                rejected as well.
        """
        if constant_type is int or constant_type == "integer":
            return self.integers
        elif constant_type is float or constant_type == "float":
            return self.floats
        elif constant_type is bytes or constant_type == "bytes":
            return self.bytes
        elif constant_type is str or constant_type == "string":
            return self.strings
        raise ValueError(f"unknown constant_type {constant_type}")

    def add(self, constant: ConstantT) -> None:
        """Add ``constant`` to the set matching its exact type.

        Raises ValueError (from ``set_for_type``) for unsupported types.
        """
        self.set_for_type(type(constant)).add(constant)  # type: ignore

    def __contains__(self, constant: ConstantT) -> bool:
        """Return whether ``constant`` is in the set matching its exact type.

        Raises ValueError (from ``set_for_type``) for unsupported types.
        """
        return constant in self.set_for_type(type(constant))

    def __or__(self, other: "Constants") -> "Constants":
        """Return a new ``Constants`` holding the per-type union of both operands."""
        if not isinstance(other, Constants):
            # Let Python try the reflected operation or raise TypeError,
            # instead of failing with an AttributeError on `other.integers`.
            return NotImplemented
        return Constants(
            integers=self.integers | other.integers,  # type: ignore
            floats=self.floats | other.floats,  # type: ignore
            bytes=self.bytes | other.bytes,  # type: ignore
            strings=self.strings | other.strings,  # type: ignore
        )

    def __iter__(self) -> Iterator[ConstantT]:
        """Iterate over all integers, then floats, then bytes, then strings."""
        return iter(chain(self.integers, self.floats, self.bytes, self.strings))

    def __len__(self) -> int:
        """Return the total number of constants across all four sets."""
        return (
            len(self.integers) + len(self.floats) + len(self.bytes) + len(self.strings)
        )

    def __repr__(self) -> str:
        return f"Constants({self.integers=}, {self.floats=}, {self.bytes=}, {self.strings=})"

    def __eq__(self, other: object) -> bool:
        """Two ``Constants`` are equal when all four of their sets are equal."""
        if not isinstance(other, Constants):
            # Defer to the other operand; `==` still evaluates to False when
            # the other side does not know how to compare either.
            return NotImplemented
        return (
            self.integers == other.integers
            and self.floats == other.floats
            and self.bytes == other.bytes
            and self.strings == other.strings
        )

96 

97 

class TooManyConstants(Exception):
    """Control-flow exception raised in ConstantVisitor when the number of
    constants in a module gets too large."""

102 

103 

class ConstantVisitor(NodeVisitor):
    """AST visitor that gathers literal constants into ``self.constants``.

    Only int, float, bytes and str literals are kept, and only those that pass
    the filters in ``_add_constant``. Docstring-like string statements and the
    contents of f-strings are not visited at all.

    When ``limit`` is true, trying to collect more than ``CONSTANTS_LIMIT``
    distinct constants raises ``TooManyConstants``.
    """

    CONSTANTS_LIMIT: int = 1024

    def __init__(self, *, limit: bool):
        super().__init__()
        self.constants = Constants()
        self.limit = limit

    def _add_constant(self, value: object) -> None:
        """Record ``value`` unless it is filtered out.

        Raises:
            TooManyConstants: if ``self.limit`` is set and storing ``value``
                would grow the collection beyond ``CONSTANTS_LIMIT``.
        """
        if isinstance(value, bool):
            # bool is a subclass of int, so rule it out before the int branch.
            return

        if isinstance(value, str):
            # long strings are unlikely to be useful.
            if value == "" or value.isspace() or len(value) > 20:
                return
        elif isinstance(value, bytes):
            # long bytes seem plausibly more likely to be useful than long
            # strings (e.g. AES-256 has a 32 byte key), but we still want to
            # cap at some point to avoid performance issues.
            if value == b"" or len(value) > 50:
                return
        elif isinstance(value, float):
            if math.isinf(value):
                # we already upweight inf.
                return
        elif isinstance(value, int):
            if -100 < value < 100:
                # we already upweight small integers.
                return
        else:
            # Any other literal (for example None, Ellipsis or a complex
            # number) is not a constant type we collect.
            return  # pragma: no cover

        if value in self.constants:
            # Already collected; adding it again cannot grow the collection.
            return

        # Enforce the limit only when the collection would actually grow, so
        # that ignored or duplicate literals seen after reaching exactly
        # CONSTANTS_LIMIT entries do not discard an otherwise valid result.
        if self.limit and len(self.constants) >= self.CONSTANTS_LIMIT:
            raise TooManyConstants

        self.constants.add(value)

    def visit_UnaryOp(self, node: UnaryOp) -> None:
        # `a = -1` is actually a combination of a USub and the constant 1.
        if (
            isinstance(node.op, USub)
            and isinstance(node.operand, Constant)
            and isinstance(node.operand.value, (int, float))
            and not isinstance(node.operand.value, bool)
        ):
            self._add_constant(-node.operand.value)
            # don't recurse on this node to avoid adding the positive variant
            return

        self.generic_visit(node)

    def visit_Expr(self, node: Expr) -> None:
        # An expression statement consisting only of a string literal (such as
        # a docstring) is skipped entirely.
        if isinstance(node.value, Constant) and isinstance(node.value.value, str):
            return

        self.generic_visit(node)

    def visit_JoinedStr(self, node: ast.JoinedStr) -> None:
        # don't recurse on JoinedStr, i.e. f strings. Constants that appear
        # *only* in f strings are unlikely to be helpful.
        return

    def visit_Constant(self, node: Constant) -> None:
        self._add_constant(node.value)
        self.generic_visit(node)

176 

177 

def _constants_from_source(source: str | bytes, *, limit: bool) -> Constants:
    """Parse ``source`` and return the constants a ConstantVisitor collects.

    If the visitor gives up with TooManyConstants, an empty ``Constants`` is
    returned rather than a partial result. Errors from ``ast.parse`` or from
    the traversal itself propagate to the caller.
    """
    collector = ConstantVisitor(limit=limit)
    module_ast = ast.parse(source)

    try:
        collector.visit(module_ast)
    except TooManyConstants:
        # in the case of an incomplete collection, return nothing, to avoid
        # muddying caches etc.
        return Constants()
    else:
        return collector.constants

190 

191 

def _constants_file_str(constants: Constants) -> str:
    """Return the ``str()`` of a list of ``constants`` in a deterministic order.

    Values are ordered first by the string form of their type and then by
    value, so constants of different types are never compared directly.
    """

    def order(value):
        return (str(type(value)), value)

    ordered = list(constants)
    ordered.sort(key=order)
    return str(ordered)

194 

195 

@lru_cache(4096)
def constants_from_module(module: ModuleType, *, limit: bool = True) -> Constants:
    """Return the constants found in the source file of ``module``.

    This is best-effort: if the source cannot be located, read, parsed or
    traversed, an empty ``Constants`` is returned instead of raising.

    Results are cached twice: in memory through ``lru_cache``, and on disk in
    the ``constants`` storage directory, in a file named after a hash of the
    module's source bytes.

    Args:
        module: the module whose source file is inspected.
        limit: when true, source files larger than 512kb are skipped and the
            number of collected constants is capped (see ConstantVisitor).
    """
    try:
        module_file = inspect.getsourcefile(module)
        # use type: ignore because we know this might error
        source_bytes = Path(module_file).read_bytes()  # type: ignore
    except Exception:
        return Constants()

    if limit and len(source_bytes) > 512 * 1024:
        # Skip files over 512kb. For reference, the largest source file
        # in Hypothesis is strategies/_internal/core.py at 107kb at time
        # of writing.
        return Constants()

    # sha1 only serves as a content fingerprint for the cache file name here.
    # Declaring that with usedforsecurity=False keeps this call from raising
    # on Python builds that block insecure digests; the digest is unchanged.
    source_hash = hashlib.sha1(source_bytes, usedforsecurity=False).hexdigest()[:16]
    # separate cache files for each limit param. see discussion in pull/4398
    cache_p = storage_directory("constants") / (
        source_hash + ("" if limit else "_nolimit")
    )
    try:
        # The cache file is itself Python source (a comment header followed by
        # a list literal), so it is read back with the same collector.
        return _constants_from_source(cache_p.read_bytes(), limit=limit)
    except Exception:
        # if the cached location doesn't exist, or it does exist but there was
        # a problem reading it, fall back to standard computation of the constants
        pass

    try:
        constants = _constants_from_source(source_bytes, limit=limit)
    except Exception:
        # A bunch of things can go wrong here.
        # * ast.parse may fail on the source code
        # * NodeVisitor may hit a RecursionError (see many related issues on
        #   e.g. libcst https://github.com/Instagram/LibCST/issues?q=recursion),
        #   or a MemoryError (`"[1, " * 200 + "]" * 200`)
        return Constants()

    try:
        cache_p.parent.mkdir(parents=True, exist_ok=True)
        cache_p.write_text(
            f"# file: {module_file}\n# hypothesis_version: {hypothesis.__version__}\n\n"
            # somewhat arbitrary sort order. The cache file doesn't *have* to be
            # stable... but it is aesthetically pleasing, and means we could rely
            # on it in the future!
            + _constants_file_str(constants),
            encoding="utf-8",
        )
    except Exception:  # pragma: no cover
        # Failing to write the cache is not an error; the freshly computed
        # constants are still returned.
        pass

    return constants

247 

248 

@lru_cache(4096)
def is_local_module_file(path: str) -> bool:
    """Return whether ``path`` should be treated as a local, non-test module file.

    The cheap string checks run first so that the ModuleLocation lookup is
    only reached when they cannot rule the path out.
    """
    from hypothesis.internal.scrutineer import ModuleLocation

    # Skip expensive path lookup for stdlib modules.
    # This will cause false negatives if a user names their module the
    # same as a stdlib module.
    # NOTE(review): `path` is compared as-is against a set of module names;
    # confirm that callers pass a value for which this can ever match.
    if path in sys.stdlib_module_names:
        return False

    # A path containing site-packages is extremely likely to be
    # ModuleLocation.SITE_PACKAGES. Skip the expensive path lookup here.
    if "/site-packages/" in path:
        return False

    if ModuleLocation.from_path(path) is not ModuleLocation.LOCAL:
        return False

    # normally, hypothesis is a third-party library and is not returned
    # by local_modules. However, if it is installed as an editable package
    # with pip install -e, then we will pick up on it. Just hardcode an
    # ignore here.
    if is_hypothesis_file(path):
        return False

    # avoid collecting constants from test files
    p = Path(path)
    looks_like_test = (
        "test" in p.parts
        or "tests" in p.parts
        or p.stem.startswith("test_")
        or p.stem.endswith("_test")
    )
    return not looks_like_test

274 )