Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/hypothesis/internal/constants_ast.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

114 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import ast 

12import hashlib 

13import inspect 

14import math 

15import sys 

16from ast import Constant, Expr, NodeVisitor, UnaryOp, USub 

17from collections.abc import Iterator, MutableSet 

18from functools import lru_cache 

19from itertools import chain 

20from pathlib import Path 

21from types import ModuleType 

22from typing import TYPE_CHECKING, Optional, Union 

23 

24import hypothesis 

25from hypothesis.configuration import storage_directory 

26from hypothesis.internal.conjecture.choice import ChoiceTypeT 

27from hypothesis.internal.escalation import is_hypothesis_file 

28 

29if TYPE_CHECKING: 

30 from typing import TypeAlias 

31 

# The Python value types that can be stored as constants.
ConstantT: "TypeAlias" = Union[int, float, bytes, str]

# Alias for the builtin `bytes`. `Constants.__init__` takes a keyword argument
# named `bytes`, which collides with the builtin; the argument is deliberately
# not renamed to `bytes_`.
bytesT = bytes

36 

37 

class Constants:
    """A collection of constants, bucketed by type.

    Four separate sets are kept: ``integers``, ``floats``, ``bytes`` and
    ``strings``. A value is routed to its set by its exact type, so
    subclasses such as ``bool`` are not accepted as integers.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable.
    """

    def __init__(
        self,
        *,
        integers: Optional[MutableSet[int]] = None,
        floats: Optional[MutableSet[float]] = None,
        bytes: Optional[MutableSet[bytes]] = None,
        strings: Optional[MutableSet[str]] = None,
    ):
        """Create the collection.

        Each argument is used as-is (not copied) as the backing set for its
        type; an omitted argument gets a fresh empty ``set``.
        """
        self.integers: MutableSet[int] = set() if integers is None else integers
        self.floats: MutableSet[float] = set() if floats is None else floats
        # `bytes` is the keyword argument here, hence the `bytesT` alias.
        self.bytes: MutableSet[bytesT] = set() if bytes is None else bytes
        self.strings: MutableSet[str] = set() if strings is None else strings

    def set_for_type(
        self, constant_type: "Union[type[ConstantT], ChoiceTypeT]"
    ) -> Union[MutableSet[int], MutableSet[float], MutableSet[bytes], MutableSet[str]]:
        """Return the backing set for ``constant_type``.

        ``constant_type`` is either one of the types ``int``, ``float``,
        ``bytes``, ``str``, or one of the strings ``"integer"``, ``"float"``,
        ``"bytes"``, ``"string"``.

        Raises:
            ValueError: if ``constant_type`` is none of the above.
        """
        if constant_type is int or constant_type == "integer":
            return self.integers
        elif constant_type is float or constant_type == "float":
            return self.floats
        elif constant_type is bytes or constant_type == "bytes":
            return self.bytes
        elif constant_type is str or constant_type == "string":
            return self.strings
        raise ValueError(f"unknown constant_type {constant_type}")

    def add(self, constant: "ConstantT") -> None:
        """Add ``constant`` to the set matching its exact type.

        Raises:
            ValueError: if ``type(constant)`` is not a supported type.
        """
        self.set_for_type(type(constant)).add(constant)  # type: ignore

    def __contains__(self, constant: object) -> bool:
        """Return whether ``constant`` is stored in the set for its type.

        A value of an unsupported type is never stored, so the answer for it
        is ``False`` rather than an error.
        """
        try:
            bucket = self.set_for_type(type(constant))  # type: ignore
        except ValueError:
            return False
        return constant in bucket

    def __or__(self, other: "Constants") -> "Constants":
        """Return a new ``Constants`` holding the per-type union of both."""
        return Constants(
            integers=self.integers | other.integers,  # type: ignore
            floats=self.floats | other.floats,  # type: ignore
            bytes=self.bytes | other.bytes,  # type: ignore
            strings=self.strings | other.strings,  # type: ignore
        )

    def __iter__(self) -> "Iterator[ConstantT]":
        """Iterate over integers, then floats, then bytes, then strings."""
        return iter(chain(self.integers, self.floats, self.bytes, self.strings))

    def __len__(self) -> int:
        """Return the total number of constants across all four sets."""
        return (
            len(self.integers) + len(self.floats) + len(self.bytes) + len(self.strings)
        )

    def __repr__(self) -> str:
        return f"Constants({self.integers=}, {self.floats=}, {self.bytes=}, {self.strings=})"

    def __eq__(self, other: object) -> bool:
        """Compare the four sets pairwise.

        Returns ``NotImplemented`` for non-``Constants`` operands so that
        Python can try the reflected comparison; ``==`` still evaluates to
        ``False`` in that case.
        """
        if not isinstance(other, Constants):
            return NotImplemented
        return (
            self.integers == other.integers
            and self.floats == other.floats
            and self.bytes == other.bytes
            and self.strings == other.strings
        )

99 

100 

class ConstantVisitor(NodeVisitor):
    """AST visitor that gathers literal constants into ``self.constants``."""

    def __init__(self):
        super().__init__()
        self.constants = Constants()

    def _add_constant(self, value: object) -> None:
        """Record ``value`` unless one of the filters below rejects it."""
        # bool is a subclass of int, so it has to be rejected before the
        # integer branch is reached.
        if isinstance(value, bool):
            return

        if isinstance(value, str):
            # skip empty and whitespace-only strings. Long strings are
            # unlikely to be useful.
            keep = bool(value) and not value.isspace() and len(value) <= 20
        elif isinstance(value, bytes):
            # long bytes seem plausibly more likely to be useful than long
            # strings (e.g. AES-256 has a 32 byte key), but we still want to
            # cap at some point to avoid performance issues.
            keep = 0 < len(value) <= 50
        elif isinstance(value, float):
            # we already upweight inf.
            keep = not math.isinf(value)
        elif isinstance(value, int):
            # we already upweight small integers.
            keep = not (-100 < value < 100)
        else:
            # I don't know what case could go here, but am also not confident
            # there isn't one.
            keep = False

        if keep:
            self.constants.add(value)

    def visit_UnaryOp(self, node: UnaryOp) -> None:
        """Record ``-<number>`` as one negative constant."""
        operand = node.operand
        # `a = -1` is actually a combination of a USub and the constant 1.
        is_negated_number = (
            isinstance(node.op, USub)
            and isinstance(operand, Constant)
            and isinstance(operand.value, (int, float))
            and not isinstance(operand.value, bool)
        )
        if not is_negated_number:
            self.generic_visit(node)
            return

        # don't recurse on this node to avoid adding the positive variant
        self._add_constant(-operand.value)

    def visit_Expr(self, node: Expr) -> None:
        """Skip expression statements that are a bare string (e.g. docstrings)."""
        inner = node.value
        if not (isinstance(inner, Constant) and isinstance(inner.value, str)):
            self.generic_visit(node)

    def visit_JoinedStr(self, node):
        """Ignore f-strings entirely.

        Not recursing means constants that appear *only* inside an f-string
        are not collected; they are unlikely to be helpful.
        """
        return

    def visit_Constant(self, node):
        """Offer the literal's value to ``_add_constant``."""
        self._add_constant(node.value)
        self.generic_visit(node)

167 

168 

def _constants_from_source(source: Union[str, bytes]) -> Constants:
    """Parse ``source`` as Python code and return the constants it contains.

    Any exception raised by ``ast.parse`` or by the visitor propagates to the
    caller.
    """
    collector = ConstantVisitor()
    collector.visit(ast.parse(source))
    return collector.constants

174 

175 

@lru_cache(4096)
def constants_from_module(module: ModuleType) -> Constants:
    """Return the constants found in the source file of ``module``.

    This is best-effort: if the source file cannot be located, read, or
    analysed, an empty ``Constants`` is returned instead of raising.

    Results are cached on disk under ``storage_directory("constants")``, in a
    file named after a hash of the module's source bytes. The cache file is
    itself Python source (a header comment followed by a list literal), so it
    is read back with the same parser used for real modules. Results are
    additionally memoised per module object by ``lru_cache``.
    """
    try:
        module_file = inspect.getsourcefile(module)
        # use type: ignore because we know this might error
        source_bytes = Path(module_file).read_bytes()  # type: ignore
    except Exception:
        return Constants()

    # The digest is only a cache key, not a security measure. Passing
    # usedforsecurity=False lets sha1 be used in restricted environments that
    # block insecure hash algorithms; otherwise the error would escape this
    # function. The digest value itself is unchanged.
    source_hash = hashlib.sha1(source_bytes, usedforsecurity=False).hexdigest()[:16]
    cache_p = storage_directory("constants") / source_hash
    try:
        return _constants_from_source(cache_p.read_bytes())
    except Exception:
        # if the cached location doesn't exist, or it does exist but there was
        # a problem reading it, fall back to standard computation of the constants
        pass

    try:
        constants = _constants_from_source(source_bytes)
    except Exception:
        # A bunch of things can go wrong here.
        # * ast.parse may fail on the source code
        # * NodeVisitor may hit a RecursionError (see many related issues on
        #   e.g. libcst https://github.com/Instagram/LibCST/issues?q=recursion),
        #   or a MemoryError (`"[1, " * 200 + "]" * 200`)
        return Constants()

    try:
        cache_p.parent.mkdir(parents=True, exist_ok=True)
        cache_p.write_text(
            f"# file: {module_file}\n# hypothesis_version: {hypothesis.__version__}\n\n"
            # somewhat arbitrary sort order. The cache file doesn't *have* to be
            # stable... but it is aesthetically pleasing, and means we could rely
            # on it in the future!
            + str(sorted(constants, key=lambda v: (str(type(v)), v))),
            encoding="utf-8",
        )
    except Exception:  # pragma: no cover
        # Failing to write the cache is not an error; the constants were
        # still computed successfully.
        pass

    return constants

218 

219 

@lru_cache(4096)
def is_local_module_file(path: str) -> bool:
    """Return whether ``path`` is a local module file worth collecting from.

    The checks run from cheapest to most expensive, and the first one that
    rules the path out ends the function. Results are memoised per path.
    """
    from hypothesis.internal.scrutineer import ModuleLocation

    # Skip expensive path lookup for stdlib modules.
    # This will cause false negatives if a user names their module the
    # same as a stdlib module.
    #
    # sys.stdlib_module_names is new in 3.10
    # NOTE(review): `path` is compared directly against module *names* here -
    # confirm that callers pass values for which this can match.
    if sys.version_info >= (3, 10) and path in sys.stdlib_module_names:
        return False

    # A path containing site-packages is extremely likely to be
    # ModuleLocation.SITE_PACKAGES. Skip the expensive path lookup here.
    if "/site-packages/" in path:
        return False

    if ModuleLocation.from_path(path) is not ModuleLocation.LOCAL:
        return False

    # normally, hypothesis is a third-party library and is not returned
    # by local_modules. However, if it is installed as an editable package
    # with pip install -e, then we will pick up on it. Just hardcode an
    # ignore here.
    if is_hypothesis_file(path):
        return False

    # avoid collecting constants from test files
    p = Path(path)
    in_test_dir = "test" in p.parts or "tests" in p.parts
    is_test_file = p.stem.startswith("test_") or p.stem.endswith("_test")
    return not (in_test_dir or is_test_file)