# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

from collections.abc import Iterator
from enum import Enum
from random import Random
from typing import TYPE_CHECKING, Callable, Optional, Union

from sortedcontainers import SortedList

from hypothesis.internal.conjecture.choice import choices_key
from hypothesis.internal.conjecture.data import (
    ConjectureData,
    ConjectureResult,
    Status,
    _Overrun,
)
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
from hypothesis.internal.conjecture.shrinker import sort_key

NO_SCORE = float("-inf")

if TYPE_CHECKING:
    from hypothesis.internal.conjecture.engine import ConjectureRunner


class DominanceRelation(Enum):
    NO_DOMINANCE = 0
    EQUAL = 1
    LEFT_DOMINATES = 2
    RIGHT_DOMINATES = 3


def dominance(left: ConjectureResult, right: ConjectureResult) -> DominanceRelation:
    """Returns the dominance relation between ``left`` and ``right``, according
    to the rules that one ConjectureResult dominates another if and only if it
    is better in every way.

    The things we currently consider to be "better" are:

    * Something that is smaller in shrinking order is better.
    * Something that has higher status is better.
    * Each ``interesting_origin`` is treated as its own score, so if two
      interesting examples have different origins then neither dominates
      the other.
    * For each target observation, a higher score is better.

    In "normal" operation where there are no bugs or target observations, the
    pareto front only has one element (the smallest valid test case), but for
    more structured or failing tests it can be useful to track, and future work
    will depend on it more."""
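    # An illustrative sketch with hypothetical values: if ``left`` and
    # ``right`` are both VALID, ``left`` is shortlex-smaller, carries every
    # tag of ``right``, and scores at least as high on every target
    # observation, we return LEFT_DOMINATES. If ``right`` instead scored
    # strictly higher on some target, we would return NO_DOMINANCE, since
    # each side would then be better on one axis.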

    left_key = sort_key(left.nodes)
    right_key = sort_key(right.nodes)
    if left_key == right_key:
        return DominanceRelation.EQUAL

    if right_key < left_key:
        result = dominance(left=right, right=left)
        if result == DominanceRelation.LEFT_DOMINATES:
            return DominanceRelation.RIGHT_DOMINATES
        else:
            # Because we have sort_key(left) < sort_key(right) the only options
            # are that right is better than left or that the two are
            # incomparable.
            assert result == DominanceRelation.NO_DOMINANCE
            return result

    # Either left is better or there is no dominance relationship.
    assert left_key < right_key

    # The right is more interesting
    if left.status < right.status:
        return DominanceRelation.NO_DOMINANCE

    if not right.tags.issubset(left.tags):
        return DominanceRelation.NO_DOMINANCE

    # Things that are interesting for different reasons are incomparable in
    # the dominance relationship.
    if (
        left.status == Status.INTERESTING
        and left.interesting_origin != right.interesting_origin
    ):
        return DominanceRelation.NO_DOMINANCE

    for target in set(left.target_observations) | set(right.target_observations):
        left_score = left.target_observations.get(target, NO_SCORE)
        right_score = right.target_observations.get(target, NO_SCORE)
        if right_score > left_score:
            return DominanceRelation.NO_DOMINANCE

    return DominanceRelation.LEFT_DOMINATES


class ParetoFront:
    """Maintains an approximate pareto front of ConjectureData objects. That
    is, we try to maintain a collection of objects such that no element of the
    collection is pareto dominated by any other. In practice we don't quite
    manage that, because doing so is computationally very expensive. Instead
    we maintain a random sample of data objects that are "rarely" dominated by
    any other element of the collection (roughly, no more than about 10%).

    Only valid test cases are considered to belong to the pareto front - any
    test case with a status less than valid is discarded.

    Note that the pareto front is potentially quite large, and currently this
    will store the entire front in memory. This is bounded by the number of
    valid examples we run, which is max_examples in normal execution, and we
    do not currently support workflows with very large values of max_examples
    well anyway, so this isn't a major issue. In future we may wish to
    implement some sort of paging out to disk so that we can work with larger
    fronts.

    Additionally, because this is only an approximate pareto front, there are
    scenarios where it can be much larger than the actual pareto front. There
    isn't a huge amount we can do about this - checking an exact pareto front
    is intrinsically quadratic.

    "Most" of the time we should be relatively close to the true pareto front,
    say within an order of magnitude, but it's not hard to construct scenarios
    where this is not the case. e.g. suppose we enumerate all valid test cases
    in increasing shortlex order as s_1, ..., s_n, ... and have scores f and
    g such that f(s_i) = min(i, N) and g(s_i) = 1 if i >= N, then the pareto
    front is the set {s_1, ..., s_N}, but the only element of the front that
    will dominate s_i when i > N is s_N, which we select with probability
    1 / N. A better data structure could solve this, but at the cost of more
    expensive operations and higher per element memory use, so we'll wait to
    see how much of a problem this is in practice before we try that.
    """
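    # A minimal usage sketch (illustrative only; ``result`` stands in for a
    # hypothetical ConjectureResult produced by running a test case):
    #
    #     front = ParetoFront(Random(0))
    #     front.on_evict(lambda evicted: print("dominated:", evicted))
    #     if front.add(result):
    #         ...  # result is currently in the (approximate) front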

    def __init__(self, random: Random) -> None:
        self.__random = random
        self.__eviction_listeners: list[Callable[[ConjectureResult], None]] = []

        self.front: SortedList[ConjectureResult] = SortedList(
            key=lambda d: sort_key(d.nodes)
        )
        self.__pending: Optional[ConjectureResult] = None

    def add(self, data: Union[ConjectureData, ConjectureResult, _Overrun]) -> bool:
        """Attempts to add ``data`` to the pareto front. Returns True if
        ``data`` is now in the front, including if data is already in the
        collection, and False otherwise."""
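        # Illustrative edge cases: an overrun or otherwise invalid ``data``
        # returns False immediately, and re-adding a result that is already
        # in the front returns True without running the clear-down below.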

        if data.status < Status.VALID:
            return False

        assert not isinstance(data, _Overrun)
        data = data.as_result()
        assert not isinstance(data, _Overrun)

        if not self.front:
            self.front.add(data)
            return True

        if data in self.front:
            return True

        # We add data to the pareto front by adding it unconditionally and then
        # doing a certain amount of randomized "clear down" - testing a random
        # set of elements (currently 10) to see if they are dominated by
        # something else in the collection. If they are, we remove them.
        self.front.add(data)
        assert self.__pending is None
        try:
            self.__pending = data

            # We maintain a set of the current exact pareto front of the
            # values we've sampled so far. When we sample a new element we
            # either add it to this exact pareto front or remove it from the
            # collection entirely.
            front = LazySequenceCopy(self.front)

            # We track which values we are going to remove and remove them all
            # at the end so the shape of the front doesn't change while we're
            # using it.
            to_remove: list[ConjectureResult] = []

            # We now iteratively sample elements from the approximate pareto
            # front to check whether they should be retained. When the set of
            # dominators gets too large we have sampled at least 10 elements
            # and it gets too expensive to continue, so we consider that enough
            # due diligence.
            i = self.front.index(data)

            # First we attempt to look for values that must be removed by the
            # addition of the data. These are necessarily to the right of it
            # in the list.

            failures = 0
            while i + 1 < len(front) and failures < 10:
                j = self.__random.randrange(i + 1, len(front))
                candidate = front.pop(j)
                dom = dominance(data, candidate)
                assert dom != DominanceRelation.RIGHT_DOMINATES
                if dom == DominanceRelation.LEFT_DOMINATES:
                    to_remove.append(candidate)
                    failures = 0
                else:
                    failures += 1

            # Now we look at the points up to where we put data in to see if
            # it is dominated. While we're here we spend some time looking for
            # anything else that might be dominated too, compacting down parts
            # of the list.

            dominators = [data]

            while i >= 0 and len(dominators) < 10:
                front.swap(i, self.__random.randint(0, i))

                candidate = front[i]

                already_replaced = False
                j = 0
                while j < len(dominators):
                    v = dominators[j]

                    dom = dominance(candidate, v)
                    if dom == DominanceRelation.LEFT_DOMINATES:
                        if not already_replaced:
                            already_replaced = True
                            dominators[j] = candidate
                            j += 1
                        else:
                            dominators[j], dominators[-1] = (
                                dominators[-1],
                                dominators[j],
                            )
                            dominators.pop()
                        to_remove.append(v)
                    elif dom == DominanceRelation.RIGHT_DOMINATES:
                        to_remove.append(candidate)
                        break
                    elif dom == DominanceRelation.EQUAL:
                        break
                    else:
                        j += 1
                else:
                    dominators.append(candidate)
                i -= 1

            for v in to_remove:
                self._remove(v)
            return data in self.front
        finally:
            self.__pending = None

    def on_evict(self, f: Callable[[ConjectureResult], None]) -> None:
        """Register a listener function that will be called with data when it
        gets removed from the front because something else dominates it."""
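        # E.g. (hypothetical listener, keeping external state in sync):
        #
        #     front.on_evict(lambda evicted: stale_results.append(evicted))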

        self.__eviction_listeners.append(f)

    def __contains__(self, data: object) -> bool:
        return isinstance(data, (ConjectureData, ConjectureResult)) and (
            data.as_result() in self.front
        )

    def __iter__(self) -> Iterator[ConjectureResult]:
        return iter(self.front)

    def __getitem__(self, i: int) -> ConjectureResult:
        return self.front[i]

    def __len__(self) -> int:
        return len(self.front)

    def _remove(self, data: ConjectureResult) -> None:
        try:
            self.front.remove(data)
        except ValueError:
            return
        # ``__pending`` is the value currently being added by ``add``; if it
        # is removed during its own addition we don't treat that as an
        # eviction, so listeners are not notified.
        if data is not self.__pending:
            for f in self.__eviction_listeners:
                f(data)


class ParetoOptimiser:
    """Class for managing optimisation of the pareto front. That is, given the
    current best known pareto front, this class runs an optimisation process
    that attempts to bring it closer to the actual pareto front.

    Currently this is fairly basic and only handles pareto optimisation that
    works by reducing the test case in the shortlex order. We expect it will
    grow more powerful over time.
    """
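    # A sketch of the intended call pattern (hypothetical; in practice the
    # engine itself constructs this, and ``runner`` stands in for a
    # ConjectureRunner whose pareto front is enabled):
    #
    #     ParetoOptimiser(runner).run()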

    def __init__(self, engine: "ConjectureRunner") -> None:
        self.__engine = engine
        assert self.__engine.pareto_front is not None
        self.front: ParetoFront = self.__engine.pareto_front

    def run(self) -> None:
        seen = set()

        # We iterate backwards through the pareto front, using the shrinker to
        # (hopefully) replace each example with a smaller one. Note that it's
        # important that we start from the end for two reasons: Firstly, by
        # doing it this way we ensure that any new front members we discover
        # during optimisation will also get optimised (because they will be
        # inserted into the part of the front that we haven't visited yet),
        # and secondly we generally expect that we will not finish this process
        # in a single run, because it's relatively expensive in terms of our
        # example budget, and by starting from the end we ensure that each time
        # we run the tests we improve the pareto front because we work on the
        # bits that we haven't covered yet.
        i = len(self.front) - 1
        prev = None
        while i >= 0 and not self.__engine.interesting_examples:
            assert self.front
            i = min(i, len(self.front) - 1)
            target = self.front[i]
            if choices_key(target.choices) in seen:
                i -= 1
                continue
            assert target is not prev
            prev = target

            def allow_transition(source, destination):
                """Shrink to data that strictly pareto dominates the current
                best value we've seen, which is the current target of the
                shrinker.

                Note that during shrinking we may discover other smaller
                examples that this function will reject and will get added to
                the front. This is fine, because they will be processed on
                later iterations of this loop."""
                if dominance(destination, source) == DominanceRelation.LEFT_DOMINATES:
                    # If ``destination`` dominates ``source`` then ``source``
                    # must be dominated in the front - either ``destination`` is
                    # in the front, or it was not added to it because it was
                    # dominated by something in it.
                    self.front._remove(source)
                    return True
                return False

            shrunk = self.__engine.shrink(target, allow_transition=allow_transition)
            seen.add(choices_key(shrunk.choices))

            # Note that the front may have changed shape arbitrarily when
            # we ran the shrinker. If it didn't change shape then this is
            # i - 1. If it did change shape then this is the largest value
            # in the front which is smaller than the previous target, so
            # is the correct place to resume from. In particular note that the
            # size of the front might have grown because of slippage during the
            # shrink, but all of the newly introduced elements will be smaller
            # than `target`, so will be covered by this iteration.
            i = self.front.front.bisect_left(target)