Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/hypothesis/internal/conjecture/pareto.py: 17%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

140 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11from enum import Enum 

12 

13from sortedcontainers import SortedList 

14 

15from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status 

16from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap 

17from hypothesis.internal.conjecture.shrinker import sort_key 

18 

# Sentinel score used when a result has no observation for a given target:
# any real observed score compares greater than it.
NO_SCORE = float("-inf")

20 

21 

class DominanceRelation(Enum):
    """Result of comparing two ConjectureResults with ``dominance()``."""

    NO_DOMINANCE = 0  # incomparable: neither is better in every way
    EQUAL = 1  # identical buffers
    LEFT_DOMINATES = 2  # left is at least as good everywhere, and smaller
    RIGHT_DOMINATES = 3  # mirror image of LEFT_DOMINATES

27 

28 

def dominance(left, right):
    """Returns the dominance relation between ``left`` and ``right``, according
    to the rules that one ConjectureResult dominates another if and only if it
    is better in every way.

    The things we currently consider to be "better" are:

    * Something that is smaller in shrinking order is better.
    * Something that has higher status is better.
    * Each ``interesting_origin`` is treated as its own score, so if two
      interesting examples have different origins then neither dominates
      the other.
    * For each target observation, a higher score is better.

    In "normal" operation where there are no bugs or target observations, the
    pareto front only has one element (the smallest valid test case), but for
    more structured or failing tests it can be useful to track, and future work
    will depend on it more."""

    if left.buffer == right.buffer:
        return DominanceRelation.EQUAL

    # Normalise so that the shortlex-smaller result is on the left: compare
    # the swapped pair and flip the answer back.
    if sort_key(right.buffer) < sort_key(left.buffer):
        flipped = dominance(left=right, right=left)
        if flipped == DominanceRelation.LEFT_DOMINATES:
            return DominanceRelation.RIGHT_DOMINATES
        # With sort_key(left) < sort_key(right) the only remaining option is
        # that the two are incomparable - right can never dominate here.
        assert flipped == DominanceRelation.NO_DOMINANCE
        return flipped

    # From here on, either left dominates or the two are incomparable.
    assert sort_key(left.buffer) < sort_key(right.buffer)

    # Right is strictly more interesting, so left cannot dominate.
    if left.status < right.status:
        return DominanceRelation.NO_DOMINANCE

    # Left must carry every structural tag that right does.
    if not right.tags.issubset(left.tags):
        return DominanceRelation.NO_DOMINANCE

    # Results that are interesting for different reasons are incomparable.
    if (
        left.status == Status.INTERESTING
        and left.interesting_origin != right.interesting_origin
    ):
        return DominanceRelation.NO_DOMINANCE

    # Left must score at least as high as right on every observed target;
    # a missing observation counts as negative infinity.
    all_targets = set(left.target_observations) | set(right.target_observations)
    for target in all_targets:
        score_l = left.target_observations.get(target, NO_SCORE)
        score_r = right.target_observations.get(target, NO_SCORE)
        if score_r > score_l:
            return DominanceRelation.NO_DOMINANCE

    return DominanceRelation.LEFT_DOMINATES

87 

88 

class ParetoFront:
    """Maintains an approximate pareto front of ConjectureData objects. That
    is, we try to maintain a collection of objects such that no element of the
    collection is pareto dominated by any other. In practice we don't quite
    manage that, because doing so is computationally very expensive. Instead
    we maintain a random sample of data objects that are "rarely" dominated by
    any other element of the collection (roughly, no more than about 10%).

    Only valid test cases are considered to belong to the pareto front - any
    test case with a status less than valid is discarded.

    Note that the pareto front is potentially quite large, and currently this
    will store the entire front in memory. This is bounded by the number of
    valid examples we run, which is max_examples in normal execution, and
    currently we do not support workflows with large values of max_examples
    very well anyway, so this isn't a major issue. In future we may wish to
    implement some sort of paging out to disk so that we can work with larger
    fronts.

    Additionally, because this is only an approximate pareto front, there are
    scenarios where it can be much larger than the actual pareto front. There
    isn't a huge amount we can do about this - checking an exact pareto front
    is intrinsically quadratic.

    "Most" of the time we should be relatively close to the true pareto front,
    say within an order of magnitude, but it's not hard to construct scenarios
    where this is not the case. e.g. suppose we enumerate all valid test cases
    in increasing shortlex order as s_1, ..., s_n, ... and have scores f and
    g such that f(s_i) = min(i, N) and g(s_i) = 1 if i >= N, then the pareto
    front is the set {s_1, ..., S_N}, but the only element of the front that
    will dominate s_i when i > N is S_N, which we select with probability
    1 / N. A better data structure could solve this, but at the cost of more
    expensive operations and higher per element memory use, so we'll wait to
    see how much of a problem this is in practice before we try that.
    """

    def __init__(self, random):
        # Source of randomness for the sampled clear-down in ``add``.
        self.__random = random
        # Callbacks invoked with each result evicted because it was dominated.
        self.__eviction_listeners = []

        # The front itself, kept sorted in shortlex (shrink) order of buffer.
        self.front = SortedList(key=lambda d: sort_key(d.buffer))
        # The result currently being added; eviction listeners are suppressed
        # for it so a failed insertion doesn't look like an eviction.
        self.__pending = None

    def add(self, data):
        """Attempts to add ``data`` to the pareto front. Returns True if
        ``data`` is now in the front, including if data is already in the
        collection, and False otherwise"""
        # Only valid-or-better results can belong to the front.
        if data.status < Status.VALID:
            return False

        data = data.as_result()

        if not self.front:
            self.front.add(data)
            return True

        if data in self.front:
            return True

        # We add data to the pareto front by adding it unconditionally and then
        # doing a certain amount of randomized "clear down" - testing a random
        # set of elements (currently 10) to see if they are dominated by
        # something else in the collection. If they are, we remove them.
        self.front.add(data)
        assert self.__pending is None
        try:
            self.__pending = data

            # We maintain a set of the current exact pareto front of the
            # values we've sampled so far. When we sample a new element we
            # either add it to this exact pareto front or remove it from the
            # collection entirely.
            front = LazySequenceCopy(self.front)

            # We track which values we are going to remove and remove them all
            # at the end so the shape of the front doesn't change while we're
            # using it.
            to_remove = []

            # We now iteratively sample elements from the approximate pareto
            # front to check whether they should be retained. When the set of
            # dominators gets too large we have sampled at least 10 elements
            # and it gets too expensive to continue, so we consider that enough
            # due diligence.
            i = self.front.index(data)

            # First we attempt to look for values that must be removed by the
            # addition of the data. These are necessarily to the right of it
            # in the list.

            failures = 0
            while i + 1 < len(front) and failures < 10:
                # Sample a random element larger (in shortlex order) than data.
                j = self.__random.randrange(i + 1, len(front))
                candidate = front.pop(j)
                dom = dominance(data, candidate)
                # candidate is shortlex-larger than data, so it can never
                # dominate data.
                assert dom != DominanceRelation.RIGHT_DOMINATES
                if dom == DominanceRelation.LEFT_DOMINATES:
                    to_remove.append(candidate)
                    failures = 0
                else:
                    failures += 1

            # Now we look at the points up to where we put data in to see if
            # it is dominated. While we're here we spend some time looking for
            # anything else that might be dominated too, compacting down parts
            # of the list.

            dominators = [data]

            while i >= 0 and len(dominators) < 10:
                # Shuffle a random earlier element into position i so we
                # sample the left part of the front uniformly.
                swap(front, i, self.__random.randint(0, i))

                candidate = front[i]

                already_replaced = False
                j = 0
                while j < len(dominators):
                    v = dominators[j]

                    dom = dominance(candidate, v)
                    if dom == DominanceRelation.LEFT_DOMINATES:
                        if not already_replaced:
                            # candidate supersedes v in the sampled exact
                            # front; take its slot.
                            already_replaced = True
                            dominators[j] = candidate
                            j += 1
                        else:
                            # candidate already holds a slot, so v is simply
                            # dominated: swap-remove it from dominators and
                            # schedule it for eviction.
                            dominators[j], dominators[-1] = (
                                dominators[-1],
                                dominators[j],
                            )
                            dominators.pop()
                            to_remove.append(v)
                    elif dom == DominanceRelation.RIGHT_DOMINATES:
                        to_remove.append(candidate)
                        break
                    elif dom == DominanceRelation.EQUAL:
                        break
                    else:
                        j += 1
                else:
                    # while/else: candidate was not dominated by (or equal to)
                    # any current dominator, so it joins the sampled front.
                    dominators.append(candidate)
                i -= 1

            for v in to_remove:
                self.__remove(v)
            # data itself may have been removed if something dominated it.
            return data in self.front
        finally:
            self.__pending = None

    def on_evict(self, f):
        """Register a listener function that will be called with data when it
        gets removed from the front because something else dominates it."""
        self.__eviction_listeners.append(f)

    def __contains__(self, data):
        # Accept either a ConjectureData or its ConjectureResult; membership
        # is always tested against the result form stored in the front.
        return isinstance(data, (ConjectureData, ConjectureResult)) and (
            data.as_result() in self.front
        )

    def __iter__(self):
        # Iterates results in shortlex order of buffer.
        return iter(self.front)

    def __getitem__(self, i):
        return self.front[i]

    def __len__(self):
        return len(self.front)

    def __remove(self, data):
        """Remove ``data`` from the front (if present) and notify eviction
        listeners, unless it is the element currently being added."""
        try:
            self.front.remove(data)
        except ValueError:
            # Already gone (e.g. scheduled for removal twice); nothing to do.
            return
        if data is not self.__pending:
            for f in self.__eviction_listeners:
                f(data)

265 

266 

class ParetoOptimiser:
    """Class for managing optimisation of the pareto front. That is, given the
    current best known pareto front, this class runs an optimisation process
    that attempts to bring it closer to the actual pareto front.

    Currently this is fairly basic and only handles pareto optimisation that
    works by reducing the test case in the shortlex order. We expect it will
    grow more powerful over time.
    """

    def __init__(self, engine):
        # The ConjectureRunner driving test execution; used to run the
        # shrinker and to detect interesting (failing) examples.
        self.__engine = engine
        # Shared reference to the engine's pareto front.
        self.front = self.__engine.pareto_front

    def run(self):
        # Buffers we have already shrunk to, so we don't reprocess them.
        seen = set()

        # We iterate backwards through the pareto front, using the shrinker to
        # (hopefully) replace each example with a smaller one. Note that it's
        # important that we start from the end for two reasons: Firstly, by
        # doing it this way we ensure that any new front members we discover
        # during optimisation will also get optimised (because they will be
        # inserted into the part of the front that we haven't visited yet),
        # and secondly we generally expect that we will not finish this process
        # in a single run, because it's relatively expensive in terms of our
        # example budget, and by starting from the end we ensure that each time
        # we run the tests we improve the pareto front because we work on the
        # bits that we haven't covered yet.
        i = len(self.front) - 1
        prev = None
        while i >= 0 and not self.__engine.interesting_examples:
            assert self.front
            # The front may have shrunk since the last iteration; clamp i.
            i = min(i, len(self.front) - 1)
            target = self.front[i]
            if target.buffer in seen:
                i -= 1
                continue
            # Guard against an infinite loop repeatedly shrinking one element.
            assert target is not prev
            prev = target

            def allow_transition(source, destination):
                """Shrink to data that strictly pareto dominates the current
                best value we've seen, which is the current target of the
                shrinker.

                Note that during shrinking we may discover other smaller
                examples that this function will reject and will get added to
                the front. This is fine, because they will be processed on
                later iterations of this loop."""
                if dominance(destination, source) == DominanceRelation.LEFT_DOMINATES:
                    # If ``destination`` dominates ``source`` then ``source``
                    # must be dominated in the front - either ``destination`` is in
                    # the front, or it was not added to it because it was
                    # dominated by something in it.
                    try:
                        self.front.front.remove(source)
                    except ValueError:
                        # source was already removed from the underlying list.
                        pass
                    return True
                return False

            shrunk = self.__engine.shrink(target, allow_transition=allow_transition)
            seen.add(shrunk.buffer)

            # Note that the front may have changed shape arbitrarily when
            # we ran the shrinker. If it didn't change shape then this is
            # i - 1. If it did change shape then this is the largest value
            # in the front which is smaller than the previous target, so
            # is the correct place to resume from. In particular note that the
            # size of the front might have grown because of slippage during the
            # shrink, but all of the newly introduced elements will be smaller
            # than `target`, so will be covered by this iteration.
            i = self.front.front.bisect_left(target)