1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11from collections.abc import Iterator
12from enum import Enum
13from random import Random
14from typing import TYPE_CHECKING, Callable, Optional, Union
15
16from sortedcontainers import SortedList
17
18from hypothesis.internal.conjecture.choice import choices_key
19from hypothesis.internal.conjecture.data import (
20 ConjectureData,
21 ConjectureResult,
22 Status,
23 _Overrun,
24)
25from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
26from hypothesis.internal.conjecture.shrinker import sort_key
27
# Sentinel used when a result has no observation recorded for a given target:
# -inf compares as worse than any real score, so a missing observation never
# beats a present one in the dominance comparison below.
NO_SCORE = float("-inf")

if TYPE_CHECKING:
    from hypothesis.internal.conjecture.engine import ConjectureRunner
32
33
class DominanceRelation(Enum):
    """Outcome of comparing two ConjectureResults with :func:`dominance`.

    ``LEFT_DOMINATES`` / ``RIGHT_DOMINATES`` mean one argument is at least as
    good as the other in every respect we track; ``EQUAL`` means the two have
    identical shrink-order keys; ``NO_DOMINANCE`` means they are incomparable.
    """

    NO_DOMINANCE = 0
    EQUAL = 1
    LEFT_DOMINATES = 2
    RIGHT_DOMINATES = 3
39
40
def dominance(left: ConjectureResult, right: ConjectureResult) -> DominanceRelation:
    """Returns the dominance relation between ``left`` and ``right``, according
    to the rules that one ConjectureResult dominates another if and only if it
    is better in every way.

    The things we currently consider to be "better" are:

    * Something that is smaller in shrinking order is better.
    * Something that has higher status is better.
    * Each ``interesting_origin`` is treated as its own score, so if two
      interesting examples have different origins then neither dominates
      the other.
    * For each target observation, a higher score is better.

    In "normal" operation where there are no bugs or target observations, the
    pareto front only has one element (the smallest valid test case), but for
    more structured or failing tests it can be useful to track, and future work
    will depend on it more."""

    left_key = sort_key(left.nodes)
    right_key = sort_key(right.nodes)
    if left_key == right_key:
        return DominanceRelation.EQUAL

    # Normalise so that ``left`` is the shrink-order-smaller result, and
    # remember whether we swapped so that we can flip the answer back at
    # the end. Once normalised, the only possible outcomes are that left
    # dominates or that the two are incomparable, because left is already
    # strictly better in shrinking order.
    swapped = right_key < left_key
    if swapped:
        left, right = right, left

    # The (shrink-order-larger) right result has a strictly higher status,
    # so it is more interesting and left cannot dominate it.
    if left.status < right.status:
        return DominanceRelation.NO_DOMINANCE

    # Right carries structural coverage tags that left lacks.
    if not right.tags.issubset(left.tags):
        return DominanceRelation.NO_DOMINANCE

    # Things that are interesting for different reasons are incomparable in
    # the dominance relationship.
    if (
        left.status == Status.INTERESTING
        and left.interesting_origin != right.interesting_origin
    ):
        return DominanceRelation.NO_DOMINANCE

    # Left must score at least as well as right on every observed target;
    # a target either side never observed counts as -inf for that side.
    observed = left.target_observations.keys() | right.target_observations.keys()
    for target in observed:
        left_score = left.target_observations.get(target, NO_SCORE)
        right_score = right.target_observations.get(target, NO_SCORE)
        if right_score > left_score:
            return DominanceRelation.NO_DOMINANCE

    return (
        DominanceRelation.RIGHT_DOMINATES
        if swapped
        else DominanceRelation.LEFT_DOMINATES
    )
101
102
class ParetoFront:
    """Maintains an approximate pareto front of ConjectureData objects. That
    is, we try to maintain a collection of objects such that no element of the
    collection is pareto dominated by any other. In practice we don't quite
    manage that, because doing so is computationally very expensive. Instead
    we maintain a random sample of data objects that are "rarely" dominated by
    any other element of the collection (roughly, no more than about 10%).

    Only valid test cases are considered to belong to the pareto front - any
    test case with a status less than valid is discarded.

    Note that the pareto front is potentially quite large, and currently this
    will store the entire front in memory. This is bounded by the number of
    valid examples we run, which is max_examples in normal execution, and
    currently we do not support workflows with very large values of
    max_examples well anyway, so this isn't a major issue. In future we may
    wish to implement some sort of paging out to disk so that we can work
    with larger fronts.

    Additionally, because this is only an approximate pareto front, there are
    scenarios where it can be much larger than the actual pareto front. There
    isn't a huge amount we can do about this - checking an exact pareto front
    is intrinsically quadratic.

    "Most" of the time we should be relatively close to the true pareto front,
    say within an order of magnitude, but it's not hard to construct scenarios
    where this is not the case. e.g. suppose we enumerate all valid test cases
    in increasing shortlex order as s_1, ..., s_n, ... and have scores f and
    g such that f(s_i) = min(i, N) and g(s_i) = 1 if i >= N, then the pareto
    front is the set {s_1, ..., S_N}, but the only element of the front that
    will dominate s_i when i > N is S_N, which we select with probability
    1 / N. A better data structure could solve this, but at the cost of more
    expensive operations and higher per element memory use, so we'll wait to
    see how much of a problem this is in practice before we try that.
    """

    def __init__(self, random: Random) -> None:
        self.__random = random
        # Callbacks invoked (by ``_remove``) with each result that is evicted
        # from the front because something else dominates it.
        self.__eviction_listeners: list[Callable[[ConjectureResult], None]] = []

        # The approximate front itself, kept sorted by shrink order so the
        # smallest test cases come first.
        self.front: SortedList[ConjectureResult] = SortedList(
            key=lambda d: sort_key(d.nodes)
        )
        # The result currently being inserted by ``add``. Evicting it must
        # not fire listeners, since from the caller's perspective it was
        # never accepted into the front in the first place.
        self.__pending: Optional[ConjectureResult] = None

    def add(self, data: Union[ConjectureData, ConjectureResult, _Overrun]) -> bool:
        """Attempts to add ``data`` to the pareto front. Returns True if
        ``data`` is now in the front, including if data is already in the
        collection, and False otherwise"""
        # Invalid and overrun test cases never belong to the front.
        if data.status < Status.VALID:
            return False

        assert not isinstance(data, _Overrun)
        data = data.as_result()
        assert not isinstance(data, _Overrun)

        if not self.front:
            self.front.add(data)
            return True

        if data in self.front:
            return True

        # We add data to the pareto front by adding it unconditionally and then
        # doing a certain amount of randomized "clear down" - testing a random
        # set of elements (currently 10) to see if they are dominated by
        # something else in the collection. If they are, we remove them.
        self.front.add(data)
        assert self.__pending is None
        try:
            self.__pending = data

            # We maintain a set of the current exact pareto front of the
            # values we've sampled so far. When we sample a new element we
            # either add it to this exact pareto front or remove it from the
            # collection entirely.
            front = LazySequenceCopy(self.front)

            # We track which values we are going to remove and remove them all
            # at the end so the shape of the front doesn't change while we're
            # using it.
            to_remove: list[ConjectureResult] = []

            # We now iteratively sample elements from the approximate pareto
            # front to check whether they should be retained. When the set of
            # dominators gets too large we have sampled at least 10 elements
            # and it gets too expensive to continue, so we consider that enough
            # due diligence.
            i = self.front.index(data)

            # First we attempt to look for values that must be removed by the
            # addition of the data. These are necessarily to the right of it
            # in the list (i.e. larger in shrink order, so they can only be
            # dominated by data, never dominate it).

            failures = 0
            while i + 1 < len(front) and failures < 10:
                # Sample a random element to the right of data; ``pop`` also
                # removes it from the lazy copy so we never test it twice.
                j = self.__random.randrange(i + 1, len(front))
                candidate = front.pop(j)
                dom = dominance(data, candidate)
                assert dom != DominanceRelation.RIGHT_DOMINATES
                if dom == DominanceRelation.LEFT_DOMINATES:
                    to_remove.append(candidate)
                    failures = 0
                else:
                    failures += 1

            # Now we look at the points up to where we put data in to see if
            # it is dominated. While we're here we spend some time looking for
            # anything else that might be dominated too, compacting down parts
            # of the list.

            dominators = [data]

            while i >= 0 and len(dominators) < 10:
                # Shuffle-as-we-go: swap a random earlier element into
                # position i so we visit the prefix in random order.
                front.swap(i, self.__random.randint(0, i))

                candidate = front[i]

                already_replaced = False
                j = 0
                while j < len(dominators):
                    v = dominators[j]

                    dom = dominance(candidate, v)
                    if dom == DominanceRelation.LEFT_DOMINATES:
                        if not already_replaced:
                            already_replaced = True
                            dominators[j] = candidate
                            j += 1
                        else:  # pragma: no cover # flaky, by test_database_contains_only_pareto_front
                            # candidate already took a slot in dominators, so
                            # just drop v via swap-with-last and pop.
                            dominators[j], dominators[-1] = (
                                dominators[-1],
                                dominators[j],
                            )
                            dominators.pop()
                        to_remove.append(v)
                    elif dom == DominanceRelation.RIGHT_DOMINATES:
                        to_remove.append(candidate)
                        break
                    elif dom == DominanceRelation.EQUAL:
                        break
                    else:
                        j += 1
                else:
                    # while-else: candidate survived comparison against every
                    # dominator, so it joins the exact sampled front.
                    dominators.append(candidate)
                i -= 1

            for v in to_remove:
                self._remove(v)
            # data itself may have been removed above if something dominated it.
            return data in self.front
        finally:
            self.__pending = None

    def on_evict(self, f: Callable[[ConjectureResult], None]) -> None:
        """Register a listener function that will be called with data when it
        gets removed from the front because something else dominates it."""
        self.__eviction_listeners.append(f)

    def __contains__(self, data: object) -> bool:
        # Membership is defined in terms of results; anything that cannot be
        # converted to a (non-overrun) result is not in the front.
        if not isinstance(data, (ConjectureData, ConjectureResult)):
            return False

        result = data.as_result()
        if isinstance(result, _Overrun):
            return False

        return result in self.front

    def __iter__(self) -> Iterator[ConjectureResult]:
        # Iterates in shrink order (smallest first), per the SortedList key.
        return iter(self.front)

    def __getitem__(self, i: int) -> ConjectureResult:
        return self.front[i]

    def __len__(self) -> int:
        return len(self.front)

    def _remove(self, data: ConjectureResult) -> None:
        """Remove ``data`` from the front if present, notifying eviction
        listeners unless it is the value currently being added."""
        try:
            self.front.remove(data)
        except ValueError:
            # Not in the front (e.g. already removed) - nothing to do.
            return
        if data is not self.__pending:
            for f in self.__eviction_listeners:
                f(data)
288
289
class ParetoOptimiser:
    """Class for managing optimisation of the pareto front. That is, given the
    current best known pareto front, this class runs an optimisation process
    that attempts to bring it closer to the actual pareto front.

    Currently this is fairly basic and only handles pareto optimisation that
    works by reducing the test case in the shortlex order. We expect it will
    grow more powerful over time.
    """

    def __init__(self, engine: "ConjectureRunner") -> None:
        self.__engine = engine
        assert self.__engine.pareto_front is not None
        self.front: ParetoFront = self.__engine.pareto_front

    def run(self) -> None:
        # Choice sequences (keyed via choices_key) we have already used as a
        # shrink target or reached as a shrink result, so we don't re-shrink
        # the same value twice in this run.
        seen = set()

        # We iterate backwards through the pareto front, using the shrinker to
        # (hopefully) replace each example with a smaller one. Note that it's
        # important that we start from the end for two reasons: Firstly, by
        # doing it this way we ensure that any new front members we discover
        # during optimisation will also get optimised (because they will be
        # inserted into the part of the front that we haven't visited yet),
        # and secondly we generally expect that we will not finish this process
        # in a single run, because it's relatively expensive in terms of our
        # example budget, and by starting from the end we ensure that each time
        # we run the tests we improve the pareto front because we work on the
        # bits that we haven't covered yet.
        i = len(self.front) - 1
        prev = None
        while i >= 0 and not self.__engine.interesting_examples:
            assert self.front
            # The front may have shrunk since the last iteration; clamp i.
            i = min(i, len(self.front) - 1)
            target = self.front[i]
            if choices_key(target.choices) in seen:
                i -= 1
                continue
            # Guard against an infinite loop revisiting the same element.
            assert target is not prev
            prev = target

            def allow_transition(source, destination):
                """Shrink to data that strictly pareto dominates the current
                best value we've seen, which is the current target of the
                shrinker.

                Note that during shrinking we may discover other smaller
                examples that this function will reject and will get added to
                the front. This is fine, because they will be processed on
                later iterations of this loop."""
                if dominance(destination, source) == DominanceRelation.LEFT_DOMINATES:
                    # If ``destination`` dominates ``source`` then ``source``
                    # must be dominated in the front - either ``destination`` is in
                    # the front, or it was not added to it because it was
                    # dominated by something in it.
                    self.front._remove(source)
                    return True
                return False

            shrunk = self.__engine.shrink(target, allow_transition=allow_transition)
            seen.add(choices_key(shrunk.choices))

            # Note that the front may have changed shape arbitrarily when
            # we ran the shrinker. If it didn't change shape then this is
            # i - 1. If it did change shape then this is the largest value
            # in the front which is smaller than the previous target, so
            # is the correct place to resume from. In particular note that the
            # size of the front might have grown because of slippage during the
            # shrink, but all of the newly introduced elements will be smaller
            # than `target`, so will be covered by this iteration.
            i = self.front.front.bisect_left(target)