# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

from collections.abc import Iterator
from enum import Enum
from random import Random
from typing import TYPE_CHECKING, Callable, Optional, Union

from sortedcontainers import SortedList

from hypothesis.internal.conjecture.choice import choices_key
from hypothesis.internal.conjecture.data import (
    ConjectureData,
    ConjectureResult,
    Status,
    _Overrun,
)
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
from hypothesis.internal.conjecture.shrinker import sort_key

NO_SCORE = float("-inf")

if TYPE_CHECKING:
    from hypothesis.internal.conjecture.engine import ConjectureRunner


class DominanceRelation(Enum):
    NO_DOMINANCE = 0
    EQUAL = 1
    LEFT_DOMINATES = 2
    RIGHT_DOMINATES = 3


def dominance(left: ConjectureResult, right: ConjectureResult) -> DominanceRelation:
42 """Returns the dominance relation between ``left`` and ``right``, according
43 to the rules that one ConjectureResult dominates another if and only if it
44 is better in every way.
45
46 The things we currently consider to be "better" are:
47
48 * Something that is smaller in shrinking order is better.
49 * Something that has higher status is better.
50 * Each ``interesting_origin`` is treated as its own score, so if two
51 interesting examples have different origins then neither dominates
52 the other.
53 * For each target observation, a higher score is better.
54
55 In "normal" operation where there are no bugs or target observations, the
56 pareto front only has one element (the smallest valid test case), but for
57 more structured or failing tests it can be useful to track, and future work
58 will depend on it more."""
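    # A hedged, purely illustrative sketch of how these rules combine.
    # ``small`` and ``large`` are hypothetical ConjectureResults (not defined
    # in this module) with sort_key(small.nodes) < sort_key(large.nodes) and
    # identical status, tags, and interesting_origin:
    #
    #   small.target_observations == {"hits": 3.0}
    #   large.target_observations == {"hits": 2.0}
    #   dominance(small, large)  # LEFT_DOMINATES: smaller, and no worse on any score
    #
    #   large.target_observations == {"hits": 5.0}
    #   dominance(small, large)  # NO_DOMINANCE: each is better on one axis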
    left_key = sort_key(left.nodes)
    right_key = sort_key(right.nodes)
    if left_key == right_key:
        return DominanceRelation.EQUAL

    if right_key < left_key:
        result = dominance(left=right, right=left)
        if result == DominanceRelation.LEFT_DOMINATES:
            return DominanceRelation.RIGHT_DOMINATES
        else:
            # Because we have sort_key(left) < sort_key(right) the only options
            # are that right is better than left or that the two are
            # incomparable.
            assert result == DominanceRelation.NO_DOMINANCE
            return result

    # Either left is better or there is no dominance relationship.
    assert left_key < right_key

    # The right is more interesting
    if left.status < right.status:
        return DominanceRelation.NO_DOMINANCE

    if not right.tags.issubset(left.tags):
        return DominanceRelation.NO_DOMINANCE

    # Things that are interesting for different reasons are incomparable in
    # the dominance relationship.
    if (
        left.status == Status.INTERESTING
        and right.interesting_origin is not None
        and left.interesting_origin != right.interesting_origin
    ):
        return DominanceRelation.NO_DOMINANCE

    for target in set(left.target_observations) | set(right.target_observations):
        left_score = left.target_observations.get(target, NO_SCORE)
        right_score = right.target_observations.get(target, NO_SCORE)
        if right_score > left_score:
            return DominanceRelation.NO_DOMINANCE

    return DominanceRelation.LEFT_DOMINATES


class ParetoFront:
105 """Maintains an approximate pareto front of ConjectureData objects. That
106 is, we try to maintain a collection of objects such that no element of the
107 collection is pareto dominated by any other. In practice we don't quite
108 manage that, because doing so is computationally very expensive. Instead
109 we maintain a random sample of data objects that are "rarely" dominated by
110 any other element of the collection (roughly, no more than about 10%).
111
112 Only valid test cases are considered to belong to the pareto front - any
113 test case with a status less than valid is discarded.
114
115 Note that the pareto front is potentially quite large, and currently this
116 will store the entire front in memory. This is bounded by the number of
117 valid examples we run, which is max_examples in normal execution, and
118 currently we do not support workflows with large max_examples which have
119 large values of max_examples very well anyway, so this isn't a major issue.
120 In future we may weish to implement some sort of paging out to disk so that
121 we can work with larger fronts.
122
123 Additionally, because this is only an approximate pareto front, there are
124 scenarios where it can be much larger than the actual pareto front. There
125 isn't a huge amount we can do about this - checking an exact pareto front
126 is intrinsically quadratic.
127
128 "Most" of the time we should be relatively close to the true pareto front,
129 say within an order of magnitude, but it's not hard to construct scenarios
130 where this is not the case. e.g. suppose we enumerate all valid test cases
131 in increasing shortlex order as s_1, ..., s_n, ... and have scores f and
132 g such that f(s_i) = min(i, N) and g(s_i) = 1 if i >= N, then the pareto
133 front is the set {s_1, ..., S_N}, but the only element of the front that
134 will dominate s_i when i > N is S_N, which we select with probability
135 1 / N. A better data structure could solve this, but at the cost of more
136 expensive operations and higher per element memory use, so we'll wait to
137 see how much of a problem this is in practice before we try that.
138 """
    def __init__(self, random: Random) -> None:
        self.__random = random
        self.__eviction_listeners: list[Callable[[ConjectureResult], None]] = []

        self.front: SortedList[ConjectureResult] = SortedList(
            key=lambda d: sort_key(d.nodes)
        )
        self.__pending: Optional[ConjectureResult] = None

    def add(self, data: Union[ConjectureData, ConjectureResult, _Overrun]) -> bool:
150 """Attempts to add ``data`` to the pareto front. Returns True if
151 ``data`` is now in the front, including if data is already in the
152 collection, and False otherwise"""
        if data.status < Status.VALID:
            return False

        assert not isinstance(data, _Overrun)
        data = data.as_result()
        assert not isinstance(data, _Overrun)

        if not self.front:
            self.front.add(data)
            return True

        if data in self.front:
            return True

        # We add data to the pareto front by adding it unconditionally and then
        # doing a certain amount of randomized "clear down" - testing a random
        # set of elements (currently 10) to see if they are dominated by
        # something else in the collection. If they are, we remove them.
        self.front.add(data)
        assert self.__pending is None
        try:
            self.__pending = data

            # We maintain a set of the current exact pareto front of the
            # values we've sampled so far. When we sample a new element we
            # either add it to this exact pareto front or remove it from the
            # collection entirely.
            front = LazySequenceCopy(self.front)

            # We track which values we are going to remove and remove them all
            # at the end so the shape of the front doesn't change while we're
            # using it.
            to_remove: list[ConjectureResult] = []

            # We now iteratively sample elements from the approximate pareto
            # front to check whether they should be retained. When the set of
            # dominators gets too large we have sampled at least 10 elements
            # and it gets too expensive to continue, so we consider that enough
            # due diligence.
            i = self.front.index(data)

            # First we attempt to look for values that must be removed by the
            # addition of the data. These are necessarily to the right of it
            # in the list.

            failures = 0
            while i + 1 < len(front) and failures < 10:
                j = self.__random.randrange(i + 1, len(front))
                candidate = front.pop(j)
                dom = dominance(data, candidate)
                assert dom != DominanceRelation.RIGHT_DOMINATES
                if dom == DominanceRelation.LEFT_DOMINATES:
                    to_remove.append(candidate)
                    failures = 0
                else:
                    failures += 1

            # Now we look at the points up to where we put data in to see if
            # it is dominated. While we're here we spend some time looking for
            # anything else that might be dominated too, compacting down parts
            # of the list.

            dominators = [data]

            while i >= 0 and len(dominators) < 10:
                front.swap(i, self.__random.randint(0, i))

                candidate = front[i]

                already_replaced = False
                j = 0
                while j < len(dominators):
                    v = dominators[j]

                    dom = dominance(candidate, v)
                    if dom == DominanceRelation.LEFT_DOMINATES:
                        if not already_replaced:
                            already_replaced = True
                            dominators[j] = candidate
                            j += 1
                        else:  # pragma: no cover  # flaky, by test_database_contains_only_pareto_front
                            dominators[j], dominators[-1] = (
                                dominators[-1],
                                dominators[j],
                            )
                            dominators.pop()
                        to_remove.append(v)
                    elif dom == DominanceRelation.RIGHT_DOMINATES:
                        to_remove.append(candidate)
                        break
                    elif dom == DominanceRelation.EQUAL:
                        break
                    else:
                        j += 1
                else:
                    dominators.append(candidate)
                i -= 1

            for v in to_remove:
                self._remove(v)
            return data in self.front
        finally:
            self.__pending = None

    def on_evict(self, f: Callable[[ConjectureResult], None]) -> None:
258 """Register a listener function that will be called with data when it
259 gets removed from the front because something else dominates it."""
260 self.__eviction_listeners.append(f)
261
262 def __contains__(self, data: object) -> bool:
263 if not isinstance(data, (ConjectureData, ConjectureResult)):
264 return False
265
266 result = data.as_result()
267 if isinstance(result, _Overrun):
268 return False
269
270 return result in self.front
271
272 def __iter__(self) -> Iterator[ConjectureResult]:
273 return iter(self.front)
274
275 def __getitem__(self, i: int) -> ConjectureResult:
276 return self.front[i]
277
278 def __len__(self) -> int:
279 return len(self.front)
280
281 def _remove(self, data: ConjectureResult) -> None:
282 try:
283 self.front.remove(data)
284 except ValueError:
285 return
286 if data is not self.__pending:
287 for f in self.__eviction_listeners:
288 f(data)
289
290
291class ParetoOptimiser:
292 """Class for managing optimisation of the pareto front. That is, given the
293 current best known pareto front, this class runs an optimisation process
294 that attempts to bring it closer to the actual pareto front.
295
296 Currently this is fairly basic and only handles pareto optimisation that
297 works by reducing the test case in the shortlex order. We expect it will
298 grow more powerful over time.
299 """
    def __init__(self, engine: "ConjectureRunner") -> None:
        self.__engine = engine
        assert self.__engine.pareto_front is not None
        self.front: ParetoFront = self.__engine.pareto_front

    def run(self) -> None:
        seen = set()

        # We iterate backwards through the pareto front, using the shrinker to
        # (hopefully) replace each example with a smaller one. Note that it's
        # important that we start from the end for two reasons: Firstly, by
        # doing it this way we ensure that any new front members we discover
        # during optimisation will also get optimised (because they will be
        # inserted into the part of the front that we haven't visited yet),
        # and secondly we generally expect that we will not finish this process
        # in a single run, because it's relatively expensive in terms of our
        # example budget, and by starting from the end we ensure that each time
        # we run the tests we improve the pareto front because we work on the
        # bits that we haven't covered yet.
        i = len(self.front) - 1
        prev = None
        while i >= 0 and not self.__engine.interesting_examples:
            assert self.front
            i = min(i, len(self.front) - 1)
            target = self.front[i]
            if choices_key(target.choices) in seen:
                i -= 1
                continue
            assert target is not prev
            prev = target

            def allow_transition(source, destination):
                """Shrink to data that strictly pareto dominates the current
                best value we've seen, which is the current target of the
                shrinker.

                Note that during shrinking we may discover other smaller
                examples that this function will reject and will get added to
                the front. This is fine, because they will be processed on
                later iterations of this loop."""
                if dominance(destination, source) == DominanceRelation.LEFT_DOMINATES:
                    # If ``destination`` dominates ``source`` then ``source``
                    # must be dominated in the front - either ``destination`` is in
                    # the front, or it was not added to it because it was
                    # dominated by something in it.
                    self.front._remove(source)
                    return True
                return False

            shrunk = self.__engine.shrink(target, allow_transition=allow_transition)
            seen.add(choices_key(shrunk.choices))

            # Note that the front may have changed shape arbitrarily when
            # we ran the shrinker. If it didn't change shape then this is
            # i - 1. If it did change shape then this is the largest value
            # in the front which is smaller than the previous target, so
            # is the correct place to resume from. In particular note that the
            # size of the front might have grown because of slippage during the
            # shrink, but all of the newly introduced elements will be smaller
            # than `target`, so will be covered by this iteration.
            i = self.front.front.bisect_left(target)