Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/conjecture/utils.py: 81%

203 statements  

# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import enum
import hashlib
import heapq
import math
import sys
from collections import OrderedDict, abc
from collections.abc import Sequence
from functools import lru_cache
from typing import TYPE_CHECKING, Optional, TypeVar, Union

from hypothesis.errors import InvalidArgument
from hypothesis.internal.compat import int_from_bytes
from hypothesis.internal.floats import next_up

if TYPE_CHECKING:
    from hypothesis.internal.conjecture.data import ConjectureData


LABEL_MASK = 2**64 - 1


def calc_label_from_name(name: str) -> int:
    hashed = hashlib.sha384(name.encode()).digest()
    return int_from_bytes(hashed[:8])


def calc_label_from_cls(cls: type) -> int:
    return calc_label_from_name(cls.__qualname__)


def calc_label_from_hash(obj: object) -> int:
    return calc_label_from_name(str(hash(obj)))


def combine_labels(*labels: int) -> int:
    label = 0
    for l in labels:
        label = (label << 1) & LABEL_MASK
        label ^= l
    return label
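
# Note that combining is order-sensitive: for two labels a and b the result is
# ((a << 1) ^ b) & LABEL_MASK, so combine_labels(a, b) != combine_labels(b, a)
# in general.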


SAMPLE_IN_SAMPLER_LABEL = calc_label_from_name("a sample() in Sampler")
ONE_FROM_MANY_LABEL = calc_label_from_name("one more from many()")


T = TypeVar("T")


def identity(v: T) -> T:
    return v


def check_sample(
    values: Union[type[enum.Enum], Sequence[T]], strategy_name: str
) -> Sequence[T]:
    if "numpy" in sys.modules and isinstance(values, sys.modules["numpy"].ndarray):
        if values.ndim != 1:
            raise InvalidArgument(
                "Only one-dimensional arrays are supported for sampling, "
                f"and the given value has {values.ndim} dimensions (shape "
                f"{values.shape}). This array would give samples of array slices "
                "instead of elements! Use np.ravel(values) to convert "
                "to a one-dimensional array, or tuple(values) if you "
                "want to sample slices."
            )
    elif not isinstance(values, (OrderedDict, abc.Sequence, enum.EnumMeta)):
        raise InvalidArgument(
            f"Cannot sample from {values!r} because it is not an ordered collection. "
            f"Hypothesis goes to some length to ensure that the {strategy_name} "
            "strategy has stable results between runs. To replay a saved "
            "example, the sampled values must have the same iteration order "
            "on every run - ruling out sets, dicts, etc due to hash "
            "randomization. Most cases can simply use `sorted(values)`, but "
            "mixed types or special values such as math.nan require careful "
            "handling - and note that when simplifying an example, "
            "Hypothesis treats earlier values as simpler."
        )
    if isinstance(values, range):
        return values
    return tuple(values)


@lru_cache(64)
def compute_sampler_table(weights: tuple[float, ...]) -> list[tuple[int, int, float]]:
    n = len(weights)
    table: list[list[int | float | None]] = [[i, None, None] for i in range(n)]
    total = sum(weights)
    num_type = type(total)

    zero = num_type(0)  # type: ignore
    one = num_type(1)  # type: ignore

    small: list[int] = []
    large: list[int] = []

    probabilities = [w / total for w in weights]
    scaled_probabilities: list[float] = []

    for i, alternate_chance in enumerate(probabilities):
        scaled = alternate_chance * n
        scaled_probabilities.append(scaled)
        if scaled == 1:
            table[i][2] = zero
        elif scaled < 1:
            small.append(i)
        else:
            large.append(i)
    heapq.heapify(small)
    heapq.heapify(large)

    while small and large:
        lo = heapq.heappop(small)
        hi = heapq.heappop(large)

        assert lo != hi
        assert scaled_probabilities[hi] > one
        assert table[lo][1] is None
        table[lo][1] = hi
        table[lo][2] = one - scaled_probabilities[lo]
        scaled_probabilities[hi] = (
            scaled_probabilities[hi] + scaled_probabilities[lo]
        ) - one

        if scaled_probabilities[hi] < 1:
            heapq.heappush(small, hi)
        elif scaled_probabilities[hi] == 1:
            table[hi][2] = zero
        else:
            heapq.heappush(large, hi)
    while large:
        table[large.pop()][2] = zero
    while small:
        table[small.pop()][2] = zero

    new_table: list[tuple[int, int, float]] = []
    for base, alternate, alternate_chance in table:
        assert isinstance(base, int)
        assert isinstance(alternate, int) or alternate is None
        assert alternate_chance is not None
        if alternate is None:
            new_table.append((base, base, alternate_chance))
        elif alternate < base:
            new_table.append((alternate, base, one - alternate_chance))
        else:
            new_table.append((base, alternate, alternate_chance))
    new_table.sort()
    return new_table
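
# A worked example of the construction above, for illustration: for weights
# (1, 1, 2), the scaled probabilities are (0.75, 0.75, 1.5) and the resulting
# table is [(0, 2, 0.25), (1, 2, 0.25), (2, 2, 0.0)]. Picking a row uniformly
# (probability 1/3 each) and taking its alternate with the listed chance gives
# P(0) = P(1) = (1/3) * 0.75 = 0.25 and P(2) = (1/3) * (0.25 + 0.25 + 1) = 0.5,
# i.e. exactly weights / sum(weights), while every draw costs O(1).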


class Sampler:
    """Sampler based on Vose's algorithm for the alias method. See
    http://www.keithschwarz.com/darts-dice-coins/ for a good explanation.

    The general idea is that we store a table of triples (base, alternate, p).
    We then pick a triple uniformly at random, and choose its alternate
    value with probability p and else choose its base value. The triples are
    chosen so that the resulting mixture has the right distribution.

    We maintain the following invariants to try to produce good shrinks:

    1. The table is in lexicographic (base, alternate) order, so that choosing
       an earlier value in the list always lowers (or at least leaves
       unchanged) the value.
    2. base[i] < alternate[i], so that shrinking the draw always results in
       shrinking the chosen element.
    """

    table: list[tuple[int, int, float]]  # (base_idx, alt_idx, alt_chance)

    def __init__(self, weights: Sequence[float], *, observe: bool = True):
        self.observe = observe
        self.table = compute_sampler_table(tuple(weights))

    def sample(
        self,
        data: "ConjectureData",
        *,
        forced: Optional[int] = None,
    ) -> int:
        if self.observe:
            data.start_span(SAMPLE_IN_SAMPLER_LABEL)
        forced_choice = (  # pragma: no branch # https://github.com/nedbat/coveragepy/issues/1617
            None
            if forced is None
            else next(
                (base, alternate, alternate_chance)
                for (base, alternate, alternate_chance) in self.table
                if forced == base or (forced == alternate and alternate_chance > 0)
            )
        )
        base, alternate, alternate_chance = data.choice(
            self.table,
            forced=forced_choice,
            observe=self.observe,
        )
        forced_use_alternate = None
        if forced is not None:
            # we maintain this invariant when picking forced_choice above.
            # This song and dance about alternate_chance > 0 is to avoid forcing
            # e.g. draw_boolean(p=0, forced=True), which is an error.
            forced_use_alternate = forced == alternate and alternate_chance > 0
            assert forced == base or forced_use_alternate

        use_alternate = data.draw_boolean(
            alternate_chance,
            forced=forced_use_alternate,
            observe=self.observe,
        )
        if self.observe:
            data.stop_span()
        if use_alternate:
            assert forced is None or alternate == forced, (forced, alternate)
            return alternate
        else:
            assert forced is None or base == forced, (forced, base)
            return base


INT_SIZES = (8, 16, 32, 64, 128)
INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False)
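# As a quick illustration: with these weights (total 14.5), INT_SIZES_SAMPLER
# draws 8-bit sizes with probability 4/14.5 ≈ 28%, 16-bit with 8/14.5 ≈ 55%,
# 32- and 64-bit with ≈ 7% each, and 128-bit with 0.5/14.5 ≈ 3%.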


class many:
    """Utility class for collections. Bundles up the logic we use for "should I
    keep drawing more values?" and handles starting and stopping examples in
    the right place.

    Intended usage is something like:

    elements = many(data, ...)
    while elements.more():
        add_stuff_to_result()
    """

    def __init__(
        self,
        data: "ConjectureData",
        min_size: int,
        max_size: Union[int, float],
        average_size: Union[int, float],
        *,
        forced: Optional[int] = None,
        observe: bool = True,
    ) -> None:
        assert 0 <= min_size <= average_size <= max_size
        assert forced is None or min_size <= forced <= max_size
        self.min_size = min_size
        self.max_size = max_size
        self.data = data
        self.forced_size = forced
        self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size)
        self.count = 0
        self.rejections = 0
        self.drawn = False
        self.force_stop = False
        self.rejected = False
        self.observe = observe

    def stop_span(self):
        if self.observe:
            self.data.stop_span()

    def start_span(self, label):
        if self.observe:
            self.data.start_span(label)

    def more(self) -> bool:
        """Should I draw another element to add to the collection?"""
        if self.drawn:
            self.stop_span()

        self.drawn = True
        self.rejected = False

        self.start_span(ONE_FROM_MANY_LABEL)
        if self.min_size == self.max_size:
            # if we have to hit an exact size, draw unconditionally until that
            # point, and no further.
            should_continue = self.count < self.min_size
        else:
            forced_result = None
            if self.force_stop:
                # if our size is forced, we can't reject in a way that would
                # cause us to differ from the forced size.
                assert self.forced_size is None or self.count == self.forced_size
                forced_result = False
            elif self.count < self.min_size:
                forced_result = True
            elif self.count >= self.max_size:
                forced_result = False
            elif self.forced_size is not None:
                forced_result = self.count < self.forced_size
            should_continue = self.data.draw_boolean(
                self.p_continue,
                forced=forced_result,
                observe=self.observe,
            )

        if should_continue:
            self.count += 1
            return True
        else:
            self.stop_span()
            return False

    def reject(self, why: Optional[str] = None) -> None:
        """Reject the last example (i.e. don't count it towards our budget of
        elements because it's not going to go in the final collection)."""
        assert self.count > 0
        self.count -= 1
        self.rejections += 1
        self.rejected = True
        # We set a minimum number of rejections before we give up to avoid
        # failing too fast when we reject the first draw.
        if self.rejections > max(3, 2 * self.count):
            if self.count < self.min_size:
                self.data.mark_invalid(why)
            else:
                self.force_stop = True
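
# To make the rejection budget concrete: when count == 2 at the time of the
# check above, max(3, 2 * count) == 4, so the condition fires once rejections
# exceeds 4 - at which point we either mark the test case invalid (if still
# below min_size) or stop the collection at its current size.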


SMALLEST_POSITIVE_FLOAT: float = next_up(0.0) or sys.float_info.min


@lru_cache
def _calc_p_continue(desired_avg: float, max_size: Union[int, float]) -> float:
    """Return the p_continue which will generate the desired average size."""
    assert desired_avg <= max_size, (desired_avg, max_size)
    if desired_avg == max_size:
        return 1.0
    p_continue = 1 - 1.0 / (1 + desired_avg)
    if p_continue == 0 or max_size == math.inf:
        assert 0 <= p_continue < 1, p_continue
        return p_continue
    assert 0 < p_continue < 1, p_continue
    # For small max_size, the infinite-series p_continue is a poor approximation,
    # and while we can't solve the polynomial, a few rounds of iteration quickly
    # get us a good approximate solution in almost all cases (sometimes exact!).
    while _p_continue_to_avg(p_continue, max_size) > desired_avg:
        # This is impossible over the reals, but *can* happen with floats.
        p_continue -= 0.0001
        # If we've reached zero or gone negative, we want to break out of this loop,
        # and do so even if we're on a system with the unsafe denormals-are-zero flag.
        # We make that an explicit error in st.floats(), but here we'd prefer to
        # just get somewhat worse precision on collection lengths.
        if p_continue < SMALLEST_POSITIVE_FLOAT:
            p_continue = SMALLEST_POSITIVE_FLOAT
            break
    # Let's binary-search our way to a better estimate! We tried fancier options
    # like gradient descent, but this is numerically stable and works better.
    hi = 1.0
    while desired_avg - _p_continue_to_avg(p_continue, max_size) > 0.01:
        assert 0 < p_continue < hi, (p_continue, hi)
        mid = (p_continue + hi) / 2
        if _p_continue_to_avg(mid, max_size) <= desired_avg:
            p_continue = mid
        else:
            hi = mid
    assert 0 < p_continue < 1, p_continue
    assert _p_continue_to_avg(p_continue, max_size) <= desired_avg
    return p_continue
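
# A quick sanity check on the first estimate: with max_size == math.inf and
# desired_avg == 4, the closed form gives p_continue == 1 - 1/5 == 0.8, and a
# process that continues with probability 0.8 does indeed run for
# 0.8 / 0.2 == 4 extra elements on average.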


def _p_continue_to_avg(p_continue: float, max_size: Union[int, float]) -> float:
    """Return the average_size generated by this p_continue and max_size."""
    if p_continue >= 1:
        return max_size
    return (1.0 / (1 - p_continue) - 1) * (1 - p_continue**max_size)
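
# Where the closed form above comes from: with continue-probability p, the
# chance of drawing at least k elements is p**k, so the expected size capped
# at max_size is
#     sum(p**k for k in range(1, max_size + 1))
#         == (p / (1 - p)) * (1 - p**max_size)
# and p / (1 - p) is exactly the 1 / (1 - p) - 1 factor written above.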