# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import enum
import hashlib
import heapq
import math
import sys
from collections import OrderedDict, abc
from collections.abc import Sequence
from functools import lru_cache
from typing import TYPE_CHECKING, Optional, TypeVar, Union

from hypothesis.errors import InvalidArgument
from hypothesis.internal.compat import int_from_bytes
from hypothesis.internal.floats import next_up

if TYPE_CHECKING:
    from hypothesis.internal.conjecture.data import ConjectureData


LABEL_MASK = 2**64 - 1


def calc_label_from_name(name: str) -> int:
    hashed = hashlib.sha384(name.encode()).digest()
    return int_from_bytes(hashed[:8])


def calc_label_from_cls(cls: type) -> int:
    return calc_label_from_name(cls.__qualname__)


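# Note: str and bytes hashes are randomized per-process unless PYTHONHASHSEED
# is fixed, so the label below is only stable across runs for objects whose
# hash() is deterministic (ints, and tuples of them, for example).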
def calc_label_from_hash(obj: object) -> int:
    return calc_label_from_name(str(hash(obj)))


def combine_labels(*labels: int) -> int:
    label = 0
    for l in labels:
        label = (label << 1) & LABEL_MASK
        label ^= l
    return label
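

# A quick illustration of the fold above (a sketch, not part of the API):
#
#     >>> a = calc_label_from_name("integers()")
#     >>> b = calc_label_from_name("lists()")
#     >>> combine_labels(a, b) == (((a << 1) & LABEL_MASK) ^ b)
#     True
#
# Because each partial result is shifted before the next label is XOR-ed in,
# the combination is order-sensitive: combine_labels(a, b) and
# combine_labels(b, a) differ in general.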


SAMPLE_IN_SAMPLER_LABEL = calc_label_from_name("a sample() in Sampler")
ONE_FROM_MANY_LABEL = calc_label_from_name("one more from many()")


T = TypeVar("T")


def identity(v: T) -> T:
    return v


def check_sample(
    values: Union[type[enum.Enum], Sequence[T]], strategy_name: str
) -> Sequence[T]:
    if "numpy" in sys.modules and isinstance(values, sys.modules["numpy"].ndarray):
        if values.ndim != 1:
            raise InvalidArgument(
                "Only one-dimensional arrays are supported for sampling, "
                f"and the given value has {values.ndim} dimensions (shape "
                f"{values.shape}). This array would give samples of array slices "
                "instead of elements! Use np.ravel(values) to convert "
                "to a one-dimensional array, or tuple(values) if you "
                "want to sample slices."
            )
    elif not isinstance(values, (OrderedDict, abc.Sequence, enum.EnumMeta)):
        raise InvalidArgument(
            f"Cannot sample from {values!r} because it is not an ordered collection. "
            f"Hypothesis goes to some length to ensure that the {strategy_name} "
            "strategy has stable results between runs. To replay a saved "
            "example, the sampled values must have the same iteration order "
            "on every run - ruling out sets, dicts, etc. due to hash "
            "randomization. Most cases can simply use `sorted(values)`, but "
            "mixed types or special values such as math.nan require careful "
            "handling - and note that when simplifying an example, "
            "Hypothesis treats earlier values as simpler."
        )
    if isinstance(values, range):
        return values
    return tuple(values)
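

# A few illustrative cases (a sketch of the behavior above, not a test suite):
#
#     >>> check_sample(range(10), "sampled_from")  # ranges pass through as-is
#     range(0, 10)
#     >>> check_sample([3, 1, 2], "sampled_from")  # other sequences become tuples
#     (3, 1, 2)
#     >>> check_sample({3, 1, 2}, "sampled_from")  # sets are rejected
#     Traceback (most recent call last):
#         ...
#     hypothesis.errors.InvalidArgument: Cannot sample from {1, 2, 3} ...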


@lru_cache(64)
def compute_sampler_table(weights: tuple[float, ...]) -> list[tuple[int, int, float]]:
    n = len(weights)
    table: list[list[int | float | None]] = [[i, None, None] for i in range(n)]
    total = sum(weights)
    num_type = type(total)

    zero = num_type(0)  # type: ignore
    one = num_type(1)  # type: ignore

    small: list[int] = []
    large: list[int] = []

    probabilities = [w / total for w in weights]
    scaled_probabilities: list[float] = []

    for i, alternate_chance in enumerate(probabilities):
        scaled = alternate_chance * n
        scaled_probabilities.append(scaled)
        if scaled == 1:
            table[i][2] = zero
        elif scaled < 1:
            small.append(i)
        else:
            large.append(i)
    heapq.heapify(small)
    heapq.heapify(large)

    while small and large:
        lo = heapq.heappop(small)
        hi = heapq.heappop(large)

        assert lo != hi
        assert scaled_probabilities[hi] > one
        assert table[lo][1] is None
        table[lo][1] = hi
        table[lo][2] = one - scaled_probabilities[lo]
        scaled_probabilities[hi] = (
            scaled_probabilities[hi] + scaled_probabilities[lo]
        ) - one

        if scaled_probabilities[hi] < 1:
            heapq.heappush(small, hi)
        elif scaled_probabilities[hi] == 1:
            table[hi][2] = zero
        else:
            heapq.heappush(large, hi)
    while large:
        table[large.pop()][2] = zero
    while small:
        table[small.pop()][2] = zero

    new_table: list[tuple[int, int, float]] = []
    for base, alternate, alternate_chance in table:
        assert isinstance(base, int)
        assert isinstance(alternate, int) or alternate is None
        assert alternate_chance is not None
        if alternate is None:
            new_table.append((base, base, alternate_chance))
        elif alternate < base:
            new_table.append((alternate, base, one - alternate_chance))
        else:
            new_table.append((base, alternate, alternate_chance))
    new_table.sort()
    return new_table
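

# For intuition, a small worked example (values computed by hand; these
# particular floats happen to be exact in binary, but don't rely on that
# in general):
#
#     >>> compute_sampler_table((1.0, 3.0))
#     [(0, 1, 0.5), (1, 1, 0.0)]
#
# Picking a row uniformly and then taking `alternate` with probability
# `alternate_chance` (else `base`) recovers the 1:3 weighting:
# P(0) = 0.5 * 0.5 = 0.25 and P(1) = 0.5 * 0.5 + 0.5 * 1.0 = 0.75.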


class Sampler:
    """Sampler based on Vose's algorithm for the alias method. See
    http://www.keithschwarz.com/darts-dice-coins/ for a good explanation.

    The general idea is that we store a table of triples (base, alternate, p).
    We then pick a triple uniformly at random, and choose its alternate value
    with probability p, and its base value otherwise. The triples are chosen
    so that the resulting mixture has the right distribution.

    We maintain the following invariants to try to produce good shrinks:

    1. The table is in lexicographic (base, alternate) order, so that choosing
       an earlier triple in the list always lowers (or at least leaves
       unchanged) the chosen value.
    2. base[i] < alternate[i], so that shrinking the draw always results in
       shrinking the chosen element.
    """

    table: list[tuple[int, int, float]]  # (base_idx, alt_idx, alt_chance)

    def __init__(self, weights: Sequence[float], *, observe: bool = True):
        self.observe = observe
        self.table = compute_sampler_table(tuple(weights))

    def sample(
        self,
        data: "ConjectureData",
        *,
        forced: Optional[int] = None,
    ) -> int:
        if self.observe:
            data.start_span(SAMPLE_IN_SAMPLER_LABEL)
        forced_choice = (  # pragma: no branch # https://github.com/nedbat/coveragepy/issues/1617
            None
            if forced is None
            else next(
                (base, alternate, alternate_chance)
                for (base, alternate, alternate_chance) in self.table
                if forced == base or (forced == alternate and alternate_chance > 0)
            )
        )
        base, alternate, alternate_chance = data.choice(
            self.table,
            forced=forced_choice,
            observe=self.observe,
        )
        forced_use_alternate = None
        if forced is not None:
            # We maintain this invariant when picking forced_choice above.
            # The song and dance about alternate_chance > 0 is to avoid forcing
            # e.g. draw_boolean(p=0, forced=True), which is an error.
            forced_use_alternate = forced == alternate and alternate_chance > 0
            assert forced == base or forced_use_alternate

        use_alternate = data.draw_boolean(
            alternate_chance,
            forced=forced_use_alternate,
            observe=self.observe,
        )
        if self.observe:
            data.stop_span()
        if use_alternate:
            assert forced is None or alternate == forced, (forced, alternate)
            return alternate
        else:
            assert forced is None or base == forced, (forced, base)
            return base


INT_SIZES = (8, 16, 32, 64, 128)
INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False)
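# INT_SIZES_SAMPLER draws an index into INT_SIZES, so with the 4:8:1:1:0.5
# weighting above, 16-bit sizes are the most common choice and 128-bit the
# rarest. A hedged sketch of the usage pattern (`data` would be a live
# ConjectureData during generation, and the follow-up draw is hypothetical,
# so this isn't runnable standalone):
#
#     bits = INT_SIZES[INT_SIZES_SAMPLER.sample(data)]
#     value = data.draw_integer(0, 2**bits - 1)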


class many:
    """Utility class for collections. Bundles up the logic we use for "should I
    keep drawing more values?" and handles starting and stopping examples in
    the right place.

    Intended usage is something like:

        elements = many(data, ...)
        while elements.more():
            add_stuff_to_result()
    """

    def __init__(
        self,
        data: "ConjectureData",
        min_size: int,
        max_size: Union[int, float],
        average_size: Union[int, float],
        *,
        forced: Optional[int] = None,
        observe: bool = True,
    ) -> None:
        assert 0 <= min_size <= average_size <= max_size
        assert forced is None or min_size <= forced <= max_size
        self.min_size = min_size
        self.max_size = max_size
        self.data = data
        self.forced_size = forced
        self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size)
        self.count = 0
        self.rejections = 0
        self.drawn = False
        self.force_stop = False
        self.rejected = False
        self.observe = observe

    def stop_span(self):
        if self.observe:
            self.data.stop_span()

    def start_span(self, label):
        if self.observe:
            self.data.start_span(label)

    def more(self) -> bool:
        """Should I draw another element to add to the collection?"""
        if self.drawn:
            self.stop_span()

        self.drawn = True
        self.rejected = False

        self.start_span(ONE_FROM_MANY_LABEL)
        if self.min_size == self.max_size:
            # if we have to hit an exact size, draw unconditionally until that
            # point, and no further.
            should_continue = self.count < self.min_size
        else:
            forced_result = None
            if self.force_stop:
                # if our size is forced, we can't reject in a way that would
                # cause us to differ from the forced size.
                assert self.forced_size is None or self.count == self.forced_size
                forced_result = False
            elif self.count < self.min_size:
                forced_result = True
            elif self.count >= self.max_size:
                forced_result = False
            elif self.forced_size is not None:
                forced_result = self.count < self.forced_size
            should_continue = self.data.draw_boolean(
                self.p_continue,
                forced=forced_result,
                observe=self.observe,
            )

        if should_continue:
            self.count += 1
            return True
        else:
            self.stop_span()
            return False

    def reject(self, why: Optional[str] = None) -> None:
        """Reject the last example (i.e. don't count it towards our budget of
        elements because it's not going to go in the final collection)."""
        assert self.count > 0
        self.count -= 1
        self.rejections += 1
        self.rejected = True
        # We set a minimum number of rejections before we give up to avoid
        # failing too fast when we reject the first draw.
        if self.rejections > max(3, 2 * self.count):
            if self.count < self.min_size:
                self.data.mark_invalid(why)
            else:
                self.force_stop = True


SMALLEST_POSITIVE_FLOAT: float = next_up(0.0) or sys.float_info.min


@lru_cache
def _calc_p_continue(desired_avg: float, max_size: Union[int, float]) -> float:
    """Return the p_continue which will generate the desired average size."""
    assert desired_avg <= max_size, (desired_avg, max_size)
    if desired_avg == max_size:
        return 1.0
    p_continue = 1 - 1.0 / (1 + desired_avg)
    if p_continue == 0 or max_size == math.inf:
        assert 0 <= p_continue < 1, p_continue
        return p_continue
    assert 0 < p_continue < 1, p_continue
    # For small max_size, the infinite-series p_continue is a poor approximation,
    # and while we can't solve the polynomial, a few rounds of iteration quickly
    # get us a good approximate solution in almost all cases (sometimes exact!).
    while _p_continue_to_avg(p_continue, max_size) > desired_avg:
        # This is impossible over the reals, but *can* happen with floats.
        p_continue -= 0.0001
        # If we've reached zero or gone negative, we want to break out of this
        # loop, and do so even if we're on a system with the unsafe
        # denormals-are-zero flag. We make that an explicit error in st.floats(),
        # but here we'd prefer to just get somewhat worse precision on
        # collection lengths.
        if p_continue < SMALLEST_POSITIVE_FLOAT:
            p_continue = SMALLEST_POSITIVE_FLOAT
            break
    # Let's binary-search our way to a better estimate! We tried fancier options
    # like gradient descent, but this is numerically stable and works better.
    hi = 1.0
    while desired_avg - _p_continue_to_avg(p_continue, max_size) > 0.01:
        assert 0 < p_continue < hi, (p_continue, hi)
        mid = (p_continue + hi) / 2
        if _p_continue_to_avg(mid, max_size) <= desired_avg:
            p_continue = mid
        else:
            hi = mid
    assert 0 < p_continue < 1, p_continue
    assert _p_continue_to_avg(p_continue, max_size) <= desired_avg
    return p_continue


def _p_continue_to_avg(p_continue: float, max_size: Union[int, float]) -> float:
    """Return the average_size generated by this p_continue and max_size."""
    if p_continue >= 1:
        return max_size
    return (1.0 / (1 - p_continue) - 1) * (1 - p_continue**max_size)
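

# Sanity check of the round trip (illustrative; values chosen so the math is
# exact): with no size cap, a geometric process that continues with p = 0.5
# produces on average 1 / (1 - 0.5) - 1 = 1 extra element, and the solver
# inverts that directly:
#
#     >>> _p_continue_to_avg(0.5, math.inf)
#     1.0
#     >>> _calc_p_continue(1, math.inf)
#     0.5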