Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/statistics.py: 74%

1# This file is part of Hypothesis, which may be found at

2# https://github.com/HypothesisWorks/hypothesis/

4# Copyright the Hypothesis Authors.

5# Individual contributors are listed in AUTHORS.rst and the git log.

7# This Source Code Form is subject to the terms of the Mozilla Public License,

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can

9# obtain one at https://mozilla.org/MPL/2.0/.

11import abc

12import math

14from hypothesis.internal.floats import clamp

16# Liam: please be aware that an LLM wrote the stdtr and stdtrit functions, with some

17# guidance from me on comparing to scipy's existing implementation.

18#

19# This code is strongly in-distribution for AI, and is tested against a strong

20# oracle (scipy's implementation), which are the only reasons I am willing to

21# accept this code without deeply understanding it.

24def stdtr(df: int, t: float) -> float: # pragma: no cover # covered by tests/scipy

25 """Student's t CDF for integer df >= 1, evaluated at t.

27 Closed-form finite sum from Abramowitz & Stegun 26.7.7-8.

28 """

29 assert isinstance(df, int)

30 if df < 1:

31 raise ValueError(f"stdtr requires integer df >= 1, got {df}")

32 if t == 0.0:

33 return 0.5

34 abs_t = abs(t)

35 z = 1.0 + abs_t * abs_t / df

36 if df % 2 == 1:

37 # odd: includes an arctan term

38 u = abs_t / math.sqrt(df)

39 p = math.atan(u)

40 if df > 1:

41 f = 1.0

42 tz = 1.0

43 j = 3

44 while j <= df - 2:

45 tz *= (j - 1) / (z * j)

46 f += tz

47 j += 2

48 p += f * u / z

49 p *= 2.0 / math.pi

50 else:

51 # even: simple finite sum, no arctan

52 f = 1.0

53 tz = 1.0

54 j = 2

55 while j <= df - 2:

56 tz *= (j - 1) / (z * j)

57 f += tz

58 j += 2

59 p = f * abs_t / math.sqrt(z * df)

60 return 0.5 - 0.5 * p if t < 0 else 0.5 + 0.5 * p

63def stdtrit(

64 df: int, p: float, *, eps: float = 1e-10, max_iter: int = 50

65) -> float: # pragma: no cover # covered by tests/scipy

66 """Inverse Student's t CDF (quantile) for integer df >= 1.

68 df ∈ {1, 2}: closed-form analytic quantile (Shaw 2006, eq 35-36).

70 df >= 3: bracketed Newton iteration on stdtr — doubling search from t=1

71 to bracket the target, then Newton steps with bisection fallback. Always

72 converges; ~5-10 iterations typical.

73 """

74 if df < 1:

75 raise ValueError(f"stdtrit requires integer df >= 1, got {df}")

76 if p == 0.5:

77 return 0.0

78 if not 0.0 < p < 1.0:

79 raise ValueError(f"stdtrit requires 0 < p < 1, got {p}")

80 if df == 1:

81 # Cauchy: F^{-1}(p) = -cot(pi p). Reflect via 1-p when p > 0.5 because

82 # sin(pi p) near pi suffers cancellation; sin(pi (1-p)) near 0 is exact.

83 if p > 0.5:

84 return math.cos(math.pi * (1 - p)) / math.sin(math.pi * (1 - p))

85 return -math.cos(math.pi * p) / math.sin(math.pi * p)

86 if df == 2:

87 return (2 * p - 1) / math.sqrt(2 * p * (1 - p))

89 sign = 1.0 if p > 0.5 else -1.0

90 q = p if p > 0.5 else 1 - p

92 lo, hi = 0.0, 1.0

93 while stdtr(df, hi) < q:

94 hi *= 2

96 log_norm = (

97 math.lgamma(0.5 * (df + 1))

98 - 0.5 * math.log(df * math.pi)

99 - math.lgamma(0.5 * df)

100 )

101 t = 0.5 * (lo + hi)

102 for _ in range(max_iter):

103 F = stdtr(df, t)

104 if F < q:

105 lo = t

106 else:

107 hi = t

108 log_f = log_norm - 0.5 * (df + 1) * math.log1p(t * t / df)

109 f = math.exp(log_f)

110 if f == 0.0:

111 t = 0.5 * (lo + hi)

112 else:

113 t_newton = t - (F - q) / f

114 t = t_newton if lo <= t_newton <= hi else 0.5 * (lo + hi)

115 if hi - lo < eps * (1 + abs(t)):

116 break

117 return sign * t

118

119

120# Liam: I'm not convinced this abstraction pays for itself. If it's a hindrance in the

121# future, feel free to refactor. This class is mainly here to concretize the minimal set

122# of abstractions our integer sampling code expects a distribution to define, and to

123# provide stronger guardrails around composition.

124#

125# Note that our code makes the explicit assumption that any _Distribution

126# implementation is symmetric around 0. This implies inverse_cdf(0.5) = 0, for example.

127# Do not break this assumption without verifying carefully against callers.

128class _Distribution(abc.ABC):

129 @abc.abstractmethod

130 def cdf(self, x: float) -> float:

131 raise NotImplementedError

132

133 @abc.abstractmethod

134 def inverse_cdf(self, u: float) -> float:

135 raise NotImplementedError

136

137 @abc.abstractmethod

138 def pdf(self, x: float) -> float:

139 raise NotImplementedError

140

141

142class UniformDistribution(_Distribution):

143 """Uniform distribution on [-half_width, half_width]."""

144

145 def __init__(self, *, half_width: float) -> None:

146 self.half_width = half_width

147

148 def cdf(self, x: float) -> float:

149 if x < -self.half_width:

150 return 0.0 # pragma: no cover

151 if x > self.half_width:

152 return 1.0 # pragma: no cover

153 return (x + self.half_width) / (2 * self.half_width)

154

155 def inverse_cdf(self, u: float) -> float:

156 return -self.half_width + 2 * self.half_width * u

157

158 def pdf(self, x: float) -> float:

159 if -self.half_width <= x <= self.half_width:

160 return 1 / (2 * self.half_width)

161 return 0.0 # pragma: no cover

162

163

164class LogStudentTDistribution(_Distribution):

165 """Student's t distribution, in the transformed domain of log_2(x).

166

167 Y = sign(x) * log_2(1 + |x|) ~ scale_bits * t(df).

168

169 Note that we only support integer df. This is to reduce surface area in the vendored

170 mathematical functions, which are simpler when df is an integer. There is no

171 fundamental difficulty to supporting float df, other than the necessary due diligence

172 for the vendored code.

173 """

174

175 LN2 = math.log(2)

176

177 def __init__(self, *, scale_bits: float, df: int) -> None:

178 self.scale_bits = scale_bits

179 self.df = df

180 self._t_coef = math.gamma((df + 1) / 2) / (

181 math.sqrt(df * math.pi) * math.gamma(df / 2)

182 )

183

184 def cdf(self, x: float) -> float:

185 y = math.copysign(math.log2(1 + abs(x)), x) / self.scale_bits

186 return float(stdtr(self.df, y))

187

188 def inverse_cdf(self, u: float) -> float:

189 y = self.scale_bits * float(stdtrit(self.df, u))

190 # 2^1023 is the largest power of 2 below sys.float_info.max. This makes y=1023

191 # the largest value we can turn into a float to return here without overflowing.

192 #

193 # In the rare case that we draw such an extreme value, clamp to the bounds.

194 y = clamp(-1023, y, 1023)

195 return math.copysign(math.expm1(abs(y) * self.LN2), y)

196

197 def pdf(self, x: float) -> float:

198 y = math.copysign(math.log2(1 + abs(x)), x) / self.scale_bits

199 f_t = self._t_coef * (1 + y * y / self.df) ** (-(self.df + 1) / 2)

200 return f_t / (self.scale_bits * (1 + abs(x)) * self.LN2)

201

202

203class PiecewiseDistribution:

204 """Two-region splice: `inner` on (-switchover, switchover),

205 `outer` on |x| >= switchover.

206

207 Each region is normalized so that the resulting density is continuous at

208 ±switchover and integrates to 1. Both inner and outer must be symmetric around 0.

209 """

210

211 def __init__(

212 self, *, inner: _Distribution, outer: _Distribution, switchover: float

213 ) -> None:

214 # Note that this code could be made simpler by taking advantage of our assumption that

215 # `inner` and `outer` are symmetric around 0. The code is currently generic enough

216 # to support asymmetric distributions, but doesn't need to be,

217 self.inner = inner

218 self.outer = outer

219 self.switchover = switchover

220 self._inner_g_neg = inner.cdf(-switchover)

221 self._inner_g_pos = inner.cdf(switchover)

222 self._outer_g_neg = outer.cdf(-switchover)

223 self._outer_g_pos = outer.cdf(switchover)

224 outer_outer_mass = 1 - (self._outer_g_pos - self._outer_g_neg)

225 inner_inner_mass = self._inner_g_pos - self._inner_g_neg

226 # density continuity: beta * inner.pdf(c) = alpha * outer.pdf(c)

227 # plus total mass 1; solve for (alpha, beta).

228 inner_pdf = inner.pdf(switchover)

229 outer_pdf = outer.pdf(switchover)

230 assert inner_pdf != 0, (

231 f"inner.pdf(switchover={switchover}) == 0; cannot density-match. inner "

232 "must have support at the boundary."

233 )

234 self._alpha = 1 / (outer_pdf * inner_inner_mass / inner_pdf + outer_outer_mass)

235 self._beta = self._alpha * outer_pdf / inner_pdf

236 self._inner_mass = self._beta * inner_inner_mass

237 self._left_mass = self._alpha * self._outer_g_neg

238

239 def cdf(self, x: float) -> float:

240 if x <= -self.switchover:

241 return self._alpha * self.outer.cdf(x)

242 if x < self.switchover:

243 return self._left_mass + self._beta * (

244 self.inner.cdf(x) - self._inner_g_neg

245 )

246 return (

247 self._left_mass

248 + self._inner_mass

249 + self._alpha * (self.outer.cdf(x) - self._outer_g_pos)

250 )

251

252 def inverse_cdf(self, u: float) -> float:

253 if u <= self._left_mass:

254 return self.outer.inverse_cdf(u / self._alpha)

255 if u < self._left_mass + self._inner_mass:

256 target = self._inner_g_neg + (u - self._left_mass) / self._beta

257 return self.inner.inverse_cdf(target)

258 return self.outer.inverse_cdf(

259 (u - self._left_mass - self._inner_mass) / self._alpha + self._outer_g_pos

260 )