Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/

1# This file is part of Hypothesis, which may be found at

2# https://github.com/HypothesisWorks/hypothesis/

4# Copyright the Hypothesis Authors.

5# Individual contributors are listed in AUTHORS.rst and the git log.

7# This Source Code Form is subject to the terms of the Mozilla Public License,

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can

9# obtain one at https://mozilla.org/MPL/2.0/.

11import copy

12import re

13import warnings

14from collections.abc import Collection

15from functools import cache, lru_cache, partial

16from typing import cast

18from hypothesis.errors import HypothesisWarning, InvalidArgument

19from hypothesis.internal import charmap

20from hypothesis.internal.charmap import Categories

21from hypothesis.internal.conjecture.data import ConjectureData

22from hypothesis.internal.conjecture.providers import COLLECTION_DEFAULT_MAX_SIZE

23from hypothesis.internal.filtering import max_len, min_len

24from hypothesis.internal.intervalsets import IntervalSet

25from hypothesis.internal.reflection import get_pretty_function_description

26from hypothesis.strategies._internal.collections import ListStrategy

27from hypothesis.strategies._internal.lazy import unwrap_strategies

28from hypothesis.strategies._internal.strategies import (

29 OneOfStrategy,

30 SampledFromStrategy,

31 SearchStrategy,

32)

33from hypothesis.vendor.pretty import pretty

# Only single-character strings are ever stored, so the cache is bounded by
# sys.maxunicode; leaving it unbounded is slightly faster than a sized LRU.
@cache
# This check is part of our user-facing validation, so ``c`` is deliberately
# annotated as ``object`` rather than ``str``: we don't want mypyc to reject
# a bad value before we get the chance to report it ourselves.
def _check_is_single_character(c: object) -> str:
    """Return ``c`` unchanged if it is a string of length exactly one.

    Raises:
        InvalidArgument: if ``c`` is not a string, or is a string whose
            length differs from one.
    """
    # One cache is shared by every caller to keep this check cheap; the
    # trade-off is that the error message cannot name the culprit strategy.
    if isinstance(c, str):
        if len(c) == 1:
            return c
        raise InvalidArgument(f"Got {c!r} (length {len(c)} != 1)")
    type_ = get_pretty_function_description(type(c))
    raise InvalidArgument(f"Got non-string {c!r} (type {type_})")

class OneCharStringStrategy(SearchStrategy[str]):
    """Strategy that draws strings containing exactly one character.

    The permitted characters are kept as an ``IntervalSet`` on
    ``self.intervals``.
    """

    def __init__(self, intervals: IntervalSet, force_repr: str | None = None) -> None:
        """Store the allowed ``intervals`` and an optional fixed ``repr``."""
        super().__init__()
        assert isinstance(intervals, IntervalSet)
        self.intervals = intervals
        self._force_repr = force_repr

    @classmethod
    def from_characters_args(
        cls,
        *,
        codec: str | None = None,
        min_codepoint: int | None = None,
        max_codepoint: int | None = None,
        categories: Categories | None = None,
        exclude_characters: Collection[str] = "",
        include_characters: Collection[str] = "",
    ) -> "OneCharStringStrategy":
        """Build a strategy from ``characters()``-style keyword arguments.

        Raises:
            InvalidArgument: if the combination of arguments leaves no
                character that could be generated.
        """
        assert set(categories or ()).issubset(charmap.categories())
        intervals = charmap.query(
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            categories=categories,
            exclude_characters=exclude_characters,
            include_characters=include_characters,
        )
        if codec is not None:
            intervals &= charmap.intervals_from_codec(codec)

        # Collect the "name=value" pairs shown in the repr and in the error
        # message, skipping every argument that was left unset.
        shown = []
        for name, value in (
            ("codec", codec),
            ("min_codepoint", min_codepoint),
            ("max_codepoint", max_codepoint),
            ("categories", categories),
            ("exclude_characters", exclude_characters),
            ("include_characters", include_characters),
        ):
            if value in (None, ""):
                continue
            if name == "categories":
                # value has to be `categories` here. Help mypy along to infer that.
                chosen = set(cast(Categories, value))
                # Selecting every category except "Cs" is also left out.
                if chosen == set(charmap.categories()) - {"Cs"}:
                    continue
            shown.append(f"{name}={value!r}")
        _arg_repr = ", ".join(shown)

        if not intervals:
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                f"combination of arguments: {_arg_repr}"
            )
        return cls(intervals, force_repr=f"characters({_arg_repr})")

    @classmethod
    def from_alphabet(cls, alphabet: str | SearchStrategy) -> "OneCharStringStrategy":
        """Convert an alphabet, given as a string or a strategy, to this class.

        A strategy is accepted when, once unwrapped, it is an instance of this
        class, a ``SampledFromStrategy`` of single characters, or a
        ``OneOfStrategy`` whose branches are themselves acceptable.

        Raises:
            InvalidArgument: if ``alphabet`` is any other kind of strategy, or
                a sampled element is not a single-character string.
        """
        if isinstance(alphabet, str):
            return cls.from_characters_args(categories=(), include_characters=alphabet)

        assert isinstance(alphabet, SearchStrategy)
        unwrapped = unwrap_strategies(alphabet)
        if isinstance(unwrapped, cls):
            return unwrapped
        if isinstance(unwrapped, SampledFromStrategy):
            # Validate every element before handing them to the charmap query.
            for element in unwrapped.elements:
                _check_is_single_character(element)
            return cls.from_characters_args(
                categories=(),
                include_characters=unwrapped.elements,
            )
        if isinstance(unwrapped, OneOfStrategy):
            # Convert each branch recursively and take the union of the results.
            combined = IntervalSet()
            for branch in unwrapped.element_strategies:
                combined = combined.union(cls.from_alphabet(branch).intervals)
            return cls(combined, force_repr=repr(alphabet))
        raise InvalidArgument(
            f"{alphabet=} must be a sampled_from() or characters() strategy"
        )

    def __repr__(self) -> str:
        """Return the forced repr when one was given, else a generic one."""
        if self._force_repr:
            return self._force_repr
        return f"OneCharStringStrategy({self.intervals!r})"

    def do_draw(self, data: ConjectureData) -> str:
        """Draw a string of length exactly one from ``self.intervals``."""
        return data.draw_string(self.intervals, min_size=1, max_size=1)

136

137

# Method names shared by ``str`` and ``bytes``.  TextStrategy and BytesStrategy
# look them up with ``getattr`` to build their ``_nonempty_filters`` tuples.
_nonempty_names = (
    "capitalize", "expandtabs", "join", "lower", "rsplit",
    "split", "splitlines", "swapcase", "title", "upper"
)  # fmt: skip

# As above, but feeding the ``_nonempty_and_content_filters`` tuples: used as a
# filter, these imply a nonempty value *and* constrain its contents.
_nonempty_and_content_names = (
    "islower", "isupper", "isalnum", "isalpha", "isascii", "isdigit",
    "isspace", "istitle", "lstrip", "rstrip", "strip"
)  # fmt: skip

163

164

class TextStrategy(ListStrategy[str]):
    """Strategy for ``str`` values built from a strategy for the characters."""

    def do_draw(self, data):
        """Draw one string from ``data``."""
        # With a OneCharStringStrategy alphabet we can bypass the ListStrategy
        # draw and ask data.draw_string for the whole string directly.  Doing
        # that for user-provided element strategies is not correct in general,
        # as they may define a different distribution than data.draw_string.
        char_strategy = unwrap_strategies(self.element_strategy)
        if not isinstance(char_strategy, OneCharStringStrategy):
            return "".join(super().do_draw(data))

        # draw_string is given a finite bound; substitute the default cap
        # when this strategy is unbounded.
        if self.max_size == float("inf"):
            size_cap = COLLECTION_DEFAULT_MAX_SIZE
        else:
            size_cap = self.max_size
        return data.draw_string(
            char_strategy.intervals,
            min_size=self.min_size,
            max_size=size_cap,
        )

    def __repr__(self) -> str:
        """Render as a ``text(...)`` call, omitting default-looking arguments."""
        element_repr = repr(self.element_strategy)
        parts = [] if element_repr == "characters()" else [element_repr]
        if self.min_size:
            parts.append(f"min_size={self.min_size}")
        if self.max_size < float("inf"):
            parts.append(f"max_size={self.max_size}")
        return f"text({', '.join(parts)})"

    # See https://docs.python.org/3/library/stdtypes.html#string-methods
    # Each of these callables returns a truthy value for any nonempty string.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        str,
        str.casefold,
        str.encode,
        *(getattr(str, name) for name in _nonempty_names),
    )
    # Used as filters, these imply a nonempty string *and* specific contents.
    _nonempty_and_content_filters = (
        str.isdecimal,
        str.isnumeric,
        *(getattr(str, name) for name in _nonempty_and_content_names),
    )

    def filter(self, condition):
        """Filter by ``condition``, rewriting to a cheaper strategy if possible."""
        char_strategy = unwrap_strategies(self.element_strategy)
        wants_identifier = (
            condition is str.isidentifier
            and self.max_size >= 1
            and isinstance(char_strategy, OneCharStringStrategy)
        )
        if wants_identifier:
            from hypothesis.strategies import builds, nothing

            id_start, id_continue = _identifier_characters()
            first_chars = char_strategy.intervals & id_start
            if not first_chars:
                # No allowed character can start an identifier.
                return nothing()
            # One identifier-start character followed by identifier-continue
            # characters, with the size bounds shifted down by one to match.
            head = OneCharStringStrategy(first_chars)
            tail = TextStrategy(
                OneCharStringStrategy(char_strategy.intervals & id_continue),
                min_size=max(0, self.min_size - 1),
                max_size=self.max_size - 1,
            )
            # Filter to ensure that NFKC normalization keeps working in future
            return builds("{}{}".format, head, tail).filter(str.isidentifier)

        rewritten = _string_filter_rewrite(self, str, condition)
        if rewritten is not None:
            return rewritten
        return super().filter(condition)

234

235

def _string_filter_rewrite(self, kind, condition):
    """Try to replace ``self.filter(condition)`` with an equivalent strategy.

    Shared by ``TextStrategy.filter`` (``kind is str``) and
    ``BytesStrategy.filter`` (``kind is bytes``).

    Args:
        self: the strategy being filtered.  Its ``min_size``, ``max_size`` and
            ``_nonempty_and_content_filters`` are read, plus
            ``element_strategy`` when ``kind`` is ``str``.
        kind: ``str`` or ``bytes``.
        condition: the predicate that was passed to ``.filter()``.

    Returns:
        A replacement strategy, or ``None`` when no rewrite applies and the
        caller should fall back to its ordinary filtering.
    """
    if condition in (kind.lower, kind.title, kind.upper):
        k = kind.__name__
        warnings.warn(
            f"You applied {k}.{condition.__name__} as a filter, but this allows "
            f"all nonempty strings! Did you mean {k}.is{condition.__name__}?",
            HypothesisWarning,
            stacklevel=2,
        )

    # Bound methods of a compiled regex (``pattern.search`` etc.) can be turned
    # into a strategy that generates matching values directly.
    if (
        (
            kind is bytes
            or isinstance(
                unwrap_strategies(self.element_strategy), OneCharStringStrategy
            )
        )
        and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)
        and isinstance(pattern.pattern, kind)
    ):
        from hypothesis.strategies._internal.regex import regex_strategy

        if condition.__name__ == "match":
            # Replace with an easier-to-handle equivalent condition.
            # `pattern.match` only ever matches at the very start of the
            # string, even under re.MULTILINE, so anchor with `\A` and not `^`:
            # in MULTILINE mode `^` also matches after each newline, which
            # would let `search` accept strings that `match` rejects.
            # NOTE(review): the wrapped pattern may fail to compile if the
            # original ends in a re.VERBOSE comment or uses leading inline
            # global flags - confirm whether that needs handling.
            anchor, close = (r"\A(?:", ")") if kind is str else (rb"\A(?:", b")")
            pattern = re.compile(anchor + pattern.pattern + close, flags=pattern.flags)
            condition = pattern.search

        if condition.__name__ in ("search", "findall", "fullmatch"):
            s = regex_strategy(
                pattern,
                fullmatch=condition.__name__ == "fullmatch",
                alphabet=self.element_strategy if kind is str else None,
            )
            # The regex strategy knows nothing of our size bounds; re-apply them.
            if self.min_size > 0:
                s = s.filter(partial(min_len, self.min_size))
            if self.max_size < float("inf"):
                s = s.filter(partial(max_len, self.max_size))
            return s
        elif condition.__name__ in ("finditer", "scanner"):
            # PyPy implements `finditer` as an alias to their `scanner` method
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any string at all! Did you mean .findall ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self
        elif condition.__name__ == "split":
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any nonempty string! Did you mean .search ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self.filter(bool)

    # We use ListStrategy filter logic for the conditions that *only* imply
    # the string is nonempty.  Here, we increment the min_size but still apply
    # the filter for conditions that imply nonempty *and specific contents*.
    if condition in self._nonempty_and_content_filters and self.max_size >= 1:
        # Work on a copy so the strategy we were called on is not mutated.
        self = copy.copy(self)
        self.min_size = max(1, self.min_size)
        return ListStrategy.filter(self, condition)

    return None

302

303

# Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt
# Python updates its Unicode version between minor releases, but fortunately
# these properties do not change between the Unicode versions in question.
#
# Each data line reads "<code point or first..last range> ; <property> # ...";
# `_identifier_characters` extracts the code points and the property name.
_PROPLIST = """
# ================================================

1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
212E ; Other_ID_Start # So ESTIMATED SYMBOL
309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK

# Total code points: 6

# ================================================

00B7 ; Other_ID_Continue # Po MIDDLE DOT
0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE

# Total code points: 12
"""

326

327

@lru_cache
def _identifier_characters() -> tuple[IntervalSet, IntervalSet]:
    """Return the characters that may start, and may continue, an identifier.

    See https://docs.python.org/3/reference/lexical_analysis.html#identifiers

    Returns:
        ``(id_start, id_continue)``.  ``id_continue`` is built as a superset
        of ``id_start``.  The result is cached, since it never changes.
    """
    # Start by computing the set of special characters
    chars = {"Other_ID_Start": "", "Other_ID_Continue": ""}
    for line in _PROPLIST.splitlines():
        if m := re.match(r"([0-9A-F.]+) +; (\w+) # ", line):
            codes, prop = m.groups()
            # An entry is either one code point ("2118") or an inclusive range
            # ("1885..1886").  Split on the separator rather than slicing
            # fixed four-character fields, so that code points with more than
            # four hex digits are parsed correctly as well.
            first, _, last = codes.partition("..")
            span = range(int(first, base=16), int(last or first, base=16) + 1)
            chars[prop] += "".join(map(chr, span))

    # Then get the basic set by Unicode category and known extras
    id_start = charmap.query(
        categories=("Lu", "Ll", "Lt", "Lm", "Lo", "Nl"),
        include_characters="_" + chars["Other_ID_Start"],
    )
    id_start -= IntervalSet.from_string(
        # Magic value: the characters which NFKC-normalize to be invalid identifiers.
        # Conveniently they're all in `id_start`, so we only need to do this once.
        "\u037a\u0e33\u0eb3\u2e2f\u309b\u309c\ufc5e\ufc5f\ufc60\ufc61\ufc62\ufc63"
        "\ufdfa\ufdfb\ufe70\ufe72\ufe74\ufe76\ufe78\ufe7a\ufe7c\ufe7e\uff9e\uff9f"
    )
    id_continue = id_start | charmap.query(
        categories=("Mn", "Mc", "Nd", "Pc"),
        include_characters=chars["Other_ID_Continue"],
    )
    return id_start, id_continue

355

356

class BytesStrategy(SearchStrategy):
    """Strategy for ``bytes``, drawn with ``data.draw_bytes(min_size, max_size)``."""

    def __init__(self, min_size: int, max_size: int | None):
        """Store the size bounds; ``max_size=None`` selects the default cap."""
        super().__init__()
        self.min_size = min_size
        if max_size is None:
            max_size = COLLECTION_DEFAULT_MAX_SIZE
        self.max_size = max_size

    def do_draw(self, data: ConjectureData) -> bytes:
        """Draw one ``bytes`` value from ``data``."""
        return data.draw_bytes(self.min_size, self.max_size)

    # Same structure as the tuples on TextStrategy, built from the ``bytes``
    # methods with the shared names.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        bytes,
        *(getattr(bytes, name) for name in _nonempty_names),
    )
    _nonempty_and_content_filters = tuple(
        getattr(bytes, name) for name in _nonempty_and_content_names
    )

    def filter(self, condition):
        """Filter by ``condition``, rewriting to a cheaper strategy if possible."""
        rewritten = _string_filter_rewrite(self, bytes, condition)
        if rewritten is None:
            # No rewrite applies: reuse ListStrategy's filter implementation,
            # called unbound because this class is not a ListStrategy.
            return ListStrategy.filter(self, condition)
        return rewritten

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/_internal/strings.py: 38%

149 statements