Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/

1# This file is part of Hypothesis, which may be found at

2# https://github.com/HypothesisWorks/hypothesis/

4# Copyright the Hypothesis Authors.

5# Individual contributors are listed in AUTHORS.rst and the git log.

7# This Source Code Form is subject to the terms of the Mozilla Public License,

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can

9# obtain one at https://mozilla.org/MPL/2.0/.

11import copy

12import re

13import warnings

14from functools import cache, lru_cache, partial

15from typing import Optional

17from hypothesis.errors import HypothesisWarning, InvalidArgument

18from hypothesis.internal import charmap

19from hypothesis.internal.conjecture.data import ConjectureData

20from hypothesis.internal.conjecture.providers import COLLECTION_DEFAULT_MAX_SIZE

21from hypothesis.internal.filtering import max_len, min_len

22from hypothesis.internal.intervalsets import IntervalSet

23from hypothesis.internal.reflection import get_pretty_function_description

24from hypothesis.strategies._internal.collections import ListStrategy

25from hypothesis.strategies._internal.lazy import unwrap_strategies

26from hypothesis.strategies._internal.strategies import (

27 OneOfStrategy,

28 SampledFromStrategy,

29 SearchStrategy,

30)

31from hypothesis.vendor.pretty import pretty

34# Cache size is limited by sys.maxunicode, but passing None makes it slightly faster.

35@cache

36def _check_is_single_character(c):

37 # In order to mitigate the performance cost of this check, we use a shared cache,

38 # even at the cost of showing the culprit strategy in the error message.

39 if not isinstance(c, str):

40 type_ = get_pretty_function_description(type(c))

41 raise InvalidArgument(f"Got non-string {c!r} (type {type_})")

42 if len(c) != 1:

43 raise InvalidArgument(f"Got {c!r} (length {len(c)} != 1)")

44 return c

class OneCharStringStrategy(SearchStrategy[str]):
    """Strategy whose examples are text strings exactly one character long."""

    def __init__(
        self, intervals: IntervalSet, force_repr: Optional[str] = None
    ) -> None:
        """Store the permitted codepoints and an optional fixed repr.

        Args:
            intervals: the set of characters this strategy may produce.
            force_repr: when truthy, returned verbatim by ``__repr__``.
        """
        assert isinstance(intervals, IntervalSet)
        self.intervals = intervals
        self._force_repr = force_repr

    @classmethod
    def from_characters_args(
        cls,
        *,
        codec=None,
        min_codepoint=None,
        max_codepoint=None,
        categories=None,
        exclude_characters=None,
        include_characters=None,
    ):
        """Build a strategy from ``characters()``-style keyword arguments.

        The arguments are forwarded to ``charmap.query``; when ``codec`` is
        given the result is intersected with the characters of that codec.

        Raises:
            InvalidArgument: if the arguments leave no character to generate.
        """
        assert set(categories or ()).issubset(charmap.categories())
        intervals = charmap.query(
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            categories=categories,
            exclude_characters=exclude_characters,
            include_characters=include_characters,
        )
        if codec is not None:
            intervals &= charmap.intervals_from_codec(codec)

        # Collect the arguments worth showing in the repr: skip unset or empty
        # values, and skip `categories` when it is every category except "Cs".
        shown = []
        for key, value in (
            ("codec", codec),
            ("min_codepoint", min_codepoint),
            ("max_codepoint", max_codepoint),
            ("categories", categories),
            ("exclude_characters", exclude_characters),
            ("include_characters", include_characters),
        ):
            if value in (None, ""):
                continue
            if key == "categories" and set(value) == set(charmap.categories()) - {
                "Cs"
            }:
                continue
            shown.append(f"{key}={value!r}")
        _arg_repr = ", ".join(shown)

        if not intervals:
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                f"combination of arguments: {_arg_repr}"
            )
        return cls(intervals, force_repr=f"characters({_arg_repr})")

    @classmethod
    def from_alphabet(cls, alphabet):
        """Build a strategy from an ``alphabet`` string or strategy.

        ``alphabet`` may be a string of permitted characters, an instance of
        this class, a ``sampled_from()`` strategy over single characters, or a
        ``one_of()`` over any of those (handled recursively).

        Raises:
            InvalidArgument: for any other kind of strategy, or when a
                sampled element is not a single-character string.
        """
        if isinstance(alphabet, str):
            return cls.from_characters_args(categories=(), include_characters=alphabet)

        assert isinstance(alphabet, SearchStrategy)
        unwrapped = unwrap_strategies(alphabet)

        if isinstance(unwrapped, cls):
            return unwrapped

        if isinstance(unwrapped, SampledFromStrategy):
            candidates = unwrapped.elements
            for candidate in candidates:
                _check_is_single_character(candidate)
            return cls.from_characters_args(
                categories=(),
                include_characters=candidates,
            )

        if isinstance(unwrapped, OneOfStrategy):
            # Union the character sets of every branch of the one_of().
            merged = IntervalSet()
            for branch in unwrapped.element_strategies:
                merged = merged.union(cls.from_alphabet(branch).intervals)
            return cls(merged, force_repr=repr(alphabet))

        raise InvalidArgument(
            f"{alphabet=} must be a sampled_from() or characters() strategy"
        )

    def __repr__(self) -> str:
        """Return the forced repr if one was given, else a generic repr."""
        if self._force_repr:
            return self._force_repr
        return f"OneCharStringStrategy({self.intervals!r})"

    def do_draw(self, data: ConjectureData) -> str:
        """Draw a string of length exactly one from ``self.intervals``."""
        return data.draw_string(self.intervals, min_size=1, max_size=1)

129

130

131_nonempty_names = (

132 "capitalize",

133 "expandtabs",

134 "join",

135 "lower",

136 "rsplit",

137 "split",

138 "splitlines",

139 "swapcase",

140 "title",

141 "upper",

142)

143_nonempty_and_content_names = (

144 "islower",

145 "isupper",

146 "isalnum",

147 "isalpha",

148 "isascii",

149 "isdigit",

150 "isspace",

151 "istitle",

152 "lstrip",

153 "rstrip",

154 "strip",

155)

156

157

class TextStrategy(ListStrategy[str]):
    """Strategy for text strings, drawn as characters and joined together."""

    def do_draw(self, data):
        """Draw one string.

        When the element strategy is a ``OneCharStringStrategy`` the list draw
        is skipped in favour of ``data.draw_string``.  That shortcut is not
        valid for user-provided element strategies, which may define a
        different distribution than ``data.draw_string``.
        """
        chars = unwrap_strategies(self.element_strategy)
        if not isinstance(chars, OneCharStringStrategy):
            return "".join(super().do_draw(data))

        upper_bound = self.max_size
        if upper_bound == float("inf"):
            upper_bound = COLLECTION_DEFAULT_MAX_SIZE
        return data.draw_string(
            chars.intervals,
            min_size=self.min_size,
            max_size=upper_bound,
        )

    def __repr__(self) -> str:
        """Return a ``text(...)`` repr that omits arguments left at defaults."""
        parts = []
        element_repr = repr(self.element_strategy)
        if element_repr != "characters()":
            parts.append(element_repr)
        if self.min_size:
            parts.append(f"min_size={self.min_size}")
        if self.max_size < float("inf"):
            parts.append(f"max_size={self.max_size}")
        return "text(" + ", ".join(parts) + ")"

    # See https://docs.python.org/3/library/stdtypes.html#string-methods
    # These methods always return Truthy values for any nonempty string.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        str,
        str.casefold,
        str.encode,
        *[getattr(str, name) for name in _nonempty_names],
    )
    _nonempty_and_content_filters = (
        str.isdecimal,
        str.isnumeric,
        *[getattr(str, name) for name in _nonempty_and_content_names],
    )

    def filter(self, condition):
        """Filter this strategy, rewriting recognised conditions where possible.

        ``str.isidentifier`` over a ``OneCharStringStrategy`` alphabet becomes
        a strategy that builds identifiers directly.  Other conditions are
        offered to ``_string_filter_rewrite`` before falling back to the
        inherited ``filter``.
        """
        chars = unwrap_strategies(self.element_strategy)
        builds_identifiers = (
            condition is str.isidentifier
            and self.max_size >= 1
            and isinstance(chars, OneCharStringStrategy)
        )
        if builds_identifiers:
            from hypothesis.strategies import builds, nothing

            id_start, id_continue = _identifier_characters()
            leading = chars.intervals & id_start
            if not leading:
                # No permitted character can start an identifier.
                return nothing()
            first_char = OneCharStringStrategy(leading)
            remainder = TextStrategy(
                OneCharStringStrategy(chars.intervals & id_continue),
                min_size=max(0, self.min_size - 1),
                max_size=self.max_size - 1,
            )
            # Filter to ensure that NFKC normalization keeps working in future
            return builds("{}{}".format, first_char, remainder).filter(
                str.isidentifier
            )

        rewritten = _string_filter_rewrite(self, str, condition)
        if rewritten is not None:
            return rewritten
        return super().filter(condition)

227

228

229def _string_filter_rewrite(self, kind, condition):

230 if condition in (kind.lower, kind.title, kind.upper):

231 k = kind.__name__

232 warnings.warn(

233 f"You applied {k}.{condition.__name__} as a filter, but this allows "

234 f"all nonempty strings! Did you mean {k}.is{condition.__name__}?",

235 HypothesisWarning,

236 stacklevel=2,

237 )

238

239 if (

240 (

241 kind is bytes

242 or isinstance(

243 unwrap_strategies(self.element_strategy), OneCharStringStrategy

244 )

245 )

246 and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)

247 and isinstance(pattern.pattern, kind)

248 ):

249 from hypothesis.strategies._internal.regex import regex_strategy

250

251 if condition.__name__ == "match":

252 # Replace with an easier-to-handle equivalent condition

253 caret, close = ("^(?:", ")") if kind is str else (b"^(?:", b")")

254 pattern = re.compile(caret + pattern.pattern + close, flags=pattern.flags)

255 condition = pattern.search

256

257 if condition.__name__ in ("search", "findall", "fullmatch"):

258 s = regex_strategy(

259 pattern,

260 fullmatch=condition.__name__ == "fullmatch",

261 alphabet=self.element_strategy if kind is str else None,

262 )

263 if self.min_size > 0:

264 s = s.filter(partial(min_len, self.min_size))

265 if self.max_size < 1e999:

266 s = s.filter(partial(max_len, self.max_size))

267 return s

268 elif condition.__name__ in ("finditer", "scanner"):

269 # PyPy implements `finditer` as an alias to their `scanner` method

270 warnings.warn(

271 f"You applied {pretty(condition)} as a filter, but this allows "

272 f"any string at all! Did you mean .findall ?",

273 HypothesisWarning,

274 stacklevel=3,

275 )

276 return self

277 elif condition.__name__ == "split":

278 warnings.warn(

279 f"You applied {pretty(condition)} as a filter, but this allows "

280 f"any nonempty string! Did you mean .search ?",

281 HypothesisWarning,

282 stacklevel=3,

283 )

284 return self.filter(bool)

285

286 # We use ListStrategy filter logic for the conditions that *only* imply

287 # the string is nonempty. Here, we increment the min_size but still apply

288 # the filter for conditions that imply nonempty *and specific contents*.

289 if condition in self._nonempty_and_content_filters and self.max_size >= 1:

290 self = copy.copy(self)

291 self.min_size = max(1, self.min_size)

292 return ListStrategy.filter(self, condition)

293

294 return None

295

296

297# Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt

298# Python updates it's Unicode version between minor releases, but fortunately

299# these properties do not change between the Unicode versions in question.

300_PROPLIST = """

301# ================================================

302

3031885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA

3042118 ; Other_ID_Start # Sm SCRIPT CAPITAL P

305212E ; Other_ID_Start # So ESTIMATED SYMBOL

306309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK

307

308# Total code points: 6

309

310# ================================================

311

31200B7 ; Other_ID_Continue # Po MIDDLE DOT

3130387 ; Other_ID_Continue # Po GREEK ANO TELEIA

3141369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE

31519DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE

316

317# Total code points: 12

318"""

319

320

321@lru_cache

322def _identifier_characters():

323 """See https://docs.python.org/3/reference/lexical_analysis.html#identifiers"""

324 # Start by computing the set of special characters

325 chars = {"Other_ID_Start": "", "Other_ID_Continue": ""}

326 for line in _PROPLIST.splitlines():

327 if m := re.match(r"([0-9A-F.]+) +; (\w+) # ", line):

328 codes, prop = m.groups()

329 span = range(int(codes[:4], base=16), int(codes[-4:], base=16) + 1)

330 chars[prop] += "".join(chr(x) for x in span)

331

332 # Then get the basic set by Unicode category and known extras

333 id_start = charmap.query(

334 categories=("Lu", "Ll", "Lt", "Lm", "Lo", "Nl"),

335 include_characters="_" + chars["Other_ID_Start"],

336 )

337 id_start -= IntervalSet.from_string(

338 # Magic value: the characters which NFKC-normalize to be invalid identifiers.

339 # Conveniently they're all in `id_start`, so we only need to do this once.

340 "\u037a\u0e33\u0eb3\u2e2f\u309b\u309c\ufc5e\ufc5f\ufc60\ufc61\ufc62\ufc63"

341 "\ufdfa\ufdfb\ufe70\ufe72\ufe74\ufe76\ufe78\ufe7a\ufe7c\ufe7e\uff9e\uff9f"

342 )

343 id_continue = id_start | charmap.query(

344 categories=("Mn", "Mc", "Nd", "Pc"),

345 include_characters=chars["Other_ID_Continue"],

346 )

347 return id_start, id_continue

348

349

class BytesStrategy(SearchStrategy):
    """Strategy generating ``bytes`` values whose length lies within bounds."""

    def __init__(self, min_size: int, max_size: Optional[int]):
        """Store the size bounds.

        A ``max_size`` of ``None`` is replaced by
        ``COLLECTION_DEFAULT_MAX_SIZE``.
        """
        if max_size is None:
            max_size = COLLECTION_DEFAULT_MAX_SIZE
        self.min_size = min_size
        self.max_size = max_size

    def do_draw(self, data):
        """Draw a bytes value with length between the stored bounds."""
        return data.draw_bytes(self.min_size, self.max_size)

    # Filter tables consulted by `_string_filter_rewrite` and
    # `ListStrategy.filter`, built from the shared method-name tuples.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        bytes,
        *[getattr(bytes, name) for name in _nonempty_names],
    )
    _nonempty_and_content_filters = tuple(
        getattr(bytes, name) for name in _nonempty_and_content_names
    )

    def filter(self, condition):
        """Filter this strategy, rewriting recognised conditions if possible.

        Conditions are first offered to ``_string_filter_rewrite``; if that
        declines, ``ListStrategy.filter`` is applied to this instance.
        """
        rewritten = _string_filter_rewrite(self, bytes, condition)
        if rewritten is not None:
            return rewritten
        return ListStrategy.filter(self, condition)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/_internal/strings.py: 44%

145 statements