Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/

1# This file is part of Hypothesis, which may be found at

2# https://github.com/HypothesisWorks/hypothesis/

4# Copyright the Hypothesis Authors.

5# Individual contributors are listed in AUTHORS.rst and the git log.

7# This Source Code Form is subject to the terms of the Mozilla Public License,

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can

9# obtain one at https://mozilla.org/MPL/2.0/.

11import copy

12import re

13import warnings

14from collections.abc import Collection

15from functools import cache, lru_cache, partial

16from typing import Optional, Union, cast

18from hypothesis.errors import HypothesisWarning, InvalidArgument

19from hypothesis.internal import charmap

20from hypothesis.internal.charmap import Categories

21from hypothesis.internal.conjecture.data import ConjectureData

22from hypothesis.internal.conjecture.providers import COLLECTION_DEFAULT_MAX_SIZE

23from hypothesis.internal.filtering import max_len, min_len

24from hypothesis.internal.intervalsets import IntervalSet

25from hypothesis.internal.reflection import get_pretty_function_description

26from hypothesis.strategies._internal.collections import ListStrategy

27from hypothesis.strategies._internal.lazy import unwrap_strategies

28from hypothesis.strategies._internal.strategies import (

29 OneOfStrategy,

30 SampledFromStrategy,

31 SearchStrategy,

32)

33from hypothesis.vendor.pretty import pretty

# Cache size is limited by sys.maxunicode, but passing None makes it slightly faster.
@cache
# this is part of our forward-facing validation, so we do *not* tell mypyc that c
# should be a str, because we don't want it to validate it before we can.
def _check_is_single_character(c: object) -> str:
    """Return ``c`` unchanged if it is a string of length exactly one.

    Raises InvalidArgument if ``c`` is not a ``str``, or if it is a ``str``
    whose length is not 1.

    Results are memoised with ``functools.cache``, so ``c`` must be hashable;
    an unhashable argument fails in the cache lookup with ``TypeError`` before
    the checks below are reached.
    """
    # In order to mitigate the performance cost of this check, we use a shared cache,
    # even at the cost of showing the culprit strategy in the error message.
    if not isinstance(c, str):
        type_ = get_pretty_function_description(type(c))
        raise InvalidArgument(f"Got non-string {c!r} (type {type_})")
    if len(c) != 1:
        raise InvalidArgument(f"Got {c!r} (length {len(c)} != 1)")
    return c

class OneCharStringStrategy(SearchStrategy[str]):
    """A strategy which generates single character strings of text type."""

    def __init__(
        self, intervals: IntervalSet, force_repr: Optional[str] = None
    ) -> None:
        """Store the allowed codepoints and an optional repr override.

        ``intervals`` is the set of codepoints passed to ``data.draw_string``
        when drawing.  ``force_repr``, if truthy, is returned verbatim by
        ``__repr__``.
        """
        super().__init__()
        assert isinstance(intervals, IntervalSet)
        self.intervals = intervals
        self._force_repr = force_repr

    @classmethod
    def from_characters_args(
        cls,
        *,
        codec: Optional[str] = None,
        min_codepoint: Optional[int] = None,
        max_codepoint: Optional[int] = None,
        categories: Optional[Categories] = None,
        exclude_characters: Collection[str] = "",
        include_characters: Collection[str] = "",
    ) -> "OneCharStringStrategy":
        """Build a strategy from ``characters()``-style keyword arguments.

        The codepoint set comes from ``charmap.query`` and, when ``codec`` is
        given, is intersected with the codepoints of that codec.

        Raises InvalidArgument if the resulting set of characters is empty.
        """
        assert set(categories or ()).issubset(charmap.categories())
        intervals = charmap.query(
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            categories=categories,
            exclude_characters=exclude_characters,
            include_characters=include_characters,
        )
        if codec is not None:
            intervals &= charmap.intervals_from_codec(codec)

        # Only arguments which were actually supplied are shown in the repr.
        # A `categories` value equal to "every category except Cs" is omitted too.
        _arg_repr = ", ".join(
            f"{k}={v!r}"
            for k, v in [
                ("codec", codec),
                ("min_codepoint", min_codepoint),
                ("max_codepoint", max_codepoint),
                ("categories", categories),
                ("exclude_characters", exclude_characters),
                ("include_characters", include_characters),
            ]
            if v not in (None, "")
            and not (
                k == "categories"
                # v has to be `categories` here. Help mypy along to infer that.
                and set(cast(Categories, v)) == set(charmap.categories()) - {"Cs"}
            )
        )
        if not intervals:
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                f"combination of arguments: {_arg_repr}"
            )
        return cls(intervals, force_repr=f"characters({_arg_repr})")

    @classmethod
    def from_alphabet(
        cls, alphabet: Union[str, SearchStrategy]
    ) -> "OneCharStringStrategy":
        """Build a strategy from a string of characters or an alphabet strategy.

        * A ``str`` is treated as the exact set of characters to include.
        * A strategy is unwrapped first.  An instance of this class is returned
          as-is; a ``SampledFromStrategy`` has each element validated as a single
          character and is then used as the included characters; a
          ``OneOfStrategy`` yields the union of its branches' intervals.

        Raises InvalidArgument for any other kind of strategy.
        """
        if isinstance(alphabet, str):
            return cls.from_characters_args(categories=(), include_characters=alphabet)

        assert isinstance(alphabet, SearchStrategy)
        char_strategy = unwrap_strategies(alphabet)
        if isinstance(char_strategy, cls):
            return char_strategy
        elif isinstance(char_strategy, SampledFromStrategy):
            for c in char_strategy.elements:
                _check_is_single_character(c)
            return cls.from_characters_args(
                categories=(),
                include_characters=char_strategy.elements,
            )
        elif isinstance(char_strategy, OneOfStrategy):
            intervals = IntervalSet()
            for s in char_strategy.element_strategies:
                # Recurse so that nested one_of / sampled_from branches also work.
                intervals = intervals.union(cls.from_alphabet(s).intervals)
            return cls(intervals, force_repr=repr(alphabet))
        raise InvalidArgument(
            f"{alphabet=} must be a sampled_from() or characters() strategy"
        )

    def __repr__(self) -> str:
        """Return the forced repr if one was given, else a generic repr."""
        return self._force_repr or f"OneCharStringStrategy({self.intervals!r})"

    def do_draw(self, data: ConjectureData) -> str:
        """Draw a string of exactly one character from ``self.intervals``."""
        return data.draw_string(self.intervals, min_size=1, max_size=1)

140

141

# Names of methods, looked up on both `str` and `bytes` via getattr(), which are
# used to build the `_nonempty_filters` tuples of the text and bytes strategies.
_nonempty_names = (
    "capitalize",
    "expandtabs",
    "join",
    "lower",
    "rsplit",
    "split",
    "splitlines",
    "swapcase",
    "title",
    "upper",
)
# Names of methods, looked up the same way, which are used to build the
# `_nonempty_and_content_filters` tuples: as filters they imply that the value
# is nonempty *and* has specific contents.
_nonempty_and_content_names = (
    "islower",
    "isupper",
    "isalnum",
    "isalpha",
    "isascii",
    "isdigit",
    "isspace",
    "istitle",
    "lstrip",
    "rstrip",
    "strip",
)

167

168

class TextStrategy(ListStrategy[str]):
    """A list-of-characters strategy whose drawn value is a single ``str``."""

    def do_draw(self, data):
        """Draw a string, using ``data.draw_string`` directly when possible."""
        # if our element strategy is OneCharStringStrategy, we can skip the
        # ListStrategy draw and jump right to data.draw_string.
        # Doing so for user-provided element strategies is not correct in
        # general, as they may define a different distribution than data.draw_string.
        elems = unwrap_strategies(self.element_strategy)
        if isinstance(elems, OneCharStringStrategy):
            return data.draw_string(
                elems.intervals,
                min_size=self.min_size,
                max_size=(
                    COLLECTION_DEFAULT_MAX_SIZE
                    if self.max_size == float("inf")
                    else self.max_size
                ),
            )
        return "".join(super().do_draw(data))

    def __repr__(self) -> str:
        """Return a ``text(...)`` repr which omits arguments left at defaults."""
        args = []
        if repr(self.element_strategy) != "characters()":
            args.append(repr(self.element_strategy))
        if self.min_size:
            args.append(f"min_size={self.min_size}")
        if self.max_size < float("inf"):
            args.append(f"max_size={self.max_size}")
        return f"text({', '.join(args)})"

    # See https://docs.python.org/3/library/stdtypes.html#string-methods
    # These methods always return Truthy values for any nonempty string.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        str,
        str.casefold,
        str.encode,
        *(getattr(str, n) for n in _nonempty_names),
    )
    _nonempty_and_content_filters = (
        str.isdecimal,
        str.isnumeric,
        *(getattr(str, n) for n in _nonempty_and_content_names),
    )

    def filter(self, condition):
        """Return a filtered strategy, rewriting known conditions where possible.

        ``str.isidentifier`` over a ``OneCharStringStrategy`` alphabet is replaced
        by a strategy which builds identifiers directly.  Other conditions are
        offered to ``_string_filter_rewrite`` before falling back to the
        inherited ``filter``.
        """
        elems = unwrap_strategies(self.element_strategy)
        if (
            condition is str.isidentifier
            and self.max_size >= 1
            and isinstance(elems, OneCharStringStrategy)
        ):
            from hypothesis.strategies import builds, nothing

            id_start, id_continue = _identifier_characters()
            if not (elems.intervals & id_start):
                # No allowed character can start an identifier.
                return nothing()
            # One identifier-start character followed by continue characters;
            # the size bounds of the tail are reduced by one to compensate.
            return builds(
                "{}{}".format,
                OneCharStringStrategy(elems.intervals & id_start),
                TextStrategy(
                    OneCharStringStrategy(elems.intervals & id_continue),
                    min_size=max(0, self.min_size - 1),
                    max_size=self.max_size - 1,
                ),
                # Filter to ensure that NFKC normalization keeps working in future
            ).filter(str.isidentifier)
        if (new := _string_filter_rewrite(self, str, condition)) is not None:
            return new
        return super().filter(condition)

238

239

def _string_filter_rewrite(self, kind, condition):
    """Try to rewrite ``self.filter(condition)`` for a text or bytes strategy.

    ``self`` is the strategy being filtered and ``kind`` is ``str`` or ``bytes``.
    Returns a replacement strategy when ``condition`` is recognised, or ``None``
    when no rewrite applies and the caller should fall back to its default
    filtering.  May emit a HypothesisWarning for conditions which are almost
    certainly mistakes (e.g. ``str.lower`` instead of ``str.islower``).
    """
    if condition in (kind.lower, kind.title, kind.upper):
        k = kind.__name__
        # NOTE(review): the regex warnings below use stacklevel=3 from this same
        # helper, while this one uses 2 - confirm which frame is intended.
        warnings.warn(
            f"You applied {k}.{condition.__name__} as a filter, but this allows "
            f"all nonempty strings! Did you mean {k}.is{condition.__name__}?",
            HypothesisWarning,
            stacklevel=2,
        )

    # Bound methods of a compiled regex pattern of the matching kind.  For text
    # this is only attempted when the alphabet is a OneCharStringStrategy.
    if (
        (
            kind is bytes
            or isinstance(
                unwrap_strategies(self.element_strategy), OneCharStringStrategy
            )
        )
        and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)
        and isinstance(pattern.pattern, kind)
    ):
        from hypothesis.strategies._internal.regex import regex_strategy

        if condition.__name__ == "match":
            # Replace with an easier-to-handle equivalent condition
            caret, close = ("^(?:", ")") if kind is str else (b"^(?:", b")")
            pattern = re.compile(caret + pattern.pattern + close, flags=pattern.flags)
            condition = pattern.search

        if condition.__name__ in ("search", "findall", "fullmatch"):
            s = regex_strategy(
                pattern,
                fullmatch=condition.__name__ == "fullmatch",
                alphabet=self.element_strategy if kind is str else None,
            )
            # The regex strategy ignores our size bounds, so re-apply them.
            if self.min_size > 0:
                s = s.filter(partial(min_len, self.min_size))
            if self.max_size < float("inf"):
                s = s.filter(partial(max_len, self.max_size))
            return s
        elif condition.__name__ in ("finditer", "scanner"):
            # PyPy implements `finditer` as an alias to their `scanner` method
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any string at all! Did you mean .findall ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self
        elif condition.__name__ == "split":
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any nonempty string! Did you mean .search ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self.filter(bool)

    # We use ListStrategy filter logic for the conditions that *only* imply
    # the string is nonempty. Here, we increment the min_size but still apply
    # the filter for conditions that imply nonempty *and specific contents*.
    if condition in self._nonempty_and_content_filters and self.max_size >= 1:
        # Work on a shallow copy so the caller's strategy keeps its min_size.
        narrowed = copy.copy(self)
        narrowed.min_size = max(1, narrowed.min_size)
        return ListStrategy.filter(narrowed, condition)

    return None

306

307

# Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt
# Python updates its Unicode version between minor releases, but fortunately
# these properties do not change between the Unicode versions in question.
# Each data line is "<code>[..<code>] ; <property> # <comment>".
_PROPLIST = """
# ================================================

1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
212E ; Other_ID_Start # So ESTIMATED SYMBOL
309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK

# Total code points: 6

# ================================================

00B7 ; Other_ID_Continue # Po MIDDLE DOT
0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE

# Total code points: 12
"""

330

331

@lru_cache
def _identifier_characters() -> tuple[IntervalSet, IntervalSet]:
    """Return ``(id_start, id_continue)`` interval sets for Python identifiers.

    ``id_start`` holds the characters allowed as the first character of an
    identifier and ``id_continue`` those allowed in later positions.

    See https://docs.python.org/3/reference/lexical_analysis.html#identifiers
    """
    # Start by computing the set of special characters
    chars = {"Other_ID_Start": "", "Other_ID_Continue": ""}
    for line in _PROPLIST.splitlines():
        if m := re.match(r"([0-9A-F.]+) +; (\w+) # ", line):
            codes, prop = m.groups()
            # Either a single code point ("2118") or an inclusive range
            # ("1885..1886").  Splitting on ".." rather than slicing four
            # characters also copes with code points longer than four digits.
            first, _, last = codes.partition("..")
            span = range(int(first, base=16), int(last or first, base=16) + 1)
            chars[prop] += "".join(chr(x) for x in span)

    # Then get the basic set by Unicode category and known extras
    id_start = charmap.query(
        categories=("Lu", "Ll", "Lt", "Lm", "Lo", "Nl"),
        include_characters="_" + chars["Other_ID_Start"],
    )
    id_start -= IntervalSet.from_string(
        # Magic value: the characters which NFKC-normalize to be invalid identifiers.
        # Conveniently they're all in `id_start`, so we only need to do this once.
        "\u037a\u0e33\u0eb3\u2e2f\u309b\u309c\ufc5e\ufc5f\ufc60\ufc61\ufc62\ufc63"
        "\ufdfa\ufdfb\ufe70\ufe72\ufe74\ufe76\ufe78\ufe7a\ufe7c\ufe7e\uff9e\uff9f"
    )
    id_continue = id_start | charmap.query(
        categories=("Mn", "Mc", "Nd", "Pc"),
        include_characters=chars["Other_ID_Continue"],
    )
    return id_start, id_continue

359

360

class BytesStrategy(SearchStrategy):
    """A strategy which draws ``bytes`` via ``data.draw_bytes`` within size bounds."""

    def __init__(self, min_size: int, max_size: Optional[int]):
        """Store the size bounds; a ``max_size`` of ``None`` uses the default cap."""
        super().__init__()
        self.min_size = min_size
        self.max_size = (
            max_size if max_size is not None else COLLECTION_DEFAULT_MAX_SIZE
        )

    def do_draw(self, data: ConjectureData) -> bytes:
        """Draw a bytestring with the configured minimum and maximum size."""
        return data.draw_bytes(self.min_size, self.max_size)

    # Filter tuples built from the same method names as the text strategy uses,
    # looked up on `bytes`.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        bytes,
        *(getattr(bytes, n) for n in _nonempty_names),
    )
    _nonempty_and_content_filters = (
        *(getattr(bytes, n) for n in _nonempty_and_content_names),
    )

    def filter(self, condition):
        """Return a filtered strategy, rewriting known conditions where possible."""
        if (new := _string_filter_rewrite(self, bytes, condition)) is not None:
            return new
        # This class is not a ListStrategy subclass, so ListStrategy.filter is
        # called explicitly with this instance as `self`.
        return ListStrategy.filter(self, condition)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/_internal/strings.py: 38%

149 statements