Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/hypothesis/strategies/

1# This file is part of Hypothesis, which may be found at

2# https://github.com/HypothesisWorks/hypothesis/

4# Copyright the Hypothesis Authors.

5# Individual contributors are listed in AUTHORS.rst and the git log.

7# This Source Code Form is subject to the terms of the Mozilla Public License,

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can

9# obtain one at https://mozilla.org/MPL/2.0/.

11import copy

12import re

13import warnings

14from functools import lru_cache, partial

16from hypothesis.errors import HypothesisWarning, InvalidArgument

17from hypothesis.internal import charmap

18from hypothesis.internal.filtering import max_len, min_len

19from hypothesis.internal.intervalsets import IntervalSet

20from hypothesis.strategies._internal.collections import ListStrategy

21from hypothesis.strategies._internal.lazy import unwrap_strategies

22from hypothesis.strategies._internal.numbers import IntegersStrategy

23from hypothesis.strategies._internal.strategies import SearchStrategy

24from hypothesis.vendor.pretty import pretty

class OneCharStringStrategy(SearchStrategy):
    """Strategy producing text strings that are exactly one character long.

    The permitted characters are stored as an ``IntervalSet`` on
    ``self.intervals``. A truthy ``force_repr`` replaces the default repr.
    """

    def __init__(self, intervals, force_repr=None):
        assert isinstance(intervals, IntervalSet)
        self.intervals = intervals
        self._force_repr = force_repr

    @classmethod
    def from_characters_args(
        cls,
        *,
        codec=None,
        min_codepoint=None,
        max_codepoint=None,
        categories=None,
        exclude_characters=None,
        include_characters=None,
    ):
        """Alternate constructor taking ``characters()``-style keyword arguments.

        Raises ``InvalidArgument`` when the combination of arguments leaves
        no character that could be generated.
        """
        assert set(categories or ()).issubset(charmap.categories())
        allowed = charmap.query(
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            categories=categories,
            exclude_characters=exclude_characters,
            include_characters=include_characters,
        )
        if codec is not None:
            allowed &= charmap.intervals_from_codec(codec)

        # Build the "name=value" listing used in both the repr and the error
        # message. Arguments equal to None, "" or the set of every category
        # except "Cs" are omitted from it.
        named_args = [
            ("codec", codec),
            ("min_codepoint", min_codepoint),
            ("max_codepoint", max_codepoint),
            ("categories", categories),
            ("exclude_characters", exclude_characters),
            ("include_characters", include_characters),
        ]
        shown = []
        for key, value in named_args:
            if value not in (None, "", set(charmap.categories()) - {"Cs"}):
                shown.append(f"{key}={value!r}")
        _arg_repr = ", ".join(shown)

        if not allowed:
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                f"combination of arguments: {_arg_repr}"
            )
        return cls(allowed, force_repr=f"characters({_arg_repr})")

    def __repr__(self):
        if self._force_repr:
            return self._force_repr
        return f"OneCharStringStrategy({self.intervals!r})"

    def do_draw(self, data):
        # A single character is a string draw with both size bounds at one.
        return data.draw_string(self.intervals, min_size=1, max_size=1)

82_nonempty_names = (

83 "capitalize",

84 "expandtabs",

85 "join",

86 "lower",

87 "rsplit",

88 "split",

89 "splitlines",

90 "swapcase",

91 "title",

92 "upper",

93)

94_nonempty_and_content_names = (

95 "islower",

96 "isupper",

97 "isalnum",

98 "isalpha",

99 "isascii",

100 "isdigit",

101 "isspace",

102 "istitle",

103 "lstrip",

104 "rstrip",

105 "strip",

106)

107

108

class TextStrategy(ListStrategy):
    """List-based strategy whose drawn elements are joined into one ``str``."""

    def do_draw(self, data):
        # Only when the (unwrapped) element strategy is our own
        # OneCharStringStrategy can the ListStrategy draw be skipped in favour
        # of the IR string draw. Doing that for user-provided element
        # strategies is not correct in general, as they may define a
        # different distribution than our IR.
        chars = unwrap_strategies(self.element_strategy)
        if not isinstance(chars, OneCharStringStrategy):
            return "".join(super().do_draw(data))
        return data.draw_string(
            chars.intervals, min_size=self.min_size, max_size=self.max_size
        )

    def __repr__(self):
        # Arguments at their defaults are left out of the repr.
        element_repr = repr(self.element_strategy)
        parts = [] if element_repr == "characters()" else [element_repr]
        if self.min_size:
            parts.append(f"min_size={self.min_size}")
        if self.max_size < float("inf"):
            parts.append(f"max_size={self.max_size}")
        return f"text({', '.join(parts)})"

    # See https://docs.python.org/3/library/stdtypes.html#string-methods
    # These methods always return Truthy values for any nonempty string.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        str,
        str.casefold,
        str.encode,
        *(getattr(str, name) for name in _nonempty_names),
    )
    _nonempty_and_content_filters = (
        str.isdecimal,
        str.isnumeric,
        *(getattr(str, name) for name in _nonempty_and_content_names),
    )

    def filter(self, condition):
        """Filter this strategy, rewriting recognised conditions where possible."""
        chars = unwrap_strategies(self.element_strategy)
        identifier_shortcut = (
            condition is str.isidentifier
            and self.max_size >= 1
            and isinstance(chars, OneCharStringStrategy)
        )
        if identifier_shortcut:
            from hypothesis.strategies import builds, nothing

            id_start, id_continue = _identifier_characters()
            first_chars = chars.intervals & id_start
            if not first_chars:
                return nothing()
            # One leading character from the start set, followed by a text
            # strategy over the continue set with both size bounds reduced by
            # one to account for that leading character.
            head = OneCharStringStrategy(first_chars)
            tail = TextStrategy(
                OneCharStringStrategy(chars.intervals & id_continue),
                min_size=max(0, self.min_size - 1),
                max_size=self.max_size - 1,
            )
            # Filter to ensure that NFKC normalization keeps working in future
            return builds("{}{}".format, head, tail).filter(str.isidentifier)

        rewritten = _string_filter_rewrite(self, str, condition)
        if rewritten is not None:
            return rewritten
        return super().filter(condition)

172

173

def _string_filter_rewrite(self, kind, condition):
    """Try to replace ``self.filter(condition)`` with a more direct strategy.

    Called from the ``filter`` methods of the str and bytes strategies.

    Args:
        self: The strategy being filtered. Its ``element_strategy``,
            ``min_size``, ``max_size`` and ``_nonempty_and_content_filters``
            attributes are read.
        kind: ``str`` or ``bytes``, matching the type ``self`` generates.
        condition: The predicate that was passed to ``.filter()``.

    Returns:
        A replacement strategy, or ``None`` when no rewrite applies and the
        caller should fall back to its generic ``filter`` implementation.

    Emits ``HypothesisWarning`` for conditions that are almost certainly a
    mistake (e.g. ``str.lower`` instead of ``str.islower``).
    """
    if condition in (kind.lower, kind.title, kind.upper):
        k = kind.__name__
        warnings.warn(
            f"You applied {k}.{condition.__name__} as a filter, but this allows "
            f"all nonempty strings! Did you mean {k}.is{condition.__name__}?",
            HypothesisWarning,
            stacklevel=2,
        )

    elems = unwrap_strategies(self.element_strategy)
    # Bound methods of a compiled regex whose pattern type matches `kind`.
    # For str this is only attempted when the alphabet is our own
    # OneCharStringStrategy.
    if (
        (kind is bytes or isinstance(elems, OneCharStringStrategy))
        and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)
        and isinstance(pattern.pattern, kind)
    ):
        from hypothesis.strategies._internal.regex import regex_strategy

        if condition.__name__ == "match":
            # Replace with an easier-to-handle equivalent condition
            caret = "^" if kind is str else b"^"
            pattern = re.compile(caret + pattern.pattern, flags=pattern.flags)
            condition = pattern.search

        if condition.__name__ in ("search", "findall", "fullmatch"):
            s = regex_strategy(
                pattern,
                fullmatch=condition.__name__ == "fullmatch",
                alphabet=self.element_strategy if kind is str else None,
            )
            # The regex strategy is not given our size bounds, so re-impose
            # them as filters where they are not trivially satisfied.
            if self.min_size > 0:
                s = s.filter(partial(min_len, self.min_size))
            if self.max_size < float("inf"):
                s = s.filter(partial(max_len, self.max_size))
            return s
        elif condition.__name__ in ("finditer", "scanner"):
            # PyPy implements `finditer` as an alias to their `scanner` method
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any string at all! Did you mean .findall ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self
        elif condition.__name__ == "split":
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any nonempty string! Did you mean .search ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self.filter(bool)

    # We use ListStrategy filter logic for the conditions that *only* imply
    # the string is nonempty. Here, we increment the min_size but still apply
    # the filter for conditions that imply nonempty *and specific contents*.
    if condition in self._nonempty_and_content_filters and self.max_size >= 1:
        # Work on a shallow copy so the caller's strategy is left untouched.
        self = copy.copy(self)
        self.min_size = max(1, self.min_size)
        return ListStrategy.filter(self, condition)

    return None

239

240

# Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt
# Python updates its Unicode version between minor releases, but fortunately
# these properties do not change between the Unicode versions in question.
#
# Each data row has the form "<code point or first..last range> ; <property> # ..."
# and is parsed by the regular expression in _identifier_characters(); rows
# that do not match that shape (separators, totals, blanks) are skipped there.
_PROPLIST = """
# ================================================

1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
212E ; Other_ID_Start # So ESTIMATED SYMBOL
309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK

# Total code points: 6

# ================================================

00B7 ; Other_ID_Continue # Po MIDDLE DOT
0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE

# Total code points: 12
"""

263

264

@lru_cache
def _identifier_characters():
    """See https://docs.python.org/3/reference/lexical_analysis.html#identifiers

    Returns a pair ``(id_start, id_continue)`` of interval sets: the
    characters allowed as the first character of an identifier, and those
    allowed in any later position. The result is memoised by ``lru_cache``.
    """
    # Collect the special characters listed in _PROPLIST, keyed by property.
    extras = {"Other_ID_Start": "", "Other_ID_Continue": ""}
    for row in _PROPLIST.splitlines():
        found = re.match(r"([0-9A-F.]+) +; (\w+) # ", row)
        if not found:
            continue
        codes, prop = found.groups()
        # The first and last four hex digits are the ends of the range; for a
        # single code point they are the same four digits.
        first = int(codes[:4], base=16)
        last = int(codes[-4:], base=16)
        extras[prop] += "".join(map(chr, range(first, last + 1)))

    # Then get the basic set by Unicode category and known extras
    id_start = charmap.query(
        categories=("Lu", "Ll", "Lt", "Lm", "Lo", "Nl"),
        include_characters="_" + extras["Other_ID_Start"],
    )
    # Magic value: the characters which NFKC-normalize to be invalid identifiers.
    # Conveniently they're all in `id_start`, so we only need to do this once.
    nfkc_invalid = (
        "\u037a\u0e33\u0eb3\u2e2f\u309b\u309c\ufc5e\ufc5f\ufc60\ufc61\ufc62\ufc63"
        "\ufdfa\ufdfb\ufe70\ufe72\ufe74\ufe76\ufe78\ufe7a\ufe7c\ufe7e\uff9e\uff9f"
    )
    id_start -= IntervalSet.from_string(nfkc_invalid)

    continue_only = charmap.query(
        categories=("Mn", "Mc", "Nd", "Pc"),
        include_characters=extras["Other_ID_Continue"],
    )
    id_continue = id_start | continue_only
    return id_start, id_continue

292

293

class BytesStrategy(ListStrategy):
    """List-based strategy over integers 0..255, returned as ``bytes``."""

    def __init__(self, min_size, max_size):
        byte_values = IntegersStrategy(0, 255)
        super().__init__(byte_values, min_size=min_size, max_size=max_size)

    def do_draw(self, data):
        # TODO: refactor the underlying provider to support variable-length bytes
        if self.min_size != self.max_size:
            return bytes(super().do_draw(data))
        # Fixed length: a single draw of exactly that many bytes.
        return bytes(data.draw_bytes(self.min_size))

    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        bytes,
        *(getattr(bytes, name) for name in _nonempty_names),
    )
    _nonempty_and_content_filters = tuple(
        getattr(bytes, name) for name in _nonempty_and_content_names
    )

    def filter(self, condition):
        """Filter this strategy, rewriting recognised conditions where possible."""
        rewritten = _string_filter_rewrite(self, bytes, condition)
        if rewritten is not None:
            return rewritten
        return super().filter(condition)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/hypothesis/strategies/_internal/strings.py: 48%

119 statements