Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/hypothesis/strategies/_internal/strings.py: 48%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

119 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import copy 

12import re 

13import warnings 

14from functools import lru_cache, partial 

15 

16from hypothesis.errors import HypothesisWarning, InvalidArgument 

17from hypothesis.internal import charmap 

18from hypothesis.internal.filtering import max_len, min_len 

19from hypothesis.internal.intervalsets import IntervalSet 

20from hypothesis.strategies._internal.collections import ListStrategy 

21from hypothesis.strategies._internal.lazy import unwrap_strategies 

22from hypothesis.strategies._internal.numbers import IntegersStrategy 

23from hypothesis.strategies._internal.strategies import SearchStrategy 

24from hypothesis.vendor.pretty import pretty 

25 

26 

class OneCharStringStrategy(SearchStrategy):
    """A strategy which generates single character strings of text type.

    ``intervals`` is the ``IntervalSet`` of characters that may be drawn.
    A truthy ``force_repr`` replaces the default repr of the strategy.
    """

    def __init__(self, intervals, force_repr=None):
        assert isinstance(intervals, IntervalSet)
        self.intervals = intervals
        self._force_repr = force_repr

    @classmethod
    def from_characters_args(
        cls,
        *,
        codec=None,
        min_codepoint=None,
        max_codepoint=None,
        categories=None,
        exclude_characters=None,
        include_characters=None,
    ):
        """Build the strategy from ``characters()``-style keyword arguments.

        Raises ``InvalidArgument`` when the combination of arguments leaves
        no character that could be generated.
        """
        requested = set(categories or ())
        assert requested.issubset(charmap.categories())

        allowed = charmap.query(
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            categories=categories,
            exclude_characters=exclude_characters,
            include_characters=include_characters,
        )
        if codec is not None:
            allowed &= charmap.intervals_from_codec(codec)

        # Argument values equal to any of these are left out of the repr.
        omitted = (None, "", set(charmap.categories()) - {"Cs"})
        named_args = {
            "codec": codec,
            "min_codepoint": min_codepoint,
            "max_codepoint": max_codepoint,
            "categories": categories,
            "exclude_characters": exclude_characters,
            "include_characters": include_characters,
        }
        shown = ", ".join(
            f"{name}={value!r}"
            for name, value in named_args.items()
            if value not in omitted
        )

        if allowed:
            return cls(allowed, force_repr=f"characters({shown})")
        raise InvalidArgument(
            "No characters are allowed to be generated by this "
            f"combination of arguments: {shown}"
        )

    def __repr__(self):
        if self._force_repr:
            return self._force_repr
        return f"OneCharStringStrategy({self.intervals!r})"

    def do_draw(self, data):
        # Exactly one character: min_size and max_size are both 1.
        return data.draw_string(self.intervals, min_size=1, max_size=1)

80 

81 

# Names of methods, shared by ``str`` and ``bytes``, which return a truthy
# value for any nonempty input.  The strategy classes in this file look them
# up with ``getattr(str, n)`` / ``getattr(bytes, n)`` to build their
# ``_nonempty_filters`` tuples.
_nonempty_names = (
    "capitalize",
    "expandtabs",
    "join",
    "lower",
    "rsplit",
    "split",
    "splitlines",
    "swapcase",
    "title",
    "upper",
)
# Names of methods whose truthy result implies the input is nonempty *and*
# has specific contents, so using one as a filter both raises the minimum
# size to one and still requires the filter itself to be applied.
_nonempty_and_content_names = (
    "islower",
    "isupper",
    "isalnum",
    "isalpha",
    "isascii",
    "isdigit",
    "isspace",
    "istitle",
    "lstrip",
    "rstrip",
    "strip",
)

107 

108 

class TextStrategy(ListStrategy):
    """List-of-characters strategy whose drawn elements are joined into a ``str``."""

    def do_draw(self, data):
        chars = unwrap_strategies(self.element_strategy)
        if not isinstance(chars, OneCharStringStrategy):
            # User-provided element strategies may define a different
            # distribution than our IR, so skipping the ListStrategy draw
            # would not be correct in general for them.
            drawn = super().do_draw(data)
            return "".join(drawn)
        # With our own OneCharStringStrategy we can jump right to the nice
        # IR string draw.
        return data.draw_string(
            chars.intervals, min_size=self.min_size, max_size=self.max_size
        )

    def __repr__(self):
        element_repr = repr(self.element_strategy)
        # The default alphabet is left out of the repr.
        parts = [] if element_repr == "characters()" else [element_repr]
        if self.min_size:
            parts.append(f"min_size={self.min_size}")
        if self.max_size < float("inf"):
            parts.append(f"max_size={self.max_size}")
        return "text(" + ", ".join(parts) + ")"

    # See https://docs.python.org/3/library/stdtypes.html#string-methods
    # These methods always return Truthy values for any nonempty string.
    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        str,
        str.casefold,
        str.encode,
        *(getattr(str, name) for name in _nonempty_names),
    )
    _nonempty_and_content_filters = (
        str.isdecimal,
        str.isnumeric,
        *(getattr(str, name) for name in _nonempty_and_content_names),
    )

    def filter(self, condition):
        """Return a strategy for strings satisfying ``condition``.

        ``str.isidentifier`` and the conditions recognised by
        ``_string_filter_rewrite`` are rewritten into dedicated strategies;
        everything else falls back to the parent class's ``filter``.
        """
        chars = unwrap_strategies(self.element_strategy)
        wants_identifier = (
            condition is str.isidentifier
            and self.max_size >= 1
            and isinstance(chars, OneCharStringStrategy)
        )
        if wants_identifier:
            from hypothesis.strategies import builds, nothing

            id_start, id_continue = _identifier_characters()
            first_chars = chars.intervals & id_start
            if not first_chars:
                return nothing()
            # One valid start character followed by continuation characters,
            # with the size bounds shifted down by the leading character.
            head = OneCharStringStrategy(first_chars)
            tail = TextStrategy(
                OneCharStringStrategy(chars.intervals & id_continue),
                min_size=max(0, self.min_size - 1),
                max_size=self.max_size - 1,
            )
            # Filter to ensure that NFKC normalization keeps working in future
            return builds("{}{}".format, head, tail).filter(str.isidentifier)

        rewritten = _string_filter_rewrite(self, str, condition)
        if rewritten is None:
            return super().filter(condition)
        return rewritten

172 

173 

def _string_filter_rewrite(self, kind, condition):
    """Try to rewrite ``self.filter(condition)`` into a better strategy.

    Shared by the ``filter`` methods of ``TextStrategy`` (``kind is str``)
    and ``BytesStrategy`` (``kind is bytes``).

    Args:
        self: The text or bytes strategy being filtered.
        kind: ``str`` or ``bytes``, matching the values ``self`` generates.
        condition: The predicate passed to ``.filter()``.

    Returns:
        A replacement strategy when a rewrite applies, otherwise ``None`` so
        that the caller falls back to its ordinary ``filter`` handling.

    Emits ``HypothesisWarning`` for conditions which are almost certainly
    mistakes because they accept (nearly) every value.
    """
    if condition in (kind.lower, kind.title, kind.upper):
        k = kind.__name__
        warnings.warn(
            f"You applied {k}.{condition.__name__} as a filter, but this allows "
            f"all nonempty strings! Did you mean {k}.is{condition.__name__}?",
            HypothesisWarning,
            stacklevel=2,
        )

    elems = unwrap_strategies(self.element_strategy)
    # Bound methods of a compiled regex (``pattern.search`` etc.) expose the
    # pattern as ``__self__``; only rewrite when the pattern's type matches
    # ``kind`` and, for text, the alphabet is one of our own strategies.
    if (
        (kind is bytes or isinstance(elems, OneCharStringStrategy))
        and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)
        and isinstance(pattern.pattern, kind)
    ):
        from hypothesis.strategies._internal.regex import regex_strategy

        if condition.__name__ == "match":
            # Replace with an easier-to-handle equivalent condition
            caret = "^" if kind is str else b"^"
            pattern = re.compile(caret + pattern.pattern, flags=pattern.flags)
            condition = pattern.search

        if condition.__name__ in ("search", "findall", "fullmatch"):
            s = regex_strategy(
                pattern,
                fullmatch=condition.__name__ == "fullmatch",
                alphabet=self.element_strategy if kind is str else None,
            )
            # The size bounds of the original strategy are re-imposed as
            # filters on the regex strategy.
            if self.min_size > 0:
                s = s.filter(partial(min_len, self.min_size))
            if self.max_size < float("inf"):
                s = s.filter(partial(max_len, self.max_size))
            return s
        elif condition.__name__ in ("finditer", "scanner"):
            # PyPy implements `finditer` as an alias to their `scanner` method
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any string at all! Did you mean .findall ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self
        elif condition.__name__ == "split":
            warnings.warn(
                f"You applied {pretty(condition)} as a filter, but this allows "
                f"any nonempty string! Did you mean .search ?",
                HypothesisWarning,
                stacklevel=3,
            )
            return self.filter(bool)

    # We use ListStrategy filter logic for the conditions that *only* imply
    # the string is nonempty.  Here, we increment the min_size but still apply
    # the filter for conditions that imply nonempty *and specific contents*.
    if condition in self._nonempty_and_content_filters and self.max_size >= 1:
        # Work on a shallow copy so the caller's strategy keeps its min_size.
        self = copy.copy(self)
        self.min_size = max(1, self.min_size)
        return ListStrategy.filter(self, condition)

    return None

239 

240 

# Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt
# Python updates its Unicode version between minor releases, but fortunately
# these properties do not change between the Unicode versions in question.
#
# Each data row has the form ``<code>[..<code>] ; <property> # <comment>``
# with code points written in hexadecimal; rows are parsed by
# `_identifier_characters`, and lines not matching that form are skipped.
_PROPLIST = """
# ================================================

1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
212E ; Other_ID_Start # So ESTIMATED SYMBOL
309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK

# Total code points: 6

# ================================================

00B7 ; Other_ID_Continue # Po MIDDLE DOT
0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE

# Total code points: 12
"""

263 

264 

@lru_cache
def _identifier_characters():
    """Return the ``(id_start, id_continue)`` interval sets for identifiers.

    See https://docs.python.org/3/reference/lexical_analysis.html#identifiers
    """
    # Start by collecting the special characters listed in _PROPLIST,
    # grouped by the property they carry.
    extras = {"Other_ID_Start": "", "Other_ID_Continue": ""}
    row_pattern = re.compile(r"([0-9A-F.]+) +; (\w+) # ")
    for row in _PROPLIST.splitlines():
        found = row_pattern.match(row)
        if found is None:
            continue
        codes, prop = found.groups()
        # A lone code point yields the same value for both slices; a range
        # ``XXXX..YYYY`` yields its first and last code point.
        first = int(codes[:4], base=16)
        last = int(codes[-4:], base=16)
        extras[prop] += "".join(map(chr, range(first, last + 1)))

    # Then get the basic set by Unicode category and known extras
    start_categories = ("Lu", "Ll", "Lt", "Lm", "Lo", "Nl")
    id_start = charmap.query(
        categories=start_categories,
        include_characters="_" + extras["Other_ID_Start"],
    )
    id_start -= IntervalSet.from_string(
        # Magic value: the characters which NFKC-normalize to be invalid identifiers.
        # Conveniently they're all in `id_start`, so we only need to do this once.
        "\u037a\u0e33\u0eb3\u2e2f\u309b\u309c\ufc5e\ufc5f\ufc60\ufc61\ufc62\ufc63"
        "\ufdfa\ufdfb\ufe70\ufe72\ufe74\ufe76\ufe78\ufe7a\ufe7c\ufe7e\uff9e\uff9f"
    )

    continue_categories = ("Mn", "Mc", "Nd", "Pc")
    continue_only = charmap.query(
        categories=continue_categories,
        include_characters=extras["Other_ID_Continue"],
    )
    id_continue = id_start | continue_only
    return id_start, id_continue

292 

293 

class BytesStrategy(ListStrategy):
    """List strategy over the integers 0..255 whose draws are returned as ``bytes``."""

    def __init__(self, min_size, max_size):
        byte_values = IntegersStrategy(0, 255)
        super().__init__(byte_values, min_size=min_size, max_size=max_size)

    def do_draw(self, data):
        # TODO: refactor the underlying provider to support variable-length bytes
        if self.min_size != self.max_size:
            return bytes(super().do_draw(data))
        # Fixed length: draw the whole buffer in a single call.
        return bytes(data.draw_bytes(self.min_size))

    _nonempty_filters = (
        *ListStrategy._nonempty_filters,
        bytes,
        *(getattr(bytes, name) for name in _nonempty_names),
    )
    _nonempty_and_content_filters = tuple(
        getattr(bytes, name) for name in _nonempty_and_content_names
    )

    def filter(self, condition):
        """Return a strategy for bytes satisfying ``condition``.

        Conditions recognised by ``_string_filter_rewrite`` are rewritten;
        everything else falls back to the parent class's ``filter``.
        """
        rewritten = _string_filter_rewrite(self, bytes, condition)
        if rewritten is None:
            return super().filter(condition)
        return rewritten