Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pathspec/patterns/gitwildmatch.py: 18%

158 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:35 +0000

1# encoding: utf-8 

2""" 

3This module implements Git's wildmatch pattern matching which itself is 

4derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore" 

5files. 

6""" 

7from __future__ import unicode_literals 

8 

9import re 

10import warnings 

11try: 

12 from typing import ( 

13 AnyStr, 

14 Optional, 

15 Text, 

16 Tuple) 

17except ImportError: 

18 pass 

19 

20from .. import util 

21from ..compat import unicode 

22from ..pattern import RegexPattern 

23 

#: The encoding to use when parsing a byte string pattern. Byte strings
#: are decoded with it before processing, and results are encoded back
#: with it when the caller passed bytes.
_BYTES_ENCODING = 'latin1'

26 

27 

class GitWildMatchPatternError(ValueError):
    """
    Raised to signal that a git wild match pattern is invalid.

    This is a :class:`ValueError` subclass, so handlers written for
    :class:`ValueError` catch it as well.
    """

34 

35 

class GitWildMatchPattern(RegexPattern):
    """
    The :class:`GitWildMatchPattern` class represents a compiled Git
    wildmatch pattern.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @classmethod
    def pattern_to_regex(cls, pattern):
        # type: (AnyStr) -> Tuple[Optional[AnyStr], Optional[bool]]
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`unicode` or :class:`bytes`) is the pattern to
        convert into a regular expression.

        Returns the uncompiled regular expression (:class:`unicode`,
        :class:`bytes`, or :data:`None`), and whether matched files should
        be included (:data:`True`), excluded (:data:`False`), or if it is
        a null-operation (:data:`None`). The regular expression has the
        same string type as *pattern*.

        Raises :class:`TypeError` if *pattern* is neither a unicode nor a
        byte string. Raises :class:`GitWildMatchPatternError` if nothing
        is left of the pattern after normalization.
        """
        if isinstance(pattern, unicode):
            return_type = unicode
        elif isinstance(pattern, bytes):
            return_type = bytes
            pattern = pattern.decode(_BYTES_ENCODING)
        else:
            raise TypeError("pattern:{!r} is not a unicode or byte string.".format(pattern))

        original_pattern = pattern

        if pattern.endswith('\\ '):
            # EDGE CASE: A trailing space quoted with a back-slash is part
            # of the pattern, so only strip whitespace from the left.
            # Stripping both sides would leave a dangling back-slash and
            # lose the space.
            pattern = pattern.lstrip()
        else:
            pattern = pattern.strip()

        if pattern.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment
            # (neither includes nor excludes files). Escape the hash with a
            # back-slash to match a literal hash (i.e., '\#').
            regex = None
            include = None

        elif pattern == '/':
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
            # '/' does not match any file.
            regex = None
            include = None

        elif pattern:
            if pattern.startswith('!'):
                # A pattern starting with an exclamation mark ('!') negates the
                # pattern (exclude instead of include). Escape the exclamation
                # mark with a back-slash to match a literal exclamation mark
                # (i.e., '\!').
                include = False
                # Remove leading exclamation mark.
                pattern = pattern[1:]
            else:
                include = True

            if pattern.startswith(('\\#', '\\!')) or pattern == '\\':
                # Remove leading back-slash escape for escaped hash ('#') or
                # exclamation mark ('!'). Any other leading escape (e.g.
                # '\*', '\?', '\[' or '\\') must be kept so that the segment
                # translation treats the escaped character as a literal;
                # dropping it would turn '\*' into the wildcard '*'.
                # A lone back-slash has nothing to escape: dropping it
                # leaves an empty pattern which is rejected below.
                pattern = pattern[1:]

            # Allow a regex override for edge cases that cannot be handled
            # through normalization.
            override_regex = None

            # Split pattern into segments.
            pattern_segs = pattern.split('/')

            # Normalize pattern to make processing easier.

            # EDGE CASE: Deal with duplicate double-asterisk sequences.
            # Collapse each sequence down to one double-asterisk. Iterate over
            # the segments in reverse and remove the duplicate double
            # asterisks as we go.
            for i in range(len(pattern_segs) - 1, 0, -1):
                prev = pattern_segs[i - 1]
                seg = pattern_segs[i]
                if prev == '**' and seg == '**':
                    del pattern_segs[i]

            if len(pattern_segs) == 2 and pattern_segs[0] == '**' and not pattern_segs[1]:
                # EDGE CASE: The '**/' pattern should match everything except
                # individual files in the root directory. This case cannot be
                # adequately handled through normalization. Use the override.
                override_regex = '^.+/.*$'

            if not pattern_segs[0]:
                # A pattern beginning with a slash ('/') will only match paths
                # directly on the root directory instead of any descendant
                # paths. So, remove empty first segment to make pattern relative
                # to root.
                del pattern_segs[0]

            elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
                # A single pattern without a beginning slash ('/') will match
                # any descendant path. This is equivalent to "**/{pattern}". So,
                # prepend with double-asterisks to make pattern relative to
                # root.
                # EDGE CASE: This also holds for a single pattern with a
                # trailing slash (e.g. dir/).
                if pattern_segs[0] != '**':
                    pattern_segs.insert(0, '**')

            else:
                # EDGE CASE: A pattern without a beginning slash ('/') but
                # contains at least one prepended directory (e.g.
                # "dir/{pattern}") should not match "**/dir/{pattern}",
                # according to `git check-ignore` (v2.4.1).
                pass

            if not pattern_segs:
                # After resolving the edge cases, we end up with no
                # pattern at all. This must be because the pattern is
                # invalid.
                raise GitWildMatchPatternError("Invalid git pattern: %r" % (original_pattern,))

            if not pattern_segs[-1] and len(pattern_segs) > 1:
                # A pattern ending with a slash ('/') will match all
                # descendant paths if it is a directory but not if it is a
                # regular file. This is equivalent to "{pattern}/**". So, set
                # last segment to a double-asterisk to include all
                # descendants.
                pattern_segs[-1] = '**'

            if override_regex is None:
                # Build regular expression from pattern.
                output = ['^']
                # Whether the next literal/glob segment must be preceded by
                # a slash separator.
                need_slash = False
                end = len(pattern_segs) - 1
                for i, seg in enumerate(pattern_segs):
                    if seg == '**':
                        if i == 0 and i == end:
                            # A pattern consisting solely of double-asterisks ('**')
                            # will match every path.
                            output.append('.+')
                        elif i == 0:
                            # A normalized pattern beginning with double-asterisks
                            # ('**') will match any leading path segments.
                            output.append('(?:.+/)?')
                            need_slash = False
                        elif i == end:
                            # A normalized pattern ending with double-asterisks ('**')
                            # will match any trailing path segments.
                            output.append('/.*')
                        else:
                            # A pattern with inner double-asterisks ('**') will match
                            # multiple (or zero) inner path segments.
                            output.append('(?:/.+)?')
                            need_slash = True

                    elif seg == '*':
                        # Match single path segment.
                        if need_slash:
                            output.append('/')
                        output.append('[^/]+')
                        need_slash = True

                    else:
                        # Match segment glob pattern.
                        if need_slash:
                            output.append('/')

                        output.append(cls._translate_segment_glob(seg))
                        if i == end and include is True:
                            # A pattern ending without a slash ('/') will match a file
                            # or a directory (with paths underneath it). E.g., "foo"
                            # matches "foo", "foo/bar", "foo/bar/baz", etc.
                            # EDGE CASE: However, this does not hold for exclusion cases
                            # according to `git check-ignore` (v2.4.1).
                            output.append('(?:/.*)?')

                        need_slash = True

                output.append('$')
                regex = ''.join(output)

            else:
                # Use regex override.
                regex = override_regex

        else:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            regex = None
            include = None

        if regex is not None and return_type is bytes:
            # Hand back the same string type the caller passed in.
            regex = regex.encode(_BYTES_ENCODING)

        return regex, include

    @staticmethod
    def _translate_segment_glob(pattern):
        # type: (Text) -> Text
        """
        Translates the glob pattern to a regular expression. This is used
        by :meth:`pattern_to_regex` to translate a single path segment glob
        pattern to its corresponding regular expression.

        *pattern* (:class:`str`) is the glob pattern.

        Returns the regular expression (:class:`str`). A back-slash that is
        the last character of *pattern* has nothing to escape and produces
        no output.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        escape = False
        # Collect the regex fragments and join them once at the end.
        parts = []
        i, end = 0, len(pattern)
        while i < end:
            # Get next character.
            char = pattern[i]
            i += 1

            if escape:
                # Escape the character.
                escape = False
                parts.append(re.escape(char))

            elif char == '\\':
                # Escape character, escape next character.
                escape = True

            elif char == '*':
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                parts.append('[^/]*')

            elif char == '?':
                # Single-character wildcard. Match any single character (except
                # a slash).
                parts.append('[^/]')

            elif char == '[':
                # Bracket expression wildcard. Except for the beginning
                # exclamation mark, the whole bracket expression can be used
                # directly as regex but we have to find where the expression
                # ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                j = i
                # Pass bracket expression negation.
                if j < end and pattern[j] == '!':
                    j += 1
                # Pass first closing bracket if it is at the beginning of the
                # expression.
                if j < end and pattern[j] == ']':
                    j += 1
                # Find closing bracket. Stop once we reach the end or find it.
                while j < end and pattern[j] != ']':
                    j += 1

                if j < end:
                    # Found end of bracket expression. Increment j to be one past
                    # the closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #   i   j
                    #
                    j += 1
                    expr = '['

                    if pattern[i] == '!':
                        # Bracket expression needs to be negated.
                        expr += '^'
                        i += 1
                    elif pattern[i] == '^':
                        # POSIX declares that the regex bracket expression negation
                        # "[^...]" is undefined in a glob pattern. Python's
                        # `fnmatch.translate()` escapes the caret ('^') as a
                        # literal. To maintain consistency with undefined behavior,
                        # I am escaping the '^' as well.
                        expr += '\\^'
                        i += 1

                    # Build regex bracket expression. Escape back-slashes so
                    # they are treated as literal back-slashes by regex as
                    # defined by POSIX.
                    expr += pattern[i:j].replace('\\', '\\\\')

                    # Add regex bracket expression to regex result.
                    parts.append(expr)

                    # Set i to one past the closing bracket.
                    i = j

                else:
                    # Failed to find closing bracket, treat opening bracket as a
                    # bracket literal instead of as an expression.
                    parts.append('\\[')

            else:
                # Regular character, escape it for regex.
                parts.append(re.escape(char))

        return ''.join(parts)

    @staticmethod
    def escape(s):
        # type: (AnyStr) -> AnyStr
        """
        Escape special characters in the given string.

        *s* (:class:`unicode` or :class:`bytes`) a filename or a string
        that you want to escape, usually before adding it to a
        `.gitignore`.

        Returns the escaped string (:class:`unicode` or :class:`bytes`),
        of the same string type as *s*.

        Raises :class:`TypeError` if *s* is neither a unicode nor a byte
        string.
        """
        if isinstance(s, unicode):
            return_type = unicode
            string = s
        elif isinstance(s, bytes):
            return_type = bytes
            string = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError("s:{!r} is not a unicode or byte string.".format(s))

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        meta_characters = r"[]!*#?"

        # Prefix every meta character with a back-slash; copy all other
        # characters unchanged.
        out_string = "".join("\\" + x if x in meta_characters else x for x in string)

        if return_type is bytes:
            return out_string.encode(_BYTES_ENCODING)
        else:
            return out_string

366 

# Register `GitWildMatchPattern` as "gitwildmatch".
util.register_pattern('gitwildmatch', GitWildMatchPattern)

368 

369 

class GitIgnorePattern(GitWildMatchPattern):
    """
    Deprecated stand-in for :class:`GitWildMatchPattern`, kept only so
    that code written against v0.4 continues to work. Creating an
    instance or calling :meth:`pattern_to_regex` issues a
    :class:`DeprecationWarning` and then defers to the parent class.
    """

    @staticmethod
    def _deprecated():
        """
        Issue the deprecation warning.
        """
        message = "GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern ('gitwildmatch') instead."
        # stacklevel=3 attributes the warning two frames above this
        # helper: past the method that called it, to that method's caller.
        warnings.warn(message, DeprecationWarning, stacklevel=3)

    def __init__(self, *args, **kw):
        """
        Issue the deprecation warning, then initialize via the parent
        class with the arguments unchanged.
        """
        self._deprecated()
        super(GitIgnorePattern, self).__init__(*args, **kw)

    @classmethod
    def pattern_to_regex(cls, *args, **kw):
        """
        Issue the deprecation warning, then return whatever the parent
        implementation returns for the same arguments.
        """
        cls._deprecated()
        parent = super(GitIgnorePattern, cls)
        return parent.pattern_to_regex(*args, **kw)

397 

# Register `GitIgnorePattern` as "gitignore" for backward compatibility
# with v0.4. The class is deprecated and warns whenever it is
# instantiated or its `pattern_to_regex()` is called.
util.register_pattern('gitignore', GitIgnorePattern)