Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pathspec/patterns/gitwildmatch.py: 16%

1"""

2This module implements Git's wildmatch pattern matching which itself is

3derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"

4files.

5"""

7import re

8import warnings

9from typing import (

10 AnyStr,

11 Optional,

12 Tuple)

14from .. import util

15from ..pattern import RegexPattern

17_BYTES_ENCODING = 'latin1'

18"""

19The encoding to use when parsing a byte string pattern.

20"""

22_DIR_MARK = 'ps_d'

23"""

24The regex group name for the directory marker. This is only used by

25:class:`GitIgnoreSpec`.

26"""

class GitWildMatchPatternError(ValueError):
    """
    Error raised for an invalid git wildmatch pattern.

    This is a :class:`ValueError` subclass, so callers that already catch
    :class:`ValueError` keep working.
    """

37class GitWildMatchPattern(RegexPattern):

38 """

39 The :class:`GitWildMatchPattern` class represents a compiled Git

40 wildmatch pattern.

41 """

43 # Keep the dict-less class hierarchy.

44 __slots__ = ()

46 @classmethod

47 def pattern_to_regex(

48 cls,

49 pattern: AnyStr,

50 ) -> Tuple[Optional[AnyStr], Optional[bool]]:

51 """

52 Convert the pattern into a regular expression.

54 *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert

55 into a regular expression.

57 Returns the uncompiled regular expression (:class:`str`, :class:`bytes`,

58 or :data:`None`); and whether matched files should be included

59 (:data:`True`), excluded (:data:`False`), or if it is a

60 null-operation (:data:`None`).

61 """

62 if isinstance(pattern, str):

63 return_type = str

64 elif isinstance(pattern, bytes):

65 return_type = bytes

66 pattern = pattern.decode(_BYTES_ENCODING)

67 else:

68 raise TypeError(f"pattern:{pattern!r} is not a unicode or byte string.")

70 original_pattern = pattern

71 pattern = pattern.strip()

73 if pattern.startswith('#'):

74 # A pattern starting with a hash ('#') serves as a comment

75 # (neither includes nor excludes files). Escape the hash with a

76 # back-slash to match a literal hash (i.e., '\#').

77 regex = None

78 include = None

80 elif pattern == '/':

81 # EDGE CASE: According to `git check-ignore` (v2.4.1), a single

82 # '/' does not match any file.

83 regex = None

84 include = None

86 elif pattern:

87 if pattern.startswith('!'):

88 # A pattern starting with an exclamation mark ('!') negates the

89 # pattern (exclude instead of include). Escape the exclamation

90 # mark with a back-slash to match a literal exclamation mark

91 # (i.e., '\!').

92 include = False

93 # Remove leading exclamation mark.

94 pattern = pattern[1:]

95 else:

96 include = True

98 # Allow a regex override for edge cases that cannot be handled

99 # through normalization.

100 override_regex = None

101

102 # Split pattern into segments.

103 pattern_segs = pattern.split('/')

104

105 # Normalize pattern to make processing easier.

106

107 # EDGE CASE: Deal with duplicate double-asterisk sequences.

108 # Collapse each sequence down to one double-asterisk. Iterate over

109 # the segments in reverse and remove the duplicate double

110 # asterisks as we go.

111 for i in range(len(pattern_segs) - 1, 0, -1):

112 prev = pattern_segs[i-1]

113 seg = pattern_segs[i]

114 if prev == '**' and seg == '**':

115 del pattern_segs[i]

116

117 if len(pattern_segs) == 2 and pattern_segs[0] == '**' and not pattern_segs[1]:

118 # EDGE CASE: The '**/' pattern should match everything except

119 # individual files in the root directory. This case cannot be

120 # adequately handled through normalization. Use the override.

121 override_regex = f'^.+(?P<{_DIR_MARK}>/).*$'

122

123 if not pattern_segs[0]:

124 # A pattern beginning with a slash ('/') will only match paths

125 # directly on the root directory instead of any descendant

126 # paths. So, remove empty first segment to make pattern relative

127 # to root.

128 del pattern_segs[0]

129

130 elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):

131 # A single pattern without a beginning slash ('/') will match

132 # any descendant path. This is equivalent to "**/{pattern}". So,

133 # prepend with double-asterisks to make pattern relative to

134 # root.

135 # EDGE CASE: This also holds for a single pattern with a

136 # trailing slash (e.g. dir/).

137 if pattern_segs[0] != '**':

138 pattern_segs.insert(0, '**')

139

140 else:

141 # EDGE CASE: A pattern without a beginning slash ('/') but

142 # contains at least one prepended directory (e.g.

143 # "dir/{pattern}") should not match "**/dir/{pattern}",

144 # according to `git check-ignore` (v2.4.1).

145 pass

146

147 if not pattern_segs:

148 # After resolving the edge cases, we end up with no pattern at

149 # all. This must be because the pattern is invalid.

150 raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}")

151

152 if not pattern_segs[-1] and len(pattern_segs) > 1:

153 # A pattern ending with a slash ('/') will match all descendant

154 # paths if it is a directory but not if it is a regular file.

155 # This is equivalent to "{pattern}/**". So, set last segment to

156 # a double-asterisk to include all descendants.

157 pattern_segs[-1] = '**'

158

159 if override_regex is None:

160 # Build regular expression from pattern.

161 output = ['^']

162 need_slash = False

163 end = len(pattern_segs) - 1

164 for i, seg in enumerate(pattern_segs):

165 if seg == '**':

166 if i == 0 and i == end:

167 # A pattern consisting solely of double-asterisks ('**')

168 # will match every path.

169 output.append(f'[^/]+(?:(?P<{_DIR_MARK}>/).*)?')

170 elif i == 0:

171 # A normalized pattern beginning with double-asterisks

172 # ('**') will match any leading path segments.

173 output.append('(?:.+/)?')

174 need_slash = False

175 elif i == end:

176 # A normalized pattern ending with double-asterisks ('**')

177 # will match any trailing path segments.

178 output.append(f'(?P<{_DIR_MARK}>/).*')

179 else:

180 # A pattern with inner double-asterisks ('**') will match

181 # multiple (or zero) inner path segments.

182 output.append('(?:/.+)?')

183 need_slash = True

184

185 elif seg == '*':

186 # Match single path segment.

187 if need_slash:

188 output.append('/')

189

190 output.append('[^/]+')

191

192 if i == end:

193 # A pattern ending without a slash ('/') will match a file

194 # or a directory (with paths underneath it). E.g., "foo"

195 # matches "foo", "foo/bar", "foo/bar/baz", etc.

196 output.append(f'(?:(?P<{_DIR_MARK}>/).*)?')

197

198 need_slash = True

199

200 else:

201 # Match segment glob pattern.

202 if need_slash:

203 output.append('/')

204

205 try:

206 output.append(cls._translate_segment_glob(seg))

207 except ValueError as e:

208 raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}") from e

209

210 if i == end:

211 # A pattern ending without a slash ('/') will match a file

212 # or a directory (with paths underneath it). E.g., "foo"

213 # matches "foo", "foo/bar", "foo/bar/baz", etc.

214 output.append(f'(?:(?P<{_DIR_MARK}>/).*)?')

215

216 need_slash = True

217

218 output.append('$')

219 regex = ''.join(output)

220

221 else:

222 # Use regex override.

223 regex = override_regex

224

225 else:

226 # A blank pattern is a null-operation (neither includes nor

227 # excludes files).

228 regex = None

229 include = None

230

231 if regex is not None and return_type is bytes:

232 regex = regex.encode(_BYTES_ENCODING)

233

234 return regex, include

235

236 @staticmethod

237 def _translate_segment_glob(pattern: str) -> str:

238 """

239 Translates the glob pattern to a regular expression. This is used in

240 the constructor to translate a path segment glob pattern to its

241 corresponding regular expression.

242

243 *pattern* (:class:`str`) is the glob pattern.

244

245 Returns the regular expression (:class:`str`).

246 """

247 # NOTE: This is derived from `fnmatch.translate()` and is similar to

248 # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

249

250 escape = False

251 regex = ''

252 i, end = 0, len(pattern)

253 while i < end:

254 # Get next character.

255 char = pattern[i]

256 i += 1

257

258 if escape:

259 # Escape the character.

260 escape = False

261 regex += re.escape(char)

262

263 elif char == '\\':

264 # Escape character, escape next character.

265 escape = True

266

267 elif char == '*':

268 # Multi-character wildcard. Match any string (except slashes),

269 # including an empty string.

270 regex += '[^/]*'

271

272 elif char == '?':

273 # Single-character wildcard. Match any single character (except

274 # a slash).

275 regex += '[^/]'

276

277 elif char == '[':

278 # Bracket expression wildcard. Except for the beginning

279 # exclamation mark, the whole bracket expression can be used

280 # directly as regex but we have to find where the expression

281 # ends.

282 # - "[][!]" matches ']', '[' and '!'.

283 # - "[]-]" matches ']' and '-'.

284 # - "[!]a-]" matches any character except ']', 'a' and '-'.

285 j = i

286 # Pass brack expression negation.

287 if j < end and pattern[j] == '!':

288 j += 1

289 # Pass first closing bracket if it is at the beginning of the

290 # expression.

291 if j < end and pattern[j] == ']':

292 j += 1

293 # Find closing bracket. Stop once we reach the end or find it.

294 while j < end and pattern[j] != ']':

295 j += 1

296

297 if j < end:

298 # Found end of bracket expression. Increment j to be one past

299 # the closing bracket:

300 #

301 # [...]

302 # ^ ^

303 # i j

304 #

305 j += 1

306 expr = '['

307

308 if pattern[i] == '!':

309 # Braket expression needs to be negated.

310 expr += '^'

311 i += 1

312 elif pattern[i] == '^':

313 # POSIX declares that the regex bracket expression negation

314 # "[^...]" is undefined in a glob pattern. Python's

315 # `fnmatch.translate()` escapes the caret ('^') as a

316 # literal. To maintain consistency with undefined behavior,

317 # I am escaping the '^' as well.

318 expr += '\\^'

319 i += 1

320

321 # Build regex bracket expression. Escape slashes so they are

322 # treated as literal slashes by regex as defined by POSIX.

323 expr += pattern[i:j].replace('\\', '\\\\')

324

325 # Add regex bracket expression to regex result.

326 regex += expr

327

328 # Set i to one past the closing bracket.

329 i = j

330

331 else:

332 # Failed to find closing bracket, treat opening bracket as a

333 # bracket literal instead of as an expression.

334 regex += '\\['

335

336 else:

337 # Regular character, escape it for regex.

338 regex += re.escape(char)

339

340 if escape:

341 raise ValueError(f"Escape character found with no next character to escape: {pattern!r}")

342

343 return regex

344

345 @staticmethod

346 def escape(s: AnyStr) -> AnyStr:

347 """

348 Escape special characters in the given string.

349

350 *s* (:class:`str` or :class:`bytes`) a filename or a string that you

351 want to escape, usually before adding it to a ".gitignore".

352

353 Returns the escaped string (:class:`str` or :class:`bytes`).

354 """

355 if isinstance(s, str):

356 return_type = str

357 string = s

358 elif isinstance(s, bytes):

359 return_type = bytes

360 string = s.decode(_BYTES_ENCODING)

361 else:

362 raise TypeError(f"s:{s!r} is not a unicode or byte string.")

363

364 # Reference: https://git-scm.com/docs/gitignore#_pattern_format

365 meta_characters = r"[]!*#?"

366

367 out_string = "".join("\\" + x if x in meta_characters else x for x in string)

368

369 if return_type is bytes:

370 return out_string.encode(_BYTES_ENCODING)

371 else:

372 return out_string

373

374util.register_pattern('gitwildmatch', GitWildMatchPattern)

375

376

class GitIgnorePattern(GitWildMatchPattern):
    """
    Deprecated alias of :class:`GitWildMatchPattern`, kept only for
    backward compatibility with v0.4. Creating an instance or calling
    :meth:`pattern_to_regex` emits a :class:`DeprecationWarning`.
    """

    def __init__(self, *args, **kw) -> None:
        """
        Emit the deprecation warning, then initialize as the parent class
        does. All arguments are forwarded unchanged.
        """
        self._deprecated()
        super().__init__(*args, **kw)

    @staticmethod
    def _deprecated() -> None:
        """
        Emit the deprecation warning for this class.
        """
        message = (
            "GitIgnorePattern ('gitignore') is deprecated. Use "
            "GitWildMatchPattern ('gitwildmatch') instead."
        )
        # stacklevel=3 skips this helper and the method that called it.
        warnings.warn(message, DeprecationWarning, stacklevel=3)

    @classmethod
    def pattern_to_regex(cls, *args, **kw):
        """
        Emit the deprecation warning, then delegate to the parent class
        implementation. All arguments are forwarded unchanged.
        """
        cls._deprecated()
        return super().pattern_to_regex(*args, **kw)

407

408# Register `GitIgnorePattern` as "gitignore" for backward compatibility

409# with v0.4.

410util.register_pattern('gitignore', GitIgnorePattern)