Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pathspec/patterns/gitwildmatch.py: 18%

1# encoding: utf-8

2"""

3This module implements Git's wildmatch pattern matching which itself is

4derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"

5files.

6"""

7from __future__ import unicode_literals

9import re

10import warnings

11try:

12 from typing import (

13 AnyStr,

14 Optional,

15 Text,

16 Tuple)

17except ImportError:

18 pass

20from .. import util

21from ..compat import unicode

22from ..pattern import RegexPattern

#: The encoding to use when parsing a byte string pattern. Byte string
#: input is decoded with this encoding before it is processed, and the
#: resulting text is encoded back with it when a byte string was given.
_BYTES_ENCODING = 'latin1'

class GitWildMatchPatternError(ValueError):
    """
    Signals that a git wildmatch pattern is invalid.

    The error derives from :class:`ValueError`, so callers that already
    catch :class:`ValueError` also catch it.
    """

class GitWildMatchPattern(RegexPattern):
    """
    The :class:`GitWildMatchPattern` class represents a compiled Git
    wildmatch pattern.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @classmethod
    def pattern_to_regex(cls, pattern):
        # type: (AnyStr) -> Tuple[Optional[AnyStr], Optional[bool]]
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`unicode` or :class:`bytes`) is the pattern to
        convert into a regular expression.

        Returns the uncompiled regular expression (:class:`unicode`,
        :class:`bytes`, or :data:`None`), and whether matched files should
        be included (:data:`True`), excluded (:data:`False`), or if it is a
        null-operation (:data:`None`). The regular expression has the same
        string type as *pattern*.

        Raises :exc:`TypeError` if *pattern* is neither a unicode nor a
        byte string, and :exc:`GitWildMatchPatternError` if nothing is
        left of the pattern after normalization (e.g., a lone "!").
        """
        if isinstance(pattern, unicode):
            return_type = unicode
        elif isinstance(pattern, bytes):
            return_type = bytes
            pattern = pattern.decode(_BYTES_ENCODING)
        else:
            raise TypeError("pattern:{!r} is not a unicode or byte string.".format(pattern))

        original_pattern = pattern

        # Strip surrounding whitespace.
        # EDGE CASE: A trailing space that is escaped with a back-slash
        # (e.g., 'foo\ ') is part of the pattern and must survive. The
        # space is escaped only when it is preceded by an odd number of
        # back-slashes (an even number is a run of escaped back-slashes).
        stripped = pattern.rstrip()
        trailing_escapes = len(stripped) - len(stripped.rstrip('\\'))
        if trailing_escapes % 2 == 1 and pattern[len(stripped):len(stripped) + 1] == ' ':
            stripped += ' '
        pattern = stripped.lstrip()

        if pattern.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment
            # (neither includes nor excludes files). Escape the hash with a
            # back-slash to match a literal hash (i.e., '\#').
            regex = None
            include = None

        elif pattern == '/':
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
            # '/' does not match any file.
            regex = None
            include = None

        elif pattern:
            if pattern.startswith('!'):
                # A pattern starting with an exclamation mark ('!') negates
                # the pattern (exclude instead of include). Escape the
                # exclamation mark with a back-slash to match a literal
                # exclamation mark (i.e., '\!').
                include = False
                # Remove leading exclamation mark.
                pattern = pattern[1:]
            else:
                include = True

            if pattern.startswith('\\') and pattern[1:2] not in ('*', '?', '[', '\\'):
                # Remove leading back-slash escape for escaped hash ('#') or
                # exclamation mark ('!').
                # EDGE CASE: The back-slash is kept when it escapes a
                # character that is special inside a segment ('*', '?', '['
                # or another back-slash). Dropping it there would turn the
                # escaped literal into a live wildcard; the segment
                # translation below consumes the escape instead.
                pattern = pattern[1:]

            # Allow a regex override for edge cases that cannot be handled
            # through normalization.
            override_regex = None

            # Split pattern into segments.
            pattern_segs = pattern.split('/')

            # Normalize pattern to make processing easier.

            # EDGE CASE: Deal with duplicate double-asterisk sequences.
            # Collapse each sequence down to one double-asterisk. Iterate
            # over the segments in reverse and remove the duplicate double
            # asterisks as we go.
            for i in range(len(pattern_segs) - 1, 0, -1):
                prev = pattern_segs[i - 1]
                seg = pattern_segs[i]
                if prev == '**' and seg == '**':
                    del pattern_segs[i]

            if len(pattern_segs) == 2 and pattern_segs[0] == '**' and not pattern_segs[1]:
                # EDGE CASE: The '**/' pattern should match everything
                # except individual files in the root directory. This case
                # cannot be adequately handled through normalization. Use
                # the override.
                override_regex = '^.+/.*$'

            if not pattern_segs[0]:
                # A pattern beginning with a slash ('/') will only match
                # paths directly on the root directory instead of any
                # descendant paths. So, remove empty first segment to make
                # pattern relative to root.
                del pattern_segs[0]

            elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
                # A single pattern without a beginning slash ('/') will
                # match any descendant path. This is equivalent to
                # "**/{pattern}". So, prepend with double-asterisks to make
                # pattern relative to root.
                # EDGE CASE: This also holds for a single pattern with a
                # trailing slash (e.g. dir/).
                if pattern_segs[0] != '**':
                    pattern_segs.insert(0, '**')

            # EDGE CASE: Otherwise, a pattern without a beginning slash
            # ('/') but containing at least one prepended directory (e.g.
            # "dir/{pattern}") should not match "**/dir/{pattern}",
            # according to `git check-ignore` (v2.4.1). Nothing to do.

            if not pattern_segs:
                # After resolving the edge cases, we end up with no pattern
                # at all. This must be because the pattern is invalid.
                raise GitWildMatchPatternError("Invalid git pattern: %r" % (original_pattern,))

            if not pattern_segs[-1] and len(pattern_segs) > 1:
                # A pattern ending with a slash ('/') will match all
                # descendant paths if it is a directory but not if it is a
                # regular file. This is equivalent to "{pattern}/**". So,
                # set last segment to a double-asterisk to include all
                # descendants.
                pattern_segs[-1] = '**'

            if override_regex is None:
                # Build regular expression from pattern.
                output = ['^']
                need_slash = False
                end = len(pattern_segs) - 1
                for i, seg in enumerate(pattern_segs):
                    if seg == '**':
                        if i == 0 and i == end:
                            # A pattern consisting solely of double-asterisks
                            # ('**') will match every path.
                            output.append('.+')
                        elif i == 0:
                            # A normalized pattern beginning with
                            # double-asterisks ('**') will match any leading
                            # path segments.
                            output.append('(?:.+/)?')
                            need_slash = False
                        elif i == end:
                            # A normalized pattern ending with
                            # double-asterisks ('**') will match any trailing
                            # path segments.
                            output.append('/.*')
                        else:
                            # A pattern with inner double-asterisks ('**')
                            # will match multiple (or zero) inner path
                            # segments.
                            output.append('(?:/.+)?')
                            need_slash = True

                    elif seg == '*':
                        # Match single path segment.
                        if need_slash:
                            output.append('/')
                        output.append('[^/]+')
                        need_slash = True

                    else:
                        # Match segment glob pattern.
                        if need_slash:
                            output.append('/')

                        output.append(cls._translate_segment_glob(seg))
                        if i == end and include is True:
                            # A pattern ending without a slash ('/') will
                            # match a file or a directory (with paths
                            # underneath it). E.g., "foo" matches "foo",
                            # "foo/bar", "foo/bar/baz", etc.
                            # EDGE CASE: However, this does not hold for
                            # exclusion cases according to
                            # `git check-ignore` (v2.4.1).
                            output.append('(?:/.*)?')

                        need_slash = True

                output.append('$')
                regex = ''.join(output)

            else:
                # Use regex override.
                regex = override_regex

        else:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            regex = None
            include = None

        if regex is not None and return_type is bytes:
            # Hand back the same string type that was passed in.
            regex = regex.encode(_BYTES_ENCODING)

        return regex, include

    @staticmethod
    def _translate_segment_glob(pattern):
        # type: (Text) -> Text
        """
        Translates the glob pattern to a regular expression. This is used
        by :meth:`pattern_to_regex` to translate a path segment glob
        pattern to its corresponding regular expression.

        *pattern* (:class:`str`) is the glob pattern.

        Returns the regular expression (:class:`str`). A back-slash that is
        the very last character of *pattern* has nothing to escape and
        contributes nothing to the result.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        escape = False
        # Collect the translated pieces and join them once at the end.
        parts = []
        i, end = 0, len(pattern)
        while i < end:
            # Get next character.
            char = pattern[i]
            i += 1

            if escape:
                # Escape the character.
                escape = False
                parts.append(re.escape(char))

            elif char == '\\':
                # Escape character, escape next character.
                escape = True

            elif char == '*':
                # Multi-character wildcard. Match any string (except
                # slashes), including an empty string.
                parts.append('[^/]*')

            elif char == '?':
                # Single-character wildcard. Match any single character
                # (except a slash).
                parts.append('[^/]')

            elif char == '[':
                # Bracket expression wildcard. Except for the beginning
                # exclamation mark, the whole bracket expression can be used
                # directly as regex but we have to find where the expression
                # ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                j = i
                # Pass bracket expression negation.
                if j < end and pattern[j] == '!':
                    j += 1
                # Pass first closing bracket if it is at the beginning of
                # the expression.
                if j < end and pattern[j] == ']':
                    j += 1
                # Find closing bracket. Stop once we reach the end or find
                # it.
                while j < end and pattern[j] != ']':
                    j += 1

                if j < end:
                    # Found end of bracket expression. Increment j to be one
                    # past the closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #   i   j
                    #
                    j += 1
                    expr = '['

                    if pattern[i] == '!':
                        # Bracket expression needs to be negated.
                        expr += '^'
                        i += 1
                    elif pattern[i] == '^':
                        # POSIX declares that the regex bracket expression
                        # negation "[^...]" is undefined in a glob pattern.
                        # Python's `fnmatch.translate()` escapes the caret
                        # ('^') as a literal. To maintain consistency with
                        # undefined behavior, the '^' is escaped here as
                        # well.
                        expr += '\\^'
                        i += 1

                    # Build regex bracket expression. Escape back-slashes so
                    # they are treated as literal back-slashes by regex as
                    # defined by POSIX.
                    expr += pattern[i:j].replace('\\', '\\\\')

                    # Add regex bracket expression to regex result.
                    parts.append(expr)

                    # Set i to one past the closing bracket.
                    i = j

                else:
                    # Failed to find closing bracket, treat opening bracket
                    # as a bracket literal instead of as an expression.
                    parts.append('\\[')

            else:
                # Regular character, escape it for regex.
                parts.append(re.escape(char))

        return ''.join(parts)

    @staticmethod
    def escape(s):
        # type: (AnyStr) -> AnyStr
        """
        Escape special characters in the given string.

        *s* (:class:`unicode` or :class:`bytes`) a filename or a string
        that you want to escape, usually before adding it to a
        `.gitignore`.

        Returns the escaped string (:class:`unicode` or :class:`bytes`),
        in the same string type as *s*.

        Raises :exc:`TypeError` if *s* is neither a unicode nor a byte
        string.
        """
        if isinstance(s, unicode):
            return_type = unicode
            string = s
        elif isinstance(s, bytes):
            return_type = bytes
            string = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError("s:{!r} is not a unicode or byte string.".format(s))

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        # NOTE(review): the back-slash itself is not in this set, so a
        # literal back-slash in *s* is passed through unescaped -- confirm
        # whether that is intended.
        meta_characters = r"[]!*#?"

        # Prefix every meta character with a back-slash.
        out_string = "".join("\\" + x if x in meta_characters else x for x in string)

        if return_type is bytes:
            return out_string.encode(_BYTES_ENCODING)
        else:
            return out_string

366

# Register `GitWildMatchPattern` as "gitwildmatch".
util.register_pattern('gitwildmatch', GitWildMatchPattern)

368

369

class GitIgnorePattern(GitWildMatchPattern):
    """
    Deprecated name for :class:`GitWildMatchPattern`. It is kept only so
    that code written against v0.4 keeps working; it issues a
    :class:`DeprecationWarning` and otherwise defers to the parent class.
    """

    def __init__(self, *args, **kw):
        """
        Issue the deprecation warning, then initialize through the parent
        class with the arguments unchanged.
        """
        self._deprecated()
        super(GitIgnorePattern, self).__init__(*args, **kw)

    @staticmethod
    def _deprecated():
        """
        Issue the deprecation warning for this class.
        """
        message = (
            "GitIgnorePattern ('gitignore') is deprecated. "
            "Use GitWildMatchPattern ('gitwildmatch') instead."
        )
        # A stack level of 3 skips this helper and the method that called
        # it, so the warning is attributed to that method's caller.
        warnings.warn(message, DeprecationWarning, stacklevel=3)

    @classmethod
    def pattern_to_regex(cls, *args, **kw):
        """
        Issue the deprecation warning, then return the parent class's
        conversion of the pattern, with the arguments passed through
        unchanged.
        """
        cls._deprecated()
        return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw)

397

# Register `GitIgnorePattern` as "gitignore" so that the name used by v0.4
# keeps working (backward compatibility).
util.register_pattern('gitignore', GitIgnorePattern)