1"""
2This module provides common classes for the gitignore patterns.
3"""
4
5import re
6
7from pathspec.pattern import (
8 RegexPattern)
9from pathspec._typing import (
10 AnyStr) # Removed in 3.18.
11
# Codec used to move between bytes and text: `_GitIgnoreBasePattern.escape()`
# decodes a bytes argument with it before processing and encodes the result back
# with it. Latin-1 maps every byte value to exactly one code point, so that
# decode/encode round trip cannot fail or alter the data.
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""
16
17
class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
       change.

    The :class:`_GitIgnoreBasePattern` class holds the helpers shared by the
    compiled gitignore pattern implementations: escaping literal text for use
    in a pattern, and translating a single path segment glob to a regular
    expression.
    """

    # No per-instance attributes are added here; keep the hierarchy dict-less.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Backslash-escape the gitignore special characters in the given string.

        *s* (:class:`str` or :class:`bytes`) is the text to escape, typically a
        filename that is about to be written to a ".gitignore".

        Returns the escaped text, of the same type as *s* (:class:`str` or
        :class:`bytes`).

        Raises :class:`TypeError` if *s* is neither :class:`str` nor
        :class:`bytes`.
        """
        is_bytes = isinstance(s, bytes)
        if is_bytes:
            # Work on text internally; converted back before returning.
            text = s.decode(_BYTES_ENCODING)
        elif isinstance(s, str):
            text = s
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        pieces = []
        for ch in text:
            if ch in '[]!*#?':
                pieces.append(f"\\{ch}")
            else:
                pieces.append(ch)

        escaped = ''.join(pieces)
        return escaped.encode(_BYTES_ENCODING) if is_bytes else escaped

    @staticmethod
    def _translate_segment_glob(pattern: str) -> str:
        """
        Convert the glob for a single path segment to the equivalent regular
        expression source. Wildcards never match a slash.

        *pattern* (:class:`str`) is the glob pattern.

        Returns the regular expression (:class:`str`).

        Raises :class:`ValueError` if *pattern* ends with an unpaired escape
        character (a trailing backslash).
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        # Wildcards that translate to a fixed regex fragment:
        # - '*' matches any run of characters (possibly empty) except slashes.
        # - '?' matches exactly one character that is not a slash.
        wildcard_regex = {'*': '[^/]*', '?': '[^/]'}

        pieces = []
        pending_escape = False
        pos, length = 0, len(pattern)
        while pos < length:
            # Consume the next character; `pos` now points just past it.
            char = pattern[pos]
            pos += 1

            if pending_escape:
                # Previous character was a backslash: take this one literally.
                pending_escape = False
                pieces.append(re.escape(char))
                continue

            if char == '\\':
                # Escape character: the next character is a literal.
                pending_escape = True
                continue

            if char in wildcard_regex:
                pieces.append(wildcard_regex[char])
                continue

            if char != '[':
                # Ordinary character, match it literally.
                pieces.append(re.escape(char))
                continue

            # Bracket expression. Apart from a leading exclamation mark, the
            # expression is valid regex as-is, but its end has to be located:
            # - "[][!]" matches ']', '[' and '!'.
            # - "[]-]" matches ']' and '-'.
            # - "[!]a-]" matches any character except ']', 'a' and '-'.
            #
            # POSIX leaves "[^...]" undefined in a glob and Python's
            # `fnmatch.translate()` treats the caret as a literal, but Git
            # accepts the caret as negation, so both '!' and '^' negate here.
            negated = pos < length and pattern[pos] in ('!', '^')
            body_start = pos + 1 if negated else pos

            # A closing bracket in first position is a member of the set, not
            # the terminator, so start searching after it.
            scan = body_start
            if scan < length and pattern[scan] == ']':
                scan += 1

            close = pattern.find(']', scan)
            if close == -1:
                # No terminator: the opening bracket is just a literal '['. The
                # characters after it are handled by the following iterations.
                pieces.append('\\[')
                continue

            # Copy the set members together with the closing bracket, doubling
            # backslashes so regex reads them as literal backslashes.
            body = pattern[body_start:close + 1]
            opener = '[^' if negated else '['
            pieces.append(opener + body.replace('\\', '\\\\'))

            # Resume right after the closing bracket.
            pos = close + 1

        if pending_escape:
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(pieces)
169
170
class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` exception signals that a gitignore
    pattern is invalid. It is a :class:`ValueError` subclass and adds no
    behavior of its own.
    """