1"""
2This module provides common classes for the gitignore patterns.
3"""
4
5import re
6
7from typing import (
8 Literal)
9
10from pathspec.pattern import (
11 RegexPattern)
12from pathspec._typing import (
13 AnyStr, # Removed in 3.18.
14 assert_unreachable)
15
# latin1 maps every byte value 0-255 to the code point with the same number, so
# decoding a byte string with it and re-encoding the result round-trips without
# loss. `escape()` relies on this to process byte strings as text.
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""
20
21
class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_GitIgnoreBasePattern` class is the shared foundation for
    compiled gitignore patterns. It supplies the string escaping helper and the
    glob-to-regex translation for a single path segment.
    """

    # No per-instance attributes are added here; keep the hierarchy dict-less.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Backslash-escape the characters that are special in a gitignore pattern.

        *s* (:class:`str` or :class:`bytes`) is the text to escape, typically a
        filename about to be written to a ".gitignore".

        Returns the escaped text, of the same kind as *s* (:class:`str` or
        :class:`bytes`).

        Raises :class:`TypeError` if *s* is neither :class:`str` nor
        :class:`bytes`.
        """
        is_bytes = isinstance(s, bytes)
        if is_bytes:
            # Work on text internally; the result is encoded back before returning.
            text = s.decode(_BYTES_ENCODING)
        elif isinstance(s, str):
            text = s
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        # Map each special character to its backslash-prefixed form.
        replacements = {meta: '\\' + meta for meta in '\\[]!*#?'}
        escaped = ''.join(replacements.get(ch, ch) for ch in text)

        if is_bytes:
            return escaped.encode(_BYTES_ENCODING)  # type: ignore[return-value]
        return escaped  # type: ignore[return-value]

    @staticmethod
    def _translate_segment_glob(
        pattern: str,
        range_error: Literal['literal', 'raise'],
    ) -> str:
        """
        Convert the glob for one path segment into the equivalent regular
        expression source. The wildcards never match a slash.

        *pattern* (:class:`str`) is the glob pattern.

        *range_error* (:class:`str`) selects what happens when a bracket
        expression (range notation) has no closing bracket:

        - :data:`"literal"`: The opening bracket is matched as a literal "[".

        - :data:`"raise"`: A :class:`_RangeError` is raised.

        Returns the regular expression (:class:`str`).

        Raises :class:`_RangeError` for an unterminated bracket expression when
        *range_error* is :data:`"raise"`.

        Raises :class:`ValueError` if *pattern* ends with a backslash that has
        nothing after it to escape.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        # Regex fragments, joined once at the end.
        parts = []
        pos, length = 0, len(pattern)

        while pos < length:
            char = pattern[pos]
            pos += 1

            if char == '\\':
                # A backslash escapes whatever character follows it. If it is the
                # final character there is nothing left to escape.
                if pos == length:
                    raise ValueError((
                        f"Escape character found with no next character to escape: {pattern!r}"
                    ))
                parts.append(re.escape(pattern[pos]))
                pos += 1
                continue

            if char == '*':
                # Any run of characters (possibly empty) within the segment.
                parts.append('[^/]*')
                continue

            if char == '?':
                # Exactly one character within the segment.
                parts.append('[^/]')
                continue

            if char != '[':
                # Ordinary character: match it literally.
                parts.append(re.escape(char))
                continue

            # Bracket expression. Apart from a leading "!", the glob form can be
            # reused as a regex set once its closing bracket is located:
            # - "[][!]" matches ']', '[' and '!'.
            # - "[]-]" matches ']' and '-'.
            # - "[!]a-]" matches any character except ']', 'a' and '-'.
            scan = pos

            # Git accepts both "!" and "^" as the negation marker. POSIX leaves
            # "[^...]" undefined in globs and `fnmatch.translate()` treats the
            # caret literally; Git's behavior is followed here.
            negated = scan < length and pattern[scan] in ('!', '^')
            if negated:
                scan += 1

            # A "]" in the first position is a member of the set, not its end.
            if scan < length and pattern[scan] == ']':
                scan += 1

            close = pattern.find(']', scan)

            if close != -1:
                # Copy the members through the closing bracket, skipping the glob
                # negation marker (the regex one is emitted instead). Backslashes
                # are doubled so the regex treats them as literal backslashes, as
                # POSIX defines for bracket expressions.
                members_start = pos + 1 if negated else pos
                members = pattern[members_start:close + 1].replace('\\', '\\\\')
                parts.append('[' + ('^' if negated else '') + members)

                # Resume just past the closing bracket.
                pos = close + 1
                continue

            # No closing bracket. `pos` is left just past the "[", so the rest of
            # the pattern is still processed character by character.
            if range_error == 'literal':
                parts.append('\\[')
            elif range_error == 'raise':
                remainder = pattern[pos:]
                raise _RangeError((
                    f"Invalid range notation={remainder!r} found in pattern="
                    f"{pattern!r}."
                ))
            else:
                # Any other value is a caller error.
                assert_unreachable(f"{range_error=!r} is invalid.")

        return ''.join(parts)
195
196
class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` exception signals that a gitignore
    pattern is invalid. It derives from :class:`ValueError`, so handlers for
    :class:`ValueError` also catch it.
    """
203
204
class _RangeError(GitIgnorePatternError):
    """
    The :class:`_RangeError` exception signals that a gitignore pattern
    contains invalid range notation (a bracket expression).
    """