1"""
2This module provides common classes for the gitignore patterns.
3"""
4
5import re
6
7from typing import (
8 Literal)
9
10from pathspec.pattern import (
11 RegexPattern)
12from pathspec._typing import (
13 AnyStr, # Removed in 3.18.
14 assert_unreachable)
15
# Codec name shared by the decode and encode steps applied to byte string
# input: `_GitIgnoreBasePattern.escape()` decodes bytes with it before escaping
# and encodes the escaped result with it afterwards.
# NOTE(review): presumably latin1 was chosen because it maps every byte value
# to a single code point, making the decode/encode round trip lossless --
# confirm.
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""
20
21
class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
       change.

    The :class:`_GitIgnoreBasePattern` class is the base implementation for a
    compiled gitignore pattern. It provides two static helpers: one to escape a
    literal string for use in a gitignore file, and one to translate a single
    path segment glob into a regular expression.
    """

    # No per-instance attributes are added here; keep the hierarchy dict-less.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Prefix every gitignore special character in *s* with a backslash.

        *s* (:class:`str` or :class:`bytes`) is the filename or string to
        escape, usually before it is written to a ".gitignore" file.

        Returns the escaped string (:class:`str` or :class:`bytes`), using the
        same type as *s*.

        Raises :class:`TypeError` if *s* is neither a :class:`str` nor a
        :class:`bytes`.
        """
        if isinstance(s, str):
            was_bytes = False
            text = s
        elif isinstance(s, bytes):
            # Work on text internally; the result is encoded back at the end.
            was_bytes = True
            text = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        special_chars = '\\[]!*#?'
        escaped = ''.join(
            (f"\\{ch}" if ch in special_chars else ch) for ch in text
        )

        if was_bytes:
            return escaped.encode(_BYTES_ENCODING)
        return escaped

    @staticmethod
    def _translate_segment_glob(
        pattern: str,
        range_error: Literal['literal', 'raise'],
    ) -> str:
        """
        Convert the glob for one path segment into the equivalent regular
        expression source. The constructor uses this for each path segment of
        a pattern.

        *pattern* (:class:`str`) is the glob pattern for the segment.

        *range_error* (:class:`str`) selects what happens when an opening
        bracket has no matching closing bracket:

        - :data:`"literal"`: The opening bracket is matched as a literal
          character and translation continues after it.

        - :data:`"raise"`: A :class:`_RangeError` is raised.

        Returns the regular expression (:class:`str`).

        Raises :class:`_RangeError` for an unterminated bracket expression when
        *range_error* is :data:`"raise"`.

        Raises :class:`ValueError` if *pattern* ends with an escape character
        that has nothing after it to escape.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        # Regex fragments are collected here and joined once at the end.
        pieces = []
        pending_escape = False
        pos, length = 0, len(pattern)

        while pos < length:
            # Consume the next character.
            ch = pattern[pos]
            pos += 1

            if pending_escape:
                # The previous character was a backslash: take this one
                # literally.
                pending_escape = False
                pieces.append(re.escape(ch))
                continue

            if ch == '\\':
                # Escape character: the next character is taken literally.
                pending_escape = True
                continue

            if ch == '*':
                # Any run of characters (possibly empty) that has no slash.
                pieces.append('[^/]*')
                continue

            if ch == '?':
                # Exactly one character that is not a slash.
                pieces.append('[^/]')
                continue

            if ch != '[':
                # Ordinary character: match it literally.
                pieces.append(re.escape(ch))
                continue

            # Bracket expression (range notation). Apart from a leading
            # exclamation mark, the expression can be reused as regex as-is,
            # but its end has to be located first.
            # - "[][!]" matches ']', '[' and '!'.
            # - "[]-]" matches ']' and '-'.
            # - "[!]a-]" matches any character except ']', 'a' and '-'.
            scan = pos

            # Skip over a negation marker.
            if scan < length and pattern[scan] in ('!', '^'):
                scan += 1

            # A closing bracket in the first position is a member of the set,
            # not the terminator.
            if scan < length and pattern[scan] == ']':
                scan += 1

            # Locate the terminating bracket, if there is one.
            close = pattern.find(']', scan)

            if close < 0:
                # The bracket expression is never closed.
                if range_error == 'literal':
                    # Match the opening bracket itself; the characters after it
                    # are translated normally on the following iterations.
                    pieces.append('\\[')
                elif range_error == 'raise':
                    # Report the invalid range notation as an error.
                    raise _RangeError((
                        f"Invalid range notation={pattern[pos:length]!r} found in pattern="
                        f"{pattern!r}."
                    ))
                else:
                    assert_unreachable(f"{range_error=!r} is invalid.")
                continue

            # One past the closing bracket:
            #
            #   [...]
            #    ^   ^
            #   pos  stop
            #
            stop = close + 1

            # POSIX declares that the regex style negation "[^...]" is
            # undefined in a glob pattern, and Python's `fnmatch.translate()`
            # escapes the caret ('^') as a literal. Git supports using a caret
            # for negation, so both '!' and '^' negate here to stay consistent
            # with Git.
            negated = pattern[pos] in ('!', '^')
            if negated:
                pos += 1

            # Double every backslash so that regex treats it as a literal
            # backslash inside the bracket expression, as defined by POSIX.
            members = pattern[pos:stop].replace('\\', '\\\\')
            pieces.append(('[^' if negated else '[') + members)

            # Resume right after the closing bracket.
            pos = stop

        if pending_escape:
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(pieces)
194
195
class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` class is the error raised to signal that
    a gitignore pattern is invalid. It derives from :class:`ValueError`, so
    handlers written for :class:`ValueError` also catch it.
    """
202
203
class _RangeError(GitIgnorePatternError):
    """
    The :class:`_RangeError` class is the error raised to signal that a
    gitignore pattern contains invalid range notation (a bracket expression).
    It derives from :class:`GitIgnorePatternError`.
    """