Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/base.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

78 statements  

1""" 

2This module provides common classes for the gitignore patterns. 

3""" 

4 

5import re 

6 

7from typing import ( 

8 Literal) 

9 

10from pathspec.pattern import ( 

11 RegexPattern) 

12from pathspec._typing import ( 

13 AnyStr, # Removed in 3.18. 

14 assert_unreachable) 

15 

_BYTES_ENCODING = "latin1"
"""
Codec used to decode a byte string pattern into text for parsing, and to
encode the result back into bytes.
"""

20 

21 

class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_GitIgnoreBasePattern` class is the base implementation for a
    compiled gitignore pattern.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Escape special characters in the given string.

        *s* (:class:`str` or :class:`bytes`) is a filename or a string that you
        want to escape, usually before adding it to a ".gitignore".

        Returns the escaped string (:class:`str` or :class:`bytes`), using the
        same type as *s*.

        Raises :class:`TypeError` if *s* is neither a :class:`str` nor a
        :class:`bytes`.
        """
        if isinstance(s, str):
            return_type = str
            string = s
        elif isinstance(s, bytes):
            # Work on text internally; the result is encoded back below.
            return_type = bytes
            string = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Prefix each character that is special in a gitignore pattern with a
        # backslash.
        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        out_string = ''.join(
            (f"\\{x}" if x in '\\[]!*#?' else x) for x in string
        )

        if return_type is bytes:
            out_bytes = out_string.encode(_BYTES_ENCODING)
            return out_bytes  # type: ignore[return-value]
        else:
            return out_string  # type: ignore[return-value]

    @staticmethod
    def _translate_segment_glob(
        pattern: str,
        range_error: Literal['literal', 'raise'],
    ) -> str:
        """
        Translates the glob pattern to a regular expression. This is used in the
        constructor to translate a path segment glob pattern to its corresponding
        regular expression.

        *pattern* (:class:`str`) is the glob pattern.

        *range_error* (:class:`str`) is how to handle invalid range notation in
        the pattern:

        - :data:`"literal"`: An unterminated bracket expression will be treated
          as a literal opening bracket.

        - :data:`"raise"`: Invalid notation will cause a :class:`_RangeError` to
          be raised.

        Returns the regular expression (:class:`str`).

        Raises :class:`_RangeError` if *range_error* is :data:`"raise"` and a
        bracket expression is unterminated or does not compile as regex.

        Raises :class:`ValueError` if *pattern* ends with an unpaired escape
        character (a trailing backslash).
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        escaping = False
        # Collect the regex fragments and join them once at the end instead of
        # repeatedly concatenating strings.
        parts = []
        i, end = 0, len(pattern)
        while i < end:
            # Get next character.
            char = pattern[i]
            i += 1

            if escaping:
                # Previous character was a backslash: emit this one literally.
                escaping = False
                parts.append(re.escape(char))

            elif char == '\\':
                # Escape character, escape next character.
                escaping = True

            elif char == '*':
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                parts.append('[^/]*')

            elif char == '?':
                # Single-character wildcard. Match any single character (except a
                # slash).
                parts.append('[^/]')

            elif char == '[':
                # Bracket expression (range notation) wildcard. Except for the
                # beginning exclamation mark, the whole bracket expression can be
                # used directly as regex, but we have to find where the expression
                # ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                j = i

                # Pass bracket expression negation.
                if j < end and pattern[j] in ('!', '^'):
                    j += 1

                # Pass first closing bracket if it is at the beginning of the
                # expression.
                if j < end and pattern[j] == ']':
                    j += 1

                # Find the closing bracket (-1 when there is none).
                close = pattern.find(']', j)

                if close != -1:
                    # Found end of bracket expression. Set j to be one past the
                    # closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #   i   j
                    #
                    j = close + 1
                    expr = '['

                    if pattern[i] in ('!', '^'):
                        # Bracket expression needs to be negated. POSIX declares
                        # that the regex bracket expression negation "[^...]" is
                        # undefined in a glob pattern, and Python's
                        # `fnmatch.translate()` escapes the caret ('^') as a
                        # literal. Git supports using a caret for negation, so
                        # both '!' and '^' are accepted here to stay consistent
                        # with Git.
                        expr += '^'
                        i += 1

                    # Build regex bracket expression. Escape backslashes so they
                    # are treated as literal backslashes by regex as defined by
                    # POSIX.
                    expr += pattern[i:j].replace('\\', '\\\\')

                    # NOTE(review): In "literal" mode a terminated expression is
                    # emitted without validation, so an invalid range (e.g.
                    # "[z-a]") is passed through as regex rather than escaped.
                    # Confirm callers expect this.
                    if range_error == 'raise':
                        try:
                            re.compile(expr)
                        except re.error as e:
                            raise _RangeError((
                                f"Invalid range notation={pattern[i:j]!r} found in "
                                f"pattern={pattern!r}."
                            )) from e

                    # Add regex bracket expression to regex result.
                    parts.append(expr)

                    # Set i to one past the closing bracket.
                    i = j

                elif range_error == 'literal':
                    # Failed to find closing bracket. Treat opening bracket as a
                    # bracket literal instead of as an expression; the characters
                    # after it are processed normally by the loop.
                    parts.append('\\[')

                elif range_error == 'raise':
                    # Failed to find closing bracket. Treat invalid range notation
                    # as an error.
                    raise _RangeError((
                        f"Invalid range notation={pattern[i:]!r} found in pattern="
                        f"{pattern!r}."
                    ))

                else:
                    assert_unreachable(f"{range_error=!r} is invalid.")

            else:
                # Regular character, escape it for regex.
                parts.append(re.escape(char))

        if escaping:
            # The pattern ended right after a backslash.
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(parts)

204 

205 

class GitIgnorePatternError(ValueError):
    """
    Raised to signal that a gitignore pattern is invalid. This is a
    :class:`ValueError` subclass.
    """

212 

213 

class _RangeError(GitIgnorePatternError):
    """
    Raised to signal that a gitignore pattern contains invalid range notation
    (a bracket expression). This is a :class:`GitIgnorePatternError` subclass.
    """