Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/base.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

72 statements  

1""" 

2This module provides common classes for the gitignore patterns. 

3""" 

4 

5import re 

6 

7from typing import ( 

8 Literal) 

9 

10from pathspec.pattern import ( 

11 RegexPattern) 

12from pathspec._typing import ( 

13 AnyStr, # Removed in 3.18. 

14 assert_unreachable) 

15 

_BYTES_ENCODING = "latin1"
"""
The encoding used to decode a byte string pattern before it is parsed, and to
encode the result back to bytes.
"""

20 

21 

class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
       change.

    The :class:`_GitIgnoreBasePattern` class is the base implementation for a
    compiled gitignore pattern. It provides static helpers to escape text for
    use in a ".gitignore" and to translate a path segment glob to a regular
    expression.
    """

    # Empty slots: adds no instance attributes and keeps the class hierarchy
    # dict-less.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Backslash-escape every character in *s* that is special in a gitignore
        pattern.

        *s* (:class:`str` or :class:`bytes`) is the text to escape, usually a
        filename that is about to be added to a ".gitignore".

        Returns the escaped text (:class:`str` when *s* is a :class:`str`,
        :class:`bytes` when *s* is :class:`bytes`).

        Raises :class:`TypeError` if *s* is neither :class:`str` nor
        :class:`bytes`.
        """
        if isinstance(s, str):
            as_bytes = False
            text = s
        elif isinstance(s, bytes):
            # Work on text internally; the result is encoded back below.
            as_bytes = True
            text = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        # Map each special character to itself preceded by a backslash.
        escapes = str.maketrans({ch: '\\' + ch for ch in '\\[]!*#?'})
        escaped = text.translate(escapes)

        if as_bytes:
            return escaped.encode(_BYTES_ENCODING)
        return escaped

    @staticmethod
    def _translate_segment_glob(
        pattern: str,
        range_error: Literal['literal', 'raise'],
    ) -> str:
        """
        Translate the glob pattern of a single path segment to its regular
        expression. The wildcards never match a slash.

        *pattern* (:class:`str`) is the glob pattern.

        *range_error* (:class:`str`) is how to handle a bracket expression (range
        notation) that has no closing bracket:

        - :data:`"literal"`: The opening bracket is treated as a literal
          character.

        - :data:`"raise"`: A :class:`_RangeError` is raised.

        Returns the regular expression (:class:`str`).

        Raises :class:`_RangeError` for an unterminated bracket expression when
        *range_error* is :data:`"raise"`.

        Raises :class:`ValueError` if *pattern* ends with an escape character
        (a backslash) that has nothing to escape.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        # Regex fragments, joined once at the end.
        pieces = []
        # True when the previous character was an unescaped backslash.
        pending_escape = False
        pos, length = 0, len(pattern)
        while pos < length:
            # Consume the next character; *pos* now points one past it.
            ch = pattern[pos]
            pos += 1

            if pending_escape:
                # The previous backslash makes this character a literal.
                pending_escape = False
                pieces.append(re.escape(ch))
                continue

            if ch == '\\':
                # Escape character: the next character is taken literally.
                pending_escape = True
                continue

            if ch == '*':
                # Multi-character wildcard: any string without slashes, including
                # the empty string.
                pieces.append('[^/]*')
                continue

            if ch == '?':
                # Single-character wildcard: any one character except a slash.
                pieces.append('[^/]')
                continue

            if ch != '[':
                # Regular character, escape it for regex.
                pieces.append(re.escape(ch))
                continue

            # Bracket expression (range notation). Apart from a leading
            # exclamation mark, the expression can be used directly as regex once
            # its end has been located.
            # - "[][!]" matches ']', '[' and '!'.
            # - "[]-]" matches ']' and '-'.
            # - "[!]a-]" matches any character except ']', 'a' and '-'.
            scan = pos

            # Step over the negation marker.
            if scan < length and pattern[scan] in '!^':
                scan += 1

            # A closing bracket in the first position is a member of the
            # expression, not its end.
            if scan < length and pattern[scan] == ']':
                scan += 1

            # Locate the closing bracket (-1 when there is none).
            close = pattern.find(']', scan)

            if close == -1:
                # Failed to find the closing bracket.
                if range_error == 'literal':
                    # Emit the opening bracket as a literal; the characters after
                    # it are then processed normally by the loop.
                    pieces.append('\\[')
                elif range_error == 'raise':
                    # Treat invalid range notation as an error.
                    raise _RangeError((
                        f"Invalid range notation={pattern[pos:]!r} found in pattern="
                        f"{pattern!r}."
                    ))
                else:
                    # Not a declared option; presumably this helper raises.
                    assert_unreachable(f"{range_error=!r} is invalid.")
                continue

            # One past the closing bracket:
            #
            #   [...]
            #    ^   ^
            #    pos stop
            #
            stop = close + 1

            # POSIX leaves "[^...]" undefined in a glob pattern and Python's
            # `fnmatch.translate()` treats the caret as a literal, but Git
            # supports using a caret for negation. Stay consistent with Git, so
            # both '!' and '^' negate the expression.
            if pattern[pos] in '!^':
                negation = '^'
                pos += 1
            else:
                negation = ''

            # Double the backslashes so that regex treats them as literal
            # backslashes, as defined by POSIX. The slice includes the closing
            # bracket.
            pieces.append('[' + negation + pattern[pos:stop].replace('\\', '\\\\'))

            # Resume one past the closing bracket.
            pos = stop

        if pending_escape:
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(pieces)

194 

195 

class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` exception indicates that a gitignore
    pattern is invalid. It is a :class:`ValueError`.
    """

202 

203 

class _RangeError(GitIgnorePatternError):
    """
    The :class:`_RangeError` exception indicates that a gitignore pattern
    contains invalid range notation (a bracket expression).
    """