Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/base.py: 81%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

73 statements  

1""" 

2This module provides common classes for the gitignore patterns. 

3""" 

4 

5import re 

6 

7from typing import ( 

8 Literal) 

9 

10from pathspec.pattern import ( 

11 RegexPattern) 

12from pathspec._typing import ( 

13 AnyStr, # Removed in 3.18. 

14 assert_unreachable) 

15 

# NOTE(review): presumably latin1 was chosen because it assigns a code point to
# every byte value, so decoding a byte string and encoding the result again
# round-trips arbitrary bytes -- confirm against the callers that parse bytes.
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""

20 

21 

class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_GitIgnoreBasePattern` class is the base implementation for a
    compiled gitignore pattern. It provides static helpers for escaping a
    literal string and for translating a single path-segment glob into a
    regular expression.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Escape special characters in the given string.

        *s* (:class:`str` or :class:`bytes`) a filename or a string that you
        want to escape, usually before adding it to a ".gitignore".

        Raises :class:`TypeError` if *s* is neither a :class:`str` nor a
        :class:`bytes`.

        Returns the escaped string (:class:`str` or :class:`bytes`), of the
        same kind as *s*.
        """
        if isinstance(s, str):
            is_bytes = False
            text = s
        elif isinstance(s, bytes):
            # Work on text internally; the result is encoded back below.
            is_bytes = True
            text = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Characters that get a backslash put in front of them.
        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        meta_chars = '\\[]!*#?'
        escaped = ''.join(
            f"\\{char}" if char in meta_chars else char for char in text
        )

        if is_bytes:
            return escaped.encode(_BYTES_ENCODING)  # type: ignore[return-value]
        return escaped  # type: ignore[return-value]

    @staticmethod
    def _translate_segment_glob(
        pattern: str,
        range_error: Literal['literal', 'raise'],
    ) -> str:
        """
        Translates the glob pattern to a regular expression. This is used in
        the constructor to translate a path segment glob pattern to its
        corresponding regular expression.

        *pattern* (:class:`str`) is the glob pattern.

        *range_error* (:class:`str`) is how to handle invalid range notation
        (an opening bracket with no closing bracket) in the pattern:

        - :data:`"literal"`: Invalid notation will be treated as a literal
          string.

        - :data:`"raise"`: Invalid notation will cause a :class:`_RangeError`
          to be raised.

        Raises :class:`_RangeError` as described above, and
        :class:`ValueError` if the pattern ends with an escape character that
        has nothing after it to escape.

        Returns the regular expression (:class:`str`).
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        # Regex fragments are collected here and joined once at the end rather
        # than growing a string with repeated concatenation.
        parts = []
        escape = False
        i, end = 0, len(pattern)
        while i < end:
            # Get next character; `i` now points one past it.
            char = pattern[i]
            i += 1

            if escape:
                # Previous character was a backslash: take this one literally.
                escape = False
                parts.append(re.escape(char))

            elif char == '\\':
                # Escape character, escape next character.
                escape = True

            elif char == '*':
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                parts.append('[^/]*')

            elif char == '?':
                # Single-character wildcard. Match any single character
                # (except a slash).
                parts.append('[^/]')

            elif char == '[':
                # Bracket expression (range notation) wildcard. Except for the
                # beginning exclamation mark, the whole bracket expression can
                # be used directly as regex, but we have to find where the
                # expression ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                #
                # NOTE(review): the expression is copied through as a regex
                # character class, so unlike `*` and `?` a negated class such
                # as "[!a]" does not exclude '/'. Confirm this is intended.
                j = i

                # Pass bracket expression negation.
                if j < end and pattern[j] in ('!', '^'):
                    j += 1

                # Pass first closing bracket if it is at the beginning of the
                # expression, where it is a member rather than the terminator.
                if j < end and pattern[j] == ']':
                    j += 1

                # Find closing bracket at or after `j`.
                close = pattern.find(']', j)

                if close != -1:
                    # Found end of bracket expression. Set j to be one past
                    # the closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #   i   j
                    #
                    j = close + 1
                    expr = '['

                    if pattern[i] in ('!', '^'):
                        # Bracket expression needs to be negated. POSIX
                        # declares that the regex-style negation "[^...]" is
                        # undefined in a glob pattern, and Python's
                        # `fnmatch.translate()` escapes the caret ('^') as a
                        # literal. Git supports using a caret for negation, so
                        # both '!' and '^' negate here to stay consistent with
                        # Git because that is the expected behavior.
                        expr += '^'
                        i += 1

                    # Build regex bracket expression. Double each backslash so
                    # regex treats it as a literal backslash, as defined by
                    # POSIX for bracket expressions.
                    expr += pattern[i:j].replace('\\', '\\\\')

                    # Add regex bracket expression to regex result.
                    parts.append(expr)

                    # Set i to one past the closing bracket.
                    i = j

                else:
                    # Failed to find closing bracket; the scan ran to the end
                    # of the pattern. `i` is left one past the opening bracket
                    # so the rest is processed as ordinary characters.
                    j = end

                    if range_error == 'literal':
                        # Treat opening bracket as a bracket literal instead
                        # of as an expression.
                        parts.append('\\[')
                    elif range_error == 'raise':
                        # Treat invalid range notation as an error.
                        raise _RangeError((
                            f"Invalid range notation={pattern[i:j]!r} found in pattern="
                            f"{pattern!r}."
                        ))
                    else:
                        assert_unreachable(f"{range_error=!r} is invalid.")

            else:
                # Regular character, escape it for regex.
                parts.append(re.escape(char))

        if escape:
            # The pattern ended on a backslash with nothing left to escape.
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(parts)

195 

196 

class GitIgnorePatternError(ValueError):
    """
    Indicates that a gitignore pattern is invalid.

    The :class:`GitIgnorePatternError` class derives from :class:`ValueError`
    and adds no behavior of its own.
    """

203 

204 

class _RangeError(GitIgnorePatternError):
    """
    Indicates that invalid range notation was found in a gitignore pattern.

    The :class:`_RangeError` class derives from :class:`GitIgnorePatternError`
    and adds no behavior of its own.
    """