Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/basic.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

106 statements  

1""" 

2This module provides :class:`GitIgnoreBasicPattern` which implements Git's 

3`gitignore`_ patterns as documented. This differs from how Git actually behaves 

4when including files in excluded directories. 

5 

6.. _`gitignore`: https://git-scm.com/docs/gitignore 

7""" 

8 

9from typing import ( 

10 Optional) # Replaced by `X | None` in 3.10. 

11 

12from pathspec import util 

13from pathspec._typing import ( 

14 AnyStr, # Removed in 3.18. 

15 assert_unreachable, 

16 override) # Added in 3.12. 

17 

18from .base import ( 

19 GitIgnorePatternError, 

20 _BYTES_ENCODING, 

21 _GitIgnoreBasePattern) 

22 

23 

class GitIgnoreBasicPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreBasicPattern` class represents a compiled gitignore
    pattern as documented. This is registered as "gitignore".
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Normalize the pattern segments to make processing easier.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This may be modified in place.

        Returns a :class:`tuple` containing either:

        - The normalized segments (:class:`list` of :class:`str`; or
          :data:`None`).

        - The regular expression override (:class:`str` or :data:`None`).

        Exactly one of the two members is not :data:`None`.

        Raises :class:`ValueError` if no segments remain after normalization.
        """
        if not pattern_segs[0]:
            # A pattern beginning with a slash ('/') should match relative to
            # the root directory. Remove the empty first segment to make the
            # pattern relative to root.
            del pattern_segs[0]

        elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
            # A single segment pattern with or without a trailing slash ('/')
            # will match any descendant path. This is equivalent to
            # "**/{pattern}". Prepend double-asterisk segment to make pattern
            # relative to root.
            if pattern_segs[0] != '**':
                pattern_segs.insert(0, '**')

        else:
            # A pattern without a beginning slash ('/') but contains at least
            # one prepended directory (e.g., "dir/{pattern}") should match
            # relative to the root directory. No segment modification is
            # needed.
            pass

        if not pattern_segs:
            # After normalization, we end up with no pattern at all. This must
            # be because the pattern is invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # A pattern ending with a slash ('/') will match all descendant
            # paths if it is a directory but not if it is a regular file. This
            # is equivalent to "{pattern}/**". Set empty last segment to a
            # double-asterisk to include all descendants.
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e.,
        # '**/**'). Iterate over the segments in reverse order and remove the
        # duplicate double asterisks as we go. Walking backwards keeps the
        # indices that are still to be visited valid after each deletion.
        for i in range(len(pattern_segs) - 1, 0, -1):
            prev = pattern_segs[i - 1]
            seg = pattern_segs[i]
            if prev == '**' and seg == '**':
                del pattern_segs[i]

        seg_count = len(pattern_segs)
        if seg_count == 1 and pattern_segs[0] == '**':
            if is_dir_pattern:
                # The pattern "**/" will be normalized to "**", but it should
                # match everything except for files in the root. Special case
                # this pattern.
                return (None, '/')
            else:
                # The pattern "**" will match every path. Special case this
                # pattern.
                return (None, '.')

        elif (
            seg_count == 2
            and pattern_segs[0] == '**'
            and pattern_segs[1] == '*'
        ):
            # The pattern "*" will be normalized to "**/*" and will match every
            # path. Special case this pattern for efficiency.
            return (None, '.')

        elif (
            seg_count == 3
            and pattern_segs[0] == '**'
            and pattern_segs[1] == '*'
            and pattern_segs[2] == '**'
        ):
            # The pattern "*/" will be normalized to "**/*/**" which will match
            # every file not in the root directory. Special case this pattern
            # for efficiency.
            return (None, '/')

        # No regular expression override, return modified pattern segments.
        return (pattern_segs, None)

    @staticmethod
    def __strip_trailing_whitespace(pattern_str: str) -> str:
        """
        Remove trailing whitespace from the pattern, keeping a trailing space
        that is quoted with a backslash.

        *pattern_str* (:class:`str`) is the raw pattern.

        Returns the stripped pattern (:class:`str`). Leading whitespace is
        never removed.
        """
        stripped = pattern_str.rstrip()
        if len(stripped) == len(pattern_str):
            # Nothing to strip.
            return pattern_str

        # Count the backslashes directly in front of the stripped whitespace.
        # An odd count means the last backslash is not itself escaped, so it
        # quotes the character that follows it.
        backslash_count = len(stripped) - len(stripped.rstrip('\\'))
        if backslash_count % 2 == 1 and pattern_str[len(stripped)] == ' ':
            # EDGE CASE: Spaces can be escaped with backslash. Keep the escaped
            # space and drop only the unescaped whitespace that follows it
            # (e.g., "foo\\ " followed by more spaces keeps a single space).
            return stripped + ' '

        # Either no backslash precedes the whitespace, or the backslash is
        # itself escaped (e.g., "foo\\\\ "), so all trailing whitespace goes.
        return stripped

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is a
          null-operation (:data:`None`).

        Raises :class:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`. Raises :class:`GitIgnorePatternError` if the pattern
        cannot be normalized or translated.
        """
        if isinstance(pattern, str):
            pattern_str = pattern
            return_type = str
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            return_type = bytes
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # Keep the untouched text for error messages, and drop the parameter
        # name so that only the decoded string can be used below.
        original_pattern = pattern_str
        del pattern

        # EDGE CASE: Leading spaces should be kept; only trailing whitespace is
        # removed, and a trailing space quoted with a backslash is preserved.
        pattern_str = cls.__strip_trailing_whitespace(pattern_str)

        regex: Optional[str]
        include: Optional[bool]

        if not pattern_str:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            return (None, None)

        elif pattern_str.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment (neither
            # includes nor excludes files). Escape the hash with a backslash to
            # match a literal hash (i.e., '\#').
            return (None, None)

        if pattern_str.startswith('!'):
            # A pattern starting with an exclamation mark ('!') negates the
            # pattern (exclude instead of include). Escape the exclamation mark
            # with a back slash to match a literal exclamation mark (i.e.,
            # '\!').
            include = False
            # Remove leading exclamation mark.
            pattern_str = pattern_str[1:]
        else:
            include = True

        # Split pattern into segments.
        pattern_segs = pattern_str.split('/')

        # Check whether the pattern is specifically a directory pattern before
        # normalization.
        is_dir_pattern = not pattern_segs[-1]

        if pattern_str == '/':
            # EDGE CASE: A single slash ('/') is not addressed by the gitignore
            # documentation. Git treats it as a no-op (does not match any
            # files). The straight forward interpretation is to treat it as a
            # directory and match every descendant path (equivalent to '**').
            # Remove the directory pattern flag so that it is treated as '**'
            # instead of '**/'.
            is_dir_pattern = False

        # Normalize pattern to make processing easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError((
                f"Invalid git pattern: {original_pattern!r}"
            )) from e  # GitIgnorePatternError

        if override_regex is not None:
            # Use regex override.
            regex = override_regex

        elif pattern_segs is not None:
            # Build regular expression from pattern.
            try:
                regex_parts = cls.__translate_segments(pattern_segs)
            except ValueError as e:
                raise GitIgnorePatternError((
                    f"Invalid git pattern: {original_pattern!r}"
                )) from e  # GitIgnorePatternError

            regex = ''.join(regex_parts)

        else:
            assert_unreachable((
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            ))  # assert_unreachable

        # Encode regex if needed so the result has the same type as the input.
        out_regex: AnyStr
        if regex is not None and return_type is bytes:
            out_regex = regex.encode(_BYTES_ENCODING)
        else:
            out_regex = regex

        return (out_regex, include)

    @classmethod
    def __translate_segments(cls, pattern_segs: list[str]) -> list[str]:
        """
        Translate the pattern segments to regular expressions.

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments.

        Returns the regular expression parts (:class:`list` of :class:`str`).
        """
        # Build regular expression from pattern.
        out_parts = []
        # Whether the next plain segment must be preceded by a literal slash.
        need_slash = False
        end = len(pattern_segs) - 1
        for i, seg in enumerate(pattern_segs):
            if seg == '**':
                if i == 0:
                    # A normalized pattern beginning with double-asterisks
                    # ('**') will match any leading path segments.
                    # - NOTICE: '(?:^|/)' benchmarks slower using p15
                    #   (sm=0.9382, hs=0.9966, re2=0.9337).
                    out_parts.append('^(?:.+/)?')

                elif i < end:
                    # A pattern with inner double-asterisks ('**') will match
                    # multiple (or zero) inner path segments.
                    out_parts.append('(?:/.+)?')
                    need_slash = True

                else:
                    assert i == end, (i, end)
                    # A normalized pattern ending with double-asterisks ('**')
                    # will match any trailing path segments.
                    out_parts.append('/')

            else:
                # Match path segment.
                if i == 0:
                    # Anchor to root directory.
                    out_parts.append('^')

                if need_slash:
                    out_parts.append('/')

                if seg == '*':
                    # Match whole path segment.
                    out_parts.append('[^/]+')

                else:
                    # Match segment glob pattern.
                    out_parts.append(cls._translate_segment_glob(seg))

                if i == end:
                    if seg == '*':
                        # A pattern ending with an asterisk ('*') will match a
                        # file or directory (without matching descendant
                        # paths). E.g., "foo/*" matches "foo/test.json",
                        # "foo/bar/", but not "foo/bar/hello.c".
                        out_parts.append('/?$')

                    else:
                        # A pattern ending without a slash ('/') will match a
                        # file or a directory (with paths underneath it). E.g.,
                        # "foo" matches "foo", "foo/bar", "foo/bar/baz", etc.
                        out_parts.append('(?:/|$)')

                need_slash = True

        return out_parts

314 

315 

# Make GitIgnoreBasicPattern available through the pattern registry under the
# name "gitignore".
util.register_pattern('gitignore', GitIgnoreBasicPattern)