Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/spec.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

114 statements  

1""" 

2This module provides :class:`GitIgnoreSpecPattern` which implements Git's 

3`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from 

4what's documented. Git allows including files from excluded directories which 

5appears to contradict the documentation. Git discards patterns with invalid 

6range notation. This is used by :class:`~pathspec.gitignore.GitIgnoreSpec` to 

7fully replicate Git's handling. 

8 

9.. _`gitignore`: https://git-scm.com/docs/gitignore 

10""" 

11 

12from typing import ( 

13 Optional) # Replaced by `X | None` in 3.10. 

14 

15from pathspec._typing import ( 

16 AnyStr, # Removed in 3.18. 

17 assert_unreachable, 

18 override) # Added in 3.12. 

19 

20from .base import ( 

21 GitIgnorePatternError, 

22 _BYTES_ENCODING, 

23 _GitIgnoreBasePattern, 

24 _RangeError) 

25 

_DIR_MARK = 'ps_d'
"""
The name of the regex capture group used as the directory marker. This is only
used by :class:`GitIgnoreSpec`.
"""

_DIR_MARK_CG = '(?P<' + _DIR_MARK + '>/)'
"""
This regular expression captures the directory marker: a single slash ('/')
in the group named by :data:`_DIR_MARK`.
"""

_DIR_MARK_OPT = '(?:' + _DIR_MARK_CG + '|$)'
"""
This regular expression matches either the directory marker or the end of the
string.
"""

41 

42 

class GitIgnoreSpecPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreSpecPattern` class is a compiled gitignore pattern
    which special-cases a number of edge-cases in order to replicate Git's
    behavior.

    This is registered under the deprecated name "gitwildmatch" for backward
    compatibility with v0.12. The registered name will be removed in a future
    version.
    """

    # Declare no slots of our own so the class hierarchy stays dict-less.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Rewrite the pattern segments into a canonical, root-relative form.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This list is modified in place.

        Raises :class:`ValueError` if no segments remain after the leading
        empty segment is removed.

        Returns a :class:`tuple` of two values, exactly one of which is
        :data:`None`:

        - The normalized segments (:class:`list` of :class:`str`; or
          :data:`None`).

        - The regular expression override (:class:`str` or :data:`None`).
        """
        head = pattern_segs[0]
        if not head:
            # The pattern began with a slash ('/'), so it is anchored to the
            # root directory. Dropping the empty leading segment leaves a
            # root-relative pattern.
            del pattern_segs[0]

        elif len(pattern_segs) == 1 or (
            len(pattern_segs) == 2 and not pattern_segs[1]
        ):
            # A lone segment (optionally followed by a trailing slash) may
            # match at any depth, which is the same as "**/{pattern}". Make
            # that explicit unless the segment already is a double-asterisk.
            if head != '**':
                pattern_segs.insert(0, '**')

        # Otherwise the pattern has no leading slash but does name at least
        # one directory (e.g., "dir/{pattern}"). It is already root-relative,
        # so the segments are left as they are.

        if not pattern_segs:
            # Nothing is left to match against, so the pattern must have been
            # invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # The pattern ended with a slash ('/'): it matches everything
            # below a directory but not a regular file, i.e. "{pattern}/**".
            # Turn the empty trailing segment into a double-asterisk.
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse runs of double-asterisk segments ('**/**') into
        # one. Walk from the tail towards the head so that deleting a segment
        # never shifts the indices still to be visited.
        idx = len(pattern_segs) - 1
        while idx > 0:
            if pattern_segs[idx - 1] == '**' and pattern_segs[idx] == '**':
                del pattern_segs[idx]
            idx -= 1

        if pattern_segs == ['**']:
            # "**/" normalizes to "**" but must match everything except files
            # in the root, whereas a plain "**" matches every path.
            return (None, _DIR_MARK_CG if is_dir_pattern else '.')

        if pattern_segs == ['**', '*']:
            # "*" normalizes to "**/*" which matches every path. Short-circuit
            # it for efficiency.
            return (None, '.')

        if pattern_segs == ['**', '*', '**']:
            # "*/" normalizes to "**/*/**" which matches every file outside of
            # the root directory. Short-circuit it for efficiency.
            return (None, _DIR_MARK_CG if is_dir_pattern else '/')

        # No special case applied: hand back the normalized segments.
        return (pattern_segs, None)

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the gitignore pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Raises :class:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`. Raises :class:`GitIgnorePatternError` if the pattern
        is invalid.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression. This has the same string type as the
          input pattern.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is a
          null-operation (:data:`None`).
        """
        # Work on text internally, and remember whether to encode the result.
        wants_bytes: bool
        if isinstance(pattern, str):
            pattern_str = pattern
            wants_bytes = False
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            wants_bytes = True
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # Keep the untouched text for error messages, and drop the raw input
        # so it cannot be used by mistake below.
        original_pattern = pattern_str
        del pattern

        # EDGE CASE: Only trailing whitespace is stripped; Git keeps leading
        # spaces. A trailing space escaped with a backslash is significant, so
        # in that case nothing is stripped at all.
        if not pattern_str.endswith('\\ '):
            pattern_str = pattern_str.rstrip()

        # Null-operations (neither include nor exclude files):
        # - A blank pattern.
        # - A comment, i.e. a pattern starting with a hash ('#'). Escape the
        #   hash with a backslash to match a literal hash (i.e., '\#').
        # - EDGE CASE: A single '/', which according to `git check-ignore`
        #   (v2.4.1) does not match any file.
        if (
            not pattern_str
            or pattern_str.startswith('#')
            or pattern_str == '/'
        ):
            return (None, None)

        # A leading exclamation mark ('!') negates the pattern (exclude
        # instead of include). Escape it with a backslash to match a literal
        # exclamation mark (i.e., '\!').
        include = not pattern_str.startswith('!')
        if not include:
            # Remove the leading exclamation mark.
            pattern_str = pattern_str[1:]

        # Break the pattern into path segments. Whether this is specifically a
        # directory pattern has to be decided before normalization rewrites
        # the trailing empty segment.
        pattern_segs = pattern_str.split('/')
        is_dir_pattern = not pattern_segs[-1]

        # Normalize the segments to make the translation easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError((
                f"Invalid git pattern: {original_pattern!r}"
            )) from e

        regex: str
        if override_regex is not None:
            # Normalization already decided on the regular expression.
            regex = override_regex

        elif pattern_segs is None:
            assert_unreachable((
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            ))

        else:
            # Translate the normalized segments into a regular expression.
            try:
                regex_parts = cls.__translate_segments(
                    is_dir_pattern, pattern_segs,
                )
            except _RangeError:
                # EDGE CASE: Git discards patterns with invalid range
                # notation.
                return (None, None)
            except ValueError as e:
                raise GitIgnorePatternError((
                    f"Invalid git pattern: {original_pattern!r}"
                )) from e

            regex = ''.join(regex_parts)

        # Return the same string type as was given.
        if regex is not None and wants_bytes:
            return (regex.encode(_BYTES_ENCODING), include)

        return (regex, include)

    @classmethod
    def __translate_segments(
        cls,
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> list[str]:
        """
        Translate the normalized pattern segments into the pieces of a regular
        expression.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments.

        Raises :class:`_RangeError` if invalid range notation is found.

        Returns the regular expression parts (:class:`list` of :class:`str`).
        Joining them yields the complete regular expression.
        """
        regex_parts: list[str] = []
        emit = regex_parts.append
        last_idx = len(pattern_segs) - 1

        # Whether the next regular segment must be preceded by a slash.
        sep_pending = False

        for idx, segment in enumerate(pattern_segs):
            if segment == '**':
                if idx == 0:
                    # Leading double-asterisks match any leading path
                    # segments. The separator is part of this group, so no
                    # slash is owed to the next segment.
                    emit('^(?:.+/)?')

                elif idx < last_idx:
                    # Inner double-asterisks match multiple (or zero) inner
                    # path segments.
                    emit('(?:/.+)?')
                    sep_pending = True

                else:
                    assert idx == last_idx, (idx, last_idx)
                    # Trailing double-asterisks match any trailing path
                    # segments. Directory patterns capture the slash as the
                    # directory marker.
                    emit(_DIR_MARK_CG if is_dir_pattern else '/')

                continue

            # A regular path segment.
            if idx == 0:
                # Anchor to the root directory.
                emit('^')

            if sep_pending:
                emit('/')

            if segment == '*':
                # Match a whole path segment.
                emit('[^/]+')
            else:
                # Match the segment as a glob.
                # - EDGE CASE: Git discards patterns with invalid range
                #   notation, so ask for an error to be raised.
                emit(cls._translate_segment_glob(segment, 'raise'))

            if idx == last_idx:
                # A pattern ending without a slash ('/') matches a file or a
                # directory (with paths underneath it). E.g., "foo" matches
                # "foo", "foo/bar", "foo/bar/baz", etc.
                emit(_DIR_MARK_OPT)

            sep_pending = True

        return regex_parts