Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/spec.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

112 statements  

1""" 

2This module provides :class:`GitIgnoreSpecPattern` which implements Git's 

3`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from 

4what's documented. Git allows including files from excluded directories which 

5appears to contradict the documentation. This is used by 

6:class:`~pathspec.gitignore.GitIgnoreSpec` to fully replicate Git's handling. 

7 

8.. _`gitignore`: https://git-scm.com/docs/gitignore 

9""" 

10 

11from typing import ( 

12 Optional) # Replaced by `X | None` in 3.10. 

13 

14from pathspec._typing import ( 

15 AnyStr, # Removed in 3.18. 

16 assert_unreachable, 

17 override) # Added in 3.12. 

18 

19from .base import ( 

20 GitIgnorePatternError, 

21 _BYTES_ENCODING, 

22 _GitIgnoreBasePattern) 

23 

_DIR_MARK = 'ps_d'
"""
Name of the regular expression capture group that marks a directory match.
Only :class:`GitIgnoreSpec` makes use of it.
"""

_DIR_MARK_CG = '(?P<' + _DIR_MARK + '>/)'
"""
Regular expression fragment: a named group (named by :data:`_DIR_MARK`) that
captures the directory marker, i.e. a single slash.
"""

_DIR_MARK_OPT = '(?:' + _DIR_MARK_CG + '|$)'
"""
Regular expression fragment: matches either the directory marker (with
whatever sub-path follows it) or the end of the string.
"""

39 

40 

class GitIgnoreSpecPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreSpecPattern` class represents a compiled gitignore
    pattern with special handling for edge-cases to replicate Git's behavior.

    This is registered under the deprecated name "gitwildmatch" for backward
    compatibility with v0.12. The registered name will be removed in a future
    version.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Normalize the pattern segments to make processing easier.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a
        directory pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This may be modified in place.

        Returns a :class:`tuple` where exactly one item is set:

        - The normalized segments (:class:`list` of :class:`str`; or
          :data:`None`).

        - The regular expression override (:class:`str` or :data:`None`).

        Raises :class:`ValueError` if no segments remain after
        normalization.
        """
        if not pattern_segs[0]:
            # A pattern beginning with a slash ('/') should match relative to
            # the root directory. Remove the empty first segment to make the
            # pattern relative to root.
            del pattern_segs[0]

        elif len(pattern_segs) == 1 or (
            len(pattern_segs) == 2 and not pattern_segs[1]
        ):
            # A single segment pattern with or without a trailing slash ('/')
            # will match any descendant path. This is equivalent to
            # "**/{pattern}". Prepend a double-asterisk segment to make the
            # pattern relative to root.
            if pattern_segs[0] != '**':
                pattern_segs.insert(0, '**')

        # Otherwise the pattern has no beginning slash ('/') but contains at
        # least one prepended directory (e.g., "dir/{pattern}"); it already
        # matches relative to the root directory, so nothing is modified.

        if not pattern_segs:
            # After normalization, we end up with no pattern at all. This
            # must be because the pattern is invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # A pattern ending with a slash ('/') will match all descendant
            # paths if it is a directory but not if it is a regular file.
            # This is equivalent to "{pattern}/**". Set the empty last
            # segment to a double-asterisk to include all descendants.
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e.,
        # '**/**'). Walk the segments in reverse so deleting an entry never
        # shifts the indices still to be visited.
        for i in range(len(pattern_segs) - 1, 0, -1):
            if pattern_segs[i - 1] == '**' and pattern_segs[i] == '**':
                del pattern_segs[i]

        if pattern_segs == ['**']:
            if is_dir_pattern:
                # The pattern "**/" will be normalized to "**", but it should
                # match everything except for files in the root. Special case
                # this pattern.
                return (None, _DIR_MARK_CG)

            # The pattern "**" will match every path. Special case this
            # pattern.
            return (None, '.')

        if pattern_segs == ['**', '*']:
            # The pattern "*" will be normalized to "**/*" and will match
            # every path. Special case this pattern for efficiency.
            return (None, '.')

        if pattern_segs == ['**', '*', '**']:
            # The pattern "*/" will be normalized to "**/*/**" which will
            # match every file not in the root directory. Special case this
            # pattern for efficiency.
            if is_dir_pattern:
                return (None, _DIR_MARK_CG)

            return (None, '/')

        # No regular expression override, return modified pattern segments.
        return (pattern_segs, None)

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is
          a null-operation (:data:`None`).

        Raises :class:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`, and :class:`GitIgnorePatternError` if the pattern is
        invalid.
        """
        if isinstance(pattern, str):
            pattern_str = pattern
            return_type = str
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            return_type = bytes
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # Keep the untouched text for error messages, and drop the raw
        # argument so only the decoded string is used from here on.
        original_pattern = pattern_str
        del pattern

        # EDGE CASE: Leading spaces should be kept (only trailing whitespace
        # is removed). Git does not remove leading spaces.
        stripped = pattern_str.rstrip()

        # EDGE CASE: A trailing space can be escaped with a backslash, in
        # which case that one space is part of the pattern. The final
        # backslash only acts as an escape when the run of backslashes
        # ending the stripped pattern has odd length (an even run is made up
        # of escaped literal backslashes). Any whitespace after the escaped
        # space (extra spaces, a line ending) is still discarded.
        backslash_run = len(stripped) - len(stripped.rstrip('\\'))
        if backslash_run % 2 == 1 and pattern_str.startswith(
            ' ', len(stripped)
        ):
            pattern_str = stripped + ' '
        else:
            pattern_str = stripped

        regex: Optional[str]
        include: Optional[bool]

        if not pattern_str:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            return (None, None)

        if pattern_str.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment
            # (neither includes nor excludes files). Escape the hash with a
            # backslash to match a literal hash (i.e., '\#').
            return (None, None)

        if pattern_str == '/':
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
            # '/' does not match any file.
            return (None, None)

        if pattern_str.startswith('!'):
            # A pattern starting with an exclamation mark ('!') negates the
            # pattern (exclude instead of include). Escape the exclamation
            # mark with a backslash to match a literal exclamation mark
            # (i.e., '\!').
            include = False
            # Remove leading exclamation mark.
            pattern_str = pattern_str[1:]
        else:
            include = True

        # Split pattern into segments.
        pattern_segs = pattern_str.split('/')

        # Check whether the pattern is specifically a directory pattern
        # before normalization.
        is_dir_pattern = not pattern_segs[-1]

        # Normalize pattern to make processing easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError((
                f"Invalid git pattern: {original_pattern!r}"
            )) from e

        if override_regex is not None:
            # Use regex override.
            regex = override_regex

        elif pattern_segs is not None:
            # Build regular expression from pattern.
            try:
                regex_parts = cls.__translate_segments(
                    is_dir_pattern, pattern_segs,
                )
            except ValueError as e:
                raise GitIgnorePatternError((
                    f"Invalid git pattern: {original_pattern!r}"
                )) from e

            regex = ''.join(regex_parts)

        else:
            assert_unreachable((
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            ))

        # Encode regex if needed so the result type mirrors the input type.
        out_regex: AnyStr
        if regex is not None and return_type is bytes:
            out_regex = regex.encode(_BYTES_ENCODING)
        else:
            out_regex = regex

        return (out_regex, include)

    @classmethod
    def __translate_segments(
        cls,
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> list[str]:
        """
        Translate the pattern segments to regular expressions.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a
        directory pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the
        normalized pattern segments.

        Returns the regular expression parts (:class:`list` of
        :class:`str`); joining them yields the full expression.
        """
        # NOTE(review): the caller converts ValueError raised here into
        # GitIgnorePatternError; presumably it originates from
        # `_translate_segment_glob`, which is defined elsewhere -- confirm.
        out_parts = []
        # Whether a slash must be emitted before the next regular segment.
        need_slash = False
        end = len(pattern_segs) - 1
        for i, seg in enumerate(pattern_segs):
            if seg == '**':
                if i == 0:
                    # A normalized pattern beginning with double-asterisks
                    # ('**') will match any leading path segments.
                    out_parts.append('^(?:.+/)?')

                elif i < end:
                    # A pattern with inner double-asterisks ('**') will match
                    # multiple (or zero) inner path segments.
                    out_parts.append('(?:/.+)?')
                    need_slash = True

                elif is_dir_pattern:
                    # A normalized pattern ending with double-asterisks
                    # ('**') will match any trailing path segments. Capture
                    # the slash as the directory marker for directory
                    # patterns.
                    out_parts.append(_DIR_MARK_CG)

                else:
                    # Trailing double-asterisks on a non-directory pattern:
                    # match the slash that precedes any trailing path
                    # segments.
                    out_parts.append('/')

                continue

            # Match path segment.
            if i == 0:
                # Anchor to root directory.
                out_parts.append('^')

            if need_slash:
                out_parts.append('/')

            if seg == '*':
                # Match whole path segment.
                out_parts.append('[^/]+')
            else:
                # Match segment glob pattern.
                out_parts.append(cls._translate_segment_glob(seg))

            if i == end:
                # A pattern ending without a slash ('/') will match a file or
                # a directory (with paths underneath it). E.g., "foo" matches
                # "foo", "foo/bar", "foo/bar/baz", etc.
                out_parts.append(_DIR_MARK_OPT)

            need_slash = True

        return out_parts