Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/spec.py: 95%

1"""

2This module provides :class:`GitIgnoreSpecPattern` which implements Git's

3`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from

4what's documented. Git allows including files from excluded directories which

5appears to contradict the documentation. Git discards patterns with invalid

6range notation. This is used by :class:`~pathspec.gitignore.GitIgnoreSpec` to

7fully replicate Git's handling.

9.. _`gitignore`: https://git-scm.com/docs/gitignore

10"""

12from typing import (

13 Optional) # Replaced by `X | None` in 3.10.

15from pathspec._typing import (

16 AnyStr, # Removed in 3.18.

17 assert_unreachable,

18 override) # Added in 3.12.

20from .base import (

21 GitIgnorePatternError,

22 _BYTES_ENCODING,

23 _GitIgnoreBasePattern,

24 _RangeError)

_DIR_MARK = "ps_d"
"""
Name of the regex capture group that flags the directory marker. Only
:class:`GitIgnoreSpec` makes use of it.
"""

_DIR_MARK_CG = f"(?P<{_DIR_MARK}>/)"
"""
Regex fragment for the directory marker: a single slash ('/') captured under
the group named by :data:`_DIR_MARK`.
"""

_DIR_MARK_OPT = f"(?:{_DIR_MARK_CG}|$)"
"""
Regex fragment matching either the directory marker (see
:data:`_DIR_MARK_CG`) or the end of the path.
"""

43class GitIgnoreSpecPattern(_GitIgnoreBasePattern):

44 """

45 The :class:`GitIgnoreSpecPattern` class represents a compiled gitignore

46 pattern with special handling for edge-cases to replicate Git's behavior.

48 This is registered under the deprecated name "gitwildmatch" for backward

49 compatibility with v0.12. The registered name will be removed in a future

50 version.

51 """

53 # Keep the dict-less class hierarchy.

54 __slots__ = ()

56 @staticmethod

57 def __normalize_segments(

58 is_dir_pattern: bool,

59 pattern_segs: list[str],

60 ) -> tuple[Optional[list[str]], Optional[str]]:

61 """

62 Normalize the pattern segments to make processing easier.

64 *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory

65 pattern (i.e., ends with a slash '/').

67 *pattern_segs* (:class:`list` of :class:`str`) contains the pattern

68 segments. This may be modified in place.

70 Returns a :class:`tuple` containing either:

72 - The normalized segments (:class:`list` of :class:`str`; or :data:`None`).

74 - The regular expression override (:class:`str` or :data:`None`).

75 """

76 if not pattern_segs[0]:

77 # A pattern beginning with a slash ('/') should match relative to the root

78 # directory. Remove the empty first segment to make the pattern relative

79 # to root.

80 del pattern_segs[0]

82 elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):

83 # A single segment pattern with or without a trailing slash ('/') will

84 # match any descendant path. This is equivalent to "**/{pattern}". Prepend

85 # a double-asterisk segment to make the pattern relative to root.

86 if pattern_segs[0] != '**':

87 pattern_segs.insert(0, '**')

89 else:

90 # A pattern without a beginning slash ('/') but contains at least one

91 # prepended directory (e.g., "dir/{pattern}") should match relative to the

92 # root directory. No segment modification is needed.

93 pass

95 if not pattern_segs:

96 # After normalization, we end up with no pattern at all. This must be

97 # because the pattern is invalid.

98 raise ValueError("Pattern normalized to nothing.")

100 if not pattern_segs[-1]:

101 # A pattern ending with a slash ('/') will match all descendant paths if

102 # it is a directory but not if it is a regular file. This is equivalent to

103 # "{pattern}/**". Set the empty last segment to a double-asterisk to

104 # include all descendants.

105 pattern_segs[-1] = '**'

106

107 # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e., '**/**').

108 # Iterate over the segments in reverse order and remove the duplicate double

109 # asterisks as we go.

110 for i in range(len(pattern_segs) - 1, 0, -1):

111 prev = pattern_segs[i-1]

112 seg = pattern_segs[i]

113 if prev == '**' and seg == '**':

114 del pattern_segs[i]

115

116 seg_count = len(pattern_segs)

117 if seg_count == 1 and pattern_segs[0] == '**':

118 if is_dir_pattern:

119 # The pattern "**/" will be normalized to "**", but it should match

120 # everything except for files in the root. Special case this pattern.

121 return (None, _DIR_MARK_CG)

122 else:

123 # The pattern "**" will match every path. Special case this pattern.

124 return (None, '.')

125

126 elif (

127 seg_count == 2

128 and pattern_segs[0] == '**'

129 and pattern_segs[1] == '*'

130 ):

131 # The pattern "*" will be normalized to "**/*" and will match every

132 # path. Special case this pattern for efficiency.

133 return (None, '.')

134

135 elif (

136 seg_count == 3

137 and pattern_segs[0] == '**'

138 and pattern_segs[1] == '*'

139 and pattern_segs[2] == '**'

140 ):

141 # The pattern "*/" will be normalized to "**/*/**" which will match every

142 # file not in the root directory. Special case this pattern for

143 # efficiency.

144 if is_dir_pattern:

145 return (None, _DIR_MARK_CG)

146 else:

147 return (None, '/')

148

149 # No regular expression override, return modified pattern segments.

150 return (pattern_segs, None)

151

152 @override

153 @classmethod

154 def pattern_to_regex(

155 cls,

156 pattern: AnyStr,

157 ) -> tuple[Optional[AnyStr], Optional[bool]]:

158 """

159 Convert the pattern into a regular expression.

160

161 *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a

162 regular expression.

163

164 Returns a :class:`tuple` containing:

165

166 - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the

167 uncompiled regular expression.

168

169 - *include* (:class:`bool` or :data:`None`) is whether matched files

170 should be included (:data:`True`), excluded (:data:`False`), or is a

171 null-operation (:data:`None`).

172 """

173 if isinstance(pattern, str):

174 pattern_str = pattern

175 return_type = str

176 elif isinstance(pattern, bytes):

177 pattern_str = pattern.decode(_BYTES_ENCODING)

178 return_type = bytes

179 else:

180 raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

181

182 original_pattern = pattern_str

183 del pattern

184

185 if pattern_str.endswith('\\ '):

186 # EDGE CASE: Spaces can be escaped with backslash. If a pattern that ends

187 # with a backslash is followed by a space, do not strip from the left.

188 pass

189 else:

190 # EDGE CASE: Leading spaces should be kept (only trailing spaces should be

191 # removed). Git does not remove leading spaces.

192 pattern_str = pattern_str.rstrip()

193

194 regex: Optional[str]

195 include: Optional[bool]

196

197 if not pattern_str:

198 # A blank pattern is a null-operation (neither includes nor excludes

199 # files).

200 return (None, None)

201

202 elif pattern_str.startswith('#'):

203 # A pattern starting with a hash ('#') serves as a comment (neither

204 # includes nor excludes files). Escape the hash with a backslash to match

205 # a literal hash (i.e., '\#').

206 return (None, None)

207

208 elif pattern_str == '/':

209 # EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/' does

210 # not match any file.

211 return (None, None)

212

213 if pattern_str.startswith('!'):

214 # A pattern starting with an exclamation mark ('!') negates the pattern

215 # (exclude instead of include). Escape the exclamation mark with a

216 # backslash to match a literal exclamation mark (i.e., '\!').

217 include = False

218 # Remove leading exclamation mark.

219 pattern_str = pattern_str[1:]

220 else:

221 include = True

222

223 # Split pattern into segments.

224 pattern_segs = pattern_str.split('/')

225

226 # Check whether the pattern is specifically a directory pattern before

227 # normalization.

228 is_dir_pattern = not pattern_segs[-1]

229

230 # Normalize pattern to make processing easier.

231 try:

232 pattern_segs, override_regex = cls.__normalize_segments(

233 is_dir_pattern, pattern_segs,

234 )

235 except ValueError as e:

236 raise GitIgnorePatternError((

237 f"Invalid git pattern: {original_pattern!r}"

238 )) from e # GitIgnorePatternError

239

240 if override_regex is not None:

241 # Use regex override.

242 regex = override_regex

243

244 elif pattern_segs is not None:

245 # Build regular expression from pattern.

246 try:

247 regex_parts = cls.__translate_segments(is_dir_pattern, pattern_segs)

248 except _RangeError:

249 # EDGE CASE: Git discards patterns with invalid range notation.

250 return (None, None)

251 except ValueError as e:

252 raise GitIgnorePatternError((

253 f"Invalid git pattern: {original_pattern!r}"

254 )) from e # GitIgnorePatternError

255

256 regex = ''.join(regex_parts)

257

258 else:

259 assert_unreachable((

260 f"{override_regex=} and {pattern_segs=} cannot both be null."

261 )) # assert_unreachable

262

263 # Encode regex if needed.

264 out_regex: AnyStr

265 if regex is not None and return_type is bytes:

266 out_regex = regex.encode(_BYTES_ENCODING)

267 else:

268 out_regex = regex

269

270 return (out_regex, include)

271

272 @classmethod

273 def __translate_segments(

274 cls,

275 is_dir_pattern: bool,

276 pattern_segs: list[str],

277 ) -> list[str]:

278 """

279 Translate the pattern segments to regular expressions.

280

281 *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory

282 pattern (i.e., ends with a slash '/').

283

284 *pattern_segs* (:class:`list` of :class:`str`) contains the pattern

285 segments.

286

287 Raises :class:`_RangeError` if invalid range notation is found.

288

289 Returns the regular expression parts (:class:`list` of :class:`str`).

290 """

291 # Build regular expression from pattern.

292 out_parts = []

293 need_slash = False

294 end = len(pattern_segs) - 1

295 for i, seg in enumerate(pattern_segs):

296 if seg == '**':

297 if i == 0:

298 # A normalized pattern beginning with double-asterisks ('**') will

299 # match any leading path segments.

300 out_parts.append('^(?:.+/)?')

301

302 elif i < end:

303 # A pattern with inner double-asterisks ('**') will match multiple (or

304 # zero) inner path segments.

305 out_parts.append('(?:/.+)?')

306 need_slash = True

307

308 else:

309 assert i == end, (i, end)

310 # A normalized pattern ending with double-asterisks ('**') will match

311 # any trailing path segments.

312 if is_dir_pattern:

313 out_parts.append(_DIR_MARK_CG)

314 else:

315 out_parts.append('/')

316

317 else:

318 # Match path segment.

319 if i == 0:

320 # Anchor to root directory.

321 out_parts.append('^')

322

323 if need_slash:

324 out_parts.append('/')

325

326 if seg == '*':

327 # Match whole path segment.

328 out_parts.append('[^/]+')

329

330 else:

331 # Match segment glob pattern.

332 # - EDGE CASE: Git discards patterns with invalid range notation.

333 out_parts.append(cls._translate_segment_glob(seg, 'raise'))

334

335 if i == end:

336 # A pattern ending without a slash ('/') will match a file or a

337 # directory (with paths underneath it). E.g., "foo" matches "foo",

338 # "foo/bar", "foo/bar/baz", etc.

339 out_parts.append(_DIR_MARK_OPT)

340

341 need_slash = True

342

343 return out_parts