Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/patterns/gitignore/spec.py: 95%

1"""

2This module provides :class:`GitIgnoreSpecPattern` which implements Git's

3`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from

4what's documented. Git allows including files from excluded directories which

5appears to contradict the documentation. This is used by

6:class:`~pathspec.gitignore.GitIgnoreSpec` to fully replicate Git's handling.

8.. _`gitignore`: https://git-scm.com/docs/gitignore

9"""

11from typing import (

12 Optional) # Replaced by `X | None` in 3.10.

14from pathspec._typing import (

15 AnyStr, # Removed in 3.18.

16 assert_unreachable,

17 override) # Added in 3.12.

19from .base import (

20 GitIgnorePatternError,

21 _BYTES_ENCODING,

22 _GitIgnoreBasePattern)

24_DIR_MARK = 'ps_d'

25"""

26The regex group name for the directory marker. This is only used by

27:class:`GitIgnoreSpec`.

28"""

30_DIR_MARK_CG = f'(?P<{_DIR_MARK}>/)'

31"""

32This regular expression matches the directory marker.

33"""

35_DIR_MARK_OPT = f'(?:{_DIR_MARK_CG}|$)'

36"""

37This regular expression matches the optional directory marker and sub-path.

38"""

41class GitIgnoreSpecPattern(_GitIgnoreBasePattern):

42 """

43 The :class:`GitIgnoreSpecPattern` class represents a compiled gitignore

44 pattern with special handling for edge-cases to replicate Git's behavior.

46 This is registered under the deprecated name "gitwildmatch" for backward

47 compatibility with v0.12. The registered name will be removed in a future

48 version.

49 """

51 # Keep the dict-less class hierarchy.

52 __slots__ = ()

54 @staticmethod

55 def __normalize_segments(

56 is_dir_pattern: bool,

57 pattern_segs: list[str],

58 ) -> tuple[Optional[list[str]], Optional[str]]:

59 """

60 Normalize the pattern segments to make processing easier.

62 *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory

63 pattern (i.e., ends with a slash '/').

65 *pattern_segs* (:class:`list` of :class:`str`) contains the pattern

66 segments. This may be modified in place.

68 Returns a :class:`tuple` containing either:

70 - The normalized segments (:class:`list` of :class:`str`; or :data:`None`).

72 - The regular expression override (:class:`str` or :data:`None`).

73 """

74 if not pattern_segs[0]:

75 # A pattern beginning with a slash ('/') should match relative to the root

76 # directory. Remove the empty first segment to make the pattern relative

77 # to root.

78 del pattern_segs[0]

80 elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):

81 # A single segment pattern with or without a trailing slash ('/') will

82 # match any descendant path. This is equivalent to "**/{pattern}". Prepend

83 # double-asterisk segment to make pattern relative to root.

84 if pattern_segs[0] != '**':

85 pattern_segs.insert(0, '**')

87 else:

88 # A pattern without a beginning slash ('/') but contains at least one

89 # prepended directory (e.g., "dir/{pattern}") should match relative to the

90 # root directory. No segment modification is needed.

91 pass

93 if not pattern_segs:

94 # After normalization, we end up with no pattern at all. This must be

95 # because the pattern is invalid.

96 raise ValueError("Pattern normalized to nothing.")

98 if not pattern_segs[-1]:

99 # A pattern ending with a slash ('/') will match all descendant paths if

100 # it is a directory but not if it is a regular file. This is equivalent to

101 # "{pattern}/**". Set empty last segment to a double-asterisk to include

102 # all descendants.

103 pattern_segs[-1] = '**'

104

105 # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e., '**/**').

106 # Iterate over the segments in reverse order and remove the duplicate double

107 # asterisks as we go.

108 for i in range(len(pattern_segs) - 1, 0, -1):

109 prev = pattern_segs[i-1]

110 seg = pattern_segs[i]

111 if prev == '**' and seg == '**':

112 del pattern_segs[i]

113

114 seg_count = len(pattern_segs)

115 if seg_count == 1 and pattern_segs[0] == '**':

116 if is_dir_pattern:

117 # The pattern "**/" will be normalized to "**", but it should match

118 # everything except for files in the root. Special case this pattern.

119 return (None, _DIR_MARK_CG)

120 else:

121 # The pattern "**" will match every path. Special case this pattern.

122 return (None, '.')

123

124 elif (

125 seg_count == 2

126 and pattern_segs[0] == '**'

127 and pattern_segs[1] == '*'

128 ):

129 # The pattern "*" will be normalized to "**/*" and will match every

130 # path. Special case this pattern for efficiency.

131 return (None, '.')

132

133 elif (

134 seg_count == 3

135 and pattern_segs[0] == '**'

136 and pattern_segs[1] == '*'

137 and pattern_segs[2] == '**'

138 ):

139 # The pattern "*/" will be normalized to "**/*/**" which will match every

140 # file not in the root directory. Special case this pattern for

141 # efficiency.

142 if is_dir_pattern:

143 return (None, _DIR_MARK_CG)

144 else:

145 return (None, '/')

146

147 # No regular expression override, return modified pattern segments.

148 return (pattern_segs, None)

149

150 @override

151 @classmethod

152 def pattern_to_regex(

153 cls,

154 pattern: AnyStr,

155 ) -> tuple[Optional[AnyStr], Optional[bool]]:

156 """

157 Convert the pattern into a regular expression.

158

159 *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a

160 regular expression.

161

162 Returns a :class:`tuple` containing:

163

164 - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the

165 uncompiled regular expression.

166

167 - *include* (:class:`bool` or :data:`None`) is whether matched files

168 should be included (:data:`True`), excluded (:data:`False`), or is a

169 null-operation (:data:`None`).

170 """

171 if isinstance(pattern, str):

172 pattern_str = pattern

173 return_type = str

174 elif isinstance(pattern, bytes):

175 pattern_str = pattern.decode(_BYTES_ENCODING)

176 return_type = bytes

177 else:

178 raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

179

180 original_pattern = pattern_str

181 del pattern

182

183 if pattern_str.endswith('\\ '):

184 # EDGE CASE: Spaces can be escaped with backslash. If a pattern that ends

185 # with a backslash is followed by a space, do not strip from the left.

186 pass

187 else:

188 # EDGE CASE: Leading spaces should be kept (only trailing spaces should be

189 # removed). Git does not remove leading spaces.

190 pattern_str = pattern_str.rstrip()

191

192 regex: Optional[str]

193 include: Optional[bool]

194

195 if not pattern_str:

196 # A blank pattern is a null-operation (neither includes nor excludes

197 # files).

198 return (None, None)

199

200 elif pattern_str.startswith('#'):

201 # A pattern starting with a hash ('#') serves as a comment (neither

202 # includes nor excludes files). Escape the hash with a backslash to match

203 # a literal hash (i.e., '\#').

204 return (None, None)

205

206 elif pattern_str == '/':

207 # EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/' does

208 # not match any file.

209 return (None, None)

210

211 if pattern_str.startswith('!'):

212 # A pattern starting with an exclamation mark ('!') negates the pattern

213 # (exclude instead of include). Escape the exclamation mark with a back

214 # slash to match a literal exclamation mark (i.e., '\!').

215 include = False

216 # Remove leading exclamation mark.

217 pattern_str = pattern_str[1:]

218 else:

219 include = True

220

221 # Split pattern into segments.

222 pattern_segs = pattern_str.split('/')

223

224 # Check whether the pattern is specifically a directory pattern before

225 # normalization.

226 is_dir_pattern = not pattern_segs[-1]

227

228 # Normalize pattern to make processing easier.

229 try:

230 pattern_segs, override_regex = cls.__normalize_segments(

231 is_dir_pattern, pattern_segs,

232 )

233 except ValueError as e:

234 raise GitIgnorePatternError((

235 f"Invalid git pattern: {original_pattern!r}"

236 )) from e # GitIgnorePatternError

237

238 if override_regex is not None:

239 # Use regex override.

240 regex = override_regex

241

242 elif pattern_segs is not None:

243 # Build regular expression from pattern.

244 try:

245 regex_parts = cls.__translate_segments(is_dir_pattern, pattern_segs)

246 except ValueError as e:

247 raise GitIgnorePatternError((

248 f"Invalid git pattern: {original_pattern!r}"

249 )) from e # GitIgnorePatternError

250

251 regex = ''.join(regex_parts)

252

253 else:

254 assert_unreachable((

255 f"{override_regex=} and {pattern_segs=} cannot both be null."

256 )) # assert_unreachable

257

258 # Encode regex if needed.

259 out_regex: AnyStr

260 if regex is not None and return_type is bytes:

261 out_regex = regex.encode(_BYTES_ENCODING)

262 else:

263 out_regex = regex

264

265 return (out_regex, include)

266

267 @classmethod

268 def __translate_segments(

269 cls,

270 is_dir_pattern: bool,

271 pattern_segs: list[str],

272 ) -> list[str]:

273 """

274 Translate the pattern segments to regular expressions.

275

276 *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory

277 pattern (i.e., ends with a slash '/').

278

279 *pattern_segs* (:class:`list` of :class:`str`) contains the pattern

280 segments.

281

282 Returns the regular expression parts (:class:`list` of :class:`str`).

283 """

284 # Build regular expression from pattern.

285 out_parts = []

286 need_slash = False

287 end = len(pattern_segs) - 1

288 for i, seg in enumerate(pattern_segs):

289 if seg == '**':

290 if i == 0:

291 # A normalized pattern beginning with double-asterisks ('**') will

292 # match any leading path segments.

293 out_parts.append('^(?:.+/)?')

294

295 elif i < end:

296 # A pattern with inner double-asterisks ('**') will match multiple (or

297 # zero) inner path segments.

298 out_parts.append('(?:/.+)?')

299 need_slash = True

300

301 else:

302 assert i == end, (i, end)

303 # A normalized pattern ending with double-asterisks ('**') will match

304 # any trailing path segments.

305 if is_dir_pattern:

306 out_parts.append(_DIR_MARK_CG)

307 else:

308 out_parts.append('/')

309

310 else:

311 # Match path segment.

312 if i == 0:

313 # Anchor to root directory.

314 out_parts.append('^')

315

316 if need_slash:

317 out_parts.append('/')

318

319 if seg == '*':

320 # Match whole path segment.

321 out_parts.append('[^/]+')

322

323 else:

324 # Match segment glob pattern.

325 out_parts.append(cls._translate_segment_glob(seg))

326

327 if i == end:

328 # A pattern ending without a slash ('/') will match a file or a

329 # directory (with paths underneath it). E.g., "foo" matches "foo",

330 # "foo/bar", "foo/bar/baz", etc.

331 out_parts.append(_DIR_MARK_OPT)

332

333 need_slash = True

334

335 return out_parts