Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/_backends/re2/gitignore.py: 25%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

72 statements  

1""" 

2This module provides the :module:`re2` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`. 

3 

4WARNING: The *pathspec._backends.re2* package is not part of the public API. Its 

5contents and structure are likely to change. 

6""" 

7from __future__ import annotations 

8 

9from typing import ( 

10 Callable, # Replaced by `collections.abc.Callable` in 3.9.2. 

11 Optional, # Replaced by `X | None` in 3.10. 

12 Union) # Replaced by `X | Y` in 3.10. 

13 

14try: 

15 import re2 

16except ModuleNotFoundError: 

17 re2 = None # type: ignore[assignment] 

18 

19from pathspec.pattern import ( 

20 RegexPattern) 

21from pathspec.patterns.gitignore.spec import ( 

22 GitIgnoreSpecPattern, 

23 _BYTES_ENCODING, 

24 _DIR_MARK_CG, 

25 _DIR_MARK_OPT) 

26from pathspec._typing import ( 

27 override) # Added in 3.12. 

28 

29from ._base import ( 

30 Re2RegexDat, 

31 Re2RegexDebug) 

32from .pathspec import ( 

33 Re2PsBackend) 

34 

35 

class Re2GiBackend(Re2PsBackend):
    """
    The :class:`Re2GiBackend` class matches files for
    :class:`~pathspec.gitignore.GitIgnoreSpec` using the :mod:`re2` library.
    """

    @override
    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,  # type: ignore
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Fill the re2 regex set from the patterns and compile it.

        *debug* (:class:`bool`) is whether each returned entry should also
        record the regular expression it was created from.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set to add the regular
        expressions to. It is compiled before returning.

        *sort_indices* (:class:`callable` or :data:`None`) is a function that
        reorders the list of pattern indices in place. This is used during
        testing to ensure the order of patterns is not accidentally relied on.

        Returns a :class:`list` mapping regex id (:class:`int`), i.e. the
        position at which a regex was added to *regex_set*, to its data
        (:class:`Re2RegexDat`).
        """
        # Decide the order in which patterns are added to the set.
        ordered_indices = list(patterns)
        if sort_indices is not None:
            sort_indices(ordered_indices)

        # One entry is recorded per regex added to the set, in the same order.
        entries: list[Re2RegexDat] = []
        for pattern_index in ordered_indices:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # A pattern with no include/exclude decision adds nothing.
                continue

            assert pattern.regex is not None, pattern
            assert isinstance(pattern, RegexPattern), pattern
            raw_regex = pattern.regex.pattern

            # Each variant is (regex, is_dir_pattern).
            variants: list[tuple[Union[str, bytes], bool]] = []
            if isinstance(pattern, GitIgnoreSpecPattern):
                # GitIgnoreSpecPattern marks directories with a capture group.
                # Capture groups cannot be used through `re2.Set`, so the
                # marker is rewritten into plain regexes here.
                if isinstance(raw_regex, str):
                    regex_text = raw_regex
                else:
                    assert isinstance(raw_regex, bytes), raw_regex
                    regex_text = raw_regex.decode(_BYTES_ENCODING)

                if _DIR_MARK_CG in regex_text:
                    if regex_text.endswith(_DIR_MARK_OPT):
                        # The directory marker is optional: emit one regex for
                        # the directory form and one for the file form.
                        stem = regex_text[:-len(_DIR_MARK_OPT)]
                        variants = [
                            (f'{stem}/', True),
                            (f'{stem}$', False),
                        ]
                    else:
                        # The directory marker is required: swap the capture
                        # group for a literal slash.
                        variants = [
                            (regex_text.replace(_DIR_MARK_CG, '/'), True),
                        ]

            if not variants:
                # Nothing special about this regex; use it unchanged.
                variants = [(raw_regex, False)]

            for variant_regex, is_dir_pattern in variants:
                entry: Re2RegexDat
                if debug:
                    entry = Re2RegexDebug(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                        regex=variant_regex,
                    )
                else:
                    entry = Re2RegexDat(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                    )

                entries.append(entry)
                regex_set.Add(variant_regex)  # type: ignore[type-var]

        # All regexes are added; compile the set.
        regex_set.Compile()
        return entries

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Match the file against the compiled regex set and pick the winning
        pattern.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the winning pattern
        (:class:`int` or :data:`None`). Both are :data:`None` when nothing
        matched.
        """
        matched_ids: Optional[list[int]] = self._set.Match(file)  # type: ignore[assignment]
        if not matched_ids:
            return (None, None)

        best_include: Optional[bool] = None
        best_index: int = -1
        best_priority = -1

        lookup = self._regex_data
        for regex_id in matched_ids:
            dat = lookup[regex_id]
            include = dat.include
            index = dat.index
            is_dir_pattern = dat.is_dir_pattern

            # A match from a file pattern (2) outranks one from a directory
            # pattern (1).
            priority = 1 if is_dir_pattern else 2

            # WARNING: The documentation on `RE2::Set::Match()` gives no
            # guarantee that matches are produced in order, so every
            # comparison below is made against the pattern index explicitly.
            is_newer = index > best_index
            takes_over = (
                # A higher priority always wins.
                priority > best_priority
                # Otherwise a later pattern wins when it has the same priority,
                # or when it is an including directory pattern.
                or (is_newer and (
                    priority == best_priority
                    or (include and is_dir_pattern)
                ))
            )
            if takes_over:
                best_include = include
                best_index = index
                best_priority = priority

        assert best_index != -1, (best_index, best_include, best_priority)
        return (best_include, best_index)