Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/_backends/re2/pathspec.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

62 statements  

1""" 

2This module provides the :module:`re2` backend for :class:`~pathspec.pathspec.PathSpec`. 

3 

4WARNING: The *pathspec._backends.re2* package is not part of the public API. Its 

5contents and structure are likely to change. 

6""" 

7from __future__ import annotations 

8 

9from collections.abc import ( 

10 Callable, 

11 Sequence) 

12from typing import ( 

13 Optional) # Replaced by `X | None` in 3.10. 

14 

15try: 

16 import re2 

17except ModuleNotFoundError: 

18 re2 = None 

19 

20from ...pattern import ( 

21 RegexPattern) 

22from ..._typing import ( 

23 override) # Added in 3.12. 

24 

25from ..base import ( 

26 Backend) 

27from .._utils import ( 

28 enumerate_patterns) 

29 

30from .base import ( 

31 re2_error) 

32from ._base import ( 

33 RE2_OPTIONS, 

34 Re2RegexDat, 

35 Re2RegexDebug) 

36 

37 

class Re2PsBackend(Backend):
    """
    The :class:`Re2PsBackend` class is the :module:`re2` implementation used by
    :class:`~pathspec.pathspec.PathSpec` for matching files.

    Every pattern whose *include* is not :data:`None` is added to a single
    :class:`re2.Set`, so a file is checked against all of them with one call.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list[int]], None]] = None,
    ) -> None:
        """
        Initialize the :class:`Re2PsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *_debug_regex* (:class:`bool` or :data:`None`) is whether to include
        additional debugging information for the regular expressions.

        *_test_sort* (:class:`callable` or :data:`None`) is a function used to
        sort the patterns by index. This is used during testing to ensure the
        order of patterns is not accidentally relied on.

        Raises *re2_error* when it is set; :class:`ValueError` when *patterns* is
        empty; and :class:`TypeError` when the first pattern is not a
        :class:`.RegexPattern`.
        """
        # *re2_error* comes from `.base` and is raised as-is when set.
        # NOTE(review): presumably it records a failed re2 import -- confirm there.
        if re2_error is not None:
            raise re2_error

        if not patterns:
            raise ValueError(f"{patterns=!r} cannot be empty.")
        elif not isinstance(patterns[0], RegexPattern):
            # Only the first pattern is checked here; `_init_set()` asserts the
            # type of each pattern it actually adds to the set.
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        use_patterns = dict(enumerate_patterns(
            patterns, filter=True, reverse=False,
        ))
        regex_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.
        """

        self._patterns: dict[int, RegexPattern] = use_patterns
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

        # This call also populates and compiles *regex_set* in place.
        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=use_patterns,
            regex_set=regex_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) maps regex index (:class:`int`) to regex data
        (:class:`Re2RegexDat`).
        """

        self._set: re2.Set = regex_set
        """
        *_set* (:class:`re2.Set`) is the re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Add the patterns to the re2 regex set and compile it.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set. It is modified in place:
        each usable regex is added, and then the set is compiled.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the patterns by index. This is used during testing to ensure the order
        of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Sort patterns.
        indices = list(patterns)
        if sort_indices is not None:
            sort_indices(indices)

        # Prepare patterns.
        # - NOTE: One entry is appended to *regex_data* for every `Add()` call so
        #   the list position lines up with the regex id used by `match_file()`.
        #   This assumes `Add()` assigns ids sequentially in call order (confirm
        #   against the re2 binding).
        regex_data: list[Re2RegexDat] = []
        for pattern_index in indices:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # Nothing to match; skip without consuming a regex id.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            if debug:
                # The debug variant additionally records the regex source.
                regex_data.append(Re2RegexDebug(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex,
                ))
            else:
                regex_data.append(Re2RegexDat(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                ))

            regex_set.Add(regex)

        # Compile patterns.
        regex_set.Compile()
        return regex_data

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Create the re2 regex set.

        Returns the new, empty search set (:class:`re2.Set`) built with
        *RE2_OPTIONS*.
        """
        return re2.Set.SearchSet(RE2_OPTIONS)

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file* (:class:`bool`
        or :data:`None`), and the index of the last matched pattern (:class:`int` or
        :data:`None`). Both are :data:`None` when no pattern matches.
        """
        # Find best match.
        # - WARNING: According to the documentation on `RE2::Set::Match()`, there is
        #   no guarantee matches will be produced in order! Later expressions have
        #   higher priority.
        match_ids: Optional[list[int]] = self._set.Match(file)
        if not match_ids:
            return (None, None)

        # Because match order is not guaranteed, select the highest pattern index
        # explicitly rather than taking the last id returned.
        regex_data = self._regex_data
        pattern_index = max(regex_data[regex_id].index for regex_id in match_ids)
        pattern = self._patterns[pattern_index]
        return (pattern.include, pattern_index)

189 return (pattern.include, pattern_index)