Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathspec/_backends/re2/pathspec.py: 37%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

60 statements  

1""" 

2This module provides the :mod:`re2` backend for :class:`~pathspec.pathspec.PathSpec`.

3 

4WARNING: The *pathspec._backends.re2* package is not part of the public API. Its 

5contents and structure are likely to change. 

6""" 

7from __future__ import annotations 

8 

9from collections.abc import ( 

10 Sequence) 

11from typing import ( 

12 Callable, # Replaced by `collections.abc.Callable` in 3.9.2. 

13 Optional) # Replaced by `X | None` in 3.10. 

14 

15try: 

16 import re2 

17except ModuleNotFoundError: 

18 re2 = None 

19 

20from pathspec.backend import ( 

21 _Backend) 

22from pathspec.pattern import ( 

23 RegexPattern) 

24from pathspec._typing import ( 

25 override) # Added in 3.12. 

26 

27from .._utils import ( 

28 enumerate_patterns) 

29 

30from .base import ( 

31 re2_error) 

32from ._base import ( 

33 RE2_OPTIONS, 

34 Re2RegexDat, 

35 Re2RegexDebug) 

36 

37 

class Re2PsBackend(_Backend):
    """
    The :class:`Re2PsBackend` class is the :mod:`re2` based backend that
    :class:`~pathspec.pathspec.PathSpec` uses to match files.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Set up the :class:`Re2PsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) holds the
        compiled patterns. Only the first element is type checked here.

        *_debug_regex* (:class:`bool` or :data:`None`) is coerced to
        :class:`bool` and controls whether extra debugging data is stored for
        each regular expression.

        *_test_sort* (:class:`callable` or :data:`None`) is forwarded to
        :meth:`_init_set` as its *sort_indices* argument.

        Raises the module-level *re2_error* when it is set, and
        :class:`TypeError` when the first pattern is not a
        :class:`.RegexPattern`.
        """
        # Fail fast when the re2 backend reported an error at import time.
        if re2_error is not None:
            raise re2_error

        # Cheap sanity check: inspect only the first pattern.
        if patterns and not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        indexed_patterns = dict(
            enumerate_patterns(patterns, filter=True, reverse=False)
        )
        new_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether extra debugging information is
        kept for the regular expressions.
        """

        self._patterns: dict[int, RegexPattern] = indexed_patterns
        """
        *_patterns* (:class:`dict`) maps each pattern index (:class:`int`) to its
        pattern (:class:`RegexPattern`).
        """

        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=indexed_patterns,
            regex_set=new_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) maps each regex index (:class:`int`) to its
        regex data (:class:`Re2RegexDat`).
        """

        self._set: re2.Set = new_set
        """
        *_set* (:class:`re2.Set`) is the re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Populate and compile the re2 regex set.

        *debug* (:class:`bool`) is whether to record the regular expression text
        alongside each entry (as :class:`Re2RegexDebug`).

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set to add the patterns to.
        It is modified in place and compiled before returning.

        *sort_indices* (:class:`callable` or :data:`None`) reorders the list of
        pattern indices in place. This is used during testing to ensure the
        order of patterns is not accidentally relied on.

        Returns a :class:`list` whose position is the regex id (:class:`int`)
        and whose value is that regex's data (:class:`Re2RegexDat`).
        """
        # Decide the order in which patterns are added to the set.
        ordered_indices = list(patterns)
        if sort_indices is not None:
            sort_indices(ordered_indices)

        # Register each usable pattern; the position in *regex_data* mirrors
        # the order in which regexes are added to the set.
        regex_data: list[Re2RegexDat] = []
        for pattern_index in ordered_indices:
            pattern = patterns[pattern_index]
            include = pattern.include
            if include is None:
                # Patterns without an include value are not added to the set.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex_text = pattern.regex.pattern

            if debug:
                entry = Re2RegexDebug(
                    include=include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex_text,
                )
            else:
                entry = Re2RegexDat(
                    include=include,
                    index=pattern_index,
                    is_dir_pattern=False,
                )

            regex_data.append(entry)
            regex_set.Add(regex_text)

        # The set must be compiled before it can be used for matching.
        regex_set.Compile()
        return regex_data

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Build a new, empty re2 search set configured with *RE2_OPTIONS*.

        Returns the set (:class:`re2.Set`).
        """
        return re2.Set.SearchSet(RE2_OPTIONS)

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Match the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` of whether to include *file* (:class:`bool` or
        :data:`None`) and the index of the last matched pattern (:class:`int`
        or :data:`None`). Both are :data:`None` when nothing matched.
        """
        # WARNING: According to the documentation on `RE2::Set::Match()`, there
        # is no guarantee matches will be produced in order! Later expressions
        # have higher priority, so the winner is picked by pattern index rather
        # than by position in the result.
        matched_ids: Optional[list[int]] = self._set.Match(file)
        if not matched_ids:
            return (None, None)

        data = self._regex_data
        winning_index = max(data[regex_id].index for regex_id in matched_ids)
        winner = self._patterns[winning_index]
        return (winner.include, winning_index)