1"""
This module provides the :mod:`re2` backend for :class:`~pathspec.pathspec.PathSpec`.
3
4WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
5contents and structure are likely to change.
6"""
7from __future__ import annotations
8
9from collections.abc import (
10 Callable,
11 Sequence)
12from typing import (
13 Optional) # Replaced by `X | None` in 3.10.
14
15try:
16 import re2
17except ModuleNotFoundError:
18 re2 = None
19
20from ...pattern import (
21 RegexPattern)
22from ..._typing import (
23 override) # Added in 3.12.
24
25from ..base import (
26 Backend)
27from .._utils import (
28 enumerate_patterns)
29
30from .base import (
31 re2_error)
32from ._base import (
33 RE2_OPTIONS,
34 Re2RegexDat,
35 Re2RegexDebug)
36
37
class Re2PsBackend(Backend):
    """
    The :class:`Re2PsBackend` class matches files for
    :class:`~pathspec.pathspec.PathSpec` using an :mod:`re2` regex set.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`Re2PsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns. It must not be empty, and its first element must be
        a :class:`.RegexPattern`.

        *_debug_regex* (:class:`bool` or :data:`None`) is whether to keep the
        regular expression string with each regex data entry.

        *_test_sort* (:class:`callable` or :data:`None`) is called with the
        list of pattern indices, to reorder it in place, before the regex set
        is populated.

        Raises *re2_error* when it is set (presumably the error recorded when
        :mod:`re2` could not be imported), :class:`ValueError` when *patterns*
        is empty, and :class:`TypeError` when the first pattern is not a
        :class:`.RegexPattern`.
        """
        if re2_error is not None:
            raise re2_error

        # Validate the input up front.
        if not patterns:
            raise ValueError(f"{patterns=!r} cannot be empty.")
        if not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        # Index the patterns, then create the (still empty) regex set.
        indexed = enumerate_patterns(patterns, filter=True, reverse=False)
        pattern_map = dict(indexed)
        search_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether the regex data entries carry
        the regular expression string for debugging.
        """

        self._patterns: dict[int, RegexPattern] = pattern_map
        """
        *_patterns* (:class:`dict`) maps each pattern index (:class:`int`) to
        its pattern (:class:`RegexPattern`).
        """

        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=pattern_map,
            regex_set=search_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) holds the regex data
        (:class:`Re2RegexDat`) for each regex, in the order the regexes were
        added to the set.
        """

        self._set: re2.Set = search_set
        """
        *_set* (:class:`re2.Set`) is the compiled re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Populate and compile the re2 regex set.

        *debug* (:class:`bool`) is whether to record the regular expression
        string in each regex data entry.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set to add the regular
        expressions to. It is compiled before returning.

        *sort_indices* (:class:`callable` or :data:`None`) is a function that
        reorders the list of pattern indices in place. This is used during
        testing to ensure the order of patterns is not accidentally relied on.

        Returns a :class:`list` with one regex data entry
        (:class:`Re2RegexDat`) per regex added to the set, in the order added.
        """
        # Decide the order in which the patterns are added.
        ordering = list(patterns)
        if sort_indices is not None:
            sort_indices(ordering)

        # Add each usable pattern to the set and record its data.
        entries: list[Re2RegexDat] = []
        for index in ordering:
            candidate = patterns[index]
            if candidate.include is None:
                # Nothing to match for this pattern.
                continue

            assert isinstance(candidate, RegexPattern), candidate
            regex = candidate.regex.pattern

            if debug:
                entry = Re2RegexDebug(
                    include=candidate.include,
                    index=index,
                    is_dir_pattern=False,
                    regex=regex,
                )
            else:
                entry = Re2RegexDat(
                    include=candidate.include,
                    index=index,
                    is_dir_pattern=False,
                )

            entries.append(entry)
            regex_set.Add(regex)

        # Compile the set once every regex has been added.
        regex_set.Compile()
        return entries

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Create a new re2 search set configured with *RE2_OPTIONS*.

        Returns the set (:class:`re2.Set`).
        """
        search_set = re2.Set.SearchSet(RE2_OPTIONS)
        return search_set

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`). Both are :data:`None` when no
        pattern matched.
        """
        hits = self._set.Match(file)
        if hits:
            # WARNING: According to the documentation on `RE2::Set::Match()`,
            # there is no guarantee matches will be produced in order! Later
            # patterns have higher priority, so take the highest pattern index
            # among the matches rather than the last match returned.
            data = self._regex_data
            winner = max(data[regex_id].index for regex_id in hits)
            return (self._patterns[winner].include, winner)

        return (None, None)