1"""
2This module provides the :module:`re2` backend for :class:`~pathspec.pathspec.PathSpec`.
3
4WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
5contents and structure are likely to change.
6"""
7from __future__ import annotations
8
9from collections.abc import (
10 Sequence)
11from typing import (
12 Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
13 Optional) # Replaced by `X | None` in 3.10.
14
15try:
16 import re2
17except ModuleNotFoundError:
18 re2 = None
19
20from pathspec.backend import (
21 _Backend)
22from pathspec.pattern import (
23 RegexPattern)
24from pathspec._typing import (
25 override) # Added in 3.12.
26
27from .._utils import (
28 enumerate_patterns)
29
30from .base import (
31 re2_error)
32from ._base import (
33 RE2_OPTIONS,
34 Re2RegexDat,
35 Re2RegexDebug)
36
37
class Re2PsBackend(_Backend):
    """
    The :class:`Re2PsBackend` class matches files for
    :class:`~pathspec.pathspec.PathSpec` by way of a :module:`re2` regex set.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Set up the :class:`Re2PsBackend` instance from the compiled patterns.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns. Only the first element is type checked.

        *_debug_regex* (:class:`bool` or :data:`None`) is whether to keep
        additional debugging information for the regular expressions. It is
        coerced with :func:`bool`, so :data:`None` disables it.

        *_test_sort* (:class:`callable` or :data:`None`) optionally reorders the
        list of pattern indices in place before the regexes are added to the set.
        It is forwarded to :meth:`_init_set`.

        Raises *re2_error* when it is not :data:`None`.

        Raises :class:`TypeError` when the first pattern is not a
        :class:`.RegexPattern`.
        """
        if re2_error is not None:
            raise re2_error

        if patterns:
            first = patterns[0]
            if not isinstance(first, RegexPattern):
                raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        indexed = dict(enumerate_patterns(
            patterns, filter=True, reverse=False,
        ))
        re2_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether additional debugging information
        is kept for the regular expressions.
        """

        self._patterns: dict[int, RegexPattern] = indexed
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

        # The set is populated and compiled here; it is only stored on the
        # instance afterwards.
        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=indexed,
            regex_set=re2_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) maps regex index (:class:`int`) to regex data
        (:class:`Re2RegexDat`).
        """

        self._set: re2.Set = re2_set
        """
        *_set* (:class:`re2.Set`) is the compiled re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Add the regex of each pattern to the re2 regex set, and compile the set.

        *debug* (:class:`bool`) is whether to record the regular expression text
        with each entry (:class:`Re2RegexDebug` is used instead of
        :class:`Re2RegexDat`).

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`). Patterns whose *include* is :data:`None` are
        skipped.

        *regex_set* (:class:`re2.Set`) is the regex set to populate. It is
        modified in place.

        *sort_indices* (:class:`callable` or :data:`None`) is called with the list
        of pattern indices and is expected to reorder it in place. This is used
        during testing to ensure the order of patterns is not accidentally relied
        on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Decide the order in which patterns are added.
        order = list(patterns)
        if sort_indices is not None:
            sort_indices(order)

        # Add one regex per usable pattern. Each entry is appended in the same
        # order its regex is added, so list positions line up with the ids used
        # to index this list when matching.
        entries: list[Re2RegexDat] = []
        for index in order:
            pattern = patterns[index]
            if pattern.include is None:
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex_text = pattern.regex.pattern

            common = dict(
                include=pattern.include,
                index=index,
                is_dir_pattern=False,
            )
            if debug:
                entry = Re2RegexDebug(**common, regex=regex_text)
            else:
                entry = Re2RegexDat(**common)

            entries.append(entry)
            regex_set.Add(regex_text)

        # The set must be compiled once everything has been added.
        regex_set.Compile()
        return entries

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Construct a new, empty re2 search set using *RE2_OPTIONS*.

        Returns the set (:class:`re2.Set`).
        """
        search_set = re2.Set.SearchSet(RE2_OPTIONS)
        return search_set

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Match the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file* (:class:`bool`
        or :data:`None`), and the index of the last matched pattern (:class:`int` or
        :data:`None`). Both are :data:`None` when nothing matched.
        """
        matched_ids: Optional[list[int]] = self._set.Match(file)
        if not matched_ids:
            return (None, None)

        # WARNING: According to the documentation on `RE2::Set::Match()`, there is
        # no guarantee matches will be produced in order! Later patterns have
        # higher priority, so pick the greatest pattern index rather than relying
        # on the position of an id in the result.
        entries = self._regex_data
        winner_index = max(entries[regex_id].index for regex_id in matched_ids)
        winner = self._patterns[winner_index]
        return (winner.include, winner_index)