/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-1.5.6/src/re_builder.rs
Line | Count | Source |
/// The set of user configurable options for compiling zero or more regexes.
///
/// The builders defined in this file each wrap one `RegexOptions` value:
/// their `new` constructors start from `RegexOptions::default()` and fill in
/// `pats`, every setter overwrites exactly one field, and `build` hands a
/// clone of the whole struct to the compiler.
#[derive(Clone, Debug)]
pub struct RegexOptions {
    /// The patterns to compile, in the order they were given to the builder.
    pub pats: Vec<String>,
    /// Approximate size limit, in bytes, of a single compiled program.
    pub size_limit: usize,
    /// Approximate size, in bytes, of the cache used by the DFA.
    pub dfa_size_limit: usize,
    /// Nesting limit handed to the parser.
    pub nest_limit: u32,
    /// Value of the case insensitive (`i`) flag.
    pub case_insensitive: bool,
    /// Value of the multi-line matching (`m`) flag.
    pub multi_line: bool,
    /// Value of the any character (`s`) flag.
    pub dot_matches_new_line: bool,
    /// Value of the greedy swap (`U`) flag.
    pub swap_greed: bool,
    /// Value of the ignore whitespace (`x`) flag.
    pub ignore_whitespace: bool,
    /// Value of the Unicode (`u`) flag.
    pub unicode: bool,
    /// Whether octal syntax is supported.
    pub octal: bool,
}

impl Default for RegexOptions {
    /// Returns the options every builder starts from: no patterns, a 10 MiB
    /// program size limit, a 2 MiB DFA cache limit, a nest limit of 250,
    /// Unicode enabled and every other flag disabled.
    fn default() -> Self {
        RegexOptions {
            pats: vec![],
            // 1 << 20 is one mebibyte, so this is 10 MiB.
            size_limit: 10 * (1 << 20),
            // 2 MiB.
            dfa_size_limit: 2 * (1 << 20),
            nest_limit: 250,
            case_insensitive: false,
            multi_line: false,
            dot_matches_new_line: false,
            swap_greed: false,
            ignore_whitespace: false,
            // The only flag that is on by default.
            unicode: true,
            octal: false,
        }
    }
}
35 | | |
36 | | macro_rules! define_builder { |
37 | | ($name:ident, $regex_mod:ident, $only_utf8:expr) => { |
38 | | pub mod $name { |
39 | | use super::RegexOptions; |
40 | | use crate::error::Error; |
41 | | use crate::exec::ExecBuilder; |
42 | | |
43 | | use crate::$regex_mod::Regex; |
44 | | |
45 | | /// A configurable builder for a regular expression. |
46 | | /// |
47 | | /// A builder can be used to configure how the regex is built, for example, by |
48 | | /// setting the default flags (which can be overridden in the expression |
49 | | /// itself) or setting various limits. |
50 | | #[derive(Debug)] |
51 | | pub struct RegexBuilder(RegexOptions); |
52 | | |
53 | | impl RegexBuilder { |
54 | | /// Create a new regular expression builder with the given pattern. |
55 | | /// |
56 | | /// If the pattern is invalid, then an error will be returned when |
57 | | /// `build` is called. |
58 | 15 | pub fn new(pattern: &str) -> RegexBuilder { |
59 | 15 | let mut builder = RegexBuilder(RegexOptions::default()); |
60 | 15 | builder.0.pats.push(pattern.to_owned()); |
61 | 15 | builder |
62 | 15 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::new <regex::re_builder::unicode::RegexBuilder>::new Line | Count | Source | 58 | 9 | pub fn new(pattern: &str) -> RegexBuilder { | 59 | 9 | let mut builder = RegexBuilder(RegexOptions::default()); | 60 | 9 | builder.0.pats.push(pattern.to_owned()); | 61 | 9 | builder | 62 | 9 | } |
Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::new <regex::re_builder::unicode::RegexBuilder>::new Line | Count | Source | 58 | 6 | pub fn new(pattern: &str) -> RegexBuilder { | 59 | 6 | let mut builder = RegexBuilder(RegexOptions::default()); | 60 | 6 | builder.0.pats.push(pattern.to_owned()); | 61 | 6 | builder | 62 | 6 | } |
|
63 | | |
64 | | /// Consume the builder and compile the regular expression. |
65 | | /// |
66 | | /// Note that calling `as_str` on the resulting `Regex` will produce the |
67 | | /// pattern given to `new` verbatim. Notably, it will not incorporate any |
68 | | /// of the flags set on this builder. |
69 | 15 | pub fn build(&self) -> Result<Regex, Error> { |
70 | 15 | ExecBuilder::new_options(self.0.clone()) |
71 | 15 | .only_utf8($only_utf8) |
72 | 15 | .build() |
73 | 15 | .map(Regex::from) |
74 | 15 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::build <regex::re_builder::unicode::RegexBuilder>::build Line | Count | Source | 69 | 9 | pub fn build(&self) -> Result<Regex, Error> { | 70 | 9 | ExecBuilder::new_options(self.0.clone()) | 71 | 9 | .only_utf8($only_utf8) | 72 | 9 | .build() | 73 | 9 | .map(Regex::from) | 74 | 9 | } |
Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::build <regex::re_builder::unicode::RegexBuilder>::build Line | Count | Source | 69 | 6 | pub fn build(&self) -> Result<Regex, Error> { | 70 | 6 | ExecBuilder::new_options(self.0.clone()) | 71 | 6 | .only_utf8($only_utf8) | 72 | 6 | .build() | 73 | 6 | .map(Regex::from) | 74 | 6 | } |
|
75 | | |
76 | | /// Set the value for the case insensitive (`i`) flag. |
77 | | /// |
78 | | /// When enabled, letters in the pattern will match both upper case and |
79 | | /// lower case variants. |
80 | 0 | pub fn case_insensitive( |
81 | 0 | &mut self, |
82 | 0 | yes: bool, |
83 | 0 | ) -> &mut RegexBuilder { |
84 | 0 | self.0.case_insensitive = yes; |
85 | 0 | self |
86 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::case_insensitive Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::case_insensitive Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::case_insensitive Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::case_insensitive |
87 | | |
88 | | /// Set the value for the multi-line matching (`m`) flag. |
89 | | /// |
90 | | /// When enabled, `^` matches the beginning of lines and `$` matches the |
91 | | /// end of lines. |
92 | | /// |
93 | | /// By default, they match beginning/end of the input. |
94 | 0 | pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { |
95 | 0 | self.0.multi_line = yes; |
96 | 0 | self |
97 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::multi_line Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::multi_line Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::multi_line Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::multi_line |
98 | | |
99 | | /// Set the value for the any character (`s`) flag, where in `.` matches |
100 | | /// anything when `s` is set and matches anything except for new line when |
101 | | /// it is not set (the default). |
102 | | /// |
103 | | /// N.B. "matches anything" means "any byte" when Unicode is disabled and |
104 | | /// means "any valid UTF-8 encoding of any Unicode scalar value" when |
105 | | /// Unicode is enabled. |
106 | 0 | pub fn dot_matches_new_line( |
107 | 0 | &mut self, |
108 | 0 | yes: bool, |
109 | 0 | ) -> &mut RegexBuilder { |
110 | 0 | self.0.dot_matches_new_line = yes; |
111 | 0 | self |
112 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::dot_matches_new_line Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::dot_matches_new_line Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::dot_matches_new_line Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::dot_matches_new_line |
113 | | |
114 | | /// Set the value for the greedy swap (`U`) flag. |
115 | | /// |
116 | | /// When enabled, a pattern like `a*` is lazy (tries to find shortest |
117 | | /// match) and `a*?` is greedy (tries to find longest match). |
118 | | /// |
119 | | /// By default, `a*` is greedy and `a*?` is lazy. |
120 | 0 | pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { |
121 | 0 | self.0.swap_greed = yes; |
122 | 0 | self |
123 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::swap_greed Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::swap_greed Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::swap_greed Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::swap_greed |
124 | | |
125 | | /// Set the value for the ignore whitespace (`x`) flag. |
126 | | /// |
127 | | /// When enabled, whitespace such as new lines and spaces will be ignored |
128 | | /// between expressions of the pattern, and `#` can be used to start a |
129 | | /// comment until the next new line. |
130 | 0 | pub fn ignore_whitespace( |
131 | 0 | &mut self, |
132 | 0 | yes: bool, |
133 | 0 | ) -> &mut RegexBuilder { |
134 | 0 | self.0.ignore_whitespace = yes; |
135 | 0 | self |
136 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::ignore_whitespace Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::ignore_whitespace Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::ignore_whitespace Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::ignore_whitespace |
137 | | |
138 | | /// Set the value for the Unicode (`u`) flag. |
139 | | /// |
140 | | /// Enabled by default. When disabled, character classes such as `\w` only |
141 | | /// match ASCII word characters instead of all Unicode word characters. |
142 | 0 | pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { |
143 | 0 | self.0.unicode = yes; |
144 | 0 | self |
145 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::unicode Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::unicode Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::unicode Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::unicode |
146 | | |
147 | | /// Whether to support octal syntax or not. |
148 | | /// |
149 | | /// Octal syntax is a little-known way of uttering Unicode codepoints in |
150 | | /// a regular expression. For example, `a`, `\x61`, `\u0061` and |
151 | | /// `\141` are all equivalent regular expressions, where the last example |
152 | | /// shows octal syntax. |
153 | | /// |
154 | | /// While supporting octal syntax isn't in and of itself a problem, it does |
155 | | /// make good error messages harder. That is, in PCRE based regex engines, |
156 | | /// syntax like `\0` invokes a backreference, which is explicitly |
157 | | /// unsupported in Rust's regex engine. However, many users expect it to |
158 | | /// be supported. Therefore, when octal support is disabled, the error |
159 | | /// message will explicitly mention that backreferences aren't supported. |
160 | | /// |
161 | | /// Octal syntax is disabled by default. |
162 | 0 | pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { |
163 | 0 | self.0.octal = yes; |
164 | 0 | self |
165 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::octal Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::octal Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::octal Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::octal |
166 | | |
167 | | /// Set the approximate size limit of the compiled regular expression. |
168 | | /// |
169 | | /// This roughly corresponds to the number of bytes occupied by a single |
170 | | /// compiled program. If the program exceeds this number, then a |
171 | | /// compilation error is returned. |
172 | 0 | pub fn size_limit( |
173 | 0 | &mut self, |
174 | 0 | limit: usize, |
175 | 0 | ) -> &mut RegexBuilder { |
176 | 0 | self.0.size_limit = limit; |
177 | 0 | self |
178 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::size_limit Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::size_limit Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::size_limit Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::size_limit |
179 | | |
180 | | /// Set the approximate size of the cache used by the DFA. |
181 | | /// |
182 | | /// This roughly corresponds to the number of bytes that the DFA will |
183 | | /// use while searching. |
184 | | /// |
185 | | /// Note that this is a *per thread* limit. There is no way to set a global |
186 | | /// limit. In particular, if a regex is used from multiple threads |
187 | | /// simultaneously, then each thread may use up to the number of bytes |
188 | | /// specified here. |
189 | 0 | pub fn dfa_size_limit( |
190 | 0 | &mut self, |
191 | 0 | limit: usize, |
192 | 0 | ) -> &mut RegexBuilder { |
193 | 0 | self.0.dfa_size_limit = limit; |
194 | 0 | self |
195 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::dfa_size_limit Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::dfa_size_limit Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::dfa_size_limit Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::dfa_size_limit |
196 | | |
197 | | /// Set the nesting limit for this parser. |
198 | | /// |
199 | | /// The nesting limit controls how deep the abstract syntax tree is allowed |
200 | | /// to be. If the AST exceeds the given limit (e.g., with too many nested |
201 | | /// groups), then an error is returned by the parser. |
202 | | /// |
203 | | /// The purpose of this limit is to act as a heuristic to prevent stack |
204 | | /// overflow for consumers that do structural induction on an `Ast` using |
205 | | /// explicit recursion. While this crate never does this (instead using |
206 | | /// constant stack space and moving the call stack to the heap), other |
207 | | /// crates may. |
208 | | /// |
209 | | /// This limit is not checked until the entire Ast is parsed. Therefore, |
210 | | /// if callers want to put a limit on the amount of heap space used, then |
211 | | /// they should impose a limit on the length, in bytes, of the concrete |
212 | | /// pattern string. In particular, this is viable since this parser |
213 | | /// implementation will limit itself to heap space proportional to the |
214 | | /// length of the pattern string. |
215 | | /// |
216 | | /// Note that a nest limit of `0` will return a nest limit error for most |
217 | | /// patterns but not all. For example, a nest limit of `0` permits `a` but |
218 | | /// not `ab`, since `ab` requires a concatenation, which results in a nest |
219 | | /// depth of `1`. In general, a nest limit is not something that manifests |
220 | | /// in an obvious way in the concrete syntax, therefore, it should not be |
221 | | /// used in a granular way. |
222 | 0 | pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { |
223 | 0 | self.0.nest_limit = limit; |
224 | 0 | self |
225 | 0 | } Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::nest_limit Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::nest_limit Unexecuted instantiation: <regex::re_builder::bytes::RegexBuilder>::nest_limit Unexecuted instantiation: <regex::re_builder::unicode::RegexBuilder>::nest_limit |
226 | | } |
227 | | } |
228 | | }; |
229 | | } |
230 | | |
231 | | define_builder!(bytes, re_bytes, false); |
232 | | define_builder!(unicode, re_unicode, true); |
233 | | |
234 | | macro_rules! define_set_builder { |
235 | | ($name:ident, $regex_mod:ident, $only_utf8:expr) => { |
236 | | pub mod $name { |
237 | | use super::RegexOptions; |
238 | | use crate::error::Error; |
239 | | use crate::exec::ExecBuilder; |
240 | | |
241 | | use crate::re_set::$regex_mod::RegexSet; |
242 | | |
243 | | /// A configurable builder for a set of regular expressions. |
244 | | /// |
245 | | /// A builder can be used to configure how the regexes are built, for example, |
246 | | /// by setting the default flags (which can be overridden in the expression |
247 | | /// itself) or setting various limits. |
248 | | #[derive(Debug)] |
249 | | pub struct RegexSetBuilder(RegexOptions); |
250 | | |
251 | | impl RegexSetBuilder { |
252 | | /// Create a new regular expression builder with the given pattern. |
253 | | /// |
254 | | /// If the pattern is invalid, then an error will be returned when |
255 | | /// `build` is called. |
256 | 0 | pub fn new<I, S>(patterns: I) -> RegexSetBuilder |
257 | 0 | where |
258 | 0 | S: AsRef<str>, |
259 | 0 | I: IntoIterator<Item = S>, |
260 | | { |
261 | 0 | let mut builder = RegexSetBuilder(RegexOptions::default()); |
262 | 0 | for pat in patterns { |
263 | 0 | builder.0.pats.push(pat.as_ref().to_owned()); |
264 | 0 | } |
265 | 0 | builder |
266 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::new::<&[&str; 0], &&str> Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::new::<&[&str; 0], &&str> Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::new::<&[&str; 0], &&str> Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::new::<&[&str; 0], &&str> |
267 | | |
268 | | /// Consume the builder and compile the regular expressions into a set. |
269 | 0 | pub fn build(&self) -> Result<RegexSet, Error> { |
270 | 0 | ExecBuilder::new_options(self.0.clone()) |
271 | 0 | .only_utf8($only_utf8) |
272 | 0 | .build() |
273 | 0 | .map(RegexSet::from) |
274 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::build Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::build Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::build Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::build |
275 | | |
276 | | /// Set the value for the case insensitive (`i`) flag. |
277 | 0 | pub fn case_insensitive( |
278 | 0 | &mut self, |
279 | 0 | yes: bool, |
280 | 0 | ) -> &mut RegexSetBuilder { |
281 | 0 | self.0.case_insensitive = yes; |
282 | 0 | self |
283 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::case_insensitive Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::case_insensitive Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::case_insensitive Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::case_insensitive |
284 | | |
285 | | /// Set the value for the multi-line matching (`m`) flag. |
286 | 0 | pub fn multi_line( |
287 | 0 | &mut self, |
288 | 0 | yes: bool, |
289 | 0 | ) -> &mut RegexSetBuilder { |
290 | 0 | self.0.multi_line = yes; |
291 | 0 | self |
292 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::multi_line Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::multi_line Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::multi_line Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::multi_line |
293 | | |
294 | | /// Set the value for the any character (`s`) flag, where in `.` matches |
295 | | /// anything when `s` is set and matches anything except for new line when |
296 | | /// it is not set (the default). |
297 | | /// |
298 | | /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet` |
299 | | /// expressions and means "any Unicode scalar value" for `regex::RegexSet` |
300 | | /// expressions. |
301 | 0 | pub fn dot_matches_new_line( |
302 | 0 | &mut self, |
303 | 0 | yes: bool, |
304 | 0 | ) -> &mut RegexSetBuilder { |
305 | 0 | self.0.dot_matches_new_line = yes; |
306 | 0 | self |
307 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::dot_matches_new_line Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::dot_matches_new_line Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::dot_matches_new_line Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::dot_matches_new_line |
308 | | |
309 | | /// Set the value for the greedy swap (`U`) flag. |
310 | 0 | pub fn swap_greed( |
311 | 0 | &mut self, |
312 | 0 | yes: bool, |
313 | 0 | ) -> &mut RegexSetBuilder { |
314 | 0 | self.0.swap_greed = yes; |
315 | 0 | self |
316 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::swap_greed Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::swap_greed Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::swap_greed Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::swap_greed |
317 | | |
318 | | /// Set the value for the ignore whitespace (`x`) flag. |
319 | 0 | pub fn ignore_whitespace( |
320 | 0 | &mut self, |
321 | 0 | yes: bool, |
322 | 0 | ) -> &mut RegexSetBuilder { |
323 | 0 | self.0.ignore_whitespace = yes; |
324 | 0 | self |
325 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::ignore_whitespace Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::ignore_whitespace Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::ignore_whitespace Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::ignore_whitespace |
326 | | |
327 | | /// Set the value for the Unicode (`u`) flag. |
328 | 0 | pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { |
329 | 0 | self.0.unicode = yes; |
330 | 0 | self |
331 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::unicode Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::unicode Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::unicode Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::unicode |
332 | | |
333 | | /// Whether to support octal syntax or not. |
334 | | /// |
335 | | /// Octal syntax is a little-known way of uttering Unicode codepoints in |
336 | | /// a regular expression. For example, `a`, `\x61`, `\u0061` and |
337 | | /// `\141` are all equivalent regular expressions, where the last example |
338 | | /// shows octal syntax. |
339 | | /// |
340 | | /// While supporting octal syntax isn't in and of itself a problem, it does |
341 | | /// make good error messages harder. That is, in PCRE based regex engines, |
342 | | /// syntax like `\0` invokes a backreference, which is explicitly |
343 | | /// unsupported in Rust's regex engine. However, many users expect it to |
344 | | /// be supported. Therefore, when octal support is disabled, the error |
345 | | /// message will explicitly mention that backreferences aren't supported. |
346 | | /// |
347 | | /// Octal syntax is disabled by default. |
348 | 0 | pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { |
349 | 0 | self.0.octal = yes; |
350 | 0 | self |
351 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::octal Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::octal Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::octal Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::octal |
352 | | |
353 | | /// Set the approximate size limit of the compiled regular expression. |
354 | | /// |
355 | | /// This roughly corresponds to the number of bytes occupied by a single |
356 | | /// compiled program. If the program exceeds this number, then a |
357 | | /// compilation error is returned. |
358 | 0 | pub fn size_limit( |
359 | 0 | &mut self, |
360 | 0 | limit: usize, |
361 | 0 | ) -> &mut RegexSetBuilder { |
362 | 0 | self.0.size_limit = limit; |
363 | 0 | self |
364 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::size_limit Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::size_limit Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::size_limit Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::size_limit |
365 | | |
366 | | /// Set the approximate size of the cache used by the DFA. |
367 | | /// |
368 | | /// This roughly corresponds to the number of bytes that the DFA will |
369 | | /// use while searching. |
370 | | /// |
371 | | /// Note that this is a *per thread* limit. There is no way to set a global |
372 | | /// limit. In particular, if a regex is used from multiple threads |
373 | | /// simultaneously, then each thread may use up to the number of bytes |
374 | | /// specified here. |
375 | 0 | pub fn dfa_size_limit( |
376 | 0 | &mut self, |
377 | 0 | limit: usize, |
378 | 0 | ) -> &mut RegexSetBuilder { |
379 | 0 | self.0.dfa_size_limit = limit; |
380 | 0 | self |
381 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::dfa_size_limit Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::dfa_size_limit Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::dfa_size_limit Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::dfa_size_limit |
382 | | |
383 | | /// Set the nesting limit for this parser. |
384 | | /// |
385 | | /// The nesting limit controls how deep the abstract syntax tree is allowed |
386 | | /// to be. If the AST exceeds the given limit (e.g., with too many nested |
387 | | /// groups), then an error is returned by the parser. |
388 | | /// |
389 | | /// The purpose of this limit is to act as a heuristic to prevent stack |
390 | | /// overflow for consumers that do structural induction on an `Ast` using |
391 | | /// explicit recursion. While this crate never does this (instead using |
392 | | /// constant stack space and moving the call stack to the heap), other |
393 | | /// crates may. |
394 | | /// |
395 | | /// This limit is not checked until the entire Ast is parsed. Therefore, |
396 | | /// if callers want to put a limit on the amount of heap space used, then |
397 | | /// they should impose a limit on the length, in bytes, of the concrete |
398 | | /// pattern string. In particular, this is viable since this parser |
399 | | /// implementation will limit itself to heap space proportional to the |
400 | | /// length of the pattern string. |
401 | | /// |
402 | | /// Note that a nest limit of `0` will return a nest limit error for most |
403 | | /// patterns but not all. For example, a nest limit of `0` permits `a` but |
404 | | /// not `ab`, since `ab` requires a concatenation, which results in a nest |
405 | | /// depth of `1`. In general, a nest limit is not something that manifests |
406 | | /// in an obvious way in the concrete syntax, therefore, it should not be |
407 | | /// used in a granular way. |
408 | 0 | pub fn nest_limit( |
409 | 0 | &mut self, |
410 | 0 | limit: u32, |
411 | 0 | ) -> &mut RegexSetBuilder { |
412 | 0 | self.0.nest_limit = limit; |
413 | 0 | self |
414 | 0 | } Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::nest_limit Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::nest_limit Unexecuted instantiation: <regex::re_builder::set_unicode::RegexSetBuilder>::nest_limit Unexecuted instantiation: <regex::re_builder::set_bytes::RegexSetBuilder>::nest_limit |
415 | | } |
416 | | } |
417 | | }; |
418 | | } |
419 | | |
420 | | define_set_builder!(set_bytes, bytes, false); |
421 | | define_set_builder!(set_unicode, unicode, true); |