Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.  | 
2  |  | // Use of this source code is governed by a BSD-style  | 
3  |  | // license that can be found in the LICENSE file.  | 
4  |  |  | 
5  |  | #ifndef RE2_RE2_H_  | 
6  |  | #define RE2_RE2_H_  | 
7  |  |  | 
8  |  | // C++ interface to the re2 regular-expression library.  | 
9  |  | // RE2 supports Perl-style regular expressions (with extensions like  | 
10  |  | // \d, \w, \s, ...).  | 
11  |  | //  | 
12  |  | // -----------------------------------------------------------------------  | 
13  |  | // REGEXP SYNTAX:  | 
14  |  | //  | 
15  |  | // This module uses the re2 library and hence supports  | 
16  |  | // its syntax for regular expressions, which is similar to Perl's with  | 
17  |  | // some of the more complicated things thrown away.  In particular,  | 
18  |  | // backreferences and generalized assertions are not available, nor is \Z.  | 
19  |  | //  | 
20  |  | // See https://github.com/google/re2/wiki/Syntax for the syntax  | 
21  |  | // supported by RE2, and a comparison with PCRE and PERL regexps.  | 
22  |  | //  | 
23  |  | // For those not familiar with Perl's regular expressions,  | 
24  |  | // here are some examples of the most commonly used extensions:  | 
25  |  | //  | 
26  |  | //   "hello (\\w+) world"  -- \w matches a "word" character  | 
27  |  | //   "version (\\d+)"      -- \d matches a digit  | 
28  |  | //   "hello\\s+world"      -- \s matches any whitespace character  | 
29  |  | //   "\\b(\\w+)\\b"        -- \b matches the empty string at a word boundary  | 
30  |  | //   "(?i)hello"           -- (?i) turns on case-insensitive matching  | 
31  |  | //   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible  | 
32  |  | //  | 
33  |  | // The double backslashes are needed when writing C++ string literals.  | 
34  |  | // However, they should NOT be used when writing C++11 raw string literals:  | 
35  |  | //  | 
36  |  | //   R"(hello (\w+) world)"  -- \w matches a "word" character  | 
37  |  | //   R"(version (\d+))"      -- \d matches a digit  | 
38  |  | //   R"(hello\s+world)"      -- \s matches any whitespace character  | 
39  |  | //   R"(\b(\w+)\b)"          -- \b matches the empty string at a word boundary  | 
40  |  | //   R"((?i)hello)"          -- (?i) turns on case-insensitive matching  | 
41  |  | //   R"(/\*(.*?)\*/)"        -- .*? matches . minimum no. of times possible  | 
42  |  | //  | 
43  |  | // When using UTF-8 encoding, case-insensitive matching will perform  | 
44  |  | // simple case folding, not full case folding.  | 
45  |  | //  | 
46  |  | // -----------------------------------------------------------------------  | 
47  |  | // MATCHING INTERFACE:  | 
48  |  | //  | 
49  |  | // The "FullMatch" operation checks that supplied text matches a  | 
50  |  | // supplied pattern exactly.  | 
51  |  | //  | 
52  |  | // Example: successful match  | 
53  |  | //    CHECK(RE2::FullMatch("hello", "h.*o")); | 
54  |  | //  | 
55  |  | // Example: unsuccessful match (requires full match):  | 
56  |  | //    CHECK(!RE2::FullMatch("hello", "e")); | 
57  |  | //  | 
58  |  | // -----------------------------------------------------------------------  | 
59  |  | // UTF-8 AND THE MATCHING INTERFACE:  | 
60  |  | //  | 
61  |  | // By default, the pattern and input text are interpreted as UTF-8.  | 
62  |  | // The RE2::Latin1 option causes them to be interpreted as Latin-1.  | 
63  |  | //  | 
64  |  | // Example:  | 
65  |  | //    CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));  | 
66  |  | //    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));  | 
67  |  | //  | 
68  |  | // -----------------------------------------------------------------------  | 
69  |  | // SUBMATCH EXTRACTION:  | 
70  |  | //  | 
71  |  | // You can supply extra pointer arguments to extract submatches.  | 
72  |  | // On match failure, none of the pointees will have been modified.  | 
73  |  | // On match success, the submatches will be converted (as necessary) and  | 
74  |  | // their values will be assigned to their pointees until all conversions  | 
75  |  | // have succeeded or one conversion has failed.  | 
76  |  | // On conversion failure, the pointees will be in an indeterminate state  | 
77  |  | // because the caller has no way of knowing which conversion failed.  | 
78  |  | // However, conversion cannot fail for types like string and StringPiece  | 
79  |  | // that do not inspect the submatch contents. Hence, in the common case  | 
80  |  | // where all of the pointees are of such types, failure is always due to  | 
81  |  | // match failure and thus none of the pointees will have been modified.  | 
82  |  | //  | 
83  |  | // Example: extracts "ruby" into "s" and 1234 into "i"  | 
84  |  | //    int i;  | 
85  |  | //    std::string s;  | 
86  |  | //    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); | 
87  |  | //  | 
88  |  | // Example: fails because string cannot be stored in integer  | 
89  |  | //    CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); | 
90  |  | //  | 
91  |  | // Example: fails because there aren't enough sub-patterns  | 
92  |  | //    CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s)); | 
93  |  | //  | 
94  |  | // Example: does not try to extract any extra sub-patterns  | 
95  |  | //    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); | 
96  |  | //  | 
97  |  | // Example: does not try to extract into NULL  | 
98  |  | //    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); | 
99  |  | //  | 
100  |  | // Example: integer overflow causes failure  | 
101  |  | //    CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); | 
102  |  | //  | 
103  |  | // NOTE(rsc): Asking for submatches slows successful matches quite a bit.  | 
104  |  | // This may get a little faster in the future, but right now is slower  | 
105  |  | // than PCRE.  On the other hand, failed matches run *very* fast (faster  | 
106  |  | // than PCRE), as do matches without submatch extraction.  | 
107  |  | //  | 
108  |  | // -----------------------------------------------------------------------  | 
109  |  | // PARTIAL MATCHES  | 
110  |  | //  | 
111  |  | // You can use the "PartialMatch" operation when you want the pattern  | 
112  |  | // to match any substring of the text.  | 
113  |  | //  | 
114  |  | // Example: simple search for a string:  | 
115  |  | //      CHECK(RE2::PartialMatch("hello", "ell")); | 
116  |  | //  | 
117  |  | // Example: find first number in a string  | 
118  |  | //      int number;  | 
119  |  | //      CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number)); | 
120  |  | //      CHECK_EQ(number, 100);  | 
121  |  | //  | 
122  |  | // -----------------------------------------------------------------------  | 
123  |  | // PRE-COMPILED REGULAR EXPRESSIONS  | 
124  |  | //  | 
125  |  | // RE2 makes it easy to use any string as a regular expression, without  | 
126  |  | // requiring a separate compilation step.  | 
127  |  | //  | 
128  |  | // If speed is of the essence, you can create a pre-compiled "RE2"  | 
129  |  | // object from the pattern and use it multiple times.  If you do so,  | 
130  |  | // you can typically parse text faster than with sscanf.  | 
131  |  | //  | 
132  |  | // Example: precompile pattern for faster matching:  | 
133  |  | //    RE2 pattern("h.*o"); | 
134  |  | //    while (ReadLine(&str)) { | 
135  |  | //      if (RE2::FullMatch(str, pattern)) ...;  | 
136  |  | //    }  | 
137  |  | //  | 
138  |  | // -----------------------------------------------------------------------  | 
139  |  | // SCANNING TEXT INCREMENTALLY  | 
140  |  | //  | 
141  |  | // The "Consume" operation may be useful if you want to repeatedly  | 
142  |  | // match regular expressions at the front of a string and skip over  | 
143  |  | // them as they match.  This requires use of the "StringPiece" type,  | 
144  |  | // which represents a sub-range of a real string.  | 
145  |  | //  | 
146  |  | // Example: read lines of the form "var = value" from a string.  | 
147  |  | //      std::string contents = ...;     // Fill string somehow  | 
148  |  | //      StringPiece input(contents);    // Wrap a StringPiece around it  | 
149  |  | //  | 
150  |  | //      std::string var;  | 
151  |  | //      int value;  | 
152  |  | //      while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { | 
153  |  | //        ...;  | 
154  |  | //      }  | 
155  |  | //  | 
156  |  | // Each successful call to "Consume" will set "var/value", and also  | 
157  |  | // advance "input" so it points past the matched text.  Note that if the  | 
158  |  | // regular expression matches an empty string, input will advance  | 
159  |  | // by 0 bytes.  If the regular expression being used might match  | 
160  |  | // an empty string, the loop body must check for this case and either  | 
161  |  | // advance the string or break out of the loop.  | 
162  |  | //  | 
163  |  | // The "FindAndConsume" operation is similar to "Consume" but does not  | 
164  |  | // anchor your match at the beginning of the string.  For example, you  | 
165  |  | // could extract all words from a string by repeatedly calling  | 
166  |  | //     RE2::FindAndConsume(&input, "(\\w+)", &word)  | 
167  |  | //  | 
168  |  | // -----------------------------------------------------------------------  | 
169  |  | // USING VARIABLE NUMBER OF ARGUMENTS  | 
170  |  | //  | 
171  |  | // The above operations require you to know the number of arguments  | 
172  |  | // when you write the code.  This is not always possible or easy (for  | 
173  |  | // example, the regular expression may be calculated at run time).  | 
174  |  | // You can use the "N" version of the operations when the number of  | 
175  |  | // match arguments is determined at run time.  | 
176  |  | //  | 
177  |  | // Example:  | 
178  |  | //   const RE2::Arg* args[10];  | 
179  |  | //   int n;  | 
180  |  | //   // ... populate args with pointers to RE2::Arg values ...  | 
181  |  | //   // ... set n to the number of RE2::Arg objects ...  | 
182  |  | //   bool match = RE2::FullMatchN(input, pattern, args, n);  | 
183  |  | //  | 
184  |  | // The last statement is equivalent to  | 
185  |  | //  | 
186  |  | //   bool match = RE2::FullMatch(input, pattern,  | 
187  |  | //                               *args[0], *args[1], ..., *args[n - 1]);  | 
188  |  | //  | 
189  |  | // -----------------------------------------------------------------------  | 
190  |  | // PARSING HEX/OCTAL/C-RADIX NUMBERS  | 
191  |  | //  | 
192  |  | // By default, if you pass a pointer to a numeric value, the  | 
193  |  | // corresponding text is interpreted as a base-10 number.  You can  | 
194  |  | // instead wrap the pointer with a call to one of the operators Hex(),  | 
195  |  | // Octal(), or CRadix() to interpret the text in another base.  The  | 
196  |  | // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)  | 
197  |  | // prefixes, but defaults to base-10.  | 
198  |  | //  | 
199  |  | // Example:  | 
200  |  | //   int a, b, c, d;  | 
201  |  | //   CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", | 
202  |  | //         RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)));  | 
203  |  | // will leave 64 in a, b, c, and d.  | 
204  |  |  | 
205  |  | #include <stddef.h>  | 
206  |  | #include <stdint.h>  | 
207  |  | #include <algorithm>  | 
208  |  | #include <map>  | 
209  |  | #include <mutex>  | 
210  |  | #include <string>  | 
211  |  | #include <type_traits>  | 
212  |  | #include <vector>  | 
213  |  |  | 
214  |  | #if defined(__APPLE__)  | 
215  |  | #include <TargetConditionals.h>  | 
216  |  | #endif  | 
217  |  |  | 
218  |  | #include "re2/stringpiece.h"  | 
219  |  |  | 
220  |  | namespace re2 { | 
221  |  | class Prog;  | 
222  |  | class Regexp;  | 
223  |  | }  // namespace re2  | 
224  |  |  | 
225  |  | namespace re2 { | 
226  |  |  | 
227  |  | // Interface for regular expression matching.  Also corresponds to a  | 
228  |  | // pre-compiled regular expression.  An "RE2" object is safe for  | 
229  |  | // concurrent use by multiple threads.  | 
230  |  | class RE2 { | 
231  |  |  public:  | 
232  |  |   // We convert user-passed pointers into special Arg objects  | 
233  |  |   class Arg;  | 
234  |  |   class Options;  | 
235  |  |  | 
236  |  |   // Defined in set.h.  | 
237  |  |   class Set;  | 
238  |  |  | 
239  |  |   enum ErrorCode { | 
240  |  |     NoError = 0,  | 
241  |  |  | 
242  |  |     // Unexpected error  | 
243  |  |     ErrorInternal,  | 
244  |  |  | 
245  |  |     // Parse errors  | 
246  |  |     ErrorBadEscape,          // bad escape sequence  | 
247  |  |     ErrorBadCharClass,       // bad character class  | 
248  |  |     ErrorBadCharRange,       // bad character class range  | 
249  |  |     ErrorMissingBracket,     // missing closing ]  | 
250  |  |     ErrorMissingParen,       // missing closing )  | 
251  |  |     ErrorUnexpectedParen,    // unexpected closing )  | 
252  |  |     ErrorTrailingBackslash,  // trailing \ at end of regexp  | 
253  |  |     ErrorRepeatArgument,     // repeat argument missing, e.g. "*"  | 
254  |  |     ErrorRepeatSize,         // bad repetition argument  | 
255  |  |     ErrorRepeatOp,           // bad repetition operator  | 
256  |  |     ErrorBadPerlOp,          // bad perl operator  | 
257  |  |     ErrorBadUTF8,            // invalid UTF-8 in regexp  | 
258  |  |     ErrorBadNamedCapture,    // bad named capture group  | 
259  |  |     ErrorPatternTooLarge     // pattern too large (compile failed)  | 
260  |  |   };  | 
261  |  |  | 
262  |  |   // Predefined common options.  | 
263  |  |   // If you need more complicated things, instantiate  | 
264  |  |   // an Options object, possibly passing one of these to  | 
265  |  |   // the Options constructor, change the settings, and pass that  | 
266  |  |   // Options object to the RE2 constructor.  | 
267  |  |   enum CannedOptions { | 
268  |  |     DefaultOptions = 0,  | 
269  |  |     Latin1, // treat input as Latin-1 (default UTF-8)  | 
270  |  |     POSIX, // POSIX syntax, leftmost-longest match  | 
271  |  |     Quiet // do not log about regexp parse errors  | 
272  |  |   };  | 
273  |  |  | 
274  |  |   // Need to have the const char* and const std::string& forms for implicit  | 
275  |  |   // conversions when passing string literals to FullMatch and PartialMatch.  | 
276  |  |   // Otherwise the StringPiece form would be sufficient.  | 
277  |  |   RE2(const char* pattern);  | 
278  |  |   RE2(const std::string& pattern);  | 
279  |  |   RE2(const StringPiece& pattern);  | 
280  |  |   RE2(const StringPiece& pattern, const Options& options);  | 
281  |  |   ~RE2();  | 
282  |  |  | 
283  |  |   // Not copyable.  | 
284  |  |   // RE2 objects are expensive. You should probably use std::shared_ptr<RE2>  | 
285  |  |   // instead. If you really must copy, RE2(first.pattern(), first.options())  | 
286  |  |   // effectively does so: it produces a second object that mimics the first.  | 
287  |  |   RE2(const RE2&) = delete;  | 
288  |  |   RE2& operator=(const RE2&) = delete;  | 
289  |  |   // Not movable.  | 
290  |  |   // RE2 objects are thread-safe and logically immutable. You should probably  | 
291  |  |   // use std::unique_ptr<RE2> instead. Otherwise, consider std::deque<RE2> if  | 
292  |  |   // direct emplacement into a container is desired. If you really must move,  | 
293  |  |   // be prepared to submit a design document along with your feature request.  | 
294  |  |   RE2(RE2&&) = delete;  | 
295  |  |   RE2& operator=(RE2&&) = delete;  | 
296  |  |  | 
297  |  |   // Returns whether RE2 was created properly.  | 
298  | 35.6k  |   bool ok() const { return error_code() == NoError; } | 
299  |  |  | 
300  |  |   // The string specification for this RE2.  E.g.  | 
301  |  |   //   RE2 re("ab*c?d+"); | 
302  |  |   //   re.pattern();    // "ab*c?d+"  | 
303  | 0  |   const std::string& pattern() const { return *pattern_; } | 
304  |  |  | 
305  |  |   // If RE2 could not be created properly, returns an error string.  | 
306  |  |   // Else returns the empty string.  | 
307  | 0  |   const std::string& error() const { return *error_; } | 
308  |  |  | 
309  |  |   // If RE2 could not be created properly, returns an error code.  | 
310  |  |   // Else returns RE2::NoError (== 0).  | 
311  | 35.6k  |   ErrorCode error_code() const { return error_code_; } | 
312  |  |  | 
313  |  |   // If RE2 could not be created properly, returns the offending  | 
314  |  |   // portion of the regexp.  | 
315  | 0  |   const std::string& error_arg() const { return *error_arg_; } | 
316  |  |  | 
317  |  |   // Returns the program size, a very approximate measure of a regexp's "cost".  | 
318  |  |   // Larger numbers are more expensive than smaller numbers.  | 
319  |  |   int ProgramSize() const;  | 
320  |  |   int ReverseProgramSize() const;  | 
321  |  |  | 
322  |  |   // If histogram is not null, outputs the program fanout  | 
323  |  |   // as a histogram bucketed by powers of 2.  | 
324  |  |   // Returns the number of the largest non-empty bucket.  | 
325  |  |   int ProgramFanout(std::vector<int>* histogram) const;  | 
326  |  |   int ReverseProgramFanout(std::vector<int>* histogram) const;  | 
327  |  |  | 
328  |  |   // Returns the underlying Regexp; not for general use.  | 
329  |  |   // Returns entire_regexp_ so that callers don't need  | 
330  |  |   // to know about prefix_ and prefix_foldcase_.  | 
331  | 0  |   re2::Regexp* Regexp() const { return entire_regexp_; } | 
332  |  |  | 
333  |  |   /***** The array-based matching interface ******/  | 
334  |  |  | 
335  |  |   // The functions here have names ending in 'N' and are used to implement  | 
336  |  |   // the functions whose names are the prefix before the 'N'. It is sometimes  | 
337  |  |   // useful to invoke them directly, but the syntax is awkward, so the 'N'-less  | 
338  |  |   // versions should be preferred.  | 
339  |  |   static bool FullMatchN(const StringPiece& text, const RE2& re,  | 
340  |  |                          const Arg* const args[], int n);  | 
341  |  |   static bool PartialMatchN(const StringPiece& text, const RE2& re,  | 
342  |  |                             const Arg* const args[], int n);  | 
343  |  |   static bool ConsumeN(StringPiece* input, const RE2& re,  | 
344  |  |                        const Arg* const args[], int n);  | 
345  |  |   static bool FindAndConsumeN(StringPiece* input, const RE2& re,  | 
346  |  |                               const Arg* const args[], int n);  | 
347  |  |  | 
348  |  |  private:  | 
349  |  |   template <typename F, typename SP>  | 
350  | 10.9k  |   static inline bool Apply(F f, SP sp, const RE2& re) { | 
351  | 10.9k  |     return f(sp, re, NULL, 0);  | 
352  | 10.9k  |   }  | 
353  |  |  | 
354  |  |   template <typename F, typename SP, typename... A>  | 
355  |  |   static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) { | 
356  |  |     const Arg* const args[] = {&a...}; | 
357  |  |     const int n = sizeof...(a);  | 
358  |  |     return f(sp, re, args, n);  | 
359  |  |   }  | 
360  |  |  | 
361  |  |  public:  | 
362  |  |   // In order to allow FullMatch() et al. to be called with a varying number  | 
363  |  |   // of arguments of varying types, we use two layers of variadic templates.  | 
364  |  |   // The first layer constructs the temporary Arg objects. The second layer  | 
365  |  |   // (above) constructs the array of pointers to the temporary Arg objects.  | 
366  |  |  | 
367  |  |   /***** The useful part: the matching interface *****/  | 
368  |  |  | 
369  |  |   // Matches "text" against "re".  If pointer arguments are  | 
370  |  |   // supplied, copies matched sub-patterns into them.  | 
371  |  |   //  | 
372  |  |   // You can pass in a "const char*" or a "std::string" for "text".  | 
373  |  |   // You can pass in a "const char*" or a "std::string" or a "RE2" for "re".  | 
374  |  |   //  | 
375  |  |   // The provided pointer arguments can be pointers to any scalar numeric  | 
376  |  |   // type, or one of:  | 
377  |  |   //    std::string     (matched piece is copied to string)  | 
378  |  |   //    StringPiece     (StringPiece is mutated to point to matched piece)  | 
379  |  |   //    T               (where "bool T::ParseFrom(const char*, size_t)" exists)  | 
380  |  |   //    (void*)NULL     (the corresponding matched sub-pattern is not copied)  | 
381  |  |   //  | 
382  |  |   // Returns true iff all of the following conditions are satisfied:  | 
383  |  |   //   a. "text" matches "re" fully - from the beginning to the end of "text".  | 
384  |  |   //   b. The number of matched sub-patterns is >= number of supplied pointers.  | 
385  |  |   //   c. The "i"th argument has a suitable type for holding the  | 
386  |  |   //      string captured as the "i"th sub-pattern.  If you pass in  | 
387  |  |   //      NULL for the "i"th argument, or pass fewer arguments than  | 
388  |  |   //      number of sub-patterns, the "i"th captured sub-pattern is  | 
389  |  |   //      ignored.  | 
390  |  |   //  | 
391  |  |   // CAVEAT: An optional sub-pattern that does not exist in the  | 
392  |  |   // matched string is assigned the empty string.  Therefore, the  | 
393  |  |   // following will return false (because the empty string is not a  | 
394  |  |   // valid number):  | 
395  |  |   //    int number;  | 
396  |  |   //    RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); | 
397  |  |   template <typename... A>  | 
398  | 10.9k  |   static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) { | 
399  | 10.9k  |     return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);  | 
400  | 10.9k  |   }  | 
401  |  |  | 
402  |  |   // Like FullMatch(), except that "re" is allowed to match a substring  | 
403  |  |   // of "text".  | 
404  |  |   //  | 
405  |  |   // Returns true iff all of the following conditions are satisfied:  | 
406  |  |   //   a. "text" matches "re" partially - for some substring of "text".  | 
407  |  |   //   b. The number of matched sub-patterns is >= number of supplied pointers.  | 
408  |  |   //   c. The "i"th argument has a suitable type for holding the  | 
409  |  |   //      string captured as the "i"th sub-pattern.  If you pass in  | 
410  |  |   //      NULL for the "i"th argument, or pass fewer arguments than  | 
411  |  |   //      number of sub-patterns, the "i"th captured sub-pattern is  | 
412  |  |   //      ignored.  | 
413  |  |   template <typename... A>  | 
414  |  |   static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) { | 
415  |  |     return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);  | 
416  |  |   }  | 
417  |  |  | 
418  |  |   // Like FullMatch() and PartialMatch(), except that "re" has to match  | 
419  |  |   // a prefix of the text, and "input" is advanced past the matched  | 
420  |  |   // text.  Note: "input" is modified iff this routine returns true  | 
421  |  |   // and "re" matched a non-empty substring of "input".  | 
422  |  |   //  | 
423  |  |   // Returns true iff all of the following conditions are satisfied:  | 
424  |  |   //   a. "input" matches "re" partially - for some prefix of "input".  | 
425  |  |   //   b. The number of matched sub-patterns is >= number of supplied pointers.  | 
426  |  |   //   c. The "i"th argument has a suitable type for holding the  | 
427  |  |   //      string captured as the "i"th sub-pattern.  If you pass in  | 
428  |  |   //      NULL for the "i"th argument, or pass fewer arguments than  | 
429  |  |   //      number of sub-patterns, the "i"th captured sub-pattern is  | 
430  |  |   //      ignored.  | 
431  |  |   template <typename... A>  | 
432  |  |   static bool Consume(StringPiece* input, const RE2& re, A&&... a) { | 
433  |  |     return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);  | 
434  |  |   }  | 
435  |  |  | 
436  |  |   // Like Consume(), but does not anchor the match at the beginning of  | 
437  |  |   // the text.  That is, "re" need not start its match at the beginning  | 
438  |  |   // of "input".  For example, "FindAndConsume(s, "(\\w+)", &word)" finds  | 
439  |  |   // the next word in "s" and stores it in "word".  | 
440  |  |   //  | 
441  |  |   // Returns true iff all of the following conditions are satisfied:  | 
442  |  |   //   a. "input" matches "re" partially - for some substring of "input".  | 
443  |  |   //   b. The number of matched sub-patterns is >= number of supplied pointers.  | 
444  |  |   //   c. The "i"th argument has a suitable type for holding the  | 
445  |  |   //      string captured as the "i"th sub-pattern.  If you pass in  | 
446  |  |   //      NULL for the "i"th argument, or pass fewer arguments than  | 
447  |  |   //      number of sub-patterns, the "i"th captured sub-pattern is  | 
448  |  |   //      ignored.  | 
449  |  |   template <typename... A>  | 
450  |  |   static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) { | 
451  |  |     return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);  | 
452  |  |   }  | 
453  |  |  | 
454  |  |   // Replace the first match of "re" in "str" with "rewrite".  | 
455  |  |   // Within "rewrite", backslash-escaped digits (\1 to \9) can be  | 
456  |  |   // used to insert text matching corresponding parenthesized group  | 
457  |  |   // from the pattern.  \0 in "rewrite" refers to the entire matching  | 
458  |  |   // text.  E.g.,  | 
459  |  |   //  | 
460  |  |   //   std::string s = "yabba dabba doo";  | 
461  |  |   //   CHECK(RE2::Replace(&s, "b+", "d"));  | 
462  |  |   //  | 
463  |  |   // will leave "s" containing "yada dabba doo"  | 
464  |  |   //  | 
465  |  |   // Returns true if the pattern matches and a replacement occurs,  | 
466  |  |   // false otherwise.  | 
467  |  |   static bool Replace(std::string* str,  | 
468  |  |                       const RE2& re,  | 
469  |  |                       const StringPiece& rewrite);  | 
470  |  |  | 
471  |  |   // Like Replace(), except replaces successive non-overlapping occurrences  | 
472  |  |   // of the pattern in the string with the rewrite. E.g.  | 
473  |  |   //  | 
474  |  |   //   std::string s = "yabba dabba doo";  | 
475  |  |   //   CHECK(RE2::GlobalReplace(&s, "b+", "d"));  | 
476  |  |   //  | 
477  |  |   // will leave "s" containing "yada dada doo"  | 
478  |  |   // Replacements are not subject to re-matching.  | 
479  |  |   //  | 
480  |  |   // Because GlobalReplace only replaces non-overlapping matches,  | 
481  |  |   // replacing "ana" within "banana" makes only one replacement, not two.  | 
482  |  |   //  | 
483  |  |   // Returns the number of replacements made.  | 
484  |  |   static int GlobalReplace(std::string* str,  | 
485  |  |                            const RE2& re,  | 
486  |  |                            const StringPiece& rewrite);  | 
487  |  |  | 
488  |  |   // Like Replace, except that if the pattern matches, "rewrite"  | 
489  |  |   // is copied into "out" with substitutions.  The non-matching  | 
490  |  |   // portions of "text" are ignored.  | 
491  |  |   //  | 
492  |  |   // Returns true iff a match occurred and the extraction happened  | 
493  |  |   // successfully;  if no match occurs, the string is left unaffected.  | 
494  |  |   //  | 
495  |  |   // REQUIRES: "text" must not alias any part of "*out".  | 
496  |  |   static bool Extract(const StringPiece& text,  | 
497  |  |                       const RE2& re,  | 
498  |  |                       const StringPiece& rewrite,  | 
499  |  |                       std::string* out);  | 
500  |  |  | 
501  |  |   // Escapes all potentially meaningful regexp characters in  | 
502  |  |   // 'unquoted'.  The returned string, used as a regular expression,  | 
503  |  |   // will match exactly the original string.  For example,  | 
504  |  |   //           1.5-2.0?  | 
505  |  |   // may become:  | 
506  |  |   //           1\.5\-2\.0\?  | 
507  |  |   static std::string QuoteMeta(const StringPiece& unquoted);  | 
508  |  |  | 
509  |  |   // Computes range for any strings matching regexp. The min and max can in  | 
510  |  |   // some cases be arbitrarily precise, so the caller gets to specify the  | 
511  |  |   // maximum desired length of string returned.  | 
512  |  |   //  | 
513  |  |   // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any  | 
514  |  |   // string s that is an anchored match for this regexp satisfies  | 
515  |  |   //   min <= s && s <= max.  | 
516  |  |   //  | 
517  |  |   // Note that PossibleMatchRange() will only consider the first copy of an  | 
518  |  |   // infinitely repeated element (i.e., any regexp element followed by a '*' or  | 
519  |  |   // '+' operator). Regexps with "{N}" constructions are not affected, as those | 
520  |  |   // do not compile down to infinite repetitions.  | 
521  |  |   //  | 
522  |  |   // Returns true on success, false on error.  | 
523  |  |   bool PossibleMatchRange(std::string* min, std::string* max,  | 
524  |  |                           int maxlen) const;  | 
525  |  |  | 
526  |  |   // Generic matching interface  | 
527  |  |  | 
528  |  |   // Type of match.  | 
529  |  |   enum Anchor { | 
530  |  |     UNANCHORED,         // No anchoring  | 
531  |  |     ANCHOR_START,       // Anchor at start only  | 
532  |  |     ANCHOR_BOTH         // Anchor at start and end  | 
533  |  |   };  | 
534  |  |  | 
535  |  |   // Return the number of capturing subpatterns, or -1 if the  | 
536  |  |   // regexp wasn't valid on construction.  The overall match ($0)  | 
537  |  |   // does not count: if the regexp is "(a)(b)", returns 2.  | 
538  | 21.8k  |   int NumberOfCapturingGroups() const { return num_captures_; } | 
539  |  |  | 
540  |  |   // Return a map from names to capturing indices.  | 
541  |  |   // The map records the index of the leftmost group  | 
542  |  |   // with the given name.  | 
543  |  |   // Only valid until the re is deleted.  | 
544  |  |   const std::map<std::string, int>& NamedCapturingGroups() const;  | 
545  |  |  | 
546  |  |   // Return a map from capturing indices to names.  | 
547  |  |   // The map has no entries for unnamed groups.  | 
548  |  |   // Only valid until the re is deleted.  | 
549  |  |   const std::map<int, std::string>& CapturingGroupNames() const;  | 
550  |  |  | 
551  |  |   // General matching routine.  | 
552  |  |   // Match against text starting at offset startpos  | 
553  |  |   // and stopping the search at offset endpos.  | 
554  |  |   // Returns true if match found, false if not.  | 
555  |  |   // On a successful match, fills in submatch[] (up to nsubmatch entries)  | 
556  |  |   // with information about submatches.  | 
557  |  |   // E.g. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with | 
558  |  |   // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",  | 
559  |  |   // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.  | 
560  |  |   // Caveat: submatch[] may be clobbered even on match failure.  | 
561  |  |   //  | 
562  |  |   // Don't ask for more match information than you will use:  | 
563  |  |   // runs much faster with nsubmatch == 1 than nsubmatch > 1, and  | 
564  |  |   // runs even faster if nsubmatch == 0.  | 
565  |  |   // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),  | 
566  |  |   // but will be handled correctly.  | 
567  |  |   //  | 
568  |  |   // Passing text == StringPiece(NULL, 0) will be handled like any other  | 
569  |  |   // empty string, but note that on return, it will not be possible to tell  | 
570  |  |   // whether submatch i matched the empty string or did not match:  | 
571  |  |   // either way, submatch[i].data() == NULL.  | 
572  |  |   bool Match(const StringPiece& text,  | 
573  |  |              size_t startpos,  | 
574  |  |              size_t endpos,  | 
575  |  |              Anchor re_anchor,  | 
576  |  |              StringPiece* submatch,  | 
577  |  |              int nsubmatch) const;  | 
578  |  |  | 
579  |  |   // Check that the given rewrite string is suitable for use with this  | 
580  |  |   // regular expression.  It checks that:  | 
581  |  |   //   * The regular expression has enough parenthesized subexpressions  | 
582  |  |   //     to satisfy all of the \N tokens in rewrite  | 
583  |  |   //   * The rewrite string doesn't have any syntax errors.  E.g.,  | 
584  |  |   //     '\' followed by anything other than a digit or '\'.  | 
585  |  |   // A true return value guarantees that Replace() and Extract() won't  | 
586  |  |   // fail because of a bad rewrite string.  | 
587  |  |   bool CheckRewriteString(const StringPiece& rewrite,  | 
588  |  |                           std::string* error) const;  | 
589  |  |  | 
590  |  |   // Returns the maximum submatch needed for the rewrite to be done by  | 
591  |  |   // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.  | 
592  |  |   static int MaxSubmatch(const StringPiece& rewrite);  | 
593  |  |  | 
594  |  |   // Append the "rewrite" string, with backslash substitutions from "vec",  | 
595  |  |   // to string "out".  | 
596  |  |   // Returns true on success.  This method can fail because of a malformed  | 
597  |  |   // rewrite string.  CheckRewriteString guarantees that the rewrite will  | 
598  |  |   // be successful.  | 
599  |  |   bool Rewrite(std::string* out,  | 
600  |  |                const StringPiece& rewrite,  | 
601  |  |                const StringPiece* vec,  | 
602  |  |                int veclen) const;  | 
603  |  |  | 
604  |  |   // Constructor options  | 
605  |  |   class Options { | 
606  |  |    public:  | 
607  |  |     // The options are (defaults in parentheses):  | 
608  |  |     //  | 
609  |  |     //   utf8             (true)  text and pattern are UTF-8; otherwise Latin-1  | 
610  |  |     //   posix_syntax     (false) restrict regexps to POSIX egrep syntax  | 
611  |  |     //   longest_match    (false) search for longest match, not first match  | 
612  |  |     //   log_errors       (true)  log syntax and execution errors to ERROR  | 
613  |  |     //   max_mem          (see below)  approx. max memory footprint of RE2  | 
614  |  |     //   literal          (false) interpret string as literal, not regexp  | 
615  |  |     //   never_nl         (false) never match \n, even if it is in regexp  | 
616  |  |     //   dot_nl           (false) dot matches everything including new line  | 
617  |  |     //   never_capture    (false) parse all parens as non-capturing  | 
618  |  |     //   case_sensitive   (true)  match is case-sensitive (regexp can override  | 
619  |  |     //                              with (?i) unless in posix_syntax mode)  | 
620  |  |     //  | 
621  |  |     // The following options are only consulted when posix_syntax == true.  | 
622  |  |     // When posix_syntax == false, these features are always enabled and  | 
623  |  |     // cannot be turned off; to perform multi-line matching in that case,  | 
624  |  |     // begin the regexp with (?m).  | 
625  |  |     //   perl_classes     (false) allow Perl's \d \s \w \D \S \W  | 
626  |  |     //   word_boundary    (false) allow Perl's \b \B (word boundary and not)  | 
627  |  |     //   one_line         (false) ^ and $ only match beginning and end of text  | 
628  |  |     //  | 
629  |  |     // The max_mem option controls how much memory can be used  | 
630  |  |     // to hold the compiled form of the regexp (the Prog) and  | 
631  |  |     // its cached DFA graphs.  Code Search placed limits on the number  | 
632  |  |     // of Prog instructions and DFA states: 10,000 for both.  | 
633  |  |     // In RE2, those limits would translate to about 240 KB per Prog  | 
634  |  |     // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a  | 
635  |  |     // better job of keeping them small than Code Search did).  | 
636  |  |     // Each RE2 has two Progs (one forward, one reverse), and each Prog  | 
637  |  |     // can have two DFAs (one first match, one longest match).  | 
638  |  |     // That makes 4 DFAs:  | 
639  |  |     //  | 
640  |  |     //   forward, first-match    - used for UNANCHORED or ANCHOR_START searches  | 
641  |  |     //                               if opt.longest_match() == false  | 
642  |  |     //   forward, longest-match  - used for all ANCHOR_BOTH searches,  | 
643  |  |     //                               and the other two kinds if  | 
644  |  |     //                               opt.longest_match() == true  | 
645  |  |     //   reverse, first-match    - never used  | 
646  |  |     //   reverse, longest-match  - used as second phase for unanchored searches  | 
647  |  |     //  | 
648  |  |     // The RE2 memory budget is statically divided between the two  | 
649  |  |     // Progs and then the DFAs: two thirds to the forward Prog  | 
650  |  |     // and one third to the reverse Prog.  The forward Prog gives half  | 
651  |  |     // of what it has left over to each of its DFAs.  The reverse Prog  | 
652  |  |     // gives it all to its longest-match DFA.  | 
653  |  |     //  | 
654  |  |     // Once a DFA fills its budget, it flushes its cache and starts over.  | 
655  |  |     // If this happens too often, RE2 falls back on the NFA implementation.  | 
656  |  |  | 
657  |  |     // For now, make the default budget something close to Code Search.  | 
658  |  |     static const int kDefaultMaxMem = 8<<20;  | 
659  |  |  | 
660  |  |     enum Encoding { | 
661  |  |       EncodingUTF8 = 1,  | 
662  |  |       EncodingLatin1  | 
663  |  |     };  | 
664  |  |  | 
665  |  |     Options() :  | 
666  |  |       max_mem_(kDefaultMaxMem),  | 
667  |  |       encoding_(EncodingUTF8),  | 
668  |  |       posix_syntax_(false),  | 
669  |  |       longest_match_(false),  | 
670  |  |       log_errors_(true),  | 
671  |  |       literal_(false),  | 
672  |  |       never_nl_(false),  | 
673  |  |       dot_nl_(false),  | 
674  |  |       never_capture_(false),  | 
675  |  |       case_sensitive_(true),  | 
676  |  |       perl_classes_(false),  | 
677  |  |       word_boundary_(false),  | 
678  | 27.5k  |       one_line_(false) { | 
679  | 27.5k  |     }  | 
680  |  |  | 
681  |  |     /*implicit*/ Options(CannedOptions);  | 
682  |  |  | 
683  | 10.9k  |     int64_t max_mem() const { return max_mem_; } | 
684  | 0  |     void set_max_mem(int64_t m) { max_mem_ = m; } | 
685  |  |  | 
686  | 13.7k  |     Encoding encoding() const { return encoding_; } | 
687  | 8.46k  |     void set_encoding(Encoding encoding) { encoding_ = encoding; } | 
688  |  |  | 
689  | 13.7k  |     bool posix_syntax() const { return posix_syntax_; } | 
690  | 13.7k  |     void set_posix_syntax(bool b) { posix_syntax_ = b; } | 
691  |  |  | 
692  | 13.7k  |     bool longest_match() const { return longest_match_; } | 
693  | 13.7k  |     void set_longest_match(bool b) { longest_match_ = b; } | 
694  |  |  | 
695  | 3.86k  |     bool log_errors() const { return log_errors_; } | 
696  | 13.7k  |     void set_log_errors(bool b) { log_errors_ = b; } | 
697  |  |  | 
698  | 13.7k  |     bool literal() const { return literal_; } | 
699  | 13.7k  |     void set_literal(bool b) { literal_ = b; } | 
700  |  |  | 
701  | 13.7k  |     bool never_nl() const { return never_nl_; } | 
702  | 13.7k  |     void set_never_nl(bool b) { never_nl_ = b; } | 
703  |  |  | 
704  | 13.7k  |     bool dot_nl() const { return dot_nl_; } | 
705  | 13.7k  |     void set_dot_nl(bool b) { dot_nl_ = b; } | 
706  |  |  | 
707  | 13.7k  |     bool never_capture() const { return never_capture_; } | 
708  | 13.7k  |     void set_never_capture(bool b) { never_capture_ = b; } | 
709  |  |  | 
710  | 13.7k  |     bool case_sensitive() const { return case_sensitive_; } | 
711  | 13.7k  |     void set_case_sensitive(bool b) { case_sensitive_ = b; } | 
712  |  |  | 
713  | 13.7k  |     bool perl_classes() const { return perl_classes_; } | 
714  | 13.7k  |     void set_perl_classes(bool b) { perl_classes_ = b; } | 
715  |  |  | 
716  | 13.7k  |     bool word_boundary() const { return word_boundary_; } | 
717  | 13.7k  |     void set_word_boundary(bool b) { word_boundary_ = b; } | 
718  |  |  | 
719  | 13.7k  |     bool one_line() const { return one_line_; } | 
720  | 13.7k  |     void set_one_line(bool b) { one_line_ = b; } | 
721  |  |  | 
722  | 13.7k  |     void Copy(const Options& src) { | 
723  | 13.7k  |       *this = src;  | 
724  | 13.7k  |     }  | 
725  |  |  | 
726  |  |     int ParseFlags() const;  | 
727  |  |  | 
728  |  |    private:  | 
729  |  |     int64_t max_mem_;  | 
730  |  |     Encoding encoding_;  | 
731  |  |     bool posix_syntax_;  | 
732  |  |     bool longest_match_;  | 
733  |  |     bool log_errors_;  | 
734  |  |     bool literal_;  | 
735  |  |     bool never_nl_;  | 
736  |  |     bool dot_nl_;  | 
737  |  |     bool never_capture_;  | 
738  |  |     bool case_sensitive_;  | 
739  |  |     bool perl_classes_;  | 
740  |  |     bool word_boundary_;  | 
741  |  |     bool one_line_;  | 
742  |  |   };  | 
743  |  |  | 
744  |  |   // Returns the options set in the constructor.  | 
745  | 0  |   const Options& options() const { return options_; } | 
746  |  |  | 
747  |  |   // Argument converters; see below.  | 
748  |  |   template <typename T>  | 
749  |  |   static Arg CRadix(T* ptr);  | 
750  |  |   template <typename T>  | 
751  |  |   static Arg Hex(T* ptr);  | 
752  |  |   template <typename T>  | 
753  |  |   static Arg Octal(T* ptr);  | 
754  |  |  | 
755  |  |   // Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.  | 
756  |  |   // FOR FUZZING ONLY.  | 
757  |  |   static void FUZZING_ONLY_set_maximum_global_replace_count(int i);  | 
758  |  |  | 
759  |  |  private:  | 
760  |  |   void Init(const StringPiece& pattern, const Options& options);  | 
761  |  |  | 
762  |  |   bool DoMatch(const StringPiece& text,  | 
763  |  |                Anchor re_anchor,  | 
764  |  |                size_t* consumed,  | 
765  |  |                const Arg* const args[],  | 
766  |  |                int n) const;  | 
767  |  |  | 
768  |  |   re2::Prog* ReverseProg() const;  | 
769  |  |  | 
770  |  |   // First cache line is relatively cold fields.  | 
771  |  |   const std::string* pattern_;    // string regular expression  | 
772  |  |   Options options_;               // option flags  | 
773  |  |   re2::Regexp* entire_regexp_;    // parsed regular expression  | 
774  |  |   re2::Regexp* suffix_regexp_;    // parsed regular expression, prefix_ removed  | 
775  |  |   const std::string* error_;      // error indicator (or points to empty string)  | 
776  |  |   const std::string* error_arg_;  // fragment of regexp showing error (or ditto)  | 
777  |  |  | 
778  |  |   // Second cache line is relatively hot fields.  | 
779  |  |   // These are ordered oddly to pack everything.  | 
780  |  |   int num_captures_;              // number of capturing groups  | 
781  |  |   ErrorCode error_code_ : 29;     // error code (29 bits is more than enough)  | 
782  |  |   bool longest_match_ : 1;        // cached copy of options_.longest_match()  | 
783  |  |   bool is_one_pass_ : 1;          // can use prog_->SearchOnePass?  | 
784  |  |   bool prefix_foldcase_ : 1;      // prefix_ is ASCII case-insensitive  | 
785  |  |   std::string prefix_;            // required prefix (before suffix_regexp_)  | 
786  |  |   re2::Prog* prog_;               // compiled program for regexp  | 
787  |  |  | 
788  |  |   // Reverse Prog for DFA execution only  | 
789  |  |   mutable re2::Prog* rprog_;  | 
790  |  |   // Map from capture names to indices  | 
791  |  |   mutable const std::map<std::string, int>* named_groups_;  | 
792  |  |   // Map from capture indices to names  | 
793  |  |   mutable const std::map<int, std::string>* group_names_;  | 
794  |  |  | 
795  |  |   mutable std::once_flag rprog_once_;  | 
796  |  |   mutable std::once_flag named_groups_once_;  | 
797  |  |   mutable std::once_flag group_names_once_;  | 
798  |  | };  | 
799  |  |  | 
800  |  | /***** Implementation details *****/  | 
801  |  |  | 
802  |  | namespace re2_internal { | 
803  |  |  | 
804  |  | // Types for which the 3-ary Parse() function template has specializations.  | 
805  |  | template <typename T> struct Parse3ary : public std::false_type {}; | 
806  |  | template <> struct Parse3ary<void> : public std::true_type {}; | 
807  |  | template <> struct Parse3ary<std::string> : public std::true_type {}; | 
808  |  | template <> struct Parse3ary<StringPiece> : public std::true_type {}; | 
809  |  | template <> struct Parse3ary<char> : public std::true_type {}; | 
810  |  | template <> struct Parse3ary<signed char> : public std::true_type {}; | 
811  |  | template <> struct Parse3ary<unsigned char> : public std::true_type {}; | 
812  |  | template <> struct Parse3ary<float> : public std::true_type {}; | 
813  |  | template <> struct Parse3ary<double> : public std::true_type {}; | 
814  |  |  | 
815  |  | template <typename T>  | 
816  |  | bool Parse(const char* str, size_t n, T* dest);  | 
817  |  |  | 
818  |  | // Types for which the 4-ary Parse() function template has specializations.  | 
819  |  | template <typename T> struct Parse4ary : public std::false_type {}; | 
820  |  | template <> struct Parse4ary<long> : public std::true_type {}; | 
821  |  | template <> struct Parse4ary<unsigned long> : public std::true_type {}; | 
822  |  | template <> struct Parse4ary<short> : public std::true_type {}; | 
823  |  | template <> struct Parse4ary<unsigned short> : public std::true_type {}; | 
824  |  | template <> struct Parse4ary<int> : public std::true_type {}; | 
825  |  | template <> struct Parse4ary<unsigned int> : public std::true_type {}; | 
826  |  | template <> struct Parse4ary<long long> : public std::true_type {}; | 
827  |  | template <> struct Parse4ary<unsigned long long> : public std::true_type {}; | 
828  |  |  | 
829  |  | template <typename T>  | 
830  |  | bool Parse(const char* str, size_t n, T* dest, int radix);  | 
831  |  |  | 
832  |  | }  // namespace re2_internal  | 
833  |  |  | 
834  |  | class RE2::Arg { | 
835  |  |  private:  | 
836  |  |   template <typename T>  | 
837  |  |   using CanParse3ary = typename std::enable_if<  | 
838  |  |       re2_internal::Parse3ary<T>::value,  | 
839  |  |       int>::type;  | 
840  |  |  | 
841  |  |   template <typename T>  | 
842  |  |   using CanParse4ary = typename std::enable_if<  | 
843  |  |       re2_internal::Parse4ary<T>::value,  | 
844  |  |       int>::type;  | 
845  |  |  | 
846  |  | #if !defined(_MSC_VER)  | 
847  |  |   template <typename T>  | 
848  |  |   using CanParseFrom = typename std::enable_if<  | 
849  |  |       std::is_member_function_pointer<  | 
850  |  |           decltype(static_cast<bool (T::*)(const char*, size_t)>(  | 
851  |  |               &T::ParseFrom))>::value,  | 
852  |  |       int>::type;  | 
853  |  | #endif  | 
854  |  |  | 
855  |  |  public:  | 
856  | 0  |   Arg() : Arg(nullptr) {} | 
857  | 0  |   Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {} | 
858  |  |  | 
859  |  |   template <typename T, CanParse3ary<T> = 0>  | 
860  |  |   Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {} | 
861  |  |  | 
862  |  |   template <typename T, CanParse4ary<T> = 0>  | 
863  |  |   Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {} | 
864  |  |  | 
865  |  | #if !defined(_MSC_VER)  | 
866  |  |   template <typename T, CanParseFrom<T> = 0>  | 
867  |  |   Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {} | 
868  |  | #endif  | 
869  |  |  | 
870  |  |   typedef bool (*Parser)(const char* str, size_t n, void* dest);  | 
871  |  |  | 
872  |  |   template <typename T>  | 
873  |  |   Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {} | 
874  |  |  | 
875  | 0  |   bool Parse(const char* str, size_t n) const { | 
876  | 0  |     return (*parser_)(str, n, arg_);  | 
877  | 0  |   }  | 
878  |  |  | 
879  |  |  private:  | 
880  | 0  |   static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) { | 
881  | 0  |     return true;  | 
882  | 0  |   }  | 
883  |  |  | 
884  |  |   template <typename T>  | 
885  |  |   static bool DoParse3ary(const char* str, size_t n, void* dest) { | 
886  |  |     return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));  | 
887  |  |   }  | 
888  |  |  | 
889  |  |   template <typename T>  | 
890  |  |   static bool DoParse4ary(const char* str, size_t n, void* dest) { | 
891  |  |     return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);  | 
892  |  |   }  | 
893  |  |  | 
894  |  | #if !defined(_MSC_VER)  | 
895  |  |   template <typename T>  | 
896  |  |   static bool DoParseFrom(const char* str, size_t n, void* dest) { | 
897  |  |     if (dest == NULL) return true;  | 
898  |  |     return reinterpret_cast<T*>(dest)->ParseFrom(str, n);  | 
899  |  |   }  | 
900  |  | #endif  | 
901  |  |  | 
902  |  |   void*         arg_;  | 
903  |  |   Parser        parser_;  | 
904  |  | };  | 
905  |  |  | 
906  |  | template <typename T>  | 
907  |  | inline RE2::Arg RE2::CRadix(T* ptr) { | 
908  |  |   return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool { | 
909  |  |     return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);  | 
910  |  |   });  | 
911  |  | }  | 
912  |  |  | 
913  |  | template <typename T>  | 
914  |  | inline RE2::Arg RE2::Hex(T* ptr) { | 
915  |  |   return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool { | 
916  |  |     return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);  | 
917  |  |   });  | 
918  |  | }  | 
919  |  |  | 
920  |  | template <typename T>  | 
921  |  | inline RE2::Arg RE2::Octal(T* ptr) { | 
922  |  |   return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool { | 
923  |  |     return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);  | 
924  |  |   });  | 
925  |  | }  | 
926  |  |  | 
927  |  | // Silence warnings about missing initializers for members of LazyRE2.  | 
928  |  | #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6  | 
929  |  | #pragma GCC diagnostic ignored "-Wmissing-field-initializers"  | 
930  |  | #endif  | 
931  |  |  | 
932  |  | // Helper for writing global or static RE2s safely.  | 
933  |  | // Write  | 
934  |  | //     static LazyRE2 re = {".*"}; | 
935  |  | // and then use *re instead of writing  | 
936  |  | //     static RE2 re(".*"); | 
937  |  | // The former is more careful about multithreaded  | 
938  |  | // situations than the latter.  | 
939  |  | //  | 
940  |  | // N.B. This class never deletes the RE2 object that  | 
941  |  | // it constructs: that's a feature, so that it can be used  | 
942  |  | // for global and function static variables.  | 
943  |  | class LazyRE2 { | 
944  |  |  private:  | 
945  |  |   struct NoArg {}; | 
946  |  |  | 
947  |  |  public:  | 
948  |  |   typedef RE2 element_type;  // support std::pointer_traits  | 
949  |  |  | 
950  |  |   // Constructor omitted to preserve braced initialization in C++98.  | 
951  |  |  | 
952  |  |   // Pretend to be a pointer to Type (never NULL due to on-demand creation):  | 
953  | 0  |   RE2& operator*() const { return *get(); } | 
954  | 0  |   RE2* operator->() const { return get(); } | 
955  |  |  | 
956  |  |   // Named accessor/initializer:  | 
957  | 0  |   RE2* get() const { | 
958  | 0  |     std::call_once(once_, &LazyRE2::Init, this);  | 
959  | 0  |     return ptr_;  | 
960  | 0  |   }  | 
961  |  |  | 
962  |  |   // All data fields must be public to support {"foo"} initialization. | 
963  |  |   const char* pattern_;  | 
964  |  |   RE2::CannedOptions options_;  | 
965  |  |   NoArg barrier_against_excess_initializers_;  | 
966  |  |  | 
967  |  |   mutable RE2* ptr_;  | 
968  |  |   mutable std::once_flag once_;  | 
969  |  |  | 
970  |  |  private:  | 
971  | 0  |   static void Init(const LazyRE2* lazy_re2) { | 
972  | 0  |     lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_);  | 
973  | 0  |   }  | 
974  |  |  | 
975  |  |   void operator=(const LazyRE2&);  // disallowed  | 
976  |  | };  | 
977  |  |  | 
978  |  | namespace hooks { | 
979  |  |  | 
980  |  | // Most platforms support thread_local. Older versions of iOS don't support  | 
981  |  | // thread_local, but for the sake of brevity, we lump together all versions  | 
982  |  | // of Apple platforms that aren't macOS. If an iOS application really needs  | 
983  |  | // the context pointee someday, we can get more specific then...  | 
984  |  | //  | 
985  |  | // As per https://github.com/google/re2/issues/325, thread_local support in  | 
986  |  | // MinGW seems to be buggy. (FWIW, Abseil folks also avoid it.)  | 
987  |  | #define RE2_HAVE_THREAD_LOCAL  | 
988  |  | #if (defined(__APPLE__) && !(defined(TARGET_OS_OSX) && TARGET_OS_OSX)) || defined(__MINGW32__)  | 
989  |  | #undef RE2_HAVE_THREAD_LOCAL  | 
990  |  | #endif  | 
991  |  |  | 
992  |  | // A hook must not make any assumptions regarding the lifetime of the context  | 
993  |  | // pointee beyond the current invocation of the hook. Pointers and references  | 
994  |  | // obtained via the context pointee should be considered invalidated when the  | 
995  |  | // hook returns. Hence, any data about the context pointee (e.g. its pattern)  | 
996  |  | // would have to be copied in order for it to be kept for an indefinite time.  | 
997  |  | //  | 
998  |  | // A hook must not use RE2 for matching. Control flow reentering RE2::Match()  | 
999  |  | // could result in infinite mutual recursion. To discourage that possibility,  | 
1000  |  | // RE2 will not maintain the context pointer correctly when used in that way.  | 
1001  |  | #ifdef RE2_HAVE_THREAD_LOCAL  | 
1002  |  | extern thread_local const RE2* context;  | 
1003  |  | #endif  | 
1004  |  |  | 
1005  |  | struct DFAStateCacheReset { | 
1006  |  |   int64_t state_budget;  | 
1007  |  |   size_t state_cache_size;  | 
1008  |  | };  | 
1009  |  |  | 
1010  |  | struct DFASearchFailure { | 
1011  |  |   // Nothing yet...  | 
1012  |  | };  | 
1013  |  |  | 
1014  |  | #define DECLARE_HOOK(type)                  \  | 
1015  |  |   using type##Callback = void(const type&); \  | 
1016  |  |   void Set##type##Hook(type##Callback* cb); \  | 
1017  |  |   type##Callback* Get##type##Hook();  | 
1018  |  |  | 
1019  |  | DECLARE_HOOK(DFAStateCacheReset)  | 
1020  |  | DECLARE_HOOK(DFASearchFailure)  | 
1021  |  |  | 
1022  |  | #undef DECLARE_HOOK  | 
1023  |  |  | 
1024  |  | }  // namespace hooks  | 
1025  |  |  | 
1026  |  | }  // namespace re2  | 
1027  |  |  | 
1028  |  | using re2::RE2;  | 
1029  |  | using re2::LazyRE2;  | 
1030  |  |  | 
1031  |  | #endif  // RE2_RE2_H_  |