/src/libphonenumber/cpp/src/phonenumbers/regexp_adapter.h
Line | Count | Source |
1 | | // Copyright (C) 2011 The Libphonenumber Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // Author: George Yakovlev |
16 | | // Philippe Liard |
17 | | // |
18 | | // RegExp adapter to allow a pluggable regexp engine. It has been introduced |
19 | | // during the integration of the open-source version of this library into |
20 | | // Chromium to be able to use the ICU Regex engine instead of RE2, which is not |
21 | | // officially supported on Windows. |
22 | | // Since RE2 was initially used in this library, the interface of this adapter |
23 | | // is very close to the subset of the RE2 API used in phonenumberutil.cc. |
24 | | |
25 | | #ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ |
26 | | #define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ |
27 | | |
28 | | #include <cstddef> |
29 | | #include <string> |
30 | | |
31 | | namespace i18n { |
32 | | namespace phonenumbers { |
33 | | |
34 | | using std::string; |
35 | | |
// Abstraction over the input that is fed to RegExp::Consume(). The concrete
// representation differs from one regexp implementation to the next
// (StringPiece for RE2, UnicodeString for ICU Regex), so the input is only
// ever handled through this interface.
class RegExpInput {
 public:
  // Virtual, so an implementation can be deleted through a RegExpInput
  // pointer.
  virtual ~RegExpInput() {}

  // Converts this input to a C++ string.
  virtual string ToString() const = 0;
};
46 | | |
47 | | // The regular expression abstract class. It supports only functions used in |
48 | | // phonenumberutil.cc. Consume(), Match() and Replace() methods must be |
49 | | // implemented. |
50 | | class RegExp { |
51 | | public: |
52 | 32.7k | virtual ~RegExp() {} |
53 | | |
54 | | // Matches string to regular expression, returns true if expression was |
55 | | // matched, false otherwise, advances position in the match. |
56 | | // input_string - string to be searched. |
57 | | // anchor_at_start - if true, match would be successful only if it appears at |
58 | | // the beginning of the tested region of the string. |
59 | | // matched_string1..6 - string extracted from the match in sequential order. |
60 | | // Can be NULL. |
61 | | virtual bool Consume(RegExpInput* input_string, |
62 | | bool anchor_at_start, |
63 | | string* matched_string1, |
64 | | string* matched_string2, |
65 | | string* matched_string3, |
66 | | string* matched_string4, |
67 | | string* matched_string5, |
68 | | string* matched_string6) const = 0; |
69 | | |
70 | | // Helper methods calling the Consume method that assume the match must start |
71 | | // at the beginning. |
72 | | inline bool Consume(RegExpInput* input_string, string* matched_string1, |
73 | | string* matched_string2, |
74 | | string* matched_string3, |
75 | | string* matched_string4, |
76 | | string* matched_string5, |
77 | 0 | string* matched_string6) const { |
78 | 0 | return Consume(input_string, true, matched_string1, matched_string2, |
79 | 0 | matched_string3, matched_string4, matched_string5, |
80 | 0 | matched_string6); |
81 | 0 | } |
82 | | |
83 | | inline bool Consume(RegExpInput* input_string, string* matched_string1, |
84 | | string* matched_string2, |
85 | | string* matched_string3, |
86 | | string* matched_string4, |
87 | 0 | string* matched_string5) const { |
88 | 0 | return Consume(input_string, true, matched_string1, matched_string2, |
89 | 0 | matched_string3, matched_string4, matched_string5, NULL); |
90 | 0 | } |
91 | | |
92 | | inline bool Consume(RegExpInput* input_string, string* matched_string1, |
93 | | string* matched_string2, |
94 | | string* matched_string3, |
95 | 0 | string* matched_string4) const { |
96 | 0 | return Consume(input_string, true, matched_string1, matched_string2, |
97 | 0 | matched_string3, matched_string4, NULL, NULL); |
98 | 0 | } |
99 | | |
100 | | |
101 | | // Helper methods calling the Consume method that assume the match must start |
102 | | // at the beginning. |
103 | | inline bool Consume(RegExpInput* input_string, |
104 | | string* matched_string1, |
105 | | string* matched_string2, |
106 | 0 | string* matched_string3) const { |
107 | 0 | return Consume(input_string, true, matched_string1, matched_string2, |
108 | 0 | matched_string3, NULL, NULL, NULL); |
109 | 0 | } |
110 | | |
111 | | inline bool Consume(RegExpInput* input_string, |
112 | | string* matched_string1, |
113 | 11.6k | string* matched_string2) const { |
114 | 11.6k | return Consume(input_string, true, matched_string1, matched_string2, NULL, |
115 | 11.6k | NULL, NULL, NULL); |
116 | 11.6k | } |
117 | | |
118 | 85.5k | inline bool Consume(RegExpInput* input_string, string* matched_string) const { |
119 | 85.5k | return Consume(input_string, true, matched_string, NULL, NULL, NULL, NULL, |
120 | 85.5k | NULL); |
121 | 85.5k | } |
122 | | |
123 | 1.13M | inline bool Consume(RegExpInput* input_string) const { |
124 | 1.13M | return Consume(input_string, true, NULL, NULL, NULL, NULL, NULL, NULL); |
125 | 1.13M | } |
126 | | |
127 | | // Helper method calling the Consume method that assumes the match can start |
128 | | // at any place in the string. |
129 | | inline bool FindAndConsume(RegExpInput* input_string, |
130 | 210k | string* matched_string) const { |
131 | 210k | return Consume(input_string, false, matched_string, NULL, NULL, NULL, NULL, |
132 | 210k | NULL); |
133 | 210k | } |
134 | | |
135 | | // Matches string to regular expression, returns true if the expression was |
136 | | // matched, false otherwise. |
137 | | // input_string - string to be searched. |
138 | | // full_match - if true, match would be successful only if it matches the |
139 | | // complete string. |
140 | | // matched_string - the string extracted from the match. Can be NULL. |
141 | | virtual bool Match(const string& input_string, |
142 | | bool full_match, |
143 | | string* matched_string) const = 0; |
144 | | |
145 | | // Helper methods calling the Match method with the right arguments. |
146 | | inline bool PartialMatch(const string& input_string, |
147 | 144k | string* matched_string) const { |
148 | 144k | return Match(input_string, false, matched_string); |
149 | 144k | } |
150 | | |
151 | 190k | inline bool PartialMatch(const string& input_string) const { |
152 | 190k | return Match(input_string, false, NULL); |
153 | 190k | } |
154 | | |
155 | | inline bool FullMatch(const string& input_string, |
156 | 0 | string* matched_string) const { |
157 | 0 | return Match(input_string, true, matched_string); |
158 | 0 | } |
159 | | |
160 | 1.04M | inline bool FullMatch(const string& input_string) const { |
161 | 1.04M | return Match(input_string, true, NULL); |
162 | 1.04M | } |
163 | | |
164 | | // Replaces match(es) in 'string_to_process'. If 'global' is true, |
165 | | // replaces all the matches, otherwise only the first match. |
166 | | // replacement_string - text the matches are replaced with. The groups in the |
167 | | // replacement string are referenced with the $[0-9] notation. |
168 | | // Returns true if the pattern matches and a replacement occurs, false |
169 | | // otherwise. |
170 | | virtual bool Replace(string* string_to_process, |
171 | | bool global, |
172 | | const string& replacement_string) const = 0; |
173 | | |
174 | | // Helper methods calling the Replace method with the right arguments. |
175 | | inline bool Replace(string* string_to_process, |
176 | 14.0k | const string& replacement_string) const { |
177 | 14.0k | return Replace(string_to_process, false, replacement_string); |
178 | 14.0k | } |
179 | | |
180 | | inline bool GlobalReplace(string* string_to_process, |
181 | 168k | const string& replacement_string) const { |
182 | 168k | return Replace(string_to_process, true, replacement_string); |
183 | 168k | } |
184 | | }; |
185 | | |
186 | | // Abstract factory class that lets its subclasses instantiate the classes |
187 | | // implementing RegExp and RegExpInput. |
188 | | class AbstractRegExpFactory { |
189 | | public: |
190 | 13.8k | virtual ~AbstractRegExpFactory() {} |
191 | | |
192 | | // Creates a new instance of RegExpInput. The deletion of the returned |
193 | | // instance is under the responsibility of the caller. |
194 | | virtual RegExpInput* CreateInput(const string& utf8_input) const = 0; |
195 | | |
196 | | // Creates a new instance of RegExp. The deletion of the returned instance is |
197 | | // under the responsibility of the caller. |
198 | | virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0; |
199 | | }; |
200 | | |
201 | | } // namespace phonenumbers |
202 | | } // namespace i18n |
203 | | |
204 | | #endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ |