Line data Source code
1 : // Copyright 2016 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/ostreams.h"
6 : #include "src/regexp/regexp-ast.h"
7 :
8 : namespace v8 {
9 : namespace internal {
10 :
11 : #define MAKE_ACCEPT(Name) \
12 : void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13 : return visitor->Visit##Name(this, data); \
14 : }
15 2260 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 : #undef MAKE_ACCEPT
17 :
18 : #define MAKE_TYPE_CASE(Name) \
19 : RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
20 : bool RegExpTree::Is##Name() { return false; }
21 1864474 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22 : #undef MAKE_TYPE_CASE
23 :
24 : #define MAKE_TYPE_CASE(Name) \
25 : RegExp##Name* RegExp##Name::As##Name() { return this; } \
26 : bool RegExp##Name::Is##Name() { return true; }
27 1708259 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28 : #undef MAKE_TYPE_CASE
29 :
30 :
31 11634 : static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
32 : Interval result = Interval::Empty();
33 61452 : for (int i = 0; i < children->length(); i++)
34 24909 : result = result.Union(children->at(i)->CaptureRegisters());
35 11634 : return result;
36 : }
37 :
38 :
39 10771 : Interval RegExpAlternative::CaptureRegisters() {
40 10771 : return ListCaptureRegisters(nodes());
41 : }
42 :
43 :
44 863 : Interval RegExpDisjunction::CaptureRegisters() {
45 863 : return ListCaptureRegisters(alternatives());
46 : }
47 :
48 :
49 84 : Interval RegExpLookaround::CaptureRegisters() {
50 84 : return body()->CaptureRegisters();
51 : }
52 :
53 :
54 3380 : Interval RegExpCapture::CaptureRegisters() {
55 : Interval self(StartRegister(index()), EndRegister(index()));
56 6760 : return self.Union(body()->CaptureRegisters());
57 : }
58 :
59 :
60 11520 : Interval RegExpQuantifier::CaptureRegisters() {
61 11520 : return body()->CaptureRegisters();
62 : }
63 :
64 :
65 3170 : bool RegExpAssertion::IsAnchoredAtStart() {
66 3170 : return assertion_type() == RegExpAssertion::START_OF_INPUT;
67 : }
68 :
69 :
70 1853 : bool RegExpAssertion::IsAnchoredAtEnd() {
71 1853 : return assertion_type() == RegExpAssertion::END_OF_INPUT;
72 : }
73 :
74 :
75 16646 : bool RegExpAlternative::IsAnchoredAtStart() {
76 : ZoneList<RegExpTree*>* nodes = this->nodes();
77 19052 : for (int i = 0; i < nodes->length(); i++) {
78 17820 : RegExpTree* node = nodes->at(i);
79 17820 : if (node->IsAnchoredAtStart()) {
80 : return true;
81 : }
82 14921 : if (node->max_match() > 0) {
83 : return false;
84 : }
85 : }
86 : return false;
87 : }
88 :
89 :
90 12742 : bool RegExpAlternative::IsAnchoredAtEnd() {
91 : ZoneList<RegExpTree*>* nodes = this->nodes();
92 13218 : for (int i = nodes->length() - 1; i >= 0; i--) {
93 13152 : RegExpTree* node = nodes->at(i);
94 13152 : if (node->IsAnchoredAtEnd()) {
95 : return true;
96 : }
97 11531 : if (node->max_match() > 0) {
98 : return false;
99 : }
100 : }
101 : return false;
102 : }
103 :
104 :
105 835 : bool RegExpDisjunction::IsAnchoredAtStart() {
106 : ZoneList<RegExpTree*>* alternatives = this->alternatives();
107 1025 : for (int i = 0; i < alternatives->length(); i++) {
108 920 : if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
109 : }
110 : return true;
111 : }
112 :
113 :
114 865 : bool RegExpDisjunction::IsAnchoredAtEnd() {
115 : ZoneList<RegExpTree*>* alternatives = this->alternatives();
116 951 : for (int i = 0; i < alternatives->length(); i++) {
117 888 : if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
118 : }
119 : return true;
120 : }
121 :
122 :
123 999 : bool RegExpLookaround::IsAnchoredAtStart() {
124 999 : return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
125 : }
126 :
127 :
128 10061 : bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
129 :
130 :
131 4165 : bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
132 :
133 :
134 : // Convert regular expression trees to a simple sexp representation.
135 : // This representation should be different from the input grammar
136 : // in as many cases as possible, to make it more difficult for incorrect
137 : // parses to look as correct ones which is likely if the input and
138 : // output formats are alike.
139 765 : class RegExpUnparser final : public RegExpVisitor {
140 : public:
141 765 : RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
142 : void VisitCharacterRange(CharacterRange that);
143 : #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
144 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
145 : #undef MAKE_CASE
146 : private:
147 : std::ostream& os_;
148 : Zone* zone_;
149 : };
150 :
151 :
152 40 : void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153 40 : os_ << "(|";
154 230 : for (int i = 0; i < that->alternatives()->length(); i++) {
155 95 : os_ << " ";
156 95 : that->alternatives()->at(i)->Accept(this, data);
157 : }
158 40 : os_ << ")";
159 40 : return nullptr;
160 : }
161 :
162 :
163 260 : void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164 260 : os_ << "(:";
165 1900 : for (int i = 0; i < that->nodes()->length(); i++) {
166 820 : os_ << " ";
167 820 : that->nodes()->at(i)->Accept(this, data);
168 : }
169 260 : os_ << ")";
170 260 : return nullptr;
171 : }
172 :
173 :
174 410 : void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175 410 : os_ << AsUC32(that.from());
176 410 : if (!that.IsSingleton()) {
177 190 : os_ << "-" << AsUC32(that.to());
178 : }
179 410 : }
180 :
181 :
182 190 : void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
183 : void* data) {
184 190 : if (that->is_negated()) os_ << "^";
185 190 : os_ << "[";
186 1610 : for (int i = 0; i < that->ranges(zone_)->length(); i++) {
187 410 : if (i > 0) os_ << " ";
188 820 : VisitCharacterRange(that->ranges(zone_)->at(i));
189 : }
190 190 : os_ << "]";
191 190 : return nullptr;
192 : }
193 :
194 :
195 70 : void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
196 70 : switch (that->assertion_type()) {
197 : case RegExpAssertion::START_OF_INPUT:
198 10 : os_ << "@^i";
199 10 : break;
200 : case RegExpAssertion::END_OF_INPUT:
201 10 : os_ << "@$i";
202 10 : break;
203 : case RegExpAssertion::START_OF_LINE:
204 0 : os_ << "@^l";
205 0 : break;
206 : case RegExpAssertion::END_OF_LINE:
207 0 : os_ << "@$l";
208 0 : break;
209 : case RegExpAssertion::BOUNDARY:
210 40 : os_ << "@b";
211 40 : break;
212 : case RegExpAssertion::NON_BOUNDARY:
213 10 : os_ << "@B";
214 10 : break;
215 : }
216 70 : return nullptr;
217 : }
218 :
219 :
220 960 : void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
221 960 : os_ << "'";
222 : Vector<const uc16> chardata = that->data();
223 4680 : for (int i = 0; i < chardata.length(); i++) {
224 5580 : os_ << AsUC16(chardata[i]);
225 : }
226 960 : os_ << "'";
227 960 : return nullptr;
228 : }
229 :
230 :
231 10 : void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
232 10 : if (that->elements()->length() == 1) {
233 0 : that->elements()->at(0).tree()->Accept(this, data);
234 : } else {
235 10 : os_ << "(!";
236 50 : for (int i = 0; i < that->elements()->length(); i++) {
237 20 : os_ << " ";
238 20 : that->elements()->at(i).tree()->Accept(this, data);
239 : }
240 10 : os_ << ")";
241 : }
242 10 : return nullptr;
243 : }
244 :
245 :
246 115 : void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
247 230 : os_ << "(# " << that->min() << " ";
248 115 : if (that->max() == RegExpTree::kInfinity) {
249 70 : os_ << "- ";
250 : } else {
251 45 : os_ << that->max() << " ";
252 : }
253 115 : os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
254 115 : that->body()->Accept(this, data);
255 115 : os_ << ")";
256 115 : return nullptr;
257 : }
258 :
259 :
260 380 : void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
261 380 : os_ << "(^ ";
262 380 : that->body()->Accept(this, data);
263 380 : os_ << ")";
264 380 : return nullptr;
265 : }
266 :
267 15 : void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
268 15 : os_ << "(?: ";
269 15 : that->body()->Accept(this, data);
270 15 : os_ << ")";
271 15 : return nullptr;
272 : }
273 :
274 50 : void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
275 50 : os_ << "(";
276 50 : os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
277 50 : os_ << (that->is_positive() ? " + " : " - ");
278 50 : that->body()->Accept(this, data);
279 50 : os_ << ")";
280 50 : return nullptr;
281 : }
282 :
283 :
284 150 : void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
285 : void* data) {
286 300 : os_ << "(<- " << that->index() << ")";
287 150 : return nullptr;
288 : }
289 :
290 :
291 20 : void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
292 20 : os_ << '%';
293 20 : return nullptr;
294 : }
295 :
296 :
297 765 : std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
298 : RegExpUnparser unparser(os, zone);
299 765 : Accept(&unparser, nullptr);
300 765 : return os;
301 : }
302 :
303 :
304 21697 : RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
305 21697 : : alternatives_(alternatives) {
306 : DCHECK_LT(1, alternatives->length());
307 21697 : RegExpTree* first_alternative = alternatives->at(0);
308 21697 : min_match_ = first_alternative->min_match();
309 21697 : max_match_ = first_alternative->max_match();
310 340145 : for (int i = 1; i < alternatives->length(); i++) {
311 159224 : RegExpTree* alternative = alternatives->at(i);
312 318448 : min_match_ = Min(min_match_, alternative->min_match());
313 318448 : max_match_ = Max(max_match_, alternative->max_match());
314 : }
315 21697 : }
316 :
317 :
318 : static int IncreaseBy(int previous, int increase) {
319 3103016 : if (RegExpTree::kInfinity - previous < increase) {
320 : return RegExpTree::kInfinity;
321 : } else {
322 3062949 : return previous + increase;
323 : }
324 : }
325 :
326 :
327 41165 : RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
328 41165 : : nodes_(nodes) {
329 : DCHECK_LT(1, nodes->length());
330 41165 : min_match_ = 0;
331 41165 : max_match_ = 0;
332 3144181 : for (int i = 0; i < nodes->length(); i++) {
333 1551508 : RegExpTree* node = nodes->at(i);
334 1551508 : int node_min_match = node->min_match();
335 3103016 : min_match_ = IncreaseBy(min_match_, node_min_match);
336 1551508 : int node_max_match = node->max_match();
337 3103016 : max_match_ = IncreaseBy(max_match_, node_max_match);
338 : }
339 41165 : }
340 :
341 :
342 : } // namespace internal
343 121996 : } // namespace v8
|