Line data Source code
1 : // Copyright 2016 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/ostreams.h"
6 : #include "src/regexp/regexp-ast.h"
7 :
8 : namespace v8 {
9 : namespace internal {
10 :
11 : #define MAKE_ACCEPT(Name) \
12 : void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13 : return visitor->Visit##Name(this, data); \
14 : }
15 6244 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 : #undef MAKE_ACCEPT
17 :
18 : #define MAKE_TYPE_CASE(Name) \
19 : RegExp##Name* RegExpTree::As##Name() { return NULL; } \
20 : bool RegExpTree::Is##Name() { return false; }
21 2429082 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22 : #undef MAKE_TYPE_CASE
23 :
24 : #define MAKE_TYPE_CASE(Name) \
25 : RegExp##Name* RegExp##Name::As##Name() { return this; } \
26 : bool RegExp##Name::Is##Name() { return true; }
27 2003538 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28 : #undef MAKE_TYPE_CASE
29 :
30 :
31 19694 : static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
32 : Interval result = Interval::Empty();
33 122178 : for (int i = 0; i < children->length(); i++)
34 102484 : result = result.Union(children->at(i)->CaptureRegisters());
35 19694 : return result;
36 : }
37 :
38 :
39 16989 : Interval RegExpAlternative::CaptureRegisters() {
40 16989 : return ListCaptureRegisters(nodes());
41 : }
42 :
43 :
44 2705 : Interval RegExpDisjunction::CaptureRegisters() {
45 2705 : return ListCaptureRegisters(alternatives());
46 : }
47 :
48 :
49 103 : Interval RegExpLookaround::CaptureRegisters() {
50 103 : return body()->CaptureRegisters();
51 : }
52 :
53 :
54 14508 : Interval RegExpCapture::CaptureRegisters() {
55 : Interval self(StartRegister(index()), EndRegister(index()));
56 14508 : return self.Union(body()->CaptureRegisters());
57 : }
58 :
59 :
60 17791 : Interval RegExpQuantifier::CaptureRegisters() {
61 17791 : return body()->CaptureRegisters();
62 : }
63 :
64 :
65 5407 : bool RegExpAssertion::IsAnchoredAtStart() {
66 5407 : return assertion_type() == RegExpAssertion::START_OF_INPUT;
67 : }
68 :
69 :
70 3614 : bool RegExpAssertion::IsAnchoredAtEnd() {
71 3614 : return assertion_type() == RegExpAssertion::END_OF_INPUT;
72 : }
73 :
74 :
75 23475 : bool RegExpAlternative::IsAnchoredAtStart() {
76 : ZoneList<RegExpTree*>* nodes = this->nodes();
77 51054 : for (int i = 0; i < nodes->length(); i++) {
78 51025 : RegExpTree* node = nodes->at(i);
79 25498 : if (node->IsAnchoredAtStart()) {
80 : return true;
81 : }
82 21014 : if (node->max_match() > 0) {
83 : return false;
84 : }
85 : }
86 : return false;
87 : }
88 :
89 :
90 17118 : bool RegExpAlternative::IsAnchoredAtEnd() {
91 : ZoneList<RegExpTree*>* nodes = this->nodes();
92 17652 : for (int i = nodes->length() - 1; i >= 0; i--) {
93 17572 : RegExpTree* node = nodes->at(i);
94 17572 : if (node->IsAnchoredAtEnd()) {
95 : return true;
96 : }
97 14167 : if (node->max_match() > 0) {
98 : return false;
99 : }
100 : }
101 : return false;
102 : }
103 :
104 :
105 1096 : bool RegExpDisjunction::IsAnchoredAtStart() {
106 : ZoneList<RegExpTree*>* alternatives = this->alternatives();
107 2460 : for (int i = 0; i < alternatives->length(); i++) {
108 2447 : if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
109 : }
110 : return true;
111 : }
112 :
113 :
114 1142 : bool RegExpDisjunction::IsAnchoredAtEnd() {
115 : ZoneList<RegExpTree*>* alternatives = this->alternatives();
116 2390 : for (int i = 0; i < alternatives->length(); i++) {
117 2365 : if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
118 : }
119 : return true;
120 : }
121 :
122 :
123 2773 : bool RegExpLookaround::IsAnchoredAtStart() {
124 2773 : return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
125 : }
126 :
127 :
128 13398 : bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
129 :
130 :
131 4500 : bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
132 :
133 :
134 : // Convert regular expression trees to a simple sexp representation.
135 : // This representation should be different from the input grammar
136 : // in as many cases as possible, to make it more difficult for incorrect
137 : // parses to look as correct ones which is likely if the input and
138 : // output formats are alike.
139 0 : class RegExpUnparser final : public RegExpVisitor {
140 : public:
141 2114 : RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
142 : void VisitCharacterRange(CharacterRange that);
143 : #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
144 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
145 : #undef MAKE_CASE
146 : private:
147 : std::ostream& os_;
148 : Zone* zone_;
149 : };
150 :
151 :
152 714 : void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153 105 : os_ << "(|";
154 714 : for (int i = 0; i < that->alternatives()->length(); i++) {
155 252 : os_ << " ";
156 252 : that->alternatives()->at(i)->Accept(this, data);
157 : }
158 105 : os_ << ")";
159 105 : return NULL;
160 : }
161 :
162 :
163 5992 : void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164 721 : os_ << "(:";
165 5992 : for (int i = 0; i < that->nodes()->length(); i++) {
166 2275 : os_ << " ";
167 2275 : that->nodes()->at(i)->Accept(this, data);
168 : }
169 721 : os_ << ")";
170 721 : return NULL;
171 : }
172 :
173 :
174 1120 : void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175 1120 : os_ << AsUC32(that.from());
176 1120 : if (!that.IsSingleton()) {
177 259 : os_ << "-" << AsUC32(that.to());
178 : }
179 1120 : }
180 :
181 :
182 518 : void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
183 : void* data) {
184 518 : if (that->is_negated()) os_ << "^";
185 518 : os_ << "[";
186 3276 : for (int i = 0; i < that->ranges(zone_)->length(); i++) {
187 1120 : if (i > 0) os_ << " ";
188 2240 : VisitCharacterRange(that->ranges(zone_)->at(i));
189 : }
190 518 : os_ << "]";
191 518 : return NULL;
192 : }
193 :
194 :
195 196 : void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
196 196 : switch (that->assertion_type()) {
197 : case RegExpAssertion::START_OF_INPUT:
198 28 : os_ << "@^i";
199 28 : break;
200 : case RegExpAssertion::END_OF_INPUT:
201 28 : os_ << "@$i";
202 28 : break;
203 : case RegExpAssertion::START_OF_LINE:
204 0 : os_ << "@^l";
205 0 : break;
206 : case RegExpAssertion::END_OF_LINE:
207 0 : os_ << "@$l";
208 0 : break;
209 : case RegExpAssertion::BOUNDARY:
210 112 : os_ << "@b";
211 112 : break;
212 : case RegExpAssertion::NON_BOUNDARY:
213 28 : os_ << "@B";
214 28 : break;
215 : }
216 196 : return NULL;
217 : }
218 :
219 :
220 2653 : void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
221 2653 : os_ << "'";
222 : Vector<const uc16> chardata = that->data();
223 7805 : for (int i = 0; i < chardata.length(); i++) {
224 10304 : os_ << AsUC16(chardata[i]);
225 : }
226 2653 : os_ << "'";
227 2653 : return NULL;
228 : }
229 :
230 :
231 21 : void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
232 21 : if (that->elements()->length() == 1) {
233 0 : that->elements()->at(0).tree()->Accept(this, data);
234 : } else {
235 21 : os_ << "(!";
236 63 : for (int i = 0; i < that->elements()->length(); i++) {
237 42 : os_ << " ";
238 42 : that->elements()->at(i).tree()->Accept(this, data);
239 : }
240 21 : os_ << ")";
241 : }
242 21 : return NULL;
243 : }
244 :
245 :
246 1260 : void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
247 315 : os_ << "(# " << that->min() << " ";
248 315 : if (that->max() == RegExpTree::kInfinity) {
249 189 : os_ << "- ";
250 : } else {
251 126 : os_ << that->max() << " ";
252 : }
253 315 : os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
254 315 : that->body()->Accept(this, data);
255 315 : os_ << ")";
256 315 : return NULL;
257 : }
258 :
259 :
260 2128 : void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
261 1064 : os_ << "(^ ";
262 1064 : that->body()->Accept(this, data);
263 1064 : os_ << ")";
264 1064 : return NULL;
265 : }
266 :
267 84 : void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
268 42 : os_ << "(?: ";
269 42 : that->body()->Accept(this, data);
270 42 : os_ << ")";
271 42 : return NULL;
272 : }
273 :
274 560 : void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
275 140 : os_ << "(";
276 140 : os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
277 140 : os_ << (that->is_positive() ? " + " : " - ");
278 140 : that->body()->Accept(this, data);
279 140 : os_ << ")";
280 140 : return NULL;
281 : }
282 :
283 :
284 420 : void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
285 : void* data) {
286 420 : os_ << "(<- " << that->index() << ")";
287 420 : return NULL;
288 : }
289 :
290 :
291 49 : void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
292 49 : os_ << '%';
293 49 : return NULL;
294 : }
295 :
296 :
297 2114 : std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
298 : RegExpUnparser unparser(os, zone);
299 2114 : Accept(&unparser, NULL);
300 2114 : return os;
301 : }
302 :
303 :
304 31263 : RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
305 31263 : : alternatives_(alternatives) {
306 : DCHECK(alternatives->length() > 1);
307 225228 : RegExpTree* first_alternative = alternatives->at(0);
308 31263 : min_match_ = first_alternative->min_match();
309 31263 : max_match_ = first_alternative->max_match();
310 387930 : for (int i = 1; i < alternatives->length(); i++) {
311 162702 : RegExpTree* alternative = alternatives->at(i);
312 325404 : min_match_ = Min(min_match_, alternative->min_match());
313 325404 : max_match_ = Max(max_match_, alternative->max_match());
314 : }
315 31263 : }
316 :
317 :
318 : static int IncreaseBy(int previous, int increase) {
319 4922818 : if (RegExpTree::kInfinity - previous < increase) {
320 : return RegExpTree::kInfinity;
321 : } else {
322 4851954 : return previous + increase;
323 : }
324 : }
325 :
326 :
327 77100 : RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
328 77100 : : nodes_(nodes) {
329 : DCHECK(nodes->length() > 1);
330 77100 : min_match_ = 0;
331 77100 : max_match_ = 0;
332 5077018 : for (int i = 0; i < nodes->length(); i++) {
333 4999918 : RegExpTree* node = nodes->at(i);
334 2461409 : int node_min_match = node->min_match();
335 4922818 : min_match_ = IncreaseBy(min_match_, node_min_match);
336 2461409 : int node_max_match = node->max_match();
337 4922818 : max_match_ = IncreaseBy(max_match_, node_max_match);
338 : }
339 77100 : }
340 :
341 :
342 : } // namespace internal
343 : } // namespace v8
|