/src/mozilla-central/js/src/irregexp/RegExpAST.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
2 | | * vim: set ts=8 sts=4 et sw=4 tw=99: */ |
3 | | |
4 | | // Copyright 2012 the V8 project authors. All rights reserved. |
5 | | // Redistribution and use in source and binary forms, with or without |
6 | | // modification, are permitted provided that the following conditions are |
7 | | // met: |
8 | | // |
9 | | // * Redistributions of source code must retain the above copyright |
10 | | // notice, this list of conditions and the following disclaimer. |
11 | | // * Redistributions in binary form must reproduce the above |
12 | | // copyright notice, this list of conditions and the following |
13 | | // disclaimer in the documentation and/or other materials provided |
14 | | // with the distribution. |
15 | | // * Neither the name of Google Inc. nor the names of its |
16 | | // contributors may be used to endorse or promote products derived |
17 | | // from this software without specific prior written permission. |
18 | | // |
19 | | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | | |
31 | | #ifndef V8_REGEXP_AST_H_ |
32 | | #define V8_REGEXP_AST_H_ |
33 | | |
34 | | // Prevent msvc build failures as indicated in bug 1205328 |
35 | | #ifdef min |
36 | | # undef min |
37 | | #endif |
38 | | #ifdef max |
39 | | # undef max |
40 | | #endif |
41 | | |
42 | | #include "irregexp/RegExpEngine.h" |
43 | | |
44 | | namespace js { |
45 | | namespace irregexp { |
46 | | |
47 | | class RegExpCompiler; |
48 | | class RegExpNode; |
49 | | |
50 | | class RegExpVisitor |
51 | | { |
52 | | public: |
53 | 0 | virtual ~RegExpVisitor() { } |
54 | | #define MAKE_CASE(Name) \ |
55 | | virtual void* Visit##Name(RegExp##Name*, void* data) = 0; |
56 | | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
57 | | #undef MAKE_CASE |
58 | | }; |
59 | | |
60 | | class RegExpTree |
61 | | { |
62 | | public: |
63 | | static const int kInfinity = INT32_MAX; |
64 | 0 | virtual ~RegExpTree() {} |
65 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
66 | | RegExpNode* on_success) = 0; |
67 | 0 | virtual bool IsTextElement() { return false; } |
68 | 0 | virtual bool IsAnchoredAtStart() { return false; } |
69 | 0 | virtual bool IsAnchoredAtEnd() { return false; } |
70 | | virtual int min_match() = 0; |
71 | | virtual int max_match() = 0; |
72 | | // Returns the interval of registers used for captures within this |
73 | | // expression. |
74 | 0 | virtual Interval CaptureRegisters() { return Interval::Empty(); } |
75 | 0 | virtual void AppendToText(RegExpText* text) { |
76 | 0 | MOZ_CRASH("Bad call"); |
77 | 0 | } |
78 | | #define MAKE_ASTYPE(Name) \ |
79 | | virtual RegExp##Name* As##Name(); \ |
80 | | virtual bool Is##Name(); |
81 | | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) |
82 | | #undef MAKE_ASTYPE |
83 | | }; |
84 | | |
85 | | typedef InfallibleVector<RegExpTree*, 1> RegExpTreeVector; |
86 | | |
87 | | class RegExpDisjunction : public RegExpTree |
88 | | { |
89 | | public: |
90 | | explicit RegExpDisjunction(RegExpTreeVector* alternatives); |
91 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
92 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
93 | | RegExpNode* on_success) override; |
94 | | virtual RegExpDisjunction* AsDisjunction() override; |
95 | | virtual Interval CaptureRegisters() override; |
96 | | virtual bool IsDisjunction() override; |
97 | | virtual bool IsAnchoredAtStart() override; |
98 | | virtual bool IsAnchoredAtEnd() override; |
99 | 0 | virtual int min_match() override { return min_match_; } |
100 | 0 | virtual int max_match() override { return max_match_; } |
101 | | |
102 | 0 | const RegExpTreeVector& alternatives() { return *alternatives_; } |
103 | | |
104 | | private: |
105 | | RegExpTreeVector* alternatives_; |
106 | | int min_match_; |
107 | | int max_match_; |
108 | | }; |
109 | | |
110 | | class RegExpAlternative : public RegExpTree |
111 | | { |
112 | | public: |
113 | | explicit RegExpAlternative(RegExpTreeVector* nodes); |
114 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
115 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
116 | | RegExpNode* on_success) override; |
117 | | virtual RegExpAlternative* AsAlternative() override; |
118 | | virtual Interval CaptureRegisters() override; |
119 | | virtual bool IsAlternative() override; |
120 | | virtual bool IsAnchoredAtStart() override; |
121 | | virtual bool IsAnchoredAtEnd() override; |
122 | 0 | virtual int min_match() override { return min_match_; } |
123 | 0 | virtual int max_match() override { return max_match_; } |
124 | | |
125 | 0 | const RegExpTreeVector& nodes() { return *nodes_; } |
126 | | |
127 | | private: |
128 | | RegExpTreeVector* nodes_; |
129 | | int min_match_; |
130 | | int max_match_; |
131 | | }; |
132 | | |
133 | | class RegExpAssertion : public RegExpTree { |
134 | | public: |
135 | | enum AssertionType { |
136 | | START_OF_LINE, |
137 | | START_OF_INPUT, |
138 | | END_OF_LINE, |
139 | | END_OF_INPUT, |
140 | | BOUNDARY, |
141 | | NON_BOUNDARY, |
142 | | NOT_AFTER_LEAD_SURROGATE, |
143 | | NOT_IN_SURROGATE_PAIR |
144 | | }; |
145 | 0 | explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { } |
146 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
147 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
148 | | RegExpNode* on_success) override; |
149 | | virtual RegExpAssertion* AsAssertion() override; |
150 | | virtual bool IsAssertion() override; |
151 | | virtual bool IsAnchoredAtStart() override; |
152 | | virtual bool IsAnchoredAtEnd() override; |
153 | 0 | virtual int min_match() override { return 0; } |
154 | 0 | virtual int max_match() override { return 0; } |
155 | 0 | AssertionType assertion_type() { return assertion_type_; } |
156 | | private: |
157 | | AssertionType assertion_type_; |
158 | | }; |
159 | | |
160 | | class CharacterSet |
161 | | { |
162 | | public: |
163 | | explicit CharacterSet(char16_t standard_set_type) |
164 | | : ranges_(nullptr), |
165 | | standard_set_type_(standard_set_type) |
166 | 0 | {} |
167 | | explicit CharacterSet(CharacterRangeVector* ranges) |
168 | | : ranges_(ranges), |
169 | | standard_set_type_(0) |
170 | 0 | {} |
171 | | |
172 | | CharacterRangeVector& ranges(LifoAlloc* alloc); |
173 | 0 | char16_t standard_set_type() { return standard_set_type_; } |
174 | 0 | void set_standard_set_type(char16_t special_set_type) { |
175 | 0 | standard_set_type_ = special_set_type; |
176 | 0 | } |
177 | 0 | bool is_standard() { return standard_set_type_ != 0; } |
178 | | void Canonicalize(); |
179 | | |
180 | | private: |
181 | | CharacterRangeVector* ranges_; |
182 | | |
183 | | // If non-zero, the value represents a standard set (e.g., all whitespace |
184 | | // characters) without having to expand the ranges. |
185 | | char16_t standard_set_type_; |
186 | | }; |
187 | | |
188 | | class RegExpCharacterClass : public RegExpTree |
189 | | { |
190 | | public: |
191 | | RegExpCharacterClass(CharacterRangeVector* ranges, bool is_negated) |
192 | | : set_(ranges), |
193 | | is_negated_(is_negated) |
194 | 0 | {} |
195 | | |
196 | | explicit RegExpCharacterClass(char16_t type) |
197 | | : set_(type), |
198 | | is_negated_(false) |
199 | 0 | {} |
200 | | |
201 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
202 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
203 | | RegExpNode* on_success) override; |
204 | | virtual RegExpCharacterClass* AsCharacterClass() override; |
205 | | virtual bool IsCharacterClass() override; |
206 | 0 | virtual bool IsTextElement() override { return true; } |
207 | 0 | virtual int min_match() override { return 1; } |
208 | 0 | virtual int max_match() override { return 1; } |
209 | | virtual void AppendToText(RegExpText* text) override; |
210 | | |
211 | 0 | CharacterSet character_set() { return set_; } |
212 | | |
213 | | // TODO(lrn): Remove need for complex version if is_standard that |
214 | | // recognizes a mangled standard set and just do { return set_.is_special(); } |
215 | | bool is_standard(LifoAlloc* alloc); |
216 | | |
217 | | // Returns a value representing the standard character set if is_standard() |
218 | | // returns true. |
219 | | // Currently used values are: |
220 | | // s : unicode whitespace |
221 | | // S : unicode non-whitespace |
222 | | // w : ASCII word character (digit, letter, underscore) |
223 | | // W : non-ASCII word character |
224 | | // d : ASCII digit |
225 | | // D : non-ASCII digit |
226 | | // . : non-unicode non-newline |
227 | | // * : All characters |
228 | 0 | char16_t standard_type() { return set_.standard_set_type(); } |
229 | | |
230 | 0 | CharacterRangeVector& ranges(LifoAlloc* alloc) { return set_.ranges(alloc); } |
231 | 0 | bool is_negated() { return is_negated_; } |
232 | | |
233 | | private: |
234 | | CharacterSet set_; |
235 | | bool is_negated_; |
236 | | }; |
237 | | |
238 | | typedef InfallibleVector<char16_t, 10> CharacterVector; |
239 | | |
240 | | class RegExpAtom : public RegExpTree |
241 | | { |
242 | | public: |
243 | | explicit RegExpAtom(CharacterVector* data) |
244 | | : data_(data) |
245 | 0 | {} |
246 | | |
247 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
248 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
249 | | RegExpNode* on_success) override; |
250 | | virtual RegExpAtom* AsAtom() override; |
251 | | virtual bool IsAtom() override; |
252 | 0 | virtual bool IsTextElement() override { return true; } |
253 | 0 | virtual int min_match() override { return data_->length(); } |
254 | 0 | virtual int max_match() override { return data_->length(); } |
255 | | virtual void AppendToText(RegExpText* text) override; |
256 | | |
257 | 0 | const CharacterVector& data() { return *data_; } |
258 | 0 | int length() { return data_->length(); } |
259 | | |
260 | | private: |
261 | | CharacterVector* data_; |
262 | | }; |
263 | | |
264 | | class RegExpText : public RegExpTree |
265 | | { |
266 | | public: |
267 | | explicit RegExpText(LifoAlloc* alloc) |
268 | | : elements_(*alloc), length_(0) |
269 | 0 | {} |
270 | | |
271 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
272 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
273 | | RegExpNode* on_success) override; |
274 | | virtual RegExpText* AsText() override; |
275 | | virtual bool IsText() override; |
276 | 0 | virtual bool IsTextElement() override { return true; } |
277 | 0 | virtual int min_match() override { return length_; } |
278 | 0 | virtual int max_match() override { return length_; } |
279 | | void AppendToText(RegExpText* text) override; |
280 | | |
281 | 0 | void AddElement(TextElement elm) { |
282 | 0 | elements_.append(elm); |
283 | 0 | length_ += elm.length(); |
284 | 0 | } |
285 | 0 | const TextElementVector& elements() { return elements_; } |
286 | | |
287 | | private: |
288 | | TextElementVector elements_; |
289 | | int length_; |
290 | | }; |
291 | | |
292 | | class RegExpQuantifier : public RegExpTree |
293 | | { |
294 | | public: |
295 | | enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; |
296 | | RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) |
297 | | : body_(body), |
298 | | min_(min), |
299 | | max_(max), |
300 | | min_match_(min * body->min_match()), |
301 | | quantifier_type_(type) |
302 | 0 | { |
303 | 0 | if (max > 0 && body->max_match() > kInfinity / max) { |
304 | 0 | max_match_ = kInfinity; |
305 | 0 | } else { |
306 | 0 | max_match_ = max * body->max_match(); |
307 | 0 | } |
308 | 0 | } |
309 | | |
310 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
311 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
312 | | RegExpNode* on_success) override; |
313 | | static RegExpNode* ToNode(int min, |
314 | | int max, |
315 | | bool is_greedy, |
316 | | RegExpTree* body, |
317 | | RegExpCompiler* compiler, |
318 | | RegExpNode* on_success, |
319 | | bool not_at_start = false); |
320 | | virtual RegExpQuantifier* AsQuantifier() override; |
321 | | virtual Interval CaptureRegisters() override; |
322 | | virtual bool IsQuantifier() override; |
323 | 0 | virtual int min_match() override { return min_match_; } |
324 | 0 | virtual int max_match() override { return max_match_; } |
325 | 0 | int min() { return min_; } |
326 | 0 | int max() { return max_; } |
327 | 0 | bool is_possessive() { return quantifier_type_ == POSSESSIVE; } |
328 | 0 | bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } |
329 | 0 | bool is_greedy() { return quantifier_type_ == GREEDY; } |
330 | 0 | RegExpTree* body() { return body_; } |
331 | | |
332 | | private: |
333 | | RegExpTree* body_; |
334 | | int min_; |
335 | | int max_; |
336 | | int min_match_; |
337 | | int max_match_; |
338 | | QuantifierType quantifier_type_; |
339 | | }; |
340 | | |
341 | | class RegExpCapture : public RegExpTree |
342 | | { |
343 | | public: |
344 | | explicit RegExpCapture(RegExpTree* body, int index) |
345 | | : body_(body), index_(index) |
346 | 0 | {} |
347 | | |
348 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
349 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
350 | | RegExpNode* on_success) override; |
351 | | static RegExpNode* ToNode(RegExpTree* body, |
352 | | int index, |
353 | | RegExpCompiler* compiler, |
354 | | RegExpNode* on_success); |
355 | | virtual RegExpCapture* AsCapture() override; |
356 | | virtual bool IsAnchoredAtStart() override; |
357 | | virtual bool IsAnchoredAtEnd() override; |
358 | | virtual Interval CaptureRegisters() override; |
359 | | virtual bool IsCapture() override; |
360 | 0 | virtual int min_match() override { return body_->min_match(); } |
361 | 0 | virtual int max_match() override { return body_->max_match(); } |
362 | 0 | RegExpTree* body() { return body_; } |
363 | 0 | int index() { return index_; } |
364 | 0 | static int StartRegister(int index) { return index * 2; } |
365 | 0 | static int EndRegister(int index) { return index * 2 + 1; } |
366 | | |
367 | | private: |
368 | | RegExpTree* body_; |
369 | | int index_; |
370 | | }; |
371 | | |
372 | | class RegExpLookahead : public RegExpTree |
373 | | { |
374 | | public: |
375 | | RegExpLookahead(RegExpTree* body, |
376 | | bool is_positive, |
377 | | int capture_count, |
378 | | int capture_from) |
379 | | : body_(body), |
380 | | is_positive_(is_positive), |
381 | | capture_count_(capture_count), |
382 | | capture_from_(capture_from) |
383 | 0 | {} |
384 | | |
385 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
386 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
387 | | RegExpNode* on_success) override; |
388 | | virtual RegExpLookahead* AsLookahead() override; |
389 | | virtual Interval CaptureRegisters() override; |
390 | | virtual bool IsLookahead() override; |
391 | | virtual bool IsAnchoredAtStart() override; |
392 | 0 | virtual int min_match() override { return 0; } |
393 | 0 | virtual int max_match() override { return 0; } |
394 | 0 | RegExpTree* body() { return body_; } |
395 | 0 | bool is_positive() { return is_positive_; } |
396 | 0 | int capture_count() { return capture_count_; } |
397 | 0 | int capture_from() { return capture_from_; } |
398 | | |
399 | | private: |
400 | | RegExpTree* body_; |
401 | | bool is_positive_; |
402 | | int capture_count_; |
403 | | int capture_from_; |
404 | | }; |
405 | | |
406 | | typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector; |
407 | | |
408 | | class RegExpBackReference : public RegExpTree |
409 | | { |
410 | | public: |
411 | | explicit RegExpBackReference(RegExpCapture* capture) |
412 | | : capture_(capture) |
413 | 0 | {} |
414 | | |
415 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
416 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
417 | | RegExpNode* on_success) override; |
418 | | virtual RegExpBackReference* AsBackReference() override; |
419 | | virtual bool IsBackReference() override; |
420 | 0 | virtual int min_match() override { return 0; } |
421 | 0 | virtual int max_match() override { return capture_->max_match(); } |
422 | 0 | int index() { return capture_->index(); } |
423 | 0 | RegExpCapture* capture() { return capture_; } |
424 | | private: |
425 | | RegExpCapture* capture_; |
426 | | }; |
427 | | |
428 | | class RegExpEmpty : public RegExpTree |
429 | | { |
430 | | public: |
431 | | RegExpEmpty() |
432 | 0 | {} |
433 | | |
434 | | virtual void* Accept(RegExpVisitor* visitor, void* data); |
435 | | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
436 | | RegExpNode* on_success) override; |
437 | | virtual RegExpEmpty* AsEmpty() override; |
438 | | virtual bool IsEmpty() override; |
439 | 0 | virtual int min_match() override { return 0; } |
440 | 0 | virtual int max_match() override { return 0; } |
441 | 0 | static RegExpEmpty* GetInstance() { |
442 | 0 | static RegExpEmpty instance; |
443 | 0 | return &instance; |
444 | 0 | } |
445 | | }; |
446 | | |
447 | | } } // namespace js::irregexp |
448 | | |
449 | | #endif // V8_REGEXP_AST_H_ |