Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/js/src/irregexp/RegExpAST.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2
 * vim: set ts=8 sts=4 et sw=4 tw=99: */
3
4
// Copyright 2012 the V8 project authors. All rights reserved.
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are
7
// met:
8
//
9
//     * Redistributions of source code must retain the above copyright
10
//       notice, this list of conditions and the following disclaimer.
11
//     * Redistributions in binary form must reproduce the above
12
//       copyright notice, this list of conditions and the following
13
//       disclaimer in the documentation and/or other materials provided
14
//       with the distribution.
15
//     * Neither the name of Google Inc. nor the names of its
16
//       contributors may be used to endorse or promote products derived
17
//       from this software without specific prior written permission.
18
//
19
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31
#ifndef V8_REGEXP_AST_H_
32
#define V8_REGEXP_AST_H_
33
34
// Prevent msvc build failures as indicated in bug 1205328
35
#ifdef min
36
# undef min
37
#endif
38
#ifdef max
39
# undef max
40
#endif
41
42
#include "irregexp/RegExpEngine.h"
43
44
namespace js {
45
namespace irregexp {
46
47
class RegExpCompiler;
48
class RegExpNode;
49
50
class RegExpVisitor
51
{
52
  public:
53
0
    virtual ~RegExpVisitor() { }
54
#define MAKE_CASE(Name)                                         \
55
    virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
56
    FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
57
#undef MAKE_CASE
58
};
59
60
class RegExpTree
61
{
62
  public:
63
    static const int kInfinity = INT32_MAX;
64
0
    virtual ~RegExpTree() {}
65
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
66
                               RegExpNode* on_success) = 0;
67
0
    virtual bool IsTextElement() { return false; }
68
0
    virtual bool IsAnchoredAtStart() { return false; }
69
0
    virtual bool IsAnchoredAtEnd() { return false; }
70
    virtual int min_match() = 0;
71
    virtual int max_match() = 0;
72
    // Returns the interval of registers used for captures within this
73
    // expression.
74
0
    virtual Interval CaptureRegisters() { return Interval::Empty(); }
75
0
    virtual void AppendToText(RegExpText* text) {
76
0
        MOZ_CRASH("Bad call");
77
0
    }
78
#define MAKE_ASTYPE(Name)                                               \
79
    virtual RegExp##Name* As##Name();                                   \
80
    virtual bool Is##Name();
81
    FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
82
#undef MAKE_ASTYPE
83
};
84
85
typedef InfallibleVector<RegExpTree*, 1> RegExpTreeVector;
86
87
class RegExpDisjunction : public RegExpTree
88
{
89
  public:
90
    explicit RegExpDisjunction(RegExpTreeVector* alternatives);
91
    virtual void* Accept(RegExpVisitor* visitor, void* data);
92
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
93
                               RegExpNode* on_success) override;
94
    virtual RegExpDisjunction* AsDisjunction() override;
95
    virtual Interval CaptureRegisters() override;
96
    virtual bool IsDisjunction() override;
97
    virtual bool IsAnchoredAtStart() override;
98
    virtual bool IsAnchoredAtEnd() override;
99
0
    virtual int min_match() override { return min_match_; }
100
0
    virtual int max_match() override { return max_match_; }
101
102
0
    const RegExpTreeVector& alternatives() { return *alternatives_; }
103
104
  private:
105
    RegExpTreeVector* alternatives_;
106
    int min_match_;
107
    int max_match_;
108
};
109
110
class RegExpAlternative : public RegExpTree
111
{
112
  public:
113
    explicit RegExpAlternative(RegExpTreeVector* nodes);
114
    virtual void* Accept(RegExpVisitor* visitor, void* data);
115
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
116
                               RegExpNode* on_success) override;
117
    virtual RegExpAlternative* AsAlternative() override;
118
    virtual Interval CaptureRegisters() override;
119
    virtual bool IsAlternative() override;
120
    virtual bool IsAnchoredAtStart() override;
121
    virtual bool IsAnchoredAtEnd() override;
122
0
    virtual int min_match() override { return min_match_; }
123
0
    virtual int max_match() override { return max_match_; }
124
125
0
    const RegExpTreeVector& nodes() { return *nodes_; }
126
127
  private:
128
    RegExpTreeVector* nodes_;
129
    int min_match_;
130
    int max_match_;
131
};
132
133
class RegExpAssertion : public RegExpTree {
134
 public:
135
  enum AssertionType {
136
    START_OF_LINE,
137
    START_OF_INPUT,
138
    END_OF_LINE,
139
    END_OF_INPUT,
140
    BOUNDARY,
141
    NON_BOUNDARY,
142
    NOT_AFTER_LEAD_SURROGATE,
143
    NOT_IN_SURROGATE_PAIR
144
  };
145
0
  explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
146
  virtual void* Accept(RegExpVisitor* visitor, void* data);
147
  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
148
                             RegExpNode* on_success) override;
149
  virtual RegExpAssertion* AsAssertion() override;
150
  virtual bool IsAssertion() override;
151
  virtual bool IsAnchoredAtStart() override;
152
  virtual bool IsAnchoredAtEnd() override;
153
0
  virtual int min_match() override { return 0; }
154
0
  virtual int max_match() override { return 0; }
155
0
  AssertionType assertion_type() { return assertion_type_; }
156
 private:
157
  AssertionType assertion_type_;
158
};
159
160
class CharacterSet
161
{
162
  public:
163
    explicit CharacterSet(char16_t standard_set_type)
164
      : ranges_(nullptr),
165
        standard_set_type_(standard_set_type)
166
0
    {}
167
    explicit CharacterSet(CharacterRangeVector* ranges)
168
      : ranges_(ranges),
169
        standard_set_type_(0)
170
0
    {}
171
172
    CharacterRangeVector& ranges(LifoAlloc* alloc);
173
0
    char16_t standard_set_type() { return standard_set_type_; }
174
0
    void set_standard_set_type(char16_t special_set_type) {
175
0
        standard_set_type_ = special_set_type;
176
0
    }
177
0
    bool is_standard() { return standard_set_type_ != 0; }
178
    void Canonicalize();
179
180
  private:
181
    CharacterRangeVector* ranges_;
182
183
    // If non-zero, the value represents a standard set (e.g., all whitespace
184
    // characters) without having to expand the ranges.
185
    char16_t standard_set_type_;
186
};
187
188
class RegExpCharacterClass : public RegExpTree
189
{
190
  public:
191
    RegExpCharacterClass(CharacterRangeVector* ranges, bool is_negated)
192
      : set_(ranges),
193
        is_negated_(is_negated)
194
0
    {}
195
196
    explicit RegExpCharacterClass(char16_t type)
197
      : set_(type),
198
        is_negated_(false)
199
0
    {}
200
201
    virtual void* Accept(RegExpVisitor* visitor, void* data);
202
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
203
                               RegExpNode* on_success) override;
204
    virtual RegExpCharacterClass* AsCharacterClass() override;
205
    virtual bool IsCharacterClass() override;
206
0
    virtual bool IsTextElement() override { return true; }
207
0
    virtual int min_match() override { return 1; }
208
0
    virtual int max_match() override { return 1; }
209
    virtual void AppendToText(RegExpText* text) override;
210
211
0
    CharacterSet character_set() { return set_; }
212
213
    // TODO(lrn): Remove need for complex version if is_standard that
214
    // recognizes a mangled standard set and just do { return set_.is_special(); }
215
    bool is_standard(LifoAlloc* alloc);
216
217
    // Returns a value representing the standard character set if is_standard()
218
    // returns true.
219
    // Currently used values are:
220
    // s : unicode whitespace
221
    // S : unicode non-whitespace
222
    // w : ASCII word character (digit, letter, underscore)
223
    // W : non-ASCII word character
224
    // d : ASCII digit
225
    // D : non-ASCII digit
226
    // . : non-unicode non-newline
227
    // * : All characters
228
0
    char16_t standard_type() { return set_.standard_set_type(); }
229
230
0
    CharacterRangeVector& ranges(LifoAlloc* alloc) { return set_.ranges(alloc); }
231
0
    bool is_negated() { return is_negated_; }
232
233
  private:
234
    CharacterSet set_;
235
    bool is_negated_;
236
};
237
238
typedef InfallibleVector<char16_t, 10> CharacterVector;
239
240
class RegExpAtom : public RegExpTree
241
{
242
  public:
243
    explicit RegExpAtom(CharacterVector* data)
244
      : data_(data)
245
0
    {}
246
247
    virtual void* Accept(RegExpVisitor* visitor, void* data);
248
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
249
                               RegExpNode* on_success) override;
250
    virtual RegExpAtom* AsAtom() override;
251
    virtual bool IsAtom() override;
252
0
    virtual bool IsTextElement() override { return true; }
253
0
    virtual int min_match() override { return data_->length(); }
254
0
    virtual int max_match() override { return data_->length(); }
255
    virtual void AppendToText(RegExpText* text) override;
256
257
0
    const CharacterVector& data() { return *data_; }
258
0
    int length() { return data_->length(); }
259
260
  private:
261
    CharacterVector* data_;
262
};
263
264
class RegExpText : public RegExpTree
265
{
266
  public:
267
    explicit RegExpText(LifoAlloc* alloc)
268
      : elements_(*alloc), length_(0)
269
0
    {}
270
271
    virtual void* Accept(RegExpVisitor* visitor, void* data);
272
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
273
                               RegExpNode* on_success) override;
274
    virtual RegExpText* AsText() override;
275
    virtual bool IsText() override;
276
0
    virtual bool IsTextElement() override { return true; }
277
0
    virtual int min_match() override { return length_; }
278
0
    virtual int max_match() override { return length_; }
279
    void AppendToText(RegExpText* text) override;
280
281
0
    void AddElement(TextElement elm)  {
282
0
        elements_.append(elm);
283
0
        length_ += elm.length();
284
0
    }
285
0
    const TextElementVector& elements() { return elements_; }
286
287
  private:
288
    TextElementVector elements_;
289
    int length_;
290
};
291
292
class RegExpQuantifier : public RegExpTree
293
{
294
  public:
295
    enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
296
    RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
297
      : body_(body),
298
        min_(min),
299
        max_(max),
300
        min_match_(min * body->min_match()),
301
        quantifier_type_(type)
302
0
    {
303
0
        if (max > 0 && body->max_match() > kInfinity / max) {
304
0
            max_match_ = kInfinity;
305
0
        } else {
306
0
            max_match_ = max * body->max_match();
307
0
        }
308
0
    }
309
310
    virtual void* Accept(RegExpVisitor* visitor, void* data);
311
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
312
                               RegExpNode* on_success) override;
313
    static RegExpNode* ToNode(int min,
314
                              int max,
315
                              bool is_greedy,
316
                              RegExpTree* body,
317
                              RegExpCompiler* compiler,
318
                              RegExpNode* on_success,
319
                              bool not_at_start = false);
320
    virtual RegExpQuantifier* AsQuantifier() override;
321
    virtual Interval CaptureRegisters() override;
322
    virtual bool IsQuantifier() override;
323
0
    virtual int min_match() override { return min_match_; }
324
0
    virtual int max_match() override { return max_match_; }
325
0
    int min() { return min_; }
326
0
    int max() { return max_; }
327
0
    bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
328
0
    bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
329
0
    bool is_greedy() { return quantifier_type_ == GREEDY; }
330
0
    RegExpTree* body() { return body_; }
331
332
  private:
333
    RegExpTree* body_;
334
    int min_;
335
    int max_;
336
    int min_match_;
337
    int max_match_;
338
    QuantifierType quantifier_type_;
339
};
340
341
class RegExpCapture : public RegExpTree
342
{
343
  public:
344
    explicit RegExpCapture(RegExpTree* body, int index)
345
      : body_(body), index_(index)
346
0
    {}
347
348
    virtual void* Accept(RegExpVisitor* visitor, void* data);
349
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
350
                               RegExpNode* on_success) override;
351
    static RegExpNode* ToNode(RegExpTree* body,
352
                              int index,
353
                              RegExpCompiler* compiler,
354
                              RegExpNode* on_success);
355
    virtual RegExpCapture* AsCapture() override;
356
    virtual bool IsAnchoredAtStart() override;
357
    virtual bool IsAnchoredAtEnd() override;
358
    virtual Interval CaptureRegisters() override;
359
    virtual bool IsCapture() override;
360
0
    virtual int min_match() override { return body_->min_match(); }
361
0
    virtual int max_match() override { return body_->max_match(); }
362
0
    RegExpTree* body() { return body_; }
363
0
    int index() { return index_; }
364
0
    static int StartRegister(int index) { return index * 2; }
365
0
    static int EndRegister(int index) { return index * 2 + 1; }
366
367
  private:
368
    RegExpTree* body_;
369
    int index_;
370
};
371
372
class RegExpLookahead : public RegExpTree
373
{
374
  public:
375
    RegExpLookahead(RegExpTree* body,
376
                    bool is_positive,
377
                    int capture_count,
378
                    int capture_from)
379
      : body_(body),
380
        is_positive_(is_positive),
381
        capture_count_(capture_count),
382
        capture_from_(capture_from)
383
0
    {}
384
385
    virtual void* Accept(RegExpVisitor* visitor, void* data);
386
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
387
                               RegExpNode* on_success) override;
388
    virtual RegExpLookahead* AsLookahead() override;
389
    virtual Interval CaptureRegisters() override;
390
    virtual bool IsLookahead() override;
391
    virtual bool IsAnchoredAtStart() override;
392
0
    virtual int min_match() override { return 0; }
393
0
    virtual int max_match() override { return 0; }
394
0
    RegExpTree* body() { return body_; }
395
0
    bool is_positive() { return is_positive_; }
396
0
    int capture_count() { return capture_count_; }
397
0
    int capture_from() { return capture_from_; }
398
399
  private:
400
    RegExpTree* body_;
401
    bool is_positive_;
402
    int capture_count_;
403
    int capture_from_;
404
};
405
406
typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;
407
408
class RegExpBackReference : public RegExpTree
409
{
410
  public:
411
    explicit RegExpBackReference(RegExpCapture* capture)
412
      : capture_(capture)
413
0
    {}
414
415
    virtual void* Accept(RegExpVisitor* visitor, void* data);
416
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
417
                               RegExpNode* on_success) override;
418
    virtual RegExpBackReference* AsBackReference() override;
419
    virtual bool IsBackReference() override;
420
0
    virtual int min_match() override { return 0; }
421
0
    virtual int max_match() override { return capture_->max_match(); }
422
0
    int index() { return capture_->index(); }
423
0
    RegExpCapture* capture() { return capture_; }
424
  private:
425
    RegExpCapture* capture_;
426
};
427
428
class RegExpEmpty : public RegExpTree
429
{
430
  public:
431
    RegExpEmpty()
432
0
    {}
433
434
    virtual void* Accept(RegExpVisitor* visitor, void* data);
435
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
436
                               RegExpNode* on_success) override;
437
    virtual RegExpEmpty* AsEmpty() override;
438
    virtual bool IsEmpty() override;
439
0
    virtual int min_match() override { return 0; }
440
0
    virtual int max_match() override { return 0; }
441
0
    static RegExpEmpty* GetInstance() {
442
0
        static RegExpEmpty instance;
443
0
        return &instance;
444
0
    }
445
};
446
447
} } // namespace js::irregexp
448
449
#endif  // V8_REGEXP_AST_H_