Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_OBJECTS_JS_REGEXP_H_
6 : #define V8_OBJECTS_JS_REGEXP_H_
7 :
8 : #include "src/objects/js-array.h"
9 :
10 : // Has to be the last include (doesn't have include guards):
11 : #include "src/objects/object-macros.h"
12 :
13 : namespace v8 {
14 : namespace internal {
15 :
16 : // Regular expressions
17 : // The regular expression holds a single reference to a FixedArray in
18 : // the kDataOffset field.
19 : // The FixedArray contains the following data:
20 : // - tag : type of regexp implementation (not compiled yet, atom or irregexp)
21 : // - reference to the original source string
22 : // - reference to the original flag string
23 : // If it is an atom regexp
24 : // - a reference to a literal string to search for
25 : // If it is an irregexp regexp:
26 : // - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
27 : // used for tracking the last usage (used for regexp code flushing).
28 : // - a reference to code for UC16 inputs (bytecode or compiled), or a smi
29 : // used for tracking the last usage (used for regexp code flushing).
30 : // - max number of registers used by irregexp implementations.
31 : // - number of capture registers (output values) of the regexp.
32 : class JSRegExp : public JSObject {
33 : public:
34 : // Meaning of Type:
35 : // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
36 : // ATOM: A simple string to match against using an indexOf operation.
37 : // IRREGEXP: Compiled with Irregexp.
38 : enum Type { NOT_COMPILED, ATOM, IRREGEXP };
39 : struct FlagShiftBit { // Bit position of each flag inside a Flags value (see enum Flag below).
40 : static const int kGlobal = 0;
41 : static const int kIgnoreCase = 1;
42 : static const int kMultiline = 2;
43 : static const int kSticky = 3;
44 : static const int kUnicode = 4;
45 : static const int kDotAll = 5;
46 : static const int kInvalid = 7; // Bit 6 is skipped; kInvalid uses the top bit of the uint8_t-backed Flag.
47 : };
48 : enum Flag : uint8_t {
49 : kNone = 0,
50 : kGlobal = 1 << FlagShiftBit::kGlobal,
51 : kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase,
52 : kMultiline = 1 << FlagShiftBit::kMultiline,
53 : kSticky = 1 << FlagShiftBit::kSticky,
54 : kUnicode = 1 << FlagShiftBit::kUnicode,
55 : kDotAll = 1 << FlagShiftBit::kDotAll,
56 : // Update FlagCount() and the switch in FlagShiftBits() when adding new flags.
57 : kInvalid = 1 << FlagShiftBit::kInvalid, // Not included in FlagCount.
58 : };
59 : typedef base::Flags<Flag> Flags;
60 : static constexpr int FlagCount() { return 6; } // Number of real flags (kGlobal..kDotAll); kNone and kInvalid are not counted.
61 :
62 952 : static int FlagShiftBits(Flag flag) { // Returns the bit position (FlagShiftBit value) of a single flag.
63 952 : switch (flag) {
64 : case kGlobal:
65 336 : return FlagShiftBit::kGlobal;
66 : case kIgnoreCase:
67 56 : return FlagShiftBit::kIgnoreCase;
68 : case kMultiline:
69 56 : return FlagShiftBit::kMultiline;
70 : case kSticky:
71 112 : return FlagShiftBit::kSticky;
72 : case kUnicode:
73 336 : return FlagShiftBit::kUnicode;
74 : case kDotAll:
75 56 : return FlagShiftBit::kDotAll;
76 : default: // Reached for kNone, kInvalid, or any value that is not exactly one of the flags above.
77 : STATIC_ASSERT(FlagCount() == 6); // Breaks the build if FlagCount() changes without this switch being revisited.
78 0 : UNREACHABLE();
79 : }
80 : }
81 :
82 : DECL_ACCESSORS(data, Object)
83 : DECL_ACCESSORS(flags, Object)
84 : DECL_ACCESSORS(last_index, Object)
85 : DECL_ACCESSORS(source, Object)
86 :
87 : V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(Isolate* isolate,
88 : Handle<String> source,
89 : Flags flags);
90 : static Handle<JSRegExp> Copy(Handle<JSRegExp> regexp);
91 :
92 : static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
93 : Handle<String> source, Flags flags);
94 : static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
95 : Handle<String> source,
96 : Handle<String> flags_string);
97 :
98 : inline Type TypeTag() const;
99 : // Number of captures (without the match itself).
100 : inline int CaptureCount();
101 : inline Flags GetFlags();
102 : inline String Pattern();
103 : inline Object CaptureNameMap();
104 : inline Object DataAt(int index) const;
105 : // Set implementation data after the object has been prepared.
106 : inline void SetDataAt(int index, Object value);
107 :
108 : static int code_index(bool is_latin1) { // Data-array index of the Irregexp code slot for Latin1 (true) or UC16 (false) input.
109 8952394 : if (is_latin1) {
110 : return kIrregexpLatin1CodeIndex;
111 : } else {
112 : return kIrregexpUC16CodeIndex;
113 : }
114 : }
115 :
116 : inline bool HasCompiledCode() const;
117 : inline void DiscardCompiledCodeForSerialization();
118 :
119 : DECL_CAST(JSRegExp)
120 :
121 : // Dispatched behavior.
122 : DECL_PRINTER(JSRegExp)
123 : DECL_VERIFIER(JSRegExp)
124 :
125 : // Layout description.
126 : #define JS_REGEXP_FIELDS(V) \
127 : V(kDataOffset, kTaggedSize) \
128 : V(kSourceOffset, kTaggedSize) \
129 : V(kFlagsOffset, kTaggedSize) \
130 : /* Total size. */ \
131 : V(kSize, 0) \
132 : /* This is already an in-object field. */ \
133 : V(kLastIndexOffset, 0)
134 :
135 : DEFINE_FIELD_OFFSET_CONSTANTS(JSObject::kHeaderSize, JS_REGEXP_FIELDS)
136 : #undef JS_REGEXP_FIELDS
137 :
138 : // Indices in the data array.
139 : static const int kTagIndex = 0;
140 : static const int kSourceIndex = kTagIndex + 1;
141 : static const int kFlagsIndex = kSourceIndex + 1;
142 : static const int kDataIndex = kFlagsIndex + 1;
143 : // The data fields are used in different ways depending on the
144 : // value of the tag.
145 : // Atom regexps (literal strings).
146 : static const int kAtomPatternIndex = kDataIndex;
147 :
148 : static const int kAtomDataSize = kAtomPatternIndex + 1;
149 :
150 : // Irregexp compiled code or bytecode for Latin1. If compilation
151 : // fails, this field holds an exception object that should be
152 : // thrown if the regexp is used again.
153 : static const int kIrregexpLatin1CodeIndex = kDataIndex;
154 : // Irregexp compiled code or bytecode for UC16. If compilation
155 : // fails, this field holds an exception object that should be
156 : // thrown if the regexp is used again.
157 : static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
158 : // Maximal number of registers used by either Latin1 or UC16.
159 : // Only used to check that there is enough stack space.
160 : static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
161 : // Number of captures in the compiled regexp.
162 : static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
163 : // Maps names of named capture groups (at indices 2i) to their corresponding
164 : // (1-based) capture group indices (at indices 2i + 1).
165 : static const int kIrregexpCaptureNameMapIndex = kDataIndex + 4;
166 :
167 : static const int kIrregexpDataSize = kIrregexpCaptureNameMapIndex + 1;
168 :
169 : // In-object fields.
170 : static const int kLastIndexFieldIndex = 0; // last_index is the only in-object field (kInObjectFieldCount == 1).
171 : static const int kInObjectFieldCount = 1;
172 :
173 : // Descriptor array index to important methods in the prototype. NOTE(review): presumably these must stay in sync with the code that sets up the RegExp prototype - confirm.
174 : static const int kExecFunctionDescriptorIndex = 1;
175 : static const int kSymbolMatchFunctionDescriptorIndex = 13;
176 : static const int kSymbolReplaceFunctionDescriptorIndex = 14;
177 : static const int kSymbolSearchFunctionDescriptorIndex = 15;
178 : static const int kSymbolSplitFunctionDescriptorIndex = 16;
179 : static const int kSymbolMatchAllFunctionDescriptorIndex = 17;
180 :
181 : // The uninitialized value for a regexp code object.
182 : static const int kUninitializedValue = -1;
183 :
184 329630 : OBJECT_CONSTRUCTORS(JSRegExp, JSObject);
185 : };
186 :
187 504 : DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags) // NOTE(review): presumably defines the bitwise operators that combine JSRegExp::Flag values into Flags - confirm against the macro's definition.
188 :
189 : // JSRegExpResult is just a JSArray with a specific initial map.
190 : // This initial map adds in-object properties for "index", "input" and "groups"
191 : // properties, as assigned by RegExp.prototype.exec, which allows
192 : // faster creation of RegExp exec results.
193 : // This class just holds constants used when creating the result.
194 : // After creation the result must be treated as a JSArray in all regards.
195 : class JSRegExpResult : public JSArray {
196 : public:
197 : // Layout description.
198 : #define REG_EXP_RESULT_FIELDS(V) \
199 : V(kIndexOffset, kTaggedSize) \
200 : V(kInputOffset, kTaggedSize) \
201 : V(kGroupsOffset, kTaggedSize) \
202 : /* Total size. */ \
203 : V(kSize, 0)
204 :
205 : DEFINE_FIELD_OFFSET_CONSTANTS(JSArray::kSize, REG_EXP_RESULT_FIELDS)
206 : #undef REG_EXP_RESULT_FIELDS
207 :
208 : // Indices of in-object properties.
209 : static const int kIndexIndex = 0;
210 : static const int kInputIndex = 1;
211 : static const int kGroupsIndex = 2;
212 : static const int kInObjectPropertyCount = 3; // index, input and groups.
213 :
214 : private:
215 : DISALLOW_IMPLICIT_CONSTRUCTORS(JSRegExpResult);
216 : };
217 :
218 : } // namespace internal
219 : } // namespace v8
220 :
221 : #include "src/objects/object-macros-undef.h"
222 :
223 : #endif // V8_OBJECTS_JS_REGEXP_H_
|