Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_OBJECTS_JS_REGEXP_H_
6 : #define V8_OBJECTS_JS_REGEXP_H_
7 :
8 : #include "src/objects/js-array.h"
9 :
10 : // Has to be the last include (doesn't have include guards):
11 : #include "src/objects/object-macros.h"
12 :
13 : namespace v8 {
14 : namespace internal {
15 :
16 : // Regular expression objects.
17 : // The regular expression holds a single reference to a FixedArray in
18 : // the kDataOffset field.
19 : // The FixedArray contains the following data (see the k*Index constants below):
20 : // - tag : type of regexp implementation (not compiled yet, atom or irregexp)
21 : // - reference to the original source string
22 : // - reference to the original flag string
23 : // If it is an atom regexp:
24 : // - a reference to a literal string to search for
25 : // If it is an irregexp regexp:
26 : // - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
27 : // used for tracking the last usage (used for regexp code flushing).
28 : // - a reference to code for UC16 inputs (bytecode or compiled), or a smi
29 : // used for tracking the last usage (used for regexp code flushing).
30 : // - max number of registers used by irregexp implementations.
31 : // - number of capture registers (output values) of the regexp (followed by the capture name map, see kIrregexpCaptureNameMapIndex).
32 : class JSRegExp : public JSObject {
33 : public:
34 : // Meaning of Type:
35 : // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
36 : // ATOM: A simple string to match against using an indexOf operation.
37 : // IRREGEXP: Compiled with Irregexp.
38 : enum Type { NOT_COMPILED, ATOM, IRREGEXP };
39 : struct FlagShiftBit {  // Bit position of each flag; Flag values below are 1 << position.
40 : static const int kGlobal = 0;
41 : static const int kIgnoreCase = 1;
42 : static const int kMultiline = 2;
43 : static const int kSticky = 3;
44 : static const int kUnicode = 4;
45 : static const int kDotAll = 5;
46 : static const int kInvalid = 7;  // Bit 6 is not assigned.
47 : };
48 : enum Flag : uint8_t {  // One bit per flag, stored in a byte-sized enum.
49 : kNone = 0,
50 : kGlobal = 1 << FlagShiftBit::kGlobal,
51 : kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase,
52 : kMultiline = 1 << FlagShiftBit::kMultiline,
53 : kSticky = 1 << FlagShiftBit::kSticky,
54 : kUnicode = 1 << FlagShiftBit::kUnicode,
55 : kDotAll = 1 << FlagShiftBit::kDotAll,
56 : // Update FlagCount when adding new flags.
57 : kInvalid = 1 << FlagShiftBit::kInvalid, // Not included in FlagCount.
58 : };
59 : using Flags = base::Flags<Flag>;
60 : static constexpr int FlagCount() { return 6; }  // kGlobal through kDotAll; kNone and kInvalid are not counted.
61 :
62 952 : static int FlagShiftBits(Flag flag) {  // Maps a single flag to its FlagShiftBit position.
63 952 : switch (flag) {
64 : case kGlobal:
65 : return FlagShiftBit::kGlobal;
66 : case kIgnoreCase:
67 56 : return FlagShiftBit::kIgnoreCase;
68 : case kMultiline:
69 56 : return FlagShiftBit::kMultiline;
70 : case kSticky:
71 112 : return FlagShiftBit::kSticky;
72 : case kUnicode:
73 336 : return FlagShiftBit::kUnicode;
74 : case kDotAll:
75 56 : return FlagShiftBit::kDotAll;
76 : default:  // kNone, kInvalid and any multi-bit value end up here.
77 : STATIC_ASSERT(FlagCount() == 6);  // Presumably a reminder to extend this switch when a flag is added.
78 0 : UNREACHABLE();
79 : }
80 : }
81 :
82 : DECL_ACCESSORS(data, Object)
83 : DECL_ACCESSORS(flags, Object)
84 : DECL_ACCESSORS(last_index, Object)
85 : DECL_ACCESSORS(source, Object)
86 :
87 : V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(Isolate* isolate,
88 : Handle<String> source,
89 : Flags flags);
90 : static Handle<JSRegExp> Copy(Handle<JSRegExp> regexp);
91 :
92 : static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
93 : Handle<String> source, Flags flags);
94 : static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
95 : Handle<String> source,
96 : Handle<String> flags_string);
97 :
98 : inline Type TypeTag() const;
99 : // Number of captures (without the match itself).
100 : inline int CaptureCount();
101 : inline Flags GetFlags();
102 : inline String Pattern();
103 : inline Object CaptureNameMap();
104 : inline Object DataAt(int index) const;
105 : // Set implementation data after the object has been prepared.
106 : inline void SetDataAt(int index, Object value);
107 :
108 : static int code_index(bool is_latin1) {  // Data-array index of the code slot for the given encoding.
109 8948084 : if (is_latin1) {
110 : return kIrregexpLatin1CodeIndex;
111 : } else {
112 : return kIrregexpUC16CodeIndex;
113 : }
114 : }
115 :
116 : inline bool HasCompiledCode() const;
117 : inline void DiscardCompiledCodeForSerialization();
118 :
119 : DECL_CAST(JSRegExp)
120 :
121 : // Dispatched behavior.
122 : DECL_PRINTER(JSRegExp)
123 : DECL_VERIFIER(JSRegExp)
124 :
125 : // Layout description.
126 : DEFINE_FIELD_OFFSET_CONSTANTS(JSObject::kHeaderSize,
127 : TORQUE_GENERATED_JSREG_EXP_FIELDS)
128 : /* The last index is already an in-object field (see kLastIndexFieldIndex below). */
129 : // TODO(v8:8944): improve handling of in-object fields
130 : static constexpr int kLastIndexOffset = kSize;
131 :
132 : // Indices in the data array.
133 : static const int kTagIndex = 0;
134 : static const int kSourceIndex = kTagIndex + 1;
135 : static const int kFlagsIndex = kSourceIndex + 1;
136 : static const int kDataIndex = kFlagsIndex + 1;
137 : // The data fields are used in different ways depending on the
138 : // value of the tag.
139 : // Atom regexps (literal strings).
140 : static const int kAtomPatternIndex = kDataIndex;
141 :
142 : static const int kAtomDataSize = kAtomPatternIndex + 1;
143 :
144 : // Irregexp compiled code or bytecode for Latin1. If compilation
145 : // fails, this field holds an exception object that should be
146 : // thrown if the regexp is used again.
147 : static const int kIrregexpLatin1CodeIndex = kDataIndex;
148 : // Irregexp compiled code or bytecode for UC16. If compilation
149 : // fails, this field holds an exception object that should be
150 : // thrown if the regexp is used again.
151 : static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
152 : // Maximal number of registers used by either Latin1 or UC16.
153 : // Only used to check that there is enough stack space.
154 : static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
155 : // Number of captures in the compiled regexp.
156 : static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
157 : // Maps names of named capture groups (at indices 2i) to their corresponding
158 : // (1-based) capture group indices (at indices 2i + 1).
159 : static const int kIrregexpCaptureNameMapIndex = kDataIndex + 4;
160 :
161 : static const int kIrregexpDataSize = kIrregexpCaptureNameMapIndex + 1;
162 :
163 : // In-object fields.
164 : static const int kLastIndexFieldIndex = 0;
165 : static const int kInObjectFieldCount = 1;
166 :
167 : // Descriptor array indices of important methods in the prototype.
168 : static const int kExecFunctionDescriptorIndex = 1;
169 : static const int kSymbolMatchFunctionDescriptorIndex = 13;
170 : static const int kSymbolReplaceFunctionDescriptorIndex = 14;
171 : static const int kSymbolSearchFunctionDescriptorIndex = 15;
172 : static const int kSymbolSplitFunctionDescriptorIndex = 16;
173 : static const int kSymbolMatchAllFunctionDescriptorIndex = 17;
174 :
175 : // The uninitialized value for a regexp code object.
176 : static const int kUninitializedValue = -1;
177 :
178 : OBJECT_CONSTRUCTORS(JSRegExp, JSObject);
179 : };
180 :
181 : DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)  // NOTE(review): presumably defines the flag-combining operators for JSRegExp::Flags; macro is defined elsewhere - confirm.
182 :
183 : // JSRegExpResult is just a JSArray with a specific initial map.
184 : // This initial map adds in-object properties for the "index" and "input"
185 : // properties assigned by RegExp.prototype.exec (plus a third slot, see
186 : // kGroupsIndex below), which allows faster creation of RegExp exec results.
187 : // This class just holds constants used when creating the result.
188 : // After creation the result must be treated as a JSArray in all regards.
189 : class JSRegExpResult : public JSArray {
190 : public:
191 : // Layout description.
192 : DEFINE_FIELD_OFFSET_CONSTANTS(JSArray::kSize,
193 : TORQUE_GENERATED_JSREG_EXP_RESULT_FIELDS)
194 :
195 : // Indices of in-object properties.
196 : static const int kIndexIndex = 0;
197 : static const int kInputIndex = 1;
198 : static const int kGroupsIndex = 2;
199 : static const int kInObjectPropertyCount = 3;  // Covers the three indices above.
200 :
201 : private:
202 : DISALLOW_IMPLICIT_CONSTRUCTORS(JSRegExpResult);  // The class only carries constants (see the class comment).
203 : };
204 :
205 : } // namespace internal
206 : } // namespace v8
207 :
208 : #include "src/objects/object-macros-undef.h"
209 :
210 : #endif // V8_OBJECTS_JS_REGEXP_H_
|