Line data Source code
1 : // Copyright 2012 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
6 : #define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
7 :
8 : #include "src/label.h"
9 : #include "src/regexp/regexp-ast.h"
10 :
11 : namespace v8 {
12 : namespace internal {
13 :
14 : static const uc32 kLeadSurrogateStart = 0xd800;  // First UTF-16 lead (high) surrogate code unit.
15 : static const uc32 kLeadSurrogateEnd = 0xdbff;  // Last lead surrogate code unit (inclusive).
16 : static const uc32 kTrailSurrogateStart = 0xdc00;  // First UTF-16 trail (low) surrogate code unit.
17 : static const uc32 kTrailSurrogateEnd = 0xdfff;  // Last trail surrogate code unit (inclusive).
18 : static const uc32 kNonBmpStart = 0x10000;  // First code point above the Basic Multilingual Plane.
19 : static const uc32 kNonBmpEnd = 0x10ffff;  // Last valid Unicode code point (inclusive).
20 :
21 : struct DisjunctDecisionRow {  // Pairs a character class with a jump target.
22 : RegExpCharacterClass cc;  // Character class for this row.
23 : Label* on_match;  // NOTE(review): presumably the label taken when cc matches; no user is visible in this header.
24 : };
25 :
26 :
27 85361 : class RegExpMacroAssembler {  // Abstract interface for emitting regexp matching code; subclasses report their backend via Implementation().
28 : public:
29 : // The implementation must be able to handle at least:
30 : static const int kMaxRegister = (1 << 16) - 1;  // 65535
31 : static const int kMaxCPOffset = (1 << 15) - 1;  // 32767
32 : static const int kMinCPOffset = -(1 << 15);  // -32768
33 :
34 : static const int kTableSizeBits = 7;  // Sizes the byte table used by CheckBitInTable().
35 : static const int kTableSize = 1 << kTableSizeBits;  // 128 entries.
36 : static const int kTableMask = kTableSize - 1;  // 0x7f
37 :
38 : enum IrregexpImplementation {  // Return type of Implementation().
39 : kIA32Implementation,
40 : kARMImplementation,
41 : kARM64Implementation,
42 : kMIPSImplementation,
43 : kS390Implementation,
44 : kPPCImplementation,
45 : kX64Implementation,
46 : kX87Implementation,
47 : kBytecodeImplementation
48 : };
49 :
50 : enum StackCheckFlag {  // Passed to PushRegister() as check_stack_limit.
51 : kNoStackLimitCheck = false,
52 : kCheckStackLimit = true
53 : };
54 :
55 : RegExpMacroAssembler(Isolate* isolate, Zone* zone);
56 : virtual ~RegExpMacroAssembler();
57 : // This function is called when code generation is aborted, so that
58 : // the assembler can clean up internal data structures.
59 0 : virtual void AbortedCodeGeneration() {}
60 : // The maximal number of pushes between stack checks. Users must supply the
61 : // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
62 : // at least once for every stack_limit_slack() pushes that are executed.
63 : virtual int stack_limit_slack() = 0;
64 : virtual bool CanReadUnaligned() = 0;
65 : virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
66 : virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
67 : // Continues execution from the position pushed on the top of the backtrack
68 : // stack by an earlier PushBacktrack(Label*).
69 : virtual void Backtrack() = 0;
70 : virtual void Bind(Label* label) = 0;
71 : virtual void CheckAtStart(Label* on_at_start) = 0;
72 : // Compare the current character with c; on_equal is the label to take on a
73 : // match (inferred from the parameter names; implementations live elsewhere).
74 : virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
75 : // Bitwise and the current character with the given constant and then
76 : // check for a match with c.
77 : virtual void CheckCharacterAfterAnd(unsigned c,
78 : unsigned and_with,
79 : Label* on_equal) = 0;
80 : virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
81 : virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
82 : virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
83 : virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
84 : virtual void CheckNotBackReference(int start_reg, bool read_backward,
85 : Label* on_no_match) = 0;
86 : virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
87 : bool read_backward, bool unicode,
88 : Label* on_no_match) = 0;
89 : // Check the current character for a match with a literal character. If we
90 : // fail to match then goto the on_not_equal label. End of input always
91 : // matches. If the label is nullptr then we should pop a backtrack address
92 : // off the stack and go to that.
93 : virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
94 : virtual void CheckNotCharacterAfterAnd(unsigned c,
95 : unsigned and_with,
96 : Label* on_not_equal) = 0;
97 : // Subtract a constant from the current character, then and with the given
98 : // constant and then check for a match with c.
99 : virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
100 : uc16 minus,
101 : uc16 and_with,
102 : Label* on_not_equal) = 0;
103 : virtual void CheckCharacterInRange(uc16 from,
104 : uc16 to, // Both inclusive.
105 : Label* on_in_range) = 0;
106 : virtual void CheckCharacterNotInRange(uc16 from,
107 : uc16 to, // Both inclusive.
108 : Label* on_not_in_range) = 0;
109 :
110 : // The current character (modulo kTableSize) is looked up in the byte
111 : // array, and if the found byte is non-zero, we jump to the on_bit_set label.
112 : virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
113 :
114 : // Checks whether the given offset from the current position is before
115 : // the end of the string. May overwrite the current character.
116 : virtual void CheckPosition(int cp_offset, Label* on_outside_input);
117 : // Check whether a standard/default character class matches the current
118 : // character. Returns false if the type of special character class does
119 : // not have custom support.
120 : // May clobber the current loaded character.
121 : virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
122 : virtual void Fail() = 0;
123 : virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
124 : virtual void GoTo(Label* label) = 0;
125 : // Check whether a register is >= a given constant and go to a label if it
126 : // is. Backtracks instead if the label is nullptr.
127 : virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
128 : // Check whether a register is < a given constant and go to a label if it is.
129 : // Backtracks instead if the label is nullptr.
130 : virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
131 : // Check whether a register is equal to the current position and go to a
132 : // label if it is.
133 : virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
134 : virtual IrregexpImplementation Implementation() = 0;
135 : virtual void LoadCurrentCharacter(int cp_offset,
136 : Label* on_end_of_input,
137 : bool check_bounds = true,
138 : int characters = 1) = 0;
139 : virtual void PopCurrentPosition() = 0;
140 : virtual void PopRegister(int register_index) = 0;
141 : // Pushes the label on the backtrack stack, so that a following Backtrack
142 : // will go to this label. Always checks the backtrack stack limit.
143 : virtual void PushBacktrack(Label* label) = 0;
144 : virtual void PushCurrentPosition() = 0;
145 : virtual void PushRegister(int register_index,
146 : StackCheckFlag check_stack_limit) = 0;
147 : virtual void ReadCurrentPositionFromRegister(int reg) = 0;
148 : virtual void ReadStackPointerFromRegister(int reg) = 0;
149 : virtual void SetCurrentPositionFromEnd(int by) = 0;
150 : virtual void SetRegister(int register_index, int to) = 0;
151 : // Return whether the matching (with a global regexp) will be restarted.
152 : virtual bool Succeed() = 0;
153 : virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
154 : virtual void ClearRegisters(int reg_from, int reg_to) = 0;
155 : virtual void WriteStackPointerToRegister(int reg) = 0;
156 :
157 : // Compares two-byte strings case insensitively.
158 : // Called from generated RegExp code.
159 : static int CaseInsensitiveCompareUC16(Address byte_offset1,
160 : Address byte_offset2,
161 : size_t byte_length, Isolate* isolate);
162 :
163 : // Check that we are not in the middle of a surrogate pair.
164 : void CheckNotInSurrogatePair(int cp_offset, Label* on_failure);
165 :
166 : // Controls the generation of large inlined constants in the code.
167 85312 : void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
168 : bool slow_safe() { return slow_safe_compiler_; }
169 :
170 : enum GlobalMode {
171 : NOT_GLOBAL,
172 : GLOBAL_NO_ZERO_LENGTH_CHECK,
173 : GLOBAL,
174 : GLOBAL_UNICODE
175 : };
176 : // Set whether the regular expression has the global flag. Exiting due to
177 : // a failure in a global regexp may still mean success overall.
178 3819 : inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
179 86831 : inline bool global() { return global_mode_ != NOT_GLOBAL; }  // True for every mode except NOT_GLOBAL.
180 : inline bool global_with_zero_length_check() {  // True only for GLOBAL and GLOBAL_UNICODE.
181 84914 : return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
182 : }
183 : inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
184 :
185 : Isolate* isolate() const { return isolate_; }
186 : Zone* zone() const { return zone_; }
187 :
188 : private:
189 : bool slow_safe_compiler_;  // Written by set_slow_safe(), read by slow_safe().
190 : GlobalMode global_mode_;  // Written by set_global_mode(), read by the global*() predicates.
191 : Isolate* isolate_;  // Returned by isolate().
192 : Zone* zone_;  // Returned by zone().
193 : };
194 :
195 164198 : class NativeRegExpMacroAssembler: public RegExpMacroAssembler {  // Adds the static helpers used to run, and called from, generated native RegExp code.
196 : public:
197 : // Type of input string to generate code for.
198 : enum Mode { LATIN1 = 1, UC16 = 2 };  // NOTE(review): values look like bytes per character; confirm.
199 :
200 : // Result of calling generated native RegExp code.
201 : // RETRY: Something significant changed during execution, and the matching
202 : // should be retried from scratch.
203 : // EXCEPTION: Something failed during execution. If no exception has been
204 : // thrown, it's an internal out-of-memory, and the caller should
205 : // throw the exception.
206 : // FAILURE: Matching failed.
207 : // SUCCESS: Matching succeeded, and the output array has been filled with
208 : // capture positions.
209 : enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
210 :
211 : NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone);
212 : ~NativeRegExpMacroAssembler() override;
213 : bool CanReadUnaligned() override;
214 :
215 : // Returns a {Result} sentinel, or the number of successful matches.
216 : static int Match(Handle<Code> regexp, Handle<String> subject,
217 : int* offsets_vector, int offsets_vector_length,
218 : int previous_index, Isolate* isolate);
219 :
220 : // Called from RegExp if the backtrack stack limit is hit.
221 : // Tries to expand the stack. Returns the new stack-pointer if
222 : // successful, and updates the stack_top address, or returns 0 if unable
223 : // to grow the stack.
224 : // This function must not trigger a garbage collection.
225 : static Address GrowStack(Address stack_pointer, Address* stack_top,
226 : Isolate* isolate);
227 :
228 : static const byte* StringCharacterPosition(
229 : String subject, int start_index, const DisallowHeapAllocation& no_gc);  // NOTE(review): presumably the address of the character at start_index in subject; confirm in the .cc file.
230 :
231 : static int CheckStackGuardState(Isolate* isolate, int start_index,
232 : bool is_direct_call, Address* return_address,
233 : Code re_code, Address* subject,
234 : const byte** input_start,
235 : const byte** input_end);  // NOTE(review): pointer parameters look like in/out values; semantics are not visible in this header.
236 :
237 : // Byte map of one byte characters with a 0xff if the character is a word
238 : // character (digit, letter or underscore) and 0x00 otherwise.
239 : // Used by generated RegExp code.
240 : static const byte word_character_map[256];
241 :
242 : static Address word_character_map_address() {  // Address of the first entry of word_character_map.
243 63558 : return reinterpret_cast<Address>(&word_character_map[0]);
244 : }
245 :
246 : // Returns a {Result} sentinel, or the number of successful matches.
247 : static int Execute(Code code, String input, int start_offset,
248 : const byte* input_start, const byte* input_end,
249 : int* output, int output_size, Isolate* isolate);
250 : };
251 :
252 : } // namespace internal
253 : } // namespace v8
254 :
255 : #endif // V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
|