Line data Source code
1 : // Copyright 2011 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : // A simple interpreter for the Irregexp byte code.
6 :
7 : #include "src/regexp/interpreter-irregexp.h"
8 :
9 : #include "src/ast/ast.h"
10 : #include "src/objects-inl.h"
11 : #include "src/regexp/bytecodes-irregexp.h"
12 : #include "src/regexp/jsregexp.h"
13 : #include "src/regexp/regexp-macro-assembler.h"
14 : #include "src/unicode.h"
15 : #include "src/utils.h"
16 :
17 : #ifdef V8_INTL_SUPPORT
18 : #include "unicode/uchar.h"
19 : #endif // V8_INTL_SUPPORT
20 :
21 : namespace v8 {
22 : namespace internal {
23 :
24 : static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
25 : int len, Vector<const uc16> subject,
26 : bool unicode) {
27 : Address offset_a =
28 92450 : reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
29 : Address offset_b =
30 92450 : reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
31 46225 : size_t length = len * kUC16Size;
32 46225 : return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
33 : offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
34 : }
35 :
36 :
37 36835 : static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
38 : int len, Vector<const uint8_t> subject,
39 : bool unicode) {
40 : // For Latin1 characters the unicode flag makes no difference.
41 43097 : for (int i = 0; i < len; i++) {
42 73712 : unsigned int old_char = subject[from++];
43 73712 : unsigned int new_char = subject[current++];
44 36856 : if (old_char == new_char) continue;
45 : // Convert both characters to lower case.
46 33858 : old_char |= 0x20;
47 33858 : new_char |= 0x20;
48 33858 : if (old_char != new_char) return false;
49 : // Not letters in the ASCII range and Latin-1 range.
50 456 : if (!(old_char - 'a' <= 'z' - 'a') &&
51 177 : !(old_char - 224 <= 254 - 224 && old_char != 247)) {
52 : return false;
53 : }
54 : }
55 : return true;
56 : }
57 :
58 :
59 : #ifdef DEBUG
60 : static void TraceInterpreter(const byte* code_base,
61 : const byte* pc,
62 : int stack_depth,
63 : int current_position,
64 : uint32_t current_char,
65 : int bytecode_length,
66 : const char* bytecode_name) {
67 : if (FLAG_trace_regexp_bytecodes) {
68 : bool printable = (current_char < 127 && current_char >= 32);
69 : const char* format =
70 : printable ?
71 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
72 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
73 : PrintF(format,
74 : pc - code_base,
75 : stack_depth,
76 : current_position,
77 : current_char,
78 : printable ? current_char : '.',
79 : bytecode_name);
80 : for (int i = 0; i < bytecode_length; i++) {
81 : printf(", %02x", pc[i]);
82 : }
83 : printf(" ");
84 : for (int i = 1; i < bytecode_length; i++) {
85 : unsigned char b = pc[i];
86 : if (b < 127 && b >= 32) {
87 : printf("%c", b);
88 : } else {
89 : printf(".");
90 : }
91 : }
92 : printf("\n");
93 : }
94 : }
95 :
96 :
// BYTECODE(name) introduces the `case BC_<name>:` label for one opcode inside
// the interpreter's dispatch switch. In DEBUG builds the label is followed by
// a call to TraceInterpreter, which reads the enclosing function's locals
// code_base, pc, backtrack_sp, backtrack_stack_base, current and current_char
// and passes BC_<name>_LENGTH plus the stringified opcode name.
97 : #define BYTECODE(name) \
98 : case BC_##name: \
99 : TraceInterpreter(code_base, \
100 : pc, \
101 : static_cast<int>(backtrack_sp - backtrack_stack_base), \
102 : current, \
103 : current_char, \
104 : BC_##name##_LENGTH, \
105 : #name);
106 : #else
// Non-DEBUG builds: BYTECODE(name) expands to just the case label.
107 : #define BYTECODE(name) \
108 : case BC_##name:
109 : #endif
110 :
111 :
112 : static int32_t Load32Aligned(const byte* pc) {
113 : DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
114 2588642716 : return *reinterpret_cast<const int32_t *>(pc);
115 : }
116 :
117 :
118 : static int32_t Load16Aligned(const byte* pc) {
119 : DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
120 13474804 : return *reinterpret_cast<const uint16_t *>(pc);
121 : }
122 :
123 :
124 : // A simple abstraction over the backtracking stack used by the interpreter.
125 : // This backtracking stack does not grow automatically, but it ensures that the
126 : // the memory held by the stack is released or remembered in a cache if the
127 : // matching terminates.
128 : class BacktrackStack {
129 : public:
130 4282055 : BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
131 :
132 : ~BacktrackStack() {
133 : DeleteArray(data_);
134 : }
135 :
136 : int* data() const { return data_; }
137 :
138 : int max_size() const { return kBacktrackStackSize; }
139 :
140 : private:
141 : static const int kBacktrackStackSize = 10000;
142 :
143 : int* data_;
144 :
145 : DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
146 : };
147 :
148 : namespace {
149 :
150 : IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
151 : // We abort interpreter execution after the stack overflow is thrown, and thus
152 : // allow allocation here despite the outer DisallowHeapAllocationScope.
153 : AllowHeapAllocation yes_gc;
154 4 : isolate->StackOverflow();
155 : return IrregexpInterpreter::EXCEPTION;
156 : }
157 :
158 : // Runs all pending interrupts. Callers must update unhandlified object
159 : // references after this function completes.
160 1382078 : IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
161 : Handle<String> subject_string) {
162 : DisallowHeapAllocation no_gc;
163 :
164 : StackLimitCheck check(isolate);
165 1382078 : if (check.JsHasOverflowed()) {
166 : // A real stack overflow.
167 2 : return StackOverflow(isolate);
168 : }
169 :
170 : const bool was_one_byte =
171 1382076 : String::IsOneByteRepresentationUnderneath(*subject_string);
172 :
173 : Object result;
174 : {
175 : AllowHeapAllocation yes_gc;
176 1382076 : result = isolate->stack_guard()->HandleInterrupts();
177 : }
178 :
179 1382076 : if (result->IsException(isolate)) {
180 : return IrregexpInterpreter::EXCEPTION;
181 : }
182 :
183 : // If we changed between a LATIN1 and a UC16 string, we need to restart
184 : // regexp matching with the appropriate template instantiation of RawMatch.
185 1382072 : if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
186 : was_one_byte) {
187 : return IrregexpInterpreter::RETRY;
188 : }
189 :
190 1382071 : return IrregexpInterpreter::SUCCESS;
191 : }
192 :
193 : template <typename Char>
194 1382071 : void UpdateCodeAndSubjectReferences(Isolate* isolate,
195 : Handle<ByteArray> code_array,
196 : Handle<String> subject_string,
197 : const byte** code_base_out,
198 : const byte** pc_out,
199 : Vector<const Char>* subject_string_out) {
200 : DisallowHeapAllocation no_gc;
201 :
202 2764142 : if (*code_base_out != code_array->GetDataStartAddress()) {
203 1 : const intptr_t pc_offset = *pc_out - *code_base_out;
204 : DCHECK_GT(pc_offset, 0);
205 1 : *code_base_out = code_array->GetDataStartAddress();
206 1 : *pc_out = *code_base_out + pc_offset;
207 : }
208 :
209 : DCHECK(subject_string->IsFlat());
210 2764142 : *subject_string_out = subject_string->GetCharVector<Char>(no_gc);
211 1382071 : }
212 :
// Executes the Irregexp bytecode held in |code_array| against |subject|
// (the character vector of |subject_string|), starting at index |current|
// with |current_char| as the initially loaded character. Capture and loop
// state is read from and written to |registers|.
//
// Instruction format, as used below: each instruction starts with a 32-bit
// word |insn|. (insn & BYTECODE_MASK) selects the opcode and
// (insn >> BYTECODE_SHIFT) is an operand packed into the same word. Further
// operands follow at pc + 4, pc + 8, ... Branch targets are stored as offsets
// from |code_base|.
//
// Returns:
//   SUCCESS   - BC_SUCCEED was executed;
//   FAILURE   - BC_FAIL was executed;
//   EXCEPTION - a push found the backtrack stack full (via StackOverflow), or
//               HandleInterrupts returned EXCEPTION during BC_POP_BT;
//   RETRY     - HandleInterrupts returned RETRY during BC_POP_BT.
213 : template <typename Char>
214 4282055 : IrregexpInterpreter::Result RawMatch(Isolate* isolate,
215 : Handle<ByteArray> code_array,
216 : Handle<String> subject_string,
217 : Vector<const Char> subject, int* registers,
218 : int current, uint32_t current_char) {
219 : DisallowHeapAllocation no_gc;
220 :
221 4282055 : const byte* pc = code_array->GetDataStartAddress();
222 4282055 : const byte* code_base = pc;
223 :
224 : // BacktrackStack ensures that the memory allocated for the backtracking stack
225 : // is returned to the system or cached if there is no stack being cached at
226 : // the moment.
227 : BacktrackStack backtrack_stack;
228 : int* backtrack_stack_base = backtrack_stack.data();
229 : int* backtrack_sp = backtrack_stack_base;
// Number of free slots left on the backtrack stack; decremented on every push
// and incremented on every pop.
230 : int backtrack_stack_space = backtrack_stack.max_size();
231 : #ifdef DEBUG
232 : if (FLAG_trace_regexp_bytecodes) {
233 : PrintF("\n\nStart bytecode interpreter\n\n");
234 : }
235 : #endif
// Main dispatch loop: fetch the instruction word at |pc| and switch on its
// opcode. The loop is only left through one of the return statements.
236 : while (true) {
237 1959499446 : int32_t insn = Load32Aligned(pc);
238 1959499446 : switch (insn & BYTECODE_MASK) {
239 : BYTECODE(BREAK)
240 0 : UNREACHABLE();
// The three PUSH_* opcodes store one int on the backtrack stack and bail out
// with a stack overflow when no free slot is left.
241 : BYTECODE(PUSH_CP)
242 5507694 : if (--backtrack_stack_space < 0) {
243 1 : return StackOverflow(isolate);
244 : }
245 5507693 : *backtrack_sp++ = current;
246 5507693 : pc += BC_PUSH_CP_LENGTH;
247 5507693 : break;
248 : BYTECODE(PUSH_BT)
249 9659743 : if (--backtrack_stack_space < 0) {
250 0 : return StackOverflow(isolate);
251 : }
252 19319486 : *backtrack_sp++ = Load32Aligned(pc + 4);
253 9659743 : pc += BC_PUSH_BT_LENGTH;
254 9659743 : break;
255 : BYTECODE(PUSH_REGISTER)
256 1351377 : if (--backtrack_stack_space < 0) {
257 1 : return StackOverflow(isolate);
258 : }
259 1351376 : *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
260 1351376 : pc += BC_PUSH_REGISTER_LENGTH;
261 1351376 : break;
262 : BYTECODE(SET_REGISTER)
263 2860240 : registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
264 1430120 : pc += BC_SET_REGISTER_LENGTH;
265 1430120 : break;
266 : BYTECODE(ADVANCE_REGISTER)
267 110718 : registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
268 110718 : pc += BC_ADVANCE_REGISTER_LENGTH;
269 110718 : break;
270 : BYTECODE(SET_REGISTER_TO_CP)
271 20045366 : registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
272 10022683 : pc += BC_SET_REGISTER_TO_CP_LENGTH;
273 10022683 : break;
274 : BYTECODE(SET_CP_TO_REGISTER)
275 3168 : current = registers[insn >> BYTECODE_SHIFT];
276 3168 : pc += BC_SET_CP_TO_REGISTER_LENGTH;
277 3168 : break;
// The stack pointer is saved in a register as a depth (slot count from the
// stack base), not as a raw pointer.
278 : BYTECODE(SET_REGISTER_TO_SP)
279 13147 : registers[insn >> BYTECODE_SHIFT] =
280 : static_cast<int>(backtrack_sp - backtrack_stack_base);
281 13147 : pc += BC_SET_REGISTER_TO_SP_LENGTH;
282 13147 : break;
// Restoring the stack pointer from a saved depth also recomputes the number
// of free slots so the overflow check in the PUSH_* opcodes stays accurate.
283 : BYTECODE(SET_SP_TO_REGISTER)
284 3168 : backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
285 3168 : backtrack_stack_space = backtrack_stack.max_size() -
286 : static_cast<int>(backtrack_sp - backtrack_stack_base);
287 3168 : pc += BC_SET_SP_TO_REGISTER_LENGTH;
288 3168 : break;
289 : BYTECODE(POP_CP)
290 950667 : backtrack_stack_space++;
291 950667 : --backtrack_sp;
292 950667 : current = *backtrack_sp;
293 950667 : pc += BC_POP_CP_LENGTH;
294 950667 : break;
295 : // clang-format off
// POP_BT (backtrack): pending interrupts are handled first. Any result other
// than SUCCESS ends matching with that result. Because interrupt handling may
// allocate, the raw code and subject pointers are refreshed before the popped
// value is used as the new pc offset from |code_base|.
296 : BYTECODE(POP_BT) {
297 : IrregexpInterpreter::Result return_code = HandleInterrupts(
298 1382078 : isolate, subject_string);
299 1382078 : if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
300 :
301 1382071 : UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
302 : &code_base, &pc, &subject);
303 :
304 1382071 : backtrack_stack_space++;
305 1382071 : --backtrack_sp;
306 1382071 : pc = code_base + *backtrack_sp;
307 1382071 : break;
308 : }
309 : BYTECODE(POP_REGISTER) // clang-format on
310 1324910 : backtrack_stack_space++;
311 1324910 : --backtrack_sp;
312 1324910 : registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
313 1324910 : pc += BC_POP_REGISTER_LENGTH;
314 1324910 : break;
315 : BYTECODE(FAIL)
316 : return IrregexpInterpreter::FAILURE;
317 : BYTECODE(SUCCEED)
318 4107355 : return IrregexpInterpreter::SUCCESS;
319 : BYTECODE(ADVANCE_CP)
320 4201831 : current += insn >> BYTECODE_SHIFT;
321 4201831 : pc += BC_ADVANCE_CP_LENGTH;
322 4201831 : break;
323 : BYTECODE(GOTO)
324 4707710 : pc = code_base + Load32Aligned(pc + 4);
325 2353855 : break;
326 : BYTECODE(ADVANCE_CP_AND_GOTO)
327 592706820 : current += insn >> BYTECODE_SHIFT;
328 1185413640 : pc = code_base + Load32Aligned(pc + 4);
329 592706820 : break;
// CHECK_GREEDY: if the current position equals the value on top of the
// backtrack stack, pop it and branch; otherwise fall through.
330 : BYTECODE(CHECK_GREEDY)
331 625672 : if (current == backtrack_sp[-1]) {
332 369765 : backtrack_sp--;
333 369765 : backtrack_stack_space++;
334 739530 : pc = code_base + Load32Aligned(pc + 4);
335 : } else {
336 255907 : pc += BC_CHECK_GREEDY_LENGTH;
337 : }
338 : break;
// LOAD_*_CURRENT_CHAR(S): the checked variants branch to the target at pc + 4
// when the requested characters are not all inside |subject|; the _UNCHECKED
// variants perform no bounds test of their own.
339 : BYTECODE(LOAD_CURRENT_CHAR) {
340 602927587 : int pos = current + (insn >> BYTECODE_SHIFT);
341 602927587 : if (pos >= subject.length() || pos < 0) {
342 1888678 : pc = code_base + Load32Aligned(pc + 4);
343 : } else {
344 1203966496 : current_char = subject[pos];
345 601983248 : pc += BC_LOAD_CURRENT_CHAR_LENGTH;
346 : }
347 : break;
348 : }
349 : BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
350 3174803 : int pos = current + (insn >> BYTECODE_SHIFT);
351 6349606 : current_char = subject[pos];
352 3174803 : pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
353 3174803 : break;
354 : }
// Multi-character loads pack the characters into |current_char| with the
// character at |pos| in the lowest bits.
355 : BYTECODE(LOAD_2_CURRENT_CHARS) {
356 0 : int pos = current + (insn >> BYTECODE_SHIFT);
357 0 : if (pos + 2 > subject.length() || pos < 0) {
358 0 : pc = code_base + Load32Aligned(pc + 4);
359 : } else {
360 0 : Char next = subject[pos + 1];
361 0 : current_char =
362 0 : (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
363 0 : pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
364 : }
365 : break;
366 : }
367 : BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
368 0 : int pos = current + (insn >> BYTECODE_SHIFT);
369 0 : Char next = subject[pos + 1];
370 0 : current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
371 0 : pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
372 0 : break;
373 : }
374 : BYTECODE(LOAD_4_CURRENT_CHARS) {
375 : DCHECK_EQ(1, sizeof(Char));
376 0 : int pos = current + (insn >> BYTECODE_SHIFT);
377 0 : if (pos + 4 > subject.length() || pos < 0) {
378 0 : pc = code_base + Load32Aligned(pc + 4);
379 : } else {
380 0 : Char next1 = subject[pos + 1];
381 0 : Char next2 = subject[pos + 2];
382 0 : Char next3 = subject[pos + 3];
383 0 : current_char = (subject[pos] |
384 : (next1 << 8) |
385 : (next2 << 16) |
386 : (next3 << 24));
387 0 : pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
388 : }
389 : break;
390 : }
391 : BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
392 : DCHECK_EQ(1, sizeof(Char));
393 0 : int pos = current + (insn >> BYTECODE_SHIFT);
394 0 : Char next1 = subject[pos + 1];
395 0 : Char next2 = subject[pos + 2];
396 0 : Char next3 = subject[pos + 3];
397 0 : current_char = (subject[pos] |
398 : (next1 << 8) |
399 : (next2 << 16) |
400 : (next3 << 24));
401 0 : pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
402 0 : break;
403 : }
// CHECK_* opcodes on |current_char|: branch to the encoded target when the
// stated condition holds, otherwise continue with the next instruction.
404 : BYTECODE(CHECK_4_CHARS) {
405 0 : uint32_t c = Load32Aligned(pc + 4);
406 0 : if (c == current_char) {
407 0 : pc = code_base + Load32Aligned(pc + 8);
408 : } else {
409 0 : pc += BC_CHECK_4_CHARS_LENGTH;
410 : }
411 : break;
412 : }
413 : BYTECODE(CHECK_CHAR) {
414 698766061 : uint32_t c = (insn >> BYTECODE_SHIFT);
415 698766061 : if (c == current_char) {
416 3965934 : pc = code_base + Load32Aligned(pc + 4);
417 : } else {
418 696783094 : pc += BC_CHECK_CHAR_LENGTH;
419 : }
420 : break;
421 : }
422 : BYTECODE(CHECK_NOT_4_CHARS) {
423 0 : uint32_t c = Load32Aligned(pc + 4);
424 0 : if (c != current_char) {
425 0 : pc = code_base + Load32Aligned(pc + 8);
426 : } else {
427 0 : pc += BC_CHECK_NOT_4_CHARS_LENGTH;
428 : }
429 : break;
430 : }
431 : BYTECODE(CHECK_NOT_CHAR) {
432 1382490 : uint32_t c = (insn >> BYTECODE_SHIFT);
433 1382490 : if (c != current_char) {
434 814694 : pc = code_base + Load32Aligned(pc + 4);
435 : } else {
436 975143 : pc += BC_CHECK_NOT_CHAR_LENGTH;
437 : }
438 : break;
439 : }
440 : BYTECODE(AND_CHECK_4_CHARS) {
441 0 : uint32_t c = Load32Aligned(pc + 4);
442 0 : if (c == (current_char & Load32Aligned(pc + 8))) {
443 0 : pc = code_base + Load32Aligned(pc + 12);
444 : } else {
445 0 : pc += BC_AND_CHECK_4_CHARS_LENGTH;
446 : }
447 : break;
448 : }
449 : BYTECODE(AND_CHECK_CHAR) {
450 2196909 : uint32_t c = (insn >> BYTECODE_SHIFT);
451 2196909 : if (c == (current_char & Load32Aligned(pc + 4))) {
452 362488 : pc = code_base + Load32Aligned(pc + 8);
453 : } else {
454 2015665 : pc += BC_AND_CHECK_CHAR_LENGTH;
455 : }
456 : break;
457 : }
458 : BYTECODE(AND_CHECK_NOT_4_CHARS) {
459 0 : uint32_t c = Load32Aligned(pc + 4);
460 0 : if (c != (current_char & Load32Aligned(pc + 8))) {
461 0 : pc = code_base + Load32Aligned(pc + 12);
462 : } else {
463 0 : pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
464 : }
465 : break;
466 : }
467 : BYTECODE(AND_CHECK_NOT_CHAR) {
468 118725 : uint32_t c = (insn >> BYTECODE_SHIFT);
469 118725 : if (c != (current_char & Load32Aligned(pc + 4))) {
470 3348 : pc = code_base + Load32Aligned(pc + 8);
471 : } else {
472 117051 : pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
473 : }
474 : break;
475 : }
476 : BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
477 14 : uint32_t c = (insn >> BYTECODE_SHIFT);
478 14 : uint32_t minus = Load16Aligned(pc + 4);
479 14 : uint32_t mask = Load16Aligned(pc + 6);
480 14 : if (c != ((current_char - minus) & mask)) {
481 0 : pc = code_base + Load32Aligned(pc + 8);
482 : } else {
483 14 : pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
484 : }
485 : break;
486 : }
// Range checks: both bounds are 16-bit operands and are inclusive.
487 : BYTECODE(CHECK_CHAR_IN_RANGE) {
488 2420021 : uint32_t from = Load16Aligned(pc + 4);
489 2420021 : uint32_t to = Load16Aligned(pc + 6);
490 2420021 : if (from <= current_char && current_char <= to) {
491 4276736 : pc = code_base + Load32Aligned(pc + 8);
492 : } else {
493 281653 : pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
494 : }
495 : break;
496 : }
497 : BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
498 4317367 : uint32_t from = Load16Aligned(pc + 4);
499 4317367 : uint32_t to = Load16Aligned(pc + 6);
500 4317367 : if (from > current_char || current_char > to) {
501 178476 : pc = code_base + Load32Aligned(pc + 8);
502 : } else {
503 4228129 : pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
504 : }
505 : break;
506 : }
// CHECK_BIT_IN_TABLE: a bit table is embedded in the instruction starting at
// pc + 8. The masked character selects the byte, its low bits select the bit
// within that byte; branch when the bit is set.
507 : BYTECODE(CHECK_BIT_IN_TABLE) {
508 : int mask = RegExpMacroAssembler::kTableMask;
509 609326 : byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
510 609326 : int bit = (current_char & (kBitsPerByte - 1));
511 609326 : if ((b & (1 << bit)) != 0) {
512 830310 : pc = code_base + Load32Aligned(pc + 4);
513 : } else {
514 194171 : pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
515 : }
516 : break;
517 : }
518 : BYTECODE(CHECK_LT) {
519 2900086 : uint32_t limit = (insn >> BYTECODE_SHIFT);
520 2900086 : if (current_char < limit) {
521 218572 : pc = code_base + Load32Aligned(pc + 4);
522 : } else {
523 2790800 : pc += BC_CHECK_LT_LENGTH;
524 : }
525 : break;
526 : }
527 : BYTECODE(CHECK_GT) {
528 4157769 : uint32_t limit = (insn >> BYTECODE_SHIFT);
529 4157769 : if (current_char > limit) {
530 7028662 : pc = code_base + Load32Aligned(pc + 4);
531 : } else {
532 643438 : pc += BC_CHECK_GT_LENGTH;
533 : }
534 : break;
535 : }
536 : BYTECODE(CHECK_REGISTER_LT)
537 5656 : if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
538 312 : pc = code_base + Load32Aligned(pc + 8);
539 : } else {
540 2672 : pc += BC_CHECK_REGISTER_LT_LENGTH;
541 : }
542 : break;
543 : BYTECODE(CHECK_REGISTER_GE)
544 9688 : if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
545 4308 : pc = code_base + Load32Aligned(pc + 8);
546 : } else {
547 2690 : pc += BC_CHECK_REGISTER_GE_LENGTH;
548 : }
549 : break;
550 : BYTECODE(CHECK_REGISTER_EQ_POS)
551 456212 : if (registers[insn >> BYTECODE_SHIFT] == current) {
552 544842 : pc = code_base + Load32Aligned(pc + 4);
553 : } else {
554 183791 : pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
555 : }
556 : break;
557 : BYTECODE(CHECK_NOT_REGS_EQUAL)
558 0 : if (registers[insn >> BYTECODE_SHIFT] ==
559 : registers[Load32Aligned(pc + 4)]) {
560 0 : pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
561 : } else {
562 0 : pc = code_base + Load32Aligned(pc + 8);
563 : }
564 : break;
// Back-reference opcodes: the operand names a register pair, with the start
// index in the first register and the end index in the next one. When the
// start is negative or the length is not positive, nothing is compared and
// execution falls through without moving |current|. Otherwise the branch at
// pc + 4 is taken if the text does not fit or does not match; on a match
// |current| moves past the matched text (forward, or backward for the
// _BACKWARD variants).
565 : BYTECODE(CHECK_NOT_BACK_REF) {
566 3229 : int from = registers[insn >> BYTECODE_SHIFT];
567 3229 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
568 3229 : if (from >= 0 && len > 0) {
569 2224 : if (current + len > subject.length() ||
570 1408 : CompareChars(&subject[from], &subject[current], len) != 0) {
571 334 : pc = code_base + Load32Aligned(pc + 4);
572 167 : break;
573 : }
574 : current += len;
575 : }
576 3062 : pc += BC_CHECK_NOT_BACK_REF_LENGTH;
577 3062 : break;
578 : }
579 : BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
580 167 : int from = registers[insn >> BYTECODE_SHIFT];
581 167 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
582 167 : if (from >= 0 && len > 0) {
583 246 : if (current - len < 0 ||
584 192 : CompareChars(&subject[from], &subject[current - len], len) != 0) {
585 176 : pc = code_base + Load32Aligned(pc + 4);
586 88 : break;
587 : }
588 : current -= len;
589 : }
590 79 : pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
591 79 : break;
592 : }
// The _UNICODE and plain case-insensitive variants share one body; the opcode
// is re-inspected to decide which flag to pass to BackRefMatchesNoCase.
593 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
594 : V8_FALLTHROUGH;
595 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
596 : bool unicode =
597 46885 : (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
598 103119 : int from = registers[insn >> BYTECODE_SHIFT];
599 103119 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
600 103119 : if (from >= 0 && len > 0) {
601 289090 : if (current + len > subject.length() ||
602 36800 : !BackRefMatchesNoCase(isolate, from, current, len, subject,
603 : unicode)) {
604 194458 : pc = code_base + Load32Aligned(pc + 4);
605 97229 : break;
606 : }
607 : current += len;
608 : }
609 5890 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
610 5890 : break;
611 : }
612 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
613 : V8_FALLTHROUGH;
614 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
615 : bool unicode = (insn & BYTECODE_MASK) ==
616 43 : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
617 44 : int from = registers[insn >> BYTECODE_SHIFT];
618 44 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
619 44 : if (from >= 0 && len > 0) {
620 80 : if (current - len < 0 ||
621 35 : !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
622 : unicode)) {
623 38 : pc = code_base + Load32Aligned(pc + 4);
624 19 : break;
625 : }
626 : current -= len;
627 : }
628 25 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
629 25 : break;
630 : }
631 : BYTECODE(CHECK_AT_START)
632 4929 : if (current == 0) {
633 106 : pc = code_base + Load32Aligned(pc + 4);
634 : } else {
635 4876 : pc += BC_CHECK_AT_START_LENGTH;
636 : }
637 : break;
// CHECK_NOT_AT_START: the packed operand is an offset added to |current|
// before the comparison with position 0.
638 : BYTECODE(CHECK_NOT_AT_START)
639 23050 : if (current + (insn >> BYTECODE_SHIFT) == 0) {
640 13026 : pc += BC_CHECK_NOT_AT_START_LENGTH;
641 : } else {
642 20048 : pc = code_base + Load32Aligned(pc + 4);
643 : }
644 : break;
// SET_CURRENT_POSITION_FROM_END: if more than |by| characters remain after
// |current|, jump forward so that exactly |by| characters remain, and reload
// |current_char| from the character just before the new position.
645 : BYTECODE(SET_CURRENT_POSITION_FROM_END) {
646 169 : int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
647 169 : if (subject.length() - current > by) {
648 140 : current = subject.length() - by;
649 280 : current_char = subject[current - 1];
650 : }
651 169 : pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
652 169 : break;
653 : }
654 : default:
655 0 : UNREACHABLE();
656 : break;
657 : }
658 : }
659 : }
660 :
661 : } // namespace
662 :
663 : // static
664 4282055 : IrregexpInterpreter::Result IrregexpInterpreter::Match(
665 : Isolate* isolate, Handle<ByteArray> code_array,
666 : Handle<String> subject_string, int* registers, int start_position) {
667 : DCHECK(subject_string->IsFlat());
668 :
669 : // Note: Heap allocation *is* allowed in two situations:
670 : // 1. When creating & throwing a stack overflow exception. The interpreter
671 : // aborts afterwards, and thus possible-moved objects are never used.
672 : // 2. When handling interrupts. We manually relocate unhandlified references
673 : // after interrupts have run.
674 : DisallowHeapAllocation no_gc;
675 :
676 : uc16 previous_char = '\n';
677 4282055 : String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
678 4282055 : if (subject_content.IsOneByte()) {
679 1627112 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
680 2953935 : if (start_position != 0) previous_char = subject_vector[start_position - 1];
681 1627112 : return RawMatch(isolate, code_array, subject_string, subject_vector,
682 1627112 : registers, start_position, previous_char);
683 : } else {
684 : DCHECK(subject_content.IsTwoByte());
685 2654943 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
686 2807217 : if (start_position != 0) previous_char = subject_vector[start_position - 1];
687 2654943 : return RawMatch(isolate, code_array, subject_string, subject_vector,
688 2654943 : registers, start_position, previous_char);
689 : }
690 : }
691 :
692 : } // namespace internal
693 121996 : } // namespace v8
|