Line data Source code
1 : // Copyright 2011 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : // A simple interpreter for the Irregexp byte code.
6 :
7 : #include "src/regexp/interpreter-irregexp.h"
8 :
9 : #include "src/ast/ast.h"
10 : #include "src/objects-inl.h"
11 : #include "src/regexp/bytecodes-irregexp.h"
12 : #include "src/regexp/jsregexp.h"
13 : #include "src/regexp/regexp-macro-assembler.h"
14 : #include "src/unicode.h"
15 : #include "src/utils.h"
16 :
17 : #ifdef V8_INTL_SUPPORT
18 : #include "unicode/uchar.h"
19 : #endif // V8_INTL_SUPPORT
20 :
21 : namespace v8 {
22 : namespace internal {
23 :
24 : typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
25 :
26 : static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
27 : int len, Vector<const uc16> subject,
28 : bool unicode) {
29 : Address offset_a =
30 92450 : reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
31 : Address offset_b =
32 92450 : reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
33 46225 : size_t length = len * kUC16Size;
34 46225 : return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
35 : offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
36 : }
37 :
38 :
39 36835 : static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
40 : int len, Vector<const uint8_t> subject,
41 : bool unicode) {
42 : // For Latin1 characters the unicode flag makes no difference.
43 43097 : for (int i = 0; i < len; i++) {
44 73712 : unsigned int old_char = subject[from++];
45 73712 : unsigned int new_char = subject[current++];
46 36856 : if (old_char == new_char) continue;
47 : // Convert both characters to lower case.
48 33858 : old_char |= 0x20;
49 33858 : new_char |= 0x20;
50 33858 : if (old_char != new_char) return false;
51 : // Not letters in the ASCII range and Latin-1 range.
52 456 : if (!(old_char - 'a' <= 'z' - 'a') &&
53 177 : !(old_char - 224 <= 254 - 224 && old_char != 247)) {
54 : return false;
55 : }
56 : }
57 : return true;
58 : }
59 :
60 :
61 : #ifdef DEBUG
62 : static void TraceInterpreter(const byte* code_base,
63 : const byte* pc,
64 : int stack_depth,
65 : int current_position,
66 : uint32_t current_char,
67 : int bytecode_length,
68 : const char* bytecode_name) {
69 : if (FLAG_trace_regexp_bytecodes) {
70 : bool printable = (current_char < 127 && current_char >= 32);
71 : const char* format =
72 : printable ?
73 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
74 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
75 : PrintF(format,
76 : pc - code_base,
77 : stack_depth,
78 : current_position,
79 : current_char,
80 : printable ? current_char : '.',
81 : bytecode_name);
82 : for (int i = 0; i < bytecode_length; i++) {
83 : printf(", %02x", pc[i]);
84 : }
85 : printf(" ");
86 : for (int i = 1; i < bytecode_length; i++) {
87 : unsigned char b = pc[i];
88 : if (b < 127 && b >= 32) {
89 : printf("%c", b);
90 : } else {
91 : printf(".");
92 : }
93 : }
94 : printf("\n");
95 : }
96 : }
97 :
98 :
// Debug-build form of BYTECODE(name): emits the `case BC_<name>:` label and,
// as the first statement of that case, a TraceInterpreter call for the
// instruction. The expansion site must have code_base, pc, backtrack_sp,
// backtrack_stack_base, current and current_char in scope.
99 : #define BYTECODE(name) \
100 : case BC_##name: \
101 : TraceInterpreter(code_base, \
102 : pc, \
103 : static_cast<int>(backtrack_sp - backtrack_stack_base), \
104 : current, \
105 : current_char, \
106 : BC_##name##_LENGTH, \
107 : #name);
108 : #else
// Non-debug form of BYTECODE(name): expands to just the `case BC_<name>:`
// label, with no tracing.
109 : #define BYTECODE(name) \
110 : case BC_##name:
111 : #endif
112 :
113 :
114 : static int32_t Load32Aligned(const byte* pc) {
115 : DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
116 980462315 : return *reinterpret_cast<const int32_t *>(pc);
117 : }
118 :
119 :
120 : static int32_t Load16Aligned(const byte* pc) {
121 : DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
122 13473684 : return *reinterpret_cast<const uint16_t *>(pc);
123 : }
124 :
125 :
126 : // A simple abstraction over the backtracking stack used by the interpreter.
127 : // This backtracking stack does not grow automatically, but it ensures that the
128 : // the memory held by the stack is released or remembered in a cache if the
129 : // matching terminates.
130 : class BacktrackStack {
131 : public:
132 4281270 : BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
133 :
134 : ~BacktrackStack() {
135 : DeleteArray(data_);
136 : }
137 :
138 : int* data() const { return data_; }
139 :
140 : int max_size() const { return kBacktrackStackSize; }
141 :
142 : private:
143 : static const int kBacktrackStackSize = 10000;
144 :
145 : int* data_;
146 :
147 : DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
148 : };
149 :
150 : namespace {
151 :
152 : IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
153 : // We abort interpreter execution after the stack overflow is thrown, and thus
154 : // allow allocation here despite the outer DisallowHeapAllocationScope.
155 : AllowHeapAllocation yes_gc;
156 4 : isolate->StackOverflow();
157 : return IrregexpInterpreter::EXCEPTION;
158 : }
159 :
160 : // Runs all pending interrupts. Callers must update unhandlified object
161 : // references after this function completes.
162 1486491 : IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
163 : Handle<String> subject_string) {
164 : DisallowHeapAllocation no_gc;
165 :
166 : StackLimitCheck check(isolate);
167 1486491 : if (check.JsHasOverflowed()) {
168 : // A real stack overflow.
169 2 : return StackOverflow(isolate);
170 : }
171 :
172 : const bool was_one_byte =
173 1486489 : String::IsOneByteRepresentationUnderneath(*subject_string);
174 :
175 : Object result;
176 : {
177 : AllowHeapAllocation yes_gc;
178 1486489 : result = isolate->stack_guard()->HandleInterrupts();
179 : }
180 :
181 1486489 : if (result->IsException(isolate)) {
182 : return IrregexpInterpreter::EXCEPTION;
183 : }
184 :
185 : // If we changed between a LATIN1 and a UC16 string, we need to restart
186 : // regexp matching with the appropriate template instantiation of RawMatch.
187 1486485 : if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
188 : was_one_byte) {
189 : return IrregexpInterpreter::RETRY;
190 : }
191 :
192 1486484 : return IrregexpInterpreter::SUCCESS;
193 : }
194 :
195 : template <typename Char>
196 1486484 : void UpdateCodeAndSubjectReferences(Isolate* isolate,
197 : Handle<ByteArray> code_array,
198 : Handle<String> subject_string,
199 : const byte** code_base_out,
200 : const byte** pc_out,
201 : Vector<const Char>* subject_string_out) {
202 : DisallowHeapAllocation no_gc;
203 :
204 2972968 : if (*code_base_out != code_array->GetDataStartAddress()) {
205 1 : const intptr_t pc_offset = *pc_out - *code_base_out;
206 : DCHECK_GT(pc_offset, 0);
207 1 : *code_base_out = code_array->GetDataStartAddress();
208 1 : *pc_out = *code_base_out + pc_offset;
209 : }
210 :
211 : DCHECK(subject_string->IsFlat());
212 2972968 : *subject_string_out = subject_string->GetCharVector<Char>(no_gc);
213 1486484 : }
214 :
// Core interpreter loop: executes the Irregexp bytecode held in |code_array|
// against |subject|.
//
//   isolate         used for stack-overflow reporting, interrupt handling and
//                   case-insensitive back-reference comparison.
//   code_array      handle to the bytecode; raw pointers into it are refreshed
//                   after interrupts have run.
//   subject_string  handle to the subject; |subject| is refreshed from it after
//                   interrupts have run.
//   subject         character view of the subject (Char is the character type).
//   registers       register file read and written by the bytecodes.
//   current         initial position in the subject.
//   current_char    initial value of the "current character" register.
//
// Instruction format as used below: every instruction starts with a 32-bit
// word; `insn & BYTECODE_MASK` selects the opcode and `insn >> BYTECODE_SHIFT`
// is an inline operand. Additional operands are read at pc + 4, pc + 8, ...
// Jump targets are offsets that are added to code_base.
//
// Returns SUCCESS for BC_SUCCEED, FAILURE for BC_FAIL, the StackOverflow()
// result when the backtrack stack is exhausted, and any non-SUCCESS result of
// HandleInterrupts() (checked on every BC_POP_BT).
215 : template <typename Char>
216 4281270 : IrregexpInterpreter::Result RawMatch(Isolate* isolate,
217 : Handle<ByteArray> code_array,
218 : Handle<String> subject_string,
219 : Vector<const Char> subject, int* registers,
220 : int current, uint32_t current_char) {
221 : DisallowHeapAllocation no_gc;
222 :
223 4281270 : const byte* pc = code_array->GetDataStartAddress();
224 4281270 : const byte* code_base = pc;
225 :
226 : // BacktrackStack ensures that the memory allocated for the backtracking stack
227 : // is returned to the system or cached if there is no stack being cached at
228 : // the moment.
229 : BacktrackStack backtrack_stack;
230 : int* backtrack_stack_base = backtrack_stack.data();
231 : int* backtrack_sp = backtrack_stack_base;
// Number of free slots left on the backtrack stack; kept in sync with
// backtrack_sp by every push and pop below.
232 : int backtrack_stack_space = backtrack_stack.max_size();
233 : #ifdef DEBUG
234 : if (FLAG_trace_regexp_bytecodes) {
235 : PrintF("\n\nStart bytecode interpreter\n\n");
236 : }
237 : #endif
238 : while (true) {
239 753175562 : int32_t insn = Load32Aligned(pc);
240 753175562 : switch (insn & BYTECODE_MASK) {
241 : BYTECODE(BREAK)
242 0 : UNREACHABLE();
// --- Backtrack stack pushes. Each push first claims a slot; running out of
// slots is reported through StackOverflow().
243 : BYTECODE(PUSH_CP)
244 5637244 : if (--backtrack_stack_space < 0) {
245 1 : return StackOverflow(isolate);
246 : }
247 5637243 : *backtrack_sp++ = current;
248 5637243 : pc += BC_PUSH_CP_LENGTH;
249 5637243 : break;
// Pushes the operand at pc + 4; BC_POP_BT later adds the popped value to
// code_base, so this is a code offset.
250 : BYTECODE(PUSH_BT)
251 9762629 : if (--backtrack_stack_space < 0) {
252 0 : return StackOverflow(isolate);
253 : }
254 19525258 : *backtrack_sp++ = Load32Aligned(pc + 4);
255 9762629 : pc += BC_PUSH_BT_LENGTH;
256 9762629 : break;
257 : BYTECODE(PUSH_REGISTER)
258 1501422 : if (--backtrack_stack_space < 0) {
259 1 : return StackOverflow(isolate);
260 : }
261 1501421 : *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
262 1501421 : pc += BC_PUSH_REGISTER_LENGTH;
263 1501421 : break;
// --- Register manipulation. The inline operand is the register index.
264 : BYTECODE(SET_REGISTER)
265 3095338 : registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
266 1547669 : pc += BC_SET_REGISTER_LENGTH;
267 1547669 : break;
268 : BYTECODE(ADVANCE_REGISTER)
269 136646 : registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
270 136646 : pc += BC_ADVANCE_REGISTER_LENGTH;
271 136646 : break;
272 : BYTECODE(SET_REGISTER_TO_CP)
273 20290464 : registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
274 10145232 : pc += BC_SET_REGISTER_TO_CP_LENGTH;
275 10145232 : break;
276 : BYTECODE(SET_CP_TO_REGISTER)
277 3168 : current = registers[insn >> BYTECODE_SHIFT];
278 3168 : pc += BC_SET_CP_TO_REGISTER_LENGTH;
279 3168 : break;
// Stores the backtrack stack depth (in entries) rather than a raw pointer.
280 : BYTECODE(SET_REGISTER_TO_SP)
281 13152 : registers[insn >> BYTECODE_SHIFT] =
282 : static_cast<int>(backtrack_sp - backtrack_stack_base);
283 13152 : pc += BC_SET_REGISTER_TO_SP_LENGTH;
284 13152 : break;
// Restores a previously saved stack depth and recomputes the free-slot count
// to match.
285 : BYTECODE(SET_SP_TO_REGISTER)
286 3168 : backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
287 3168 : backtrack_stack_space = backtrack_stack.max_size() -
288 : static_cast<int>(backtrack_sp - backtrack_stack_base);
289 3168 : pc += BC_SET_SP_TO_REGISTER_LENGTH;
290 3168 : break;
// --- Backtrack stack pops.
291 : BYTECODE(POP_CP)
292 1041947 : backtrack_stack_space++;
293 1041947 : --backtrack_sp;
294 1041947 : current = *backtrack_sp;
295 1041947 : pc += BC_POP_CP_LENGTH;
296 1041947 : break;
297 : // clang-format off
// Backtracking is the point where pending interrupts are serviced. Interrupt
// handling may allocate, so code_base, pc and subject are re-derived from
// their handles before the popped code offset is applied.
298 : BYTECODE(POP_BT) {
299 : IrregexpInterpreter::Result return_code = HandleInterrupts(
300 1486491 : isolate, subject_string);
301 1486491 : if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
302 :
303 1486484 : UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
304 : &code_base, &pc, &subject);
305 :
306 1486484 : backtrack_stack_space++;
307 1486484 : --backtrack_sp;
308 1486484 : pc = code_base + *backtrack_sp;
309 1486484 : break;
310 : }
311 : BYTECODE(POP_REGISTER) // clang-format on
312 1474964 : backtrack_stack_space++;
313 1474964 : --backtrack_sp;
314 1474964 : registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
315 1474964 : pc += BC_POP_REGISTER_LENGTH;
316 1474964 : break;
// --- Termination.
317 : BYTECODE(FAIL)
318 : return IrregexpInterpreter::FAILURE;
319 : BYTECODE(SUCCEED)
320 4106571 : return IrregexpInterpreter::SUCCESS;
// --- Position changes and unconditional jumps.
321 : BYTECODE(ADVANCE_CP)
322 4226981 : current += insn >> BYTECODE_SHIFT;
323 4226981 : pc += BC_ADVANCE_CP_LENGTH;
324 4226981 : break;
325 : BYTECODE(GOTO)
326 5020356 : pc = code_base + Load32Aligned(pc + 4);
327 2510178 : break;
328 : BYTECODE(ADVANCE_CP_AND_GOTO)
329 190118670 : current += insn >> BYTECODE_SHIFT;
330 380237340 : pc = code_base + Load32Aligned(pc + 4);
331 190118670 : break;
// If the top backtrack-stack entry equals the current position, drop that
// entry and jump; otherwise fall through to the next instruction.
332 : BYTECODE(CHECK_GREEDY)
333 697240 : if (current == backtrack_sp[-1]) {
334 408838 : backtrack_sp--;
335 408838 : backtrack_stack_space++;
336 817676 : pc = code_base + Load32Aligned(pc + 4);
337 : } else {
338 288402 : pc += BC_CHECK_GREEDY_LENGTH;
339 : }
340 : break;
// --- Character loads. The inline operand is an offset from |current|. The
// checked variants jump to the target at pc + 4 when the load would fall
// outside the subject; the UNCHECKED variants perform no range test.
341 : BYTECODE(LOAD_CURRENT_CHAR) {
342 200429347 : int pos = current + (insn >> BYTECODE_SHIFT);
343 200429347 : if (pos >= subject.length() || pos < 0) {
344 2027190 : pc = code_base + Load32Aligned(pc + 4);
345 : } else {
346 398831504 : current_char = subject[pos];
347 199415752 : pc += BC_LOAD_CURRENT_CHAR_LENGTH;
348 : }
349 : break;
350 : }
351 : BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
352 3200728 : int pos = current + (insn >> BYTECODE_SHIFT);
353 6401456 : current_char = subject[pos];
354 3200728 : pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
355 3200728 : break;
356 : }
// Packs two consecutive characters into current_char: subject[pos] in the low
// bits and subject[pos + 1] shifted up by the bit width of Char.
357 : BYTECODE(LOAD_2_CURRENT_CHARS) {
358 0 : int pos = current + (insn >> BYTECODE_SHIFT);
359 0 : if (pos + 2 > subject.length() || pos < 0) {
360 0 : pc = code_base + Load32Aligned(pc + 4);
361 : } else {
362 0 : Char next = subject[pos + 1];
363 0 : current_char =
364 0 : (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
365 0 : pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
366 : }
367 : break;
368 : }
369 : BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
370 0 : int pos = current + (insn >> BYTECODE_SHIFT);
371 0 : Char next = subject[pos + 1];
372 0 : current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
373 0 : pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
374 0 : break;
375 : }
// Packs four one-byte characters into current_char, lowest position in the
// lowest byte. Only valid for one-byte subjects (see the DCHECK).
376 : BYTECODE(LOAD_4_CURRENT_CHARS) {
377 : DCHECK_EQ(1, sizeof(Char));
378 0 : int pos = current + (insn >> BYTECODE_SHIFT);
379 0 : if (pos + 4 > subject.length() || pos < 0) {
380 0 : pc = code_base + Load32Aligned(pc + 4);
381 : } else {
382 0 : Char next1 = subject[pos + 1];
383 0 : Char next2 = subject[pos + 2];
384 0 : Char next3 = subject[pos + 3];
385 0 : current_char = (subject[pos] |
386 : (next1 << 8) |
387 : (next2 << 16) |
388 : (next3 << 24));
389 0 : pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
390 : }
391 : break;
392 : }
393 : BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
394 : DCHECK_EQ(1, sizeof(Char));
395 0 : int pos = current + (insn >> BYTECODE_SHIFT);
396 0 : Char next1 = subject[pos + 1];
397 0 : Char next2 = subject[pos + 2];
398 0 : Char next3 = subject[pos + 3];
399 0 : current_char = (subject[pos] |
400 : (next1 << 8) |
401 : (next2 << 16) |
402 : (next3 << 24));
403 0 : pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
404 0 : break;
405 : }
// --- Tests on current_char. Each test jumps to its target operand when the
// stated condition holds and otherwise advances to the next instruction. The
// *_4_CHARS variants take the comparison value from pc + 4 instead of the
// inline operand.
406 : BYTECODE(CHECK_4_CHARS) {
407 0 : uint32_t c = Load32Aligned(pc + 4);
408 0 : if (c == current_char) {
409 0 : pc = code_base + Load32Aligned(pc + 8);
410 : } else {
411 0 : pc += BC_CHECK_4_CHARS_LENGTH;
412 : }
413 : break;
414 : }
415 : BYTECODE(CHECK_CHAR) {
416 296138431 : uint32_t c = (insn >> BYTECODE_SHIFT);
417 296138431 : if (c == current_char) {
418 4017790 : pc = code_base + Load32Aligned(pc + 4);
419 : } else {
420 294129536 : pc += BC_CHECK_CHAR_LENGTH;
421 : }
422 : break;
423 : }
424 : BYTECODE(CHECK_NOT_4_CHARS) {
425 0 : uint32_t c = Load32Aligned(pc + 4);
426 0 : if (c != current_char) {
427 0 : pc = code_base + Load32Aligned(pc + 8);
428 : } else {
429 0 : pc += BC_CHECK_NOT_4_CHARS_LENGTH;
430 : }
431 : break;
432 : }
433 : BYTECODE(CHECK_NOT_CHAR) {
434 1442810 : uint32_t c = (insn >> BYTECODE_SHIFT);
435 1442810 : if (c != current_char) {
436 870356 : pc = code_base + Load32Aligned(pc + 4);
437 : } else {
438 1007632 : pc += BC_CHECK_NOT_CHAR_LENGTH;
439 : }
440 : break;
441 : }
// The AND_* variants mask current_char with an operand before comparing.
442 : BYTECODE(AND_CHECK_4_CHARS) {
443 0 : uint32_t c = Load32Aligned(pc + 4);
444 0 : if (c == (current_char & Load32Aligned(pc + 8))) {
445 0 : pc = code_base + Load32Aligned(pc + 12);
446 : } else {
447 0 : pc += BC_AND_CHECK_4_CHARS_LENGTH;
448 : }
449 : break;
450 : }
451 : BYTECODE(AND_CHECK_CHAR) {
452 2196140 : uint32_t c = (insn >> BYTECODE_SHIFT);
453 2196140 : if (c == (current_char & Load32Aligned(pc + 4))) {
454 360960 : pc = code_base + Load32Aligned(pc + 8);
455 : } else {
456 2015660 : pc += BC_AND_CHECK_CHAR_LENGTH;
457 : }
458 : break;
459 : }
460 : BYTECODE(AND_CHECK_NOT_4_CHARS) {
461 0 : uint32_t c = Load32Aligned(pc + 4);
462 0 : if (c != (current_char & Load32Aligned(pc + 8))) {
463 0 : pc = code_base + Load32Aligned(pc + 12);
464 : } else {
465 0 : pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
466 : }
467 : break;
468 : }
469 : BYTECODE(AND_CHECK_NOT_CHAR) {
470 144645 : uint32_t c = (insn >> BYTECODE_SHIFT);
471 144645 : if (c != (current_char & Load32Aligned(pc + 4))) {
472 3348 : pc = code_base + Load32Aligned(pc + 8);
473 : } else {
474 142971 : pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
475 : }
476 : break;
477 : }
// Subtracts the 16-bit operand at pc + 4 from current_char, masks the result
// with the 16-bit operand at pc + 6, and jumps if it differs from the inline
// operand.
478 : BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
479 15 : uint32_t c = (insn >> BYTECODE_SHIFT);
480 15 : uint32_t minus = Load16Aligned(pc + 4);
481 15 : uint32_t mask = Load16Aligned(pc + 6);
482 15 : if (c != ((current_char - minus) & mask)) {
483 0 : pc = code_base + Load32Aligned(pc + 8);
484 : } else {
485 15 : pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
486 : }
487 : break;
488 : }
// Range tests: the bounds are 16-bit operands at pc + 4 (from) and pc + 6 (to),
// both inclusive.
489 : BYTECODE(CHECK_CHAR_IN_RANGE) {
490 2419981 : uint32_t from = Load16Aligned(pc + 4);
491 2419981 : uint32_t to = Load16Aligned(pc + 6);
492 2419981 : if (from <= current_char && current_char <= to) {
493 4276752 : pc = code_base + Load32Aligned(pc + 8);
494 : } else {
495 281605 : pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
496 : }
497 : break;
498 : }
499 : BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
500 4316846 : uint32_t from = Load16Aligned(pc + 4);
501 4316846 : uint32_t to = Load16Aligned(pc + 6);
502 4316846 : if (from > current_char || current_char > to) {
503 178960 : pc = code_base + Load32Aligned(pc + 8);
504 : } else {
505 4227366 : pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
506 : }
507 : break;
508 : }
// Bit-table lookup: the table is embedded in the instruction starting at
// pc + 8. current_char is masked with kTableMask; the masked value shifted
// right by kBitsPerByteLog2 picks the table byte and the bits of current_char
// below kBitsPerByte pick the bit inside it. Jumps if that bit is set.
509 : BYTECODE(CHECK_BIT_IN_TABLE) {
510 : int mask = RegExpMacroAssembler::kTableMask;
511 609324 : byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
512 609324 : int bit = (current_char & (kBitsPerByte - 1));
513 609324 : if ((b & (1 << bit)) != 0) {
514 830310 : pc = code_base + Load32Aligned(pc + 4);
515 : } else {
516 194169 : pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
517 : }
518 : break;
519 : }
520 : BYTECODE(CHECK_LT) {
521 2900016 : uint32_t limit = (insn >> BYTECODE_SHIFT);
522 2900016 : if (current_char < limit) {
523 218530 : pc = code_base + Load32Aligned(pc + 4);
524 : } else {
525 2790751 : pc += BC_CHECK_LT_LENGTH;
526 : }
527 : break;
528 : }
529 : BYTECODE(CHECK_GT) {
530 4157784 : uint32_t limit = (insn >> BYTECODE_SHIFT);
531 4157784 : if (current_char > limit) {
532 7028640 : pc = code_base + Load32Aligned(pc + 4);
533 : } else {
534 643464 : pc += BC_CHECK_GT_LENGTH;
535 : }
536 : break;
537 : }
// --- Tests on registers. The inline operand is the register index.
538 : BYTECODE(CHECK_REGISTER_LT)
539 5656 : if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
540 312 : pc = code_base + Load32Aligned(pc + 8);
541 : } else {
542 2672 : pc += BC_CHECK_REGISTER_LT_LENGTH;
543 : }
544 : break;
545 : BYTECODE(CHECK_REGISTER_GE)
546 9688 : if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
547 4308 : pc = code_base + Load32Aligned(pc + 8);
548 : } else {
549 2690 : pc += BC_CHECK_REGISTER_GE_LENGTH;
550 : }
551 : break;
552 : BYTECODE(CHECK_REGISTER_EQ_POS)
553 489054 : if (registers[insn >> BYTECODE_SHIFT] == current) {
554 584252 : pc = code_base + Load32Aligned(pc + 4);
555 : } else {
556 196928 : pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
557 : }
558 : break;
// Compares two registers (second index at pc + 4); jumps when they differ.
559 : BYTECODE(CHECK_NOT_REGS_EQUAL)
560 0 : if (registers[insn >> BYTECODE_SHIFT] ==
561 : registers[Load32Aligned(pc + 4)]) {
562 0 : pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
563 : } else {
564 0 : pc = code_base + Load32Aligned(pc + 8);
565 : }
566 : break;
// --- Back references. The inline operand names a register pair: the first
// register holds the start of the referenced range and the next one its end.
// When the start is negative or the range is not positive in length, the
// instruction succeeds without moving |current|. Otherwise the referenced
// text must match at the current position, or the instruction jumps to the
// target at pc + 4. On a match |current| moves past (forward variants) or
// before (BACKWARD variants) the compared text.
567 : BYTECODE(CHECK_NOT_BACK_REF) {
568 3229 : int from = registers[insn >> BYTECODE_SHIFT];
569 3229 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
570 3229 : if (from >= 0 && len > 0) {
571 2224 : if (current + len > subject.length() ||
572 1408 : CompareChars(&subject[from], &subject[current], len) != 0) {
573 334 : pc = code_base + Load32Aligned(pc + 4);
574 167 : break;
575 : }
576 : current += len;
577 : }
578 3062 : pc += BC_CHECK_NOT_BACK_REF_LENGTH;
579 3062 : break;
580 : }
581 : BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
582 167 : int from = registers[insn >> BYTECODE_SHIFT];
583 167 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
584 167 : if (from >= 0 && len > 0) {
585 246 : if (current - len < 0 ||
586 192 : CompareChars(&subject[from], &subject[current - len], len) != 0) {
587 176 : pc = code_base + Load32Aligned(pc + 4);
588 88 : break;
589 : }
590 : current -= len;
591 : }
592 79 : pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
593 79 : break;
594 : }
// The UNICODE opcode shares the implementation of the plain NO_CASE opcode via
// fallthrough; the opcode is re-read from insn to derive the unicode flag.
// Both opcodes advance pc by BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH.
595 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
596 : V8_FALLTHROUGH;
597 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
598 : bool unicode =
599 46885 : (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
600 103119 : int from = registers[insn >> BYTECODE_SHIFT];
601 103119 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
602 103119 : if (from >= 0 && len > 0) {
603 289090 : if (current + len > subject.length() ||
604 36800 : !BackRefMatchesNoCase(isolate, from, current, len, subject,
605 : unicode)) {
606 194458 : pc = code_base + Load32Aligned(pc + 4);
607 97229 : break;
608 : }
609 : current += len;
610 : }
611 5890 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
612 5890 : break;
613 : }
// Backward counterpart of the case-insensitive back reference, with the same
// fallthrough arrangement for the UNICODE opcode.
614 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
615 : V8_FALLTHROUGH;
616 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
617 : bool unicode = (insn & BYTECODE_MASK) ==
618 43 : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
619 44 : int from = registers[insn >> BYTECODE_SHIFT];
620 44 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
621 44 : if (from >= 0 && len > 0) {
622 80 : if (current - len < 0 ||
623 35 : !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
624 : unicode)) {
625 38 : pc = code_base + Load32Aligned(pc + 4);
626 19 : break;
627 : }
628 : current -= len;
629 : }
630 25 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
631 25 : break;
632 : }
// --- Position tests.
633 : BYTECODE(CHECK_AT_START)
634 4929 : if (current == 0) {
635 106 : pc = code_base + Load32Aligned(pc + 4);
636 : } else {
637 4876 : pc += BC_CHECK_AT_START_LENGTH;
638 : }
639 : break;
// Jumps unless current plus the inline offset is exactly position 0.
640 : BYTECODE(CHECK_NOT_AT_START)
641 23050 : if (current + (insn >> BYTECODE_SHIFT) == 0) {
642 13026 : pc += BC_CHECK_NOT_AT_START_LENGTH;
643 : } else {
644 20048 : pc = code_base + Load32Aligned(pc + 4);
645 : }
646 : break;
// If more than |by| characters remain after |current|, moves |current| to
// |by| characters before the end of the subject and reloads current_char with
// the character just before the new position. The operand is extracted with
// an unsigned shift, unlike the other inline operands.
647 : BYTECODE(SET_CURRENT_POSITION_FROM_END) {
648 169 : int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
649 169 : if (subject.length() - current > by) {
650 140 : current = subject.length() - by;
651 280 : current_char = subject[current - 1];
652 : }
653 169 : pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
654 169 : break;
655 : }
656 : default:
657 0 : UNREACHABLE();
658 : break;
659 : }
660 : }
661 : }
662 :
663 : } // namespace
664 :
665 : // static
666 4281270 : IrregexpInterpreter::Result IrregexpInterpreter::Match(
667 : Isolate* isolate, Handle<ByteArray> code_array,
668 : Handle<String> subject_string, int* registers, int start_position) {
669 : DCHECK(subject_string->IsFlat());
670 :
671 : // Note: Heap allocation *is* allowed in two situations:
672 : // 1. When creating & throwing a stack overflow exception. The interpreter
673 : // aborts afterwards, and thus possible-moved objects are never used.
674 : // 2. When handling interrupts. We manually relocate unhandlified references
675 : // after interrupts have run.
676 : DisallowHeapAllocation no_gc;
677 :
678 : uc16 previous_char = '\n';
679 4281270 : String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
680 4281270 : if (subject_content.IsOneByte()) {
681 1626331 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
682 2952386 : if (start_position != 0) previous_char = subject_vector[start_position - 1];
683 1626331 : return RawMatch(isolate, code_array, subject_string, subject_vector,
684 1626331 : registers, start_position, previous_char);
685 : } else {
686 : DCHECK(subject_content.IsTwoByte());
687 2654939 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
688 2807213 : if (start_position != 0) previous_char = subject_vector[start_position - 1];
689 2654939 : return RawMatch(isolate, code_array, subject_string, subject_vector,
690 2654939 : registers, start_position, previous_char);
691 : }
692 : }
693 :
694 : } // namespace internal
695 120216 : } // namespace v8
|