Line data Source code
1 : // Copyright 2011 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : // A simple interpreter for the Irregexp byte code.
6 :
7 : #include "src/regexp/interpreter-irregexp.h"
8 :
9 : #include "src/ast/ast.h"
10 : #include "src/objects-inl.h"
11 : #include "src/regexp/bytecodes-irregexp.h"
12 : #include "src/regexp/jsregexp.h"
13 : #include "src/regexp/regexp-macro-assembler.h"
14 : #include "src/unicode.h"
15 : #include "src/utils.h"
16 :
17 : #ifdef V8_INTL_SUPPORT
18 : #include "unicode/uchar.h"
19 : #endif // V8_INTL_SUPPORT
20 :
21 : namespace v8 {
22 : namespace internal {
23 :
24 : typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
25 :
26 46225 : static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
27 : int len, Vector<const uc16> subject,
28 : bool unicode) {
29 : Address offset_a =
30 92450 : reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
31 : Address offset_b =
32 92450 : reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
33 46225 : size_t length = len * kUC16Size;
34 : return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
35 46225 : offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
36 : }
37 :
38 :
39 36835 : static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
40 : int len, Vector<const uint8_t> subject,
41 : bool unicode) {
42 : // For Latin1 characters the unicode flag makes no difference.
43 3131 : for (int i = 0; i < len; i++) {
44 73712 : unsigned int old_char = subject[from++];
45 73712 : unsigned int new_char = subject[current++];
46 36856 : if (old_char == new_char) continue;
47 : // Convert both characters to lower case.
48 33858 : old_char |= 0x20;
49 33858 : new_char |= 0x20;
50 33858 : if (old_char != new_char) return false;
51 : // Not letters in the ASCII range and Latin-1 range.
52 456 : if (!(old_char - 'a' <= 'z' - 'a') &&
53 177 : !(old_char - 224 <= 254 - 224 && old_char != 247)) {
54 : return false;
55 : }
56 : }
57 : return true;
58 : }
59 :
60 :
61 : #ifdef DEBUG
62 : static void TraceInterpreter(const byte* code_base,
63 : const byte* pc,
64 : int stack_depth,
65 : int current_position,
66 : uint32_t current_char,
67 : int bytecode_length,
68 : const char* bytecode_name) {
69 : if (FLAG_trace_regexp_bytecodes) {
70 : bool printable = (current_char < 127 && current_char >= 32);
71 : const char* format =
72 : printable ?
73 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
74 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
75 : PrintF(format,
76 : pc - code_base,
77 : stack_depth,
78 : current_position,
79 : current_char,
80 : printable ? current_char : '.',
81 : bytecode_name);
82 : for (int i = 0; i < bytecode_length; i++) {
83 : printf(", %02x", pc[i]);
84 : }
85 : printf(" ");
86 : for (int i = 1; i < bytecode_length; i++) {
87 : unsigned char b = pc[i];
88 : if (b < 127 && b >= 32) {
89 : printf("%c", b);
90 : } else {
91 : printf(".");
92 : }
93 : }
94 : printf("\n");
95 : }
96 : }
97 :
98 :
// DEBUG variant of BYTECODE(name): emits the `case BC_<name>:` label and then
// calls TraceInterpreter with the interpreter's local state (code_base, pc,
// backtrack stack depth, current, current_char), the bytecode's length
// constant BC_<name>_LENGTH and its stringified name. It relies on those
// locals being in scope at the expansion site.
99 : #define BYTECODE(name) \
100 : case BC_##name: \
101 : TraceInterpreter(code_base, \
102 : pc, \
103 : static_cast<int>(backtrack_sp - backtrack_stack_base), \
104 : current, \
105 : current_char, \
106 : BC_##name##_LENGTH, \
107 : #name);
108 : #else
// Non-DEBUG variant of BYTECODE(name): expands to just the `case BC_<name>:`
// label, with no tracing.
109 : #define BYTECODE(name) \
110 : case BC_##name:
111 : #endif
112 :
113 :
114 : static int32_t Load32Aligned(const byte* pc) {
115 : DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
116 2569052332 : return *reinterpret_cast<const int32_t *>(pc);
117 : }
118 :
119 :
120 : static int32_t Load16Aligned(const byte* pc) {
121 : DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
122 13474770 : return *reinterpret_cast<const uint16_t *>(pc);
123 : }
124 :
125 :
126 : // A simple abstraction over the backtracking stack used by the interpreter.
127 : // This backtracking stack does not grow automatically, but it ensures that the
128 : // the memory held by the stack is released or remembered in a cache if the
129 : // matching terminates.
130 : class BacktrackStack {
131 : public:
132 4284692 : BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
133 :
134 : ~BacktrackStack() {
135 : DeleteArray(data_);
136 : }
137 :
138 : int* data() const { return data_; }
139 :
140 : int max_size() const { return kBacktrackStackSize; }
141 :
142 : private:
143 : static const int kBacktrackStackSize = 10000;
144 :
145 : int* data_;
146 :
147 : DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
148 : };
149 :
150 :
// Runs the Irregexp bytecode that starts at |code_base| against |subject|.
//
//   isolate       - only forwarded to BackRefMatchesNoCase.
//   code_base     - start of the bytecode; every jump target in the bytecode
//                   is an offset added to this pointer.
//   subject       - the characters being matched (one- or two-byte).
//   registers     - register file read and written by the register bytecodes.
//   current       - initial position in |subject|.
//   current_char  - initial value of the "current character".
//
// Returns RE_SUCCESS when a SUCCEED bytecode is reached, RE_FAILURE when a
// FAIL bytecode is reached, and RE_EXCEPTION when a push bytecode would
// exceed the capacity of the backtrack stack.
151 : template <typename Char>
152 4284692 : static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
153 : const byte* code_base,
154 : Vector<const Char> subject,
155 : int* registers,
156 : int current,
157 : uint32_t current_char) {
158 : const byte* pc = code_base;
159 : // BacktrackStack owns the memory allocated for the backtracking stack and
160 : // frees it in its destructor, i.e. on every return path out of this
161 : // function.
162 : BacktrackStack backtrack_stack;
163 : int* backtrack_stack_base = backtrack_stack.data();
164 : int* backtrack_sp = backtrack_stack_base;
// Number of free slots left on the backtrack stack; kept in sync with
// backtrack_sp by every push and pop below.
165 : int backtrack_stack_space = backtrack_stack.max_size();
166 : #ifdef DEBUG
167 : if (FLAG_trace_regexp_bytecodes) {
168 : PrintF("\n\nStart bytecode interpreter\n\n");
169 : }
170 : #endif
// Dispatch loop. Each iteration loads the 32-bit instruction word at pc: the
// bits selected by BYTECODE_MASK identify the bytecode and the bits above
// BYTECODE_SHIFT carry an inline operand. Further operands, if any, follow
// at pc + 4, pc + 8, ... Each handler either advances pc by the bytecode's
// length or sets it to code_base plus a jump offset.
171 : while (true) {
172 : int32_t insn = Load32Aligned(pc);
173 1946331849 : switch (insn & BYTECODE_MASK) {
174 : BYTECODE(BREAK)
175 0 : UNREACHABLE();
// Push bytecodes: fail with RE_EXCEPTION once the fixed-size backtrack stack
// has no free slot left.
176 : BYTECODE(PUSH_CP)
177 4461349 : if (--backtrack_stack_space < 0) {
178 : return RegExpImpl::RE_EXCEPTION;
179 : }
180 4461348 : *backtrack_sp++ = current;
181 4461348 : pc += BC_PUSH_CP_LENGTH;
182 2604276 : break;
183 : BYTECODE(PUSH_BT)
184 8726612 : if (--backtrack_stack_space < 0) {
185 : return RegExpImpl::RE_EXCEPTION;
186 : }
187 17453224 : *backtrack_sp++ = Load32Aligned(pc + 4);
188 8726612 : pc += BC_PUSH_BT_LENGTH;
189 5252628 : break;
190 : BYTECODE(PUSH_REGISTER)
191 30206 : if (--backtrack_stack_space < 0) {
192 : return RegExpImpl::RE_EXCEPTION;
193 : }
194 30206 : *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
195 30206 : pc += BC_PUSH_REGISTER_LENGTH;
196 34 : break;
// Register bytecodes: the register index is the inline operand.
197 : BYTECODE(SET_REGISTER)
198 630508 : registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
199 315254 : pc += BC_SET_REGISTER_LENGTH;
200 107299 : break;
201 : BYTECODE(ADVANCE_REGISTER)
202 4800 : registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
203 4800 : pc += BC_ADVANCE_REGISTER_LENGTH;
204 10 : break;
205 : BYTECODE(SET_REGISTER_TO_CP)
206 17623658 : registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
207 8811829 : pc += BC_SET_REGISTER_TO_CP_LENGTH;
208 5241944 : break;
209 : BYTECODE(SET_CP_TO_REGISTER)
210 3162 : current = registers[insn >> BYTECODE_SHIFT];
211 3162 : pc += BC_SET_CP_TO_REGISTER_LENGTH;
212 2645 : break;
// The backtrack stack pointer is saved in a register as a depth (number of
// entries), not as a raw pointer.
213 : BYTECODE(SET_REGISTER_TO_SP)
214 13141 : registers[insn >> BYTECODE_SHIFT] =
215 : static_cast<int>(backtrack_sp - backtrack_stack_base);
216 13141 : pc += BC_SET_REGISTER_TO_SP_LENGTH;
217 10998 : break;
// Restores the stack pointer from a saved depth and recomputes the number of
// free slots accordingly.
218 : BYTECODE(SET_SP_TO_REGISTER)
219 3162 : backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
220 3162 : backtrack_stack_space = backtrack_stack.max_size() -
221 : static_cast<int>(backtrack_sp - backtrack_stack_base);
222 3162 : pc += BC_SET_SP_TO_REGISTER_LENGTH;
223 2645 : break;
// Pop bytecodes: no underflow check is performed here.
224 : BYTECODE(POP_CP)
225 194552 : backtrack_stack_space++;
226 194552 : --backtrack_sp;
227 194552 : current = *backtrack_sp;
228 194552 : pc += BC_POP_CP_LENGTH;
229 59857 : break;
// The popped value is a bytecode offset; execution resumes there.
230 : BYTECODE(POP_BT)
231 451996 : backtrack_stack_space++;
232 451996 : --backtrack_sp;
233 451996 : pc = code_base + *backtrack_sp;
234 202291 : break;
235 : BYTECODE(POP_REGISTER)
236 8946 : backtrack_stack_space++;
237 8946 : --backtrack_sp;
238 8946 : registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
239 8946 : pc += BC_POP_REGISTER_LENGTH;
240 13 : break;
241 : BYTECODE(FAIL)
242 : return RegExpImpl::RE_FAILURE;
243 : BYTECODE(SUCCEED)
244 2512411 : return RegExpImpl::RE_SUCCESS;
// insn is a signed int32_t, so the inline operand keeps its sign when shifted
// down and the current position can move in either direction.
245 : BYTECODE(ADVANCE_CP)
246 4093231 : current += insn >> BYTECODE_SHIFT;
247 4093231 : pc += BC_ADVANCE_CP_LENGTH;
248 2565494 : break;
249 : BYTECODE(GOTO)
250 1153312 : pc = code_base + Load32Aligned(pc + 4);
251 904183 : break;
252 : BYTECODE(ADVANCE_CP_AND_GOTO)
253 592317967 : current += insn >> BYTECODE_SHIFT;
254 592317967 : pc = code_base + Load32Aligned(pc + 4);
255 548378418 : break;
// If the value on top of the backtrack stack equals the current position,
// drop it and jump; otherwise continue with the next instruction.
256 : BYTECODE(CHECK_GREEDY)
257 141575 : if (current == backtrack_sp[-1]) {
258 82356 : backtrack_sp--;
259 82356 : backtrack_stack_space++;
260 82356 : pc = code_base + Load32Aligned(pc + 4);
261 : } else {
262 59219 : pc += BC_CHECK_GREEDY_LENGTH;
263 : }
264 : break;
// Checked loads: jump to the offset stored at pc + 4 when the requested
// position lies outside the subject, otherwise load into current_char.
265 : BYTECODE(LOAD_CURRENT_CHAR) {
266 601983329 : int pos = current + (insn >> BYTECODE_SHIFT);
267 601983329 : if (pos >= subject.length() || pos < 0) {
268 459667 : pc = code_base + Load32Aligned(pc + 4);
269 : } else {
270 1203047324 : current_char = subject[pos];
271 601523662 : pc += BC_LOAD_CURRENT_CHAR_LENGTH;
272 : }
273 : break;
274 : }
// Unchecked loads perform no explicit bounds test in this function.
275 : BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
276 3001106 : int pos = current + (insn >> BYTECODE_SHIFT);
277 6002212 : current_char = subject[pos];
278 3001106 : pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
279 2341186 : break;
280 : }
// Packs two consecutive characters into current_char: the first in the low
// bits, the second shifted up by the width of one Char.
281 : BYTECODE(LOAD_2_CURRENT_CHARS) {
282 0 : int pos = current + (insn >> BYTECODE_SHIFT);
283 0 : if (pos + 2 > subject.length() || pos < 0) {
284 0 : pc = code_base + Load32Aligned(pc + 4);
285 : } else {
286 0 : Char next = subject[pos + 1];
287 0 : current_char =
288 0 : (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
289 0 : pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
290 : }
291 : break;
292 : }
293 : BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
294 0 : int pos = current + (insn >> BYTECODE_SHIFT);
295 0 : Char next = subject[pos + 1];
296 0 : current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
297 0 : pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
298 0 : break;
299 : }
// Four-character loads are only valid for one-byte subjects (DCHECKed); the
// four bytes are packed into current_char with the first one lowest.
300 : BYTECODE(LOAD_4_CURRENT_CHARS) {
301 : DCHECK_EQ(1, sizeof(Char));
302 0 : int pos = current + (insn >> BYTECODE_SHIFT);
303 0 : if (pos + 4 > subject.length() || pos < 0) {
304 0 : pc = code_base + Load32Aligned(pc + 4);
305 : } else {
306 0 : Char next1 = subject[pos + 1];
307 0 : Char next2 = subject[pos + 2];
308 0 : Char next3 = subject[pos + 3];
309 0 : current_char = (subject[pos] |
310 : (next1 << 8) |
311 : (next2 << 16) |
312 : (next3 << 24));
313 0 : pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
314 : }
315 : break;
316 : }
317 : BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
318 : DCHECK_EQ(1, sizeof(Char));
319 0 : int pos = current + (insn >> BYTECODE_SHIFT);
320 0 : Char next1 = subject[pos + 1];
321 0 : Char next2 = subject[pos + 2];
322 0 : Char next3 = subject[pos + 3];
323 0 : current_char = (subject[pos] |
324 : (next1 << 8) |
325 : (next2 << 16) |
326 : (next3 << 24));
327 0 : pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
328 0 : break;
329 : }
// Character tests: each compares current_char (optionally masked or offset)
// against an operand and jumps when the named condition holds; otherwise
// execution continues with the next instruction. The *_4_CHARS variants take
// the comparison value from pc + 4 instead of the inline operand.
330 : BYTECODE(CHECK_4_CHARS) {
331 0 : uint32_t c = Load32Aligned(pc + 4);
332 0 : if (c == current_char) {
333 0 : pc = code_base + Load32Aligned(pc + 8);
334 : } else {
335 0 : pc += BC_CHECK_4_CHARS_LENGTH;
336 : }
337 : break;
338 : }
339 : BYTECODE(CHECK_CHAR) {
340 698557630 : uint32_t c = (insn >> BYTECODE_SHIFT);
341 698557630 : if (c == current_char) {
342 1820998 : pc = code_base + Load32Aligned(pc + 4);
343 : } else {
344 696736632 : pc += BC_CHECK_CHAR_LENGTH;
345 : }
346 : break;
347 : }
348 : BYTECODE(CHECK_NOT_4_CHARS) {
349 0 : uint32_t c = Load32Aligned(pc + 4);
350 0 : if (c != current_char) {
351 0 : pc = code_base + Load32Aligned(pc + 8);
352 : } else {
353 0 : pc += BC_CHECK_NOT_4_CHARS_LENGTH;
354 : }
355 : break;
356 : }
357 : BYTECODE(CHECK_NOT_CHAR) {
358 1012455 : uint32_t c = (insn >> BYTECODE_SHIFT);
359 1012455 : if (c != current_char) {
360 251150 : pc = code_base + Load32Aligned(pc + 4);
361 : } else {
362 761305 : pc += BC_CHECK_NOT_CHAR_LENGTH;
363 : }
364 : break;
365 : }
366 : BYTECODE(AND_CHECK_4_CHARS) {
367 0 : uint32_t c = Load32Aligned(pc + 4);
368 0 : if (c == (current_char & Load32Aligned(pc + 8))) {
369 0 : pc = code_base + Load32Aligned(pc + 12);
370 : } else {
371 0 : pc += BC_AND_CHECK_4_CHARS_LENGTH;
372 : }
373 : break;
374 : }
375 : BYTECODE(AND_CHECK_CHAR) {
376 2196775 : uint32_t c = (insn >> BYTECODE_SHIFT);
377 2196775 : if (c == (current_char & Load32Aligned(pc + 4))) {
378 181115 : pc = code_base + Load32Aligned(pc + 8);
379 : } else {
380 2015660 : pc += BC_AND_CHECK_CHAR_LENGTH;
381 : }
382 : break;
383 : }
384 : BYTECODE(AND_CHECK_NOT_4_CHARS) {
385 0 : uint32_t c = Load32Aligned(pc + 4);
386 0 : if (c != (current_char & Load32Aligned(pc + 8))) {
387 0 : pc = code_base + Load32Aligned(pc + 12);
388 : } else {
389 0 : pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
390 : }
391 : break;
392 : }
393 : BYTECODE(AND_CHECK_NOT_CHAR) {
394 12798 : uint32_t c = (insn >> BYTECODE_SHIFT);
395 12798 : if (c != (current_char & Load32Aligned(pc + 4))) {
396 1674 : pc = code_base + Load32Aligned(pc + 8);
397 : } else {
398 11124 : pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
399 : }
400 : break;
401 : }
// Subtracts the 16-bit value at pc + 4 from current_char, masks the result
// with the 16-bit value at pc + 6 and jumps when it differs from the inline
// operand.
402 : BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
403 15 : uint32_t c = (insn >> BYTECODE_SHIFT);
404 15 : uint32_t minus = Load16Aligned(pc + 4);
405 15 : uint32_t mask = Load16Aligned(pc + 6);
406 15 : if (c != ((current_char - minus) & mask)) {
407 0 : pc = code_base + Load32Aligned(pc + 8);
408 : } else {
409 15 : pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
410 : }
411 : break;
412 : }
// Range tests: the inclusive bounds are the 16-bit values at pc + 4 and
// pc + 6.
413 : BYTECODE(CHECK_CHAR_IN_RANGE) {
414 2419825 : uint32_t from = Load16Aligned(pc + 4);
415 2419825 : uint32_t to = Load16Aligned(pc + 6);
416 2419825 : if (from <= current_char && current_char <= to) {
417 2138372 : pc = code_base + Load32Aligned(pc + 8);
418 : } else {
419 281453 : pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
420 : }
421 : break;
422 : }
423 : BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
424 4317545 : uint32_t from = Load16Aligned(pc + 4);
425 4317545 : uint32_t to = Load16Aligned(pc + 6);
426 4317545 : if (from > current_char || current_char > to) {
427 89477 : pc = code_base + Load32Aligned(pc + 8);
428 : } else {
429 4228068 : pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
430 : }
431 : break;
432 : }
// The bit table is embedded in the instruction starting at pc + 8. The masked
// character selects a byte of the table and its low bits select the bit
// within that byte; a set bit causes the jump.
433 : BYTECODE(CHECK_BIT_IN_TABLE) {
434 : int mask = RegExpMacroAssembler::kTableMask;
435 609189 : byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
436 609189 : int bit = (current_char & (kBitsPerByte - 1));
437 609189 : if ((b & (1 << bit)) != 0) {
438 415024 : pc = code_base + Load32Aligned(pc + 4);
439 : } else {
440 194165 : pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
441 : }
442 : break;
443 : }
444 : BYTECODE(CHECK_LT) {
445 2900016 : uint32_t limit = (insn >> BYTECODE_SHIFT);
446 2900016 : if (current_char < limit) {
447 109265 : pc = code_base + Load32Aligned(pc + 4);
448 : } else {
449 2790751 : pc += BC_CHECK_LT_LENGTH;
450 : }
451 : break;
452 : }
453 : BYTECODE(CHECK_GT) {
454 4157784 : uint32_t limit = (insn >> BYTECODE_SHIFT);
455 4157784 : if (current_char > limit) {
456 3514320 : pc = code_base + Load32Aligned(pc + 4);
457 : } else {
458 643464 : pc += BC_CHECK_GT_LENGTH;
459 : }
460 : break;
461 : }
// Register tests: compare a register against an immediate, the current
// position or another register.
462 : BYTECODE(CHECK_REGISTER_LT)
463 5656 : if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
464 156 : pc = code_base + Load32Aligned(pc + 8);
465 : } else {
466 2672 : pc += BC_CHECK_REGISTER_LT_LENGTH;
467 : }
468 : break;
469 : BYTECODE(CHECK_REGISTER_GE)
470 9674 : if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
471 2154 : pc = code_base + Load32Aligned(pc + 8);
472 : } else {
473 2683 : pc += BC_CHECK_REGISTER_GE_LENGTH;
474 : }
475 : break;
476 : BYTECODE(CHECK_REGISTER_EQ_POS)
477 2425 : if (registers[insn >> BYTECODE_SHIFT] == current) {
478 163 : pc = code_base + Load32Aligned(pc + 4);
479 : } else {
480 2262 : pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
481 : }
482 : break;
483 : BYTECODE(CHECK_NOT_REGS_EQUAL)
484 0 : if (registers[insn >> BYTECODE_SHIFT] ==
485 : registers[Load32Aligned(pc + 4)]) {
486 0 : pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
487 : } else {
488 0 : pc = code_base + Load32Aligned(pc + 8);
489 : }
490 : break;
// Back references: registers[n] and registers[n + 1] give the start and end
// of the referenced range. A negative start or a non-positive length skips
// the comparison and continues with the next instruction. Otherwise the jump
// is taken when the range does not fit at the current position or the
// characters differ; on a match the current position moves past the range.
491 : BYTECODE(CHECK_NOT_BACK_REF) {
492 3229 : int from = registers[insn >> BYTECODE_SHIFT];
493 3229 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
494 3229 : if (from >= 0 && len > 0) {
495 2224 : if (current + len > subject.length() ||
496 1408 : CompareChars(&subject[from], &subject[current], len) != 0) {
497 167 : pc = code_base + Load32Aligned(pc + 4);
498 62 : break;
499 : }
500 : current += len;
501 : }
502 3062 : pc += BC_CHECK_NOT_BACK_REF_LENGTH;
503 12 : break;
504 : }
// Backward variant: the range is compared against the characters ending at
// the current position, and the position moves backwards on a match.
505 : BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
506 167 : int from = registers[insn >> BYTECODE_SHIFT];
507 167 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
508 167 : if (from >= 0 && len > 0) {
509 246 : if (current - len < 0 ||
510 192 : CompareChars(&subject[from], &subject[current - len], len) != 0) {
511 88 : pc = code_base + Load32Aligned(pc + 4);
512 6 : break;
513 : }
514 : current -= len;
515 : }
516 79 : pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
517 3 : break;
518 : }
// The unicode and non-unicode case-insensitive back references share one
// handler; the opcode is re-examined to derive the unicode flag.
519 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
520 : V8_FALLTHROUGH;
521 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
522 : bool unicode =
523 56234 : (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
524 103119 : int from = registers[insn >> BYTECODE_SHIFT];
525 103119 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
526 103119 : if (from >= 0 && len > 0) {
527 289090 : if (current + len > subject.length() ||
528 : !BackRefMatchesNoCase(isolate, from, current, len, subject,
529 83024 : unicode)) {
530 97229 : pc = code_base + Load32Aligned(pc + 4);
531 53508 : break;
532 : }
533 : current += len;
534 : }
535 5890 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
536 2726 : break;
537 : }
538 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
539 : V8_FALLTHROUGH;
540 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
541 : bool unicode = (insn & BYTECODE_MASK) ==
542 1 : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
543 44 : int from = registers[insn >> BYTECODE_SHIFT];
544 44 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
545 44 : if (from >= 0 && len > 0) {
546 80 : if (current - len < 0 ||
547 : !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
548 36 : unicode)) {
549 19 : pc = code_base + Load32Aligned(pc + 4);
550 0 : break;
551 : }
552 : current -= len;
553 : }
554 25 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
555 1 : break;
556 : }
557 : BYTECODE(CHECK_AT_START)
558 4929 : if (current == 0) {
559 53 : pc = code_base + Load32Aligned(pc + 4);
560 : } else {
561 4876 : pc += BC_CHECK_AT_START_LENGTH;
562 : }
563 : break;
// Jumps unless current plus the inline operand is exactly position 0.
564 : BYTECODE(CHECK_NOT_AT_START)
565 25850 : if (current + (insn >> BYTECODE_SHIFT) == 0) {
566 15826 : pc += BC_CHECK_NOT_AT_START_LENGTH;
567 : } else {
568 10024 : pc = code_base + Load32Aligned(pc + 4);
569 : }
570 : break;
// If more than |by| characters remain after the current position, move the
// position to |by| characters before the end of the subject and reload
// current_char from the character just before the new position.
571 : BYTECODE(SET_CURRENT_POSITION_FROM_END) {
572 169 : int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
573 169 : if (subject.length() - current > by) {
574 140 : current = subject.length() - by;
575 280 : current_char = subject[current - 1];
576 : }
577 169 : pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
578 2 : break;
579 : }
580 : default:
581 0 : UNREACHABLE();
582 : break;
583 : }
584 : }
585 : }
586 :
587 :
588 4284692 : RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
589 : Isolate* isolate,
590 : Handle<ByteArray> code_array,
591 : Handle<String> subject,
592 : int* registers,
593 : int start_position) {
594 : DCHECK(subject->IsFlat());
595 :
596 : DisallowHeapAllocation no_gc;
597 : const byte* code_base = code_array->GetDataStartAddress();
598 : uc16 previous_char = '\n';
599 4284692 : String::FlatContent subject_content = subject->GetFlatContent(no_gc);
600 4284692 : if (subject_content.IsOneByte()) {
601 1629903 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
602 2956721 : if (start_position != 0) previous_char = subject_vector[start_position - 1];
603 : return RawMatch(isolate,
604 : code_base,
605 : subject_vector,
606 : registers,
607 : start_position,
608 1629903 : previous_char);
609 : } else {
610 : DCHECK(subject_content.IsTwoByte());
611 2654789 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
612 2807063 : if (start_position != 0) previous_char = subject_vector[start_position - 1];
613 : return RawMatch(isolate,
614 : code_base,
615 : subject_vector,
616 : registers,
617 : start_position,
618 2654789 : previous_char);
619 : }
620 : }
621 :
622 : } // namespace internal
623 178779 : } // namespace v8
|