LCOV - code coverage report
Current view: top level - src/regexp - regexp-macro-assembler.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 98 108 90.7 %
Date: 2017-10-20 Functions: 10 16 62.5 %

          Line data    Source code
       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/regexp/regexp-macro-assembler.h"
       6             : 
       7             : #include "src/assembler.h"
       8             : #include "src/isolate-inl.h"
       9             : #include "src/regexp/regexp-stack.h"
      10             : #include "src/simulator.h"
      11             : #include "src/unicode-inl.h"
      12             : 
      13             : #ifdef V8_INTL_SUPPORT
      14             : #include "unicode/uchar.h"
      15             : #endif  // V8_INTL_SUPPORT
      16             : 
      17             : namespace v8 {
      18             : namespace internal {
      19             : // Stores isolate and zone; slow_safe_compiler_ starts false, global_mode_ NOT_GLOBAL.
      20           0 : RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
      21             :     : slow_safe_compiler_(false),
      22             :       global_mode_(NOT_GLOBAL),
      23             :       isolate_(isolate),
      24       93584 :       zone_(zone) {}
      25             : 
      26             : // Destructor with an empty body; no explicit cleanup is performed here.
      27       93584 : RegExpMacroAssembler::~RegExpMacroAssembler() {
      28           0 : }
      29             : // Case-insensitively compares two UC16 substrings of byte_length bytes each
      30             : // (byte_length must be even). Returns 1 when they match and 0 otherwise.
      31      462264 : int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
      32             :                                                      Address byte_offset2,
      33             :                                                      size_t byte_length,
      34             :                                                      Isolate* isolate) {
      35             :   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
      36      462264 :       isolate->regexp_macro_assembler_canonicalize();
      37             :   // This function is not allowed to cause a garbage collection.
      38             :   // A GC might move the calling generated code and invalidate the
      39             :   // return address on the stack.
      40             :   DCHECK_EQ(0, byte_length % 2);
      41             :   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
      42             :   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
      43      462264 :   size_t length = byte_length >> 1;  // Length in UC16 code units.
      44             :   // NOTE(review): isolate is already dereferenced above, before the null check below -- confirm that is safe.
      45             : #ifdef V8_INTL_SUPPORT
      46      462264 :   if (isolate == nullptr) {  // A null isolate selects the ICU u_foldCase path.
      47          35 :     for (size_t i = 0; i < length; i++) {
      48          35 :       uc32 c1 = substring1[i];
      49          35 :       uc32 c2 = substring2[i];
      50          35 :       if (unibrow::Utf16::IsLeadSurrogate(c1)) {
      51             :         // Non-BMP characters do not have case-equivalents in the BMP.
      52             :         // Both have to be non-BMP for them to be able to match.
      53           9 :         if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
      54           9 :         if (i + 1 < length) {
      55           9 :           uc16 c1t = substring1[i + 1];
      56           9 :           uc16 c2t = substring2[i + 1];
      57          27 :           if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
      58           9 :               unibrow::Utf16::IsTrailSurrogate(c2t)) {
      59             :             c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
      60             :             c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
      61             :             i++;  // Skip the trail units that were just combined.
      62             :           }
      63             :         }
      64             :       }
      65          35 :       c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
      66          35 :       c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
      67          35 :       if (c1 != c2) return 0;
      68             :     }
      69             :     return 1;
      70             :   }
      71             : #endif  // V8_INTL_SUPPORT
      72             :   DCHECK_NOT_NULL(isolate);
      73       27217 :   for (size_t i = 0; i < length; i++) {
      74      462289 :     unibrow::uchar c1 = substring1[i];
      75      462289 :     unibrow::uchar c2 = substring2[i];
      76      462289 :     if (c1 != c2) {  // Identical code units need no canonicalization.
      77      435176 :       unibrow::uchar s1[1] = {c1};
      78      435176 :       canonicalize->get(c1, '\0', s1);  // Presumably writes c1's canonical form to s1[0] -- TODO confirm.
      79      435176 :       if (s1[0] != c2) {
      80      435156 :         unibrow::uchar s2[1] = {c2};
      81      435156 :         canonicalize->get(c2, '\0', s2);
      82      435156 :         if (s1[0] != s2[0]) {
      83      435072 :           return 0;
      84             :         }
      85             :       }
      86             :     }
      87             :   }
      88             :   return 1;
      89             : }
      90             : // Emits checks that branch to on_failure when cp_offset appears to fall inside a
      91             : // surrogate pair; helper semantics are inferred from their names -- TODO confirm.
      92          85 : void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
      93             :                                                    Label* on_failure) {
      94             :   Label ok;
      95             :   // Check that current character is not a trail surrogate.
      96          85 :   LoadCurrentCharacter(cp_offset, &ok);
      97          85 :   CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok);
      98             :   // Check that previous character is not a lead surrogate.
      99          85 :   LoadCurrentCharacter(cp_offset - 1, &ok);
     100          85 :   CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure);
     101          85 :   Bind(&ok);
     102          85 : }
     103             : // Forwards to LoadCurrentCharacter with on_outside_input as the label argument.
     104           0 : void RegExpMacroAssembler::CheckPosition(int cp_offset,
     105             :                                          Label* on_outside_input) {
     106           0 :   LoadCurrentCharacter(cp_offset, on_outside_input, true);  // NOTE(review): meaning of `true` is not visible here.
     107           0 : }
     108             : // Base implementation: ignores both arguments and always returns false.
     109           0 : bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
     110             :                                                       Label* on_no_match) {
     111           0 :   return false;
     112             : }
     113             : 
     114             : #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     115             : // Passes isolate and zone straight through to the RegExpMacroAssembler constructor.
     116       93584 : NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
     117             :                                                        Zone* zone)
     118       93584 :     : RegExpMacroAssembler(isolate, zone) {}
     119             : 
     120             : // Destructor with an empty body; no explicit cleanup is performed here.
     121       93584 : NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     122       93584 : }
     123             : 
     124             : // True only when FLAG_enable_regexp_unaligned_accesses is set and slow_safe() is false.
     125      245782 : bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     126      245782 :   return FLAG_enable_regexp_unaligned_accesses && !slow_safe();
     127             : }
     128             : // Returns a byte pointer to character start_index of subject, after unwrapping cons, sliced and thin strings.
     129      598979 : const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     130             :     String* subject,
     131             :     int start_index) {
     132      598979 :   if (subject->IsConsString()) {
     133             :     subject = ConsString::cast(subject)->first();  // Continue with the first part only.
     134      598979 :   } else if (subject->IsSlicedString()) {
     135           0 :     start_index += SlicedString::cast(subject)->offset();  // Rebase the index onto the parent.
     136             :     subject = SlicedString::cast(subject)->parent();
     137             :   }
     138      598979 :   if (subject->IsThinString()) {
     139             :     subject = ThinString::cast(subject)->actual();
     140             :   }
     141             :   DCHECK_LE(0, start_index);
     142             :   DCHECK_LE(start_index, subject->length());
     143             :   // Dispatch on the four remaining representations; the last one is only DCHECKed.
     143      598979 :   if (subject->IsSeqOneByteString()) {
     144             :     return reinterpret_cast<const byte*>(
     145      504272 :         SeqOneByteString::cast(subject)->GetChars() + start_index);
     146       94707 :   } else if (subject->IsSeqTwoByteString()) {
     147             :     return reinterpret_cast<const byte*>(
     148       94565 :         SeqTwoByteString::cast(subject)->GetChars() + start_index);
     149         142 :   } else if (subject->IsExternalOneByteString()) {
     150             :     return reinterpret_cast<const byte*>(
     151          26 :         ExternalOneByteString::cast(subject)->GetChars() + start_index);
     152             :   } else {
     153             :     DCHECK(subject->IsExternalTwoByteString());
     154             :     return reinterpret_cast<const byte*>(
     155         116 :         ExternalTwoByteString::cast(subject)->GetChars() + start_index);
     156             :   }
     157             : }
     158             : // Decides how regexp execution proceeds after a stack-limit check: returns 0 to continue, else RETRY
     159             : // or EXCEPTION. May update *return_address, *subject, *input_start and *input_end.
     160         379 : int NativeRegExpMacroAssembler::CheckStackGuardState(
     161             :     Isolate* isolate, int start_index, bool is_direct_call,
     162             :     Address* return_address, Code* re_code, String** subject,
     163             :     const byte** input_start, const byte** input_end) {
     164             :   DCHECK(re_code->instruction_start() <= *return_address);
     165             :   DCHECK(*return_address <= re_code->instruction_end());
     166             :   int return_value = 0;
     167             :   // Prepare for possible GC.
     168             :   HandleScope handles(isolate);
     169             :   Handle<Code> code_handle(re_code);
     170         379 :   Handle<String> subject_handle(*subject);
     171         379 :   bool is_one_byte = subject_handle->IsOneByteRepresentationUnderneath();
     172             : 
     173             :   StackLimitCheck check(isolate);
     174         379 :   bool js_has_overflowed = check.JsHasOverflowed();
     175             : 
     176         379 :   if (is_direct_call) {
     177             :     // Direct calls from JavaScript can be interrupted in two ways:
     178             :     // 1. A real stack overflow, in which case we let the caller throw the
     179             :     //    exception.
     180             :     // 2. The stack guard was used to interrupt execution for another purpose,
     181             :     //    forcing the call through the runtime system.
     182          64 :     return_value = js_has_overflowed ? EXCEPTION : RETRY;
     183         315 :   } else if (js_has_overflowed) {
     184         229 :     isolate->StackOverflow();
     185             :     return_value = EXCEPTION;
     186             :   } else {
     187          86 :     Object* result = isolate->stack_guard()->HandleInterrupts();
     188          86 :     if (result->IsException(isolate)) return_value = EXCEPTION;
     189             :   }
     190             : 
     191             :   DisallowHeapAllocation no_gc;
     192             : 
     193         379 :   if (*code_handle != re_code) {  // Return address no longer valid
     194           0 :     intptr_t delta = code_handle->address() - re_code->address();
     195             :     // Overwrite the return address on the stack.
     196           0 :     *return_address += delta;
     197             :   }
     198             : 
     199             :   // If we continue, we need to update the subject string addresses.
     200         379 :   if (return_value == 0) {
     201             :     // String encoding might have changed.
     202          81 :     if (subject_handle->IsOneByteRepresentationUnderneath() != is_one_byte) {
     203             :       // If we changed between a LATIN1 and a UC16 string, the specialized
     204             :       // code cannot be used, and we need to restart regexp matching from
     205             :       // scratch (including, potentially, compiling a new version of the code).
     206             :       return_value = RETRY;
     207             :     } else {
     208          81 :       *subject = *subject_handle;
     209          81 :       intptr_t byte_length = *input_end - *input_start;  // Keep the input length across the address update.
     210          81 :       *input_start = StringCharacterPosition(*subject, start_index);
     211          81 :       *input_end = *input_start + byte_length;
     212             :     }
     213             :   }
     214         379 :   return return_value;
     215             : }
     216             : // Runs regexp_code against the flat string subject from previous_index: locates the underlying
     217             : // character data, computes the input start/end pointers, and returns the result of Execute().
     218      598898 : NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
     219             :     Handle<Code> regexp_code,
     220             :     Handle<String> subject,
     221             :     int* offsets_vector,
     222             :     int offsets_vector_length,
     223             :     int previous_index,
     224             :     Isolate* isolate) {
     225             : 
     226             :   DCHECK(subject->IsFlat());
     227             :   DCHECK_LE(0, previous_index);
     228             :   DCHECK_LE(previous_index, subject->length());
     229             : 
     230             :   // No allocations before calling the regexp, but we can't use
     231             :   // DisallowHeapAllocation, since regexps might be preempted, and another
     232             :   // thread might do allocation anyway.
     233             : 
     234             :   String* subject_ptr = *subject;
     235             :   // Character offsets into string.
     236             :   int start_offset = previous_index;
     237      598898 :   int char_length = subject_ptr->length() - start_offset;
     238             :   int slice_offset = 0;
     239             : 
     240             :   // The string has been flattened, so if it is a cons string it contains the
     241             :   // full string in the first part.
     242      598898 :   if (StringShape(subject_ptr).IsCons()) {
     243             :     DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
     244             :     subject_ptr = ConsString::cast(subject_ptr)->first();
     245      598898 :   } else if (StringShape(subject_ptr).IsSliced()) {
     246             :     SlicedString* slice = SlicedString::cast(subject_ptr);
     247             :     subject_ptr = slice->parent();
     248             :     slice_offset = slice->offset();
     249             :   }
     250      598898 :   if (StringShape(subject_ptr).IsThin()) {
     251             :     subject_ptr = ThinString::cast(subject_ptr)->actual();
     252             :   }
     253             :   // Ensure that an underlying string has the same representation.
     254             :   bool is_one_byte = subject_ptr->IsOneByteRepresentation();
     255             :   DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
     256             :   // String is now either Sequential or External
     257      598898 :   int char_size_shift = is_one_byte ? 0 : 1;  // Shift that converts a character count to a byte count.
     258             : 
     259             :   const byte* input_start =
     260      598898 :       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
     261      598898 :   int byte_length = char_length << char_size_shift;
     262      598898 :   const byte* input_end = input_start + byte_length;
     263             :   Result res = Execute(*regexp_code,
     264             :                        *subject,
     265             :                        start_offset,
     266             :                        input_start,
     267             :                        input_end,
     268             :                        offsets_vector,
     269             :                        offsets_vector_length,
     270      598898 :                        isolate);
     271      598898 :   return res;
     272             : }
     273             : // Invokes the generated regexp code through CALL_GENERATED_REGEXP_CODE and returns its result as a
     274             : // Result; calls isolate->StackOverflow() when EXCEPTION comes back with no exception pending.
     275      598982 : NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
     276             :     Code* code,
     277             :     String* input,  // This needs to be the unpacked (sliced, cons) string.
     278             :     int start_offset,
     279             :     const byte* input_start,
     280             :     const byte* input_end,
     281             :     int* output,
     282             :     int output_size,
     283             :     Isolate* isolate) {
     284             :   // Ensure that the minimum stack has been allocated.
     285      598982 :   RegExpStackScope stack_scope(isolate);
     286      598982 :   Address stack_base = stack_scope.stack()->stack_base();
     287             : 
     288             :   int direct_call = 0;  // Forwarded to the generated code as its direct_call argument.
     289     1197964 :   int result = CALL_GENERATED_REGEXP_CODE(
     290             :       isolate, code->entry(), input, start_offset, input_start, input_end,
     291             :       output, output_size, stack_base, direct_call, isolate);
     292             :   DCHECK(result >= RETRY);
     293             : 
     294      598982 :   if (result == EXCEPTION && !isolate->has_pending_exception()) {
     295             :     // We detected a stack overflow (on the backtrack stack) in RegExp code,
     296             :     // but haven't created the exception yet.
     297          16 :     isolate->StackOverflow();
     298             :   }
     299      598982 :   return static_cast<Result>(result);
     300             : }
     301             : // 256-entry table, one byte per character code: 0xff for '0'-'9', 'A'-'Z', '_' and 'a'-'z',
     302             : // 0x00 for every other code, including the whole upper (Latin-1) half.
     303             : const byte NativeRegExpMacroAssembler::word_character_map[] = {
     304             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     305             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     306             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     307             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     308             : 
     309             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     310             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     311             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
     312             :     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
     313             : 
     314             :     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
     315             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
     316             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
     317             :     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
     318             : 
     319             :     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
     320             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
     321             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
     322             :     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
     323             :     // Latin-1 range
     324             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     325             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     326             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     327             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     328             : 
     329             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     330             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     331             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     332             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     333             : 
     334             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     335             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     336             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     337             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     338             : 
     339             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     340             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     341             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     342             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     343             : };
     344             : // Requests twice the current regexp stack capacity. On success stores the new base in *stack_base
     345             : // and returns the stack pointer relative to that base; returns nullptr if EnsureCapacity did.
     346         443 : Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
     347             :                                               Address* stack_base,
     348         443 :                                               Isolate* isolate) {
     349         443 :   RegExpStack* regexp_stack = isolate->regexp_stack();
     350             :   size_t size = regexp_stack->stack_capacity();
     351             :   Address old_stack_base = regexp_stack->stack_base();
     352             :   DCHECK(old_stack_base == *stack_base);
     353             :   DCHECK(stack_pointer <= old_stack_base);
     354             :   DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
     355         443 :   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
     356         443 :   if (new_stack_base == nullptr) {
     357             :     return nullptr;
     358             :   }
     359         437 :   *stack_base = new_stack_base;
     360         437 :   intptr_t stack_content_size = old_stack_base - stack_pointer;  // Distance from stack_pointer up to the old base.
     361         437 :   return new_stack_base - stack_content_size;  // Same distance below the new base.
     362             : }
     363             : 
     364             : #endif  // V8_INTERPRETED_REGEXP
     365             : 
     366             : }  // namespace internal
     367             : }  // namespace v8

Generated by: LCOV version 1.10