LCOV - code coverage report
Current view: top level - src/regexp - regexp-macro-assembler.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 99 108 91.7 %
Date: 2017-04-26 Functions: 10 16 62.5 %

          Line data    Source code
       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/regexp/regexp-macro-assembler.h"
       6             : 
       7             : #include "src/assembler.h"
       8             : #include "src/isolate-inl.h"
       9             : #include "src/regexp/regexp-stack.h"
      10             : #include "src/simulator.h"
      11             : 
      12             : #ifdef V8_INTL_SUPPORT
      13             : #include "unicode/uchar.h"
      14             : #endif  // V8_INTL_SUPPORT
      15             : 
      16             : namespace v8 {
      17             : namespace internal {
      18             : 
      // Constructs the base macro assembler: stores the given isolate and zone,
      // and starts with slow_safe_compiler_ == false and
      // global_mode_ == NOT_GLOBAL.
      19           0 : RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
      20             :     : slow_safe_compiler_(false),
      21             :       global_mode_(NOT_GLOBAL),
      22             :       isolate_(isolate),
      23       92258 :       zone_(zone) {}
      24             : 
      25             : 
      // Destructor. The body is empty; nothing is released explicitly here.
      26       92258 : RegExpMacroAssembler::~RegExpMacroAssembler() {
      27           0 : }
      28             : 
      29             : 
      // Compares two UC16 substrings for equality while ignoring case.
      //
      // byte_offset1, byte_offset2: addresses of the two substrings; both are
      //     reinterpreted as uc16*.
      // byte_length: length of each substring in bytes (DCHECKed to be even);
      //     the number of uc16 units compared is byte_length / 2.
      // isolate: supplies the Ecma262Canonicalize mapping. When V8_INTL_SUPPORT
      //     is defined, a null isolate selects the u_foldCase based path.
      //
      // Returns 1 if every character pair matches, 0 at the first mismatch.
      30      693412 : int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
      31             :                                                      Address byte_offset2,
      32             :                                                      size_t byte_length,
      33             :                                                      Isolate* isolate) {
      34             :   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
      35      693412 :       isolate->regexp_macro_assembler_canonicalize();
      36             :   // This function is not allowed to cause a garbage collection.
      37             :   // A GC might move the calling generated code and invalidate the
      38             :   // return address on the stack.
      39             :   DCHECK(byte_length % 2 == 0);
      40             :   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
      41             :   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
                         // Convert the byte count into a count of 16-bit code units.
      42      693412 :   size_t length = byte_length >> 1;
      43             : 
      44             : #ifdef V8_INTL_SUPPORT
                         // NOTE(review): isolate has already been dereferenced above (the
                         // canonicalize lookup) before this nullptr check. Calling a member
                         // function through a null pointer is undefined behavior; consider
                         // moving the canonicalize lookup below this #ifdef block, next to
                         // DCHECK_NOT_NULL(isolate).
      45      693412 :   if (isolate == nullptr) {
      46          58 :     for (size_t i = 0; i < length; i++) {
      47          58 :       uc32 c1 = substring1[i];
      48          58 :       uc32 c2 = substring2[i];
      49          58 :       if (unibrow::Utf16::IsLeadSurrogate(c1)) {
      50             :         // Non-BMP characters do not have case-equivalents in the BMP.
      51             :         // Both have to be non-BMP for them to be able to match.
      52          14 :         if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
      53          14 :         if (i + 1 < length) {
      54          14 :           uc16 c1t = substring1[i + 1];
      55          14 :           uc16 c2t = substring2[i + 1];
      56          42 :           if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
      57          14 :               unibrow::Utf16::IsTrailSurrogate(c2t)) {
                                     // Both sides hold a full surrogate pair: combine each pair
                                     // into one code point and skip the trail unit just consumed.
      58             :             c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
      59             :             c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
      60             :             i++;
      61             :           }
      62             :         }
      63             :       }
                               // Compare the case-folded forms of both code points.
      64          58 :       c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
      65          58 :       c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
      66          58 :       if (c1 != c2) return 0;
      67             :     }
      68             :     return 1;
      69             :   }
      70             : #endif  // V8_INTL_SUPPORT
      71             :   DCHECK_NOT_NULL(isolate);
      72       40830 :   for (size_t i = 0; i < length; i++) {
      73      693444 :     unibrow::uchar c1 = substring1[i];
      74      693444 :     unibrow::uchar c2 = substring2[i];
                             // Only consult the mapping when the raw code units differ.
      75      693444 :     if (c1 != c2) {
                               // s1 starts out as c1, so it keeps that value if get() writes
                               // nothing (presumably get() stores the canonical form of c1 -
                               // confirm against unibrow::Mapping).
      76      652772 :       unibrow::uchar s1[1] = {c1};
      77      652772 :       canonicalize->get(c1, '\0', s1);
      78      652772 :       if (s1[0] != c2) {
                                 // Same treatment for c2, then compare the two results.
      79      652742 :         unibrow::uchar s2[1] = {c2};
      80      652742 :         canonicalize->get(c2, '\0', s2);
      81      652742 :         if (s1[0] != s2[0]) {
      82      652614 :           return 0;
      83             :         }
      84             :       }
      85             :     }
      86             :   }
      87             :   return 1;
      88             : }
      89             : 
      90             : 
      // Emits a sequence of checks around the position cp_offset that targets
      // on_failure only from the final lead-surrogate range check on the
      // previous character; the earlier steps all target the local label `ok`,
      // which is bound at the end of the sequence.
      // NOTE(review): the exact branch semantics of LoadCurrentCharacter,
      // CheckCharacterNotInRange and CheckCharacterInRange are defined
      // elsewhere; the description above follows their names and label
      // arguments only - confirm against their declarations.
      91         102 : void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
      92             :                                                    Label* on_failure) {
      93             :   Label ok;
      94             :   // Check that current character is not a trail surrogate.
      95         102 :   LoadCurrentCharacter(cp_offset, &ok);
      96         102 :   CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok);
      97             :   // Check that previous character is not a lead surrogate.
      98         102 :   LoadCurrentCharacter(cp_offset - 1, &ok);
      99         102 :   CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure);
     100         102 :   Bind(&ok);
     101         102 : }
     102             : 
     // Implementation of CheckPosition in terms of LoadCurrentCharacter:
     // forwards cp_offset and on_outside_input and passes `true` as the third
     // argument.
     // NOTE(review): the meaning of that third argument is not visible here -
     // confirm against the LoadCurrentCharacter declaration.
     103           0 : void RegExpMacroAssembler::CheckPosition(int cp_offset,
     104             :                                          Label* on_outside_input) {
     105           0 :   LoadCurrentCharacter(cp_offset, on_outside_input, true);
     106           0 : }
     107             : 
     // Base implementation: ignores both `type` and `on_no_match`, emits
     // nothing, and always returns false.
     108           0 : bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
     109             :                                                       Label* on_no_match) {
     110           0 :   return false;
     111             : }
     112             : 
     113             : #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     114             : 
     // Constructor: forwards isolate and zone to the RegExpMacroAssembler base
     // class and does nothing else.
     115       92258 : NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
     116             :                                                        Zone* zone)
     117       92258 :     : RegExpMacroAssembler(isolate, zone) {}
     118             : 
     119             : 
     // Destructor. The body is empty.
     120       92258 : NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     121       92258 : }
     122             : 
     123             : 
     // Returns true only when FLAG_enable_regexp_unaligned_accesses is set and
     // slow_safe() is false.
     124      268841 : bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     125      268841 :   return FLAG_enable_regexp_unaligned_accesses && !slow_safe();
     126             : }
     127             : 
     // Returns a byte pointer to the character at start_index inside the
     // character storage that backs `subject`.
     //
     // subject: the string to locate characters in. Cons, sliced and thin
     //     wrappers are unwrapped first (see below).
     // start_index: index into the string; DCHECKed to be within
     //     [0, subject->length()] after unwrapping.
     //
     // The index is added to the result of GetChars() before the cast to
     // const byte*, so it is scaled by whatever element type GetChars()
     // returns for the given representation.
     128      700474 : const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     129             :     String* subject,
     130             :     int start_index) {
                         // Cons string: continue with its first part. Sliced string: shift the
                         // index by the slice offset and continue with the parent.
     131      700474 :   if (subject->IsConsString()) {
     132             :     subject = ConsString::cast(subject)->first();
     133      700474 :   } else if (subject->IsSlicedString()) {
     134          11 :     start_index += SlicedString::cast(subject)->offset();
     135             :     subject = SlicedString::cast(subject)->parent();
     136             :   }
                         // The result of the step above may itself be a thin string; follow it
                         // to the actual string.
     137      700474 :   if (subject->IsThinString()) {
     138             :     subject = ThinString::cast(subject)->actual();
     139             :   }
     140             :   DCHECK(start_index >= 0);
     141             :   DCHECK(start_index <= subject->length());
                         // Dispatch on the four remaining representations; the last one is
                         // only verified by a DCHECK.
     142      700474 :   if (subject->IsSeqOneByteString()) {
     143             :     return reinterpret_cast<const byte*>(
     144      590157 :         SeqOneByteString::cast(subject)->GetChars() + start_index);
     145      110317 :   } else if (subject->IsSeqTwoByteString()) {
     146             :     return reinterpret_cast<const byte*>(
     147      110132 :         SeqTwoByteString::cast(subject)->GetChars() + start_index);
     148         185 :   } else if (subject->IsExternalOneByteString()) {
     149             :     return reinterpret_cast<const byte*>(
     150          37 :         ExternalOneByteString::cast(subject)->GetChars() + start_index);
     151             :   } else {
     152             :     DCHECK(subject->IsExternalTwoByteString());
     153             :     return reinterpret_cast<const byte*>(
     154         148 :         ExternalTwoByteString::cast(subject)->GetChars() + start_index);
     155             :   }
     156             : }
     157             : 
     158             : 
     // Handles a stack-guard hit for regexp code and fixes up any pointers
     // that may have become stale while doing so.
     //
     // isolate: isolate used for the handle scope, stack limit check, stack
     //     overflow reporting and interrupt handling.
     // start_index: index used to recompute *input_start in the subject.
     // is_direct_call: when true, neither StackOverflow() nor
     //     HandleInterrupts() is called here; only a status is returned.
     // return_address: in/out; adjusted if the code object has a different
     //     address afterwards. DCHECKed to lie within re_code's instructions.
     // re_code: the regexp code object.
     // subject, input_start, input_end: in/out; refreshed when matching
     //     continues.
     //
     // Returns 0 when matching can continue, otherwise EXCEPTION or RETRY.
     159         440 : int NativeRegExpMacroAssembler::CheckStackGuardState(
     160             :     Isolate* isolate, int start_index, bool is_direct_call,
     161             :     Address* return_address, Code* re_code, String** subject,
     162             :     const byte** input_start, const byte** input_end) {
     163             :   DCHECK(re_code->instruction_start() <= *return_address);
     164             :   DCHECK(*return_address <= re_code->instruction_end());
     165             :   int return_value = 0;
     166             :   // Prepare for possible GC.
     167             :   HandleScope handles(isolate);
     168             :   Handle<Code> code_handle(re_code);
     169         440 :   Handle<String> subject_handle(*subject);
                         // Remember the representation so a change can be detected afterwards.
     170         440 :   bool is_one_byte = subject_handle->IsOneByteRepresentationUnderneath();
     171             : 
     172             :   StackLimitCheck check(isolate);
     173         440 :   bool js_has_overflowed = check.JsHasOverflowed();
     174             : 
     175         440 :   if (is_direct_call) {
     176             :     // Direct calls from JavaScript can be interrupted in two ways:
     177             :     // 1. A real stack overflow, in which case we let the caller throw the
     178             :     //    exception.
     179             :     // 2. The stack guard was used to interrupt execution for another purpose,
     180             :     //    forcing the call through the runtime system.
     181          81 :     return_value = js_has_overflowed ? EXCEPTION : RETRY;
     182         359 :   } else if (js_has_overflowed) {
     183         259 :     isolate->StackOverflow();
     184             :     return_value = EXCEPTION;
     185             :   } else {
                           // No overflow: let the stack guard process pending interrupts and
                           // report EXCEPTION if that produced an exception.
     186         100 :     Object* result = isolate->stack_guard()->HandleInterrupts();
     187         100 :     if (result->IsException(isolate)) return_value = EXCEPTION;
     188             :   }
     189             : 
     190             :   DisallowHeapAllocation no_gc;
     191             : 
                         // The handle and the raw pointer differ if the code object was moved;
                         // shift the return address by the same distance.
     192         440 :   if (*code_handle != re_code) {  // Return address no longer valid
     193           0 :     intptr_t delta = code_handle->address() - re_code->address();
     194             :     // Overwrite the return address on the stack.
     195           0 :     *return_address += delta;
     196             :   }
     197             : 
     198             :   // If we continue, we need to update the subject string addresses.
     199         440 :   if (return_value == 0) {
     200             :     // String encoding might have changed.
     201          94 :     if (subject_handle->IsOneByteRepresentationUnderneath() != is_one_byte) {
     202             :       // If we changed between a LATIN1 and a UC16 string, the specialized
     203             :       // code cannot be used, and we need to restart regexp matching from
     204             :       // scratch (including, potentially, compiling a new version of the code).
     205             :       return_value = RETRY;
     206             :     } else {
                             // Same representation: refresh the raw subject pointer and
                             // recompute the input range, keeping its byte length unchanged.
     207          94 :       *subject = *subject_handle;
     208          94 :       intptr_t byte_length = *input_end - *input_start;
     209          94 :       *input_start = StringCharacterPosition(*subject, start_index);
     210          94 :       *input_end = *input_start + byte_length;
     211             :     }
     212             :   }
     213         440 :   return return_value;
     214             : }
     215             : 
     216             : 
     // Runs regexp_code against `subject`, starting at previous_index, and
     // returns the Result produced by Execute().
     //
     // regexp_code: the regexp code to run.
     // subject: the subject string; DCHECKed to be flat.
     // offsets_vector, offsets_vector_length: output buffer forwarded to
     //     Execute().
     // previous_index: start position; DCHECKed to be within
     //     [0, subject->length()].
     // isolate: forwarded to Execute().
     //
     // The function computes the [input_start, input_end) byte range of the
     // characters from previous_index to the end of the subject and hands it
     // to Execute().
     217      700380 : NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
     218             :     Handle<Code> regexp_code,
     219             :     Handle<String> subject,
     220             :     int* offsets_vector,
     221             :     int offsets_vector_length,
     222             :     int previous_index,
     223             :     Isolate* isolate) {
     224             : 
     225             :   DCHECK(subject->IsFlat());
     226             :   DCHECK(previous_index >= 0);
     227             :   DCHECK(previous_index <= subject->length());
     228             : 
     229             :   // No allocations before calling the regexp, but we can't use
     230             :   // DisallowHeapAllocation, since regexps might be preempted, and another
     231             :   // thread might do allocation anyway.
     232             : 
     233             :   String* subject_ptr = *subject;
     234             :   // Character offsets into string.
     235             :   int start_offset = previous_index;
                         // Number of characters from the start position to the end of the
                         // subject, measured before any unwrapping.
     236      700380 :   int char_length = subject_ptr->length() - start_offset;
     237             :   int slice_offset = 0;
     238             : 
     239             :   // The string has been flattened, so if it is a cons string it contains the
     240             :   // full string in the first part.
     241      700380 :   if (StringShape(subject_ptr).IsCons()) {
     242             :     DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
     243             :     subject_ptr = ConsString::cast(subject_ptr)->first();
     244      700380 :   } else if (StringShape(subject_ptr).IsSliced()) {
     245             :     SlicedString* slice = SlicedString::cast(subject_ptr);
     246             :     subject_ptr = slice->parent();
     247             :     slice_offset = slice->offset();
     248             :   }
     249      700380 :   if (StringShape(subject_ptr).IsThin()) {
     250             :     subject_ptr = ThinString::cast(subject_ptr)->actual();
     251             :   }
     252             :   // Ensure that an underlying string has the same representation.
     253             :   bool is_one_byte = subject_ptr->IsOneByteRepresentation();
     254             :   DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
     255             :   // String is now either Sequential or External
                         // Shift that converts a character count into a byte count: 0 for
                         // one-byte strings, 1 otherwise.
     256      700380 :   int char_size_shift = is_one_byte ? 0 : 1;
     257             : 
     258             :   const byte* input_start =
     259      700380 :       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
     260      700380 :   int byte_length = char_length << char_size_shift;
     261      700380 :   const byte* input_end = input_start + byte_length;
                         // Note: the string passed on is *subject (the original handle's
                         // value), not the unwrapped subject_ptr used for the input range.
     262             :   Result res = Execute(*regexp_code,
     263             :                        *subject,
     264             :                        start_offset,
     265             :                        input_start,
     266             :                        input_end,
     267             :                        offsets_vector,
     268             :                        offsets_vector_length,
     269      700380 :                        isolate);
     270      700380 :   return res;
     271             : }
     272             : 
     273             : 
     // Invokes the generated regexp code and returns its result as a Result.
     //
     // code: code object whose entry() is called.
     // input, start_offset, input_start, input_end, output, output_size:
     //     forwarded unchanged to the generated code.
     // isolate: used for the regexp stack scope, passed to the generated code,
     //     and used to raise a stack overflow when needed.
     //
     // The raw integer result is DCHECKed to be >= RETRY. If it equals
     // EXCEPTION but the isolate has no pending exception, StackOverflow() is
     // called on the isolate before returning.
     274      700478 : NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
     275             :     Code* code,
     276             :     String* input,  // This needs to be the unpacked (sliced, cons) string.
     277             :     int start_offset,
     278             :     const byte* input_start,
     279             :     const byte* input_end,
     280             :     int* output,
     281             :     int output_size,
     282             :     Isolate* isolate) {
     283             :   // Ensure that the minimum stack has been allocated.
     284      700478 :   RegExpStackScope stack_scope(isolate);
     285      700478 :   Address stack_base = stack_scope.stack()->stack_base();
     286             : 
                         // This entry point always passes 0 for the direct_call argument.
     287             :   int direct_call = 0;
     288     1400956 :   int result = CALL_GENERATED_REGEXP_CODE(
     289             :       isolate, code->entry(), input, start_offset, input_start, input_end,
     290             :       output, output_size, stack_base, direct_call, isolate);
     291             :   DCHECK(result >= RETRY);
     292             : 
     293      700478 :   if (result == EXCEPTION && !isolate->has_pending_exception()) {
     294             :     // We detected a stack overflow (on the backtrack stack) in RegExp code,
     295             :     // but haven't created the exception yet.
     296          17 :     isolate->StackOverflow();
     297             :   }
     298      700478 :   return static_cast<Result>(result);
     299             : }
     300             : 
     301             : 
     // Lookup table with 256 entries, indexed by character code (0x00-0xFF).
     // An entry is 0xff for '0'-'9', 'A'-'Z', '_' and 'a'-'z', and 0x00 for
     // every other code, including the whole upper (Latin-1) half of the
     // table. Each row below covers eight consecutive character codes.
     302             : const byte NativeRegExpMacroAssembler::word_character_map[] = {
     303             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     304             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     305             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     306             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     307             : 
     308             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     309             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     310             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
     311             :     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
     312             : 
     313             :     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
     314             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
     315             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
     316             :     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
     317             : 
     318             :     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
     319             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
     320             :     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
     321             :     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
     322             :     // Latin-1 range
     323             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     324             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     325             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     326             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     327             : 
     328             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     329             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     330             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     331             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     332             : 
     333             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     334             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     335             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     336             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     337             : 
     338             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     339             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     340             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     341             :     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
     342             : };
     343             : 
     344             : 
     // Requests twice the current capacity for the isolate's regexp stack and
     // translates the caller's stack pointer to the new stack.
     //
     // stack_pointer: current stack pointer; DCHECKed to be at or below the old
     //     stack base and no further from it than the current capacity.
     // stack_base: in/out; DCHECKed to equal the old base on entry, overwritten
     //     with the new base on success and left untouched on failure.
     // isolate: provides the RegExpStack.
     //
     // Returns the new stack pointer, placed at the same distance below the new
     // base as the old pointer was below the old base, or NULL when
     // EnsureCapacity() returns NULL.
     345         505 : Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
     346             :                                               Address* stack_base,
     347         505 :                                               Isolate* isolate) {
     348         505 :   RegExpStack* regexp_stack = isolate->regexp_stack();
     349             :   size_t size = regexp_stack->stack_capacity();
     350             :   Address old_stack_base = regexp_stack->stack_base();
     351             :   DCHECK(old_stack_base == *stack_base);
     352             :   DCHECK(stack_pointer <= old_stack_base);
     353             :   DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
     354         505 :   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
                         // NOTE(review): this function uses NULL while
                         // CaseInsensitiveCompareUC16 in this file uses nullptr; consider
                         // unifying on nullptr in the real source.
     355         505 :   if (new_stack_base == NULL) {
     356             :     return NULL;
     357             :   }
     358         498 :   *stack_base = new_stack_base;
                         // Amount of stack in use, measured from the old base down to the old
                         // stack pointer.
     359         498 :   intptr_t stack_content_size = old_stack_base - stack_pointer;
     360         498 :   return new_stack_base - stack_content_size;
     361             : }
     362             : 
     363             : #endif  // V8_INTERPRETED_REGEXP
     364             : 
     365             : }  // namespace internal
     366             : }  // namespace v8

Generated by: LCOV version 1.10