LCOV - app.info - src/regexp/jsregexp.cc

LCOV - code coverage report

Current view:	top level - src/regexp - jsregexp.cc (source / functions)		Hit	Total	Coverage
Test:	app.info	Lines:	2174	2286	95.1 %
Date:	2019-04-17	Functions:	190	227	83.7 %

          Line data    Source code

       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/regexp/jsregexp.h"
       6             : 
       7             : #include <memory>
       8             : #include <vector>
       9             : 
      10             : #include "src/base/platform/platform.h"
      11             : #include "src/code-tracer.h"
      12             : #include "src/compilation-cache.h"
      13             : #include "src/elements.h"
      14             : #include "src/execution.h"
      15             : #include "src/heap/factory.h"
      16             : #include "src/heap/heap-inl.h"
      17             : #include "src/isolate-inl.h"
      18             : #include "src/message-template.h"
      19             : #include "src/ostreams.h"
      20             : #include "src/regexp/interpreter-irregexp.h"
      21             : #include "src/regexp/jsregexp-inl.h"
      22             : #include "src/regexp/regexp-macro-assembler-irregexp.h"
      23             : #include "src/regexp/regexp-macro-assembler-tracer.h"
      24             : #include "src/regexp/regexp-macro-assembler.h"
      25             : #include "src/regexp/regexp-parser.h"
      26             : #include "src/regexp/regexp-stack.h"
      27             : #include "src/runtime/runtime.h"
      28             : #include "src/splay-tree-inl.h"
      29             : #include "src/string-search.h"
      30             : #include "src/unicode-decoder.h"
      31             : #include "src/unicode-inl.h"
      32             : #include "src/zone/zone-list-inl.h"
      33             : 
      34             : #ifdef V8_INTL_SUPPORT
      35             : #include "unicode/uniset.h"
      36             : #include "unicode/utypes.h"
      37             : #endif  // V8_INTL_SUPPORT
      38             : 
      39             : #if V8_TARGET_ARCH_IA32
      40             : #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
      41             : #elif V8_TARGET_ARCH_X64
      42             : #include "src/regexp/x64/regexp-macro-assembler-x64.h"
      43             : #elif V8_TARGET_ARCH_ARM64
      44             : #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
      45             : #elif V8_TARGET_ARCH_ARM
      46             : #include "src/regexp/arm/regexp-macro-assembler-arm.h"
      47             : #elif V8_TARGET_ARCH_PPC
      48             : #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
      49             : #elif V8_TARGET_ARCH_S390
      50             : #include "src/regexp/s390/regexp-macro-assembler-s390.h"
      51             : #elif V8_TARGET_ARCH_MIPS
      52             : #include "src/regexp/mips/regexp-macro-assembler-mips.h"
      53             : #elif V8_TARGET_ARCH_MIPS64
      54             : #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
      55             : #else
      56             : #error Unsupported target architecture.
      57             : #endif
      58             : 
      59             : namespace v8 {
      60             : namespace internal {
      61             : 
      62             : V8_WARN_UNUSED_RESULT
      63        3205 : static inline MaybeHandle<Object> ThrowRegExpException(
      64             :     Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
      65             :     Handle<String> error_text) {
      66        6410 :   THROW_NEW_ERROR(isolate, NewSyntaxError(MessageTemplate::kMalformedRegExp,
      67             :                                           pattern, error_text),
      68             :                   Object);
      69             : }
      70             : 
      71         349 : inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
      72             :                                  Handle<String> error_text) {
      73         349 :   USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
      74             :                            error_text));
      75         349 : }
      76             : 
      77             : 
      78           0 : ContainedInLattice AddRange(ContainedInLattice containment,
      79             :                             const int* ranges,
      80             :                             int ranges_length,
      81             :                             Interval new_range) {
      82             :   DCHECK_EQ(1, ranges_length & 1);
      83             :   DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]);
      84      992692 :   if (containment == kLatticeUnknown) return containment;
      85             :   bool inside = false;
      86             :   int last = 0;
      87    10421341 :   for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
      88             :     // Consider the range from last to ranges[i].
      89             :     // We haven't got to the new range yet.
      90     5653127 :     if (ranges[i] <= new_range.from()) continue;
      91             :     // New range is wholly inside last-ranges[i].  Note that new_range.to() is
      92             :     // inclusive, but the values in ranges are not.
      93      884913 :     if (last <= new_range.from() && new_range.to() < ranges[i]) {
      94      867386 :       return Combine(containment, inside ? kLatticeIn : kLatticeOut);
      95             :     }
      96             :     return kLatticeUnknown;
      97             :   }
      98             :   return containment;
      99             : }
     100             : 
     101             : // More makes code generation slower, less makes V8 benchmark score lower.
     102             : const int kMaxLookaheadForBoyerMoore = 8;
     103             : // In a 3-character pattern you can maximally step forwards 3 characters
     104             : // at a time, which is not always enough to pay for the extra logic.
     105             : const int kPatternTooShortForBoyerMoore = 2;
     106             : 
     107             : // Identifies the sort of regexps where the regexp engine is faster
     108             : // than the code used for atom matches.
     109      204870 : static bool HasFewDifferentCharacters(Handle<String> pattern) {
     110             :   int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
     111      204870 :   if (length <= kPatternTooShortForBoyerMoore) return false;
     112             :   const int kMod = 128;
     113             :   bool character_found[kMod];
     114             :   int different = 0;
     115             :   memset(&character_found[0], 0, sizeof(character_found));
     116      995941 :   for (int i = 0; i < length; i++) {
     117      598069 :     int ch = (pattern->Get(i) & (kMod - 1));
     118      598069 :     if (!character_found[ch]) {
     119      597649 :       character_found[ch] = true;
     120      597649 :       different++;
     121             :       // We declare a regexp low-alphabet if it has at least 3 times as many
     122             :       // characters as it has different characters.
     123      597649 :       if (different * 3 > length) return false;
     124             :     }
     125             :   }
     126             :   return true;
     127             : }
     128             : 
     129             : // Generic RegExp methods. Dispatches to implementation specific methods.
     130             : 
     131      460444 : MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
     132             :                                         Handle<String> pattern,
     133             :                                         JSRegExp::Flags flags) {
     134             :   DCHECK(pattern->IsFlat());
     135             : 
     136      920888 :   Zone zone(isolate->allocator(), ZONE_NAME);
     137             :   CompilationCache* compilation_cache = isolate->compilation_cache();
     138             :   MaybeHandle<FixedArray> maybe_cached =
     139      460444 :       compilation_cache->LookupRegExp(pattern, flags);
     140             :   Handle<FixedArray> cached;
     141      460444 :   if (maybe_cached.ToHandle(&cached)) {
     142      334510 :     re->set_data(*cached);
     143      167255 :     return re;
     144             :   }
     145             : 
     146             :   PostponeInterruptsScope postpone(isolate);
     147             :   RegExpCompileData parse_result;
     148      293189 :   FlatStringReader reader(isolate, pattern);
     149             :   DCHECK(!isolate->has_pending_exception());
     150      293189 :   if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
     151             :                                  &parse_result)) {
     152             :     // Throw an exception if we fail to parse the pattern.
     153        2831 :     return ThrowRegExpException(isolate, re, pattern, parse_result.error);
     154             :   }
     155             : 
     156             :   bool has_been_compiled = false;
     157             : 
     158      885492 :   if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
     159      198173 :       !HasFewDifferentCharacters(pattern)) {
     160             :     // Parse-tree is a single atom that is equal to the pattern.
     161             :     AtomCompile(isolate, re, pattern, flags, pattern);
     162             :     has_been_compiled = true;
     163      106810 :   } else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
     164        7261 :              parse_result.capture_count == 0) {
     165        7251 :     RegExpAtom* atom = parse_result.tree->AsAtom();
     166        7251 :     Vector<const uc16> atom_pattern = atom->data();
     167             :     Handle<String> atom_string;
     168       14502 :     ASSIGN_RETURN_ON_EXCEPTION(
     169             :         isolate, atom_string,
     170             :         isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
     171        7251 :     if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
     172             :       AtomCompile(isolate, re, pattern, flags, atom_string);
     173             :       has_been_compiled = true;
     174             :     }
     175             :   }
     176      290358 :   if (!has_been_compiled) {
     177       85576 :     IrregexpInitialize(isolate, re, pattern, flags, parse_result.capture_count);
     178             :   }
     179             :   DCHECK(re->data()->IsFixedArray());
     180             :   // Compilation succeeded so the data is set on the regexp
     181             :   // and we can store it in the cache.
     182             :   Handle<FixedArray> data(FixedArray::cast(re->data()), isolate);
     183      290358 :   compilation_cache->PutRegExp(pattern, flags, data);
     184             : 
     185      290358 :   return re;
     186             : }
     187             : 
     188     4350688 : MaybeHandle<Object> RegExpImpl::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
     189             :                                      Handle<String> subject, int index,
     190             :                                      Handle<RegExpMatchInfo> last_match_info) {
     191     4350688 :   switch (regexp->TypeTag()) {
     192             :     case JSRegExp::ATOM:
     193         286 :       return AtomExec(isolate, regexp, subject, index, last_match_info);
     194             :     case JSRegExp::IRREGEXP: {
     195     4350402 :       return IrregexpExec(isolate, regexp, subject, index, last_match_info);
     196             :     }
     197             :     default:
     198           0 :       UNREACHABLE();
     199             :   }
     200             : }
     201             : 
     202             : 
     203             : // RegExp Atom implementation: Simple string search using indexOf.
     204             : 
     205           0 : void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
     206             :                              Handle<String> pattern, JSRegExp::Flags flags,
     207             :                              Handle<String> match_pattern) {
     208      204782 :   isolate->factory()->SetRegExpAtomData(re, JSRegExp::ATOM, pattern, flags,
     209      204782 :                                         match_pattern);
     210           0 : }
     211             : 
     212         273 : static void SetAtomLastCapture(Isolate* isolate,
     213             :                                Handle<RegExpMatchInfo> last_match_info,
     214             :                                String subject, int from, int to) {
     215             :   SealHandleScope shs(isolate);
     216             :   last_match_info->SetNumberOfCaptureRegisters(2);
     217         546 :   last_match_info->SetLastSubject(subject);
     218         546 :   last_match_info->SetLastInput(subject);
     219             :   last_match_info->SetCapture(0, from);
     220             :   last_match_info->SetCapture(1, to);
     221         273 : }
     222             : 
     223       90541 : int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
     224             :                             Handle<String> subject, int index, int32_t* output,
     225             :                             int output_size) {
     226             :   DCHECK_LE(0, index);
     227             :   DCHECK_LE(index, subject->length());
     228             : 
     229       90541 :   subject = String::Flatten(isolate, subject);
     230             :   DisallowHeapAllocation no_gc;  // ensure vectors stay valid
     231             : 
     232       90541 :   String needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
     233             :   int needle_len = needle->length();
     234             :   DCHECK(needle->IsFlat());
     235             :   DCHECK_LT(0, needle_len);
     236             : 
     237      181082 :   if (index + needle_len > subject->length()) {
     238             :     return RegExpImpl::RE_FAILURE;
     239             :   }
     240             : 
     241      273479 :   for (int i = 0; i < output_size; i += 2) {
     242      181736 :     String::FlatContent needle_content = needle->GetFlatContent(no_gc);
     243      181736 :     String::FlatContent subject_content = subject->GetFlatContent(no_gc);
     244             :     DCHECK(needle_content.IsFlat());
     245             :     DCHECK(subject_content.IsFlat());
     246             :     // dispatch on type of strings
     247             :     index =
     248             :         (needle_content.IsOneByte()
     249             :              ? (subject_content.IsOneByte()
     250             :                     ? SearchString(isolate, subject_content.ToOneByteVector(),
     251             :                                    needle_content.ToOneByteVector(), index)
     252             :                     : SearchString(isolate, subject_content.ToUC16Vector(),
     253             :                                    needle_content.ToOneByteVector(), index))
     254             :              : (subject_content.IsOneByte()
     255             :                     ? SearchString(isolate, subject_content.ToOneByteVector(),
     256             :                                    needle_content.ToUC16Vector(), index)
     257             :                     : SearchString(isolate, subject_content.ToUC16Vector(),
     258      363472 :                                    needle_content.ToUC16Vector(), index)));
     259      181736 :     if (index == -1) {
     260       90266 :       return i / 2;  // Return number of matches.
     261             :     } else {
     262       91470 :       output[i] = index;
     263       91470 :       output[i+1] = index + needle_len;
     264             :       index += needle_len;
     265             :     }
     266             :   }
     267         273 :   return output_size / 2;
     268             : }
     269             : 
     270         286 : Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re,
     271             :                                     Handle<String> subject, int index,
     272             :                                     Handle<RegExpMatchInfo> last_match_info) {
     273             :   static const int kNumRegisters = 2;
     274             :   STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
     275             :   int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
     276             : 
     277             :   int res =
     278         286 :       AtomExecRaw(isolate, re, subject, index, output_registers, kNumRegisters);
     279             : 
     280         299 :   if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
     281             : 
     282             :   DCHECK_EQ(res, RegExpImpl::RE_SUCCESS);
     283             :   SealHandleScope shs(isolate);
     284         273 :   SetAtomLastCapture(isolate, last_match_info, *subject, output_registers[0],
     285         273 :                      output_registers[1]);
     286         273 :   return last_match_info;
     287             : }
     288             : 
     289             : 
     290             : // Irregexp implementation.
     291             : 
     292             : // Ensures that the regexp object contains a compiled version of the
     293             : // source for either one-byte or two-byte subject strings.
     294             : // If the compiled version doesn't already exist, it is compiled
     295             : // from the source pattern.
     296             : // If compilation fails, an exception is thrown and this function
     297             : // returns false.
     298     4469232 : bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
     299             :                                         Handle<String> sample_subject,
     300             :                                         bool is_one_byte) {
     301             :   Object compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
     302     4469232 :   if (compiled_code != Smi::FromInt(JSRegExp::kUninitializedValue)) {
     303             :     DCHECK(FLAG_regexp_interpret_all ? compiled_code->IsByteArray()
     304             :                                      : compiled_code->IsCode());
     305             :     return true;
     306             :   }
     307       85768 :   return CompileIrregexp(isolate, re, sample_subject, is_one_byte);
     308             : }
     309             : 
     310       85768 : bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
     311             :                                  Handle<String> sample_subject,
     312             :                                  bool is_one_byte) {
     313             :   // Compile the RegExp.
     314      171536 :   Zone zone(isolate->allocator(), ZONE_NAME);
     315             :   PostponeInterruptsScope postpone(isolate);
     316             : #ifdef DEBUG
     317             :   Object entry = re->DataAt(JSRegExp::code_index(is_one_byte));
     318             :   // When arriving here entry can only be a smi representing an uncompiled
     319             :   // regexp.
     320             :   DCHECK(entry->IsSmi());
     321             :   int entry_value = Smi::ToInt(entry);
     322             :   DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
     323             : #endif
     324             : 
     325       85768 :   JSRegExp::Flags flags = re->GetFlags();
     326             : 
     327             :   Handle<String> pattern(re->Pattern(), isolate);
     328       85768 :   pattern = String::Flatten(isolate, pattern);
     329             :   RegExpCompileData compile_data;
     330       85768 :   FlatStringReader reader(isolate, pattern);
     331       85768 :   if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
     332             :                                  &compile_data)) {
     333             :     // Throw an exception if we fail to parse the pattern.
     334             :     // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
     335          25 :     USE(ThrowRegExpException(isolate, re, pattern, compile_data.error));
     336          25 :     return false;
     337             :   }
     338             :   RegExpEngine::CompilationResult result =
     339             :       RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern,
     340       85743 :                             sample_subject, is_one_byte);
     341       85743 :   if (result.error_message != nullptr) {
     342             :     // Unable to compile regexp.
     343         349 :     if (FLAG_abort_on_stack_or_string_length_overflow &&
     344           0 :         strncmp(result.error_message, "Stack overflow", 15) == 0) {
     345           0 :       FATAL("Aborting on stack overflow");
     346             :     }
     347         698 :     Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
     348         349 :         CStrVector(result.error_message)).ToHandleChecked();
     349         349 :     ThrowRegExpException(isolate, re, error_message);
     350             :     return false;
     351             :   }
     352             : 
     353             :   Handle<FixedArray> data =
     354             :       Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
     355       85394 :   data->set(JSRegExp::code_index(is_one_byte), result.code);
     356       85394 :   SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
     357             :   int register_max = IrregexpMaxRegisterCount(*data);
     358       85394 :   if (result.num_registers > register_max) {
     359             :     SetIrregexpMaxRegisterCount(*data, result.num_registers);
     360             :   }
     361             : 
     362             :   return true;
     363             : }
     364             : 
     365           0 : int RegExpImpl::IrregexpMaxRegisterCount(FixedArray re) {
     366             :   return Smi::cast(
     367           0 :       re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
     368             : }
     369             : 
     370           0 : void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray re, int value) {
     371             :   re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
     372           0 : }
     373             : 
     374       85394 : void RegExpImpl::SetIrregexpCaptureNameMap(FixedArray re,
     375             :                                            Handle<FixedArray> value) {
     376       85394 :   if (value.is_null()) {
     377       85034 :     re->set(JSRegExp::kIrregexpCaptureNameMapIndex, Smi::kZero);
     378             :   } else {
     379         360 :     re->set(JSRegExp::kIrregexpCaptureNameMapIndex, *value);
     380             :   }
     381       85394 : }
     382             : 
     383           0 : int RegExpImpl::IrregexpNumberOfCaptures(FixedArray re) {
     384           0 :   return Smi::ToInt(re->get(JSRegExp::kIrregexpCaptureCountIndex));
     385             : }
     386             : 
     387           0 : int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) {
     388           0 :   return Smi::ToInt(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex));
     389             : }
     390             : 
     391           0 : ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) {
     392           0 :   return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
     393             : }
     394             : 
     395           0 : Code RegExpImpl::IrregexpNativeCode(FixedArray re, bool is_one_byte) {
     396           0 :   return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
     397             : }
     398             : 
     399           0 : void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
     400             :                                     Handle<String> pattern,
     401             :                                     JSRegExp::Flags flags, int capture_count) {
     402             :   // Initialize compiled code entries to null.
     403             :   isolate->factory()->SetRegExpIrregexpData(re, JSRegExp::IRREGEXP, pattern,
     404       85576 :                                             flags, capture_count);
     405           0 : }
     406             : 
     407     4357818 : int RegExpImpl::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
     408             :                                 Handle<String> subject) {
     409             :   DCHECK(subject->IsFlat());
     410             : 
     411             :   // Check representation of the underlying storage.
     412     4357818 :   bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
     413     4357818 :   if (!EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte)) return -1;
     414             : 
     415     4357444 :   if (FLAG_regexp_interpret_all) {
     416             :     // Byte-code regexp needs space allocated for all its registers.
     417             :     // The result captures are copied to the start of the registers array
     418             :     // if the match succeeds.  This way those registers are not clobbered
     419             :     // when we set the last match info from last successful match.
     420             :     return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
     421     4251888 :            (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
     422             :   } else {
     423             :     // Native regexp only needs room to output captures. Registers are handled
     424             :     // internally.
     425      105556 :     return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
     426             :   }
     427             : }
     428             : 
     429     4393453 : int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
     430             :                                 Handle<String> subject, int index,
     431             :                                 int32_t* output, int output_size) {
     432             :   Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
     433             : 
     434             :   DCHECK_LE(0, index);
     435             :   DCHECK_LE(index, subject->length());
     436             :   DCHECK(subject->IsFlat());
     437             : 
     438     4393453 :   bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
     439             : 
     440     4393453 :   if (!FLAG_regexp_interpret_all) {
     441             :     DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
     442           4 :     do {
     443      111413 :       EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
     444             :       Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
     445             :       // The stack is used to allocate registers for the compiled regexp code.
     446             :       // This means that in case of failure, the output registers array is left
     447             :       // untouched and contains the capture results from the previous successful
     448             :       // match.  We can use that to set the last match info lazily.
     449             :       int res = NativeRegExpMacroAssembler::Match(code, subject, output,
     450      111413 :                                                   output_size, index, isolate);
     451      111413 :       if (res != NativeRegExpMacroAssembler::RETRY) {
     452             :         DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
     453             :                isolate->has_pending_exception());
     454             :         STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) ==
     455             :                       RE_SUCCESS);
     456             :         STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::FAILURE) ==
     457             :                       RE_FAILURE);
     458             :         STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) ==
     459             :                       RE_EXCEPTION);
     460      111409 :         return res;
     461             :       }
     462             :       // If result is RETRY, the string has changed representation, and we
     463             :       // must restart from scratch.
     464             :       // In this case, it means we must make sure we are prepared to handle
     465             :       // the, potentially, different subject (the string can switch between
     466             :       // being internal and external, and even between being Latin1 and UC16,
     467             :       // but the characters are always the same).
     468           4 :       IrregexpPrepare(isolate, regexp, subject);
     469           4 :       is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
     470             :     } while (true);
     471             :     UNREACHABLE();
     472             :   } else {
     473             :     DCHECK(FLAG_regexp_interpret_all);
     474             :     DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
     475             :     // We must have done EnsureCompiledIrregexp, so we can get the number of
     476             :     // registers.
     477             :     int number_of_capture_registers =
     478     4282044 :         (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
     479     4282044 :     int32_t* raw_output = &output[number_of_capture_registers];
     480             : 
     481           1 :     do {
     482             :       // We do not touch the actual capture result registers until we know there
     483             :       // has been a match so that we can use those capture results to set the
     484             :       // last match info.
     485    13135271 :       for (int i = number_of_capture_registers - 1; i >= 0; i--) {
     486     8853226 :         raw_output[i] = -1;
     487             :       }
     488             :       Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
     489             :                                    isolate);
     490             : 
     491             :       IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
     492     4282045 :           isolate, byte_codes, subject, raw_output, index);
     493             :       DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
     494             :                      isolate->has_pending_exception());
     495             : 
     496     4282045 :       switch (result) {
     497             :         case IrregexpInterpreter::SUCCESS:
     498             :           // Copy capture results to the start of the registers array.
     499             :           MemCopy(output, raw_output,
     500             :                   number_of_capture_registers * sizeof(int32_t));
     501     8389394 :           return result;
     502             :         case IrregexpInterpreter::EXCEPTION:
     503             :         case IrregexpInterpreter::FAILURE:
     504             :           return result;
     505             :         case IrregexpInterpreter::RETRY:
     506             :           // The string has changed representation, and we must restart the
     507             :           // match.
     508           1 :           is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
     509           1 :           EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
     510           1 :           break;
     511             :       }
     512             :     } while (true);
     513             :     UNREACHABLE();
     514             :   }
     515             : }
     516             : 
     517     4350402 : MaybeHandle<Object> RegExpImpl::IrregexpExec(
     518             :     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
     519             :     int previous_index, Handle<RegExpMatchInfo> last_match_info) {
     520             :   DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
     521             : 
     522     4350402 :   subject = String::Flatten(isolate, subject);
     523             : 
     524             :   // Prepare space for the return values.
     525             : #ifdef DEBUG
     526             :   if (FLAG_regexp_interpret_all && FLAG_trace_regexp_bytecodes) {
     527             :     String pattern = regexp->Pattern();
     528             :     PrintF("\n\nRegexp match:   /%s/\n\n", pattern->ToCString().get());
     529             :     PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
     530             :   }
     531             : #endif
     532             :   int required_registers =
     533     4350402 :       RegExpImpl::IrregexpPrepare(isolate, regexp, subject);
     534     4350402 :   if (required_registers < 0) {
     535             :     // Compiling failed with an exception.
     536             :     DCHECK(isolate->has_pending_exception());
     537         279 :     return MaybeHandle<Object>();
     538             :   }
     539             : 
     540             :   int32_t* output_registers = nullptr;
     541     4350123 :   if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
     542        2844 :     output_registers = NewArray<int32_t>(required_registers);
     543             :   }
     544             :   std::unique_ptr<int32_t[]> auto_release(output_registers);
     545     4350123 :   if (output_registers == nullptr) {
     546             :     output_registers = isolate->jsregexp_static_offsets_vector();
     547             :   }
     548             : 
     549             :   int res =
     550             :       RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index,
     551     4350123 :                                   output_registers, required_registers);
     552     4350123 :   if (res == RE_SUCCESS) {
     553             :     int capture_count =
     554             :         IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
     555             :     return SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
     556     4161191 :                             output_registers);
     557             :   }
     558      188932 :   if (res == RE_EXCEPTION) {
     559             :     DCHECK(isolate->has_pending_exception());
     560          59 :     return MaybeHandle<Object>();
     561             :   }
     562             :   DCHECK(res == RE_FAILURE);
     563      188873 :   return isolate->factory()->null_value();
     564             : }
     565             : 
     566     4255951 : Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
     567             :     Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
     568             :     Handle<String> subject, int capture_count, int32_t* match) {
     569             :   // This is the only place where match infos can grow. If, after executing the
     570             :   // regexp, RegExpExecStub finds that the match info is too small, it restarts
     571             :   // execution in RegExpImpl::Exec, which finally grows the match info right
     572             :   // here.
     573             : 
     574     4255951 :   int capture_register_count = (capture_count + 1) * 2;
     575             :   Handle<RegExpMatchInfo> result = RegExpMatchInfo::ReserveCaptures(
     576     4255951 :       isolate, last_match_info, capture_register_count);
     577             :   result->SetNumberOfCaptureRegisters(capture_register_count);
     578             : 
     579     4255951 :   if (*result != *last_match_info) {
     580        4256 :     if (*last_match_info == *isolate->regexp_last_match_info()) {
     581             :       // This inner condition is only needed for special situations like the
     582             :       // regexp fuzzer, where we pass our own custom RegExpMatchInfo to
     583             :       // RegExpImpl::Exec; there we actually want to bypass the Isolate's match
     584             :       // info and execute the regexp without side effects.
     585        4256 :       isolate->native_context()->set_regexp_last_match_info(*result);
     586             :     }
     587             :   }
     588             : 
     589             :   DisallowHeapAllocation no_allocation;
     590     4255951 :   if (match != nullptr) {
     591    15173507 :     for (int i = 0; i < capture_register_count; i += 2) {
     592     5458778 :       result->SetCapture(i, match[i]);
     593     5458778 :       result->SetCapture(i + 1, match[i + 1]);
     594             :     }
     595             :   }
     596     8511902 :   result->SetLastSubject(*subject);
     597     8511902 :   result->SetLastInput(*subject);
     598     4255951 :   return result;
     599             : }
     600             : 
     601       95498 : RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
     602             :                                      Handle<String> subject, Isolate* isolate)
     603             :     : register_array_(nullptr),
     604             :       register_array_size_(0),
     605             :       regexp_(regexp),
     606             :       subject_(subject),
     607       95498 :       isolate_(isolate) {
     608       95498 :   bool interpreted = FLAG_regexp_interpret_all;
     609             : 
     610       95498 :   if (regexp_->TypeTag() == JSRegExp::ATOM) {
     611             :     static const int kAtomRegistersPerMatch = 2;
     612       90255 :     registers_per_match_ = kAtomRegistersPerMatch;
     613             :     // There is no distinction between interpreted and native for atom regexps.
     614             :     interpreted = false;
     615             :   } else {
     616             :     registers_per_match_ =
     617        5243 :         RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
     618        5243 :     if (registers_per_match_ < 0) {
     619          95 :       num_matches_ = -1;  // Signal exception.
     620          95 :       return;
     621             :     }
     622             :   }
     623             : 
     624             :   DCHECK(IsGlobal(regexp->GetFlags()));
     625       95403 :   if (!interpreted) {
     626             :     register_array_size_ =
     627      189494 :         Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
     628       94747 :     max_matches_ = register_array_size_ / registers_per_match_;
     629             :   } else {
     630             :     // Global loop in interpreted regexp is not implemented.  We choose
     631             :     // the size of the offsets vector so that it can only store one match.
     632         656 :     register_array_size_ = registers_per_match_;
     633         656 :     max_matches_ = 1;
     634             :   }
     635             : 
     636       95403 :   if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
     637        1072 :     register_array_ = NewArray<int32_t>(register_array_size_);
     638             :   } else {
     639       94331 :     register_array_ = isolate->jsregexp_static_offsets_vector();
     640             :   }
     641             : 
     642             :   // Set state so that fetching the results the first time triggers a call
     643             :   // to the compiled regexp.
     644       95403 :   current_match_index_ = max_matches_ - 1;
     645       95403 :   num_matches_ = max_matches_;
     646             :   DCHECK_LE(2, registers_per_match_);  // Each match has at least one capture.
     647             :   DCHECK_GE(register_array_size_, registers_per_match_);
     648             :   int32_t* last_match =
     649       95403 :       &register_array_[current_match_index_ * registers_per_match_];
     650       95403 :   last_match[0] = -1;
     651       95403 :   last_match[1] = 0;
     652             : }
     653             : 
     654           7 : int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
     655           7 :   if (IsUnicode(regexp_->GetFlags()) && last_index + 1 < subject_->length() &&
     656           7 :       unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
     657             :       unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
     658             :     // Advance over the surrogate pair.
     659           0 :     return last_index + 2;
     660             :   }
     661           7 :   return last_index + 1;
     662             : }
     663             : 
     664             : // -------------------------------------------------------------------
     665             : // Implementation of the Irregexp regular expression engine.
     666             : //
     667             : // The Irregexp regular expression engine is intended to be a complete
     668             : // implementation of ECMAScript regular expressions.  It generates either
     669             : // bytecodes or native code.
     670             : 
     671             : //   The Irregexp regexp engine is structured in three steps.
     672             : //   1) The parser generates an abstract syntax tree.  See ast.cc.
     673             : //   2) From the AST a node network is created.  The nodes are all
     674             : //      subclasses of RegExpNode.  The nodes represent states when
     675             : //      executing a regular expression.  Several optimizations are
     676             : //      performed on the node network.
     677             : //   3) From the nodes we generate either byte codes or native code
     678             : //      that can actually execute the regular expression (perform
     679             : //      the search).  The code generation step is described in more
     680             : //      detail below.
     681             : 
     682             : // Code generation.
     683             : //
     684             : //   The nodes are divided into four main categories.
     685             : //   * Choice nodes
     686             : //        These represent places where the regular expression can
     687             : //        match in more than one way.  For example on entry to an
     688             : //        alternation (foo|bar) or a repetition (*, +, ? or {}).
     689             : //   * Action nodes
     690             : //        These represent places where some action should be
     691             : //        performed.  Examples include recording the current position
     692             : //        in the input string to a register (in order to implement
     693             : //        captures) or other actions on registers, for example in order
     694             : //        to implement the counters needed for {} repetitions.
     695             : //   * Matching nodes
     696             : //        These attempt to match some element part of the input string.
     697             : //        Examples of elements include character classes, plain strings
     698             : //        or back references.
     699             : //   * End nodes
     700             : //        These are used to implement the actions required on finding
     701             : //        a successful match or failing to find a match.
     702             : //
     703             : //   The code generated (whether as byte codes or native code) maintains
     704             : //   some state as it runs.  This consists of the following elements:
     705             : //
     706             : //   * The capture registers.  Used for string captures.
     707             : //   * Other registers.  Used for counters etc.
     708             : //   * The current position.
     709             : //   * The stack of backtracking information.  Used when a matching node
     710             : //     fails to find a match and needs to try an alternative.
     711             : //
     712             : // Conceptual regular expression execution model:
     713             : //
     714             : //   There is a simple conceptual model of regular expression execution
     715             : //   which will be presented first.  The actual code generated is a more
     716             : //   efficient simulation of the simple conceptual model:
     717             : //
     718             : //   * Choice nodes are implemented as follows:
     719             : //     For each choice except the last {
     720             : //       push current position
     721             : //       push backtrack code location
     722             : //       <generate code to test for choice>
     723             : //       backtrack code location:
     724             : //       pop current position
     725             : //     }
     726             : //     <generate code to test for last choice>
     727             : //
     728             : //   * Action nodes are generated as follows:
     729             : //     <push affected registers on backtrack stack>
     730             : //     <generate code to perform action>
     731             : //     push backtrack code location
     732             : //     <generate code to test for following nodes>
     733             : //     backtrack code location:
     734             : //     <pop affected registers to restore their state>
     735             : //     <pop backtrack location from stack and go to it>
     736             : //
     737             : //   * Matching nodes are generated as follows:
     738             : //     if input string matches at current position
     739             : //       update current position
     740             : //       <generate code to test for following nodes>
     741             : //     else
     742             : //       <pop backtrack location from stack and go to it>
     743             : //
     744             : //   Thus it can be seen that the current position is saved and restored
     745             : //   by the choice nodes, whereas the registers are saved and restored by
     746             : //   the action nodes that manipulate them.
     747             : //
     748             : //   The other interesting aspect of this model is that nodes are generated
     749             : //   at the point where they are needed by a recursive call to Emit().  If
     750             : //   the node has already been code generated then the Emit() call will
     751             : //   generate a jump to the previously generated code instead.  In order to
     752             : //   limit recursion it is possible for the Emit() function to put the node
     753             : //   on a work list for later generation and instead generate a jump.  The
     754             : //   destination of the jump is resolved later when the code is generated.
     755             : //
     756             : // Actual regular expression code generation.
     757             : //
     758             : //   Code generation is actually more complicated than the above.  In order
     759             : //   to improve the efficiency of the generated code some optimizations are
     760             : //   performed:
     761             : //
     762             : //   * Choice nodes have 1-character lookahead.
     763             : //     A choice node looks at the following character and eliminates some of
     764             : //     the choices immediately based on that character.  This is not yet
     765             : //     implemented.
     766             : //   * Simple greedy loops store reduced backtracking information.
     767             : //     A quantifier like /.*foo/m will greedily match the whole input.  It will
     768             : //     then need to backtrack to a point where it can match "foo".  The naive
     769             : //     implementation of this would push each character position onto the
     770             : //     backtracking stack, then pop them off one by one.  This would use space
     771             : //     proportional to the length of the input string.  However since the "."
     772             : //     can only match in one way and always has a constant length (in this case
     773             : //     of 1) it suffices to store the current position on the top of the stack
     774             : //     once.  Matching now becomes merely incrementing the current position and
     775             : //     backtracking becomes decrementing the current position and checking the
     776             : //     result against the stored current position.  This is faster and saves
     777             : //     space.
     778             : //   * The current state is virtualized.
     779             : //     This is used to defer expensive operations until it is clear that they
     780             : //     are needed and to generate code for a node more than once, allowing
     781             : //     specialized and efficient versions of the code to be created. This is
     782             : //     explained in the section below.
     783             : //
     784             : // Execution state virtualization.
     785             : //
     786             : //   Instead of emitting code, nodes that manipulate the state can record their
     787             : //   manipulation in an object called the Trace.  The Trace object can record a
     788             : //   current position offset, an optional backtrack code location on the top of
     789             : //   the virtualized backtrack stack and some register changes.  When a node is
     790             : //   to be emitted it can flush the Trace or update it.  Flushing the Trace
     791             : //   will emit code to bring the actual state into line with the virtual state.
     792             : //   Avoiding flushing the state can postpone some work (e.g. updates of capture
     793             : //   registers).  Postponing work can save time when executing the regular
     794             : //   expression since it may be found that the work never has to be done as a
     795             : //   failure to match can occur.  In addition it is much faster to jump to a
     796             : //   known backtrack code location than it is to pop an unknown backtrack
     797             : //   location from the stack and jump there.
     798             : //
     799             : //   The virtual state found in the Trace affects code generation.  For example
     800             : //   the virtual state contains the difference between the actual current
     801             : //   position and the virtual current position, and matching code needs to use
     802             : //   this offset to attempt a match in the correct location of the input
     803             : //   string.  Therefore code generated for a non-trivial trace is specialized
     804             : //   to that trace.  The code generator therefore has the ability to generate
     805             : //   code for each node several times.  In order to limit the size of the
     806             : //   generated code there is an arbitrary limit on how many specialized sets of
     807             : //   code may be generated for a given node.  If the limit is reached, the
     808             : //   trace is flushed and a generic version of the code for a node is emitted.
     809             : //   This is subsequently used for that node.  The code emitted for non-generic
     810             : //   trace is not recorded in the node and so it cannot currently be reused in
     811             : //   the event that code generation is requested for an identical trace.
     812             : 
     813             : 
     814           0 : void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {
     815           0 :   UNREACHABLE();
     816             : }
     817             : 
     818             : 
     819       99371 : void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
     820             :   text->AddElement(TextElement::Atom(this), zone);
     821       99371 : }
     822             : 
     823             : 
     824        7635 : void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
     825             :   text->AddElement(TextElement::CharClass(this), zone);
     826        7635 : }
     827             : 
     828             : 
     829           0 : void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
     830           0 :   for (int i = 0; i < elements()->length(); i++)
     831             :     text->AddElement(elements()->at(i), zone);
     832           0 : }
     833             : 
     834             : 
     835           0 : TextElement TextElement::Atom(RegExpAtom* atom) {
     836           0 :   return TextElement(ATOM, atom);
     837             : }
     838             : 
     839             : 
     840           0 : TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
     841           0 :   return TextElement(CHAR_CLASS, char_class);
     842             : }
     843             : 
     844             : 
     845     7445146 : int TextElement::length() const {
     846     7445146 :   switch (text_type()) {
     847             :     case ATOM:
     848     6603781 :       return atom()->length();
     849             : 
     850             :     case CHAR_CLASS:
     851             :       return 1;
     852             :   }
     853           0 :   UNREACHABLE();
     854             : }
     855             : 
     856             : 
     857           0 : DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
     858           0 :   if (table_ == nullptr) {
     859           0 :     table_ = new(zone()) DispatchTable(zone());
     860             :     DispatchTableConstructor cons(table_, ignore_case, zone());
     861             :     cons.BuildTable(this);
     862             :   }
     863           0 :   return table_;
     864             : }
     865             : 
     866             : 
     867             : class FrequencyCollator {
     868             :  public:
     869    11060331 :   FrequencyCollator() : total_samples_(0) {
     870    22034923 :     for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
     871    10974592 :       frequencies_[i] = CharacterFrequency(i);
     872             :     }
     873             :   }
     874             : 
     875             :   void CountCharacter(int character) {
     876      456959 :     int index = (character & RegExpMacroAssembler::kTableMask);
     877             :     frequencies_[index].Increment();
     878      456959 :     total_samples_++;
     879             :   }
     880             : 
     881             :   // Does not measure in percent, but rather per-128 (the table size from the
     882             :   // regexp macro assembler).
     883             :   int Frequency(int in_character) {
     884             :     DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
     885      490082 :     if (total_samples_ < 1) return 1;  // Division by zero.
     886             :     int freq_in_per128 =
     887      489817 :         (frequencies_[in_character].counter() * 128) / total_samples_;
     888             :     return freq_in_per128;
     889             :   }
     890             : 
     891             :  private:
     892             :   class CharacterFrequency {
     893             :    public:
     894    10974592 :     CharacterFrequency() : counter_(0), character_(-1) { }
     895             :     explicit CharacterFrequency(int character)
     896             :         : counter_(0), character_(character) { }
     897             : 
     898      456959 :     void Increment() { counter_++; }
     899             :     int counter() { return counter_; }
     900             :     int character() { return character_; }
     901             : 
     902             :    private:
     903             :     int counter_;
     904             :     int character_;
     905             :   };
     906             : 
     907             : 
     908             :  private:
     909             :   CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
     910             :   int total_samples_;
     911             : };
     912             : 
     913             : 
     914             : class RegExpCompiler {
     915             :  public:
     916             :   RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
     917             :                  bool is_one_byte);
     918             : 
     919             :   int AllocateRegister() {
     920      909516 :     if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
     921      310203 :       reg_exp_too_big_ = true;
     922             :       return next_register_;
     923             :     }
     924      599313 :     return next_register_++;
     925             :   }
     926             : 
     927             :   // Lookarounds to match lone surrogates for unicode character class matches
     928             :   // are never nested. We can therefore reuse registers.
     929             :   int UnicodeLookaroundStackRegister() {
     930        2460 :     if (unicode_lookaround_stack_register_ == kNoRegister) {
     931        1040 :       unicode_lookaround_stack_register_ = AllocateRegister();
     932             :     }
     933        2460 :     return unicode_lookaround_stack_register_;
     934             :   }
     935             : 
     936             :   int UnicodeLookaroundPositionRegister() {
     937        2460 :     if (unicode_lookaround_position_register_ == kNoRegister) {
     938        1040 :       unicode_lookaround_position_register_ = AllocateRegister();
     939             :     }
     940        2460 :     return unicode_lookaround_position_register_;
     941             :   }
     942             : 
     943             :   RegExpEngine::CompilationResult Assemble(Isolate* isolate,
     944             :                                            RegExpMacroAssembler* assembler,
     945             :                                            RegExpNode* start, int capture_count,
     946             :                                            Handle<String> pattern);
     947             : 
     948      594380 :   inline void AddWork(RegExpNode* node) {
     949      949519 :     if (!node->on_work_list() && !node->label()->is_bound()) {
     950             :       node->set_on_work_list(true);
     951      210984 :       work_list_->push_back(node);
     952             :     }
     953      594380 :   }
     954             : 
     955             :   static const int kImplementationOffset = 0;
     956             :   static const int kNumberOfRegistersOffset = 0;
     957             :   static const int kCodeOffset = 1;
     958             : 
     959             :   RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
     960             :   EndNode* accept() { return accept_; }
     961             : 
     962             :   static const int kMaxRecursion = 100;
     963             :   inline int recursion_depth() { return recursion_depth_; }
     964     1000487 :   inline void IncrementRecursionDepth() { recursion_depth_++; }
     965     1000487 :   inline void DecrementRecursionDepth() { recursion_depth_--; }
     966             : 
     967           0 :   void SetRegExpTooBig() { reg_exp_too_big_ = true; }
     968             : 
     969             :   inline bool one_byte() { return one_byte_; }
     970             :   inline bool optimize() { return optimize_; }
     971       84574 :   inline void set_optimize(bool value) { optimize_ = value; }
     972             :   inline bool limiting_recursion() { return limiting_recursion_; }
     973             :   inline void set_limiting_recursion(bool value) {
     974      957024 :     limiting_recursion_ = value;
     975             :   }
     976             :   bool read_backward() { return read_backward_; }
     977        3336 :   void set_read_backward(bool value) { read_backward_ = value; }
     978             :   FrequencyCollator* frequency_collator() { return &frequency_collator_; }
     979             : 
     980             :   int current_expansion_factor() { return current_expansion_factor_; }
     981             :   void set_current_expansion_factor(int value) {
     982       85695 :     current_expansion_factor_ = value;
     983             :   }
     984             : 
     985             :   Isolate* isolate() const { return isolate_; }
     986             :   Zone* zone() const { return zone_; }
     987             : 
     988             :   static const int kNoRegister = -1;
     989             : 
     990             :  private:
     991             :   EndNode* accept_;
     992             :   int next_register_;
     993             :   int unicode_lookaround_stack_register_;
     994             :   int unicode_lookaround_position_register_;
     995             :   std::vector<RegExpNode*>* work_list_;
     996             :   int recursion_depth_;
     997             :   RegExpMacroAssembler* macro_assembler_;
     998             :   bool one_byte_;
     999             :   bool reg_exp_too_big_;
    1000             :   bool limiting_recursion_;
    1001             :   bool optimize_;
    1002             :   bool read_backward_;
    1003             :   int current_expansion_factor_;
    1004             :   FrequencyCollator frequency_collator_;
    1005             :   Isolate* isolate_;
    1006             :   Zone* zone_;
    1007             : };
    1008             : 
    1009             : 
    1010             : class RecursionCheck {
    1011             :  public:
    1012             :   explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
    1013             :     compiler->IncrementRecursionDepth();
    1014             :   }
    1015             :   ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
    1016             :  private:
    1017             :   RegExpCompiler* compiler_;
    1018             : };
    1019             : 
    1020             : 
    1021             : static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
    1022             :   return RegExpEngine::CompilationResult(isolate, "RegExp too big");
    1023             : }
    1024             : 
    1025             : // Initializes the compiler's bookkeeping and allocates the ACCEPT EndNode in
    1026             : // |zone|.  Numbering for newly allocated registers starts at
    1027             : // 2 * (capture_count + 1); optimize_ is seeded from FLAG_regexp_optimization.
    1028       85739 : RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
    1029             :                                bool one_byte)
    1030       85739 :     : next_register_(2 * (capture_count + 1)),
    1031             :       unicode_lookaround_stack_register_(kNoRegister),
    1032             :       unicode_lookaround_position_register_(kNoRegister),
    1033             :       work_list_(nullptr),
    1034             :       recursion_depth_(0),
    1035             :       one_byte_(one_byte),
    1036             :       reg_exp_too_big_(false),
    1037             :       limiting_recursion_(false),
    1038             :       optimize_(FLAG_regexp_optimization),
    1039             :       read_backward_(false),
    1040             :       current_expansion_factor_(1),
    1041             :       frequency_collator_(),
    1042             :       isolate_(isolate),
    1043      171478 :       zone_(zone) {  // NOTE: macro_assembler_ is not initialized here; Assemble() assigns it.
    1044       85739 :   accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
    1045             :   DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1);  // Highest pre-assigned register must not exceed kMaxRegister.
    1046       85739 : }
    1047             : // Emits code for the node graph rooted at |start| through |macro_assembler| and returns the code plus next_register_, or the "RegExp too big" result.
    1048       85399 : RegExpEngine::CompilationResult RegExpCompiler::Assemble(
    1049             :     Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
    1050             :     int capture_count, Handle<String> pattern) {  // NOTE: |capture_count| is not read in this function.
    1051             : #ifdef DEBUG
    1052             :   if (FLAG_trace_regexp_assembler)
    1053             :     macro_assembler_ = new RegExpMacroAssemblerTracer(isolate, macro_assembler);  // Deleted at the end of this function.
    1054             :   else
    1055             : #endif
    1056       85399 :     macro_assembler_ = macro_assembler;
    1057             : 
    1058             :   std::vector<RegExpNode*> work_list;
    1059       85399 :   work_list_ = &work_list;  // Points at a local; reset to nullptr before the normal return.
    1060       85399 :   Label fail;
    1061       85399 :   macro_assembler_->PushBacktrack(&fail);  // Outermost backtrack target is the Fail() bound below.
    1062             :   Trace new_trace;
    1063       85399 :   start->Emit(this, &new_trace);
    1064       85399 :   macro_assembler_->Bind(&fail);
    1065       85399 :   macro_assembler_->Fail();
    1066      296383 :   while (!work_list.empty()) {  // Drain queued nodes, most recently added first.
    1067      210984 :     RegExpNode* node = work_list.back();
    1068             :     work_list.pop_back();
    1069             :     node->set_on_work_list(false);
    1070      210984 :     if (!node->label()->is_bound()) node->Emit(this, &new_trace);  // Nodes whose label is already bound are skipped.
    1071             :   }
    1072       85399 :   if (reg_exp_too_big_) {
    1073           0 :     macro_assembler_->AbortedCodeGeneration();
    1074           0 :     return IrregexpRegExpTooBig(isolate_);  // NOTE(review): this path skips the work_list_ reset and the DEBUG tracer delete below.
    1075             :   }
    1076             : 
    1077       85399 :   Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
    1078      170798 :   isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
    1079       85399 :   work_list_ = nullptr;  // The local vector is about to go out of scope.
    1080             : #ifdef ENABLE_DISASSEMBLER
    1081             :   if (FLAG_print_code && !FLAG_regexp_interpret_all) {
    1082             :     CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
    1083             :     OFStream os(trace_scope.file());
    1084             :     Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
    1085             :   }
    1086             : #endif
    1087             : #ifdef DEBUG
    1088             :   if (FLAG_trace_regexp_assembler) {
    1089             :     delete macro_assembler_;  // Frees the tracer wrapper allocated at the top of this function.
    1090             :   }
    1091             : #endif
    1092       85399 :   return RegExpEngine::CompilationResult(*code, next_register_);
    1093             : }
    1094             : // True if this deferred action touches register |that|: membership in the
    1095             : // cleared range for CLEAR_CAPTURES, equality with reg() for any other type.
    1096           0 : bool Trace::DeferredAction::Mentions(int that) {
    1097     2465627 :   if (action_type() == ActionNode::CLEAR_CAPTURES) {
    1098             :     Interval range = static_cast<DeferredClearCaptures*>(this)->range();
    1099             :     return range.Contains(that);
    1100             :   } else {
    1101     2418147 :     return reg() == that;
    1102             :   }
    1103             : }
    1104             : 
    1105             : // True if any deferred action on this trace's actions_ list mentions |reg|.
    1106           0 : bool Trace::mentions_reg(int reg) {
    1107           0 :   for (DeferredAction* action = actions_; action != nullptr;
    1108             :        action = action->next()) {
    1109           0 :     if (action->Mentions(reg))
    1110             :       return true;
    1111             :   }
    1112             :   return false;
    1113             : }
    1114             : // Finds the first action on actions_ that mentions |reg|.  If it is a STORE_POSITION,
    1115             : // writes its cp_offset to |*cp_offset| and returns true; otherwise returns false.
    1116         973 : bool Trace::GetStoredPosition(int reg, int* cp_offset) {
    1117             :   DCHECK_EQ(0, *cp_offset);  // Callers must pass in a zeroed offset.
    1118        1516 :   for (DeferredAction* action = actions_; action != nullptr;
    1119             :        action = action->next()) {
    1120         953 :     if (action->Mentions(reg)) {
    1121         410 :       if (action->action_type() == ActionNode::STORE_POSITION) {
    1122         410 :         *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
    1123         410 :         return true;
    1124             :       } else {
    1125             :         return false;  // The first mentioning action is not a store; later list entries are not examined.
    1126             :       }
    1127             :     }
    1128             :   }
    1129             :   return false;  // No action mentions |reg|; |*cp_offset| is left untouched.
    1130             : }
    1131             : // Adds every register mentioned by a deferred action to |affected_registers| and
    1132             : // returns the highest such register, or kNoRegister when actions_ is empty.
    1133      512388 : int Trace::FindAffectedRegisters(OutSet* affected_registers,
    1134             :                                  Zone* zone) {
    1135             :   int max_register = RegExpCompiler::kNoRegister;
    1136      933064 :   for (DeferredAction* action = actions_; action != nullptr;
    1137             :        action = action->next()) {
    1138      420676 :     if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
    1139             :       Interval range = static_cast<DeferredClearCaptures*>(action)->range();
    1140       93130 :       for (int i = range.from(); i <= range.to(); i++)  // Both ends of the range are inclusive.
    1141       45084 :         affected_registers->Set(i, zone);
    1142        2962 :       if (range.to() > max_register) max_register = range.to();
    1143             :     } else {
    1144      417714 :       affected_registers->Set(action->reg(), zone);
    1145      417714 :       if (action->reg() > max_register) max_register = action->reg();
    1146             :     }
    1147             :   }
    1148      512388 :   return max_register;
    1149             : }
    1150             : // Emits the undo code: walks registers from |max_register| down to 0, popping those in
    1151             : // |registers_to_pop| and clearing those in |registers_to_clear| (adjacent ones in one call).
    1152      512388 : void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
    1153             :                                      int max_register,
    1154             :                                      const OutSet& registers_to_pop,
    1155             :                                      const OutSet& registers_to_clear) {
    1156    20425968 :   for (int reg = max_register; reg >= 0; reg--) {  // Descending, i.e. the reverse of the push order in PerformDeferredActions.
    1157    19913580 :     if (registers_to_pop.Get(reg)) {
    1158       52908 :       assembler->PopRegister(reg);
    1159     9903882 :     } else if (registers_to_clear.Get(reg)) {
    1160             :       int clear_to = reg;
    1161      472005 :       while (reg > 0 && registers_to_clear.Get(reg - 1)) {  // Extend the run downwards over consecutive clearable registers.
    1162      105531 :         reg--;
    1163             :       }
    1164       77706 :       assembler->ClearRegisters(reg, clear_to);  // One call covers the whole run [reg, clear_to].
    1165             :     }
    1166             :   }
    1167      512388 : }
    1168             : // For each register in |affected_registers|, folds the deferred actions that mention it into a single
    1169             : // emitted operation, after pushing it or marking it for clearing so the change can be undone on backtrack.
    1170      512388 : void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
    1171             :                                    int max_register,
    1172             :                                    const OutSet& affected_registers,
    1173             :                                    OutSet* registers_to_pop,
    1174             :                                    OutSet* registers_to_clear,
    1175             :                                    Zone* zone) {
    1176             :   // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
    1177      512388 :   const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
    1178             : 
    1179             :   // Count pushes performed to force a stack limit check occasionally.
    1180             :   int pushes = 0;
    1181             : 
    1182    20637030 :   for (int reg = 0; reg <= max_register; reg++) {
    1183    20124642 :     if (!affected_registers.Get(reg)) {
    1184             :       continue;
    1185             :     }
    1186             : 
    1187             :     // The chronologically first deferred action in the trace
    1188             :     // is used to infer the action needed to restore a register
    1189             :     // to its previous state (or not, if it's safe to ignore it).
    1190             :     enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
    1191             :     DeferredActionUndoType undo_action = IGNORE;
    1192             : 
    1193             :     int value = 0;  // Accumulated increments, or the absolute value once |absolute| is set.
    1194             :     bool absolute = false;  // Set when a SET_REGISTER has been folded into |value|.
    1195             :     bool clear = false;  // Set when a CLEAR_CAPTURES is seen before any store position was recorded.
    1196             :     static const int kNoStore = kMinInt;
    1197             :     int store_position = kNoStore;  // cp_offset taken from a STORE_POSITION, or kNoStore.
    1198             :     // This is a little tricky because we are scanning the actions in reverse
    1199             :     // historical order (newest first).
    1200     2920901 :     for (DeferredAction* action = actions_; action != nullptr;
    1201             :          action = action->next()) {
    1202     2464674 :       if (action->Mentions(reg)) {
    1203      462798 :         switch (action->action_type()) {
    1204             :           case ActionNode::SET_REGISTER: {
    1205             :             Trace::DeferredSetRegister* psr =
    1206             :                 static_cast<Trace::DeferredSetRegister*>(action);
    1207        3471 :             if (!absolute) {  // Only the first SET_REGISTER encountered (the newest) contributes.
    1208        3471 :               value += psr->value();
    1209             :               absolute = true;
    1210             :             }
    1211             :             // SET_REGISTER is currently only used for newly introduced loop
    1212             :             // counters. They can have a significant previous value if they
    1213             :             // occur in a loop. TODO(lrn): Propagate this information, so
    1214             :             // we can set undo_action to IGNORE if we know there is no value to
    1215             :             // restore.
    1216             :             undo_action = RESTORE;
    1217             :             DCHECK_EQ(store_position, kNoStore);
    1218             :             DCHECK(!clear);
    1219             :             break;
    1220             :           }
    1221             :           case ActionNode::INCREMENT_REGISTER:
    1222        3750 :             if (!absolute) {  // Increments found after a SET_REGISTER in this scan are older and superseded by it.
    1223        3750 :               value++;
    1224             :             }
    1225             :             DCHECK_EQ(store_position, kNoStore);
    1226             :             DCHECK(!clear);
    1227             :             undo_action = RESTORE;
    1228             :             break;
    1229             :           case ActionNode::STORE_POSITION: {
    1230             :             Trace::DeferredCapture* pc =
    1231             :                 static_cast<Trace::DeferredCapture*>(action);
    1232      410493 :             if (!clear && store_position == kNoStore) {  // Keep only the first store seen, and only if no clear preceded it in the scan.
    1233             :               store_position = pc->cp_offset();
    1234             :             }
    1235             : 
    1236             :             // For captures we know that stores and clears alternate.
    1237             :             // Other registers are never cleared, and if they occur
    1238             :             // inside a loop, they might be assigned more than once.
    1239      410493 :             if (reg <= 1) {
    1240             :               // Registers zero and one, aka "capture zero", are
    1241             :               // always set correctly if we succeed. There is no
    1242             :               // need to undo a setting on backtrack, because we
    1243             :               // will set it again or fail.
    1244             :               undo_action = IGNORE;
    1245             :             } else {
    1246      190411 :               undo_action = pc->is_capture() ? CLEAR : RESTORE;
    1247             :             }
    1248             :             DCHECK(!absolute);
    1249             :             DCHECK_EQ(value, 0);
    1250             :             break;
    1251             :           }
    1252             :           case ActionNode::CLEAR_CAPTURES: {
    1253             :             // Since we're scanning in reverse order, if we've already
    1254             :             // set the position we have to ignore historically earlier
    1255             :             // clearing operations.
    1256       45084 :             if (store_position == kNoStore) {
    1257             :               clear = true;
    1258             :             }
    1259             :             undo_action = RESTORE;
    1260             :             DCHECK(!absolute);
    1261             :             DCHECK_EQ(value, 0);
    1262             :             break;
    1263             :           }
    1264             :           default:
    1265           0 :             UNREACHABLE();
    1266             :             break;
    1267             :         }
    1268             :       }
    1269             :     }
    1270             :     // Prepare for the undo-action (e.g., push if it's going to be popped).
    1271      456227 :     if (undo_action == RESTORE) {
    1272       52908 :       pushes++;
    1273             :       RegExpMacroAssembler::StackCheckFlag stack_check =
    1274             :           RegExpMacroAssembler::kNoStackLimitCheck;
    1275       52908 :       if (pushes == push_limit) {  // Every push_limit-th push requests a stack limit check and restarts the count.
    1276             :         stack_check = RegExpMacroAssembler::kCheckStackLimit;
    1277             :         pushes = 0;
    1278             :       }
    1279             : 
    1280       52908 :       assembler->PushRegister(reg, stack_check);
    1281       52908 :       registers_to_pop->Set(reg, zone);
    1282      403319 :     } else if (undo_action == CLEAR) {
    1283      183237 :       registers_to_clear->Set(reg, zone);
    1284             :     }
    1285             :     // Perform the chronologically last action (or accumulated increment)
    1286             :     // for the register.
    1287      456227 :     if (store_position != kNoStore) {
    1288      410493 :       assembler->WriteCurrentPositionToRegister(reg, store_position);
    1289       45734 :     } else if (clear) {
    1290       38513 :       assembler->ClearRegisters(reg, reg);
    1291        7221 :     } else if (absolute) {
    1292        3471 :       assembler->SetRegister(reg, value);
    1293        3750 :     } else if (value != 0) {
    1294        3750 :       assembler->AdvanceRegister(reg, value);
    1295             :     }
    1296             :   }
    1297      512388 : }
    1298             : 
    1299             : // Emits this trace's deferred work, then emits or schedules |successor| with a fresh trivial Trace.
    1300             : // This is called as we come into a loop choice node and some other tricky
    1301             : // nodes.  It normalizes the state of the code generator to ensure we can
    1302             : // generate generic code.
    1303      702001 : void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
    1304             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    1305             : 
    1306             :   DCHECK(!is_trivial());
    1307             : 
    1308      702001 :   if (actions_ == nullptr && backtrack() == nullptr) {
    1309             :     // Here we just have some deferred cp advances to fix and we are back to
    1310             :     // a normal situation.  We may also have to forget some information gained
    1311             :     // through a quick check that was already performed.
    1312      189613 :     if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
    1313             :     // Create a new trivial state and generate the node with that.
    1314             :     Trace new_state;
    1315      189613 :     successor->Emit(compiler, &new_state);
    1316             :     return;
    1317             :   }
    1318             : 
    1319             :   // Generate deferred actions here along with code to undo them again.
    1320             :   OutSet affected_registers;
    1321             : 
    1322      512388 :   if (backtrack() != nullptr) {
    1323             :     // Here we have a concrete backtrack location.  These are set up by choice
    1324             :     // nodes and so they indicate that we have a deferred save of the current
    1325             :     // position which we may need to emit here.
    1326      401805 :     assembler->PushCurrentPosition();  // Matched by PopCurrentPosition() on the undo path below.
    1327             :   }
    1328             : 
    1329             :   int max_register = FindAffectedRegisters(&affected_registers,
    1330      512388 :                                            compiler->zone());
    1331             :   OutSet registers_to_pop;
    1332             :   OutSet registers_to_clear;
    1333             :   PerformDeferredActions(assembler,
    1334             :                          max_register,
    1335             :                          affected_registers,
    1336             :                          &registers_to_pop,
    1337             :                          &registers_to_clear,
    1338      512388 :                          compiler->zone());
    1339      512388 :   if (cp_offset_ != 0) {
    1340      296033 :     assembler->AdvanceCurrentPosition(cp_offset_);
    1341             :   }
    1342             : 
    1343             :   // Create a new trivial state and generate the node with that.
    1344      512388 :   Label undo;
    1345      512388 :   assembler->PushBacktrack(&undo);  // |undo| is bound below, in front of the restore code.
    1346      512388 :   if (successor->KeepRecursing(compiler)) {
    1347             :     Trace new_state;
    1348      138699 :     successor->Emit(compiler, &new_state);  // Emit the successor inline.
    1349             :   } else {
    1350      373689 :     compiler->AddWork(successor);  // Otherwise queue it and jump to its label.
    1351      747378 :     assembler->GoTo(successor->label());
    1352             :   }
    1353             : 
    1354             :   // On backtrack we need to restore state.
    1355      512388 :   assembler->Bind(&undo);
    1356             :   RestoreAffectedRegisters(assembler,
    1357             :                            max_register,
    1358             :                            registers_to_pop,
    1359      512388 :                            registers_to_clear);
    1360      512388 :   if (backtrack() == nullptr) {
    1361      110583 :     assembler->Backtrack();
    1362             :   } else {
    1363      401805 :     assembler->PopCurrentPosition();
    1364      401805 :     assembler->GoTo(backtrack());
    1365             :   }
    1366             : }
    1367             : // Emits the exit code for a negative submatch whose body matched: restores position and
    1368             : // stack pointer from their registers, clears captures, then backtracks.  |trace| is not read.
    1369        2843 : void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
    1370             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    1371             : 
    1372             :   // Omit flushing the trace. We discard the entire stack frame anyway.
    1373             : 
    1374        2843 :   if (!label()->is_bound()) {
    1375             :     // We are completely independent of the trace, since we ignore it,
    1376             :     // so this code can be used as the generic version.
    1377        5604 :     assembler->Bind(label());
    1378             :   }
    1379             : 
    1380             :   // Throw away everything on the backtrack stack since the start
    1381             :   // of the negative submatch and restore the character position.
    1382        2843 :   assembler->ReadCurrentPositionFromRegister(current_position_register_);
    1383        2843 :   assembler->ReadStackPointerFromRegister(stack_pointer_register_);
    1384        2843 :   if (clear_capture_count_ > 0) {
    1385             :     // Clear any captures that might have been performed during the success
    1386             :     // of the body of the negative look-ahead.
    1387         107 :     int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;  // Inclusive index of the last register to clear.
    1388         107 :     assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
    1389             :   }
    1390             :   // Now that we have unwound the stack we find at the top of the stack the
    1391             :   // backtrack that the BeginSubmatch node got.
    1392        2843 :   assembler->Backtrack();
    1393        2843 : }
    1394             : // Emits the terminal code for this node.  A non-trivial trace is handed to Flush() first;
    1395             : // with a trivial trace the label is bound (if needed) and ACCEPT / BACKTRACK code is emitted.
    1396      182854 : void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    1397      182854 :   if (!trace->is_trivial()) {
    1398       91277 :     trace->Flush(compiler, this);  // Flush() emits or schedules this node again with a fresh Trace.
    1399       91277 :     return;
    1400             :   }
    1401             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    1402       91577 :   if (!label()->is_bound()) {
    1403      170776 :     assembler->Bind(label());
    1404             :   }
    1405       91577 :   switch (action_) {
    1406             :     case ACCEPT:
    1407       91277 :       assembler->Succeed();
    1408       91277 :       return;
    1409             :     case BACKTRACK:
    1410         300 :       assembler->GoTo(trace->backtrack());
    1411         300 :       return;
    1412             :     case NEGATIVE_SUBMATCH_SUCCESS:
    1413             :       // This case is handled in a different virtual method.
    1414           0 :       UNREACHABLE();
    1415             :   }
    1416           0 :   UNIMPLEMENTED();  // Only reachable if no case above returned.
    1417             : }
    1418             : 
    1419             : // Appends |guard| to guards_, allocating the list in |zone| (initial capacity 1) on first use.
    1420      903942 : void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
    1421     1807884 :   if (guards_ == nullptr) guards_ = new (zone) ZoneList<Guard*>(1, zone);
    1422      903942 :   guards_->Add(guard, zone);
    1423      903942 : }
    1424             : // Factory: allocates a SET_REGISTER ActionNode in on_success's zone and records
    1425             : // |reg| and |val| in data_.u_store_register.
    1426      903365 : ActionNode* ActionNode::SetRegister(int reg,
    1427             :                                     int val,
    1428             :                                     RegExpNode* on_success) {
    1429             :   ActionNode* result =
    1430             :       new(on_success->zone()) ActionNode(SET_REGISTER, on_success);
    1431      903365 :   result->data_.u_store_register.reg = reg;
    1432      903365 :   result->data_.u_store_register.value = val;
    1433      903365 :   return result;
    1434             : }
    1435             : 
    1436             : // Factory: allocates an INCREMENT_REGISTER ActionNode in on_success's zone for register |reg|.
    1437      903365 : ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
    1438             :   ActionNode* result =
    1439             :       new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
    1440      903365 :   result->data_.u_increment_register.reg = reg;
    1441      903365 :   return result;
    1442             : }
    1443             : 
    1444             : // Factory: allocates a STORE_POSITION ActionNode in on_success's zone, recording |reg| and |is_capture|.
    1445      226407 : ActionNode* ActionNode::StorePosition(int reg,
    1446             :                                       bool is_capture,
    1447             :                                       RegExpNode* on_success) {
    1448             :   ActionNode* result =
    1449             :       new(on_success->zone()) ActionNode(STORE_POSITION, on_success);
    1450      226407 :   result->data_.u_position_register.reg = reg;
    1451      226407 :   result->data_.u_position_register.is_capture = is_capture;
    1452      226407 :   return result;
    1453             : }
    1454             : 
    1455             : // Factory: allocates a CLEAR_CAPTURES ActionNode in on_success's zone, storing both ends of |range|.
    1456        2382 : ActionNode* ActionNode::ClearCaptures(Interval range,
    1457             :                                       RegExpNode* on_success) {
    1458             :   ActionNode* result =
    1459             :       new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
    1460        2382 :   result->data_.u_clear_captures.range_from = range.from();
    1461        2382 :   result->data_.u_clear_captures.range_to = range.to();
    1462        2382 :   return result;
    1463             : }
    1464             : // Factory: allocates a BEGIN_SUBMATCH ActionNode in on_success's zone, recording the stack-pointer
    1465             : // and current-position registers.  The clear_register_* fields of u_submatch are not written here.
    1466        4467 : ActionNode* ActionNode::BeginSubmatch(int stack_reg,
    1467             :                                       int position_reg,
    1468             :                                       RegExpNode* on_success) {
    1469             :   ActionNode* result =
    1470             :       new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
    1471        4467 :   result->data_.u_submatch.stack_pointer_register = stack_reg;
    1472        4467 :   result->data_.u_submatch.current_position_register = position_reg;
    1473        4467 :   return result;
    1474             : }
    1475             : // Factory: allocates a POSITIVE_SUBMATCH_SUCCESS ActionNode in on_success's zone, recording the
    1476             : // stack-pointer and position registers plus the count and first index of registers to clear.
    1477        1655 : ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
    1478             :                                                 int position_reg,
    1479             :                                                 int clear_register_count,
    1480             :                                                 int clear_register_from,
    1481             :                                                 RegExpNode* on_success) {
    1482             :   ActionNode* result =
    1483             :       new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
    1484        1655 :   result->data_.u_submatch.stack_pointer_register = stack_reg;
    1485        1655 :   result->data_.u_submatch.current_position_register = position_reg;
    1486        1655 :   result->data_.u_submatch.clear_register_count = clear_register_count;
    1487        1655 :   result->data_.u_submatch.clear_register_from = clear_register_from;
    1488        1655 :   return result;
    1489             : }
    1490             : 
    1491             : // Factory: allocates an EMPTY_MATCH_CHECK ActionNode in on_success's zone, recording the start register, repetition register and repetition limit.
    1492         537 : ActionNode* ActionNode::EmptyMatchCheck(int start_register,
    1493             :                                         int repetition_register,
    1494             :                                         int repetition_limit,
    1495             :                                         RegExpNode* on_success) {
    1496             :   ActionNode* result =
    1497             :       new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
    1498         537 :   result->data_.u_empty_match_check.start_register = start_register;
    1499         537 :   result->data_.u_empty_match_check.repetition_register = repetition_register;
    1500         537 :   result->data_.u_empty_match_check.repetition_limit = repetition_limit;
    1501         537 :   return result;
    1502             : }
    1503             : // Defines Type##Node::Accept for each node type listed by FOR_EACH_NODE_TYPE;
    1504             : // every generated method forwards to the matching visitor->Visit##Type(this).
    1505             : #define DEFINE_ACCEPT(Type)                                          \
    1506             :   void Type##Node::Accept(NodeVisitor* visitor) {                    \
    1507             :     visitor->Visit##Type(this);                                      \
    1508             :   }
    1509      727470 : FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
    1510             : #undef DEFINE_ACCEPT
    1511             : 
    1512             : // Visitor dispatch for loop choice nodes; written by hand rather than generated by DEFINE_ACCEPT.
    1513      145900 : void LoopChoiceNode::Accept(NodeVisitor* visitor) {
    1514      145900 :   visitor->VisitLoopChoice(this);
    1515      145900 : }
    1516             : 
    1517             : 
    1518             : // -------------------------------------------------------------------
    1519             : // Emit code.
    1520             : // Emits the register test for |guard| with the trace's backtrack label as target.  The emitted test is
    1521             : // the opposite comparison (LT guard -> IfRegisterGE, GEQ guard -> IfRegisterLT), so it fires when the guard fails.
    1522        3942 : void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
    1523             :                                Guard* guard,
    1524             :                                Trace* trace) {
    1525        3942 :   switch (guard->op()) {
    1526             :     case Guard::LT:
    1527             :       DCHECK(!trace->mentions_reg(guard->reg()));  // The trace must hold no deferred action on the guard register.
    1528             :       macro_assembler->IfRegisterGE(guard->reg(),
    1529             :                                     guard->value(),
    1530        2620 :                                     trace->backtrack());
    1531        2620 :       break;
    1532             :     case Guard::GEQ:
    1533             :       DCHECK(!trace->mentions_reg(guard->reg()));
    1534             :       macro_assembler->IfRegisterLT(guard->reg(),
    1535             :                                     guard->value(),
    1536        1322 :                                     trace->backtrack());
    1537        1322 :       break;
    1538             :   }
    1539        3942 : }
    1540             : 
    1541             : // Writes the case-insensitive equivalence class of |character| into |letters|.
    1542             : // Returns the number of characters in the equivalence class, omitting those
    1543             : // that cannot occur in the source string because it is Latin1.
    1544       22320 : static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
    1545             :                                      bool one_byte_subject,
    1546             :                                      unibrow::uchar* letters,
    1547             :                                      int letter_length) {  // |letter_length| is only read in the V8_INTL_SUPPORT branch.
    1548             : #ifdef V8_INTL_SUPPORT
    1549       44640 :   icu::UnicodeSet set;
    1550       22320 :   set.add(character);
    1551       22320 :   set = set.closeOver(USET_CASE_INSENSITIVE);
    1552       22320 :   int32_t range_count = set.getRangeCount();
    1553             :   int items = 0;
    1554      109048 :   for (int32_t i = 0; i < range_count; i++) {
    1555       43364 :     UChar32 start = set.getRangeStart(i);
    1556       43364 :     UChar32 end = set.getRangeEnd(i);
    1557       43364 :     CHECK(end - start + items <= letter_length);  // NOTE(review): the loop below treats |end| as inclusive (end - start + 1 writes), so this bound looks one short - confirm.
    1558      130388 :     while (start <= end) {
    1559       44304 :       if (one_byte_subject && start > String::kMaxOneByteCharCode) break;  // Rest of this range is above the one-byte limit.
    1560       43512 :       letters[items++] = (unibrow::uchar)(start);
    1561       43512 :       start++;
    1562             :     }
    1563             :   }
    1564       22320 :   return items;
    1565             : #else
    1566             :   int length =
    1567             :       isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
    1568             :   // Unibrow returns 0 or 1 for characters where case independence is
    1569             :   // trivial.
    1570             :   if (length == 0) {
    1571             :     letters[0] = character;
    1572             :     length = 1;
    1573             :   }
    1574             : 
    1575             :   if (one_byte_subject) {
    1576             :     int new_length = 0;
    1577             :     for (int i = 0; i < length; i++) {  // Compact |letters| in place, keeping only one-byte characters.
    1578             :       if (letters[i] <= String::kMaxOneByteCharCode) {
    1579             :         letters[new_length++] = letters[i];
    1580             :       }
    1581             :     }
    1582             :     length = new_length;
    1583             :   }
    1584             : 
    1585             :   return length;
    1586             : #endif  // V8_INTL_SUPPORT
    1587             : }
    1588             : 
                       // Emits a test of the code unit at |cp_offset| against the single value
                       // |c|, with |on_failure| passed to the assembler as the mismatch target.
                       // Unless the character is already |preloaded|, a load of the current
                       // character is emitted first; |check| is forwarded to that load.
                       // Returns true iff this call emitted the load.  Note that the result does
                       // not depend on |check|.  |isolate| is not used here.
    1589      584865 : static inline bool EmitSimpleCharacter(Isolate* isolate,
    1590             :                                        RegExpCompiler* compiler,
    1591             :                                        uc16 c,
    1592             :                                        Label* on_failure,
    1593             :                                        int cp_offset,
    1594             :                                        bool check,
    1595             :                                        bool preloaded) {
    1596             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    1597             :   bool bound_checked = false;
    1598      584865 :   if (!preloaded) {
    1599      584865 :     assembler->LoadCurrentCharacter(
    1600             :         cp_offset,
    1601             :         on_failure,
    1602     1169730 :         check);
                           // NOTE(review): set to true even when |check| is false, whereas
                           // EmitAtomNonLetter reports |check| in the same situation - confirm
                           // which one callers expect.
    1603             :     bound_checked = true;
    1604             :   }
    1605      584865 :   assembler->CheckNotCharacter(c, on_failure);
    1606      584865 :   return bound_checked;
    1607             : }
    1608             : 
    1609             : 
    1610             : // Only emits non-letters (things that don't have case).  Only used for case
    1611             : // independent matches.
                       // |c| counts as a non-letter when GetCaseIndependentLetters yields exactly
                       // one equivalent for it; in that case a plain comparison against |c| is
                       // emitted (preceded by a load unless |preloaded|).  Nothing is emitted
                       // when there are zero or several equivalents.
                       // Returns |check| if this call emitted the load, false otherwise.
    1612        5619 : static inline bool EmitAtomNonLetter(Isolate* isolate,
    1613             :                                      RegExpCompiler* compiler,
    1614             :                                      uc16 c,
    1615             :                                      Label* on_failure,
    1616             :                                      int cp_offset,
    1617             :                                      bool check,
    1618             :                                      bool preloaded) {
    1619             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    1620             :   bool one_byte = compiler->one_byte();
                         // The buffer size must agree with the capacity (4) passed below.
    1621             :   unibrow::uchar chars[4];
    1622        5619 :   int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
    1623        5619 :   if (length < 1) {
    1624             :     // This can't match.  Must be a one-byte subject and a non-one-byte
    1625             :     // character.  We do not need to do anything since the one-byte pass
    1626             :     // already handled this.
    1627             :     return false;  // Bounds not checked.
    1628             :   }
    1629             :   bool checked = false;
    1630             :   // We handle the length > 1 case in a later pass.
    1631        5614 :   if (length == 1) {
    1632         371 :     if (one_byte && c > String::kMaxOneByteCharCodeU) {
    1633             :       // Can't match - see above.
    1634             :       return false;  // Bounds not checked.
    1635             :     }
    1636         371 :     if (!preloaded) {
    1637         371 :       macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
                             // The load only bounds-checks when |check| asks for it.
    1638             :       checked = check;
    1639             :     }
    1640         371 :     macro_assembler->CheckNotCharacter(c, on_failure);
    1641             :   }
    1642             :   return checked;
    1643             : }
    1644             : 
    1645             : 
                       // Tries to test the current character against both |c1| and |c2| with a
                       // single masked comparison.  Callers are expected to pass c2 > c1 (see
                       // the DCHECKs).  Two shapes are recognised:
                       //   * c1 and c2 differ in exactly one bit;
                       //   * c2 - c1 is a power of two and c1 >= c2 - c1.
                       // Returns true if a check was emitted, false if neither shape applies (in
                       // which case nothing is emitted and the caller must emit its own test).
    1646        4789 : static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
    1647             :                                       bool one_byte, uc16 c1, uc16 c2,
    1648             :                                       Label* on_failure) {
                         // All bits that can be set in a character of the subject encoding.
    1649             :   uc16 char_mask;
    1650        4789 :   if (one_byte) {
    1651             :     char_mask = String::kMaxOneByteCharCode;
    1652             :   } else {
    1653             :     char_mask = String::kMaxUtf16CodeUnit;
    1654             :   }
    1655        4789 :   uc16 exor = c1 ^ c2;
    1656             :   // Check whether exor has only one bit set.
    1657        4789 :   if (((exor - 1) & exor) == 0) {
    1658             :     // If c1 and c2 differ only by one bit.
    1659             :     // Ecma262UnCanonicalize always gives the highest number last.
    1660             :     DCHECK(c2 > c1);
                           // Mask away the differing bit; with c2 > c1 that bit is clear in c1,
                           // so both characters compare equal to c1 after masking.
    1661        4699 :     uc16 mask = char_mask ^ exor;
    1662        4699 :     macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
    1663        4699 :     return true;
    1664             :   }
    1665             :   DCHECK(c2 > c1);
    1666          90 :   uc16 diff = c2 - c1;
                         // Same power-of-two test as above, applied to the arithmetic difference.
    1667          90 :   if (((diff - 1) & diff) == 0 && c1 >= diff) {
    1668             :     // If the characters differ by 2^n but don't differ by one bit then
    1669             :     // subtract the difference from the found character, then do the or
    1670             :     // trick.  We avoid the theoretical case where negative numbers are
    1671             :     // involved in order to simplify code generation.
    1672          60 :     uc16 mask = char_mask ^ diff;
    1673          60 :     macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
    1674             :                                                     diff,
    1675             :                                                     mask,
    1676         120 :                                                     on_failure);
    1677          60 :     return true;
    1678             :   }
    1679             :   return false;
    1680             : }
    1681             : 
                       // Common signature of the per-character emitters in this file
                       // (EmitSimpleCharacter, EmitAtomNonLetter and EmitAtomLetter all match
                       // it).  The bool result reports whether the emitter performed the bounds
                       // check; see the individual functions for the exact meaning.
    1682             : using EmitCharacterFunction = bool(Isolate* isolate, RegExpCompiler* compiler,
    1683             :                                    uc16 c, Label* on_failure, int cp_offset,
    1684             :                                    bool check, bool preloaded);
    1685             : 
    1686             : // Only emits letters (things that have case).  Only used for case independent
    1687             : // matches.
                       // |c| counts as a letter when GetCaseIndependentLetters yields two to four
                       // equivalents for it.  The emitted code accepts any of those equivalents
                       // and uses |on_failure| as the target when none of them matches.
                       // Returns false (emitting nothing) for zero or one equivalent, true
                       // otherwise - independent of |check| and |preloaded|.
    1688        5619 : static inline bool EmitAtomLetter(Isolate* isolate,
    1689             :                                   RegExpCompiler* compiler,
    1690             :                                   uc16 c,
    1691             :                                   Label* on_failure,
    1692             :                                   int cp_offset,
    1693             :                                   bool check,
    1694             :                                   bool preloaded) {
    1695             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    1696             :   bool one_byte = compiler->one_byte();
                         // The buffer size must agree with the capacity (4) passed below.
    1697             :   unibrow::uchar chars[4];
    1698        5619 :   int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
    1699        5619 :   if (length <= 1) return false;
    1700             :   // We may not need to check against the end of the input string
    1701             :   // if this character lies before a character that matched.
    1702        5243 :   if (!preloaded) {
    1703        4882 :     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    1704             :   }
                         // Target for "one of the alternatives matched"; bound after the last
                         // comparison in the cases that use it.
    1705        5243 :   Label ok;
    1706        5243 :   switch (length) {
    1707             :     case 2: {
                             // Prefer the single masked comparison; fall back to two explicit
                             // comparisons when the pair does not have a suitable bit pattern.
    1708        4789 :       if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
    1709        4789 :                                     chars[1], on_failure)) {
    1710             :       } else {
    1711          30 :         macro_assembler->CheckCharacter(chars[0], &ok);
    1712          30 :         macro_assembler->CheckNotCharacter(chars[1], on_failure);
    1713          30 :         macro_assembler->Bind(&ok);
    1714             :       }
    1715             :       break;
    1716             :     }
    1717             :     case 4:
                           // Test the fourth alternative, then share the three-way code below.
    1718          60 :       macro_assembler->CheckCharacter(chars[3], &ok);
    1719             :       V8_FALLTHROUGH;
    1720             :     case 3:
    1721         454 :       macro_assembler->CheckCharacter(chars[0], &ok);
    1722         454 :       macro_assembler->CheckCharacter(chars[1], &ok);
    1723         454 :       macro_assembler->CheckNotCharacter(chars[2], on_failure);
    1724         454 :       macro_assembler->Bind(&ok);
    1725         454 :       break;
    1726             :     default:
    1727           0 :       UNREACHABLE();
    1728             :       break;
    1729             :   }
    1730             :   return true;
    1731             : }
    1732             : 
    1733             : 
                       // Emits a single-boundary dispatch on the current character: characters
                       // below |border| are meant for |below|, all others for |above_or_equal|.
                       // Whichever of the two labels equals |fall_through| is reached by simply
                       // not branching, so no jump is emitted for it.
    1734        8607 : static void EmitBoundaryTest(RegExpMacroAssembler* masm,
    1735             :                              int border,
    1736             :                              Label* fall_through,
    1737             :                              Label* above_or_equal,
    1738             :                              Label* below) {
    1739        8607 :   if (below != fall_through) {
    1740        8242 :     masm->CheckCharacterLT(border, below);
    1741        8242 :     if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
    1742             :   } else {
                           // |below| is the fall-through, so branch on the opposite condition:
                           // "> border - 1" is the same as ">= border".
    1743         365 :     masm->CheckCharacterGT(border - 1, above_or_equal);
    1744             :   }
    1745        8607 : }
    1746             : 
    1747             : 
                       // Emits a dispatch on whether the current character lies in the inclusive
                       // interval [first, last]: members are meant for |in_range|, everything
                       // else for |out_of_range|.  A label equal to |fall_through| is reached by
                       // not branching.  A one-element interval (first == last) uses a plain
                       // character comparison instead of a range comparison.
    1748      161283 : static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
    1749             :                                    int first,
    1750             :                                    int last,
    1751             :                                    Label* fall_through,
    1752             :                                    Label* in_range,
    1753             :                                    Label* out_of_range) {
    1754      161283 :   if (in_range == fall_through) {
                           // Only the out-of-range side needs a branch.
    1755      109069 :     if (first == last) {
    1756       15089 :       masm->CheckNotCharacter(first, out_of_range);
    1757             :     } else {
    1758       93980 :       masm->CheckCharacterNotInRange(first, last, out_of_range);
    1759             :     }
    1760             :   } else {
                           // Branch for the in-range side, then jump for the out-of-range side
                           // unless that one is the fall-through.
    1761       52214 :     if (first == last) {
    1762       28524 :       masm->CheckCharacter(first, in_range);
    1763             :     } else {
    1764       23690 :       masm->CheckCharacterInRange(first, last, in_range);
    1765             :     }
    1766       52214 :     if (out_of_range != fall_through) masm->GoTo(out_of_range);
    1767             :   }
    1768      161283 : }
    1769             : 
    1770             : 
    1771             : // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
    1772             : // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
                       // Builds a kTableSize-entry table of 0/1 flags from the boundaries
                       // ranges[start_index..end_index] (all of which must lie on the same
                       // kTableSize page as |min_char|), copies it into a ByteArray and emits a
                       // CheckBitInTable against it.  Table positions are the low bits
                       // (& kTableMask) of the character values.
    1773        5887 : static void EmitUseLookupTable(
    1774             :     RegExpMacroAssembler* masm,
    1775             :     ZoneList<int>* ranges,
    1776             :     int start_index,
    1777             :     int end_index,
    1778             :     int min_char,
    1779             :     Label* fall_through,
    1780             :     Label* even_label,
    1781             :     Label* odd_label) {
    1782             :   static const int kSize = RegExpMacroAssembler::kTableSize;
    1783             :   static const int kMask = RegExpMacroAssembler::kTableMask;
    1784             : 
                         // |base| is only consumed by the DCHECKs below; USE() keeps it from
                         // being reported as unused when they are compiled out.
    1785             :   int base = (min_char & ~kMask);
    1786             :   USE(base);
    1787             : 
    1788             :   // Assert that everything is on one kTableSize page.
    1789             :   for (int i = start_index; i <= end_index; i++) {
    1790             :     DCHECK_EQ(ranges->at(i) & ~kMask, base);
    1791             :   }
    1792             :   DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
    1793             : 
    1794             :   char templ[kSize];
    1795             :   Label* on_bit_set;
    1796             :   Label* on_bit_clear;
    1797             :   int bit;
                         // Choose the polarity so that the fall-through label is the one reached
                         // when the table bit is clear.  |bit| starts out as the value for the
                         // region below the first boundary, which belongs to the odd side.
    1798        5887 :   if (even_label == fall_through) {
    1799             :     on_bit_set = odd_label;
    1800             :     on_bit_clear = even_label;
    1801             :     bit = 1;
    1802             :   } else {
    1803             :     on_bit_set = even_label;
    1804             :     on_bit_clear = odd_label;
    1805             :     bit = 0;
    1806             :   }
                         // Entries below the first boundary.
    1807      254475 :   for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
    1808      124294 :     templ[i] = bit;
    1809             :   }
    1810             :   int j = 0;
    1811        5887 :   bit ^= 1;
                         // One run per interval between consecutive boundaries, flipping the flag
                         // at every boundary.
    1812       95590 :   for (int i = start_index; i < end_index; i++) {
    1813     1209206 :     for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
    1814      514900 :       templ[j] = bit;
    1815             :     }
    1816       89703 :     bit ^= 1;
    1817             :   }
                         // Entries from the last boundary to the end of the page.
    1818      234571 :   for (int i = j; i < kSize; i++) {
    1819      114342 :     templ[i] = bit;
    1820             :   }
    1821             :   Factory* factory = masm->isolate()->factory();
    1822             :   // TODO(erikcorry): Cache these.
    1823        5887 :   Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
    1824     1512959 :   for (int i = 0; i < kSize; i++) {
    1825      753536 :     ba->set(i, templ[i]);
    1826             :   }
    1827        5887 :   masm->CheckBitInTable(ba, on_bit_set);
    1828        5887 :   if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
    1829        5887 : }
    1830             : 
    1831             : 
                       // Emits a test for the single interval
                       // [ranges[cut_index], ranges[cut_index + 1] - 1] that targets the label
                       // matching the interval's parity relative to |start_index|, and then
                       // removes that interval's two boundaries from |ranges| in place.
                       // Afterwards the surviving boundaries occupy indices start_index + 1 to
                       // end_index - 1, which is the sub-range GenerateBranches continues with.
    1832       36263 : static void CutOutRange(RegExpMacroAssembler* masm,
    1833             :                         ZoneList<int>* ranges,
    1834             :                         int start_index,
    1835             :                         int end_index,
    1836             :                         int cut_index,
    1837             :                         Label* even_label,
    1838             :                         Label* odd_label) {
    1839       36263 :   bool odd = (((cut_index - start_index) & 1) == 1);
    1840       36263 :   Label* in_range_label = odd ? odd_label : even_label;
                         // |dummy| serves as both the fall-through and the out-of-range label, so
                         // EmitDoubleBoundaryTest emits only the in-range branch and never links
                         // |dummy| (asserted below).
    1841       36263 :   Label dummy;
    1842       72526 :   EmitDoubleBoundaryTest(masm,
    1843             :                          ranges->at(cut_index),
    1844       72526 :                          ranges->at(cut_index + 1) - 1,
    1845             :                          &dummy,
    1846             :                          in_range_label,
    1847       36263 :                          &dummy);
    1848             :   DCHECK(!dummy.is_linked());
    1849             :   // Cut out the single range by rewriting the array.  This creates a new
    1850             :   // range that is a merger of the two ranges on either side of the one we
    1851             :   // are cutting out.  The oddity of the labels is preserved.
                         // Boundaries before the cut move up by one slot ...
    1852       75633 :   for (int j = cut_index; j > start_index; j--) {
    1853       39370 :     ranges->at(j) = ranges->at(j - 1);
    1854             :   }
                         // ... and boundaries after the cut move down by one slot.
    1855      184239 :   for (int j = cut_index + 1; j < end_index; j++) {
    1856      147976 :     ranges->at(j) = ranges->at(j + 1);
    1857             :   }
    1858       36263 : }
    1859             : 
    1860             : 
    1861             : // Unicode case.  Split the search space into kSize spaces that are handled
    1862             : // with recursion.
                       // Chooses a split point for the boundaries ranges[start_index..end_index].
                       // Outputs:
                       //   *border           character value at which the space is split;
                       //   *new_end_index    last boundary index belonging to the lower part;
                       //   *new_start_index  first boundary index belonging to the upper part.
                       // |ranges| itself is not modified.
    1863       19808 : static void SplitSearchSpace(ZoneList<int>* ranges,
    1864             :                              int start_index,
    1865             :                              int end_index,
    1866             :                              int* new_start_index,
    1867             :                              int* new_end_index,
    1868             :                              int* border) {
    1869             :   static const int kSize = RegExpMacroAssembler::kTableSize;
    1870             :   static const int kMask = RegExpMacroAssembler::kTableMask;
    1871             : 
    1872       19808 :   int first = ranges->at(start_index);
    1873       19808 :   int last = ranges->at(end_index) - 1;
    1874             : 
                         // Default split: the end of the kSize-aligned page that contains the
                         // first boundary.
    1875       19808 :   *new_start_index = start_index;
    1876       19808 :   *border = (ranges->at(start_index) & ~kMask) + kSize;
    1877      274872 :   while (*new_start_index < end_index) {
    1878      146124 :     if (ranges->at(*new_start_index) > *border) break;
    1879      127532 :     (*new_start_index)++;
    1880             :   }
    1881             :   // new_start_index is the index of the first edge that is beyond the
    1882             :   // current kSize space.
    1883             : 
    1884             :   // For very large search spaces we do a binary chop search of the non-Latin1
    1885             :   // space instead of just going to the end of the current kSize space.  The
    1886             :   // heuristics are complicated a little by the fact that any 128-character
    1887             :   // encoding space can be quickly tested with a table lookup, so we don't
    1888             :   // wish to do binary chop search at a smaller granularity than that.  A
    1889             :   // 128-character space can take up a lot of space in the ranges array if,
    1890             :   // for example, we only want to match every second character (eg. the lower
    1891             :   // case characters on some Unicode pages).
    1892       19808 :   int binary_chop_index = (end_index + start_index) / 2;
    1893             :   // The first test ensures that we get to the code that handles the Latin1
    1894             :   // range with a single not-taken branch, speeding up this important
    1895             :   // character range (even non-Latin1 charset-based text has spaces and
    1896             :   // punctuation).
    1897       54537 :   if (*border - 1 > String::kMaxOneByteCharCode &&  // Latin1 case.
    1898       27743 :       end_index - start_index > (*new_start_index - start_index) * 2 &&
    1899       56223 :       last - first > kSize * 2 && binary_chop_index > *new_start_index &&
    1900       23386 :       ranges->at(binary_chop_index) >= first + 2 * kSize) {
    1901             :     int scan_forward_for_section_border = binary_chop_index;;
                           // Candidate split: the end of the page containing the middle boundary.
    1902        9755 :     int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
    1903             : 
                           // Adopt the candidate only if some boundary lies strictly beyond it;
                           // otherwise the default split computed above is kept.
    1904      126661 :     while (scan_forward_for_section_border < end_index) {
    1905       66206 :       if (ranges->at(scan_forward_for_section_border) > new_border) {
    1906        7753 :         *new_start_index = scan_forward_for_section_border;
    1907        7753 :         *border = new_border;
    1908        7753 :         break;
    1909             :       }
    1910       58453 :       scan_forward_for_section_border++;
    1911             :     }
    1912             :   }
    1913             : 
    1914             :   DCHECK(*new_start_index > start_index);
    1915       19808 :   *new_end_index = *new_start_index - 1;
                         // A boundary that coincides with the border is expressed by the border
                         // test itself, so it is left out of the lower part.
    1916       19808 :   if (ranges->at(*new_end_index) == *border) {
    1917        2843 :     (*new_end_index)--;
    1918             :   }
                         // No boundary lies above the border: clamp the border to the last
                         // boundary so that the upper part is empty.
    1919       39616 :   if (*border >= ranges->at(end_index)) {
    1920        1214 :     *border = ranges->at(end_index);
    1921        1214 :     *new_start_index = end_index;  // Won't be used.
    1922        1214 :     *new_end_index = end_index - 1;
    1923             :   }
    1924       19808 : }
    1925             : 
    1926             : // Gets a series of segment boundaries representing a character class.  If the
    1927             : // character is in the range between an even and an odd boundary (counting from
    1928             : // start_index) then go to even_label, otherwise go to odd_label.  We already
    1929             : // know that the character is in the range of min_char to max_char inclusive.
    1930             : // Either label can be nullptr indicating backtracking.  Either label can also
    1931             : // be equal to the fall_through label.
                       // Strategy, in the order tried below: one boundary -> single comparison;
                       // two boundaries -> single interval test; up to seven boundaries -> cut
                       // out one interval and recurse; all candidates on one table page -> bit
                       // table lookup; otherwise split the search space and recurse on each
                       // half.  Note that |ranges| may be rewritten in place (see CutOutRange).
    1932      203831 : static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
    1933             :                              int start_index, int end_index, uc32 min_char,
    1934             :                              uc32 max_char, Label* fall_through,
    1935             :                              Label* even_label, Label* odd_label) {
    1936             :   DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
    1937             :   DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
    1938             : 
    1939      203831 :   int first = ranges->at(start_index);
    1940      203831 :   int last = ranges->at(end_index) - 1;
    1941             : 
    1942             :   DCHECK_LT(min_char, first);
    1943             : 
    1944             :   // Just need to test if the character is before or on-or-after
    1945             :   // a particular character.
    1946      203831 :   if (start_index == end_index) {
    1947        8607 :     EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
    1948        8607 :     return;
    1949             :   }
    1950             : 
    1951             :   // Another almost trivial case:  There is one interval in the middle that is
    1952             :   // different from the end intervals.
    1953      195224 :   if (start_index + 1 == end_index) {
    1954             :     EmitDoubleBoundaryTest(
    1955      125020 :         masm, first, last, fall_through, even_label, odd_label);
    1956      125020 :     return;
    1957             :   }
    1958             : 
    1959             :   // It's not worth using table lookup if there are very few intervals in the
    1960             :   // character class.
    1961       70204 :   if (end_index - start_index <= 6) {
    1962             :     // It is faster to test for individual characters, so we look for those
    1963             :     // first, then try arbitrary ranges in the second round.
    1964             :     static int kNoCutIndex = -1;
    1965       36263 :     int cut = kNoCutIndex;
                           // Look for the first interval that contains exactly one character.
    1966      111658 :     for (int i = start_index; i < end_index; i++) {
    1967      189112 :       if (ranges->at(i) == ranges->at(i + 1) - 1) {
    1968             :         cut = i;
    1969             :         break;
    1970             :       }
    1971             :     }
                           // No single-character interval: cut out the first interval instead.
    1972       36263 :     if (cut == kNoCutIndex) cut = start_index;
    1973             :     CutOutRange(
    1974       36263 :         masm, ranges, start_index, end_index, cut, even_label, odd_label);
    1975             :     DCHECK_GE(end_index - start_index, 2);
                           // CutOutRange compacted the remaining boundaries into
                           // [start_index + 1, end_index - 1]; continue with those.
    1976       36263 :     GenerateBranches(masm,
    1977             :                      ranges,
    1978             :                      start_index + 1,
    1979             :                      end_index - 1,
    1980             :                      min_char,
    1981             :                      max_char,
    1982             :                      fall_through,
    1983             :                      even_label,
    1984       36263 :                      odd_label);
    1985       36263 :     return;
    1986             :   }
    1987             : 
    1988             :   // If there are a lot of intervals in the regexp, then we will use tables to
    1989             :   // determine whether the character is inside or outside the character class.
    1990             :   static const int kBits = RegExpMacroAssembler::kTableSizeBits;
    1991             : 
                         // The whole candidate range sits on one table page: one lookup decides.
    1992       33941 :   if ((max_char >> kBits) == (min_char >> kBits)) {
    1993             :     EmitUseLookupTable(masm,
    1994             :                        ranges,
    1995             :                        start_index,
    1996             :                        end_index,
    1997             :                        min_char,
    1998             :                        fall_through,
    1999             :                        even_label,
    2000        5887 :                        odd_label);
    2001        5887 :     return;
    2002             :   }
    2003             : 
                         // The first boundary is on a later page than |min_char|: dispose of
                         // everything below |first| with one comparison (that region belongs to
                         // odd_label), then continue after the first boundary.  Dropping one
                         // boundary shifts the parity, hence the swapped labels.
    2004       28054 :   if ((min_char >> kBits) != (first >> kBits)) {
    2005        8246 :     masm->CheckCharacterLT(first, odd_label);
    2006             :     GenerateBranches(masm,
    2007             :                      ranges,
    2008             :                      start_index + 1,
    2009             :                      end_index,
    2010             :                      first,
    2011             :                      max_char,
    2012             :                      fall_through,
    2013             :                      odd_label,
    2014        8246 :                      even_label);
    2015        8246 :     return;
    2016             :   }
    2017             : 
    2018       19808 :   int new_start_index = 0;
    2019       19808 :   int new_end_index = 0;
    2020       19808 :   int border = 0;
    2021             : 
    2022             :   SplitSearchSpace(ranges,
    2023             :                    start_index,
    2024             :                    end_index,
    2025             :                    &new_start_index,
    2026             :                    &new_end_index,
    2027       19808 :                    &border);
    2028             : 
                         // Characters at or above |border| normally go to |handle_rest|, where
                         // the upper half of the boundaries is processed.
    2029       19808 :   Label handle_rest;
    2030             :   Label* above = &handle_rest;
    2031       19808 :   if (border == last + 1) {
    2032             :     // We didn't find any section that started after the limit, so everything
    2033             :     // above the border is one of the terminal labels.
    2034        1214 :     above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
    2035             :     DCHECK(new_end_index == end_index - 1);
    2036             :   }
    2037             : 
    2038             :   DCHECK_LE(start_index, new_end_index);
    2039             :   DCHECK_LE(new_start_index, end_index);
    2040             :   DCHECK_LT(start_index, new_start_index);
    2041             :   DCHECK_LT(new_end_index, end_index);
    2042             :   DCHECK(new_end_index + 1 == new_start_index ||
    2043             :          (new_end_index + 2 == new_start_index &&
    2044             :           border == ranges->at(new_end_index + 1)));
    2045             :   DCHECK_LT(min_char, border - 1);
    2046             :   DCHECK_LT(border, max_char);
    2047             :   DCHECK_LT(ranges->at(new_end_index), border);
    2048             :   DCHECK(border < ranges->at(new_start_index) ||
    2049             :          (border == ranges->at(new_start_index) &&
    2050             :           new_start_index == end_index &&
    2051             :           new_end_index == end_index - 1 &&
    2052             :           border == last + 1));
    2053             :   DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
    2054             : 
    2055       19808 :   masm->CheckCharacterGT(border - 1, above);
                         // |dummy| is passed as the fall-through label of both recursive calls;
                         // it is never bound here, so neither half treats one of the real labels
                         // as reachable by falling through.
    2056       19808 :   Label dummy;
                         // Lower half: characters in [min_char, border - 1].
    2057       19808 :   GenerateBranches(masm,
    2058             :                    ranges,
    2059             :                    start_index,
    2060             :                    new_end_index,
    2061             :                    min_char,
    2062             :                    border - 1,
    2063             :                    &dummy,
    2064             :                    even_label,
    2065       19808 :                    odd_label);
                         // Upper half, only if the border test actually targets |handle_rest|.
    2066       19808 :   if (handle_rest.is_linked()) {
    2067       18594 :     masm->Bind(&handle_rest);
                           // Swap the labels when the upper half starts at a boundary whose
                           // parity differs from that of |start_index|.
    2068       18594 :     bool flip = (new_start_index & 1) != (start_index & 1);
    2069       18594 :     GenerateBranches(masm,
    2070             :                      ranges,
    2071             :                      new_start_index,
    2072             :                      end_index,
    2073             :                      border,
    2074             :                      max_char,
    2075             :                      &dummy,
    2076             :                      flip ? odd_label : even_label,
    2077       18594 :                      flip ? even_label : odd_label);
    2078             :   }
    2079             : }
    2080             : 
    2081             : 
    2082      212367 : static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
    2083             :                           RegExpCharacterClass* cc, bool one_byte,
    2084             :                           Label* on_failure, int cp_offset, bool check_offset,
    2085             :                           bool preloaded, Zone* zone) {
    2086             :   ZoneList<CharacterRange>* ranges = cc->ranges(zone);
    2087      212367 :   CharacterRange::Canonicalize(ranges);
    2088             : 
    2089             :   int max_char;
    2090      212367 :   if (one_byte) {
    2091             :     max_char = String::kMaxOneByteCharCode;
    2092             :   } else {
    2093             :     max_char = String::kMaxUtf16CodeUnit;
    2094             :   }
    2095             : 
    2096             :   int range_count = ranges->length();
    2097             : 
    2098      212367 :   int last_valid_range = range_count - 1;
    2099      577443 :   while (last_valid_range >= 0) {
    2100             :     CharacterRange& range = ranges->at(last_valid_range);
    2101      394870 :     if (range.from() <= max_char) {
    2102             :       break;
    2103             :     }
    2104      182538 :     last_valid_range--;
    2105             :   }
    2106             : 
    2107      212367 :   if (last_valid_range < 0) {
    2108          35 :     if (!cc->is_negated()) {
    2109          10 :       macro_assembler->GoTo(on_failure);
    2110             :     }
    2111          35 :     if (check_offset) {
    2112          33 :       macro_assembler->CheckPosition(cp_offset, on_failure);
    2113             :     }
    2114       91447 :     return;
    2115             :   }
    2116             : 
    2117      402016 :   if (last_valid_range == 0 &&
    2118             :       ranges->at(0).IsEverything(max_char)) {
    2119       82625 :     if (cc->is_negated()) {
    2120          31 :       macro_assembler->GoTo(on_failure);
    2121             :     } else {
    2122             :       // This is a common case hit by non-anchored expressions.
    2123       82594 :       if (check_offset) {
    2124       53793 :         macro_assembler->CheckPosition(cp_offset, on_failure);
    2125             :       }
    2126             :     }
    2127             :     return;
    2128             :   }
    2129             : 
    2130      129707 :   if (!preloaded) {
    2131      117457 :     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
    2132             :   }
    2133             : 
    2134      140616 :   if (cc->is_standard(zone) &&
    2135       10909 :       macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
    2136       10909 :                                                   on_failure)) {
    2137             :       return;
    2138             :   }
    2139             : 
    2140             : 
    2141             :   // A new list with ascending entries.  Each entry is a code unit
    2142             :   // where there is a boundary between code units that are part of
    2143             :   // the class and code units that are not.  Normally we insert an
    2144             :   // entry at zero which goes to the failure label, but if there
    2145             :   // was already one there we fall through for success on that entry.
    2146             :   // Subsequent entries have alternating meaning (success/failure).
    2147             :   ZoneList<int>* range_boundaries =
    2148             :       new(zone) ZoneList<int>(last_valid_range, zone);
    2149             : 
    2150      120920 :   bool zeroth_entry_is_failure = !cc->is_negated();
    2151             : 
    2152      567360 :   for (int i = 0; i <= last_valid_range; i++) {
    2153             :     CharacterRange& range = ranges->at(i);
    2154      223220 :     if (range.from() == 0) {
    2155             :       DCHECK_EQ(i, 0);
    2156        3388 :       zeroth_entry_is_failure = !zeroth_entry_is_failure;
    2157             :     } else {
    2158      219832 :       range_boundaries->Add(range.from(), zone);
    2159             :     }
    2160      223220 :     range_boundaries->Add(range.to() + 1, zone);
    2161             :   }
    2162      120920 :   int end_index = range_boundaries->length() - 1;
    2163      120920 :   if (range_boundaries->at(end_index) > max_char) {
    2164        3986 :     end_index--;
    2165             :   }
    2166             : 
    2167      120920 :   Label fall_through;
    2168      120920 :   GenerateBranches(macro_assembler,
    2169             :                    range_boundaries,
    2170             :                    0,  // start_index.
    2171             :                    end_index,
    2172             :                    0,  // min_char.
    2173             :                    max_char,
    2174             :                    &fall_through,
    2175             :                    zeroth_entry_is_failure ? &fall_through : on_failure,
    2176      120920 :                    zeroth_entry_is_failure ? on_failure : &fall_through);
    2177      120920 :   macro_assembler->Bind(&fall_through);
    2178             : }
    2179             : 
    2180             : RegExpNode::~RegExpNode() = default;  // Defaulted destructor, defined out of line.
    2181             : // Limits how many copies of this node get emitted.  CONTINUE: caller emits code for |trace| here.  DONE: a jump to, or flush into, the generic version was already emitted.
    2182     1701150 : RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
    2183             :                                                   Trace* trace) {
    2184             :   // If we are generating a greedy loop then don't stop and don't reuse code.
    2185     1701150 :   if (trace->stop_node() != nullptr) {
    2186             :     return CONTINUE;
    2187             :   }
    2188             : 
    2189             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    2190     1689447 :   if (trace->is_trivial()) {
    2191     1062203 :     if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
    2192             :       // If a generic version is already scheduled to be generated or we have
    2193             :       // recursed too deeply then just generate a jump to that code.
    2194      220691 :       macro_assembler->GoTo(&label_);
    2195             :       // This will queue it up for generation of a generic version if it hasn't
    2196             :       // already been queued.
    2197      220691 :       compiler->AddWork(this);
    2198      220691 :       return DONE;
    2199             :     }
    2200             :     // Generate generic version of the node and bind the label for later use.
    2201      392113 :     macro_assembler->Bind(&label_);
    2202      392113 :     return CONTINUE;
    2203             :   }
    2204             : 
    2205             :   // We are being asked to make a non-generic version.  Keep track of how many
    2206             :   // non-generic versions we generate so as not to overdo it.
    2207     1076643 :   trace_count_++;
    2208     1076643 :   if (KeepRecursing(compiler) && compiler->optimize() &&
    2209             :       trace_count_ < kMaxCopiesCodeGenerated) {
    2210             :     return CONTINUE;
    2211             :   }
    2212             : 
    2213             :   // If we get here code has been generated for this node too many times or
    2214             :   // recursion is too deep.  Time to switch to a generic version.  The code for
    2215             :   // generic versions above can handle deep recursion properly.
    2216             :   bool was_limiting = compiler->limiting_recursion();  // Saved so the flag can be restored after the flush.
    2217             :   compiler->set_limiting_recursion(true);  // Forced on for the duration of Flush().
    2218      478512 :   trace->Flush(compiler, this);
    2219             :   compiler->set_limiting_recursion(was_limiting);
    2220      478512 :   return DONE;
    2221             : }
    2222             : 
    2223             : // True while the compiler is not limiting recursion and its recursion depth is at most RegExpCompiler::kMaxRecursion.
    2224           0 : bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
    2225     2038430 :   return !compiler->limiting_recursion() &&
    2226           0 :          compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
    2227             : }
    2228             : 
    2229             : // Returns 0 if the budget is used up or this is a POSITIVE_SUBMATCH_SUCCESS action; otherwise forwards the question to on_success() with budget - 1.
    2230      584331 : int ActionNode::EatsAtLeast(int still_to_find,
    2231             :                             int budget,
    2232             :                             bool not_at_start) {
    2233      584331 :   if (budget <= 0) return 0;
    2234      571265 :   if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0;  // Rewinds input!
    2235      566102 :   return on_success()->EatsAtLeast(still_to_find,
    2236             :                                    budget - 1,
    2237     1132204 :                                    not_at_start);
    2238             : }
    2239             : // Lets on_success() fill in |bm| (with budget - 1) unless this is a POSITIVE_SUBMATCH_SUCCESS action, then calls SaveBMInfo() in either case.
    2240             : // NOTE(review): budget is not compared against 0 here, and nothing is forwarded for POSITIVE_SUBMATCH_SUCCESS -- confirm both are intended.
    2241       90669 : void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
    2242             :                               BoyerMooreLookahead* bm, bool not_at_start) {
    2243       90669 :   if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
    2244       90669 :     on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
    2245             :   }
    2246             :   SaveBMInfo(bm, not_at_start, offset);
    2247       90669 : }
    2248             : 
    2249             : // Returns 0 on an exhausted budget, still_to_find for an AT_START assertion that is known not to be at the start, else on_success()'s answer (budget - 1).
    2250       10347 : int AssertionNode::EatsAtLeast(int still_to_find,
    2251             :                                int budget,
    2252             :                                bool not_at_start) {
    2253       10347 :   if (budget <= 0) return 0;
    2254             :   // If we know we are not at the start and we are asked "how many characters
    2255             :   // will you match if you succeed?" then we can answer anything since false
    2256             :   // implies false.  So let's just return the max answer (still_to_find) since
    2257             :   // that won't prevent us from preloading a lot of characters for the other
    2258             :   // branches in the node graph.
    2259        9326 :   if (assertion_type() == AT_START && not_at_start) return still_to_find;
    2260        9104 :   return on_success()->EatsAtLeast(still_to_find,
    2261             :                                    budget - 1,
    2262       18208 :                                    not_at_start);
    2263             : }
    2264             : 
    2265             : // Forwards to on_success() (budget - 1) and then calls SaveBMInfo(); does nothing at all for an AT_START assertion when not_at_start is set.
    2266         379 : void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
    2267             :                                  BoyerMooreLookahead* bm, bool not_at_start) {
    2268             :   // Match the behaviour of EatsAtLeast on this node.
    2269         379 :   if (assertion_type() == AT_START && not_at_start) return;
    2270         363 :   on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
    2271             :   SaveBMInfo(bm, not_at_start, offset);
    2272             : }
    2273             : 
    2274             : // Returns 0 for a backward-reading back reference or an exhausted budget; otherwise adds nothing itself and returns on_success()'s answer (budget - 1).
    2275        3112 : int BackReferenceNode::EatsAtLeast(int still_to_find,
    2276             :                                    int budget,
    2277             :                                    bool not_at_start) {
    2278        3112 :   if (read_backward()) return 0;
    2279        3002 :   if (budget <= 0) return 0;
    2280        3002 :   return on_success()->EatsAtLeast(still_to_find,
    2281             :                                    budget - 1,
    2282        6004 :                                    not_at_start);
    2283             : }
    2284             : 
    2285             : // Returns 0 when reading backward; otherwise Length(), plus on_success()'s answer if Length() is short of still_to_find and budget remains.
    2286     5766745 : int TextNode::EatsAtLeast(int still_to_find,
    2287             :                           int budget,
    2288             :                           bool not_at_start) {
    2289     5766745 :   if (read_backward()) return 0;
    2290     5764893 :   int answer = Length();
    2291     5764893 :   if (answer >= still_to_find) return answer;  // Already enough; no need to look further.
    2292     3399790 :   if (budget <= 0) return answer;
    2293             :   // We are not at start after this node so we set the last argument to 'true'.
    2294     2374227 :   return answer + on_success()->EatsAtLeast(still_to_find - answer,
    2295             :                                             budget - 1,
    2296     4748454 :                                             true);
    2297             : }
    2298             : 
    2299             : // Returns 0 on an exhausted budget; otherwise only alternative 1 (the continuation) is asked, with budget - 1.
    2300        9503 : int NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget,
    2301             :                                               bool not_at_start) {
    2302        9503 :   if (budget <= 0) return 0;
    2303             :   // Alternative 0 is the negative lookahead, alternative 1 is what comes
    2304             :   // afterwards.
    2305        9291 :   RegExpNode* node = alternatives_->at(1).node();
    2306        9291 :   return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
    2307             : }
    2308             : 
    2309             : // Delegates quick-check collection unchanged to alternative 1 (the continuation); alternative 0 is not consulted.
    2310        3556 : void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
    2311             :     QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
    2312             :     bool not_at_start) {
    2313             :   // Alternative 0 is the negative lookahead, alternative 1 is what comes
    2314             :   // afterwards.
    2315        3556 :   RegExpNode* node = alternatives_->at(1).node();
    2316        3556 :   return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
    2317             : }
    2318             : 
    2319             : // Minimum of EatsAtLeast() over all alternatives except |ignore_this_node|; returns 0 early on an exhausted budget or as soon as any alternative answers 0.
    2320     6946032 : int ChoiceNode::EatsAtLeastHelper(int still_to_find,
    2321             :                                   int budget,
    2322             :                                   RegExpNode* ignore_this_node,
    2323             :                                   bool not_at_start) {
    2324     6946032 :   if (budget <= 0) return 0;
    2325             :   int min = 100;  // Starting cap; this is also the result if every alternative is skipped.
    2326     4819681 :   int choice_count = alternatives_->length();
    2327     4819681 :   budget = (budget - 1) / choice_count;  // Split what is left evenly.  NOTE(review): assumes choice_count > 0 -- confirm.
    2328    16633195 :   for (int i = 0; i < choice_count; i++) {
    2329    10404836 :     RegExpNode* node = alternatives_->at(i).node();
    2330    10404836 :     if (node == ignore_this_node) continue;
    2331             :     int node_eats_at_least =
    2332    10259632 :         node->EatsAtLeast(still_to_find, budget, not_at_start);
    2333    10259632 :     if (node_eats_at_least < min) min = node_eats_at_least;
    2334    10259632 :     if (min == 0) return 0;  // Cannot get any lower; stop asking.
    2335             :   }
    2336             :   return min;
    2337             : }
    2338             : 
    2339             : // Like ChoiceNode::EatsAtLeast, but leaves loop_node_ out of the minimum and passes budget - 1 to the helper.
    2340      153793 : int LoopChoiceNode::EatsAtLeast(int still_to_find,
    2341             :                                 int budget,
    2342             :                                 bool not_at_start) {
    2343      153793 :   return EatsAtLeastHelper(still_to_find,
    2344             :                            budget - 1,
    2345             :                            loop_node_,
    2346      153793 :                            not_at_start);
    2347             : }
    2348             : 
    2349             : // Minimum over all alternatives: calls EatsAtLeastHelper() with no node excluded (nullptr).
    2350     6792239 : int ChoiceNode::EatsAtLeast(int still_to_find,
    2351             :                             int budget,
    2352             :                             bool not_at_start) {
    2353     6792239 :   return EatsAtLeastHelper(still_to_find, budget, nullptr, not_at_start);
    2354             : }
    2355             : 
    2356             : 
    2357             : // Takes the left-most 1-bit and smears it out, setting all bits to its right (e.g. 0x1000 -> 0x1FFF; 0 stays 0).
    2358             : static inline uint32_t SmearBitsRight(uint32_t v) {
    2359      239875 :   v |= v >> 1;
    2360      239875 :   v |= v >> 2;
    2361      239875 :   v |= v >> 4;
    2362      239875 :   v |= v >> 8;
    2363      239875 :   v |= v >> 16;  // Shifts of 1+2+4+8+16 together cover all 32 bit positions.
    2364             :   return v;
    2365             : }
    2366             : // Packs the per-position masks and values into mask_ and value_, 8 bits per position when |asc| is true and 16 otherwise.
    2367             : // Returns true if at least one position has a mask bit in common with String::kMaxOneByteCharCode.
    2368      271619 : bool QuickCheckDetails::Rationalize(bool asc) {
    2369             :   bool found_useful_op = false;
    2370             :   uint32_t char_mask;
    2371      271619 :   if (asc) {
    2372             :     char_mask = String::kMaxOneByteCharCode;
    2373             :   } else {
    2374             :     char_mask = String::kMaxUtf16CodeUnit;
    2375             :   }
    2376      271619 :   mask_ = 0;
    2377      271619 :   value_ = 0;
    2378             :   int char_shift = 0;
    2379     1136199 :   for (int i = 0; i < characters_; i++) {
    2380             :     Position* pos = &positions_[i];
    2381      432290 :     if ((pos->mask & String::kMaxOneByteCharCode) != 0) {  // NOTE(review): uses the one-byte constant even when !asc -- confirm intended.
    2382             :       found_useful_op = true;
    2383             :     }
    2384      432290 :     mask_ |= (pos->mask & char_mask) << char_shift;
    2385      432290 :     value_ |= (pos->value & char_mask) << char_shift;
    2386      432290 :     char_shift += asc ? 8 : 16;  // Next position goes into the next byte / 16-bit lane.
    2387             :   }
    2388      271619 :   return found_useful_op;
    2389             : }
    2390             : // Emits a mask-and-compare pre-check on the next details->characters() characters.  Returns false, emitting nothing, when |details| has
    2391             : // no characters, cannot match, or Rationalize() finds nothing useful.  Otherwise emits the (optional) load plus one compare and returns true.
    2392      476778 : bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
    2393             :                                 Trace* bounds_check_trace,
    2394             :                                 Trace* trace,
    2395             :                                 bool preload_has_checked_bounds,
    2396             :                                 Label* on_possible_success,
    2397             :                                 QuickCheckDetails* details,
    2398             :                                 bool fall_through_on_failure) {
    2399      476778 :   if (details->characters() == 0) return false;
    2400      271729 :   GetQuickCheckDetails(
    2401      543458 :       details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
    2402      271729 :   if (details->cannot_match()) return false;
    2403      271619 :   if (!details->Rationalize(compiler->one_byte())) return false;
    2404             :   DCHECK(details->characters() == 1 ||
    2405             :          compiler->macro_assembler()->CanReadUnaligned());
    2406             :   uint32_t mask = details->mask();
    2407             :   uint32_t value = details->value();
    2408             : 
    2409             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2410             : 
    2411      226926 :   if (trace->characters_preloaded() != details->characters()) {
    2412             :     DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
    2413             :     // We are attempting to preload the minimum number of characters
    2414             :     // any choice would eat, so if the bounds check fails, then none of the
    2415             :     // choices can succeed, so we can just immediately backtrack, rather
    2416             :     // than go to the next choice.
    2417       63051 :     assembler->LoadCurrentCharacter(trace->cp_offset(),
    2418             :                                     bounds_check_trace->backtrack(),
    2419       63051 :                                     !preload_has_checked_bounds,
    2420      126102 :                                     details->characters());
    2421             :   }
    2422             : 
    2423             : 
    2424             :   bool need_mask = true;  // Cleared below when the mask already covers every loaded bit.
    2425             : 
    2426      226926 :   if (details->characters() == 1) {
    2427             :     // If number of characters preloaded is 1 then we used a byte or 16 bit
    2428             :     // load so the value is already masked down.
    2429             :     uint32_t char_mask;
    2430       82864 :     if (compiler->one_byte()) {
    2431             :       char_mask = String::kMaxOneByteCharCode;
    2432             :     } else {
    2433             :       char_mask = String::kMaxUtf16CodeUnit;
    2434             :     }
    2435       82864 :     if ((mask & char_mask) == char_mask) need_mask = false;
    2436             :     mask &= char_mask;
    2437             :   } else {
    2438             :     // For 2-character preloads in one-byte mode or 1-character preloads in
    2439             :     // two-byte mode we also use a 16 bit load with zero extend.
    2440             :     static const uint32_t kTwoByteMask = 0xFFFF;
    2441             :     static const uint32_t kFourByteMask = 0xFFFFFFFF;
    2442      144062 :     if (details->characters() == 2 && compiler->one_byte()) {
    2443      127909 :       if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
    2444       16153 :     } else if (details->characters() == 1 && !compiler->one_byte()) {  // NOTE(review): looks unreachable -- characters() == 1 is handled by the outer if.
    2445           0 :       if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
    2446             :     } else {
    2447       16153 :       if (mask == kFourByteMask) need_mask = false;
    2448             :     }
    2449             :   }
    2450             : 
    2451      226926 :   if (fall_through_on_failure) {
    2452      192498 :     if (need_mask) {  // Jump to on_possible_success when the check passes; fall through otherwise.
    2453       46384 :       assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
    2454             :     } else {
    2455      146114 :       assembler->CheckCharacter(value, on_possible_success);
    2456             :     }
    2457             :   } else {
    2458       34428 :     if (need_mask) {  // Jump to trace->backtrack() when the check fails; fall through otherwise.
    2459        3745 :       assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
    2460             :     } else {
    2461       30683 :       assembler->CheckNotCharacter(value, trace->backtrack());
    2462             :     }
    2463             :   }
    2464             :   return true;
    2465             : }
    2466             : 
    2467             : 
    2468             : // Here is the meat of GetQuickCheckDetails (see also the comment on the
    2469             : // super-class in the .h file).
    2470             : //
    2471             : // We iterate along the text object, building up for each character a
    2472             : // mask and value that can be used to test for a quick failure to match.
    2473             : // The masks and values for the positions will be combined into a single
    2474             : // machine word for the current character width in order to be used in
    2475             : // generating a quick check.  Positions are filled starting at characters_filled_in; if this node runs out first, on_success() continues.
    2476      463185 : void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
    2477             :                                     RegExpCompiler* compiler,
    2478             :                                     int characters_filled_in,
    2479             :                                     bool not_at_start) {
    2480             :   // Do not collect any quick check details if the text node reads backward,
    2481             :   // since it reads in the opposite direction than we use for quick checks.
    2482      463185 :   if (read_backward()) return;
    2483             :   Isolate* isolate = compiler->macro_assembler()->isolate();
    2484             :   DCHECK(characters_filled_in < details->characters());
    2485             :   int characters = details->characters();
    2486             :   int char_mask;
    2487      463185 :   if (compiler->one_byte()) {
    2488             :     char_mask = String::kMaxOneByteCharCode;
    2489             :   } else {
    2490             :     char_mask = String::kMaxUtf16CodeUnit;
    2491             :   }
    2492      548289 :   for (int k = 0; k < elements()->length(); k++) {
    2493      467747 :     TextElement elm = elements()->at(k);
    2494      467747 :     if (elm.text_type() == TextElement::ATOM) {
    2495             :       Vector<const uc16> quarks = elm.atom()->data();
    2496     1163807 :       for (int i = 0; i < characters && i < quarks.length(); i++) {
    2497             :         QuickCheckDetails::Position* pos =
    2498             :             details->positions(characters_filled_in);
    2499      949528 :         uc16 c = quarks[i];
    2500      474764 :         if (elm.atom()->ignore_case()) {
    2501             :           unibrow::uchar chars[4];
    2502        6347 :           int length = GetCaseIndependentLetters(
    2503        6347 :               isolate, c, compiler->one_byte(), chars, 4);
    2504        6347 :           if (length == 0) {
    2505             :             // This can happen because all case variants are non-Latin1, but we
    2506             :             // know the input is Latin1.
    2507             :             details->set_cannot_match();
    2508          25 :             pos->determines_perfectly = false;
    2509          25 :             return;
    2510             :           }
    2511        6322 :           if (length == 1) {
    2512             :             // This letter has no case equivalents, so it's nice and simple
    2513             :             // and the mask-compare will determine definitely whether we have
    2514             :             // a match at this character position.
    2515        1227 :             pos->mask = char_mask;
    2516        1227 :             pos->value = c;
    2517        1227 :             pos->determines_perfectly = true;
    2518             :           } else {
    2519        5095 :             uint32_t common_bits = char_mask;  // Bits on which all case variants seen so far agree.
    2520        5095 :             uint32_t bits = chars[0];
    2521       16241 :             for (int j = 1; j < length; j++) {
    2522        5573 :               uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
    2523        5573 :               common_bits ^= differing_bits;
    2524        5573 :               bits &= common_bits;
    2525             :             }
    2526             :             // If length is 2 and common bits has only one zero in it then
    2527             :             // our mask and compare instruction will determine definitely
    2528             :             // whether we have a match at this character position.  Otherwise
    2529             :             // it can only be an approximate check.
    2530        5095 :             uint32_t one_zero = (common_bits | ~char_mask);
    2531        5095 :             if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {  // x & (x - 1) == 0: at most one bit set in ~one_zero.
    2532        4596 :               pos->determines_perfectly = true;
    2533             :             }
    2534        5095 :             pos->mask = common_bits;
    2535        5095 :             pos->value = bits;
    2536             :           }
    2537             :         } else {
    2538             :           // Don't ignore case.  Nice simple case where the mask-compare will
    2539             :           // determine definitely whether we have a match at this character
    2540             :           // position.
    2541      468417 :           if (c > char_mask) {  // Character lies outside the range for this character width.
    2542             :             details->set_cannot_match();
    2543          25 :             pos->determines_perfectly = false;
    2544          25 :             return;
    2545             :           }
    2546      468392 :           pos->mask = char_mask;
    2547      468392 :           pos->value = c;
    2548      468392 :           pos->determines_perfectly = true;
    2549             :         }
    2550      474714 :         characters_filled_in++;
    2551             :         DCHECK(characters_filled_in <= details->characters());
    2552      474714 :         if (characters_filled_in == details->characters()) {
    2553             :           return;
    2554             :         }
    2555             :       }
    2556             :     } else {
    2557             :       QuickCheckDetails::Position* pos =
    2558             :           details->positions(characters_filled_in);
    2559             :       RegExpCharacterClass* tree = elm.char_class();
    2560             :       ZoneList<CharacterRange>* ranges = tree->ranges(zone());
    2561             :       DCHECK(!ranges->is_empty());
    2562      125716 :       if (tree->is_negated()) {
    2563             :         // A quick check uses multi-character mask and compare.  There is no
    2564             :         // useful way to incorporate a negative char class into this scheme
    2565             :         // so we just conservatively create a mask and value that will always
    2566             :         // succeed.
    2567        3506 :         pos->mask = 0;
    2568        3506 :         pos->value = 0;
    2569             :       } else {
    2570             :         int first_range = 0;
    2571      122240 :         while (ranges->at(first_range).from() > char_mask) {  // Skip ranges that start beyond char_mask.
    2572          80 :           first_range++;
    2573          80 :           if (first_range == ranges->length()) {
    2574             :             details->set_cannot_match();
    2575          50 :             pos->determines_perfectly = false;
    2576             :             return;
    2577             :           }
    2578             :         }
    2579      122160 :         CharacterRange range = ranges->at(first_range);
    2580      122160 :         uc16 from = range.from();
    2581      122160 :         uc16 to = range.to();
    2582      122160 :         if (to > char_mask) {
    2583       15074 :           to = char_mask;
    2584             :         }
    2585      122160 :         uint32_t differing_bits = (from ^ to);
    2586             :         // A mask and compare is only perfect if the differing bits form a
    2587             :         // number like 00011111 with one single block of trailing 1s.
    2588      227109 :         if ((differing_bits & (differing_bits + 1)) == 0 &&
    2589      104949 :              from + differing_bits == to) {
    2590       95305 :           pos->determines_perfectly = true;
    2591             :         }
    2592      122160 :         uint32_t common_bits = ~SmearBitsRight(differing_bits);
    2593      122160 :         uint32_t bits = (from & common_bits);
    2594      750722 :         for (int i = first_range + 1; i < ranges->length(); i++) {
    2595      253201 :           CharacterRange range = ranges->at(i);
    2596      253201 :           uc16 from = range.from();
    2597      253201 :           uc16 to = range.to();
    2598      253201 :           if (from > char_mask) continue;
    2599      117715 :           if (to > char_mask) to = char_mask;
    2600             :           // Here we are combining more ranges into the mask and compare
    2601             :           // value.  With each new range the mask becomes more sparse and
    2602             :           // so the chances of a false positive rise.  A character class
    2603             :           // with multiple ranges is assumed never to be equivalent to a
    2604             :           // mask and compare operation.
    2605      117715 :           pos->determines_perfectly = false;
    2606      117715 :           uint32_t new_common_bits = (from ^ to);
    2607      117715 :           new_common_bits = ~SmearBitsRight(new_common_bits);
    2608      117715 :           common_bits &= new_common_bits;
    2609      117715 :           bits &= new_common_bits;
    2610      117715 :           uint32_t differing_bits = (from & common_bits) ^ bits;
    2611      117715 :           common_bits ^= differing_bits;
    2612      117715 :           bits &= common_bits;
    2613             :         }
    2614      122160 :         pos->mask = common_bits;
    2615      122160 :         pos->value = bits;
    2616             :       }
    2617      125666 :       characters_filled_in++;
    2618             :       DCHECK(characters_filled_in <= details->characters());
    2619      125666 :       if (characters_filled_in == details->characters()) {
    2620             :         return;
    2621             :       }
    2622             :     }
    2623             :   }
    2624             :   DCHECK(characters_filled_in != details->characters());
    2625       37990 :   if (!details->cannot_match()) {
    2626             :     on_success()-> GetQuickCheckDetails(details,
    2627             :                                         compiler,
    2628             :                                         characters_filled_in,
    2629       37990 :                                         true);  // not_at_start is passed as true to the successor.
    2630             :   }
    2631             : }
    2632             : 
    2633             : // Zeroes mask and value and clears determines_perfectly for each of the first characters_ positions, then sets characters_ to 0.
    2634           0 : void QuickCheckDetails::Clear() {
    2635     1801552 :   for (int i = 0; i < characters_; i++) {
    2636      350165 :     positions_[i].mask = 0;
    2637      350165 :     positions_[i].value = 0;
    2638      350165 :     positions_[i].determines_perfectly = false;
    2639             :   }
    2640     1101222 :   characters_ = 0;
    2641           0 : }
    2642             : 
    2643             : // Drops the first |by| positions: the rest are shifted down, the vacated tail is zeroed, and characters_ shrinks by |by|.
    2644      518222 : void QuickCheckDetails::Advance(int by, bool one_byte) {  // |one_byte| is not read in this body.
    2645      518222 :   if (by >= characters_ || by < 0) {  // Nothing would remain (or |by| is negative): reset everything.
    2646             :     DCHECK_IMPLIES(by < 0, characters_ == 0);
    2647             :     Clear();
    2648             :     return;
    2649             :   }
    2650             :   DCHECK_LE(characters_ - by, 4);
    2651             :   DCHECK_LE(characters_, 4);
    2652       72975 :   for (int i = 0; i < characters_ - by; i++) {
    2653       24892 :     positions_[i] = positions_[by + i];
    2654             :   }
    2655       72967 :   for (int i = characters_ - by; i < characters_; i++) {
    2656       24888 :     positions_[i].mask = 0;
    2657       24888 :     positions_[i].value = 0;
    2658       24888 :     positions_[i].determines_perfectly = false;
    2659             :   }
    2660       23191 :   characters_ -= by;
    2661             :   // We could change mask_ and value_ here but we would never advance unless
    2662             :   // they had already been used in a check and they won't be used again because
    2663             :   // it would gain us nothing.  So there's no point.
    2664             : }
    2665             : 
    2666             : 
    // Combines the quick-check details of |other| into this object, starting at
    // position |from_index|.  If |other| cannot match, this object is left
    // untouched; if this object cannot match, it becomes a copy of |other|.
    // Otherwise each position keeps only the mask bits that are set in both
    // masks and on which both values agree.  Note that |other|'s position values
    // are also masked in place as a side effect.
    2667      157490 : void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
    2668             :   DCHECK(characters_ == other->characters_);
    2669      157490 :   if (other->cannot_match_) {
    2670             :     return;
    2671             :   }
    2672      157416 :   if (cannot_match_) {
    2673         247 :     *this = *other;
    2674         247 :     return;
    2675             :   }
    2676      505865 :   for (int i = from_index; i < characters_; i++) {
    2677             :     QuickCheckDetails::Position* pos = positions(i);
    2678             :     QuickCheckDetails::Position* other_pos = other->positions(i);
    2679      206957 :     if (pos->mask != other_pos->mask ||
    2680       42719 :         pos->value != other_pos->value ||
    2681       10110 :         !other_pos->determines_perfectly) {
    2682             :       // Our mask-compare operation will be approximate unless we have the
    2683             :       // exact same operation on both sides of the alternation.
    2684      167044 :       pos->determines_perfectly = false;
    2685             :     }
                           // Intersect the masks, restrict both values to the common mask, and
                           // then drop every bit on which the two values still differ.
    2686      174348 :     pos->mask &= other_pos->mask;
    2687      174348 :     pos->value &= pos->mask;
    2688      174348 :     other_pos->value &= pos->mask;
    2689      174348 :     uc16 differing_bits = (pos->value ^ other_pos->value);
    2690      174348 :     pos->mask &= ~differing_bits;
    2691      174348 :     pos->value &= pos->mask;
    2692             :   }
    2693             : }
    2694             : 
    2695             : 
    // Scoped helper that sets NodeInfo::visited to true on construction and back
    // to false on destruction.  The constructor asserts (DCHECK) that the node
    // was not already marked as visited.
    2696             : class VisitMarker {
    2697             :  public:
    2698             :   explicit VisitMarker(NodeInfo* info) : info_(info) {
    2699             :     DCHECK(!info->visited);
    2700      197217 :     info->visited = true;
    2701             :   }
    2702             :   ~VisitMarker() {
    2703      173221 :     info_->visited = false;
    2704             :   }
    2705             :  private:
                         // The NodeInfo whose visited flag is cleared again in the destructor.
    2706             :   NodeInfo* info_;
    2707             : };
    2708             : 
    // Returns the already-computed replacement if there is one, returns this node
    // unchanged when the depth budget is exhausted (depth < 0), and otherwise
    // marks the node as visited for the duration of the call and delegates to
    // FilterSuccessor with the depth reduced by one.
    2709       99131 : RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
    2710       99131 :   if (info()->replacement_calculated) return replacement();
    2711       72400 :   if (depth < 0) return this;
    2712             :   DCHECK(!info()->visited);
    2713             :   VisitMarker marker(info());
    2714             :   return FilterSuccessor(depth - 1);
    2715             : }
    2716             : 
    // Filters the successor node with depth - 1.  If the successor filters to
    // nullptr, nullptr is recorded as this node's replacement; otherwise
    // on_success_ is updated to the filtered successor and this node is recorded
    // as its own replacement.  The result of set_replacement is returned.
    2717           0 : RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
    2718      132641 :   RegExpNode* next = on_success_->FilterOneByte(depth - 1);
    2719      132641 :   if (next == nullptr) return set_replacement(nullptr);
    2720      132159 :   on_success_ = next;
    2721      132159 :   return set_replacement(this);
    2722             : }
    2723             : 
    2724             : // We need to check for the following characters: 0x39C 0x3BC 0x178.
    // Returns true if |range| contains at least one of those three code points.
    2725        1462 : static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
    2726             :   // TODO(dcarney): this could be a lot more efficient.
    2727        4260 :   return range.Contains(0x039C) || range.Contains(0x03BC) ||
    2728        1462 :          range.Contains(0x0178);
    2729             : }
    2730             : 
    2731             : 
    // Returns true if any range in |ranges| satisfies
    // RangeContainsLatin1Equivalents; returns false for an empty list.
    2732          41 : static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
    2733          81 :   for (int i = 0; i < ranges->length(); i++) {
    2734             :     // TODO(dcarney): this could be a lot more efficient.
    2735          46 :     if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
    2736             :   }
    2737             :   return false;
    2738             : }
    2739             : 
    // Examines every element of this text node and records nullptr as the
    // replacement when an element is found that the checks below reject:
    //  - an atom containing a code unit above unibrow::Latin1::kMaxChar (after
    //    TryConvertToLatin1 when the atom ignores case);
    //  - a negated character class whose first canonical range starts at 0 and
    //    reaches at least String::kMaxOneByteCharCode;
    //  - a non-negated character class that is empty or whose first canonical
    //    range starts above String::kMaxOneByteCharCode.
    // The two character-class rejections are skipped when the class is
    // case-insensitive and its ranges contain a Latin-1 equivalent.  If nothing
    // is rejected the successor is filtered via FilterSuccessor.
    // Side effect: atom data is overwritten in place with the converted
    // characters.
    2740       65125 : RegExpNode* TextNode::FilterOneByte(int depth) {
    2741       65125 :   if (info()->replacement_calculated) return replacement();
    2742       60886 :   if (depth < 0) return this;
    2743             :   DCHECK(!info()->visited);
    2744             :   VisitMarker marker(info());
    2745             :   int element_count = elements()->length();
    2746      190665 :   for (int i = 0; i < element_count; i++) {
    2747       65347 :     TextElement elm = elements()->at(i);
    2748       65347 :     if (elm.text_type() == TextElement::ATOM) {
    2749             :       Vector<const uc16> quarks = elm.atom()->data();
    2750      145454 :       for (int j = 0; j < quarks.length(); j++) {
    2751      115234 :         uint16_t c = quarks[j];
    2752       57617 :         if (elm.atom()->ignore_case()) {
    2753             :           c = unibrow::Latin1::TryConvertToLatin1(c);
    2754             :         }
    2755       57617 :         if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
    2756             :         // Replace quark in case we converted to Latin-1.
                               // The atom's data is exposed as const, hence the const_cast.
    2757             :         uint16_t* writable_quarks = const_cast<uint16_t*>(quarks.start());
    2758       57451 :         writable_quarks[j] = c;
    2759             :       }
    2760             :     } else {
    2761             :       DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
    2762             :       RegExpCharacterClass* cc = elm.char_class();
    2763             :       ZoneList<CharacterRange>* ranges = cc->ranges(zone());
    2764       34795 :       CharacterRange::Canonicalize(ranges);
    2765             :       // Now they are in order so we only need to look at the first.
    2766             :       int range_count = ranges->length();
    2767       34795 :       if (cc->is_negated()) {
    2768        8434 :         if (range_count != 0 &&
    2769        4395 :             ranges->at(0).from() == 0 &&
    2770             :             ranges->at(0).to() >= String::kMaxOneByteCharCode) {
    2771             :           // This will be handled in a later filter.
    2772          40 :           if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
    2773             :             continue;
    2774          39 :           return set_replacement(nullptr);
    2775             :         }
    2776             :       } else {
    2777       30578 :         if (range_count == 0 ||
    2778             :             ranges->at(0).from() > String::kMaxOneByteCharCode) {
    2779             :           // This will be handled in a later filter.
    2780         255 :           if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
    2781             :             continue;
    2782         230 :           return set_replacement(nullptr);
    2783             :         }
    2784             :       }
    2785             :     }
    2786             :   }
    2787       60406 :   return FilterSuccessor(depth - 1);
    2788             : }
    2789             : 
    // Filters the loop's continuation first: if continue_node_ filters to
    // nullptr, nullptr is recorded as this node's replacement.  Otherwise the
    // generic ChoiceNode filtering is applied.  A node that is currently being
    // visited, or a negative depth, yields this node unchanged.
    2790       59856 : RegExpNode* LoopChoiceNode::FilterOneByte(int depth) {
    2791       59856 :   if (info()->replacement_calculated) return replacement();
    2792       46200 :   if (depth < 0) return this;
    2793       46110 :   if (info()->visited) return this;
                         // The marker is confined to this inner scope, so the visited flag is
                         // cleared again before ChoiceNode::FilterOneByte runs below.
    2794             :   {
    2795             :     VisitMarker marker(info());
    2796             : 
    2797       24344 :     RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
    2798             :     // If we can't continue after the loop then there is no sense in doing the
    2799             :     // loop.
    2800       24344 :     if (continue_replacement == nullptr) return set_replacement(nullptr);
    2801             :   }
    2802             : 
    2803       23996 :   return ChoiceNode::FilterOneByte(depth - 1);
    2804             : }
    2805             : 
    // Filters every alternative and keeps only those whose filtered node is not
    // nullptr:
    //  - if any alternative carries guards, the node is kept unchanged;
    //  - if fewer than two alternatives survive, the replacement is the single
    //    survivor (or nullptr when none survive);
    //  - if all survive, this node is returned with its alternatives updated;
    //  - otherwise alternatives_ is rebuilt from the survivors only.
    2806       29821 : RegExpNode* ChoiceNode::FilterOneByte(int depth) {
    2807       29821 :   if (info()->replacement_calculated) return replacement();
    2808       27778 :   if (depth < 0) return this;
    2809       27683 :   if (info()->visited) return this;
    2810             :   VisitMarker marker(info());
    2811       27683 :   int choice_count = alternatives_->length();
    2812             : 
                         // Any guarded alternative makes the whole node its own replacement.
    2813      144231 :   for (int i = 0; i < choice_count; i++) {
    2814       60652 :     GuardedAlternative alternative = alternatives_->at(i);
    2815       63030 :     if (alternative.guards() != nullptr &&
    2816             :         alternative.guards()->length() != 0) {
    2817        2378 :       set_replacement(this);
    2818             :       return this;
    2819             :     }
    2820             :   }
    2821             : 
                         // First pass: filter each alternative, count the survivors and remember
                         // the most recent one.
    2822             :   int surviving = 0;
    2823             :   RegExpNode* survivor = nullptr;
    2824      141149 :   for (int i = 0; i < choice_count; i++) {
    2825      115844 :     GuardedAlternative alternative = alternatives_->at(i);
    2826       57922 :     RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
    2827             :     DCHECK(replacement != this);  // No missing EMPTY_MATCH_CHECK.
    2828       57922 :     if (replacement != nullptr) {
    2829       57776 :       alternatives_->at(i).set_node(replacement);
    2830       57776 :       surviving++;
    2831             :       survivor = replacement;
    2832             :     }
    2833             :   }
    2834       25371 :   if (surviving < 2) return set_replacement(survivor);
    2835             : 
    2836       25239 :   set_replacement(this);
    2837       25239 :   if (surviving == choice_count) {
    2838             :     return this;
    2839             :   }
    2840             :   // Only some of the nodes survived the filtering.  We need to rebuild the
    2841             :   // alternatives list.
    2842             :   ZoneList<GuardedAlternative>* new_alternatives =
    2843             :       new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
                         // Second pass: FilterOneByte is called again on each alternative's
                         // current node and only the non-null results are copied to the new list.
    2844         380 :   for (int i = 0; i < choice_count; i++) {
    2845             :     RegExpNode* replacement =
    2846         360 :         alternatives_->at(i).node()->FilterOneByte(depth - 1);
    2847         180 :     if (replacement != nullptr) {
    2848         130 :       alternatives_->at(i).set_node(replacement);
    2849         260 :       new_alternatives->Add(alternatives_->at(i), zone());
    2850             :     }
    2851             :   }
    2852          20 :   alternatives_ = new_alternatives;
    2853          20 :   return this;
    2854             : }
    2855             : 
    // Filters the continuation (alternative 1) first and then the negative
    // lookaround body (alternative 0):
    //  - continuation filters to nullptr -> replacement is nullptr;
    //  - lookaround filters to nullptr   -> replacement is the filtered
    //    continuation, i.e. this node is dropped;
    //  - otherwise both alternatives are updated and this node is kept.
    2856         357 : RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
    2857         357 :   if (info()->replacement_calculated) return replacement();
    2858         357 :   if (depth < 0) return this;
    2859         357 :   if (info()->visited) return this;
    2860             :   VisitMarker marker(info());
    2861             :   // Alternative 0 is the negative lookahead, alternative 1 is what comes
    2862             :   // afterwards.
    2863         357 :   RegExpNode* node = alternatives_->at(1).node();
    2864         357 :   RegExpNode* replacement = node->FilterOneByte(depth - 1);
    2865         362 :   if (replacement == nullptr) return set_replacement(nullptr);
    2866         352 :   alternatives_->at(1).set_node(replacement);
    2867             : 
    2868         352 :   RegExpNode* neg_node = alternatives_->at(0).node();
    2869         352 :   RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
    2870             :   // If the negative lookahead is always going to fail then
    2871             :   // we don't need to check it.
    2872         357 :   if (neg_replacement == nullptr) return set_replacement(replacement);
    2873         347 :   alternatives_->at(0).set_node(neg_replacement);
    2874         694 :   return set_replacement(this);
    2875             : }
    2876             : 
    2877             : 
    // Leaves |details| untouched when the loop body can be zero length or when
    // this node is already being visited; otherwise marks the node as visited
    // and forwards all arguments to ChoiceNode::GetQuickCheckDetails.
    2878       14928 : void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
    2879             :                                           RegExpCompiler* compiler,
    2880             :                                           int characters_filled_in,
    2881             :                                           bool not_at_start) {
    2882       14928 :   if (body_can_be_zero_length_ || info()->visited) return;
    2883             :   VisitMarker marker(info());
    2884       11757 :   return ChoiceNode::GetQuickCheckDetails(details,
    2885             :                                           compiler,
    2886             :                                           characters_filled_in,
    2887       11757 :                                           not_at_start);
    2888             : }
    2889             : 
    2890             : 
    // When the loop body can be zero length or the budget is used up, calls
    // bm->SetRest(offset) instead of descending further; otherwise delegates to
    // ChoiceNode::FillInBMInfo with the budget reduced by one.  SaveBMInfo is
    // called on both paths.
    2891        5136 : void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
    2892             :                                   BoyerMooreLookahead* bm, bool not_at_start) {
    2893        5136 :   if (body_can_be_zero_length_ || budget <= 0) {
    2894             :     bm->SetRest(offset);
    2895             :     SaveBMInfo(bm, not_at_start, offset);
    2896             :     return;
    2897             :   }
    2898        4919 :   ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
    2899             :   SaveBMInfo(bm, not_at_start, offset);
    2900             : }
    2901             : 
    2902             : 
    // Fills |details| from the first alternative, then computes fresh details
    // for each remaining alternative and merges them into |details| starting at
    // position |characters_filled_in|.  |not_at_start| is forced to true when
    // this node's own not_at_start_ flag is set.
    2903       38784 : void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
    2904             :                                       RegExpCompiler* compiler,
    2905             :                                       int characters_filled_in,
    2906             :                                       bool not_at_start) {
    2907       38784 :   not_at_start = (not_at_start || not_at_start_);
    2908       38784 :   int choice_count = alternatives_->length();
    2909             :   DCHECK_LT(0, choice_count);
    2910       38784 :   alternatives_->at(0).node()->GetQuickCheckDetails(details,
    2911             :                                                     compiler,
    2912             :                                                     characters_filled_in,
    2913       77568 :                                                     not_at_start);
    2914      353764 :   for (int i = 1; i < choice_count; i++) {
                           // Scratch details with the same character count as |details|.
    2915             :     QuickCheckDetails new_details(details->characters());
    2916      157490 :     RegExpNode* node = alternatives_->at(i).node();
    2917             :     node->GetQuickCheckDetails(&new_details, compiler,
    2918             :                                characters_filled_in,
    2919      157490 :                                not_at_start);
    2920             :     // Here we merge the quick match details of the two branches.
    2921      157490 :     details->Merge(&new_details, characters_filled_in);
    2922             :   }
    2923       38784 : }
    2924             : 
    2925             : 
    2926             : // Check for [0-9A-Z_a-z].
    // First tries the assembler's special character class check ('w' when
    // falling through on a word character, 'W' otherwise).  If that is not
    // available, emits an explicit comparison chain that branches to |word| or
    // |non_word|.  With |fall_through_on_word| the final '_' test branches to
    // |non_word| on mismatch; without it the test branches to |word| on match.
    2927         557 : static void EmitWordCheck(RegExpMacroAssembler* assembler,
    2928             :                           Label* word,
    2929             :                           Label* non_word,
    2930             :                           bool fall_through_on_word) {
    2931         557 :   if (assembler->CheckSpecialCharacterClass(
    2932             :           fall_through_on_word ? 'w' : 'W',
    2933         557 :           fall_through_on_word ? non_word : word)) {
    2934             :     // Optimized implementation available.
    2935             :     return;
    2936             :   }
                         // Above 'z' or below '0': not a word character.
    2937          99 :   assembler->CheckCharacterGT('z', non_word);
    2938          99 :   assembler->CheckCharacterLT('0', non_word);
                         // 'a'..'z' and '0'..'9' are word characters.
    2939          99 :   assembler->CheckCharacterGT('a' - 1, word);
    2940          99 :   assembler->CheckCharacterLT('9' + 1, word);
                         // Between '9' and 'A': not a word character; 'A'..'Z': word character.
    2941          99 :   assembler->CheckCharacterLT('A', non_word);
    2942          99 :   assembler->CheckCharacterLT('Z' + 1, word);
                         // Only the characters between 'Z' and 'a' remain; of those, '_' is the
                         // one in the [0-9A-Z_a-z] set.
    2943          99 :   if (fall_through_on_word) {
    2944          34 :     assembler->CheckNotCharacter('_', non_word);
    2945             :   } else {
    2946          65 :     assembler->CheckCharacter('_', word);
    2947             :   }
    2948             : }
    2949             : 
    2950             : 
    2951             : // Emit the code to check for a ^ in multiline mode (1-character lookbehind
    2952             : // that matches newline or the start of input).
    // Works on a copy of |trace| whose current character has been invalidated,
    // and emits |on_success| with that copy once the check has been generated.
    2953         129 : static void EmitHat(RegExpCompiler* compiler,
    2954             :                     RegExpNode* on_success,
    2955             :                     Trace* trace) {
    2956             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2957             :   // We will be loading the previous character into the current character
    2958             :   // register.
    2959         129 :   Trace new_trace(*trace);
    2960             :   new_trace.InvalidateCurrentCharacter();
    2961             : 
    2962         129 :   Label ok;
    2963         129 :   if (new_trace.cp_offset() == 0) {
    2964             :     // The start of input counts as a newline in this context, so skip to
    2965             :     // ok if we are at the start.
    2966         119 :     assembler->CheckAtStart(&ok);
    2967             :   }
    2968             :   // We already checked that we are not at the start of input so it must be
    2969             :   // OK to load the previous character.
    2970         129 :   assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
    2971             :                                   new_trace.backtrack(),
    2972         258 :                                   false);
                         // Prefer the assembler's special class check 'n'; fall back to explicit
                         // comparisons when it reports that none was emitted.
    2973         129 :   if (!assembler->CheckSpecialCharacterClass('n',
    2974         129 :                                              new_trace.backtrack())) {
    2975             :     // Newline means \n, \r, 0x2028 or 0x2029.
    2976          24 :     if (!compiler->one_byte()) {
                             // Masking with 0xFFFE ignores the lowest bit, so this one test covers
                             // both 0x2028 and 0x2029.
    2977           2 :       assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
    2978             :     }
    2979          24 :     assembler->CheckCharacter('\n', &ok);
    2980          24 :     assembler->CheckNotCharacter('\r', new_trace.backtrack());
    2981             :   }
    2982         129 :   assembler->Bind(&ok);
    2983         129 :   on_success->Emit(compiler, &new_trace);
    2984         129 : }
    2985             : 
    2986             : 
    2987             : // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
    // First tries to learn from Boyer-Moore lookahead info whether the next
    // character is known to be a word or a non-word character.  If that is
    // unknown, code is emitted that tests the next character at run time and
    // then checks the previous character on each branch; if it is known, only
    // the previous-character check is emitted.
    2988         255 : void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
    2989             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2990             :   Isolate* isolate = assembler->isolate();
    2991             :   Trace::TriBool next_is_word_character = Trace::UNKNOWN;
    2992         255 :   bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
    2993             :   BoyerMooreLookahead* lookahead = bm_info(not_at_start);
    2994         255 :   if (lookahead == nullptr) {
                           // No stored lookahead for this node: compute one here, provided the
                           // successor is known to consume at least one character.
    2995             :     int eats_at_least =
    2996         202 :         Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
    2997             :                                                     kRecursionBudget,
    2998         202 :                                                     not_at_start));
    2999         202 :     if (eats_at_least >= 1) {
    3000             :       BoyerMooreLookahead* bm =
    3001          97 :           new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
    3002          97 :       FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
    3003          97 :       if (bm->at(0)->is_non_word())
    3004             :         next_is_word_character = Trace::FALSE_VALUE;
    3005          97 :       if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
    3006             :     }
    3007             :   } else {
    3008          53 :     if (lookahead->at(0)->is_non_word())
    3009             :       next_is_word_character = Trace::FALSE_VALUE;
    3010          53 :     if (lookahead->at(0)->is_word())
    3011             :       next_is_word_character = Trace::TRUE_VALUE;
    3012             :   }
    3013         255 :   bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
    3014         255 :   if (next_is_word_character == Trace::UNKNOWN) {
    3015         151 :     Label before_non_word;
    3016         151 :     Label before_word;
    3017         151 :     if (trace->characters_preloaded() != 1) {
    3018         150 :       assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
    3019             :     }
    3020             :     // Fall through on non-word.
    3021         151 :     EmitWordCheck(assembler, &before_word, &before_non_word, false);
    3022             :     // Next character is not a word character.
    3023         151 :     assembler->Bind(&before_non_word);
    3024         151 :     Label ok;
    3025         151 :     BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
    3026         151 :     assembler->GoTo(&ok);
    3027             : 
                           // Next character is a word character.
    3028         151 :     assembler->Bind(&before_word);
    3029         151 :     BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
    3030         151 :     assembler->Bind(&ok);
    3031         104 :   } else if (next_is_word_character == Trace::TRUE_VALUE) {
    3032          79 :     BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
    3033             :   } else {
    3034             :     DCHECK(next_is_word_character == Trace::FALSE_VALUE);
    3035          25 :     BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
    3036             :   }
    3037         255 : }
    3038             : 
    3039             : 
    // Emits code that inspects the character before the current position and
    // backtracks when it is of the kind named by |backtrack_if_previous|
    // (kIsWord or kIsNonWord); otherwise execution falls through to the code
    // emitted for on_success().  Works on a copy of |trace| whose current
    // character has been invalidated.
    3040         406 : void AssertionNode::BacktrackIfPrevious(
    3041             :     RegExpCompiler* compiler,
    3042             :     Trace* trace,
    3043             :     AssertionNode::IfPrevious backtrack_if_previous) {
    3044             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3045         406 :   Trace new_trace(*trace);
    3046             :   new_trace.InvalidateCurrentCharacter();
    3047             : 
    3048         406 :   Label fall_through, dummy;
    3049             : 
                         // Exactly one of the two targets is the backtrack label; the other is
                         // the local fall-through label bound just before the successor.
    3050             :   Label* non_word = backtrack_if_previous == kIsNonWord ?
    3051             :                     new_trace.backtrack() :
    3052         406 :                     &fall_through;
    3053             :   Label* word = backtrack_if_previous == kIsNonWord ?
    3054             :                 &fall_through :
    3055         406 :                 new_trace.backtrack();
    3056             : 
    3057         406 :   if (new_trace.cp_offset() == 0) {
    3058             :     // The start of input counts as a non-word character, so the question is
    3059             :     // decided if we are at the start.
    3060         169 :     assembler->CheckAtStart(non_word);
    3061             :   }
    3062             :   // We already checked that we are not at the start of input so it must be
    3063             :   // OK to load the previous character.
                         // NOTE(review): |dummy| is never bound; the trailing false presumably
                         // disables the bounds check so the label is never jumped to - confirm
                         // against RegExpMacroAssembler::LoadCurrentCharacter.
    3064         406 :   assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
    3065         406 :   EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
    3066             : 
    3067         406 :   assembler->Bind(&fall_through);
    3068         406 :   on_success()->Emit(compiler, &new_trace);
    3069         406 : }
    3070             : 
    3071             : 
    // An AT_START assertion reached with |not_at_start| set marks |details| as
    // unable to match.  In every other case the request is forwarded unchanged
    // to the successor node.
    3072        1935 : void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
    3073             :                                          RegExpCompiler* compiler,
    3074             :                                          int filled_in,
    3075             :                                          bool not_at_start) {
    3076        1935 :   if (assertion_type_ == AT_START && not_at_start) {
    3077             :     details->set_cannot_match();
    3078             :     return;
    3079             :   }
    3080        1604 :   return on_success()->GetQuickCheckDetails(details,
    3081             :                                             compiler,
    3082             :                                             filled_in,
    3083        3208 :                                             not_at_start);
    3084             : }
    3085             : 
    3086             : 
    // Emits the code for this assertion according to assertion_type_:
    //  - AT_END: backtracks unless CheckPosition branches to the local ok label,
    //    then emits the successor;
    //  - AT_START: backtracks outright when the trace says we are not at the
    //    start, emits a run-time check when that is unknown, and otherwise just
    //    emits the successor;
    //  - AFTER_NEWLINE: delegates to EmitHat;
    //  - AT_BOUNDARY / AT_NON_BOUNDARY: delegates to EmitBoundaryCheck.
    3087        5769 : void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    3088             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3089        5769 :   switch (assertion_type_) {
    3090             :     case AT_END: {
    3091        2333 :       Label ok;
    3092        2333 :       assembler->CheckPosition(trace->cp_offset(), &ok);
    3093        2333 :       assembler->GoTo(trace->backtrack());
    3094        2333 :       assembler->Bind(&ok);
    3095             :       break;
    3096             :     }
    3097             :     case AT_START: {
    3098        3052 :       if (trace->at_start() == Trace::FALSE_VALUE) {
    3099           9 :         assembler->GoTo(trace->backtrack());
    3100           9 :         return;
    3101             :       }
    3102        3043 :       if (trace->at_start() == Trace::UNKNOWN) {
    3103        3043 :         assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
                               // Past the check we know we are at the start, so the successor is
                               // emitted with a trace copy that records that fact.
    3104        3043 :         Trace at_start_trace = *trace;
    3105             :         at_start_trace.set_at_start(Trace::TRUE_VALUE);
    3106        3043 :         on_success()->Emit(compiler, &at_start_trace);
    3107             :         return;
    3108             :       }
    3109             :     }
                         // Reached from AT_START when neither branch above returned.
    3110             :     break;
    3111             :     case AFTER_NEWLINE:
    3112         129 :       EmitHat(compiler, on_success(), trace);
    3113         129 :       return;
    3114             :     case AT_BOUNDARY:
    3115             :     case AT_NON_BOUNDARY: {
    3116         255 :       EmitBoundaryCheck(compiler, trace);
    3117         255 :       return;
    3118             :     }
    3119             :   }
    3120        2333 :   on_success()->Emit(compiler, trace);
    3121             : }
    3122             : 
    3123             : 
    // Returns the determines_perfectly flag of the quick-check position at
    // |offset|.  Returns false when |quick_check| is null or |offset| is not
    // below its character count.
    3124             : static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
    3125     2753026 :   if (quick_check == nullptr) return false;
    3126     2753026 :   if (offset >= quick_check->characters()) return false;
    3127      854561 :   return quick_check->positions(offset)->determines_perfectly;
    3128             : }
    3129             : 
    3130             : 
    // Raises *checked_up_to to |index| when |index| is larger; otherwise leaves
    // it unchanged.
    3131             : static void UpdateBoundsCheck(int index, int* checked_up_to) {
    3132      802794 :   if (index > *checked_up_to) {
    3133      416257 :     *checked_up_to = index;
    3134             :   }
    3135             : }
    3136             : 
    3137             : 
    3138             : // We call this repeatedly to generate code for each pass over the text node.
    3139             : // The passes are in increasing order of difficulty because we hope one
    3140             : // of the first passes will fail in which case we are saved the work of the
    3141             : // later passes.  For example for the case independent regexp /%[asdfghjkl]a/
    3142             : // we will check the '%' in the first pass, the case independent 'a' in the
    3143             : // second pass and the character class in the last pass.
    3144             : //
    3145             : // The passes are done from right to left, so for example to test for /bar/
    3146             : // we will first test for an 'r' with offset 2, then an 'a' with offset 1
    3147             : // and then a 'b' with offset 0.  This means we can avoid the end-of-input
    3148             : // bounds check most of the time.  In the example we only need to check for
    3149             : // end-of-input when loading the putative 'r'.
    3150             : //
    3151             : // A slight complication involves the fact that the first character may already
    3152             : // be fetched into a register by the previous node.  In this case we want to
    3153             : // do the test for that character first.  We do this in separate passes.  The
    3154             : // 'preloaded' argument indicates that we are doing such a 'pass'.  If such a
    3155             : // pass has been performed then subsequent passes will have true in
    3156             : // first_element_checked to indicate that that character does not need to be
    3157             : // checked again.
    3158             : //
    3159             : // In addition to all this we are passed a Trace, which can
    3160             : // contain an AlternativeGeneration object.  In this AlternativeGeneration
    3161             : // object we can see details of any quick check that was already passed in
    3162             : // order to get to the code we are now generating.  The quick check can involve
    3163             : // loading characters, which means we do not need to recheck the bounds
    3164             : // up to the limit the quick check already checked.  In addition the quick
    3165             : // check can have involved a mask and compare operation which may simplify
    3166             : // or obviate the need for further checks at some character positions.
    3167     2709889 : void TextNode::TextEmitPass(RegExpCompiler* compiler,
    3168             :                             TextEmitPassType pass,
    3169             :                             bool preloaded,
    3170             :                             Trace* trace,
    3171             :                             bool first_element_checked,
    3172             :                             int* checked_up_to) {
    3173             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3174             :   Isolate* isolate = assembler->isolate();
    3175             :   bool one_byte = compiler->one_byte();
    3176             :   Label* backtrack = trace->backtrack();
    3177             :   QuickCheckDetails* quick_check = trace->quick_check_performed();
    3178             :   int element_count = elements()->length();
    3179     2709889 :   int backward_offset = read_backward() ? -Length() : 0;
    3180     5679531 :   for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
    3181     2969667 :     TextElement elm = elements()->at(i);
    3182     2969667 :     int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
    3183     2969667 :     if (elm.text_type() == TextElement::ATOM) {
    3184     1813231 :       if (SkipPass(pass, elm.atom()->ignore_case())) continue;
    3185             :       Vector<const uc16> quarks = elm.atom()->data();
    3186     4637115 :       for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
    3187     2592939 :         if (first_element_checked && i == 0 && j == 0) continue;
    3188     5024544 :         if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
    3189             :         EmitCharacterFunction* emit_function = nullptr;
    3190     3410786 :         uc16 quark = quarks[j];
    3191     1705393 :         if (elm.atom()->ignore_case()) {
    3192             :           // Everywhere else we assume that a non-Latin-1 character cannot match
    3193             :           // a Latin-1 character. Avoid the cases where this is assumption is
    3194             :           // invalid by using the Latin1 equivalent instead.
    3195             :           quark = unibrow::Latin1::TryConvertToLatin1(quark);
    3196             :         }
    3197     1705393 :         switch (pass) {
    3198             :           case NON_LATIN1_MATCH:
    3199             :             DCHECK(one_byte);
    3200      518806 :             if (quark > String::kMaxOneByteCharCode) {
    3201          25 :               assembler->GoTo(backtrack);
    3202             :               return;
    3203             :             }
    3204             :             break;
    3205             :           case NON_LETTER_CHARACTER_MATCH:
    3206             :             emit_function = &EmitAtomNonLetter;
    3207        5619 :             break;
    3208             :           case SIMPLE_CHARACTER_MATCH:
    3209             :             emit_function = &EmitSimpleCharacter;
    3210      584865 :             break;
    3211             :           case CASE_CHARACTER_MATCH:
    3212             :             emit_function = &EmitAtomLetter;
    3213        5619 :             break;
    3214             :           default:
    3215             :             break;
    3216             :         }
    3217     1705368 :         if (emit_function != nullptr) {
    3218      596103 :           bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
    3219             :           bool bound_checked =
    3220      596103 :               emit_function(isolate, compiler, quark, backtrack, cp_offset + j,
    3221      596103 :                             bounds_check, preloaded);
    3222      596103 :           if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
    3223             :         }
    3224             :       }
    3225             :     } else {
    3226             :       DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
    3227     1156436 :       if (pass == CHARACTER_CLASS_MATCH) {
    3228      280098 :         if (first_element_checked && i == 0) continue;
    3229      240754 :         if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
    3230             :         RegExpCharacterClass* cc = elm.char_class();
    3231      212367 :         bool bounds_check = *checked_up_to < cp_offset || read_backward();
    3232      212367 :         EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
    3233      212367 :                       bounds_check, preloaded, zone());
    3234             :         UpdateBoundsCheck(cp_offset, checked_up_to);
    3235             :       }
    3236             :     }
    3237             :   }
    3238             : }
    3239             : 
    3240             : 
    3241     6958219 : int TextNode::Length() {
    3242     6958219 :   TextElement elm = elements()->last();
    3243             :   DCHECK_LE(0, elm.cp_offset());
    3244     6958219 :   return elm.cp_offset() + elm.length();
    3245             : }
    3246             : 
    3247           0 : bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
    3248     1813231 :   if (ignore_case) {
    3249       44992 :     return pass == SIMPLE_CHARACTER_MATCH;
    3250             :   } else {
    3251     1768239 :     return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
    3252             :   }
    3253             : }
    3254             : 
    3255        7207 : TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
    3256             :                                              ZoneList<CharacterRange>* ranges,
    3257             :                                              bool read_backward,
    3258             :                                              RegExpNode* on_success,
    3259             :                                              JSRegExp::Flags flags) {
    3260             :   DCHECK_NOT_NULL(ranges);
    3261             :   ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
    3262       14414 :   elms->Add(TextElement::CharClass(
    3263       21621 :                 new (zone) RegExpCharacterClass(zone, ranges, flags)),
    3264        7207 :             zone);
    3265        7207 :   return new (zone) TextNode(elms, read_backward, on_success);
    3266             : }
    3267             : 
    3268       28195 : TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
    3269             :                                            CharacterRange trail,
    3270             :                                            bool read_backward,
    3271             :                                            RegExpNode* on_success,
    3272             :                                            JSRegExp::Flags flags) {
    3273       28195 :   ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
    3274       28195 :   ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
    3275             :   ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
    3276       56390 :   elms->Add(TextElement::CharClass(
    3277       84585 :                 new (zone) RegExpCharacterClass(zone, lead_ranges, flags)),
    3278       28195 :             zone);
    3279       56390 :   elms->Add(TextElement::CharClass(
    3280       84585 :                 new (zone) RegExpCharacterClass(zone, trail_ranges, flags)),
    3281       28195 :             zone);
    3282       28195 :   return new (zone) TextNode(elms, read_backward, on_success);
    3283             : }
    3284             : 
    3285             : 
    3286             : // This generates the code to match a text node.  A text node can contain
    3287             : // straight character sequences (possibly to be matched in a case-independent
    3288             : // way) and character classes.  For efficiency we do not do this in a single
    3289             : // pass from left to right.  Instead we pass over the text node several times,
    3290             : // emitting code for some character positions every time.  See the comment on
    3291             : // TextEmitPass for details.
    3292      620127 : void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    3293      620127 :   LimitResult limit_result = LimitVersions(compiler, trace);
    3294      722032 :   if (limit_result == DONE) return;
    3295             :   DCHECK(limit_result == CONTINUE);
    3296             : 
    3297      518222 :   if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
    3298             :     compiler->SetRegExpTooBig();
    3299             :     return;
    3300             :   }
    3301             : 
    3302      518222 :   if (compiler->one_byte()) {
    3303      323113 :     int dummy = 0;
    3304      323113 :     TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
    3305             :   }
    3306             : 
    3307             :   bool first_elt_done = false;
    3308      518222 :   int bound_checked_to = trace->cp_offset() - 1;
    3309      518222 :   bound_checked_to += trace->bound_checked_up_to();
    3310             : 
    3311             :   // If a character is preloaded into the current character register then
    3312             :   // check that now.
    3313      518222 :   if (trace->characters_preloaded() == 1) {
    3314      706248 :     for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
    3315      313888 :       TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
    3316      313888 :                    false, &bound_checked_to);
    3317             :     }
    3318             :     first_elt_done = true;
    3319             :   }
    3320             : 
    3321     4663998 :   for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
    3322     2072888 :     TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
    3323     2072888 :                  first_elt_done, &bound_checked_to);
    3324             :   }
    3325             : 
    3326      518222 :   Trace successor_trace(*trace);
    3327             :   // If we advance backward, we may end up at the start.
    3328      523394 :   successor_trace.AdvanceCurrentPositionInTrace(
    3329      523394 :       read_backward() ? -Length() : Length(), compiler);
    3330      518222 :   successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
    3331             :                                                : Trace::FALSE_VALUE);
    3332             :   RecursionCheck rc(compiler);
    3333      518222 :   on_success()->Emit(compiler, &successor_trace);
    3334             : }
    3335             : 
    3336             : 
    3337           0 : void Trace::InvalidateCurrentCharacter() {
    3338      228489 :   characters_preloaded_ = 0;
    3339           0 : }
    3340             : 
    3341             : 
    3342      518222 : void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
    3343             :   // We don't have an instruction for shifting the current character register
    3344             :   // down or for using a shifted value for anything so lets just forget that
    3345             :   // we preloaded any characters into it.
    3346      518222 :   characters_preloaded_ = 0;
    3347             :   // Adjust the offsets of the quick check performed information.  This
    3348             :   // information is used to find out what we already determined about the
    3349             :   // characters by means of mask and compare.
    3350      518222 :   quick_check_performed_.Advance(by, compiler->one_byte());
    3351      518222 :   cp_offset_ += by;
    3352      518222 :   if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
    3353             :     compiler->SetRegExpTooBig();
    3354           0 :     cp_offset_ = 0;
    3355             :   }
    3356     1036444 :   bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
    3357      518222 : }
    3358             : 
    3359             : 
    3360      319047 : void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
    3361             :   int element_count = elements()->length();
    3362     1082697 :   for (int i = 0; i < element_count; i++) {
    3363      381825 :     TextElement elm = elements()->at(i);
    3364      381825 :     if (elm.text_type() == TextElement::CHAR_CLASS) {
    3365             :       RegExpCharacterClass* cc = elm.char_class();
    3366             : #ifdef V8_INTL_SUPPORT
    3367             :       bool case_equivalents_already_added =
    3368             :           NeedsUnicodeCaseEquivalents(cc->flags());
    3369             : #else
    3370             :       bool case_equivalents_already_added = false;
    3371             : #endif
    3372      240106 :       if (IgnoreCase(cc->flags()) && !case_equivalents_already_added) {
    3373             :         // None of the standard character classes is different in the case
    3374             :         // independent case and it slows us down if we don't know that.
    3375       68981 :         if (cc->is_standard(zone())) continue;
    3376             :         ZoneList<CharacterRange>* ranges = cc->ranges(zone());
    3377       66900 :         CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
    3378       66900 :                                            is_one_byte);
    3379             :       }
    3380             :     }
    3381             :   }
    3382      319047 : }
    3383             : 
    3384             : 
    3385      135273 : int TextNode::GreedyLoopTextLength() { return Length(); }
    3386             : 
    3387             : 
    3388       85918 : RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
    3389             :     RegExpCompiler* compiler) {
    3390       85918 :   if (read_backward()) return nullptr;
    3391       85793 :   if (elements()->length() != 1) return nullptr;
    3392       85456 :   TextElement elm = elements()->at(0);
    3393       85456 :   if (elm.text_type() != TextElement::CHAR_CLASS) return nullptr;
    3394             :   RegExpCharacterClass* node = elm.char_class();
    3395             :   ZoneList<CharacterRange>* ranges = node->ranges(zone());
    3396       84101 :   CharacterRange::Canonicalize(ranges);
    3397       84101 :   if (node->is_negated()) {
    3398         117 :     return ranges->length() == 0 ? on_success() : nullptr;
    3399             :   }
    3400       83984 :   if (ranges->length() != 1) return nullptr;
    3401             :   uint32_t max_char;
    3402       83524 :   if (compiler->one_byte()) {
    3403             :     max_char = String::kMaxOneByteCharCode;
    3404             :   } else {
    3405             :     max_char = String::kMaxUtf16CodeUnit;
    3406             :   }
    3407      167048 :   return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
    3408             : }
    3409             : 
    3410             : 
    3411             : // Finds the fixed match length of a sequence of nodes that goes from
    3412             : // this alternative and back to this choice node.  If there are variable
    3413             : // length nodes or other complications in the way then return a sentinel
    3414             : // value indicating that a greedy loop cannot be constructed.
    3415      225423 : int ChoiceNode::GreedyLoopTextLengthForAlternative(
    3416             :     GuardedAlternative* alternative) {
    3417             :   int length = 0;
    3418             :   RegExpNode* node = alternative->node();
    3419             :   // Later we will generate code for all these text nodes using recursion
    3420             :   // so we have to limit the max number.
    3421             :   int recursion_depth = 0;
    3422      495969 :   while (node != this) {
    3423      337310 :     if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
    3424             :       return kNodeIsTooComplexForGreedyLoops;
    3425             :     }
    3426      337310 :     int node_length = node->GreedyLoopTextLength();
    3427      337310 :     if (node_length == kNodeIsTooComplexForGreedyLoops) {
    3428             :       return kNodeIsTooComplexForGreedyLoops;
    3429             :     }
    3430      135273 :     length += node_length;
    3431             :     SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
    3432             :     node = seq_node->on_success();
    3433             :   }
    3434       23386 :   return read_backward() ? -length : length;
    3435             : }
    3436             : 
    3437             : 
    3438           0 : void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
    3439             :   DCHECK_NULL(loop_node_);
    3440             :   AddAlternative(alt);
    3441      999457 :   loop_node_ = alt.node();
    3442           0 : }
    3443             : 
    3444             : 
    3445           0 : void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
    3446             :   DCHECK_NULL(continue_node_);
    3447             :   AddAlternative(alt);
    3448      999457 :   continue_node_ = alt.node();
    3449           0 : }
    3450             : 
    3451             : 
    3452      334347 : void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    3453             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    3454      334347 :   if (trace->stop_node() == this) {
    3455             :     // Back edge of greedy optimized loop node graph.
    3456             :     int text_length =
    3457       11693 :         GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
    3458             :     DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length);
    3459             :     // Update the counter-based backtracking info on the stack.  This is an
    3460             :     // optimization for greedy loops (see below).
    3461             :     DCHECK(trace->cp_offset() == text_length);
    3462       11693 :     macro_assembler->AdvanceCurrentPosition(text_length);
    3463       11693 :     macro_assembler->GoTo(trace->loop_label());
    3464       11693 :     return;
    3465             :   }
    3466             :   DCHECK_NULL(trace->stop_node());
    3467      322654 :   if (!trace->is_trivial()) {
    3468      120157 :     trace->Flush(compiler, this);
    3469      120157 :     return;
    3470             :   }
    3471      202497 :   ChoiceNode::Emit(compiler, trace);
    3472             : }
    3473             : 
    3474             : 
    3475      213730 : int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
    3476             :                                            int eats_at_least) {
    3477             :   int preload_characters = Min(4, eats_at_least);
    3478             :   DCHECK_LE(preload_characters, 4);
    3479      213730 :   if (compiler->macro_assembler()->CanReadUnaligned()) {
    3480             :     bool one_byte = compiler->one_byte();
    3481      134435 :     if (one_byte) {
    3482             :       // We can't preload 3 characters because there is no machine instruction
    3483             :       // to do that.  We can't just load 4 because we could be reading
    3484             :       // beyond the end of the string, which could cause a memory fault.
    3485      106849 :       if (preload_characters == 3) preload_characters = 2;
    3486             :     } else {
    3487       27586 :       if (preload_characters > 2) preload_characters = 2;
    3488             :     }
    3489             :   } else {
    3490       79295 :     if (preload_characters > 1) preload_characters = 1;
    3491             :   }
    3492      213730 :   return preload_characters;
    3493             : }
    3494             : 
    3495             : 
    3496             : // This class is used when generating the alternatives in a choice node.  It
    3497             : // records the way the alternative is being code generated.
    3498             : class AlternativeGeneration: public Malloced {
    3499             :  public:
    3500             :   AlternativeGeneration()
    3501             :       : possible_success(),
    3502             :         expects_preload(false),
    3503             :         after(),
    3504     2181542 :         quick_check_details() { }
    3505             :   Label possible_success;
    3506             :   bool expects_preload;
    3507             :   Label after;
    3508             :   QuickCheckDetails quick_check_details;
    3509             : };
    3510             : 
    3511             : 
    3512             : // Creates a list of AlternativeGenerations.  If the list has a reasonable
    3513             : // size then it is on the stack, otherwise the excess is on the heap.
    3514             : class AlternativeGenerationList {
    3515             :  public:
    3516      213730 :   AlternativeGenerationList(int count, Zone* zone)
    3517     2351030 :       : alt_gens_(count, zone) {
    3518     1361014 :     for (int i = 0; i < count && i < kAFew; i++) {
    3519      573642 :       alt_gens_.Add(a_few_alt_gens_ + i, zone);
    3520             :     }
    3521      302214 :     for (int i = kAFew; i < count; i++) {
    3522       44242 :       alt_gens_.Add(new AlternativeGeneration(), zone);
    3523             :     }
    3524      213730 :   }
    3525      427460 :   ~AlternativeGenerationList() {
    3526      302214 :     for (int i = kAFew; i < alt_gens_.length(); i++) {
    3527       44242 :       delete alt_gens_[i];
    3528       44242 :       alt_gens_[i] = nullptr;
    3529             :     }
    3530      213730 :   }
    3531             : 
    3532             :   AlternativeGeneration* at(int i) {
    3533     1628229 :     return alt_gens_[i];
    3534             :   }
    3535             : 
    3536             :  private:
    3537             :   static const int kAFew = 10;
    3538             :   ZoneList<AlternativeGeneration*> alt_gens_;
    3539             :   AlternativeGeneration a_few_alt_gens_[kAFew];
    3540             : };
    3541             : 
    3542             : 
    3543             : static const uc32 kRangeEndMarker = 0x110000;
    3544             : 
    3545             : // The '2' variant is has inclusive from and exclusive to.
    3546             : // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
    3547             : // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
    3548             : static const int kSpaceRanges[] = {
    3549             :     '\t',   '\r' + 1, ' ',    ' ' + 1, 0x00A0, 0x00A1, 0x1680,
    3550             :     0x1681, 0x2000,   0x200B, 0x2028,  0x202A, 0x202F, 0x2030,
    3551             :     0x205F, 0x2060,   0x3000, 0x3001,  0xFEFF, 0xFF00, kRangeEndMarker};
    3552             : static const int kSpaceRangeCount = arraysize(kSpaceRanges);
    3553             : 
    3554             : static const int kWordRanges[] = {
    3555             :     '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker};
    3556             : static const int kWordRangeCount = arraysize(kWordRanges);
    3557             : static const int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
    3558             : static const int kDigitRangeCount = arraysize(kDigitRanges);
    3559             : static const int kSurrogateRanges[] = {
    3560             :     kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker};
    3561             : static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
    3562             : static const int kLineTerminatorRanges[] = {
    3563             :     0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker};
    3564             : static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
    3565             : 
    3566           0 : void BoyerMoorePositionInfo::Set(int character) {
    3567       86966 :   SetInterval(Interval(character, character));
    3568           0 : }
    3569             : 
    3570             : 
    3571      248173 : void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
    3572      496346 :   s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
    3573      496346 :   w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
    3574      496346 :   d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
    3575             :   surrogate_ =
    3576      496346 :       AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
    3577      248173 :   if (interval.to() - interval.from() >= kMapSize - 1) {
    3578       13661 :     if (map_count_ != kMapSize) {
    3579        6372 :       map_count_ = kMapSize;
    3580     1637604 :       for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
    3581             :     }
    3582             :     return;
    3583             :   }
    3584     1235656 :   for (int i = interval.from(); i <= interval.to(); i++) {
    3585      544913 :     int mod_character = (i & kMask);
    3586     1089826 :     if (!map_->at(mod_character)) {
    3587      374775 :       map_count_++;
    3588      374775 :       map_->at(mod_character) = true;
    3589             :     }
    3590      544913 :     if (map_count_ == kMapSize) return;
    3591             :   }
    3592             : }
    3593             : 
    3594             : 
    3595           0 : void BoyerMoorePositionInfo::SetAll() {
    3596        5507 :   s_ = w_ = d_ = kLatticeUnknown;
    3597        5507 :   if (map_count_ != kMapSize) {
    3598        5077 :     map_count_ = kMapSize;
    3599     1304789 :     for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
    3600             :   }
    3601           0 : }
    3602             : 
    3603             : 
    3604       80788 : BoyerMooreLookahead::BoyerMooreLookahead(
    3605             :     int length, RegExpCompiler* compiler, Zone* zone)
    3606             :     : length_(length),
    3607       80788 :       compiler_(compiler) {
    3608       80788 :   if (compiler->one_byte()) {
    3609       10143 :     max_char_ = String::kMaxOneByteCharCode;
    3610             :   } else {
    3611       70645 :     max_char_ = String::kMaxUtf16CodeUnit;
    3612             :   }
    3613       80788 :   bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
    3614      280168 :   for (int i = 0; i < length; i++) {
    3615       99690 :     bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
    3616             :   }
    3617       80788 : }
    3618             : 
    3619             : 
    3620             : // Find the longest range of lookahead that has the fewest number of different
    3621             : // characters that can occur at a given position.  Since we are optimizing two
    3622             : // different parameters at once this is a tradeoff.
    3623           0 : bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
    3624             :   int biggest_points = 0;
    3625             :   // If more than 32 characters out of 128 can occur it is unlikely that we can
    3626             :   // be lucky enough to step forwards much of the time.
    3627             :   const int kMaxMax = 32;
    3628      242073 :   for (int max_number_of_chars = 4;
    3629      322764 :        max_number_of_chars < kMaxMax;
    3630             :        max_number_of_chars *= 2) {
    3631             :     biggest_points =
    3632      242073 :         FindBestInterval(max_number_of_chars, biggest_points, from, to);
    3633             :   }
    3634       80691 :   if (biggest_points == 0) return false;
    3635           0 :   return true;
    3636             : }
    3637             : 
    3638             : 
    3639             : // Find the highest-points range between 0 and length_ where the character
    3640             : // information is not too vague.  'Too vague' means that there are more than
    3641             : // max_number_of_chars that can occur at this position.  Calculates the number
    3642             : // of points as the product of width-of-the-range and
    3643             : // probability-of-finding-one-of-the-characters, where the probability is
    3644             : // calculated using the frequency distribution of the sample subject string.
    3645      242073 : int BoyerMooreLookahead::FindBestInterval(
    3646             :     int max_number_of_chars, int old_biggest_points, int* from, int* to) {
    3647             :   int biggest_points = old_biggest_points;
    3648             :   static const int kSize = RegExpMacroAssembler::kTableSize;
    3649      700377 :   for (int i = 0; i < length_; ) {
    3650      569318 :     while (i < length_ && Count(i) > max_number_of_chars) i++;
    3651      256944 :     if (i == length_) break;
    3652             :     int remembered_from = i;
    3653             :     bool union_map[kSize];
    3654    29560608 :     for (int j = 0; j < kSize; j++) union_map[j] = false;
    3655     1014789 :     while (i < length_ && Count(i) <= max_number_of_chars) {
    3656      513844 :       BoyerMoorePositionInfo* map = bitmaps_->at(i);
    3657    66028954 :       for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
    3658      256922 :       i++;
    3659             :     }
    3660             :     int frequency = 0;
    3661    58892064 :     for (int j = 0; j < kSize; j++) {
    3662    29331456 :       if (union_map[j]) {
    3663             :         // Add 1 to the frequency to give a small per-character boost for
    3664             :         // the cases where our sampling is not good enough and many
    3665             :         // characters have a frequency of zero.  This means the frequency
    3666             :         // can theoretically be up to 2*kSize though we treat it mostly as
    3667             :         // a fraction of kSize.
    3668      980164 :         frequency += compiler_->frequency_collator()->Frequency(j) + 1;
    3669             :       }
    3670             :     }
    3671             :     // We use the probability of skipping times the distance we are skipping to
    3672             :     // judge the effectiveness of this.  Actually we have a cut-off:  By
    3673             :     // dividing by 2 we switch off the skipping if the probability of skipping
    3674             :     // is less than 50%.  This is because the multibyte mask-and-compare
    3675             :     // skipping in quickcheck is more likely to do well on this case.
    3676             :     bool in_quickcheck_range =
    3677      231971 :         ((i - remembered_from < 4) ||
    3678        2819 :          (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
    3679             :     // Called 'probability' but it is only a rough estimate and can actually
    3680             :     // be outside the 0-kSize range.
    3681      229152 :     int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
    3682      229152 :     int points = (i - remembered_from) * probability;
    3683      229152 :     if (points > biggest_points) {
    3684        5878 :       *from = remembered_from;
    3685        5878 :       *to = i - 1;
    3686             :       biggest_points = points;
    3687             :     }
    3688             :   }
    3689      242073 :   return biggest_points;
    3690             : }
    3691             : 
    3692             : 
    3693             : // Take all the characters that will not prevent a successful match if they
    3694             : // occur in the subject string in the range between min_lookahead and
    3695             : // max_lookahead (inclusive) measured from the current position.  If the
    3696             : // character at max_lookahead offset is not one of these characters, then we
    3697             : // can safely skip forwards by the number of characters in the range.
    3698        4467 : int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
    3699             :                                       int max_lookahead,
    3700             :                                       Handle<ByteArray> boolean_skip_table) {
    3701             :   const int kSize = RegExpMacroAssembler::kTableSize;
    3702             : 
    3703             :   const int kSkipArrayEntry = 0;
    3704             :   const int kDontSkipArrayEntry = 1;
    3705             : 
    3706     1148019 :   for (int i = 0; i < kSize; i++) {
    3707             :     boolean_skip_table->set(i, kSkipArrayEntry);
    3708             :   }
    3709        4467 :   int skip = max_lookahead + 1 - min_lookahead;
    3710             : 
    3711       23747 :   for (int i = max_lookahead; i >= min_lookahead; i--) {
    3712       19280 :     BoyerMoorePositionInfo* map = bitmaps_->at(i);
    3713     2477480 :     for (int j = 0; j < kSize; j++) {
    3714     1233920 :       if (map->at(j)) {
    3715             :         boolean_skip_table->set(j, kDontSkipArrayEntry);
    3716             :       }
    3717             :     }
    3718             :   }
    3719             : 
    3720        4467 :   return skip;
    3721             : }
    3722             : 
    3723             : 
    3724             : // See comment above on the implementation of GetSkipTable.
    3725       80691 : void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
    3726             :   const int kSize = RegExpMacroAssembler::kTableSize;
    3727             : 
    3728       80691 :   int min_lookahead = 0;
    3729       80691 :   int max_lookahead = 0;
    3730             : 
    3731      156915 :   if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
    3732             : 
    3733             :   bool found_single_character = false;
    3734             :   int single_character = 0;
    3735        9860 :   for (int i = max_lookahead; i >= min_lookahead; i--) {
    3736       17684 :     BoyerMoorePositionInfo* map = bitmaps_->at(i);
    3737       17684 :     if (map->map_count() > 1 ||
    3738        3201 :         (found_single_character && map->map_count() != 0)) {
    3739             :       found_single_character = false;
    3740             :       break;
    3741             :     }
    3742      808195 :     for (int j = 0; j < kSize; j++) {
    3743      406209 :       if (map->at(j)) {
    3744             :         found_single_character = true;
    3745             :         single_character = j;
    3746             :         break;
    3747             :       }
    3748             :     }
    3749             :   }
    3750             : 
    3751        5485 :   int lookahead_width = max_lookahead + 1 - min_lookahead;
    3752             : 
    3753        5485 :   if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
    3754             :     // The mask-compare can probably handle this better.
    3755             :     return;
    3756             :   }
    3757             : 
    3758        4563 :   if (found_single_character) {
    3759          96 :     Label cont, again;
    3760          96 :     masm->Bind(&again);
    3761          96 :     masm->LoadCurrentCharacter(max_lookahead, &cont, true);
    3762          96 :     if (max_char_ > kSize) {
    3763          96 :       masm->CheckCharacterAfterAnd(single_character,
    3764             :                                    RegExpMacroAssembler::kTableMask,
    3765         192 :                                    &cont);
    3766             :     } else {
    3767           0 :       masm->CheckCharacter(single_character, &cont);
    3768             :     }
    3769          96 :     masm->AdvanceCurrentPosition(lookahead_width);
    3770          96 :     masm->GoTo(&again);
    3771          96 :     masm->Bind(&cont);
    3772             :     return;
    3773             :   }
    3774             : 
    3775             :   Factory* factory = masm->isolate()->factory();
    3776             :   Handle<ByteArray> boolean_skip_table =
    3777        4467 :       factory->NewByteArray(kSize, AllocationType::kOld);
    3778        4467 :   int skip_distance = GetSkipTable(
    3779        4467 :       min_lookahead, max_lookahead, boolean_skip_table);
    3780             :   DCHECK_NE(0, skip_distance);
    3781             : 
    3782        4467 :   Label cont, again;
    3783        4467 :   masm->Bind(&again);
    3784        4467 :   masm->LoadCurrentCharacter(max_lookahead, &cont, true);
    3785        4467 :   masm->CheckBitInTable(boolean_skip_table, &cont);
    3786        4467 :   masm->AdvanceCurrentPosition(skip_distance);
    3787        4467 :   masm->GoTo(&again);
    3788        4467 :   masm->Bind(&cont);
    3789             : }
    3790             : 
    3791             : 
    3792             : /* Code generation for choice nodes.
    3793             :  *
    3794             :  * We generate quick checks that do a mask and compare to eliminate a
    3795             :  * choice.  If the quick check succeeds then it jumps to the continuation to
    3796             :  * do slow checks and check subsequent nodes.  If it fails (the common case)
    3797             :  * it falls through to the next choice.
    3798             :  *
    3799             :  * Here is the desired flow graph.  Nodes directly below each other imply
    3800             :  * fallthrough.  Alternatives 1 and 2 have quick checks.  Alternative
    3801             :  * 3 doesn't have a quick check so we have to call the slow check.
    3802             :  * Nodes are marked Qn for quick checks and Sn for slow checks.  The entire
    3803             :  * regexp continuation is generated directly after the Sn node, up to the
    3804             :  * next GoTo if we decide to reuse some already generated code.  Some
    3805             :  * nodes expect preload_characters to be preloaded into the current
    3806             :  * character register.  R nodes do this preloading.  Vertices are marked
    3807             :  * F for failures and S for success (possible success in the case of quick
    3808             :  * nodes).  L, V, < and > are used as arrow heads.
    3809             :  *
    3810             :  * ----------> R
    3811             :  *             |
    3812             :  *             V
    3813             :  *            Q1 -----> S1
    3814             :  *             |   S   /
    3815             :  *            F|      /
    3816             :  *             |    F/
    3817             :  *             |    /
    3818             :  *             |   R
    3819             :  *             |  /
    3820             :  *             V L
    3821             :  *            Q2 -----> S2
    3822             :  *             |   S   /
    3823             :  *            F|      /
    3824             :  *             |    F/
    3825             :  *             |    /
    3826             :  *             |   R
    3827             :  *             |  /
    3828             :  *             V L
    3829             :  *            S3
    3830             :  *             |
    3831             :  *            F|
    3832             :  *             |
    3833             :  *             R
    3834             :  *             |
    3835             :  * backtrack   V
    3836             :  * <----------Q4
    3837             :  *   \    F    |
    3838             :  *    \        |S
    3839             :  *     \   F   V
    3840             :  *      \-----S4
    3841             :  *
    3842             :  * For greedy loops we push the current position, then generate the code that
    3843             :  * eats the input specially in EmitGreedyLoop.  The other choice (the
    3844             :  * continuation) is generated by the normal code in EmitChoices, and steps back
    3845             :  * in the input to the starting position when it fails to match.  The loop code
    3846             :  * looks like this (U is the unwind code that steps back in the greedy loop).
    3847             :  *
    3848             :  *              _____
    3849             :  *             /     \
    3850             :  *             V     |
    3851             :  * ----------> S1    |
    3852             :  *            /|     |
    3853             :  *           / |S    |
    3854             :  *         F/  \_____/
    3855             :  *         /
    3856             :  *        |<-----
    3857             :  *        |      \
    3858             :  *        V       |S
    3859             :  *        Q2 ---> U----->backtrack
    3860             :  *        |  F   /
    3861             :  *       S|     /
    3862             :  *        V  F /
    3863             :  *        S2--/
    3864             :  */
    3865             : 
    3866      213730 : GreedyLoopState::GreedyLoopState(bool not_at_start) {
    3867      213730 :   counter_backtrack_trace_.set_backtrack(&label_);
    3868      213730 :   if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
    3869      213730 : }
    3870             : 
    3871             : 
    3872           0 : void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
    3873             : #ifdef DEBUG
    3874             :   int choice_count = alternatives_->length();
    3875             :   for (int i = 0; i < choice_count - 1; i++) {
    3876             :     GuardedAlternative alternative = alternatives_->at(i);
    3877             :     ZoneList<Guard*>* guards = alternative.guards();
    3878             :     int guard_count = (guards == nullptr) ? 0 : guards->length();
    3879             :     for (int j = 0; j < guard_count; j++) {
    3880             :       DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
    3881             :     }
    3882             :   }
    3883             : #endif
    3884           0 : }
    3885             : 
    3886             : 
    3887      213730 : void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
    3888             :                               Trace* current_trace,
    3889             :                               PreloadState* state) {
    3890      213730 :     if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
    3891             :       // Save some time by looking at most one machine word ahead.
    3892             :       state->eats_at_least_ =
    3893      131516 :           EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
    3894      263032 :                       current_trace->at_start() == Trace::FALSE_VALUE);
    3895             :     }
    3896             :     state->preload_characters_ =
    3897      213730 :         CalculatePreloadCharacters(compiler, state->eats_at_least_);
    3898             : 
    3899             :     state->preload_is_current_ =
    3900      213730 :         (current_trace->characters_preloaded() == state->preload_characters_);
    3901      213730 :     state->preload_has_checked_bounds_ = state->preload_is_current_;
    3902      213730 : }
    3903             : 
    3904             : 
    3905      584273 : void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    3906      584273 :   int choice_count = alternatives_->length();
    3907             : 
    3908      584273 :   if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
    3909        1358 :     alternatives_->at(0).node()->Emit(compiler, trace);
    3910        1358 :     return;
    3911             :   }
    3912             : 
    3913             :   AssertGuardsMentionRegisters(trace);
    3914             : 
    3915      582915 :   LimitResult limit_result = LimitVersions(compiler, trace);
    3916      582915 :   if (limit_result == DONE) return;
    3917             :   DCHECK(limit_result == CONTINUE);
    3918             : 
    3919             :   // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
    3920             :   // other choice nodes we only flush if we are out of code size budget.
    3921      215190 :   if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
    3922        1460 :     trace->Flush(compiler, this);
    3923        1460 :     return;
    3924             :   }
    3925             : 
    3926             :   RecursionCheck rc(compiler);
    3927             : 
    3928             :   PreloadState preload;
    3929             :   preload.init();
    3930      213730 :   GreedyLoopState greedy_loop_state(not_at_start());
    3931             : 
    3932      213730 :   int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
    3933      427460 :   AlternativeGenerationList alt_gens(choice_count, zone());
    3934             : 
    3935      213730 :   if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
    3936             :     trace = EmitGreedyLoop(compiler,
    3937             :                            trace,
    3938             :                            &alt_gens,
    3939             :                            &preload,
    3940             :                            &greedy_loop_state,
    3941       11693 :                            text_length);
    3942             :   } else {
    3943             :     // TODO(erikcorry): Delete this.  We don't need this label, but it makes us
    3944             :     // match the traces produced pre-cleanup.
    3945      202037 :     Label second_choice;
    3946      202037 :     compiler->macro_assembler()->Bind(&second_choice);
    3947             : 
    3948      202037 :     preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
    3949             : 
    3950             :     EmitChoices(compiler,
    3951             :                 &alt_gens,
    3952             :                 0,
    3953             :                 trace,
    3954      202037 :                 &preload);
    3955             :   }
    3956             : 
    3957             :   // At this point we need to generate slow checks for the alternatives where
    3958             :   // the quick check was inlined.  We can recognize these because the associated
    3959             :   // label was bound.
    3960      213730 :   int new_flush_budget = trace->flush_budget() / choice_count;
    3961     1449498 :   for (int i = 0; i < choice_count; i++) {
    3962             :     AlternativeGeneration* alt_gen = alt_gens.at(i);
    3963      617884 :     Trace new_trace(*trace);
    3964             :     // If there are actions to be flushed we have to limit how many times
    3965             :     // they are flushed.  Take the budget of the parent trace and distribute
    3966             :     // it fairly amongst the children.
    3967      617884 :     if (new_trace.actions() != nullptr) {
    3968             :       new_trace.set_flush_budget(new_flush_budget);
    3969             :     }
    3970             :     bool next_expects_preload =
    3971     1022038 :         i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
    3972      617884 :     EmitOutOfLineContinuation(compiler,
    3973             :                               &new_trace,
    3974      617884 :                               alternatives_->at(i),
    3975             :                               alt_gen,
    3976             :                               preload.preload_characters_,
    3977      617884 :                               next_expects_preload);
    3978             :   }
    3979             : }
    3980             : 
    3981             : 
    3982       11693 : Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
    3983             :                                   Trace* trace,
    3984             :                                   AlternativeGenerationList* alt_gens,
    3985             :                                   PreloadState* preload,
    3986             :                                   GreedyLoopState* greedy_loop_state,
    3987             :                                   int text_length) {
    3988             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    3989             :   // Here we have special handling for greedy loops containing only text nodes
    3990             :   // and other simple nodes.  These are handled by pushing the current
    3991             :   // position on the stack and then incrementing the current position each
    3992             :   // time around the switch.  On backtrack we decrement the current position
    3993             :   // and check it against the pushed value.  This avoids pushing backtrack
    3994             :   // information for each iteration of the loop, which could take up a lot of
    3995             :   // space.
    3996             :   DCHECK(trace->stop_node() == nullptr);
    3997       11693 :   macro_assembler->PushCurrentPosition();
    3998       11693 :   Label greedy_match_failed;
    3999             :   Trace greedy_match_trace;
    4000       11693 :   if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
    4001             :   greedy_match_trace.set_backtrack(&greedy_match_failed);
    4002       11693 :   Label loop_label;
    4003       11693 :   macro_assembler->Bind(&loop_label);
    4004       11693 :   greedy_match_trace.set_stop_node(this);
    4005             :   greedy_match_trace.set_loop_label(&loop_label);
    4006       11693 :   alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
    4007       11693 :   macro_assembler->Bind(&greedy_match_failed);
    4008             : 
    4009       11693 :   Label second_choice;  // For use in greedy matches.
    4010       11693 :   macro_assembler->Bind(&second_choice);
    4011             : 
    4012             :   Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
    4013             : 
    4014             :   EmitChoices(compiler,
    4015             :               alt_gens,
    4016             :               1,
    4017             :               new_trace,
    4018       11693 :               preload);
    4019             : 
    4020       23386 :   macro_assembler->Bind(greedy_loop_state->label());
    4021             :   // If we have unwound to the bottom then backtrack.
    4022       11693 :   macro_assembler->CheckGreedyLoop(trace->backtrack());
    4023             :   // Otherwise try the second priority at an earlier position.
    4024       11693 :   macro_assembler->AdvanceCurrentPosition(-text_length);
    4025       11693 :   macro_assembler->GoTo(&second_choice);
    4026       11693 :   return new_trace;
    4027             : }
    4028             : 
    4029      202037 : int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
    4030             :                                               Trace* trace) {
    4031             :   int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
    4032      404074 :   if (alternatives_->length() != 2) return eats_at_least;
    4033             : 
    4034      165896 :   GuardedAlternative alt1 = alternatives_->at(1);
    4035      167316 :   if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
    4036             :     return eats_at_least;
    4037             :   }
    4038             :   RegExpNode* eats_anything_node = alt1.node();
    4039      164476 :   if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
    4040             :     return eats_at_least;
    4041             :   }
    4042             : 
    4043             :   // Really we should be creating a new trace when we execute this function,
    4044             :   // but there is no need, because the code it generates cannot backtrack, and
    4045             :   // we always arrive here with a trivial trace (since it's the entry to a
    4046             :   // loop.  That also implies that there are no preloaded characters, which is
    4047             :   // good, because it means we won't be violating any assumptions by
    4048             :   // overwriting those characters with new load instructions.
    4049             :   DCHECK(trace->is_trivial());
    4050             : 
    4051             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4052             :   Isolate* isolate = macro_assembler->isolate();
    4053             :   // At this point we know that we are at a non-greedy loop that will eat
    4054             :   // any character one at a time.  Any non-anchored regexp has such a
    4055             :   // loop prepended to it in order to find where it starts.  We look for
    4056             :   // a pattern of the form ...abc... where we can look 6 characters ahead
    4057             :   // and step forwards 3 if the character is not one of abc.  Abc need
    4058             :   // not be atoms, they can be any reasonably limited character class or
    4059             :   // small alternation.
    4060             :   BoyerMooreLookahead* bm = bm_info(false);
    4061       82214 :   if (bm == nullptr) {
    4062       82214 :     eats_at_least = Min(kMaxLookaheadForBoyerMoore,
    4063             :                         EatsAtLeast(kMaxLookaheadForBoyerMoore,
    4064             :                                     kRecursionBudget,
    4065       82214 :                                     false));
    4066       82214 :     if (eats_at_least >= 1) {
    4067             :       bm = new(zone()) BoyerMooreLookahead(eats_at_least,
    4068             :                                            compiler,
    4069       80691 :                                            zone());
    4070       80691 :       GuardedAlternative alt0 = alternatives_->at(0);
    4071       80691 :       alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
    4072             :     }
    4073             :   }
    4074       82214 :   if (bm != nullptr) {
    4075       80691 :     bm->EmitSkipInstructions(macro_assembler);
    4076             :   }
    4077             :   return eats_at_least;
    4078             : }
    4079             : 
    4080             : 
    4081      213730 : void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
    4082             :                              AlternativeGenerationList* alt_gens,
    4083             :                              int first_choice,
    4084             :                              Trace* trace,
    4085             :                              PreloadState* preload) {
    4086             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4087      213730 :   SetUpPreLoad(compiler, trace, preload);
    4088             : 
    4089             :   // For now we just call all choices one after the other.  The idea ultimately
    4090             :   // is to use the Dispatch table to try only the relevant ones.
    4091      213730 :   int choice_count = alternatives_->length();
    4092             : 
    4093      213730 :   int new_flush_budget = trace->flush_budget() / choice_count;
    4094             : 
    4095     1426112 :   for (int i = first_choice; i < choice_count; i++) {
    4096      606191 :     bool is_last = i == choice_count - 1;
    4097      606191 :     bool fall_through_on_failure = !is_last;
    4098     1212382 :     GuardedAlternative alternative = alternatives_->at(i);
    4099             :     AlternativeGeneration* alt_gen = alt_gens->at(i);
    4100      606191 :     alt_gen->quick_check_details.set_characters(preload->preload_characters_);
    4101             :     ZoneList<Guard*>* guards = alternative.guards();
    4102      606191 :     int guard_count = (guards == nullptr) ? 0 : guards->length();
    4103      606191 :     Trace new_trace(*trace);
    4104      606191 :     new_trace.set_characters_preloaded(preload->preload_is_current_ ?
    4105             :                                          preload->preload_characters_ :
    4106             :                                          0);
    4107      606191 :     if (preload->preload_has_checked_bounds_) {
    4108      400688 :       new_trace.set_bound_checked_up_to(preload->preload_characters_);
    4109             :     }
    4110             :     new_trace.quick_check_performed()->Clear();
    4111      606191 :     if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
    4112      606191 :     if (!is_last) {
    4113      392461 :       new_trace.set_backtrack(&alt_gen->after);
    4114             :     }
    4115      606191 :     alt_gen->expects_preload = preload->preload_is_current_;
    4116             :     bool generate_full_check_inline = false;
    4117     1085771 :     if (compiler->optimize() &&
    4118     1082969 :         try_to_emit_quick_check_for_alternative(i == 0) &&
    4119      953556 :         alternative.node()->EmitQuickCheck(
    4120      476778 :             compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
    4121             :             &alt_gen->possible_success, &alt_gen->quick_check_details,
    4122             :             fall_through_on_failure)) {
    4123             :       // Quick check was generated for this choice.
    4124      226926 :       preload->preload_is_current_ = true;
    4125      226926 :       preload->preload_has_checked_bounds_ = true;
    4126             :       // If we generated the quick check to fall through on possible success,
    4127             :       // we now need to generate the full check inline.
    4128      226926 :       if (!fall_through_on_failure) {
    4129       34428 :         macro_assembler->Bind(&alt_gen->possible_success);
    4130             :         new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
    4131       34428 :         new_trace.set_characters_preloaded(preload->preload_characters_);
    4132             :         new_trace.set_bound_checked_up_to(preload->preload_characters_);
    4133             :         generate_full_check_inline = true;
    4134             :       }
    4135      379265 :     } else if (alt_gen->quick_check_details.cannot_match()) {
    4136         110 :       if (!fall_through_on_failure) {
    4137          22 :         macro_assembler->GoTo(trace->backtrack());
    4138             :       }
    4139         110 :       continue;
    4140             :     } else {
    4141             :       // No quick check was generated.  Put the full code here.
    4142             :       // If this is not the first choice then there could be slow checks from
    4143             :       // previous cases that go here when they fail.  There's no reason to
    4144             :       // insist that they preload characters since the slow check we are about
    4145             :       // to generate probably can't use it.
    4146      379155 :       if (i != first_choice) {
    4147      227954 :         alt_gen->expects_preload = false;
    4148             :         new_trace.InvalidateCurrentCharacter();
    4149             :       }
    4150             :       generate_full_check_inline = true;
    4151             :     }
    4152      606081 :     if (generate_full_check_inline) {
    4153      413583 :       if (new_trace.actions() != nullptr) {
    4154             :         new_trace.set_flush_budget(new_flush_budget);
    4155             :       }
    4156      418535 :       for (int j = 0; j < guard_count; j++) {
    4157        2476 :         GenerateGuard(macro_assembler, guards->at(j), &new_trace);
    4158             :       }
    4159      413583 :       alternative.node()->Emit(compiler, &new_trace);
    4160      413583 :       preload->preload_is_current_ = false;
    4161             :     }
    4162      606081 :     macro_assembler->Bind(&alt_gen->after);
    4163             :   }
    4164      213730 : }
    4165             : 
    4166             : 
    4167      617884 : void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
    4168             :                                            Trace* trace,
    4169             :                                            GuardedAlternative alternative,
    4170             :                                            AlternativeGeneration* alt_gen,
    4171             :                                            int preload_characters,
    4172             :                                            bool next_expects_preload) {
    4173     1043270 :   if (!alt_gen->possible_success.is_linked()) return;
    4174             : 
    4175             :   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4176      192498 :   macro_assembler->Bind(&alt_gen->possible_success);
    4177      192498 :   Trace out_of_line_trace(*trace);
    4178             :   out_of_line_trace.set_characters_preloaded(preload_characters);
    4179             :   out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
    4180      192498 :   if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
    4181             :   ZoneList<Guard*>* guards = alternative.guards();
    4182      192498 :   int guard_count = (guards == nullptr) ? 0 : guards->length();
    4183      192498 :   if (next_expects_preload) {
    4184      162871 :     Label reload_current_char;
    4185             :     out_of_line_trace.set_backtrack(&reload_current_char);
    4186      165651 :     for (int j = 0; j < guard_count; j++) {
    4187        1390 :       GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
    4188             :     }
    4189      162871 :     alternative.node()->Emit(compiler, &out_of_line_trace);
    4190      162871 :     macro_assembler->Bind(&reload_current_char);
    4191             :     // Reload the current character, since the next quick check expects that.
    4192             :     // We don't need to check bounds here because we only get into this
    4193             :     // code through a quick check which already did the checked load.
    4194             :     macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
    4195      162871 :                                           preload_characters);
    4196      162871 :     macro_assembler->GoTo(&(alt_gen->after));
    4197             :   } else {
    4198       29627 :     out_of_line_trace.set_backtrack(&(alt_gen->after));
    4199       29779 :     for (int j = 0; j < guard_count; j++) {
    4200          76 :       GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
    4201             :     }
    4202       29627 :     alternative.node()->Emit(compiler, &out_of_line_trace);
    4203             :   }
    4204             : }
    4205             : 
    4206             : 
    4207      495617 : void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    4208             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4209      495617 :   LimitResult limit_result = LimitVersions(compiler, trace);
    4210      495617 :   if (limit_result == DONE) return;
    4211             :   DCHECK(limit_result == CONTINUE);
    4212             : 
    4213             :   RecursionCheck rc(compiler);
    4214             : 
    4215      266244 :   switch (action_type_) {
    4216             :     case STORE_POSITION: {
    4217             :       Trace::DeferredCapture
    4218             :           new_capture(data_.u_position_register.reg,
    4219      241749 :                       data_.u_position_register.is_capture,
    4220      241749 :                       trace);
    4221      241749 :       Trace new_trace = *trace;
    4222             :       new_trace.add_action(&new_capture);
    4223      241749 :       on_success()->Emit(compiler, &new_trace);
    4224             :       break;
    4225             :     }
    4226             :     case INCREMENT_REGISTER: {
    4227             :       Trace::DeferredIncrementRegister
    4228        3750 :           new_increment(data_.u_increment_register.reg);
    4229        3750 :       Trace new_trace = *trace;
    4230             :       new_trace.add_action(&new_increment);
    4231        3750 :       on_success()->Emit(compiler, &new_trace);
    4232             :       break;
    4233             :     }
    4234             :     case SET_REGISTER: {
    4235             :       Trace::DeferredSetRegister
    4236        3471 :           new_set(data_.u_store_register.reg, data_.u_store_register.value);
    4237        3471 :       Trace new_trace = *trace;
    4238             :       new_trace.add_action(&new_set);
    4239        3471 :       on_success()->Emit(compiler, &new_trace);
    4240             :       break;
    4241             :     }
    4242             :     case CLEAR_CAPTURES: {
    4243             :       Trace::DeferredClearCaptures
    4244             :         new_capture(Interval(data_.u_clear_captures.range_from,
    4245        2232 :                              data_.u_clear_captures.range_to));
    4246        2232 :       Trace new_trace = *trace;
    4247             :       new_trace.add_action(&new_capture);
    4248        2232 :       on_success()->Emit(compiler, &new_trace);
    4249             :       break;
    4250             :     }
    4251             :     case BEGIN_SUBMATCH:
    4252        9483 :       if (!trace->is_trivial()) {
    4253        5046 :         trace->Flush(compiler, this);
    4254             :       } else {
    4255        4437 :         assembler->WriteCurrentPositionToRegister(
    4256        8874 :             data_.u_submatch.current_position_register, 0);
    4257        4437 :         assembler->WriteStackPointerToRegister(
    4258        8874 :             data_.u_submatch.stack_pointer_register);
    4259        4437 :         on_success()->Emit(compiler, trace);
    4260             :       }
    4261             :       break;
    4262             :     case EMPTY_MATCH_CHECK: {
    4263         973 :       int start_pos_reg = data_.u_empty_match_check.start_register;
    4264         973 :       int stored_pos = 0;
    4265         973 :       int rep_reg = data_.u_empty_match_check.repetition_register;
    4266             :       bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
    4267         973 :       bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
    4268         973 :       if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
    4269             :         // If we know we haven't advanced and there is no minimum we
    4270             :         // can just backtrack immediately.
    4271          76 :         assembler->GoTo(trace->backtrack());
    4272         897 :       } else if (know_dist && stored_pos < trace->cp_offset()) {
    4273             :         // If we know we've advanced we can generate the continuation
    4274             :         // immediately.
    4275         247 :         on_success()->Emit(compiler, trace);
    4276         650 :       } else if (!trace->is_trivial()) {
    4277         339 :         trace->Flush(compiler, this);
    4278             :       } else {
    4279         311 :         Label skip_empty_check;
    4280             :         // If we have a minimum number of repetitions we check the current
    4281             :         // number first and skip the empty check if it's not enough.
    4282         311 :         if (has_minimum) {
    4283         206 :           int limit = data_.u_empty_match_check.repetition_limit;
    4284         206 :           assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
    4285             :         }
    4286             :         // If the match is empty we bail out, otherwise we fall through
    4287             :         // to the on-success continuation.
    4288         311 :         assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
    4289         622 :                                    trace->backtrack());
    4290         311 :         assembler->Bind(&skip_empty_check);
    4291         311 :         on_success()->Emit(compiler, trace);
    4292             :       }
    4293             :       break;
    4294             :     }
    4295             :     case POSITIVE_SUBMATCH_SUCCESS: {
    4296        4586 :       if (!trace->is_trivial()) {
    4297        2956 :         trace->Flush(compiler, this);
    4298        2956 :         return;
    4299             :       }
    4300        1630 :       assembler->ReadCurrentPositionFromRegister(
    4301        3260 :           data_.u_submatch.current_position_register);
    4302        1630 :       assembler->ReadStackPointerFromRegister(
    4303        3260 :           data_.u_submatch.stack_pointer_register);
    4304        1630 :       int clear_register_count = data_.u_submatch.clear_register_count;
    4305        1630 :       if (clear_register_count == 0) {
    4306        1147 :         on_success()->Emit(compiler, trace);
    4307        1147 :         return;
    4308             :       }
    4309         483 :       int clear_registers_from = data_.u_submatch.clear_register_from;
    4310         483 :       Label clear_registers_backtrack;
    4311         483 :       Trace new_trace = *trace;
    4312             :       new_trace.set_backtrack(&clear_registers_backtrack);
    4313         483 :       on_success()->Emit(compiler, &new_trace);
    4314             : 
    4315         483 :       assembler->Bind(&clear_registers_backtrack);
    4316         483 :       int clear_registers_to = clear_registers_from + clear_register_count - 1;
    4317         483 :       assembler->ClearRegisters(clear_registers_from, clear_registers_to);
    4318             : 
    4319             :       DCHECK(trace->backtrack() == nullptr);
    4320         483 :       assembler->Backtrack();
    4321         483 :       return;
    4322             :     }
    4323             :     default:
    4324           0 :       UNREACHABLE();
    4325             :   }
    4326             : }
    4327             : 
    4328             : 
    4329        4745 : void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    4330             :   RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4331        4745 :   if (!trace->is_trivial()) {
    4332        2254 :     trace->Flush(compiler, this);
    4333        2254 :     return;
    4334             :   }
    4335             : 
    4336        2491 :   LimitResult limit_result = LimitVersions(compiler, trace);
    4337        2491 :   if (limit_result == DONE) return;
    4338             :   DCHECK(limit_result == CONTINUE);
    4339             : 
    4340             :   RecursionCheck rc(compiler);
    4341             : 
    4342             :   DCHECK_EQ(start_reg_ + 1, end_reg_);
    4343        2291 :   if (IgnoreCase(flags_)) {
    4344        1682 :     assembler->CheckNotBackReferenceIgnoreCase(
    4345        3364 :         start_reg_, read_backward(), IsUnicode(flags_), trace->backtrack());
    4346             :   } else {
    4347         609 :     assembler->CheckNotBackReference(start_reg_, read_backward(),
    4348        1218 :                                      trace->backtrack());
    4349             :   }
    4350             :   // We are going to advance backward, so we may end up at the start.
    4351        2291 :   if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
    4352             : 
    4353             :   // Check that the back reference does not end inside a surrogate pair.
    4354        2291 :   if (IsUnicode(flags_) && !compiler->one_byte()) {
    4355          80 :     assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
    4356             :   }
    4357        2291 :   on_success()->Emit(compiler, trace);
    4358             : }
    4359             : 
    4360             : 
    4361             : // -------------------------------------------------------------------
    4362             : // Dot/dotty output
    4363             : 
    4364             : 
    4365             : #ifdef DEBUG
    4366             : 
    4367             : 
    4368             : class DotPrinter: public NodeVisitor {
    4369             :  public:
    4370             :   DotPrinter(std::ostream& os, bool ignore_case)  // NOLINT
    4371             :       : os_(os),
    4372             :         ignore_case_(ignore_case) {}
    4373             :   void PrintNode(const char* label, RegExpNode* node);
    4374             :   void Visit(RegExpNode* node);
    4375             :   void PrintAttributes(RegExpNode* from);
    4376             :   void PrintOnFailure(RegExpNode* from, RegExpNode* to);
    4377             : #define DECLARE_VISIT(Type)                                          \
    4378             :   virtual void Visit##Type(Type##Node* that);
    4379             : FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    4380             : #undef DECLARE_VISIT
    4381             :  private:
    4382             :   std::ostream& os_;
    4383             :   bool ignore_case_;
    4384             : };
    4385             : 
    4386             : 
    4387             : void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
    4388             :   os_ << "digraph G {\n  graph [label=\"";
    4389             :   for (int i = 0; label[i]; i++) {
    4390             :     switch (label[i]) {
    4391             :       case '\\':
    4392             :         os_ << "\\\\";
    4393             :         break;
    4394             :       case '"':
    4395             :         os_ << "\"";
    4396             :         break;
    4397             :       default:
    4398             :         os_ << label[i];
    4399             :         break;
    4400             :     }
    4401             :   }
    4402             :   os_ << "\"];\n";
    4403             :   Visit(node);
    4404             :   os_ << "}" << std::endl;
    4405             : }
    4406             : 
    4407             : 
    4408             : void DotPrinter::Visit(RegExpNode* node) {
    4409             :   if (node->info()->visited) return;
    4410             :   node->info()->visited = true;
    4411             :   node->Accept(this);
    4412             : }
    4413             : 
    4414             : 
    4415             : void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
    4416             :   os_ << "  n" << from << " -> n" << on_failure << " [style=dotted];\n";
    4417             :   Visit(on_failure);
    4418             : }
    4419             : 
    4420             : 
    4421             : class TableEntryBodyPrinter {
    4422             :  public:
    4423             :   TableEntryBodyPrinter(std::ostream& os, ChoiceNode* choice)  // NOLINT
    4424             :       : os_(os),
    4425             :         choice_(choice) {}
    4426             :   void Call(uc16 from, DispatchTable::Entry entry) {
    4427             :     OutSet* out_set = entry.out_set();
    4428             :     for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
    4429             :       if (out_set->Get(i)) {
    4430             :         os_ << "    n" << choice() << ":s" << from << "o" << i << " -> n"
    4431             :             << choice()->alternatives()->at(i).node() << ";\n";
    4432             :       }
    4433             :     }
    4434             :   }
    4435             :  private:
    4436             :   ChoiceNode* choice() { return choice_; }
    4437             :   std::ostream& os_;
    4438             :   ChoiceNode* choice_;
    4439             : };
    4440             : 
    4441             : 
    4442             : class TableEntryHeaderPrinter {
    4443             :  public:
    4444             :   explicit TableEntryHeaderPrinter(std::ostream& os)  // NOLINT
    4445             :       : first_(true),
    4446             :         os_(os) {}
    4447             :   void Call(uc16 from, DispatchTable::Entry entry) {
    4448             :     if (first_) {
    4449             :       first_ = false;
    4450             :     } else {
    4451             :       os_ << "|";
    4452             :     }
    4453             :     os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "|{";
    4454             :     OutSet* out_set = entry.out_set();
    4455             :     int priority = 0;
    4456             :     for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
    4457             :       if (out_set->Get(i)) {
    4458             :         if (priority > 0) os_ << "|";
    4459             :         os_ << "<s" << from << "o" << i << "> " << priority;
    4460             :         priority++;
    4461             :       }
    4462             :     }
    4463             :     os_ << "}}";
    4464             :   }
    4465             : 
    4466             :  private:
    4467             :   bool first_;
    4468             :   std::ostream& os_;
    4469             : };
    4470             : 
    4471             : 
    4472             : class AttributePrinter {
    4473             :  public:
    4474             :   explicit AttributePrinter(std::ostream& os)  // NOLINT
    4475             :       : os_(os),
    4476             :         first_(true) {}
    4477             :   void PrintSeparator() {
    4478             :     if (first_) {
    4479             :       first_ = false;
    4480             :     } else {
    4481             :       os_ << "|";
    4482             :     }
    4483             :   }
    4484             :   void PrintBit(const char* name, bool value) {
    4485             :     if (!value) return;
    4486             :     PrintSeparator();
    4487             :     os_ << "{" << name << "}";
    4488             :   }
    4489             :   void PrintPositive(const char* name, int value) {
    4490             :     if (value < 0) return;
    4491             :     PrintSeparator();
    4492             :     os_ << "{" << name << "|" << value << "}";
    4493             :   }
    4494             : 
    4495             :  private:
    4496             :   std::ostream& os_;
    4497             :   bool first_;
    4498             : };
    4499             : 
    4500             : 
    4501             : void DotPrinter::PrintAttributes(RegExpNode* that) {
    4502             :   os_ << "  a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, "
    4503             :       << "margin=0.1, fontsize=10, label=\"{";
    4504             :   AttributePrinter printer(os_);
    4505             :   NodeInfo* info = that->info();
    4506             :   printer.PrintBit("NI", info->follows_newline_interest);
    4507             :   printer.PrintBit("WI", info->follows_word_interest);
    4508             :   printer.PrintBit("SI", info->follows_start_interest);
    4509             :   Label* label = that->label();
    4510             :   if (label->is_bound())
    4511             :     printer.PrintPositive("@", label->pos());
    4512             :   os_ << "}\"];\n"
    4513             :       << "  a" << that << " -> n" << that
    4514             :       << " [style=dashed, color=grey, arrowhead=none];\n";
    4515             : }
    4516             : 
    4517             : 
    4518             : static const bool kPrintDispatchTable = false;
    4519             : void DotPrinter::VisitChoice(ChoiceNode* that) {
    4520             :   if (kPrintDispatchTable) {
    4521             :     os_ << "  n" << that << " [shape=Mrecord, label=\"";
    4522             :     TableEntryHeaderPrinter header_printer(os_);
    4523             :     that->GetTable(ignore_case_)->ForEach(&header_printer);
    4524             :     os_ << "\"]\n";
    4525             :     PrintAttributes(that);
    4526             :     TableEntryBodyPrinter body_printer(os_, that);
    4527             :     that->GetTable(ignore_case_)->ForEach(&body_printer);
    4528             :   } else {
    4529             :     os_ << "  n" << that << " [shape=Mrecord, label=\"?\"];\n";
    4530             :     for (int i = 0; i < that->alternatives()->length(); i++) {
    4531             :       GuardedAlternative alt = that->alternatives()->at(i);
    4532             :       os_ << "  n" << that << " -> n" << alt.node();
    4533             :     }
    4534             :   }
    4535             :   for (int i = 0; i < that->alternatives()->length(); i++) {
    4536             :     GuardedAlternative alt = that->alternatives()->at(i);
    4537             :     alt.node()->Accept(this);
    4538             :   }
    4539             : }
    4540             : 
    4541             : 
    4542             : void DotPrinter::VisitText(TextNode* that) {
    4543             :   Zone* zone = that->zone();
    4544             :   os_ << "  n" << that << " [label=\"";
    4545             :   for (int i = 0; i < that->elements()->length(); i++) {
    4546             :     if (i > 0) os_ << " ";
    4547             :     TextElement elm = that->elements()->at(i);
    4548             :     switch (elm.text_type()) {
    4549             :       case TextElement::ATOM: {
    4550             :         Vector<const uc16> data = elm.atom()->data();
    4551             :         for (int i = 0; i < data.length(); i++) {
    4552             :           os_ << static_cast<char>(data[i]);
    4553             :         }
    4554             :         break;
    4555             :       }
    4556             :       case TextElement::CHAR_CLASS: {
    4557             :         RegExpCharacterClass* node = elm.char_class();
    4558             :         os_ << "[";
    4559             :         if (node->is_negated()) os_ << "^";
    4560             :         for (int j = 0; j < node->ranges(zone)->length(); j++) {
    4561             :           CharacterRange range = node->ranges(zone)->at(j);
    4562             :           os_ << AsUC16(range.from()) << "-" << AsUC16(range.to());
    4563             :         }
    4564             :         os_ << "]";
    4565             :         break;
    4566             :       }
    4567             :       default:
    4568             :         UNREACHABLE();
    4569             :     }
    4570             :   }
    4571             :   os_ << "\", shape=box, peripheries=2];\n";
    4572             :   PrintAttributes(that);
    4573             :   os_ << "  n" << that << " -> n" << that->on_success() << ";\n";
    4574             :   Visit(that->on_success());
    4575             : }
    4576             : 
    4577             : 
    4578             : void DotPrinter::VisitBackReference(BackReferenceNode* that) {
    4579             :   os_ << "  n" << that << " [label=\"$" << that->start_register() << "..$"
    4580             :       << that->end_register() << "\", shape=doubleoctagon];\n";
    4581             :   PrintAttributes(that);
    4582             :   os_ << "  n" << that << " -> n" << that->on_success() << ";\n";
    4583             :   Visit(that->on_success());
    4584             : }
    4585             : 
    4586             : 
    4587             : void DotPrinter::VisitEnd(EndNode* that) {
    4588             :   os_ << "  n" << that << " [style=bold, shape=point];\n";
    4589             :   PrintAttributes(that);
    4590             : }
    4591             : 
    4592             : 
    4593             : void DotPrinter::VisitAssertion(AssertionNode* that) {
    4594             :   os_ << "  n" << that << " [";
    4595             :   switch (that->assertion_type()) {
    4596             :     case AssertionNode::AT_END:
    4597             :       os_ << "label=\"$\", shape=septagon";
    4598             :       break;
    4599             :     case AssertionNode::AT_START:
    4600             :       os_ << "label=\"^\", shape=septagon";
    4601             :       break;
    4602             :     case AssertionNode::AT_BOUNDARY:
    4603             :       os_ << "label=\"\\b\", shape=septagon";
    4604             :       break;
    4605             :     case AssertionNode::AT_NON_BOUNDARY:
    4606             :       os_ << "label=\"\\B\", shape=septagon";
    4607             :       break;
    4608             :     case AssertionNode::AFTER_NEWLINE:
    4609             :       os_ << "label=\"(?<=\\n)\", shape=septagon";
    4610             :       break;
    4611             :   }
    4612             :   os_ << "];\n";
    4613             :   PrintAttributes(that);
    4614             :   RegExpNode* successor = that->on_success();
    4615             :   os_ << "  n" << that << " -> n" << successor << ";\n";
    4616             :   Visit(successor);
    4617             : }
    4618             : 
    4619             : 
    4620             : void DotPrinter::VisitAction(ActionNode* that) {
    4621             :   os_ << "  n" << that << " [";
    4622             :   switch (that->action_type_) {
    4623             :     case ActionNode::SET_REGISTER:
    4624             :       os_ << "label=\"$" << that->data_.u_store_register.reg
    4625             :           << ":=" << that->data_.u_store_register.value << "\", shape=octagon";
    4626             :       break;
    4627             :     case ActionNode::INCREMENT_REGISTER:
    4628             :       os_ << "label=\"$" << that->data_.u_increment_register.reg
    4629             :           << "++\", shape=octagon";
    4630             :       break;
    4631             :     case ActionNode::STORE_POSITION:
    4632             :       os_ << "label=\"$" << that->data_.u_position_register.reg
    4633             :           << ":=$pos\", shape=octagon";
    4634             :       break;
    4635             :     case ActionNode::BEGIN_SUBMATCH:
    4636             :       os_ << "label=\"$" << that->data_.u_submatch.current_position_register
    4637             :           << ":=$pos,begin\", shape=septagon";
    4638             :       break;
    4639             :     case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
    4640             :       os_ << "label=\"escape\", shape=septagon";
    4641             :       break;
    4642             :     case ActionNode::EMPTY_MATCH_CHECK:
    4643             :       os_ << "label=\"$" << that->data_.u_empty_match_check.start_register
    4644             :           << "=$pos?,$" << that->data_.u_empty_match_check.repetition_register
    4645             :           << "<" << that->data_.u_empty_match_check.repetition_limit
    4646             :           << "?\", shape=septagon";
    4647             :       break;
    4648             :     case ActionNode::CLEAR_CAPTURES: {
    4649             :       os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from
    4650             :           << " to $" << that->data_.u_clear_captures.range_to
    4651             :           << "\", shape=septagon";
    4652             :       break;
    4653             :     }
    4654             :   }
    4655             :   os_ << "];\n";
    4656             :   PrintAttributes(that);
    4657             :   RegExpNode* successor = that->on_success();
    4658             :   os_ << "  n" << that << " -> n" << successor << ";\n";
    4659             :   Visit(successor);
    4660             : }
    4661             : 
    4662             : 
    4663             : class DispatchTableDumper {
    4664             :  public:
    4665             :   explicit DispatchTableDumper(std::ostream& os) : os_(os) {}
    4666             :   void Call(uc16 key, DispatchTable::Entry entry);
    4667             :  private:
    4668             :   std::ostream& os_;
    4669             : };
    4670             : 
    4671             : 
    4672             : void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
    4673             :   os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {";
    4674             :   OutSet* set = entry.out_set();
    4675             :   bool first = true;
    4676             :   for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
    4677             :     if (set->Get(i)) {
    4678             :       if (first) {
    4679             :         first = false;
    4680             :       } else {
    4681             :         os_ << ", ";
    4682             :       }
    4683             :       os_ << i;
    4684             :     }
    4685             :   }
    4686             :   os_ << "}\n";
    4687             : }
    4688             : 
    4689             : 
    4690             : void DispatchTable::Dump() {
    4691             :   OFStream os(stderr);
    4692             :   DispatchTableDumper dumper(os);
    4693             :   tree()->ForEach(&dumper);
    4694             : }
    4695             : 
    4696             : 
    4697             : void RegExpEngine::DotPrint(const char* label,
    4698             :                             RegExpNode* node,
    4699             :                             bool ignore_case) {
    4700             :   StdoutStream os;
    4701             :   DotPrinter printer(os, ignore_case);
    4702             :   printer.PrintNode(label, node);
    4703             : }
    4704             : 
    4705             : 
    4706             : #endif  // DEBUG
    4707             : 
    4708             : 
    4709             : // -------------------------------------------------------------------
    4710             : // Tree to graph conversion
    4711             : 
    4712      992686 : RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
    4713             :                                RegExpNode* on_success) {
    4714             :   ZoneList<TextElement>* elms =
    4715             :       new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
    4716      992686 :   elms->Add(TextElement::Atom(this), compiler->zone());
    4717             :   return new (compiler->zone())
    4718      992686 :       TextNode(elms, compiler->read_backward(), on_success);
    4719             : }
    4720             : 
    4721             : 
    4722       18669 : RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
    4723             :                                RegExpNode* on_success) {
    4724             :   return new (compiler->zone())
    4725       18669 :       TextNode(elements(), compiler->read_backward(), on_success);
    4726             : }
    4727             : 
    4728             : 
    4729      558166 : static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
    4730             :                                  const int* special_class,
    4731             :                                  int length) {
    4732      558166 :   length--;  // Remove final marker.
    4733             :   DCHECK_EQ(kRangeEndMarker, special_class[length]);
    4734             :   DCHECK_NE(0, ranges->length());
    4735             :   DCHECK_NE(0, length);
    4736             :   DCHECK_NE(0, special_class[0]);
    4737      558166 :   if (ranges->length() != (length >> 1) + 1) {
    4738             :     return false;
    4739             :   }
    4740       10234 :   CharacterRange range = ranges->at(0);
    4741       10234 :   if (range.from() != 0) {
    4742             :     return false;
    4743             :   }
    4744       58424 :   for (int i = 0; i < length; i += 2) {
    4745       25579 :     if (special_class[i] != (range.to() + 1)) {
    4746             :       return false;
    4747             :     }
    4748       50048 :     range = ranges->at((i >> 1) + 1);
    4749       25024 :     if (special_class[i+1] != range.from()) {
    4750             :       return false;
    4751             :     }
    4752             :   }
    4753        7821 :   if (range.to() != String::kMaxCodePoint) {
    4754             :     return false;
    4755             :   }
    4756        7821 :   return true;
    4757             : }
    4758             : 
    4759             : 
    4760      552542 : static bool CompareRanges(ZoneList<CharacterRange>* ranges,
    4761             :                           const int* special_class,
    4762             :                           int length) {
    4763      552542 :   length--;  // Remove final marker.
    4764             :   DCHECK_EQ(kRangeEndMarker, special_class[length]);
    4765      552542 :   if (ranges->length() * 2 != length) {
    4766             :     return false;
    4767             :   }
    4768       27785 :   for (int i = 0; i < length; i += 2) {
    4769       29386 :     CharacterRange range = ranges->at(i >> 1);
    4770       25908 :     if (range.from() != special_class[i] ||
    4771       11215 :         range.to() != special_class[i + 1] - 1) {
    4772             :       return false;
    4773             :     }
    4774             :   }
    4775             :   return true;
    4776             : }
    4777             : 
    4778             : 
    4779      198688 : bool RegExpCharacterClass::is_standard(Zone* zone) {
    4780             :   // TODO(lrn): Remove need for this function, by not throwing away information
    4781             :   // along the way.
    4782      198688 :   if (is_negated()) {
    4783             :     return false;
    4784             :   }
    4785      193009 :   if (set_.is_standard()) {
    4786             :     return true;
    4787             :   }
    4788      189728 :   if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
    4789             :     set_.set_standard_set_type('s');
    4790         607 :     return true;
    4791             :   }
    4792      189121 :   if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
    4793             :     set_.set_standard_set_type('S');
    4794         207 :     return true;
    4795             :   }
    4796      188914 :   if (CompareInverseRanges(set_.ranges(zone),
    4797             :                            kLineTerminatorRanges,
    4798             :                            kLineTerminatorRangeCount)) {
    4799             :     set_.set_standard_set_type('.');
    4800        7502 :     return true;
    4801             :   }
    4802      181412 :   if (CompareRanges(set_.ranges(zone),
    4803             :                     kLineTerminatorRanges,
    4804             :                     kLineTerminatorRangeCount)) {
    4805             :     set_.set_standard_set_type('n');
    4806          10 :     return true;
    4807             :   }
    4808      181402 :   if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
    4809             :     set_.set_standard_set_type('w');
    4810        1271 :     return true;
    4811             :   }
    4812      180131 :   if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
    4813             :     set_.set_standard_set_type('W');
    4814         112 :     return true;
    4815             :   }
    4816             :   return false;
    4817             : }
    4818             : 
    4819             : 
    4820        2587 : UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,  // Sorts |base| into BMP / lead / trail / non-BMP lists.
    4821             :                                            ZoneList<CharacterRange>* base)
    4822             :     : zone_(zone),
    4823             :       table_(zone),
    4824             :       bmp_(nullptr),
    4825             :       lead_surrogates_(nullptr),
    4826             :       trail_surrogates_(nullptr),
    4827        5174 :       non_bmp_(nullptr) {
    4828             :   // The unicode range splitter categorizes given character ranges into:
    4829             :   // - Code points from the BMP representable by one code unit.
    4830             :   // - Code points outside the BMP that need to be split into surrogate pairs.
    4831             :   // - Lone lead surrogates.
    4832             :   // - Lone trail surrogates.
    4833             :   // Lone surrogates are valid code points, even though they are not actual
    4834             :   // characters. They require special matching so that surrogate pairs are not
    4835             :   // split. We use the dispatch table to accomplish this. The base ranges are
    4836             :   // split up in the table by the overlay ranges, and the Call callback is
    4837             :   // used to filter and collect ranges for each category.
    4838      153083 :   for (int i = 0; i < base->length(); i++) {  // Tag every input range with kBase.
    4839      150496 :     table_.AddRange(base->at(i), kBase, zone_);
    4840             :   }
    4841             :   // Add overlay ranges. BMP is added in two pieces, below and above the surrogates.
    4842        2587 :   table_.AddRange(CharacterRange::Range(0, kLeadSurrogateStart - 1),
    4843        2587 :                   kBmpCodePoints, zone_);
    4844        2587 :   table_.AddRange(CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd),
    4845        2587 :                   kLeadSurrogates, zone_);
    4846        2587 :   table_.AddRange(
    4847             :       CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
    4848        2587 :       kTrailSurrogates, zone_);
    4849        2587 :   table_.AddRange(
    4850             :       CharacterRange::Range(kTrailSurrogateEnd + 1, kNonBmpStart - 1),
    4851        2587 :       kBmpCodePoints, zone_);
    4852        2587 :   table_.AddRange(CharacterRange::Range(kNonBmpStart, kNonBmpEnd),
    4853        2587 :                   kNonBmpCodePoints, zone_);
    4854             :   table_.ForEach(this);  // Per the note above, Call() filters and collects each entry.
    4855        2587 : }
    4856             : 
    4857             : 
    4858      161041 : void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {  // Files |entry|'s range under its category list.
    4859             :   OutSet* outset = entry.out_set();
    4860      161041 :   if (!outset->Get(kBase)) return;  // Entry is not covered by any input range: ignore.
    4861             :   ZoneList<CharacterRange>** target = nullptr;
    4862       79253 :   if (outset->Get(kBmpCodePoints)) {  // Pick the list matching the overlay tag.
    4863       50598 :     target = &bmp_;
    4864       28655 :   } else if (outset->Get(kLeadSurrogates)) {
    4865        1175 :     target = &lead_surrogates_;
    4866       27480 :   } else if (outset->Get(kTrailSurrogates)) {
    4867        1175 :     target = &trail_surrogates_;
    4868             :   } else {
    4869             :     DCHECK(outset->Get(kNonBmpCodePoints));
    4870       26305 :     target = &non_bmp_;
    4871             :   }
    4872       79253 :   if (*target == nullptr)  // Lists are allocated on first use.
    4873       18201 :     *target = new (zone_) ZoneList<CharacterRange>(2, zone_);
    4874       79253 :   (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);  // |from| is unused; bounds come from |entry|.
    4875             : }
    4876             : 
    4877        2582 : void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,  // Adds one alternative for the splitter's BMP ranges.
    4878             :                       RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
    4879             :   ZoneList<CharacterRange>* bmp = splitter->bmp();
    4880        3037 :   if (bmp == nullptr) return;  // No BMP ranges were collected: nothing to add.
    4881             :   JSRegExp::Flags default_flags = JSRegExp::Flags();  // Default-constructed flags are used for the new text node.
    4882        2127 :   result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
    4883             :       compiler->zone(), bmp, compiler->read_backward(), on_success,
    4884             :       default_flags)));
    4885             : }
    4886             : 
    4887        2582 : void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,  // Adds lead+trail pair alternatives for non-BMP ranges.
    4888             :                              RegExpNode* on_success,
    4889             :                              UnicodeRangeSplitter* splitter) {
    4890             :   ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
    4891        3584 :   if (non_bmp == nullptr) return;  // No non-BMP ranges were collected: nothing to add.
    4892             :   DCHECK(!compiler->one_byte());
    4893             :   Zone* zone = compiler->zone();
    4894             :   JSRegExp::Flags default_flags = JSRegExp::Flags();
    4895        1580 :   CharacterRange::Canonicalize(non_bmp);
    4896       54180 :   for (int i = 0; i < non_bmp->length(); i++) {
    4897             :     // Match surrogate pair.
    4898             :     // E.g. [\u{10005}-\u{11005}] becomes
    4899             :     //      \ud800[\udc05-\udfff]|
    4900             :     //      [\ud801-\ud803][\udc00-\udfff]|
    4901             :     //      \ud804[\udc00-\udc05]
    4902             :     uc32 from = non_bmp->at(i).from();
    4903             :     uc32 to = non_bmp->at(i).to();
    4904       26300 :     uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
    4905             :     uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
    4906       26300 :     uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
    4907             :     uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
    4908       26300 :     if (from_l == to_l) {
    4909             :       // The lead surrogate is the same.
    4910             :       result->AddAlternative(
    4911       23990 :           GuardedAlternative(TextNode::CreateForSurrogatePair(
    4912             :               zone, CharacterRange::Singleton(from_l),
    4913             :               CharacterRange::Range(from_t, to_t), compiler->read_backward(),
    4914             :               on_success, default_flags)));
    4915             :     } else {
    4916        2310 :       if (from_t != kTrailSurrogateStart) {  // First lead only covers part of the trail range.
    4917             :         // Add [from_l][from_t-\udfff]
    4918             :         result->AddAlternative(
    4919        1180 :             GuardedAlternative(TextNode::CreateForSurrogatePair(
    4920             :                 zone, CharacterRange::Singleton(from_l),
    4921             :                 CharacterRange::Range(from_t, kTrailSurrogateEnd),
    4922             :                 compiler->read_backward(), on_success, default_flags)));
    4923        1180 :         from_l++;  // That lead is done; the remaining leads start one higher.
    4924             :       }
    4925        2310 :       if (to_t != kTrailSurrogateEnd) {  // Last lead only covers part of the trail range.
    4926             :         // Add [to_l][\udc00-to_t]
    4927             :         result->AddAlternative(
    4928         925 :             GuardedAlternative(TextNode::CreateForSurrogatePair(
    4929             :                 zone, CharacterRange::Singleton(to_l),
    4930             :                 CharacterRange::Range(kTrailSurrogateStart, to_t),
    4931             :                 compiler->read_backward(), on_success, default_flags)));
    4932         925 :         to_l--;  // That lead is done; the remaining leads end one lower.
    4933             :       }
    4934        2310 :       if (from_l <= to_l) {  // Leads still left in between pair with every trail surrogate.
    4935             :         // Add [from_l-to_l][\udc00-\udfff]
    4936             :         result->AddAlternative(
    4937        2100 :             GuardedAlternative(TextNode::CreateForSurrogatePair(
    4938             :                 zone, CharacterRange::Range(from_l, to_l),
    4939             :                 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
    4940             :                 compiler->read_backward(), on_success, default_flags)));
    4941             :       }
    4942             :     }
    4943             :   }
    4944             : }
    4945             : 
    4946        1175 : RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(  // Lookaround on |lookbehind| against the read direction, then |match|.
    4947             :     RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
    4948             :     ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
    4949             :     JSRegExp::Flags flags) {
    4950             :   Zone* zone = compiler->zone();
    4951        1175 :   RegExpNode* match_node = TextNode::CreateForCharacterRanges(  // Matches |match|, then continues to |on_success|.
    4952        1175 :       zone, match, read_backward, on_success, flags);
    4953             :   int stack_register = compiler->UnicodeLookaroundStackRegister();
    4954             :   int position_register = compiler->UnicodeLookaroundPositionRegister();
    4955             :   RegExpLookaround::Builder lookaround(false, match_node, stack_register,  // NOTE(review): `false` presumably selects a negative lookaround - confirm.
    4956        1175 :                                        position_register);
    4957        1175 :   RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
    4958        2350 :       zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);  // Direction is inverted: !read_backward.
    4959        1175 :   return lookaround.ForMatch(negative_match);
    4960             : }
    4961             : 
    4962        1165 : RegExpNode* MatchAndNegativeLookaroundInReadDirection(  // Matches |match|, then a lookaround on |lookahead|, same direction.
    4963             :     RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
    4964             :     ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
    4965             :     bool read_backward, JSRegExp::Flags flags) {
    4966             :   Zone* zone = compiler->zone();
    4967             :   int stack_register = compiler->UnicodeLookaroundStackRegister();
    4968             :   int position_register = compiler->UnicodeLookaroundPositionRegister();
    4969             :   RegExpLookaround::Builder lookaround(false, on_success, stack_register,  // NOTE(review): `false` presumably selects a negative lookaround - confirm.
    4970        1165 :                                        position_register);
    4971        1165 :   RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
    4972        1165 :       zone, lookahead, read_backward, lookaround.on_match_success(), flags);
    4973        1165 :   return TextNode::CreateForCharacterRanges(  // |match| comes first; the lookaround node is its continuation.
    4974        1165 :       zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
    4975             : }
    4976             : 
    4977        2582 : void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,  // Adds one alternative for the splitter's lead surrogates.
    4978             :                            RegExpNode* on_success,
    4979             :                            UnicodeRangeSplitter* splitter) {
    4980             :   JSRegExp::Flags default_flags = JSRegExp::Flags();
    4981             :   ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
    4982        3994 :   if (lead_surrogates == nullptr) return;  // No lead surrogates were collected: nothing to add.
    4983             :   Zone* zone = compiler->zone();
    4984             :   // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
    4985             :   ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(  // Full trail range, used only for the lookaround.
    4986        1170 :       zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
    4987             : 
    4988             :   RegExpNode* match;
    4989        1170 :   if (compiler->read_backward()) {
    4990             :     // Reading backward. Assert that reading forward, there is no trail
    4991             :     // surrogate, and then backward match the lead surrogate.
    4992             :     match = NegativeLookaroundAgainstReadDirectionAndMatch(
    4993             :         compiler, trail_surrogates, lead_surrogates, on_success, true,
    4994          95 :         default_flags);
    4995             :   } else {
    4996             :     // Reading forward. Forward match the lead surrogate and assert that
    4997             :     // no trail surrogate follows.
    4998             :     match = MatchAndNegativeLookaroundInReadDirection(
    4999             :         compiler, lead_surrogates, trail_surrogates, on_success, false,
    5000        1075 :         default_flags);
    5001             :   }
    5002             :   result->AddAlternative(GuardedAlternative(match));
    5003             : }
    5004             : 
    5005        2582 : void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,  // Adds one alternative for the splitter's trail surrogates.
    5006             :                             RegExpNode* on_success,
    5007             :                             UnicodeRangeSplitter* splitter) {
    5008             :   JSRegExp::Flags default_flags = JSRegExp::Flags();
    5009             :   ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
    5010        3994 :   if (trail_surrogates == nullptr) return;  // No trail surrogates were collected: nothing to add.
    5011             :   Zone* zone = compiler->zone();
    5012             :   // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
    5013             :   ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(  // Full lead range, used only for the lookaround.
    5014        1170 :       zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
    5015             : 
    5016             :   RegExpNode* match;
    5017        1170 :   if (compiler->read_backward()) {
    5018             :     // Reading backward. Backward match the trail surrogate and assert that no
    5019             :     // lead surrogate precedes it.
    5020             :     match = MatchAndNegativeLookaroundInReadDirection(
    5021             :         compiler, trail_surrogates, lead_surrogates, on_success, true,
    5022          90 :         default_flags);
    5023             :   } else {
    5024             :     // Reading forward. Assert that reading backward, there is no lead
    5025             :     // surrogate, and then forward match the trail surrogate.
    5026             :     match = NegativeLookaroundAgainstReadDirectionAndMatch(
    5027             :         compiler, lead_surrogates, trail_surrogates, on_success, false,
    5028        1080 :         default_flags);
    5029             :   }
    5030             :   result->AddAlternative(GuardedAlternative(match));
    5031             : }
    5032             : 
    5033           0 : RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,  // Returns a forward text node over the range [0, kMaxUtf16CodeUnit].
    5034             :                               RegExpNode* on_success) {
    5035             :   // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
    5036             :   DCHECK(!compiler->read_backward());
    5037             :   Zone* zone = compiler->zone();
    5038             :   // Advance any character. If the character happens to be a lead surrogate and
    5039             :   // we advanced into the middle of a surrogate pair, it will work out, as
    5040             :   // nothing will match from there. We will have to advance again, consuming
    5041             :   // the associated trail surrogate.
    5042             :   ZoneList<CharacterRange>* range = CharacterRange::List(
    5043           0 :       zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
    5044             :   JSRegExp::Flags default_flags = JSRegExp::Flags();
    5045             :   return TextNode::CreateForCharacterRanges(zone, range, false, on_success,  // `false`: read forward, matching the DCHECK above.
    5046           0 :                                             default_flags);
    5047             : }
    5048             : 
    5049        1189 : void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {  // In-place case closure; empty body without V8_INTL_SUPPORT.
    5050             : #ifdef V8_INTL_SUPPORT
    5051             :   DCHECK(CharacterRange::IsCanonical(ranges));
    5052             : 
    5053             :   // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
    5054             :   // See also https://crbug.com/v8/6727.
    5055             :   // TODO(jgruber): This only covers the special case of the {0,0x10FFFF} range,
    5056             :   // which we use frequently internally. But large ranges can also easily be
    5057             :   // created by the user. We might want to have a more general caching mechanism
    5058             :   // for such ranges.
    5059        1728 :   if (ranges->length() == 1 && ranges->at(0).IsEverything(kNonBmpEnd)) return;
    5060             : 
    5061             :   // Use ICU to compute the case fold closure over the ranges.
    5062        2378 :   icu::UnicodeSet set;
    5063      247697 :   for (int i = 0; i < ranges->length(); i++) {
    5064      123254 :     set.add(ranges->at(i).from(), ranges->at(i).to());
    5065             :   }
    5066             :   ranges->Clear();  // The input now lives in |set|; |ranges| is refilled below.
    5067        1189 :   set.closeOver(USET_CASE_INSENSITIVE);
    5068             :   // Full case mappings map single characters to multiple characters.
    5069             :   // Those are represented as strings in the set. Remove them so that
    5070             :   // we end up with only simple and common case mappings.
    5071        1189 :   set.removeAllStrings();
    5072       37731 :   for (int i = 0; i < set.getRangeCount(); i++) {  // Copy the closed-over ranges back into |ranges|.
    5073       36542 :     ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
    5074       18271 :                 zone);
    5075             :   }
    5076             :   // No errors and everything we collected have been ranges.
    5077        1189 :   CharacterRange::Canonicalize(ranges);
    5078             : #endif  // V8_INTL_SUPPORT
    5079             : }
    5080             : 
    5081             : 
    5082      176260 : RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,  // Builds the node for this class; continues to |on_success|.
    5083             :                                          RegExpNode* on_success) {
    5084             :   set_.Canonicalize();
    5085             :   Zone* zone = compiler->zone();
    5086             :   ZoneList<CharacterRange>* ranges = this->ranges(zone);
    5087      176260 :   if (NeedsUnicodeCaseEquivalents(flags_)) {
    5088         949 :     AddUnicodeCaseEquivalents(ranges, zone);
    5089             :   }
    5090      178882 :   if (IsUnicode(flags_) && !compiler->one_byte() &&  // Surrogate-aware path: unicode flag, not one-byte, no split surrogate.
    5091             :       !contains_split_surrogate()) {
    5092        2612 :     if (is_negated()) {  // Negation is resolved here, into an explicit range list.
    5093             :       ZoneList<CharacterRange>* negated =
    5094             :           new (zone) ZoneList<CharacterRange>(2, zone);
    5095         140 :       CharacterRange::Negate(ranges, negated, zone);
    5096             :       ranges = negated;
    5097             :     }
    5098        2612 :     if (ranges->length() == 0) {  // Empty range list: emit a class over it (named |fail|).
    5099             :       JSRegExp::Flags default_flags;
    5100             :       RegExpCharacterClass* fail =
    5101          60 :           new (zone) RegExpCharacterClass(zone, ranges, default_flags);
    5102          60 :       return new (zone) TextNode(fail, compiler->read_backward(), on_success);
    5103             :     }
    5104        2582 :     if (standard_type() == '*') {  // Standard type '*' is delegated to UnanchoredAdvance.
    5105           0 :       return UnanchoredAdvance(compiler, on_success);
    5106             :     } else {
    5107        2582 :       ChoiceNode* result = new (zone) ChoiceNode(2, zone);
    5108        2582 :       UnicodeRangeSplitter splitter(zone, ranges);
    5109        2582 :       AddBmpCharacters(compiler, result, on_success, &splitter);  // Each helper adds alternatives for one category.
    5110        2582 :       AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
    5111        2582 :       AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
    5112        2582 :       AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
    5113             :       return result;
    5114             :     }
    5115             :   } else {
    5116      347296 :     return new (zone) TextNode(this, compiler->read_backward(), on_success);  // Otherwise: one text node over this class.
    5117             :   }
    5118             : }
    5119             : 
    5120             : 
    5121      146822 : int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {  // Three-way compare of first characters: -1, 0 or 1.
    5122      146822 :   RegExpAtom* atom1 = (*a)->AsAtom();
    5123      146822 :   RegExpAtom* atom2 = (*b)->AsAtom();
    5124      146822 :   uc16 character1 = atom1->data().at(0);  // Assumes both atoms are non-empty - TODO confirm with callers.
    5125      146822 :   uc16 character2 = atom2->data().at(0);
    5126      146822 :   if (character1 < character2) return -1;
    5127      129859 :   if (character1 > character2) return 1;
    5128       17383 :   return 0;
    5129             : }
    5130             : 
    5131             : #ifdef V8_INTL_SUPPORT
    5132             : 
    5133             : // Case-insensitive comparison.
    5134       63041 : int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {  // Orders by first character via ICU caseCompare.
    5135       63041 :   RegExpAtom* atom1 = (*a)->AsAtom();
    5136       63041 :   RegExpAtom* atom2 = (*b)->AsAtom();
    5137      126082 :   icu::UnicodeString character1(atom1->data().at(0));  // Assumes both atoms are non-empty - TODO confirm with callers.
    5138      126082 :   return character1.caseCompare(atom2->data().at(0), U_FOLD_CASE_DEFAULT);  // The caseCompare result is returned unchanged.
    5139             : }
    5140             : 
    5141             : #else
    5142             : 
    5143             : static unibrow::uchar Canonical(  // Returns the mapping's output for |c|, or |c| itself if it yields none.
    5144             :     unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
    5145             :     unibrow::uchar c) {
    5146             :   unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
    5147             :   int length = canonicalize->get(c, '\0', chars);
    5148             :   DCHECK_LE(length, 1);  // At most one output character is expected.
    5149             :   unibrow::uchar canonical = c;
    5150             :   if (length == 1) canonical = chars[0];
    5151             :   return canonical;
    5152             : }
    5153             : 
    5154             : int CompareFirstCharCaseIndependent(  // Orders two atoms by first character, canonicalizing where needed.
    5155             :     unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
    5156             :     RegExpTree* const* a, RegExpTree* const* b) {
    5157             :   RegExpAtom* atom1 = (*a)->AsAtom();
    5158             :   RegExpAtom* atom2 = (*b)->AsAtom();
    5159             :   unibrow::uchar character1 = atom1->data().at(0);
    5160             :   unibrow::uchar character2 = atom2->data().at(0);
    5161             :   if (character1 == character2) return 0;
    5162             :   if (character1 >= 'a' || character2 >= 'a') {  // If both are below 'a' they are compared as-is.
    5163             :     character1 = Canonical(canonicalize, character1);
    5164             :     character2 = Canonical(canonicalize, character2);
    5165             :   }
    5166             :   return static_cast<int>(character1) - static_cast<int>(character2);  // The sign of the difference gives the order.
    5167             : }
    5168             : #endif  // V8_INTL_SUPPORT
    5169             : 
    5170             : // We can stable sort runs of atoms, since the order does not matter if they
    5171             : // start with different characters.
    5172             : // Returns true if any consecutive atoms were found.
    5173        9303 : bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {  // |compiler| is only read in the non-ICU build.
    5174             :   ZoneList<RegExpTree*>* alternatives = this->alternatives();
    5175             :   int length = alternatives->length();
    5176             :   bool found_consecutive_atoms = false;
    5177       26557 :   for (int i = 0; i < length; i++) {  // NOTE(review): this i++ steps past the alternative that ended the run - confirm intended.
    5178       62485 :     while (i < length) {  // Skip forward to the next atom.
    5179       35145 :       RegExpTree* alternative = alternatives->at(i);
    5180       35145 :       if (alternative->IsAtom()) break;
    5181       26518 :       i++;
    5182             :     }
    5183             :     // i is length or it is the index of an atom.
    5184        9449 :     if (i == length) break;
    5185             :     int first_atom = i;
    5186        8627 :     JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
    5187        8627 :     i++;
    5188      120869 :     while (i < length) {  // Extend the run while alternatives are atoms with the same flags.
    5189       56362 :       RegExpTree* alternative = alternatives->at(i);
    5190       56362 :       if (!alternative->IsAtom()) break;
    5191       56121 :       if (alternative->AsAtom()->flags() != flags) break;
    5192       56121 :       i++;
    5193             :     }
    5194             :     // Sort atoms to get ones with common prefixes together.
    5195             :     // This step is more tricky if we are in a case-independent regexp,
    5196             :     // because it would change /is|I/ to /I|is/, and order matters when
    5197             :     // the regexp parts don't match only disjoint starting points. To fix
    5198             :     // this we have a version of CompareFirstChar that uses case-
    5199             :     // independent character classes for comparison.
    5200             :     DCHECK_LT(first_atom, alternatives->length());
    5201             :     DCHECK_LE(i, alternatives->length());
    5202             :     DCHECK_LE(first_atom, i);
    5203        8627 :     if (IgnoreCase(flags)) {
    5204             : #ifdef V8_INTL_SUPPORT
    5205         474 :       alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
    5206         474 :                                i - first_atom);
    5207             : #else
    5208             :       unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    5209             :           compiler->isolate()->regexp_macro_assembler_canonicalize();
    5210             :       auto compare_closure =
    5211             :           [canonicalize](RegExpTree* const* a, RegExpTree* const* b) {
    5212             :             return CompareFirstCharCaseIndependent(canonicalize, a, b);
    5213             :           };
    5214             :       alternatives->StableSort(compare_closure, first_atom, i - first_atom);
    5215             : #endif  // V8_INTL_SUPPORT
    5216             :     } else {
    5217        8153 :       alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
    5218             :     }
    5219        8627 :     if (i - first_atom > 1) found_consecutive_atoms = true;  // Only runs of two or more atoms count.
    5220             :   }
    5221        9303 :   return found_consecutive_atoms;
    5222             : }
    5223             : 
    5224             : 
    5225             : // Optimizes ab|ac|az to a(?:b|c|z).
    5226        8370 : void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {  // Factors a shared prefix out of runs of 3+ atoms.
    5227             :   Zone* zone = compiler->zone();
    5228             :   ZoneList<RegExpTree*>* alternatives = this->alternatives();
    5229             :   int length = alternatives->length();
    5230             : 
    5231             :   int write_posn = 0;  // Results are compacted in place; write_posn never passes i.
    5232             :   int i = 0;
    5233       73522 :   while (i < length) {
    5234       65152 :     RegExpTree* alternative = alternatives->at(i);
    5235       65152 :     if (!alternative->IsAtom()) {  // Non-atoms are copied through unchanged.
    5236       18582 :       alternatives->at(write_posn++) = alternatives->at(i);
    5237        9291 :       i++;
    5238        9291 :       continue;
    5239             :     }
    5240       55861 :     RegExpAtom* const atom = alternative->AsAtom();
    5241             :     JSRegExp::Flags flags = atom->flags();
    5242             : #ifdef V8_INTL_SUPPORT
    5243      111722 :     icu::UnicodeString common_prefix(atom->data().at(0));
    5244             : #else
    5245             :     unibrow::uchar common_prefix = atom->data().at(0);
    5246             : #endif  // V8_INTL_SUPPORT
    5247             :     int first_with_prefix = i;
    5248             :     int prefix_length = atom->length();
    5249       55861 :     i++;
    5250       64491 :     while (i < length) {  // Extend the run: same flags and same first character (up to case if IgnoreCase).
    5251       56221 :       alternative = alternatives->at(i);
    5252       56221 :       if (!alternative->IsAtom()) break;
    5253       56121 :       RegExpAtom* const atom = alternative->AsAtom();
    5254       56121 :       if (atom->flags() != flags) break;
    5255             : #ifdef V8_INTL_SUPPORT
    5256       64751 :       icu::UnicodeString new_prefix(atom->data().at(0));
    5257       56121 :       if (new_prefix != common_prefix) {
    5258       47645 :         if (!IgnoreCase(flags)) break;
    5259        4578 :         if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
    5260             :           break;
    5261             :       }
    5262             : #else
    5263             :       unibrow::uchar new_prefix = atom->data().at(0);
    5264             :       if (new_prefix != common_prefix) {
    5265             :         if (!IgnoreCase(flags)) break;
    5266             :         unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    5267             :             compiler->isolate()->regexp_macro_assembler_canonicalize();
    5268             :         new_prefix = Canonical(canonicalize, new_prefix);
    5269             :         common_prefix = Canonical(canonicalize, common_prefix);
    5270             :         if (new_prefix != common_prefix) break;
    5271             :       }
    5272             : #endif  // V8_INTL_SUPPORT
    5273             :       prefix_length = Min(prefix_length, atom->length());  // The prefix cannot exceed the shortest atom in the run.
    5274        8630 :       i++;
    5275             :     }
    5276       55861 :     if (i > first_with_prefix + 2) {  // Only runs of at least three alternatives are rewritten.
    5277             :       // Found worthwhile run of alternatives with common prefix of at least one
    5278             :       // character.  The sorting function above did not sort on more than one
    5279             :       // character for reasons of correctness, but there may still be a longer
    5280             :       // common prefix if the terms were similar or presorted in the input.
    5281             :       // Find out how long the common prefix is.
    5282         268 :       int run_length = i - first_with_prefix;
    5283         268 :       RegExpAtom* const atom = alternatives->at(first_with_prefix)->AsAtom();
    5284         742 :       for (int j = 1; j < run_length && prefix_length > 1; j++) {
    5285             :         RegExpAtom* old_atom =
    5286         474 :             alternatives->at(j + first_with_prefix)->AsAtom();
    5287         477 :         for (int k = 1; k < prefix_length; k++) {  // Index 0 was matched above; the rest is compared exactly.
    5288         711 :           if (atom->data().at(k) != old_atom->data().at(k)) {
    5289             :             prefix_length = k;
    5290             :             break;
    5291             :           }
    5292             :         }
    5293             :       }
    5294             :       RegExpAtom* prefix = new (zone)
    5295         268 :           RegExpAtom(atom->data().SubVector(0, prefix_length), flags);  // Prefix text is taken from the first atom of the run.
    5296             :       ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
    5297         268 :       pair->Add(prefix, zone);
    5298             :       ZoneList<RegExpTree*>* suffixes =
    5299             :           new (zone) ZoneList<RegExpTree*>(run_length, zone);
    5300       17600 :       for (int j = 0; j < run_length; j++) {
    5301             :         RegExpAtom* old_atom =
    5302       17332 :             alternatives->at(j + first_with_prefix)->AsAtom();
    5303             :         int len = old_atom->length();
    5304        8666 :         if (len == prefix_length) {  // The atom is exactly the prefix, so its suffix is empty.
    5305         302 :           suffixes->Add(new (zone) RegExpEmpty(), zone);
    5306             :         } else {
    5307             :           RegExpTree* suffix = new (zone) RegExpAtom(
    5308        8515 :               old_atom->data().SubVector(prefix_length, old_atom->length()),
    5309        8515 :               flags);
    5310        8515 :           suffixes->Add(suffix, zone);
    5311             :         }
    5312             :       }
    5313         268 :       pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
    5314         536 :       alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);  // The whole run collapses into one alternative.
    5315             :     } else {
    5316             :       // Just copy any non-worthwhile alternatives.
    5317      167243 :       for (int j = first_with_prefix; j < i; j++) {
    5318      111650 :         alternatives->at(write_posn++) = alternatives->at(j);
    5319             :       }
    5320             :     }
    5321             :   }
    5322             :   alternatives->Rewind(write_posn);  // Trim end of array.
    5323        8370 : }
    5324             : 
    5325             : // Collapses each run of two or more adjacent one-character atoms with equal
    5326             : // flags into a single character class.  Optimizes b|c|z to [bcz].
    5327        9303 : void RegExpDisjunction::FixSingleCharacterDisjunctions(
    5328             :     RegExpCompiler* compiler) {
    5329             :   Zone* zone = compiler->zone();
    5330             :   ZoneList<RegExpTree*>* alternatives = this->alternatives();
    5331             :   int length = alternatives->length();
    5332             :   // Compacts the list in place: write_posn trails i and the tail is trimmed.
    5333             :   int write_posn = 0;
    5334             :   int i = 0;
    5335       83975 :   while (i < length) {
    5336       74672 :     RegExpTree* alternative = alternatives->at(i);
    5337       74672 :     if (!alternative->IsAtom()) {
    5338       54054 :       alternatives->at(write_posn++) = alternatives->at(i);
    5339       27027 :       i++;
    5340       27027 :       continue;
    5341             :     }
    5342       47645 :     RegExpAtom* const atom = alternative->AsAtom();
    5343       47645 :     if (atom->length() != 1) {
    5344       78592 :       alternatives->at(write_posn++) = alternatives->at(i);
    5345       39296 :       i++;
    5346       39296 :       continue;
    5347             :     }
    5348             :     JSRegExp::Flags flags = atom->flags();
    5349             :     DCHECK_IMPLIES(IsUnicode(flags),
    5350             :                    !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
    5351             :     bool contains_trail_surrogate =
    5352        8349 :         unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
    5353             :     int first_in_run = i;
    5354        8349 :     i++;
    5355             :     // Find a run of single-character atom alternatives that have identical
    5356             :     // flags (case independence and unicode-ness).
    5357       25223 :     while (i < length) {
    5358       16454 :       alternative = alternatives->at(i);
    5359       16454 :       if (!alternative->IsAtom()) break;
    5360       16223 :       RegExpAtom* const atom = alternative->AsAtom();
    5361       16223 :       if (atom->length() != 1) break;
    5362        8437 :       if (atom->flags() != flags) break;
    5363             :       DCHECK_IMPLIES(IsUnicode(flags),
    5364             :                      !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
    5365             :       contains_trail_surrogate |=
    5366       16874 :           unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
    5367        8437 :       i++;
    5368             :     }
    5369        8349 :     if (i > first_in_run + 1) {
    5370             :       // Found non-trivial run of single-character alternatives.
    5371         271 :       int run_length = i - first_in_run;
    5372             :       ZoneList<CharacterRange>* ranges =
    5373             :           new (zone) ZoneList<CharacterRange>(2, zone);
    5374       17687 :       for (int j = 0; j < run_length; j++) {
    5375       17416 :         RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
    5376             :         DCHECK_EQ(old_atom->length(), 1);
    5377        8708 :         ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
    5378             :       }
    5379             :       RegExpCharacterClass::CharacterClassFlags character_class_flags;
    5380         271 :       if (IsUnicode(flags) && contains_trail_surrogate) {
    5381             :         character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
    5382             :       }
    5383         271 :       alternatives->at(write_posn++) = new (zone)
    5384         813 :           RegExpCharacterClass(zone, ranges, flags, character_class_flags);
    5385             :     } else {
    5386             :       // Just copy any trivial alternatives.
    5387       24234 :       for (int j = first_in_run; j < i; j++) {
    5388       16156 :         alternatives->at(write_posn++) = alternatives->at(j);
    5389             :       }
    5390             :     }
    5391             :   }
    5392             :   alternatives->Rewind(write_posn);  // Trim end of array.
    5393        9303 : }
    5394             : 
    5395             : // Optimizes disjunctions of more than two alternatives, then emits a ChoiceNode.
    5396       10931 : RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
    5397             :                                       RegExpNode* on_success) {
    5398             :   ZoneList<RegExpTree*>* alternatives = this->alternatives();
    5399             : 
    5400       10931 :   if (alternatives->length() > 2) {
    5401        9303 :     bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
    5402        9303 :     if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
    5403        9303 :     FixSingleCharacterDisjunctions(compiler);
    5404        9303 :     if (alternatives->length() == 1) {
    5405         242 :       return alternatives->at(0)->ToNode(compiler, on_success);
    5406             :     }
    5407             :   }
    5408             :   // General case: one guarded alternative per remaining child.
    5409             :   int length = alternatives->length();
    5410             : 
    5411             :   ChoiceNode* result =
    5412       10689 :       new(compiler->zone()) ChoiceNode(length, compiler->zone());
    5413      166055 :   for (int i = 0; i < length; i++) {
    5414             :     GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
    5415       77683 :                                                                on_success));
    5416             :     result->AddAlternative(alternative);
    5417             :   }
    5418             :   return result;
    5419             : }
    5420             : 
    5421             : // Member overload: forwards this quantifier's min/max/greediness/body.
    5422      926334 : RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
    5423             :                                      RegExpNode* on_success) {
    5424      926334 :   return ToNode(min(),
    5425             :                 max(),
    5426             :                 is_greedy(),
    5427             :                 body(),
    5428             :                 compiler,
    5429      926334 :                 on_success);
    5430             : }
    5431             : 
    5432             : // Scoped object to keep track of how much we unroll quantifier loops in the
    5433             : // regexp graph generator.  Construction scales the compiler's expansion factor
    5434             : // by |factor|; destruction restores it.  See ok_to_expand() for the verdict.
    5435             : class RegExpExpansionLimiter {
    5436             :  public:
    5437             :   static const int kMaxExpansionFactor = 6;
    5438             :   RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
    5439             :       : compiler_(compiler),
    5440             :         saved_expansion_factor_(compiler->current_expansion_factor()),
    5441       62510 :         ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
    5442             :     DCHECK_LT(0, factor);
    5443       72065 :     if (ok_to_expand_) {
    5444       72065 :       if (factor > kMaxExpansionFactor) {
    5445             :         // Avoid integer overflow of the current expansion factor.
    5446             :         ok_to_expand_ = false;
    5447             :         compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
    5448             :       } else {
    5449       71937 :         int new_factor = saved_expansion_factor_ * factor;
    5450       71937 :         ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
    5451             :         compiler->set_current_expansion_factor(new_factor);
    5452             :       }
    5453             :     }
    5454             :   }
    5455             :   // Puts back the expansion factor that was current at construction time.
    5456             :   ~RegExpExpansionLimiter() {
    5457             :     compiler_->set_current_expansion_factor(saved_expansion_factor_);
    5458             :   }
    5459             :   // False when the scaled factor would exceed kMaxExpansionFactor.
    5460             :   bool ok_to_expand() { return ok_to_expand_; }
    5461             : 
    5462             :  private:
    5463             :   RegExpCompiler* compiler_;
    5464             :   int saved_expansion_factor_;
    5465             :   bool ok_to_expand_;
    5466             : 
    5467             :   DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
    5468             : };
    5469             : 
    5470             : // Compiles body{min,max} continuing at on_success, unrolling small cases.
    5471     1013442 : RegExpNode* RegExpQuantifier::ToNode(int min,
    5472             :                                      int max,
    5473             :                                      bool is_greedy,
    5474             :                                      RegExpTree* body,
    5475             :                                      RegExpCompiler* compiler,
    5476             :                                      RegExpNode* on_success,
    5477             :                                      bool not_at_start) {
    5478             :   // x{f, t} becomes this:
    5479             :   //
    5480             :   //             (r++)<-.
    5481             :   //               |     `
    5482             :   //               |     (x)
    5483             :   //               v     ^
    5484             :   //      (r=0)-->(?)---/ [if r < t]
    5485             :   //               |
    5486             :   //   [if r >= f] \----> ...
    5487             :   //
    5488             : 
    5489             :   // 15.10.2.5 RepeatMatcher algorithm.
    5490             :   // The parser has already eliminated the case where max is 0.  In the case
    5491             :   // where max_match is zero the parser has removed the quantifier if min was
    5492             :   // > 0 and removed the atom if min was 0.  See AddQuantifierToAtom.
    5493             : 
    5494             :   // If we know that we cannot match zero length then things are a little
    5495             :   // simpler since we don't need to make the special zero length match check
    5496             :   // from step 2.1.  If the min and max are small we can unroll a little in
    5497             :   // this case.
    5498             :   static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and (foo){3,}
    5499             :   static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and (foo){x,3}
    5500     1013442 :   if (max == 0) return on_success;  // This can happen due to recursion.
    5501     1013087 :   bool body_can_be_empty = (body->min_match() == 0);
    5502             :   int body_start_reg = RegExpCompiler::kNoRegister;
    5503     1013087 :   Interval capture_registers = body->CaptureRegisters();
    5504             :   bool needs_capture_clearing = !capture_registers.is_empty();
    5505             :   Zone* zone = compiler->zone();
    5506             :   // An empty-capable body always takes the loop; otherwise try unrolling first.
    5507     1013087 :   if (body_can_be_empty) {
    5508             :     body_start_reg = compiler->AllocateRegister();
    5509     1012550 :   } else if (compiler->optimize() && !needs_capture_clearing) {
    5510             :     // Only unroll if there are no captures and the body can't be
    5511             :     // empty.
    5512             :     {
    5513             :       RegExpExpansionLimiter limiter(
    5514       62510 :           compiler, min + ((max != min) ? 1 : 0));
    5515       62510 :       if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
    5516        4336 :         int new_max = (max == kInfinity) ? max : max - min;
    5517             :         // Recurse once to get the loop or optional matches after the fixed
    5518             :         // ones.
    5519        4336 :         RegExpNode* answer = ToNode(
    5520        4336 :             0, new_max, is_greedy, body, compiler, on_success, true);
    5521             :         // Unroll the forced matches from 0 to min.  This can cause chains of
    5522             :         // TextNodes (which the parser does not generate).  These should be
    5523             :         // combined if it turns out they hinder good code generation.
    5524       14408 :         for (int i = 0; i < min; i++) {
    5525        5036 :           answer = body->ToNode(compiler, answer);
    5526             :         }
    5527             :         return answer;
    5528             :       }
    5529             :     }
    5530       58174 :     if (max <= kMaxUnrolledMaxMatches && min == 0) {
    5531             :       DCHECK_LT(0, max);  // Due to the 'if' above.
    5532             :       RegExpExpansionLimiter limiter(compiler, max);
    5533        9555 :       if (limiter.ok_to_expand()) {
    5534             :         // Unroll the optional matches up to max.
    5535             :         RegExpNode* answer = on_success;
    5536       28076 :         for (int i = 0; i < max; i++) {
    5537        9391 :           ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);
    5538        9391 :           if (is_greedy) {
    5539        9245 :             alternation->AddAlternative(
    5540        9245 :                 GuardedAlternative(body->ToNode(compiler, answer)));
    5541             :             alternation->AddAlternative(GuardedAlternative(on_success));
    5542             :           } else {
    5543             :             alternation->AddAlternative(GuardedAlternative(on_success));
    5544         146 :             alternation->AddAlternative(
    5545         146 :                 GuardedAlternative(body->ToNode(compiler, answer)));
    5546             :           }
    5547             :           answer = alternation;
    5548        9391 :           if (not_at_start && !compiler->read_backward()) {
    5549             :             alternation->set_not_at_start();
    5550             :           }
    5551             :         }
    5552             :         return answer;
    5553             :       }
    5554             :     }
    5555             :   }
    5556      999457 :   bool has_min = min > 0;
    5557      999457 :   bool has_max = max < RegExpTree::kInfinity;
    5558      999457 :   bool needs_counter = has_min || has_max;
    5559             :   int reg_ctr = needs_counter
    5560             :       ? compiler->AllocateRegister()
    5561      999457 :       : RegExpCompiler::kNoRegister;
    5562             :   LoopChoiceNode* center = new (zone)
    5563      999457 :       LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
    5564      999457 :   if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
    5565             :   RegExpNode* loop_return = needs_counter
    5566             :       ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
    5567      999457 :       : static_cast<RegExpNode*>(center);
    5568      999457 :   if (body_can_be_empty) {
    5569             :     // If the body can be empty we need to check if it was and then
    5570             :     // backtrack.
    5571             :     loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
    5572             :                                               reg_ctr,
    5573             :                                               min,
    5574         537 :                                               loop_return);
    5575             :   }
    5576      999457 :   RegExpNode* body_node = body->ToNode(compiler, loop_return);
    5577      999457 :   if (body_can_be_empty) {
    5578             :     // If the body can be empty we need to store the start position
    5579             :     // so we can bail out if it was empty.
    5580         537 :     body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
    5581             :   }
    5582      999457 :   if (needs_capture_clearing) {
    5583             :     // Before entering the body of this loop we need to clear captures.
    5584        2382 :     body_node = ActionNode::ClearCaptures(capture_registers, body_node);
    5585             :   }
    5586             :   GuardedAlternative body_alt(body_node);
    5587      999457 :   if (has_max) {
    5588             :     Guard* body_guard =
    5589             :         new(zone) Guard(reg_ctr, Guard::LT, max);
    5590      902620 :     body_alt.AddGuard(body_guard, zone);
    5591             :   }
    5592             :   GuardedAlternative rest_alt(on_success);
    5593      999457 :   if (has_min) {
    5594             :     Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
    5595        1322 :     rest_alt.AddGuard(rest_guard, zone);
    5596             :   }
    5597      999457 :   if (is_greedy) {
    5598             :     center->AddLoopAlternative(body_alt);
    5599             :     center->AddContinueAlternative(rest_alt);
    5600             :   } else {
    5601             :     center->AddContinueAlternative(rest_alt);
    5602             :     center->AddLoopAlternative(body_alt);
    5603             :   }
    5604      999457 :   if (needs_counter) {
    5605      903365 :     return ActionNode::SetRegister(reg_ctr, 0, center);
    5606             :   } else {
    5607             :     return center;
    5608             :   }
    5609             : }
    5610             : // File-local helper for \b and \B when unicode case equivalents are needed.
    5611             : namespace {
    5612             : // Desugar \b to (?<=\w)(?=\W)|(?<=\W)(?=\w) and
    5613             : //         \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
    5614          80 : RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
    5615             :                                           RegExpNode* on_success,
    5616             :                                           RegExpAssertion::AssertionType type,
    5617             :                                           JSRegExp::Flags flags) {
    5618             :   DCHECK(NeedsUnicodeCaseEquivalents(flags));
    5619             :   Zone* zone = compiler->zone();
    5620             :   ZoneList<CharacterRange>* word_range =
    5621             :       new (zone) ZoneList<CharacterRange>(2, zone);
    5622          80 :   CharacterRange::AddClassEscape('w', word_range, true, zone);
    5623             :   int stack_register = compiler->UnicodeLookaroundStackRegister();
    5624             :   int position_register = compiler->UnicodeLookaroundPositionRegister();
    5625          80 :   ChoiceNode* result = new (zone) ChoiceNode(2, zone);
    5626             :   // Add two choices. The (non-)boundary could start with a word or
    5627             :   // a non-word-character.
    5628         400 :   for (int i = 0; i < 2; i++) {
    5629         160 :     bool lookbehind_for_word = i == 0;
    5630             :     bool lookahead_for_word =
    5631         160 :         (type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;
    5632             :     // Look to the left.
    5633             :     RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
    5634         160 :                                          stack_register, position_register);
    5635             :     RegExpNode* backward = TextNode::CreateForCharacterRanges(
    5636         160 :         zone, word_range, true, lookbehind.on_match_success(), flags);
    5637             :     // Look to the right.
    5638             :     RegExpLookaround::Builder lookahead(lookahead_for_word,
    5639             :                                         lookbehind.ForMatch(backward),
    5640         160 :                                         stack_register, position_register);
    5641             :     RegExpNode* forward = TextNode::CreateForCharacterRanges(
    5642         160 :         zone, word_range, false, lookahead.on_match_success(), flags);
    5643         160 :     result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
    5644             :   }
    5645          80 :   return result;
    5646             : }
    5647             : }  // anonymous namespace
    5648             : // Builds the node that checks this assertion and then continues at on_success.
    5649        5528 : RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
    5650             :                                     RegExpNode* on_success) {
    5651             :   NodeInfo info;  // NOTE(review): not referenced again in this function.
    5652             :   Zone* zone = compiler->zone();
    5653             : 
    5654        5528 :   switch (assertion_type()) {
    5655             :     case START_OF_LINE:
    5656         129 :       return AssertionNode::AfterNewline(on_success);
    5657             :     case START_OF_INPUT:
    5658        3055 :       return AssertionNode::AtStart(on_success);
    5659             :     case BOUNDARY:
    5660             :       return NeedsUnicodeCaseEquivalents(flags_)
    5661             :                  ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
    5662             :                                                  flags_)
    5663         176 :                  : AssertionNode::AtBoundary(on_success);
    5664             :     case NON_BOUNDARY:
    5665             :       return NeedsUnicodeCaseEquivalents(flags_)
    5666             :                  ? BoundaryAssertionAsLookaround(compiler, on_success,
    5667             :                                                  NON_BOUNDARY, flags_)
    5668         154 :                  : AssertionNode::AtNonBoundary(on_success);
    5669             :     case END_OF_INPUT:
    5670        1915 :       return AssertionNode::AtEnd(on_success);
    5671             :     case END_OF_LINE: {
    5672             :       // Compile $ in multiline regexps as an alternation with a positive
    5673             :       // lookahead in one side and an end-of-input on the other side.
    5674             :       // We need two registers for the lookahead.
    5675             :       int stack_pointer_register = compiler->AllocateRegister();
    5676             :       int position_register = compiler->AllocateRegister();
    5677             :       // The ChoiceNode to distinguish between a newline and end-of-input.
    5678          99 :       ChoiceNode* result = new(zone) ChoiceNode(2, zone);
    5679             :       // Create a newline atom.  NOTE(review): newline_ranges is never read below.
    5680             :       ZoneList<CharacterRange>* newline_ranges =
    5681             :           new(zone) ZoneList<CharacterRange>(3, zone);
    5682          99 :       CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
    5683             :       JSRegExp::Flags default_flags = JSRegExp::Flags();
    5684             :       RegExpCharacterClass* newline_atom =
    5685             :           new (zone) RegExpCharacterClass('n', default_flags);
    5686             :       TextNode* newline_matcher = new (zone) TextNode(
    5687             :           newline_atom, false, ActionNode::PositiveSubmatchSuccess(
    5688             :                                    stack_pointer_register, position_register,
    5689             :                                    0,   // No captures inside.
    5690             :                                    -1,  // Ignored if no captures.
    5691         198 :                                    on_success));
    5692             :       // Wrap the newline matcher in a submatch so that it acts as a lookahead.
    5693             :       RegExpNode* end_of_line = ActionNode::BeginSubmatch(
    5694             :           stack_pointer_register,
    5695             :           position_register,
    5696          99 :           newline_matcher);
    5697             :       // Add the two alternatives: newline lookahead first, then end-of-input.
    5698             :       GuardedAlternative eol_alternative(end_of_line);
    5699             :       result->AddAlternative(eol_alternative);
    5700          99 :       GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
    5701             :       result->AddAlternative(end_alternative);
    5702             :       return result;
    5703             :     }
    5704             :     default:
    5705           0 :       UNREACHABLE();
    5706             :   }
    5707             :   return on_success;
    5708             : }
    5709             : 
    5710             : // Emits a BackReferenceNode over the start/end registers of capture index().
    5711        2376 : RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
    5712             :                                         RegExpNode* on_success) {
    5713             :   return new (compiler->zone())
    5714             :       BackReferenceNode(RegExpCapture::StartRegister(index()),
    5715             :                         RegExpCapture::EndRegister(index()), flags_,
    5716        4752 :                         compiler->read_backward(), on_success);
    5717             : }
    5718             : 
    5719             : // Adds no nodes of its own: the continuation is returned unchanged.
    5720        1050 : RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
    5721             :                                 RegExpNode* on_success) {
    5722        1050 :   return on_success;
    5723             : }
    5724             : 
    5725             : // Chooses on_match_success_: a positive or a negative submatch-success node.
    5726        4368 : RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
    5727             :                                    int stack_pointer_register,
    5728             :                                    int position_register,
    5729             :                                    int capture_register_count,
    5730             :                                    int capture_register_start)
    5731             :     : is_positive_(is_positive),
    5732             :       on_success_(on_success),
    5733             :       stack_pointer_register_(stack_pointer_register),
    5734        4368 :       position_register_(position_register) {
    5735        4368 :   if (is_positive_) {
    5736        1556 :     on_match_success_ = ActionNode::PositiveSubmatchSuccess(
    5737             :         stack_pointer_register, position_register, capture_register_count,
    5738        1556 :         capture_register_start, on_success_);
    5739             :   } else {
    5740             :     Zone* zone = on_success_->zone();
    5741             :     on_match_success_ = new (zone) NegativeSubmatchSuccess(
    5742             :         stack_pointer_register, position_register, capture_register_count,
    5743        2812 :         capture_register_start, zone);
    5744             :   }
    5745        4368 : }
    5746             : 
    5747             : // Wraps the compiled body in BeginSubmatch, adding a choice node if negative.
    5748        4368 : RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
    5749        4368 :   if (is_positive_) {
    5750        1556 :     return ActionNode::BeginSubmatch(stack_pointer_register_,
    5751        1556 :                                      position_register_, match);
    5752             :   } else {
    5753        2812 :     Zone* zone = on_success_->zone();
    5754             :     // We use a ChoiceNode to represent the negative lookaround. The first
    5755             :     // alternative is the negative match. On success, the end node backtracks.
    5756             :     // On failure, the second alternative is tried and leads to success.
    5757             :     // NegativeLookaroundChoiceNode is a special ChoiceNode that ignores the
    5758             :     // first exit when calculating quick checks.
    5759             :     ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
    5760        2812 :         GuardedAlternative(match), GuardedAlternative(on_success_), zone);
    5761        2812 :     return ActionNode::BeginSubmatch(stack_pointer_register_,
    5762        2812 :                                      position_register_, choice_node);
    5763             :   }
    5764             : }
    5765             : 
    5766             : // Compiles the body in the direction given by type(), then restores direction.
    5767        1668 : RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
    5768             :                                      RegExpNode* on_success) {
    5769             :   int stack_pointer_register = compiler->AllocateRegister();
    5770             :   int position_register = compiler->AllocateRegister();
    5771             :   // Compute the register range passed to the Builder as the capture registers.
    5772             :   const int registers_per_capture = 2;
    5773             :   const int register_of_first_capture = 2;
    5774        1668 :   int register_count = capture_count_ * registers_per_capture;
    5775             :   int register_start =
    5776        1668 :     register_of_first_capture + capture_from_ * registers_per_capture;
    5777             : 
    5778             :   RegExpNode* result;
    5779             :   bool was_reading_backward = compiler->read_backward();
    5780        1668 :   compiler->set_read_backward(type() == LOOKBEHIND);
    5781             :   Builder builder(is_positive(), on_success, stack_pointer_register,
    5782        1668 :                   position_register, register_count, register_start);
    5783        1668 :   RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());
    5784        1668 :   result = builder.ForMatch(match);
    5785             :   compiler->set_read_backward(was_reading_backward);
    5786        1668 :   return result;
    5787             : }
    5788             : 
    5789             : // Member overload: forwards this capture's body() and index().
    5790       27196 : RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
    5791             :                                   RegExpNode* on_success) {
    5792       27196 :   return ToNode(body(), index(), compiler, on_success);
    5793             : }
    5794             : 
    5795             : // Wraps body with StorePosition actions on the capture's start/end registers.
    5796      112935 : RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
    5797             :                                   int index,
    5798             :                                   RegExpCompiler* compiler,
    5799             :                                   RegExpNode* on_success) {
    5800             :   DCHECK_NOT_NULL(body);
    5801             :   int start_reg = RegExpCapture::StartRegister(index);
    5802             :   int end_reg = RegExpCapture::EndRegister(index);
    5803      112935 :   if (compiler->read_backward()) std::swap(start_reg, end_reg);
    5804      112935 :   RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
    5805      112935 :   RegExpNode* body_node = body->ToNode(compiler, store_end);
    5806      112935 :   return ActionNode::StorePosition(start_reg, true, body_node);
    5807             : }
    5808             : 
    5809             : // Threads on_success through all children; iteration order follows direction.
    5810       21356 : RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
    5811             :                                       RegExpNode* on_success) {
    5812             :   ZoneList<RegExpTree*>* children = nodes();
    5813             :   RegExpNode* current = on_success;
    5814       21356 :   if (compiler->read_backward()) {
    5815        1905 :     for (int i = 0; i < children->length(); i++) {
    5816         790 :       current = children->at(i)->ToNode(compiler, current);
    5817             :     }
    5818             :   } else {
    5819      997883 :     for (int i = children->length() - 1; i >= 0; i--) {
    5820      976852 :       current = children->at(i)->ToNode(compiler, current);
    5821             :     }
    5822             :   }
    5823       21356 :   return current;
    5824             : }
    5825             : 
    5826             : // Adds the [from, to) pairs in elmv (terminated by kRangeEndMarker) to ranges.
    5827        7425 : static void AddClass(const int* elmv,
    5828             :                      int elmc,
    5829             :                      ZoneList<CharacterRange>* ranges,
    5830             :                      Zone* zone) {
    5831        7425 :   elmc--;
    5832             :   DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
    5833       71985 :   for (int i = 0; i < elmc; i += 2) {
    5834             :     DCHECK(elmv[i] < elmv[i + 1]);
    5835       32280 :     ranges->Add(CharacterRange::Range(elmv[i], elmv[i + 1] - 1), zone);
    5836             :   }
    5837        7425 : }
    5838             : 
    5839             : // Adds the complement of elmv's [from, to) pairs, up to String::kMaxCodePoint.
    5840       20191 : static void AddClassNegated(const int *elmv,
    5841             :                             int elmc,
    5842             :                             ZoneList<CharacterRange>* ranges,
    5843             :                             Zone* zone) {
    5844       20191 :   elmc--;
    5845             :   DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
    5846             :   DCHECK_NE(0x0000, elmv[0]);
    5847             :   DCHECK_NE(String::kMaxCodePoint, elmv[elmc - 1]);
    5848             :   uc16 last = 0x0000;  // NOTE(review): presumably 16-bit; confirm tables fit.
    5849      151855 :   for (int i = 0; i < elmc; i += 2) {
    5850             :     DCHECK(last <= elmv[i] - 1);
    5851             :     DCHECK(elmv[i] < elmv[i + 1]);
    5852       65832 :     ranges->Add(CharacterRange::Range(last, elmv[i] - 1), zone);
    5853       65832 :     last = elmv[i + 1];
    5854             :   }
    5855       20191 :   ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
    5856       20191 : }
    5857             : 
    5858      110712 : void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
    5859             :                                     bool add_unicode_case_equivalents,
    5860             :                                     Zone* zone) {
    5861      110712 :   if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
    5862             :     // See #sec-runtime-semantics-wordcharacters-abstract-operation
    5863             :     // In case of unicode and ignore_case, we need to create the closure over
    5864             :     // case equivalent characters before negating.
    5865             :     ZoneList<CharacterRange>* new_ranges =
    5866             :         new (zone) ZoneList<CharacterRange>(2, zone);
    5867         240 :     AddClass(kWordRanges, kWordRangeCount, new_ranges, zone);
    5868         240 :     AddUnicodeCaseEquivalents(new_ranges, zone);
    5869         240 :     if (type == 'W') {
    5870             :       ZoneList<CharacterRange>* negated =
    5871             :           new (zone) ZoneList<CharacterRange>(2, zone);
    5872          90 :       CharacterRange::Negate(new_ranges, negated, zone);
    5873             :       new_ranges = negated;
    5874             :     }
    5875             :     ranges->AddAll(*new_ranges, zone);
    5876             :     return;
    5877             :   }
    5878      110472 :   AddClassEscape(type, ranges, zone);
    5879             : }
    5880             : 
    5881      110507 : void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
    5882             :                                     Zone* zone) {
    5883      110507 :   switch (type) {
    5884             :     case 's':
    5885        1709 :       AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
    5886        1709 :       break;
    5887             :     case 'S':
    5888         784 :       AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
    5889         784 :       break;
    5890             :     case 'w':
    5891        2786 :       AddClass(kWordRanges, kWordRangeCount, ranges, zone);
    5892        2786 :       break;
    5893             :     case 'W':
    5894         307 :       AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
    5895         307 :       break;
    5896             :     case 'd':
    5897        2492 :       AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
    5898        2492 :       break;
    5899             :     case 'D':
    5900         268 :       AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
    5901         268 :       break;
    5902             :     case '.':
    5903             :       AddClassNegated(kLineTerminatorRanges,
    5904             :                       kLineTerminatorRangeCount,
    5905             :                       ranges,
    5906       18832 :                       zone);
    5907       18832 :       break;
    5908             :     // This is not a character range as defined by the spec but a
    5909             :     // convenient shorthand for a character class that matches any
    5910             :     // character.
    5911             :     case '*':
    5912       83131 :       ranges->Add(CharacterRange::Everything(), zone);
    5913       83131 :       break;
    5914             :     // This is the set of characters matched by the $ and ^ symbols
    5915             :     // in multiline mode.
    5916             :     case 'n':
    5917             :       AddClass(kLineTerminatorRanges,
    5918             :                kLineTerminatorRangeCount,
    5919             :                ranges,
    5920         198 :                zone);
    5921         198 :       break;
    5922             :     default:
    5923           0 :       UNREACHABLE();
    5924             :   }
    5925      110507 : }
    5926             : 
    5927             : 
    5928           0 : Vector<const int> CharacterRange::GetWordBounds() {
    5929           0 :   return Vector<const int>(kWordRanges, kWordRangeCount - 1);
    5930             : }
    5931             : 
    5932             : // static
    5933       66930 : void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
    5934             :                                         ZoneList<CharacterRange>* ranges,
    5935             :                                         bool is_one_byte) {
    5936       66930 :   CharacterRange::Canonicalize(ranges);
    5937             :   int range_count = ranges->length();
    5938             : #ifdef V8_INTL_SUPPORT
    5939      133860 :   icu::UnicodeSet already_added;
    5940      133860 :   icu::UnicodeSet others;
    5941      210828 :   for (int i = 0; i < range_count; i++) {
    5942       71949 :     CharacterRange range = ranges->at(i);
    5943             :     uc32 bottom = range.from();
    5944       71949 :     if (bottom > String::kMaxUtf16CodeUnit) continue;
    5945             :     uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
    5946             :     // Nothing to be done for surrogates.
    5947       71949 :     if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
    5948       69902 :     if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
    5949        1354 :       if (bottom > String::kMaxOneByteCharCode) continue;
    5950        1243 :       if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
    5951             :     }
    5952       69791 :     already_added.add(bottom, top);
    5953    15709049 :     while (bottom <= top) {
    5954    15639258 :       icu::UnicodeString upper(bottom);
    5955     7819629 :       upper.toUpper();
    5956    15639258 :       icu::UnicodeSet expanded(bottom, bottom);
    5957     7819629 :       expanded.closeOver(USET_CASE_INSENSITIVE);
    5958    23798823 :       for (int32_t i = 0; i < expanded.getRangeCount(); i++) {
    5959     7989597 :         UChar32 start = expanded.getRangeStart(i);
    5960     7989597 :         UChar32 end = expanded.getRangeEnd(i);
    5961    24239465 :         while (start <= end) {
    5962    16249868 :           icu::UnicodeString upper2(start);
    5963     8124934 :           upper2.toUpper();
    5964             :           // Only add if the upper case are the same.
    5965     8124934 :           if (upper[0] == upper2[0]) {
    5966     8122177 :             others.add(start);
    5967             :           }
    5968     8124934 :           start++;
    5969             :         }
    5970             :       }
    5971     7819629 :       bottom++;
    5972             :     }
    5973             :   }
    5974       66930 :   others.removeAll(already_added);
    5975       77310 :   for (int32_t i = 0; i < others.getRangeCount(); i++) {
    5976        5190 :     UChar32 start = others.getRangeStart(i);
    5977        5190 :     UChar32 end = others.getRangeEnd(i);
    5978        5190 :     if (start == end) {
    5979        3686 :       ranges->Add(CharacterRange::Singleton(start), zone);
    5980             :     } else {
    5981        1504 :       ranges->Add(CharacterRange::Range(start, end), zone);
    5982             :     }
    5983             :   }
    5984             : #else
    5985             :   for (int i = 0; i < range_count; i++) {
    5986             :     CharacterRange range = ranges->at(i);
    5987             :     uc32 bottom = range.from();
    5988             :     if (bottom > String::kMaxUtf16CodeUnit) continue;
    5989             :     uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
    5990             :     // Nothing to be done for surrogates.
    5991             :     if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
    5992             :     if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
    5993             :       if (bottom > String::kMaxOneByteCharCode) continue;
    5994             :       if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
    5995             :     }
    5996             :     unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    5997             :     if (top == bottom) {
    5998             :       // If this is a singleton we just expand the one character.
    5999             :       int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
    6000             :       for (int i = 0; i < length; i++) {
    6001             :         uc32 chr = chars[i];
    6002             :         if (chr != bottom) {
    6003             :           ranges->Add(CharacterRange::Singleton(chars[i]), zone);
    6004             :         }
    6005             :       }
    6006             :     } else {
    6007             :       // If this is a range we expand the characters block by block, expanding
    6008             :       // contiguous subranges (blocks) one at a time.  The approach is as
    6009             :       // follows.  For a given start character we look up the remainder of the
    6010             :       // block that contains it (represented by the end point), for instance we
    6011             :       // find 'z' if the character is 'c'.  A block is characterized by the
    6012             :       // property that all characters uncanonicalize in the same way, except
    6013             :       // that each entry in the result is incremented by the distance from the
    6014             :       // first element.  So a-z is a block because 'a' uncanonicalizes to ['a',
    6015             :       // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].  Once
    6016             :       // we've found the end point we look up its uncanonicalization and
    6017             :       // produce a range for each element.  For instance for [c-f] we look up
    6018             :       // ['z', 'Z'] and produce [c-f] and [C-F].  We then only add a range if
    6019             :       // it is not already contained in the input, so [c-f] will be skipped but
    6020             :       // [C-F] will be added.  If this range is not completely contained in a
    6021             :       // block we do this for all the blocks covered by the range (handling
    6022             :       // characters that is not in a block as a "singleton block").
    6023             :       unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    6024             :       int pos = bottom;
    6025             :       while (pos <= top) {
    6026             :         int length =
    6027             :             isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
    6028             :         uc32 block_end;
    6029             :         if (length == 0) {
    6030             :           block_end = pos;
    6031             :         } else {
    6032             :           DCHECK_EQ(1, length);
    6033             :           block_end = equivalents[0];
    6034             :         }
    6035             :         int end = (block_end > top) ? top : block_end;
    6036             :         length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
    6037             :                                                          equivalents);
    6038             :         for (int i = 0; i < length; i++) {
    6039             :           uc32 c = equivalents[i];
    6040             :           uc32 range_from = c - (block_end - pos);
    6041             :           uc32 range_to = c - (block_end - end);
    6042             :           if (!(bottom <= range_from && range_to <= top)) {
    6043             :             ranges->Add(CharacterRange::Range(range_from, range_to), zone);
    6044             :           }
    6045             :         }
    6046             :         pos = end + 1;
    6047             :       }
    6048             :     }
    6049             :   }
    6050             : #endif  // V8_INTL_SUPPORT
    6051       66930 : }
    6052             : 
    6053          10 : bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
    6054             :   DCHECK_NOT_NULL(ranges);
    6055             :   int n = ranges->length();
    6056          10 :   if (n <= 1) return true;
    6057             :   int max = ranges->at(0).to();
    6058         590 :   for (int i = 1; i < n; i++) {
    6059         290 :     CharacterRange next_range = ranges->at(i);
    6060         290 :     if (next_range.from() <= max + 1) return false;
    6061             :     max = next_range.to();
    6062             :   }
    6063             :   return true;
    6064             : }
    6065             : 
    6066             : 
    6067     1915955 : ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
    6068     1915955 :   if (ranges_ == nullptr) {
    6069       83020 :     ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
    6070       83020 :     CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
    6071             :   }
    6072     1915955 :   return ranges_;
    6073             : }
    6074             : 
    6075             : 
    6076             : // Move a number of elements in a zonelist to another position
    6077             : // in the same list. Handles overlapping source and target areas.
    6078       93340 : static void MoveRanges(ZoneList<CharacterRange>* list,
    6079             :                        int from,
    6080             :                        int to,
    6081             :                        int count) {
    6082             :   // Ranges are potentially overlapping.
    6083       93340 :   if (from < to) {
    6084    10294689 :     for (int i = count - 1; i >= 0; i--) {
    6085    30639657 :       list->at(to + i) = list->at(from + i);
    6086             :     }
    6087             :   } else {
    6088     7322102 :     for (int i = 0; i < count; i++) {
    6089    10965348 :       list->at(to + i) = list->at(from + i);
    6090             :     }
    6091             :   }
    6092       93340 : }
    6093             : 
    6094             : 
    6095      171443 : static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
    6096             :                                       int count,
    6097             :                                       CharacterRange insert) {
    6098             :   // Inserts a range into list[0..count[, which must be sorted
    6099             :   // by from value and non-overlapping and non-adjacent, using at most
    6100             :   // list[0..count] for the result. Returns the number of resulting
    6101             :   // canonicalized ranges. Inserting a range may collapse existing ranges into
    6102             :   // fewer ranges, so the return value can be anything in the range 1..count+1.
    6103             :   uc32 from = insert.from();
    6104             :   uc32 to = insert.to();
    6105             :   int start_pos = 0;
    6106             :   int end_pos = count;
    6107    18369192 :   for (int i = count - 1; i >= 0; i--) {
    6108    18284785 :     CharacterRange current = list->at(i);
    6109    18284785 :     if (current.from() > to + 1) {
    6110             :       end_pos = i;
    6111      136625 :     } else if (current.to() + 1 < from) {
    6112       87036 :       start_pos = i + 1;
    6113             :       break;
    6114             :     }
    6115             :   }
    6116             : 
    6117             :   // Inserted range overlaps, or is adjacent to, ranges at positions
    6118             :   // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
    6119             :   // not affected by the insertion.
    6120             :   // If start_pos == end_pos, the range must be inserted before start_pos.
    6121             :   // if start_pos < end_pos, the entire range from start_pos to end_pos
    6122             :   // must be merged with the insert range.
    6123             : 
    6124      171443 :   if (start_pos == end_pos) {
    6125             :     // Insert between existing ranges at position start_pos.
    6126      134102 :     if (start_pos < count) {
    6127       81470 :       MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
    6128             :     }
    6129      134102 :     list->at(start_pos) = insert;
    6130      134102 :     return count + 1;
    6131             :   }
    6132       37341 :   if (start_pos + 1 == end_pos) {
    6133             :     // Replace single existing range at position start_pos.
    6134       25338 :     CharacterRange to_replace = list->at(start_pos);
    6135             :     int new_from = Min(to_replace.from(), from);
    6136             :     int new_to = Max(to_replace.to(), to);
    6137       25338 :     list->at(start_pos) = CharacterRange::Range(new_from, new_to);
    6138             :     return count;
    6139             :   }
    6140             :   // Replace a number of existing ranges from start_pos to end_pos - 1.
    6141             :   // Move the remaining ranges down.
    6142             : 
    6143             :   int new_from = Min(list->at(start_pos).from(), from);
    6144       12003 :   int new_to = Max(list->at(end_pos - 1).to(), to);
    6145       12003 :   if (end_pos < count) {
    6146       11870 :     MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
    6147             :   }
    6148       12003 :   list->at(start_pos) = CharacterRange::Range(new_from, new_to);
    6149       12003 :   return count - (end_pos - start_pos) + 1;
    6150             : }
    6151             : 
    6152             : 
    6153          20 : void CharacterSet::Canonicalize() {
    6154             :   // Special/default classes are always considered canonical. The result
    6155             :   // of calling ranges() will be sorted.
    6156      176280 :   if (ranges_ == nullptr) return;
    6157       93508 :   CharacterRange::Canonicalize(ranges_);
    6158             : }
    6159             : 
    6160             : 
    6161      499726 : void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
    6162      499726 :   if (character_ranges->length() <= 1) return;
    6163             :   // Check whether ranges are already canonical (increasing, non-overlapping,
    6164             :   // non-adjacent).
    6165             :   int n = character_ranges->length();
    6166             :   int max = character_ranges->at(0).to();
    6167             :   int i = 1;
    6168     2645496 :   while (i < n) {
    6169     1299626 :     CharacterRange current = character_ranges->at(i);
    6170     1299626 :     if (current.from() <= max + 1) {
    6171             :       break;
    6172             :     }
    6173             :     max = current.to();
    6174     1290428 :     i++;
    6175             :   }
    6176             :   // Canonical until the i'th range. If that's all of them, we are done.
    6177       64640 :   if (i == n) return;
    6178             : 
    6179             :   // The ranges at index i and forward are not canonicalized. Make them so by
    6180             :   // doing the equivalent of insertion sort (inserting each into the previous
    6181             :   // list, in order).
    6182             :   // Notice that inserting a range can reduce the number of ranges in the
    6183             :   // result due to combining of adjacent and overlapping ranges.
    6184             :   int read = i;  // Range to insert.
    6185             :   int num_canonical = i;  // Length of canonicalized part of list.
    6186             :   do {
    6187             :     num_canonical = InsertRangeInCanonicalList(character_ranges,
    6188             :                                                num_canonical,
    6189      171443 :                                                character_ranges->at(read));
    6190      171443 :     read++;
    6191      171443 :   } while (read < n);
    6192             :   character_ranges->Rewind(num_canonical);
    6193             : 
    6194             :   DCHECK(CharacterRange::IsCanonical(character_ranges));
    6195             : }
    6196             : 
    6197             : 
    6198         230 : void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
    6199             :                             ZoneList<CharacterRange>* negated_ranges,
    6200             :                             Zone* zone) {
    6201             :   DCHECK(CharacterRange::IsCanonical(ranges));
    6202             :   DCHECK_EQ(0, negated_ranges->length());
    6203             :   int range_count = ranges->length();
    6204             :   uc32 from = 0;
    6205             :   int i = 0;
    6206         230 :   if (range_count > 0 && ranges->at(0).from() == 0) {
    6207          40 :     from = ranges->at(0).to() + 1;
    6208             :     i = 1;
    6209             :   }
    6210       14890 :   while (i < range_count) {
    6211        7330 :     CharacterRange range = ranges->at(i);
    6212        7330 :     negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
    6213        7330 :     from = range.to() + 1;
    6214        7330 :     i++;
    6215             :   }
    6216         230 :   if (from < String::kMaxCodePoint) {
    6217         360 :     negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
    6218         180 :                         zone);
    6219             :   }
    6220         230 : }
    6221             : 
    6222             : 
    6223             : // -------------------------------------------------------------------
    6224             : // Splay tree
    6225             : 
    6226             : 
    6227      239769 : OutSet* OutSet::Extend(unsigned value, Zone* zone) {
    6228      239769 :   if (Get(value))
    6229             :     return this;
    6230      239764 :   if (successors(zone) != nullptr) {
    6231      633180 :     for (int i = 0; i < successors(zone)->length(); i++) {
    6232      422322 :       OutSet* successor = successors(zone)->at(i);
    6233      422322 :       if (successor->Get(value))
    6234             :         return successor;
    6235             :     }
    6236             :   } else {
    6237        5694 :     successors_ = new(zone) ZoneList<OutSet*>(2, zone);
    6238             :   }
    6239       33994 :   OutSet* result = new(zone) OutSet(first_, remaining_);
    6240       16997 :   result->Set(value, zone);
    6241       16997 :   successors(zone)->Add(result, zone);
    6242       16997 :   return result;
    6243             : }
    6244             : 
    6245             : 
    6246      715940 : void OutSet::Set(unsigned value, Zone *zone) {
    6247      715940 :   if (value < kFirstLimit) {
    6248      391306 :     first_ |= (1 << value);
    6249             :   } else {
    6250      324634 :     if (remaining_ == nullptr)
    6251       84582 :       remaining_ = new(zone) ZoneList<unsigned>(1, zone);
    6252      889320 :     if (remaining_->is_empty() || !remaining_->Contains(value))
    6253      323584 :       remaining_->Add(value, zone);
    6254             :   }
    6255      715940 : }
    6256             : 
    6257             : 
    6258       90010 : bool OutSet::Get(unsigned value) const {
    6259    30858331 :   if (value < kFirstLimit) {
    6260     6653273 :     return (first_ & (1 << value)) != 0;
    6261    24501487 :   } else if (remaining_ == nullptr) {
    6262             :     return false;
    6263             :   } else {
    6264           0 :     return remaining_->Contains(value);
    6265             :   }
    6266             : }
    6267             : 
    6268             : 
    6269             : const uc32 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
    6270             : 
    6271             : 
    6272       89403 : void DispatchTable::AddRange(CharacterRange full_range, int value,
    6273             :                              Zone* zone) {
    6274       89403 :   CharacterRange current = full_range;
    6275       89403 :   if (tree()->is_empty()) {
    6276             :     // If this is the first range we just insert into the table.
    6277             :     ZoneSplayTree<Config>::Locator loc;
    6278        2647 :     bool inserted = tree()->Insert(current.from(), &loc);
    6279             :     DCHECK(inserted);
    6280             :     USE(inserted);
    6281        5294 :     loc.set_value(Entry(current.from(), current.to(),
    6282             :                         empty()->Extend(value, zone)));
    6283             :     return;
    6284             :   }
    6285             :   // First see if there is a range to the left of this one that
    6286             :   // overlaps.
    6287             :   ZoneSplayTree<Config>::Locator loc;
    6288       86756 :   if (tree()->FindGreatestLessThan(current.from(), &loc)) {
    6289             :     Entry* entry = &loc.value();
    6290             :     // If we've found a range that overlaps with this one, and it
    6291             :     // starts strictly to the left of this one, we have to fix it
    6292             :     // because the following code only handles ranges that start on
    6293             :     // or after the start point of the range we're adding.
    6294       85154 :     if (entry->from() < current.from() && entry->to() >= current.from()) {
    6295             :       // Snap the overlapping range in half around the start point of
    6296             :       // the range we're adding.
    6297             :       CharacterRange left =
    6298         400 :           CharacterRange::Range(entry->from(), current.from() - 1);
    6299             :       CharacterRange right = CharacterRange::Range(current.from(), entry->to());
    6300             :       // The left part of the overlapping range doesn't overlap.
    6301             :       // Truncate the whole entry to be just the left part.
    6302             :       entry->set_to(left.to());
    6303             :       // The right part is the one that overlaps.  We add this part
    6304             :       // to the map and let the next step deal with merging it with
    6305             :       // the range we're adding.
    6306             :       ZoneSplayTree<Config>::Locator loc;
    6307         400 :       bool inserted = tree()->Insert(right.from(), &loc);
    6308             :       DCHECK(inserted);
    6309             :       USE(inserted);
    6310             :       loc.set_value(Entry(right.from(),
    6311             :                           right.to(),
    6312             :                           entry->out_set()));
    6313             :     }
    6314             :   }
    6315      168614 :   while (current.is_valid()) {
    6316      411141 :     if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
    6317      244912 :         (loc.value().from() <= current.to()) &&
    6318             :         (loc.value().to() >= current.from())) {
    6319             :       Entry* entry = &loc.value();
    6320             :       // We have overlap.  If there is space between the start point of
    6321             :       // the range we're adding and where the overlapping range starts
    6322             :       // then we have to add a range covering just that space.
    6323       81858 :       if (current.from() < entry->from()) {
    6324             :         ZoneSplayTree<Config>::Locator ins;
    6325       74068 :         bool inserted = tree()->Insert(current.from(), &ins);
    6326             :         DCHECK(inserted);
    6327             :         USE(inserted);
    6328      148136 :         ins.set_value(Entry(current.from(),
    6329             :                             entry->from() - 1,
    6330             :                             empty()->Extend(value, zone)));
    6331             :         current.set_from(entry->from());
    6332             :       }
    6333             :       DCHECK_EQ(current.from(), entry->from());
    6334             :       // If the overlapping range extends beyond the one we want to add
    6335             :       // we have to snap the right part off and add it separately.
    6336       81858 :       if (entry->to() > current.to()) {
    6337             :         ZoneSplayTree<Config>::Locator ins;
    6338        4430 :         bool inserted = tree()->Insert(current.to() + 1, &ins);
    6339             :         DCHECK(inserted);
    6340             :         USE(inserted);
    6341             :         ins.set_value(Entry(current.to() + 1,
    6342             :                             entry->to(),
    6343             :                             entry->out_set()));
    6344             :         entry->set_to(current.to());
    6345             :       }
    6346             :       DCHECK(entry->to() <= current.to());
    6347             :       // The overlapping range is now completely contained by the range
    6348             :       // we're adding so we can just update it and move the start point
    6349             :       // of the range we're adding just past it.
    6350             :       entry->AddValue(value, zone);
    6351             :       DCHECK(entry->to() + 1 > current.from());
    6352       81858 :       current.set_from(entry->to() + 1);
    6353             :     } else {
    6354             :       // There is no overlap so we can just add the range
    6355             :       ZoneSplayTree<Config>::Locator ins;
    6356       81196 :       bool inserted = tree()->Insert(current.from(), &ins);
    6357             :       DCHECK(inserted);
    6358             :       USE(inserted);
    6359      162392 :       ins.set_value(Entry(current.from(),
    6360             :                           current.to(),
    6361             :                           empty()->Extend(value, zone)));
    6362             :       break;
    6363             :     }
    6364             :   }
    6365             : }
    6366             : 
    6367             : 
    6368       55010 : OutSet* DispatchTable::Get(uc32 value) {
    6369             :   ZoneSplayTree<Config>::Locator loc;
    6370       55010 :   if (!tree()->FindGreatestLessThan(value, &loc))
    6371           0 :     return empty();
    6372             :   Entry* entry = &loc.value();
    6373       55010 :   if (value <= entry->to())
    6374       38885 :     return entry->out_set();
    6375             :   else
    6376       16125 :     return empty();
    6377             : }
    6378             : 
    6379             : 
    6380             : // -------------------------------------------------------------------
    6381             : // Analysis
    6382             : 
    6383             : 
    6384     1078981 : void Analysis::EnsureAnalyzed(RegExpNode* that) {
    6385             :   StackLimitCheck check(isolate());
    6386     1078981 :   if (check.HasOverflowed()) {
    6387             :     fail("Stack overflow");
    6388             :     return;
    6389             :   }
    6390     1078641 :   if (that->info()->been_analyzed || that->info()->being_analyzed)
    6391             :     return;
    6392      873370 :   that->info()->being_analyzed = true;
    6393      873370 :   that->Accept(this);
    6394      873370 :   that->info()->being_analyzed = false;
    6395      873370 :   that->info()->been_analyzed = true;
    6396             : }
    6397             : 
    6398             : 
    6399       88201 : void Analysis::VisitEnd(EndNode* that) {
    6400             :   // nothing to do
    6401       88201 : }
    6402             : 
    6403             : 
    6404      317143 : void TextNode::CalculateOffsets() {
    6405             :   int element_count = elements()->length();
    6406             :   // Set up the offsets of the elements relative to the start.  This is a fixed
    6407             :   // quantity since a TextNode can only contain fixed-width things.
    6408             :   int cp_offset = 0;
    6409     1076985 :   for (int i = 0; i < element_count; i++) {
    6410             :     TextElement& elm = elements()->at(i);
    6411             :     elm.set_cp_offset(cp_offset);
    6412      379921 :     cp_offset += elm.length();
    6413             :   }
    6414      317143 : }
    6415             : 
    6416             : 
    6417      319047 : void Analysis::VisitText(TextNode* that) {
    6418      319047 :   that->MakeCaseIndependent(isolate(), is_one_byte_);
    6419      319047 :   EnsureAnalyzed(that->on_success());
    6420      319047 :   if (!has_failed()) {
    6421      317143 :     that->CalculateOffsets();
    6422             :   }
    6423      319047 : }
    6424             : 
    6425             : 
    6426      286673 : void Analysis::VisitAction(ActionNode* that) {
    6427             :   RegExpNode* target = that->on_success();
    6428      286673 :   EnsureAnalyzed(target);
    6429      286673 :   if (!has_failed()) {
    6430             :     // If the next node is interested in what it follows then this node
    6431             :     // has to be interested too so it can pass the information on.
    6432             :     that->info()->AddFromFollowing(target->info());
    6433             :   }
    6434      286673 : }
    6435             : 
    6436             : 
    6437       25781 : void Analysis::VisitChoice(ChoiceNode* that) {
    6438             :   NodeInfo* info = that->info();
    6439      296373 :   for (int i = 0; i < that->alternatives()->length(); i++) {
    6440             :     RegExpNode* node = that->alternatives()->at(i).node();
    6441      135296 :     EnsureAnalyzed(node);
    6442      135296 :     if (has_failed()) return;
    6443             :     // Anything the following nodes need to know has to be known by
    6444             :     // this node also, so it can pass it on.
    6445             :     info->AddFromFollowing(node->info());
    6446             :   }
    6447             : }
    6448             : 
    6449             : 
    6450      145900 : void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
    6451             :   NodeInfo* info = that->info();
    6452      634416 :   for (int i = 0; i < that->alternatives()->length(); i++) {
    6453             :     RegExpNode* node = that->alternatives()->at(i).node();
    6454      291600 :     if (node != that->loop_node()) {
    6455      145900 :       EnsureAnalyzed(node);
    6456      145900 :       if (has_failed()) return;
    6457             :       info->AddFromFollowing(node->info());
    6458             :     }
    6459             :   }
    6460             :   // Check the loop last since it may need the value of this node
    6461             :   // to get a correct result.
    6462       98558 :   EnsureAnalyzed(that->loop_node());
    6463       98558 :   if (!has_failed()) {
    6464             :     info->AddFromFollowing(that->loop_node()->info());
    6465             :   }
    6466             : }
    6467             : 
    6468             : 
    6469        2321 : void Analysis::VisitBackReference(BackReferenceNode* that) {
    6470        2321 :   EnsureAnalyzed(that->on_success());
    6471        2321 : }
    6472             : 
    6473             : 
    6474        5447 : void Analysis::VisitAssertion(AssertionNode* that) {
    6475        5447 :   EnsureAnalyzed(that->on_success());
    6476        5447 : }
    6477             : 
    6478             : 
    6479         188 : void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
    6480             :                                      BoyerMooreLookahead* bm,
    6481             :                                      bool not_at_start) {
    6482             :   // Working out the set of characters that a backreference can match is too
    6483             :   // hard, so we just say that any character can match.
    6484             :   bm->SetRest(offset);
    6485             :   SaveBMInfo(bm, not_at_start, offset);
    6486         188 : }
    6487             : 
    6488             : 
    6489             : STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
    6490             :               RegExpMacroAssembler::kTableSize);
    6491             : 
    6492             : 
    6493        7838 : void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
    6494             :                               BoyerMooreLookahead* bm, bool not_at_start) {
    6495             :   ZoneList<GuardedAlternative>* alts = alternatives();
    6496       15676 :   budget = (budget - 1) / alts->length();
    6497       91230 :   for (int i = 0; i < alts->length(); i++) {
    6498             :     GuardedAlternative& alt = alts->at(i);
    6499       42168 :     if (alt.guards() != nullptr && alt.guards()->length() != 0) {
    6500             :       bm->SetRest(offset);  // Give up trying to fill in info.
    6501             :       SaveBMInfo(bm, not_at_start, offset);
    6502             :       return;
    6503             :     }
    6504       41696 :     alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
    6505             :   }
    6506             :   SaveBMInfo(bm, not_at_start, offset);
    6507             : }
    6508             : 
    6509             : 
    6510      123610 : void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
    6511             :                             BoyerMooreLookahead* bm, bool not_at_start) {
    6512      123610 :   if (initial_offset >= bm->length()) return;
    6513             :   int offset = initial_offset;
    6514             :   int max_char = bm->max_char();
    6515      405222 :   for (int i = 0; i < elements()->length(); i++) {
    6516      164635 :     if (offset >= bm->length()) {
    6517       17472 :       if (initial_offset == 0) set_bm_info(not_at_start, bm);
    6518             :       return;
    6519             :     }
    6520      147163 :     TextElement text = elements()->at(i);
    6521      147163 :     if (text.text_type() == TextElement::ATOM) {
    6522             :       RegExpAtom* atom = text.atom();
    6523      213973 :       for (int j = 0; j < atom->length(); j++, offset++) {
    6524       89183 :         if (offset >= bm->length()) {
    6525        6357 :           if (initial_offset == 0) set_bm_info(not_at_start, bm);
    6526             :           return;
    6527             :         }
    6528      165652 :         uc16 character = atom->data()[j];
    6529       82826 :         if (IgnoreCase(atom->flags())) {
    6530             :           unibrow::uchar chars[4];
    6531        4735 :           int length = GetCaseIndependentLetters(
    6532             :               isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
    6533        4735 :               chars, 4);
    6534       22485 :           for (int j = 0; j < length; j++) {
    6535        8875 :             bm->Set(offset, chars[j]);
    6536             :           }
    6537             :         } else {
    6538       78091 :           if (character <= max_char) bm->Set(offset, character);
    6539             :         }
    6540             :       }
    6541             :     } else {
    6542             :       DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
    6543             :       RegExpCharacterClass* char_class = text.char_class();
    6544             :       ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
    6545       98842 :       if (char_class->is_negated()) {
    6546        4382 :         bm->SetAll(offset);
    6547             :       } else {
    6548      673422 :         for (int k = 0; k < ranges->length(); k++) {
    6549             :           CharacterRange& range = ranges->at(k);
    6550      289481 :           if (range.from() > max_char) continue;
    6551             :           int to = Min(max_char, static_cast<int>(range.to()));
    6552      161207 :           bm->SetInterval(offset, Interval(range.from(), to));
    6553             :         }
    6554             :       }
    6555       98842 :       offset++;
    6556             :     }
    6557             :   }
    6558       99781 :   if (offset >= bm->length()) {
    6559       90396 :     if (initial_offset == 0) set_bm_info(not_at_start, bm);
    6560             :     return;
    6561             :   }
    6562        9385 :   on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
    6563       18770 :                              true);  // Not at start after a text node.
    6564        9385 :   if (initial_offset == 0) set_bm_info(not_at_start, bm);
    6565             : }
    6566             : 
    6567             : 
    6568             : // -------------------------------------------------------------------
    6569             : // Dispatch table construction
    6570             : 
    6571             : 
    6572           0 : void DispatchTableConstructor::VisitEnd(EndNode* that) {
    6573             :   AddRange(CharacterRange::Everything());
    6574           0 : }
    6575             : 
    6576             : 
    6577           0 : void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
    6578             :   node->set_being_calculated(true);
    6579             :   ZoneList<GuardedAlternative>* alternatives = node->alternatives();
    6580           0 :   for (int i = 0; i < alternatives->length(); i++) {
    6581             :     set_choice_index(i);
    6582           0 :     alternatives->at(i).node()->Accept(this);
    6583             :   }
    6584             :   node->set_being_calculated(false);
    6585           0 : }
    6586             : 
    6587             : 
    6588             : class AddDispatchRange {
    6589             :  public:
    6590             :   explicit AddDispatchRange(DispatchTableConstructor* constructor)
    6591           0 :     : constructor_(constructor) { }
    6592             :   void Call(uc32 from, DispatchTable::Entry entry);
    6593             :  private:
    6594             :   DispatchTableConstructor* constructor_;
    6595             : };
    6596             : 
    6597             : 
    6598           0 : void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
    6599           0 :   constructor_->AddRange(CharacterRange::Range(from, entry.to()));
    6600           0 : }
    6601             : 
    6602             : 
    6603           0 : void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
    6604           0 :   if (node->being_calculated())
    6605           0 :     return;
    6606           0 :   DispatchTable* table = node->GetTable(ignore_case_);
    6607             :   AddDispatchRange adder(this);
    6608             :   table->ForEach(&adder);
    6609             : }
    6610             : 
    6611             : 
    6612           0 : void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
    6613             :   // TODO(160): Find the node that we refer back to and propagate its start
    6614             :   // set back to here.  For now we just accept anything.
    6615             :   AddRange(CharacterRange::Everything());
    6616           0 : }
    6617             : 
    6618             : 
    6619           0 : void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
    6620             :   RegExpNode* target = that->on_success();
    6621           0 :   target->Accept(this);
    6622           0 : }
    6623             : 
    6624             : 
    6625        3935 : static int CompareRangeByFrom(const CharacterRange* a,
    6626             :                               const CharacterRange* b) {
    6627        7870 :   return Compare<uc16>(a->from(), b->from());
    6628             : }
    6629             : 
    6630             : 
    6631          55 : void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
    6632             :   ranges->Sort(CompareRangeByFrom);
    6633             :   uc16 last = 0;
    6634        1665 :   for (int i = 0; i < ranges->length(); i++) {
    6635         805 :     CharacterRange range = ranges->at(i);
    6636         805 :     if (last < range.from())
    6637         525 :       AddRange(CharacterRange::Range(last, range.from() - 1));
    6638         805 :     if (range.to() >= last) {
    6639         715 :       if (range.to() == String::kMaxCodePoint) {
    6640             :         return;
    6641             :       } else {
    6642         715 :         last = range.to() + 1;
    6643             :       }
    6644             :     }
    6645             :   }
    6646          55 :   AddRange(CharacterRange::Range(last, String::kMaxCodePoint));
    6647             : }
    6648             : 
    6649             : 
    6650           0 : void DispatchTableConstructor::VisitText(TextNode* that) {
    6651           0 :   TextElement elm = that->elements()->at(0);
    6652           0 :   switch (elm.text_type()) {
    6653             :     case TextElement::ATOM: {
    6654           0 :       uc16 c = elm.atom()->data()[0];
    6655           0 :       AddRange(CharacterRange::Range(c, c));
    6656             :       break;
    6657             :     }
    6658             :     case TextElement::CHAR_CLASS: {
    6659             :       RegExpCharacterClass* tree = elm.char_class();
    6660             :       ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
    6661           0 :       if (tree->is_negated()) {
    6662           0 :         AddInverse(ranges);
    6663             :       } else {
    6664           0 :         for (int i = 0; i < ranges->length(); i++)
    6665             :           AddRange(ranges->at(i));
    6666             :       }
    6667             :       break;
    6668             :     }
    6669             :     default: {
    6670           0 :       UNIMPLEMENTED();
    6671             :     }
    6672             :   }
    6673           0 : }
    6674             : 
    6675             : 
    6676           0 : void DispatchTableConstructor::VisitAction(ActionNode* that) {
    6677             :   RegExpNode* target = that->on_success();
    6678           0 :   target->Accept(this);
    6679           0 : }
    6680             : 
    6681          40 : RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
    6682             :                                               RegExpNode* on_success,
    6683             :                                               JSRegExp::Flags flags) {
    6684             :   // If the regexp matching starts within a surrogate pair, step back
    6685             :   // to the lead surrogate and start matching from there.
    6686             :   DCHECK(!compiler->read_backward());
    6687             :   Zone* zone = compiler->zone();
    6688             :   ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
    6689          40 :       zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
    6690             :   ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
    6691          40 :       zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
    6692             : 
    6693          40 :   ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone);
    6694             : 
    6695             :   int stack_register = compiler->UnicodeLookaroundStackRegister();
    6696             :   int position_register = compiler->UnicodeLookaroundPositionRegister();
    6697             :   RegExpNode* step_back = TextNode::CreateForCharacterRanges(
    6698          40 :       zone, lead_surrogates, true, on_success, flags);
    6699             :   RegExpLookaround::Builder builder(true, step_back, stack_register,
    6700          40 :                                     position_register);
    6701             :   RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
    6702          40 :       zone, trail_surrogates, false, builder.on_match_success(), flags);
    6703             : 
    6704          40 :   optional_step_back->AddAlternative(
    6705             :       GuardedAlternative(builder.ForMatch(match_trail)));
    6706             :   optional_step_back->AddAlternative(GuardedAlternative(on_success));
    6707             : 
    6708          40 :   return optional_step_back;
    6709             : }
    6710             : 
    6711             : 
    6712       85748 : RegExpEngine::CompilationResult RegExpEngine::Compile(
    6713             :     Isolate* isolate, Zone* zone, RegExpCompileData* data,
    6714             :     JSRegExp::Flags flags, Handle<String> pattern,
    6715             :     Handle<String> sample_subject, bool is_one_byte) {
    6716       85748 :   if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
    6717             :     return IrregexpRegExpTooBig(isolate);
    6718             :   }
    6719             :   bool is_sticky = IsSticky(flags);
    6720             :   bool is_global = IsGlobal(flags);
    6721             :   bool is_unicode = IsUnicode(flags);
    6722       85739 :   RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
    6723             : 
    6724       85739 :   if (compiler.optimize())
    6725       84574 :     compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
    6726             : 
    6727             :   // Sample some characters from the middle of the string.
    6728             :   static const int kSampleSize = 128;
    6729             : 
    6730       85739 :   sample_subject = String::Flatten(isolate, sample_subject);
    6731             :   int chars_sampled = 0;
    6732       85739 :   int half_way = (sample_subject->length() - kSampleSize) / 2;
    6733      999657 :   for (int i = Max(0, half_way);
    6734      542698 :        i < sample_subject->length() && chars_sampled < kSampleSize;
    6735             :        i++, chars_sampled++) {
    6736             :     compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
    6737             :   }
    6738             : 
    6739             :   // Wrap the body of the regexp in capture #0.
    6740       85739 :   RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
    6741             :                                                     0,
    6742             :                                                     &compiler,
    6743       85739 :                                                     compiler.accept());
    6744             :   RegExpNode* node = captured_body;
    6745       85739 :   bool is_end_anchored = data->tree->IsAnchoredAtEnd();
    6746       85739 :   bool is_start_anchored = data->tree->IsAnchoredAtStart();
    6747       85739 :   int max_length = data->tree->max_match();
    6748       85739 :   if (!is_start_anchored && !is_sticky) {
    6749             :     // Add a .*? at the beginning, outside the body capture, unless
    6750             :     // this expression is anchored at the beginning or sticky.
    6751             :     JSRegExp::Flags default_flags = JSRegExp::Flags();
    6752       82772 :     RegExpNode* loop_node = RegExpQuantifier::ToNode(
    6753             :         0, RegExpTree::kInfinity, false,
    6754             :         new (zone) RegExpCharacterClass('*', default_flags), &compiler,
    6755      165544 :         captured_body, data->contains_anchor);
    6756             : 
    6757       82772 :     if (data->contains_anchor) {
    6758             :       // Unroll loop once, to take care of the case that might start
    6759             :       // at the start of input.
    6760         149 :       ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
    6761             :       first_step_node->AddAlternative(GuardedAlternative(captured_body));
    6762             :       first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
    6763             :           new (zone) RegExpCharacterClass('*', default_flags), false,
    6764         149 :           loop_node)));
    6765             :       node = first_step_node;
    6766             :     } else {
    6767             :       node = loop_node;
    6768             :     }
    6769             :   }
    6770       85739 :   if (is_one_byte) {
    6771       14663 :     node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
    6772             :     // Do it again to propagate the new nodes to places where they were not
    6773             :     // put because they had not been calculated yet.
    6774       14663 :     if (node != nullptr) {
    6775       14363 :       node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
    6776             :     }
    6777       71076 :   } else if (is_unicode && (is_global || is_sticky)) {
    6778          40 :     node = OptionallyStepBackToLeadSurrogate(&compiler, node, flags);
    6779             :   }
    6780             : 
    6781       85739 :   if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
    6782       85739 :   data->node = node;
    6783             :   Analysis analysis(isolate, is_one_byte);
    6784       85739 :   analysis.EnsureAnalyzed(node);
    6785       85739 :   if (analysis.has_failed()) {
    6786             :     const char* error_message = analysis.error_message();
    6787             :     return CompilationResult(isolate, error_message);
    6788             :   }
    6789             : 
    6790             :   // Create the correct assembler for the architecture.
    6791             :   std::unique_ptr<RegExpMacroAssembler> macro_assembler;
    6792       85399 :   if (!FLAG_regexp_interpret_all) {
    6793             :     // Native regexp implementation.
    6794             :     DCHECK(!FLAG_jitless);
    6795             : 
    6796             :     NativeRegExpMacroAssembler::Mode mode =
    6797             :         is_one_byte ? NativeRegExpMacroAssembler::LATIN1
    6798       82129 :                     : NativeRegExpMacroAssembler::UC16;
    6799             : 
    6800             : #if V8_TARGET_ARCH_IA32
    6801             :     macro_assembler.reset(new RegExpMacroAssemblerIA32(
    6802             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6803             : #elif V8_TARGET_ARCH_X64
    6804       82129 :     macro_assembler.reset(new RegExpMacroAssemblerX64(
    6805       82129 :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6806             : #elif V8_TARGET_ARCH_ARM
    6807             :     macro_assembler.reset(new RegExpMacroAssemblerARM(
    6808             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6809             : #elif V8_TARGET_ARCH_ARM64
    6810             :     macro_assembler.reset(new RegExpMacroAssemblerARM64(
    6811             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6812             : #elif V8_TARGET_ARCH_S390
    6813             :     macro_assembler.reset(new RegExpMacroAssemblerS390(
    6814             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6815             : #elif V8_TARGET_ARCH_PPC
    6816             :     macro_assembler.reset(new RegExpMacroAssemblerPPC(
    6817             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6818             : #elif V8_TARGET_ARCH_MIPS
    6819             :     macro_assembler.reset(new RegExpMacroAssemblerMIPS(
    6820             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6821             : #elif V8_TARGET_ARCH_MIPS64
    6822             :     macro_assembler.reset(new RegExpMacroAssemblerMIPS(
    6823             :         isolate, zone, mode, (data->capture_count + 1) * 2));
    6824             : #else
    6825             : #error "Unsupported architecture"
    6826             : #endif
    6827             :   } else {
    6828             :     DCHECK(FLAG_regexp_interpret_all);
    6829             : 
    6830             :     // Interpreted regexp implementation.
    6831        3270 :     macro_assembler.reset(new RegExpMacroAssemblerIrregexp(isolate, zone));
    6832             :   }
    6833             : 
    6834       85399 :   macro_assembler->set_slow_safe(TooMuchRegExpCode(isolate, pattern));
    6835             : 
    6836             :   // Inserted here, instead of in Assembler, because it depends on information
    6837             :   // in the AST that isn't replicated in the Node structure.
    6838             :   static const int kMaxBacksearchLimit = 1024;
    6839       85942 :   if (is_end_anchored && !is_start_anchored && !is_sticky &&
    6840         543 :       max_length < kMaxBacksearchLimit) {
    6841         210 :     macro_assembler->SetCurrentPositionFromEnd(max_length);
    6842             :   }
    6843             : 
    6844       85399 :   if (is_global) {
    6845             :     RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
    6846        3824 :     if (data->tree->min_match() > 0) {
    6847             :       mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
    6848         138 :     } else if (is_unicode) {
    6849             :       mode = RegExpMacroAssembler::GLOBAL_UNICODE;
    6850             :     }
    6851             :     macro_assembler->set_global_mode(mode);
    6852             :   }
    6853             : 
    6854             :   return compiler.Assemble(isolate, macro_assembler.get(), node,
    6855       85399 :                            data->capture_count, pattern);
    6856             : }
    6857             : 
    6858      169973 : bool RegExpEngine::TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
    6859             :   Heap* heap = isolate->heap();
    6860      169973 :   bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
    6861      169973 :   if (isolate->total_regexp_code_generated() >
    6862      298526 :           RegExpImpl::kRegExpCompiledLimit &&
    6863      128553 :       heap->CommittedMemoryExecutable() >
    6864             :           RegExpImpl::kRegExpExecutableMemoryLimit) {
    6865             :     too_much = true;
    6866             :   }
    6867      169973 :   return too_much;
    6868             : }
    6869             : 
    6870       36155 : Object RegExpResultsCache::Lookup(Heap* heap, String key_string,
    6871             :                                   Object key_pattern,
    6872             :                                   FixedArray* last_match_cache,
    6873             :                                   ResultsCacheType type) {
    6874             :   FixedArray cache;
    6875       36155 :   if (!key_string->IsInternalizedString()) return Smi::kZero;
    6876        5238 :   if (type == STRING_SPLIT_SUBSTRINGS) {
    6877             :     DCHECK(key_pattern->IsString());
    6878        5238 :     if (!key_pattern->IsInternalizedString()) return Smi::kZero;
    6879             :     cache = heap->string_split_cache();
    6880             :   } else {
    6881             :     DCHECK(type == REGEXP_MULTIPLE_INDICES);
    6882             :     DCHECK(key_pattern->IsFixedArray());
    6883             :     cache = heap->regexp_multiple_cache();
    6884             :   }
    6885             : 
    6886        5238 :   uint32_t hash = key_string->Hash();
    6887             :   uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
    6888        5238 :                     ~(kArrayEntriesPerCacheEntry - 1));
    6889       14406 :   if (cache->get(index + kStringOffset) != key_string ||
    6890        3930 :       cache->get(index + kPatternOffset) != key_pattern) {
    6891             :     index =
    6892        1339 :         ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
    6893        2694 :     if (cache->get(index + kStringOffset) != key_string ||
    6894          16 :         cache->get(index + kPatternOffset) != key_pattern) {
    6895        1334 :       return Smi::kZero;
    6896             :     }
    6897             :   }
    6898             : 
    6899        7808 :   *last_match_cache = FixedArray::cast(cache->get(index + kLastMatchOffset));
    6900        3904 :   return cache->get(index + kArrayOffset);
    6901             : }
    6902             : 
    6903       32251 : void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
    6904             :                                Handle<Object> key_pattern,
    6905             :                                Handle<FixedArray> value_array,
    6906             :                                Handle<FixedArray> last_match_cache,
    6907             :                                ResultsCacheType type) {
    6908             :   Factory* factory = isolate->factory();
    6909             :   Handle<FixedArray> cache;
    6910       32251 :   if (!key_string->IsInternalizedString()) return;
    6911        1334 :   if (type == STRING_SPLIT_SUBSTRINGS) {
    6912             :     DCHECK(key_pattern->IsString());
    6913        1334 :     if (!key_pattern->IsInternalizedString()) return;
    6914             :     cache = factory->string_split_cache();
    6915             :   } else {
    6916             :     DCHECK(type == REGEXP_MULTIPLE_INDICES);
    6917             :     DCHECK(key_pattern->IsFixedArray());
    6918             :     cache = factory->regexp_multiple_cache();
    6919             :   }
    6920             : 
    6921        1334 :   uint32_t hash = key_string->Hash();
    6922             :   uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
    6923        1334 :                     ~(kArrayEntriesPerCacheEntry - 1));
    6924        2668 :   if (cache->get(index + kStringOffset) == Smi::kZero) {
    6925        2260 :     cache->set(index + kStringOffset, *key_string);
    6926        2260 :     cache->set(index + kPatternOffset, *key_pattern);
    6927        2260 :     cache->set(index + kArrayOffset, *value_array);
    6928        2260 :     cache->set(index + kLastMatchOffset, *last_match_cache);
    6929             :   } else {
    6930             :     uint32_t index2 =
    6931         204 :         ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
    6932         408 :     if (cache->get(index2 + kStringOffset) == Smi::kZero) {
    6933         318 :       cache->set(index2 + kStringOffset, *key_string);
    6934         318 :       cache->set(index2 + kPatternOffset, *key_pattern);
    6935         318 :       cache->set(index2 + kArrayOffset, *value_array);
    6936         318 :       cache->set(index2 + kLastMatchOffset, *last_match_cache);
    6937             :     } else {
    6938          45 :       cache->set(index2 + kStringOffset, Smi::kZero);
    6939          90 :       cache->set(index2 + kPatternOffset, Smi::kZero);
    6940          90 :       cache->set(index2 + kArrayOffset, Smi::kZero);
    6941          90 :       cache->set(index2 + kLastMatchOffset, Smi::kZero);
    6942          90 :       cache->set(index + kStringOffset, *key_string);
    6943          90 :       cache->set(index + kPatternOffset, *key_pattern);
    6944          90 :       cache->set(index + kArrayOffset, *value_array);
    6945          90 :       cache->set(index + kLastMatchOffset, *last_match_cache);
    6946             :     }
    6947             :   }
    6948             :   // If the array is a reasonably short list of substrings, convert it into a
    6949             :   // list of internalized strings.
    6950        2668 :   if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) {
    6951       13510 :     for (int i = 0; i < value_array->length(); i++) {
    6952             :       Handle<String> str(String::cast(value_array->get(i)), isolate);
    6953        6107 :       Handle<String> internalized_str = factory->InternalizeString(str);
    6954       12214 :       value_array->set(i, *internalized_str);
    6955             :     }
    6956             :   }
    6957             :   // Convert backing store to a copy-on-write array.
    6958             :   value_array->set_map_no_write_barrier(
    6959             :       ReadOnlyRoots(isolate).fixed_cow_array_map());
    6960             : }
    6961             : 
    6962      137729 : void RegExpResultsCache::Clear(FixedArray cache) {  // Resets the given results cache by overwriting its slots.
    6963    70653677 :   for (int i = 0; i < kRegExpResultsCacheSize; i++) {  // Visits indices 0 .. kRegExpResultsCacheSize - 1.
    6964    35257975 :     cache->set(i, Smi::kZero);  // Smi::kZero is the value Enter compares against to detect an unused entry.
    6965             :   }
    6966      137728 : }
    6967             : 
    6968             : }  // namespace internal
    6969      121996 : }  // namespace v8

Generated by: LCOV version 1.10