LCOV - code coverage report
Current view: top level - src/regexp - jsregexp.h (source / functions) Hit Total Coverage
Test: app.info Lines: 144 171 84.2 %
Date: 2017-04-26 Functions: 26 58 44.8 %

          Line data    Source code
       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_REGEXP_JSREGEXP_H_
       6             : #define V8_REGEXP_JSREGEXP_H_
       7             : 
       8             : #include "src/allocation.h"
       9             : #include "src/assembler.h"
      10             : #include "src/regexp/regexp-ast.h"
      11             : #include "src/regexp/regexp-macro-assembler.h"
      12             : 
      13             : namespace v8 {
      14             : namespace internal {
      15             : 
      16             : class NodeVisitor;
      17             : class RegExpCompiler;
      18             : class RegExpMacroAssembler;
      19             : class RegExpNode;
      20             : class RegExpTree;
      21             : class BoyerMooreLookahead;
      22             : 
      23             : class RegExpImpl {
      24             :  public:
      25             :   // Whether V8 is compiled with native regexp support or not.
      26             :   static bool UsesNativeRegExp() {
      27             : #ifdef V8_INTERPRETED_REGEXP
      28             :     return false;
      29             : #else
      30             :     return true;
      31             : #endif
      32             :   }
      33             : 
      34             :   // Returns a string representation of a regular expression.
      35             :   // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
      36             :   // This function calls the garbage collector if necessary.
      37             :   static Handle<String> ToString(Handle<Object> value);
      38             : 
      39             :   // Parses the RegExp pattern and prepares the JSRegExp object with
      40             :   // generic data and choice of implementation - as well as what
      41             :   // the implementation wants to store in the data field.
      42             :   // Returns false if compilation fails.
      43             :   MUST_USE_RESULT static MaybeHandle<Object> Compile(Handle<JSRegExp> re,
      44             :                                                      Handle<String> pattern,
      45             :                                                      JSRegExp::Flags flags);
      46             : 
      47             :   // See ECMA-262 section 15.10.6.2.
      48             :   // This function calls the garbage collector if necessary.
      49             :   V8_EXPORT_PRIVATE MUST_USE_RESULT static MaybeHandle<Object> Exec(
      50             :       Handle<JSRegExp> regexp, Handle<String> subject, int index,
      51             :       Handle<RegExpMatchInfo> last_match_info);
      52             : 
      53             :   // Prepares a JSRegExp object with Irregexp-specific data.
      54             :   static void IrregexpInitialize(Handle<JSRegExp> re,
      55             :                                  Handle<String> pattern,
      56             :                                  JSRegExp::Flags flags,
      57             :                                  int capture_register_count);
      58             : 
      59             : 
      60             :   static void AtomCompile(Handle<JSRegExp> re,
      61             :                           Handle<String> pattern,
      62             :                           JSRegExp::Flags flags,
      63             :                           Handle<String> match_pattern);
      64             : 
      65             : 
      66             :   static int AtomExecRaw(Handle<JSRegExp> regexp,
      67             :                          Handle<String> subject,
      68             :                          int index,
      69             :                          int32_t* output,
      70             :                          int output_size);
      71             : 
      72             :   static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
      73             :                                  Handle<String> subject, int index,
      74             :                                  Handle<RegExpMatchInfo> last_match_info);
      75             : 
      76             :   enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
      77             : 
      78             :   // Prepare a RegExp for being executed one or more times (using
      79             :   // IrregexpExecOnce) on the subject.
      80             :   // This ensures that the regexp is compiled for the subject, and that
      81             :   // the subject is flat.
      82             :   // Returns the number of integer spaces required by IrregexpExecOnce
      83             :   // as its "registers" argument.  If the regexp cannot be compiled,
      84             :   // an exception is set as pending, and this function returns negative.
      85             :   static int IrregexpPrepare(Handle<JSRegExp> regexp,
      86             :                              Handle<String> subject);
      87             : 
      88             :   // Execute a regular expression on the subject, starting from index.
      89             :   // If matching succeeds, return the number of matches.  This can be larger
      90             :   // than one in the case of global regular expressions.
      91             :   // The captures and subcaptures are stored into the registers vector.
      92             :   // If matching fails, returns RE_FAILURE.
      93             :   // If execution fails, sets a pending exception and returns RE_EXCEPTION.
      94             :   static int IrregexpExecRaw(Handle<JSRegExp> regexp,
      95             :                              Handle<String> subject,
      96             :                              int index,
      97             :                              int32_t* output,
      98             :                              int output_size);
      99             : 
     100             :   // Execute an Irregexp bytecode pattern.
     101             :   // On a successful match, the result is a JSArray containing
     102             :   // captured positions.  On a failure, the result is the null value.
     103             :   // Returns an empty handle in case of an exception.
     104             :   MUST_USE_RESULT static MaybeHandle<Object> IrregexpExec(
     105             :       Handle<JSRegExp> regexp, Handle<String> subject, int index,
     106             :       Handle<RegExpMatchInfo> last_match_info);
     107             : 
     108             :   // Set last match info.  If match is NULL, then setting captures is omitted.
     109             :   static Handle<RegExpMatchInfo> SetLastMatchInfo(
     110             :       Handle<RegExpMatchInfo> last_match_info, Handle<String> subject,
     111             :       int capture_count, int32_t* match);
     112             : 
     113             :   class GlobalCache {
     114             :    public:
     115             :     GlobalCache(Handle<JSRegExp> regexp,
     116             :                 Handle<String> subject,
     117             :                 Isolate* isolate);
     118             : 
     119             :     INLINE(~GlobalCache());
     120             : 
     121             :     // Fetch the next entry in the cache for global regexp match results.
     122             :     // This does not set the last match info.  Upon failure, NULL is returned.
     123             :     // The cause can be checked with Result().  The previous
     124             :     // result is still in available in memory when a failure happens.
     125             :     INLINE(int32_t* FetchNext());
     126             : 
     127             :     INLINE(int32_t* LastSuccessfulMatch());
     128             : 
     129             :     INLINE(bool HasException()) { return num_matches_ < 0; }
     130             : 
     131             :    private:
     132             :     int AdvanceZeroLength(int last_index);
     133             : 
     134             :     int num_matches_;
     135             :     int max_matches_;
     136             :     int current_match_index_;
     137             :     int registers_per_match_;
     138             :     // Pointer to the last set of captures.
     139             :     int32_t* register_array_;
     140             :     int register_array_size_;
     141             :     Handle<JSRegExp> regexp_;
     142             :     Handle<String> subject_;
     143             :   };
     144             : 
     145             :   // For acting on the JSRegExp data FixedArray.
     146             :   static int IrregexpMaxRegisterCount(FixedArray* re);
     147             :   static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
     148             :   static void SetIrregexpCaptureNameMap(FixedArray* re,
     149             :                                         Handle<FixedArray> value);
     150             :   static int IrregexpNumberOfCaptures(FixedArray* re);
     151             :   static int IrregexpNumberOfRegisters(FixedArray* re);
     152             :   static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte);
     153             :   static Code* IrregexpNativeCode(FixedArray* re, bool is_one_byte);
     154             : 
     155             :   // Limit the space regexps take up on the heap.  In order to limit this we
     156             :   // would like to keep track of the amount of regexp code on the heap.  This
     157             :   // is not tracked, however.  As a conservative approximation we track the
     158             :   // total regexp code compiled including code that has subsequently been freed
     159             :   // and the total executable memory at any point.
     160             :   static const size_t kRegExpExecutableMemoryLimit = 16 * MB;
     161             :   static const size_t kRegExpCompiledLimit = 1 * MB;
     162             :   static const int kRegExpTooLargeToOptimize = 20 * KB;
     163             : 
     164             :  private:
     165             :   static bool CompileIrregexp(Handle<JSRegExp> re,
     166             :                               Handle<String> sample_subject, bool is_one_byte);
     167             :   static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re,
     168             :                                             Handle<String> sample_subject,
     169             :                                             bool is_one_byte);
     170             : };
     171             : 
     172             : 
     173             : // Represents the location of one element relative to the intersection of
     174             : // two sets. Corresponds to the four areas of a Venn diagram.
     175             : enum ElementInSetsRelation {
     176             :   kInsideNone = 0,
     177             :   kInsideFirst = 1,
     178             :   kInsideSecond = 2,
     179             :   kInsideBoth = 3
     180             : };
     181             : 
     182             : 
     183             : // A set of unsigned integers that behaves especially well on small
     184             : // integers (< 32).  May do zone-allocation.
     185             : class OutSet: public ZoneObject {
     186             :  public:
     187     1193775 :   OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
     188             :   OutSet* Extend(unsigned value, Zone* zone);
     189             :   bool Get(unsigned value) const;
     190             :   static const unsigned kFirstLimit = 32;
     191             : 
     192             :  private:
     193             :   // Destructively set a value in this set.  In most cases you want
     194             :   // to use Extend instead to ensure that only one instance exists
     195             :   // that contains the same values.
     196             :   void Set(unsigned value, Zone* zone);
     197             : 
     198             :   // The successors are a list of sets that contain the same values
     199             :   // as this set and the one more value that is not present in this
     200             :   // set.
     201             :   ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
     202             : 
     203             :   OutSet(uint32_t first, ZoneList<unsigned>* remaining)
     204       21069 :       : first_(first), remaining_(remaining), successors_(NULL) { }
     205             :   uint32_t first_;
     206             :   ZoneList<unsigned>* remaining_;
     207             :   ZoneList<OutSet*>* successors_;
     208             :   friend class Trace;
     209             : };
     210             : 
     211             : 
     212             : // A mapping from integers, specified as ranges, to a set of integers.
     213             : // Used for mapping character ranges to choices.
     214             : class DispatchTable : public ZoneObject {
     215             :  public:
     216             :   explicit DispatchTable(Zone* zone) : tree_(zone) { }
     217             : 
     218             :   class Entry {
     219             :    public:
     220             :     Entry() : from_(0), to_(0), out_set_(NULL) { }
     221             :     Entry(uc32 from, uc32 to, OutSet* out_set)
     222             :         : from_(from), to_(to), out_set_(out_set) {
     223             :       DCHECK(from <= to);
     224             :     }
     225             :     uc32 from() { return from_; }
     226             :     uc32 to() { return to_; }
     227        6467 :     void set_to(uc32 value) { to_ = value; }
     228             :     void AddValue(int value, Zone* zone) {
     229       93808 :       out_set_ = out_set_->Extend(value, zone);
     230             :     }
     231             :     OutSet* out_set() { return out_set_; }
     232             :    private:
     233             :     uc32 from_;
     234             :     uc32 to_;
     235             :     OutSet* out_set_;
     236             :   };
     237             : 
     238             :   class Config {
     239             :    public:
     240             :     typedef uc32 Key;
     241             :     typedef Entry Value;
     242             :     static const uc32 kNoKey;
     243             :     static const Entry NoValue() { return Value(); }
     244             :     static inline int Compare(uc32 a, uc32 b) {
     245     1861086 :       if (a == b)
     246             :         return 0;
     247     1816904 :       else if (a < b)
     248             :         return -1;
     249             :       else
     250             :         return 1;
     251             :     }
     252             :   };
     253             : 
     254             :   void AddRange(CharacterRange range, int value, Zone* zone);
     255             :   OutSet* Get(uc32 value);
     256             :   void Dump();
     257             : 
     258             :   template <typename Callback>
     259             :   void ForEach(Callback* callback) {
     260        3163 :     return tree()->ForEach(callback);
     261             :   }
     262             : 
     263             :  private:
     264             :   // There can't be a static empty set since it allocates its
     265             :   // successors in a zone and caches them.
     266             :   OutSet* empty() { return &empty_; }
     267             :   OutSet empty_;
     268             :   ZoneSplayTree<Config>* tree() { return &tree_; }
     269             :   ZoneSplayTree<Config> tree_;
     270             : };
     271             : 
     272             : 
     273             : // Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
     274             : class UnicodeRangeSplitter {
     275             :  public:
     276             :   UnicodeRangeSplitter(Zone* zone, ZoneList<CharacterRange>* base);
     277             :   void Call(uc32 from, DispatchTable::Entry entry);
     278             : 
     279             :   ZoneList<CharacterRange>* bmp() { return bmp_; }
     280             :   ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; }
     281             :   ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; }
     282             :   ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; }
     283             : 
     284             :  private:
     285             :   static const int kBase = 0;
     286             :   // Separate ranges into
     287             :   static const int kBmpCodePoints = 1;
     288             :   static const int kLeadSurrogates = 2;
     289             :   static const int kTrailSurrogates = 3;
     290             :   static const int kNonBmpCodePoints = 4;
     291             : 
     292             :   Zone* zone_;
     293             :   DispatchTable table_;
     294             :   ZoneList<CharacterRange>* bmp_;
     295             :   ZoneList<CharacterRange>* lead_surrogates_;
     296             :   ZoneList<CharacterRange>* trail_surrogates_;
     297             :   ZoneList<CharacterRange>* non_bmp_;
     298             : };
     299             : 
     300             : 
     301             : #define FOR_EACH_NODE_TYPE(VISIT)                                    \
     302             :   VISIT(End)                                                         \
     303             :   VISIT(Action)                                                      \
     304             :   VISIT(Choice)                                                      \
     305             :   VISIT(BackReference)                                               \
     306             :   VISIT(Assertion)                                                   \
     307             :   VISIT(Text)
     308             : 
     309             : 
     310             : class Trace;
     311             : struct PreloadState;
     312             : class GreedyLoopState;
     313             : class AlternativeGenerationList;
     314             : 
     315             : struct NodeInfo {
     316             :   NodeInfo()
     317             :       : being_analyzed(false),
     318             :         been_analyzed(false),
     319             :         follows_word_interest(false),
     320             :         follows_newline_interest(false),
     321             :         follows_start_interest(false),
     322             :         at_end(false),
     323             :         visited(false),
     324     6915745 :         replacement_calculated(false) { }
     325             : 
     326             :   // Returns true if the interests and assumptions of this node
     327             :   // matches the given one.
     328             :   bool Matches(NodeInfo* that) {
     329             :     return (at_end == that->at_end) &&
     330             :            (follows_word_interest == that->follows_word_interest) &&
     331             :            (follows_newline_interest == that->follows_newline_interest) &&
     332             :            (follows_start_interest == that->follows_start_interest);
     333             :   }
     334             : 
     335             :   // Updates the interests of this node given the interests of the
     336             :   // node preceding it.
     337             :   void AddFromPreceding(NodeInfo* that) {
     338             :     at_end |= that->at_end;
     339             :     follows_word_interest |= that->follows_word_interest;
     340             :     follows_newline_interest |= that->follows_newline_interest;
     341             :     follows_start_interest |= that->follows_start_interest;
     342             :   }
     343             : 
     344             :   bool HasLookbehind() {
     345             :     return follows_word_interest ||
     346             :            follows_newline_interest ||
     347             :            follows_start_interest;
     348             :   }
     349             : 
     350             :   // Sets the interests of this node to include the interests of the
     351             :   // following node.
     352             :   void AddFromFollowing(NodeInfo* that) {
     353      662904 :     follows_word_interest |= that->follows_word_interest;
     354      662904 :     follows_newline_interest |= that->follows_newline_interest;
     355      662904 :     follows_start_interest |= that->follows_start_interest;
     356             :   }
     357             : 
     358             :   void ResetCompilationState() {
     359             :     being_analyzed = false;
     360             :     been_analyzed = false;
     361             :   }
     362             : 
     363             :   bool being_analyzed: 1;
     364             :   bool been_analyzed: 1;
     365             : 
     366             :   // These bits are set of this node has to know what the preceding
     367             :   // character was.
     368             :   bool follows_word_interest: 1;
     369             :   bool follows_newline_interest: 1;
     370             :   bool follows_start_interest: 1;
     371             : 
     372             :   bool at_end: 1;
     373             :   bool visited: 1;
     374             :   bool replacement_calculated: 1;
     375             : };
     376             : 
     377             : 
     378             : // Details of a quick mask-compare check that can look ahead in the
     379             : // input stream.
     380             : class QuickCheckDetails {
     381             :  public:
     382             :   QuickCheckDetails()
     383             :       : characters_(0),
     384             :         mask_(0),
     385             :         value_(0),
     386    17344640 :         cannot_match_(false) { }
     387             :   explicit QuickCheckDetails(int characters)
     388             :       : characters_(characters),
     389             :         mask_(0),
     390             :         value_(0),
     391      995545 :         cannot_match_(false) { }
     392             :   bool Rationalize(bool one_byte);
     393             :   // Merge in the information from another branch of an alternation.
     394             :   void Merge(QuickCheckDetails* other, int from_index);
     395             :   // Advance the current position by some amount.
     396             :   void Advance(int by, bool one_byte);
     397             :   void Clear();
     398             :   bool cannot_match() { return cannot_match_; }
     399         578 :   void set_cannot_match() { cannot_match_ = true; }
     400             :   struct Position {
     401    14672148 :     Position() : mask(0), value(0), determines_perfectly(false) { }
     402             :     uc16 mask;
     403             :     uc16 value;
     404             :     bool determines_perfectly;
     405             :   };
     406             :   int characters() { return characters_; }
     407      730201 :   void set_characters(int characters) { characters_ = characters; }
     408             :   Position* positions(int index) {
     409             :     DCHECK(index >= 0);
     410             :     DCHECK(index < characters_);
     411     2066721 :     return positions_ + index;
     412             :   }
     413             :   uint32_t mask() { return mask_; }
     414             :   uint32_t value() { return value_; }
     415             : 
     416             :  private:
     417             :   // How many characters do we have quick check information from.  This is
     418             :   // the same for all branches of a choice node.
     419             :   int characters_;
     420             :   Position positions_[4];
     421             :   // These values are the condensate of the above array after Rationalize().
     422             :   uint32_t mask_;
     423             :   uint32_t value_;
     424             :   // If set to true, there is no way this quick check can match at all.
     425             :   // E.g., if it requires to be at the start of the input, and isn't.
     426             :   bool cannot_match_;
     427             : };
     428             : 
     429             : 
     430             : extern int kUninitializedRegExpNodePlaceHolder;
     431             : 
     432             : 
     433             : class RegExpNode: public ZoneObject {
     434             :  public:
     435             :   explicit RegExpNode(Zone* zone)
     436    20747235 :       : replacement_(NULL), on_work_list_(false), trace_count_(0), zone_(zone) {
     437     6915745 :     bm_info_[0] = bm_info_[1] = NULL;
     438             :   }
     439             :   virtual ~RegExpNode();
     440             :   virtual void Accept(NodeVisitor* visitor) = 0;
     441             :   // Generates a goto to this node or actually generates the code at this point.
     442             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
     443             :   // How many characters must this node consume at a minimum in order to
     444             :   // succeed.  If we have found at least 'still_to_find' characters that
     445             :   // must be consumed there is no need to ask any following nodes whether
     446             :   // they are sure to eat any more characters.  The not_at_start argument is
     447             :   // used to indicate that we know we are not at the start of the input.  In
     448             :   // this case anchored branches will always fail and can be ignored when
     449             :   // determining how many characters are consumed on success.
     450             :   virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0;
     451             :   // Emits some quick code that checks whether the preloaded characters match.
     452             :   // Falls through on certain failure, jumps to the label on possible success.
     453             :   // If the node cannot make a quick check it does nothing and returns false.
     454             :   bool EmitQuickCheck(RegExpCompiler* compiler,
     455             :                       Trace* bounds_check_trace,
     456             :                       Trace* trace,
     457             :                       bool preload_has_checked_bounds,
     458             :                       Label* on_possible_success,
     459             :                       QuickCheckDetails* details_return,
     460             :                       bool fall_through_on_failure);
     461             :   // For a given number of characters this returns a mask and a value.  The
     462             :   // next n characters are anded with the mask and compared with the value.
     463             :   // A comparison failure indicates the node cannot match the next n characters.
     464             :   // A comparison success indicates the node may match.
     465             :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     466             :                                     RegExpCompiler* compiler,
     467             :                                     int characters_filled_in,
     468             :                                     bool not_at_start) = 0;
     469             :   static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
     470      115701 :   virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
     471             :   // Only returns the successor for a text node of length 1 that matches any
     472             :   // character and that has no guards on it.
     473      111129 :   virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
     474             :       RegExpCompiler* compiler) {
     475      111129 :     return NULL;
     476             :   }
     477             : 
     478             :   // Collects information on the possible code units (mod 128) that can match if
     479             :   // we look forward.  This is used for a Boyer-Moore-like string searching
     480             :   // implementation.  TODO(erikcorry):  This should share more code with
     481             :   // EatsAtLeast, GetQuickCheckDetails.  The budget argument is used to limit
     482             :   // the number of nodes we are willing to look at in order to create this data.
     483             :   static const int kRecursionBudget = 200;
     484             :   bool KeepRecursing(RegExpCompiler* compiler);
     485           0 :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     486             :                             BoyerMooreLookahead* bm, bool not_at_start) {
     487           0 :     UNREACHABLE();
     488             :   }
     489             : 
     490             :   // If we know that the input is one-byte then there are some nodes that can
     491             :   // never match.  This method returns a node that can be substituted for
     492             :   // itself, or NULL if the node can never match.
     493       20163 :   virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) {
     494       20163 :     return this;
     495             :   }
     496             :   // Helper for FilterOneByte.
     497             :   RegExpNode* replacement() {
     498             :     DCHECK(info()->replacement_calculated);
     499             :     return replacement_;
     500             :   }
     501             :   RegExpNode* set_replacement(RegExpNode* replacement) {
     502      245975 :     info()->replacement_calculated = true;
     503      245975 :     replacement_ =  replacement;
     504             :     return replacement;  // For convenience.
     505             :   }
     506             : 
     507             :   // We want to avoid recalculating the lookahead info, so we store it on the
     508             :   // node.  Only info that is for this node is stored.  We can tell that the
     509             :   // info is for this node when offset == 0, so the information is calculated
     510             :   // relative to this node.
     511             :   void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
     512      139634 :     if (offset == 0) set_bm_info(not_at_start, bm);
     513             :   }
     514             : 
     515             :   Label* label() { return &label_; }
     516             :   // If non-generic code is generated for a node (i.e. the node is not at the
     517             :   // start of the trace) then it cannot be reused.  This variable sets a limit
     518             :   // on how often we allow that to happen before we insist on starting a new
     519             :   // trace and generating generic code for a node that can be reused by flushing
     520             :   // the deferred actions in the current trace and generating a goto.
     521             :   static const int kMaxCopiesCodeGenerated = 10;
     522             : 
     523             :   bool on_work_list() { return on_work_list_; }
     524      465788 :   void set_on_work_list(bool value) { on_work_list_ = value; }
     525             : 
     526             :   NodeInfo* info() { return &info_; }
     527             : 
     528             :   BoyerMooreLookahead* bm_info(bool not_at_start) {
     529       87770 :     return bm_info_[not_at_start ? 1 : 0];
     530             :   }
     531             : 
     532             :   Zone* zone() const { return zone_; }
     533             : 
     534             :  protected:
     535             :   enum LimitResult { DONE, CONTINUE };
     536             :   RegExpNode* replacement_;
     537             : 
     538             :   LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
     539             : 
     540             :   void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
     541      200222 :     bm_info_[not_at_start ? 1 : 0] = bm;
     542             :   }
     543             : 
     544             :  private:
     545             :   static const int kFirstCharBudget = 10;
     546             :   Label label_;
     547             :   bool on_work_list_;
     548             :   NodeInfo info_;
     549             :   // This variable keeps track of how many times code has been generated for
     550             :   // this node (in different traces).  We don't keep track of where the
     551             :   // generated code is located unless the code is generated at the start of
     552             :   // a trace, in which case it is generic and can be reused by flushing the
     553             :   // deferred operations in the current trace and generating a goto.
     554             :   int trace_count_;
     555             :   BoyerMooreLookahead* bm_info_[2];
     556             : 
     557             :   Zone* zone_;
     558             : };
     559             : 
     560             : 
     561           0 : class SeqRegExpNode: public RegExpNode {
     562             :  public:
     563     5169900 :   explicit SeqRegExpNode(RegExpNode* on_success)
     564     5169900 :       : RegExpNode(on_success->zone()), on_success_(on_success) { }
     565             :   RegExpNode* on_success() { return on_success_; }
     566             :   void set_on_success(RegExpNode* node) { on_success_ = node; }
     567             :   virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
     568           0 :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     569             :                             BoyerMooreLookahead* bm, bool not_at_start) {
     570           0 :     on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
     571           0 :     if (offset == 0) set_bm_info(not_at_start, bm);
     572           0 :   }
     573             : 
     574             :  protected:
     575             :   RegExpNode* FilterSuccessor(int depth, bool ignore_case);
     576             : 
     577             :  private:
     578             :   RegExpNode* on_success_;
     579             : };
     580             : 
     581             : 
     582           0 : class ActionNode: public SeqRegExpNode {
     583             :  public:
     584             :   enum ActionType {
     585             :     SET_REGISTER,
     586             :     INCREMENT_REGISTER,
     587             :     STORE_POSITION,
     588             :     BEGIN_SUBMATCH,
     589             :     POSITIVE_SUBMATCH_SUCCESS,
     590             :     EMPTY_MATCH_CHECK,
     591             :     CLEAR_CAPTURES
     592             :   };
     593             :   static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
     594             :   static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
     595             :   static ActionNode* StorePosition(int reg,
     596             :                                    bool is_capture,
     597             :                                    RegExpNode* on_success);
     598             :   static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
     599             :   static ActionNode* BeginSubmatch(int stack_pointer_reg,
     600             :                                    int position_reg,
     601             :                                    RegExpNode* on_success);
     602             :   static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
     603             :                                              int restore_reg,
     604             :                                              int clear_capture_count,
     605             :                                              int clear_capture_from,
     606             :                                              RegExpNode* on_success);
     607             :   static ActionNode* EmptyMatchCheck(int start_register,
     608             :                                      int repetition_register,
     609             :                                      int repetition_limit,
     610             :                                      RegExpNode* on_success);
     611             :   virtual void Accept(NodeVisitor* visitor);
     612             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     613             :   virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     614       80720 :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     615             :                                     RegExpCompiler* compiler,
     616             :                                     int filled_in,
     617             :                                     bool not_at_start) {
     618       80720 :     return on_success()->GetQuickCheckDetails(
     619       80720 :         details, compiler, filled_in, not_at_start);
     620             :   }
     621             :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     622             :                             BoyerMooreLookahead* bm, bool not_at_start);
     623             :   ActionType action_type() { return action_type_; }
     624             :   // TODO(erikcorry): We should allow some action nodes in greedy loops.
     625      137100 :   virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
     626             : 
     627             :  private:
     628             :   union {
     629             :     struct {
     630             :       int reg;
     631             :       int value;
     632             :     } u_store_register;
     633             :     struct {
     634             :       int reg;
     635             :     } u_increment_register;
     636             :     struct {
     637             :       int reg;
     638             :       bool is_capture;
     639             :     } u_position_register;
     640             :     struct {
     641             :       int stack_pointer_register;
     642             :       int current_position_register;
     643             :       int clear_register_count;
     644             :       int clear_register_from;
     645             :     } u_submatch;
     646             :     struct {
     647             :       int start_register;
     648             :       int repetition_register;
     649             :       int repetition_limit;
     650             :     } u_empty_match_check;
     651             :     struct {
     652             :       int range_from;
     653             :       int range_to;
     654             :     } u_clear_captures;
     655             :   } data_;
     656             :   ActionNode(ActionType action_type, RegExpNode* on_success)
     657             :       : SeqRegExpNode(on_success),
     658     3295122 :         action_type_(action_type) { }
     659             :   ActionType action_type_;
     660             :   friend class DotPrinter;
     661             : };
     662             : 
     663             : 
     664           0 : class TextNode: public SeqRegExpNode {
     665             :  public:
     666             :   TextNode(ZoneList<TextElement>* elms, bool read_backward,
     667             :            RegExpNode* on_success)
     668     1665932 :       : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
     669      196167 :   TextNode(RegExpCharacterClass* that, bool read_backward,
     670             :            RegExpNode* on_success)
     671             :       : SeqRegExpNode(on_success),
     672      392334 :         elms_(new (zone()) ZoneList<TextElement>(1, zone())),
     673      392334 :         read_backward_(read_backward) {
     674      392334 :     elms_->Add(TextElement::CharClass(that), zone());
     675      196167 :   }
     676             :   // Create TextNode for a single character class for the given ranges.
     677             :   static TextNode* CreateForCharacterRanges(Zone* zone,
     678             :                                             ZoneList<CharacterRange>* ranges,
     679             :                                             bool read_backward,
     680             :                                             RegExpNode* on_success);
     681             :   // Create TextNode for a surrogate pair with a range given for the
     682             :   // lead and the trail surrogate each.
     683             :   static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
     684             :                                           CharacterRange trail,
     685             :                                           bool read_backward,
     686             :                                           RegExpNode* on_success);
     687             :   virtual void Accept(NodeVisitor* visitor);
     688             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     689             :   virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     690             :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     691             :                                     RegExpCompiler* compiler,
     692             :                                     int characters_filled_in,
     693             :                                     bool not_at_start);
     694             :   ZoneList<TextElement>* elements() { return elms_; }
     695             :   bool read_backward() { return read_backward_; }
     696             :   void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
     697             :   virtual int GreedyLoopTextLength();
     698             :   virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
     699             :       RegExpCompiler* compiler);
     700             :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     701             :                             BoyerMooreLookahead* bm, bool not_at_start);
     702             :   void CalculateOffsets();
     703             :   virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
     704             : 
     705             :  private:
     706             :   enum TextEmitPassType {
     707             :     NON_LATIN1_MATCH,            // Check for characters that can't match.
     708             :     SIMPLE_CHARACTER_MATCH,      // Case-dependent single character check.
     709             :     NON_LETTER_CHARACTER_MATCH,  // Check characters that have no case equivs.
     710             :     CASE_CHARACTER_MATCH,        // Case-independent single character check.
     711             :     CHARACTER_CLASS_MATCH        // Character class.
     712             :   };
     713             :   static bool SkipPass(int pass, bool ignore_case);
     714             :   static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
     715             :   static const int kLastPass = CHARACTER_CLASS_MATCH;
     716             :   void TextEmitPass(RegExpCompiler* compiler,
     717             :                     TextEmitPassType pass,
     718             :                     bool preloaded,
     719             :                     Trace* trace,
     720             :                     bool first_element_checked,
     721             :                     int* checked_up_to);
     722             :   int Length();
     723             :   ZoneList<TextElement>* elms_;
     724             :   bool read_backward_;
     725             : };
     726             : 
     727             : 
     728           0 : class AssertionNode: public SeqRegExpNode {
     729             :  public:
     730             :   enum AssertionType {
     731             :     AT_END,
     732             :     AT_START,
     733             :     AT_BOUNDARY,
     734             :     AT_NON_BOUNDARY,
     735             :     AFTER_NEWLINE
     736             :   };
     737        3809 :   static AssertionNode* AtEnd(RegExpNode* on_success) {
     738        3809 :     return new(on_success->zone()) AssertionNode(AT_END, on_success);
     739             :   }
     740        4673 :   static AssertionNode* AtStart(RegExpNode* on_success) {
     741        4673 :     return new(on_success->zone()) AssertionNode(AT_START, on_success);
     742             :   }
     743         167 :   static AssertionNode* AtBoundary(RegExpNode* on_success) {
     744         167 :     return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
     745             :   }
     746         143 :   static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
     747         143 :     return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
     748             :   }
     749         797 :   static AssertionNode* AfterNewline(RegExpNode* on_success) {
     750         797 :     return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
     751             :   }
     752             :   virtual void Accept(NodeVisitor* visitor);
     753             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     754             :   virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     755             :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     756             :                                     RegExpCompiler* compiler,
     757             :                                     int filled_in,
     758             :                                     bool not_at_start);
     759             :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     760             :                             BoyerMooreLookahead* bm, bool not_at_start);
     761             :   AssertionType assertion_type() { return assertion_type_; }
     762             : 
     763             :  private:
     764             :   void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
     765             :   enum IfPrevious { kIsNonWord, kIsWord };
     766             :   void BacktrackIfPrevious(RegExpCompiler* compiler,
     767             :                            Trace* trace,
     768             :                            IfPrevious backtrack_if_previous);
     769             :   AssertionNode(AssertionType t, RegExpNode* on_success)
     770        9589 :       : SeqRegExpNode(on_success), assertion_type_(t) { }
     771             :   AssertionType assertion_type_;
     772             : };
     773             : 
     774             : 
     775           0 : class BackReferenceNode: public SeqRegExpNode {
     776             :  public:
     777             :   BackReferenceNode(int start_reg, int end_reg, bool read_backward,
     778             :                     RegExpNode* on_success)
     779             :       : SeqRegExpNode(on_success),
     780             :         start_reg_(start_reg),
     781             :         end_reg_(end_reg),
     782        3090 :         read_backward_(read_backward) {}
     783             :   virtual void Accept(NodeVisitor* visitor);
     784             :   int start_register() { return start_reg_; }
     785             :   int end_register() { return end_reg_; }
     786             :   bool read_backward() { return read_backward_; }
     787             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     788             :   virtual int EatsAtLeast(int still_to_find,
     789             :                           int recursion_depth,
     790             :                           bool not_at_start);
     791         726 :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     792             :                                     RegExpCompiler* compiler,
     793             :                                     int characters_filled_in,
     794             :                                     bool not_at_start) {
     795         726 :     return;
     796             :   }
     797             :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     798             :                             BoyerMooreLookahead* bm, bool not_at_start);
     799             : 
     800             :  private:
     801             :   int start_reg_;
     802             :   int end_reg_;
     803             :   bool read_backward_;
     804             : };
     805             : 
     806             : 
     807           0 : class EndNode: public RegExpNode {
     808             :  public:
     809             :   enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
     810       96493 :   EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
     811             :   virtual void Accept(NodeVisitor* visitor);
     812             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     813      135207 :   virtual int EatsAtLeast(int still_to_find,
     814             :                           int recursion_depth,
     815      135207 :                           bool not_at_start) { return 0; }
     816           0 :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     817             :                                     RegExpCompiler* compiler,
     818             :                                     int characters_filled_in,
     819             :                                     bool not_at_start) {
     820             :     // Returning 0 from EatsAtLeast should ensure we never get here.
     821           0 :     UNREACHABLE();
     822             :   }
     823           0 :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     824             :                             BoyerMooreLookahead* bm, bool not_at_start) {
     825             :     // Returning 0 from EatsAtLeast should ensure we never get here.
     826           0 :     UNREACHABLE();
     827             :   }
     828             : 
     829             :  private:
     830             :   Action action_;
     831             : };
     832             : 
     833             : 
     834           0 : class NegativeSubmatchSuccess: public EndNode {
     835             :  public:
     836             :   NegativeSubmatchSuccess(int stack_pointer_reg,
     837             :                           int position_reg,
     838             :                           int clear_capture_count,
     839             :                           int clear_capture_start,
     840             :                           Zone* zone)
     841             :       : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
     842             :         stack_pointer_register_(stack_pointer_reg),
     843             :         current_position_register_(position_reg),
     844             :         clear_capture_count_(clear_capture_count),
     845        3515 :         clear_capture_start_(clear_capture_start) { }
     846             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     847             : 
     848             :  private:
     849             :   int stack_pointer_register_;
     850             :   int current_position_register_;
     851             :   int clear_capture_count_;
     852             :   int clear_capture_start_;
     853             : };
     854             : 
     855             : 
     856             : class Guard: public ZoneObject {
     857             :  public:
     858             :   enum Relation { LT, GEQ };
     859             :   Guard(int reg, Relation op, int value)
     860             :       : reg_(reg),
     861             :         op_(op),
     862     1507750 :         value_(value) { }
     863             :   int reg() { return reg_; }
     864             :   Relation op() { return op_; }
     865             :   int value() { return value_; }
     866             : 
     867             :  private:
     868             :   int reg_;
     869             :   Relation op_;
     870             :   int value_;
     871             : };
     872             : 
     873             : 
     874             : class GuardedAlternative {
     875             :  public:
     876     3228627 :   explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
     877             :   void AddGuard(Guard* guard, Zone* zone);
     878             :   RegExpNode* node() { return node_; }
     879       86141 :   void set_node(RegExpNode* node) { node_ = node; }
     880             :   ZoneList<Guard*>* guards() { return guards_; }
     881             : 
     882             :  private:
     883             :   RegExpNode* node_;
     884             :   ZoneList<Guard*>* guards_;
     885             : };
     886             : 
     887             : 
     888             : class AlternativeGeneration;
     889             : 
     890             : 
     891           0 : class ChoiceNode: public RegExpNode {
     892             :  public:
     893     1649352 :   explicit ChoiceNode(int expected_size, Zone* zone)
     894             :       : RegExpNode(zone),
     895             :         alternatives_(new(zone)
     896     1649352 :                       ZoneList<GuardedAlternative>(expected_size, zone)),
     897             :         table_(NULL),
     898             :         not_at_start_(false),
     899     3298704 :         being_calculated_(false) { }
     900             :   virtual void Accept(NodeVisitor* visitor);
     901     3377266 :   void AddAlternative(GuardedAlternative node) {
     902     3377266 :     alternatives()->Add(node, zone());
     903             :   }
     904             :   ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
     905             :   DispatchTable* GetTable(bool ignore_case);
     906             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     907             :   virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     908             :   int EatsAtLeastHelper(int still_to_find,
     909             :                         int budget,
     910             :                         RegExpNode* ignore_this_node,
     911             :                         bool not_at_start);
     912             :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     913             :                                     RegExpCompiler* compiler,
     914             :                                     int characters_filled_in,
     915             :                                     bool not_at_start);
     916             :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     917             :                             BoyerMooreLookahead* bm, bool not_at_start);
     918             : 
     919             :   bool being_calculated() { return being_calculated_; }
     920             :   bool not_at_start() { return not_at_start_; }
     921        7690 :   void set_not_at_start() { not_at_start_ = true; }
     922           0 :   void set_being_calculated(bool b) { being_calculated_ = b; }
     923      587302 :   virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
     924      587302 :     return true;
     925             :   }
     926             :   virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
     927           0 :   virtual bool read_backward() { return false; }
     928             : 
     929             :  protected:
     930             :   int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
     931             :   ZoneList<GuardedAlternative>* alternatives_;
     932             : 
     933             :  private:
     934             :   friend class DispatchTableConstructor;
     935             :   friend class Analysis;
     936             :   void GenerateGuard(RegExpMacroAssembler* macro_assembler,
     937             :                      Guard* guard,
     938             :                      Trace* trace);
     939             :   int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
     940             :   void EmitOutOfLineContinuation(RegExpCompiler* compiler,
     941             :                                  Trace* trace,
     942             :                                  GuardedAlternative alternative,
     943             :                                  AlternativeGeneration* alt_gen,
     944             :                                  int preload_characters,
     945             :                                  bool next_expects_preload);
     946             :   void SetUpPreLoad(RegExpCompiler* compiler,
     947             :                     Trace* current_trace,
     948             :                     PreloadState* preloads);
     949             :   void AssertGuardsMentionRegisters(Trace* trace);
     950             :   int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
     951             :   Trace* EmitGreedyLoop(RegExpCompiler* compiler,
     952             :                         Trace* trace,
     953             :                         AlternativeGenerationList* alt_gens,
     954             :                         PreloadState* preloads,
     955             :                         GreedyLoopState* greedy_loop_state,
     956             :                         int text_length);
     957             :   void EmitChoices(RegExpCompiler* compiler,
     958             :                    AlternativeGenerationList* alt_gens,
     959             :                    int first_choice,
     960             :                    Trace* trace,
     961             :                    PreloadState* preloads);
     962             :   DispatchTable* table_;
     963             :   // If true, this node is never checked at the start of the input.
     964             :   // Allows a new trace to start with at_start() set to false.
     965             :   bool not_at_start_;
     966             :   bool being_calculated_;
     967             : };
     968             : 
     969             : 
     970           0 : class NegativeLookaroundChoiceNode : public ChoiceNode {
     971             :  public:
     972        3515 :   explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
     973             :                                         GuardedAlternative then_do_this,
     974             :                                         Zone* zone)
     975        3515 :       : ChoiceNode(2, zone) {
     976             :     AddAlternative(this_must_fail);
     977             :     AddAlternative(then_do_this);
     978        3515 :   }
     979             :   virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     980             :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     981             :                                     RegExpCompiler* compiler,
     982             :                                     int characters_filled_in,
     983             :                                     bool not_at_start);
     984        1423 :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
     985             :                             BoyerMooreLookahead* bm, bool not_at_start) {
     986        2846 :     alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm,
     987        1423 :                                               not_at_start);
     988        1423 :     if (offset == 0) set_bm_info(not_at_start, bm);
     989        1423 :   }
     990             :   // For a negative lookahead we don't emit the quick check for the
     991             :   // alternative that is expected to fail.  This is because quick check code
     992             :   // starts by loading enough characters for the alternative that takes fewest
     993             :   // characters, but on a negative lookahead the negative branch did not take
     994             :   // part in that calculation (EatsAtLeast) so the assumptions don't hold.
     995        7002 :   virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
     996        7002 :     return !is_first;
     997             :   }
     998             :   virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
     999             : };
    1000             : 
    1001             : 
    1002           0 : class LoopChoiceNode: public ChoiceNode {
    1003             :  public:
    1004             :   LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone)
    1005             :       : ChoiceNode(2, zone),
    1006             :         loop_node_(NULL),
    1007             :         continue_node_(NULL),
    1008             :         body_can_be_zero_length_(body_can_be_zero_length),
    1009     1612556 :         read_backward_(read_backward) {}
    1010             :   void AddLoopAlternative(GuardedAlternative alt);
    1011             :   void AddContinueAlternative(GuardedAlternative alt);
    1012             :   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
    1013             :   virtual int EatsAtLeast(int still_to_find,  int budget, bool not_at_start);
    1014             :   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
    1015             :                                     RegExpCompiler* compiler,
    1016             :                                     int characters_filled_in,
    1017             :                                     bool not_at_start);
    1018             :   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
    1019             :                             BoyerMooreLookahead* bm, bool not_at_start);
    1020             :   RegExpNode* loop_node() { return loop_node_; }
    1021             :   RegExpNode* continue_node() { return continue_node_; }
    1022             :   bool body_can_be_zero_length() { return body_can_be_zero_length_; }
    1023       32080 :   virtual bool read_backward() { return read_backward_; }
    1024             :   virtual void Accept(NodeVisitor* visitor);
    1025             :   virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
    1026             : 
    1027             :  private:
    1028             :   // AddAlternative is made private for loop nodes because alternatives
    1029             :   // should not be added freely, we need to keep track of which node
    1030             :   // goes back to the node itself.
    1031     3225112 :   void AddAlternative(GuardedAlternative node) {
    1032             :     ChoiceNode::AddAlternative(node);
    1033     3225112 :   }
    1034             : 
    1035             :   RegExpNode* loop_node_;
    1036             :   RegExpNode* continue_node_;
    1037             :   bool body_can_be_zero_length_;
    1038             :   bool read_backward_;
    1039             : };
    1040             : 
    1041             : 
    1042             : // Improve the speed that we scan for an initial point where a non-anchored
    1043             : // regexp can match by using a Boyer-Moore-like table. This is done by
    1044             : // identifying non-greedy non-capturing loops in the nodes that eat any
    1045             : // character one at a time.  For example in the middle of the regexp
    1046             : // /foo[\s\S]*?bar/ we find such a loop.  There is also such a loop implicitly
    1047             : // inserted at the start of any non-anchored regexp.
    1048             : //
    1049             : // When we have found such a loop we look ahead in the nodes to find the set of
    1050             : // characters that can come at given distances. For example for the regexp
    1051             : // /.?foo/ we know that there are at least 3 characters ahead of us, and the
    1052             : // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
    1053             : // the lookahead info where the set of characters is reasonably constrained. In
    1054             : // our example this is from index 1 to 2 (0 is not constrained). We can now
    1055             : // look 3 characters ahead and if we don't find one of [f, o] (the union of
    1056             : // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
    1057             : //
    1058             : // For Unicode input strings we do the same, but modulo 128.
    1059             : //
    1060             : // We also look at the first string fed to the regexp and use that to get a hint
    1061             : // of the character frequencies in the inputs. This affects the assessment of
    1062             : // whether the set of characters is 'reasonably constrained'.
    1063             : //
    1064             : // We also have another lookahead mechanism (called quick check in the code),
    1065             : // which uses a wide load of multiple characters followed by a mask and compare
    1066             : // to determine whether a match is possible at this point.
    1067             : enum ContainedInLattice {
    1068             :   kNotYet = 0,
    1069             :   kLatticeIn = 1,
    1070             :   kLatticeOut = 2,
    1071             :   kLatticeUnknown = 3  // Can also mean both in and out.
    1072             : };
    1073             : 
    1074             : 
    1075             : inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
    1076      861889 :   return static_cast<ContainedInLattice>(a | b);
    1077             : }
    1078             : 
    1079             : 
    1080             : ContainedInLattice AddRange(ContainedInLattice a,
    1081             :                             const int* ranges,
    1082             :                             int ranges_size,
    1083             :                             Interval new_range);
    1084             : 
    1085             : 
    1086             : class BoyerMoorePositionInfo : public ZoneObject {
    1087             :  public:
    1088      113434 :   explicit BoyerMoorePositionInfo(Zone* zone)
    1089             :       : map_(new(zone) ZoneList<bool>(kMapSize, zone)),
    1090             :         map_count_(0),
    1091             :         w_(kNotYet),
    1092             :         s_(kNotYet),
    1093             :         d_(kNotYet),
    1094      113434 :         surrogate_(kNotYet) {
    1095    14632986 :      for (int i = 0; i < kMapSize; i++) {
    1096    14519552 :        map_->Add(false, zone);
    1097             :      }
    1098      113434 :   }
    1099             : 
    1100             :   bool& at(int i) { return map_->at(i); }
    1101             : 
    1102             :   static const int kMapSize = 128;
    1103             :   static const int kMask = kMapSize - 1;
    1104             : 
    1105             :   int map_count() const { return map_count_; }
    1106             : 
    1107             :   void Set(int character);
    1108             :   void SetInterval(const Interval& interval);
    1109             :   void SetAll();
    1110             :   bool is_non_word() { return w_ == kLatticeOut; }
    1111             :   bool is_word() { return w_ == kLatticeIn; }
    1112             : 
    1113             :  private:
    1114             :   ZoneList<bool>* map_;
    1115             :   int map_count_;  // Number of set bits in the map.
    1116             :   ContainedInLattice w_;  // The \w character class.
    1117             :   ContainedInLattice s_;  // The \s character class.
    1118             :   ContainedInLattice d_;  // The \d character class.
    1119             :   ContainedInLattice surrogate_;  // Surrogate UTF-16 code units.
    1120             : };
    1121             : 
    1122             : 
    1123             : class BoyerMooreLookahead : public ZoneObject {
    1124             :  public:
    1125             :   BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone);
    1126             : 
    1127             :   int length() { return length_; }
    1128             :   int max_char() { return max_char_; }
    1129             :   RegExpCompiler* compiler() { return compiler_; }
    1130             : 
    1131             :   int Count(int map_number) {
    1132      599221 :     return bitmaps_->at(map_number)->map_count();
    1133             :   }
    1134             : 
    1135         193 :   BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
    1136             : 
    1137       50775 :   void Set(int map_number, int character) {
    1138      101550 :     if (character > max_char_) return;
    1139      101550 :     BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
    1140             :     info->Set(character);
    1141             :   }
    1142             : 
    1143      442708 :   void SetInterval(int map_number, const Interval& interval) {
    1144      442708 :     if (interval.from() > max_char_) return;
    1145      442708 :     BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
    1146      221354 :     if (interval.to() > max_char_) {
    1147           0 :       info->SetInterval(Interval(interval.from(), max_char_));
    1148             :     } else {
    1149      221354 :       info->SetInterval(interval);
    1150             :     }
    1151             :   }
    1152             : 
    1153        6750 :   void SetAll(int map_number) {
    1154       13500 :     bitmaps_->at(map_number)->SetAll();
    1155        6750 :   }
    1156             : 
    1157        1596 :   void SetRest(int from_map) {
    1158        1596 :     for (int i = from_map; i < length_; i++) SetAll(i);
    1159             :   }
    1160             :   void EmitSkipInstructions(RegExpMacroAssembler* masm);
    1161             : 
    1162             :  private:
    1163             :   // This is the value obtained by EatsAtLeast.  If we do not have at least this
    1164             :   // many characters left in the sample string then the match is bound to fail.
    1165             :   // Therefore it is OK to read a character this far ahead of the current match
    1166             :   // point.
    1167             :   int length_;
    1168             :   RegExpCompiler* compiler_;
    1169             :   // 0xff for Latin1, 0xffff for UTF-16.
    1170             :   int max_char_;
    1171             :   ZoneList<BoyerMoorePositionInfo*>* bitmaps_;
    1172             : 
    1173             :   int GetSkipTable(int min_lookahead,
    1174             :                    int max_lookahead,
    1175             :                    Handle<ByteArray> boolean_skip_table);
    1176             :   bool FindWorthwhileInterval(int* from, int* to);
    1177             :   int FindBestInterval(
    1178             :     int max_number_of_chars, int old_biggest_points, int* from, int* to);
    1179             : };
    1180             : 
    1181             : 
    1182             : // There are many ways to generate code for a node.  This class encapsulates
    1183             : // the current way we should be generating.  In other words it encapsulates
    1184             : // the current state of the code generator.  The effect of this is that we
    1185             : // generate code for paths that the matcher can take through the regular
    1186             : // expression.  A given node in the regexp can be code-generated several times
    1187             : // as it can be part of several traces.  For example for the regexp:
    1188             : // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
    1189             : // of the foo-bar-baz trace and once as part of the foo-ip-baz trace.  The code
    1190             : // to match foo is generated only once (the traces have a common prefix).  The
    1191             : // code to store the capture is deferred and generated (twice) after the places
    1192             : // where baz has been matched.
    1193             : class Trace {
    1194             :  public:
    1195             :   // A value for a property that is either known to be true, know to be false,
    1196             :   // or not known.
    1197             :   enum TriBool {
    1198             :     UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1
    1199             :   };
    1200             : 
    1201             :   class DeferredAction {
    1202             :    public:
    1203             :     DeferredAction(ActionNode::ActionType action_type, int reg)
    1204      320303 :         : action_type_(action_type), reg_(reg), next_(NULL) { }
    1205             :     DeferredAction* next() { return next_; }
    1206             :     bool Mentions(int reg);
    1207             :     int reg() { return reg_; }
    1208             :     ActionNode::ActionType action_type() { return action_type_; }
    1209             :    private:
    1210             :     ActionNode::ActionType action_type_;
    1211             :     int reg_;
    1212             :     DeferredAction* next_;
    1213             :     friend class Trace;
    1214             :   };
    1215             : 
    1216             :   class DeferredCapture : public DeferredAction {
    1217             :    public:
    1218      302875 :     DeferredCapture(int reg, bool is_capture, Trace* trace)
    1219             :         : DeferredAction(ActionNode::STORE_POSITION, reg),
    1220             :           cp_offset_(trace->cp_offset()),
    1221      302875 :           is_capture_(is_capture) { }
    1222             :     int cp_offset() { return cp_offset_; }
    1223             :     bool is_capture() { return is_capture_; }
    1224             :    private:
    1225             :     int cp_offset_;
    1226             :     bool is_capture_;
    1227             :     void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
    1228             :   };
    1229             : 
    1230             :   class DeferredSetRegister : public DeferredAction {
    1231             :    public:
    1232             :     DeferredSetRegister(int reg, int value)
    1233             :         : DeferredAction(ActionNode::SET_REGISTER, reg),
    1234        6226 :           value_(value) { }
    1235             :     int value() { return value_; }
    1236             :    private:
    1237             :     int value_;
    1238             :   };
    1239             : 
    1240             :   class DeferredClearCaptures : public DeferredAction {
    1241             :    public:
    1242             :     explicit DeferredClearCaptures(Interval range)
    1243             :         : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
    1244        3897 :           range_(range) { }
    1245             :     Interval range() { return range_; }
    1246             :    private:
    1247             :     Interval range_;
    1248             :   };
    1249             : 
    1250             :   class DeferredIncrementRegister : public DeferredAction {
    1251             :    public:
    1252             :     explicit DeferredIncrementRegister(int reg)
    1253             :         : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
    1254             :   };
    1255             : 
    1256      752663 :   Trace()
    1257             :       : cp_offset_(0),
    1258             :         actions_(NULL),
    1259             :         backtrack_(NULL),
    1260             :         stop_node_(NULL),
    1261             :         loop_label_(NULL),
    1262             :         characters_preloaded_(0),
    1263             :         bound_checked_up_to_(0),
    1264             :         flush_budget_(100),
    1265     1505326 :         at_start_(UNKNOWN) { }
    1266             : 
    1267             :   // End the trace.  This involves flushing the deferred actions in the trace
    1268             :   // and pushing a backtrack location onto the backtrack stack.  Once this is
    1269             :   // done we can start a new trace or go to one that has already been
    1270             :   // generated.
    1271             :   void Flush(RegExpCompiler* compiler, RegExpNode* successor);
    1272             :   int cp_offset() { return cp_offset_; }
    1273             :   DeferredAction* actions() { return actions_; }
    1274             :   // A trivial trace is one that has no deferred actions or other state that
    1275             :   // affects the assumptions used when generating code.  There is no recorded
    1276             :   // backtrack location in a trivial trace, so with a trivial trace we will
    1277             :   // generate code that, on a failure to match, gets the backtrack location
    1278             :   // from the backtrack stack rather than using a direct jump instruction.  We
    1279             :   // always start code generation with a trivial trace and non-trivial traces
    1280             :   // are created as we emit code for nodes or add to the list of deferred
    1281             :   // actions in the trace.  The location of the code generated for a node using
    1282             :   // a trivial trace is recorded in a label in the node so that gotos can be
    1283             :   // generated to that code.
    1284     2601318 :   bool is_trivial() {
    1285     4061037 :     return backtrack_ == NULL &&
    1286     2744750 :            actions_ == NULL &&
    1287     1285031 :            cp_offset_ == 0 &&
    1288     1067772 :            characters_preloaded_ == 0 &&
    1289     1030254 :            bound_checked_up_to_ == 0 &&
    1290     4661826 :            quick_check_performed_.characters() == 0 &&
    1291     3631572 :            at_start_ == UNKNOWN;
    1292             :   }
    1293             :   TriBool at_start() { return at_start_; }
    1294      641236 :   void set_at_start(TriBool at_start) { at_start_ = at_start; }
    1295             :   Label* backtrack() { return backtrack_; }
    1296             :   Label* loop_label() { return loop_label_; }
    1297             :   RegExpNode* stop_node() { return stop_node_; }
    1298             :   int characters_preloaded() { return characters_preloaded_; }
    1299             :   int bound_checked_up_to() { return bound_checked_up_to_; }
    1300             :   int flush_budget() { return flush_budget_; }
    1301             :   QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
    1302             :   bool mentions_reg(int reg);
    1303             :   // Returns true if a deferred position store exists to the specified
    1304             :   // register and stores the offset in the out-parameter.  Otherwise
    1305             :   // returns false.
    1306             :   bool GetStoredPosition(int reg, int* cp_offset);
    1307             :   // These set methods and AdvanceCurrentPositionInTrace should be used only on
    1308             :   // new traces - the intention is that traces are immutable after creation.
    1309             :   void add_action(DeferredAction* new_action) {
    1310             :     DCHECK(new_action->next_ == NULL);
    1311      320303 :     new_action->next_ = actions_;
    1312      320303 :     actions_ = new_action;
    1313             :   }
    1314      956252 :   void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
    1315       16040 :   void set_stop_node(RegExpNode* node) { stop_node_ = node; }
    1316       16040 :   void set_loop_label(Label* label) { loop_label_ = label; }
    1317      984147 :   void set_characters_preloaded(int count) { characters_preloaded_ = count; }
    1318      544001 :   void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
    1319      250811 :   void set_flush_budget(int to) { flush_budget_ = to; }
    1320             :   void set_quick_check_performed(QuickCheckDetails* d) {
    1321      253946 :     quick_check_performed_ = *d;
    1322             :   }
    1323             :   void InvalidateCurrentCharacter();
    1324             :   void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
    1325             : 
    1326             :  private:
    1327             :   int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
    1328             :   void PerformDeferredActions(RegExpMacroAssembler* macro,
    1329             :                               int max_register,
    1330             :                               const OutSet& affected_registers,
    1331             :                               OutSet* registers_to_pop,
    1332             :                               OutSet* registers_to_clear,
    1333             :                               Zone* zone);
    1334             :   void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
    1335             :                                 int max_register,
    1336             :                                 const OutSet& registers_to_pop,
    1337             :                                 const OutSet& registers_to_clear);
    1338             :   int cp_offset_;
    1339             :   DeferredAction* actions_;
    1340             :   Label* backtrack_;
    1341             :   RegExpNode* stop_node_;
    1342             :   Label* loop_label_;
    1343             :   int characters_preloaded_;
    1344             :   int bound_checked_up_to_;
    1345             :   QuickCheckDetails quick_check_performed_;
    1346             :   int flush_budget_;
    1347             :   TriBool at_start_;
    1348             : };
    1349             : 
    1350             : 
    1351             : class GreedyLoopState {
    1352             :  public:
    1353             :   explicit GreedyLoopState(bool not_at_start);
    1354             : 
    1355             :   Label* label() { return &label_; }
    1356             :   Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; }
    1357             : 
    1358             :  private:
    1359             :   Label label_;
    1360             :   Trace counter_backtrack_trace_;
    1361             : };
    1362             : 
    1363             : 
    1364             : struct PreloadState {
    1365             :   static const int kEatsAtLeastNotYetInitialized = -1;
    1366             :   bool preload_is_current_;
    1367             :   bool preload_has_checked_bounds_;
    1368             :   int preload_characters_;
    1369             :   int eats_at_least_;
    1370             :   void init() {
    1371      268841 :     eats_at_least_ = kEatsAtLeastNotYetInitialized;
    1372             :   }
    1373             : };
    1374             : 
    1375             : 
    1376       92622 : class NodeVisitor {
    1377             :  public:
    1378       92622 :   virtual ~NodeVisitor() { }
    1379             : #define DECLARE_VISIT(Type)                                          \
    1380             :   virtual void Visit##Type(Type##Node* that) = 0;
    1381             : FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    1382             : #undef DECLARE_VISIT
    1383           0 :   virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
    1384             : };
    1385             : 
    1386             : 
    1387             : // Node visitor used to add the start set of the alternatives to the
    1388             : // dispatch table of a choice node.
    1389           0 : class DispatchTableConstructor: public NodeVisitor {
    1390             :  public:
    1391             :   DispatchTableConstructor(DispatchTable* table, bool ignore_case,
    1392             :                            Zone* zone)
    1393             :       : table_(table),
    1394             :         choice_index_(-1),
    1395             :         ignore_case_(ignore_case),
    1396           0 :         zone_(zone) { }
    1397             : 
    1398             :   void BuildTable(ChoiceNode* node);
    1399             : 
    1400         812 :   void AddRange(CharacterRange range) {
    1401        1624 :     table()->AddRange(range, choice_index_, zone_);
    1402             :   }
    1403             : 
    1404             :   void AddInverse(ZoneList<CharacterRange>* ranges);
    1405             : 
    1406             : #define DECLARE_VISIT(Type)                                          \
    1407             :   virtual void Visit##Type(Type##Node* that);
    1408             : FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    1409             : #undef DECLARE_VISIT
    1410             : 
    1411             :   DispatchTable* table() { return table_; }
    1412           0 :   void set_choice_index(int value) { choice_index_ = value; }
    1413             : 
    1414             :  protected:
    1415             :   DispatchTable* table_;
    1416             :   int choice_index_;
    1417             :   bool ignore_case_;
    1418             :   Zone* zone_;
    1419             : };
    1420             : 
    1421             : 
    1422             : // Assertion propagation moves information about assertions such as
    1423             : // \b to the affected nodes.  For instance, in /.\b./ information must
    1424             : // be propagated to the first '.' that whatever follows needs to know
    1425             : // if it matched a word or a non-word, and to the second '.' that it
    1426             : // has to check if it succeeds a word or non-word.  In this case the
    1427             : // result will be something like:
    1428             : //
    1429             : //   +-------+        +------------+
    1430             : //   |   .   |        |      .     |
    1431             : //   +-------+  --->  +------------+
    1432             : //   | word? |        | check word |
    1433             : //   +-------+        +------------+
    1434       92622 : class Analysis: public NodeVisitor {
    1435             :  public:
    1436             :   Analysis(Isolate* isolate, JSRegExp::Flags flags, bool is_one_byte)
    1437             :       : isolate_(isolate),
    1438             :         flags_(flags),
    1439             :         is_one_byte_(is_one_byte),
    1440       92622 :         error_message_(NULL) {}
    1441             :   void EnsureAnalyzed(RegExpNode* node);
    1442             : 
    1443             : #define DECLARE_VISIT(Type)                                          \
    1444             :   virtual void Visit##Type(Type##Node* that);
    1445             : FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    1446             : #undef DECLARE_VISIT
    1447             :   virtual void VisitLoopChoice(LoopChoiceNode* that);
    1448             : 
    1449             :   bool has_failed() { return error_message_ != NULL; }
    1450             :   const char* error_message() {
    1451             :     DCHECK(error_message_ != NULL);
    1452             :     return error_message_;
    1453             :   }
    1454             :   void fail(const char* error_message) {
    1455         441 :     error_message_ = error_message;
    1456             :   }
    1457             : 
    1458             :   Isolate* isolate() const { return isolate_; }
    1459             : 
    1460             :   bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
    1461             :   bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
    1462             : 
    1463             :  private:
    1464             :   Isolate* isolate_;
    1465             :   JSRegExp::Flags flags_;
    1466             :   bool is_one_byte_;
    1467             :   const char* error_message_;
    1468             : 
    1469             :   DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
    1470             : };
    1471             : 
    1472             : 
    1473             : struct RegExpCompileData {
    1474             :   RegExpCompileData()
    1475             :     : tree(NULL),
    1476             :       node(NULL),
    1477             :       simple(true),
    1478             :       contains_anchor(false),
    1479      912522 :       capture_count(0) { }
    1480             :   RegExpTree* tree;
    1481             :   RegExpNode* node;
    1482             :   bool simple;
    1483             :   bool contains_anchor;
    1484             :   Handle<FixedArray> capture_name_map;
    1485             :   Handle<String> error;
    1486             :   int capture_count;
    1487             : };
    1488             : 
    1489             : 
    1490             : class RegExpEngine: public AllStatic {
    1491             :  public:
    1492             :   struct CompilationResult {
    1493             :     CompilationResult(Isolate* isolate, const char* error_message)
    1494             :         : error_message(error_message),
    1495         456 :           code(isolate->heap()->the_hole_value()),
    1496         912 :           num_registers(0) {}
    1497             :     CompilationResult(Object* code, int registers)
    1498       92181 :         : error_message(NULL), code(code), num_registers(registers) {}
    1499             :     const char* error_message;
    1500             :     Object* code;
    1501             :     int num_registers;
    1502             :   };
    1503             : 
    1504             :   static CompilationResult Compile(Isolate* isolate, Zone* zone,
    1505             :                                    RegExpCompileData* input,
    1506             :                                    JSRegExp::Flags flags,
    1507             :                                    Handle<String> pattern,
    1508             :                                    Handle<String> sample_subject,
    1509             :                                    bool is_one_byte);
    1510             : 
    1511             :   static bool TooMuchRegExpCode(Handle<String> pattern);
    1512             : 
    1513             :   static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
    1514             : };
    1515             : 
    1516             : 
    1517             : class RegExpResultsCache : public AllStatic {
    1518             :  public:
    1519             :   enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
    1520             : 
    1521             :   // Attempt to retrieve a cached result.  On failure, 0 is returned as a Smi.
    1522             :   // On success, the returned result is guaranteed to be a COW-array.
    1523             :   static Object* Lookup(Heap* heap, String* key_string, Object* key_pattern,
    1524             :                         FixedArray** last_match_out, ResultsCacheType type);
    1525             :   // Attempt to add value_array to the cache specified by type.  On success,
    1526             :   // value_array is turned into a COW-array.
    1527             :   static void Enter(Isolate* isolate, Handle<String> key_string,
    1528             :                     Handle<Object> key_pattern, Handle<FixedArray> value_array,
    1529             :                     Handle<FixedArray> last_match_cache, ResultsCacheType type);
    1530             :   static void Clear(FixedArray* cache);
    1531             :   static const int kRegExpResultsCacheSize = 0x100;
    1532             : 
    1533             :  private:
    1534             :   static const int kArrayEntriesPerCacheEntry = 4;
    1535             :   static const int kStringOffset = 0;
    1536             :   static const int kPatternOffset = 1;
    1537             :   static const int kArrayOffset = 2;
    1538             :   static const int kLastMatchOffset = 3;
    1539             : };
    1540             : 
    1541             : }  // namespace internal
    1542             : }  // namespace v8
    1543             : 
    1544             : #endif  // V8_REGEXP_JSREGEXP_H_

Generated by: LCOV version 1.10