LCOV - app.info - src/parsing/scanner.cc

LCOV - code coverage report

Current view:	top level - src/parsing - scanner.cc (source / functions)		Hit	Total	Coverage
Test:	app.info	Lines:	571	582	98.1 %
Date:	2017-04-26	Functions:	50	54	92.6 %

          Line data    Source code

       1             : // Copyright 2011 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : // Features shared by parsing and pre-parsing scanners.
       6             : 
       7             : #include "src/parsing/scanner.h"
       8             : 
       9             : #include <stdint.h>
      10             : 
      11             : #include <cmath>
      12             : 
      13             : #include "src/ast/ast-value-factory.h"
      14             : #include "src/char-predicates-inl.h"
      15             : #include "src/conversions-inl.h"
      16             : #include "src/list-inl.h"
      17             : #include "src/parsing/duplicate-finder.h"  // For Scanner::FindSymbol
      18             : 
      19             : namespace v8 {
      20             : namespace internal {
      21             : 
      22             : class Scanner::ErrorState {
      23             :  public:
      24             :   ErrorState(MessageTemplate::Template* message_stack,
      25             :              Scanner::Location* location_stack)
      26             :       : message_stack_(message_stack),
      27             :         old_message_(*message_stack),
      28             :         location_stack_(location_stack),
      29      337302 :         old_location_(*location_stack) {
      30      337302 :     *message_stack_ = MessageTemplate::kNone;
      31      337302 :     *location_stack_ = Location::invalid();
      32             :   }
      33             : 
      34             :   ~ErrorState() {
      35      337302 :     *message_stack_ = old_message_;
      36      337302 :     *location_stack_ = old_location_;
      37             :   }
      38             : 
      39             :   void MoveErrorTo(TokenDesc* dest) {
      40       63496 :     if (*message_stack_ == MessageTemplate::kNone) {
      41             :       return;
      42             :     }
      43       27692 :     if (dest->invalid_template_escape_message == MessageTemplate::kNone) {
      44       27692 :       dest->invalid_template_escape_message = *message_stack_;
      45       27692 :       dest->invalid_template_escape_location = *location_stack_;
      46             :     }
      47       27692 :     *message_stack_ = MessageTemplate::kNone;
      48       27692 :     *location_stack_ = Location::invalid();
      49             :   }
      50             : 
      51             :  private:
      52             :   MessageTemplate::Template* const message_stack_;
      53             :   MessageTemplate::Template const old_message_;
      54             :   Scanner::Location* const location_stack_;
      55             :   Scanner::Location const old_location_;
      56             : };
      57             : 
      58             : // ----------------------------------------------------------------------------
      59             : // Scanner::LiteralBuffer
      60             : 
      61        2439 : Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
      62        2439 :   if (is_one_byte()) {
      63        2439 :     return isolate->factory()->InternalizeOneByteString(one_byte_literal());
      64             :   }
      65           0 :   return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
      66             : }
      67             : 
      68           0 : int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
      69     4621452 :   int capacity = Max(min_capacity, backing_store_.length());
      70     4621460 :   int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
      71           0 :   return new_capacity;
      72             : }
      73             : 
      74     4621452 : void Scanner::LiteralBuffer::ExpandBuffer() {
      75             :   Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
      76     4621463 :   MemCopy(new_store.start(), backing_store_.start(), position_);
      77             :   backing_store_.Dispose();
      78     4621463 :   backing_store_ = new_store;
      79     4621463 : }
      80             : 
      81       67037 : void Scanner::LiteralBuffer::ConvertToTwoByte() {
      82             :   DCHECK(is_one_byte_);
      83             :   Vector<byte> new_store;
      84       67037 :   int new_content_size = position_ * kUC16Size;
      85      134074 :   if (new_content_size >= backing_store_.length()) {
      86             :     // Ensure room for all currently read code units as UC16 as well
      87             :     // as the code unit about to be stored.
      88             :     new_store = Vector<byte>::New(NewCapacity(new_content_size));
      89             :   } else {
      90       67029 :     new_store = backing_store_;
      91             :   }
      92             :   uint8_t* src = backing_store_.start();
      93             :   uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
      94      142596 :   for (int i = position_ - 1; i >= 0; i--) {
      95       75559 :     dst[i] = src[i];
      96             :   }
      97       67037 :   if (new_store.start() != backing_store_.start()) {
      98             :     backing_store_.Dispose();
      99           8 :     backing_store_ = new_store;
     100             :   }
     101       67037 :   position_ = new_content_size;
     102       67037 :   is_one_byte_ = false;
     103       67037 : }
     104             : 
     105     1457068 : void Scanner::LiteralBuffer::AddCharSlow(uc32 code_unit) {
     106     2930082 :   if (position_ >= backing_store_.length()) ExpandBuffer();
     107     1457068 :   if (is_one_byte_) {
     108       67037 :     if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
     109           0 :       backing_store_[position_] = static_cast<byte>(code_unit);
     110           0 :       position_ += kOneByteSize;
     111     1457068 :       return;
     112             :     }
     113       67037 :     ConvertToTwoByte();
     114             :   }
     115     1457068 :   if (code_unit <=
     116             :       static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
     117     2898190 :     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
     118     1449095 :     position_ += kUC16Size;
     119             :   } else {
     120        7973 :     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
     121       15946 :         unibrow::Utf16::LeadSurrogate(code_unit);
     122        7973 :     position_ += kUC16Size;
     123        7973 :     if (position_ >= backing_store_.length()) ExpandBuffer();
     124        7973 :     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
     125        7973 :         unibrow::Utf16::TrailSurrogate(code_unit);
     126        7973 :     position_ += kUC16Size;
     127             :   }
     128             : }
     129             : 
     130             : // ----------------------------------------------------------------------------
     131             : // Scanner::BookmarkScope
     132             : 
     133             : const size_t Scanner::BookmarkScope::kBookmarkAtFirstPos =
     134             :     std::numeric_limits<size_t>::max() - 2;
     135             : const size_t Scanner::BookmarkScope::kNoBookmark =
     136             :     std::numeric_limits<size_t>::max() - 1;
     137             : const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
     138             :     std::numeric_limits<size_t>::max();
     139             : 
     140     1434328 : void Scanner::BookmarkScope::Set() {
     141             :   DCHECK_EQ(bookmark_, kNoBookmark);
     142             :   DCHECK_EQ(scanner_->next_next_.token, Token::UNINITIALIZED);
     143             : 
     144             :   // The first token is a bit special, since current_ will still be
     145             :   // uninitialized. In this case, store kBookmarkAtFirstPos and special-case it
     146             :   // when
     147             :   // applying the bookmark.
     148             :   DCHECK_IMPLIES(
     149             :       scanner_->current_.token == Token::UNINITIALIZED,
     150             :       scanner_->current_.location.beg_pos == scanner_->next_.location.beg_pos);
     151     1434328 :   bookmark_ = (scanner_->current_.token == Token::UNINITIALIZED)
     152             :                   ? kBookmarkAtFirstPos
     153     1434328 :                   : scanner_->location().beg_pos;
     154     1434328 : }
     155             : 
     156         178 : void Scanner::BookmarkScope::Apply() {
     157             :   DCHECK(HasBeenSet());  // Caller hasn't called SetBookmark.
     158         178 :   if (bookmark_ == kBookmarkAtFirstPos) {
     159           7 :     scanner_->SeekNext(0);
     160             :   } else {
     161         171 :     scanner_->SeekNext(bookmark_);
     162         171 :     scanner_->Next();
     163             :     DCHECK_EQ(scanner_->location().beg_pos, static_cast<int>(bookmark_));
     164             :   }
     165         178 :   bookmark_ = kBookmarkWasApplied;
     166         178 : }
     167             : 
     168           0 : bool Scanner::BookmarkScope::HasBeenSet() {
     169           0 :   return bookmark_ != kNoBookmark && bookmark_ != kBookmarkWasApplied;
     170             : }
     171             : 
     172           0 : bool Scanner::BookmarkScope::HasBeenApplied() {
     173           0 :   return bookmark_ == kBookmarkWasApplied;
     174             : }
     175             : 
     176             : // ----------------------------------------------------------------------------
     177             : // Scanner
     178             : 
     179     4054678 : Scanner::Scanner(UnicodeCache* unicode_cache)
     180             :     : unicode_cache_(unicode_cache),
     181             :       octal_pos_(Location::invalid()),
     182             :       octal_message_(MessageTemplate::kNone),
     183    12164034 :       found_html_comment_(false) {}
     184             : 
     185     4050466 : void Scanner::Initialize(Utf16CharacterStream* source) {
     186             :   DCHECK_NOT_NULL(source);
     187     4050466 :   source_ = source;
     188             :   // Need to capture identifiers in order to recognize "get" and "set"
     189             :   // in object literals.
     190     4050466 :   Init();
     191             :   // Skip initial whitespace allowing HTML comment ends just like
     192             :   // after a newline and scan first token.
     193     4050467 :   has_line_terminator_before_next_ = true;
     194     4050467 :   SkipWhiteSpace();
     195     4050472 :   Scan();
     196     4050469 : }
     197             : 
     198             : template <bool capture_raw, bool unicode>
     199       70658 : uc32 Scanner::ScanHexNumber(int expected_length) {
     200             :   DCHECK(expected_length <= 4);  // prevent overflow
     201             : 
     202       70658 :   int begin = source_pos() - 2;
     203             :   uc32 x = 0;
     204      308934 :   for (int i = 0; i < expected_length; i++) {
     205      252189 :     int d = HexValue(c0_);
     206      252189 :     if (d < 0) {
     207             :       ReportScannerError(Location(begin, begin + expected_length + 2),
     208             :                          unicode
     209             :                              ? MessageTemplate::kInvalidUnicodeEscapeSequence
     210       13913 :                              : MessageTemplate::kInvalidHexEscapeSequence);
     211             :       return -1;
     212             :     }
     213      238276 :     x = x * 16 + d;
     214      238276 :     Advance<capture_raw>();
     215             :   }
     216             : 
     217             :   return x;
     218             : }
     219             : 
     220             : template <bool capture_raw>
     221       38536 : uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
     222             :   uc32 x = 0;
     223       35784 :   int d = HexValue(c0_);
     224       35784 :   if (d < 0) return -1;
     225             : 
     226      139562 :   while (d >= 0) {
     227      113554 :     x = x * 16 + d;
     228      113554 :     if (x > max_value) {
     229             :       ReportScannerError(Location(beg_pos, source_pos() + 1),
     230             :                          MessageTemplate::kUndefinedUnicodeCodePoint);
     231             :       return -1;
     232             :     }
     233      110802 :     Advance<capture_raw>();
     234      110802 :     d = HexValue(c0_);
     235             :   }
     236             : 
     237             :   return x;
     238             : }
     239             : 
     240             : 
     241             : // Ensure that tokens can be stored in a byte.
     242             : STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
     243             : 
     244             : // Table of one-character tokens, by character (0x00..0x7f only).
     245             : static const byte one_char_tokens[] = {
     246             :   Token::ILLEGAL,
     247             :   Token::ILLEGAL,
     248             :   Token::ILLEGAL,
     249             :   Token::ILLEGAL,
     250             :   Token::ILLEGAL,
     251             :   Token::ILLEGAL,
     252             :   Token::ILLEGAL,
     253             :   Token::ILLEGAL,
     254             :   Token::ILLEGAL,
     255             :   Token::ILLEGAL,
     256             :   Token::ILLEGAL,
     257             :   Token::ILLEGAL,
     258             :   Token::ILLEGAL,
     259             :   Token::ILLEGAL,
     260             :   Token::ILLEGAL,
     261             :   Token::ILLEGAL,
     262             :   Token::ILLEGAL,
     263             :   Token::ILLEGAL,
     264             :   Token::ILLEGAL,
     265             :   Token::ILLEGAL,
     266             :   Token::ILLEGAL,
     267             :   Token::ILLEGAL,
     268             :   Token::ILLEGAL,
     269             :   Token::ILLEGAL,
     270             :   Token::ILLEGAL,
     271             :   Token::ILLEGAL,
     272             :   Token::ILLEGAL,
     273             :   Token::ILLEGAL,
     274             :   Token::ILLEGAL,
     275             :   Token::ILLEGAL,
     276             :   Token::ILLEGAL,
     277             :   Token::ILLEGAL,
     278             :   Token::ILLEGAL,
     279             :   Token::ILLEGAL,
     280             :   Token::ILLEGAL,
     281             :   Token::ILLEGAL,
     282             :   Token::ILLEGAL,
     283             :   Token::ILLEGAL,
     284             :   Token::ILLEGAL,
     285             :   Token::ILLEGAL,
     286             :   Token::LPAREN,       // 0x28
     287             :   Token::RPAREN,       // 0x29
     288             :   Token::ILLEGAL,
     289             :   Token::ILLEGAL,
     290             :   Token::COMMA,        // 0x2c
     291             :   Token::ILLEGAL,
     292             :   Token::ILLEGAL,
     293             :   Token::ILLEGAL,
     294             :   Token::ILLEGAL,
     295             :   Token::ILLEGAL,
     296             :   Token::ILLEGAL,
     297             :   Token::ILLEGAL,
     298             :   Token::ILLEGAL,
     299             :   Token::ILLEGAL,
     300             :   Token::ILLEGAL,
     301             :   Token::ILLEGAL,
     302             :   Token::ILLEGAL,
     303             :   Token::ILLEGAL,
     304             :   Token::COLON,        // 0x3a
     305             :   Token::SEMICOLON,    // 0x3b
     306             :   Token::ILLEGAL,
     307             :   Token::ILLEGAL,
     308             :   Token::ILLEGAL,
     309             :   Token::CONDITIONAL,  // 0x3f
     310             :   Token::ILLEGAL,
     311             :   Token::ILLEGAL,
     312             :   Token::ILLEGAL,
     313             :   Token::ILLEGAL,
     314             :   Token::ILLEGAL,
     315             :   Token::ILLEGAL,
     316             :   Token::ILLEGAL,
     317             :   Token::ILLEGAL,
     318             :   Token::ILLEGAL,
     319             :   Token::ILLEGAL,
     320             :   Token::ILLEGAL,
     321             :   Token::ILLEGAL,
     322             :   Token::ILLEGAL,
     323             :   Token::ILLEGAL,
     324             :   Token::ILLEGAL,
     325             :   Token::ILLEGAL,
     326             :   Token::ILLEGAL,
     327             :   Token::ILLEGAL,
     328             :   Token::ILLEGAL,
     329             :   Token::ILLEGAL,
     330             :   Token::ILLEGAL,
     331             :   Token::ILLEGAL,
     332             :   Token::ILLEGAL,
     333             :   Token::ILLEGAL,
     334             :   Token::ILLEGAL,
     335             :   Token::ILLEGAL,
     336             :   Token::ILLEGAL,
     337             :   Token::LBRACK,     // 0x5b
     338             :   Token::ILLEGAL,
     339             :   Token::RBRACK,     // 0x5d
     340             :   Token::ILLEGAL,
     341             :   Token::ILLEGAL,
     342             :   Token::ILLEGAL,
     343             :   Token::ILLEGAL,
     344             :   Token::ILLEGAL,
     345             :   Token::ILLEGAL,
     346             :   Token::ILLEGAL,
     347             :   Token::ILLEGAL,
     348             :   Token::ILLEGAL,
     349             :   Token::ILLEGAL,
     350             :   Token::ILLEGAL,
     351             :   Token::ILLEGAL,
     352             :   Token::ILLEGAL,
     353             :   Token::ILLEGAL,
     354             :   Token::ILLEGAL,
     355             :   Token::ILLEGAL,
     356             :   Token::ILLEGAL,
     357             :   Token::ILLEGAL,
     358             :   Token::ILLEGAL,
     359             :   Token::ILLEGAL,
     360             :   Token::ILLEGAL,
     361             :   Token::ILLEGAL,
     362             :   Token::ILLEGAL,
     363             :   Token::ILLEGAL,
     364             :   Token::ILLEGAL,
     365             :   Token::ILLEGAL,
     366             :   Token::ILLEGAL,
     367             :   Token::ILLEGAL,
     368             :   Token::ILLEGAL,
     369             :   Token::LBRACE,       // 0x7b
     370             :   Token::ILLEGAL,
     371             :   Token::RBRACE,       // 0x7d
     372             :   Token::BIT_NOT,      // 0x7e
     373             :   Token::ILLEGAL
     374             : };
     375             : 
     376             : 
     377   892451859 : Token::Value Scanner::Next() {
     378   691898255 :   if (next_.token == Token::EOS) {
     379        3578 :     next_.location.beg_pos = current_.location.beg_pos;
     380        3578 :     next_.location.end_pos = current_.location.end_pos;
     381             :   }
     382   691898255 :   current_ = next_;
     383   691898255 :   if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
     384   120180694 :     next_ = next_next_;
     385   120180694 :     next_next_.token = Token::UNINITIALIZED;
     386   120180694 :     next_next_.contextual_token = Token::UNINITIALIZED;
     387   120180694 :     has_line_terminator_before_next_ = has_line_terminator_after_next_;
     388   120180694 :     return current_.token;
     389             :   }
     390   571717561 :   has_line_terminator_before_next_ = false;
     391   571717561 :   has_multiline_comment_before_next_ = false;
     392   571717561 :   if (static_cast<unsigned>(c0_) <= 0x7f) {
     393   568414198 :     Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
     394   568414198 :     if (token != Token::ILLEGAL) {
     395             :       int pos = source_pos();
     396   200553604 :       next_.token = token;
     397   200553604 :       next_.contextual_token = Token::UNINITIALIZED;
     398   200553604 :       next_.location.beg_pos = pos;
     399   200553604 :       next_.location.end_pos = pos + 1;
     400   200553604 :       next_.literal_chars = nullptr;
     401   200553604 :       next_.raw_literal_chars = nullptr;
     402   200553604 :       next_.invalid_template_escape_message = MessageTemplate::kNone;
     403   200553604 :       Advance();
     404   200554055 :       return current_.token;
     405             :     }
     406             :   }
     407   371163957 :   Scan();
     408   371169893 :   return current_.token;
     409             : }
     410             : 
     411             : 
     412   120498098 : Token::Value Scanner::PeekAhead() {
     413             :   DCHECK(next_.token != Token::DIV);
     414             :   DCHECK(next_.token != Token::ASSIGN_DIV);
     415             : 
     416   120498098 :   if (next_next_.token != Token::UNINITIALIZED) {
     417             :     return next_next_.token;
     418             :   }
     419   120181089 :   TokenDesc prev = current_;
     420             :   bool has_line_terminator_before_next =
     421   120181089 :       has_line_terminator_before_next_ || has_multiline_comment_before_next_;
     422   120181089 :   Next();
     423             :   has_line_terminator_after_next_ =
     424   120181312 :       has_line_terminator_before_next_ || has_multiline_comment_before_next_;
     425   120181312 :   has_line_terminator_before_next_ = has_line_terminator_before_next;
     426   120181312 :   Token::Value ret = next_.token;
     427   120181312 :   next_next_ = next_;
     428   120181312 :   next_ = current_;
     429   120181312 :   current_ = prev;
     430   120181312 :   return ret;
     431             : }
     432             : 
     433             : 
     434             : // TODO(yangguo): check whether this is actually necessary.
     435             : static inline bool IsLittleEndianByteOrderMark(uc32 c) {
     436             :   // The Unicode value U+FFFE is guaranteed never to be assigned as a
     437             :   // Unicode character; this implies that in a Unicode context the
     438             :   // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
     439             :   // character expressed in little-endian byte order (since it could
     440             :   // not be a U+FFFE character expressed in big-endian byte
     441             :   // order). Nevertheless, we check for it to be compatible with
     442             :   // Spidermonkey.
     443             :   return c == 0xFFFE;
     444             : }
     445             : 
     446     8122418 : bool Scanner::SkipWhiteSpace() {
     447             :   int start_position = source_pos();
     448             : 
     449             :   while (true) {
     450             :     while (true) {
     451             :       // Don't skip behind the end of input.
     452     8141838 :       if (c0_ == kEndOfInput) break;
     453             : 
     454             :       // Advance as long as character is a WhiteSpace or LineTerminator.
     455             :       // Remember if the latter is the case.
     456    16265037 :       if (unicode_cache_->IsLineTerminator(c0_)) {
     457     4022573 :         has_line_terminator_before_next_ = true;
     458    12268467 :       } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
     459     4048573 :                  !IsLittleEndianByteOrderMark(c0_)) {
     460             :         break;
     461             :       }
     462     4083946 :       Advance();
     463             :     }
     464             : 
     465             :     // If there is an HTML comment end '-->' at the beginning of a
     466             :     // line (with only whitespace in front of it), we treat the rest
     467             :     // of the line as a comment. This is in line with the way
     468             :     // SpiderMonkey handles it.
     469     4057895 :     if (c0_ != '-' || !has_line_terminator_before_next_) break;
     470             : 
     471        6792 :     Advance();
     472        6792 :     if (c0_ != '-') {
     473             :       PushBack('-');  // undo Advance()
     474             :       break;
     475             :     }
     476             : 
     477         126 :     Advance();
     478         126 :     if (c0_ != '>') {
     479             :       PushBack2('-', '-');  // undo 2x Advance();
     480             :       break;
     481             :     }
     482             : 
     483             :     // Treat the rest of the line as a comment.
     484          53 :     SkipSingleLineComment();
     485             :   }
     486             : 
     487             :   // Return whether or not we skipped any characters.
     488     8141843 :   return source_pos() != start_position;
     489             : }
     490             : 
     491     6777900 : Token::Value Scanner::SkipSingleLineComment() {
     492     6777900 :   Advance();
     493             : 
     494             :   // The line terminator at the end of the line is not considered
     495             :   // to be part of the single-line comment; it is recognized
     496             :   // separately by the lexical grammar and becomes part of the
     497             :   // stream of input elements for the syntactic grammar (see
     498             :   // ECMA-262, section 7.4).
     499   701804732 :   while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
     500   340735687 :     Advance();
     501             :   }
     502             : 
     503     6777900 :   return Token::WHITESPACE;
     504             : }
     505             : 
     506             : 
     507        2566 : Token::Value Scanner::SkipSourceURLComment() {
     508        2566 :   TryToParseSourceURLComment();
     509        7044 :   while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
     510         756 :     Advance();
     511             :   }
     512             : 
     513        2566 :   return Token::WHITESPACE;
     514             : }
     515             : 
     516             : 
     517        2566 : void Scanner::TryToParseSourceURLComment() {
     518             :   // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
     519             :   // function will just return if it cannot parse a magic comment.
     520        5207 :   if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;
     521        2550 :   Advance();
     522             :   LiteralBuffer name;
     523       79725 :   while (c0_ != kEndOfInput &&
     524       53126 :          !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
     525       24025 :     name.AddChar(c0_);
     526       24025 :     Advance();
     527             :   }
     528        2550 :   if (!name.is_one_byte()) return;
     529             :   Vector<const uint8_t> name_literal = name.one_byte_literal();
     530             :   LiteralBuffer* value;
     531        2550 :   if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
     532        2342 :     value = &source_url_;
     533         208 :   } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
     534         169 :     value = &source_mapping_url_;
     535             :   } else {
     536             :     return;
     537             :   }
     538        2511 :   if (c0_ != '=')
     539             :     return;
     540        2499 :   Advance();
     541             :   value->Reset();
     542        5046 :   while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
     543          24 :     Advance();
     544             :   }
     545       61211 :   while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
     546             :     // Disallowed characters.
     547       29235 :     if (c0_ == '"' || c0_ == '\'') {
     548             :       value->Reset();
     549             :       return;
     550             :     }
     551       58422 :     if (unicode_cache_->IsWhiteSpace(c0_)) {
     552             :       break;
     553             :     }
     554       29161 :     value->AddChar(c0_);
     555       29161 :     Advance();
     556             :   }
     557             :   // Allow whitespace at the end.
     558        2951 :   while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
     559         172 :     if (!unicode_cache_->IsWhiteSpace(c0_)) {
     560             :       value->Reset();
     561             :       break;
     562             :     }
     563          62 :     Advance();
     564             :   }
     565             : }
     566             : 
     567             : 
     568      549650 : Token::Value Scanner::SkipMultiLineComment() {
     569             :   DCHECK(c0_ == '*');
     570      549650 :   Advance();
     571             : 
     572     8322745 :   while (c0_ != kEndOfInput) {
     573             :     uc32 ch = c0_;
     574     7773093 :     Advance();
     575    15546184 :     if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {
     576             :       // Following ECMA-262, section 7.4, a comment containing
     577             :       // a newline will make the comment count as a line-terminator.
     578      147354 :       has_multiline_comment_before_next_ = true;
     579             :     }
     580             :     // If we have reached the end of the multi-line comment, we
     581             :     // consume the '/' and insert a whitespace. This way all
     582             :     // multi-line comments are treated as whitespace.
     583     7773093 :     if (ch == '*' && c0_ == '/') {
     584      549648 :       c0_ = ' ';
     585      549648 :       return Token::WHITESPACE;
     586             :     }
     587             :   }
     588             : 
     589             :   // Unterminated multi-line comment.
     590             :   return Token::ILLEGAL;
     591             : }
     592             : 
     593         130 : Token::Value Scanner::ScanHtmlComment() {
     594             :   // Check for <!-- comments.
     595             :   DCHECK(c0_ == '!');
     596          84 :   Advance();
     597          84 :   if (c0_ != '-') {
     598             :     PushBack('!');  // undo Advance()
     599          23 :     return Token::LT;
     600             :   }
     601             : 
     602          61 :   Advance();
     603          61 :   if (c0_ != '-') {
     604             :     PushBack2('-', '!');  // undo 2x Advance()
     605             :     return Token::LT;
     606             :   }
     607             : 
     608          38 :   found_html_comment_ = true;
     609          38 :   return SkipSingleLineComment();
     610             : }
     611             : 
     612  1527867417 : void Scanner::Scan() {
     613   375223718 :   next_.literal_chars = NULL;
     614   375223718 :   next_.raw_literal_chars = NULL;
     615   375223718 :   next_.invalid_template_escape_message = MessageTemplate::kNone;
     616             :   Token::Value token;
     617   770641234 :   do {
     618             :     // Remember the position of the next token
     619   770644529 :     next_.location.beg_pos = source_pos();
     620             : 
     621   770644529 :     switch (c0_) {
     622             :       case ' ':
     623             :       case '\t':
     624   310691411 :         Advance();
     625             :         token = Token::WHITESPACE;
     626   310691404 :         break;
     627             : 
     628             :       case '\n':
     629    77392529 :         Advance();
     630    77392523 :         has_line_terminator_before_next_ = true;
     631             :         token = Token::WHITESPACE;
     632    77392523 :         break;
     633             : 
     634             :       case '"':
     635             :       case '\'':
     636    14313735 :         token = ScanString();
     637    14313743 :         break;
     638             : 
     639             :       case '<':
     640             :         // < <= << <<= <!--
     641     1684971 :         Advance();
     642     1684971 :         if (c0_ == '=') {
     643             :           token = Select(Token::LTE);
     644     1548790 :         } else if (c0_ == '<') {
     645             :           token = Select('=', Token::ASSIGN_SHL, Token::SHL);
     646     1195723 :         } else if (c0_ == '!') {
     647          84 :           token = ScanHtmlComment();
     648             :         } else {
     649             :           token = Token::LT;
     650             :         }
     651             :         break;
     652             : 
     653             :       case '>':
     654             :         // > >= >> >>= >>> >>>=
     655     1243583 :         Advance();
     656     1243583 :         if (c0_ == '=') {
     657             :           token = Select(Token::GTE);
     658     1064796 :         } else if (c0_ == '>') {
     659             :           // >> >>= >>> >>>=
     660      718290 :           Advance();
     661      718290 :           if (c0_ == '=') {
     662             :             token = Select(Token::ASSIGN_SAR);
     663      710434 :           } else if (c0_ == '>') {
     664             :             token = Select('=', Token::ASSIGN_SHR, Token::SHR);
     665             :           } else {
     666             :             token = Token::SAR;
     667             :           }
     668             :         } else {
     669             :           token = Token::GT;
     670             :         }
     671             :         break;
     672             : 
     673             :       case '=':
     674             :         // = == === =>
     675    33778745 :         Advance();
     676    33778752 :         if (c0_ == '=') {
     677             :           token = Select('=', Token::EQ_STRICT, Token::EQ);
     678    30376808 :         } else if (c0_ == '>') {
     679             :           token = Select(Token::ARROW);
     680             :         } else {
     681             :           token = Token::ASSIGN;
     682             :         }
     683             :         break;
     684             : 
     685             :       case '!':
     686             :         // ! != !==
     687     3028528 :         Advance();
     688     3028528 :         if (c0_ == '=') {
     689             :           token = Select('=', Token::NE_STRICT, Token::NE);
     690             :         } else {
     691             :           token = Token::NOT;
     692             :         }
     693             :         break;
     694             : 
     695             :       case '+':
     696             :         // + ++ +=
     697     7043129 :         Advance();
     698     7043129 :         if (c0_ == '+') {
     699             :           token = Select(Token::INC);
     700     5254423 :         } else if (c0_ == '=') {
     701             :           token = Select(Token::ASSIGN_ADD);
     702             :         } else {
     703             :           token = Token::ADD;
     704             :         }
     705             :         break;
     706             : 
     707             :       case '-':
     708             :         // - -- --> -=
     709     1379608 :         Advance();
     710     1379608 :         if (c0_ == '-') {
     711       80969 :           Advance();
     712       81185 :           if (c0_ == '>' && HasAnyLineTerminatorBeforeNext()) {
     713             :             // For compatibility with SpiderMonkey, we skip lines that
     714             :             // start with an HTML comment end '-->'.
     715         142 :             token = SkipSingleLineComment();
     716             :           } else {
     717             :             token = Token::DEC;
     718             :           }
     719     1298639 :         } else if (c0_ == '=') {
     720             :           token = Select(Token::ASSIGN_SUB);
     721             :         } else {
     722             :           token = Token::SUB;
     723             :         }
     724             :         break;
     725             : 
     726             :       case '*':
     727             :         // * *=
     728      764768 :         Advance();
     729      764768 :         if (c0_ == '*') {
     730             :           token = Select('=', Token::ASSIGN_EXP, Token::EXP);
     731      755195 :         } else if (c0_ == '=') {
     732             :           token = Select(Token::ASSIGN_MUL);
     733             :         } else {
     734             :           token = Token::MUL;
     735             :         }
     736             :         break;
     737             : 
     738             :       case '%':
     739             :         // % %=
     740             :         token = Select('=', Token::ASSIGN_MOD, Token::MOD);
     741     2287787 :         break;
     742             : 
     743             :       case '/':
     744             :         // /  // /* /=
     745     7810708 :         Advance();
     746     7810708 :         if (c0_ == '/') {
     747     6780233 :           Advance();
     748     6780233 :           if (c0_ == '#' || c0_ == '@') {
     749        2566 :             Advance();
     750        2566 :             token = SkipSourceURLComment();
     751             :           } else {
     752             :             PushBack(c0_);
     753     6777667 :             token = SkipSingleLineComment();
     754             :           }
     755     1030475 :         } else if (c0_ == '*') {
     756      549650 :           token = SkipMultiLineComment();
     757      480825 :         } else if (c0_ == '=') {
     758             :           token = Select(Token::ASSIGN_DIV);
     759             :         } else {
     760             :           token = Token::DIV;
     761             :         }
     762             :         break;
     763             : 
     764             :       case '&':
     765             :         // & && &=
     766     1655498 :         Advance();
     767     1655498 :         if (c0_ == '&') {
     768             :           token = Select(Token::AND);
     769      825361 :         } else if (c0_ == '=') {
     770             :           token = Select(Token::ASSIGN_BIT_AND);
     771             :         } else {
     772             :           token = Token::BIT_AND;
     773             :         }
     774             :         break;
     775             : 
     776             :       case '|':
     777             :         // | || |=
     778     2049474 :         Advance();
     779     2049474 :         if (c0_ == '|') {
     780             :           token = Select(Token::OR);
     781     1119064 :         } else if (c0_ == '=') {
     782             :           token = Select(Token::ASSIGN_BIT_OR);
     783             :         } else {
     784             :           token = Token::BIT_OR;
     785             :         }
     786             :         break;
     787             : 
     788             :       case '^':
     789             :         // ^ ^=
     790             :         token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
     791       45374 :         break;
     792             : 
     793             :       case '.':
     794             :         // . Number
     795    24061766 :         Advance();
     796    48123530 :         if (IsDecimalDigit(c0_)) {
     797        5084 :           token = ScanNumber(true);
     798             :         } else {
     799             :           token = Token::PERIOD;
     800    24056681 :           if (c0_ == '.') {
     801      125537 :             Advance();
     802      125537 :             if (c0_ == '.') {
     803      124673 :               Advance();
     804             :               token = Token::ELLIPSIS;
     805             :             } else {
     806             :               PushBack('.');
     807             :             }
     808             :           }
     809             :         }
     810             :         break;
     811             : 
     812             :       case ':':
     813             :         token = Select(Token::COLON);
     814     2454765 :         break;
     815             : 
     816             :       case ';':
     817             :         token = Select(Token::SEMICOLON);
     818       56632 :         break;
     819             : 
     820             :       case ',':
     821             :         token = Select(Token::COMMA);
     822      493944 :         break;
     823             : 
     824             :       case '(':
     825             :         token = Select(Token::LPAREN);
     826     8830758 :         break;
     827             : 
     828             :       case ')':
     829             :         token = Select(Token::RPAREN);
     830      554242 :         break;
     831             : 
     832             :       case '[':
     833             :         token = Select(Token::LBRACK);
     834      575636 :         break;
     835             : 
     836             :       case ']':
     837             :         token = Select(Token::RBRACK);
     838      178481 :         break;
     839             : 
     840             :       case '{':
     841             :         token = Select(Token::LBRACE);
     842    10265604 :         break;
     843             : 
     844             :       case '}':
     845             :         token = Select(Token::RBRACE);
     846    13798526 :         break;
     847             : 
     848             :       case '?':
     849             :         token = Select(Token::CONDITIONAL);
     850      228741 :         break;
     851             : 
     852             :       case '~':
     853             :         token = Select(Token::BIT_NOT);
     854        5751 :         break;
     855             : 
     856             :       case '`':
     857       88967 :         token = ScanTemplateStart();
     858       88967 :         break;
     859             : 
     860             :       default:
     861   243880872 :         if (c0_ == kEndOfInput) {
     862             :           token = Token::EOS;
     863   480744625 :         } else if (unicode_cache_->IsIdentifierStart(c0_)) {
     864   200166218 :           token = ScanIdentifierOrKeyword();
     865    80412502 :         } else if (IsDecimalDigit(c0_)) {
     866    40198881 :           token = ScanNumber(false);
     867        7370 :         } else if (SkipWhiteSpace()) {
     868             :           token = Token::WHITESPACE;
     869             :         } else {
     870             :           token = Select(Token::ILLEGAL);
     871             :         }
     872             :         break;
     873             :     }
     874             : 
     875             :     // Continue scanning for tokens as long as we're just skipping
     876             :     // whitespace.
     877             :   } while (token == Token::WHITESPACE);
     878             : 
     879   375220423 :   next_.location.end_pos = source_pos();
     880   375220423 :   if (Token::IsContextualKeyword(token)) {
     881     5074816 :     next_.token = Token::IDENTIFIER;
     882     5074816 :     next_.contextual_token = token;
     883             :   } else {
     884   370145607 :     next_.token = token;
     885   370145607 :     next_.contextual_token = Token::UNINITIALIZED;
     886             :   }
     887             : 
     888             : #ifdef DEBUG
     889             :   SanityCheckTokenDesc(current_);
     890             :   SanityCheckTokenDesc(next_);
     891             :   SanityCheckTokenDesc(next_next_);
     892             : #endif
     893   375220423 : }
     894             : 
     895             : #ifdef DEBUG
     896             : void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
     897             :   // Most tokens should not have literal_chars or even raw_literal chars.
     898             :   // The rules are:
     899             :   // - UNINITIALIZED: we don't care.
     900             :   // - TEMPLATE_*: need both literal + raw literal chars.
     901             :   // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
     902             :   // - all others: should have neither.
     903             :   // Furthermore, only TEMPLATE_* tokens can have a
     904             :   // invalid_template_escape_message.
     905             : 
     906             :   switch (token.token) {
     907             :     case Token::UNINITIALIZED:
     908             :       // token.literal_chars & other members might be garbage. That's ok.
     909             :       break;
     910             :     case Token::TEMPLATE_SPAN:
     911             :     case Token::TEMPLATE_TAIL:
     912             :       DCHECK_NOT_NULL(token.raw_literal_chars);
     913             :       DCHECK_NOT_NULL(token.literal_chars);
     914             :       break;
     915             :     case Token::ESCAPED_KEYWORD:
     916             :     case Token::ESCAPED_STRICT_RESERVED_WORD:
     917             :     case Token::FUTURE_STRICT_RESERVED_WORD:
     918             :     case Token::IDENTIFIER:
     919             :     case Token::NUMBER:
     920             :     case Token::REGEXP_LITERAL:
     921             :     case Token::SMI:
     922             :     case Token::STRING:
     923             :       DCHECK_NOT_NULL(token.literal_chars);
     924             :       DCHECK_NULL(token.raw_literal_chars);
     925             :       DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
     926             :       break;
     927             :     default:
     928             :       DCHECK_NULL(token.literal_chars);
     929             :       DCHECK_NULL(token.raw_literal_chars);
     930             :       DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
     931             :       break;
     932             :   }
     933             : 
     934             :   DCHECK_IMPLIES(token.token != Token::IDENTIFIER,
     935             :                  token.contextual_token == Token::UNINITIALIZED);
     936             :   DCHECK_IMPLIES(token.contextual_token != Token::UNINITIALIZED,
     937             :                  token.token == Token::IDENTIFIER &&
     938             :                      Token::IsContextualKeyword(token.contextual_token));
     939             :   DCHECK(!Token::IsContextualKeyword(token.token));
     940             : }
     941             : #endif  // DEBUG
     942             : 
     943         340 : void Scanner::SeekForward(int pos) {
     944             :   // After this call, we will have the token at the given position as
     945             :   // the "next" token. The "current" token will be invalid.
     946         352 :   if (pos == next_.location.beg_pos) return;
     947             :   int current_pos = source_pos();
     948             :   DCHECK_EQ(next_.location.end_pos, current_pos);
     949             :   // Positions inside the lookahead token aren't supported.
     950             :   DCHECK(pos >= current_pos);
     951         164 :   if (pos != current_pos) {
     952         158 :     source_->Seek(pos);
     953         158 :     Advance();
     954             :     // This function is only called to seek to the location
     955             :     // of the end of a function (at the "}" token). It doesn't matter
     956             :     // whether there was a line terminator in the part we skip.
     957         158 :     has_line_terminator_before_next_ = false;
     958         158 :     has_multiline_comment_before_next_ = false;
     959             :   }
     960         164 :   Scan();
     961             : }
     962             : 
     963             : 
     964             : template <bool capture_raw, bool in_template_literal>
     965     3398535 : bool Scanner::ScanEscape() {
     966     1719605 :   uc32 c = c0_;
     967     1719605 :   Advance<capture_raw>();
     968             : 
     969             :   // Skip escaped newlines.
     970     3375714 :   if (!in_template_literal && c0_ != kEndOfInput &&
     971     1687857 :       unicode_cache_->IsLineTerminator(c)) {
     972             :     // Allow CR+LF newlines in multiline string literals.
     973       15104 :     if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
     974             :     // Allow LF+CR newlines in multiline string literals.
     975       15104 :     if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
     976             :     return true;
     977             :   }
     978             : 
     979     1704501 :   switch (c) {
     980             :     case '\'':  // fall through
     981             :     case '"' :  // fall through
     982             :     case '\\': break;
     983          59 :     case 'b' : c = '\b'; break;
     984         134 :     case 'f' : c = '\f'; break;
     985     1142279 :     case 'n' : c = '\n'; break;
     986         850 :     case 'r' : c = '\r'; break;
     987         392 :     case 't' : c = '\t'; break;
     988             :     case 'u' : {
     989       80747 :       c = ScanUnicodeEscape<capture_raw>();
     990       80747 :       if (c < 0) return false;
     991             :       break;
     992             :     }
     993             :     case 'v':
     994             :       c = '\v';
     995          74 :       break;
     996             :     case 'x': {
     997        6795 :       c = ScanHexNumber<capture_raw>(2);
     998        6795 :       if (c < 0) return false;
     999             :       break;
    1000             :     }
    1001             :     case '0':  // Fall through.
    1002             :     case '1':  // fall through
    1003             :     case '2':  // fall through
    1004             :     case '3':  // fall through
    1005             :     case '4':  // fall through
    1006             :     case '5':  // fall through
    1007             :     case '6':  // fall through
    1008             :     case '7':
    1009        5600 :       c = ScanOctalEscape<capture_raw>(c, 2);
    1010        5600 :       break;
    1011             :   }
    1012             : 
    1013             :   // Other escaped characters are interpreted as their non-escaped version.
    1014             :   AddLiteralChar(c);
    1015             :   return true;
    1016             : }
    1017             : 
    1018             : 
    1019             : template <bool capture_raw>
    1020       10735 : uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
    1021        5600 :   uc32 x = c - '0';
    1022             :   int i = 0;
    1023        8891 :   for (; i < length; i++) {
    1024        8332 :     int d = c0_ - '0';
    1025        8332 :     if (d < 0 || d > 7) break;
    1026        3315 :     int nx = x * 8 + d;
    1027        3315 :     if (nx >= 256) break;
    1028             :     x = nx;
    1029        3291 :     Advance<capture_raw>();
    1030             :   }
    1031             :   // Anything except '\0' is an octal escape sequence, illegal in strict mode.
    1032             :   // Remember the position of octal escape sequences so that an error
    1033             :   // can be reported later (in strict mode).
    1034             :   // We don't report the error immediately, because the octal escape can
    1035             :   // occur before the "use strict" directive.
    1036        5600 :   if (c != '0' || i > 0) {
    1037        5135 :     octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
    1038        5135 :     octal_message_ = MessageTemplate::kStrictOctalEscape;
    1039             :   }
    1040        5600 :   return x;
    1041             : }
    1042             : 
    1043             : 
    1044   520361961 : Token::Value Scanner::ScanString() {
    1045    14313735 :   uc32 quote = c0_;
    1046             :   Advance<false, false>();  // consume quote
    1047             : 
    1048             :   LiteralScope literal(this);
    1049             :   while (true) {
    1050   245518844 :     if (c0_ > kMaxAscii) {
    1051       20372 :       HandleLeadSurrogate();
    1052       20376 :       break;
    1053             :     }
    1054   245498472 :     if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;
    1055   245498331 :     if (c0_ == quote) {
    1056             :       literal.Complete();
    1057             :       Advance<false, false>();
    1058    13876063 :       return Token::STRING;
    1059             :     }
    1060   231622268 :     char c = static_cast<char>(c0_);
    1061   231622268 :     if (c == '\\') break;
    1062             :     Advance<false, false>();
    1063             :     AddLiteralChar(c);
    1064             :   }
    1065             : 
    1066    34707501 :   while (c0_ != quote && c0_ != kEndOfInput &&
    1067    17136013 :          !unicode_cache_->IsLineTerminator(c0_)) {
    1068    17136013 :     uc32 c = c0_;
    1069    17136013 :     Advance();
    1070    17136013 :     if (c == '\\') {
    1071     1687871 :       if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {
    1072             :         return Token::ILLEGAL;
    1073             :       }
    1074             :     } else {
    1075             :       AddLiteralChar(c);
    1076             :     }
    1077             :   }
    1078      435475 :   if (c0_ != quote) return Token::ILLEGAL;
    1079             :   literal.Complete();
    1080             : 
    1081      435475 :   Advance();  // consume quote
    1082      435475 :   return Token::STRING;
    1083             : }
    1084             : 
    1085             : 
    1086     1720446 : Token::Value Scanner::ScanTemplateSpan() {
    1087             :   // When scanning a TemplateSpan, we are looking for the following construct:
    1088             :   // TEMPLATE_SPAN ::
    1089             :   //     ` LiteralChars* ${
    1090             :   //   | } LiteralChars* ${
    1091             :   //
    1092             :   // TEMPLATE_TAIL ::
    1093             :   //     ` LiteralChars* `
    1094             :   //   | } LiteralChar* `
    1095             :   //
    1096             :   // A TEMPLATE_SPAN should always be followed by an Expression, while a
    1097             :   // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
    1098             :   // followed by an Expression.
    1099             : 
    1100             :   // These scoped helpers save and restore the original error state, so that we
    1101             :   // can specially treat invalid escape sequences in templates (which are
    1102             :   // handled by the parser).
    1103      168651 :   ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
    1104      168651 :   ErrorState octal_error_state(&octal_message_, &octal_pos_);
    1105             : 
    1106             :   Token::Value result = Token::TEMPLATE_SPAN;
    1107             :   LiteralScope literal(this);
    1108             :   StartRawLiteral();
    1109             :   const bool capture_raw = true;
    1110             :   const bool in_template_literal = true;
    1111             :   while (true) {
    1112     1414132 :     uc32 c = c0_;
    1113     1414132 :     Advance<capture_raw>();
    1114     1414132 :     if (c == '`') {
    1115             :       result = Token::TEMPLATE_TAIL;
    1116             :       ReduceRawLiteralLength(1);
    1117             :       break;
    1118     1341062 :     } else if (c == '$' && c0_ == '{') {
    1119       93762 :       Advance<capture_raw>();  // Consume '{'
    1120             :       ReduceRawLiteralLength(2);
    1121             :       break;
    1122     1247300 :     } else if (c == '\\') {
    1123       63743 :       if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {
    1124             :         // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
    1125             :         // code unit sequence.
    1126         131 :         uc32 lastChar = c0_;
    1127         131 :         Advance<capture_raw>();
    1128         131 :         if (lastChar == '\r') {
    1129             :           ReduceRawLiteralLength(1);  // Remove \r
    1130          78 :           if (c0_ == '\n') {
    1131          39 :             Advance<capture_raw>();  // Adds \n
    1132             :           } else {
    1133             :             AddRawLiteralChar('\n');
    1134             :           }
    1135             :         }
    1136             :       } else {
    1137       31748 :         bool success = ScanEscape<capture_raw, in_template_literal>();
    1138             :         USE(success);
    1139             :         DCHECK_EQ(!success, has_error());
    1140             :         // For templates, invalid escape sequence checking is handled in the
    1141             :         // parser.
    1142             :         scanner_error_state.MoveErrorTo(&next_);
    1143             :         octal_error_state.MoveErrorTo(&next_);
    1144             :       }
    1145     1215421 :     } else if (c < 0) {
    1146             :       // Unterminated template literal
    1147             :       PushBack(c);
    1148             :       break;
    1149             :     } else {
    1150             :       // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
    1151             :       // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
    1152             :       // consisting of the CV 0x000A.
    1153     1213602 :       if (c == '\r') {
    1154             :         ReduceRawLiteralLength(1);  // Remove \r
    1155         711 :         if (c0_ == '\n') {
    1156         648 :           Advance<capture_raw>();  // Adds \n
    1157             :         } else {
    1158             :           AddRawLiteralChar('\n');
    1159             :         }
    1160             :         c = '\n';
    1161             :       }
    1162             :       AddLiteralChar(c);
    1163             :     }
    1164             :   }
    1165             :   literal.Complete();
    1166      168651 :   next_.location.end_pos = source_pos();
    1167      168651 :   next_.token = result;
    1168      168651 :   next_.contextual_token = Token::UNINITIALIZED;
    1169             : 
    1170      168651 :   return result;
    1171             : }
    1172             : 
    1173             : 
    1174       88967 : Token::Value Scanner::ScanTemplateStart() {
    1175             :   DCHECK(next_next_.token == Token::UNINITIALIZED);
    1176             :   DCHECK(c0_ == '`');
    1177       88967 :   next_.location.beg_pos = source_pos();
    1178       88967 :   Advance();  // Consume `
    1179       88967 :   return ScanTemplateSpan();
    1180             : }
    1181             : 
    1182             : 
    1183       79684 : Token::Value Scanner::ScanTemplateContinuation() {
    1184             :   DCHECK_EQ(next_.token, Token::RBRACE);
    1185       79684 :   next_.location.beg_pos = source_pos() - 1;  // We already consumed }
    1186       79684 :   return ScanTemplateSpan();
    1187             : }
    1188             : 
    1189     2263498 : Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
    1190             :   Handle<String> tmp;
    1191     4526996 :   if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
    1192     2263498 :   return tmp;
    1193             : }
    1194             : 
    1195     2263497 : Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
    1196             :   Handle<String> tmp;
    1197     4526994 :   if (source_mapping_url_.length() > 0)
    1198         133 :     tmp = source_mapping_url_.Internalize(isolate);
    1199     2263497 :   return tmp;
    1200             : }
    1201             : 
    1202           0 : void Scanner::ScanDecimalDigits() {
    1203    13915776 :   while (IsDecimalDigit(c0_))
    1204     2812865 :     AddLiteralCharAdvance();
    1205           0 : }
    1206             : 
    1207             : 
    1208   215682097 : Token::Value Scanner::ScanNumber(bool seen_period) {
    1209             :   DCHECK(IsDecimalDigit(c0_));  // the first digit of the number or the fraction
    1210             : 
    1211             :   enum {
    1212             :     DECIMAL,
    1213             :     DECIMAL_WITH_LEADING_ZERO,
    1214             :     HEX,
    1215             :     OCTAL,
    1216             :     IMPLICIT_OCTAL,
    1217             :     BINARY
    1218             :   } kind = DECIMAL;
    1219             : 
    1220             :   LiteralScope literal(this);
    1221    40203956 :   bool at_start = !seen_period;
    1222             :   int start_pos = source_pos();  // For reporting octal positions.
    1223    40203956 :   if (seen_period) {
    1224             :     // we have already seen a decimal point of the float
    1225             :     AddLiteralChar('.');
    1226             :     ScanDecimalDigits();  // we know we have at least one digit
    1227             : 
    1228             :   } else {
    1229             :     // if the first character is '0' we must check for octals and hex
    1230    40198872 :     if (c0_ == '0') {
    1231    16195744 :       AddLiteralCharAdvance();
    1232             : 
    1233             :       // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
    1234             :       // an octal number.
    1235    16195745 :       if (c0_ == 'x' || c0_ == 'X') {
    1236             :         // hex number
    1237             :         kind = HEX;
    1238      213945 :         AddLiteralCharAdvance();
    1239      427890 :         if (!IsHexDigit(c0_)) {
    1240             :           // we must have at least one hex digit after 'x'/'X'
    1241             :           return Token::ILLEGAL;
    1242             :         }
    1243     1658768 :         while (IsHexDigit(c0_)) {
    1244      615442 :           AddLiteralCharAdvance();
    1245             :         }
    1246    15981800 :       } else if (c0_ == 'o' || c0_ == 'O') {
    1247             :         kind = OCTAL;
    1248          86 :         AddLiteralCharAdvance();
    1249         172 :         if (!IsOctalDigit(c0_)) {
    1250             :           // we must have at least one octal digit after 'o'/'O'
    1251             :           return Token::ILLEGAL;
    1252             :         }
    1253         412 :         while (IsOctalDigit(c0_)) {
    1254         120 :           AddLiteralCharAdvance();
    1255             :         }
    1256    15981714 :       } else if (c0_ == 'b' || c0_ == 'B') {
    1257             :         kind = BINARY;
    1258          72 :         AddLiteralCharAdvance();
    1259         144 :         if (!IsBinaryDigit(c0_)) {
    1260             :           // we must have at least one binary digit after 'b'/'B'
    1261             :           return Token::ILLEGAL;
    1262             :         }
    1263         356 :         while (IsBinaryDigit(c0_)) {
    1264         106 :           AddLiteralCharAdvance();
    1265             :         }
    1266    16162894 :       } else if ('0' <= c0_ && c0_ <= '7') {
    1267             :         // (possible) octal number
    1268             :         kind = IMPLICIT_OCTAL;
    1269             :         while (true) {
    1270      361817 :           if (c0_ == '8' || c0_ == '9') {
    1271             :             at_start = false;
    1272             :             kind = DECIMAL_WITH_LEADING_ZERO;
    1273             :             break;
    1274             :           }
    1275      361808 :           if (c0_  < '0' || '7'  < c0_) {
    1276             :             // Octal literal finished.
    1277      180556 :             octal_pos_ = Location(start_pos, source_pos());
    1278      180556 :             octal_message_ = MessageTemplate::kStrictOctalLiteral;
    1279      180556 :             break;
    1280             :           }
    1281      181252 :           AddLiteralCharAdvance();
    1282             :         }
    1283    15801077 :       } else if (c0_ == '8' || c0_ == '9') {
    1284             :         kind = DECIMAL_WITH_LEADING_ZERO;
    1285             :       }
    1286             :     }
    1287             : 
    1288             :     // Parse decimal digits and allow trailing fractional part.
    1289    40198870 :     if (kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO) {
    1290    39804211 :       if (at_start) {
    1291             :         uint64_t value = 0;
    1292   214657054 :         while (IsDecimalDigit(c0_)) {
    1293    67524317 :           value = 10 * value + (c0_ - '0');
    1294             : 
    1295             :           uc32 first_char = c0_;
    1296             :           Advance<false, false>();
    1297             :           AddLiteralChar(first_char);
    1298             :         }
    1299             : 
    1300   119361345 :         if (next_.literal_chars->one_byte_literal().length() <= 10 &&
    1301    79411396 :             value <= Smi::kMaxValue && c0_ != '.' &&
    1302    75072820 :             (c0_ == kEndOfInput || !unicode_cache_->IsIdentifierStart(c0_))) {
    1303    37658039 :           next_.smi_value_ = static_cast<uint32_t>(value);
    1304             :           literal.Complete();
    1305    37658039 :           HandleLeadSurrogate();
    1306             : 
    1307    37658039 :           if (kind == DECIMAL_WITH_LEADING_ZERO) {
    1308       44977 :             octal_pos_ = Location(start_pos, source_pos());
    1309       44977 :             octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
    1310             :           }
    1311             :           return Token::SMI;
    1312             :         }
    1313     2146175 :         HandleLeadSurrogate();
    1314             :       }
    1315             : 
    1316             :       ScanDecimalDigits();  // optional
    1317     2146184 :       if (c0_ == '.') {
    1318     1969887 :         AddLiteralCharAdvance();
    1319             :         ScanDecimalDigits();  // optional
    1320             :       }
    1321             :     }
    1322             :   }
    1323             : 
    1324             :   // scan exponent, if any
    1325     2545927 :   if (c0_ == 'e' || c0_ == 'E') {
    1326             :     DCHECK(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number
    1327       23913 :     if (!(kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO))
    1328             :       return Token::ILLEGAL;
    1329             :     // scan exponent
    1330       23913 :     AddLiteralCharAdvance();
    1331       23913 :     if (c0_ == '+' || c0_ == '-')
    1332       13191 :       AddLiteralCharAdvance();
    1333       47816 :     if (!IsDecimalDigit(c0_)) {
    1334             :       // we must have at least one decimal digit after 'e'/'E'
    1335             :       return Token::ILLEGAL;
    1336             :     }
    1337             :     ScanDecimalDigits();
    1338             :   }
    1339             : 
    1340             :   // The source character immediately following a numeric literal must
    1341             :   // not be an identifier start or a decimal digit; see ECMA-262
    1342             :   // section 7.8.3, page 17 (note that we read only one decimal digit
    1343             :   // if the value is 0).
    1344     7637646 :   if (IsDecimalDigit(c0_) ||
    1345     5049756 :       (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))
    1346             :     return Token::ILLEGAL;
    1347             : 
    1348             :   literal.Complete();
    1349             : 
    1350     2545132 :   if (kind == DECIMAL_WITH_LEADING_ZERO) {
    1351           9 :     octal_pos_ = Location(start_pos, source_pos());
    1352           9 :     octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
    1353             :   }
    1354             :   return Token::NUMBER;
    1355             : }
    1356             : // Skips the current character (callers in this file invoke this with c0_ == '\\')
    1357             : // and requires a following 'u'. Returns -1 if it is missing, else the scanned escape.
    1358       19299 : uc32 Scanner::ScanIdentifierUnicodeEscape() {
    1359       19299 :   Advance();
    1360       19299 :   if (c0_ != 'u') return -1;
    1361       18900 :   Advance();  // consume the 'u'
    1362       18900 :   return ScanUnicodeEscape<false>();
    1363             : }
    1364             : // Scans a unicode escape in either \uxxxx or \u{x...} form and returns its value.
    1365             : // A malformed \u{...} escape is reported as kInvalidUnicodeEscapeSequence and yields -1.
    1366             : template <bool capture_raw>
    1367      122834 : uc32 Scanner::ScanUnicodeEscape() {
    1368             :   // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
    1369             :   // hex digits between { } is arbitrary. \ and u have already been read.
    1370       99647 :   if (c0_ == '{') {
    1371       35784 :     int begin = source_pos() - 2;  // two characters back: the already-read '\' and 'u'
    1372       35784 :     Advance<capture_raw>();
    1373       35784 :     uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, begin);  // NOTE(review): 0x10ffff is presumably the largest accepted value - confirm against the helper
    1374       35784 :     if (cp < 0 || c0_ != '}') {  // negative result, or no closing '}'
    1375             :       ReportScannerError(source_pos(),
    1376             :                          MessageTemplate::kInvalidUnicodeEscapeSequence);
    1377             :       return -1;
    1378             :     }
    1379       23400 :     Advance<capture_raw>();  // consume the '}'
    1380       23400 :     return cp;
    1381             :   }
    1382             :   const bool unicode = true;
    1383       63863 :   return ScanHexNumber<capture_raw, unicode>(4);  // \uxxxx form; presumably 4 = expected digit count
    1384             : }
    1385             : 
    1386             : 
    1387             : // ----------------------------------------------------------------------------
    1388             : // Keyword Matcher
    1389             : // KEYWORDS(G, K) expands G(first_char) before each group and K("text", token) per entry, grouped by first letter.
    1390             : #define KEYWORDS(KEYWORD_GROUP, KEYWORD)                    \
    1391             :   KEYWORD_GROUP('a')                                        \
    1392             :   KEYWORD("arguments", Token::ARGUMENTS)                    \
    1393             :   KEYWORD("as", Token::AS)                                  \
    1394             :   KEYWORD("async", Token::ASYNC)                            \
    1395             :   KEYWORD("await", Token::AWAIT)                            \
    1396             :   KEYWORD("anonymous", Token::ANONYMOUS)                    \
    1397             :   KEYWORD_GROUP('b')                                        \
    1398             :   KEYWORD("break", Token::BREAK)                            \
    1399             :   KEYWORD_GROUP('c')                                        \
    1400             :   KEYWORD("case", Token::CASE)                              \
    1401             :   KEYWORD("catch", Token::CATCH)                            \
    1402             :   KEYWORD("class", Token::CLASS)                            \
    1403             :   KEYWORD("const", Token::CONST)                            \
    1404             :   KEYWORD("constructor", Token::CONSTRUCTOR)                \
    1405             :   KEYWORD("continue", Token::CONTINUE)                      \
    1406             :   KEYWORD_GROUP('d')                                        \
    1407             :   KEYWORD("debugger", Token::DEBUGGER)                      \
    1408             :   KEYWORD("default", Token::DEFAULT)                        \
    1409             :   KEYWORD("delete", Token::DELETE)                          \
    1410             :   KEYWORD("do", Token::DO)                                  \
    1411             :   KEYWORD_GROUP('e')                                        \
    1412             :   KEYWORD("else", Token::ELSE)                              \
    1413             :   KEYWORD("enum", Token::ENUM)                              \
    1414             :   KEYWORD("eval", Token::EVAL)                              \
    1415             :   KEYWORD("export", Token::EXPORT)                          \
    1416             :   KEYWORD("extends", Token::EXTENDS)                        \
    1417             :   KEYWORD_GROUP('f')                                        \
    1418             :   KEYWORD("false", Token::FALSE_LITERAL)                    \
    1419             :   KEYWORD("finally", Token::FINALLY)                        \
    1420             :   KEYWORD("for", Token::FOR)                                \
    1421             :   KEYWORD("from", Token::FROM)                              \
    1422             :   KEYWORD("function", Token::FUNCTION)                      \
    1423             :   KEYWORD_GROUP('g')                                        \
    1424             :   KEYWORD("get", Token::GET)                                \
    1425             :   KEYWORD_GROUP('i')                                        \
    1426             :   KEYWORD("if", Token::IF)                                  \
    1427             :   KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
    1428             :   KEYWORD("import", Token::IMPORT)                          \
    1429             :   KEYWORD("in", Token::IN)                                  \
    1430             :   KEYWORD("instanceof", Token::INSTANCEOF)                  \
    1431             :   KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD)  \
    1432             :   KEYWORD_GROUP('l')                                        \
    1433             :   KEYWORD("let", Token::LET)                                \
    1434             :   KEYWORD_GROUP('n')                                        \
    1435             :   KEYWORD("name", Token::NAME)                              \
    1436             :   KEYWORD("new", Token::NEW)                                \
    1437             :   KEYWORD("null", Token::NULL_LITERAL)                      \
    1438             :   KEYWORD_GROUP('o')                                        \
    1439             :   KEYWORD("of", Token::OF)                                  \
    1440             :   KEYWORD_GROUP('p')                                        \
    1441             :   KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD)    \
    1442             :   KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD)    \
    1443             :   KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD)  \
    1444             :   KEYWORD("prototype", Token::PROTOTYPE)                    \
    1445             :   KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD)     \
    1446             :   KEYWORD_GROUP('r')                                        \
    1447             :   KEYWORD("return", Token::RETURN)                          \
    1448             :   KEYWORD_GROUP('s')                                        \
    1449             :   KEYWORD("sent", Token::SENT)                              \
    1450             :   KEYWORD("set", Token::SET)                                \
    1451             :   KEYWORD("static", Token::STATIC)                          \
    1452             :   KEYWORD("super", Token::SUPER)                            \
    1453             :   KEYWORD("switch", Token::SWITCH)                          \
    1454             :   KEYWORD_GROUP('t')                                        \
    1455             :   KEYWORD("target", Token::TARGET)                          \
    1456             :   KEYWORD("this", Token::THIS)                              \
    1457             :   KEYWORD("throw", Token::THROW)                            \
    1458             :   KEYWORD("true", Token::TRUE_LITERAL)                      \
    1459             :   KEYWORD("try", Token::TRY)                                \
    1460             :   KEYWORD("typeof", Token::TYPEOF)                          \
    1461             :   KEYWORD_GROUP('u')                                        \
    1462             :   KEYWORD("undefined", Token::UNDEFINED)                    \
    1463             :   KEYWORD_GROUP('v')                                        \
    1464             :   KEYWORD("var", Token::VAR)                                \
    1465             :   KEYWORD("void", Token::VOID)                              \
    1466             :   KEYWORD_GROUP('w')                                        \
    1467             :   KEYWORD("while", Token::WHILE)                            \
    1468             :   KEYWORD("with", Token::WITH)                              \
    1469             :   KEYWORD_GROUP('y')                                        \
    1470             :   KEYWORD("yield", Token::YIELD)                            \
    1471             :   KEYWORD_GROUP('_')                                        \
    1472             :   KEYWORD("__proto__", Token::PROTO_UNDERSCORED)
    1473             : // Returns the token of the KEYWORDS entry that equals input[0..input_length), or Token::IDENTIFIER if none does.
    1474   145769407 : static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
    1475             :                                              int input_length) {
    1476             :   DCHECK(input_length >= 1);
    1477             :   const int kMinLength = 2;   // every table entry is at least this long (STATIC_ASSERT below)
    1478             :   const int kMaxLength = 11;  // every table entry is at most this long (STATIC_ASSERT below)
    1479   145769407 :   if (input_length < kMinLength || input_length > kMaxLength) {
    1480             :     return Token::IDENTIFIER;  // too short or too long to match any entry
    1481             :   }
    1482   120043692 :   switch (input[0]) {  // one case label per KEYWORD_GROUP first character
    1483             :     default:
    1484             : #define KEYWORD_GROUP_CASE(ch)                                \
    1485             :       break;                                                  \
    1486             :     case ch:
    1487             : #define KEYWORD(keyword, token)                                           \
    1488             :   {                                                                       \
    1489             :     /* 'keyword' is a char array, so sizeof(keyword) is */                \
    1490             :     /* strlen(keyword) plus 1 for the NUL char. */                        \
    1491             :     const int keyword_length = sizeof(keyword) - 1;                       \
    1492             :     STATIC_ASSERT(keyword_length >= kMinLength);                          \
    1493             :     STATIC_ASSERT(keyword_length <= kMaxLength);                          \
    1494             :     DCHECK_EQ(input[0], keyword[0]);                                      \
    1495             :     DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD ||                 \
    1496             :            0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
    1497             :     if (input_length == keyword_length && input[1] == keyword[1] &&       \
    1498             :         (keyword_length <= 2 || input[2] == keyword[2]) &&                \
    1499             :         (keyword_length <= 3 || input[3] == keyword[3]) &&                \
    1500             :         (keyword_length <= 4 || input[4] == keyword[4]) &&                \
    1501             :         (keyword_length <= 5 || input[5] == keyword[5]) &&                \
    1502             :         (keyword_length <= 6 || input[6] == keyword[6]) &&                \
    1503             :         (keyword_length <= 7 || input[7] == keyword[7]) &&                \
    1504             :         (keyword_length <= 8 || input[8] == keyword[8]) &&                \
    1505             :         (keyword_length <= 9 || input[9] == keyword[9]) &&                \
    1506             :         (keyword_length <= 10 || input[10] == keyword[10])) {             \
    1507             :       return token;                                                       \
    1508             :     }                                                                     \
    1509             :   }
    1510     3532070 :       KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)  // expands to the case labels and per-keyword comparisons
    1511             :   }
    1512    57724097 :   return Token::IDENTIFIER;
    1513             : }
    1514             : // Scans an identifier or keyword starting at c0_. Pure-ASCII identifiers take the fast
    1515             : // paths below; escapes and non-ASCII characters fall through to the general loop.
    1516  2644154333 : Token::Value Scanner::ScanIdentifierOrKeyword() {
    1517             :   DCHECK(unicode_cache_->IsIdentifierStart(c0_));
    1518             :   LiteralScope literal(this);
    1519   400333066 :   if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {  // starts with a-z or '_': may still be a keyword
    1520   835382431 :     do {
    1521   835382082 :       char first_char = static_cast<char>(c0_);
    1522             :       Advance<false, false>();
    1523             :       AddLiteralChar(first_char);
    1524  1670764862 :     } while (IsInRange(c0_, 'a', 'z') || c0_ == '_');
    1525             : 
    1526   350490144 :     if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||
    1527             :         c0_ == '$') {  // note: c0_ == '_' cannot hold here; the loop above consumed every '_'
    1528             :       // Identifier starting with lowercase.
    1529    33957969 :       char first_char = static_cast<char>(c0_);
    1530             :       Advance<false, false>();
    1531             :       AddLiteralChar(first_char);
    1532   396202960 :       while (IsAsciiIdentifier(c0_)) {
    1533   164143506 :         char first_char = static_cast<char>(c0_);
    1534             :         Advance<false, false>();
    1535             :         AddLiteralChar(first_char);
    1536             :       }
    1537    33957974 :       if (c0_ <= kMaxAscii && c0_ != '\\') {  // within kMaxAscii and not a backslash: finished
    1538             :         literal.Complete();
    1539    33957977 :         return Token::IDENTIFIER;
    1540             :       }
    1541   145767976 :     } else if (c0_ <= kMaxAscii && c0_ != '\\') {
    1542             :       // Only a-z+ or _: could be a keyword or identifier.
    1543   145752351 :       Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    1544             :       Token::Value token =
    1545   145751719 :           KeywordOrIdentifierToken(chars.start(), chars.length());
    1546   291503492 :       if (token == Token::IDENTIFIER ||
    1547   208017312 :           token == Token::FUTURE_STRICT_RESERVED_WORD ||
    1548             :           Token::IsContextualKeyword(token))
    1549             :         literal.Complete();  // any other keyword token is returned without completing the literal
    1550             :       return token;
    1551             :     }
    1552             : 
    1553       16254 :     HandleLeadSurrogate();
    1554    20440937 :   } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') {  // c0_ == '_' is already taken by the branch above
    1555   188508276 :     do {
    1556   188508269 :       char first_char = static_cast<char>(c0_);
    1557             :       Advance<false, false>();
    1558             :       AddLiteralChar(first_char);
    1559   188508276 :     } while (IsAsciiIdentifier(c0_));
    1560             : 
    1561    20437429 :     if (c0_ <= kMaxAscii && c0_ != '\\') {
    1562             :       literal.Complete();
    1563    20437361 :       return Token::IDENTIFIER;
    1564             :     }
    1565             : 
    1566          68 :     HandleLeadSurrogate();
    1567        3515 :   } else if (c0_ == '\\') {
    1568             :     // Scan identifier start character.
    1569        3363 :     uc32 c = ScanIdentifierUnicodeEscape();
    1570             :     // Only allow legal identifier start characters.
    1571       10089 :     if (c < 0 ||
    1572        9636 :         c == '\\' ||  // No recursive escapes.
    1573        2910 :         !unicode_cache_->IsIdentifierStart(c)) {
    1574             :       return Token::ILLEGAL;
    1575             :     }
    1576             :     AddLiteralChar(c);
    1577        2851 :     return ScanIdentifierSuffix(&literal, true);  // true: an escape has been seen
    1578             :   } else {
    1579             :     uc32 first_char = c0_;  // any other start character accepted by the DCHECK above
    1580         152 :     Advance();
    1581             :     AddLiteralChar(first_char);
    1582             :   }
    1583             : 
    1584             :   // Scan the rest of the identifier characters.
    1585       34539 :   while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
    1586       16710 :     if (c0_ != '\\') {
    1587             :       uc32 next_char = c0_;
    1588         854 :       Advance();
    1589             :       AddLiteralChar(next_char);
    1590             :       continue;
    1591             :     }
    1592             :     // Fallthrough if no longer able to complete keyword.
    1593       15856 :     return ScanIdentifierSuffix(&literal, false);  // c0_ is a backslash; the suffix scanner decodes it
    1594             :   }
    1595             : 
    1596        1264 :   if (next_.literal_chars->is_one_byte()) {  // only one-byte literals are checked against the keyword table
    1597             :     Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    1598             :     Token::Value token =
    1599          51 :         KeywordOrIdentifierToken(chars.start(), chars.length());
    1600         102 :     if (token == Token::IDENTIFIER ||
    1601          51 :         token == Token::FUTURE_STRICT_RESERVED_WORD ||
    1602             :         Token::IsContextualKeyword(token))
    1603             :       literal.Complete();
    1604             :     return token;
    1605             :   }
    1606             :   literal.Complete();
    1607         581 :   return Token::IDENTIFIER;
    1608             : }
    1609             : // Scans the remaining identifier characters, decoding \u escapes; |escaped| tells whether one was
    1610             : // already seen. Returns ILLEGAL on a bad escape; escaped keyword spellings map to ESCAPED_* tokens.
    1611       18707 : Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
    1612       63592 :                                            bool escaped) {
    1613             :   // Scan the rest of the identifier characters.
    1614      182939 :   while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
    1615       64292 :     if (c0_ == '\\') {
    1616       15936 :       uc32 c = ScanIdentifierUnicodeEscape();
    1617             :       escaped = true;
    1618             :       // Only allow legal identifier part characters.
    1619       47808 :       if (c < 0 ||
    1620       47136 :           c == '\\' ||  // an escape may not itself decode to a backslash
    1621       15264 :           !unicode_cache_->IsIdentifierPart(c)) {
    1622             :         return Token::ILLEGAL;
    1623             :       }
    1624             :       AddLiteralChar(c);
    1625             :     } else {
    1626             :       AddLiteralChar(c0_);
    1627       48356 :       Advance();
    1628             :     }
    1629             :   }
    1630             :   literal->Complete();
    1631             : 
    1632       18007 :   if (escaped && next_.literal_chars->is_one_byte()) {  // keyword lookup only when an escape was seen
    1633       17592 :     Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    1634             :     Token::Value token =
    1635       17592 :         KeywordOrIdentifierToken(chars.start(), chars.length());
    1636             :     /* TODO(adamk): YIELD should be handled specially. */
    1637       34102 :     if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
    1638             :       return token;  // plain identifiers and contextual keywords pass through unchanged
    1639       32300 :     } else if (token == Token::FUTURE_STRICT_RESERVED_WORD ||
    1640       28204 :                token == Token::LET || token == Token::STATIC) {
    1641             :       return Token::ESCAPED_STRICT_RESERVED_WORD;
    1642             :     } else {
    1643        9510 :       return Token::ESCAPED_KEYWORD;  // every remaining keyword token
    1644             :     }
    1645             :   }
    1646             :   return Token::IDENTIFIER;
    1647             : }
    1648             : // Scans a regexp body up to its closing '/'. Returns false on end of input or a line terminator; on success marks next_ as REGEXP_LITERAL.
    1649      259362 : bool Scanner::ScanRegExpPattern() {
    1650             :   DCHECK(next_next_.token == Token::UNINITIALIZED);
    1651             :   DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
    1652             : 
    1653             :   // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
    1654             :   bool in_character_class = false;
    1655      129681 :   bool seen_equal = (next_.token == Token::ASSIGN_DIV);
    1656             : 
    1657             :   // Previous token is either '/' or '/=', in the second case, the
    1658             :   // pattern starts at =.
    1659      129681 :   next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
    1660      129681 :   next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
    1661             : 
    1662             :   // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
    1663             :   // the scanner should pass uninterpreted bodies to the RegExp
    1664             :   // constructor.
    1665             :   LiteralScope literal(this);
    1666      129681 :   if (seen_equal) {
    1667             :     AddLiteralChar('=');  // the '=' of '/=' is the first pattern character
    1668             :   }
    1669             : 
    1670     1243318 :   while (c0_ != '/' || in_character_class) {  // a '/' inside [...] does not end the body
    1671     2227575 :     if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
    1672             :       return false;
    1673     1113643 :     if (c0_ == '\\') {  // Escape sequence.
    1674      148205 :       AddLiteralCharAdvance();
    1675      296404 :       if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
    1676             :         return false;
    1677      148199 :       AddLiteralCharAdvance();
    1678             :       // If the escape allows more characters, i.e., \x??, \u????, or \c?,
    1679             :       // only "safe" characters are allowed (letters, digits, underscore),
    1680             :       // otherwise the escape isn't valid and the invalid character has
    1681             :       // its normal meaning. I.e., we can just continue scanning without
    1682             :       // worrying whether the following characters are part of the escape
    1683             :       // or not, since any '/', '\\' or '[' is guaranteed to not be part
    1684             :       // of the escape sequence.
    1685             : 
    1686             :       // TODO(896): At some point, parse RegExps more thoroughly to capture
    1687             :       // octal escapes in strict mode.
    1688             :     } else {  // Unescaped character.
    1689      965438 :       if (c0_ == '[') in_character_class = true;
    1690      965438 :       if (c0_ == ']') in_character_class = false;
    1691      965438 :       AddLiteralCharAdvance();
    1692             :     }
    1693             :   }
    1694      129386 :   Advance();  // consume '/'
    1695             : 
    1696             :   literal.Complete();
    1697      129386 :   next_.token = Token::REGEXP_LITERAL;
    1698      129386 :   next_.contextual_token = Token::UNINITIALIZED;
    1699      129386 :   return true;
    1700             : }
    1701             : // Scans the flag characters after a regexp literal. Returns Nothing for an unknown flag, a repeated
    1702             : // flag, or 's' while FLAG_harmony_regexp_dotall is off; otherwise Just of the combined flags.
    1703      257553 : Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
    1704             :   DCHECK(next_.token == Token::REGEXP_LITERAL);
    1705             : 
    1706             :   // Scan regular expression flags.
    1707             :   int flags = 0;
    1708      547139 :   while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
    1709             :     RegExp::Flags flag = RegExp::kNone;
    1710       81710 :     switch (c0_) {
    1711             :       case 'g':
    1712             :         flag = RegExp::kGlobal;
    1713             :         break;
    1714             :       case 'i':
    1715             :         flag = RegExp::kIgnoreCase;
    1716        5495 :         break;
    1717             :       case 'm':
    1718             :         flag = RegExp::kMultiline;
    1719        8703 :         break;
    1720             :       case 's':
    1721          56 :         if (FLAG_harmony_regexp_dotall) {  // 's' is accepted only when this flag is set
    1722             :           flag = RegExp::kDotAll;
    1723             :         } else {
    1724             :           return Nothing<RegExp::Flags>();
    1725             :         }
    1726             :         break;
    1727             :       case 'u':
    1728             :         flag = RegExp::kUnicode;
    1729       23894 :         break;
    1730             :       case 'y':
    1731             :         flag = RegExp::kSticky;
    1732         100 :         break;
    1733             :       default:
    1734             :         return Nothing<RegExp::Flags>();  // identifier-part character that is not a known flag
    1735             :     }
    1736       80964 :     if (flags & flag) {  // this flag was already seen
    1737             :       return Nothing<RegExp::Flags>();
    1738             :     }
    1739       80755 :     Advance();
    1740       80755 :     flags |= flag;
    1741             :   }
    1742             : 
    1743      128299 :   next_.location.end_pos = source_pos();  // extend the literal's location past the flags
    1744      128299 :   return Just(RegExp::Flags(flags));
    1745             : }
    1746             : // Returns the AstRawString that |ast_value_factory| provides for the current literal (one- or two-byte lookup).
    1747   142597687 : const AstRawString* Scanner::CurrentSymbol(
    1748   142597687 :     AstValueFactory* ast_value_factory) const {
    1749   142597687 :   if (is_literal_one_byte()) {
    1750   142534818 :     return ast_value_factory->GetOneByteString(literal_one_byte_string());
    1751             :   }
    1752       63043 :   return ast_value_factory->GetTwoByteString(literal_two_byte_string());
    1753             : }
    1754             : // Same lookup as CurrentSymbol, but for the next literal (next_literal_* accessors).
    1755       98348 : const AstRawString* Scanner::NextSymbol(
    1756       98348 :     AstValueFactory* ast_value_factory) const {
    1757       98348 :   if (is_next_literal_one_byte()) {
    1758       95229 :     return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
    1759             :   }
    1760        3119 :   return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
    1761             : }
    1762             : // Same lookup as CurrentSymbol, but for the raw literal (raw_literal_* accessors).
    1763       70792 : const AstRawString* Scanner::CurrentRawSymbol(
    1764       70792 :     AstValueFactory* ast_value_factory) const {
    1765       70792 :   if (is_raw_literal_one_byte()) {
    1766       70763 :     return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
    1767             :   }
    1768          29 :   return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
    1769             : }
    1770             : // Converts the current literal to a double with StringToDouble, allowing hex, octal,
    1771             : // implicit-octal and binary forms. The literal is expected to be one-byte (DCHECK).
    1772     2562778 : double Scanner::DoubleValue() {
    1773             :   DCHECK(is_literal_one_byte());
    1774             :   return StringToDouble(
    1775             :       unicode_cache_,
    1776             :       literal_one_byte_string(),
    1777     2562778 :       ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
    1778             : }
    1779             : // Returns true if the current literal contains a '.' character.
    1780             : // The literal is expected to be one-byte (DCHECK).
    1781     1745359 : bool Scanner::ContainsDot() {
    1782             :   DCHECK(is_literal_one_byte());
    1783             :   Vector<const uint8_t> str = literal_one_byte_string();
    1784     3490718 :   return std::find(str.begin(), str.end(), '.') != str.end();
    1785             : }
    1786             : 
    1787    11242565 : bool Scanner::IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
    1788             :                                 AstValueFactory* ast_value_factory) const {
                         // Inserts the current symbol (obtained via CurrentSymbol()) into
                         // |duplicate_finder|->known_symbols_ and returns true when insert()
                         // reports, through a false .second, that no new element was added.
                         // Both pointer arguments must be non-null (DCHECKed).
                         // NOTE(review): presumably known_symbols_ is a std::set-like container,
                         // so "no new element" means the symbol was already recorded - confirm
                         // against duplicate-finder.h.
    1789             :   DCHECK_NOT_NULL(duplicate_finder);
    1790             :   DCHECK_NOT_NULL(ast_value_factory);
    1791    11242565 :   const AstRawString* string = CurrentSymbol(ast_value_factory);
    1792    22485132 :   return !duplicate_finder->known_symbols_.insert(string).second;
    1793             : }
    1794             : 
    1795         178 : void Scanner::SeekNext(size_t position) {
                         // Makes scanning resume at source |position|: resets current_, next_ and
                         // next_next_, seeks source_ to |position|, reloads the look-ahead
                         // character c0_ and calls Next() once. Afterwards next_ is expected to
                         // begin at |position| (DCHECKed at the end of the function).
    1796             :   // Use with care: This cleanly resets most, but not all scanner state.
    1797             :   // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
    1798             : 
    1799             :   // To re-scan from a given character position, we need to:
    1800             :   // 1, Reset the current_, next_ and next_next_ tokens
    1801             :   //    (next_ + next_next_ will be overwritten by Next(),
    1802             :   //     current_ will remain unchanged, so overwrite it fully.)
    1803             :   current_ = {{0, 0},
    1804             :               nullptr,
    1805             :               nullptr,
    1806             :               0,
    1807             :               Token::UNINITIALIZED,
    1808             :               MessageTemplate::kNone,
    1809             :               {0, 0},
    1810         178 :               Token::UNINITIALIZED};
    1811         178 :   next_.token = Token::UNINITIALIZED;
    1812         178 :   next_.contextual_token = Token::UNINITIALIZED;
    1813         178 :   next_next_.token = Token::UNINITIALIZED;
    1814         178 :   next_next_.contextual_token = Token::UNINITIALIZED;
    1815             :   // 2, reset the source to the desired position,
    1816         178 :   source_->Seek(position);
    1817             :   // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
    1818         178 :   c0_ = source_->Advance();
    1819         178 :   Next();
                         // Sanity check: the freshly scanned next_ token must start exactly at
                         // the requested position.
    1820             :   DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
    1821         178 : }
    1822             : 
    1823             : }  // namespace internal
    1824             : }  // namespace v8

Generated by: LCOV version 1.10