LCOV - code coverage report
Current view: top level - src/parsing - scanner.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 404 411 98.3 %
Date: 2019-02-19 Functions: 58 61 95.1 %

          Line data    Source code
       1             : // Copyright 2011 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : // Features shared by parsing and pre-parsing scanners.
       6             : 
       7             : #include "src/parsing/scanner.h"
       8             : 
       9             : #include <stdint.h>
      10             : 
      11             : #include <cmath>
      12             : 
      13             : #include "src/ast/ast-value-factory.h"
      14             : #include "src/conversions-inl.h"
      15             : #include "src/objects/bigint.h"
      16             : #include "src/parsing/scanner-inl.h"
      17             : #include "src/zone/zone.h"
      18             : 
      19             : namespace v8 {
      20             : namespace internal {
      21             : 
      22             : class Scanner::ErrorState {
      23             :  public:
      24             :   ErrorState(MessageTemplate* message_stack, Scanner::Location* location_stack)
      25             :       : message_stack_(message_stack),
      26             :         old_message_(*message_stack),
      27             :         location_stack_(location_stack),
      28      308328 :         old_location_(*location_stack) {
      29      308328 :     *message_stack_ = MessageTemplate::kNone;
      30      308328 :     *location_stack_ = Location::invalid();
      31             :   }
      32             : 
      33             :   ~ErrorState() {
      34      308406 :     *message_stack_ = old_message_;
      35      308406 :     *location_stack_ = old_location_;
      36             :   }
      37             : 
      38             :   void MoveErrorTo(TokenDesc* dest) {
      39       30030 :     if (*message_stack_ == MessageTemplate::kNone) {
      40             :       return;
      41             :     }
      42       13078 :     if (dest->invalid_template_escape_message == MessageTemplate::kNone) {
      43       13078 :       dest->invalid_template_escape_message = *message_stack_;
      44       13078 :       dest->invalid_template_escape_location = *location_stack_;
      45             :     }
      46       13078 :     *message_stack_ = MessageTemplate::kNone;
      47       13078 :     *location_stack_ = Location::invalid();
      48             :   }
      49             : 
      50             :  private:
      51             :   MessageTemplate* const message_stack_;
      52             :   MessageTemplate const old_message_;
      53             :   Scanner::Location* const location_stack_;
      54             :   Scanner::Location const old_location_;
      55             : };
      56             : 
      57             : // ----------------------------------------------------------------------------
      58             : // Scanner::LiteralBuffer
      59             : 
      60        3903 : Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
      61        3903 :   if (is_one_byte()) {
      62        3903 :     return isolate->factory()->InternalizeOneByteString(one_byte_literal());
      63             :   }
      64           0 :   return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
      65             : }
      66             : 
      67           0 : int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
      68             :   return min_capacity < (kMaxGrowth / (kGrowthFactor - 1))
      69             :              ? min_capacity * kGrowthFactor
      70     5743276 :              : min_capacity + kMaxGrowth;
      71             : }
      72             : 
      73     5740957 : void Scanner::LiteralBuffer::ExpandBuffer() {
      74     5886313 :   int min_capacity = Max(kInitialCapacity, backing_store_.length());
      75             :   Vector<byte> new_store = Vector<byte>::New(NewCapacity(min_capacity));
      76     5741282 :   if (position_ > 0) {
      77      145356 :     MemCopy(new_store.start(), backing_store_.start(), position_);
      78             :   }
      79             :   backing_store_.Dispose();
      80     5741285 :   backing_store_ = new_store;
      81     5741285 : }
      82             : 
      83       77019 : void Scanner::LiteralBuffer::ConvertToTwoByte() {
      84             :   DCHECK(is_one_byte());
      85             :   Vector<byte> new_store;
      86       77019 :   int new_content_size = position_ * kUC16Size;
      87      308086 :   if (new_content_size >= backing_store_.length()) {
      88             :     // Ensure room for all currently read code units as UC16 as well
      89             :     // as the code unit about to be stored.
      90             :     new_store = Vector<byte>::New(NewCapacity(new_content_size));
      91             :   } else {
      92       74700 :     new_store = backing_store_;
      93             :   }
      94             :   uint8_t* src = backing_store_.start();
      95             :   uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
      96      192549 :   for (int i = position_ - 1; i >= 0; i--) {
      97      115525 :     dst[i] = src[i];
      98             :   }
      99       77024 :   if (new_store.start() != backing_store_.start()) {
     100             :     backing_store_.Dispose();
     101        2319 :     backing_store_ = new_store;
     102             :   }
     103       77024 :   position_ = new_content_size;
     104       77024 :   is_one_byte_ = false;
     105       77024 : }
     106             : 
     107     1307150 : void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
     108             :   DCHECK(!is_one_byte());
     109     3975549 :   if (position_ >= backing_store_.length()) ExpandBuffer();
     110     1308056 :   if (code_unit <=
     111             :       static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
     112     2562920 :     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
     113     1281460 :     position_ += kUC16Size;
     114             :   } else {
     115       26596 :     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
     116       53192 :         unibrow::Utf16::LeadSurrogate(code_unit);
     117       26596 :     position_ += kUC16Size;
     118       26596 :     if (position_ >= backing_store_.length()) ExpandBuffer();
     119       26597 :     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
     120       26597 :         unibrow::Utf16::TrailSurrogate(code_unit);
     121       26597 :     position_ += kUC16Size;
     122             :   }
     123     1308057 : }
     124             : 
     125             : // ----------------------------------------------------------------------------
     126             : // Scanner::BookmarkScope
     127             : 
     128             : const size_t Scanner::BookmarkScope::kNoBookmark =
     129             :     std::numeric_limits<size_t>::max() - 1;
     130             : const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
     131             :     std::numeric_limits<size_t>::max();
     132             : 
     133     2510259 : void Scanner::BookmarkScope::Set(size_t position) {
     134             :   DCHECK_EQ(bookmark_, kNoBookmark);
     135     2510259 :   bookmark_ = position;
     136     2510259 : }
     137             : 
     138       48183 : void Scanner::BookmarkScope::Apply() {
     139             :   DCHECK(HasBeenSet());  // Caller hasn't called SetBookmark.
     140       48183 :   if (had_parser_error_) {
     141       48183 :     scanner_->set_parser_error();
     142             :   } else {
     143       48183 :     scanner_->reset_parser_error_flag();
     144       48183 :     scanner_->SeekNext(bookmark_);
     145             :   }
     146       48182 :   bookmark_ = kBookmarkWasApplied;
     147       48182 : }
     148             : 
     149           0 : bool Scanner::BookmarkScope::HasBeenSet() const {
     150           0 :   return bookmark_ != kNoBookmark && bookmark_ != kBookmarkWasApplied;
     151             : }
     152             : 
     153           0 : bool Scanner::BookmarkScope::HasBeenApplied() const {
     154           0 :   return bookmark_ == kBookmarkWasApplied;
     155             : }
     156             : 
     157             : // ----------------------------------------------------------------------------
     158             : // Scanner
     159             : 
     160     2959452 : Scanner::Scanner(Utf16CharacterStream* source, bool is_module)
     161             :     : source_(source),
     162             :       found_html_comment_(false),
     163             :       allow_harmony_numeric_separator_(false),
     164             :       is_module_(is_module),
     165             :       octal_pos_(Location::invalid()),
     166    14797266 :       octal_message_(MessageTemplate::kNone) {
     167             :   DCHECK_NOT_NULL(source);
     168     2959452 : }
     169             : 
     170     5918890 : void Scanner::Initialize() {
     171             :   // Need to capture identifiers in order to recognize "get" and "set"
     172             :   // in object literals.
     173     2959385 :   Init();
     174     2959505 :   next().after_line_terminator = true;
     175             :   Scan();
     176     2959492 : }
     177             : 
     178             : template <bool capture_raw, bool unicode>
     179    76631466 : uc32 Scanner::ScanHexNumber(int expected_length) {
     180             :   DCHECK_LE(expected_length, 4);  // prevent overflow
     181             : 
     182    25495531 :   int begin = source_pos() - 2;
     183             :   uc32 x = 0;
     184    76633690 :   for (int i = 0; i < expected_length; i++) {
     185    51149175 :     int d = HexValue(c0_);
     186    51149175 :     if (d < 0) {
     187             :       ReportScannerError(Location(begin, begin + expected_length + 2),
     188             :                          unicode
     189             :                              ? MessageTemplate::kInvalidUnicodeEscapeSequence
     190        6812 :                              : MessageTemplate::kInvalidHexEscapeSequence);
     191             :       return -1;
     192             :     }
     193    51142363 :     x = x * 16 + d;
     194        6428 :     Advance<capture_raw>();
     195             :   }
     196             : 
     197             :   return x;
     198             : }
     199             : 
     200             : template <bool capture_raw>
     201      217470 : uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
     202             :   uc32 x = 0;
     203       44583 :   int d = HexValue(c0_);
     204       44583 :   if (d < 0) return -1;
     205             : 
     206      219573 :   while (d >= 0) {
     207      179776 :     x = x * 16 + d;
     208      179776 :     if (x > max_value) {
     209             :       ReportScannerError(Location(beg_pos, source_pos() + 1),
     210             :                          MessageTemplate::kUndefinedUnicodeCodePoint);
     211             :       return -1;
     212             :     }
     213        6889 :     Advance<capture_raw>();
     214        6889 :     d = HexValue(c0_);
     215             :   }
     216             : 
     217             :   return x;
     218             : }
     219             : 
     220  1160868529 : Token::Value Scanner::Next() {
     221             :   // Rotate through tokens.
     222   386970098 :   TokenDesc* previous = current_;
     223   386970098 :   current_ = next_;
     224             :   // Either we already have the next token lined up, in which case next_next_
     225             :   // simply becomes next_. In that case we use current_ as new next_next_ and
     226             :   // clear its token to indicate that it wasn't scanned yet. Otherwise we use
     227             :   // current_ as next_ and scan into it, leaving next_next_ uninitialized.
     228   386970098 :   if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
     229   384370546 :     next_ = previous;
     230             :     // Use 'previous' instead of 'next_' because for some reason the compiler
     231             :     // thinks 'next_' could be modified before the entry into Scan.
     232   384370546 :     previous->after_line_terminator = false;
     233             :     Scan(previous);
     234             :   } else {
     235     2599552 :     next_ = next_next_;
     236     2599552 :     next_next_ = previous;
     237     2599552 :     previous->token = Token::UNINITIALIZED;
     238             :     DCHECK_NE(Token::UNINITIALIZED, current().token);
     239             :   }
     240   386928333 :   return current().token;
     241             : }
     242             : 
     243     2084657 : Token::Value Scanner::PeekAhead() {
     244             :   DCHECK(next().token != Token::DIV);
     245             :   DCHECK(next().token != Token::ASSIGN_DIV);
     246             : 
     247     2084657 :   if (next_next().token != Token::UNINITIALIZED) {
     248             :     return next_next().token;
     249             :   }
     250     1913246 :   TokenDesc* temp = next_;
     251     1913246 :   next_ = next_next_;
     252     1913246 :   next().after_line_terminator = false;
     253             :   Scan();
     254     1913648 :   next_next_ = next_;
     255     1913648 :   next_ = temp;
     256     1913648 :   return next_next().token;
     257             : }
     258             : 
     259         198 : Token::Value Scanner::SkipSingleHTMLComment() {
     260         193 :   if (is_module_) {
     261             :     ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
     262             :     return Token::ILLEGAL;
     263             :   }
     264         188 :   return SkipSingleLineComment();
     265             : }
     266             : 
     267     7119478 : Token::Value Scanner::SkipSingleLineComment() {
     268             :   // The line terminator at the end of the line is not considered
     269             :   // to be part of the single-line comment; it is recognized
     270             :   // separately by the lexical grammar and becomes part of the
     271             :   // stream of input elements for the syntactic grammar (see
     272             :   // ECMA-262, section 7.4).
     273   364750189 :   AdvanceUntil([](uc32 c0_) { return unibrow::IsLineTerminator(c0_); });
     274             : 
     275     7124144 :   return Token::WHITESPACE;
     276             : }
     277             : 
     278        4617 : Token::Value Scanner::SkipSourceURLComment() {
     279        4005 :   TryToParseSourceURLComment();
     280        9553 :   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
     281             :     Advance();
     282             :   }
     283             : 
     284        4005 :   return Token::WHITESPACE;
     285             : }
     286             : 
     287       88557 : void Scanner::TryToParseSourceURLComment() {
     288             :   // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
     289             :   // function will just return if it cannot parse a magic comment.
     290             :   DCHECK(!IsWhiteSpaceOrLineTerminator(kEndOfInput));
     291        8077 :   if (!IsWhiteSpace(c0_)) return;
     292             :   Advance();
     293             :   LiteralBuffer name;
     294             :   name.Start();
     295             : 
     296      122320 :   while (c0_ != kEndOfInput && !IsWhiteSpaceOrLineTerminator(c0_) &&
     297       40760 :          c0_ != '=') {
     298       36784 :     name.AddChar(c0_);
     299             :     Advance();
     300             :   }
     301        3996 :   if (!name.is_one_byte()) return;
     302             :   Vector<const uint8_t> name_literal = name.one_byte_literal();
     303             :   LiteralBuffer* value;
     304        3996 :   if (name_literal == StaticCharVector("sourceURL")) {
     305        3838 :     value = &source_url_;
     306         158 :   } else if (name_literal == StaticCharVector("sourceMappingURL")) {
     307         130 :     value = &source_mapping_url_;
     308             :   } else {
     309             :     return;
     310             :   }
     311        3968 :   if (c0_ != '=')
     312             :     return;
     313             :   value->Start();
     314             :   Advance();
     315        7956 :   while (IsWhiteSpace(c0_)) {
     316             :     Advance();
     317             :   }
     318       83755 :   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
     319             :     // Disallowed characters.
     320       39804 :     if (c0_ == '"' || c0_ == '\'') {
     321             :       value->Start();
     322             :       return;
     323             :     }
     324       39784 :     if (IsWhiteSpace(c0_)) {
     325             :       break;
     326             :     }
     327       39744 :     value->AddChar(c0_);
     328             :     Advance();
     329             :   }
     330             :   // Allow whitespace at the end.
     331        4317 :   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
     332          70 :     if (!IsWhiteSpace(c0_)) {
     333             :       value->Start();
     334             :       break;
     335             :     }
     336             :     Advance();
     337             :   }
     338             : }
     339             : 
     340     6811485 : Token::Value Scanner::SkipMultiLineComment() {
     341             :   DCHECK_EQ(c0_, '*');
     342             :   Advance();
     343             : 
     344     6566198 :   while (c0_ != kEndOfInput) {
     345             :     DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
     346     6632279 :     if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
     347             :       // Following ECMA-262, section 7.4, a comment containing
     348             :       // a newline will make the comment count as a line-terminator.
     349         462 :       next().after_line_terminator = true;
     350             :     }
     351             : 
     352     6696868 :     while (V8_UNLIKELY(c0_ == '*')) {
     353             :       Advance();
     354      187842 :       if (c0_ == '/') {
     355             :         Advance();
     356       57315 :         return Token::WHITESPACE;
     357             :       }
     358             :     }
     359             :     Advance();
     360             :   }
     361             : 
     362             :   // Unterminated multi-line comment.
     363             :   return Token::ILLEGAL;
     364             : }
     365             : 
     366       25576 : void Scanner::SkipHashBang() {
     367       25576 :   if (c0_ == '#' && Peek() == '!' && source_pos() == 0) {
     368         400 :     SkipSingleLineComment();
     369             :     Scan();
     370             :   }
     371       20776 : }
     372             : 
     373         200 : Token::Value Scanner::ScanHtmlComment() {
     374             :   // Check for <!-- comments.
     375             :   DCHECK_EQ(c0_, '!');
     376             :   Advance();
     377         128 :   if (c0_ != '-' || Peek() != '-') {
     378             :     PushBack('!');  // undo Advance()
     379          33 :     return Token::LT;
     380             :   }
     381             :   Advance();
     382             : 
     383          39 :   found_html_comment_ = true;
     384          39 :   return SkipSingleHTMLComment();
     385             : }
     386             : 
     387             : #ifdef DEBUG
     388             : void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
     389             :   // Only TEMPLATE_* tokens can have an invalid_template_escape_message.
     390             :   // ILLEGAL and UNINITIALIZED can have garbage for the field.
     391             : 
     392             :   switch (token.token) {
     393             :     case Token::UNINITIALIZED:
     394             :     case Token::ILLEGAL:
     395             :       // token.literal_chars & other members might be garbage. That's ok.
     396             :     case Token::TEMPLATE_SPAN:
     397             :     case Token::TEMPLATE_TAIL:
     398             :       break;
     399             :     default:
     400             :       DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
     401             :       break;
     402             :   }
     403             : }
     404             : #endif  // DEBUG
     405             : 
     406      241651 : void Scanner::SeekForward(int pos) {
     407             :   // After this call, we will have the token at the given position as
     408             :   // the "next" token. The "current" token will be invalid.
     409      120838 :   if (pos == next().location.beg_pos) return;
     410             :   int current_pos = source_pos();
     411             :   DCHECK_EQ(next().location.end_pos, current_pos);
     412             :   // Positions inside the lookahead token aren't supported.
     413             :   DCHECK(pos >= current_pos);
     414       60414 :   if (pos != current_pos) {
     415       60409 :     source_->Seek(pos);
     416             :     Advance();
     417             :     // This function is only called to seek to the location
     418             :     // of the end of a function (at the "}" token). It doesn't matter
     419             :     // whether there was a line terminator in the part we skip.
     420       60409 :     next().after_line_terminator = false;
     421             :   }
     422             :   Scan();
     423             : }
     424             : 
     425             : template <bool capture_raw>
     426    52215286 : bool Scanner::ScanEscape() {
     427    26115142 :   uc32 c = c0_;
     428       15016 :   Advance<capture_raw>();
     429             : 
     430             :   // Skip escaped newlines.
     431             :   DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
     432    52207028 :   if (!capture_raw && unibrow::IsLineTerminator(c)) {
     433             :     // Allow escaped CR+LF newlines in multiline string literals.
     434       11052 :     if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
     435             :     return true;
     436             :   }
     437             : 
     438    26107477 :   switch (c) {
     439             :     case '\'':  // fall through
     440             :     case '"' :  // fall through
     441             :     case '\\': break;
     442          59 :     case 'b' : c = '\b'; break;
     443         107 :     case 'f' : c = '\f'; break;
     444      466753 :     case 'n' : c = '\n'; break;
     445         729 :     case 'r' : c = '\r'; break;
     446         401 :     case 't' : c = '\t'; break;
     447             :     case 'u' : {
     448      103822 :       c = ScanUnicodeEscape<capture_raw>();
     449      103821 :       if (c < 0) return false;
     450             :       break;
     451             :     }
     452             :     case 'v':
     453             :       c = '\v';
     454          47 :       break;
     455             :     case 'x': {
     456    25406247 :       c = ScanHexNumber<capture_raw>(2);
     457    25371301 :       if (c < 0) return false;
     458             :       break;
     459             :     }
     460             :     case '0':  // Fall through.
     461             :     case '1':  // fall through
     462             :     case '2':  // fall through
     463             :     case '3':  // fall through
     464             :     case '4':  // fall through
     465             :     case '5':  // fall through
     466             :     case '6':  // fall through
     467             :     case '7':
     468        4257 :       c = ScanOctalEscape<capture_raw>(c, 2);
     469        4257 :       break;
     470             :   }
     471             : 
     472             :   // Other escaped characters are interpreted as their non-escaped version.
     473             :   AddLiteralChar(c);
     474             :   return true;
     475             : }
     476             : 
     477             : template <bool capture_raw>
     478        9691 : uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
     479        4257 :   uc32 x = c - '0';
     480             :   int i = 0;
     481        6758 :   for (; i < length; i++) {
     482        6142 :     int d = c0_ - '0';
     483        6142 :     if (d < 0 || d > 7) break;
     484        2519 :     int nx = x * 8 + d;
     485        2519 :     if (nx >= 256) break;
     486             :     x = nx;
     487         880 :     Advance<capture_raw>();
     488             :   }
     489             :   // Anything except '\0' is an octal escape sequence, illegal in strict mode.
     490             :   // Remember the position of octal escape sequences so that an error
     491             :   // can be reported later (in strict mode).
     492             :   // We don't report the error immediately, because the octal escape can
     493             :   // occur before the "use strict" directive.
     494        5293 :   if (c != '0' || i > 0 || IsNonOctalDecimalDigit(c0_)) {
     495        3813 :     octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
     496        3813 :     octal_message_ = capture_raw ? MessageTemplate::kTemplateOctalLiteral
     497             :                                  : MessageTemplate::kStrictOctalEscape;
     498             :   }
     499        4257 :   return x;
     500             : }
     501             : 
     502    76871942 : Token::Value Scanner::ScanString() {
     503    10352856 :   uc32 quote = c0_;
     504             :   Advance();  // consume quote
     505             : 
     506             :   next().literal_chars.Start();
     507             :   while (true) {
     508    36882084 :     if (V8_UNLIKELY(c0_ == kEndOfInput)) return Token::ILLEGAL;
     509    73777939 :     if ((V8_UNLIKELY(static_cast<uint32_t>(c0_) >= kMaxAscii) &&
     510    73749058 :          !unibrow::IsStringLiteralLineTerminator(c0_)) ||
     511    36866976 :         !MayTerminateString(character_scan_flags[c0_])) {
     512             :       AddLiteralChar(c0_);
     513   151732621 :       AdvanceUntil([this](uc32 c0) {
     514   151732621 :         if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
     515      945730 :           if (V8_UNLIKELY(unibrow::IsStringLiteralLineTerminator(c0))) {
     516             :             return true;
     517             :           }
     518      945812 :           AddLiteralChar(c0);
     519             :           return false;
     520             :         }
     521   150786891 :         uint8_t char_flags = character_scan_flags[c0];
     522   150786891 :         if (MayTerminateString(char_flags)) return true;
     523   141433523 :         AddLiteralChar(c0);
     524             :         return false;
     525             :       });
     526             :     }
     527    36882517 :     if (c0_ == quote) {
     528             :       Advance();
     529    10351652 :       return Token::STRING;
     530             :     }
     531    26530813 :     if (c0_ == '\\') {
     532             :       Advance();
     533             :       // TODO(verwaest): Check whether we can remove the additional check.
     534    26098392 :       if (V8_UNLIKELY(c0_ == kEndOfInput || !ScanEscape<false>())) {
     535             :         return Token::ILLEGAL;
     536             :       }
     537             :       continue;
     538             :     }
     539      860707 :     if (V8_UNLIKELY(c0_ == kEndOfInput ||
     540             :                     unibrow::IsStringLiteralLineTerminator(c0_))) {
     541             :       return Token::ILLEGAL;
     542             :     }
     543             :     DCHECK_NE(quote, c0_);
     544             :     DCHECK((c0_ == '\'' || c0_ == '"'));
     545             :     AddLiteralCharAdvance();
     546             :   }
     547             : }
     548             : 
     549      656107 : Token::Value Scanner::ScanPrivateName() {
     550      277959 :   if (!allow_harmony_private_fields()) {
     551             :     ReportScannerError(source_pos(),
     552             :                        MessageTemplate::kInvalidOrUnexpectedToken);
     553             :     return Token::ILLEGAL;
     554             :   }
     555             : 
     556             :   next().literal_chars.Start();
     557             :   DCHECK_EQ(c0_, '#');
     558             :   DCHECK(!IsIdentifierStart(kEndOfInput));
     559       79709 :   if (!IsIdentifierStart(Peek())) {
     560             :     ReportScannerError(source_pos(),
     561             :                        MessageTemplate::kInvalidOrUnexpectedToken);
     562             :     return Token::ILLEGAL;
     563             :   }
     564             : 
     565             :   AddLiteralCharAdvance();
     566             :   Token::Value token = ScanIdentifierOrKeywordInner();
     567       59229 :   return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME;
     568             : }
     569             : 
     570     2982916 : Token::Value Scanner::ScanTemplateSpan() {
     571             :   // When scanning a TemplateSpan, we are looking for the following construct:
     572             :   // TEMPLATE_SPAN ::
     573             :   //     ` LiteralChars* ${
     574             :   //   | } LiteralChars* ${
     575             :   //
     576             :   // TEMPLATE_TAIL ::
     577             :   //     ` LiteralChars* `
     578             :   //   | } LiteralChar* `
     579             :   //
     580             :   // A TEMPLATE_SPAN should always be followed by an Expression, while a
     581             :   // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
     582             :   // followed by an Expression.
     583             : 
     584             :   // These scoped helpers save and restore the original error state, so that we
     585             :   // can specially treat invalid escape sequences in templates (which are
     586             :   // handled by the parser).
     587      154164 :   ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
     588      154164 :   ErrorState octal_error_state(&octal_message_, &octal_pos_);
     589             : 
     590             :   Token::Value result = Token::TEMPLATE_SPAN;
     591             :   next().literal_chars.Start();
     592             :   next().raw_literal_chars.Start();
     593             :   const bool capture_raw = true;
     594             :   while (true) {
     595     2041631 :     uc32 c = c0_;
     596     2041631 :     if (c == '`') {
     597             :       Advance();  // Consume '`'
     598             :       result = Token::TEMPLATE_TAIL;
     599             :       break;
     600     2060585 :     } else if (c == '$' && Peek() == '{') {
     601             :       Advance();  // Consume '$'
     602             :       Advance();  // Consume '{'
     603             :       break;
     604     1889199 :     } else if (c == '\\') {
     605             :       Advance();  // Consume '\\'
     606             :       DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
     607             :       if (capture_raw) AddRawLiteralChar('\\');
     608       30352 :       if (unibrow::IsLineTerminator(c0_)) {
     609             :         // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
     610             :         // code unit sequence.
     611             :         uc32 lastChar = c0_;
     612             :         Advance();
     613         160 :         if (lastChar == '\r') {
     614             :           // Also skip \n.
     615          92 :           if (c0_ == '\n') Advance();
     616             :           lastChar = '\n';
     617             :         }
     618             :         if (capture_raw) AddRawLiteralChar(lastChar);
     619             :       } else {
     620       15016 :         bool success = ScanEscape<capture_raw>();
     621             :         USE(success);
     622             :         DCHECK_EQ(!success, has_error());
     623             :         // For templates, invalid escape sequence checking is handled in the
     624             :         // parser.
     625       15015 :         scanner_error_state.MoveErrorTo(next_);
     626       15015 :         octal_error_state.MoveErrorTo(next_);
     627             :       }
     628     1874023 :     } else if (c < 0) {
     629             :       // Unterminated template literal
     630             :       break;
     631             :     } else {
     632             :       Advance();  // Consume c.
     633             :       // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
     634             :       // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
     635             :       // consisting of the CV 0x000A.
     636     1872480 :       if (c == '\r') {
     637         614 :         if (c0_ == '\n') Advance();  // Consume '\n'
     638             :         c = '\n';
     639             :       }
     640             :       if (capture_raw) AddRawLiteralChar(c);
     641             :       AddLiteralChar(c);
     642             :     }
     643             :   }
     644      154203 :   next().location.end_pos = source_pos();
     645      154203 :   next().token = result;
     646             : 
     647      154203 :   return result;
     648             : }
     649             : 
     650     1718309 : Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
     651             :   Handle<String> tmp;
     652     1718309 :   if (source_url_.length() > 0) {
     653        3803 :     tmp = source_url_.Internalize(isolate);
     654             :   }
     655     1718309 :   return tmp;
     656             : }
     657             : 
     658     1718312 : Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
     659             :   Handle<String> tmp;
     660     1718312 :   if (source_mapping_url_.length() > 0) {
     661         100 :     tmp = source_mapping_url_.Internalize(isolate);
     662             :   }
     663     1718312 :   return tmp;
     664             : }
     665             : 
     666        2697 : bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
     667        2894 :                                               bool is_check_first_digit) {
     668             :   // we must have at least one digit after 'x'/'b'/'o'
     669        2697 :   if (is_check_first_digit && !predicate(c0_)) return false;
     670             : 
     671             :   bool separator_seen = false;
     672        7863 :   while (predicate(c0_) || c0_ == '_') {
     673        5780 :     if (c0_ == '_') {
     674             :       Advance();
     675        2200 :       if (c0_ == '_') {
     676             :         ReportScannerError(Location(source_pos(), source_pos() + 1),
     677             :                            MessageTemplate::kContinuousNumericSeparator);
     678             :         return false;
     679             :       }
     680             :       separator_seen = true;
     681             :       continue;
     682             :     }
     683             :     separator_seen = false;
     684             :     AddLiteralCharAdvance();
     685             :   }
     686             : 
     687        2083 :   if (separator_seen) {
     688             :     ReportScannerError(Location(source_pos(), source_pos() + 1),
     689             :                        MessageTemplate::kTrailingNumericSeparator);
     690             :     return false;
     691             :   }
     692             : 
     693             :   return true;
     694             : }
     695             : 
     696     2197785 : bool Scanner::ScanDecimalDigits() {
     697     2197785 :   if (allow_harmony_numeric_separator()) {
     698        1620 :     return ScanDigitsWithNumericSeparators(&IsDecimalDigit, false);
     699             :   }
     700     7520398 :   while (IsDecimalDigit(c0_)) {
     701             :     AddLiteralCharAdvance();
     702             :   }
     703             :   return true;
     704             : }
     705             : 
     706       59519 : bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
     707             :   bool separator_seen = false;
     708      153047 :   while (IsDecimalDigit(c0_) || c0_ == '_') {
     709       24535 :     if (c0_ == '_') {
     710             :       Advance();
     711        1068 :       if (c0_ == '_') {
     712             :         ReportScannerError(Location(source_pos(), source_pos() + 1),
     713             :                            MessageTemplate::kContinuousNumericSeparator);
     714             :         return false;
     715             :       }
     716             :       separator_seen = true;
     717             :       continue;
     718             :     }
     719             :     separator_seen = false;
     720       23468 :     *value = 10 * *value + (c0_ - '0');
     721       23468 :     uc32 first_char = c0_;
     722             :     Advance();
     723             :     AddLiteralChar(first_char);
     724             :   }
     725             : 
     726       34630 :   if (separator_seen) {
     727             :     ReportScannerError(Location(source_pos(), source_pos() + 1),
     728             :                        MessageTemplate::kTrailingNumericSeparator);
     729             :     return false;
     730             :   }
     731             : 
     732             :   return true;
     733             : }
     734             : 
     735    85566617 : bool Scanner::ScanDecimalAsSmi(uint64_t* value) {
     736    32409935 :   if (allow_harmony_numeric_separator()) {
     737       34717 :     return ScanDecimalAsSmiWithNumericSeparators(value);
     738             :   }
     739             : 
     740   171064020 :   while (IsDecimalDigit(c0_)) {
     741    53156682 :     *value = 10 * *value + (c0_ - '0');
     742    53156682 :     uc32 first_char = c0_;
     743             :     Advance();
     744             :     AddLiteralChar(first_char);
     745             :   }
     746             :   return true;
     747             : }
     748             : 
     749         757 : bool Scanner::ScanBinaryDigits() {
     750         757 :   if (allow_harmony_numeric_separator()) {
     751         359 :     return ScanDigitsWithNumericSeparators(&IsBinaryDigit, true);
     752             :   }
     753             : 
     754             :   // we must have at least one binary digit after 'b'/'B'
     755         796 :   if (!IsBinaryDigit(c0_)) {
     756             :     return false;
     757             :   }
     758             : 
     759        1596 :   while (IsBinaryDigit(c0_)) {
     760             :     AddLiteralCharAdvance();
     761             :   }
     762             :   return true;
     763             : }
     764             : 
     765         768 : bool Scanner::ScanOctalDigits() {
     766         768 :   if (allow_harmony_numeric_separator()) {
     767         359 :     return ScanDigitsWithNumericSeparators(&IsOctalDigit, true);
     768             :   }
     769             : 
     770             :   // we must have at least one octal digit after 'o'/'O'
     771         818 :   if (!IsOctalDigit(c0_)) {
     772             :     return false;
     773             :   }
     774             : 
     775        1752 :   while (IsOctalDigit(c0_)) {
     776             :     AddLiteralCharAdvance();
     777             :   }
     778             :   return true;
     779             : }
     780             : 
     781      199945 : bool Scanner::ScanImplicitOctalDigits(int start_pos,
     782      199940 :                                       Scanner::NumberKind* kind) {
     783      199945 :   *kind = IMPLICIT_OCTAL;
     784             : 
     785             :   while (true) {
     786             :     // (possible) octal number
     787      808026 :     if (IsNonOctalDecimalDigit(c0_)) {
     788           6 :       *kind = DECIMAL_WITH_LEADING_ZERO;
     789           6 :       return true;
     790             :     }
     791      404007 :     if (!IsOctalDigit(c0_)) {
     792             :       // Octal literal finished.
     793      199940 :       octal_pos_ = Location(start_pos, source_pos());
     794      199940 :       octal_message_ = MessageTemplate::kStrictOctalLiteral;
     795      199940 :       return true;
     796             :     }
     797             :     AddLiteralCharAdvance();
     798             :   }
     799             : }
     800             : 
     801      476085 : bool Scanner::ScanHexDigits() {
     802      476085 :   if (allow_harmony_numeric_separator()) {
     803         359 :     return ScanDigitsWithNumericSeparators(&IsHexDigit, true);
     804             :   }
     805             : 
     806             :   // we must have at least one hex digit after 'x'/'X'
     807      951452 :   if (!IsHexDigit(c0_)) {
     808             :     return false;
     809             :   }
     810             : 
     811     3362554 :   while (IsHexDigit(c0_)) {
     812             :     AddLiteralCharAdvance();
     813             :   }
     814             :   return true;
     815             : }
     816             : 
     817       15582 : bool Scanner::ScanSignedInteger() {
     818       15582 :   if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance();
     819             :   // we must have at least one decimal digit after 'e'/'E'
     820       31162 :   if (!IsDecimalDigit(c0_)) return false;
     821       15207 :   return ScanDecimalDigits();
     822             : }
     823             : 
     824   174915599 : Token::Value Scanner::ScanNumber(bool seen_period) {
     825             :   DCHECK(IsDecimalDigit(c0_));  // the first digit of the number or the fraction
     826             : 
     827    33091206 :   NumberKind kind = DECIMAL;
     828             : 
     829             :   next().literal_chars.Start();
     830    33091206 :   bool at_start = !seen_period;
     831             :   int start_pos = source_pos();  // For reporting octal positions.
     832    33091206 :   if (seen_period) {
     833             :     // we have already seen a decimal point of the float
     834             :     AddLiteralChar('.');
     835        3066 :     if (allow_harmony_numeric_separator() && c0_ == '_') {
     836             :       return Token::ILLEGAL;
     837             :     }
     838             :     // we know we have at least one digit
     839        3066 :     if (!ScanDecimalDigits()) return Token::ILLEGAL;
     840             :   } else {
     841             :     // if the first character is '0' we must check for octals and hex
     842    33088140 :     if (c0_ == '0') {
     843             :       AddLiteralCharAdvance();
     844             : 
     845             :       // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
     846             :       // an octal number.
     847    11610335 :       if (c0_ == 'x' || c0_ == 'X') {
     848             :         AddLiteralCharAdvance();
     849      476096 :         kind = HEX;
     850      476096 :         if (!ScanHexDigits()) return Token::ILLEGAL;
     851    11134148 :       } else if (c0_ == 'o' || c0_ == 'O') {
     852             :         AddLiteralCharAdvance();
     853         768 :         kind = OCTAL;
     854         768 :         if (!ScanOctalDigits()) return Token::ILLEGAL;
     855    11133380 :       } else if (c0_ == 'b' || c0_ == 'B') {
     856             :         AddLiteralCharAdvance();
     857         757 :         kind = BINARY;
     858         757 :         if (!ScanBinaryDigits()) return Token::ILLEGAL;
     859    11132623 :       } else if (IsOctalDigit(c0_)) {
     860      199945 :         kind = IMPLICIT_OCTAL;
     861      199945 :         if (!ScanImplicitOctalDigits(start_pos, &kind)) {
     862             :           return Token::ILLEGAL;
     863             :         }
     864      199946 :         if (kind == DECIMAL_WITH_LEADING_ZERO) {
     865             :           at_start = false;
     866             :         }
     867    10932678 :       } else if (IsNonOctalDecimalDigit(c0_)) {
     868       48816 :         kind = DECIMAL_WITH_LEADING_ZERO;
     869    10883862 :       } else if (allow_harmony_numeric_separator() && c0_ == '_') {
     870             :         ReportScannerError(Location(source_pos(), source_pos() + 1),
     871             :                            MessageTemplate::kZeroDigitNumericSeparator);
     872             :         return Token::ILLEGAL;
     873             :       }
     874             :     }
     875             : 
     876             :     // Parse decimal digits and allow trailing fractional part.
     877    33086319 :     if (kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO) {
     878             :       // This is an optimization for parsing Decimal numbers as Smi's.
     879    32409825 :       if (at_start) {
     880    32409906 :         uint64_t value = 0;
     881             :         // scan subsequent decimal digits
     882    32409906 :         if (!ScanDecimalAsSmi(&value)) {
     883    31256587 :           return Token::ILLEGAL;
     884             :         }
     885             : 
     886    64789712 :         if (next().literal_chars.one_byte_literal().length() <= 10 &&
     887    95969218 :             value <= Smi::kMaxValue && c0_ != '.' && !IsIdentifierStart(c0_)) {
     888    31256227 :           next().smi_value_ = static_cast<uint32_t>(value);
     889             : 
     890    31256227 :           if (kind == DECIMAL_WITH_LEADING_ZERO) {
     891       48816 :             octal_pos_ = Location(start_pos, source_pos());
     892       48816 :             octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
     893             :           }
     894             :           return Token::SMI;
     895             :         }
     896             :       }
     897             : 
     898     1153379 :       if (!ScanDecimalDigits()) return Token::ILLEGAL;
     899     1153451 :       if (c0_ == '.') {
     900             :         seen_period = true;
     901             :         AddLiteralCharAdvance();
     902     1026165 :         if (allow_harmony_numeric_separator() && c0_ == '_') {
     903             :           return Token::ILLEGAL;
     904             :         }
     905     1026081 :         if (!ScanDecimalDigits()) return Token::ILLEGAL;
     906             :       }
     907             :     }
     908             :   }
     909             : 
     910             :   bool is_bigint = false;
     911     3679267 :   if (c0_ == 'n' && !seen_period &&
     912       13751 :       (kind == DECIMAL || kind == HEX || kind == OCTAL || kind == BINARY)) {
     913             :     // Check that the literal is within our limits for BigInt length.
     914             :     // For simplicity, use 4 bits per character to calculate the maximum
     915             :     // allowed literal length.
     916             :     static const int kMaxBigIntCharacters = BigInt::kMaxLengthBits / 4;
     917       13709 :     int length = source_pos() - start_pos - (kind != DECIMAL ? 2 : 0);
     918       13709 :     if (length > kMaxBigIntCharacters) {
     919             :       ReportScannerError(Location(start_pos, source_pos()),
     920             :                          MessageTemplate::kBigIntTooBig);
     921             :       return Token::ILLEGAL;
     922             :     }
     923             : 
     924             :     is_bigint = true;
     925             :     Advance();
     926     1819061 :   } else if (c0_ == 'e' || c0_ == 'E') {
     927             :     // scan exponent, if any
     928             :     DCHECK(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number
     929             : 
     930       15583 :     if (!(kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO))
     931             :       return Token::ILLEGAL;
     932             : 
     933             :     // scan exponent
     934             :     AddLiteralCharAdvance();
     935             : 
     936       15582 :     if (!ScanSignedInteger()) return Token::ILLEGAL;
     937             :   }
     938             : 
     939             :   // The source character immediately following a numeric literal must
     940             :   // not be an identifier start or a decimal digit; see ECMA-262
     941             :   // section 7.8.3, page 17 (note that we read only one decimal digit
     942             :   // if the value is 0).
     943     5497158 :   if (IsDecimalDigit(c0_) || IsIdentifierStart(c0_)) {
     944             :     return Token::ILLEGAL;
     945             :   }
     946             : 
     947     1828717 :   if (kind == DECIMAL_WITH_LEADING_ZERO) {
     948           6 :     octal_pos_ = Location(start_pos, source_pos());
     949           6 :     octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
     950             :   }
     951             : 
     952     1828717 :   return is_bigint ? Token::BIGINT : Token::NUMBER;
     953             : }
     954             : 
     955       68215 : uc32 Scanner::ScanIdentifierUnicodeEscape() {
     956             :   Advance();
     957       37812 :   if (c0_ != 'u') return -1;
     958             :   Advance();
     959       30403 :   return ScanUnicodeEscape<false>();
     960             : }
     961             : 
     962             : template <bool capture_raw>
     963      144728 : uc32 Scanner::ScanUnicodeEscape() {
     964             :   // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
     965             :   // hex digits between { } is arbitrary. \ and u have already been read.
     966      134227 :   if (c0_ == '{') {
     967       44586 :     int begin = source_pos() - 2;
     968        5629 :     Advance<capture_raw>();
     969       44583 :     uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10FFFF, begin);
     970       44585 :     if (cp < 0 || c0_ != '}') {
     971             :       ReportScannerError(source_pos(),
     972             :                          MessageTemplate::kInvalidUnicodeEscapeSequence);
     973             :       return -1;
     974             :     }
     975         757 :     Advance<capture_raw>();
     976       38181 :     return cp;
     977             :   }
     978             :   const bool unicode = true;
     979       89641 :   return ScanHexNumber<capture_raw, unicode>(4);
     980             : }
     981             : 
     982       16426 : Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(bool escaped,
     983       29390 :                                                        bool can_be_keyword) {
     984             :   while (true) {
     985       71374 :     if (c0_ == '\\') {
     986             :       escaped = true;
     987       13201 :       uc32 c = ScanIdentifierUnicodeEscape();
     988             :       // Only allow legal identifier part characters.
     989             :       // TODO(verwaest): Make this true.
     990             :       // DCHECK(!IsIdentifierPart('\'));
     991             :       DCHECK(!IsIdentifierPart(-1));
     992       26400 :       if (c == '\\' || !IsIdentifierPart(c)) {
     993             :         return Token::ILLEGAL;
     994             :       }
     995       25163 :       can_be_keyword = can_be_keyword && CharCanBeKeyword(c);
     996             :       AddLiteralChar(c);
     997      132248 :     } else if (IsIdentifierPart(c0_) ||
     998       16046 :                (CombineSurrogatePair() && IsIdentifierPart(c0_))) {
     999       83448 :       can_be_keyword = can_be_keyword && CharCanBeKeyword(c0_);
    1000             :       AddLiteralCharAdvance();
    1001             :     } else {
    1002             :       break;
    1003             :     }
    1004             :   }
    1005             : 
    1006       30543 :   if (can_be_keyword && next().literal_chars.is_one_byte()) {
    1007             :     Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
    1008             :     Token::Value token =
    1009             :         KeywordOrIdentifierToken(chars.start(), chars.length());
    1010       14695 :     if (IsInRange(token, Token::IDENTIFIER, Token::YIELD)) return token;
    1011             : 
    1012       12792 :     if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
    1013        2280 :       if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
    1014           0 :       return token;
    1015             :     }
    1016             : 
    1017       10512 :     if (!escaped) return token;
    1018             : 
    1019             :     STATIC_ASSERT(Token::LET + 1 == Token::STATIC);
    1020       10512 :     if (IsInRange(token, Token::LET, Token::STATIC)) {
    1021             :       return Token::ESCAPED_STRICT_RESERVED_WORD;
    1022             :     }
    1023        7260 :     return Token::ESCAPED_KEYWORD;
    1024             :   }
    1025             : 
    1026             :   return Token::IDENTIFIER;
    1027             : }
    1028             : 
    1029      317059 : bool Scanner::ScanRegExpPattern() {
    1030             :   DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
    1031             :   DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
    1032             : 
    1033             :   // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
    1034             :   bool in_character_class = false;
    1035             : 
    1036             :   // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
    1037             :   // the scanner should pass uninterpreted bodies to the RegExp
    1038             :   // constructor.
    1039             :   next().literal_chars.Start();
    1040       79375 :   if (next().token == Token::ASSIGN_DIV) {
    1041             :     AddLiteralChar('=');
    1042             :   }
    1043             : 
    1044      979574 :   while (c0_ != '/' || in_character_class) {
    1045     1800549 :     if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
    1046             :       return false;
    1047             :     }
    1048      900164 :     if (c0_ == '\\') {  // Escape sequence.
    1049             :       AddLiteralCharAdvance();
    1050      184875 :       if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
    1051             :         return false;
    1052             :       }
    1053             :       AddLiteralCharAdvance();
    1054             :       // If the escape allows more characters, i.e., \x??, \u????, or \c?,
    1055             :       // only "safe" characters are allowed (letters, digits, underscore),
    1056             :       // otherwise the escape isn't valid and the invalid character has
    1057             :       // its normal meaning. I.e., we can just continue scanning without
    1058             :       // worrying whether the following characters are part of the escape
    1059             :       // or not, since any '/', '\\' or '[' is guaranteed to not be part
    1060             :       // of the escape sequence.
    1061             : 
    1062             :       // TODO(896): At some point, parse RegExps more thoroughly to capture
    1063             :       // octal escapes in strict mode.
    1064             :     } else {  // Unescaped character.
    1065      807715 :       if (c0_ == '[') in_character_class = true;
    1066      807715 :       if (c0_ == ']') in_character_class = false;
    1067             :       AddLiteralCharAdvance();
    1068             :     }
    1069             :   }
    1070             :   Advance();  // consume '/'
    1071             : 
    1072       79155 :   next().token = Token::REGEXP_LITERAL;
    1073       79155 :   return true;
    1074             : }
    1075             : 
    1076             : 
    1077      192165 : Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
    1078             :   DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
    1079             : 
    1080             :   // Scan regular expression flags.
    1081             :   int flags = 0;
    1082      306504 :   while (IsIdentifierPart(c0_)) {
    1083             :     RegExp::Flags flag = RegExp::kNone;
    1084       35297 :     switch (c0_) {
    1085             :       case 'g':
    1086             :         flag = RegExp::kGlobal;
    1087             :         break;
    1088             :       case 'i':
    1089             :         flag = RegExp::kIgnoreCase;
    1090        4518 :         break;
    1091             :       case 'm':
    1092             :         flag = RegExp::kMultiline;
    1093         590 :         break;
    1094             :       case 's':
    1095             :         flag = RegExp::kDotAll;
    1096         156 :         break;
    1097             :       case 'u':
    1098             :         flag = RegExp::kUnicode;
    1099        7286 :         break;
    1100             :       case 'y':
    1101             :         flag = RegExp::kSticky;
    1102         123 :         break;
    1103             :       default:
    1104             :         return Nothing<RegExp::Flags>();
    1105             :     }
    1106       34812 :     if (flags & flag) {
    1107             :       return Nothing<RegExp::Flags>();
    1108             :     }
    1109             :     Advance();
    1110       34688 :     flags |= flag;
    1111             :   }
    1112             : 
    1113       78439 :   next().location.end_pos = source_pos();
    1114       78439 :   return Just(RegExp::Flags(flags));
    1115             : }
    1116             : 
    1117    99715910 : const AstRawString* Scanner::CurrentSymbol(
    1118             :     AstValueFactory* ast_value_factory) const {
    1119    99715910 :   if (is_literal_one_byte()) {
    1120    99677852 :     return ast_value_factory->GetOneByteString(literal_one_byte_string());
    1121             :   }
    1122       74130 :   return ast_value_factory->GetTwoByteString(literal_two_byte_string());
    1123             : }
    1124             : 
    1125      553164 : const AstRawString* Scanner::NextSymbol(
    1126             :     AstValueFactory* ast_value_factory) const {
    1127      553164 :   if (is_next_literal_one_byte()) {
    1128      551044 :     return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
    1129             :   }
    1130        2138 :   return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
    1131             : }
    1132             : 
    1133       82293 : const AstRawString* Scanner::CurrentRawSymbol(
    1134             :     AstValueFactory* ast_value_factory) const {
    1135       82293 :   if (is_raw_literal_one_byte()) {
    1136       82263 :     return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
    1137             :   }
    1138          33 :   return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
    1139             : }
    1140             : 
    1141             : 
    1142     1250767 : double Scanner::DoubleValue() {
    1143             :   DCHECK(is_literal_one_byte());
    1144             :   return StringToDouble(
    1145             :       literal_one_byte_string(),
    1146     1250767 :       ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
    1147             : }
    1148             : 
    1149       12251 : const char* Scanner::CurrentLiteralAsCString(Zone* zone) const {
    1150             :   DCHECK(is_literal_one_byte());
    1151             :   Vector<const uint8_t> vector = literal_one_byte_string();
    1152             :   int length = vector.length();
    1153       12251 :   char* buffer = zone->NewArray<char>(length + 1);
    1154             :   memcpy(buffer, vector.start(), length);
    1155       12251 :   buffer[length] = '\0';
    1156       12251 :   return buffer;
    1157             : }
    1158             : 
    1159       96367 : void Scanner::SeekNext(size_t position) {
    1160             :   // Use with care: This cleanly resets most, but not all scanner state.
    1161             :   // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
    1162             : 
    1163             :   // To re-scan from a given character position, we need to:
    1164             :   // 1, Reset the current_, next_ and next_next_ tokens
    1165             :   //    (next_ + next_next_ will be overwritten by Next(),
    1166             :   //     current_ will remain unchanged, so overwrite it fully.)
    1167      192730 :   for (TokenDesc& token : token_storage_) {
    1168      144546 :     token.token = Token::UNINITIALIZED;
    1169      144546 :     token.invalid_template_escape_message = MessageTemplate::kNone;
    1170             :   }
    1171             :   // 2, reset the source to the desired position,
    1172       48184 :   source_->Seek(position);
    1173             :   // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
    1174       96366 :   c0_ = source_->Advance();
    1175       48183 :   next().after_line_terminator = false;
    1176             :   Scan();
    1177             :   DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
    1178       48182 : }
    1179             : 
    1180             : }  // namespace internal
    1181      178779 : }  // namespace v8

Generated by: LCOV version 1.10