LCOV - code coverage report
Current view: top level - src/parsing - scanner.h (source / functions) Hit Total Coverage
Test: app.info Lines: 160 162 98.8 %
Date: 2017-10-20 Functions: 21 23 91.3 %

          Line data    Source code
       1             : // Copyright 2011 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : // Features shared by parsing and pre-parsing scanners.
       6             : 
       7             : #ifndef V8_PARSING_SCANNER_H_
       8             : #define V8_PARSING_SCANNER_H_
       9             : 
      10             : #include "src/allocation.h"
      11             : #include "src/base/logging.h"
      12             : #include "src/char-predicates.h"
      13             : #include "src/globals.h"
      14             : #include "src/messages.h"
      15             : #include "src/parsing/token.h"
      16             : #include "src/unicode-decoder.h"
      17             : #include "src/unicode.h"
      18             : 
      19             : namespace v8 {
      20             : namespace internal {
      21             : 
      22             : 
      23             : class AstRawString;
      24             : class AstValueFactory;
      25             : class DuplicateFinder;
      26             : class ExternalOneByteString;
      27             : class ExternalTwoByteString;
      28             : class ParserRecorder;
      29             : class UnicodeCache;
      30             : 
      31             : // ---------------------------------------------------------------------
      32             : // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
      33             : // A code unit is a 16 bit value representing either a 16 bit code point
      34             : // or one part of a surrogate pair that makes a single 21 bit code point.
      35             : class Utf16CharacterStream {
      36             :  public:
      37             :   static const uc32 kEndOfInput = -1;
      38             : 
      39     2786688 :   virtual ~Utf16CharacterStream() { }
      40             : 
      41             :   // Returns and advances past the next UTF-16 code unit in the input
      42             :   // stream. If there are no more code units it returns kEndOfInput.
      43  2101925942 :   inline uc32 Advance() {
      44  2101925942 :     if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
      45  2093154823 :       return static_cast<uc32>(*(buffer_cursor_++));
      46     8771125 :     } else if (ReadBlockChecked()) {
      47     6391021 :       return static_cast<uc32>(*(buffer_cursor_++));
      48             :     } else {
      49             :       // Note: currently the following increment is necessary to avoid a
      50             :       // parser problem! The scanner treats the final kEndOfInput as
      51             :       // a code unit with a position, and does math relative to that
      52             :       // position.
      53     2380104 :       buffer_cursor_++;
      54     2380104 :       return kEndOfInput;
      55             :     }
      56             :   }
      57             : 
      58             :   // Go back by one character in the input stream.
      59             :   // This undoes the most recent Advance().
      60    10053065 :   inline void Back() {
      61             :     // The common case - the previous character is within
      62             :     // buffer_start_ .. buffer_end_ - will be handled locally.
      63             :     // Otherwise, a new block is requested.
      64    10053065 :     if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
      65     9878220 :       buffer_cursor_--;
      66             :     } else {
      67      174845 :       ReadBlockAt(pos() - 1);
      68             :     }
      69    10053065 :   }
      70             : 
      71             :   // Go back by two characters in the input stream. (This is the same as
      72             :   // calling Back() twice. But Back() may - in some instances - do substantial
      73             :   // work. Back2() guarantees this work will be done only once.)
      74          18 :   inline void Back2() {
      75          18 :     if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
      76          18 :       buffer_cursor_ -= 2;
      77             :     } else {
      78           0 :       ReadBlockAt(pos() - 2);
      79             :     }
      80          18 :   }
      81             : 
      82             :   inline size_t pos() const {
      83  1069481926 :     return buffer_pos_ + (buffer_cursor_ - buffer_start_);
      84             :   }
      85             : 
      86      383848 :   inline void Seek(size_t pos) {
      87      383848 :     if (V8_LIKELY(pos >= buffer_pos_ &&
      88             :                   pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
      89      105869 :       buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
      90             :     } else {
      91             :       ReadBlockAt(pos);
      92             :     }
      93      383848 :   }
      94             : 
      95             :   // Returns true if the stream could access the V8 heap after construction.
      96             :   virtual bool can_access_heap() = 0;
      97             : 
      98             :  protected:
      99             :   Utf16CharacterStream(const uint16_t* buffer_start,
     100             :                        const uint16_t* buffer_cursor,
     101             :                        const uint16_t* buffer_end, size_t buffer_pos)
     102             :       : buffer_start_(buffer_start),
     103             :         buffer_cursor_(buffer_cursor),
     104             :         buffer_end_(buffer_end),
     105     2786691 :         buffer_pos_(buffer_pos) {}
     106             :   Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
     107             : 
     108             :   bool ReadBlockChecked() {
     109             :     size_t position = pos();
     110             :     USE(position);
     111     9223943 :     bool success = ReadBlock();
     112             : 
     113             :     // Post-conditions: 1, We should always be at the right position.
     114             :     //                  2, Cursor should be inside the buffer.
     115             :     //                  3, We should have more characters available iff success.
     116             :     DCHECK_EQ(pos(), position);
     117             :     DCHECK_LE(buffer_cursor_, buffer_end_);
     118             :     DCHECK_LE(buffer_start_, buffer_cursor_);
     119             :     DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
     120             :     return success;
     121             :   }
     122             : 
     123             :   void ReadBlockAt(size_t new_pos) {
     124             :     // The callers of this method (Back/Back2/Seek) should handle the easy
     125             :     // case (seeking within the current buffer), and we should only get here
     126             :     // if we actually require new data.
     127             :     // (This is really an efficiency check, not a correctness invariant.)
     128             :     DCHECK(new_pos < buffer_pos_ ||
     129             :            new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
     130             : 
     131             :     // Change pos() to point to new_pos.
     132      452824 :     buffer_pos_ = new_pos;
     133      452824 :     buffer_cursor_ = buffer_start_;
     134             :     DCHECK_EQ(pos(), new_pos);
     135             :     ReadBlockChecked();
     136             :   }
     137             : 
     138             :   // Read more data, and update buffer_*_ to point to it.
     139             :   // Returns true if more data was available.
     140             :   //
     141             :   // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
     142             :   // the result of pos() remains unaffected.
     143             :   //
     144             :   // Examples:
     145             :   // - a stream could fill a separate buffer. Then buffer_start_ and
     146             :   //   buffer_cursor_ would point to the beginning of the buffer, and
     147             :   //   buffer_pos_ would be the old pos().
     148             :   // - a stream with existing buffer chunks would set buffer_start_ and
     149             :   //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
     150             :   //   point into the middle of the buffer, while buffer_pos_ would describe
     151             :   //   the start of the buffer.
     152             :   virtual bool ReadBlock() = 0;
     153             : 
     154             :   const uint16_t* buffer_start_;
     155             :   const uint16_t* buffer_cursor_;
     156             :   const uint16_t* buffer_end_;
     157             :   size_t buffer_pos_;
     158             : };
     159             : 
     160             : 
     161             : // ----------------------------------------------------------------------------
     162             : // JavaScript Scanner.
     163             : 
     164     5572114 : class Scanner {
     165             :  public:
     166             :   // Scoped helper for a re-settable bookmark.
     167             :   class BookmarkScope {
     168             :    public:
     169             :     explicit BookmarkScope(Scanner* scanner)
     170     2245915 :         : scanner_(scanner), bookmark_(kNoBookmark) {
     171             :       DCHECK_NOT_NULL(scanner_);
     172             :     }
     173             :     ~BookmarkScope() {}
     174             : 
     175             :     void Set();
     176             :     void Apply();
     177             :     bool HasBeenSet();
     178             :     bool HasBeenApplied();
     179             : 
     180             :    private:
     181             :     static const size_t kNoBookmark;
     182             :     static const size_t kBookmarkWasApplied;
     183             :     static const size_t kBookmarkAtFirstPos;
     184             : 
     185             :     Scanner* scanner_;
     186             :     size_t bookmark_;
     187             : 
     188             :     DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
     189             :   };
     190             : 
     191             :   // Representation of an interval of source positions.
     192             :   struct Location {
     193    11902182 :     Location(int b, int e) : beg_pos(b), end_pos(e) { }
     194    18794083 :     Location() : beg_pos(0), end_pos(0) { }
     195             : 
     196             :     bool IsValid() const {
     197    28202824 :       return beg_pos >= 0 && end_pos >= beg_pos;
     198             :     }
     199             : 
     200             :     static Location invalid() { return Location(-1, -1); }
     201             : 
     202             :     int beg_pos;
     203             :     int end_pos;
     204             :   };
     205             : 
     206             :   // -1 is outside of the range of any real source code.
     207             :   static const int kNoOctalLocation = -1;
     208             :   static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
     209             : 
     210             :   explicit Scanner(UnicodeCache* scanner_contants, int* use_counts_);
     211             : 
     212             :   void Initialize(Utf16CharacterStream* source, bool is_module);
     213             : 
     214             :   // Returns the next token and advances input.
     215             :   Token::Value Next();
     216             :   // Returns the token following peek()
     217             :   Token::Value PeekAhead();
     218             :   // Returns the current token again.
     219             :   Token::Value current_token() { return current_.token; }
     220             : 
     221             :   Token::Value current_contextual_token() { return current_.contextual_token; }
     222             :   Token::Value next_contextual_token() { return next_.contextual_token; }
     223             : 
     224             :   // Returns the location information for the current token
     225             :   // (the token last returned by Next()).
     226     5239475 :   Location location() const { return current_.location; }
     227             : 
     228             :   // This error is specifically an invalid hex or unicode escape sequence.
     229             :   bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
     230             :   MessageTemplate::Template error() const { return scanner_error_; }
     231             :   Location error_location() const { return scanner_error_location_; }
     232             : 
     233             :   bool has_invalid_template_escape() const {
     234             :     return current_.invalid_template_escape_message != MessageTemplate::kNone;
     235             :   }
     236             :   MessageTemplate::Template invalid_template_escape_message() const {
     237             :     DCHECK(has_invalid_template_escape());
     238             :     return current_.invalid_template_escape_message;
     239             :   }
     240             :   Location invalid_template_escape_location() const {
     241             :     DCHECK(has_invalid_template_escape());
     242             :     return current_.invalid_template_escape_location;
     243             :   }
     244             : 
     245             :   // Similar functions for the upcoming token.
     246             : 
     247             :   // One token look-ahead (past the token returned by Next()).
     248  2209936692 :   Token::Value peek() const { return next_.token; }
     249             : 
     250             :   Location peek_location() const { return next_.location; }
     251             : 
     252             :   bool literal_contains_escapes() const {
     253    42386538 :     return LiteralContainsEscapes(current_);
     254             :   }
     255             : 
     256             :   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const;
     257             :   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory) const;
     258             :   const AstRawString* CurrentRawSymbol(
     259             :       AstValueFactory* ast_value_factory) const;
     260             : 
     261             :   double DoubleValue();
     262             : 
     263             :   const char* CurrentLiteralAsCString(Zone* zone) const;
     264             : 
     265             :   inline bool CurrentMatches(Token::Value token) const {
     266             :     DCHECK(Token::IsKeyword(token));
     267             :     return current_.token == token;
     268             :   }
     269             : 
     270             :   inline bool CurrentMatchesContextual(Token::Value token) const {
     271             :     DCHECK(Token::IsContextualKeyword(token));
     272      128440 :     return current_.contextual_token == token;
     273             :   }
     274             : 
     275             :   // Match the token against the contextual keyword or literal buffer.
     276     3511922 :   inline bool CurrentMatchesContextualEscaped(Token::Value token) const {
     277             :     DCHECK(Token::IsContextualKeyword(token) || token == Token::LET);
     278             :     // Escaped keywords are not matched as tokens. So if we require escape
     279             :     // and/or string processing we need to look at the literal content
     280             :     // (which was escape-processed already).
     281             :     // Conveniently, current_.literal_chars == nullptr for all proper keywords,
     282             :     // so this second condition should exit early in common cases.
     283     3511922 :     return (current_.contextual_token == token) ||
     284     3355945 :            (current_.literal_chars &&
     285             :             current_.literal_chars->Equals(Vector<const char>(
     286    10223812 :                 Token::String(token), Token::StringLength(token))));
     287             :   }
     288             : 
     289     5228304 :   bool IsUseStrict() const {
     290    10456608 :     return current_.token == Token::STRING &&
     291             :            current_.literal_chars->Equals(
     292    10456608 :                Vector<const char>("use strict", strlen("use strict")));
     293             :   }
     294      128440 :   bool IsGetOrSet(bool* is_get, bool* is_set) const {
     295       64220 :     *is_get = CurrentMatchesContextual(Token::GET);
     296       64220 :     *is_set = CurrentMatchesContextual(Token::SET);
     297       64220 :     return *is_get || *is_set;
     298             :   }
     299       79675 :   bool IsLet() const {
     300       79675 :     return CurrentMatches(Token::LET) ||
     301       79675 :            CurrentMatchesContextualEscaped(Token::LET);
     302             :   }
     303             : 
     304             :   // Check whether the CurrentSymbol() has already been seen.
     305             :   // The DuplicateFinder holds the data, so different instances can be used
     306             :   // for different sets of duplicates to check for.
     307             :   bool IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
     308             :                          AstValueFactory* ast_value_factory) const;
     309             : 
     310             :   UnicodeCache* unicode_cache() { return unicode_cache_; }
     311             : 
     312             :   // Returns the location of the last seen octal literal.
     313             :   Location octal_position() const { return octal_pos_; }
     314             :   void clear_octal_position() {
     315         964 :     octal_pos_ = Location::invalid();
     316         964 :     octal_message_ = MessageTemplate::kNone;
     317             :   }
     318             :   MessageTemplate::Template octal_message() const { return octal_message_; }
     319             : 
     320             :   // Returns the value of the last smi that was scanned.
     321             :   uint32_t smi_value() const { return current_.smi_value_; }
     322             : 
     323             :   // Seek forward to the given position.  This operation does not
     324             :   // work in general, for instance when there are pushed back
     325             :   // characters, but works for seeking forward until simple delimiter
     326             :   // tokens, which is what it is used for.
     327             :   void SeekForward(int pos);
     328             : 
     329             :   // Returns true if there was a line terminator before the peek'ed token,
     330             :   // possibly inside a multi-line comment.
     331             :   bool HasAnyLineTerminatorBeforeNext() const {
     332    67251907 :     return has_line_terminator_before_next_ ||
     333             :            has_multiline_comment_before_next_;
     334             :   }
     335             : 
     336             :   bool HasAnyLineTerminatorAfterNext() {
     337      247099 :     Token::Value ensure_next_next = PeekAhead();
     338             :     USE(ensure_next_next);
     339      247099 :     return has_line_terminator_after_next_;
     340             :   }
     341             : 
     342             :   // Scans the input as a regular expression pattern, next token must be /(=).
     343             :   // Returns true if a pattern is scanned.
     344             :   bool ScanRegExpPattern();
     345             :   // Scans the input as regular expression flags. Returns the flags on success.
     346             :   Maybe<RegExp::Flags> ScanRegExpFlags();
     347             : 
     348             :   // Scans the input as a template literal
     349             :   Token::Value ScanTemplateStart();
     350       67056 :   Token::Value ScanTemplateContinuation() {
     351             :     DCHECK_EQ(next_.token, Token::RBRACE);
     352       67056 :     next_.location.beg_pos = source_pos() - 1;  // We already consumed }
     353       67056 :     return ScanTemplateSpan();
     354             :   }
     355             : 
     356             :   Handle<String> SourceUrl(Isolate* isolate) const;
     357             :   Handle<String> SourceMappingUrl(Isolate* isolate) const;
     358             : 
     359             :   bool FoundHtmlComment() const { return found_html_comment_; }
     360             : 
     361             :   bool allow_harmony_bigint() const { return allow_harmony_bigint_; }
     362     2706851 :   void set_allow_harmony_bigint(bool allow) { allow_harmony_bigint_ = allow; }
     363             : 
     364             :  private:
     365             :   // Scoped helper for saving & restoring scanner error state.
     366             :   // This is used for tagged template literals, in which normally forbidden
     367             :   // escape sequences are allowed.
     368             :   class ErrorState;
     369             : 
     370             :   // Scoped helper for literal recording. Automatically drops the literal
     371             :   // if aborting the scanning before it's complete.
     372             :   class LiteralScope {
     373             :    public:
     374   141650367 :     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
     375             :       scanner_->StartLiteral();
     376             :     }
     377             :     ~LiteralScope() {
     378   182170394 :       if (!complete_) scanner_->DropLiteral();
     379             :     }
     380   101441959 :     void Complete() { complete_ = true; }
     381             : 
     382             :    private:
     383             :     Scanner* scanner_;
     384             :     bool complete_;
     385             :   };
     386             : 
     387             :   // LiteralBuffer -  Collector of chars of literals.
     388             :   class LiteralBuffer {
     389             :    public:
     390    22291327 :     LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() {}
     391             : 
     392             :     ~LiteralBuffer() { backing_store_.Dispose(); }
     393             : 
     394             :     INLINE(void AddChar(char code_unit)) {
     395             :       DCHECK(IsValidAscii(code_unit));
     396  1028143557 :       AddOneByteChar(static_cast<byte>(code_unit));
     397             :     }
     398             : 
     399             :     INLINE(void AddChar(uc32 code_unit)) {
     400    95462195 :       if (is_one_byte_ &&
     401             :           code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
     402    94475283 :         AddOneByteChar(static_cast<byte>(code_unit));
     403             :       } else {
     404      986842 :         AddCharSlow(code_unit);
     405             :       }
     406             :     }
     407             : 
     408             :     bool is_one_byte() const { return is_one_byte_; }
     409             : 
     410     8584249 :     bool Equals(Vector<const char> keyword) const {
     411    17679061 :       return is_one_byte() && keyword.length() == position_ &&
     412     9103983 :              (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
     413             :     }
     414             : 
     415             :     Vector<const uint16_t> two_byte_literal() const {
     416             :       DCHECK(!is_one_byte_);
     417             :       DCHECK_EQ(position_ & 0x1, 0);
     418             :       return Vector<const uint16_t>(
     419       42425 :           reinterpret_cast<const uint16_t*>(backing_store_.start()),
     420       42425 :           position_ >> 1);
     421             :     }
     422             : 
     423             :     Vector<const uint8_t> one_byte_literal() const {
     424             :       DCHECK(is_one_byte_);
     425             :       return Vector<const uint8_t>(
     426   248530131 :           reinterpret_cast<const uint8_t*>(backing_store_.start()), position_);
     427             :     }
     428             : 
     429    45694595 :     int length() const { return is_one_byte_ ? position_ : (position_ >> 1); }
     430             : 
     431      136889 :     void ReduceLength(int delta) {
     432      136889 :       position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
     433      136889 :     }
     434             : 
     435             :     void Reset() {
     436   182448608 :       position_ = 0;
     437   182448608 :       is_one_byte_ = true;
     438             :     }
     439             : 
     440             :     Handle<String> Internalize(Isolate* isolate) const;
     441             : 
     442             :    private:
     443             :     static const int kInitialCapacity = 16;
     444             :     static const int kGrowthFactory = 4;
     445             :     static const int kMinConversionSlack = 256;
     446             :     static const int kMaxGrowth = 1 * MB;
     447             : 
     448             :     inline bool IsValidAscii(char code_unit) {
     449             :       // Control characters and printable characters span the range of
     450             :       // valid ASCII characters (0-127). Chars are unsigned on some
     451             :       // platforms which causes compiler warnings if the validity check
     452             :       // tests the lower bound >= 0 as it's always true.
     453             :       return iscntrl(code_unit) || isprint(code_unit);
     454             :     }
     455             : 
     456             :     INLINE(void AddOneByteChar(byte one_byte_char)) {
     457             :       DCHECK(is_one_byte_);
     458  1122621963 :       if (position_ >= backing_store_.length()) ExpandBuffer();
     459  1122622003 :       backing_store_[position_] = one_byte_char;
     460  1122622069 :       position_ += kOneByteSize;
     461             :     }
     462             : 
     463             :     void AddCharSlow(uc32 code_unit);
     464             :     int NewCapacity(int min_capacity);
     465             :     void ExpandBuffer();
     466             :     void ConvertToTwoByte();
     467             : 
     468             :     bool is_one_byte_;
     469             :     int position_;
     470             :     Vector<byte> backing_store_;
     471             : 
     472             :     DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
     473             :   };
     474             : 
     475             :   // The current and look-ahead token.
     476             :   struct TokenDesc {
     477             :     Location location;
     478             :     LiteralBuffer* literal_chars;
     479             :     LiteralBuffer* raw_literal_chars;
     480             :     uint32_t smi_value_;
     481             :     Token::Value token;
     482             :     MessageTemplate::Template invalid_template_escape_message;
     483             :     Location invalid_template_escape_location;
     484             :     Token::Value contextual_token;
     485             :   };
     486             : 
     487             :   static const int kCharacterLookaheadBufferSize = 1;
     488             :   const int kMaxAscii = 127;
     489             : 
     490             :   // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
     491             :   template <bool capture_raw>
     492             :   uc32 ScanOctalEscape(uc32 c, int length);
     493             : 
     494             :   // Call this after setting source_ to the input.
     495     2786055 :   void Init() {
     496             :     // Set c0_ (one character ahead)
     497             :     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
     498     2786055 :     Advance();
     499             :     // Initialize current_ to not refer to a literal.
     500     2786055 :     current_.token = Token::UNINITIALIZED;
     501     2786055 :     current_.contextual_token = Token::UNINITIALIZED;
     502     2786055 :     current_.literal_chars = nullptr;
     503     2786055 :     current_.raw_literal_chars = nullptr;
     504     2786055 :     current_.invalid_template_escape_message = MessageTemplate::kNone;
     505     2786055 :     next_.token = Token::UNINITIALIZED;
     506     2786055 :     next_.contextual_token = Token::UNINITIALIZED;
     507     2786055 :     next_.literal_chars = nullptr;
     508     2786055 :     next_.raw_literal_chars = nullptr;
     509     2786055 :     next_.invalid_template_escape_message = MessageTemplate::kNone;
     510     2786055 :     next_next_.token = Token::UNINITIALIZED;
     511     2786055 :     next_next_.contextual_token = Token::UNINITIALIZED;
     512     2786055 :     next_next_.literal_chars = nullptr;
     513     2786055 :     next_next_.raw_literal_chars = nullptr;
     514     2786055 :     next_next_.invalid_template_escape_message = MessageTemplate::kNone;
     515     2786055 :     found_html_comment_ = false;
     516     2786055 :     scanner_error_ = MessageTemplate::kNone;
     517     2786055 :   }
     518             : 
     519             :   void ReportScannerError(const Location& location,
     520       13716 :                           MessageTemplate::Template error) {
     521       13716 :     if (has_error()) return;
     522       13696 :     scanner_error_ = error;
     523       13696 :     scanner_error_location_ = location;
     524             :   }
     525             : 
     526       10245 :   void ReportScannerError(int pos, MessageTemplate::Template error) {
     527       10245 :     if (has_error()) return;
     528        7965 :     scanner_error_ = error;
     529        7965 :     scanner_error_location_ = Location(pos, pos + 1);
     530             :   }
     531             : 
     532             :   // Seek to the next_ token at the given position.
     533             :   void SeekNext(size_t position);
     534             : 
     535             :   // Literal buffer support
     536             :   inline void StartLiteral() {
     537             :     LiteralBuffer* free_buffer =
     538   182307967 :         (current_.literal_chars == &literal_buffer0_)
     539             :             ? &literal_buffer1_
     540   181611035 :             : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
     541   363919002 :                                                             : &literal_buffer0_;
     542             :     free_buffer->Reset();
     543   182307967 :     next_.literal_chars = free_buffer;
     544             :   }
     545             : 
     546             :   inline void StartRawLiteral() {
     547             :     LiteralBuffer* free_buffer =
     548      137771 :         (current_.raw_literal_chars == &raw_literal_buffer0_)
     549             :             ? &raw_literal_buffer1_
     550      136049 :             : (current_.raw_literal_chars == &raw_literal_buffer1_)
     551             :                   ? &raw_literal_buffer2_
     552      273820 :                   : &raw_literal_buffer0_;
     553             :     free_buffer->Reset();
     554      137771 :     next_.raw_literal_chars = free_buffer;
     555             :   }
     556             : 
     557             :   INLINE(void AddLiteralChar(uc32 c)) {
     558             :     DCHECK_NOT_NULL(next_.literal_chars);
     559             :     next_.literal_chars->AddChar(c);
     560             :   }
     561             : 
     562             :   INLINE(void AddLiteralChar(char c)) {
     563             :     DCHECK_NOT_NULL(next_.literal_chars);
     564             :     next_.literal_chars->AddChar(c);
     565             :   }
     566             : 
     567             :   INLINE(void AddRawLiteralChar(uc32 c)) {
     568             :     DCHECK_NOT_NULL(next_.raw_literal_chars);
     569             :     next_.raw_literal_chars->AddChar(c);
     570             :   }
     571             : 
     572             :   INLINE(void ReduceRawLiteralLength(int delta)) {
     573             :     DCHECK_NOT_NULL(next_.raw_literal_chars);
     574      136889 :     next_.raw_literal_chars->ReduceLength(delta);
     575             :   }
     576             : 
     577             :   // Stops scanning of a literal and drops the collected characters,
     578             :   // e.g., due to an encountered error.
     579             :   inline void DropLiteral() {
     580    40211000 :     next_.literal_chars = nullptr;
     581    40211000 :     next_.raw_literal_chars = nullptr;
     582             :   }
     583             : 
     584    32595526 :   inline void AddLiteralCharAdvance() {
     585    16297763 :     AddLiteralChar(c0_);
     586    16297763 :     Advance();
     587    16297763 :   }
     588             : 
     589             :   // Low-level scanning support.
     590             :   template <bool capture_raw = false, bool check_surrogate = true>
     591   961782112 :   void Advance() {
     592             :     if (capture_raw) {
     593     1567329 :       AddRawLiteralChar(c0_);
     594             :     }
     595  2063090062 :     c0_ = source_->Advance();
     596   960214828 :     if (check_surrogate) HandleLeadSurrogate();
     597   960214909 :   }
     598             : 
     599   989403650 :   void HandleLeadSurrogate() {
     600  1978807300 :     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
     601         773 :       uc32 c1 = source_->Advance();
     602         773 :       if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
     603          32 :         source_->Back();
     604             :       } else {
     605        1482 :         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
     606             :       }
     607             :     }
     608   989403650 :   }
     609             : 
     610             :   void PushBack(uc32 ch) {
     611     5331044 :     if (c0_ > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
     612           0 :       source_->Back2();
     613             :     } else {
     614     5331044 :       source_->Back();
     615             :     }
     616     5331044 :     c0_ = ch;
     617             :   }
     618             : 
     619             :   // Same as PushBack(ch1); PushBack(ch2).
     620             :   // - Potentially more efficient as it uses Back2() on the stream.
     621             :   // - Uses char as parameters, since we're only calling it with ASCII chars in
     622             :   //   practice. This way, we can avoid a few edge cases.
     623             :   void PushBack2(char ch1, char ch2) {
     624          18 :     source_->Back2();
     625          18 :     c0_ = ch2;
     626             :   }
     627             : 
     628             :   inline Token::Value Select(Token::Value tok) {
     629    33621036 :     Advance();
     630             :     return tok;
     631             :   }
     632             : 
     633             :   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
     634     4947688 :     Advance();
     635     4947688 :     if (c0_ == next) {
     636     2584493 :       Advance();
     637             :       return then;
     638             :     } else {
     639             :       return else_;
     640             :     }
     641             :   }
     642             :   // Returns the literal string, if any, for the current token (the
     643             :   // token last returned by Next()). The string is 0-terminated.
     644             :   // Literal strings are collected for identifiers, strings, numbers as well
     645             :   // as for template literals. For template literals we also collect the raw
     646             :   // form.
     647             :   // These functions only give the correct result if the literal was scanned
     648             :   // when a LiteralScope object is alive.
     649             :   //
     650             :   // Current usage of these functions is unfortunately a little undisciplined,
     651             :   // and is_literal_one_byte() + literal_one_byte_string() are also
     652             :   // requested for tokens that do not have a literal. Hence, we treat any
     653             :   // token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
     654             :   // literal "function".
     655             :   Vector<const uint8_t> literal_one_byte_string() const {
     656   115913456 :     if (current_.literal_chars)
     657             :       return current_.literal_chars->one_byte_literal();
     658      132777 :     const char* str = Token::String(current_.token);
     659             :     const uint8_t* str_as_uint8 = reinterpret_cast<const uint8_t*>(str);
     660             :     return Vector<const uint8_t>(str_as_uint8,
     661      132777 :                                  Token::StringLength(current_.token));
     662             :   }
     663             :   Vector<const uint16_t> literal_two_byte_string() const {
     664             :     DCHECK_NOT_NULL(current_.literal_chars);
     665             :     return current_.literal_chars->two_byte_literal();
     666             :   }
     667             :   bool is_literal_one_byte() const {
     668   113320097 :     return !current_.literal_chars || current_.literal_chars->is_one_byte();
     669             :   }
     670             :   int literal_length() const {
     671             :     if (current_.literal_chars) return current_.literal_chars->length();
     672             :     return Token::StringLength(current_.token);
     673             :   }
     674             :   // Returns the literal string for the next token (the token that
     675             :   // would be returned if Next() were called).
     676             :   Vector<const uint8_t> next_literal_one_byte_string() const {
     677             :     DCHECK_NOT_NULL(next_.literal_chars);
     678             :     return next_.literal_chars->one_byte_literal();
     679             :   }
     680             :   Vector<const uint16_t> next_literal_two_byte_string() const {
     681             :     DCHECK_NOT_NULL(next_.literal_chars);
     682             :     return next_.literal_chars->two_byte_literal();
     683             :   }
     684             :   bool is_next_literal_one_byte() const {
     685             :     DCHECK_NOT_NULL(next_.literal_chars);
     686       62607 :     return next_.literal_chars->is_one_byte();
     687             :   }
     688             :   Vector<const uint8_t> raw_literal_one_byte_string() const {
     689             :     DCHECK_NOT_NULL(current_.raw_literal_chars);
     690             :     return current_.raw_literal_chars->one_byte_literal();
     691             :   }
     692             :   Vector<const uint16_t> raw_literal_two_byte_string() const {
     693             :     DCHECK_NOT_NULL(current_.raw_literal_chars);
     694             :     return current_.raw_literal_chars->two_byte_literal();
     695             :   }
     696             :   bool is_raw_literal_one_byte() const {
     697             :     DCHECK_NOT_NULL(current_.raw_literal_chars);
     698       62557 :     return current_.raw_literal_chars->is_one_byte();
     699             :   }
     700             : 
     701             :   template <bool capture_raw, bool unicode = false>
     702             :   uc32 ScanHexNumber(int expected_length);
     703             :   // Scan a number of any length but not bigger than max_value. For example, the
     704             :   // number can be 000000001, so it's very long in characters but its value is
     705             :   // small.
     706             :   template <bool capture_raw>
     707             :   uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
     708             : 
     709             :   // Scans a single JavaScript token.
     710             :   void Scan();
     711             : 
     712             :   Token::Value SkipWhiteSpace();
     713             :   Token::Value SkipSingleHTMLComment();
     714             :   Token::Value SkipSingleLineComment();
     715             :   Token::Value SkipSourceURLComment();
     716             :   void TryToParseSourceURLComment();
     717             :   Token::Value SkipMultiLineComment();
     718             :   // Scans a possible HTML comment -- begins with '<!'.
     719             :   Token::Value ScanHtmlComment();
     720             : 
     721             :   void ScanDecimalDigits();
     722             :   Token::Value ScanNumber(bool seen_period);
     723             :   Token::Value ScanIdentifierOrKeyword();
     724             :   Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
     725             : 
     726             :   Token::Value ScanString();
     727             : 
     728             :   // Scans an escape-sequence which is part of a string and adds the
     729             :   // decoded character to the current literal. Returns true if a pattern
     730             :   // is scanned.
     731             :   template <bool capture_raw, bool in_template_literal>
     732             :   bool ScanEscape();
     733             : 
     734             :   // Decodes a Unicode escape-sequence which is part of an identifier.
     735             :   // If the escape sequence cannot be decoded the result is kBadChar.
     736             :   uc32 ScanIdentifierUnicodeEscape();
     737             :   // Helper for the above functions.
     738             :   template <bool capture_raw>
     739       28351 :   uc32 ScanUnicodeEscape();
     740             : 
     741             :   bool is_module_;
     742             : 
     743             :   bool IsLineTerminator(uc32 c);
     744             : 
     745             :   Token::Value ScanTemplateSpan();
     746             : 
     747             :   // Return the current source position.
     748             :   int source_pos() {
     749  1044130745 :     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
     750             :   }
     751             : 
     752    42386538 :   static bool LiteralContainsEscapes(const TokenDesc& token) {
     753    42386538 :     Location location = token.location;
     754    42386538 :     int source_length = (location.end_pos - location.beg_pos);
     755    42386538 :     if (token.token == Token::STRING) {
     756             :       // Subtract delimiters.
     757       37190 :       source_length -= 2;
     758             :     }
     759    84724179 :     return token.literal_chars &&
     760    42386538 :            (token.literal_chars->length() != source_length);
     761             :   }
     762             : 
     763             : #ifdef DEBUG
     764             :   void SanityCheckTokenDesc(const TokenDesc&) const;
     765             : #endif
     766             : 
     767             :   UnicodeCache* unicode_cache_;
     768             : 
     769             :   // Buffers collecting literal strings, numbers, etc.
     770             :   LiteralBuffer literal_buffer0_;
     771             :   LiteralBuffer literal_buffer1_;
     772             :   LiteralBuffer literal_buffer2_;
     773             : 
     774             :   // Values parsed from magic comments.
     775             :   LiteralBuffer source_url_;
     776             :   LiteralBuffer source_mapping_url_;
     777             : 
     778             :   // Buffers to store raw string values
     779             :   LiteralBuffer raw_literal_buffer0_;
     780             :   LiteralBuffer raw_literal_buffer1_;
     781             :   LiteralBuffer raw_literal_buffer2_;
     782             : 
     783             :   TokenDesc current_;    // desc for current token (as returned by Next())
     784             :   TokenDesc next_;       // desc for next token (one token look-ahead)
     785             :   TokenDesc next_next_;  // desc for the token after next (after PeekAhead())
     786             : 
     787             :   // Input stream. Must be initialized to a Utf16CharacterStream.
     788             :   Utf16CharacterStream* source_;
     789             : 
     790             :   // Last-seen positions of potentially problematic tokens.
     791             :   Location octal_pos_;
     792             :   MessageTemplate::Template octal_message_;
     793             : 
     794             :   // One Unicode character look-ahead; c0_ < 0 at the end of the input.
     795             :   uc32 c0_;
     796             : 
     797             :   // Whether there is a line terminator whitespace character after
     798             :   // the current token, and before the next. Does not count newlines
     799             :   // inside multiline comments.
     800             :   bool has_line_terminator_before_next_;
     801             :   // Whether there is a multi-line comment that contains a
     802             :   // line-terminator after the current token, and before the next.
     803             :   bool has_multiline_comment_before_next_;
     804             :   bool has_line_terminator_after_next_;
     805             : 
     806             :   // Whether this scanner encountered an HTML comment.
     807             :   bool found_html_comment_;
     808             : 
     809             :   // Whether to recognize BIGINT tokens.
     810             :   bool allow_harmony_bigint_;
     811             : 
     812             :   int* use_counts_;
     813             : 
     814             :   MessageTemplate::Template scanner_error_;
     815             :   Location scanner_error_location_;
     816             : };
     817             : 
     818             : }  // namespace internal
     819             : }  // namespace v8
     820             : 
     821             : #endif  // V8_PARSING_SCANNER_H_

Generated by: LCOV version 1.10