LCOV - app.info - src/parsing/scanner-character-streams.cc

LCOV - code coverage report

Current view:	top level - src/parsing - scanner-character-streams.cc (source / functions)		Hit	Total	Coverage
Test:	app.info	Lines:	208	223	93.3 %
Date:	2017-10-20	Functions:	33	44	75.0 %

          Line data    Source code

       1             : // Copyright 2011 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/parsing/scanner-character-streams.h"
       6             : 
       7             : #include "include/v8.h"
       8             : #include "src/counters.h"
       9             : #include "src/globals.h"
      10             : #include "src/handles.h"
      11             : #include "src/objects-inl.h"
      12             : #include "src/parsing/scanner.h"
      13             : #include "src/unicode-inl.h"
      14             : 
      15             : namespace v8 {
      16             : namespace internal {
      17             : 
      18             : namespace {
      19             : const unibrow::uchar kUtf8Bom = 0xfeff;
      20             : }  // namespace
      21             : 
      22             : // ----------------------------------------------------------------------------
      23             : // BufferedUtf16CharacterStreams
      24             : //
      25             : // A buffered character stream based on a random access character
      26             : // source (ReadBlock can be called with pos() pointing to any position,
      27             : // even positions before the current).
      28     2786566 : class BufferedUtf16CharacterStream : public Utf16CharacterStream {
      29             :  public:
      30             :   BufferedUtf16CharacterStream();
      31             : 
      32             :  protected:
      33             :   static const size_t kBufferSize = 512;
      34             : 
      35             :   bool ReadBlock() override;
      36             : 
      37             :   // FillBuffer should read up to kBufferSize characters at position and store
      38             :   // them into buffer_[0..]. It returns the number of characters stored.
      39             :   virtual size_t FillBuffer(size_t position) = 0;
      40             : 
      41             :   // Fixed sized buffer that this class reads from.
      42             :   // The base class' buffer_start_ should always point to buffer_.
      43             :   uc16 buffer_[kBufferSize];
      44             : };
      45             : 
      46           0 : BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
      47     5573138 :     : Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
      48             : 
      49     9221120 : bool BufferedUtf16CharacterStream::ReadBlock() {
      50             :   DCHECK_EQ(buffer_start_, buffer_);
      51             : 
      52             :   size_t position = pos();
      53     9221120 :   buffer_pos_ = position;
      54     9221120 :   buffer_cursor_ = buffer_;
      55     9221120 :   buffer_end_ = buffer_ + FillBuffer(position);
      56             :   DCHECK_EQ(pos(), position);
      57             :   DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
      58     9221127 :   return buffer_cursor_ < buffer_end_;
      59             : }
      60             : 
      61             : // ----------------------------------------------------------------------------
      62             : // GenericStringUtf16CharacterStream.
      63             : //
      64             : // A stream w/ a data source being a (flattened) Handle<String>.
      65             : 
      66     5362144 : class GenericStringUtf16CharacterStream : public BufferedUtf16CharacterStream {
      67             :  public:
      68             :   GenericStringUtf16CharacterStream(Handle<String> data, size_t start_position,
      69             :                                     size_t end_position);
      70             : 
      71      325795 :   bool can_access_heap() override { return true; }
      72             : 
      73             :  protected:
      74             :   size_t FillBuffer(size_t position) override;
      75             : 
      76             :   Handle<String> string_;
      77             :   size_t length_;
      78             : };
      79             : 
      80           0 : GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(
      81             :     Handle<String> data, size_t start_position, size_t end_position)
      82     2681073 :     : string_(data), length_(end_position) {
      83             :   DCHECK_GE(end_position, start_position);
      84             :   DCHECK_GE(static_cast<size_t>(string_->length()),
      85             :             end_position - start_position);
      86     2681073 :   buffer_pos_ = start_position;
      87           0 : }
      88             : 
      89     8370897 : size_t GenericStringUtf16CharacterStream::FillBuffer(size_t from_pos) {
      90     8370897 :   if (from_pos >= length_) return 0;
      91             : 
      92     6095886 :   size_t length = i::Min(kBufferSize, length_ - from_pos);
      93             :   String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),
      94    12191772 :                             static_cast<int>(from_pos + length));
      95     6095886 :   return length;
      96             : }
      97             : 
      98             : // ----------------------------------------------------------------------------
      99             : // ExternalTwoByteStringUtf16CharacterStream.
     100             : //
     101             : // A stream whose data source is a Handle<ExternalTwoByteString>. It avoids
     102             : // all data copying.
     103             : 
     104         124 : class ExternalTwoByteStringUtf16CharacterStream : public Utf16CharacterStream {
     105             :  public:
     106             :   ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
     107             :                                             size_t start_position,
     108             :                                             size_t end_position);
     109             : 
     110           6 :   bool can_access_heap() override { return false; }
     111             : 
     112             :  private:
     113             :   bool ReadBlock() override;
     114             : 
     115             :   const uc16* raw_data_;  // Pointer to the actual array of characters.
     116             :   size_t start_pos_;
     117             :   size_t end_pos_;
     118             : };
     119             : 
     120          62 : ExternalTwoByteStringUtf16CharacterStream::
     121             :     ExternalTwoByteStringUtf16CharacterStream(
     122             :         Handle<ExternalTwoByteString> data, size_t start_position,
     123             :         size_t end_position)
     124         124 :     : raw_data_(data->GetTwoByteData(static_cast<int>(start_position))),
     125             :       start_pos_(start_position),
     126         124 :       end_pos_(end_position) {
     127          62 :   buffer_start_ = raw_data_;
     128          62 :   buffer_cursor_ = raw_data_;
     129          62 :   buffer_end_ = raw_data_ + (end_pos_ - start_pos_);
     130          62 :   buffer_pos_ = start_pos_;
     131          62 : }
     132             : 
     133         218 : bool ExternalTwoByteStringUtf16CharacterStream::ReadBlock() {
     134             :   size_t position = pos();
     135         218 :   bool have_data = start_pos_ <= position && position < end_pos_;
     136         218 :   if (have_data) {
     137          65 :     buffer_pos_ = start_pos_;
     138          65 :     buffer_cursor_ = raw_data_ + (position - start_pos_),
     139          65 :     buffer_end_ = raw_data_ + (end_pos_ - start_pos_);
     140             :   } else {
     141         153 :     buffer_pos_ = position;
     142         153 :     buffer_cursor_ = raw_data_;
     143         153 :     buffer_end_ = raw_data_;
     144             :   }
     145         218 :   return have_data;
     146             : }
     147             : 
     148             : // ----------------------------------------------------------------------------
     149             : // ExternalOneByteStringUtf16CharacterStream
     150             : //
     151             : // A stream whose data source is a Handle<ExternalOneByteString>.
     152             : 
     153      209940 : class ExternalOneByteStringUtf16CharacterStream
     154             :     : public BufferedUtf16CharacterStream {
     155             :  public:
     156             :   ExternalOneByteStringUtf16CharacterStream(Handle<ExternalOneByteString> data,
     157             :                                             size_t start_position,
     158             :                                             size_t end_position);
     159             : 
     160             :   // For testing:
     161             :   ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length);
     162             : 
     163       20225 :   bool can_access_heap() override { return false; }
     164             : 
     165             :  protected:
     166             :   size_t FillBuffer(size_t position) override;
     167             : 
     168             :   const uint8_t* raw_data_;  // Pointer to the actual array of characters.
     169             :   size_t length_;
     170             : };
     171             : 
     172           0 : ExternalOneByteStringUtf16CharacterStream::
     173             :     ExternalOneByteStringUtf16CharacterStream(
     174             :         Handle<ExternalOneByteString> data, size_t start_position,
     175             :         size_t end_position)
     176      206514 :     : raw_data_(data->GetChars()), length_(end_position) {
     177             :   DCHECK(end_position >= start_position);
     178      103257 :   buffer_pos_ = start_position;
     179           0 : }
     180             : 
     181           0 : ExternalOneByteStringUtf16CharacterStream::
     182             :     ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length)
     183        1715 :     : raw_data_(reinterpret_cast<const uint8_t*>(data)), length_(length) {}
     184             : 
     185      772223 : size_t ExternalOneByteStringUtf16CharacterStream::FillBuffer(size_t from_pos) {
     186      772223 :   if (from_pos >= length_) return 0;
     187             : 
     188      667146 :   size_t length = Min(kBufferSize, length_ - from_pos);
     189      667146 :   i::CopyCharsUnsigned(buffer_, raw_data_ + from_pos, length);
     190             :   return length;
     191             : }
     192             : 
     193             : // ----------------------------------------------------------------------------
     194             : // Utf8ExternalStreamingStream - chunked streaming of Utf-8 data.
     195             : //
     196             : // This implementation is fairly complex, since data arrives in chunks which
     197             : // may 'cut' arbitrarily into utf-8 characters. Also, seeking to a given
     198             : // character position is tricky because the byte position cannot be derived
     199             : // from the character position.
     200             : 
     201             : class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
     202             :  public:
     203             :   Utf8ExternalStreamingStream(
     204             :       ScriptCompiler::ExternalSourceStream* source_stream,
     205             :       RuntimeCallStats* stats)
     206             :       : current_({0, {0, 0, unibrow::Utf8::Utf8IncrementalBuffer(0)}}),
     207             :         source_stream_(source_stream),
     208         838 :         stats_(stats) {}
     209         838 :   ~Utf8ExternalStreamingStream() override {
     210        4476 :     for (size_t i = 0; i < chunks_.size(); i++) delete[] chunks_[i].data;
     211         838 :   }
     212             : 
     213          12 :   bool can_access_heap() override { return false; }
     214             : 
     215             :  protected:
     216             :   size_t FillBuffer(size_t position) override;
     217             : 
     218             :  private:
     219             :   // A position within the data stream. It stores:
     220             :   // - The 'physical' position (# of bytes in the stream),
     221             :   // - the 'logical' position (# of ucs-2 characters, also within the stream),
     222             :   // - a possibly incomplete utf-8 char at the current 'physical' position.
     223             :   struct StreamPosition {
     224             :     size_t bytes;
     225             :     size_t chars;
     226             :     unibrow::Utf8::Utf8IncrementalBuffer incomplete_char;
     227             :   };
     228             : 
     229             :   // Position contains a StreamPosition and the index of the chunk the position
     230             :   // points into. (The chunk_no could be derived from pos, but that'd be
     231             :   // an expensive search through all chunks.)
     232             :   struct Position {
     233             :     size_t chunk_no;
     234             :     StreamPosition pos;
     235             :   };
     236             : 
     237             :   // A chunk in the list of chunks, containing:
     238             :   // - The chunk data (data pointer and length), and
     239             :   // - the position at the first byte of the chunk.
     240             :   struct Chunk {
     241             :     const uint8_t* data;
     242             :     size_t length;
     243             :     StreamPosition start;
     244             :   };
     245             : 
     246             :   // Within the current chunk, skip forward from current_ towards position.
     247             :   bool SkipToPosition(size_t position);
     248             :   // Within the current chunk, fill the buffer_ (while it has capacity).
     249             :   void FillBufferFromCurrentChunk();
     250             :   // Fetch a new chunk (assuming current_ is at the end of the current data).
     251             :   bool FetchChunk();
     252             :   // Search through the chunks and set current_ to point to the given position.
     253             :   // (This call is potentially expensive.)
     254             :   void SearchPosition(size_t position);
     255             : 
     256             :   std::vector<Chunk> chunks_;
     257             :   Position current_;
     258             :   ScriptCompiler::ExternalSourceStream* source_stream_;
     259             :   RuntimeCallStats* stats_;
     260             : };
     261             : 
     262          85 : bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) {
     263             :   DCHECK_LE(current_.pos.chars, position);  // We can only skip forward.
     264             : 
     265             :   // Already there? Then return immediately.
     266          85 :   if (current_.pos.chars == position) return true;
     267             : 
     268           6 :   const Chunk& chunk = chunks_[current_.chunk_no];
     269             :   DCHECK(current_.pos.bytes >= chunk.start.bytes);
     270             : 
     271             :   unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
     272           6 :       chunk.start.incomplete_char;
     273           6 :   size_t it = current_.pos.bytes - chunk.start.bytes;
     274           6 :   size_t chars = chunk.start.chars;
     275          78 :   while (it < chunk.length && chars < position) {
     276             :     unibrow::uchar t =
     277          66 :         unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
     278          66 :     if (t == kUtf8Bom && current_.pos.chars == 0) {
     279             :       // BOM detected at beginning of the stream. Don't copy it.
     280          60 :     } else if (t != unibrow::Utf8::kIncomplete) {
     281          30 :       chars++;
     282          30 :       if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
     283             :     }
     284          66 :     it++;
     285             :   }
     286             : 
     287           6 :   current_.pos.bytes += it;
     288           6 :   current_.pos.chars = chars;
     289           6 :   current_.pos.incomplete_char = incomplete_char;
     290           6 :   current_.chunk_no += (it == chunk.length);
     291             : 
     292           6 :   return current_.pos.chars == position;
     293             : }
     294             : 
     295       39319 : void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
     296             :   DCHECK_LT(current_.chunk_no, chunks_.size());
     297             :   DCHECK_EQ(buffer_start_, buffer_cursor_);
     298             :   DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
     299             : 
     300       39319 :   const Chunk& chunk = chunks_[current_.chunk_no];
     301             : 
     302             :   // The buffer_ is writable, but buffer_*_ members are const. So we get a
     303             :   // non-const pointer into buffer that points to the same char as buffer_end_.
     304       39319 :   uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
     305             :   DCHECK_EQ(cursor, buffer_end_);
     306             : 
     307             :   // If the current chunk is the last (empty) chunk we'll have to process
     308             :   // any left-over, partial characters.
     309       39319 :   if (chunk.length == 0) {
     310             :     unibrow::uchar t =
     311         444 :         unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char);
     312         444 :     if (t != unibrow::Utf8::kBufferEmpty) {
     313             :       DCHECK_LT(t, unibrow::Utf16::kMaxNonSurrogateCharCode);
     314         102 :       *cursor = static_cast<uc16>(t);
     315         102 :       buffer_end_++;
     316         102 :       current_.pos.chars++;
     317             :     }
     318         444 :     return;
     319             :   }
     320             : 
     321             :   unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
     322       38875 :       current_.pos.incomplete_char;
     323             :   size_t it;
     324    18844426 :   for (it = current_.pos.bytes - chunk.start.bytes;
     325    18820995 :        it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {
     326             :     unibrow::uchar t =
     327     9383338 :         unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
     328     9383338 :     if (t == unibrow::Utf8::kIncomplete) continue;
     329     9381715 :     if (V8_LIKELY(t < kUtf8Bom)) {
     330     9381215 :       *(cursor++) = static_cast<uc16>(t);  // The by most frequent case.
     331         500 :     } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) {
     332             :       // BOM detected at beginning of the stream. Don't copy it.
     333         476 :     } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
     334         260 :       *(cursor++) = static_cast<uc16>(t);
     335             :     } else {
     336         216 :       *(cursor++) = unibrow::Utf16::LeadSurrogate(t);
     337         432 :       *(cursor++) = unibrow::Utf16::TrailSurrogate(t);
     338             :     }
     339             :   }
     340             : 
     341       38875 :   current_.pos.bytes = chunk.start.bytes + it;
     342       38875 :   current_.pos.chars += (cursor - buffer_end_);
     343       38875 :   current_.pos.incomplete_char = incomplete_char;
     344       38875 :   current_.chunk_no += (it == chunk.length);
     345             : 
     346       38875 :   buffer_end_ = cursor;
     347             : }
     348             : 
     349        1819 : bool Utf8ExternalStreamingStream::FetchChunk() {
     350        1819 :   RuntimeCallTimerScope scope(stats_, &RuntimeCallStats::GetMoreDataCallback);
     351             :   DCHECK_EQ(current_.chunk_no, chunks_.size());
     352             :   DCHECK(chunks_.empty() || chunks_.back().length != 0);
     353             : 
     354        1819 :   const uint8_t* chunk = nullptr;
     355        1819 :   size_t length = source_stream_->GetMoreData(&chunk);
     356        3638 :   chunks_.push_back({chunk, length, current_.pos});
     357        3638 :   return length > 0;
     358             : }
     359             : 
     360       39591 : void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
     361             :   // If current_ already points to the right position, we're done.
     362             :   //
     363             :   // This is expected to be the common case, since we typically call
     364             :   // FillBuffer right after the current buffer.
     365       39591 :   if (current_.pos.chars == position) return;
     366             : 
     367             :   // No chunks. Fetch at least one, so we can assume !chunks_.empty() below.
     368       74640 :   if (chunks_.empty()) {
     369             :     DCHECK_EQ(current_.chunk_no, 0u);
     370             :     DCHECK_EQ(current_.pos.bytes, 0u);
     371             :     DCHECK_EQ(current_.pos.chars, 0u);
     372           0 :     FetchChunk();
     373             :   }
     374             : 
     375             :   // Search for the last chunk whose start position is less or equal to
     376             :   // position.
     377       37320 :   size_t chunk_no = chunks_.size() - 1;
     378      928932 :   while (chunk_no > 0 && chunks_[chunk_no].start.chars > position) {
     379      417825 :     chunk_no--;
     380             :   }
     381             : 
     382             :   // Did we find the terminating (zero-length) chunk? Then we're seeking
     383             :   // past the end of the data, and position does not exist.
     384             :   // Set current_ to point to the terminating chunk.
     385       37320 :   if (chunks_[chunk_no].length == 0) {
     386         120 :     current_ = {chunk_no, chunks_[chunk_no].start};
     387         120 :     return;
     388             :   }
     389             : 
     390             :   // Did we find the non-last chunk? Then our position must be within chunk_no.
     391       37200 :   if (chunk_no + 1 < chunks_.size()) {
     392             :     // Fancy-pants optimization for ASCII chunks within a utf-8 stream.
     393             :     // (Many web sites declare utf-8 encoding, but use only (or almost only) the
     394             :     //  ASCII subset for their JavaScript sources. We can exploit this, by
     395             :     //  checking whether the # bytes in a chunk are equal to the # chars, and if
     396             :     //  so avoid the expensive SkipToPosition.)
     397             :     bool ascii_only_chunk =
     398       37200 :         chunks_[chunk_no].start.incomplete_char ==
     399       74376 :             unibrow::Utf8::Utf8IncrementalBuffer(0) &&
     400       37176 :         (chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
     401       37176 :             (chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
     402       37200 :     if (ascii_only_chunk) {
     403       37115 :       size_t skip = position - chunks_[chunk_no].start.chars;
     404             :       current_ = {chunk_no,
     405       37115 :                   {chunks_[chunk_no].start.bytes + skip,
     406       37115 :                    chunks_[chunk_no].start.chars + skip,
     407       37115 :                    unibrow::Utf8::Utf8IncrementalBuffer(0)}};
     408             :     } else {
     409          85 :       current_ = {chunk_no, chunks_[chunk_no].start};
     410          85 :       SkipToPosition(position);
     411             :     }
     412             : 
     413             :     // Since position was within the chunk, SkipToPosition should have found
     414             :     // something.
     415             :     DCHECK_EQ(position, current_.pos.chars);
     416             :     return;
     417             :   }
     418             : 
     419             :   // What's left: We're in the last, non-terminating chunk. Our position
     420             :   // may be in the chunk, but it may also be in 'future' chunks, which we'll
     421             :   // have to obtain.
     422             :   DCHECK_EQ(chunk_no, chunks_.size() - 1);
     423           0 :   current_ = {chunk_no, chunks_[chunk_no].start};
     424             :   bool have_more_data = true;
     425           0 :   bool found = SkipToPosition(position);
     426           0 :   while (have_more_data && !found) {
     427             :     DCHECK_EQ(current_.chunk_no, chunks_.size());
     428           0 :     have_more_data = FetchChunk();
     429           0 :     found = have_more_data && SkipToPosition(position);
     430             :   }
     431             : 
     432             :   // We'll return with a position != the desired position only if we're out
     433             :   // of data. In that case, we'll point to the terminating chunk.
     434             :   DCHECK_EQ(found, current_.pos.chars == position);
     435             :   DCHECK_EQ(have_more_data, chunks_.back().length != 0);
     436             :   DCHECK_IMPLIES(!found, !have_more_data);
     437             :   DCHECK_IMPLIES(!found, current_.chunk_no == chunks_.size() - 1);
     438             : }
     439             : 
     440       39591 : size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
     441       39591 :   buffer_cursor_ = buffer_;
     442       39591 :   buffer_end_ = buffer_;
     443             : 
     444       39591 :   SearchPosition(position);
     445      116770 :   bool out_of_data = current_.chunk_no != chunks_.size() &&
     446       77853 :                      chunks_[current_.chunk_no].length == 0 &&
     447         402 :                      current_.pos.incomplete_char == 0;
     448             : 
     449       39591 :   if (out_of_data) return 0;
     450             : 
     451             :   // Fill the buffer, until we have at least one char (or are out of data).
     452             :   // (The embedder might give us 1-byte blocks within a utf-8 char, so we
     453             :   //  can't guarantee progress with one chunk. Thus we iterate.)
     454       78550 :   while (!out_of_data && buffer_cursor_ == buffer_end_) {
     455             :     // At end of current data, but there might be more? Then fetch it.
     456       78638 :     if (current_.chunk_no == chunks_.size()) {
     457        1819 :       out_of_data = !FetchChunk();
     458             :     }
     459       39319 :     FillBufferFromCurrentChunk();
     460             :   }
     461             : 
     462             :   DCHECK_EQ(current_.pos.chars - position,
     463             :             static_cast<size_t>(buffer_end_ - buffer_cursor_));
     464       39231 :   return buffer_end_ - buffer_cursor_;
     465             : }
     466             : 
     467             : // ----------------------------------------------------------------------------
     468             : // Chunks - helper for One- + TwoByteExternalStreamingStream
     469             : namespace {
     470             : 
     471             : struct Chunk {
     472             :   const uint8_t* data;
     473             :   size_t byte_length;
     474             :   size_t byte_pos;
     475             : };
     476             : 
     477             : typedef std::vector<struct Chunk> Chunks;
     478             : 
     479        2109 : void DeleteChunks(Chunks& chunks) {
     480        3888 :   for (size_t i = 0; i < chunks.size(); i++) delete[] chunks[i].data;
     481         165 : }
     482             : 
     483             : // Return the chunk index for the chunk containing position.
     484             : // If position is behind the end of the stream, the index of the last,
     485             : // zero-length chunk is returned.
     486       82028 : size_t FindChunk(Chunks& chunks, ScriptCompiler::ExternalSourceStream* source,
     487             :                  size_t position, RuntimeCallStats* stats) {
     488             :   size_t end_pos =
     489       41014 :       chunks.empty() ? 0 : (chunks.back().byte_pos + chunks.back().byte_length);
     490             : 
     491             :   // Get more data if needed. We usually won't enter the loop body.
     492       41014 :   bool out_of_data = !chunks.empty() && chunks.back().byte_length == 0;
     493             :   {
     494       41014 :     RuntimeCallTimerScope scope(stats, &RuntimeCallStats::GetMoreDataCallback);
     495       83807 :     while (!out_of_data && end_pos <= position + 1) {
     496        1779 :       const uint8_t* chunk = nullptr;
     497        1779 :       size_t len = source->GetMoreData(&chunk);
     498             : 
     499        3558 :       chunks.push_back({chunk, len, end_pos});
     500        1779 :       end_pos += len;
     501        1779 :       out_of_data = (len == 0);
     502             :     }
     503             :   }
     504             : 
     505             :   // Here, we should always have at least one chunk, and we either have the
     506             :   // chunk we were looking for, or we're out of data. Also, out_of_data and
     507             :   // end_pos are current (and designate whether we have exhausted the stream,
     508             :   // and the length of data received so far, respectively).
     509             :   DCHECK(!chunks.empty());
     510             :   DCHECK_EQ(end_pos, chunks.back().byte_pos + chunks.back().byte_length);
     511             :   DCHECK_EQ(out_of_data, chunks.back().byte_length == 0);
     512             :   DCHECK(position < end_pos || out_of_data);
     513             : 
     514             :   // Edge case: position is behind the end of stream: Return the last (length 0)
     515             :   // chunk to indicate the end of the stream.
     516       41014 :   if (position >= end_pos) {
     517             :     DCHECK(out_of_data);
     518         592 :     return chunks.size() - 1;
     519             :   }
     520             : 
     521             :   // We almost always 'stream', meaning we want data from the last chunk, so
     522             :   // let's look at chunks back-to-front.
     523       40422 :   size_t chunk_no = chunks.size() - 1;
     524      527558 :   while (chunks[chunk_no].byte_pos > position) {
     525             :     DCHECK_NE(chunk_no, 0u);
     526      446714 :     chunk_no--;
     527             :   }
     528             :   DCHECK_LE(chunks[chunk_no].byte_pos, position);
     529             :   DCHECK_LT(position, chunks[chunk_no].byte_pos + chunks[chunk_no].byte_length);
     530             :   return chunk_no;
     531             : }
     532             : 
     533             : }  // anonymous namespace
     534             : 
     535             : // ----------------------------------------------------------------------------
     536             : // OneByteExternalStreamingStream
     537             : //
     538             : // A stream of latin-1 encoded, chunked data.
     539             : 
     540             : class OneByteExternalStreamingStream : public BufferedUtf16CharacterStream {
                       // Buffered character stream fed from an external source in chunks.
                       // Chunks are looked up through FindChunk() (see FillBuffer) and are
                       // kept in chunks_ until the destructor hands them to DeleteChunks().
     541             :  public:
                         // Stores |source| and |stats| as given; chunks_ is default-constructed.
     542             :   explicit OneByteExternalStreamingStream(
     543             :       ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats)
     544         210 :       : source_(source), stats_(stats) {}
                         // Releases every chunk collected so far.
     545         315 :   ~OneByteExternalStreamingStream() override { DeleteChunks(chunks_); }
     546             : 
                         // This stream always reports false here.
     547           9 :   bool can_access_heap() override { return false; }
     548             : 
     549             :  protected:
                         // Copies data for |position| into buffer_ and returns the number of
                         // characters copied (0 when the stream is exhausted).
     550             :   size_t FillBuffer(size_t position) override;
     551             : 
     552             :  private:
                         // Chunk list passed to FindChunk(); released in the destructor.
     553             :   Chunks chunks_;
                         // Forwarded to FindChunk() on every FillBuffer() call.
     554             :   ScriptCompiler::ExternalSourceStream* source_;
                         // Forwarded to FindChunk() on every FillBuffer() call.
     555             :   RuntimeCallStats* stats_;
     556             : };
     557             : 
     558       38410 : size_t OneByteExternalStreamingStream::FillBuffer(size_t position) {
                         // Copies characters starting at |position| into buffer_ and returns how
                         // many were copied. A single call never reads past the end of the chunk
                         // that contains |position| and never copies more than kBufferSize.
     559       38410 :   const Chunk& chunk = chunks_[FindChunk(chunks_, source_, position, stats_)];
                         // FindChunk() hands back the zero-length chunk once the stream has ended.
     560       38410 :   if (chunk.byte_length == 0) return 0;
     561             : 
                         // Offset of |position| inside the chunk that was found.
     562       38094 :   size_t start_pos = position - chunk.byte_pos;
                         // Limit the copy to the buffer size and to what is left of the chunk.
     563       38094 :   size_t len = i::Min(kBufferSize, chunk.byte_length - start_pos);
     564       38094 :   i::CopyCharsUnsigned(buffer_, chunk.data + start_pos, len);
     565             :   return len;
     566             : }
     567             : 
     568             : #if !(V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64)
     569             : // ----------------------------------------------------------------------------
     570             : // TwoByteExternalStreamingStream
     571             : //
     572             : // A stream of ucs-2 data, delivered in chunks. Chunks may be 'cut' into the
     573             : // middle of characters (or even contain only one byte), which adds a bit
     574             : // of complexity. This stream avoids all data copying, except for characters
     575             : // that cross chunk boundaries.
     576             : 
     577             : class TwoByteExternalStreamingStream : public Utf16CharacterStream {
                       // Character stream over externally supplied two-byte data. ReadBlock()
                       // points the base-class buffer members directly into chunk memory, or
                       // at one_char_buffer_ for a character split across two chunks.
     578             :  public:
     579             :   explicit TwoByteExternalStreamingStream(
     580             :       ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats);
     581             :   ~TwoByteExternalStreamingStream() override;
     582             : 
                         // This stream always reports false here.
     583           0 :   bool can_access_heap() override { return false; }
     584             : 
     585             :  protected:
                         // Re-targets the buffer at the character at pos(); returns false when
                         // no more data is available.
     586             :   bool ReadBlock() override;
     587             : 
                         // Chunk list passed to FindChunk(); released in the destructor.
     588             :   Chunks chunks_;
                         // Forwarded to FindChunk() on every ReadBlock() call.
     589             :   ScriptCompiler::ExternalSourceStream* source_;
                         // Forwarded to FindChunk() on every ReadBlock() call.
     590             :   RuntimeCallStats* stats_;
                         // Holds one character reassembled from the last byte of one chunk and
                         // the first byte of the next.
     591             :   uc16 one_char_buffer_;
     592             : };
     593             : 
     594           0 : TwoByteExternalStreamingStream::TwoByteExternalStreamingStream(
     595             :     ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats)
                           // The base class receives the address of one_char_buffer_ for all
                           // three pointer arguments plus 0. NOTE(review): presumably these are
                           // buffer start/cursor/end and the buffer position, i.e. an empty
                           // buffer at position 0 -- confirm against Utf16CharacterStream.
     596             :     : Utf16CharacterStream(&one_char_buffer_, &one_char_buffer_,
     597             :                            &one_char_buffer_, 0),
     598             :       source_(source),
     599             :       stats_(stats),
     600         180 :       one_char_buffer_(0) {}
     601             : 
     602         120 : TwoByteExternalStreamingStream::~TwoByteExternalStreamingStream() {
                         // Hand every chunk collected so far to DeleteChunks().
     603          60 :   DeleteChunks(chunks_);
     604         120 : }
     605             : 
     606        2604 : bool TwoByteExternalStreamingStream::ReadBlock() {
                         // Re-targets buffer_start_/buffer_cursor_/buffer_end_/buffer_pos_ so
                         // that they cover the character at pos(). Returns true when at least
                         // one character is available and false when the stream is exhausted.
                         // |position| counts characters; chunk positions count bytes, hence the
                         // factor of 2 below.
     607             :   size_t position = pos();
     608             : 
     609             :   // We'll search for the 2nd byte of our character, to make sure we
     610             :   // have enough data for at least one character.
     611        2604 :   size_t chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
     612             : 
     613             :   // Out of data? Leave an empty buffer at |position| and return false.
     614        2604 :   if (chunks_[chunk_no].byte_length == 0) {
     615         276 :     buffer_pos_ = position;
     616         276 :     buffer_cursor_ = buffer_start_;
     617         276 :     buffer_end_ = buffer_start_;
     618         276 :     return false;
     619             :   }
     620             : 
     621             :   Chunk& current = chunks_[chunk_no];
     622             : 
     623             :   // Annoying edge case: Chunks may not be 2-byte aligned, meaning that a
     624             :   // character may be split between the previous and the current chunk.
     625             :   // If we find such a lonely byte at the beginning of the chunk, we'll use
     626             :   // one_char_buffer_ to hold the full character.
     627        2328 :   bool lonely_byte = (chunks_[chunk_no].byte_pos == (2 * position + 1));
     628        2328 :   if (lonely_byte) {
     629             :     DCHECK_NE(chunk_no, 0u);
     630         768 :     Chunk& previous_chunk = chunks_[chunk_no - 1];
                           // Combine the last byte of the previous chunk with the first byte of
                           // this one; which of the two is the high byte depends on endianness.
     631             : #ifdef V8_TARGET_BIG_ENDIAN
     632             :     uc16 character = current.data[0] |
     633             :                      previous_chunk.data[previous_chunk.byte_length - 1] << 8;
     634             : #else
     635         768 :     uc16 character = previous_chunk.data[previous_chunk.byte_length - 1] |
     636         768 :                      current.data[0] << 8;
     637             : #endif
     638             : 
                           // Expose exactly this one character as the buffer.
     639         768 :     one_char_buffer_ = character;
     640         768 :     buffer_pos_ = position;
     641         768 :     buffer_start_ = &one_char_buffer_;
     642         768 :     buffer_cursor_ = &one_char_buffer_;
     643         768 :     buffer_end_ = &one_char_buffer_ + 1;
     644         768 :     return true;
     645             :   }
     646             : 
     647             :   // Common case: character is in current chunk.
     648             :   DCHECK_LE(current.byte_pos, 2 * position);
     649             :   DCHECK_LT(2 * position + 1, current.byte_pos + current.byte_length);
     650             : 
     651             :   // Determine # of full ucs-2 chars in stream, and whether we started on an odd
     652             :   // byte boundary.
     653        1560 :   bool odd_start = (current.byte_pos % 2) == 1;
                         // odd_start converts to 0 or 1: skip the leading half character, then
                         // count whole 2-byte units (a trailing odd byte is dropped by the
                         // integer division).
     654        1560 :   size_t number_chars = (current.byte_length - odd_start) / 2;
     655             : 
     656             :   // Point the buffer_*_ members into the current chunk and set buffer_cursor_
     657             :   // to point to position. Be careful when converting the byte positions (in
     658             :   // Chunk) to the ucs-2 character positions (in buffer_*_ members).
                         // NOTE(review): the chunk bytes are read in place as uint16_t; nothing
                         // in this function checks the alignment of current.data + odd_start.
     659        1560 :   buffer_start_ = reinterpret_cast<const uint16_t*>(current.data + odd_start);
     660        1560 :   buffer_end_ = buffer_start_ + number_chars;
     661        1560 :   buffer_pos_ = (current.byte_pos + odd_start) / 2;
     662        1560 :   buffer_cursor_ = buffer_start_ + (position - buffer_pos_);
     663             :   DCHECK_EQ(position, pos());
     664        1560 :   return true;
     665             : }
     666             : 
     667             : #else
     668             : 
     669             : // ----------------------------------------------------------------------------
     670             : // TwoByteExternalBufferedStream
     671             : //
     672             : // This class is made specifically to address unaligned access to 16-bit data
     673             : // in MIPS and ARM architectures. It replaces class
     674             : // TwoByteExternalStreamingStream which in some cases does have unaligned
     675             : // accesses to 16-bit data.
     676             : 
     677             : class TwoByteExternalBufferedStream : public Utf16CharacterStream {
                       // Two-byte streaming source that reads even-aligned chunks in place and
                       // copies odd-aligned data into buffer_ first (see ReadBlock).
     678             :  public:
     679             :   explicit TwoByteExternalBufferedStream(
     680             :       ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats);
     681             :   ~TwoByteExternalBufferedStream();
     682             : 
                         // This stream always reports false here.
     683             :   bool can_access_heap() override { return false; }
     684             : 
     685             :  protected:
                         // Capacity of buffer_, in uc16 units.
     686             :   static const size_t kBufferSize = 512;
     687             : 
                         // Re-targets the buffer at the character at pos(); returns false when
                         // no more data is available.
     688             :   bool ReadBlock() override;
     689             : 
     690             :   // FillBuffer should read up to kBufferSize characters at position and store
     691             :   // them into buffer_[0..]. It returns the number of characters stored.
                         // |chunk_no| is the index in chunks_ of the chunk holding |position|.
     692             :   size_t FillBuffer(size_t position, size_t chunk_no);
     693             : 
     694             :   // Fixed sized buffer that this class reads from.
     695             :   // The base class' buffer_start_ should always point to buffer_.
                         // NOTE(review): ReadBlock() points buffer_start_ into chunk memory on
                         // its even-aligned path, so "always" above does not hold there.
     696             :   uc16 buffer_[kBufferSize];
     697             : 
                         // Chunk list passed to FindChunk(); released in the destructor.
     698             :   Chunks chunks_;
                         // Forwarded to FindChunk().
     699             :   ScriptCompiler::ExternalSourceStream* source_;
                         // Forwarded to FindChunk().
     700             :   RuntimeCallStats* stats_;
     701             : };
     702             : 
     703             : TwoByteExternalBufferedStream::TwoByteExternalBufferedStream(
     704             :     ScriptCompiler::ExternalSourceStream* source, RuntimeCallStats* stats)
                           // The base class receives buffer_ for all three pointer arguments
                           // plus 0. NOTE(review): presumably an empty buffer at position 0 --
                           // confirm against the Utf16CharacterStream constructor.
     705             :     : Utf16CharacterStream(buffer_, buffer_, buffer_, 0),
     706             :       source_(source),
     707             :       stats_(stats) {}
     708             : 
     709             : TwoByteExternalBufferedStream::~TwoByteExternalBufferedStream() {
                         // Hand every chunk collected so far to DeleteChunks().
     710             :   DeleteChunks(chunks_);
     711             : }
     712             : 
     713             : bool TwoByteExternalBufferedStream::ReadBlock() {
                         // Makes the character at pos() available through the buffer_*_ members.
                         // Chunks starting at an even byte position are read in place; chunks
                         // starting at an odd byte position are copied into buffer_ through
                         // FillBuffer(). Returns false when no character is available.
     714             :   size_t position = pos();
     715             :   // Find chunk in which the position belongs
                         // (the lookup uses the byte position of the character's second byte).
     716             :   size_t chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
     717             : 
     718             :   // Out of data? Leave an empty buffer at |position| and return false.
     719             :   if (chunks_[chunk_no].byte_length == 0) {
     720             :     buffer_pos_ = position;
     721             :     buffer_cursor_ = buffer_start_;
     722             :     buffer_end_ = buffer_start_;
     723             :     return false;
     724             :   }
     725             : 
     726             :   Chunk& current = chunks_[chunk_no];
     727             : 
     728             :   bool odd_start = current.byte_pos % 2;
     729             :   // Common case: character is in current chunk.
     730             :   DCHECK_LE(current.byte_pos, 2 * position + odd_start);
     731             :   DCHECK_LT(2 * position + 1, current.byte_pos + current.byte_length);
     732             : 
     733             :   // If character starts on odd address copy text in buffer so there is always
     734             :   // aligned access to characters. This is important on MIPS and ARM
     735             :   // architectures. Otherwise read characters from memory directly.
     736             :   if (!odd_start) {
                           // Even start: expose the whole chunk; a trailing odd byte is left out
                           // by the integer division.
     737             :     buffer_start_ = reinterpret_cast<const uint16_t*>(current.data);
     738             :     size_t number_chars = current.byte_length / 2;
     739             :     buffer_end_ = buffer_start_ + number_chars;
     740             :     buffer_pos_ = current.byte_pos / 2;
     741             :     buffer_cursor_ = buffer_start_ + (position - buffer_pos_);
     742             :     DCHECK_EQ(position, pos());
     743             :     return true;
     744             :   } else {
                           // Odd start: read from the local buffer_. FillBuffer() may move
                           // buffer_pos_ and buffer_cursor_ when |position| is not a multiple
                           // of kBufferSize.
     745             :     buffer_start_ = buffer_;
     746             :     buffer_pos_ = position;
     747             :     buffer_cursor_ = buffer_;
     748             :     buffer_end_ = buffer_ + FillBuffer(position, chunk_no);
     749             :     DCHECK_EQ(pos(), position);
     750             :     DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
     751             :     return buffer_cursor_ < buffer_end_;
     752             :   }
     753             : }
     754             : 
     755             : size_t TwoByteExternalBufferedStream::FillBuffer(size_t position,
     756             :                                                  size_t chunk_no) {
                         // Copies chunk bytes into buffer_, starting at |position| rounded down
                         // to a multiple of kBufferSize, and returns the number of uc16 units
                         // stored. |chunk_no| must name a chunk that starts at an odd byte
                         // position (checked below). When rounding moves the start, buffer_pos_
                         // and buffer_cursor_ are rewritten so that the cursor still refers to
                         // the originally requested position.
     757             :   DCHECK_EQ(chunks_[chunk_no].byte_pos % 2, 1u);
     758             :   bool odd_start = true;
     759             :   // Align buffer_pos_ to the size of the buffer.
     760             :   {
     761             :     size_t new_pos = position / kBufferSize * kBufferSize;
     762             :     if (new_pos != position) {
                             // The aligned start may live in a different chunk; look it up again
                             // and re-derive its alignment.
     763             :       chunk_no = FindChunk(chunks_, source_, 2 * new_pos + 1, stats_);
     764             :       buffer_pos_ = new_pos;
     765             :       buffer_cursor_ = buffer_start_ + (position - buffer_pos_);
     766             :       position = new_pos;
     767             :       odd_start = chunks_[chunk_no].byte_pos % 2;
     768             :     }
     769             :   }
     770             : 
     771             :   Chunk* current = &chunks_[chunk_no];
     772             : 
     773             :   // Annoying edge case: Chunks may not be 2-byte aligned, meaning that a
     774             :   // character may be split between the previous and the current chunk.
     775             :   // If we find such a lonely byte at the beginning of the chunk, we'll copy
     776             :   // it to the first byte in buffer_.
                         // totalLength counts BYTES written to buffer_, not characters.
     777             :   size_t totalLength = 0;
     778             :   bool lonely_byte = (current->byte_pos == (2 * position + 1));
     779             :   if (lonely_byte) {
     780             :     DCHECK_NE(chunk_no, 0u);
     781             :     Chunk& previous_chunk = chunks_[chunk_no - 1];
     782             :     *reinterpret_cast<uint8_t*>(buffer_) =
     783             :         previous_chunk.data[previous_chunk.byte_length - 1];
     784             :     totalLength++;
     785             :   }
     786             : 
     787             :   // Common case: character is in current chunk.
     788             :   DCHECK_LE(current->byte_pos, 2 * position + odd_start);
     789             :   DCHECK_LT(2 * position + 1, current->byte_pos + current->byte_length);
     790             : 
     791             :   // Copy characters from current chunk starting from chunk_pos to the end of
     792             :   // buffer or chunk.
                         // byte_pos / 2 rounds down for an odd byte_pos, so 2 * chunk_pos is one
                         // byte past the real offset of |position| inside such a chunk;
                         // start_offset (0 or 1) compensates. chunk_pos == 0 with an odd start is
                         // the lonely-byte case handled above, where no adjustment is applied.
     793             :   size_t chunk_pos = position - current->byte_pos / 2;
     794             :   size_t start_offset = odd_start && chunk_pos != 0;
                         // Cap the copy at the free space in buffer_ (2 * kBufferSize bytes minus
                         // the lonely byte, if any) and at the bytes remaining in the chunk.
     795             :   size_t bytes_to_move =
     796             :       i::Min(2 * kBufferSize - lonely_byte,
     797             :              current->byte_length - 2 * chunk_pos + start_offset);
     798             :   i::MemMove(reinterpret_cast<uint8_t*>(buffer_) + lonely_byte,
     799             :              current->data + 2 * chunk_pos - start_offset, bytes_to_move);
     800             : 
     801             :   // Fill up the rest of the buffer if there is space and data left.
     802             :   totalLength += bytes_to_move;
     803             :   position = (current->byte_pos + current->byte_length) / 2;
     804             :   if (position - buffer_pos_ < kBufferSize) {
     805             :     chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
     806             :     current = &chunks_[chunk_no];
     807             :     odd_start = current->byte_pos % 2;
     808             :     bytes_to_move = i::Min(2 * kBufferSize - totalLength, current->byte_length);
                           // bytes_to_move reaches 0 when buffer_ is full or FindChunk() returns
                           // the zero-length end-of-stream chunk; either ends the loop.
     809             :     while (bytes_to_move) {
     810             :       // Common case: character is in current chunk.
     811             :       DCHECK_LE(current->byte_pos, 2 * position + odd_start);
     812             :       DCHECK_LT(2 * position + 1, current->byte_pos + current->byte_length);
     813             : 
                             // Follow-on chunks are appended from their first byte.
     814             :       i::MemMove(reinterpret_cast<uint8_t*>(buffer_) + totalLength,
     815             :                  current->data, bytes_to_move);
     816             :       totalLength += bytes_to_move;
     817             :       position = (current->byte_pos + current->byte_length) / 2;
     818             :       chunk_no = FindChunk(chunks_, source_, 2 * position + 1, stats_);
     819             :       current = &chunks_[chunk_no];
     820             :       odd_start = current->byte_pos % 2;
     821             :       bytes_to_move =
     822             :           i::Min(2 * kBufferSize - totalLength, current->byte_length);
     823             :     }
     824             :   }
                         // Convert the byte count to a count of whole uc16 units.
     825             :   return totalLength / 2;
     826             : }
     827             : #endif
     828             : 
     829             : // ----------------------------------------------------------------------------
     830             : // ScannerStream: Create stream instances.
     831             : 
     832     2114694 : Utf16CharacterStream* ScannerStream::For(Handle<String> data) {
                         // Convenience overload: delegates to the three-argument For() with
                         // start position 0 and end position data->length().
     833     2114694 :   return ScannerStream::For(data, 0, data->length());
     834             : }
     835             : 
     836     2784390 : Utf16CharacterStream* ScannerStream::For(Handle<String> data, int start_pos,
     837             :                                          int end_pos) {
                         // Creates a stream over |data| for the positions start_pos..end_pos,
                         // choosing the stream class from the string's representation. The
                         // DCHECKs require 0 <= start_pos <= end_pos <= data->length().
                         // The result is allocated with new. NOTE(review): presumably the caller
                         // takes ownership -- confirm at the call sites.
     838             :   DCHECK_GE(start_pos, 0);
     839             :   DCHECK_LE(start_pos, end_pos);
     840             :   DCHECK_LE(end_pos, data->length());
     841     2784390 :   if (data->IsExternalOneByteString()) {
     842             :     return new ExternalOneByteStringUtf16CharacterStream(
     843             :         Handle<ExternalOneByteString>::cast(data),
     844      103256 :         static_cast<size_t>(start_pos), static_cast<size_t>(end_pos));
     845     2681134 :   } else if (data->IsExternalTwoByteString()) {
     846             :     return new ExternalTwoByteStringUtf16CharacterStream(
     847             :         Handle<ExternalTwoByteString>::cast(data),
     848          62 :         static_cast<size_t>(start_pos), static_cast<size_t>(end_pos));
     849             :   } else {
                           // Every other string representation goes through the generic stream.
     850             :     // TODO(vogelheim): Maybe call data.Flatten() first?
     851             :     return new GenericStringUtf16CharacterStream(
     852     2681072 :         data, static_cast<size_t>(start_pos), static_cast<size_t>(end_pos));
     853             :   }
     854             : }
     855             : 
     856         480 : std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
     857             :     const char* data) {
                         // Convenience overload: the length is taken with strlen(), so |data|
                         // must be NUL-terminated.
     858         480 :   return ScannerStream::ForTesting(data, strlen(data));
     859             : }
     860             : 
     861        1715 : std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
     862             :     const char* data, size_t length) {
                         // Wraps a new ExternalOneByteStringUtf16CharacterStream built from
                         // |data| and |length| in a unique_ptr, which then owns the stream.
     863             :   return std::unique_ptr<Utf16CharacterStream>(
     864        3430 :       new ExternalOneByteStringUtf16CharacterStream(data, length));
     865             : }
     866             : 
     867         584 : Utf16CharacterStream* ScannerStream::For(
     868             :     ScriptCompiler::ExternalSourceStream* source_stream,
     869             :     v8::ScriptCompiler::StreamedSource::Encoding encoding,
     870             :     RuntimeCallStats* stats) {
                         // Creates the streaming character stream that matches |encoding|,
                         // forwarding |source_stream| and |stats| to its constructor. The result
                         // is allocated with new. NOTE(review): presumably the caller takes
                         // ownership -- confirm at the call sites.
     871         584 :   switch (encoding) {
     872             :     case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
                             // MIPS builds use the buffered variant; all others use the streaming one.
     873             : #if !(V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64)
     874         120 :       return new TwoByteExternalStreamingStream(source_stream, stats);
     875             : #else
     876             :       return new TwoByteExternalBufferedStream(source_stream, stats);
     877             : #endif
     878             :     case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
     879         210 :       return new OneByteExternalStreamingStream(source_stream, stats);
     880             :     case v8::ScriptCompiler::StreamedSource::UTF8:
     881         838 :       return new Utf8ExternalStreamingStream(source_stream, stats);
     882             :   }
                         // Every case above returns, so control only gets here for an |encoding|
                         // value the switch does not handle.
     883           0 :   UNREACHABLE();
     884             :   return nullptr;
     885             : }
     886             : 
     887             : }  // namespace internal
     888             : }  // namespace v8

Generated by: LCOV version 1.10