LCOV - code coverage report
Current view: top level - test/cctest/parsing - test-scanner-streams.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 355 357 99.4 %
Date: 2019-01-20 Functions: 42 50 84.0 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/heap/factory-inl.h"
       6             : #include "src/objects-inl.h"
       7             : #include "src/parsing/scanner-character-streams.h"
       8             : #include "src/parsing/scanner.h"
       9             : #include "test/cctest/cctest.h"
      10             : 
      11             : namespace {
      12             : 
      13             : // Implement ExternalSourceStream based on const char**.
      14             : // This will take each string as one chunk. The last chunk must be empty.
      15             : class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
      16             :  public:
      17         735 :   explicit ChunkSource(const char** chunks) : current_(0) {
      18         785 :     do {
      19             :       chunks_.push_back(
      20        1570 :           {reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)});
      21         785 :       chunks++;
      22         785 :     } while (chunks_.back().len > 0);
      23         245 :   }
      24         210 :   explicit ChunkSource(const char* chunks) : current_(0) {
      25         175 :     do {
      26             :       chunks_.push_back(
      27         350 :           {reinterpret_cast<const uint8_t*>(chunks), strlen(chunks)});
      28         175 :       chunks += strlen(chunks) + 1;
      29         175 :     } while (chunks_.back().len > 0);
      30          70 :   }
      31         175 :   ChunkSource(const uint8_t* data, size_t char_size, size_t len,
      32             :               bool extra_chunky)
      33         350 :       : current_(0) {
      34             :     // If extra_chunky, we'll use increasingly large chunk sizes.  If not, we'll
      35             :     // have a single chunk of full length. Make sure that chunks are always
      36             :     // aligned to char-size though.
      37         175 :     size_t chunk_size = extra_chunky ? char_size : len;
      38        1755 :     for (size_t i = 0; i < len; i += chunk_size, chunk_size += char_size) {
      39        4740 :       chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
      40             :     }
      41         350 :     chunks_.push_back({nullptr, 0});
      42         175 :   }
      43         490 :   ~ChunkSource() override = default;
      44           0 :   bool SetBookmark() override { return false; }
      45           0 :   void ResetToBookmark() override {}
      46        2610 :   size_t GetMoreData(const uint8_t** src) override {
      47             :     DCHECK_LT(current_, chunks_.size());
      48        2610 :     Chunk& next = chunks_[current_++];
      49        2610 :     uint8_t* chunk = new uint8_t[next.len];
      50        2610 :     i::MemMove(chunk, next.ptr, next.len);
      51        2610 :     *src = chunk;
      52        2610 :     return next.len;
      53             :   }
      54             : 
      55             :  private:
      56             :   struct Chunk {
      57             :     const uint8_t* ptr;
      58             :     size_t len;
      59             :   };
      60             :   std::vector<Chunk> chunks_;
      61             :   size_t current_;
      62             : };
      63             : 
      64             : // Checks that Lock() / Unlock() pairs are balanced. Not thread-safe.
      65             : class LockChecker {
      66             :  public:
      67          70 :   LockChecker() : lock_depth_(0) {}
      68          70 :   ~LockChecker() { CHECK_EQ(0, lock_depth_); }
      69             : 
      70         110 :   void Lock() const { lock_depth_++; }
      71             : 
      72         110 :   void Unlock() const {
      73         110 :     CHECK_GT(lock_depth_, 0);
      74         110 :     lock_depth_--;
      75         110 :   }
      76             : 
      77          85 :   bool IsLocked() const { return lock_depth_ > 0; }
      78             : 
      79          10 :   int LockDepth() const { return lock_depth_; }
      80             : 
      81             :  protected:
      82             :   mutable int lock_depth_;
      83             : };
      84             : 
      85          35 : class TestExternalResource : public v8::String::ExternalStringResource,
      86             :                              public LockChecker {
      87             :  public:
      88             :   explicit TestExternalResource(uint16_t* data, int length)
      89          35 :       : LockChecker(), data_(data), length_(static_cast<size_t>(length)) {}
      90             : 
      91          50 :   const uint16_t* data() const override {
      92          50 :     CHECK(IsLocked());
      93          50 :     return data_;
      94             :   }
      95             : 
      96          65 :   size_t length() const override { return length_; }
      97             : 
      98          30 :   bool IsCacheable() const override { return false; }
      99         150 :   void Lock() const override { LockChecker::Lock(); }
     100          75 :   void Unlock() const override { LockChecker::Unlock(); }
     101             : 
     102             :  private:
     103             :   uint16_t* data_;
     104             :   size_t length_;
     105             : };
     106             : 
     107          35 : class TestExternalOneByteResource
     108             :     : public v8::String::ExternalOneByteStringResource,
     109             :       public LockChecker {
     110             :  public:
     111             :   TestExternalOneByteResource(const char* data, size_t length)
     112          35 :       : data_(data), length_(length) {}
     113             : 
     114          30 :   const char* data() const override {
     115          30 :     CHECK(IsLocked());
     116          30 :     return data_;
     117             :   }
     118          65 :   size_t length() const override { return length_; }
     119             : 
     120          30 :   bool IsCacheable() const override { return false; }
     121          70 :   void Lock() const override { LockChecker::Lock(); }
     122          35 :   void Unlock() const override { LockChecker::Unlock(); }
     123             : 
     124             :  private:
     125             :   const char* data_;
     126             :   size_t length_;
     127             : };
     128             : 
     129             : // A test string with all lengths of utf-8 encodings.
     130             : const char unicode_utf8[] =
     131             :     "abc"               // 3x ascii
     132             :     "\xc3\xa4"          // a Umlaut, code point 228
     133             :     "\xe2\xa8\xa0"      // >> (math symbol), code point 10784
     134             :     "\xf0\x9f\x92\xa9"  // best character, code point 128169,
     135             :                         //     as utf-16 surrogates: 55357 56489
     136             :     "def";              // 3x ascii again.
     137             : const uint16_t unicode_ucs2[] = {97,    98,  99,  228, 10784, 55357,
     138             :                                  56489, 100, 101, 102, 0};
     139             : 
     140          35 : i::Handle<i::String> NewExternalTwoByteStringFromResource(
     141             :     i::Isolate* isolate, TestExternalResource* resource) {
     142             :   i::Factory* factory = isolate->factory();
     143             :   // String creation accesses the resource.
     144          35 :   resource->Lock();
     145             :   i::Handle<i::String> uc16_string(
     146          70 :       factory->NewExternalStringFromTwoByte(resource).ToHandleChecked());
     147          35 :   resource->Unlock();
     148          35 :   return uc16_string;
     149             : }
     150             : 
     151             : }  // anonymous namespace
     152             : 
     153       28342 : TEST(Utf8StreamAsciiOnly) {
     154           5 :   const char* chunks[] = {"abc", "def", "ghi", ""};
     155           5 :   ChunkSource chunk_source(chunks);
     156             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     157             :       v8::internal::ScannerStream::For(
     158           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     159             : 
     160             :   // Read the data without dying.
     161             :   v8::internal::uc32 c;
     162          50 :   do {
     163             :     c = stream->Advance();
     164             :   } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
     165           5 : }
     166             : 
     167       28342 : TEST(Utf8StreamBOM) {
     168             :   // Construct test string w/ UTF-8 BOM (byte order mark)
     169           5 :   char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};
     170             :   strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));
     171             : 
     172           5 :   const char* chunks[] = {data, "\0"};
     173           5 :   ChunkSource chunk_source(chunks);
     174             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     175             :       v8::internal::ScannerStream::For(
     176           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     177             : 
     178             :   // Read the data without tripping over the BOM.
     179          55 :   for (size_t i = 0; unicode_ucs2[i]; i++) {
     180          50 :     CHECK_EQ(unicode_ucs2[i], stream->Advance());
     181             :   }
     182           5 :   CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());
     183             : 
     184             :   // Make sure seek works.
     185           5 :   stream->Seek(0);
     186           5 :   CHECK_EQ(unicode_ucs2[0], stream->Advance());
     187             : 
     188           5 :   stream->Seek(5);
     189           5 :   CHECK_EQ(unicode_ucs2[5], stream->Advance());
     190             : 
     191             :   // Try again, but make sure we have to seek 'backwards'.
     192          25 :   while (v8::internal::Utf16CharacterStream::kEndOfInput != stream->Advance()) {
     193             :     // Do nothing. We merely advance the stream to the end of its input.
     194             :   }
     195           5 :   stream->Seek(5);
     196           5 :   CHECK_EQ(unicode_ucs2[5], stream->Advance());
     197           5 : }
     198             : 
     199       28342 : TEST(Utf8SplitBOM) {
     200             :   // Construct chunks with a BOM split into two chunks.
     201           5 :   char partial_bom[] = "\xef\xbb";
     202           5 :   char data[1 + arraysize(unicode_utf8)] = {"\xbf"};
     203             :   strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));
     204             : 
     205             :   {
     206           5 :     const char* chunks[] = {partial_bom, data, "\0"};
     207           5 :     ChunkSource chunk_source(chunks);
     208             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     209             :         v8::internal::ScannerStream::For(
     210           5 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     211             : 
     212             :     // Read the data without tripping over the BOM.
     213          55 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     214          50 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     215             :     }
     216             :   }
     217             : 
     218             :   // And now with single-byte BOM chunks.
     219           5 :   char bom_byte_1[] = "\xef";
     220           5 :   char bom_byte_2[] = "\xbb";
     221             :   {
     222           5 :     const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};
     223           5 :     ChunkSource chunk_source(chunks);
     224             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     225             :         v8::internal::ScannerStream::For(
     226           5 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     227             : 
     228             :     // Read the data without tripping over the BOM.
     229          55 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     230          50 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     231             :     }
     232             :   }
     233           5 : }
     234             : 
     235       28342 : TEST(Utf8SplitMultiBOM) {
     236             :   // Construct chunks with a split BOM followed by another split BOM.
     237             :   const char* chunks = "\xef\xbb\0\xbf\xef\xbb\0\xbf\0\0";
     238           5 :   ChunkSource chunk_source(chunks);
     239             :   std::unique_ptr<i::Utf16CharacterStream> stream(
     240             :       v8::internal::ScannerStream::For(
     241           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     242             : 
     243             :   // Read the data, ensuring we get exactly one of the two BOMs back.
     244           5 :   CHECK_EQ(0xFEFF, stream->Advance());
     245           5 :   CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     246           5 : }
     247             : 
     248       28342 : TEST(Utf8AdvanceUntil) {
     249             :   // Test utf-8 advancing until a certain char.
     250             : 
     251             :   const char line_term = '\n';
     252             :   const size_t kLen = arraysize(unicode_utf8);
     253             :   char data[kLen + 1];
     254             :   strncpy(data, unicode_utf8, kLen);
     255           5 :   data[kLen - 1] = line_term;
     256           5 :   data[kLen] = '\0';
     257             : 
     258             :   {
     259           5 :     const char* chunks[] = {data, "\0"};
     260           5 :     ChunkSource chunk_source(chunks);
     261             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     262             :         v8::internal::ScannerStream::For(
     263           5 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     264             : 
     265             :     int32_t res = stream->AdvanceUntil(
     266         110 :         [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
     267           5 :     CHECK_EQ(line_term, res);
     268             :   }
     269           5 : }
     270             : 
     271       28342 : TEST(AdvanceMatchAdvanceUntil) {
     272             :   // Test if single advance and advanceUntil behave the same
     273             : 
     274           5 :   char data[] = {'a', 'b', '\n', 'c', '\0'};
     275             : 
     276             :   {
     277           5 :     const char* chunks[] = {data, "\0"};
     278           5 :     ChunkSource chunk_source_a(chunks);
     279             : 
     280             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance(
     281             :         v8::internal::ScannerStream::For(
     282           5 :             &chunk_source_a, v8::ScriptCompiler::StreamedSource::UTF8));
     283             : 
     284           5 :     ChunkSource chunk_source_au(chunks);
     285             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance_until(
     286             :         v8::internal::ScannerStream::For(
     287           5 :             &chunk_source_au, v8::ScriptCompiler::StreamedSource::UTF8));
     288             : 
     289             :     int32_t au_c0_ = stream_advance_until->AdvanceUntil(
     290          30 :         [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
     291             : 
     292             :     int32_t a_c0_ = '0';
     293          45 :     while (!unibrow::IsLineTerminator(a_c0_)) {
     294             :       a_c0_ = stream_advance->Advance();
     295             :     }
     296             : 
     297             :     // Check both advances methods have the same output
     298           5 :     CHECK_EQ(a_c0_, au_c0_);
     299             : 
     300             :     // Check if both set the cursor to the correct position by advancing both
     301             :     // streams by one character.
     302             :     a_c0_ = stream_advance->Advance();
     303             :     au_c0_ = stream_advance_until->Advance();
     304           5 :     CHECK_EQ(a_c0_, au_c0_);
     305             :   }
     306           5 : }
     307             : 
     308       28342 : TEST(Utf8AdvanceUntilOverChunkBoundaries) {
     309             :   // Test utf-8 advancing until a certain char, crossing chunk boundaries.
     310             : 
     311             :   // Split the test string at each byte and pass it to the stream. This way,
     312             :   // we'll have a split at each possible boundary.
     313             :   size_t len = strlen(unicode_utf8);
     314             :   char buffer[arraysize(unicode_utf8) + 4];
     315          80 :   for (size_t i = 1; i < len; i++) {
     316             :     // Copy source string into buffer, splitting it at i.
     317             :     // Then add three chunks, 0..i-1, i..strlen-1, empty.
     318             :     strncpy(buffer, unicode_utf8, i);
     319          70 :     strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
     320          70 :     buffer[i] = '\0';
     321          70 :     buffer[len + 1] = '\n';
     322          70 :     buffer[len + 2] = '\0';
     323          70 :     buffer[len + 3] = '\0';
     324          70 :     const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
     325             : 
     326          70 :     ChunkSource chunk_source(chunks);
     327             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     328             :         v8::internal::ScannerStream::For(
     329          70 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     330             : 
     331             :     int32_t res = stream->AdvanceUntil(
     332        1540 :         [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
     333         140 :     CHECK_EQ(buffer[len + 1], res);
     334             :   }
     335           5 : }
     336             : 
     337       28342 : TEST(Utf8ChunkBoundaries) {
     338             :   // Test utf-8 parsing at chunk boundaries.
     339             : 
     340             :   // Split the test string at each byte and pass it to the stream. This way,
     341             :   // we'll have a split at each possible boundary.
     342             :   size_t len = strlen(unicode_utf8);
     343             :   char buffer[arraysize(unicode_utf8) + 3];
     344          80 :   for (size_t i = 1; i < len; i++) {
     345             :     // Copy source string into buffer, splitting it at i.
     346             :     // Then add three chunks, 0..i-1, i..strlen-1, empty.
     347             :     strncpy(buffer, unicode_utf8, i);
     348          70 :     strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
     349          70 :     buffer[i] = '\0';
     350          70 :     buffer[len + 1] = '\0';
     351          70 :     buffer[len + 2] = '\0';
     352          70 :     const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
     353             : 
     354          70 :     ChunkSource chunk_source(chunks);
     355             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     356             :         v8::internal::ScannerStream::For(
     357          70 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     358             : 
     359         770 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     360         700 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     361             :     }
     362          70 :     CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
     363             :              stream->Advance());
     364             :   }
     365           5 : }
     366             : 
     367       28342 : TEST(Utf8SingleByteChunks) {
     368             :   // Have each byte as a single-byte chunk.
     369             :   size_t len = strlen(unicode_utf8);
     370             :   char buffer[arraysize(unicode_utf8) + 4];
     371          75 :   for (size_t i = 1; i < len - 1; i++) {
     372             :     // Copy source string into buffer, make a single-byte chunk at i.
     373             :     strncpy(buffer, unicode_utf8, i);
     374          65 :     strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1);
     375          65 :     buffer[i] = '\0';
     376          65 :     buffer[i + 1] = unicode_utf8[i];
     377          65 :     buffer[i + 2] = '\0';
     378          65 :     buffer[len + 2] = '\0';
     379          65 :     buffer[len + 3] = '\0';
     380             :     const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,
     381          65 :                             buffer + len + 3};
     382             : 
     383          65 :     ChunkSource chunk_source(chunks);
     384             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     385             :         v8::internal::ScannerStream::For(
     386          65 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     387             : 
     388         715 :     for (size_t j = 0; unicode_ucs2[j]; j++) {
     389         650 :       CHECK_EQ(unicode_ucs2[j], stream->Advance());
     390             :     }
     391          65 :     CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
     392             :              stream->Advance());
     393             :   }
     394           5 : }
     395             : 
     396             : #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
     397             : 
     398         290 : void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream,
     399             :                          unsigned length, unsigned start, unsigned end) {
     400             :   // Read streams one char at a time
     401             :   unsigned i;
     402      260520 :   for (i = start; i < end; i++) {
     403      260230 :     CHECK_EQU(i, stream->pos());
     404      260230 :     CHECK_EQU(reference[i], stream->Advance());
     405             :   }
     406         290 :   CHECK_EQU(end, stream->pos());
     407         290 :   CHECK_EQU(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     408         290 :   CHECK_EQU(end + 1, stream->pos());
     409         290 :   stream->Back();
     410             : 
     411             :   // Pushback, re-read, pushback again.
     412         290 :   while (i > end / 4) {
     413      203920 :     int32_t c0 = reference[i - 1];
     414      203920 :     CHECK_EQU(i, stream->pos());
     415      203920 :     stream->Back();
     416             :     i--;
     417      203920 :     CHECK_EQU(i, stream->pos());
     418             :     int32_t c1 = stream->Advance();
     419             :     i++;
     420      203920 :     CHECK_EQU(i, stream->pos());
     421      203920 :     CHECK_EQ(c0, c1);
     422      203920 :     stream->Back();
     423             :     i--;
     424      203920 :     CHECK_EQU(i, stream->pos());
     425             :   }
     426             : 
     427             :   // Seek + read streams one char at a time.
     428         290 :   unsigned halfway = end / 2;
     429         290 :   stream->Seek(stream->pos() + halfway - i);
     430      136270 :   for (i = halfway; i < end; i++) {
     431      135980 :     CHECK_EQU(i, stream->pos());
     432      135980 :     CHECK_EQU(reference[i], stream->Advance());
     433             :   }
     434         290 :   CHECK_EQU(i, stream->pos());
     435         290 :   CHECK_LT(stream->Advance(), 0);
     436             : 
     437             :   // Seek back, then seek beyond end of stream.
     438         290 :   stream->Seek(start);
     439         290 :   if (start < length) {
     440         480 :     CHECK_EQU(stream->Advance(), reference[start]);
     441             :   } else {
     442          50 :     CHECK_LT(stream->Advance(), 0);
     443             :   }
     444         290 :   stream->Seek(length + 5);
     445         290 :   CHECK_LT(stream->Advance(), 0);
     446         290 : }
     447             : 
     448          10 : void TestCloneCharacterStream(const char* reference,
     449             :                               i::Utf16CharacterStream* stream,
     450             :                               unsigned length) {
     451          10 :   std::unique_ptr<i::Utf16CharacterStream> clone = stream->Clone();
     452             : 
     453             :   unsigned i;
     454          10 :   unsigned halfway = length / 2;
     455             :   // Advance original half way.
     456          50 :   for (i = 0; i < halfway; i++) {
     457          40 :     CHECK_EQU(i, stream->pos());
     458          40 :     CHECK_EQU(reference[i], stream->Advance());
     459             :   }
     460             : 
     461             :   // Test advancing original stream didn't affect the clone.
     462          10 :   TestCharacterStream(reference, clone.get(), length, 0, length);
     463             : 
     464             :   // Test advancing clone didn't affect original stream.
     465          10 :   TestCharacterStream(reference, stream, length, i, length);
     466          10 : }
     467             : 
     468             : #undef CHECK_EQU
     469             : 
     470          30 : void TestCharacterStreams(const char* one_byte_source, unsigned length,
     471             :                           unsigned start = 0, unsigned end = 0) {
     472          30 :   if (end == 0) end = length;
     473             : 
     474             :   i::Isolate* isolate = CcTest::i_isolate();
     475             :   i::Factory* factory = isolate->factory();
     476             : 
     477             :   // 2-byte external string
     478          30 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     479             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
     480          30 :                                            static_cast<int>(length));
     481             :   {
     482       41075 :     for (unsigned i = 0; i < length; i++) {
     483       82090 :       uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
     484             :     }
     485             :     TestExternalResource resource(uc16_buffer.get(), length);
     486             :     i::Handle<i::String> uc16_string(
     487          30 :         NewExternalTwoByteStringFromResource(isolate, &resource));
     488             :     std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
     489          30 :         i::ScannerStream::For(isolate, uc16_string, start, end));
     490          30 :     TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end);
     491             : 
     492             :     // This avoids the GC from trying to free a stack allocated resource.
     493          60 :     if (uc16_string->IsExternalString())
     494             :       i::Handle<i::ExternalTwoByteString>::cast(uc16_string)
     495          50 :           ->SetResource(isolate, nullptr);
     496             :   }
     497             : 
     498             :   // 1-byte external string
     499             :   i::Vector<const uint8_t> one_byte_vector =
     500             :       i::OneByteVector(one_byte_source, static_cast<int>(length));
     501             :   i::Handle<i::String> one_byte_string =
     502          60 :       factory->NewStringFromOneByte(one_byte_vector).ToHandleChecked();
     503             :   {
     504             :     TestExternalOneByteResource one_byte_resource(one_byte_source, length);
     505             :     i::Handle<i::String> ext_one_byte_string(
     506             :         factory->NewExternalStringFromOneByte(&one_byte_resource)
     507          60 :             .ToHandleChecked());
     508             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
     509          30 :         i::ScannerStream::For(isolate, ext_one_byte_string, start, end));
     510             :     TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start,
     511          30 :                         end);
     512             :     // This avoids the GC from trying to free a stack allocated resource.
     513          60 :     if (ext_one_byte_string->IsExternalString())
     514             :       i::Handle<i::ExternalOneByteString>::cast(ext_one_byte_string)
     515          50 :           ->SetResource(isolate, nullptr);
     516             :   }
     517             : 
     518             :   // 1-byte generic i::String
     519             :   {
     520             :     std::unique_ptr<i::Utf16CharacterStream> string_stream(
     521          30 :         i::ScannerStream::For(isolate, one_byte_string, start, end));
     522             :     TestCharacterStream(one_byte_source, string_stream.get(), length, start,
     523          30 :                         end);
     524             :   }
     525             : 
     526             :   // 2-byte generic i::String
     527             :   {
     528             :     i::Handle<i::String> two_byte_string =
     529          60 :         factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
     530             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     531          30 :         i::ScannerStream::For(isolate, two_byte_string, start, end));
     532             :     TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length,
     533          30 :                         start, end);
     534             :   }
     535             : 
     536             :   // Streaming has no notion of start/end, so let's skip streaming tests for
     537             :   // these cases.
     538          60 :   if (start != 0 || end != length) return;
     539             : 
     540             :   // 1-byte streaming stream, single + many chunks.
     541             :   {
     542             :     const uint8_t* data = one_byte_vector.begin();
     543             :     const uint8_t* data_end = one_byte_vector.end();
     544             : 
     545          25 :     ChunkSource single_chunk(data, 1, data_end - data, false);
     546             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
     547             :         i::ScannerStream::For(&single_chunk,
     548          25 :                               v8::ScriptCompiler::StreamedSource::ONE_BYTE));
     549             :     TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
     550          25 :                         length, start, end);
     551             : 
     552          25 :     ChunkSource many_chunks(data, 1, data_end - data, true);
     553             :     one_byte_streaming_stream.reset(i::ScannerStream::For(
     554          25 :         &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE));
     555             :     TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
     556          25 :                         length, start, end);
     557             :   }
     558             : 
     559             :   // UTF-8 streaming stream, single + many chunks.
     560             :   {
     561             :     const uint8_t* data = one_byte_vector.begin();
     562             :     const uint8_t* data_end = one_byte_vector.end();
     563          25 :     ChunkSource chunks(data, 1, data_end - data, false);
     564             :     std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
     565             :         i::ScannerStream::For(&chunks,
     566          25 :                               v8::ScriptCompiler::StreamedSource::UTF8));
     567             :     TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
     568          25 :                         start, end);
     569             : 
     570          25 :     ChunkSource many_chunks(data, 1, data_end - data, true);
     571             :     utf8_streaming_stream.reset(i::ScannerStream::For(
     572          25 :         &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8));
     573             :     TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
     574          25 :                         start, end);
     575             :   }
     576             : 
     577             :   // 2-byte streaming stream, single + many chunks.
     578             :   {
     579             :     const uint8_t* data =
     580             :         reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
     581             :     const uint8_t* data_end =
     582             :         reinterpret_cast<const uint8_t*>(two_byte_vector.end());
     583          25 :     ChunkSource chunks(data, 2, data_end - data, false);
     584             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
     585             :         i::ScannerStream::For(&chunks,
     586          25 :                               v8::ScriptCompiler::StreamedSource::TWO_BYTE));
     587             :     TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
     588          25 :                         length, start, end);
     589             : 
     590          25 :     ChunkSource many_chunks(data, 2, data_end - data, true);
     591             :     two_byte_streaming_stream.reset(i::ScannerStream::For(
     592          25 :         &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE));
     593             :     TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
     594          25 :                         length, start, end);
     595             :   }
     596             : }
     597             : 
     598       28342 : TEST(CharacterStreams) {
     599           5 :   v8::Isolate* isolate = CcTest::isolate();
     600           5 :   v8::HandleScope handles(isolate);
     601           5 :   v8::Local<v8::Context> context = v8::Context::New(isolate);
     602             :   v8::Context::Scope context_scope(context);
     603             : 
     604           5 :   TestCharacterStreams("abcdefghi", 9);
     605           5 :   TestCharacterStreams("abc\0\n\r\x7f", 7);
     606           5 :   TestCharacterStreams("\0", 1);
     607           5 :   TestCharacterStreams("", 0);
     608             : 
     609             :   // 4k large buffer.
     610             :   char buffer[4096 + 1];
     611       20490 :   for (unsigned i = 0; i < arraysize(buffer); i++) {
     612       20485 :     buffer[i] = static_cast<char>(i & 0x7F);
     613             :   }
     614           5 :   buffer[arraysize(buffer) - 1] = '\0';
     615           5 :   TestCharacterStreams(buffer, arraysize(buffer) - 1);
     616          10 :   TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);
     617           5 : }
     618             : 
     619             : // Regression test for crbug.com/651333. Read invalid utf-8.
     620       28342 : TEST(Regress651333) {
     621             :   const uint8_t bytes[] =
     622             :       "A\xf1"
     623           5 :       "ad";  // Anad, with n == n-with-tilde.
     624           5 :   const uint16_t unicode[] = {65, 65533, 97, 100};
     625             : 
     626             :   // Run the test for all sub-strings 0..N of bytes, to make sure we hit the
     627             :   // error condition in and at chunk boundaries.
     628          30 :   for (size_t len = 0; len < arraysize(bytes); len++) {
     629             :     // Read len bytes from bytes, and compare against the expected unicode
     630             :     // characters. Expect kBadChar ( == Unicode replacement char == code point
     631             :     // 65533) instead of the incorrectly coded Latin1 char.
     632          25 :     ChunkSource chunks(bytes, 1, len, false);
     633             :     std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
     634          25 :         &chunks, v8::ScriptCompiler::StreamedSource::UTF8));
     635          75 :     for (size_t i = 0; i < len; i++) {
     636         100 :       CHECK_EQ(unicode[i], stream->Advance());
     637             :     }
     638          25 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     639             :   }
     640           5 : }
     641             : 
     642          15 : void TestChunkStreamAgainstReference(
     643             :     const char* cases[],
     644        1170 :     const std::vector<std::vector<uint16_t>>& unicode_expected) {
     645         160 :   for (size_t c = 0; c < unicode_expected.size(); ++c) {
     646          65 :     ChunkSource chunk_source(cases[c]);
     647             :     std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
     648          65 :         &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     649         915 :     for (size_t i = 0; i < unicode_expected[c].size(); i++) {
     650         720 :       CHECK_EQ(unicode_expected[c][i], stream->Advance());
     651             :     }
     652          65 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     653          65 :     stream->Seek(0);
     654         915 :     for (size_t i = 0; i < unicode_expected[c].size(); i++) {
     655         720 :       CHECK_EQ(unicode_expected[c][i], stream->Advance());
     656             :     }
     657          65 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     658             :   }
     659          15 : }
     660             : 
     661       28342 : TEST(Regress6377) {
     662             :   const char* cases[] = {
     663             :       "\xf0\x90\0"  // first chunk - start of 4-byte seq
     664             :       "\x80\x80"    // second chunk - end of 4-byte seq
     665             :       "a\0",        // and an 'a'
     666             : 
     667             :       "\xe0\xbf\0"  // first chunk - start of 3-byte seq
     668             :       "\xbf"        // second chunk - one-byte end of 3-byte seq
     669             :       "a\0",        // and an 'a'
     670             : 
     671             :       "\xc3\0"  // first chunk - start of 2-byte seq
     672             :       "\xbf"    // second chunk - end of 2-byte seq
     673             :       "a\0",    // and an 'a'
     674             : 
     675             :       "\xf0\x90\x80\0"  // first chunk - start of 4-byte seq
     676             :       "\x80"            // second chunk - one-byte end of 4-byte seq
     677             :       "a\xc3\0"         // and an 'a' + start of 2-byte seq
     678             :       "\xbf\0",         // third chunk - end of 2-byte seq
     679           5 :   };
     680             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     681             :       {0xD800, 0xDC00, 97}, {0xFFF, 97}, {0xFF, 97}, {0xD800, 0xDC00, 97, 0xFF},
     682           5 :   };
     683          10 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));
     684           5 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     685           5 : }
     686             : 
     687       28342 : TEST(Regress6836) {
     688             :   const char* cases[] = {
     689             :       // 0xC2 is a lead byte, but there's no continuation. The bug occurs when
     690             :       // this happens near the chunk end.
     691             :       "X\xc2Y\0",
     692             :       // Last chunk ends with a 2-byte char lead.
     693             :       "X\xc2\0",
     694             :       // Last chunk ends with a 3-byte char lead and only one continuation
     695             :       // character.
     696             :       "X\xe0\xbf\0",
     697           5 :   };
     698             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     699             :       {0x58, 0xFFFD, 0x59}, {0x58, 0xFFFD}, {0x58, 0xFFFD},
     700           5 :   };
     701          10 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));
     702           5 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     703           5 : }
     704             : 
     705       28342 : TEST(TestOverlongAndInvalidSequences) {
     706             :   const char* cases[] = {
     707             :       // Overlong 2-byte sequence.
     708             :       "X\xc0\xbfY\0",
     709             :       // Another overlong 2-byte sequence.
     710             :       "X\xc1\xbfY\0",
     711             :       // Overlong 3-byte sequence.
     712             :       "X\xe0\x9f\xbfY\0",
     713             :       // Overlong 4-byte sequence.
     714             :       "X\xf0\x89\xbf\xbfY\0",
     715             :       // Invalid 3-byte sequence (reserved for surrogates).
     716             :       "X\xed\xa0\x80Y\0",
     717             :       // Invalid 4-byte sequence (value out of range).
     718             :       "X\xf4\x90\x80\x80Y\0",
     719           5 :   };
     720             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     721             :       {0x58, 0xFFFD, 0xFFFD, 0x59},
     722             :       {0x58, 0xFFFD, 0xFFFD, 0x59},
     723             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     724             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     725             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     726             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     727           5 :   };
     728          10 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));
     729           5 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     730           5 : }
     731             : 
     732       28342 : TEST(RelocatingCharacterStream) {
     733             :   ManualGCScope manual_gc_scope;
     734           5 :   CcTest::InitializeVM();
     735             :   i::Isolate* i_isolate = CcTest::i_isolate();
     736          10 :   v8::HandleScope scope(CcTest::isolate());
     737             : 
     738             :   const char* string = "abcd";
     739             :   int length = static_cast<int>(strlen(string));
     740           5 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     741          25 :   for (int i = 0; i < length; i++) {
     742          40 :     uc16_buffer[i] = string[i];
     743             :   }
     744             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(), length);
     745             :   i::Handle<i::String> two_byte_string =
     746             :       i_isolate->factory()
     747             :           ->NewStringFromTwoByte(two_byte_vector, i::NOT_TENURED)
     748          10 :           .ToHandleChecked();
     749             :   std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     750           5 :       i::ScannerStream::For(i_isolate, two_byte_string, 0, length));
     751           5 :   CHECK_EQ('a', two_byte_string_stream->Advance());
     752           5 :   CHECK_EQ('b', two_byte_string_stream->Advance());
     753           5 :   CHECK_EQ(size_t{2}, two_byte_string_stream->pos());
     754           5 :   i::String raw = *two_byte_string;
     755             :   i_isolate->heap()->CollectGarbage(i::NEW_SPACE,
     756           5 :                                     i::GarbageCollectionReason::kUnknown);
     757             :   // GC moved the string.
     758          10 :   CHECK_NE(raw, *two_byte_string);
     759           5 :   CHECK_EQ('c', two_byte_string_stream->Advance());
     760           5 :   CHECK_EQ('d', two_byte_string_stream->Advance());
     761           5 : }
     762             : 
     763       28342 : TEST(CloneCharacterStreams) {
     764           5 :   v8::HandleScope handles(CcTest::isolate());
     765           5 :   v8::Local<v8::Context> context = v8::Context::New(CcTest::isolate());
     766             :   v8::Context::Scope context_scope(context);
     767             : 
     768             :   i::Isolate* isolate = CcTest::i_isolate();
     769             :   i::Factory* factory = isolate->factory();
     770             : 
     771             :   const char* one_byte_source = "abcdefghi";
     772             :   unsigned length = static_cast<unsigned>(strlen(one_byte_source));
     773             : 
     774             :   // Check that cloning a character stream does not update
     775             : 
     776             :   // 2-byte external string
     777           5 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     778             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
     779             :                                            static_cast<int>(length));
     780             :   {
     781          50 :     for (unsigned i = 0; i < length; i++) {
     782          90 :       uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
     783             :     }
     784             :     TestExternalResource resource(uc16_buffer.get(), length);
     785             :     i::Handle<i::String> uc16_string(
     786           5 :         NewExternalTwoByteStringFromResource(isolate, &resource));
     787             :     std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
     788           5 :         i::ScannerStream::For(isolate, uc16_string, 0, length));
     789             : 
     790           5 :     CHECK(resource.IsLocked());
     791           5 :     CHECK_EQ(1, resource.LockDepth());
     792           5 :     std::unique_ptr<i::Utf16CharacterStream> cloned = uc16_stream->Clone();
     793           5 :     CHECK_EQ(2, resource.LockDepth());
     794             :     uc16_stream = std::move(cloned);
     795           5 :     CHECK_EQ(1, resource.LockDepth());
     796             : 
     797           5 :     TestCloneCharacterStream(one_byte_source, uc16_stream.get(), length);
     798             : 
     799             :     // This prevents the GC from trying to free a stack-allocated resource.
     800          10 :     if (uc16_string->IsExternalString())
     801             :       i::Handle<i::ExternalTwoByteString>::cast(uc16_string)
     802          10 :           ->SetResource(isolate, nullptr);
     803             :   }
     804             : 
     805             :   // 1-byte external string
     806             :   i::Vector<const uint8_t> one_byte_vector =
     807           5 :       i::OneByteVector(one_byte_source, static_cast<int>(length));
     808             :   i::Handle<i::String> one_byte_string =
     809          10 :       factory->NewStringFromOneByte(one_byte_vector).ToHandleChecked();
     810             :   {
     811             :     TestExternalOneByteResource one_byte_resource(one_byte_source, length);
     812             :     i::Handle<i::String> ext_one_byte_string(
     813             :         factory->NewExternalStringFromOneByte(&one_byte_resource)
     814          10 :             .ToHandleChecked());
     815             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
     816           5 :         i::ScannerStream::For(isolate, ext_one_byte_string, 0, length));
     817           5 :     TestCloneCharacterStream(one_byte_source, one_byte_stream.get(), length);
     818             :     // This prevents the GC from trying to free a stack-allocated resource.
     819          10 :     if (ext_one_byte_string->IsExternalString())
     820             :       i::Handle<i::ExternalOneByteString>::cast(ext_one_byte_string)
     821          10 :           ->SetResource(isolate, nullptr);
     822             :   }
     823             : 
     824             :   // Relocatable streams aren't cloneable.
     825             :   {
     826             :     std::unique_ptr<i::Utf16CharacterStream> string_stream(
     827           5 :         i::ScannerStream::For(isolate, one_byte_string, 0, length));
     828           5 :     CHECK(!string_stream->can_be_cloned());
     829             : 
     830             :     i::Handle<i::String> two_byte_string =
     831          10 :         factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
     832             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     833           5 :         i::ScannerStream::For(isolate, two_byte_string, 0, length));
     834           5 :     CHECK(!two_byte_string_stream->can_be_cloned());
     835             :   }
     836             : 
     837             :   // Chunk sources are currently not cloneable.
     838             :   {
     839           5 :     const char* chunks[] = {"1234", "\0"};
     840           5 :     ChunkSource chunk_source(chunks);
     841             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
     842             :         i::ScannerStream::For(&chunk_source,
     843           5 :                               v8::ScriptCompiler::StreamedSource::ONE_BYTE));
     844           5 :     CHECK(!one_byte_streaming_stream->can_be_cloned());
     845             : 
     846             :     std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
     847             :         i::ScannerStream::For(&chunk_source,
     848           5 :                               v8::ScriptCompiler::StreamedSource::UTF8));
     849           5 :     CHECK(!utf8_streaming_stream->can_be_cloned());
     850             : 
     851             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
     852             :         i::ScannerStream::For(&chunk_source,
     853           5 :                               v8::ScriptCompiler::StreamedSource::TWO_BYTE));
     854           5 :     CHECK(!two_byte_streaming_stream->can_be_cloned());
     855           5 :   }
     856       85016 : }

Generated by: LCOV version 1.10