LCOV - code coverage report
Current view: top level - test/cctest/parsing - test-scanner-streams.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 359 361 99.4 %
Date: 2019-04-17 Functions: 38 46 82.6 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/heap/factory-inl.h"
       6             : #include "src/objects-inl.h"
       7             : #include "src/parsing/scanner-character-streams.h"
       8             : #include "src/parsing/scanner.h"
       9             : #include "test/cctest/cctest.h"
      10             : 
      11             : namespace {
      12             : 
      13             : // Implement ExternalSourceStream based on const char**.
      14             : // This will take each string as one chunk. The last chunk must be empty.
      15             : class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
      16             :  public:
      17         750 :   explicit ChunkSource(const char** chunks) : current_(0) {
      18         795 :     do {
      19        2385 :       chunks_.push_back(
      20         795 :           {reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)});
      21         795 :       chunks++;
      22         795 :     } while (chunks_.back().len > 0);
      23         250 :   }
      24         210 :   explicit ChunkSource(const char* chunks) : current_(0) {
      25         175 :     do {
      26         525 :       chunks_.push_back(
      27         175 :           {reinterpret_cast<const uint8_t*>(chunks), strlen(chunks)});
      28         175 :       chunks += strlen(chunks) + 1;
      29         175 :     } while (chunks_.back().len > 0);
      30          70 :   }
      31         175 :   ChunkSource(const uint8_t* data, size_t char_size, size_t len,
      32             :               bool extra_chunky)
      33         350 :       : current_(0) {
      34             :     // If extra_chunky, we'll use increasingly large chunk sizes.  If not, we'll
      35             :     // have a single chunk of full length. Make sure that chunks are always
      36             :     // aligned to char-size though.
      37         175 :     size_t chunk_size = extra_chunky ? char_size : len;
      38        3335 :     for (size_t i = 0; i < len; i += chunk_size, chunk_size += char_size) {
      39        4740 :       chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
      40             :     }
      41         350 :     chunks_.push_back({nullptr, 0});
      42         175 :   }
      43         990 :   ~ChunkSource() override = default;
      44           0 :   bool SetBookmark() override { return false; }
      45           0 :   void ResetToBookmark() override {}
      46        2620 :   size_t GetMoreData(const uint8_t** src) override {
      47             :     DCHECK_LT(current_, chunks_.size());
      48        2620 :     Chunk& next = chunks_[current_++];
      49        2620 :     uint8_t* chunk = new uint8_t[next.len];
      50        2620 :     if (next.len > 0) {
      51        2225 :       i::MemMove(chunk, next.ptr, next.len);
      52             :     }
      53        2620 :     *src = chunk;
      54        2620 :     return next.len;
      55             :   }
      56             : 
      57             :  private:
      58             :   struct Chunk {
      59             :     const uint8_t* ptr;
      60             :     size_t len;
      61             :   };
      62             :   std::vector<Chunk> chunks_;
      63             :   size_t current_;
      64             : };
      65             : 
      66             : // Checks that Lock() / Unlock() pairs are balanced. Not thread-safe.
      67             : class LockChecker {
      68             :  public:
      69          70 :   LockChecker() : lock_depth_(0) {}
      70          70 :   ~LockChecker() { CHECK_EQ(0, lock_depth_); }
      71             : 
      72         110 :   void Lock() const { lock_depth_++; }
      73             : 
      74             :   void Unlock() const {
      75         110 :     CHECK_GT(lock_depth_, 0);
      76         110 :     lock_depth_--;
      77             :   }
      78             : 
      79          65 :   bool IsLocked() const { return lock_depth_ > 0; }
      80             : 
      81          10 :   int LockDepth() const { return lock_depth_; }
      82             : 
      83             :  protected:
      84             :   mutable int lock_depth_;
      85             : };
      86             : 
      87          70 : class TestExternalResource : public v8::String::ExternalStringResource,
      88             :                              public LockChecker {
      89             :  public:
      90             :   explicit TestExternalResource(uint16_t* data, int length)
      91          35 :       : LockChecker(), data_(data), length_(static_cast<size_t>(length)) {}
      92             : 
      93          30 :   const uint16_t* data() const override {
      94          30 :     CHECK(IsLocked());
      95          30 :     return data_;
      96             :   }
      97             : 
      98          65 :   size_t length() const override { return length_; }
      99             : 
     100          30 :   bool IsCacheable() const override { return false; }
     101          80 :   void Lock() const override { LockChecker::Lock(); }
     102          80 :   void Unlock() const override { LockChecker::Unlock(); }
     103             : 
     104             :  private:
     105             :   uint16_t* data_;
     106             :   size_t length_;
     107             : };
     108             : 
     109          70 : class TestExternalOneByteResource
     110             :     : public v8::String::ExternalOneByteStringResource,
     111             :       public LockChecker {
     112             :  public:
     113             :   TestExternalOneByteResource(const char* data, size_t length)
     114          35 :       : data_(data), length_(length) {}
     115             : 
     116          30 :   const char* data() const override {
     117          30 :     CHECK(IsLocked());
     118          30 :     return data_;
     119             :   }
     120          65 :   size_t length() const override { return length_; }
     121             : 
     122          30 :   bool IsCacheable() const override { return false; }
     123          70 :   void Lock() const override { LockChecker::Lock(); }
     124          70 :   void Unlock() const override { LockChecker::Unlock(); }
     125             : 
     126             :  private:
     127             :   const char* data_;
     128             :   size_t length_;
     129             : };
     130             : 
     131             : // A test string with all lengths of utf-8 encodings.
     132             : const char unicode_utf8[] =
     133             :     "abc"               // 3x ascii
     134             :     "\xc3\xa4"          // a Umlaut, code point 228
     135             :     "\xe2\xa8\xa0"      // >> (math symbol), code point 10784
     136             :     "\xf0\x9f\x92\xa9"  // best character, code point 128169,
     137             :                         //     as utf-16 surrogates: 55357 56489
     138             :     "def";              // 3x ascii again.
     139             : const uint16_t unicode_ucs2[] = {97,    98,  99,  228, 10784, 55357,
     140             :                                  56489, 100, 101, 102, 0};
     141             : 
     142          35 : i::Handle<i::String> NewExternalTwoByteStringFromResource(
     143             :     i::Isolate* isolate, TestExternalResource* resource) {
     144             :   i::Factory* factory = isolate->factory();
     145             :   // String creation accesses the resource.
     146             :   resource->Lock();
     147             :   i::Handle<i::String> uc16_string(
     148          70 :       factory->NewExternalStringFromTwoByte(resource).ToHandleChecked());
     149             :   resource->Unlock();
     150          35 :   return uc16_string;
     151             : }
     152             : 
     153             : }  // anonymous namespace
     154             : 
     155       26644 : TEST(Utf8StreamAsciiOnly) {
     156           5 :   const char* chunks[] = {"abc", "def", "ghi", ""};
     157           5 :   ChunkSource chunk_source(chunks);
     158             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     159             :       v8::internal::ScannerStream::For(
     160           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     161             : 
     162             :   // Read the data without dying.
     163             :   v8::internal::uc32 c;
     164             :   do {
     165             :     c = stream->Advance();
     166          50 :   } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
     167           5 : }
     168             : 
     169       26644 : TEST(Utf8StreamMaxNonSurrogateCharCode) {
     170           5 :   const char* chunks[] = {"\uffff\uffff", ""};
     171           5 :   ChunkSource chunk_source(chunks);
     172             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     173             :       v8::internal::ScannerStream::For(
     174           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     175             : 
     176             :   // Read the correct character.
     177             :   uint16_t max = unibrow::Utf16::kMaxNonSurrogateCharCode;
     178           5 :   CHECK_EQ(max, static_cast<uint32_t>(stream->Advance()));
     179           5 :   CHECK_EQ(max, static_cast<uint32_t>(stream->Advance()));
     180           5 :   CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     181           5 : }
     182             : 
     183       26644 : TEST(Utf8StreamBOM) {
     184             :   // Construct test string w/ UTF-8 BOM (byte order mark)
     185           5 :   char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};
     186             :   strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));
     187             : 
     188           5 :   const char* chunks[] = {data, "\0"};
     189           5 :   ChunkSource chunk_source(chunks);
     190             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     191             :       v8::internal::ScannerStream::For(
     192           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     193             : 
     194             :   // Read the data without tripping over the BOM.
     195         105 :   for (size_t i = 0; unicode_ucs2[i]; i++) {
     196          50 :     CHECK_EQ(unicode_ucs2[i], stream->Advance());
     197             :   }
     198           5 :   CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());
     199             : 
     200             :   // Make sure seek works.
     201           5 :   stream->Seek(0);
     202           5 :   CHECK_EQ(unicode_ucs2[0], stream->Advance());
     203             : 
     204           5 :   stream->Seek(5);
     205           5 :   CHECK_EQ(unicode_ucs2[5], stream->Advance());
     206             : 
     207             :   // Try again, but make sure we have to seek 'backwards'.
     208          25 :   while (v8::internal::Utf16CharacterStream::kEndOfInput != stream->Advance()) {
     209             :     // Do nothing. We merely advance the stream to the end of its input.
     210             :   }
     211           5 :   stream->Seek(5);
     212           5 :   CHECK_EQ(unicode_ucs2[5], stream->Advance());
     213           5 : }
     214             : 
     215       26644 : TEST(Utf8SplitBOM) {
     216             :   // Construct chunks with a BOM split into two chunks.
     217           5 :   char partial_bom[] = "\xef\xbb";
     218           5 :   char data[1 + arraysize(unicode_utf8)] = {"\xbf"};
     219             :   strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));
     220             : 
     221             :   {
     222           5 :     const char* chunks[] = {partial_bom, data, "\0"};
     223           5 :     ChunkSource chunk_source(chunks);
     224             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     225             :         v8::internal::ScannerStream::For(
     226           5 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     227             : 
     228             :     // Read the data without tripping over the BOM.
     229         105 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     230          50 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     231             :     }
     232             :   }
     233             : 
     234             :   // And now with single-byte BOM chunks.
     235           5 :   char bom_byte_1[] = "\xef";
     236           5 :   char bom_byte_2[] = "\xbb";
     237             :   {
     238           5 :     const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};
     239           5 :     ChunkSource chunk_source(chunks);
     240             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     241             :         v8::internal::ScannerStream::For(
     242           5 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     243             : 
     244             :     // Read the data without tripping over the BOM.
     245         105 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     246          50 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     247             :     }
     248             :   }
     249           5 : }
     250             : 
     251       26644 : TEST(Utf8SplitMultiBOM) {
     252             :   // Construct chunks with a split BOM followed by another split BOM.
     253             :   const char* chunks = "\xef\xbb\0\xbf\xef\xbb\0\xbf\0\0";
     254           5 :   ChunkSource chunk_source(chunks);
     255             :   std::unique_ptr<i::Utf16CharacterStream> stream(
     256             :       v8::internal::ScannerStream::For(
     257           5 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     258             : 
     259             :   // Read the data, ensuring we get exactly one of the two BOMs back.
     260           5 :   CHECK_EQ(0xFEFF, stream->Advance());
     261           5 :   CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     262           5 : }
     263             : 
     264       26644 : TEST(Utf8AdvanceUntil) {
     265             :   // Test utf-8 advancing until a certain char.
     266             : 
     267             :   const char line_term = '\n';
     268             :   const size_t kLen = arraysize(unicode_utf8);
     269             :   char data[kLen + 1];
     270             :   strncpy(data, unicode_utf8, kLen);
     271           5 :   data[kLen - 1] = line_term;
     272           5 :   data[kLen] = '\0';
     273             : 
     274             :   {
     275           5 :     const char* chunks[] = {data, "\0"};
     276           5 :     ChunkSource chunk_source(chunks);
     277             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     278             :         v8::internal::ScannerStream::For(
     279           5 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     280             : 
     281             :     int32_t res = stream->AdvanceUntil(
     282          55 :         [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
     283           5 :     CHECK_EQ(line_term, res);
     284             :   }
     285           5 : }
     286             : 
     287       26644 : TEST(AdvanceMatchAdvanceUntil) {
     288             :   // Test if single advance and advanceUntil behave the same
     289             : 
     290           5 :   char data[] = {'a', 'b', '\n', 'c', '\0'};
     291             : 
     292             :   {
     293           5 :     const char* chunks[] = {data, "\0"};
     294           5 :     ChunkSource chunk_source_a(chunks);
     295             : 
     296             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance(
     297             :         v8::internal::ScannerStream::For(
     298           5 :             &chunk_source_a, v8::ScriptCompiler::StreamedSource::UTF8));
     299             : 
     300           5 :     ChunkSource chunk_source_au(chunks);
     301             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance_until(
     302             :         v8::internal::ScannerStream::For(
     303           5 :             &chunk_source_au, v8::ScriptCompiler::StreamedSource::UTF8));
     304             : 
     305             :     int32_t au_c0_ = stream_advance_until->AdvanceUntil(
     306          15 :         [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
     307             : 
     308             :     int32_t a_c0_ = '0';
     309          55 :     while (!unibrow::IsLineTerminator(a_c0_)) {
     310             :       a_c0_ = stream_advance->Advance();
     311             :     }
     312             : 
     313             :     // Check both advances methods have the same output
     314           5 :     CHECK_EQ(a_c0_, au_c0_);
     315             : 
     316             :     // Check if both set the cursor to the correct position by advancing both
     317             :     // streams by one character.
     318             :     a_c0_ = stream_advance->Advance();
     319             :     au_c0_ = stream_advance_until->Advance();
     320           5 :     CHECK_EQ(a_c0_, au_c0_);
     321             :   }
     322           5 : }
     323             : 
     324       26644 : TEST(Utf8AdvanceUntilOverChunkBoundaries) {
     325             :   // Test utf-8 advancing until a certain char, crossing chunk boundaries.
     326             : 
     327             :   // Split the test string at each byte and pass it to the stream. This way,
     328             :   // we'll have a split at each possible boundary.
     329             :   size_t len = strlen(unicode_utf8);
     330             :   char buffer[arraysize(unicode_utf8) + 4];
     331         145 :   for (size_t i = 1; i < len; i++) {
     332             :     // Copy source string into buffer, splitting it at i.
     333             :     // Then add three chunks, 0..i-1, i..strlen-1, empty.
     334             :     strncpy(buffer, unicode_utf8, i);
     335          70 :     strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
     336          70 :     buffer[i] = '\0';
     337          70 :     buffer[len + 1] = '\n';
     338          70 :     buffer[len + 2] = '\0';
     339          70 :     buffer[len + 3] = '\0';
     340          70 :     const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
     341             : 
     342          70 :     ChunkSource chunk_source(chunks);
     343             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     344             :         v8::internal::ScannerStream::For(
     345          70 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     346             : 
     347             :     int32_t res = stream->AdvanceUntil(
     348         770 :         [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
     349         140 :     CHECK_EQ(buffer[len + 1], res);
     350             :   }
     351           5 : }
     352             : 
     353       26644 : TEST(Utf8ChunkBoundaries) {
     354             :   // Test utf-8 parsing at chunk boundaries.
     355             : 
     356             :   // Split the test string at each byte and pass it to the stream. This way,
     357             :   // we'll have a split at each possible boundary.
     358             :   size_t len = strlen(unicode_utf8);
     359             :   char buffer[arraysize(unicode_utf8) + 3];
     360         145 :   for (size_t i = 1; i < len; i++) {
     361             :     // Copy source string into buffer, splitting it at i.
     362             :     // Then add three chunks, 0..i-1, i..strlen-1, empty.
     363             :     strncpy(buffer, unicode_utf8, i);
     364          70 :     strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
     365          70 :     buffer[i] = '\0';
     366          70 :     buffer[len + 1] = '\0';
     367          70 :     buffer[len + 2] = '\0';
     368          70 :     const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
     369             : 
     370          70 :     ChunkSource chunk_source(chunks);
     371             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     372             :         v8::internal::ScannerStream::For(
     373          70 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     374             : 
     375        1470 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     376         700 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     377             :     }
     378          70 :     CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
     379             :              stream->Advance());
     380             :   }
     381           5 : }
     382             : 
     383       26644 : TEST(Utf8SingleByteChunks) {
     384             :   // Have each byte as a single-byte chunk.
     385             :   size_t len = strlen(unicode_utf8);
     386             :   char buffer[arraysize(unicode_utf8) + 4];
     387         135 :   for (size_t i = 1; i < len - 1; i++) {
     388             :     // Copy source string into buffer, make a single-byte chunk at i.
     389             :     strncpy(buffer, unicode_utf8, i);
     390          65 :     strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1);
     391          65 :     buffer[i] = '\0';
     392          65 :     buffer[i + 1] = unicode_utf8[i];
     393          65 :     buffer[i + 2] = '\0';
     394          65 :     buffer[len + 2] = '\0';
     395          65 :     buffer[len + 3] = '\0';
     396             :     const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,
     397          65 :                             buffer + len + 3};
     398             : 
     399          65 :     ChunkSource chunk_source(chunks);
     400             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     401             :         v8::internal::ScannerStream::For(
     402          65 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     403             : 
     404        1365 :     for (size_t j = 0; unicode_ucs2[j]; j++) {
     405         650 :       CHECK_EQ(unicode_ucs2[j], stream->Advance());
     406             :     }
     407          65 :     CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
     408             :              stream->Advance());
     409             :   }
     410           5 : }
     411             : 
     412             : #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
     413             : 
     414         290 : void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream,
     415             :                          unsigned length, unsigned start, unsigned end) {
     416             :   // Read streams one char at a time
     417             :   unsigned i;
     418      520750 :   for (i = start; i < end; i++) {
     419      260230 :     CHECK_EQU(i, stream->pos());
     420      260230 :     CHECK_EQU(reference[i], stream->Advance());
     421             :   }
     422         290 :   CHECK_EQU(end, stream->pos());
     423         290 :   CHECK_EQU(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     424         290 :   CHECK_EQU(end + 1, stream->pos());
     425         290 :   stream->Back();
     426             : 
     427             :   // Pushback, re-read, pushback again.
     428      204210 :   while (i > end / 4) {
     429      203920 :     int32_t c0 = reference[i - 1];
     430      203920 :     CHECK_EQU(i, stream->pos());
     431      203920 :     stream->Back();
     432             :     i--;
     433      203920 :     CHECK_EQU(i, stream->pos());
     434             :     int32_t c1 = stream->Advance();
     435             :     i++;
     436      203920 :     CHECK_EQU(i, stream->pos());
     437      203920 :     CHECK_EQ(c0, c1);
     438      203920 :     stream->Back();
     439             :     i--;
     440      203920 :     CHECK_EQU(i, stream->pos());
     441             :   }
     442             : 
     443             :   // Seek + read streams one char at a time.
     444         290 :   unsigned halfway = end / 2;
     445         290 :   stream->Seek(stream->pos() + halfway - i);
     446      272250 :   for (i = halfway; i < end; i++) {
     447      135980 :     CHECK_EQU(i, stream->pos());
     448      135980 :     CHECK_EQU(reference[i], stream->Advance());
     449             :   }
     450         290 :   CHECK_EQU(i, stream->pos());
     451         290 :   CHECK_LT(stream->Advance(), 0);
     452             : 
     453             :   // Seek back, then seek beyond end of stream.
     454         290 :   stream->Seek(start);
     455         290 :   if (start < length) {
     456         480 :     CHECK_EQU(stream->Advance(), reference[start]);
     457             :   } else {
     458          50 :     CHECK_LT(stream->Advance(), 0);
     459             :   }
     460         290 :   stream->Seek(length + 5);
     461         290 :   CHECK_LT(stream->Advance(), 0);
     462         290 : }
     463             : 
     464          10 : void TestCloneCharacterStream(const char* reference,
     465             :                               i::Utf16CharacterStream* stream,
     466             :                               unsigned length) {
     467          10 :   std::unique_ptr<i::Utf16CharacterStream> clone = stream->Clone();
     468             : 
     469             :   unsigned i;
     470          10 :   unsigned halfway = length / 2;
     471             :   // Advance original half way.
     472          90 :   for (i = 0; i < halfway; i++) {
     473          40 :     CHECK_EQU(i, stream->pos());
     474          40 :     CHECK_EQU(reference[i], stream->Advance());
     475             :   }
     476             : 
     477             :   // Test advancing original stream didn't affect the clone.
     478          10 :   TestCharacterStream(reference, clone.get(), length, 0, length);
     479             : 
     480             :   // Test advancing clone didn't affect original stream.
     481          10 :   TestCharacterStream(reference, stream, length, i, length);
     482          10 : }
     483             : 
     484             : #undef CHECK_EQU
     485             : 
     486          30 : void TestCharacterStreams(const char* one_byte_source, unsigned length,
     487             :                           unsigned start = 0, unsigned end = 0) {
     488          30 :   if (end == 0) end = length;
     489             : 
     490             :   i::Isolate* isolate = CcTest::i_isolate();
     491             :   i::Factory* factory = isolate->factory();
     492             : 
     493             :   // 2-byte external string
     494          30 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     495             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
     496          30 :                                            static_cast<int>(length));
     497             :   {
     498       82120 :     for (unsigned i = 0; i < length; i++) {
     499       82090 :       uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
     500             :     }
     501             :     TestExternalResource resource(uc16_buffer.get(), length);
     502             :     i::Handle<i::String> uc16_string(
     503          30 :         NewExternalTwoByteStringFromResource(isolate, &resource));
     504             :     std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
     505          30 :         i::ScannerStream::For(isolate, uc16_string, start, end));
     506          30 :     TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end);
     507             : 
     508             :     // This avoids the GC from trying to free a stack allocated resource.
     509          30 :     if (uc16_string->IsExternalString())
     510             :       i::Handle<i::ExternalTwoByteString>::cast(uc16_string)
     511          25 :           ->SetResource(isolate, nullptr);
     512             :   }
     513             : 
     514             :   // 1-byte external string
     515             :   i::Vector<const uint8_t> one_byte_vector =
     516             :       i::OneByteVector(one_byte_source, static_cast<int>(length));
     517             :   i::Handle<i::String> one_byte_string =
     518          60 :       factory->NewStringFromOneByte(one_byte_vector).ToHandleChecked();
     519             :   {
     520             :     TestExternalOneByteResource one_byte_resource(one_byte_source, length);
     521             :     i::Handle<i::String> ext_one_byte_string(
     522          60 :         factory->NewExternalStringFromOneByte(&one_byte_resource)
     523             :             .ToHandleChecked());
     524             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
     525          30 :         i::ScannerStream::For(isolate, ext_one_byte_string, start, end));
     526             :     TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start,
     527          30 :                         end);
     528             :     // This avoids the GC from trying to free a stack allocated resource.
     529          30 :     if (ext_one_byte_string->IsExternalString())
     530             :       i::Handle<i::ExternalOneByteString>::cast(ext_one_byte_string)
     531          25 :           ->SetResource(isolate, nullptr);
     532             :   }
     533             : 
     534             :   // 1-byte generic i::String
     535             :   {
     536             :     std::unique_ptr<i::Utf16CharacterStream> string_stream(
     537          30 :         i::ScannerStream::For(isolate, one_byte_string, start, end));
     538             :     TestCharacterStream(one_byte_source, string_stream.get(), length, start,
     539          30 :                         end);
     540             :   }
     541             : 
     542             :   // 2-byte generic i::String
     543             :   {
     544             :     i::Handle<i::String> two_byte_string =
     545          60 :         factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
     546             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     547          30 :         i::ScannerStream::For(isolate, two_byte_string, start, end));
     548             :     TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length,
     549          30 :                         start, end);
     550             :   }
     551             : 
     552             :   // Streaming has no notion of start/end, so let's skip streaming tests for
     553             :   // these cases.
     554          30 :   if (start != 0 || end != length) return;
     555             : 
     556             :   // 1-byte streaming stream, single + many chunks.
     557             :   {
     558             :     const uint8_t* data = one_byte_vector.begin();
     559             :     const uint8_t* data_end = one_byte_vector.end();
     560             : 
     561          25 :     ChunkSource single_chunk(data, 1, data_end - data, false);
     562             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
     563             :         i::ScannerStream::For(&single_chunk,
     564          25 :                               v8::ScriptCompiler::StreamedSource::ONE_BYTE));
     565             :     TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
     566          25 :                         length, start, end);
     567             : 
     568          25 :     ChunkSource many_chunks(data, 1, data_end - data, true);
     569          25 :     one_byte_streaming_stream.reset(i::ScannerStream::For(
     570             :         &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE));
     571             :     TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
     572          25 :                         length, start, end);
     573             :   }
     574             : 
     575             :   // UTF-8 streaming stream, single + many chunks.
     576             :   {
     577             :     const uint8_t* data = one_byte_vector.begin();
     578             :     const uint8_t* data_end = one_byte_vector.end();
     579          25 :     ChunkSource chunks(data, 1, data_end - data, false);
     580             :     std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
     581             :         i::ScannerStream::For(&chunks,
     582          25 :                               v8::ScriptCompiler::StreamedSource::UTF8));
     583             :     TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
     584          25 :                         start, end);
     585             : 
     586          25 :     ChunkSource many_chunks(data, 1, data_end - data, true);
     587          25 :     utf8_streaming_stream.reset(i::ScannerStream::For(
     588             :         &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8));
     589             :     TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
     590          25 :                         start, end);
     591             :   }
     592             : 
     593             :   // 2-byte streaming stream, single + many chunks.
     594             :   {
     595             :     const uint8_t* data =
     596             :         reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
     597             :     const uint8_t* data_end =
     598             :         reinterpret_cast<const uint8_t*>(two_byte_vector.end());
     599          25 :     ChunkSource chunks(data, 2, data_end - data, false);
     600             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
     601             :         i::ScannerStream::For(&chunks,
     602          25 :                               v8::ScriptCompiler::StreamedSource::TWO_BYTE));
     603             :     TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
     604          25 :                         length, start, end);
     605             : 
     606          25 :     ChunkSource many_chunks(data, 2, data_end - data, true);
     607          25 :     two_byte_streaming_stream.reset(i::ScannerStream::For(
     608             :         &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE));
     609             :     TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
     610          25 :                         length, start, end);
     611             :   }
     612             : }
     613             : // Drives TestCharacterStreams with short literals (some containing '\0', one empty) and a 4k buffer.
     614       26644 : TEST(CharacterStreams) {
     615           5 :   v8::Isolate* isolate = CcTest::isolate();
     616          10 :   v8::HandleScope handles(isolate);
     617           5 :   v8::Local<v8::Context> context = v8::Context::New(isolate);
     618             :   v8::Context::Scope context_scope(context);
     619             :   // Lengths are passed explicitly because some of the inputs below contain '\0'.
     620           5 :   TestCharacterStreams("abcdefghi", 9);
     621           5 :   TestCharacterStreams("abc\0\n\r\x7f", 7);
     622           5 :   TestCharacterStreams("\0", 1);
     623           5 :   TestCharacterStreams("", 0);
     624             : 
     625             :   // 4k large buffer, filled with the repeating byte pattern 0x00..0x7F.
     626             :   char buffer[4096 + 1];  // +1 for the terminating '\0' written below.
     627       40975 :   for (unsigned i = 0; i < arraysize(buffer); i++) {
     628       20485 :     buffer[i] = static_cast<char>(i & 0x7F);
     629             :   }
     630           5 :   buffer[arraysize(buffer) - 1] = '\0';
     631           5 :   TestCharacterStreams(buffer, arraysize(buffer) - 1);  // Whole buffer.
     632           5 :   TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);  // Presumably a start/end sub-range; signature not visible here.
     633           5 : }
     634             : 
     635             : // Regression test for crbug.com/651333. Reading invalid UTF-8 must yield the replacement character, not a Latin1 char.
     636       26644 : TEST(Regress651333) {
     637             :   const uint8_t bytes[] =
     638             :       "A\xf1"
     639           5 :       "ad";  // "Anad" where the n is Latin1 n-with-tilde (0xF1); that byte is not valid UTF-8 here.
     640           5 :   const uint16_t unicode[] = {65, 65533, 97, 100};  // 'A', U+FFFD, 'a', 'd'.
     641             : 
     642             :   // Run the test for all sub-strings 0..N of bytes, to make sure we hit the
     643             :   // error condition in and at chunk boundaries.
     644          55 :   for (size_t len = 0; len < arraysize(bytes); len++) {  // len = 0..4; arraysize counts the literal's trailing '\0'.
     645             :     // Read len bytes from bytes, and compare against the expected unicode
     646             :     // characters. Expect kBadChar ( == Unicode replacement char == code point
     647             :     // 65533) instead of the incorrectly coded Latin1 char.
     648          25 :     ChunkSource chunks(bytes, 1, len, false);
     649             :     std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
     650          25 :         &chunks, v8::ScriptCompiler::StreamedSource::UTF8));
     651         125 :     for (size_t i = 0; i < len; i++) {
     652         100 :       CHECK_EQ(unicode[i], stream->Advance());
     653             :     }
     654          25 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());  // Nothing may follow the len-th unit.
     655             :   }
     656           5 : }
     657             : // Streams each case as UTF-8 chunks and checks the decoded units against the reference, before and after Seek(0).
     658          15 : void TestChunkStreamAgainstReference(
     659             :     const char* cases[],  // One string per case: '\0'-separated chunks, ended by an empty chunk.
     660             :     const std::vector<std::vector<uint16_t>>& unicode_expected) {  // Expected code units per case; its size sets the case count.
     661         145 :   for (size_t c = 0; c < unicode_expected.size(); ++c) {
     662          65 :     ChunkSource chunk_source(cases[c]);  // The const char* constructor splits the string at each '\0'.
     663             :     std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
     664          65 :         &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
     665         545 :     for (size_t i = 0; i < unicode_expected[c].size(); i++) {
     666         480 :       CHECK_EQ(unicode_expected[c][i], stream->Advance());
     667             :     }
     668          65 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     669          65 :     stream->Seek(0);  // Rewind and check that a second pass yields the same units.
     670         545 :     for (size_t i = 0; i < unicode_expected[c].size(); i++) {
     671         480 :       CHECK_EQ(unicode_expected[c][i], stream->Advance());
     672             :     }
     673          65 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     674             :   }
     675          15 : }
     676             : // Regression test (presumably V8 issue 6377 - confirm): multi-byte UTF-8 sequences split across chunk boundaries.
     677       26644 : TEST(Regress6377) {
     678             :   const char* cases[] = {  // Adjacent literals concatenate; each embedded "\0" ends a chunk.
     679             :       "\xf0\x90\0"  // first chunk - start of 4-byte seq
     680             :       "\x80\x80"    // second chunk - end of 4-byte seq
     681             :       "a\0",        // and an 'a'
     682             : 
     683             :       "\xe0\xbf\0"  // first chunk - start of 3-byte seq
     684             :       "\xbf"        // second chunk - one-byte end of 3-byte seq
     685             :       "a\0",        // and an 'a'
     686             : 
     687             :       "\xc3\0"  // first chunk - start of 2-byte seq
     688             :       "\xbf"    // second chunk - end of 2-byte seq
     689             :       "a\0",    // and an 'a'
     690             : 
     691             :       "\xf0\x90\x80\0"  // first chunk - start of 4-byte seq
     692             :       "\x80"            // second chunk - one-byte end of 4-byte seq
     693             :       "a\xc3\0"         // and an 'a' + start of 2-byte seq
     694             :       "\xbf\0",         // third chunk - end of 2-byte seq
     695           5 :   };
     696             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     697             :       {0xD800, 0xDC00, 97}, {0xFFF, 97}, {0xFF, 97}, {0xD800, 0xDC00, 97, 0xFF},  // 0xD800 0xDC00 is the surrogate pair for U+10000; 97 == 'a'.
     698          10 :   };
     699           5 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));  // The helper indexes cases[] by unicode_expected.size().
     700           5 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     701           5 : }
     702             : // Regression test (presumably V8 issue 6836 - confirm): an incomplete multi-byte sequence decodes to a single U+FFFD.
     703       26644 : TEST(Regress6836) {
     704             :   const char* cases[] = {
     705             :       // 0xC2 is a lead byte, but there's no continuation. The bug occurs when
     706             :       // this happens near the chunk end.
     707             :       "X\xc2Y\0",
     708             :       // Last chunk ends with a 2-byte char lead.
     709             :       "X\xc2\0",
     710             :       // Last chunk ends with a 3-byte char lead and only one continuation
     711             :       // character.
     712             :       "X\xe0\xbf\0",
     713           5 :   };
     714             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     715             :       {0x58, 0xFFFD, 0x59}, {0x58, 0xFFFD}, {0x58, 0xFFFD},  // 0x58 == 'X', 0x59 == 'Y', 0xFFFD == replacement character.
     716          10 :   };
     717           5 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));  // The helper indexes cases[] by unicode_expected.size().
     718           5 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     719           5 : }
     720             : // Overlong, surrogate and out-of-range UTF-8 sequences must decode to one U+FFFD per byte of the bad sequence.
     721       26644 : TEST(TestOverlongAndInvalidSequences) {
     722             :   const char* cases[] = {
     723             :       // Overlong 2-byte sequence (would encode U+003F).
     724             :       "X\xc0\xbfY\0",
     725             :       // Another overlong 2-byte sequence (would encode U+007F).
     726             :       "X\xc1\xbfY\0",
     727             :       // Overlong 3-byte sequence (would encode U+07FF).
     728             :       "X\xe0\x9f\xbfY\0",
     729             :       // Overlong 4-byte sequence (would encode U+9FFF).
     730             :       "X\xf0\x89\xbf\xbfY\0",
     731             :       // Invalid 3-byte sequence (U+D800, reserved for surrogates).
     732             :       "X\xed\xa0\x80Y\0",
     733             :       // Invalid 4-byte sequence (0x110000, value out of range).
     734             :       "X\xf4\x90\x80\x80Y\0",
     735           5 :   };
     736             :   const std::vector<std::vector<uint16_t>> unicode_expected = {  // 0x58 == 'X', 0x59 == 'Y'.
     737             :       {0x58, 0xFFFD, 0xFFFD, 0x59},
     738             :       {0x58, 0xFFFD, 0xFFFD, 0x59},
     739             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     740             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     741             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     742             :       {0x58, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x59},
     743          10 :   };
     744           5 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));  // The helper indexes cases[] by unicode_expected.size().
     745           5 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     746           5 : }
     747             : // Checks that a stream over a young-generation two-byte string keeps reading correctly after a new-space GC moves it.
     748       26644 : TEST(RelocatingCharacterStream) {
     749             :   ManualGCScope manual_gc_scope;
     750           5 :   CcTest::InitializeVM();
     751             :   i::Isolate* i_isolate = CcTest::i_isolate();
     752          10 :   v8::HandleScope scope(CcTest::isolate());
     753             :   // Widen the ASCII literal into a uc16 buffer to build a two-byte string from it.
     754             :   const char* string = "abcd";
     755             :   int length = static_cast<int>(strlen(string));
     756           5 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     757          45 :   for (int i = 0; i < length; i++) {
     758          40 :     uc16_buffer[i] = string[i];
     759             :   }
     760             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(), length);
     761             :   i::Handle<i::String> two_byte_string =
     762             :       i_isolate->factory()
     763          10 :           ->NewStringFromTwoByte(two_byte_vector, i::AllocationType::kYoung)
     764             :           .ToHandleChecked();
     765             :   std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     766           5 :       i::ScannerStream::For(i_isolate, two_byte_string, 0, length));
     767           5 :   CHECK_EQ('a', two_byte_string_stream->Advance());
     768           5 :   CHECK_EQ('b', two_byte_string_stream->Advance());
     769           5 :   CHECK_EQ(size_t{2}, two_byte_string_stream->pos());
     770             :   i::String raw = *two_byte_string;  // Pre-GC object, kept only to detect the move below.
     771             :   i_isolate->heap()->CollectGarbage(i::NEW_SPACE,
     772           5 :                                     i::GarbageCollectionReason::kUnknown);
     773             :   // GC moved the string: the handle no longer dereferences to the pre-GC object.
     774           5 :   CHECK_NE(raw, *two_byte_string);
     775           5 :   CHECK_EQ('c', two_byte_string_stream->Advance());  // Reading resumes at position 2 after the move.
     776           5 :   CHECK_EQ('d', two_byte_string_stream->Advance());
     777           5 : }
     778             : // Checks which character streams can be cloned, and that cloning an external two-byte stream adjusts the resource lock depth.
     779       26644 : TEST(CloneCharacterStreams) {
     780          10 :   v8::HandleScope handles(CcTest::isolate());
     781           5 :   v8::Local<v8::Context> context = v8::Context::New(CcTest::isolate());
     782             :   v8::Context::Scope context_scope(context);
     783             : 
     784             :   i::Isolate* isolate = CcTest::i_isolate();
     785             :   i::Factory* factory = isolate->factory();
     786             : 
     787             :   const char* one_byte_source = "abcdefghi";
     788             :   unsigned length = static_cast<unsigned>(strlen(one_byte_source));
     789             : 
     790             :   // Check cloning for each kind of stream. NOTE(review): this comment originally ended mid-sentence ("... does not update"); intent unconfirmed.
     791             : 
     792             :   // 2-byte external string
     793           5 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     794             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
     795             :                                            static_cast<int>(length));
     796             :   {
     797          95 :     for (unsigned i = 0; i < length; i++) {
     798          90 :       uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
     799             :     }
     800             :     TestExternalResource resource(uc16_buffer.get(), length);
     801             :     i::Handle<i::String> uc16_string(
     802           5 :         NewExternalTwoByteStringFromResource(isolate, &resource));
     803             :     std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
     804           5 :         i::ScannerStream::For(isolate, uc16_string, 0, length));
     805             :     // Expected lock depth: 1 for the stream, 2 while a clone also exists, 1 again once the original is replaced.
     806           5 :     CHECK(resource.IsLocked());
     807           5 :     CHECK_EQ(1, resource.LockDepth());
     808           5 :     std::unique_ptr<i::Utf16CharacterStream> cloned = uc16_stream->Clone();
     809           5 :     CHECK_EQ(2, resource.LockDepth());
     810             :     uc16_stream = std::move(cloned);  // Destroys the original stream; only the clone remains.
     811           5 :     CHECK_EQ(1, resource.LockDepth());
     812             : 
     813           5 :     TestCloneCharacterStream(one_byte_source, uc16_stream.get(), length);
     814             : 
     815             :     // This prevents the GC from trying to free a stack-allocated resource.
     816           5 :     if (uc16_string->IsExternalString())
     817             :       i::Handle<i::ExternalTwoByteString>::cast(uc16_string)
     818           5 :           ->SetResource(isolate, nullptr);
     819             :   }
     820             : 
     821             :   // 1-byte external string
     822             :   i::Vector<const uint8_t> one_byte_vector =
     823           5 :       i::OneByteVector(one_byte_source, static_cast<int>(length));
     824             :   i::Handle<i::String> one_byte_string =
     825          10 :       factory->NewStringFromOneByte(one_byte_vector).ToHandleChecked();
     826             :   {
     827             :     TestExternalOneByteResource one_byte_resource(one_byte_source, length);
     828             :     i::Handle<i::String> ext_one_byte_string(
     829          10 :         factory->NewExternalStringFromOneByte(&one_byte_resource)
     830             :             .ToHandleChecked());
     831             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
     832           5 :         i::ScannerStream::For(isolate, ext_one_byte_string, 0, length));
     833           5 :     TestCloneCharacterStream(one_byte_source, one_byte_stream.get(), length);
     834             :     // This prevents the GC from trying to free a stack-allocated resource.
     835           5 :     if (ext_one_byte_string->IsExternalString())
     836             :       i::Handle<i::ExternalOneByteString>::cast(ext_one_byte_string)
     837           5 :           ->SetResource(isolate, nullptr);
     838             :   }
     839             : 
     840             :   // Relocatable streams (over the non-external strings created by the factory) aren't cloneable.
     841             :   {
     842             :     std::unique_ptr<i::Utf16CharacterStream> string_stream(
     843           5 :         i::ScannerStream::For(isolate, one_byte_string, 0, length));
     844           5 :     CHECK(!string_stream->can_be_cloned());
     845             : 
     846             :     i::Handle<i::String> two_byte_string =
     847          10 :         factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
     848             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     849           5 :         i::ScannerStream::For(isolate, two_byte_string, 0, length));
     850           5 :     CHECK(!two_byte_string_stream->can_be_cloned());
     851             :   }
     852             : 
     853             :   // Streams over chunk sources are currently not cloneable, for any of the three encodings.
     854             :   {
     855           5 :     const char* chunks[] = {"1234", "\0"};
     856           5 :     ChunkSource chunk_source(chunks);
     857             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
     858             :         i::ScannerStream::For(&chunk_source,
     859           5 :                               v8::ScriptCompiler::StreamedSource::ONE_BYTE));
     860           5 :     CHECK(!one_byte_streaming_stream->can_be_cloned());
     861             :     // The same chunk_source is reused below; only can_be_cloned() is queried, no characters are read.
     862             :     std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
     863             :         i::ScannerStream::For(&chunk_source,
     864           5 :                               v8::ScriptCompiler::StreamedSource::UTF8));
     865           5 :     CHECK(!utf8_streaming_stream->can_be_cloned());
     866             : 
     867             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
     868             :         i::ScannerStream::For(&chunk_source,
     869           5 :                               v8::ScriptCompiler::StreamedSource::TWO_BYTE));
     870           5 :     CHECK(!two_byte_streaming_stream->can_be_cloned());
     871             :   }
     872       79922 : }

Generated by: LCOV version 1.10