LCOV - app.info - test/cctest/parsing/test-scanner-streams.cc

LCOV - code coverage report

Current view:	top level - test/cctest/parsing - test-scanner-streams.cc (source / functions)		Hit	Total	Coverage
Test:	app.info	Lines:	225	227	99.1 %
Date:	2017-10-20	Functions:	23	31	74.2 %

          Line data    Source code

       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/factory.h"  // for i::Factory::NewExternalStringFrom*Byte
       6             : #include "src/feedback-vector-inl.h"  // for include "src/factory.h"
       7             : #include "src/objects-inl.h"
       8             : #include "src/parsing/scanner-character-streams.h"
       9             : #include "src/parsing/scanner.h"
      10             : #include "test/cctest/cctest.h"
      11             : 
      12             : namespace {
      13             : 
      14             : // Implement ExternalSourceStream based on const char**.
      15             : // This will take each string as one chunk. The last chunk must be empty.
      16             : class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
      17             :  public:
      18         558 :   explicit ChunkSource(const char** chunks) : current_(0) {
      19         642 :     do {
      20             :       chunks_.push_back(
      21        1284 :           {reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)});
      22         642 :       chunks++;
      23         642 :     } while (chunks_.back().len > 0);
      24         186 :   }
      25         234 :   explicit ChunkSource(const char* chunks) : current_(0) {
      26         186 :     do {
      27             :       chunks_.push_back(
      28         372 :           {reinterpret_cast<const uint8_t*>(chunks), strlen(chunks)});
      29         186 :       chunks += strlen(chunks) + 1;
      30         186 :     } while (chunks_.back().len > 0);
      31          78 :   }
      32         210 :   ChunkSource(const uint8_t* data, size_t len, bool extra_chunky)
      33         420 :       : current_(0) {
      34             :     // If extra_chunky, we'll use increasingly large chunk sizes.
      35             :     // If not, we'll have a single chunk of full length.
      36         210 :     size_t chunk_size = extra_chunky ? 1 : len;
      37        2352 :     for (size_t i = 0; i < len; i += chunk_size, chunk_size++) {
      38        6426 :       chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
      39             :     }
      40         420 :     chunks_.push_back({nullptr, 0});
      41         210 :   }
      42         474 :   ~ChunkSource() {}
      43           0 :   bool SetBookmark() override { return false; }
      44           0 :   void ResetToBookmark() override {}
      45        3168 :   size_t GetMoreData(const uint8_t** src) override {
      46             :     DCHECK_LT(current_, chunks_.size());
      47        3168 :     Chunk& next = chunks_[current_++];
      48        3168 :     uint8_t* chunk = new uint8_t[next.len];
      49        3168 :     i::MemMove(chunk, next.ptr, next.len);
      50        3168 :     *src = chunk;
      51        3168 :     return next.len;
      52             :   }
      53             : 
      54             :  private:
      55             :   struct Chunk {
      56             :     const uint8_t* ptr;
      57             :     size_t len;
      58             :   };
      59             :   std::vector<Chunk> chunks_;
      60             :   size_t current_;
      61             : };
      62             : 
      63             : class TestExternalResource : public v8::String::ExternalStringResource {
      64             :  public:
      65             :   explicit TestExternalResource(uint16_t* data, int length)
      66          36 :       : data_(data), length_(static_cast<size_t>(length)) {}
      67             : 
      68          36 :   ~TestExternalResource() {}
      69             : 
      70          78 :   const uint16_t* data() const { return data_; }
      71          36 :   size_t length() const { return length_; }
      72             : 
      73             :  private:
      74             :   uint16_t* data_;
      75             :   size_t length_;
      76             : };
      77             : 
      78          36 : class TestExternalOneByteResource
      79             :     : public v8::String::ExternalOneByteStringResource {
      80             :  public:
      81             :   TestExternalOneByteResource(const char* data, size_t length)
      82          36 :       : data_(data), length_(length) {}
      83             : 
      84          60 :   const char* data() const { return data_; }
      85          36 :   size_t length() const { return length_; }
      86             : 
      87             :  private:
      88             :   const char* data_;
      89             :   size_t length_;
      90             : };
      91             : 
      92             : // A test string with all lengths of utf-8 encodings.
      93             : const char unicode_utf8[] =
      94             :     "abc"               // 3x ascii
      95             :     "\xc3\xa4"          // a Umlaut, code point 228
      96             :     "\xe2\xa8\xa0"      // >> (math symbol), code point 10784
      97             :     "\xf0\x9f\x92\xa9"  // best character, code point 128169,
      98             :                         //     as utf-16 surrogates: 55357 56489
      99             :     "def";              // 3x ascii again.
     100             : const uint16_t unicode_ucs2[] = {97,    98,  99,  228, 10784, 55357,
     101             :                                  56489, 100, 101, 102, 0};
     102             : 
     103             : }  // anonymous namespace
     104             : 
     105       23724 : TEST(Utf8StreamAsciiOnly) {
     106           6 :   const char* chunks[] = {"abc", "def", "ghi", ""};
     107           6 :   ChunkSource chunk_source(chunks);
     108             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     109             :       v8::internal::ScannerStream::For(
     110           6 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     111             : 
     112             :   // Read the data without dying.
     113             :   v8::internal::uc32 c;
     114          60 :   do {
     115          60 :     c = stream->Advance();
     116             :   } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
     117           6 : }
     118             : 
     119       23724 : TEST(Utf8StreamBOM) {
     120             :   // Construct test string w/ UTF-8 BOM (byte order mark)
     121           6 :   char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};
     122             :   strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));
     123             : 
     124           6 :   const char* chunks[] = {data, "\0"};
     125           6 :   ChunkSource chunk_source(chunks);
     126             :   std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     127             :       v8::internal::ScannerStream::For(
     128           6 :           &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     129             : 
     130             :   // Read the data without tripping over the BOM.
     131          66 :   for (size_t i = 0; unicode_ucs2[i]; i++) {
     132         120 :     CHECK_EQ(unicode_ucs2[i], stream->Advance());
     133             :   }
     134           6 :   CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());
     135             : 
     136             :   // Make sure seek works.
     137           6 :   stream->Seek(0);
     138          12 :   CHECK_EQ(unicode_ucs2[0], stream->Advance());
     139             : 
     140           6 :   stream->Seek(5);
     141          12 :   CHECK_EQ(unicode_ucs2[5], stream->Advance());
     142             : 
     143             :   // Try again, but make sure we have to seek 'backwards'.
     144          30 :   while (v8::internal::Utf16CharacterStream::kEndOfInput != stream->Advance()) {
     145             :     // Do nothing. We merely advance the stream to the end of its input.
     146             :   }
     147           6 :   stream->Seek(5);
     148          12 :   CHECK_EQ(unicode_ucs2[5], stream->Advance());
     149           6 : }
     150             : 
     151       23724 : TEST(Utf8SplitBOM) {
     152             :   // Construct chunks with a BOM split into two chunks.
     153           6 :   char partial_bom[] = "\xef\xbb";
     154           6 :   char data[1 + arraysize(unicode_utf8)] = {"\xbf"};
     155             :   strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));
     156             : 
     157             :   {
     158           6 :     const char* chunks[] = {partial_bom, data, "\0"};
     159           6 :     ChunkSource chunk_source(chunks);
     160             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     161             :         v8::internal::ScannerStream::For(
     162           6 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     163             : 
     164             :     // Read the data without tripping over the BOM.
     165          66 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     166         120 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     167             :     }
     168             :   }
     169             : 
     170             :   // And now with single-byte BOM chunks.
     171           6 :   char bom_byte_1[] = "\xef";
     172           6 :   char bom_byte_2[] = "\xbb";
     173             :   {
     174           6 :     const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};
     175           6 :     ChunkSource chunk_source(chunks);
     176             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     177             :         v8::internal::ScannerStream::For(
     178           6 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     179             : 
     180             :     // Read the data without tripping over the BOM.
     181          66 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     182         120 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     183             :     }
     184             :   }
     185           6 : }
     186             : 
     187       23724 : TEST(Utf8ChunkBoundaries) {
     188             :   // Test utf-8 parsing at chunk boundaries.
     189             : 
     190             :   // Split the test string at each byte and pass it to the stream. This way,
     191             :   // we'll have a split at each possible boundary.
     192             :   size_t len = strlen(unicode_utf8);
     193             :   char buffer[arraysize(unicode_utf8) + 3];
     194          96 :   for (size_t i = 1; i < len; i++) {
     195             :     // Copy source string into buffer, splitting it at i.
     196             :     // Then add three chunks, 0..i-1, i..strlen-1, empty.
     197             :     strncpy(buffer, unicode_utf8, i);
     198          84 :     strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
     199          84 :     buffer[i] = '\0';
     200          84 :     buffer[len + 1] = '\0';
     201          84 :     buffer[len + 2] = '\0';
     202          84 :     const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
     203             : 
     204          84 :     ChunkSource chunk_source(chunks);
     205             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     206             :         v8::internal::ScannerStream::For(
     207          84 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     208             : 
     209         924 :     for (size_t i = 0; unicode_ucs2[i]; i++) {
     210        1680 :       CHECK_EQ(unicode_ucs2[i], stream->Advance());
     211             :     }
     212          84 :     CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
     213             :              stream->Advance());
     214             :   }
     215           6 : }
     216             : 
     217       23724 : TEST(Utf8SingleByteChunks) {
     218             :   // Have each byte as a single-byte chunk.
     219             :   size_t len = strlen(unicode_utf8);
     220             :   char buffer[arraysize(unicode_utf8) + 4];
     221          90 :   for (size_t i = 1; i < len - 1; i++) {
     222             :     // Copy source string into buffer, make a single-byte chunk at i.
     223             :     strncpy(buffer, unicode_utf8, i);
     224          78 :     strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1);
     225          78 :     buffer[i] = '\0';
     226          78 :     buffer[i + 1] = unicode_utf8[i];
     227          78 :     buffer[i + 2] = '\0';
     228          78 :     buffer[len + 2] = '\0';
     229          78 :     buffer[len + 3] = '\0';
     230             :     const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,
     231          78 :                             buffer + len + 3};
     232             : 
     233          78 :     ChunkSource chunk_source(chunks);
     234             :     std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
     235             :         v8::internal::ScannerStream::For(
     236          78 :             &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     237             : 
     238         858 :     for (size_t j = 0; unicode_ucs2[j]; j++) {
     239        1560 :       CHECK_EQ(unicode_ucs2[j], stream->Advance());
     240             :     }
     241          78 :     CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
     242             :              stream->Advance());
     243             :   }
     244           6 : }
     245             : 
     246             : #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
     247             : 
     248         324 : void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream,
     249             :                          unsigned length, unsigned start, unsigned end) {
     250             :   // Read streams one char at a time
     251             :   unsigned i;
     252      312432 :   for (i = start; i < end; i++) {
     253      312108 :     CHECK_EQU(i, stream->pos());
     254      312108 :     CHECK_EQU(reference[i], stream->Advance());
     255             :   }
     256         324 :   CHECK_EQU(end, stream->pos());
     257         324 :   CHECK_EQU(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     258         324 :   CHECK_EQU(end + 1, stream->pos());
     259         324 :   stream->Back();
     260             : 
     261             :   // Pushback, re-read, pushback again.
     262         324 :   while (i > end / 4) {
     263      244536 :     int32_t c0 = reference[i - 1];
     264      244536 :     CHECK_EQU(i, stream->pos());
     265      244536 :     stream->Back();
     266             :     i--;
     267      244536 :     CHECK_EQU(i, stream->pos());
     268      244536 :     int32_t c1 = stream->Advance();
     269             :     i++;
     270      244536 :     CHECK_EQU(i, stream->pos());
     271      244536 :     CHECK_EQ(c0, c1);
     272      244536 :     stream->Back();
     273             :     i--;
     274      244536 :     CHECK_EQU(i, stream->pos());
     275             :   }
     276             : 
     277             :   // Seek + read streams one char at a time.
     278         324 :   unsigned halfway = end / 2;
     279         324 :   stream->Seek(stream->pos() + halfway - i);
     280      163380 :   for (i = halfway; i < end; i++) {
     281      163056 :     CHECK_EQU(i, stream->pos());
     282      163056 :     CHECK_EQU(reference[i], stream->Advance());
     283             :   }
     284         324 :   CHECK_EQU(i, stream->pos());
     285         324 :   CHECK_LT(stream->Advance(), 0);
     286             : 
     287             :   // Seek back, then seek beyond end of stream.
     288         324 :   stream->Seek(start);
     289         324 :   if (start < length) {
     290         264 :     CHECK_EQU(stream->Advance(), reference[start]);
     291             :   } else {
     292          60 :     CHECK_LT(stream->Advance(), 0);
     293             :   }
     294         324 :   stream->Seek(length + 5);
     295         324 :   CHECK_LT(stream->Advance(), 0);
     296         324 : }
     297             : 
     298             : #undef CHECK_EQU
     299             : 
     300          36 : void TestCharacterStreams(const char* one_byte_source, unsigned length,
     301             :                           unsigned start = 0, unsigned end = 0) {
     302          36 :   if (end == 0) end = length;
     303             : 
     304             :   i::Isolate* isolate = CcTest::i_isolate();
     305             :   i::Factory* factory = isolate->factory();
     306             : 
     307             :   // 2-byte external string
     308          36 :   std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
     309             :   i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
     310          36 :                                            static_cast<int>(length));
     311             :   {
     312       49290 :     for (unsigned i = 0; i < length; i++) {
     313       98508 :       uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
     314             :     }
     315             :     TestExternalResource resource(uc16_buffer.get(), length);
     316             :     i::Handle<i::String> uc16_string(
     317          72 :         factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked());
     318             :     std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
     319          36 :         i::ScannerStream::For(uc16_string, start, end));
     320          36 :     TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end);
     321             :   }
     322             : 
     323             :   // 1-byte external string
     324             :   i::Vector<const uint8_t> one_byte_vector =
     325             :       i::OneByteVector(one_byte_source, static_cast<int>(length));
     326             :   i::Handle<i::String> one_byte_string =
     327          72 :       factory->NewStringFromOneByte(one_byte_vector).ToHandleChecked();
     328             :   {
     329             :     TestExternalOneByteResource one_byte_resource(one_byte_source, length);
     330             :     i::Handle<i::String> ext_one_byte_string(
     331             :         factory->NewExternalStringFromOneByte(&one_byte_resource)
     332          72 :             .ToHandleChecked());
     333             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
     334          36 :         i::ScannerStream::For(ext_one_byte_string, start, end));
     335             :     TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start,
     336          36 :                         end);
     337             :   }
     338             : 
     339             :   // 1-byte generic i::String
     340             :   {
     341             :     std::unique_ptr<i::Utf16CharacterStream> string_stream(
     342          36 :         i::ScannerStream::For(one_byte_string, start, end));
     343             :     TestCharacterStream(one_byte_source, string_stream.get(), length, start,
     344          36 :                         end);
     345             :   }
     346             : 
     347             :   // 2-byte generic i::String
     348             :   {
     349             :     i::Handle<i::String> two_byte_string =
     350          72 :         factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
     351             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
     352          36 :         i::ScannerStream::For(two_byte_string, start, end));
     353             :     TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length,
     354          36 :                         start, end);
     355             :   }
     356             : 
     357             :   // Streaming has no notion of start/end, so let's skip streaming tests for
     358             :   // these cases.
     359          72 :   if (start != 0 || end != length) return;
     360             : 
     361             :   // 1-byte streaming stream, single + many chunks.
     362             :   {
     363             :     const uint8_t* data = one_byte_vector.begin();
     364             :     const uint8_t* data_end = one_byte_vector.end();
     365             : 
     366          30 :     ChunkSource single_chunk(data, data_end - data, false);
     367             :     std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
     368             :         i::ScannerStream::For(&single_chunk,
     369             :                               v8::ScriptCompiler::StreamedSource::ONE_BYTE,
     370          30 :                               nullptr));
     371             :     TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
     372          30 :                         length, start, end);
     373             : 
     374          30 :     ChunkSource many_chunks(data, data_end - data, true);
     375             :     one_byte_streaming_stream.reset(i::ScannerStream::For(
     376          30 :         &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr));
     377             :     TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
     378          30 :                         length, start, end);
     379             :   }
     380             : 
     381             :   // UTF-8 streaming stream, single + many chunks.
     382             :   {
     383             :     const uint8_t* data = one_byte_vector.begin();
     384             :     const uint8_t* data_end = one_byte_vector.end();
     385          30 :     ChunkSource chunks(data, data_end - data, false);
     386             :     std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
     387             :         i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8,
     388          30 :                               nullptr));
     389             :     TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
     390          30 :                         start, end);
     391             : 
     392          30 :     ChunkSource many_chunks(data, data_end - data, true);
     393             :     utf8_streaming_stream.reset(i::ScannerStream::For(
     394          30 :         &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     395             :     TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
     396          30 :                         start, end);
     397             :   }
     398             : 
     399             :   // 2-byte streaming stream, single + many chunks.
     400             :   {
     401             :     const uint8_t* data =
     402             :         reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
     403             :     const uint8_t* data_end =
     404             :         reinterpret_cast<const uint8_t*>(two_byte_vector.end());
     405          30 :     ChunkSource chunks(data, data_end - data, false);
     406             :     std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
     407             :         i::ScannerStream::For(
     408          30 :             &chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
     409             :     TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
     410          30 :                         length, start, end);
     411             : 
     412          30 :     ChunkSource many_chunks(data, data_end - data, true);
     413             :     two_byte_streaming_stream.reset(i::ScannerStream::For(
     414          30 :         &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
     415             :     TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
     416          30 :                         length, start, end);
     417             :   }
     418             : }
     419             : 
     420       23724 : TEST(CharacterStreams) {
     421           6 :   v8::Isolate* isolate = CcTest::isolate();
     422           6 :   v8::HandleScope handles(isolate);
     423           6 :   v8::Local<v8::Context> context = v8::Context::New(isolate);
     424             :   v8::Context::Scope context_scope(context);
     425             : 
     426           6 :   TestCharacterStreams("abcdefghi", 9);
     427           6 :   TestCharacterStreams("abc\0\n\r\x7f", 7);
     428           6 :   TestCharacterStreams("\0", 1);
     429           6 :   TestCharacterStreams("", 0);
     430             : 
     431             :   // 4k large buffer.
     432             :   char buffer[4096 + 1];
     433       24588 :   for (unsigned i = 0; i < arraysize(buffer); i++) {
     434       24582 :     buffer[i] = static_cast<char>(i & 0x7F);
     435             :   }
     436           6 :   buffer[arraysize(buffer) - 1] = '\0';
     437           6 :   TestCharacterStreams(buffer, arraysize(buffer) - 1);
     438          12 :   TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);
     439           6 : }
     440             : 
     441             : // Regression test for crbug.com/651333. Read invalid utf-8.
     442       23724 : TEST(Regress651333) {
     443             :   const uint8_t bytes[] =
     444             :       "A\xf1"
     445           6 :       "ad";  // Anad, with n == n-with-tilde.
     446           6 :   const uint16_t unicode[] = {65, 65533, 97, 100};
     447             : 
     448             :   // Run the test for all sub-strings 0..N of bytes, to make sure we hit the
     449             :   // error condition in and at chunk boundaries.
     450          36 :   for (size_t len = 0; len < arraysize(bytes); len++) {
     451             :     // Read len bytes from bytes, and compare against the expected unicode
     452             :     // characters. Expect kBadChar ( == Unicode replacement char == code point
     453             :     // 65533) instead of the incorrectly coded Latin1 char.
     454          30 :     ChunkSource chunks(bytes, len, false);
     455             :     std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
     456          30 :         &chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     457          90 :     for (size_t i = 0; i < len; i++) {
     458         120 :       CHECK_EQ(unicode[i], stream->Advance());
     459             :     }
     460          30 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     461             :   }
     462           6 : }
     463             : 
     464          18 : void TestChunkStreamAgainstReference(
     465             :     const char* cases[],
     466        1404 :     const std::vector<std::vector<uint16_t>>& unicode_expected) {
     467         192 :   for (size_t c = 0; c < unicode_expected.size(); ++c) {
     468          78 :     ChunkSource chunk_source(cases[c]);
     469             :     std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
     470          78 :         &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
     471        1098 :     for (size_t i = 0; i < unicode_expected[c].size(); i++) {
     472        1152 :       CHECK_EQ(unicode_expected[c][i], stream->Advance());
     473             :     }
     474          78 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     475          78 :     stream->Seek(0);
     476        1098 :     for (size_t i = 0; i < unicode_expected[c].size(); i++) {
     477        1152 :       CHECK_EQ(unicode_expected[c][i], stream->Advance());
     478             :     }
     479          78 :     CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
     480             :   }
     481          18 : }
     482             : 
     483       23724 : TEST(Regress6377) {
     484             :   const char* cases[] = {
     485             :       "\xf0\x90\0"  // first chunk - start of 4-byte seq
     486             :       "\x80\x80"    // second chunk - end of 4-byte seq
     487             :       "a\0",        // and an 'a'
     488             : 
     489             :       "\xe0\xbf\0"  // first chunk - start of 3-byte seq
     490             :       "\xbf"        // second chunk - one-byte end of 3-byte seq
     491             :       "a\0",        // and an 'a'
     492             : 
     493             :       "\xc3\0"  // first chunk - start of 2-byte seq
     494             :       "\xbf"    // second chunk - end of 2-byte seq
     495             :       "a\0",    // and an 'a'
     496             : 
     497             :       "\xf0\x90\x80\0"  // first chunk - start of 4-byte seq
     498             :       "\x80"            // second chunk - one-byte end of 4-byte seq
     499             :       "a\xc3\0"         // and an 'a' + start of 2-byte seq
     500             :       "\xbf\0",         // third chunk - end of 2-byte seq
     501           6 :   };
     502             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     503             :       {0xd800, 0xdc00, 97}, {0xfff, 97}, {0xff, 97}, {0xd800, 0xdc00, 97, 0xff},
     504           6 :   };
     505          12 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));
     506           6 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     507           6 : }
     508             : 
     509       23724 : TEST(Regress6836) {
     510             :   const char* cases[] = {
     511             :       // 0xc2 is a lead byte, but there's no continuation. The bug occurs when
     512             :       // this happens near the chunk end.
     513             :       "X\xc2Y\0",
     514             :       // Last chunk ends with a 2-byte char lead.
     515             :       "X\xc2\0",
     516             :       // Last chunk ends with a 3-byte char lead and only one continuation
     517             :       // character.
     518             :       "X\xe0\xbf\0",
     519           6 :   };
     520             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     521             :       {0x58, 0xfffd, 0x59}, {0x58, 0xfffd}, {0x58, 0xfffd},
     522           6 :   };
     523          12 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));
     524           6 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     525           6 : }
     526             : 
     527       23724 : TEST(TestOverlongAndInvalidSequences) {
     528             :   const char* cases[] = {
     529             :       // Overlong 2-byte sequence.
     530             :       "X\xc0\xbfY\0",
     531             :       // Another overlong 2-byte sequence.
     532             :       "X\xc1\xbfY\0",
     533             :       // Overlong 3-byte sequence.
     534             :       "X\xe0\x9f\xbfY\0",
     535             :       // Overlong 4-byte sequence.
     536             :       "X\xf0\x89\xbf\xbfY\0",
     537             :       // Invalid 3-byte sequence (reserved for surrogates).
     538             :       "X\xed\xa0\x80Y\0",
     539             :       // Invalid 4-bytes sequence (value out of range).
     540             :       "X\xf4\x90\x80\x80Y\0",
     541           6 :   };
     542             :   const std::vector<std::vector<uint16_t>> unicode_expected = {
     543             :       {0x58, 0xfffd, 0xfffd, 0x59},
     544             :       {0x58, 0xfffd, 0xfffd, 0x59},
     545             :       {0x58, 0xfffd, 0xfffd, 0xfffd, 0x59},
     546             :       {0x58, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x59},
     547             :       {0x58, 0xfffd, 0xfffd, 0xfffd, 0x59},
     548             :       {0x58, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x59},
     549           6 :   };
     550          12 :   CHECK_EQ(unicode_expected.size(), arraysize(cases));
     551           6 :   TestChunkStreamAgainstReference(cases, unicode_expected);
     552       71160 : }

Generated by: LCOV version 1.10