LCOV - code coverage report
Current view: top level - src - unicode-decoder.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 35 38 92.1 %
Date: 2017-10-20 Functions: 2 2 100.0 %

          Line data    Source code
       1             : // Copyright 2014 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : 
       6             : #include "src/unicode-inl.h"
       7             : #include "src/unicode-decoder.h"
       8             : #include <stdio.h>
       9             : #include <stdlib.h>
      10             : 
      11             : namespace unibrow {
      12             : 
      13       10618 : void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
      14             :                             const uint8_t* stream, size_t stream_length) {
                         // Scans all |stream_length| bytes of |stream|, decoding one character per
                         // iteration with Utf8::ValueOf. Decoded UTF-16 code units are stored in
                         // |buffer| until its |buffer_length| slots are used up; after that the
                         // loop only keeps counting. On return:
                         //   utf16_length_      - code units needed for the whole stream.
                         //   unbuffered_start_ / unbuffered_length_
                         //                      - the part of the stream that was not written to
                         //                        |buffer|; they stay nullptr / 0 unless the
                         //                        buffer filled up.
                         //   last_byte_of_buffer_unused_
                         //                      - true only when the loop stopped because a
                         //                        surrogate pair did not fit in what was left.
      15             :   // Assume everything will fit in the buffer and stream won't be needed.
      16       10618 :   last_byte_of_buffer_unused_ = false;
      17       10618 :   unbuffered_start_ = nullptr;
      18       10618 :   unbuffered_length_ = 0;
      19             :   bool writing_to_buffer = true;
      20             :   // Loop until stream is read, writing to buffer as long as buffer has space.
      21             :   size_t utf16_length = 0;
      22     5052330 :   while (stream_length != 0) {
      23     5031094 :     size_t cursor = 0;
      24     5031094 :     uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
                           // |cursor| is expected to hold the number of bytes consumed for this
                           // character; the stream is advanced by it below.
      25             :     DCHECK(cursor > 0 && cursor <= stream_length);
      26     5031094 :     stream += cursor;
      27     5031094 :     stream_length -= cursor;
                           // Values above Utf16::kMaxNonSurrogateCharCode are stored as a
                           // lead/trail surrogate pair, i.e. two UTF-16 code units instead of one.
      28     5031094 :     bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
      29     5031094 :     utf16_length += is_two_characters ? 2 : 1;
      30             :     // Once writing has stopped we only keep counting utf16_length.
      31    10062188 :     if (!writing_to_buffer) continue;
      32             :     // Write out the characters to the buffer.
      33             :     // utf16_length already includes this character, so <= (not <) is the right test.
      34      271535 :     if (utf16_length <= buffer_length) {
      35      271535 :       if (is_two_characters) {
      36        5213 :         *buffer++ = Utf16::LeadSurrogate(character);
      37       10426 :         *buffer++ = Utf16::TrailSurrogate(character);
      38             :       } else {
      39      266322 :         *buffer++ = character;
      40             :       }
      41      271535 :       if (utf16_length == buffer_length) {
      42             :         // Buffer is now exactly full; the rest of the stream stays unbuffered.
      43             :         writing_to_buffer = false;
      44         349 :         unbuffered_start_ = stream;
      45         349 :         unbuffered_length_ = stream_length;
      46             :       }
      47             :       continue;
      48             :     }
      49             :     // Have gone over buffer: this character needs two slots but they are not available.
      50             :     // The remaining slot is left unused and the unbuffered range starts at this character.
      51             :     DCHECK(is_two_characters);
      52             :     writing_to_buffer = false;
      53           0 :     last_byte_of_buffer_unused_ = true;
                           // stream / stream_length were already advanced past this character, so
                           // step back by the |cursor| bytes it occupied.
      54           0 :     unbuffered_start_ = stream - cursor;
      55           0 :     unbuffered_length_ = stream_length + cursor;
      56             :   }
      57       10618 :   utf16_length_ = utf16_length;
      58       10618 : }
      59             : 
      60             : 
      61         349 : void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream,
      62             :                                      size_t stream_length, uint16_t* data,
      63             :                                      size_t data_length) {
                         // Decodes characters from |stream| with Utf8::ValueOf and stores their
                         // UTF-16 code units in |data| until |data_length| code units have been
                         // written. The loop is driven by |data_length| alone: |stream_length| is
                         // only handed to Utf8::ValueOf and checked by a DCHECK, so the caller has
                         // to pass a stream that really contains |data_length| code units.
      64     4760257 :   while (data_length != 0) {
      65     4759559 :     size_t cursor = 0;
      66     4759559 :     uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
      67             :     // There's a total lack of bounds checking for stream
      68             :     // as it was already done in Reset.
      69     4759559 :     stream += cursor;
      70             :     DCHECK(stream_length >= cursor);
      71     4759559 :     stream_length -= cursor;
                           // Values above Utf16::kMaxNonSurrogateCharCode take a lead/trail
                           // surrogate pair (two code units); everything else is a single unit.
      72     4759559 :     if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
      73       96942 :       *data++ = Utf16::LeadSurrogate(character);
      74      193884 :       *data++ = Utf16::TrailSurrogate(character);
                             // NOTE(review): this check runs after both units were stored, so it
                             // documents the precondition rather than preventing the write.
                             // data_length is a size_t; if it were 1 here the subtraction below
                             // would wrap around instead of reaching 0 - TODO confirm that callers
                             // never end |data| in the middle of a surrogate pair.
      75             :       DCHECK_GT(data_length, 1);
      76       96942 :       data_length -= 2;
      77             :     } else {
      78     4662617 :       *data++ = character;
      79     4662617 :       data_length -= 1;
      80             :     }
      81             :   }
      82         349 : }
      83             : 
      84             : }  // namespace unibrow

Generated by: LCOV version 1.10