LCOV - code coverage report
Current view: top level - src - unicode-decoder.h (source / functions) Hit Total Coverage
Test: app.info Lines: 13 13 100.0 %
Date: 2019-04-17 Functions: 4 4 100.0 %

          Line data    Source code
       1             : // Copyright 2014 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_UNICODE_DECODER_H_
       6             : #define V8_UNICODE_DECODER_H_
       7             : 
       8             : #include <sys/types.h>
       9             : #include <algorithm>
      10             : #include "src/globals.h"
      11             : #include "src/memcopy.h"
      12             : #include "src/unicode.h"
      13             : #include "src/vector.h"
      14             : 
      15             : namespace unibrow {
      16             : 
      17             : class Utf8Iterator {  // Walks a UTF-8 byte stream; operator* yields 16-bit units (presumably UTF-16 code units).
      18             :  public:
      19             :   explicit Utf8Iterator(const v8::internal::Vector<const char>& stream)
      20             :       : Utf8Iterator(stream, 0, false) {}  // Delegates with byte offset 0 and trailing = false.
      21             :   Utf8Iterator(const v8::internal::Vector<const char>& stream, size_t offset,
      22             :                bool trailing)
      23             :       : stream_(stream),  // NOTE(review): bound as a reference, so the caller's Vector must outlive this iterator.
      24             :         cursor_(offset),
      25             :         offset_(0),
      26             :         char_(0),
      27    30416398 :         trailing_(false) {  // Kept false for the first read; the requested value is applied below.
      28             :     DCHECK_LE(offset, stream.length());
      29             :     // Read the first char, setting offset_ to offset in the process.
      30    30416398 :     ++*this;
      31             : 
      32             :     // This must be set after reading the first char, since the offset marks
      33             :     // the start of the octet sequence that the trailing char is part of.
      34    30416372 :     trailing_ = trailing;
      35             :     if (trailing) {  // A trailing unit is only expected when char_ is above the non-surrogate range.
      36             :       DCHECK_GT(char_, Utf16::kMaxNonSurrogateCharCode);
      37             :     }
      38             :   }
      39             : 
      40             :   uint16_t operator*();  // The operators and Done() are declared here and defined outside this header.
      41             :   Utf8Iterator& operator++();
      42             :   Utf8Iterator operator++(int);
      43             :   bool Done();
      44             :   bool Trailing() { return trailing_; }  // Current value of the trailing_ flag.
      45             :   size_t Offset() { return offset_; }  // Start of the octet sequence of the current char (see constructor).
      46             : 
      47             :  private:
      48             :   const v8::internal::Vector<const char>& stream_;  // Not a copy: refers to the Vector passed to the constructor.
      49             :   size_t cursor_;  // Initialized from the constructor's offset argument; presumably the next read position.
      50             :   size_t offset_;  // Updated by operator++ according to the constructor comment.
      51             :   uint32_t char_;  // Compared against Utf16::kMaxNonSurrogateCharCode; presumably the decoded code point.
      52             :   bool trailing_;
      53             : };
      54             : 
      55             : class V8_EXPORT_PRIVATE Utf8DecoderBase {  // Buffer-size-independent base of Utf8Decoder<kBufferSize>.
      56             :  public:
      57             :   // Initialization done in subclass; this default constructor only zeroes the counters.
      58             :   inline Utf8DecoderBase();
      59             :   inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
      60             :                          const v8::internal::Vector<const char>& stream);  // Forwards all three arguments to Reset().
      61             :   inline size_t Utf16Length() const { return utf16_length_; }  // Value that Reset() stored in utf16_length_.
      62             : 
      63             :  protected:
      64             :   // This reads all characters and sets the utf16_length_.
      65             :   // The first buffer_length utf16 chars are cached in the buffer.
      66             :   void Reset(uint16_t* buffer, size_t buffer_length,
      67             :              const v8::internal::Vector<const char>& vector);
      68             :   static void WriteUtf16Slow(uint16_t* data, size_t length,  // Called by Utf8Decoder::WriteUtf16() for units beyond the cached prefix.
      69             :                              const v8::internal::Vector<const char>& stream,
      70             :                              size_t offset, bool trailing);
      71             : 
      72             :   size_t bytes_read_;  // Handed to WriteUtf16Slow() as the stream offset argument.
      73             :   size_t chars_written_;  // Upper bound on the units WriteUtf16() copies out of the cache buffer.
      74             :   size_t utf16_length_;  // Set by Reset(); WriteUtf16() clamps its output length to this.
      75             :   bool trailing_;  // Handed to WriteUtf16Slow() together with bytes_read_.
      76             : 
      77             :  private:
      78             :   DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);  // Macro defined outside this file; by its name it disables copying (confirm).
      79             : };
      80             : 
      81             : template <size_t kBufferSize>  // kBufferSize: capacity, in 16-bit units, of the inline cache buffer_.
      82             : class Utf8Decoder : public Utf8DecoderBase {
      83             :  public:
      84             :   inline Utf8Decoder() = default;
      85             :   explicit inline Utf8Decoder(const v8::internal::Vector<const char>& stream);  // Processes stream right away through the base-class constructor.
      86             :   inline void Reset(const v8::internal::Vector<const char>& stream);  // Calls the base-class Reset() with buffer_ and kBufferSize.
      87             :   inline size_t WriteUtf16(  // Returns the requested length clamped to utf16_length_.
      88             :       uint16_t* data, size_t length,
      89             :       const v8::internal::Vector<const char>& stream) const;
      90             : 
      91             :  private:
      92             :   uint16_t buffer_[kBufferSize];  // Cache passed to Utf8DecoderBase::Reset(); read back by WriteUtf16().
      93             : };
      94             : 
      95             : Utf8DecoderBase::Utf8DecoderBase()  // Default state: every counter is zero and trailing_ is false.
      96       62428 :     : bytes_read_(0), chars_written_(0), utf16_length_(0), trailing_(false) {}
      97             : 
      98             : Utf8DecoderBase::Utf8DecoderBase(  // Constructs the base and immediately runs Reset() on the given stream.
      99             :     uint16_t* buffer, size_t buffer_length,
     100             :     const v8::internal::Vector<const char>& stream) {
     101             :   Reset(buffer, buffer_length, stream);  // NOTE(review): no member is initialized before this call; relies on Reset() assigning all of them - confirm.
     102             : }
     103             : 
     104             : template <size_t kBufferSize>  // Constructs a decoder and processes stream into the inline cache.
     105             : Utf8Decoder<kBufferSize>::Utf8Decoder(
     106             :     const v8::internal::Vector<const char>& stream)
     107             :     : Utf8DecoderBase(buffer_, kBufferSize, stream) {}  // NOTE(review): buffer_ is handed to the base constructor before this class's own members are initialized; it has no initializer, so nothing overwrites it afterwards.
     108             : 
     109             : template <size_t kBufferSize>  // Restarts this decoder on a new stream.
     110             : void Utf8Decoder<kBufferSize>::Reset(
     111             :     const v8::internal::Vector<const char>& stream) {
     112         145 :   Utf8DecoderBase::Reset(buffer_, kBufferSize, stream);  // Supplies this instance's own cache and its capacity.
     113             : }
     114             : 
     115             : template <size_t kBufferSize>  // Writes up to data_length 16-bit units into data and returns the number written.
     116         145 : size_t Utf8Decoder<kBufferSize>::WriteUtf16(
     117             :     uint16_t* data, size_t data_length,
     118             :     const v8::internal::Vector<const char>& stream) const {  // NOTE(review): stream is presumably the one given to Reset(), since bytes_read_ is used as an offset into it - confirm.
     119             :   DCHECK_GT(data_length, 0);
     120         290 :   data_length = std::min(data_length, utf16_length_);  // Never report more units than utf16_length_.
     121             : 
     122             :   // Fast path: memcpy everything the cache buffer holds, up to data_length.
     123         290 :   size_t memcpy_length = std::min(data_length, chars_written_);
     124         145 :   v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
     125             : 
     126         145 :   if (data_length <= chars_written_) return data_length;  // The cache covered the whole request.
     127             : 
     128             :   // Copy the rest the slow way: WriteUtf16Slow() is given stream, bytes_read_ and trailing_.
     129           9 :   WriteUtf16Slow(data + chars_written_, data_length - chars_written_, stream,
     130             :                  bytes_read_, trailing_);
     131           9 :   return data_length;
     132             : }
     133             : 
     134             : class Latin1 {  // Static helpers for the Latin-1 range.
     135             :  public:
     136             :   static const unsigned kMaxChar = 0xff;  // Largest Latin-1 character code.
     137             :   // Convert the character to its Latin-1 case equivalent if one exists; otherwise it is returned unchanged.
     138             :   static inline uint16_t TryConvertToLatin1(uint16_t);
     139             : };
     140             : 
     141             : uint16_t Latin1::TryConvertToLatin1(uint16_t c) {  // Maps three specific non-Latin-1 code units to Latin-1; all others pass through.
     142       27272 :   switch (c) {
     143             :     // These are equivalent characters in Unicode.
     144             :     case 0x39c:  // U+039C GREEK CAPITAL LETTER MU
     145             :     case 0x3bc:  // U+03BC GREEK SMALL LETTER MU
     146             :       return 0xb5;  // U+00B5 MICRO SIGN
     147             :     // This is an uppercase of a Latin-1 character
     148             :     // outside of Latin-1.
     149             :     case 0x178:  // U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
     150             :       return 0xff;  // U+00FF LATIN SMALL LETTER Y WITH DIAERESIS
     151             :   }
     152             :   return c;  // No Latin-1 equivalent handled here: return the input unchanged.
     153             : }
     154             : 
     155             : 
     156             : }  // namespace unibrow
     157             : 
     158             : #endif  // V8_UNICODE_DECODER_H_

Generated by: LCOV version 1.10