LCOV - code coverage report
Current view: top level - src - unicode-inl.h (source / functions) Hit Total Coverage
Test: app.info Lines: 53 53 100.0 %
Date: 2019-01-20 Functions: 8 8 100.0 %

          Line data    Source code
       1             : // Copyright 2007-2010 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_UNICODE_INL_H_
       6             : #define V8_UNICODE_INL_H_
       7             : 
       8             : #include "src/unicode.h"
       9             : #include "src/base/logging.h"
      10             : #include "src/utils.h"
      11             : 
      12             : namespace unibrow {
      13             : 
      14             : template <class T, int s> bool Predicate<T, s>::get(uchar code_point) {
                         // Cached predicate lookup. The slot is selected by
                         // (code_point & kMask); if that slot currently holds this exact
                         // code point its stored value is returned, otherwise the value is
                         // computed (and the slot refilled) by CalculateValue.
      15             :   CacheEntry entry = entries_[code_point & kMask];
      16             :   if (entry.code_point() == code_point) return entry.value();
      17             :   return CalculateValue(code_point);
      18             : }
      19             : 
      20             : template <class T, int s> bool Predicate<T, s>::CalculateValue(
                         // Cache-miss path for get(): evaluates T::Is(code_point), overwrites
                         // the slot at (code_point & kMask) with the fresh (code_point, result)
                         // pair, and returns the result.
      21             :     uchar code_point) {
      22             :   bool result = T::Is(code_point);
      23             :   entries_[code_point & kMask] = CacheEntry(code_point, result);
      24             :   return result;
      25             : }
      26             : 
      27    16725407 : template <class T, int s> int Mapping<T, s>::get(uchar c, uchar n,
      28             :     uchar* result) {
                         // Cached mapping lookup for c. Returns the number of entries written
                         // to result.
                         //   - Cache hit with offset 0: nothing is written, returns 0.
                         //   - Cache hit with non-zero offset: result[0] = c + offset, returns 1.
                         //   - Cache miss: defers to CalculateValue(c, n, result).
                         // n is only forwarded to CalculateValue here; presumably it is the
                         // character following c -- confirm against T::Convert.
      29    16725407 :   CacheEntry entry = entries_[c & kMask];
      30    16725407 :   if (entry.code_point_ == c) {
      31      960211 :     if (entry.offset_ == 0) {
      32             :       return 0;
      33             :     } else {
      34       59717 :       result[0] = c + entry.offset_;
      35       59717 :       return 1;
      36             :     }
      37             :   } else {
      38    15765196 :     return CalculateValue(c, n, result);
      39             :   }
      40             : }
      41             : 
      42    15765196 : template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
      43             :     uchar* result) {
                         // Cache-miss path for get(): runs T::Convert and, unless Convert
                         // cleared allow_caching, records the outcome in the slot (c & kMask).
                         //   - length == 1: stores the offset (result[0] - c) and returns 1.
                         //   - any other length while caching is allowed: stores offset 0 and
                         //     returns 0 (not length).
                         //     NOTE(review): presumably Convert only leaves allow_caching set
                         //     for lengths 0 and 1 -- confirm.
                         //   - caching disallowed: returns Convert's length, cache untouched.
      44    15765196 :   bool allow_caching = true;
      45    15765196 :   int length = T::Convert(c, n, result, &allow_caching);
      46    15765196 :   if (allow_caching) {
      47    15547965 :     if (length == 1) {
      48        7351 :       entries_[c & kMask] = CacheEntry(c, result[0] - c);
      49        7351 :       return 1;
      50             :     } else {
      51    15540614 :       entries_[c & kMask] = CacheEntry(c, 0);
      52    15540614 :       return 0;
      53             :     }
      54             :   } else {
      55             :     return length;
      56             :   }
      57             : }
      58             : 
      59             : 
      60             : unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
                         // Writes the UTF-8 encoding of the 8-bit value c to str and returns
                         // the number of bytes written: 1 when c <= kMaxOneByteChar, else 2.
                         //
                         // kMask clears only bit 6. For an 8-bit c, (c & kMask) may still have
                         // bit 7 set, but that bit coincides with the 0x80 continuation marker,
                         // so the second byte always has the form 10xxxxxx.
      61             :   static const int kMask = ~(1 << 6);
      62        6378 :   if (c <= kMaxOneByteChar) {
      63        6026 :     str[0] = c;
      64             :     return 1;
      65             :   }
      66         352 :   str[0] = 0xC0 | (c >> 6);
      67         352 :   str[1] = 0x80 | (c & kMask);
      68             :   return 2;
      69             : }
      70             : 
      71             : // Encode encodes the UTF-16 code units c and previous into the given str
      72             : // buffer, and combines surrogate code units into single code points. If
      73             : // replace_invalid is set to true, orphan surrogate code units will be replaced
      74             : // with kBadChar.
      75   182446240 : unsigned Utf8::Encode(char* str,
      76             :                       uchar c,
      77             :                       int previous,
      78             :                       bool replace_invalid) {
                         // Returns the number of bytes written at str: 1, 2, 3 or 4 depending
                         // on which of kMaxOneByteChar / kMaxTwoByteChar / kMaxThreeByteChar
                         // bounds c. In the surrogate-pair case the returned count is reduced
                         // by kSizeOfUnmatchedSurrogate (see below).
                         //
                         // kMask clears only bit 6 of its operand. Bits above bit 7 are dropped
                         // when the value is narrowed into a char, and bit 7 coincides with the
                         // 0x80 marker, so every continuation byte has the form 10xxxxxx.
      79             :   static const int kMask = ~(1 << 6);
      80   182446240 :   if (c <= kMaxOneByteChar) {
      81    32065586 :     str[0] = c;
      82    32065586 :     return 1;
      83   150380654 :   } else if (c <= kMaxTwoByteChar) {
      84    45125592 :     str[0] = 0xC0 | (c >> 6);
      85    45125592 :     str[1] = 0x80 | (c & kMask);
      86    45125592 :     return 2;
      87   105255062 :   } else if (c <= kMaxThreeByteChar) {
                           // The sentinel must not look like a lead surrogate; presumably this
                           // guarantees that passing kNoPreviousCharacter as previous can never
                           // satisfy the IsSurrogatePair test below.
      88             :     DCHECK(!Utf16::IsLeadSurrogate(Utf16::kNoPreviousCharacter));
      89   180508858 :     if (Utf16::IsSurrogatePair(previous, c)) {
                             // previous and c form a pair: encode the combined code point
                             // starting kUnmatchedSize bytes BEFORE str (presumably overwriting
                             // the bytes already emitted for the lead surrogate -- the caller
                             // must have that room), and report the byte count relative to str.
      90             :       const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
      91             :       return Encode(str - kUnmatchedSize,
      92    15000327 :                     Utf16::CombineSurrogatePair(previous, c),
      93             :                     Utf16::kNoPreviousCharacter,
      94    30000654 :                     replace_invalid) - kUnmatchedSize;
      95   150508238 :     } else if (replace_invalid &&
      96          12 :                (Utf16::IsLeadSurrogate(c) ||
      97             :                Utf16::IsTrailSurrogate(c))) {
                             // Unpaired surrogate and the caller asked for replacement: encode
                             // kBadChar instead. Without replace_invalid the surrogate itself is
                             // encoded as three bytes.
      98             :       c = kBadChar;
      99             :     }
     100    75254102 :     str[0] = 0xE0 | (c >> 12);
     101    75254102 :     str[1] = 0x80 | ((c >> 6) & kMask);
     102    75254102 :     str[2] = 0x80 | (c & kMask);
     103    75254102 :     return 3;
     104             :   } else {
     105    15000633 :     str[0] = 0xF0 | (c >> 18);
     106    15000633 :     str[1] = 0x80 | ((c >> 12) & kMask);
     107    15000633 :     str[2] = 0x80 | ((c >> 6) & kMask);
     108    15000633 :     str[3] = 0x80 | (c & kMask);
     109    15000633 :     return 4;
     110             :   }
     111             : }
     112             : 
     113             : 
     114   245925533 : uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
                         // Decodes the character at the start of bytes.
                         //   - length == 0: returns kBadChar and leaves *cursor unchanged.
                         //     (length is a size_t, so the `<= 0` test is only true for 0.)
                         //   - first byte <= kMaxOneByteChar: advances *cursor by 1 and returns
                         //     that byte.
                         //   - otherwise: defers to CalculateValue(bytes, length, cursor).
     115   245925533 :   if (length <= 0) return kBadChar;
     116   217649584 :   byte first = bytes[0];
     117             :   // Characters between 0000 and 007F are encoded as a single character
     118   217649584 :   if (V8_LIKELY(first <= kMaxOneByteChar)) {
     119   215481511 :     *cursor += 1;
     120   215481511 :     return first;
     121             :   }
     122     2168073 :   return CalculateValue(bytes, length, cursor);
     123             : }
     124             : 
     125             : unsigned Utf8::Length(uchar c, int previous) {
                         // Returns a byte count for c given the preceding code unit previous,
                         // using the same thresholds as Encode: 1, 2, 3 or 4.
                         // When (previous, c) is a surrogate pair the result is
                         // kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates instead
                         // of 3. Presumably this is the net extra size once the lead
                         // surrogate's bytes have already been counted -- confirm against the
                         // constants' definitions.
     126    29464514 :   if (c <= kMaxOneByteChar) {
     127             :     return 1;
     128      375145 :   } else if (c <= kMaxTwoByteChar) {
     129             :     return 2;
     130             :   } else if (c <= kMaxThreeByteChar) {
     131             :     DCHECK(!Utf16::IsLeadSurrogate(Utf16::kNoPreviousCharacter));
     132      250050 :     if (Utf16::IsSurrogatePair(previous, c)) {
     133             :       return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
     134             :     }
     135             :     return 3;
     136             :   } else {
     137             :     return 4;
     138             :   }
     139             : }
     140             : 
     141             : bool Utf8::IsValidCharacter(uchar c) {
                         // Accepts c when it lies in one of:
                         //   - [0x0000, 0xD7FF]   (everything below the surrogate range),
                         //   - [0xE000, 0xFDCF]   (skips the surrogates 0xD800..0xDFFF),
                         //   - [0xFDF0, 0x10FFFF] (skips the noncharacters 0xFDD0..0xFDEF),
                         //     excluding values whose low 16 bits are 0xFFFE or 0xFFFF and
                         //     excluding kBadChar.
     142             :   return c < 0xD800u || (c >= 0xE000u && c < 0xFDD0u) ||
     143             :          (c > 0xFDEFu && c <= 0x10FFFFu && (c & 0xFFFEu) != 0xFFFEu &&
     144             :           c != kBadChar);
     145             : }
     146             : 
     147             : }  // namespace unibrow
     148             : 
     149             : #endif  // V8_UNICODE_INL_H_

Generated by: LCOV version 1.10