LCOV - code coverage report
Current view: top level - src - unicode-inl.h (source / functions) Hit Total Coverage
Test: app.info Lines: 64 64 100.0 %
Date: 2017-04-26 Functions: 14 18 77.8 %

          Line data    Source code
       1             : // Copyright 2007-2010 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_UNICODE_INL_H_
       6             : #define V8_UNICODE_INL_H_
       7             : 
       8             : #include "src/unicode.h"
       9             : #include "src/base/logging.h"
      10             : #include "src/utils.h"
      11             : 
      12             : namespace unibrow {
      13             : 
      14  1011499941 : template <class T, int s> bool Predicate<T, s>::get(uchar code_point) {
                         // Cached predicate lookup. The slot is selected by the low bits of
                         // code_point (code_point & kMask); if the slot currently holds this
                         // exact code point its stored value is returned, otherwise the answer
                         // is computed (and the slot refilled) by CalculateValue().
      15  1011499941 :   CacheEntry entry = entries_[code_point & kMask];
      16  2017611114 :   if (entry.code_point() == code_point) return entry.value();
      17     5388768 :   return CalculateValue(code_point);
      18             : }
      19             : 
      20             : template <class T, int s> bool Predicate<T, s>::CalculateValue(
      21             :     uchar code_point) {
                         // Cache-miss path of get(): evaluates T::Is(code_point), overwrites the
                         // slot at (code_point & kMask) with the new (code_point, result) pair,
                         // and returns the result.
      22     5261344 :   bool result = T::Is(code_point);
      23     5388768 :   entries_[code_point & kMask] = CacheEntry(code_point, result);
      24             :   return result;
      25             : }
      26             : 
      27    56612230 : template <class T, int s> int Mapping<T, s>::get(uchar c, uchar n,
      28             :     uchar* result) {
                         // Cached mapping lookup for c. Returns the number of code points
                         // written to result.
                         //  - Cache hit with offset 0: returns 0 and leaves result untouched.
                         //  - Cache hit with a non-zero offset: writes c + offset to result[0]
                         //    and returns 1.
                         //  - Cache miss: defers to CalculateValue(c, n, result).
                         // n is not inspected here; it is only forwarded to CalculateValue().
      29    56612230 :   CacheEntry entry = entries_[c & kMask];
      30    56612230 :   if (entry.code_point_ == c) {
      31     1507204 :     if (entry.offset_ == 0) {
      32             :       return 0;
      33             :     } else {
      34       80166 :       result[0] = c + entry.offset_;
      35       80166 :       return 1;
      36             :     }
      37             :   } else {
      38    55105026 :     return CalculateValue(c, n, result);
      39             :   }
      40             : }
      41             : 
      42    55105026 : template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
      43             :     uchar* result) {
                         // Cache-miss path of get(): runs T::Convert(c, n, result, &allow_caching)
                         // and, unless Convert cleared allow_caching, records the outcome in the
                         // slot at (c & kMask).
                         //  - Cacheable, length == 1: stores the delta result[0] - c, returns 1.
                         //  - Cacheable, any other length: stores offset 0 and returns 0, i.e. the
                         //    length reported by Convert is not propagated on this path.
                         //    NOTE(review): presumably Convert only leaves allow_caching set when
                         //    it produced 0 or 1 code points -- confirm against T::Convert.
                         //  - Not cacheable: returns Convert's length unchanged; cache untouched.
      44    55105026 :   bool allow_caching = true;
      45    55105026 :   int length = T::Convert(c, n, result, &allow_caching);
      46    55105026 :   if (allow_caching) {
      47    54452044 :     if (length == 1) {
      48       23888 :       entries_[c & kMask] = CacheEntry(c, result[0] - c);
      49       23888 :       return 1;
      50             :     } else {
      51    54428156 :       entries_[c & kMask] = CacheEntry(c, 0);
      52    54428156 :       return 0;
      53             :     }
      54             :   } else {
      55             :     return length;
      56             :   }
      57             : }
      58             : 
      59             : 
      60             : unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
                         // Encodes the 8-bit value c into str and returns the number of bytes
                         // written: 1 when c <= kMaxOneByteChar, otherwise 2 (lead byte 0xC0 |
                         // top bits, then one continuation byte). Writes up to str[1].
                         //
                         // kMask clears bit 6 only. OR-ing the masked value with 0x80 sets bit 7,
                         // so the stored byte has the continuation pattern 10xxxxxx.
      61             :   static const int kMask = ~(1 << 6);
      62   112500791 :   if (c <= kMaxOneByteChar) {
      63   112500020 :     str[0] = c;
      64             :     return 1;
      65             :   }
      66         771 :   str[0] = 0xC0 | (c >> 6);
      67         771 :   str[1] = 0x80 | (c & kMask);
      68             :   return 2;
      69             : }
      70             : 
      71             : // Encode encodes the UTF-16 code units c and previous into the given str
      72             : // buffer, and combines surrogate code units into single code points. If
      73             : // replace_invalid is set to true, orphan surrogate code units will be replaced
      74             : // with kBadChar.
      75    43491053 : unsigned Utf8::Encode(char* str,
      76             :                       uchar c,
      77             :                       int previous,
      78             :                       bool replace_invalid) {
                         // Returns the number of bytes written at str (1 to 4), except on the
                         // surrogate-pair path below, where the value is reduced by
                         // kSizeOfUnmatchedSurrogate.
                         //
                         // kMask clears bit 6 only; after OR-ing with 0x80 the low eight bits have
                         // the continuation pattern 10xxxxxx. Bits above bit 7 survive the mask
                         // and are dropped only by the narrowing store into char.
      79             :   static const int kMask = ~(1 << 6);
      80    43491053 :   if (c <= kMaxOneByteChar) {
      81    43269009 :     str[0] = c;
      82    43269009 :     return 1;
      83      222044 :   } else if (c <= kMaxTwoByteChar) {
      84       22635 :     str[0] = 0xC0 | (c >> 6);
      85       22635 :     str[1] = 0x80 | (c & kMask);
      86       22635 :     return 2;
      87      199409 :   } else if (c <= kMaxThreeByteChar) {
      88      397164 :     if (Utf16::IsSurrogatePair(previous, c)) {
                             // Re-encode the combined code point starting kUnmatchedSize bytes
                             // before str, and subtract that rewind from the returned count.
                             // NOTE(review): presumably the caller has already emitted `previous`
                             // as an unmatched surrogate occupying exactly those bytes, so the
                             // result is the net change in output length -- confirm with callers.
      89             :       const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
      90             :       return Encode(str - kUnmatchedSize,
      91         345 :                     Utf16::CombineSurrogatePair(previous, c),
      92             :                     Utf16::kNoPreviousCharacter,
      93         690 :                     replace_invalid) - kUnmatchedSize;
      94      396502 :     } else if (replace_invalid &&
      95          14 :                (Utf16::IsLeadSurrogate(c) ||
      96             :                Utf16::IsTrailSurrogate(c))) {
                             // Orphan surrogate: substitute kBadChar, then fall through to the
                             // three-byte encoding below.
      97             :       c = kBadChar;
      98             :     }
      99      198237 :     str[0] = 0xE0 | (c >> 12);
     100      198237 :     str[1] = 0x80 | ((c >> 6) & kMask);
     101      198237 :     str[2] = 0x80 | (c & kMask);
     102      198237 :     return 3;
     103             :   } else {
     104         827 :     str[0] = 0xF0 | (c >> 18);
     105         827 :     str[1] = 0x80 | ((c >> 12) & kMask);
     106         827 :     str[2] = 0x80 | ((c >> 6) & kMask);
     107         827 :     str[3] = 0x80 | (c & kMask);
     108         827 :     return 4;
     109             :   }
     110             : }
     111             : 
     112             : 
     113  1082989937 : uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
                         // Decodes the code point that starts at bytes[0]. Returns kBadChar
                         // without touching *cursor when length is 0 (length is unsigned, so
                         // `<= 0` can only mean zero). A first byte <= kMaxOneByteChar is
                         // returned directly and *cursor is advanced by 1; anything else is
                         // handed to CalculateValue(bytes, length, cursor).
     114  1082989937 :   if (length <= 0) return kBadChar;
     115  1082989952 :   byte first = bytes[0];
     116             :   // Code points between U+0000 and U+007F are encoded as a single byte.
     117  1082989952 :   if (first <= kMaxOneByteChar) {
     118  1079033626 :     *cursor += 1;
     119  1079033626 :     return first;
     120             :   }
     121     3956326 :   return CalculateValue(bytes, length, cursor);
     122             : }
     123             : 
     124    38759027 : unsigned Utf8::Length(uchar c, int previous) {
                         // Returns a byte count for c chosen by the same kMaxOneByteChar /
                         // kMaxTwoByteChar / kMaxThreeByteChar thresholds that Encode() uses:
                         // 1, 2, 3 or 4.
                         // Special case: when c is a trail surrogate and previous is a lead
                         // surrogate, returns kSizeOfUnmatchedSurrogate -
                         // kBytesSavedByCombiningSurrogates instead of 3.
                         // NOTE(review): presumably this is the net number of extra bytes once
                         // the pair is combined into one code point -- confirm against the
                         // constants' definitions.
     125    38759027 :   if (c <= kMaxOneByteChar) {
     126             :     return 1;
     127      406725 :   } else if (c <= kMaxTwoByteChar) {
     128             :     return 2;
     129      378710 :   } else if (c <= kMaxThreeByteChar) {
     130      758570 :     if (Utf16::IsTrailSurrogate(c) &&
     131             :         Utf16::IsLeadSurrogate(previous)) {
     132             :       return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
     133             :     }
     134      378197 :     return 3;
     135             :   } else {
     136             :     return 4;
     137             :   }
     138             : }
     139             : 
     140             : bool Utf8::IsValidCharacter(uchar c) {
                         // Returns true unless c falls in one of the rejected sets:
                         //  - 0xD800..0xDFFF (the UTF-16 surrogate range),
                         //  - 0xFDD0..0xFDEF,
                         //  - any value whose low 16 bits are 0xFFFE or 0xFFFF
                         //    (that is what (c & 0xFFFE) == 0xFFFE tests),
                         //  - anything above 0x10FFFF,
                         //  - kBadChar itself.
     141        2375 :   return c < 0xD800u || (c >= 0xE000u && c < 0xFDD0u) ||
     142         806 :          (c > 0xFDEFu && c <= 0x10FFFFu && (c & 0xFFFEu) != 0xFFFEu &&
     143             :           c != kBadChar);
     144             : }
     145             : 
     146             : }  // namespace unibrow
     147             : 
     148             : #endif  // V8_UNICODE_INL_H_

Generated by: LCOV version 1.10