LCOV - code coverage report
Current view: top level - src - unicode-inl.h (source / functions) Hit Total Coverage
Test: app.info Lines: 62 62 100.0 %
Date: 2017-10-20 Functions: 13 13 100.0 %

          Line data    Source code
       1             : // Copyright 2007-2010 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_UNICODE_INL_H_
       6             : #define V8_UNICODE_INL_H_
       7             : 
       8             : #include "src/unicode.h"
       9             : #include "src/base/logging.h"
      10             : #include "src/utils.h"
      11             : 
      12             : namespace unibrow {
      13             : 
      14   209002978 : template <class T, int s> bool Predicate<T, s>::get(uchar code_point) {
      15   209002978 :   CacheEntry entry = entries_[code_point & kMask];
      16   414805297 :   if (entry.code_point() == code_point) return entry.value();
      17     3200659 :   return CalculateValue(code_point);
      18             : }
      19             : 
      20             : template <class T, int s> bool Predicate<T, s>::CalculateValue(
      21             :     uchar code_point) {
      22     3200659 :   bool result = T::Is(code_point);
      23     3200659 :   entries_[code_point & kMask] = CacheEntry(code_point, result);
      24             :   return result;
      25             : }
      26             : 
      27    47567770 : template <class T, int s> int Mapping<T, s>::get(uchar c, uchar n,
      28             :     uchar* result) {
      29    47567770 :   CacheEntry entry = entries_[c & kMask];
      30    47567770 :   if (entry.code_point_ == c) {
      31     1100840 :     if (entry.offset_ == 0) {
      32             :       return 0;
      33             :     } else {
      34       78894 :       result[0] = c + entry.offset_;
      35       78894 :       return 1;
      36             :     }
      37             :   } else {
      38    46466930 :     return CalculateValue(c, n, result);
      39             :   }
      40             : }
      41             : 
      42    46466930 : template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
      43             :     uchar* result) {
      44    46466930 :   bool allow_caching = true;
      45    46466930 :   int length = T::Convert(c, n, result, &allow_caching);
      46    46466930 :   if (allow_caching) {
      47    45873503 :     if (length == 1) {
      48       20652 :       entries_[c & kMask] = CacheEntry(c, result[0] - c);
      49       20652 :       return 1;
      50             :     } else {
      51    45852851 :       entries_[c & kMask] = CacheEntry(c, 0);
      52    45852851 :       return 0;
      53             :     }
      54             :   } else {
      55             :     return length;
      56             :   }
      57             : }
      58             : 
      59             : 
      60             : unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
      61             :   static const int kMask = ~(1 << 6);
      62    81839401 :   if (c <= kMaxOneByteChar) {
      63    81838857 :     str[0] = c;
      64             :     return 1;
      65             :   }
      66         544 :   str[0] = 0xC0 | (c >> 6);
      67         544 :   str[1] = 0x80 | (c & kMask);
      68             :   return 2;
      69             : }
      70             : 
      71             : // Encode encodes the UTF-16 code units c and previous into the given str
      72             : // buffer, and combines surrogate code units into single code points. If
      73             : // replace_invalid is set to true, orphan surrogate code units will be replaced
      74             : // with kBadChar.
      75    33541667 : unsigned Utf8::Encode(char* str,
      76             :                       uchar c,
      77             :                       int previous,
      78             :                       bool replace_invalid) {
      79             :   static const int kMask = ~(1 << 6);
      80    33541667 :   if (c <= kMaxOneByteChar) {
      81    33356377 :     str[0] = c;
      82    33356377 :     return 1;
      83      185290 :   } else if (c <= kMaxTwoByteChar) {
      84       16034 :     str[0] = 0xC0 | (c >> 6);
      85       16034 :     str[1] = 0x80 | (c & kMask);
      86       16034 :     return 2;
      87      169256 :   } else if (c <= kMaxThreeByteChar) {
      88      337438 :     if (Utf16::IsSurrogatePair(previous, c)) {
      89             :       const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
      90             :       return Encode(str - kUnmatchedSize,
      91         225 :                     Utf16::CombineSurrogatePair(previous, c),
      92             :                     Utf16::kNoPreviousCharacter,
      93         450 :                     replace_invalid) - kUnmatchedSize;
      94      337012 :     } else if (replace_invalid &&
      95          12 :                (Utf16::IsLeadSurrogate(c) ||
      96             :                Utf16::IsTrailSurrogate(c))) {
      97             :       c = kBadChar;
      98             :     }
      99      168494 :     str[0] = 0xE0 | (c >> 12);
     100      168494 :     str[1] = 0x80 | ((c >> 6) & kMask);
     101      168494 :     str[2] = 0x80 | (c & kMask);
     102      168494 :     return 3;
     103             :   } else {
     104         537 :     str[0] = 0xF0 | (c >> 18);
     105         537 :     str[1] = 0x80 | ((c >> 12) & kMask);
     106         537 :     str[2] = 0x80 | ((c >> 6) & kMask);
     107         537 :     str[3] = 0x80 | (c & kMask);
     108         537 :     return 4;
     109             :   }
     110             : }
     111             : 
     112             : 
     113   666761645 : uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
     114   666761645 :   if (length <= 0) return kBadChar;
     115   666761646 :   byte first = bytes[0];
     116             :   // Characters between 0000 and 0007F are encoded as a single character
     117   666761646 :   if (first <= kMaxOneByteChar) {
     118   664715199 :     *cursor += 1;
     119   664715199 :     return first;
     120             :   }
     121     2046447 :   return CalculateValue(bytes, length, cursor);
     122             : }
     123             : 
     124    30423120 : unsigned Utf8::Length(uchar c, int previous) {
     125    30423120 :   if (c <= kMaxOneByteChar) {
     126             :     return 1;
     127      344807 :   } else if (c <= kMaxTwoByteChar) {
     128             :     return 2;
     129      324056 :   } else if (c <= kMaxThreeByteChar) {
     130      649027 :     if (Utf16::IsTrailSurrogate(c) &&
     131             :         Utf16::IsLeadSurrogate(previous)) {
     132             :       return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
     133             :     }
     134      323687 :     return 3;
     135             :   } else {
     136             :     return 4;
     137             :   }
     138             : }
     139             : 
     140             : bool Utf8::IsValidCharacter(uchar c) {
     141             :   return c < 0xD800u || (c >= 0xE000u && c < 0xFDD0u) ||
     142             :          (c > 0xFDEFu && c <= 0x10FFFFu && (c & 0xFFFEu) != 0xFFFEu &&
     143             :           c != kBadChar);
     144             : }
     145             : 
     146             : }  // namespace unibrow
     147             : 
     148             : #endif  // V8_UNICODE_INL_H_

Generated by: LCOV version 1.10