LCOV - code coverage report
Current view: top level - src - string-case.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 30 30 100.0 %
Date: 2019-03-21 Functions: 3 3 100.0 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/string-case.h"
       6             : 
       7             : #include "src/assert-scope.h"
       8             : #include "src/base/logging.h"
       9             : #include "src/globals.h"
      10             : #include "src/utils.h"
      11             : 
      12             : namespace v8 {
      13             : namespace internal {
      14             : 
      15             : // FastAsciiConvert tries to do character processing on a word_t basis if
      16             : // source and destination strings are properly aligned. Natural alignment of
      17             : // string data depends on kTaggedSize so we define word_t via Tagged_t.
      18             : using word_t = std::make_unsigned<Tagged_t>::type;
      19             : 
      20             : const word_t kWordTAllBitsSet = std::numeric_limits<word_t>::max();  // every bit of word_t set
      21             : const word_t kOneInEveryByte = kWordTAllBitsSet / 0xFF;  // 0x01 in each byte of the word
      22             : const word_t kAsciiMask = kOneInEveryByte << 7;  // 0x80 in each byte; a hit means a byte above 0x7F
      23             : 
      24             : #ifdef DEBUG  // Debug-only consistency check of a finished conversion (used from a DCHECK).
      25             : bool CheckFastAsciiConvert(char* dst, const char* src, int length, bool changed,
      26             :                            bool is_to_lower) {  // is_to_lower selects which direction is verified
      27             :   bool expected_changed = false;  // set once any dst byte differs from its src byte
      28             :   for (int i = 0; i < length; i++) {
      29             :     if (dst[i] == src[i]) continue;  // untouched byte: nothing to verify
      30             :     expected_changed = true;
      31             :     if (is_to_lower) {
      32             :       DCHECK('A' <= src[i] && src[i] <= 'Z');  // only upper-case letters may have changed
      33             :       DCHECK(dst[i] == src[i] + ('a' - 'A'));  // and only to their lower-case form
      34             :     } else {
      35             :       DCHECK('a' <= src[i] && src[i] <= 'z');  // only lower-case letters may have changed
      36             :       DCHECK(dst[i] == src[i] - ('a' - 'A'));  // and only to their upper-case form
      37             :     }
      38             :   }
      39             :   return (expected_changed == changed);  // true iff the caller's flag matches what was observed
      40             : }
      41             : #endif
      42             : 
      43             : // Given a word and two range boundaries returns a word with high bit
      44             : // set in every byte iff the corresponding input byte was strictly in
      45             : // the range (m, n). All the other bits in the result are cleared.
      46             : // This function is only useful when it can be inlined and the
      47             : // boundaries are statically known.
      48             : // Requires: all bytes in the input word and the boundaries must be
      49             : // ASCII (high bit clear, i.e. at most 0x7F).
      50             : static inline word_t AsciiRangeMask(word_t w, char m, char n) {
      51             :   // Use strict inequalities since in edge cases the function could be
      52             :   // further simplified.
      53             :   DCHECK(0 < m && m < n);
      54             :   // Has high bit set in every w byte less than n.
      55      184737 :   word_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
      56             :   // Has high bit set in every w byte greater than m.
      57      184737 :   word_t tmp2 = w + kOneInEveryByte * (0x7F - m);
      58      184737 :   return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));  // keep per-byte high bits only; the last factor equals kAsciiMask
      59             : }
      60             : 
      61             : template <bool is_lower>  // true: 'A'..'Z' -> lower case; false: 'a'..'z' -> upper case
      62        6870 : int FastAsciiConvert(char* dst, const char* src, int length,  // returns length, or the offset where a non-ASCII word/byte stopped processing
      63             :                      bool* changed_out) {  // written only when the whole input was processed
      64             : #ifdef DEBUG
      65             :   char* saved_dst = dst;
      66             : #endif
      67             :   const char* saved_src = src;  // start of input, used for the offset returned on early exit
      68             :   DisallowHeapAllocation no_gc;
      69             :   // We rely on the distance between upper and lower case letters
      70             :   // being a known power of 2.
      71             :   DCHECK_EQ('a' - 'A', 1 << 5);
      72             :   // Boundaries for the range of input characters that require conversion.
      73             :   static const char lo = is_lower ? 'A' - 1 : 'a' - 1;
      74             :   static const char hi = is_lower ? 'Z' + 1 : 'z' + 1;
      75             :   bool changed = false;
      76        6870 :   const char* const limit = src + length;
      77             : 
      78             :   // dst is newly allocated and always aligned.
      79             :   DCHECK(IsAligned(reinterpret_cast<Address>(dst), sizeof(word_t)));
      80             :   // Only attempt processing one word at a time if src is also aligned.
      81       13740 :   if (IsAligned(reinterpret_cast<Address>(src), sizeof(word_t))) {
      82             :     // Process the prefix of the input that requires no conversion one aligned
      83             :     // (machine) word at a time.
      84       15420 :     while (src <= limit - sizeof(word_t)) {  // NOTE(review): for length < sizeof(word_t) this forms a pointer below src; confirm that is acceptable
      85        8065 :       const word_t w = *reinterpret_cast<const word_t*>(src);
      86        8065 :       if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);  // offset of this word's start, not of the exact byte
      87        7984 :       if (AsciiRangeMask(w, lo, hi) != 0) {
      88             :         changed = true;
      89             :         break;  // first word needing conversion: the loop below re-reads it and converts
      90             :       }
      91        4491 :       *reinterpret_cast<word_t*>(dst) = w;
      92        4491 :       src += sizeof(word_t);
      93        4491 :       dst += sizeof(word_t);
      94             :     }
      95             :     // Process the remainder of the input performing conversion when
      96             :     // required one word at a time.
      97      359863 :     while (src <= limit - sizeof(word_t)) {
      98      176780 :       const word_t w = *reinterpret_cast<const word_t*>(src);
      99      176780 :       if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
     100             :       word_t m = AsciiRangeMask(w, lo, hi);
     101             :       // The mask has high (7th) bit set in every byte that needs
     102             :       // conversion and we know that the distance between cases is
     103             :       // 1 << 5.
     104      176753 :       *reinterpret_cast<word_t*>(dst) = w ^ (m >> 2);  // 0x80 >> 2 == 0x20 == 1 << 5, so this flips the case bit
     105      176753 :       src += sizeof(word_t);
     106      176753 :       dst += sizeof(word_t);
     107             :     }
     108             :   }
     109             :   // Process the last few bytes of the input (or the whole input if
     110             :   // src is not word-aligned).
     111      243308 :   while (src < limit) {
     112      118395 :     char c = *src;
     113      118395 :     if ((c & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
     114      118273 :     if (lo < c && c < hi) {
     115       25178 :       c ^= (1 << 5);  // flip the case bit
     116             :       changed = true;
     117             :     }
     118      118273 :     *dst = c;
     119      118273 :     ++src;
     120      118273 :     ++dst;
     121             :   }
     122             : 
     123             :   DCHECK(
     124             :       CheckFastAsciiConvert(saved_dst, saved_src, length, changed, is_lower));
     125             : 
     126        6640 :   *changed_out = changed;
     127        6640 :   return length;
     128             : }
     129             : 
     130             : template int FastAsciiConvert<false>(char* dst, const char* src, int length,
     131             :                                      bool* changed_out);  // explicit instantiation: lower -> upper case
     132             : template int FastAsciiConvert<true>(char* dst, const char* src, int length,
     133             :                                     bool* changed_out);  // explicit instantiation: upper -> lower case
     134             : 
     135             : }  // namespace internal
     136      120216 : }  // namespace v8

Generated by: LCOV version 1.10