LCOV - code coverage report
Current view: top level - src - intl.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 114 133 85.7 %
Date: 2017-04-26 Functions: 15 20 75.0 %

          Line data    Source code
       1             : // Copyright 2013 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_INTL_SUPPORT
       6             : #error Internationalization is expected to be enabled.
       7             : #endif  // V8_INTL_SUPPORT
       8             : 
       9             : #include "src/intl.h"
      10             : 
      11             : #include <memory>
      12             : 
      13             : #include "src/factory.h"
      14             : #include "src/isolate.h"
      15             : #include "src/objects-inl.h"
      16             : #include "src/string-case.h"
      17             : #include "unicode/calendar.h"
      18             : #include "unicode/gregocal.h"
      19             : #include "unicode/timezone.h"
      20             : #include "unicode/uchar.h"
      21             : #include "unicode/uvernum.h"
      22             : #include "unicode/uversion.h"
      23             : 
      24             : #if U_ICU_VERSION_MAJOR_NUM >= 59
      25             : #include "unicode/char16ptr.h"
      26             : #endif
      27             : 
      28             : namespace v8 {
      29             : namespace internal {
      30             : 
      31             : namespace {
      32      200090 : inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; }
      33             : 
      34             : const uint8_t kToLower[256] = {
      35             :     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
      36             :     0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
      37             :     0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
      38             :     0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
      39             :     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
      40             :     0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
      41             :     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
      42             :     0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
      43             :     0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
      44             :     0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
      45             :     0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
      46             :     0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
      47             :     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
      48             :     0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
      49             :     0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
      50             :     0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
      51             :     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
      52             :     0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
      53             :     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
      54             :     0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
      55             :     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
      56             :     0xFC, 0xFD, 0xFE, 0xFF,
      57             : };
      58             : 
      59             : inline uint16_t ToLatin1Lower(uint16_t ch) {
      60        4943 :   return static_cast<uint16_t>(kToLower[ch]);
      61             : }
      62             : 
      63             : inline uint16_t ToASCIIUpper(uint16_t ch) {
      64           0 :   return ch & ~((ch >= 'a' && ch <= 'z') << 5);
      65             : }
      66             : 
      67             : // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
      68             : inline uint16_t ToLatin1Upper(uint16_t ch) {
      69             :   DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
      70             :   return ch &
      71        2113 :          ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
      72        2113 :            << 5);
      73             : }
      74             : 
      75             : template <typename Char>
      76           0 : bool ToUpperFastASCII(const Vector<const Char>& src,
      77             :                       Handle<SeqOneByteString> result) {
      78             :   // Do a faster loop for the case where all the characters are ASCII.
      79             :   uint16_t ored = 0;
      80             :   int32_t index = 0;
      81           0 :   for (auto it = src.begin(); it != src.end(); ++it) {
      82           0 :     uint16_t ch = static_cast<uint16_t>(*it);
      83           0 :     ored |= ch;
      84           0 :     result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
      85             :   }
      86           0 :   return !(ored & ~0x7F);
      87             : }
      88             : 
      89             : const uint16_t sharp_s = 0xDF;
      90             : 
      91             : template <typename Char>
      92        2467 : bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
      93             :                     int* sharp_s_count) {
      94             :   // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
      95             : 
      96             :   // There are two special cases.
      97             :   //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
      98             :   //  2. Lower case sharp-S converts to "SS" (two characters)
      99         329 :   *sharp_s_count = 0;
     100        3618 :   for (auto it = src.begin(); it != src.end(); ++it) {
     101        1581 :     uint16_t ch = static_cast<uint16_t>(*it);
     102        1581 :     if (V8_UNLIKELY(ch == sharp_s)) {
     103         160 :       ++(*sharp_s_count);
     104         160 :       continue;
     105             :     }
     106        1421 :     if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
     107             :       // Since this upper-cased character does not fit in an 8-bit string, we
     108             :       // need to take the 16-bit path.
     109             :       return false;
     110             :     }
     111        2640 :     *dest++ = ToLatin1Upper(ch);
     112             :   }
     113             : 
     114             :   return true;
     115             : }
     116             : 
     117             : template <typename Char>
     118        1213 : void ToUpperWithSharpS(const Vector<const Char>& src,
     119             :                        Handle<SeqOneByteString> result) {
     120             :   int32_t dest_index = 0;
     121        2166 :   for (auto it = src.begin(); it != src.end(); ++it) {
     122         953 :     uint16_t ch = static_cast<uint16_t>(*it);
     123         953 :     if (ch == sharp_s) {
     124         160 :       result->SeqOneByteStringSet(dest_index++, 'S');
     125         160 :       result->SeqOneByteStringSet(dest_index++, 'S');
     126             :     } else {
     127         793 :       result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
     128             :     }
     129             :   }
     130         130 : }
     131             : 
     132      194510 : inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
     133      203608 :   for (int index = 0; index < length; ++index) {
     134             :     uint16_t ch = s->Get(index);
     135      200090 :     if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
     136             :       return index;
     137             :     }
     138             :   }
     139             :   return length;
     140             : }
     141             : 
     142             : }  // namespace
     143             : 
     144      584292 : const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
     145             :                                     std::unique_ptr<uc16[]>* dest,
     146             :                                     int32_t length) {
     147             :   DCHECK(flat.IsFlat());
     148      292202 :   if (flat.IsOneByte()) {
     149      283982 :     if (!*dest) {
     150      283870 :       dest->reset(NewArray<uc16>(length));
     151             :       CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
     152             :     }
     153      283982 :     return reinterpret_cast<const UChar*>(dest->get());
     154             :   } else {
     155             :     return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
     156             :   }
     157             : }
     158             : 
     159        3233 : MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
     160             :                                           bool is_to_upper, const char* lang) {
     161        3233 :   auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
     162             :   int32_t src_length = s->length();
     163             :   int32_t dest_length = src_length;
     164             :   UErrorCode status;
     165             :   Handle<SeqTwoByteString> result;
     166        3233 :   std::unique_ptr<uc16[]> sap;
     167             : 
     168        3233 :   if (dest_length == 0) return isolate->heap()->empty_string();
     169             : 
     170             :   // This is not a real loop. It'll be executed only once (no overflow) or
     171             :   // twice (overflow).
     172         286 :   for (int i = 0; i < 2; ++i) {
     173             :     // Case conversion can increase the string length (e.g. sharp-S => SS) so
     174             :     // that we have to handle RangeError exceptions here.
     175        7038 :     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
     176             :         isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
     177             :     DisallowHeapAllocation no_gc;
     178             :     DCHECK(s->IsFlat());
     179        3519 :     String::FlatContent flat = s->GetFlatContent();
     180        3519 :     const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
     181        3519 :     status = U_ZERO_ERROR;
     182        3519 :     dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
     183        3519 :                                  dest_length, src, src_length, lang, &status);
     184        3519 :     if (status != U_BUFFER_OVERFLOW_ERROR) break;
     185             :   }
     186             : 
     187             :   // In most cases, the output will fill the destination buffer completely
     188             :   // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
     189             :   // Only in rare cases, it'll be shorter than the destination buffer and
     190             :   // |result| has to be truncated.
     191             :   DCHECK(U_SUCCESS(status));
     192        3233 :   if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
     193             :     DCHECK(dest_length == result->length());
     194        3023 :     return *result;
     195             :   }
     196         210 :   if (U_SUCCESS(status)) {
     197             :     DCHECK(dest_length < result->length());
     198             :     return *Handle<SeqTwoByteString>::cast(
     199         420 :         SeqString::Truncate(result, dest_length));
     200             :   }
     201           0 :   return *s;
     202             : }
     203             : 
     204      201128 : MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
     205      201128 :   if (!s->HasOnlyOneByteChars()) {
     206             :     // Use a slower implementation for strings with characters beyond U+00FF.
     207        2105 :     return LocaleConvertCase(s, isolate, false, "");
     208             :   }
     209             : 
     210             :   int length = s->length();
     211             : 
     212             :   // We depend here on the invariant that the length of a Latin1
     213             :   // string is invariant under ToLowerCase, and the result always
     214             :   // fits in the Latin1 range in the *root locale*. It does not hold
     215             :   // for ToUpperCase even in the root locale.
     216             : 
     217             :   // Scan the string for uppercase and non-ASCII characters for strings
     218             :   // shorter than a machine-word without any memory allocation overhead.
     219             :   // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
     220             :   // to two parts, one for scanning the prefix with no change and the other for
     221             :   // handling ASCII-only characters.
     222             :   int index_to_first_unprocessed = length;
     223             :   const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
     224      199023 :   if (is_short) {
     225      194510 :     index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
     226             :     // Nothing to do if the string is all ASCII with no uppercase.
     227      198028 :     if (index_to_first_unprocessed == length) return *s;
     228             :   }
     229             : 
     230             :   Handle<SeqOneByteString> result =
     231      391010 :       isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
     232             : 
     233             :   DisallowHeapAllocation no_gc;
     234             :   DCHECK(s->IsFlat());
     235      195505 :   String::FlatContent flat = s->GetFlatContent();
     236      195505 :   uint8_t* dest = result->GetChars();
     237      195505 :   if (flat.IsOneByte()) {
     238             :     const uint8_t* src = flat.ToOneByteVector().start();
     239      195505 :     bool has_changed_character = false;
     240             :     index_to_first_unprocessed = FastAsciiConvert<true>(
     241             :         reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src),
     242      195505 :         length, &has_changed_character);
     243             :     // If not ASCII, we keep the result up to index_to_first_unprocessed and
     244             :     // process the rest.
     245      195505 :     if (index_to_first_unprocessed == length)
     246      386892 :       return has_changed_character ? *result : *s;
     247             : 
     248        4943 :     for (int index = index_to_first_unprocessed; index < length; ++index) {
     249        9886 :       dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
     250             :     }
     251             :   } else {
     252           0 :     if (index_to_first_unprocessed == length) {
     253             :       DCHECK(!is_short);
     254           0 :       index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
     255             :     }
     256             :     // Nothing to do if the string is all ASCII with no uppercase.
     257           0 :     if (index_to_first_unprocessed == length) return *s;
     258             :     const uint16_t* src = flat.ToUC16Vector().start();
     259           0 :     CopyChars(dest, src, index_to_first_unprocessed);
     260           0 :     for (int index = index_to_first_unprocessed; index < length; ++index) {
     261           0 :       dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
     262             :     }
     263             :   }
     264             : 
     265        2059 :   return *result;
     266             : }
     267             : 
     268        8932 : MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
     269             :   int32_t length = s->length();
     270        8932 :   if (s->HasOnlyOneByteChars() && length > 0) {
     271             :     Handle<SeqOneByteString> result =
     272       17014 :         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
     273             : 
     274             :     DCHECK(s->IsFlat());
     275             :     int sharp_s_count;
     276             :     bool is_result_single_byte;
     277             :     {
     278             :       DisallowHeapAllocation no_gc;
     279        8507 :       String::FlatContent flat = s->GetFlatContent();
     280        8507 :       uint8_t* dest = result->GetChars();
     281        8507 :       if (flat.IsOneByte()) {
     282             :         Vector<const uint8_t> src = flat.ToOneByteVector();
     283        8507 :         bool has_changed_character = false;
     284             :         int index_to_first_unprocessed =
     285             :             FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()),
     286             :                                     reinterpret_cast<const char*>(src.start()),
     287        8507 :                                     length, &has_changed_character);
     288        8507 :         if (index_to_first_unprocessed == length)
     289       16356 :           return has_changed_character ? *result : *s;
     290             :         // If not ASCII, we keep the result up to index_to_first_unprocessed and
     291             :         // process the rest.
     292             :         is_result_single_byte =
     293             :             ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
     294         658 :                            dest + index_to_first_unprocessed, &sharp_s_count);
     295             :       } else {
     296             :         DCHECK(flat.IsTwoByte());
     297           0 :         Vector<const uint16_t> src = flat.ToUC16Vector();
     298           0 :         if (ToUpperFastASCII(src, result)) return *result;
     299           0 :         is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
     300             :       }
     301             :     }
     302             : 
     303             :     // Go to the full Unicode path if there are characters whose uppercase
     304             :     // is beyond the Latin-1 range (cannot be represented in OneByteString).
     305         329 :     if (V8_UNLIKELY(!is_result_single_byte)) {
     306         101 :       return LocaleConvertCase(s, isolate, true, "");
     307             :     }
     308             : 
     309         326 :     if (sharp_s_count == 0) return *result;
     310             : 
     311             :     // We have sharp_s_count sharp-s characters, but the result is still
     312             :     // in the Latin-1 range.
     313         260 :     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
     314             :         isolate, result,
     315             :         isolate->factory()->NewRawOneByteString(length + sharp_s_count));
     316             :     DisallowHeapAllocation no_gc;
     317         130 :     String::FlatContent flat = s->GetFlatContent();
     318         130 :     if (flat.IsOneByte()) {
     319         130 :       ToUpperWithSharpS(flat.ToOneByteVector(), result);
     320             :     } else {
     321           0 :       ToUpperWithSharpS(flat.ToUC16Vector(), result);
     322             :     }
     323             : 
     324         130 :     return *result;
     325             :   }
     326             : 
     327         425 :   return LocaleConvertCase(s, isolate, true, "");
     328             : }
     329             : 
     330      206312 : MUST_USE_RESULT Object* ConvertCase(Handle<String> s, bool is_upper,
     331             :                                     Isolate* isolate) {
     332      206312 :   return is_upper ? ConvertToUpper(s, isolate) : ConvertToLower(s, isolate);
     333             : }
     334             : 
     335          42 : ICUTimezoneCache::ICUTimezoneCache() : timezone_(nullptr) { Clear(); }
     336             : 
     337          28 : ICUTimezoneCache::~ICUTimezoneCache() { Clear(); }
     338             : 
     339          28 : const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
     340          28 :   bool is_dst = DaylightSavingsOffset(time_ms) != 0;
     341          28 :   char* name = is_dst ? dst_timezone_name_ : timezone_name_;
     342          28 :   if (name[0] == '\0') {
     343             :     icu::UnicodeString result;
     344          56 :     GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
     345             :     result += '\0';
     346             : 
     347          56 :     icu::CheckedArrayByteSink byte_sink(name, kMaxTimezoneChars);
     348          28 :     result.toUTF8(byte_sink);
     349          56 :     CHECK(!byte_sink.Overflowed());
     350             :   }
     351          28 :   return const_cast<const char*>(name);
     352             : }
     353             : 
     354           0 : icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
     355         105 :   if (timezone_ == nullptr) {
     356          14 :     timezone_ = icu::TimeZone::createDefault();
     357             :   }
     358         105 :   return timezone_;
     359             : }
     360             : 
     361          77 : bool ICUTimezoneCache::GetOffsets(double time_ms, int32_t* raw_offset,
     362             :                                   int32_t* dst_offset) {
     363          77 :   UErrorCode status = U_ZERO_ERROR;
     364          77 :   GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
     365         154 :   return U_SUCCESS(status);
     366             : }
     367             : 
     368          63 : double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
     369             :   int32_t raw_offset, dst_offset;
     370          63 :   if (!GetOffsets(time_ms, &raw_offset, &dst_offset)) return 0;
     371          63 :   return dst_offset;
     372             : }
     373             : 
     374          14 : double ICUTimezoneCache::LocalTimeOffset() {
     375             :   int32_t raw_offset, dst_offset;
     376          14 :   if (!GetOffsets(icu::Calendar::getNow(), &raw_offset, &dst_offset)) return 0;
     377          14 :   return raw_offset;
     378             : }
     379             : 
     380          14 : void ICUTimezoneCache::Clear() {
     381          28 :   delete timezone_;
     382          42 :   timezone_ = nullptr;
     383          42 :   timezone_name_[0] = '\0';
     384          42 :   dst_timezone_name_[0] = '\0';
     385          14 : }
     386             : 
     387             : }  // namespace internal
     388             : }  // namespace v8

Generated by: LCOV version 1.10