LCOV - code coverage report
Current view: top level - src/builtins - builtins-intl.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 99 105 94.3 %
Date: 2017-10-20 Functions: 11 14 78.6 %

          Line data    Source code
       1             : // Copyright 2017 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_INTL_SUPPORT
       6             : #error Internationalization is expected to be enabled.
       7             : #endif  // V8_INTL_SUPPORT
       8             : 
       9             : #include "src/builtins/builtins-intl.h"
      10             : #include "src/builtins/builtins-utils.h"
      11             : #include "src/builtins/builtins.h"
      12             : #include "src/intl.h"
      13             : #include "src/objects-inl.h"
      14             : #include "src/objects/intl-objects.h"
      15             : 
      16             : #include "unicode/decimfmt.h"
      17             : #include "unicode/fieldpos.h"
      18             : #include "unicode/fpositer.h"
      19             : #include "unicode/normalizer2.h"
      20             : #include "unicode/numfmt.h"
      21             : #include "unicode/ufieldpositer.h"
      22             : #include "unicode/unistr.h"
      23             : #include "unicode/ustring.h"
      24             : 
      25             : namespace v8 {
      26             : namespace internal {
      27             : 
      28       15189 : BUILTIN(StringPrototypeToUpperCaseIntl) {
      29             :   HandleScope scope(isolate);
      30       10486 :   TO_THIS_STRING(string, "String.prototype.toUpperCase");
      31        4883 :   string = String::Flatten(string);
      32        4883 :   return ConvertCase(string, true, isolate);
      33             : }
      34             : 
      35        6975 : BUILTIN(StringPrototypeNormalizeIntl) {
      36             :   HandleScope handle_scope(isolate);
      37        4650 :   TO_THIS_STRING(string, "String.prototype.normalize");
      38             : 
      39             :   Handle<Object> form_input = args.atOrUndefined(isolate, 1);
      40             :   const char* form_name;
      41             :   UNormalization2Mode form_mode;
      42        2325 :   if (form_input->IsUndefined(isolate)) {
      43             :     // default is NFC
      44             :     form_name = "nfc";
      45             :     form_mode = UNORM2_COMPOSE;
      46             :   } else {
      47             :     Handle<String> form;
      48        3908 :     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
      49             :                                        Object::ToString(isolate, form_input));
      50             : 
      51        1900 :     if (String::Equals(form, isolate->factory()->NFC_string())) {
      52             :       form_name = "nfc";
      53             :       form_mode = UNORM2_COMPOSE;
      54        1485 :     } else if (String::Equals(form, isolate->factory()->NFD_string())) {
      55             :       form_name = "nfc";
      56             :       form_mode = UNORM2_DECOMPOSE;
      57        1080 :     } else if (String::Equals(form, isolate->factory()->NFKC_string())) {
      58             :       form_name = "nfkc";
      59             :       form_mode = UNORM2_COMPOSE;
      60         666 :     } else if (String::Equals(form, isolate->factory()->NFKD_string())) {
      61             :       form_name = "nfkc";
      62             :       form_mode = UNORM2_DECOMPOSE;
      63             :     } else {
      64             :       Handle<String> valid_forms =
      65         108 :           isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
      66         216 :       THROW_NEW_ERROR_RETURN_FAILURE(
      67             :           isolate,
      68             :           NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
      69             :     }
      70             :   }
      71             : 
      72             :   int length = string->length();
      73        2217 :   string = String::Flatten(string);
      74        2217 :   icu::UnicodeString result;
      75        2217 :   std::unique_ptr<uc16[]> sap;
      76        2217 :   UErrorCode status = U_ZERO_ERROR;
      77             :   {
      78             :     DisallowHeapAllocation no_gc;
      79        2217 :     String::FlatContent flat = string->GetFlatContent();
      80        2217 :     const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
      81        4434 :     icu::UnicodeString input(false, src, length);
      82             :     // Getting a singleton. Should not free it.
      83             :     const icu::Normalizer2* normalizer =
      84        2217 :         icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
      85             :     DCHECK(U_SUCCESS(status));
      86        2217 :     CHECK_NOT_NULL(normalizer);
      87             :     int32_t normalized_prefix_length =
      88        2217 :         normalizer->spanQuickCheckYes(input, status);
      89             :     // Quick return if the input is already normalized.
      90        3471 :     if (length == normalized_prefix_length) return *string;
      91             :     icu::UnicodeString unnormalized =
      92        1926 :         input.tempSubString(normalized_prefix_length);
      93             :     // Read-only alias of the normalized prefix.
      94        1926 :     result.setTo(false, input.getBuffer(), normalized_prefix_length);
      95             :     // copy-on-write; normalize the suffix and append to |result|.
      96        1926 :     normalizer->normalizeSecondAndAppend(result, unnormalized, status);
      97             :   }
      98             : 
      99         963 :   if (U_FAILURE(status)) {
     100           0 :     return isolate->heap()->undefined_value();
     101             :   }
     102             : 
     103        3852 :   RETURN_RESULT_OR_FAILURE(
     104             :       isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
     105             :                    reinterpret_cast<const uint16_t*>(result.getBuffer()),
     106             :                    result.length())));
     107             : }
     108             : 
     109             : namespace {
     110             : 
     111             : // The list comes from third_party/icu/source/i18n/unicode/unum.h.
     112             : // They're mapped to NumberFormat part types mentioned throughout
     113             : // https://tc39.github.io/ecma402/#sec-partitionnumberpattern .
     114        1017 : Handle<String> IcuNumberFieldIdToNumberType(int32_t field_id, double number,
     115             :                                             Isolate* isolate) {
     116        1017 :   switch (static_cast<UNumberFormatFields>(field_id)) {
     117             :     case UNUM_INTEGER_FIELD:
     118         396 :       if (std::isfinite(number)) return isolate->factory()->integer_string();
     119          27 :       if (std::isnan(number)) return isolate->factory()->nan_string();
     120             :       return isolate->factory()->infinity_string();
     121             :     case UNUM_FRACTION_FIELD:
     122             :       return isolate->factory()->fraction_string();
     123             :     case UNUM_DECIMAL_SEPARATOR_FIELD:
     124             :       return isolate->factory()->decimal_string();
     125             :     case UNUM_GROUPING_SEPARATOR_FIELD:
     126             :       return isolate->factory()->group_string();
     127             :     case UNUM_CURRENCY_FIELD:
     128             :       return isolate->factory()->currency_string();
     129             :     case UNUM_PERCENT_FIELD:
     130             :       return isolate->factory()->percentSign_string();
     131             :     case UNUM_SIGN_FIELD:
     132             :       return number < 0 ? isolate->factory()->minusSign_string()
     133         108 :                         : isolate->factory()->plusSign_string();
     134             : 
     135             :     case UNUM_EXPONENT_SYMBOL_FIELD:
     136             :     case UNUM_EXPONENT_SIGN_FIELD:
     137             :     case UNUM_EXPONENT_FIELD:
     138             :       // We should never get these because we're not using any scientific
     139             :       // formatter.
     140           0 :       UNREACHABLE();
     141             :       return Handle<String>();
     142             : 
     143             :     case UNUM_PERMILL_FIELD:
     144             :       // We're not creating any permill formatter, and it's not even clear how
     145             :       // that would be possible with the ICU API.
     146           0 :       UNREACHABLE();
     147             :       return Handle<String>();
     148             : 
     149             :     default:
     150           0 :       UNREACHABLE();
     151             :       return Handle<String>();
     152             :   }
     153             : }
     154             : 
     155        1062 : bool AddElement(Handle<JSArray> array, int index,
     156             :                 Handle<String> field_type_string,
     157             :                 const icu::UnicodeString& formatted, int32_t begin, int32_t end,
     158             :                 Isolate* isolate) {
     159             :   HandleScope scope(isolate);
     160             :   Factory* factory = isolate->factory();
     161        1062 :   Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
     162             :   Handle<String> value;
     163             :   JSObject::AddProperty(element, factory->type_string(), field_type_string,
     164        1062 :                         NONE);
     165             : 
     166        1062 :   icu::UnicodeString field(formatted.tempSubStringBetween(begin, end));
     167        3186 :   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
     168             :       isolate, value,
     169             :       factory->NewStringFromTwoByte(Vector<const uint16_t>(
     170             :           reinterpret_cast<const uint16_t*>(field.getBuffer()),
     171             :           field.length())),
     172             :       false);
     173             : 
     174        1062 :   JSObject::AddProperty(element, factory->value_string(), value, NONE);
     175        3186 :   RETURN_ON_EXCEPTION_VALUE(
     176             :       isolate, JSObject::AddDataElement(array, index, element, NONE), false);
     177        1062 :   return true;
     178             : }
     179             : 
     180        2292 : bool cmp_NumberFormatSpan(const NumberFormatSpan& a,
     181             :                           const NumberFormatSpan& b) {
     182             :   // Regions that start earlier should be encountered earlier.
     183        2292 :   if (a.begin_pos < b.begin_pos) return true;
     184        2112 :   if (a.begin_pos > b.begin_pos) return false;
     185             :   // For regions that start in the same place, regions that last longer should
     186             :   // be encountered earlier.
     187         708 :   if (a.end_pos < b.end_pos) return false;
     188         204 :   if (a.end_pos > b.end_pos) return true;
     189             :   // For regions that are exactly the same, one of them must be the "literal"
     190             :   // backdrop we added, which has a field_id of -1, so consider higher field_ids
     191             :   // to be later.
     192         144 :   return a.field_id < b.field_id;
     193             : }
     194             : 
     195         270 : Object* FormatNumberToParts(Isolate* isolate, icu::NumberFormat* fmt,
     196             :                             double number) {
     197             :   Factory* factory = isolate->factory();
     198             : 
     199             :   icu::UnicodeString formatted;
     200         540 :   icu::FieldPositionIterator fp_iter;
     201         270 :   UErrorCode status = U_ZERO_ERROR;
     202         270 :   fmt->format(number, formatted, &fp_iter, status);
     203         270 :   if (U_FAILURE(status)) return isolate->heap()->undefined_value();
     204             : 
     205         270 :   Handle<JSArray> result = factory->NewJSArray(0);
     206             :   int32_t length = formatted.length();
     207         270 :   if (length == 0) return *result;
     208             : 
     209             :   std::vector<NumberFormatSpan> regions;
     210             :   // Add a "literal" backdrop for the entire string. This will be used if no
     211             :   // other region covers some part of the formatted string. It's possible
     212             :   // there's another field with exactly the same begin and end as this backdrop,
     213             :   // in which case the backdrop's field_id of -1 will give it lower priority.
     214         270 :   regions.push_back(NumberFormatSpan(-1, 0, formatted.length()));
     215             : 
     216             :   {
     217             :     icu::FieldPosition fp;
     218        1161 :     while (fp_iter.next(fp)) {
     219             :       regions.push_back(NumberFormatSpan(fp.getField(), fp.getBeginIndex(),
     220        1782 :                                          fp.getEndIndex()));
     221         270 :     }
     222             :   }
     223             : 
     224         270 :   std::vector<NumberFormatSpan> parts = FlattenRegionsToParts(&regions);
     225             : 
     226             :   int index = 0;
     227        1602 :   for (auto it = parts.begin(); it < parts.end(); it++) {
     228        1062 :     NumberFormatSpan part = *it;
     229             :     Handle<String> field_type_string =
     230             :         part.field_id == -1
     231             :             ? isolate->factory()->literal_string()
     232        1107 :             : IcuNumberFieldIdToNumberType(part.field_id, number, isolate);
     233        1062 :     if (!AddElement(result, index, field_type_string, formatted, part.begin_pos,
     234        1062 :                     part.end_pos, isolate)) {
     235           0 :       return isolate->heap()->undefined_value();
     236             :     }
     237        1062 :     ++index;
     238             :   }
     239         270 :   JSObject::ValidateElements(*result);
     240             : 
     241         540 :   return *result;
     242             : }
     243             : }  // namespace
     244             : 
     245             : // Flattens a list of possibly-overlapping "regions" to a list of
     246             : // non-overlapping "parts". At least one of the input regions must span the
     247             : // entire space of possible indexes. The regions parameter will be sorted in-place
     248             : // according to some criteria; this is done for performance to avoid copying the
     249             : // input.
     250         306 : std::vector<NumberFormatSpan> FlattenRegionsToParts(
     251        2670 :     std::vector<NumberFormatSpan>* regions) {
     252             :   // The intention of this algorithm is that it's used to translate ICU "fields"
     253             :   // to JavaScript "parts" of a formatted string. Each ICU field and JavaScript
     254             :   // part has an integer field_id, which corresponds to something like "grouping
     255             :   // separator", "fraction", or "percent sign", and has a begin and end
     256             :   // position. Here's a diagram of:
     257             : 
     258             :   // var nf = new Intl.NumberFormat(['de'], {style:'currency',currency:'EUR'});
     259             :   // nf.formatToParts(123456.78);
     260             : 
     261             :   //               :       6
     262             :   //  input regions:    0000000211 7
     263             :   // ('-' means -1):    ------------
     264             :   // formatted string: "123.456,78 €"
     265             :   // output parts:      0006000211-7
     266             : 
     267             :   // To illustrate the requirements of this algorithm, here's a contrived and
     268             :   // convoluted example of inputs and expected outputs:
     269             : 
     270             :   //              :          4
     271             :   //              :      22 33    3
     272             :   //              :      11111   22
     273             :   // input regions:     0000000  111
     274             :   //              :     ------------
     275             :   // formatted string: "abcdefghijkl"
     276             :   // output parts:      0221340--231
     277             :   // (The characters in the formatted string are irrelevant to this function.)
     278             : 
     279             :   // We arrange the overlapping input regions like a mountain range where
     280             :   // smaller regions are "on top" of larger regions, and we output a bird's-eye
     281             :   // view of the mountains, so that smaller regions take priority over larger
     282             :   // regions.
     283         306 :   std::sort(regions->begin(), regions->end(), cmp_NumberFormatSpan);
     284             :   std::vector<size_t> overlapping_region_index_stack;
     285             :   // At least one item in regions must be a region spanning the entire string.
     286             :   // Due to the sorting above, the first item in the vector will be one of them.
     287         612 :   overlapping_region_index_stack.push_back(0);
     288         306 :   NumberFormatSpan top_region = regions->at(0);
     289             :   size_t region_iterator = 1;
     290             :   int32_t entire_size = top_region.end_pos;
     291             : 
     292             :   std::vector<NumberFormatSpan> out_parts;
     293             : 
     294             :   // The "climber" is a cursor that advances from left to right climbing "up"
     295             :   // and "down" the mountains. Whenever the climber moves to the right, that
     296             :   // represents an item of output.
     297             :   int32_t climber = 0;
     298        1947 :   while (climber < entire_size) {
     299             :     int32_t next_region_begin_pos;
     300        1335 :     if (region_iterator < regions->size()) {
     301        1029 :       next_region_begin_pos = regions->at(region_iterator).begin_pos;
     302             :     } else {
     303             :       // finish off the rest of the input by proceeding to the end.
     304             :       next_region_begin_pos = entire_size;
     305             :     }
     306             : 
     307        1335 :     if (climber < next_region_begin_pos) {
     308        1308 :       while (top_region.end_pos < next_region_begin_pos) {
     309         309 :         if (climber < top_region.end_pos) {
     310             :           // step down
     311             :           out_parts.push_back(NumberFormatSpan(top_region.field_id, climber,
     312         231 :                                                top_region.end_pos));
     313             :           climber = top_region.end_pos;
     314             :         } else {
     315             :           // drop down
     316             :         }
     317             :         overlapping_region_index_stack.pop_back();
     318         618 :         top_region = regions->at(overlapping_region_index_stack.back());
     319             :       }
     320         999 :       if (climber < next_region_begin_pos) {
     321             :         // cross a plateau/mesa/valley
     322             :         out_parts.push_back(NumberFormatSpan(top_region.field_id, climber,
     323         999 :                                              next_region_begin_pos));
     324             :         climber = next_region_begin_pos;
     325             :       }
     326             :     }
     327        1335 :     if (region_iterator < regions->size()) {
     328        2058 :       overlapping_region_index_stack.push_back(region_iterator++);
     329        2058 :       top_region = regions->at(overlapping_region_index_stack.back());
     330             :     }
     331             :   }
     332         306 :   return out_parts;
     333             : }
     334             : 
     335         810 : BUILTIN(NumberFormatPrototypeFormatToParts) {
     336             :   const char* const method = "Intl.NumberFormat.prototype.formatToParts";
     337             :   HandleScope handle_scope(isolate);
     338         270 :   CHECK_RECEIVER(JSObject, number_format_holder, method);
     339             : 
     340             :   Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
     341             :   Handle<Object> tag =
     342         270 :       JSReceiver::GetDataProperty(number_format_holder, marker);
     343             :   Handle<String> expected_tag =
     344         270 :       isolate->factory()->NewStringFromStaticChars("numberformat");
     345         540 :   if (!(tag->IsString() && String::cast(*tag)->Equals(*expected_tag))) {
     346           0 :     THROW_NEW_ERROR_RETURN_FAILURE(
     347             :         isolate,
     348             :         NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
     349             :                      isolate->factory()->NewStringFromAsciiChecked(method),
     350             :                      number_format_holder));
     351             :   }
     352             : 
     353             :   Handle<Object> x;
     354         270 :   if (args.length() >= 2) {
     355         540 :     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, x,
     356             :                                        Object::ToNumber(args.at(1)));
     357             :   } else {
     358             :     x = isolate->factory()->nan_value();
     359             :   }
     360             : 
     361             :   icu::DecimalFormat* number_format =
     362         270 :       NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
     363         270 :   CHECK_NOT_NULL(number_format);
     364             : 
     365         270 :   Object* result = FormatNumberToParts(isolate, number_format, x->Number());
     366         270 :   return result;
     367             : }
     368             : 
     369             : }  // namespace internal
     370             : }  // namespace v8

Generated by: LCOV version 1.10