LCOV - code coverage report
Current view: top level - src/objects - js-list-format.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 114 139 82.0 %
Date: 2019-02-19 Functions: 13 15 86.7 %

          Line data    Source code
       1             : // Copyright 2018 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_INTL_SUPPORT
       6             : #error Internationalization is expected to be enabled.
       7             : #endif  // V8_INTL_SUPPORT
       8             : 
       9             : #include "src/objects/js-list-format.h"
      10             : 
      11             : #include <memory>
      12             : #include <vector>
      13             : 
      14             : #include "src/elements-inl.h"
      15             : #include "src/elements.h"
      16             : #include "src/heap/factory.h"
      17             : #include "src/isolate.h"
      18             : #include "src/objects-inl.h"
      19             : #include "src/objects/intl-objects.h"
      20             : #include "src/objects/js-array-inl.h"
      21             : #include "src/objects/js-list-format-inl.h"
      22             : #include "src/objects/managed.h"
      23             : #include "unicode/fieldpos.h"
      24             : #include "unicode/fpositer.h"
      25             : #include "unicode/listformatter.h"
      26             : #include "unicode/ulistformatter.h"
      27             : 
      28             : namespace v8 {
      29             : namespace internal {
      30             : 
      31             : namespace {
      32             : const char* kStandard = "standard";
      33             : const char* kOr = "or";
      34             : const char* kUnit = "unit";
      35             : const char* kStandardShort = "standard-short";
      36             : const char* kUnitShort = "unit-short";
      37             : const char* kUnitNarrow = "unit-narrow";
      38             : 
      39         900 : const char* GetIcuStyleString(JSListFormat::Style style,
      40             :                               JSListFormat::Type type) {
      41         900 :   switch (type) {
      42             :     case JSListFormat::Type::CONJUNCTION:
      43         513 :       switch (style) {
      44             :         case JSListFormat::Style::LONG:
      45         414 :           return kStandard;
      46             :         case JSListFormat::Style::SHORT:
      47          99 :           return kStandardShort;
      48             :         // NARROW is now not allowed if type is not unit
      49             :         // It is impossible to reach because we've already thrown a RangeError
      50             :         // when style is "narrow" and type is not "unit".
      51             :         case JSListFormat::Style::NARROW:
      52             :         case JSListFormat::Style::COUNT:
      53           0 :           UNREACHABLE();
      54             :       }
      55             :     case JSListFormat::Type::DISJUNCTION:
      56         162 :       switch (style) {
      57             :         // Currently, ListFormat::createInstance on "or-short"
      58             :         // will fail so we use "or" here.
      59             :         // See https://unicode.org/cldr/trac/ticket/11254
      60             :         // TODO(ftang): change to return kOr or kOrShort depending on
      61             :         // style after the above issue is fixed in CLDR/ICU.
      62             :         // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
      63             :         // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
      64             :         case JSListFormat::Style::LONG:
      65             :         case JSListFormat::Style::SHORT:
      66         162 :           return kOr;
      67             :         // NARROW is now not allowed if type is not unit
      68             :         // It is impossible to reach because we've already thrown a RangeError
      69             :         // when style is "narrow" and type is not "unit".
      70             :         case JSListFormat::Style::NARROW:
      71             :         case JSListFormat::Style::COUNT:
      72           0 :           UNREACHABLE();
      73             :       }
      74             :     case JSListFormat::Type::UNIT:
      75         225 :       switch (style) {
      76             :         case JSListFormat::Style::LONG:
      77          99 :           return kUnit;
      78             :         case JSListFormat::Style::SHORT:
      79          63 :           return kUnitShort;
      80             :         case JSListFormat::Style::NARROW:
      81          63 :           return kUnitNarrow;
      82             :         case JSListFormat::Style::COUNT:
      83           0 :           UNREACHABLE();
      84             :       }
      85             :     case JSListFormat::Type::COUNT:
      86           0 :       UNREACHABLE();
      87             :   }
      88           0 : }
      89             : 
      90             : }  // namespace
      91             : 
      92           0 : JSListFormat::Style get_style(const char* str) {
      93           0 :   switch (str[0]) {
      94             :     case 'n':
      95           0 :       if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
      96             :       break;
      97             :     case 'l':
      98           0 :       if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
      99             :       break;
     100             :     case 's':
     101           0 :       if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
     102             :       break;
     103             :   }
     104           0 :   UNREACHABLE();
     105             : }
     106             : 
     107           0 : JSListFormat::Type get_type(const char* str) {
     108           0 :   switch (str[0]) {
     109             :     case 'c':
     110           0 :       if (strcmp(&str[1], "onjunction") == 0)
     111             :         return JSListFormat::Type::CONJUNCTION;
     112             :       break;
     113             :     case 'd':
     114           0 :       if (strcmp(&str[1], "isjunction") == 0)
     115             :         return JSListFormat::Type::DISJUNCTION;
     116             :       break;
     117             :     case 'u':
     118           0 :       if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
     119             :       break;
     120             :   }
     121           0 :   UNREACHABLE();
     122             : }
     123             : 
     124        1013 : MaybeHandle<JSListFormat> JSListFormat::Initialize(
     125             :     Isolate* isolate, Handle<JSListFormat> list_format, Handle<Object> locales,
     126             :     Handle<Object> input_options) {
     127             :   list_format->set_flags(0);
     128             : 
     129             :   Handle<JSReceiver> options;
     130             :   // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
     131             :   Maybe<std::vector<std::string>> maybe_requested_locales =
     132        1013 :       Intl::CanonicalizeLocaleList(isolate, locales);
     133        1013 :   MAYBE_RETURN(maybe_requested_locales, Handle<JSListFormat>());
     134             :   std::vector<std::string> requested_locales =
     135        1004 :       maybe_requested_locales.FromJust();
     136             : 
     137             :   // 4. If options is undefined, then
     138        2008 :   if (input_options->IsUndefined(isolate)) {
     139             :     // 4. a. Let options be ObjectCreate(null).
     140         207 :     options = isolate->factory()->NewJSObjectWithNullProto();
     141             :     // 5. Else
     142             :   } else {
     143             :     // 5. a. Let options be ? ToObject(options).
     144        1594 :     ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
     145             :                                Object::ToObject(isolate, input_options),
     146             :                                JSListFormat);
     147             :   }
     148             : 
     149             :   // Note: No need to create a record. It's not observable.
     150             :   // 6. Let opt be a new Record.
     151             : 
     152             :   // 7. Let matcher be ? GetOption(options, "localeMatcher", "string", «
     153             :   // "lookup", "best fit" », "best fit").
     154             :   Maybe<Intl::MatcherOption> maybe_locale_matcher =
     155        1004 :       Intl::GetLocaleMatcher(isolate, options, "Intl.ListFormat");
     156        1004 :   MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSListFormat>());
     157             : 
     158             :   // 8. Set opt.[[localeMatcher]] to matcher.
     159             :   Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
     160             : 
     161             :   // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
     162             :   // requestedLocales, opt, undefined, localeData).
     163             :   Intl::ResolvedLocale r =
     164             :       Intl::ResolveLocale(isolate, JSListFormat::GetAvailableLocales(),
     165        3012 :                           requested_locales, matcher, {});
     166             : 
     167             :   // 11. Set listFormat.[[Locale]] to r.[[Locale]].
     168             :   Handle<String> locale_str =
     169        1004 :       isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
     170        1004 :   list_format->set_locale(*locale_str);
     171             : 
     172             :   // 12. Let t be GetOption(options, "type", "string", «"conjunction",
     173             :   //    "disjunction", "unit"», "conjunction").
     174             :   Maybe<Type> maybe_type = Intl::GetStringOption<Type>(
     175             :       isolate, options, "type", "Intl.ListFormat",
     176             :       {"conjunction", "disjunction", "unit"},
     177        3012 :       {Type::CONJUNCTION, Type::DISJUNCTION, Type::UNIT}, Type::CONJUNCTION);
     178        1004 :   MAYBE_RETURN(maybe_type, MaybeHandle<JSListFormat>());
     179             :   Type type_enum = maybe_type.FromJust();
     180             : 
     181             :   // 13. Set listFormat.[[Type]] to t.
     182         995 :   list_format->set_type(type_enum);
     183             : 
     184             :   // NOTE: Keep the old way of GetOptions on style for now. I discovered a
     185             :   // disadvantage of following the latest spec and proposed to roll back that
     186             :   // part in https://github.com/tc39/proposal-intl-list-format/pull/40
     187             : 
     188             :   // Let s be ? GetOption(options, "style", "string",
     189             :   //                          «"long", "short", "narrow"», "long").
     190             :   Maybe<Style> maybe_style = Intl::GetStringOption<Style>(
     191             :       isolate, options, "style", "Intl.ListFormat", {"long", "short", "narrow"},
     192        2985 :       {Style::LONG, Style::SHORT, Style::NARROW}, Style::LONG);
     193         995 :   MAYBE_RETURN(maybe_style, MaybeHandle<JSListFormat>());
     194             :   Style style_enum = maybe_style.FromJust();
     195             : 
     196             :   // If _style_ is `"narrow"` and _type_ is not `"unit"`, throw a *RangeError*
     197             :   // exception.
     198         986 :   if (style_enum == Style::NARROW && type_enum != Type::UNIT) {
     199          86 :     THROW_NEW_ERROR(
     200             :         isolate, NewRangeError(MessageTemplate::kIllegalTypeWhileStyleNarrow),
     201             :         JSListFormat);
     202             :   }
     203             : 
     204             :   // 17. Set listFormat.[[Style]] to s.
     205         900 :   list_format->set_style(style_enum);
     206             : 
     207        1800 :   icu::Locale icu_locale = r.icu_locale;
     208         900 :   UErrorCode status = U_ZERO_ERROR;
     209             :   icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
     210         900 :       icu_locale, GetIcuStyleString(style_enum, type_enum), status);
     211         900 :   if (U_FAILURE(status)) {
     212           0 :     delete formatter;
     213           0 :     FATAL("Failed to create ICU list formatter, are ICU data files missing?");
     214             :   }
     215         900 :   CHECK_NOT_NULL(formatter);
     216             : 
     217             :   Handle<Managed<icu::ListFormatter>> managed_formatter =
     218         900 :       Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
     219             : 
     220         900 :   list_format->set_icu_formatter(*managed_formatter);
     221         900 :   return list_format;
     222             : }
     223             : 
     224             : // ecma402 #sec-intl.listformat.prototype.resolvedoptions
     225         315 : Handle<JSObject> JSListFormat::ResolvedOptions(Isolate* isolate,
     226             :                                                Handle<JSListFormat> format) {
     227             :   Factory* factory = isolate->factory();
     228             :   // 4. Let options be ! ObjectCreate(%ObjectPrototype%).
     229         315 :   Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
     230             : 
     231             :   // 5.  For each row of Table 1, except the header row, do
     232             :   //  Table 1: Resolved Options of ListFormat Instances
     233             :   //  Internal Slot    Property
     234             :   //  [[Locale]]       "locale"
     235             :   //  [[Type]]         "type"
     236             :   //  [[Style]]        "style"
     237         630 :   Handle<String> locale(format->locale(), isolate);
     238             :   JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
     239         315 :                         NONE);
     240             :   JSObject::AddProperty(isolate, result, factory->type_string(),
     241         630 :                         format->TypeAsString(), NONE);
     242             :   JSObject::AddProperty(isolate, result, factory->style_string(),
     243         630 :                         format->StyleAsString(), NONE);
     244             :   // 6. Return options.
     245         315 :   return result;
     246             : }
     247             : 
     248         315 : Handle<String> JSListFormat::StyleAsString() const {
     249         315 :   switch (style()) {
     250             :     case Style::LONG:
     251         450 :       return GetReadOnlyRoots().long_string_handle();
     252             :     case Style::SHORT:
     253         144 :       return GetReadOnlyRoots().short_string_handle();
     254             :     case Style::NARROW:
     255          36 :       return GetReadOnlyRoots().narrow_string_handle();
     256             :     case Style::COUNT:
     257           0 :       UNREACHABLE();
     258             :   }
     259           0 : }
     260             : 
     261         315 : Handle<String> JSListFormat::TypeAsString() const {
     262         315 :   switch (type()) {
     263             :     case Type::CONJUNCTION:
     264         378 :       return GetReadOnlyRoots().conjunction_string_handle();
     265             :     case Type::DISJUNCTION:
     266         108 :       return GetReadOnlyRoots().disjunction_string_handle();
     267             :     case Type::UNIT:
     268         144 :       return GetReadOnlyRoots().unit_string_handle();
     269             :     case Type::COUNT:
     270           0 :       UNREACHABLE();
     271             :   }
     272           0 : }
     273             : 
     274             : namespace {
     275             : 
     276        1899 : MaybeHandle<JSArray> GenerateListFormatParts(
     277             :     Isolate* isolate, const icu::UnicodeString& formatted,
     278        1899 :     const std::vector<icu::FieldPosition>& positions) {
     279             :   Factory* factory = isolate->factory();
     280             :   Handle<JSArray> array =
     281        1899 :       factory->NewJSArray(static_cast<int>(positions.size()));
     282             :   int index = 0;
     283             :   int prev_item_end_index = 0;
     284             :   Handle<String> substring;
     285        8478 :   for (const icu::FieldPosition pos : positions) {
     286        4680 :     CHECK(pos.getBeginIndex() >= prev_item_end_index);
     287        4680 :     CHECK(pos.getField() == ULISTFMT_ELEMENT_FIELD);
     288        4680 :     if (pos.getBeginIndex() != prev_item_end_index) {
     289        5760 :       ASSIGN_RETURN_ON_EXCEPTION(
     290             :           isolate, substring,
     291             :           Intl::ToString(isolate, formatted, prev_item_end_index,
     292             :                          pos.getBeginIndex()),
     293             :           JSArray);
     294             :       Intl::AddElement(isolate, array, index++, factory->literal_string(),
     295        2880 :                        substring);
     296             :     }
     297        9360 :     ASSIGN_RETURN_ON_EXCEPTION(
     298             :         isolate, substring,
     299             :         Intl::ToString(isolate, formatted, pos.getBeginIndex(),
     300             :                        pos.getEndIndex()),
     301             :         JSArray);
     302             :     Intl::AddElement(isolate, array, index++, factory->element_string(),
     303        4680 :                      substring);
     304        4680 :     prev_item_end_index = pos.getEndIndex();
     305        4680 :   }
     306        1899 :   if (prev_item_end_index != formatted.length()) {
     307           0 :     ASSIGN_RETURN_ON_EXCEPTION(
     308             :         isolate, substring,
     309             :         Intl::ToString(isolate, formatted, prev_item_end_index,
     310             :                        formatted.length()),
     311             :         JSArray);
     312             :     Intl::AddElement(isolate, array, index++, factory->literal_string(),
     313           0 :                      substring);
     314             :   }
     315        1899 :   return array;
     316             : }
     317             : 
     318             : // Get all the FieldPosition into a vector from FieldPositionIterator and return
     319             : // them in output order.
     320        1899 : std::vector<icu::FieldPosition> GenerateFieldPosition(
     321             :     icu::FieldPositionIterator iter) {
     322             :   std::vector<icu::FieldPosition> positions;
     323        1899 :   icu::FieldPosition pos;
     324        9459 :   while (iter.next(pos)) {
     325             :     // Only take the information of the ULISTFMT_ELEMENT_FIELD field.
     326        7560 :     if (pos.getField() == ULISTFMT_ELEMENT_FIELD) {
     327        4680 :       positions.push_back(pos);
     328             :     }
     329             :   }
     330             :   // Because the format may reorder the items, ICU FieldPositionIterator
     331             :   // keeps the order for FieldPosition based on the order of the input items.
     332             :   // But the formatToParts API in ECMA402 expects formatted output order.
     333             :   // Therefore we have to sort based on beginIndex of the FieldPosition.
     334             :   // An example is the "ur" (Urdu) locale with type: "unit", where the
     335             :   // main text flows from right to left, the formatted list of unit should flow
     336             :   // from left to right and therefore in the memory the formatted result will
     337             :   // put the first item last in the result string according to the current
     338             :   // CLDR patterns.
     339             :   // See 'listPattern' pattern in
     340             :   // third_party/icu/source/data/locales/ur_IN.txt
     341             :   std::sort(positions.begin(), positions.end(),
     342             :             [](icu::FieldPosition a, icu::FieldPosition b) {
     343             :               return a.getBeginIndex() < b.getBeginIndex();
     344        1899 :             });
     345        1899 :   return positions;
     346             : }
     347             : 
     348             : // Extract String from JSArray into array of UnicodeString
     349        6444 : Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray(
     350             :     Isolate* isolate, Handle<JSArray> array) {
     351             :   Factory* factory = isolate->factory();
     352             :   // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
     353             :   // elements in order. But if it is a holey array, it will cause the exception
     354             :   // with the IsString check.
     355        6444 :   auto* accessor = array->GetElementsAccessor();
     356       12888 :   uint32_t length = accessor->NumberOfElements(*array);
     357             : 
     358             :   // ecma402 #sec-createpartsfromlist
     359             :   // 2. If list contains any element value such that Type(value) is not String,
     360             :   // throw a TypeError exception.
     361             :   //
     362             :   // Per spec it looks like we're supposed to throw a TypeError exception if the
     363             :   // item isn't already a string, rather than coercing to a string.
     364             :   std::vector<icu::UnicodeString> result;
     365      195966 :   for (uint32_t i = 0; i < length; i++) {
     366             :     DCHECK(accessor->HasElement(*array, i));
     367      384624 :     Handle<Object> item = accessor->Get(array, i);
     368             :     DCHECK(!item.is_null());
     369      384624 :     if (!item->IsString()) {
     370       11160 :       THROW_NEW_ERROR_RETURN_VALUE(
     371             :           isolate,
     372             :           NewTypeError(MessageTemplate::kArrayItemNotType,
     373             :                        factory->list_string(),
     374             :                        // TODO(ftang): For dictionary-mode arrays, i isn't
     375             :                        // actually the index in the array but the index in the
     376             :                        // dictionary.
     377             :                        factory->NewNumber(i), factory->String_string()),
     378             :           Nothing<std::vector<icu::UnicodeString>>());
     379             :     }
     380             :     result.push_back(
     381      379044 :         Intl::ToICUUnicodeString(isolate, Handle<String>::cast(item)));
     382             :   }
     383             :   DCHECK(!array->HasDictionaryElements());
     384        6444 :   return Just(result);
     385             : }
     386             : 
     387             : }  // namespace
     388             : 
     389             : // ecma402 #sec-formatlist
     390        3159 : MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
     391             :                                              Handle<JSListFormat> format,
     392             :                                              Handle<JSArray> list) {
     393             :   DCHECK(!list->IsUndefined());
     394             :   // ecma402 #sec-createpartsfromlist
     395             :   // 2. If list contains any element value such that Type(value) is not String,
     396             :   // throw a TypeError exception.
     397             :   Maybe<std::vector<icu::UnicodeString>> maybe_array =
     398        3159 :       ToUnicodeStringArray(isolate, list);
     399        3159 :   MAYBE_RETURN(maybe_array, Handle<String>());
     400        1755 :   std::vector<icu::UnicodeString> array = maybe_array.FromJust();
     401             : 
     402        3510 :   icu::ListFormatter* formatter = format->icu_formatter()->raw();
     403        1755 :   CHECK_NOT_NULL(formatter);
     404             : 
     405        1755 :   UErrorCode status = U_ZERO_ERROR;
     406        1755 :   icu::UnicodeString formatted;
     407        1755 :   formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted,
     408        1755 :                     status);
     409             :   DCHECK(U_SUCCESS(status));
     410             : 
     411        1755 :   return Intl::ToString(isolate, formatted);
     412             : }
     413             : 
     414          41 : const std::set<std::string>& JSListFormat::GetAvailableLocales() {
     415             :   // Since ListFormatter does not have a method to list all supported
     416             :   // locales, use the one in icu::Locale per comments in
     417             :   // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
     418        1045 :   return Intl::GetAvailableLocalesForLocale();
     419             : }
     420             : 
     421             : // ecma402 #sec-formatlisttoparts
     422        3285 : MaybeHandle<JSArray> JSListFormat::FormatListToParts(
     423             :     Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list) {
     424             :   DCHECK(!list->IsUndefined());
     425             :   // ecma402 #sec-createpartsfromlist
     426             :   // 2. If list contains any element value such that Type(value) is not String,
     427             :   // throw a TypeError exception.
     428             :   Maybe<std::vector<icu::UnicodeString>> maybe_array =
     429        3285 :       ToUnicodeStringArray(isolate, list);
     430        3285 :   MAYBE_RETURN(maybe_array, Handle<JSArray>());
     431        1899 :   std::vector<icu::UnicodeString> array = maybe_array.FromJust();
     432             : 
     433        3798 :   icu::ListFormatter* formatter = format->icu_formatter()->raw();
     434        1899 :   CHECK_NOT_NULL(formatter);
     435             : 
     436        1899 :   UErrorCode status = U_ZERO_ERROR;
     437        1899 :   icu::UnicodeString formatted;
     438        3798 :   icu::FieldPositionIterator iter;
     439        1899 :   formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted,
     440        1899 :                     &iter, status);
     441             :   DCHECK(U_SUCCESS(status));
     442             : 
     443        3798 :   std::vector<icu::FieldPosition> field_positions = GenerateFieldPosition(iter);
     444        1899 :   return GenerateListFormatParts(isolate, formatted, field_positions);
     445             : }
     446             : }  // namespace internal
     447      178779 : }  // namespace v8

Generated by: LCOV version 1.10