Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-list-format.h"
10 :
11 : #include <memory>
12 : #include <vector>
13 :
14 : #include "src/elements-inl.h"
15 : #include "src/elements.h"
16 : #include "src/heap/factory.h"
17 : #include "src/isolate.h"
18 : #include "src/objects-inl.h"
19 : #include "src/objects/intl-objects.h"
20 : #include "src/objects/js-array-inl.h"
21 : #include "src/objects/js-list-format-inl.h"
22 : #include "src/objects/managed.h"
23 : #include "unicode/fieldpos.h"
24 : #include "unicode/fpositer.h"
25 : #include "unicode/listformatter.h"
26 : #include "unicode/ulistformatter.h"
27 :
28 : namespace v8 {
29 : namespace internal {
30 :
31 : namespace {
// ICU list-pattern style keywords. GetIcuStyleString() returns one of these
// and the result is handed to icu::ListFormatter::createInstance().
// Declared as constexpr arrays so the names cannot be reseated at runtime
// (the previous `const char*` form left the pointers themselves mutable).
constexpr char kStandard[] = "standard";
constexpr char kOr[] = "or";
constexpr char kUnit[] = "unit";
constexpr char kStandardShort[] = "standard-short";
constexpr char kOrShort[] = "or-short";
constexpr char kUnitShort[] = "unit-short";
constexpr char kStandardNarrow[] = "standard-narrow";
constexpr char kOrNarrow[] = "or-narrow";
constexpr char kUnitNarrow[] = "unit-narrow";
41 :
42 981 : const char* GetIcuStyleString(JSListFormat::Style style,
43 : JSListFormat::Type type) {
44 981 : switch (type) {
45 : case JSListFormat::Type::CONJUNCTION:
46 567 : switch (style) {
47 : case JSListFormat::Style::LONG:
48 414 : return kStandard;
49 : case JSListFormat::Style::SHORT:
50 99 : return kStandardShort;
51 : case JSListFormat::Style::NARROW:
52 54 : return kStandardNarrow;
53 : case JSListFormat::Style::COUNT:
54 0 : UNREACHABLE();
55 : }
56 : case JSListFormat::Type::DISJUNCTION:
57 189 : switch (style) {
58 : case JSListFormat::Style::LONG:
59 99 : return kOr;
60 : case JSListFormat::Style::SHORT:
61 63 : return kOrShort;
62 : case JSListFormat::Style::NARROW:
63 27 : return kOrNarrow;
64 : case JSListFormat::Style::COUNT:
65 0 : UNREACHABLE();
66 : }
67 : case JSListFormat::Type::UNIT:
68 225 : switch (style) {
69 : case JSListFormat::Style::LONG:
70 99 : return kUnit;
71 : case JSListFormat::Style::SHORT:
72 63 : return kUnitShort;
73 : case JSListFormat::Style::NARROW:
74 63 : return kUnitNarrow;
75 : case JSListFormat::Style::COUNT:
76 0 : UNREACHABLE();
77 : }
78 : case JSListFormat::Type::COUNT:
79 0 : UNREACHABLE();
80 : }
81 0 : }
82 :
83 : } // namespace
84 :
85 0 : JSListFormat::Style get_style(const char* str) {
86 0 : switch (str[0]) {
87 : case 'n':
88 0 : if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
89 : break;
90 : case 'l':
91 0 : if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
92 : break;
93 : case 's':
94 0 : if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
95 : break;
96 : }
97 0 : UNREACHABLE();
98 : }
99 :
100 0 : JSListFormat::Type get_type(const char* str) {
101 0 : switch (str[0]) {
102 : case 'c':
103 0 : if (strcmp(&str[1], "onjunction") == 0)
104 : return JSListFormat::Type::CONJUNCTION;
105 : break;
106 : case 'd':
107 0 : if (strcmp(&str[1], "isjunction") == 0)
108 : return JSListFormat::Type::DISJUNCTION;
109 : break;
110 : case 'u':
111 0 : if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
112 : break;
113 : }
114 0 : UNREACHABLE();
115 : }
116 :
117 1008 : MaybeHandle<JSListFormat> JSListFormat::Initialize(
118 : Isolate* isolate, Handle<JSListFormat> list_format, Handle<Object> locales,
119 : Handle<Object> input_options) {
120 : list_format->set_flags(0);
121 :
122 : Handle<JSReceiver> options;
123 : // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
124 : Maybe<std::vector<std::string>> maybe_requested_locales =
125 1008 : Intl::CanonicalizeLocaleList(isolate, locales);
126 1008 : MAYBE_RETURN(maybe_requested_locales, Handle<JSListFormat>());
127 : std::vector<std::string> requested_locales =
128 999 : maybe_requested_locales.FromJust();
129 :
130 : // 4. If options is undefined, then
131 999 : if (input_options->IsUndefined(isolate)) {
132 : // 4. a. Let options be ObjectCreate(null).
133 207 : options = isolate->factory()->NewJSObjectWithNullProto();
134 : // 5. Else
135 : } else {
136 : // 5. a. Let options be ? ToObject(options).
137 1584 : ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
138 : Object::ToObject(isolate, input_options),
139 : JSListFormat);
140 : }
141 :
142 : // Note: No need to create a record. It's not observable.
143 : // 6. Let opt be a new Record.
144 :
145 : // 7. Let matcher be ? GetOption(options, "localeMatcher", "string", «
146 : // "lookup", "best fit" », "best fit").
147 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
148 999 : Intl::GetLocaleMatcher(isolate, options, "Intl.ListFormat");
149 999 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSListFormat>());
150 :
151 : // 8. Set opt.[[localeMatcher]] to matcher.
152 : Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
153 :
154 : // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
155 : // requestedLocales, opt, undefined, localeData).
156 : Intl::ResolvedLocale r =
157 : Intl::ResolveLocale(isolate, JSListFormat::GetAvailableLocales(),
158 2997 : requested_locales, matcher, {});
159 :
160 : // 11. Set listFormat.[[Locale]] to r.[[Locale]].
161 : Handle<String> locale_str =
162 999 : isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
163 999 : list_format->set_locale(*locale_str);
164 :
165 : // 12. Let t be GetOption(options, "type", "string", «"conjunction",
166 : // "disjunction", "unit"», "conjunction").
167 : Maybe<Type> maybe_type = Intl::GetStringOption<Type>(
168 : isolate, options, "type", "Intl.ListFormat",
169 : {"conjunction", "disjunction", "unit"},
170 2997 : {Type::CONJUNCTION, Type::DISJUNCTION, Type::UNIT}, Type::CONJUNCTION);
171 999 : MAYBE_RETURN(maybe_type, MaybeHandle<JSListFormat>());
172 : Type type_enum = maybe_type.FromJust();
173 :
174 : // 13. Set listFormat.[[Type]] to t.
175 990 : list_format->set_type(type_enum);
176 :
177 : // 14. Let s be ? GetOption(options, "style", "string",
178 : // «"long", "short", "narrow"», "long").
179 : Maybe<Style> maybe_style = Intl::GetStringOption<Style>(
180 : isolate, options, "style", "Intl.ListFormat", {"long", "short", "narrow"},
181 2970 : {Style::LONG, Style::SHORT, Style::NARROW}, Style::LONG);
182 990 : MAYBE_RETURN(maybe_style, MaybeHandle<JSListFormat>());
183 : Style style_enum = maybe_style.FromJust();
184 :
185 : // 15. Set listFormat.[[Style]] to s.
186 981 : list_format->set_style(style_enum);
187 :
188 1962 : icu::Locale icu_locale = r.icu_locale;
189 981 : UErrorCode status = U_ZERO_ERROR;
190 981 : icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
191 981 : icu_locale, GetIcuStyleString(style_enum, type_enum), status);
192 981 : if (U_FAILURE(status)) {
193 0 : delete formatter;
194 0 : FATAL("Failed to create ICU list formatter, are ICU data files missing?");
195 : }
196 981 : CHECK_NOT_NULL(formatter);
197 :
198 : Handle<Managed<icu::ListFormatter>> managed_formatter =
199 981 : Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
200 :
201 981 : list_format->set_icu_formatter(*managed_formatter);
202 981 : return list_format;
203 : }
204 :
205 : // ecma402 #sec-intl.pluralrules.prototype.resolvedoptions
206 315 : Handle<JSObject> JSListFormat::ResolvedOptions(Isolate* isolate,
207 : Handle<JSListFormat> format) {
208 : Factory* factory = isolate->factory();
209 : // 4. Let options be ! ObjectCreate(%ObjectPrototype%).
210 315 : Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
211 :
212 : // 5. For each row of Table 1, except the header row, do
213 : // Table 1: Resolved Options of ListFormat Instances
214 : // Internal Slot Property
215 : // [[Locale]] "locale"
216 : // [[Type]] "type"
217 : // [[Style]] "style"
218 : Handle<String> locale(format->locale(), isolate);
219 315 : JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
220 315 : NONE);
221 630 : JSObject::AddProperty(isolate, result, factory->type_string(),
222 315 : format->TypeAsString(), NONE);
223 630 : JSObject::AddProperty(isolate, result, factory->style_string(),
224 315 : format->StyleAsString(), NONE);
225 : // 6. Return options.
226 315 : return result;
227 : }
228 :
229 315 : Handle<String> JSListFormat::StyleAsString() const {
230 315 : switch (style()) {
231 : case Style::LONG:
232 : return GetReadOnlyRoots().long_string_handle();
233 : case Style::SHORT:
234 : return GetReadOnlyRoots().short_string_handle();
235 : case Style::NARROW:
236 : return GetReadOnlyRoots().narrow_string_handle();
237 : case Style::COUNT:
238 0 : UNREACHABLE();
239 : }
240 0 : }
241 :
242 315 : Handle<String> JSListFormat::TypeAsString() const {
243 315 : switch (type()) {
244 : case Type::CONJUNCTION:
245 : return GetReadOnlyRoots().conjunction_string_handle();
246 : case Type::DISJUNCTION:
247 : return GetReadOnlyRoots().disjunction_string_handle();
248 : case Type::UNIT:
249 : return GetReadOnlyRoots().unit_string_handle();
250 : case Type::COUNT:
251 0 : UNREACHABLE();
252 : }
253 0 : }
254 :
255 : namespace {
256 :
257 : // Extract String from JSArray into array of UnicodeString
258 7686 : Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray(
259 : Isolate* isolate, Handle<JSArray> array) {
260 : Factory* factory = isolate->factory();
261 : // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
262 : // elements in order. But if it is a holey array, it will cause the exception
263 : // with the IsString check.
264 7686 : auto* accessor = array->GetElementsAccessor();
265 15372 : uint32_t length = accessor->NumberOfElements(*array);
266 :
267 : // ecma402 #sec-createpartsfromlist
268 : // 2. If list contains any element value such that Type(value) is not String,
269 : // throw a TypeError exception.
270 : //
271 : // Per spec it looks like we're supposed to throw a TypeError exception if the
272 : // item isn't already a string, rather than coercing to a string.
273 7686 : std::vector<icu::UnicodeString> result;
274 390186 : for (uint32_t i = 0; i < length; i++) {
275 : DCHECK(accessor->HasElement(*array, i));
276 389268 : Handle<Object> item = accessor->Get(array, i);
277 : DCHECK(!item.is_null());
278 194634 : if (!item->IsString()) {
279 13536 : THROW_NEW_ERROR_RETURN_VALUE(
280 : isolate,
281 : NewTypeError(MessageTemplate::kArrayItemNotType,
282 : factory->list_string(),
283 : // TODO(ftang): For dictionary-mode arrays, i isn't
284 : // actually the index in the array but the index in the
285 : // dictionary.
286 : factory->NewNumber(i), factory->String_string()),
287 : Nothing<std::vector<icu::UnicodeString>>());
288 : }
289 : result.push_back(
290 382500 : Intl::ToICUUnicodeString(isolate, Handle<String>::cast(item)));
291 : }
292 : DCHECK(!array->HasDictionaryElements());
293 : return Just(result);
294 : }
295 :
296 : template <typename T>
297 7686 : MaybeHandle<T> FormatListCommon(
298 : Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list,
299 : MaybeHandle<T> (*formatToResult)(Isolate*, const icu::FormattedList&)) {
300 : DCHECK(!list->IsUndefined());
301 : // ecma402 #sec-createpartsfromlist
302 : // 2. If list contains any element value such that Type(value) is not String,
303 : // throw a TypeError exception.
304 : Maybe<std::vector<icu::UnicodeString>> maybe_array =
305 7686 : ToUnicodeStringArray(isolate, list);
306 7686 : MAYBE_RETURN(maybe_array, Handle<T>());
307 4302 : std::vector<icu::UnicodeString> array = maybe_array.FromJust();
308 :
309 : icu::ListFormatter* formatter = format->icu_formatter()->raw();
310 4302 : CHECK_NOT_NULL(formatter);
311 :
312 4302 : UErrorCode status = U_ZERO_ERROR;
313 : icu::FormattedList formatted = formatter->formatStringsToValue(
314 8604 : array.data(), static_cast<int32_t>(array.size()), status);
315 4302 : if (U_FAILURE(status)) {
316 0 : THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), T);
317 : }
318 4302 : return formatToResult(isolate, formatted);
319 : }
320 :
321 : // A helper function to convert the FormattedList to a
322 : // MaybeHandle<String> for the implementation of format.
323 2025 : MaybeHandle<String> FormattedToString(Isolate* isolate,
324 : const icu::FormattedList& formatted) {
325 2025 : UErrorCode status = U_ZERO_ERROR;
326 4050 : icu::UnicodeString result = formatted.toString(status);
327 2025 : if (U_FAILURE(status)) {
328 0 : THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
329 : }
330 2025 : return Intl::ToString(isolate, result);
331 : }
332 :
333 9099 : Handle<String> IcuFieldIdToType(Isolate* isolate, int32_t field_id) {
334 9099 : switch (field_id) {
335 : case ULISTFMT_LITERAL_FIELD:
336 : return isolate->factory()->literal_string();
337 : case ULISTFMT_ELEMENT_FIELD:
338 : return isolate->factory()->element_string();
339 : default:
340 0 : UNREACHABLE();
341 : // To prevent MSVC from issuing C4715 warning.
342 : return Handle<String>();
343 : }
344 : }
345 :
346 : // A helper function to convert the FormattedList to a
347 : // MaybeHandle<JSArray> for the implementation of formatToParts.
348 2277 : MaybeHandle<JSArray> FormattedToJSArray(Isolate* isolate,
349 : const icu::FormattedList& formatted) {
350 : Handle<JSArray> array = isolate->factory()->NewJSArray(0);
351 4554 : icu::ConstrainedFieldPosition cfpos;
352 2277 : cfpos.constrainCategory(UFIELD_CATEGORY_LIST);
353 : int index = 0;
354 2277 : UErrorCode status = U_ZERO_ERROR;
355 4554 : icu::UnicodeString string = formatted.toString(status);
356 : Handle<String> substring;
357 20475 : while (formatted.nextPosition(cfpos, status) && U_SUCCESS(status)) {
358 18198 : ASSIGN_RETURN_ON_EXCEPTION(
359 : isolate, substring,
360 : Intl::ToString(isolate, string, cfpos.getStart(), cfpos.getLimit()),
361 : JSArray);
362 9099 : Intl::AddElement(isolate, array, index++,
363 9099 : IcuFieldIdToType(isolate, cfpos.getField()), substring);
364 : }
365 2277 : if (U_FAILURE(status)) {
366 0 : THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), JSArray);
367 : }
368 2277 : JSObject::ValidateElements(*array);
369 2277 : return array;
370 : }
371 :
372 : } // namespace
373 :
374 : // ecma402 #sec-formatlist
375 3726 : MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
376 : Handle<JSListFormat> format,
377 : Handle<JSArray> list) {
378 3726 : return FormatListCommon<String>(isolate, format, list, FormattedToString);
379 : }
380 :
381 : // ecma42 #sec-formatlisttoparts
382 3960 : MaybeHandle<JSArray> JSListFormat::FormatListToParts(
383 : Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list) {
384 3960 : return FormatListCommon<JSArray>(isolate, format, list, FormattedToJSArray);
385 : }
386 :
387 41 : const std::set<std::string>& JSListFormat::GetAvailableLocales() {
388 : // Since ListFormatter does not have a method to list all supported
389 : // locales, use the one in icu::Locale per comments in
390 : // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
391 1040 : return Intl::GetAvailableLocalesForLocale();
392 : }
393 :
394 : } // namespace internal
395 121996 : } // namespace v8
|