Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-list-format.h"
10 :
11 : #include <memory>
12 : #include <vector>
13 :
14 : #include "src/elements-inl.h"
15 : #include "src/elements.h"
16 : #include "src/heap/factory.h"
17 : #include "src/isolate.h"
18 : #include "src/objects-inl.h"
19 : #include "src/objects/intl-objects.h"
20 : #include "src/objects/js-array-inl.h"
21 : #include "src/objects/js-list-format-inl.h"
22 : #include "src/objects/managed.h"
23 : #include "unicode/fieldpos.h"
24 : #include "unicode/fpositer.h"
25 : #include "unicode/listformatter.h"
26 : #include "unicode/ulistformatter.h"
27 :
28 : namespace v8 {
29 : namespace internal {
30 :
31 : namespace {
// ICU list-pattern style names returned by GetIcuStyleString and handed to
// icu::ListFormatter::createInstance. They are constexpr arrays so that
// neither the characters nor the names themselves can be reassigned at
// runtime (the previous `const char*` globals were mutable pointers).
constexpr char kStandard[] = "standard";
constexpr char kOr[] = "or";
constexpr char kUnit[] = "unit";
constexpr char kStandardShort[] = "standard-short";
constexpr char kUnitShort[] = "unit-short";
constexpr char kUnitNarrow[] = "unit-narrow";
38 :
39 900 : const char* GetIcuStyleString(JSListFormat::Style style,
40 : JSListFormat::Type type) {
41 900 : switch (type) {
42 : case JSListFormat::Type::CONJUNCTION:
43 513 : switch (style) {
44 : case JSListFormat::Style::LONG:
45 414 : return kStandard;
46 : case JSListFormat::Style::SHORT:
47 99 : return kStandardShort;
48 : // NARROW is now not allowed if type is not unit
49 : // It is impossible to reach because we've already thrown a RangeError
50 : // when style is "narrow" and type is not "unit".
51 : case JSListFormat::Style::NARROW:
52 : case JSListFormat::Style::COUNT:
53 0 : UNREACHABLE();
54 : }
55 : case JSListFormat::Type::DISJUNCTION:
56 162 : switch (style) {
57 : // Currently, ListFormat::createInstance on "or-short"
58 : // will fail so we use "or" here.
59 : // See https://unicode.org/cldr/trac/ticket/11254
60 : // TODO(ftang): change to return kOr or kOrShort depend on
61 : // style after the above issue fixed in CLDR/ICU.
62 : // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
63 : // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
64 : case JSListFormat::Style::LONG:
65 : case JSListFormat::Style::SHORT:
66 162 : return kOr;
67 : // NARROW is now not allowed if type is not unit
68 : // It is impossible to reach because we've already thrown a RangeError
69 : // when style is "narrow" and type is not "unit".
70 : case JSListFormat::Style::NARROW:
71 : case JSListFormat::Style::COUNT:
72 0 : UNREACHABLE();
73 : }
74 : case JSListFormat::Type::UNIT:
75 225 : switch (style) {
76 : case JSListFormat::Style::LONG:
77 99 : return kUnit;
78 : case JSListFormat::Style::SHORT:
79 63 : return kUnitShort;
80 : case JSListFormat::Style::NARROW:
81 63 : return kUnitNarrow;
82 : case JSListFormat::Style::COUNT:
83 0 : UNREACHABLE();
84 : }
85 : case JSListFormat::Type::COUNT:
86 0 : UNREACHABLE();
87 : }
88 0 : }
89 :
90 : } // namespace
91 :
92 0 : JSListFormat::Style get_style(const char* str) {
93 0 : switch (str[0]) {
94 : case 'n':
95 0 : if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
96 : break;
97 : case 'l':
98 0 : if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
99 : break;
100 : case 's':
101 0 : if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
102 : break;
103 : }
104 0 : UNREACHABLE();
105 : }
106 :
107 0 : JSListFormat::Type get_type(const char* str) {
108 0 : switch (str[0]) {
109 : case 'c':
110 0 : if (strcmp(&str[1], "onjunction") == 0)
111 : return JSListFormat::Type::CONJUNCTION;
112 : break;
113 : case 'd':
114 0 : if (strcmp(&str[1], "isjunction") == 0)
115 : return JSListFormat::Type::DISJUNCTION;
116 : break;
117 : case 'u':
118 0 : if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
119 : break;
120 : }
121 0 : UNREACHABLE();
122 : }
123 :
124 1013 : MaybeHandle<JSListFormat> JSListFormat::Initialize(
125 : Isolate* isolate, Handle<JSListFormat> list_format, Handle<Object> locales,
126 : Handle<Object> input_options) {
127 : list_format->set_flags(0);
128 :
129 : Handle<JSReceiver> options;
130 : // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
131 : Maybe<std::vector<std::string>> maybe_requested_locales =
132 1013 : Intl::CanonicalizeLocaleList(isolate, locales);
133 1013 : MAYBE_RETURN(maybe_requested_locales, Handle<JSListFormat>());
134 : std::vector<std::string> requested_locales =
135 1004 : maybe_requested_locales.FromJust();
136 :
137 : // 4. If options is undefined, then
138 1004 : if (input_options->IsUndefined(isolate)) {
139 : // 4. a. Let options be ObjectCreate(null).
140 207 : options = isolate->factory()->NewJSObjectWithNullProto();
141 : // 5. Else
142 : } else {
143 : // 5. a. Let options be ? ToObject(options).
144 1594 : ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
145 : Object::ToObject(isolate, input_options),
146 : JSListFormat);
147 : }
148 :
149 : // Note: No need to create a record. It's not observable.
150 : // 6. Let opt be a new Record.
151 :
152 : // 7. Let matcher be ? GetOption(options, "localeMatcher", "string", «
153 : // "lookup", "best fit" », "best fit").
154 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
155 1004 : Intl::GetLocaleMatcher(isolate, options, "Intl.ListFormat");
156 1004 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSListFormat>());
157 :
158 : // 8. Set opt.[[localeMatcher]] to matcher.
159 : Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
160 :
161 : // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
162 : // requestedLocales, opt, undefined, localeData).
163 : Intl::ResolvedLocale r =
164 : Intl::ResolveLocale(isolate, JSListFormat::GetAvailableLocales(),
165 3012 : requested_locales, matcher, {});
166 :
167 : // 11. Set listFormat.[[Locale]] to r.[[Locale]].
168 : Handle<String> locale_str =
169 1004 : isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
170 1004 : list_format->set_locale(*locale_str);
171 :
172 : // 12. Let t be GetOption(options, "type", "string", «"conjunction",
173 : // "disjunction", "unit"», "conjunction").
174 : Maybe<Type> maybe_type = Intl::GetStringOption<Type>(
175 : isolate, options, "type", "Intl.ListFormat",
176 : {"conjunction", "disjunction", "unit"},
177 3012 : {Type::CONJUNCTION, Type::DISJUNCTION, Type::UNIT}, Type::CONJUNCTION);
178 1004 : MAYBE_RETURN(maybe_type, MaybeHandle<JSListFormat>());
179 : Type type_enum = maybe_type.FromJust();
180 :
181 : // 13. Set listFormat.[[Type]] to t.
182 995 : list_format->set_type(type_enum);
183 :
184 : // NOTE: Keep the old way of GetOptions on style for now. I discover a
185 : // disadvantage of following the lastest spec and propose to rollback that
186 : // part in https://github.com/tc39/proposal-intl-list-format/pull/40
187 :
188 : // Let s be ? GetOption(options, "style", "string",
189 : // «"long", "short", "narrow"», "long").
190 : Maybe<Style> maybe_style = Intl::GetStringOption<Style>(
191 : isolate, options, "style", "Intl.ListFormat", {"long", "short", "narrow"},
192 2985 : {Style::LONG, Style::SHORT, Style::NARROW}, Style::LONG);
193 995 : MAYBE_RETURN(maybe_style, MaybeHandle<JSListFormat>());
194 : Style style_enum = maybe_style.FromJust();
195 :
196 : // If _style_ is `"narrow"` and _type_ is not `"unit"`, throw a *RangeError*
197 : // exception.
198 986 : if (style_enum == Style::NARROW && type_enum != Type::UNIT) {
199 172 : THROW_NEW_ERROR(
200 : isolate, NewRangeError(MessageTemplate::kIllegalTypeWhileStyleNarrow),
201 : JSListFormat);
202 : }
203 :
204 : // 17. Set listFormat.[[Style]] to s.
205 900 : list_format->set_style(style_enum);
206 :
207 1800 : icu::Locale icu_locale = r.icu_locale;
208 900 : UErrorCode status = U_ZERO_ERROR;
209 900 : icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
210 900 : icu_locale, GetIcuStyleString(style_enum, type_enum), status);
211 900 : if (U_FAILURE(status)) {
212 0 : delete formatter;
213 0 : FATAL("Failed to create ICU list formatter, are ICU data files missing?");
214 : }
215 900 : CHECK_NOT_NULL(formatter);
216 :
217 : Handle<Managed<icu::ListFormatter>> managed_formatter =
218 900 : Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
219 :
220 900 : list_format->set_icu_formatter(*managed_formatter);
221 900 : return list_format;
222 : }
223 :
224 : // ecma402 #sec-intl.pluralrules.prototype.resolvedoptions
225 315 : Handle<JSObject> JSListFormat::ResolvedOptions(Isolate* isolate,
226 : Handle<JSListFormat> format) {
227 : Factory* factory = isolate->factory();
228 : // 4. Let options be ! ObjectCreate(%ObjectPrototype%).
229 315 : Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
230 :
231 : // 5. For each row of Table 1, except the header row, do
232 : // Table 1: Resolved Options of ListFormat Instances
233 : // Internal Slot Property
234 : // [[Locale]] "locale"
235 : // [[Type]] "type"
236 : // [[Style]] "style"
237 : Handle<String> locale(format->locale(), isolate);
238 315 : JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
239 315 : NONE);
240 630 : JSObject::AddProperty(isolate, result, factory->type_string(),
241 315 : format->TypeAsString(), NONE);
242 630 : JSObject::AddProperty(isolate, result, factory->style_string(),
243 315 : format->StyleAsString(), NONE);
244 : // 6. Return options.
245 315 : return result;
246 : }
247 :
248 315 : Handle<String> JSListFormat::StyleAsString() const {
249 315 : switch (style()) {
250 : case Style::LONG:
251 : return GetReadOnlyRoots().long_string_handle();
252 : case Style::SHORT:
253 : return GetReadOnlyRoots().short_string_handle();
254 : case Style::NARROW:
255 : return GetReadOnlyRoots().narrow_string_handle();
256 : case Style::COUNT:
257 0 : UNREACHABLE();
258 : }
259 0 : }
260 :
261 315 : Handle<String> JSListFormat::TypeAsString() const {
262 315 : switch (type()) {
263 : case Type::CONJUNCTION:
264 : return GetReadOnlyRoots().conjunction_string_handle();
265 : case Type::DISJUNCTION:
266 : return GetReadOnlyRoots().disjunction_string_handle();
267 : case Type::UNIT:
268 : return GetReadOnlyRoots().unit_string_handle();
269 : case Type::COUNT:
270 0 : UNREACHABLE();
271 : }
272 0 : }
273 :
274 : namespace {
275 :
276 1899 : MaybeHandle<JSArray> GenerateListFormatParts(
277 : Isolate* isolate, const icu::UnicodeString& formatted,
278 : const std::vector<icu::FieldPosition>& positions) {
279 : Factory* factory = isolate->factory();
280 : Handle<JSArray> array =
281 1899 : factory->NewJSArray(static_cast<int>(positions.size()));
282 : int index = 0;
283 : int prev_item_end_index = 0;
284 : Handle<String> substring;
285 11259 : for (const icu::FieldPosition pos : positions) {
286 4680 : CHECK(pos.getBeginIndex() >= prev_item_end_index);
287 4680 : CHECK(pos.getField() == ULISTFMT_ELEMENT_FIELD);
288 4680 : if (pos.getBeginIndex() != prev_item_end_index) {
289 5760 : ASSIGN_RETURN_ON_EXCEPTION(
290 : isolate, substring,
291 : Intl::ToString(isolate, formatted, prev_item_end_index,
292 : pos.getBeginIndex()),
293 : JSArray);
294 2880 : Intl::AddElement(isolate, array, index++, factory->literal_string(),
295 2880 : substring);
296 : }
297 9360 : ASSIGN_RETURN_ON_EXCEPTION(
298 : isolate, substring,
299 : Intl::ToString(isolate, formatted, pos.getBeginIndex(),
300 : pos.getEndIndex()),
301 : JSArray);
302 4680 : Intl::AddElement(isolate, array, index++, factory->element_string(),
303 4680 : substring);
304 : prev_item_end_index = pos.getEndIndex();
305 : }
306 1899 : if (prev_item_end_index != formatted.length()) {
307 0 : ASSIGN_RETURN_ON_EXCEPTION(
308 : isolate, substring,
309 : Intl::ToString(isolate, formatted, prev_item_end_index,
310 : formatted.length()),
311 : JSArray);
312 : Intl::AddElement(isolate, array, index++, factory->literal_string(),
313 0 : substring);
314 : }
315 1899 : return array;
316 : }
317 :
318 : // Get all the FieldPosition into a vector from FieldPositionIterator and return
319 : // them in output order.
320 1899 : std::vector<icu::FieldPosition> GenerateFieldPosition(
321 : icu::FieldPositionIterator iter) {
322 : std::vector<icu::FieldPosition> positions;
323 1899 : icu::FieldPosition pos;
324 9459 : while (iter.next(pos)) {
325 : // Only take the information of the ULISTFMT_ELEMENT_FIELD field.
326 7560 : if (pos.getField() == ULISTFMT_ELEMENT_FIELD) {
327 4680 : positions.push_back(pos);
328 : }
329 : }
330 : // Because the format may reoder the items, ICU FieldPositionIterator
331 : // keep the order for FieldPosition based on the order of the input items.
332 : // But the formatToParts API in ECMA402 expects in formatted output order.
333 : // Therefore we have to sort based on beginIndex of the FieldPosition.
334 : // Example of such is in the "ur" (Urdu) locale with type: "unit", where the
335 : // main text flows from right to left, the formatted list of unit should flow
336 : // from left to right and therefore in the memory the formatted result will
337 : // put the first item on the last in the result string according the current
338 : // CLDR patterns.
339 : // See 'listPattern' pattern in
340 : // third_party/icu/source/data/locales/ur_IN.txt
341 : std::sort(positions.begin(), positions.end(),
342 : [](icu::FieldPosition a, icu::FieldPosition b) {
343 : return a.getBeginIndex() < b.getBeginIndex();
344 5868 : });
345 1899 : return positions;
346 : }
347 :
348 : // Extract String from JSArray into array of UnicodeString
349 6444 : Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray(
350 : Isolate* isolate, Handle<JSArray> array) {
351 : Factory* factory = isolate->factory();
352 : // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
353 : // elements in order. But if it is a holey array, it will cause the exception
354 : // with the IsString check.
355 6444 : auto* accessor = array->GetElementsAccessor();
356 12888 : uint32_t length = accessor->NumberOfElements(*array);
357 :
358 : // ecma402 #sec-createpartsfromlist
359 : // 2. If list contains any element value such that Type(value) is not String,
360 : // throw a TypeError exception.
361 : //
362 : // Per spec it looks like we're supposed to throw a TypeError exception if the
363 : // item isn't already a string, rather than coercing to a string.
364 6444 : std::vector<icu::UnicodeString> result;
365 385488 : for (uint32_t i = 0; i < length; i++) {
366 : DCHECK(accessor->HasElement(*array, i));
367 384624 : Handle<Object> item = accessor->Get(array, i);
368 : DCHECK(!item.is_null());
369 192312 : if (!item->IsString()) {
370 11160 : THROW_NEW_ERROR_RETURN_VALUE(
371 : isolate,
372 : NewTypeError(MessageTemplate::kArrayItemNotType,
373 : factory->list_string(),
374 : // TODO(ftang): For dictionary-mode arrays, i isn't
375 : // actually the index in the array but the index in the
376 : // dictionary.
377 : factory->NewNumber(i), factory->String_string()),
378 : Nothing<std::vector<icu::UnicodeString>>());
379 : }
380 : result.push_back(
381 379044 : Intl::ToICUUnicodeString(isolate, Handle<String>::cast(item)));
382 : }
383 : DCHECK(!array->HasDictionaryElements());
384 : return Just(result);
385 : }
386 :
387 : } // namespace
388 :
389 : // ecma402 #sec-formatlist
390 3159 : MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
391 : Handle<JSListFormat> format,
392 : Handle<JSArray> list) {
393 : DCHECK(!list->IsUndefined());
394 : // ecma402 #sec-createpartsfromlist
395 : // 2. If list contains any element value such that Type(value) is not String,
396 : // throw a TypeError exception.
397 : Maybe<std::vector<icu::UnicodeString>> maybe_array =
398 3159 : ToUnicodeStringArray(isolate, list);
399 3159 : MAYBE_RETURN(maybe_array, Handle<String>());
400 1755 : std::vector<icu::UnicodeString> array = maybe_array.FromJust();
401 :
402 : icu::ListFormatter* formatter = format->icu_formatter()->raw();
403 1755 : CHECK_NOT_NULL(formatter);
404 :
405 1755 : UErrorCode status = U_ZERO_ERROR;
406 1755 : icu::UnicodeString formatted;
407 : formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted,
408 1755 : status);
409 : DCHECK(U_SUCCESS(status));
410 :
411 1755 : return Intl::ToString(isolate, formatted);
412 : }
413 :
414 41 : const std::set<std::string>& JSListFormat::GetAvailableLocales() {
415 : // Since ListFormatter does not have a method to list all supported
416 : // locales, use the one in icu::Locale per comments in
417 : // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
418 1045 : return Intl::GetAvailableLocalesForLocale();
419 : }
420 :
421 : // ecma42 #sec-formatlisttoparts
422 3285 : MaybeHandle<JSArray> JSListFormat::FormatListToParts(
423 : Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list) {
424 : DCHECK(!list->IsUndefined());
425 : // ecma402 #sec-createpartsfromlist
426 : // 2. If list contains any element value such that Type(value) is not String,
427 : // throw a TypeError exception.
428 : Maybe<std::vector<icu::UnicodeString>> maybe_array =
429 3285 : ToUnicodeStringArray(isolate, list);
430 3285 : MAYBE_RETURN(maybe_array, Handle<JSArray>());
431 1899 : std::vector<icu::UnicodeString> array = maybe_array.FromJust();
432 :
433 : icu::ListFormatter* formatter = format->icu_formatter()->raw();
434 1899 : CHECK_NOT_NULL(formatter);
435 :
436 1899 : UErrorCode status = U_ZERO_ERROR;
437 1899 : icu::UnicodeString formatted;
438 3798 : icu::FieldPositionIterator iter;
439 : formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted,
440 1899 : &iter, status);
441 : DCHECK(U_SUCCESS(status));
442 :
443 3798 : std::vector<icu::FieldPosition> field_positions = GenerateFieldPosition(iter);
444 1899 : return GenerateListFormatParts(isolate, formatted, field_positions);
445 : }
446 : } // namespace internal
447 120216 : } // namespace v8
|