Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-collator.h"
10 :
11 : #include "src/isolate.h"
12 : #include "src/objects-inl.h"
13 : #include "src/objects/js-collator-inl.h"
14 : #include "unicode/coll.h"
15 : #include "unicode/locid.h"
16 : #include "unicode/strenum.h"
17 : #include "unicode/ucol.h"
18 : #include "unicode/uloc.h"
19 :
20 : namespace v8 {
21 : namespace internal {
22 :
23 : namespace {
24 :
// Parsed value of the Intl.Collator "usage" option.
enum class Usage {
  SORT,    // "sort"
  SEARCH,  // "search"
};
29 :
// Parsed value of the Intl.Collator "sensitivity" option.
enum class Sensitivity {
  kBase,       // "base"
  kAccent,     // "accent"
  kCase,       // "case"
  kVariant,    // "variant"
  kUndefined,  // Fallback used when the option was not supplied.
};
37 :
38 : // TODO(gsathya): Consider internalizing the value strings.
39 2255 : void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
40 : Handle<String> key, const char* value) {
41 2255 : CHECK_NOT_NULL(value);
42 : Handle<String> value_str =
43 2255 : isolate->factory()->NewStringFromAsciiChecked(value);
44 :
45 : // This is a brand new JSObject that shouldn't already have the same
46 : // key so this shouldn't fail.
47 4510 : CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_str,
48 : Just(kDontThrow))
49 : .FromJust());
50 2255 : }
51 :
52 902 : void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
53 : Handle<String> key, bool value) {
54 902 : Handle<Object> value_obj = isolate->factory()->ToBoolean(value);
55 :
56 : // This is a brand new JSObject that shouldn't already have the same
57 : // key so this shouldn't fail.
58 1804 : CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_obj,
59 : Just(kDontThrow))
60 : .FromJust());
61 902 : }
62 :
63 : } // anonymous namespace
64 :
65 : // static
66 451 : Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
67 : Handle<JSCollator> collator) {
68 : Handle<JSObject> options =
69 451 : isolate->factory()->NewJSObject(isolate->object_function());
70 :
71 902 : icu::Collator* icu_collator = collator->icu_collator()->raw();
72 451 : CHECK_NOT_NULL(icu_collator);
73 :
74 451 : UErrorCode status = U_ZERO_ERROR;
75 : bool numeric =
76 451 : icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON;
77 902 : CHECK(U_SUCCESS(status));
78 :
79 : const char* case_first = nullptr;
80 451 : status = U_ZERO_ERROR;
81 451 : switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) {
82 : case UCOL_LOWER_FIRST:
83 : case_first = "lower";
84 : break;
85 : case UCOL_UPPER_FIRST:
86 : case_first = "upper";
87 0 : break;
88 : default:
89 : case_first = "false";
90 : }
91 902 : CHECK(U_SUCCESS(status));
92 :
93 : const char* sensitivity = nullptr;
94 451 : status = U_ZERO_ERROR;
95 451 : switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) {
96 : case UCOL_PRIMARY: {
97 0 : CHECK(U_SUCCESS(status));
98 0 : status = U_ZERO_ERROR;
99 : // case level: true + s1 -> case, s1 -> base.
100 0 : if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) {
101 : sensitivity = "case";
102 : } else {
103 : sensitivity = "base";
104 : }
105 0 : CHECK(U_SUCCESS(status));
106 : break;
107 : }
108 : case UCOL_SECONDARY:
109 : sensitivity = "accent";
110 : break;
111 : case UCOL_TERTIARY:
112 : sensitivity = "variant";
113 451 : break;
114 : case UCOL_QUATERNARY:
115 : // We shouldn't get quaternary and identical from ICU, but if we do
116 : // put them into variant.
117 : sensitivity = "variant";
118 0 : break;
119 : default:
120 : sensitivity = "variant";
121 : }
122 902 : CHECK(U_SUCCESS(status));
123 :
124 451 : status = U_ZERO_ERROR;
125 : bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING,
126 451 : status) == UCOL_SHIFTED;
127 902 : CHECK(U_SUCCESS(status));
128 :
129 451 : status = U_ZERO_ERROR;
130 :
131 451 : icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status));
132 902 : CHECK(U_SUCCESS(status));
133 :
134 : const char* collation = "default";
135 : const char* usage = "sort";
136 : const char* collation_key = "co";
137 451 : const char* legacy_collation_key = uloc_toLegacyKey(collation_key);
138 : DCHECK_NOT_NULL(legacy_collation_key);
139 :
140 : char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
141 451 : status = U_ZERO_ERROR;
142 : int32_t length =
143 : icu_locale.getKeywordValue(legacy_collation_key, legacy_collation_value,
144 451 : ULOC_FULLNAME_CAPACITY, status);
145 :
146 : std::string locale;
147 451 : if (length > 0 && U_SUCCESS(status)) {
148 : const char* collation_value =
149 162 : uloc_toUnicodeLocaleType(collation_key, legacy_collation_value);
150 162 : CHECK_NOT_NULL(collation_value);
151 :
152 162 : if (strcmp(collation_value, "search") == 0) {
153 : usage = "search";
154 :
155 : // Search is disallowed as a collation value per spec. Let's
156 : // use `default`, instead.
157 : //
158 : // https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances
159 : collation = "default";
160 :
161 : // We clone the icu::Locale because we don't want the
162 : // icu_collator to be affected when we remove the collation key
163 : // below.
164 72 : icu::Locale new_icu_locale = icu_locale;
165 :
166 : // The spec forbids the search as a collation value in the
167 : // locale tag, so let's filter it out.
168 72 : status = U_ZERO_ERROR;
169 72 : new_icu_locale.setKeywordValue(legacy_collation_key, nullptr, status);
170 144 : CHECK(U_SUCCESS(status));
171 :
172 216 : locale = Intl::ToLanguageTag(new_icu_locale).FromJust();
173 : } else {
174 : collation = collation_value;
175 270 : locale = Intl::ToLanguageTag(icu_locale).FromJust();
176 : }
177 : } else {
178 867 : locale = Intl::ToLanguageTag(icu_locale).FromJust();
179 : }
180 :
181 : // 5. For each row of Table 2, except the header row, in table order, do
182 : // ...
183 : // Table 2: Resolved Options of Collator Instances
184 : // Internal Slot Property Extension Key
185 : // [[Locale] "locale"
186 : // [[Usage] "usage"
187 : // [[Sensitivity]] "sensitivity"
188 : // [[IgnorePunctuation]] "ignorePunctuation"
189 : // [[Collation]] "collation"
190 : // [[Numeric]] "numeric" kn
191 : // [[CaseFirst]] "caseFirst" kf
192 : CreateDataPropertyForOptions(
193 451 : isolate, options, isolate->factory()->locale_string(), locale.c_str());
194 : CreateDataPropertyForOptions(isolate, options,
195 451 : isolate->factory()->usage_string(), usage);
196 : CreateDataPropertyForOptions(
197 451 : isolate, options, isolate->factory()->sensitivity_string(), sensitivity);
198 : CreateDataPropertyForOptions(isolate, options,
199 : isolate->factory()->ignorePunctuation_string(),
200 902 : ignore_punctuation);
201 : CreateDataPropertyForOptions(
202 451 : isolate, options, isolate->factory()->collation_string(), collation);
203 : CreateDataPropertyForOptions(isolate, options,
204 902 : isolate->factory()->numeric_string(), numeric);
205 : CreateDataPropertyForOptions(
206 451 : isolate, options, isolate->factory()->caseFirst_string(), case_first);
207 902 : return options;
208 : }
209 :
210 : namespace {
211 :
212 5 : Intl::CaseFirst ToCaseFirst(const char* str) {
213 5 : if (strcmp(str, "upper") == 0) return Intl::CaseFirst::kUpper;
214 5 : if (strcmp(str, "lower") == 0) return Intl::CaseFirst::kLower;
215 5 : if (strcmp(str, "false") == 0) return Intl::CaseFirst::kFalse;
216 0 : return Intl::CaseFirst::kUndefined;
217 : }
218 :
219 : UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) {
220 221 : switch (case_first) {
221 : case Intl::CaseFirst::kUpper:
222 : return UCOL_UPPER_FIRST;
223 : case Intl::CaseFirst::kLower:
224 : return UCOL_LOWER_FIRST;
225 : case Intl::CaseFirst::kFalse:
226 : case Intl::CaseFirst::kUndefined:
227 : return UCOL_OFF;
228 : }
229 : }
230 :
231 5 : void SetNumericOption(icu::Collator* icu_collator, bool numeric) {
232 5 : CHECK_NOT_NULL(icu_collator);
233 5 : UErrorCode status = U_ZERO_ERROR;
234 : icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
235 5 : numeric ? UCOL_ON : UCOL_OFF, status);
236 10 : CHECK(U_SUCCESS(status));
237 5 : }
238 :
239 221 : void SetCaseFirstOption(icu::Collator* icu_collator,
240 : Intl::CaseFirst case_first) {
241 221 : CHECK_NOT_NULL(icu_collator);
242 221 : UErrorCode status = U_ZERO_ERROR;
243 : icu_collator->setAttribute(UCOL_CASE_FIRST, ToUColAttributeValue(case_first),
244 442 : status);
245 442 : CHECK(U_SUCCESS(status));
246 221 : }
247 :
248 : } // anonymous namespace
249 :
250 : // static
251 6075 : MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
252 : Handle<JSCollator> collator,
253 : Handle<Object> locales,
254 : Handle<Object> options_obj) {
255 : // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
256 : Maybe<std::vector<std::string>> maybe_requested_locales =
257 6075 : Intl::CanonicalizeLocaleList(isolate, locales);
258 6075 : MAYBE_RETURN(maybe_requested_locales, Handle<JSCollator>());
259 : std::vector<std::string> requested_locales =
260 6075 : maybe_requested_locales.FromJust();
261 :
262 : // 2. If options is undefined, then
263 12150 : if (options_obj->IsUndefined(isolate)) {
264 : // 2. a. Let options be ObjectCreate(null).
265 5697 : options_obj = isolate->factory()->NewJSObjectWithNullProto();
266 : } else {
267 : // 3. Else
268 : // 3. a. Let options be ? ToObject(options).
269 756 : ASSIGN_RETURN_ON_EXCEPTION(
270 : isolate, options_obj,
271 : Object::ToObject(isolate, options_obj, "Intl.Collator"), JSCollator);
272 : }
273 :
274 : // At this point, options_obj can either be a JSObject or a JSProxy only.
275 6075 : Handle<JSReceiver> options = Handle<JSReceiver>::cast(options_obj);
276 :
277 : // 4. Let usage be ? GetOption(options, "usage", "string", « "sort",
278 : // "search" », "sort").
279 : Maybe<Usage> maybe_usage = Intl::GetStringOption<Usage>(
280 : isolate, options, "usage", "Intl.Collator", {"sort", "search"},
281 18225 : {Usage::SORT, Usage::SEARCH}, Usage::SORT);
282 6075 : MAYBE_RETURN(maybe_usage, MaybeHandle<JSCollator>());
283 : Usage usage = maybe_usage.FromJust();
284 :
285 : // 9. Let matcher be ? GetOption(options, "localeMatcher", "string",
286 : // « "lookup", "best fit" », "best fit").
287 : // 10. Set opt.[[localeMatcher]] to matcher.
288 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
289 6075 : Intl::GetLocaleMatcher(isolate, options, "Intl.Collator");
290 6075 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSCollator>());
291 : Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
292 :
293 : // 11. Let numeric be ? GetOption(options, "numeric", "boolean",
294 : // undefined, undefined).
295 : // 12. If numeric is not undefined, then
296 : // a. Let numeric be ! ToString(numeric).
297 : //
298 : // Note: We omit the ToString(numeric) operation as it's not
299 : // observable. Intl::GetBoolOption returns a Boolean and
300 : // ToString(Boolean) is not side-effecting.
301 : //
302 : // 13. Set opt.[[kn]] to numeric.
303 : bool numeric;
304 : Maybe<bool> found_numeric = Intl::GetBoolOption(isolate, options, "numeric",
305 6075 : "Intl.Collator", &numeric);
306 6075 : MAYBE_RETURN(found_numeric, MaybeHandle<JSCollator>());
307 :
308 : // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string",
309 : // « "upper", "lower", "false" », undefined).
310 : Maybe<Intl::CaseFirst> maybe_case_first =
311 6075 : Intl::GetCaseFirst(isolate, options, "Intl.Collator");
312 6075 : MAYBE_RETURN(maybe_case_first, MaybeHandle<JSCollator>());
313 : Intl::CaseFirst case_first = maybe_case_first.FromJust();
314 :
315 : // The relevant unicode extensions accepted by Collator as specified here:
316 : // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
317 : //
318 : // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
319 12150 : std::set<std::string> relevant_extension_keys{"co", "kn", "kf"};
320 :
321 : // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]],
322 : // requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]],
323 : // localeData).
324 : Intl::ResolvedLocale r =
325 : Intl::ResolveLocale(isolate, JSCollator::GetAvailableLocales(),
326 12150 : requested_locales, matcher, relevant_extension_keys);
327 :
328 : // 18. Set collator.[[Locale]] to r.[[locale]].
329 12150 : icu::Locale icu_locale = r.icu_locale;
330 : DCHECK(!icu_locale.isBogus());
331 :
332 : // 19. Let collation be r.[[co]].
333 :
334 : // 5. Set collator.[[Usage]] to usage.
335 : //
336 : // 6. If usage is "sort", then
337 : // a. Let localeData be %Collator%.[[SortLocaleData]].
338 : // 7. Else,
339 : // a. Let localeData be %Collator%.[[SearchLocaleData]].
340 : //
341 : // The Intl spec doesn't allow us to use "search" as an extension
342 : // value for collation as per:
343 : // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
344 : //
345 : // But the only way to pass the value "search" for collation from
346 : // the options object to ICU is to use the 'co' extension keyword.
347 : //
348 : // This will need to be filtered out when creating the
349 : // resolvedOptions object.
350 6075 : if (usage == Usage::SEARCH) {
351 81 : const char* key = uloc_toLegacyKey("co");
352 81 : CHECK_NOT_NULL(key);
353 81 : const char* value = uloc_toLegacyType(key, "search");
354 81 : CHECK_NOT_NULL(value);
355 81 : UErrorCode status = U_ZERO_ERROR;
356 81 : icu_locale.setKeywordValue(key, value, status);
357 162 : CHECK(U_SUCCESS(status));
358 : }
359 :
360 : // 20. If collation is null, let collation be "default".
361 : // 21. Set collator.[[Collation]] to collation.
362 : //
363 : // We don't store the collation value as per the above two steps
364 : // here. The collation value can be looked up from icu::Collator on
365 : // demand, as part of Intl.Collator.prototype.resolvedOptions.
366 :
367 6075 : UErrorCode status = U_ZERO_ERROR;
368 : std::unique_ptr<icu::Collator> icu_collator(
369 6075 : icu::Collator::createInstance(icu_locale, status));
370 6075 : if (U_FAILURE(status) || icu_collator.get() == nullptr) {
371 0 : status = U_ZERO_ERROR;
372 : // Remove extensions and try again.
373 0 : icu::Locale no_extension_locale(icu_locale.getBaseName());
374 : icu_collator.reset(
375 0 : icu::Collator::createInstance(no_extension_locale, status));
376 :
377 0 : if (U_FAILURE(status) || icu_collator.get() == nullptr) {
378 0 : FATAL("Failed to create ICU collator, are ICU data files missing?");
379 0 : }
380 : }
381 : DCHECK(U_SUCCESS(status));
382 6075 : CHECK_NOT_NULL(icu_collator.get());
383 :
384 : // 22. If relevantExtensionKeys contains "kn", then
385 : // a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true").
386 : //
387 : // If the numeric value is passed in through the options object,
388 : // then we use it. Otherwise, we check if the numeric value is
389 : // passed in through the unicode extensions.
390 6075 : status = U_ZERO_ERROR;
391 6075 : if (found_numeric.FromJust()) {
392 0 : SetNumericOption(icu_collator.get(), numeric);
393 : } else {
394 12150 : auto kn_extension_it = r.extensions.find("kn");
395 6075 : if (kn_extension_it != r.extensions.end()) {
396 10 : SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true"));
397 : }
398 : }
399 :
400 : // 23. If relevantExtensionKeys contains "kf", then
401 : // a. Set collator.[[CaseFirst]] to r.[[kf]].
402 : //
403 : // If the caseFirst value is passed in through the options object,
404 : // then we use it. Otherwise, we check if the caseFirst value is
405 : // passed in through the unicode extensions.
406 6075 : if (case_first != Intl::CaseFirst::kUndefined) {
407 216 : SetCaseFirstOption(icu_collator.get(), case_first);
408 : } else {
409 11718 : auto kf_extension_it = r.extensions.find("kf");
410 5859 : if (kf_extension_it != r.extensions.end()) {
411 : SetCaseFirstOption(icu_collator.get(),
412 5 : ToCaseFirst(kf_extension_it->second.c_str()));
413 : }
414 : }
415 :
416 : // Normalization is always on, by the spec. We are free to optimize
417 : // if the strings are already normalized (but we don't have a way to tell
418 : // that right now).
419 6075 : status = U_ZERO_ERROR;
420 6075 : icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
421 12150 : CHECK(U_SUCCESS(status));
422 :
423 : // 24. Let sensitivity be ? GetOption(options, "sensitivity",
424 : // "string", « "base", "accent", "case", "variant" », undefined).
425 : Maybe<Sensitivity> maybe_sensitivity = Intl::GetStringOption<Sensitivity>(
426 : isolate, options, "sensitivity", "Intl.Collator",
427 : {"base", "accent", "case", "variant"},
428 : {Sensitivity::kBase, Sensitivity::kAccent, Sensitivity::kCase,
429 : Sensitivity::kVariant},
430 18225 : Sensitivity::kUndefined);
431 6075 : MAYBE_RETURN(maybe_sensitivity, MaybeHandle<JSCollator>());
432 : Sensitivity sensitivity = maybe_sensitivity.FromJust();
433 :
434 : // 25. If sensitivity is undefined, then
435 6075 : if (sensitivity == Sensitivity::kUndefined) {
436 : // 25. a. If usage is "sort", then
437 6075 : if (usage == Usage::SORT) {
438 : // 25. a. i. Let sensitivity be "variant".
439 : sensitivity = Sensitivity::kVariant;
440 : }
441 : }
442 : // 26. Set collator.[[Sensitivity]] to sensitivity.
443 6075 : switch (sensitivity) {
444 : case Sensitivity::kBase:
445 0 : icu_collator->setStrength(icu::Collator::PRIMARY);
446 0 : break;
447 : case Sensitivity::kAccent:
448 0 : icu_collator->setStrength(icu::Collator::SECONDARY);
449 0 : break;
450 : case Sensitivity::kCase:
451 0 : icu_collator->setStrength(icu::Collator::PRIMARY);
452 0 : status = U_ZERO_ERROR;
453 0 : icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
454 0 : CHECK(U_SUCCESS(status));
455 : break;
456 : case Sensitivity::kVariant:
457 5994 : icu_collator->setStrength(icu::Collator::TERTIARY);
458 5994 : break;
459 : case Sensitivity::kUndefined:
460 : break;
461 : }
462 :
463 : // 27.Let ignorePunctuation be ? GetOption(options,
464 : // "ignorePunctuation", "boolean", undefined, false).
465 : bool ignore_punctuation;
466 : Maybe<bool> found_ignore_punctuation =
467 : Intl::GetBoolOption(isolate, options, "ignorePunctuation",
468 6075 : "Intl.Collator", &ignore_punctuation);
469 6075 : MAYBE_RETURN(found_ignore_punctuation, MaybeHandle<JSCollator>());
470 :
471 : // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
472 6075 : if (found_ignore_punctuation.FromJust() && ignore_punctuation) {
473 0 : status = U_ZERO_ERROR;
474 0 : icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
475 0 : CHECK(U_SUCCESS(status));
476 : }
477 :
478 : Handle<Managed<icu::Collator>> managed_collator =
479 : Managed<icu::Collator>::FromUniquePtr(isolate, 0,
480 12150 : std::move(icu_collator));
481 6075 : collator->set_icu_collator(*managed_collator);
482 :
483 : // 29. Return collator.
484 6075 : return collator;
485 : }
486 :
487 176 : const std::set<std::string>& JSCollator::GetAvailableLocales() {
488 : static base::LazyInstance<Intl::AvailableLocales<icu::Collator>>::type
489 : available_locales = LAZY_INSTANCE_INITIALIZER;
490 176 : return available_locales.Pointer()->Get();
491 : }
492 :
493 : } // namespace internal
494 178779 : } // namespace v8
|