Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-collator.h"
10 :
11 : #include "src/isolate.h"
12 : #include "src/objects-inl.h"
13 : #include "src/objects/js-collator-inl.h"
14 : #include "unicode/coll.h"
15 : #include "unicode/locid.h"
16 : #include "unicode/strenum.h"
17 : #include "unicode/ucol.h"
18 : #include "unicode/uloc.h"
19 :
20 : namespace v8 {
21 : namespace internal {
22 :
23 : namespace {
24 :
// Parsed value of the Intl.Collator "usage" option ("sort" or "search").
enum class Usage { SORT, SEARCH };
29 :
// Parsed value of the Intl.Collator "sensitivity" option. kUndefined is the
// fallback used when the option is not supplied.
enum class Sensitivity {
  kBase,       // "base"
  kAccent,     // "accent"
  kCase,       // "case"
  kVariant,    // "variant"
  kUndefined,  // option absent
};
37 :
38 : // TODO(gsathya): Consider internalizing the value strings.
39 2255 : void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
40 : Handle<String> key, const char* value) {
41 2255 : CHECK_NOT_NULL(value);
42 : Handle<String> value_str =
43 2255 : isolate->factory()->NewStringFromAsciiChecked(value);
44 :
45 : // This is a brand new JSObject that shouldn't already have the same
46 : // key so this shouldn't fail.
47 4510 : CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_str,
48 : Just(kDontThrow))
49 : .FromJust());
50 2255 : }
51 :
52 902 : void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
53 : Handle<String> key, bool value) {
54 902 : Handle<Object> value_obj = isolate->factory()->ToBoolean(value);
55 :
56 : // This is a brand new JSObject that shouldn't already have the same
57 : // key so this shouldn't fail.
58 1804 : CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_obj,
59 : Just(kDontThrow))
60 : .FromJust());
61 902 : }
62 :
63 : } // anonymous namespace
64 :
65 : // static
66 451 : Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
67 : Handle<JSCollator> collator) {
68 : Handle<JSObject> options =
69 451 : isolate->factory()->NewJSObject(isolate->object_function());
70 :
71 : icu::Collator* icu_collator = collator->icu_collator()->raw();
72 451 : CHECK_NOT_NULL(icu_collator);
73 :
74 451 : UErrorCode status = U_ZERO_ERROR;
75 : bool numeric =
76 451 : icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON;
77 451 : CHECK(U_SUCCESS(status));
78 :
79 : const char* case_first = nullptr;
80 451 : status = U_ZERO_ERROR;
81 451 : switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) {
82 : case UCOL_LOWER_FIRST:
83 : case_first = "lower";
84 : break;
85 : case UCOL_UPPER_FIRST:
86 : case_first = "upper";
87 0 : break;
88 : default:
89 : case_first = "false";
90 : }
91 451 : CHECK(U_SUCCESS(status));
92 :
93 : const char* sensitivity = nullptr;
94 451 : status = U_ZERO_ERROR;
95 451 : switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) {
96 : case UCOL_PRIMARY: {
97 0 : CHECK(U_SUCCESS(status));
98 0 : status = U_ZERO_ERROR;
99 : // case level: true + s1 -> case, s1 -> base.
100 0 : if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) {
101 : sensitivity = "case";
102 : } else {
103 : sensitivity = "base";
104 : }
105 0 : CHECK(U_SUCCESS(status));
106 : break;
107 : }
108 : case UCOL_SECONDARY:
109 : sensitivity = "accent";
110 : break;
111 : case UCOL_TERTIARY:
112 : sensitivity = "variant";
113 451 : break;
114 : case UCOL_QUATERNARY:
115 : // We shouldn't get quaternary and identical from ICU, but if we do
116 : // put them into variant.
117 : sensitivity = "variant";
118 0 : break;
119 : default:
120 : sensitivity = "variant";
121 : }
122 451 : CHECK(U_SUCCESS(status));
123 :
124 451 : status = U_ZERO_ERROR;
125 451 : bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING,
126 902 : status) == UCOL_SHIFTED;
127 451 : CHECK(U_SUCCESS(status));
128 :
129 451 : status = U_ZERO_ERROR;
130 :
131 902 : icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status));
132 451 : CHECK(U_SUCCESS(status));
133 :
134 : const char* collation = "default";
135 : const char* usage = "sort";
136 : const char* collation_key = "co";
137 451 : status = U_ZERO_ERROR;
138 : std::string collation_value =
139 451 : icu_locale.getUnicodeKeywordValue<std::string>(collation_key, status);
140 :
141 : std::string locale;
142 451 : if (U_SUCCESS(status)) {
143 162 : if (collation_value == "search") {
144 : usage = "search";
145 :
146 : // Search is disallowed as a collation value per spec. Let's
147 : // use `default`, instead.
148 : //
149 : // https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances
150 : collation = "default";
151 :
152 : // We clone the icu::Locale because we don't want the
153 : // icu_collator to be affected when we remove the collation key
154 : // below.
155 144 : icu::Locale new_icu_locale = icu_locale;
156 :
157 : // The spec forbids the search as a collation value in the
158 : // locale tag, so let's filter it out.
159 72 : status = U_ZERO_ERROR;
160 72 : new_icu_locale.setUnicodeKeywordValue(collation_key, nullptr, status);
161 72 : CHECK(U_SUCCESS(status));
162 :
163 216 : locale = Intl::ToLanguageTag(new_icu_locale).FromJust();
164 : } else {
165 : collation = collation_value.c_str();
166 270 : locale = Intl::ToLanguageTag(icu_locale).FromJust();
167 : }
168 : } else {
169 867 : locale = Intl::ToLanguageTag(icu_locale).FromJust();
170 : }
171 :
172 : // 5. For each row of Table 2, except the header row, in table order, do
173 : // ...
174 : // Table 2: Resolved Options of Collator Instances
175 : // Internal Slot Property Extension Key
176 : // [[Locale] "locale"
177 : // [[Usage] "usage"
178 : // [[Sensitivity]] "sensitivity"
179 : // [[IgnorePunctuation]] "ignorePunctuation"
180 : // [[Collation]] "collation"
181 : // [[Numeric]] "numeric" kn
182 : // [[CaseFirst]] "caseFirst" kf
183 : CreateDataPropertyForOptions(
184 451 : isolate, options, isolate->factory()->locale_string(), locale.c_str());
185 : CreateDataPropertyForOptions(isolate, options,
186 451 : isolate->factory()->usage_string(), usage);
187 : CreateDataPropertyForOptions(
188 451 : isolate, options, isolate->factory()->sensitivity_string(), sensitivity);
189 451 : CreateDataPropertyForOptions(isolate, options,
190 : isolate->factory()->ignorePunctuation_string(),
191 451 : ignore_punctuation);
192 : CreateDataPropertyForOptions(
193 451 : isolate, options, isolate->factory()->collation_string(), collation);
194 451 : CreateDataPropertyForOptions(isolate, options,
195 451 : isolate->factory()->numeric_string(), numeric);
196 : CreateDataPropertyForOptions(
197 451 : isolate, options, isolate->factory()->caseFirst_string(), case_first);
198 902 : return options;
199 : }
200 :
201 : namespace {
202 :
203 5 : Intl::CaseFirst ToCaseFirst(const char* str) {
204 5 : if (strcmp(str, "upper") == 0) return Intl::CaseFirst::kUpper;
205 5 : if (strcmp(str, "lower") == 0) return Intl::CaseFirst::kLower;
206 5 : if (strcmp(str, "false") == 0) return Intl::CaseFirst::kFalse;
207 0 : return Intl::CaseFirst::kUndefined;
208 : }
209 :
210 : UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) {
211 221 : switch (case_first) {
212 : case Intl::CaseFirst::kUpper:
213 : return UCOL_UPPER_FIRST;
214 : case Intl::CaseFirst::kLower:
215 : return UCOL_LOWER_FIRST;
216 : case Intl::CaseFirst::kFalse:
217 : case Intl::CaseFirst::kUndefined:
218 : return UCOL_OFF;
219 : }
220 : }
221 :
222 5 : void SetNumericOption(icu::Collator* icu_collator, bool numeric) {
223 5 : CHECK_NOT_NULL(icu_collator);
224 5 : UErrorCode status = U_ZERO_ERROR;
225 5 : icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
226 10 : numeric ? UCOL_ON : UCOL_OFF, status);
227 5 : CHECK(U_SUCCESS(status));
228 5 : }
229 :
230 221 : void SetCaseFirstOption(icu::Collator* icu_collator,
231 : Intl::CaseFirst case_first) {
232 221 : CHECK_NOT_NULL(icu_collator);
233 221 : UErrorCode status = U_ZERO_ERROR;
234 : icu_collator->setAttribute(UCOL_CASE_FIRST, ToUColAttributeValue(case_first),
235 442 : status);
236 221 : CHECK(U_SUCCESS(status));
237 221 : }
238 :
239 : } // anonymous namespace
240 :
241 : // static
242 6107 : MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
243 : Handle<JSCollator> collator,
244 : Handle<Object> locales,
245 : Handle<Object> options_obj) {
246 : // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
247 : Maybe<std::vector<std::string>> maybe_requested_locales =
248 6107 : Intl::CanonicalizeLocaleList(isolate, locales);
249 6107 : MAYBE_RETURN(maybe_requested_locales, Handle<JSCollator>());
250 : std::vector<std::string> requested_locales =
251 6107 : maybe_requested_locales.FromJust();
252 :
253 : // 2. If options is undefined, then
254 6107 : if (options_obj->IsUndefined(isolate)) {
255 : // 2. a. Let options be ObjectCreate(null).
256 5729 : options_obj = isolate->factory()->NewJSObjectWithNullProto();
257 : } else {
258 : // 3. Else
259 : // 3. a. Let options be ? ToObject(options).
260 756 : ASSIGN_RETURN_ON_EXCEPTION(
261 : isolate, options_obj,
262 : Object::ToObject(isolate, options_obj, "Intl.Collator"), JSCollator);
263 : }
264 :
265 : // At this point, options_obj can either be a JSObject or a JSProxy only.
266 6107 : Handle<JSReceiver> options = Handle<JSReceiver>::cast(options_obj);
267 :
268 : // 4. Let usage be ? GetOption(options, "usage", "string", « "sort",
269 : // "search" », "sort").
270 : Maybe<Usage> maybe_usage = Intl::GetStringOption<Usage>(
271 : isolate, options, "usage", "Intl.Collator", {"sort", "search"},
272 18321 : {Usage::SORT, Usage::SEARCH}, Usage::SORT);
273 6107 : MAYBE_RETURN(maybe_usage, MaybeHandle<JSCollator>());
274 : Usage usage = maybe_usage.FromJust();
275 :
276 : // 9. Let matcher be ? GetOption(options, "localeMatcher", "string",
277 : // « "lookup", "best fit" », "best fit").
278 : // 10. Set opt.[[localeMatcher]] to matcher.
279 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
280 6107 : Intl::GetLocaleMatcher(isolate, options, "Intl.Collator");
281 6107 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSCollator>());
282 : Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
283 :
284 : // 11. Let numeric be ? GetOption(options, "numeric", "boolean",
285 : // undefined, undefined).
286 : // 12. If numeric is not undefined, then
287 : // a. Let numeric be ! ToString(numeric).
288 : //
289 : // Note: We omit the ToString(numeric) operation as it's not
290 : // observable. Intl::GetBoolOption returns a Boolean and
291 : // ToString(Boolean) is not side-effecting.
292 : //
293 : // 13. Set opt.[[kn]] to numeric.
294 : bool numeric;
295 : Maybe<bool> found_numeric = Intl::GetBoolOption(isolate, options, "numeric",
296 6107 : "Intl.Collator", &numeric);
297 6107 : MAYBE_RETURN(found_numeric, MaybeHandle<JSCollator>());
298 :
299 : // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string",
300 : // « "upper", "lower", "false" », undefined).
301 : Maybe<Intl::CaseFirst> maybe_case_first =
302 6107 : Intl::GetCaseFirst(isolate, options, "Intl.Collator");
303 6107 : MAYBE_RETURN(maybe_case_first, MaybeHandle<JSCollator>());
304 : Intl::CaseFirst case_first = maybe_case_first.FromJust();
305 :
306 : // The relevant unicode extensions accepted by Collator as specified here:
307 : // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
308 : //
309 : // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
310 12214 : std::set<std::string> relevant_extension_keys{"co", "kn", "kf"};
311 :
312 : // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]],
313 : // requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]],
314 : // localeData).
315 : Intl::ResolvedLocale r =
316 : Intl::ResolveLocale(isolate, JSCollator::GetAvailableLocales(),
317 12214 : requested_locales, matcher, relevant_extension_keys);
318 :
319 : // 18. Set collator.[[Locale]] to r.[[locale]].
320 12214 : icu::Locale icu_locale = r.icu_locale;
321 : DCHECK(!icu_locale.isBogus());
322 :
323 : // 19. Let collation be r.[[co]].
324 :
325 : // 5. Set collator.[[Usage]] to usage.
326 : //
327 : // 6. If usage is "sort", then
328 : // a. Let localeData be %Collator%.[[SortLocaleData]].
329 : // 7. Else,
330 : // a. Let localeData be %Collator%.[[SearchLocaleData]].
331 : //
332 : // The Intl spec doesn't allow us to use "search" as an extension
333 : // value for collation as per:
334 : // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
335 : //
336 : // But the only way to pass the value "search" for collation from
337 : // the options object to ICU is to use the 'co' extension keyword.
338 : //
339 : // This will need to be filtered out when creating the
340 : // resolvedOptions object.
341 6107 : if (usage == Usage::SEARCH) {
342 81 : UErrorCode status = U_ZERO_ERROR;
343 81 : icu_locale.setUnicodeKeywordValue("co", "search", status);
344 81 : CHECK(U_SUCCESS(status));
345 : }
346 :
347 : // 20. If collation is null, let collation be "default".
348 : // 21. Set collator.[[Collation]] to collation.
349 : //
350 : // We don't store the collation value as per the above two steps
351 : // here. The collation value can be looked up from icu::Collator on
352 : // demand, as part of Intl.Collator.prototype.resolvedOptions.
353 :
354 6107 : UErrorCode status = U_ZERO_ERROR;
355 : std::unique_ptr<icu::Collator> icu_collator(
356 6107 : icu::Collator::createInstance(icu_locale, status));
357 6107 : if (U_FAILURE(status) || icu_collator.get() == nullptr) {
358 0 : status = U_ZERO_ERROR;
359 : // Remove extensions and try again.
360 0 : icu::Locale no_extension_locale(icu_locale.getBaseName());
361 0 : icu_collator.reset(
362 : icu::Collator::createInstance(no_extension_locale, status));
363 :
364 0 : if (U_FAILURE(status) || icu_collator.get() == nullptr) {
365 0 : FATAL("Failed to create ICU collator, are ICU data files missing?");
366 : }
367 : }
368 : DCHECK(U_SUCCESS(status));
369 6107 : CHECK_NOT_NULL(icu_collator.get());
370 :
371 : // 22. If relevantExtensionKeys contains "kn", then
372 : // a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true").
373 : //
374 : // If the numeric value is passed in through the options object,
375 : // then we use it. Otherwise, we check if the numeric value is
376 : // passed in through the unicode extensions.
377 6107 : status = U_ZERO_ERROR;
378 6107 : if (found_numeric.FromJust()) {
379 0 : SetNumericOption(icu_collator.get(), numeric);
380 : } else {
381 12214 : auto kn_extension_it = r.extensions.find("kn");
382 6107 : if (kn_extension_it != r.extensions.end()) {
383 10 : SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true"));
384 : }
385 : }
386 :
387 : // 23. If relevantExtensionKeys contains "kf", then
388 : // a. Set collator.[[CaseFirst]] to r.[[kf]].
389 : //
390 : // If the caseFirst value is passed in through the options object,
391 : // then we use it. Otherwise, we check if the caseFirst value is
392 : // passed in through the unicode extensions.
393 6107 : if (case_first != Intl::CaseFirst::kUndefined) {
394 216 : SetCaseFirstOption(icu_collator.get(), case_first);
395 : } else {
396 11782 : auto kf_extension_it = r.extensions.find("kf");
397 5891 : if (kf_extension_it != r.extensions.end()) {
398 5 : SetCaseFirstOption(icu_collator.get(),
399 5 : ToCaseFirst(kf_extension_it->second.c_str()));
400 : }
401 : }
402 :
403 : // Normalization is always on, by the spec. We are free to optimize
404 : // if the strings are already normalized (but we don't have a way to tell
405 : // that right now).
406 6107 : status = U_ZERO_ERROR;
407 6107 : icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
408 6107 : CHECK(U_SUCCESS(status));
409 :
410 : // 24. Let sensitivity be ? GetOption(options, "sensitivity",
411 : // "string", « "base", "accent", "case", "variant" », undefined).
412 : Maybe<Sensitivity> maybe_sensitivity = Intl::GetStringOption<Sensitivity>(
413 : isolate, options, "sensitivity", "Intl.Collator",
414 : {"base", "accent", "case", "variant"},
415 : {Sensitivity::kBase, Sensitivity::kAccent, Sensitivity::kCase,
416 : Sensitivity::kVariant},
417 18321 : Sensitivity::kUndefined);
418 6107 : MAYBE_RETURN(maybe_sensitivity, MaybeHandle<JSCollator>());
419 : Sensitivity sensitivity = maybe_sensitivity.FromJust();
420 :
421 : // 25. If sensitivity is undefined, then
422 6107 : if (sensitivity == Sensitivity::kUndefined) {
423 : // 25. a. If usage is "sort", then
424 6107 : if (usage == Usage::SORT) {
425 : // 25. a. i. Let sensitivity be "variant".
426 : sensitivity = Sensitivity::kVariant;
427 : }
428 : }
429 : // 26. Set collator.[[Sensitivity]] to sensitivity.
430 6107 : switch (sensitivity) {
431 : case Sensitivity::kBase:
432 0 : icu_collator->setStrength(icu::Collator::PRIMARY);
433 0 : break;
434 : case Sensitivity::kAccent:
435 0 : icu_collator->setStrength(icu::Collator::SECONDARY);
436 0 : break;
437 : case Sensitivity::kCase:
438 0 : icu_collator->setStrength(icu::Collator::PRIMARY);
439 0 : status = U_ZERO_ERROR;
440 0 : icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
441 0 : CHECK(U_SUCCESS(status));
442 : break;
443 : case Sensitivity::kVariant:
444 6026 : icu_collator->setStrength(icu::Collator::TERTIARY);
445 6026 : break;
446 : case Sensitivity::kUndefined:
447 : break;
448 : }
449 :
450 : // 27.Let ignorePunctuation be ? GetOption(options,
451 : // "ignorePunctuation", "boolean", undefined, false).
452 : bool ignore_punctuation;
453 : Maybe<bool> found_ignore_punctuation =
454 : Intl::GetBoolOption(isolate, options, "ignorePunctuation",
455 6107 : "Intl.Collator", &ignore_punctuation);
456 6107 : MAYBE_RETURN(found_ignore_punctuation, MaybeHandle<JSCollator>());
457 :
458 : // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
459 6107 : if (found_ignore_punctuation.FromJust() && ignore_punctuation) {
460 0 : status = U_ZERO_ERROR;
461 0 : icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
462 0 : CHECK(U_SUCCESS(status));
463 : }
464 :
465 : Handle<Managed<icu::Collator>> managed_collator =
466 : Managed<icu::Collator>::FromUniquePtr(isolate, 0,
467 12214 : std::move(icu_collator));
468 6107 : collator->set_icu_collator(*managed_collator);
469 :
470 : // 29. Return collator.
471 6107 : return collator;
472 : }
473 :
474 176 : const std::set<std::string>& JSCollator::GetAvailableLocales() {
475 : static base::LazyInstance<Intl::AvailableLocales<icu::Collator>>::type
476 : available_locales = LAZY_INSTANCE_INITIALIZER;
477 176 : return available_locales.Pointer()->Get();
478 : }
479 :
480 : } // namespace internal
481 121996 : } // namespace v8
|