Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-locale.h"
10 :
11 : #include <map>
12 : #include <memory>
13 : #include <string>
14 : #include <vector>
15 :
16 : #include "src/api.h"
17 : #include "src/global-handles.h"
18 : #include "src/heap/factory.h"
19 : #include "src/isolate.h"
20 : #include "src/objects-inl.h"
21 : #include "src/objects/intl-objects.h"
22 : #include "src/objects/js-locale-inl.h"
23 : #include "unicode/char16ptr.h"
24 : #include "unicode/locid.h"
25 : #include "unicode/uloc.h"
26 : #include "unicode/unistr.h"
27 :
28 : namespace v8 {
29 : namespace internal {
30 :
31 : namespace {
32 :
33 : // Helper function to check a locale is valid. It will return false if
34 : // the length of the extension fields are incorrect. For example, en-u-a or
35 : // en-u-co-b will return false.
36 105624 : bool IsValidLocale(const icu::Locale& locale) {
37 : // icu::Locale::toLanguageTag won't return U_STRING_NOT_TERMINATED_WARNING for
38 : // incorrect locale yet. So we still need the following uloc_toLanguageTag
39 : // TODO(ftang): Change to use icu::Locale::toLanguageTag once it indicate
40 : // error.
41 : char result[ULOC_FULLNAME_CAPACITY];
42 52812 : UErrorCode status = U_ZERO_ERROR;
43 : uloc_toLanguageTag(locale.getName(), result, ULOC_FULLNAME_CAPACITY, true,
44 52812 : &status);
45 52812 : return U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING;
46 : }
47 :
// One row of the table that maps an option property to a Unicode extension
// key. The member order is relied upon by aggregate initialization.
struct OptionData {
  // Property name that is looked up on the options object.
  const char* name;
  // Two-letter Unicode extension key (bcp47 form) the option maps to.
  const char* key;
  // Values handed to the string-option lookup; not read for bool options.
  const std::vector<const char*>* possible_values;
  // True when the option is read as a boolean rather than as a string.
  bool is_bool_value;
};
54 :
55 : // Inserts tags from options into locale string.
56 52875 : Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
57 : Handle<JSReceiver> options,
58 52875 : icu::Locale* icu_locale) {
59 52875 : CHECK(isolate);
60 52875 : CHECK(!icu_locale->isBogus());
61 :
62 : const std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
63 : "h24"};
64 : const std::vector<const char*> case_first_values = {"upper", "lower",
65 : "false"};
66 : const std::vector<const char*> empty_values = {};
67 : const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
68 : {{"calendar", "ca", &empty_values, false},
69 : {"collation", "co", &empty_values, false},
70 : {"hourCycle", "hc", &hour_cycle_values, false},
71 : {"caseFirst", "kf", &case_first_values, false},
72 : {"numeric", "kn", &empty_values, true},
73 52875 : {"numberingSystem", "nu", &empty_values, false}}};
74 :
75 : // TODO(cira): Pass in values as per the spec to make this to be
76 : // spec compliant.
77 :
78 52875 : UErrorCode status = U_ZERO_ERROR;
79 369927 : for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
80 : std::unique_ptr<char[]> value_str = nullptr;
81 317115 : bool value_bool = false;
82 : Maybe<bool> maybe_found =
83 : option_to_bcp47.is_bool_value
84 : ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
85 52839 : "locale", &value_bool)
86 : : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
87 : *(option_to_bcp47.possible_values),
88 634230 : "locale", &value_str);
89 317115 : MAYBE_RETURN(maybe_found, Nothing<bool>());
90 :
91 : // TODO(cira): Use fallback value if value is not found to make
92 : // this spec compliant.
93 317061 : if (!maybe_found.FromJust()) continue;
94 :
95 162 : if (option_to_bcp47.is_bool_value) {
96 81 : value_str = value_bool ? isolate->factory()->true_string()->ToCString()
97 : : isolate->factory()->false_string()->ToCString();
98 : }
99 : DCHECK_NOT_NULL(value_str.get());
100 :
101 : // Convert bcp47 key and value into legacy ICU format so we can use
102 : // uloc_setKeywordValue.
103 162 : const char* key = uloc_toLegacyKey(option_to_bcp47.key);
104 : DCHECK_NOT_NULL(key);
105 :
106 : // Overwrite existing, or insert new key-value to the locale string.
107 162 : const char* value = uloc_toLegacyType(key, value_str.get());
108 162 : if (value) {
109 162 : icu_locale->setKeywordValue(key, value, status);
110 162 : if (U_FAILURE(status)) {
111 : return Just(false);
112 : }
113 : } else {
114 : return Just(false);
115 : }
116 : }
117 :
118 : // Check all the unicode extension fields are in the right length.
119 52812 : if (!IsValidLocale(*icu_locale)) {
120 0 : THROW_NEW_ERROR_RETURN_VALUE(
121 : isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
122 : Nothing<bool>());
123 : }
124 :
125 : return Just(true);
126 : }
127 :
128 90 : Handle<Object> UnicodeKeywordValue(Isolate* isolate, Handle<JSLocale> locale,
129 : const char* key) {
130 180 : icu::Locale* icu_locale = locale->icu_locale()->raw();
131 90 : UErrorCode status = U_ZERO_ERROR;
132 : std::string value =
133 90 : icu_locale->getUnicodeKeywordValue<std::string>(key, status);
134 135 : if (status == U_ILLEGAL_ARGUMENT_ERROR || value == "") {
135 45 : return isolate->factory()->undefined_value();
136 : }
137 45 : return isolate->factory()->NewStringFromAsciiChecked(value.c_str());
138 : }
139 :
// True when |value| lies in the closed interval [start, end].
bool InRange(size_t value, size_t start, size_t end) {
  if (value < start) return false;
  return !(end < value);
}
// True when the character |value| lies in the closed interval [start, end].
bool InRange(char value, char start, char end) {
  if (value < start) return false;
  return !(end < value);
}
146 :
// True when the length of |str| is within [min, max] and every character of
// |str| satisfies |range_check_func|.
bool IsCheckRange(const std::string& str, size_t min, size_t max,
                  bool(range_check_func)(char)) {
  const size_t length = str.length();
  if (length < min || max < length) return false;
  for (const char c : str) {
    if (!range_check_func(c)) return false;
  }
  return true;
}
155 : bool IsAlpha(const std::string& str, size_t min, size_t max) {
156 0 : return IsCheckRange(str, min, max, [](char c) -> bool {
157 0 : return InRange(c, 'a', 'z') || InRange(c, 'A', 'Z');
158 0 : });
159 : }
160 :
161 : bool IsDigit(const std::string& str, size_t min, size_t max) {
162 : return IsCheckRange(str, min, max,
163 0 : [](char c) -> bool { return InRange(c, '0', '9'); });
164 : }
165 :
166 : bool ValidateLanguageProduction(const std::string& value) {
167 : // language = 2*3ALPHA ; shortest ISO 639 code
168 : // ["-" extlang] ; sometimes followed by
169 : // ; extended language subtags
170 : // / 4ALPHA ; or reserved for future use
171 : // / 5*8ALPHA ; or registered language subtag
172 : //
173 : // extlang = 3ALPHA ; selected ISO 639 codes
174 : // *2("-" 3ALPHA) ; permanently reserved
175 : // TODO(ftang) not handling the [extlang] yet
176 : return IsAlpha(value, 2, 8);
177 : }
178 :
179 : bool ValidateScriptProduction(const std::string& value) {
180 : // script = 4ALPHA ; ISO 15924 code
181 : return IsAlpha(value, 4, 4);
182 : }
183 :
184 0 : bool ValidateRegionProduction(const std::string& value) {
185 : // region = 2ALPHA ; ISO 3166-1 code
186 : // / 3DIGIT ; UN M.49 code
187 0 : return IsAlpha(value, 2, 2) || IsDigit(value, 3, 3);
188 : }
189 :
190 52920 : Maybe<icu::Locale> ApplyOptionsToTag(Isolate* isolate, Handle<String> tag,
191 : Handle<JSReceiver> options) {
192 : v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
193 52920 : if (tag->length() == 0) {
194 18 : THROW_NEW_ERROR_RETURN_VALUE(
195 : isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
196 : Nothing<icu::Locale>());
197 : }
198 :
199 52911 : v8::String::Utf8Value bcp47_tag(v8_isolate, v8::Utils::ToLocal(tag));
200 52911 : CHECK_LT(0, bcp47_tag.length());
201 52911 : CHECK_NOT_NULL(*bcp47_tag);
202 : // 2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError
203 : // exception.
204 52911 : UErrorCode status = U_ZERO_ERROR;
205 : icu::Locale icu_locale =
206 105822 : icu::Locale::forLanguageTag({*bcp47_tag, bcp47_tag.length()}, status);
207 52911 : if (U_FAILURE(status)) {
208 18 : THROW_NEW_ERROR_RETURN_VALUE(
209 : isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
210 : Nothing<icu::Locale>());
211 : }
212 :
213 : // 3. Let language be ? GetOption(options, "language", "string", undefined,
214 : // undefined).
215 : const std::vector<const char*> empty_values = {};
216 : std::unique_ptr<char[]> language_str = nullptr;
217 : Maybe<bool> maybe_language =
218 : Intl::GetStringOption(isolate, options, "language", empty_values,
219 105804 : "ApplyOptionsToTag", &language_str);
220 52902 : MAYBE_RETURN(maybe_language, Nothing<icu::Locale>());
221 : // 4. If language is not undefined, then
222 52893 : if (maybe_language.FromJust()) {
223 : // a. If language does not match the language production, throw a RangeError
224 : // exception.
225 : // b. If language matches the grandfathered production, throw a RangeError
226 : // exception.
227 : // Currently ValidateLanguageProduction only take 2*3ALPHA / 4ALPHA /
228 : // 5*8ALPHA and won't take 2*3ALPHA "-" extlang so none of the grandfathered
229 : // will be matched.
230 0 : if (!ValidateLanguageProduction(language_str.get())) {
231 0 : THROW_NEW_ERROR_RETURN_VALUE(
232 : isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
233 : Nothing<icu::Locale>());
234 : }
235 : }
236 : // 5. Let script be ? GetOption(options, "script", "string", undefined,
237 : // undefined).
238 : std::unique_ptr<char[]> script_str = nullptr;
239 : Maybe<bool> maybe_script =
240 : Intl::GetStringOption(isolate, options, "script", empty_values,
241 105786 : "ApplyOptionsToTag", &script_str);
242 52893 : MAYBE_RETURN(maybe_script, Nothing<icu::Locale>());
243 : // 6. If script is not undefined, then
244 52884 : if (maybe_script.FromJust()) {
245 : // a. If script does not match the script production, throw a RangeError
246 : // exception.
247 0 : if (!ValidateScriptProduction(script_str.get())) {
248 0 : THROW_NEW_ERROR_RETURN_VALUE(
249 : isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
250 : Nothing<icu::Locale>());
251 : }
252 : }
253 : // 7. Let region be ? GetOption(options, "region", "string", undefined,
254 : // undefined).
255 : std::unique_ptr<char[]> region_str = nullptr;
256 : Maybe<bool> maybe_region =
257 : Intl::GetStringOption(isolate, options, "region", empty_values,
258 105768 : "ApplyOptionsToTag", ®ion_str);
259 52884 : MAYBE_RETURN(maybe_region, Nothing<icu::Locale>());
260 : // 8. If region is not undefined, then
261 52875 : if (maybe_region.FromJust()) {
262 : // a. If region does not match the region production, throw a RangeError
263 : // exception.
264 0 : if (!ValidateRegionProduction(region_str.get())) {
265 0 : THROW_NEW_ERROR_RETURN_VALUE(
266 : isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
267 : Nothing<icu::Locale>());
268 : }
269 : }
270 : // 9. Set tag to CanonicalizeLanguageTag(tag).
271 :
272 : // 10. If language is not undefined,
273 : std::string locale_str;
274 52875 : if (maybe_language.FromJust()) {
275 : // a. Assert: tag matches the langtag production.
276 : // b. Set tag to tag with the substring corresponding to the language
277 : // production replaced by the string language.
278 : locale_str = language_str.get();
279 : } else {
280 : locale_str = icu_locale.getLanguage();
281 : }
282 : // 11. If script is not undefined, then
283 : const char* script_ptr = nullptr;
284 52875 : if (maybe_script.FromJust()) {
285 : // a. If tag does not contain a script production, then
286 : // i. Set tag to the concatenation of the language production of tag, "-",
287 : // script, and the rest of tag.
288 : // i. Set tag to tag with the substring corresponding to the script
289 : // production replaced by the string script.
290 : script_ptr = script_str.get();
291 : } else {
292 : script_ptr = icu_locale.getScript();
293 : }
294 52875 : if (script_ptr != nullptr && strlen(script_ptr) > 0) {
295 24300 : locale_str.append("-");
296 24300 : locale_str.append(script_ptr);
297 : }
298 : // 12. If region is not undefined, then
299 : const char* region_ptr = nullptr;
300 52875 : if (maybe_region.FromJust()) {
301 : // a. If tag does not contain a region production, then
302 : //
303 : // i. Set tag to the concatenation of the language production of tag, the
304 : // substring corresponding to the "-" script production if present, "-",
305 : // region, and the rest of tag.
306 : //
307 : // b. Else,
308 : //
309 : // i. Set tag to tag with the substring corresponding to the region
310 : // production replaced by the string region.
311 : region_ptr = region_str.get();
312 : } else {
313 : region_ptr = icu_locale.getCountry();
314 : }
315 :
316 52875 : std::string without_options(icu_locale.getName());
317 :
318 : // replace with values from options
319 158625 : icu_locale =
320 52875 : icu::Locale(locale_str.c_str(), region_ptr, icu_locale.getVariant());
321 52875 : locale_str = icu_locale.getName();
322 :
323 : // Append extensions from tag
324 52875 : size_t others = without_options.find("@");
325 52875 : if (others != std::string::npos) {
326 1080 : locale_str += without_options.substr(others);
327 : }
328 :
329 : // 13. Return CanonicalizeLanguageTag(tag).
330 52875 : icu_locale = icu::Locale::createCanonical(locale_str.c_str());
331 52911 : return Just(icu_locale);
332 : }
333 :
334 : } // namespace
335 :
336 52920 : MaybeHandle<JSLocale> JSLocale::Initialize(Isolate* isolate,
337 : Handle<JSLocale> locale,
338 : Handle<String> locale_str,
339 : Handle<JSReceiver> options) {
340 : Maybe<icu::Locale> maybe_locale =
341 52920 : ApplyOptionsToTag(isolate, locale_str, options);
342 52920 : MAYBE_RETURN(maybe_locale, MaybeHandle<JSLocale>());
343 52875 : icu::Locale icu_locale = maybe_locale.FromJust();
344 :
345 52875 : Maybe<bool> error = InsertOptionsIntoLocale(isolate, options, &icu_locale);
346 52875 : MAYBE_RETURN(error, MaybeHandle<JSLocale>());
347 52821 : if (!error.FromJust()) {
348 9 : THROW_NEW_ERROR(isolate,
349 : NewRangeError(MessageTemplate::kLocaleBadParameters),
350 : JSLocale);
351 : }
352 :
353 : // 31. Set locale.[[Locale]] to r.[[locale]].
354 : Handle<Managed<icu::Locale>> managed_locale =
355 52812 : Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());
356 52812 : locale->set_icu_locale(*managed_locale);
357 :
358 52812 : return locale;
359 : }
360 :
361 : namespace {
362 45837 : Handle<String> MorphLocale(Isolate* isolate, String locale,
363 : void (*morph_func)(icu::Locale*, UErrorCode*)) {
364 45837 : UErrorCode status = U_ZERO_ERROR;
365 : icu::Locale icu_locale =
366 137511 : icu::Locale::forLanguageTag(locale.ToCString().get(), status);
367 91674 : CHECK(U_SUCCESS(status));
368 45837 : CHECK(!icu_locale.isBogus());
369 45837 : (*morph_func)(&icu_locale, &status);
370 91674 : CHECK(U_SUCCESS(status));
371 45837 : CHECK(!icu_locale.isBogus());
372 91674 : std::string locale_str = Intl::ToLanguageTag(icu_locale).FromJust();
373 91674 : return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
374 : }
375 :
376 : } // namespace
377 :
378 22905 : Handle<String> JSLocale::Maximize(Isolate* isolate, String locale) {
379 : return MorphLocale(isolate, locale,
380 22905 : [](icu::Locale* icu_locale, UErrorCode* status) {
381 22905 : icu_locale->addLikelySubtags(*status);
382 45810 : });
383 : }
384 :
385 22932 : Handle<String> JSLocale::Minimize(Isolate* isolate, String locale) {
386 : return MorphLocale(isolate, locale,
387 22932 : [](icu::Locale* icu_locale, UErrorCode* status) {
388 22932 : icu_locale->minimizeSubtags(*status);
389 45864 : });
390 : }
391 :
392 18 : Handle<Object> JSLocale::Language(Isolate* isolate, Handle<JSLocale> locale) {
393 : Factory* factory = isolate->factory();
394 36 : const char* language = locale->icu_locale()->raw()->getLanguage();
395 18 : if (strlen(language) == 0) return factory->undefined_value();
396 18 : return factory->NewStringFromAsciiChecked(language);
397 : }
398 :
399 27 : Handle<Object> JSLocale::Script(Isolate* isolate, Handle<JSLocale> locale) {
400 : Factory* factory = isolate->factory();
401 54 : const char* script = locale->icu_locale()->raw()->getScript();
402 45 : if (strlen(script) == 0) return factory->undefined_value();
403 9 : return factory->NewStringFromAsciiChecked(script);
404 : }
405 :
406 18 : Handle<Object> JSLocale::Region(Isolate* isolate, Handle<JSLocale> locale) {
407 : Factory* factory = isolate->factory();
408 36 : const char* region = locale->icu_locale()->raw()->getCountry();
409 27 : if (strlen(region) == 0) return factory->undefined_value();
410 9 : return factory->NewStringFromAsciiChecked(region);
411 : }
412 :
413 18 : Handle<String> JSLocale::BaseName(Isolate* isolate, Handle<JSLocale> locale) {
414 : icu::Locale icu_locale =
415 36 : icu::Locale::createFromName(locale->icu_locale()->raw()->getBaseName());
416 36 : std::string base_name = Intl::ToLanguageTag(icu_locale).FromJust();
417 36 : return isolate->factory()->NewStringFromAsciiChecked(base_name.c_str());
418 : }
419 :
420 18 : Handle<Object> JSLocale::Calendar(Isolate* isolate, Handle<JSLocale> locale) {
421 18 : return UnicodeKeywordValue(isolate, locale, "ca");
422 : }
423 :
424 18 : Handle<Object> JSLocale::CaseFirst(Isolate* isolate, Handle<JSLocale> locale) {
425 18 : return UnicodeKeywordValue(isolate, locale, "kf");
426 : }
427 :
428 18 : Handle<Object> JSLocale::Collation(Isolate* isolate, Handle<JSLocale> locale) {
429 18 : return UnicodeKeywordValue(isolate, locale, "co");
430 : }
431 :
432 18 : Handle<Object> JSLocale::HourCycle(Isolate* isolate, Handle<JSLocale> locale) {
433 18 : return UnicodeKeywordValue(isolate, locale, "hc");
434 : }
435 :
436 18 : Handle<Object> JSLocale::Numeric(Isolate* isolate, Handle<JSLocale> locale) {
437 : Factory* factory = isolate->factory();
438 36 : icu::Locale* icu_locale = locale->icu_locale()->raw();
439 18 : UErrorCode status = U_ZERO_ERROR;
440 : std::string numeric =
441 18 : icu_locale->getUnicodeKeywordValue<std::string>("kn", status);
442 54 : return (numeric == "true") ? factory->true_value() : factory->false_value();
443 : }
444 :
445 18 : Handle<Object> JSLocale::NumberingSystem(Isolate* isolate,
446 : Handle<JSLocale> locale) {
447 18 : return UnicodeKeywordValue(isolate, locale, "nu");
448 : }
449 :
450 119916 : std::string JSLocale::ToString(Handle<JSLocale> locale) {
451 239832 : icu::Locale* icu_locale = locale->icu_locale()->raw();
452 239832 : return Intl::ToLanguageTag(*icu_locale).FromJust();
453 : }
454 :
455 119889 : Handle<String> JSLocale::ToString(Isolate* isolate, Handle<JSLocale> locale) {
456 119889 : std::string locale_str = JSLocale::ToString(locale);
457 239778 : return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
458 : }
459 :
460 : } // namespace internal
461 183867 : } // namespace v8
|