Line data Source code
1 : // Copyright 2013 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/intl-objects.h"
10 :
11 : #include <algorithm>
12 : #include <memory>
13 : #include <string>
14 : #include <vector>
15 :
16 : #include "src/api-inl.h"
17 : #include "src/global-handles.h"
18 : #include "src/heap/factory.h"
19 : #include "src/isolate.h"
20 : #include "src/objects-inl.h"
21 : #include "src/objects/js-collator-inl.h"
22 : #include "src/objects/js-date-time-format-inl.h"
23 : #include "src/objects/js-locale-inl.h"
24 : #include "src/objects/js-number-format-inl.h"
25 : #include "src/objects/string.h"
26 : #include "src/property-descriptor.h"
27 : #include "src/string-case.h"
28 : #include "unicode/basictz.h"
29 : #include "unicode/brkiter.h"
30 : #include "unicode/calendar.h"
31 : #include "unicode/coll.h"
32 : #include "unicode/decimfmt.h"
33 : #include "unicode/locid.h"
34 : #include "unicode/normalizer2.h"
35 : #include "unicode/numfmt.h"
36 : #include "unicode/numsys.h"
37 : #include "unicode/timezone.h"
38 : #include "unicode/ustring.h"
39 : #include "unicode/uvernum.h" // U_ICU_VERSION_MAJOR_NUM
40 :
41 : #define XSTR(s) STR(s)
42 : #define STR(s) #s
43 : static_assert(
44 : V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
45 : "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
46 : #undef STR
47 : #undef XSTR
48 :
49 : namespace v8 {
50 : namespace internal {
51 :
52 : namespace {
// Returns true iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  if (ch < 'A') return false;
  return ch <= 'Z';
}
54 :
// Lowercase mapping for every Latin-1 code unit (U+0000..U+00FF), indexed by
// the code unit itself. Only 'A'..'Z' (0x41..0x5A) and 0xC0..0xDE other than
// 0xD7 map to a different value (code unit + 0x20); every other entry maps to
// itself. One row per 16 code units.
const uint8_t kToLower[256] = {
    // 0x00 - 0x0F
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    // 0x10 - 0x1F
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    // 0x20 - 0x2F
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    // 0x30 - 0x3F
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40 - 0x4F ('@', then 'A'..'O' -> 'a'..'o')
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x50 - 0x5F ('P'..'Z' -> 'p'..'z', then 0x5B..0x5F unchanged)
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60 - 0x6F
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x70 - 0x7F
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    // 0x80 - 0x8F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    // 0x90 - 0x9F
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    // 0xA0 - 0xAF
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    // 0xB0 - 0xBF
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0 - 0xCF (-> 0xE0..0xEF)
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xD0 - 0xDF (-> 0xF0..0xFE, except 0xD7 and 0xDF which are unchanged)
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0 - 0xEF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xF0 - 0xFF
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
79 :
80 : inline uint16_t ToLatin1Lower(uint16_t ch) {
81 1710 : return static_cast<uint16_t>(kToLower[ch]);
82 : }
83 :
// Upper-cases |ch| if it is an ASCII lowercase letter ('a'..'z') by clearing
// bit 5 (0x20); any other value is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  return is_ascii_lower ? (ch & ~0x20) : ch;
}
87 :
88 : // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
89 : inline uint16_t ToLatin1Upper(uint16_t ch) {
90 : DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
91 : return ch &
92 1332 : ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
93 1332 : << 5);
94 : }
95 :
96 : template <typename Char>
97 0 : bool ToUpperFastASCII(const Vector<const Char>& src,
98 : Handle<SeqOneByteString> result) {
99 : // Do a faster loop for the case where all the characters are ASCII.
100 : uint16_t ored = 0;
101 : int32_t index = 0;
102 0 : for (auto it = src.begin(); it != src.end(); ++it) {
103 0 : uint16_t ch = static_cast<uint16_t>(*it);
104 0 : ored |= ch;
105 0 : result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
106 : }
107 0 : return !(ored & ~0x7F);
108 : }
109 :
// Code unit 0xDF (lowercase sharp-s), which needs special handling when
// upper-casing.
constexpr uint16_t sharp_s = 0x00DF;
111 :
112 : template <typename Char>
113 414 : bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
114 : int* sharp_s_count) {
115 : // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
116 :
117 : // There are two special cases.
118 : // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
119 : // 2. Lower case sharp-S converts to "SS" (two characters)
120 207 : *sharp_s_count = 0;
121 2286 : for (auto it = src.begin(); it != src.end(); ++it) {
122 999 : uint16_t ch = static_cast<uint16_t>(*it);
123 999 : if (V8_UNLIKELY(ch == sharp_s)) {
124 99 : ++(*sharp_s_count);
125 99 : continue;
126 : }
127 900 : if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
128 : // Since this upper-cased character does not fit in an 8-bit string, we
129 : // need to take the 16-bit path.
130 : return false;
131 : }
132 1674 : *dest++ = ToLatin1Upper(ch);
133 : }
134 :
135 : return true;
136 : }
137 :
138 : template <typename Char>
139 81 : void ToUpperWithSharpS(const Vector<const Char>& src,
140 : Handle<SeqOneByteString> result) {
141 : int32_t dest_index = 0;
142 1350 : for (auto it = src.begin(); it != src.end(); ++it) {
143 594 : uint16_t ch = static_cast<uint16_t>(*it);
144 594 : if (ch == sharp_s) {
145 99 : result->SeqOneByteStringSet(dest_index++, 'S');
146 99 : result->SeqOneByteStringSet(dest_index++, 'S');
147 : } else {
148 495 : result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
149 : }
150 : }
151 81 : }
152 :
153 41 : inline int FindFirstUpperOrNonAscii(String s, int length) {
154 396 : for (int index = 0; index < length; ++index) {
155 : uint16_t ch = s->Get(index);
156 355 : if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
157 : return index;
158 : }
159 : }
160 : return length;
161 : }
162 :
163 662392 : const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
164 : std::unique_ptr<uc16[]>* dest,
165 : int32_t length) {
166 : DCHECK(flat.IsFlat());
167 331232 : if (flat.IsOneByte()) {
168 321269 : if (!*dest) {
169 321197 : dest->reset(NewArray<uc16>(length));
170 : CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
171 : }
172 321269 : return reinterpret_cast<const UChar*>(dest->get());
173 : } else {
174 : return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
175 : }
176 : }
177 :
178 : template <typename T>
179 2531 : MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
180 : Handle<Object> locales, Handle<Object> options) {
181 : Handle<JSObject> result;
182 5062 : ASSIGN_RETURN_ON_EXCEPTION(
183 : isolate, result,
184 : JSObject::New(constructor, constructor, Handle<AllocationSite>::null()),
185 : T);
186 2531 : return T::Initialize(isolate, Handle<T>::cast(result), locales, options);
187 : }
188 : } // namespace
189 :
190 62938 : const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
191 :
192 329027 : icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
193 : Handle<String> string) {
194 329027 : string = String::Flatten(isolate, string);
195 : {
196 : DisallowHeapAllocation no_gc;
197 329027 : std::unique_ptr<uc16[]> sap;
198 : return icu::UnicodeString(
199 658054 : GetUCharBufferFromFlat(string->GetFlatContent(no_gc), &sap,
200 : string->length()),
201 987081 : string->length());
202 : }
203 : }
204 :
205 : namespace {
206 2025 : MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
207 : bool is_to_upper, const char* lang) {
208 2025 : auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
209 : int32_t src_length = s->length();
210 : int32_t dest_length = src_length;
211 : UErrorCode status;
212 : Handle<SeqTwoByteString> result;
213 2025 : std::unique_ptr<uc16[]> sap;
214 :
215 2025 : if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();
216 :
217 : // This is not a real loop. It'll be executed only once (no overflow) or
218 : // twice (overflow).
219 180 : for (int i = 0; i < 2; ++i) {
220 : // Case conversion can increase the string length (e.g. sharp-S => SS) so
221 : // that we have to handle RangeError exceptions here.
222 4410 : ASSIGN_RETURN_ON_EXCEPTION(
223 : isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
224 : String);
225 : DisallowHeapAllocation no_gc;
226 : DCHECK(s->IsFlat());
227 2205 : String::FlatContent flat = s->GetFlatContent(no_gc);
228 2205 : const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
229 2205 : status = U_ZERO_ERROR;
230 : dest_length =
231 : case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
232 2205 : dest_length, src, src_length, lang, &status);
233 2205 : if (status != U_BUFFER_OVERFLOW_ERROR) break;
234 : }
235 :
236 : // In most cases, the output will fill the destination buffer completely
237 : // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
238 : // Only in rare cases, it'll be shorter than the destination buffer and
239 : // |result| has to be truncated.
240 : DCHECK(U_SUCCESS(status));
241 2025 : if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
242 : DCHECK(dest_length == result->length());
243 1890 : return result;
244 : }
245 : DCHECK(dest_length < result->length());
246 135 : return SeqString::Truncate(result, dest_length);
247 : }
248 :
249 : } // namespace
250 :
251 : // A stripped-down version of ConvertToLower that can only handle flat one-byte
252 : // strings and does not allocate. Note that {src} could still be, e.g., a
253 : // one-byte sliced string with a two-byte parent string.
254 : // Called from TF builtins.
255 2283 : String Intl::ConvertOneByteToLower(String src, String dst) {
256 : DCHECK_EQ(src->length(), dst->length());
257 : DCHECK(src->HasOnlyOneByteChars());
258 : DCHECK(src->IsFlat());
259 : DCHECK(dst->IsSeqOneByteString());
260 :
261 : DisallowHeapAllocation no_gc;
262 :
263 : const int length = src->length();
264 2283 : String::FlatContent src_flat = src->GetFlatContent(no_gc);
265 : uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars(no_gc);
266 :
267 2283 : if (src_flat.IsOneByte()) {
268 : const uint8_t* src_data = src_flat.ToOneByteVector().start();
269 :
270 2278 : bool has_changed_character = false;
271 : int index_to_first_unprocessed =
272 : FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
273 : reinterpret_cast<const char*>(src_data), length,
274 2278 : &has_changed_character);
275 :
276 2278 : if (index_to_first_unprocessed == length) {
277 2260 : return has_changed_character ? dst : src;
278 : }
279 :
280 : // If not ASCII, we keep the result up to index_to_first_unprocessed and
281 : // process the rest.
282 1710 : for (int index = index_to_first_unprocessed; index < length; ++index) {
283 3420 : dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
284 : }
285 : } else {
286 : DCHECK(src_flat.IsTwoByte());
287 5 : int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
288 5 : if (index_to_first_unprocessed == length) return src;
289 :
290 : const uint16_t* src_data = src_flat.ToUC16Vector().start();
291 0 : CopyChars(dst_data, src_data, index_to_first_unprocessed);
292 0 : for (int index = index_to_first_unprocessed; index < length; ++index) {
293 0 : dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
294 : }
295 : }
296 :
297 18 : return dst;
298 : }
299 :
300 2831 : MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
301 2831 : if (!s->HasOnlyOneByteChars()) {
302 : // Use a slower implementation for strings with characters beyond U+00FF.
303 1305 : return LocaleConvertCase(isolate, s, false, "");
304 : }
305 :
306 : int length = s->length();
307 :
308 : // We depend here on the invariant that the length of a Latin1
309 : // string is invariant under ToLowerCase, and the result always
310 : // fits in the Latin1 range in the *root locale*. It does not hold
311 : // for ToUpperCase even in the root locale.
312 :
313 : // Scan the string for uppercase and non-ASCII characters for strings
314 : // shorter than a machine-word without any memory allocation overhead.
315 : // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
316 : // to two parts, one for scanning the prefix with no change and the other for
317 : // handling ASCII-only characters.
318 :
319 : bool is_short = length < static_cast<int>(sizeof(uintptr_t));
320 1526 : if (is_short) {
321 36 : bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
322 36 : if (is_lower_ascii) return s;
323 : }
324 :
325 : Handle<SeqOneByteString> result =
326 2980 : isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
327 :
328 4470 : return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
329 : }
330 :
331 4797 : MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
332 : int32_t length = s->length();
333 4797 : if (s->HasOnlyOneByteChars() && length > 0) {
334 : Handle<SeqOneByteString> result =
335 9054 : isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
336 :
337 : DCHECK(s->IsFlat());
338 : int sharp_s_count;
339 : bool is_result_single_byte;
340 : {
341 : DisallowHeapAllocation no_gc;
342 4527 : String::FlatContent flat = s->GetFlatContent(no_gc);
343 : uint8_t* dest = result->GetChars(no_gc);
344 4527 : if (flat.IsOneByte()) {
345 : Vector<const uint8_t> src = flat.ToOneByteVector();
346 4527 : bool has_changed_character = false;
347 : int index_to_first_unprocessed = FastAsciiConvert<false>(
348 : reinterpret_cast<char*>(result->GetChars(no_gc)),
349 : reinterpret_cast<const char*>(src.start()), length,
350 4527 : &has_changed_character);
351 4527 : if (index_to_first_unprocessed == length) {
352 4320 : return has_changed_character ? result : s;
353 : }
354 : // If not ASCII, we keep the result up to index_to_first_unprocessed and
355 : // process the rest.
356 : is_result_single_byte =
357 207 : ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
358 414 : dest + index_to_first_unprocessed, &sharp_s_count);
359 : } else {
360 : DCHECK(flat.IsTwoByte());
361 0 : Vector<const uint16_t> src = flat.ToUC16Vector();
362 0 : if (ToUpperFastASCII(src, result)) return result;
363 0 : is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
364 : }
365 : }
366 :
367 : // Go to the full Unicode path if there are characters whose uppercase
368 : // is beyond the Latin-1 range (cannot be represented in OneByteString).
369 207 : if (V8_UNLIKELY(!is_result_single_byte)) {
370 63 : return LocaleConvertCase(isolate, s, true, "");
371 : }
372 :
373 144 : if (sharp_s_count == 0) return result;
374 :
375 : // We have sharp_s_count sharp-s characters, but the result is still
376 : // in the Latin-1 range.
377 162 : ASSIGN_RETURN_ON_EXCEPTION(
378 : isolate, result,
379 : isolate->factory()->NewRawOneByteString(length + sharp_s_count),
380 : String);
381 : DisallowHeapAllocation no_gc;
382 81 : String::FlatContent flat = s->GetFlatContent(no_gc);
383 81 : if (flat.IsOneByte()) {
384 81 : ToUpperWithSharpS(flat.ToOneByteVector(), result);
385 : } else {
386 0 : ToUpperWithSharpS(flat.ToUC16Vector(), result);
387 : }
388 :
389 81 : return result;
390 : }
391 :
392 270 : return LocaleConvertCase(isolate, s, true, "");
393 : }
394 :
395 3672 : std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
396 : // Ugly hack. ICU doesn't expose numbering system in any way, so we have
397 : // to assume that for given locale NumberingSystem constructor produces the
398 : // same digits as NumberFormat/Calendar would.
399 3672 : UErrorCode status = U_ZERO_ERROR;
400 : std::unique_ptr<icu::NumberingSystem> numbering_system(
401 3672 : icu::NumberingSystem::createInstance(icu_locale, status));
402 3672 : if (U_SUCCESS(status)) return numbering_system->getName();
403 0 : return "latn";
404 : }
405 :
406 24018 : icu::Locale Intl::CreateICULocale(const std::string& bcp47_locale) {
407 : DisallowHeapAllocation no_gc;
408 :
409 : // Convert BCP47 into ICU locale format.
410 12009 : UErrorCode status = U_ZERO_ERROR;
411 :
412 12009 : icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
413 24018 : CHECK(U_SUCCESS(status));
414 12009 : if (icu_locale.isBogus()) {
415 0 : FATAL("Failed to create ICU locale, are ICU data files missing?");
416 : }
417 :
418 12009 : return icu_locale;
419 : }
420 :
421 : // static
422 :
423 36162 : MaybeHandle<String> Intl::ToString(Isolate* isolate,
424 : const icu::UnicodeString& string) {
425 : return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
426 72324 : reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
427 : }
428 :
429 30429 : MaybeHandle<String> Intl::ToString(Isolate* isolate,
430 : const icu::UnicodeString& string,
431 : int32_t begin, int32_t end) {
432 30429 : return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
433 : }
434 :
435 : namespace {
436 :
437 10224 : Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
438 : int index, Handle<String> field_type_string,
439 : Handle<String> value) {
440 : // let element = $array[$index] = {
441 : // type: $field_type_string,
442 : // value: $value
443 : // }
444 : // return element;
445 : Factory* factory = isolate->factory();
446 10224 : Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
447 : JSObject::AddProperty(isolate, element, factory->type_string(),
448 10224 : field_type_string, NONE);
449 :
450 10224 : JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
451 20448 : JSObject::AddDataElement(array, index, element, NONE);
452 10224 : return element;
453 : }
454 :
455 : } // namespace
456 :
457 9774 : void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
458 : Handle<String> field_type_string, Handle<String> value) {
459 : // Same as $array[$index] = {type: $field_type_string, value: $value};
460 9774 : InnerAddElement(isolate, array, index, field_type_string, value);
461 9774 : }
462 :
463 450 : void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
464 : Handle<String> field_type_string, Handle<String> value,
465 : Handle<String> additional_property_name,
466 : Handle<String> additional_property_value) {
467 : // Same as $array[$index] = {
468 : // type: $field_type_string, value: $value,
469 : // $additional_property_name: $additional_property_value
470 : // }
471 : Handle<JSObject> element =
472 450 : InnerAddElement(isolate, array, index, field_type_string, value);
473 : JSObject::AddProperty(isolate, element, additional_property_name,
474 450 : additional_property_value, NONE);
475 450 : }
476 :
477 : namespace {
478 :
479 : // Build the shortened locale; eg, convert xx_Yyyy_ZZ to xx_ZZ.
480 : //
481 : // If locale has a script tag then return true and the locale without the
482 : // script else return false and an empty string.
483 3263262 : bool RemoveLocaleScriptTag(const std::string& icu_locale,
484 : std::string* locale_less_script) {
485 3263262 : icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
486 : const char* icu_script = new_locale.getScript();
487 3263262 : if (icu_script == nullptr || strlen(icu_script) == 0) {
488 3107722 : *locale_less_script = std::string();
489 3107722 : return false;
490 : }
491 :
492 : const char* icu_language = new_locale.getLanguage();
493 : const char* icu_country = new_locale.getCountry();
494 311080 : icu::Locale short_locale = icu::Locale(icu_language, icu_country);
495 155540 : *locale_less_script = short_locale.getName();
496 3263262 : return true;
497 : }
498 :
499 : } // namespace
500 :
501 11482 : std::set<std::string> Intl::BuildLocaleSet(
502 : const icu::Locale* icu_available_locales, int32_t count) {
503 : std::set<std::string> locales;
504 3274744 : for (int32_t i = 0; i < count; ++i) {
505 : std::string locale =
506 6526524 : Intl::ToLanguageTag(icu_available_locales[i]).FromJust();
507 : locales.insert(locale);
508 :
509 : std::string shortened_locale;
510 3263262 : if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
511 155540 : std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
512 : locales.insert(shortened_locale);
513 : }
514 : }
515 :
516 11482 : return locales;
517 : }
518 :
519 3452410 : Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
520 3452410 : UErrorCode status = U_ZERO_ERROR;
521 3452410 : std::string res = locale.toLanguageTag<std::string>(status);
522 3452410 : if (U_FAILURE(status)) {
523 : return Nothing<std::string>();
524 : }
525 3452401 : CHECK(U_SUCCESS(status));
526 :
527 : // Hack to remove -true and -yes from unicode extensions
528 : // Address https://crbug.com/v8/8565
529 : // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
530 : // by fixing ICU-20310.
531 3452401 : size_t u_ext_start = res.find("-u-");
532 3452401 : if (u_ext_start != std::string::npos) {
533 : // remove "-true" and "-yes" after -u-
534 36228 : const std::vector<std::string> remove_items({"-true", "-yes"});
535 72456 : for (auto item = remove_items.begin(); item != remove_items.end(); item++) {
536 72483 : for (size_t sep_remove =
537 36228 : res.find(*item, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
538 : sep_remove != std::string::npos; sep_remove = res.find(*item)) {
539 27 : size_t end_of_sep_remove = sep_remove + item->length();
540 54 : if (res.length() == end_of_sep_remove ||
541 27 : res.at(end_of_sep_remove) == '-') {
542 27 : res.erase(sep_remove, item->length());
543 : }
544 : }
545 18114 : }
546 : }
547 : return Just(res);
548 : }
549 :
550 : namespace {
551 2039 : std::string DefaultLocale(Isolate* isolate) {
552 2039 : if (isolate->default_locale().empty()) {
553 487 : icu::Locale default_locale;
554 : // Translate ICU's fallback locale to a well-known locale.
555 487 : if (strcmp(default_locale.getName(), "en_US_POSIX") == 0) {
556 0 : isolate->set_default_locale("en-US");
557 : } else {
558 : // Set the locale
559 : isolate->set_default_locale(
560 487 : default_locale.isBogus()
561 : ? "und"
562 1461 : : Intl::ToLanguageTag(default_locale).FromJust());
563 : }
564 487 : DCHECK(!isolate->default_locale().empty());
565 : }
566 2039 : return isolate->default_locale();
567 : }
568 : } // namespace
569 :
570 : // See ecma402/#legacy-constructor.
571 4545 : MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
572 : Handle<JSReceiver> receiver,
573 : Handle<JSFunction> constructor,
574 : bool has_initialized_slot) {
575 : Handle<Object> obj_is_instance_of;
576 9090 : ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
577 : Object::InstanceOf(isolate, receiver, constructor),
578 : Object);
579 4545 : bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);
580 :
581 : // 2. If receiver does not have an [[Initialized...]] internal slot
582 : // and ? InstanceofOperator(receiver, constructor) is true, then
583 4545 : if (!has_initialized_slot && is_instance_of) {
584 : // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
585 : Handle<Object> new_receiver;
586 180 : ASSIGN_RETURN_ON_EXCEPTION(
587 : isolate, new_receiver,
588 : JSReceiver::GetProperty(isolate, receiver,
589 : isolate->factory()->intl_fallback_symbol()),
590 : Object);
591 90 : return new_receiver;
592 : }
593 :
594 4455 : return receiver;
595 : }
596 :
597 489199 : Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
598 : const char* property,
599 13581 : std::vector<const char*> values,
600 : const char* service,
601 : std::unique_ptr<char[]>* result) {
602 : Handle<String> property_str =
603 489199 : isolate->factory()->NewStringFromAsciiChecked(property);
604 :
605 : // 1. Let value be ? Get(options, property).
606 : Handle<Object> value;
607 978398 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
608 : isolate, value,
609 : Object::GetPropertyOrElement(isolate, options, property_str),
610 : Nothing<bool>());
611 :
612 978182 : if (value->IsUndefined(isolate)) {
613 : return Just(false);
614 : }
615 :
616 : // 2. c. Let value be ? ToString(value).
617 : Handle<String> value_str;
618 27180 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
619 : isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
620 13581 : std::unique_ptr<char[]> value_cstr = value_str->ToCString();
621 :
622 : // 2. d. if values is not undefined, then
623 13581 : if (values.size() > 0) {
624 : // 2. d. i. If values does not contain an element equal to value,
625 : // throw a RangeError exception.
626 54005 : for (size_t i = 0; i < values.size(); i++) {
627 33304 : if (strcmp(values.at(i), value_cstr.get()) == 0) {
628 : // 2. e. return value
629 : *result = std::move(value_cstr);
630 : return Just(true);
631 : }
632 : }
633 :
634 : Handle<String> service_str =
635 122 : isolate->factory()->NewStringFromAsciiChecked(service);
636 244 : THROW_NEW_ERROR_RETURN_VALUE(
637 : isolate,
638 : NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
639 : property_str),
640 : Nothing<bool>());
641 : }
642 :
643 : // 2. e. return value
644 : *result = std::move(value_cstr);
645 : return Just(true);
646 : }
647 :
648 63279 : V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
649 : Isolate* isolate, Handle<JSReceiver> options, const char* property,
650 : const char* service, bool* result) {
651 : Handle<String> property_str =
652 63279 : isolate->factory()->NewStringFromAsciiChecked(property);
653 :
654 : // 1. Let value be ? Get(options, property).
655 : Handle<Object> value;
656 126558 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
657 : isolate, value,
658 : Object::GetPropertyOrElement(isolate, options, property_str),
659 : Nothing<bool>());
660 :
661 : // 2. If value is not undefined, then
662 126540 : if (!value->IsUndefined(isolate)) {
663 : // 2. b. i. Let value be ToBoolean(value).
664 100 : *result = value->BooleanValue(isolate);
665 :
666 : // 2. e. return value
667 : return Just(true);
668 : }
669 :
670 : return Just(false);
671 : }
672 :
673 : namespace {
674 :
// Lower-cases |c| if it is an ASCII capital letter by setting bit 5 (0x20);
// any other character is returned unchanged.
char AsciiToLower(char c) {
  const bool is_ascii_upper = c >= 'A' && c <= 'Z';
  return is_ascii_upper ? static_cast<char>(c | 0x20) : c;
}
681 :
// Returns true iff |c| is an ASCII lowercase letter, 'a' through 'z'
// inclusive.
bool IsLowerAscii(char c) { return c >= 'a' && c <= 'z'; }
683 :
684 : bool IsTwoLetterLanguage(const std::string& locale) {
685 : // Two letters, both in range 'a'-'z'...
686 22863 : return locale.length() == 2 && IsLowerAscii(locale[0]) &&
687 5965 : IsLowerAscii(locale[1]);
688 : }
689 :
// Returns true iff |locale| is exactly one of the deprecated language tags
// "in", "iw", "ji" or "jw".
bool IsDeprecatedLanguage(const std::string& locale) {
  static const char* const kDeprecatedTags[] = {"in", "iw", "ji", "jw"};
  for (const char* tag : kDeprecatedTags) {
    if (locale == tag) return true;
  }
  return false;
}
694 :
695 : // Reference:
696 : // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
697 4887 : bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
698 9747 : if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
699 8478 : if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
700 3627 : V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
701 486 : return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
702 135 : locale.substr(2) == "mingo";
703 : }
704 : return false;
705 : }
706 :
707 : } // anonymous namespace
708 :
709 10888 : Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
710 : Handle<Object> locale_in) {
711 : Handle<String> locale_str;
712 : // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
713 : // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
714 : // exception.
715 : // 7c iii. Let tag be ? ToString(kValue).
716 : // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
717 : // RangeError exception.
718 :
719 21776 : if (locale_in->IsString()) {
720 10852 : locale_str = Handle<String>::cast(locale_in);
721 72 : } else if (locale_in->IsJSReceiver()) {
722 0 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
723 : Object::ToString(isolate, locale_in),
724 : Nothing<std::string>());
725 : } else {
726 72 : THROW_NEW_ERROR_RETURN_VALUE(isolate,
727 : NewTypeError(MessageTemplate::kLanguageID),
728 : Nothing<std::string>());
729 : }
730 32556 : std::string locale(locale_str->ToCString().get());
731 :
732 21704 : if (locale.length() == 0 ||
733 10852 : !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
734 0 : THROW_NEW_ERROR_RETURN_VALUE(
735 : isolate,
736 : NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
737 : Nothing<std::string>());
738 : }
739 :
740 : // Optimize for the most common case: a 2-letter language code in the
741 : // canonical form/lowercase that is not one of the deprecated codes
742 : // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
743 : // codes. Instead, let them be handled by ICU in the slow path. However,
744 : // fast-track 'fil' (3-letter canonical code).
745 15793 : if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
746 : locale == "fil") {
747 : return Just(locale);
748 : }
749 :
750 : // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
751 : // the input before any more check.
752 4887 : std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower);
753 :
754 : // ICU maps a few grandfathered tags to what looks like a regular language
755 : // tag even though IANA language tag registry does not have a preferred
756 : // entry map for them. Return them as they're with lowercasing.
757 4887 : if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
758 : return Just(locale);
759 : }
760 :
761 : // // ECMA 402 6.2.3
762 : // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
763 : // language tag if it's too long (much longer than 100 chars). Even if we
764 : // allocate a longer buffer, ICU will still fail if it's too long. Either
765 : // propose to Ecma 402 to put a limit on the locale length or change ICU to
766 : // handle long locale names better. See
767 : // https://unicode-org.atlassian.net/browse/ICU-13417
768 4761 : UErrorCode error = U_ZERO_ERROR;
769 : // uloc_forLanguageTag checks the structrual validity. If the input BCP47
770 : // language tag is parsed all the way to the end, it indicates that the input
771 : // is structurally valid. Due to a couple of bugs, we can't use it
772 : // without Chromium patches or ICU 62 or earlier.
773 9522 : icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
774 4761 : if (U_FAILURE(error) || icu_locale.isBogus()) {
775 252 : THROW_NEW_ERROR_RETURN_VALUE(
776 : isolate,
777 : NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
778 : Nothing<std::string>());
779 : }
780 4635 : Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
781 4635 : if (maybe_to_language_tag.IsNothing()) {
782 18 : THROW_NEW_ERROR_RETURN_VALUE(
783 : isolate,
784 : NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
785 : Nothing<std::string>());
786 : }
787 :
788 4626 : return Intl::ToLanguageTag(icu_locale);
789 : }
790 :
791 12702 : Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
792 : Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
793 : // 1. If locales is undefined, then
794 25404 : if (locales->IsUndefined(isolate)) {
795 : // 1a. Return a new empty List.
796 1589 : return Just(std::vector<std::string>());
797 : }
798 : // 2. Let seen be a new empty List.
799 : std::vector<std::string> seen;
800 : // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
801 : // internal slot, then
802 22226 : if (locales->IsJSLocale()) {
803 : // Since this value came from JSLocale, which is already went though the
804 : // CanonializeLanguageTag process once, therefore there are no need to
805 : // call CanonializeLanguageTag again.
806 18 : seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
807 : return Just(seen);
808 : }
809 22208 : if (locales->IsString()) {
810 : // 3a. Let O be CreateArrayFromList(« locales »).
811 : // Instead of creating a one-element array and then iterating over it,
812 : // we inline the body of the iteration:
813 : std::string canonicalized_tag;
814 12046 : if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
815 : return Nothing<std::vector<std::string>>();
816 : }
817 5924 : seen.push_back(canonicalized_tag);
818 : return Just(seen);
819 : }
820 : // 4. Else,
821 : // 4a. Let O be ? ToObject(locales).
822 : Handle<JSReceiver> o;
823 10162 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
824 : Object::ToObject(isolate, locales),
825 : Nothing<std::vector<std::string>>());
826 : // 5. Let len be ? ToLength(? Get(O, "length")).
827 : Handle<Object> length_obj;
828 10162 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
829 : Object::GetLengthFromArrayLike(isolate, o),
830 : Nothing<std::vector<std::string>>());
831 : // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
832 : // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
833 : // don't happen in practice (and would be very slow if they do), we'll keep
834 : // the code simple for now by using a saturating to-uint32 conversion.
835 5081 : double raw_length = length_obj->Number();
836 : uint32_t len =
837 5081 : raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
838 : // 6. Let k be 0.
839 : // 7. Repeat, while k < len
840 9811 : for (uint32_t k = 0; k < len; k++) {
841 : // 7a. Let Pk be ToString(k).
842 : // 7b. Let kPresent be ? HasProperty(O, Pk).
843 4883 : LookupIterator it(isolate, o, k);
844 4883 : Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
845 5036 : MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
846 : // 7c. If kPresent is true, then
847 4883 : if (!maybe_found.FromJust()) continue;
848 : // 7c i. Let kValue be ? Get(O, Pk).
849 : Handle<Object> k_value;
850 9766 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
851 : Nothing<std::vector<std::string>>());
852 : // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
853 : // exception.
854 : // 7c iii. Let tag be ? ToString(kValue).
855 : // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
856 : // RangeError exception.
857 : // 7c v. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
858 : std::string canonicalized_tag;
859 9766 : if (k_value->IsJSLocale()) {
860 36 : canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
861 : } else {
862 9730 : if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
863 : return Nothing<std::vector<std::string>>();
864 : }
865 : }
866 : // 7c vi. If canonicalizedTag is not an element of seen, append
867 : // canonicalizedTag as the last element of seen.
868 4811 : if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
869 4811 : seen.push_back(canonicalized_tag);
870 : }
871 : // 7d. Increase k by 1. (See loop header.)
872 : // Optimization: some callers only need one result.
873 4811 : if (only_return_one_result) return Just(seen);
874 : }
875 : // 8. Return seen.
876 11113 : return Just(seen);
877 : }
878 :
879 : // ecma402 #sup-string.prototype.tolocalelowercase
880 : // ecma402 #sup-string.prototype.tolocaleuppercase
881 801 : MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
882 : Handle<String> s,
883 : bool to_upper,
884 : Handle<Object> locales) {
885 : std::vector<std::string> requested_locales;
886 1602 : if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
887 36 : return MaybeHandle<String>();
888 : }
889 765 : std::string requested_locale = requested_locales.size() == 0
890 : ? DefaultLocale(isolate)
891 765 : : requested_locales[0];
892 765 : size_t dash = requested_locale.find('-');
893 765 : if (dash != std::string::npos) {
894 360 : requested_locale = requested_locale.substr(0, dash);
895 : }
896 :
897 : // Primary language tag can be up to 8 characters long in theory.
898 : // https://tools.ietf.org/html/bcp47#section-2.2.1
899 : DCHECK_LE(requested_locale.length(), 8);
900 765 : s = String::Flatten(isolate, s);
901 :
902 : // All the languages requiring special-handling have two-letter codes.
903 : // Note that we have to check for '!= 2' here because private-use language
904 : // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
905 : // only 'x' or 'i' when they get here.
906 765 : if (V8_UNLIKELY(requested_locale.length() != 2)) {
907 135 : if (to_upper) {
908 45 : return ConvertToUpper(isolate, s);
909 : }
910 90 : return ConvertToLower(isolate, s);
911 : }
912 : // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
913 : // in the root locale needs to be adjusted for az, lt and tr because even case
914 : // mapping of ASCII range characters are different in those locales.
915 : // Greek (el) does not require any adjustment.
916 1728 : if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
917 : (requested_locale == "lt") || (requested_locale == "az"))) {
918 387 : return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
919 : } else {
920 243 : if (to_upper) {
921 81 : return ConvertToUpper(isolate, s);
922 : }
923 162 : return ConvertToLower(isolate, s);
924 801 : }
925 : }
926 :
927 63286 : MaybeHandle<Object> Intl::StringLocaleCompare(Isolate* isolate,
928 : Handle<String> string1,
929 : Handle<String> string2,
930 : Handle<Object> locales,
931 : Handle<Object> options) {
932 : // We only cache the instance when both locales and options are undefined,
933 : // as that is the only case when the specified side-effects of examining
934 : // those arguments are unobservable.
935 : bool can_cache =
936 248316 : locales->IsUndefined(isolate) && options->IsUndefined(isolate);
937 63286 : if (can_cache) {
938 : // Both locales and options are undefined, check the cache.
939 : icu::Collator* cached_icu_collator =
940 : static_cast<icu::Collator*>(isolate->get_cached_icu_object(
941 60872 : Isolate::ICUObjectCacheType::kDefaultCollator));
942 : // We may use the cached icu::Collator for a fast path.
943 60872 : if (cached_icu_collator != nullptr) {
944 : return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
945 60835 : string2);
946 : }
947 : }
948 :
949 : Handle<JSFunction> constructor = Handle<JSFunction>(
950 : JSFunction::cast(
951 4902 : isolate->context()->native_context()->intl_collator_function()),
952 4902 : isolate);
953 :
954 : Handle<JSCollator> collator;
955 4902 : ASSIGN_RETURN_ON_EXCEPTION(
956 : isolate, collator,
957 : New<JSCollator>(isolate, constructor, locales, options), Object);
958 2451 : if (can_cache) {
959 : isolate->set_icu_object_in_cache(
960 : Isolate::ICUObjectCacheType::kDefaultCollator,
961 : std::static_pointer_cast<icu::UObject>(
962 148 : collator->icu_collator()->get()));
963 : }
964 4902 : icu::Collator* icu_collator = collator->icu_collator()->raw();
965 2451 : return Intl::CompareStrings(isolate, *icu_collator, string1, string2);
966 : }
967 :
968 : // ecma402/#sec-collator-comparestrings
969 66465 : Handle<Object> Intl::CompareStrings(Isolate* isolate,
970 : const icu::Collator& icu_collator,
971 : Handle<String> string1,
972 : Handle<String> string2) {
973 : Factory* factory = isolate->factory();
974 :
975 66465 : string1 = String::Flatten(isolate, string1);
976 66465 : string2 = String::Flatten(isolate, string2);
977 :
978 : UCollationResult result;
979 66465 : UErrorCode status = U_ZERO_ERROR;
980 66465 : icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1);
981 132930 : icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2);
982 66465 : result = icu_collator.compare(string_val1, string_val2, status);
983 : DCHECK(U_SUCCESS(status));
984 :
985 132930 : return factory->NewNumberFromInt(result);
986 : }
987 :
988 : // ecma402/#sup-properties-of-the-number-prototype-object
989 1305 : MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
990 : Handle<Object> num,
991 : Handle<Object> locales,
992 : Handle<Object> options) {
993 : Handle<Object> number_obj;
994 2610 : ASSIGN_RETURN_ON_EXCEPTION(isolate, number_obj,
995 : Object::ToNumber(isolate, num), String);
996 :
997 1305 : double number = number_obj->Number();
998 :
999 : // We only cache the instance when both locales and options are undefined,
1000 : // as that is the only case when the specified side-effects of examining
1001 : // those arguments are unobservable.
1002 : bool can_cache =
1003 5130 : locales->IsUndefined(isolate) && options->IsUndefined(isolate);
1004 1305 : if (can_cache) {
1005 : icu::NumberFormat* cached_number_format =
1006 : static_cast<icu::NumberFormat*>(isolate->get_cached_icu_object(
1007 1260 : Isolate::ICUObjectCacheType::kDefaultNumberFormat));
1008 : // We may use the cached icu::NumberFormat for a fast path.
1009 1260 : if (cached_number_format != nullptr) {
1010 : return JSNumberFormat::FormatNumber(isolate, *cached_number_format,
1011 1225 : number);
1012 : }
1013 : }
1014 :
1015 : Handle<JSFunction> constructor = Handle<JSFunction>(
1016 : JSFunction::cast(
1017 160 : isolate->context()->native_context()->intl_number_format_function()),
1018 160 : isolate);
1019 : Handle<JSNumberFormat> number_format;
1020 : // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1021 160 : ASSIGN_RETURN_ON_EXCEPTION(
1022 : isolate, number_format,
1023 : New<JSNumberFormat>(isolate, constructor, locales, options), String);
1024 :
1025 71 : if (can_cache) {
1026 : isolate->set_icu_object_in_cache(
1027 : Isolate::ICUObjectCacheType::kDefaultNumberFormat,
1028 : std::static_pointer_cast<icu::UObject>(
1029 140 : number_format->icu_number_format()->get()));
1030 : }
1031 :
1032 : // Return FormatNumber(numberFormat, x).
1033 : icu::NumberFormat* icu_number_format =
1034 142 : number_format->icu_number_format()->raw();
1035 71 : return JSNumberFormat::FormatNumber(isolate, *icu_number_format, number);
1036 : }
1037 :
1038 : namespace {
1039 :
1040 : // ecma402/#sec-defaultnumberoption
1041 4263 : Maybe<int> DefaultNumberOption(Isolate* isolate, Handle<Object> value, int min,
1042 : int max, int fallback, Handle<String> property) {
1043 : // 2. Else, return fallback.
1044 8526 : if (value->IsUndefined()) return Just(fallback);
1045 :
1046 : // 1. If value is not undefined, then
1047 : // a. Let value be ? ToNumber(value).
1048 : Handle<Object> value_num;
1049 936 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1050 : isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
1051 : DCHECK(value_num->IsNumber());
1052 :
1053 : // b. If value is NaN or less than minimum or greater than maximum, throw a
1054 : // RangeError exception.
1055 2277 : if (value_num->IsNaN() || value_num->Number() < min ||
1056 873 : value_num->Number() > max) {
1057 216 : THROW_NEW_ERROR_RETURN_VALUE(
1058 : isolate,
1059 : NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
1060 : Nothing<int>());
1061 : }
1062 :
1063 : // The max and min arguments are integers and the above check makes
1064 : // sure that we are within the integer range making this double to
1065 : // int conversion safe.
1066 : //
1067 : // c. Return floor(value).
1068 720 : return Just(FastD2I(floor(value_num->Number())));
1069 : }
1070 :
1071 : // ecma402/#sec-getnumberoption
1072 4137 : Maybe<int> GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
1073 : Handle<String> property, int min, int max,
1074 : int fallback) {
1075 : // 1. Let value be ? Get(options, property).
1076 : Handle<Object> value;
1077 8274 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1078 : isolate, value, JSReceiver::GetProperty(isolate, options, property),
1079 : Nothing<int>());
1080 :
1081 : // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
1082 4137 : return DefaultNumberOption(isolate, value, min, max, fallback, property);
1083 : }
1084 :
1085 4137 : Maybe<int> GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
1086 : const char* property, int min, int max,
1087 : int fallback) {
1088 : Handle<String> property_str =
1089 4137 : isolate->factory()->NewStringFromAsciiChecked(property);
1090 4137 : return GetNumberOption(isolate, options, property_str, min, max, fallback);
1091 : }
1092 :
1093 : } // namespace
1094 :
1095 1412 : Maybe<bool> Intl::SetNumberFormatDigitOptions(Isolate* isolate,
1096 : icu::DecimalFormat* number_format,
1097 : Handle<JSReceiver> options,
1098 : int mnfd_default,
1099 : int mxfd_default) {
1100 1412 : CHECK_NOT_NULL(number_format);
1101 :
1102 : // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1103 : // 1).
1104 : int mnid;
1105 1412 : if (!GetNumberOption(isolate, options, "minimumIntegerDigits", 1, 21, 1)
1106 2824 : .To(&mnid)) {
1107 : return Nothing<bool>();
1108 : }
1109 :
1110 : // 6. Let mnfd be ? GetNumberOption(options, "minimumFractionDigits", 0, 20,
1111 : // mnfdDefault).
1112 : int mnfd;
1113 1367 : if (!GetNumberOption(isolate, options, "minimumFractionDigits", 0, 20,
1114 : mnfd_default)
1115 2734 : .To(&mnfd)) {
1116 : return Nothing<bool>();
1117 : }
1118 :
1119 : // 7. Let mxfdActualDefault be max( mnfd, mxfdDefault ).
1120 1358 : int mxfd_actual_default = std::max(mnfd, mxfd_default);
1121 :
1122 : // 8. Let mxfd be ? GetNumberOption(options,
1123 : // "maximumFractionDigits", mnfd, 20, mxfdActualDefault).
1124 : int mxfd;
1125 1358 : if (!GetNumberOption(isolate, options, "maximumFractionDigits", mnfd, 20,
1126 : mxfd_actual_default)
1127 2716 : .To(&mxfd)) {
1128 : return Nothing<bool>();
1129 : }
1130 :
1131 : // 9. Let mnsd be ? Get(options, "minimumSignificantDigits").
1132 : Handle<Object> mnsd_obj;
1133 : Handle<String> mnsd_str =
1134 : isolate->factory()->minimumSignificantDigits_string();
1135 2698 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1136 : isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
1137 : Nothing<bool>());
1138 :
1139 : // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
1140 : Handle<Object> mxsd_obj;
1141 : Handle<String> mxsd_str =
1142 : isolate->factory()->maximumSignificantDigits_string();
1143 2698 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1144 : isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
1145 : Nothing<bool>());
1146 :
1147 : // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1148 1349 : number_format->setMinimumIntegerDigits(mnid);
1149 :
1150 : // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
1151 1349 : number_format->setMinimumFractionDigits(mnfd);
1152 :
1153 : // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
1154 1349 : number_format->setMaximumFractionDigits(mxfd);
1155 :
1156 : bool significant_digits_used = false;
1157 : // 14. If mnsd is not undefined or mxsd is not undefined, then
1158 5306 : if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
1159 : // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1160 : int mnsd;
1161 144 : if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
1162 : return Nothing<bool>();
1163 : }
1164 :
1165 : // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1166 : int mxsd;
1167 54 : if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
1168 108 : .To(&mxsd)) {
1169 : return Nothing<bool>();
1170 : }
1171 :
1172 : significant_digits_used = true;
1173 :
1174 : // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1175 27 : number_format->setMinimumSignificantDigits(mnsd);
1176 :
1177 : // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1178 27 : number_format->setMaximumSignificantDigits(mxsd);
1179 : }
1180 :
1181 1304 : number_format->setSignificantDigitsUsed(significant_digits_used);
1182 1304 : number_format->setRoundingMode(icu::DecimalFormat::kRoundHalfUp);
1183 : return Just(true);
1184 : }
1185 :
1186 : namespace {
1187 :
// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale| (cut at '-' boundaries) that is an
// element of |available_locales|, or an empty string when no prefix matches.
// When the subtag left at the end after a cut would be a single character,
// that subtag is removed in the same step.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  // 2. Repeat,
  for (std::string candidate = locale;;) {
    // 2.a. If availableLocales contains an element equal to candidate,
    //      return candidate.
    if (available_locales.count(candidate) != 0) return candidate;

    // 2.b. Let pos be the index of the last "-" within candidate. If there
    //      is none, return undefined (represented by the empty string).
    const size_t last_dash = candidate.rfind('-');
    if (last_dash == std::string::npos) return std::string();

    // 2.c. If pos >= 2 and "-" occurs at index pos-2, decrease pos by 2.
    const bool preceded_by_singleton =
        last_dash >= 2 && candidate[last_dash - 2] == '-';

    // 2.d. Let candidate be its own prefix of length pos.
    candidate.resize(preceded_by_singleton ? last_dash - 2 : last_dash);
  }
}
1221 :
1222 19112 : struct ParsedLocale {
1223 : std::string no_extensions_locale;
1224 : std::string extension;
1225 : };
1226 :
1227 : // Returns a struct containing a bcp47 tag without unicode extensions
1228 : // and the removed unicode extensions.
1229 : //
1230 : // For example, given 'en-US-u-co-emoji' returns 'en-US' and
1231 : // 'u-co-emoji'.
1232 9556 : ParsedLocale ParseBCP47Locale(const std::string& locale) {
1233 : size_t length = locale.length();
1234 : ParsedLocale parsed_locale;
1235 :
1236 : // Privateuse or grandfathered locales have no extension sequences.
1237 19112 : if ((length > 1) && (locale[1] == '-')) {
1238 : // Check to make sure that this really is a grandfathered or
1239 : // privateuse extension. ICU can sometimes mess up the
1240 : // canonicalization.
1241 63 : CHECK(locale[0] == 'x' || locale[0] == 'i');
1242 63 : parsed_locale.no_extensions_locale = locale;
1243 : return parsed_locale;
1244 : }
1245 :
1246 9493 : size_t unicode_extension_start = locale.find("-u-");
1247 :
1248 : // No unicode extensions found.
1249 9493 : if (unicode_extension_start == std::string::npos) {
1250 6586 : parsed_locale.no_extensions_locale = locale;
1251 : return parsed_locale;
1252 : }
1253 :
1254 2907 : size_t private_extension_start = locale.find("-x-");
1255 :
1256 : // Unicode extensions found within privateuse subtags don't count.
1257 5814 : if (private_extension_start != std::string::npos &&
1258 2907 : private_extension_start < unicode_extension_start) {
1259 36 : parsed_locale.no_extensions_locale = locale;
1260 : return parsed_locale;
1261 : }
1262 :
1263 2871 : const std::string beginning = locale.substr(0, unicode_extension_start);
1264 : size_t unicode_extension_end = length;
1265 : DCHECK_GT(length, 2);
1266 :
1267 : // Find the end of the extension production as per the bcp47 grammar
1268 : // by looking for '-' followed by 2 chars and then another '-'.
1269 23958 : for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
1270 21096 : if (locale[i] != '-') continue;
1271 :
1272 19530 : if (locale[i + 2] == '-') {
1273 : unicode_extension_end = i;
1274 : break;
1275 : }
1276 :
1277 : i += 2;
1278 : }
1279 :
1280 2871 : const std::string end = locale.substr(unicode_extension_end);
1281 5742 : parsed_locale.no_extensions_locale = beginning + end;
1282 5742 : parsed_locale.extension = locale.substr(
1283 : unicode_extension_start, unicode_extension_end - unicode_extension_start);
1284 : return parsed_locale;
1285 : }
1286 :
1287 : // ecma402/#sec-lookupsupportedlocales
1288 585 : std::vector<std::string> LookupSupportedLocales(
1289 : const std::set<std::string>& available_locales,
1290 : const std::vector<std::string>& requested_locales) {
1291 : // 1. Let subset be a new empty List.
1292 : std::vector<std::string> subset;
1293 :
1294 : // 2. For each element locale of requestedLocales in List order, do
1295 1719 : for (const std::string& locale : requested_locales) {
1296 : // 2. a. Let noExtensionsLocale be the String value that is locale
1297 : // with all Unicode locale extension sequences removed.
1298 : std::string no_extension_locale =
1299 1098 : ParseBCP47Locale(locale).no_extensions_locale;
1300 :
1301 : // 2. b. Let availableLocale be
1302 : // BestAvailableLocale(availableLocales, noExtensionsLocale).
1303 : std::string available_locale =
1304 549 : BestAvailableLocale(available_locales, no_extension_locale);
1305 :
1306 : // 2. c. If availableLocale is not undefined, append locale to the
1307 : // end of subset.
1308 549 : if (!available_locale.empty()) {
1309 441 : subset.push_back(locale);
1310 : }
1311 : }
1312 :
1313 : // 3. Return subset.
1314 585 : return subset;
1315 : }
1316 :
1317 : // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1318 : // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
1319 : std::vector<std::string> BestFitSupportedLocales(
1320 : const std::set<std::string>& available_locales,
1321 : const std::vector<std::string>& requested_locales) {
1322 549 : return LookupSupportedLocales(available_locales, requested_locales);
1323 : }
1324 :
1325 : // ecma262 #sec-createarrayfromlist
1326 891 : Handle<JSArray> CreateArrayFromList(Isolate* isolate,
1327 1638 : std::vector<std::string> elements,
1328 : PropertyAttributes attr) {
1329 : Factory* factory = isolate->factory();
1330 : // Let array be ! ArrayCreate(0).
1331 891 : Handle<JSArray> array = factory->NewJSArray(0);
1332 :
1333 891 : uint32_t length = static_cast<uint32_t>(elements.size());
1334 : // 3. Let n be 0.
1335 : // 4. For each element e of elements, do
1336 1638 : for (uint32_t i = 0; i < length; i++) {
1337 : // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1338 747 : const std::string& part = elements[i];
1339 : Handle<String> value =
1340 1494 : factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
1341 747 : JSObject::AddDataElement(array, i, value, attr);
1342 : }
1343 : // 5. Return array.
1344 891 : return array;
1345 : }
1346 :
1347 : // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1348 : // https://tc39.github.io/ecma402/#sec-supportedlocales
1349 621 : MaybeHandle<JSObject> SupportedLocales(
1350 : Isolate* isolate, const char* method,
1351 : const std::set<std::string>& available_locales,
1352 : const std::vector<std::string>& requested_locales, Handle<Object> options) {
1353 : std::vector<std::string> supported_locales;
1354 :
1355 : // 2. Else, let matcher be "best fit".
1356 : Intl::MatcherOption matcher = Intl::MatcherOption::kBestFit;
1357 :
1358 : // 1. If options is not undefined, then
1359 1242 : if (!options->IsUndefined(isolate)) {
1360 : // 1. a. Let options be ? ToObject(options).
1361 : Handle<JSReceiver> options_obj;
1362 216 : ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
1363 : Object::ToObject(isolate, options), JSObject);
1364 :
1365 : // 1. b. Let matcher be ? GetOption(options, "localeMatcher", "string",
1366 : // « "lookup", "best fit" », "best fit").
1367 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
1368 108 : Intl::GetLocaleMatcher(isolate, options_obj, method);
1369 108 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1370 : matcher = maybe_locale_matcher.FromJust();
1371 : }
1372 :
1373 : // 3. If matcher is "best fit", then
1374 : // a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
1375 : // requestedLocales).
1376 585 : if (matcher == Intl::MatcherOption::kBestFit) {
1377 549 : supported_locales =
1378 : BestFitSupportedLocales(available_locales, requested_locales);
1379 : } else {
1380 : // 4. Else,
1381 : // a. Let supportedLocales be LookupSupportedLocales(availableLocales,
1382 : // requestedLocales).
1383 : DCHECK_EQ(matcher, Intl::MatcherOption::kLookup);
1384 72 : supported_locales =
1385 : LookupSupportedLocales(available_locales, requested_locales);
1386 : }
1387 :
1388 : // 5. Return CreateArrayFromList(supportedLocales).
1389 : PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1390 585 : return CreateArrayFromList(isolate, supported_locales, attr);
1391 : }
1392 :
1393 : } // namespace
1394 :
1395 : // ecma-402 #sec-intl.getcanonicallocales
1396 315 : MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
1397 : Handle<Object> locales) {
1398 : // 1. Let ll be ? CanonicalizeLocaleList(locales).
1399 : Maybe<std::vector<std::string>> maybe_ll =
1400 315 : CanonicalizeLocaleList(isolate, locales, false);
1401 315 : MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
1402 :
1403 : // 2. Return CreateArrayFromList(ll).
1404 : PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1405 306 : return CreateArrayFromList(isolate, maybe_ll.FromJust(), attr);
1406 : }
1407 :
1408 : // ECMA 402 Intl.*.supportedLocalesOf
1409 657 : MaybeHandle<JSObject> Intl::SupportedLocalesOf(
1410 : Isolate* isolate, const char* method,
1411 : const std::set<std::string>& available_locales, Handle<Object> locales,
1412 : Handle<Object> options) {
1413 : // Let availableLocales be %Collator%.[[AvailableLocales]].
1414 :
1415 : // Let requestedLocales be ? CanonicalizeLocaleList(locales).
1416 : Maybe<std::vector<std::string>> requested_locales =
1417 657 : CanonicalizeLocaleList(isolate, locales, false);
1418 657 : MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
1419 :
1420 : // Return ? SupportedLocales(availableLocales, requestedLocales, options).
1421 : return SupportedLocales(isolate, method, available_locales,
1422 621 : requested_locales.FromJust(), options);
1423 : }
1424 :
1425 : namespace {
1426 : template <typename T>
1427 720 : bool IsValidExtension(const icu::Locale& locale, const char* key,
1428 : const std::string& value) {
1429 720 : UErrorCode status = U_ZERO_ERROR;
1430 : std::unique_ptr<icu::StringEnumeration> enumeration(
1431 : T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
1432 720 : false, status));
1433 720 : if (U_SUCCESS(status)) {
1434 : int32_t length;
1435 720 : std::string legacy_type(uloc_toLegacyType(key, value.c_str()));
1436 5868 : for (const char* item = enumeration->next(&length, status); item != nullptr;
1437 5148 : item = enumeration->next(&length, status)) {
1438 11610 : if (U_SUCCESS(status) && legacy_type == item) {
1439 : return true;
1440 : }
1441 : }
1442 : }
1443 : return false;
1444 : }
1445 :
1446 : bool IsValidCalendar(const icu::Locale& locale, const std::string& value) {
1447 603 : return IsValidExtension<icu::Calendar>(locale, "calendar", value);
1448 : }
1449 :
1450 189 : bool IsValidCollation(const icu::Locale& locale, const std::string& value) {
1451 378 : std::set<std::string> invalid_values = {"standard", "search"};
1452 189 : if (invalid_values.find(value) != invalid_values.end()) return false;
1453 117 : return IsValidExtension<icu::Collator>(locale, "collation", value);
1454 : }
1455 :
1456 1260 : bool IsValidNumberingSystem(const std::string& value) {
1457 2520 : std::set<std::string> invalid_values = {"native", "traditio", "finance"};
1458 1260 : if (invalid_values.find(value) != invalid_values.end()) return false;
1459 1206 : UErrorCode status = U_ZERO_ERROR;
1460 : std::unique_ptr<icu::NumberingSystem> numbering_system(
1461 1206 : icu::NumberingSystem::createInstanceByName(value.c_str(), status));
1462 1206 : return U_SUCCESS(status) && numbering_system.get() != nullptr;
1463 : }
1464 :
1465 10785 : std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
1466 : icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
1467 : std::map<std::string, std::string> extensions;
1468 :
1469 10785 : UErrorCode status = U_ZERO_ERROR;
1470 : std::unique_ptr<icu::StringEnumeration> keywords(
1471 10785 : icu_locale->createKeywords(status));
1472 10785 : if (U_FAILURE(status)) return extensions;
1473 :
1474 10785 : if (!keywords) return extensions;
1475 : char value[ULOC_FULLNAME_CAPACITY];
1476 :
1477 : int32_t length;
1478 2835 : status = U_ZERO_ERROR;
1479 7425 : for (const char* keyword = keywords->next(&length, status);
1480 4590 : keyword != nullptr; keyword = keywords->next(&length, status)) {
1481 : // Ignore failures in ICU and skip to the next keyword.
1482 : //
1483 : // This is fine.™
1484 4590 : if (U_FAILURE(status)) {
1485 0 : status = U_ZERO_ERROR;
1486 0 : continue;
1487 : }
1488 :
1489 4590 : icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
1490 :
1491 : // Ignore failures in ICU and skip to the next keyword.
1492 : //
1493 : // This is fine.™
1494 4590 : if (U_FAILURE(status)) {
1495 0 : status = U_ZERO_ERROR;
1496 0 : continue;
1497 : }
1498 :
1499 4590 : const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
1500 :
1501 18333 : if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
1502 2385 : const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
1503 : bool is_valid_value = false;
1504 : // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
1505 2385 : if (strcmp("ca", bcp47_key) == 0) {
1506 1206 : is_valid_value = IsValidCalendar(*icu_locale, bcp47_value);
1507 1782 : } else if (strcmp("co", bcp47_key) == 0) {
1508 378 : is_valid_value = IsValidCollation(*icu_locale, bcp47_value);
1509 1593 : } else if (strcmp("hc", bcp47_key) == 0) {
1510 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
1511 396 : std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
1512 396 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1513 1395 : } else if (strcmp("lb", bcp47_key) == 0) {
1514 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
1515 270 : std::set<std::string> valid_values = {"strict", "normal", "loose"};
1516 270 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1517 1260 : } else if (strcmp("kn", bcp47_key) == 0) {
1518 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1519 0 : std::set<std::string> valid_values = {"true", "false"};
1520 0 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1521 1260 : } else if (strcmp("kf", bcp47_key) == 0) {
1522 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1523 0 : std::set<std::string> valid_values = {"upper", "lower", "false"};
1524 0 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1525 1260 : } else if (strcmp("nu", bcp47_key) == 0) {
1526 2520 : is_valid_value = IsValidNumberingSystem(bcp47_value);
1527 : }
1528 2385 : if (is_valid_value) {
1529 : extensions.insert(
1530 3924 : std::pair<std::string, std::string>(bcp47_key, bcp47_value));
1531 1962 : continue;
1532 : }
1533 : }
1534 2628 : status = U_ZERO_ERROR;
1535 2628 : icu_locale->setKeywordValue(keyword, nullptr, status);
1536 5256 : CHECK(U_SUCCESS(status));
1537 : }
1538 :
1539 : return extensions;
1540 : }
1541 :
1542 : // ecma402/#sec-lookupmatcher
1543 10785 : std::string LookupMatcher(Isolate* isolate,
1544 : const std::set<std::string>& available_locales,
1545 : const std::vector<std::string>& requested_locales) {
1546 : // 1. Let result be a new Record.
1547 : std::string result;
1548 :
1549 : // 2. For each element locale of requestedLocales in List order, do
1550 21714 : for (const std::string& locale : requested_locales) {
1551 : // 2. a. Let noExtensionsLocale be the String value that is locale
1552 : // with all Unicode locale extension sequences removed.
1553 9007 : ParsedLocale parsed_locale = ParseBCP47Locale(locale);
1554 9007 : std::string no_extensions_locale = parsed_locale.no_extensions_locale;
1555 :
1556 : // 2. b. Let availableLocale be
1557 : // BestAvailableLocale(availableLocales, noExtensionsLocale).
1558 : std::string available_locale =
1559 9007 : BestAvailableLocale(available_locales, no_extensions_locale);
1560 :
1561 : // 2. c. If availableLocale is not undefined, append locale to the
1562 : // end of subset.
1563 9007 : if (!available_locale.empty()) {
1564 : // Note: The following steps are not performed here because we
1565 : // can use ICU to parse the unicode locale extension sequence
1566 : // as part of Intl::ResolveLocale.
1567 : //
1568 : // There's no need to separate the unicode locale extensions
1569 : // right here. Instead just return the available locale with the
1570 : // extensions.
1571 : //
1572 : // 2. c. i. Set result.[[locale]] to availableLocale.
1573 : // 2. c. ii. If locale and noExtensionsLocale are not the same
1574 : // String value, then
1575 : // 2. c. ii. 1. Let extension be the String value consisting of
1576 : // the first substring of locale that is a Unicode locale
1577 : // extension sequence.
1578 : // 2. c. ii. 2. Set result.[[extension]] to extension.
1579 : // 2. c. iii. Return result.
1580 : return available_locale + parsed_locale.extension;
1581 : }
1582 144 : }
1583 :
1584 : // 3. Let defLocale be DefaultLocale();
1585 : // 4. Set result.[[locale]] to defLocale.
1586 : // 5. Return result.
1587 1922 : return DefaultLocale(isolate);
1588 : }
1589 :
1590 : } // namespace
1591 :
1592 : // This function doesn't correspond exactly with the spec. Instead
1593 : // we use ICU to do all the string manipulations that the spec
1594 : // peforms.
1595 : //
1596 : // The spec uses this function to normalize values for various
1597 : // relevant extension keys (such as disallowing "search" for
1598 : // collation). Instead of doing this here, we let the callers of
1599 : // this method perform such normalization.
1600 : //
1601 : // ecma402/#sec-resolvelocale
1602 10785 : Intl::ResolvedLocale Intl::ResolveLocale(
1603 : Isolate* isolate, const std::set<std::string>& available_locales,
1604 : const std::vector<std::string>& requested_locales, MatcherOption matcher,
1605 : const std::set<std::string>& relevant_extension_keys) {
1606 : std::string locale;
1607 10785 : if (matcher == Intl::MatcherOption::kLookup) {
1608 21246 : locale = LookupMatcher(isolate, available_locales, requested_locales);
1609 162 : } else if (matcher == Intl::MatcherOption::kBestFit) {
1610 : // TODO(intl): Implement better lookup algorithm.
1611 324 : locale = LookupMatcher(isolate, available_locales, requested_locales);
1612 : }
1613 :
1614 21570 : icu::Locale icu_locale = CreateICULocale(locale);
1615 : std::map<std::string, std::string> extensions =
1616 10785 : LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
1617 :
1618 21570 : std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
1619 :
1620 : // TODO(gsathya): Remove privateuse subtags from extensions.
1621 :
1622 21570 : return Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions};
1623 : }
1624 :
1625 4361 : Managed<icu::UnicodeString> Intl::SetTextToBreakIterator(
1626 : Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
1627 : icu::UnicodeString* u_text =
1628 4361 : (icu::UnicodeString*)(Intl::ToICUUnicodeString(isolate, text).clone());
1629 :
1630 : Handle<Managed<icu::UnicodeString>> new_u_text =
1631 4361 : Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
1632 :
1633 4361 : break_iterator->setText(*u_text);
1634 4361 : return *new_u_text;
1635 : }
1636 :
1637 : // ecma262 #sec-string.prototype.normalize
1638 2313 : MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
1639 : Handle<Object> form_input) {
1640 : const char* form_name;
1641 : UNormalization2Mode form_mode;
1642 4626 : if (form_input->IsUndefined(isolate)) {
1643 : // default is FNC
1644 : form_name = "nfc";
1645 : form_mode = UNORM2_COMPOSE;
1646 : } else {
1647 : Handle<String> form;
1648 3780 : ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
1649 : Object::ToString(isolate, form_input), String);
1650 :
1651 1890 : if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
1652 : form_name = "nfc";
1653 : form_mode = UNORM2_COMPOSE;
1654 1476 : } else if (String::Equals(isolate, form,
1655 : isolate->factory()->NFD_string())) {
1656 : form_name = "nfc";
1657 : form_mode = UNORM2_DECOMPOSE;
1658 1071 : } else if (String::Equals(isolate, form,
1659 : isolate->factory()->NFKC_string())) {
1660 : form_name = "nfkc";
1661 : form_mode = UNORM2_COMPOSE;
1662 657 : } else if (String::Equals(isolate, form,
1663 : isolate->factory()->NFKD_string())) {
1664 : form_name = "nfkc";
1665 : form_mode = UNORM2_DECOMPOSE;
1666 : } else {
1667 : Handle<String> valid_forms =
1668 99 : isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
1669 99 : THROW_NEW_ERROR(
1670 : isolate,
1671 : NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
1672 : String);
1673 : }
1674 : }
1675 :
1676 : int length = string->length();
1677 2214 : string = String::Flatten(isolate, string);
1678 : icu::UnicodeString result;
1679 : std::unique_ptr<uc16[]> sap;
1680 2214 : UErrorCode status = U_ZERO_ERROR;
1681 4428 : icu::UnicodeString input = ToICUUnicodeString(isolate, string);
1682 : // Getting a singleton. Should not free it.
1683 : const icu::Normalizer2* normalizer =
1684 2214 : icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
1685 : DCHECK(U_SUCCESS(status));
1686 2214 : CHECK_NOT_NULL(normalizer);
1687 : int32_t normalized_prefix_length =
1688 2214 : normalizer->spanQuickCheckYes(input, status);
1689 : // Quick return if the input is already normalized.
1690 2214 : if (length == normalized_prefix_length) return string;
1691 : icu::UnicodeString unnormalized =
1692 1926 : input.tempSubString(normalized_prefix_length);
1693 : // Read-only alias of the normalized prefix.
1694 1926 : result.setTo(false, input.getBuffer(), normalized_prefix_length);
1695 : // copy-on-write; normalize the suffix and append to |result|.
1696 963 : normalizer->normalizeSecondAndAppend(result, unnormalized, status);
1697 :
1698 963 : if (U_FAILURE(status)) {
1699 0 : THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
1700 : }
1701 :
1702 3177 : return Intl::ToString(isolate, result);
1703 : }
1704 :
1705 : // ICUTimezoneCache calls out to ICU for TimezoneCache
1706 : // functionality in a straightforward way.
1707 : class ICUTimezoneCache : public base::TimezoneCache {
1708 : public:
1709 125756 : ICUTimezoneCache() : timezone_(nullptr) { Clear(); }
1710 :
1711 188589 : ~ICUTimezoneCache() override { Clear(); };
1712 :
1713 : const char* LocalTimezone(double time_ms) override;
1714 :
1715 : double DaylightSavingsOffset(double time_ms) override;
1716 :
1717 : double LocalTimeOffset(double time_ms, bool is_utc) override;
1718 :
1719 : void Clear() override;
1720 :
1721 : private:
1722 : icu::TimeZone* GetTimeZone();
1723 :
1724 : bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
1725 : int32_t* dst_offset);
1726 :
1727 : icu::TimeZone* timezone_;
1728 :
1729 : std::string timezone_name_;
1730 : std::string dst_timezone_name_;
1731 : };
1732 :
1733 120 : const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
1734 120 : bool is_dst = DaylightSavingsOffset(time_ms) != 0;
1735 120 : std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
1736 120 : if (name->empty()) {
1737 : icu::UnicodeString result;
1738 240 : GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
1739 : result += '\0';
1740 :
1741 : icu::StringByteSink<std::string> byte_sink(name);
1742 240 : result.toUTF8(byte_sink);
1743 : }
1744 : DCHECK(!name->empty());
1745 120 : return name->c_str();
1746 : }
1747 :
1748 0 : icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
1749 135172 : if (timezone_ == nullptr) {
1750 205 : timezone_ = icu::TimeZone::createDefault();
1751 : }
1752 135172 : return timezone_;
1753 : }
1754 :
1755 135052 : bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
1756 : int32_t* raw_offset, int32_t* dst_offset) {
1757 135052 : UErrorCode status = U_ZERO_ERROR;
1758 : // TODO(jshin): ICU TimeZone class handles skipped time differently from
1759 : // Ecma 262 (https://github.com/tc39/ecma262/pull/778) and icu::TimeZone
1760 : // class does not expose the necessary API. Fixing
1761 : // http://bugs.icu-project.org/trac/ticket/13268 would make it easy to
1762 : // implement the proposed spec change. A proposed fix for ICU is
1763 : // https://chromium-review.googlesource.com/851265 .
1764 : // In the meantime, use an internal (still public) API of icu::BasicTimeZone.
1765 : // Once it's accepted by the upstream, get rid of cast. Note that casting
1766 : // TimeZone to BasicTimeZone is safe because we know that icu::TimeZone used
1767 : // here is a BasicTimeZone.
1768 135052 : if (is_utc) {
1769 132189 : GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
1770 : } else {
1771 : static_cast<const icu::BasicTimeZone*>(GetTimeZone())
1772 : ->getOffsetFromLocal(time_ms, icu::BasicTimeZone::kFormer,
1773 : icu::BasicTimeZone::kFormer, *raw_offset,
1774 2863 : *dst_offset, status);
1775 : }
1776 :
1777 270104 : return U_SUCCESS(status);
1778 : }
1779 :
1780 19185 : double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
1781 : int32_t raw_offset, dst_offset;
1782 19185 : if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
1783 19185 : return dst_offset;
1784 : }
1785 :
1786 115867 : double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
1787 : int32_t raw_offset, dst_offset;
1788 115867 : if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
1789 115867 : return raw_offset + dst_offset;
1790 : }
1791 :
1792 188619 : void ICUTimezoneCache::Clear() {
1793 188619 : delete timezone_;
1794 188619 : timezone_ = nullptr;
1795 188619 : timezone_name_.clear();
1796 188619 : dst_timezone_name_.clear();
1797 188619 : }
1798 :
1799 62888 : base::TimezoneCache* Intl::CreateTimeZoneCache() {
1800 : return FLAG_icu_timezone_data ? new ICUTimezoneCache()
1801 62898 : : base::OS::CreateTimezoneCache();
1802 : }
1803 :
1804 3020 : Maybe<Intl::CaseFirst> Intl::GetCaseFirst(Isolate* isolate,
1805 : Handle<JSReceiver> options,
1806 : const char* method) {
1807 : return Intl::GetStringOption<Intl::CaseFirst>(
1808 : isolate, options, "caseFirst", method, {"upper", "lower", "false"},
1809 : {Intl::CaseFirst::kUpper, Intl::CaseFirst::kLower,
1810 : Intl::CaseFirst::kFalse},
1811 9060 : Intl::CaseFirst::kUndefined);
1812 : }
1813 :
1814 3126 : Maybe<Intl::HourCycle> Intl::GetHourCycle(Isolate* isolate,
1815 : Handle<JSReceiver> options,
1816 : const char* method) {
1817 : return Intl::GetStringOption<Intl::HourCycle>(
1818 : isolate, options, "hourCycle", method, {"h11", "h12", "h23", "h24"},
1819 : {Intl::HourCycle::kH11, Intl::HourCycle::kH12, Intl::HourCycle::kH23,
1820 : Intl::HourCycle::kH24},
1821 9378 : Intl::HourCycle::kUndefined);
1822 : }
1823 :
1824 10929 : Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
1825 : Handle<JSReceiver> options,
1826 : const char* method) {
1827 : return Intl::GetStringOption<Intl::MatcherOption>(
1828 : isolate, options, "localeMatcher", method, {"best fit", "lookup"},
1829 : {Intl::MatcherOption::kLookup, Intl::MatcherOption::kBestFit},
1830 32787 : Intl::MatcherOption::kLookup);
1831 : }
1832 :
1833 108 : Intl::HourCycle Intl::ToHourCycle(const std::string& hc) {
1834 108 : if (hc == "h11") return Intl::HourCycle::kH11;
1835 81 : if (hc == "h12") return Intl::HourCycle::kH12;
1836 54 : if (hc == "h23") return Intl::HourCycle::kH23;
1837 27 : if (hc == "h24") return Intl::HourCycle::kH24;
1838 0 : return Intl::HourCycle::kUndefined;
1839 : }
1840 :
1841 : } // namespace internal
1842 183867 : } // namespace v8
|