Line data Source code
1 : // Copyright 2013 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/intl-objects.h"
10 :
11 : #include <algorithm>
12 : #include <memory>
13 : #include <string>
14 : #include <vector>
15 :
16 : #include "src/api-inl.h"
17 : #include "src/global-handles.h"
18 : #include "src/heap/factory.h"
19 : #include "src/isolate.h"
20 : #include "src/objects-inl.h"
21 : #include "src/objects/js-collator-inl.h"
22 : #include "src/objects/js-date-time-format-inl.h"
23 : #include "src/objects/js-locale-inl.h"
24 : #include "src/objects/js-number-format-inl.h"
25 : #include "src/objects/string.h"
26 : #include "src/property-descriptor.h"
27 : #include "src/string-case.h"
28 : #include "unicode/basictz.h"
29 : #include "unicode/brkiter.h"
30 : #include "unicode/calendar.h"
31 : #include "unicode/coll.h"
32 : #include "unicode/datefmt.h"
33 : #include "unicode/decimfmt.h"
34 : #include "unicode/locid.h"
35 : #include "unicode/normalizer2.h"
36 : #include "unicode/numfmt.h"
37 : #include "unicode/numsys.h"
38 : #include "unicode/timezone.h"
39 : #include "unicode/ustring.h"
40 : #include "unicode/uvernum.h" // U_ICU_VERSION_MAJOR_NUM
41 :
42 : #define XSTR(s) STR(s)
43 : #define STR(s) #s
44 : static_assert(
45 : V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
46 : "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
47 : #undef STR
48 : #undef XSTR
49 :
50 : namespace v8 {
51 : namespace internal {
52 :
53 : namespace {
// Returns true iff |ch| is an ASCII uppercase letter ('A'..'Z').
inline bool IsASCIIUpper(uint16_t ch) {
  // A single range check: values below 'A' wrap around to large numbers after
  // the cast and therefore fail the comparison.
  return static_cast<uint16_t>(ch - 'A') <= 'Z' - 'A';
}
55 :
// Lowercase mapping for every Latin-1 code point, indexed by the code point.
// 'A'..'Z' map to 'a'..'z', and 0xC0..0xDE map to 0xE0..0xFE except 0xD7,
// which maps to itself. Every other entry is the identity.
const uint8_t kToLower[256] = {
    // 0x00 - 0x0F
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    // 0x10 - 0x1F
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    // 0x20 - 0x2F
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    // 0x30 - 0x3F
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40 - 0x4F ('A'..'O' -> 'a'..'o')
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x50 - 0x5F ('P'..'Z' -> 'p'..'z')
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60 - 0x6F
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x70 - 0x7F
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    // 0x80 - 0x8F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    // 0x90 - 0x9F
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    // 0xA0 - 0xAF
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    // 0xB0 - 0xBF
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0 - 0xCF (-> 0xE0 - 0xEF)
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xD0 - 0xDF (-> 0xF0 - 0xFE, with 0xD7 and 0xDF unchanged)
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0 - 0xEF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xF0 - 0xFF
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
80 :
81 : inline uint16_t ToLatin1Lower(uint16_t ch) {
82 1710 : return static_cast<uint16_t>(kToLower[ch]);
83 : }
84 :
// Uppercases |ch| if it is an ASCII lowercase letter; any other value is
// returned unchanged. ASCII upper/lower pairs differ only in bit 5 (0x20).
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  const int case_bit = is_ascii_lower ? 0x20 : 0;
  return static_cast<uint16_t>(ch & ~case_bit);
}
88 :
89 : // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
90 : inline uint16_t ToLatin1Upper(uint16_t ch) {
91 : DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
92 : return ch &
93 1337 : ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
94 1337 : << 5);
95 : }
96 :
97 : template <typename Char>
98 0 : bool ToUpperFastASCII(const Vector<const Char>& src,
99 : Handle<SeqOneByteString> result) {
100 : // Do a faster loop for the case where all the characters are ASCII.
101 : uint16_t ored = 0;
102 : int32_t index = 0;
103 0 : for (auto it = src.begin(); it != src.end(); ++it) {
104 0 : uint16_t ch = static_cast<uint16_t>(*it);
105 0 : ored |= ch;
106 0 : result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
107 : }
108 0 : return !(ored & ~0x7F);
109 : }
110 :
111 : const uint16_t sharp_s = 0xDF;
112 :
113 : template <typename Char>
114 212 : bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
115 : int* sharp_s_count) {
116 : // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
117 :
118 : // There are two special cases.
119 : // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
120 : // 2. Lower case sharp-S converts to "SS" (two characters)
121 212 : *sharp_s_count = 0;
122 2094 : for (auto it = src.begin(); it != src.end(); ++it) {
123 1004 : uint16_t ch = static_cast<uint16_t>(*it);
124 1004 : if (V8_UNLIKELY(ch == sharp_s)) {
125 99 : ++(*sharp_s_count);
126 99 : continue;
127 : }
128 905 : if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
129 : // Since this upper-cased character does not fit in an 8-bit string, we
130 : // need to take the 16-bit path.
131 : return false;
132 : }
133 1684 : *dest++ = ToLatin1Upper(ch);
134 : }
135 :
136 : return true;
137 : }
138 :
139 : template <typename Char>
140 81 : void ToUpperWithSharpS(const Vector<const Char>& src,
141 : Handle<SeqOneByteString> result) {
142 : int32_t dest_index = 0;
143 1269 : for (auto it = src.begin(); it != src.end(); ++it) {
144 594 : uint16_t ch = static_cast<uint16_t>(*it);
145 594 : if (ch == sharp_s) {
146 99 : result->SeqOneByteStringSet(dest_index++, 'S');
147 99 : result->SeqOneByteStringSet(dest_index++, 'S');
148 : } else {
149 495 : result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
150 : }
151 : }
152 81 : }
153 :
154 68 : inline int FindFirstUpperOrNonAscii(String s, int length) {
155 940 : for (int index = 0; index < length; ++index) {
156 : uint16_t ch = s->Get(index);
157 436 : if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
158 : return index;
159 : }
160 : }
161 : return length;
162 : }
163 :
164 348628 : const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
165 : std::unique_ptr<uc16[]>* dest,
166 : int32_t length) {
167 : DCHECK(flat.IsFlat());
168 348628 : if (flat.IsOneByte()) {
169 339696 : if (!*dest) {
170 339624 : dest->reset(NewArray<uc16>(length));
171 : CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
172 : }
173 339696 : return reinterpret_cast<const UChar*>(dest->get());
174 : } else {
175 : return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
176 : }
177 : }
178 :
179 : template <typename T>
180 6160 : MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
181 : Handle<Object> locales, Handle<Object> options) {
182 : Handle<JSObject> result;
183 12320 : ASSIGN_RETURN_ON_EXCEPTION(
184 : isolate, result,
185 : JSObject::New(constructor, constructor, Handle<AllocationSite>::null()),
186 : T);
187 6160 : return T::Initialize(isolate, Handle<T>::cast(result), locales, options);
188 : }
189 : } // namespace
190 :
191 62482 : const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
192 :
193 346292 : icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
194 : Handle<String> string) {
195 346292 : string = String::Flatten(isolate, string);
196 : {
197 : DisallowHeapAllocation no_gc;
198 346292 : std::unique_ptr<uc16[]> sap;
199 : return icu::UnicodeString(
200 692584 : GetUCharBufferFromFlat(string->GetFlatContent(no_gc), &sap,
201 : string->length()),
202 692584 : string->length());
203 : }
204 : }
205 :
206 : namespace {
207 2156 : MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
208 : bool is_to_upper, const char* lang) {
209 2156 : auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
210 : int32_t src_length = s->length();
211 : int32_t dest_length = src_length;
212 : UErrorCode status;
213 : Handle<SeqTwoByteString> result;
214 2156 : std::unique_ptr<uc16[]> sap;
215 :
216 2156 : if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();
217 :
218 : // This is not a real loop. It'll be executed only once (no overflow) or
219 : // twice (overflow).
220 2516 : for (int i = 0; i < 2; ++i) {
221 : // Case conversion can increase the string length (e.g. sharp-S => SS) so
222 : // that we have to handle RangeError exceptions here.
223 4672 : ASSIGN_RETURN_ON_EXCEPTION(
224 : isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
225 : String);
226 : DisallowHeapAllocation no_gc;
227 : DCHECK(s->IsFlat());
228 2336 : String::FlatContent flat = s->GetFlatContent(no_gc);
229 2336 : const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
230 2336 : status = U_ZERO_ERROR;
231 : dest_length =
232 : case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
233 2336 : dest_length, src, src_length, lang, &status);
234 2336 : if (status != U_BUFFER_OVERFLOW_ERROR) break;
235 : }
236 :
237 : // In most cases, the output will fill the destination buffer completely
238 : // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
239 : // Only in rare cases, it'll be shorter than the destination buffer and
240 : // |result| has to be truncated.
241 : DCHECK(U_SUCCESS(status));
242 2156 : if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
243 : DCHECK(dest_length == result->length());
244 2021 : return result;
245 : }
246 : DCHECK(dest_length < result->length());
247 135 : return SeqString::Truncate(result, dest_length);
248 : }
249 :
250 : } // namespace
251 :
252 : // A stripped-down version of ConvertToLower that can only handle flat one-byte
253 : // strings and does not allocate. Note that {src} could still be, e.g., a
254 : // one-byte sliced string with a two-byte parent string.
255 : // Called from TF builtins.
256 2264 : String Intl::ConvertOneByteToLower(String src, String dst) {
257 : DCHECK_EQ(src->length(), dst->length());
258 : DCHECK(src->IsOneByteRepresentation());
259 : DCHECK(src->IsFlat());
260 : DCHECK(dst->IsSeqOneByteString());
261 :
262 : DisallowHeapAllocation no_gc;
263 :
264 : const int length = src->length();
265 2264 : String::FlatContent src_flat = src->GetFlatContent(no_gc);
266 : uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars(no_gc);
267 :
268 2264 : if (src_flat.IsOneByte()) {
269 : const uint8_t* src_data = src_flat.ToOneByteVector().start();
270 :
271 2259 : bool has_changed_character = false;
272 : int index_to_first_unprocessed =
273 : FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
274 : reinterpret_cast<const char*>(src_data), length,
275 2259 : &has_changed_character);
276 :
277 2259 : if (index_to_first_unprocessed == length) {
278 2241 : return has_changed_character ? dst : src;
279 : }
280 :
281 : // If not ASCII, we keep the result up to index_to_first_unprocessed and
282 : // process the rest.
283 3438 : for (int index = index_to_first_unprocessed; index < length; ++index) {
284 3420 : dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
285 : }
286 : } else {
287 : DCHECK(src_flat.IsTwoByte());
288 5 : int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
289 5 : if (index_to_first_unprocessed == length) return src;
290 :
291 : const uint16_t* src_data = src_flat.ToUC16Vector().start();
292 0 : CopyChars(dst_data, src_data, index_to_first_unprocessed);
293 0 : for (int index = index_to_first_unprocessed; index < length; ++index) {
294 0 : dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
295 : }
296 : }
297 :
298 18 : return dst;
299 : }
300 :
301 2953 : MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
302 2953 : if (!s->IsOneByteRepresentation()) {
303 : // Use a slower implementation for strings with characters beyond U+00FF.
304 1400 : return LocaleConvertCase(isolate, s, false, "");
305 : }
306 :
307 : int length = s->length();
308 :
309 : // We depend here on the invariant that the length of a Latin1
310 : // string is invariant under ToLowerCase, and the result always
311 : // fits in the Latin1 range in the *root locale*. It does not hold
312 : // for ToUpperCase even in the root locale.
313 :
314 : // Scan the string for uppercase and non-ASCII characters for strings
315 : // shorter than a machine-word without any memory allocation overhead.
316 : // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
317 : // to two parts, one for scanning the prefix with no change and the other for
318 : // handling ASCII-only characters.
319 :
320 : bool is_short = length < static_cast<int>(sizeof(uintptr_t));
321 1553 : if (is_short) {
322 63 : bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
323 63 : if (is_lower_ascii) return s;
324 : }
325 :
326 : Handle<SeqOneByteString> result =
327 2980 : isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
328 :
329 4470 : return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
330 : }
331 :
332 5007 : MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
333 : int32_t length = s->length();
334 5007 : if (s->IsOneByteRepresentation() && length > 0) {
335 : Handle<SeqOneByteString> result =
336 9402 : isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
337 :
338 : DCHECK(s->IsFlat());
339 : int sharp_s_count;
340 : bool is_result_single_byte;
341 : {
342 : DisallowHeapAllocation no_gc;
343 4701 : String::FlatContent flat = s->GetFlatContent(no_gc);
344 : uint8_t* dest = result->GetChars(no_gc);
345 4701 : if (flat.IsOneByte()) {
346 : Vector<const uint8_t> src = flat.ToOneByteVector();
347 4701 : bool has_changed_character = false;
348 : int index_to_first_unprocessed = FastAsciiConvert<false>(
349 : reinterpret_cast<char*>(result->GetChars(no_gc)),
350 : reinterpret_cast<const char*>(src.start()), length,
351 4701 : &has_changed_character);
352 4701 : if (index_to_first_unprocessed == length) {
353 4489 : return has_changed_character ? result : s;
354 : }
355 : // If not ASCII, we keep the result up to index_to_first_unprocessed and
356 : // process the rest.
357 : is_result_single_byte =
358 636 : ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
359 212 : dest + index_to_first_unprocessed, &sharp_s_count);
360 : } else {
361 : DCHECK(flat.IsTwoByte());
362 0 : Vector<const uint16_t> src = flat.ToUC16Vector();
363 0 : if (ToUpperFastASCII(src, result)) return result;
364 0 : is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
365 : }
366 : }
367 :
368 : // Go to the full Unicode path if there are characters whose uppercase
369 : // is beyond the Latin-1 range (cannot be represented in OneByteString).
370 212 : if (V8_UNLIKELY(!is_result_single_byte)) {
371 63 : return LocaleConvertCase(isolate, s, true, "");
372 : }
373 :
374 149 : if (sharp_s_count == 0) return result;
375 :
376 : // We have sharp_s_count sharp-s characters, but the result is still
377 : // in the Latin-1 range.
378 162 : ASSIGN_RETURN_ON_EXCEPTION(
379 : isolate, result,
380 : isolate->factory()->NewRawOneByteString(length + sharp_s_count),
381 : String);
382 : DisallowHeapAllocation no_gc;
383 81 : String::FlatContent flat = s->GetFlatContent(no_gc);
384 81 : if (flat.IsOneByte()) {
385 81 : ToUpperWithSharpS(flat.ToOneByteVector(), result);
386 : } else {
387 0 : ToUpperWithSharpS(flat.ToUC16Vector(), result);
388 : }
389 :
390 81 : return result;
391 : }
392 :
393 306 : return LocaleConvertCase(isolate, s, true, "");
394 : }
395 :
396 3771 : std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
397 : // Ugly hack. ICU doesn't expose numbering system in any way, so we have
398 : // to assume that for given locale NumberingSystem constructor produces the
399 : // same digits as NumberFormat/Calendar would.
400 3771 : UErrorCode status = U_ZERO_ERROR;
401 : std::unique_ptr<icu::NumberingSystem> numbering_system(
402 3771 : icu::NumberingSystem::createInstance(icu_locale, status));
403 3771 : if (U_SUCCESS(status)) return numbering_system->getName();
404 0 : return "latn";
405 : }
406 :
407 16126 : icu::Locale Intl::CreateICULocale(const std::string& bcp47_locale) {
408 : DisallowHeapAllocation no_gc;
409 :
410 : // Convert BCP47 into ICU locale format.
411 16126 : UErrorCode status = U_ZERO_ERROR;
412 :
413 16126 : icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
414 16126 : CHECK(U_SUCCESS(status));
415 16126 : if (icu_locale.isBogus()) {
416 0 : FATAL("Failed to create ICU locale, are ICU data files missing?");
417 : }
418 :
419 16126 : return icu_locale;
420 : }
421 :
422 : // static
423 :
424 47556 : MaybeHandle<String> Intl::ToString(Isolate* isolate,
425 : const icu::UnicodeString& string) {
426 : return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
427 95112 : reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
428 : }
429 :
430 37413 : MaybeHandle<String> Intl::ToString(Isolate* isolate,
431 : const icu::UnicodeString& string,
432 : int32_t begin, int32_t end) {
433 37413 : return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
434 : }
435 :
436 : namespace {
437 :
438 20493 : Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
439 : int index, Handle<String> field_type_string,
440 : Handle<String> value) {
441 : // let element = $array[$index] = {
442 : // type: $field_type_string,
443 : // value: $value
444 : // }
445 : // return element;
446 : Factory* factory = isolate->factory();
447 20493 : Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
448 20493 : JSObject::AddProperty(isolate, element, factory->type_string(),
449 20493 : field_type_string, NONE);
450 :
451 20493 : JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
452 40986 : JSObject::AddDataElement(array, index, element, NONE);
453 20493 : return element;
454 : }
455 :
456 : } // namespace
457 :
458 20043 : void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
459 : Handle<String> field_type_string, Handle<String> value) {
460 : // Same as $array[$index] = {type: $field_type_string, value: $value};
461 20043 : InnerAddElement(isolate, array, index, field_type_string, value);
462 20043 : }
463 :
464 450 : void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
465 : Handle<String> field_type_string, Handle<String> value,
466 : Handle<String> additional_property_name,
467 : Handle<String> additional_property_value) {
468 : // Same as $array[$index] = {
469 : // type: $field_type_string, value: $value,
470 : // $additional_property_name: $additional_property_value
471 : // }
472 : Handle<JSObject> element =
473 450 : InnerAddElement(isolate, array, index, field_type_string, value);
474 450 : JSObject::AddProperty(isolate, element, additional_property_name,
475 450 : additional_property_value, NONE);
476 450 : }
477 :
478 : namespace {
479 :
480 : // Build the shortened locale; eg, convert xx_Yyyy_ZZ to xx_ZZ.
481 : //
482 : // If locale has a script tag then return true and the locale without the
483 : // script else return false and an empty string.
484 331853 : bool RemoveLocaleScriptTag(const std::string& icu_locale,
485 : std::string* locale_less_script) {
486 663706 : icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
487 : const char* icu_script = new_locale.getScript();
488 331853 : if (icu_script == nullptr || strlen(icu_script) == 0) {
489 315865 : *locale_less_script = std::string();
490 315865 : return false;
491 : }
492 :
493 : const char* icu_language = new_locale.getLanguage();
494 : const char* icu_country = new_locale.getCountry();
495 31976 : icu::Locale short_locale = icu::Locale(icu_language, icu_country);
496 : *locale_less_script = short_locale.getName();
497 : return true;
498 : }
499 :
500 : } // namespace
501 :
502 1039 : std::set<std::string> Intl::BuildLocaleSet(
503 : const icu::Locale* icu_available_locales, int32_t count) {
504 : std::set<std::string> locales;
505 664745 : for (int32_t i = 0; i < count; ++i) {
506 : std::string locale =
507 663706 : Intl::ToLanguageTag(icu_available_locales[i]).FromJust();
508 : locales.insert(locale);
509 :
510 : std::string shortened_locale;
511 331853 : if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
512 : std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
513 : locales.insert(shortened_locale);
514 : }
515 : }
516 :
517 1039 : return locales;
518 : }
519 :
520 611099 : Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
521 611099 : UErrorCode status = U_ZERO_ERROR;
522 611099 : std::string res = locale.toLanguageTag<std::string>(status);
523 1222198 : if (U_FAILURE(status)) {
524 : return Nothing<std::string>();
525 : }
526 611090 : CHECK(U_SUCCESS(status));
527 :
528 : // Hack to remove -true and -yes from unicode extensions
529 : // Address https://crbug.com/v8/8565
530 : // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
531 : // by fixing ICU-20310.
532 : size_t u_ext_start = res.find("-u-");
533 611090 : if (u_ext_start != std::string::npos) {
534 : // remove "-true" and "-yes" after -u-
535 27741 : const std::vector<std::string> remove_items({"-true", "-yes"});
536 27741 : for (auto item = remove_items.begin(); item != remove_items.end(); item++) {
537 33 : for (size_t sep_remove =
538 18494 : res.find(*item, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
539 18527 : sep_remove != std::string::npos; sep_remove = res.find(*item)) {
540 33 : size_t end_of_sep_remove = sep_remove + item->length();
541 51 : if (res.length() == end_of_sep_remove ||
542 18 : res.at(end_of_sep_remove) == '-') {
543 33 : res.erase(sep_remove, item->length());
544 : }
545 : }
546 : }
547 : }
548 : return Just(res);
549 : }
550 :
551 : namespace {
552 2242 : std::string DefaultLocale(Isolate* isolate) {
553 2242 : if (isolate->default_locale().empty()) {
554 988 : icu::Locale default_locale;
555 : // Translate ICU's fallback locale to a well-known locale.
556 988 : if (strcmp(default_locale.getName(), "en_US_POSIX") == 0 ||
557 494 : strcmp(default_locale.getName(), "c") == 0) {
558 0 : isolate->set_default_locale("en-US");
559 : } else {
560 : // Set the locale
561 988 : isolate->set_default_locale(
562 : default_locale.isBogus()
563 : ? "und"
564 1482 : : Intl::ToLanguageTag(default_locale).FromJust());
565 : }
566 : DCHECK(!isolate->default_locale().empty());
567 : }
568 2242 : return isolate->default_locale();
569 : }
570 : } // namespace
571 :
572 : // See ecma402/#legacy-constructor.
573 6174 : MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
574 : Handle<JSReceiver> receiver,
575 : Handle<JSFunction> constructor,
576 : bool has_initialized_slot) {
577 : Handle<Object> obj_is_instance_of;
578 12348 : ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
579 : Object::InstanceOf(isolate, receiver, constructor),
580 : Object);
581 6174 : bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);
582 :
583 : // 2. If receiver does not have an [[Initialized...]] internal slot
584 : // and ? InstanceofOperator(receiver, constructor) is true, then
585 6174 : if (!has_initialized_slot && is_instance_of) {
586 : // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
587 : Handle<Object> new_receiver;
588 180 : ASSIGN_RETURN_ON_EXCEPTION(
589 : isolate, new_receiver,
590 : JSReceiver::GetProperty(isolate, receiver,
591 : isolate->factory()->intl_fallback_symbol()),
592 : Object);
593 90 : return new_receiver;
594 : }
595 :
596 6084 : return receiver;
597 : }
598 :
599 508079 : Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
600 : const char* property,
601 : std::vector<const char*> values,
602 : const char* service,
603 : std::unique_ptr<char[]>* result) {
604 : Handle<String> property_str =
605 508079 : isolate->factory()->NewStringFromAsciiChecked(property);
606 :
607 : // 1. Let value be ? Get(options, property).
608 : Handle<Object> value;
609 1016158 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
610 : isolate, value,
611 : Object::GetPropertyOrElement(isolate, options, property_str),
612 : Nothing<bool>());
613 :
614 507980 : if (value->IsUndefined(isolate)) {
615 : return Just(false);
616 : }
617 :
618 : // 2. c. Let value be ? ToString(value).
619 : Handle<String> value_str;
620 27960 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
621 : isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
622 13971 : std::unique_ptr<char[]> value_cstr = value_str->ToCString();
623 :
624 : // 2. d. if values is not undefined, then
625 13971 : if (values.size() > 0) {
626 : // 2. d. i. If values does not contain an element equal to value,
627 : // throw a RangeError exception.
628 55178 : for (size_t i = 0; i < values.size(); i++) {
629 33820 : if (strcmp(values.at(i), value_cstr.get()) == 0) {
630 : // 2. e. return value
631 : *result = std::move(value_cstr);
632 : return Just(true);
633 : }
634 : }
635 :
636 : Handle<String> service_str =
637 365 : isolate->factory()->NewStringFromAsciiChecked(service);
638 730 : THROW_NEW_ERROR_RETURN_VALUE(
639 : isolate,
640 : NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
641 : property_str),
642 : Nothing<bool>());
643 : }
644 :
645 : // 2. e. return value
646 : *result = std::move(value_cstr);
647 : return Just(true);
648 : }
649 :
650 71086 : V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
651 : Isolate* isolate, Handle<JSReceiver> options, const char* property,
652 : const char* service, bool* result) {
653 : Handle<String> property_str =
654 71086 : isolate->factory()->NewStringFromAsciiChecked(property);
655 :
656 : // 1. Let value be ? Get(options, property).
657 : Handle<Object> value;
658 142172 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
659 : isolate, value,
660 : Object::GetPropertyOrElement(isolate, options, property_str),
661 : Nothing<bool>());
662 :
663 : // 2. If value is not undefined, then
664 71077 : if (!value->IsUndefined(isolate)) {
665 : // 2. b. i. Let value be ToBoolean(value).
666 127 : *result = value->BooleanValue(isolate);
667 :
668 : // 2. e. return value
669 : return Just(true);
670 : }
671 :
672 : return Just(false);
673 : }
674 :
675 : namespace {
676 :
// Lowercases an ASCII uppercase letter by setting bit 5; every other
// character is returned unchanged.
char AsciiToLower(char c) {
  const bool is_ascii_upper = c >= 'A' && c <= 'Z';
  return is_ascii_upper ? static_cast<char>(c | (1 << 5)) : c;
}
683 :
// Returns true iff |c| is an ASCII lowercase letter, 'a' through 'z'
// inclusive. (The upper bound used to be exclusive, which wrongly rejected
// 'z' and kept tags such as "zh" or "az" off the fast path.)
bool IsLowerAscii(char c) { return c >= 'a' && c <= 'z'; }

// Returns true iff |locale| consists of exactly two ASCII lowercase letters,
// i.e. it has the shape of a canonical two-letter language code.
bool IsTwoLetterLanguage(const std::string& locale) {
  // Two letters, both in range 'a'-'z'...
  return locale.length() == 2 && IsLowerAscii(locale[0]) &&
         IsLowerAscii(locale[1]);
}
691 :
// Returns true iff |locale| is exactly one of the deprecated two-letter
// language tags: in, iw, ji, jw, mo.
bool IsDeprecatedLanguage(const std::string& locale) {
  const char* const deprecated_tags[] = {"in", "iw", "ji", "jw", "mo"};
  for (const char* tag : deprecated_tags) {
    if (locale == tag) return true;
  }
  return false;
}
697 :
698 : // Reference:
699 : // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
700 95324 : bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
701 190621 : if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
702 149131 : if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
703 53843 : V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
704 486 : return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
705 135 : locale.substr(2) == "mingo";
706 : }
707 : return false;
708 : }
709 :
710 : } // anonymous namespace
711 :
712 67695 : Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
713 : Handle<Object> locale_in) {
714 : Handle<String> locale_str;
715 : // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
716 : // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
717 : // exception.
718 : // 7c iii. Let tag be ? ToString(kValue).
719 : // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
720 : // RangeError exception.
721 :
722 67695 : if (locale_in->IsString()) {
723 : locale_str = Handle<String>::cast(locale_in);
724 36 : } else if (locale_in->IsJSReceiver()) {
725 0 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
726 : Object::ToString(isolate, locale_in),
727 : Nothing<std::string>());
728 : } else {
729 72 : THROW_NEW_ERROR_RETURN_VALUE(isolate,
730 : NewTypeError(MessageTemplate::kLanguageID),
731 : Nothing<std::string>());
732 : }
733 202977 : std::string locale(locale_str->ToCString().get());
734 :
735 67659 : return Intl::CanonicalizeLanguageTag(isolate, locale);
736 : }
737 :
738 120543 : Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
739 : const std::string& locale_in) {
740 : std::string locale = locale_in;
741 :
742 241086 : if (locale.length() == 0 ||
743 120543 : !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
744 0 : THROW_NEW_ERROR_RETURN_VALUE(
745 : isolate,
746 : NewRangeError(
747 : MessageTemplate::kInvalidLanguageTag,
748 : isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
749 : Nothing<std::string>());
750 : }
751 :
752 : // Optimize for the most common case: a 2-letter language code in the
753 : // canonical form/lowercase that is not one of the deprecated codes
754 : // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
755 : // codes. Instead, let them be handled by ICU in the slow path. However,
756 : // fast-track 'fil' (3-letter canonical code).
757 216047 : if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
758 : locale == "fil") {
759 : return Just(locale);
760 : }
761 :
762 : // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
763 : // the input before any more check.
764 : std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower);
765 :
766 : // ICU maps a few grandfathered tags to what looks like a regular language
767 : // tag even though IANA language tag registry does not have a preferred
768 : // entry map for them. Return them as they're with lowercasing.
769 95324 : if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
770 : return Just(locale);
771 : }
772 :
773 : // // ECMA 402 6.2.3
774 : // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
775 : // language tag if it's too long (much longer than 100 chars). Even if we
776 : // allocate a longer buffer, ICU will still fail if it's too long. Either
777 : // propose to Ecma 402 to put a limit on the locale length or change ICU to
778 : // handle long locale names better. See
779 : // https://unicode-org.atlassian.net/browse/ICU-13417
780 95198 : UErrorCode error = U_ZERO_ERROR;
781 : // uloc_forLanguageTag checks the structrual validity. If the input BCP47
782 : // language tag is parsed all the way to the end, it indicates that the input
783 : // is structurally valid. Due to a couple of bugs, we can't use it
784 : // without Chromium patches or ICU 62 or earlier.
785 190396 : icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
786 95198 : if (U_FAILURE(error) || icu_locale.isBogus()) {
787 378 : THROW_NEW_ERROR_RETURN_VALUE(
788 : isolate,
789 : NewRangeError(
790 : MessageTemplate::kInvalidLanguageTag,
791 : isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
792 : Nothing<std::string>());
793 : }
794 95072 : Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
795 95072 : if (maybe_to_language_tag.IsNothing()) {
796 27 : THROW_NEW_ERROR_RETURN_VALUE(
797 : isolate,
798 : NewRangeError(
799 : MessageTemplate::kInvalidLanguageTag,
800 : isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
801 : Nothing<std::string>());
802 : }
803 :
804 : return maybe_to_language_tag;
805 : }
806 :
807 16909 : Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
808 : Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
809 : // 1. If locales is undefined, then
810 16909 : if (locales->IsUndefined(isolate)) {
811 : // 1a. Return a new empty List.
812 1855 : return Just(std::vector<std::string>());
813 : }
814 : // 2. Let seen be a new empty List.
815 15054 : std::vector<std::string> seen;
816 : // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
817 : // internal slot, then
818 15054 : if (locales->IsJSLocale()) {
819 : // Since this value came from JSLocale, which is already went though the
820 : // CanonializeLanguageTag process once, therefore there are no need to
821 : // call CanonializeLanguageTag again.
822 54 : seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
823 : return Just(seen);
824 : }
825 15027 : if (locales->IsString()) {
826 : // 3a. Let O be CreateArrayFromList(« locales »).
827 : // Instead of creating a one-element array and then iterating over it,
828 : // we inline the body of the iteration:
829 : std::string canonicalized_tag;
830 20836 : if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
831 : return Nothing<std::vector<std::string>>();
832 : }
833 10319 : seen.push_back(canonicalized_tag);
834 : return Just(seen);
835 : }
836 : // 4. Else,
837 : // 4a. Let O be ? ToObject(locales).
838 : Handle<JSReceiver> o;
839 9218 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
840 : Object::ToObject(isolate, locales),
841 : Nothing<std::vector<std::string>>());
842 : // 5. Let len be ? ToLength(? Get(O, "length")).
843 : Handle<Object> length_obj;
844 9218 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
845 : Object::GetLengthFromArrayLike(isolate, o),
846 : Nothing<std::vector<std::string>>());
847 : // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
848 : // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
849 : // don't happen in practice (and would be very slow if they do), we'll keep
850 : // the code simple for now by using a saturating to-uint32 conversion.
851 : double raw_length = length_obj->Number();
852 : uint32_t len =
853 4609 : raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
854 : // 6. Let k be 0.
855 : // 7. Repeat, while k < len
856 13179 : for (uint32_t k = 0; k < len; k++) {
857 : // 7a. Let Pk be ToString(k).
858 : // 7b. Let kPresent be ? HasProperty(O, Pk).
859 4438 : LookupIterator it(isolate, o, k);
860 4438 : Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
861 4591 : MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
862 : // 7c. If kPresent is true, then
863 4438 : if (!maybe_found.FromJust()) continue;
864 : // 7c i. Let kValue be ? Get(O, Pk).
865 : Handle<Object> k_value;
866 8876 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
867 : Nothing<std::vector<std::string>>());
868 : // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
869 : // exception.
870 : // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
871 : // internal slot, then
872 : std::string canonicalized_tag;
873 4438 : if (k_value->IsJSLocale()) {
874 : // 7c iii. 1. Let tag be kValue.[[Locale]].
875 90 : canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
876 : // 7c iv. Else,
877 : } else {
878 : // 7c iv 1. Let tag be ? ToString(kValue).
879 : // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
880 : // RangeError exception.
881 : // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
882 8786 : if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
883 : return Nothing<std::vector<std::string>>();
884 : }
885 : }
886 : // 7c vi. If canonicalizedTag is not an element of seen, append
887 : // canonicalizedTag as the last element of seen.
888 8732 : if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
889 4366 : seen.push_back(canonicalized_tag);
890 : }
891 : // 7d. Increase k by 1. (See loop header.)
892 : // Optimization: some callers only need one result.
893 4366 : if (only_return_one_result) return Just(seen);
894 : }
895 : // 8. Return seen.
896 : return Just(seen);
897 : }
898 :
899 : // ecma402 #sup-string.prototype.tolocalelowercase
900 : // ecma402 #sup-string.prototype.tolocaleuppercase
901 855 : MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
902 : Handle<String> s,
903 : bool to_upper,
904 : Handle<Object> locales) {
905 855 : std::vector<std::string> requested_locales;
906 1710 : if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
907 36 : return MaybeHandle<String>();
908 : }
909 : std::string requested_locale = requested_locales.size() == 0
910 : ? DefaultLocale(isolate)
911 819 : : requested_locales[0];
912 819 : size_t dash = requested_locale.find('-');
913 819 : if (dash != std::string::npos) {
914 468 : requested_locale = requested_locale.substr(0, dash);
915 : }
916 :
917 : // Primary language tag can be up to 8 characters long in theory.
918 : // https://tools.ietf.org/html/bcp47#section-2.2.1
919 : DCHECK_LE(requested_locale.length(), 8);
920 819 : s = String::Flatten(isolate, s);
921 :
922 : // All the languages requiring special-handling have two-letter codes.
923 : // Note that we have to check for '!= 2' here because private-use language
924 : // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
925 : // only 'x' or 'i' when they get here.
926 819 : if (V8_UNLIKELY(requested_locale.length() != 2)) {
927 135 : if (to_upper) {
928 45 : return ConvertToUpper(isolate, s);
929 : }
930 90 : return ConvertToLower(isolate, s);
931 : }
932 : // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
933 : // in the root locale needs to be adjusted for az, lt and tr because even case
934 : // mapping of ASCII range characters are different in those locales.
935 : // Greek (el) does not require any adjustment.
936 1944 : if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
937 : (requested_locale == "lt") || (requested_locale == "az"))) {
938 387 : return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
939 : } else {
940 297 : if (to_upper) {
941 108 : return ConvertToUpper(isolate, s);
942 : }
943 189 : return ConvertToLower(isolate, s);
944 : }
945 : }
946 :
947 68815 : MaybeHandle<Object> Intl::StringLocaleCompare(Isolate* isolate,
948 : Handle<String> string1,
949 : Handle<String> string2,
950 : Handle<Object> locales,
951 : Handle<Object> options) {
952 : // We only cache the instance when both locales and options are undefined,
953 : // as that is the only case when the specified side-effects of examining
954 : // those arguments are unobservable.
955 : bool can_cache =
956 132216 : locales->IsUndefined(isolate) && options->IsUndefined(isolate);
957 68815 : if (can_cache) {
958 : // Both locales and options are undefined, check the cache.
959 : icu::Collator* cached_icu_collator =
960 : static_cast<icu::Collator*>(isolate->get_cached_icu_object(
961 63401 : Isolate::ICUObjectCacheType::kDefaultCollator));
962 : // We may use the cached icu::Collator for a fast path.
963 63401 : if (cached_icu_collator != nullptr) {
964 : return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
965 63357 : string2);
966 : }
967 : }
968 :
969 : Handle<JSFunction> constructor = Handle<JSFunction>(
970 : JSFunction::cast(
971 10916 : isolate->context()->native_context()->intl_collator_function()),
972 : isolate);
973 :
974 : Handle<JSCollator> collator;
975 10916 : ASSIGN_RETURN_ON_EXCEPTION(
976 : isolate, collator,
977 : New<JSCollator>(isolate, constructor, locales, options), Object);
978 5458 : if (can_cache) {
979 : isolate->set_icu_object_in_cache(
980 : Isolate::ICUObjectCacheType::kDefaultCollator,
981 44 : std::static_pointer_cast<icu::UObject>(
982 44 : collator->icu_collator()->get()));
983 : }
984 : icu::Collator* icu_collator = collator->icu_collator()->raw();
985 5458 : return Intl::CompareStrings(isolate, *icu_collator, string1, string2);
986 : }
987 :
988 : // ecma402/#sec-collator-comparestrings
989 74994 : Handle<Object> Intl::CompareStrings(Isolate* isolate,
990 : const icu::Collator& icu_collator,
991 : Handle<String> string1,
992 : Handle<String> string2) {
993 : Factory* factory = isolate->factory();
994 :
995 74994 : string1 = String::Flatten(isolate, string1);
996 74994 : string2 = String::Flatten(isolate, string2);
997 :
998 : UCollationResult result;
999 74994 : UErrorCode status = U_ZERO_ERROR;
1000 149988 : icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1);
1001 149988 : icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2);
1002 74994 : result = icu_collator.compare(string_val1, string_val2, status);
1003 : DCHECK(U_SUCCESS(status));
1004 :
1005 149988 : return factory->NewNumberFromInt(result);
1006 : }
1007 :
1008 : // ecma402/#sup-properties-of-the-number-prototype-object
1009 2052 : MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
1010 : Handle<Object> num,
1011 : Handle<Object> locales,
1012 : Handle<Object> options) {
1013 : Handle<Object> numeric_obj;
1014 2052 : if (FLAG_harmony_intl_bigint) {
1015 1188 : ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1016 : Object::ToNumeric(isolate, num), String);
1017 : } else {
1018 2916 : ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1019 : Object::ToNumber(isolate, num), String);
1020 : }
1021 :
1022 : // We only cache the instance when both locales and options are undefined,
1023 : // as that is the only case when the specified side-effects of examining
1024 : // those arguments are unobservable.
1025 : bool can_cache =
1026 3447 : locales->IsUndefined(isolate) && options->IsUndefined(isolate);
1027 2052 : if (can_cache) {
1028 : icu::NumberFormat* cached_number_format =
1029 : static_cast<icu::NumberFormat*>(isolate->get_cached_icu_object(
1030 1395 : Isolate::ICUObjectCacheType::kDefaultNumberFormat));
1031 : // We may use the cached icu::NumberFormat for a fast path.
1032 1395 : if (cached_number_format != nullptr) {
1033 : return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
1034 1350 : numeric_obj);
1035 : }
1036 : }
1037 :
1038 : Handle<JSFunction> constructor = Handle<JSFunction>(
1039 : JSFunction::cast(
1040 1404 : isolate->context()->native_context()->intl_number_format_function()),
1041 : isolate);
1042 : Handle<JSNumberFormat> number_format;
1043 : // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1044 1404 : ASSIGN_RETURN_ON_EXCEPTION(
1045 : isolate, number_format,
1046 : New<JSNumberFormat>(isolate, constructor, locales, options), String);
1047 :
1048 693 : if (can_cache) {
1049 : isolate->set_icu_object_in_cache(
1050 : Isolate::ICUObjectCacheType::kDefaultNumberFormat,
1051 45 : std::static_pointer_cast<icu::UObject>(
1052 45 : number_format->icu_number_format()->get()));
1053 : }
1054 :
1055 : // Return FormatNumber(numberFormat, x).
1056 : icu::NumberFormat* icu_number_format =
1057 : number_format->icu_number_format()->raw();
1058 : return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
1059 693 : numeric_obj);
1060 : }
1061 :
1062 : namespace {
1063 :
1064 : // ecma402/#sec-defaultnumberoption
1065 7020 : Maybe<int> DefaultNumberOption(Isolate* isolate, Handle<Object> value, int min,
1066 : int max, int fallback, Handle<String> property) {
1067 : // 2. Else, return fallback.
1068 7020 : if (value->IsUndefined()) return Just(fallback);
1069 :
1070 : // 1. If value is not undefined, then
1071 : // a. Let value be ? ToNumber(value).
1072 : Handle<Object> value_num;
1073 936 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1074 : isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
1075 : DCHECK(value_num->IsNumber());
1076 :
1077 : // b. If value is NaN or less than minimum or greater than maximum, throw a
1078 : // RangeError exception.
1079 1341 : if (value_num->IsNaN() || value_num->Number() < min ||
1080 405 : value_num->Number() > max) {
1081 216 : THROW_NEW_ERROR_RETURN_VALUE(
1082 : isolate,
1083 : NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
1084 : Nothing<int>());
1085 : }
1086 :
1087 : // The max and min arguments are integers and the above check makes
1088 : // sure that we are within the integer range making this double to
1089 : // int conversion safe.
1090 : //
1091 : // c. Return floor(value).
1092 360 : return Just(FastD2I(floor(value_num->Number())));
1093 : }
1094 :
1095 : // ecma402/#sec-getnumberoption
1096 6894 : Maybe<int> GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
1097 : Handle<String> property, int min, int max,
1098 : int fallback) {
1099 : // 1. Let value be ? Get(options, property).
1100 : Handle<Object> value;
1101 13788 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1102 : isolate, value, JSReceiver::GetProperty(isolate, options, property),
1103 : Nothing<int>());
1104 :
1105 : // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
1106 6894 : return DefaultNumberOption(isolate, value, min, max, fallback, property);
1107 : }
1108 :
1109 6894 : Maybe<int> GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
1110 : const char* property, int min, int max,
1111 : int fallback) {
1112 : Handle<String> property_str =
1113 6894 : isolate->factory()->NewStringFromAsciiChecked(property);
1114 6894 : return GetNumberOption(isolate, options, property_str, min, max, fallback);
1115 : }
1116 :
1117 : } // namespace
1118 :
1119 2331 : Maybe<bool> Intl::SetNumberFormatDigitOptions(Isolate* isolate,
1120 : icu::DecimalFormat* number_format,
1121 : Handle<JSReceiver> options,
1122 : int mnfd_default,
1123 : int mxfd_default) {
1124 2331 : CHECK_NOT_NULL(number_format);
1125 :
1126 : // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1127 : // 1).
1128 : int mnid;
1129 4662 : if (!GetNumberOption(isolate, options, "minimumIntegerDigits", 1, 21, 1)
1130 : .To(&mnid)) {
1131 : return Nothing<bool>();
1132 : }
1133 :
1134 : // 6. Let mnfd be ? GetNumberOption(options, "minimumFractionDigits", 0, 20,
1135 : // mnfdDefault).
1136 : int mnfd;
1137 4572 : if (!GetNumberOption(isolate, options, "minimumFractionDigits", 0, 20,
1138 : mnfd_default)
1139 : .To(&mnfd)) {
1140 : return Nothing<bool>();
1141 : }
1142 :
1143 : // 7. Let mxfdActualDefault be max( mnfd, mxfdDefault ).
1144 2277 : int mxfd_actual_default = std::max(mnfd, mxfd_default);
1145 :
1146 : // 8. Let mxfd be ? GetNumberOption(options,
1147 : // "maximumFractionDigits", mnfd, 20, mxfdActualDefault).
1148 : int mxfd;
1149 4554 : if (!GetNumberOption(isolate, options, "maximumFractionDigits", mnfd, 20,
1150 : mxfd_actual_default)
1151 : .To(&mxfd)) {
1152 : return Nothing<bool>();
1153 : }
1154 :
1155 : // 9. Let mnsd be ? Get(options, "minimumSignificantDigits").
1156 : Handle<Object> mnsd_obj;
1157 : Handle<String> mnsd_str =
1158 : isolate->factory()->minimumSignificantDigits_string();
1159 4536 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1160 : isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
1161 : Nothing<bool>());
1162 :
1163 : // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
1164 : Handle<Object> mxsd_obj;
1165 : Handle<String> mxsd_str =
1166 : isolate->factory()->maximumSignificantDigits_string();
1167 4536 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1168 : isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
1169 : Nothing<bool>());
1170 :
1171 : // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1172 2268 : number_format->setMinimumIntegerDigits(mnid);
1173 :
1174 : // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
1175 2268 : number_format->setMinimumFractionDigits(mnfd);
1176 :
1177 : // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
1178 2268 : number_format->setMaximumFractionDigits(mxfd);
1179 :
1180 : bool significant_digits_used = false;
1181 : // 14. If mnsd is not undefined or mxsd is not undefined, then
1182 4491 : if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
1183 : // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1184 : int mnsd;
1185 144 : if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
1186 : return Nothing<bool>();
1187 : }
1188 :
1189 : // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1190 : int mxsd;
1191 108 : if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
1192 : .To(&mxsd)) {
1193 : return Nothing<bool>();
1194 : }
1195 :
1196 : significant_digits_used = true;
1197 :
1198 : // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1199 27 : number_format->setMinimumSignificantDigits(mnsd);
1200 :
1201 : // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1202 27 : number_format->setMaximumSignificantDigits(mxsd);
1203 : }
1204 :
1205 2223 : number_format->setSignificantDigitsUsed(significant_digits_used);
1206 2223 : number_format->setRoundingMode(icu::DecimalFormat::kRoundHalfUp);
1207 : return Just(true);
1208 : }
1209 :
1210 : namespace {
1211 :
// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale|, cut at '-' boundaries, that is an
// element of |available_locales|, or an empty string when no prefix matches.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  std::string candidate = locale;

  // 2. Repeat,
  // 2.a. If availableLocales contains an element equal to candidate, return
  //      candidate.
  while (available_locales.count(candidate) == 0) {
    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur, return
    //      undefined.
    const size_t last_dash = candidate.rfind('-');
    if (last_dash == std::string::npos) return std::string();

    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of
    //      candidate, decrease pos by 2. This also drops a single-character
    //      subtag that would otherwise be left dangling at the end.
    size_t cut = last_dash;
    if (cut >= 2 && candidate[cut - 2] == '-') cut -= 2;

    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    candidate.resize(cut);
  }
  return candidate;
}
1245 :
1246 26994 : struct ParsedLocale {
1247 : std::string no_extensions_locale;
1248 : std::string extension;
1249 : };
1250 :
1251 : // Returns a struct containing a bcp47 tag without unicode extensions
1252 : // and the removed unicode extensions.
1253 : //
1254 : // For example, given 'en-US-u-co-emoji' returns 'en-US' and
1255 : // 'u-co-emoji'.
1256 13497 : ParsedLocale ParseBCP47Locale(const std::string& locale) {
1257 : size_t length = locale.length();
1258 : ParsedLocale parsed_locale;
1259 :
1260 : // Privateuse or grandfathered locales have no extension sequences.
1261 13497 : if ((length > 1) && (locale[1] == '-')) {
1262 : // Check to make sure that this really is a grandfathered or
1263 : // privateuse extension. ICU can sometimes mess up the
1264 : // canonicalization.
1265 63 : CHECK(locale[0] == 'x' || locale[0] == 'i');
1266 63 : parsed_locale.no_extensions_locale = locale;
1267 : return parsed_locale;
1268 : }
1269 :
1270 : size_t unicode_extension_start = locale.find("-u-");
1271 :
1272 : // No unicode extensions found.
1273 13434 : if (unicode_extension_start == std::string::npos) {
1274 10261 : parsed_locale.no_extensions_locale = locale;
1275 : return parsed_locale;
1276 : }
1277 :
1278 : size_t private_extension_start = locale.find("-x-");
1279 :
1280 : // Unicode extensions found within privateuse subtags don't count.
1281 6346 : if (private_extension_start != std::string::npos &&
1282 3173 : private_extension_start < unicode_extension_start) {
1283 36 : parsed_locale.no_extensions_locale = locale;
1284 : return parsed_locale;
1285 : }
1286 :
1287 3137 : const std::string beginning = locale.substr(0, unicode_extension_start);
1288 : size_t unicode_extension_end = length;
1289 : DCHECK_GT(length, 2);
1290 :
1291 : // Find the end of the extension production as per the bcp47 grammar
1292 : // by looking for '-' followed by 2 chars and then another '-'.
1293 25071 : for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
1294 21943 : if (locale[i] != '-') continue;
1295 :
1296 20478 : if (locale[i + 2] == '-') {
1297 : unicode_extension_end = i;
1298 : break;
1299 : }
1300 :
1301 : i += 2;
1302 : }
1303 :
1304 3137 : const std::string end = locale.substr(unicode_extension_end);
1305 6274 : parsed_locale.no_extensions_locale = beginning + end;
1306 6274 : parsed_locale.extension = locale.substr(
1307 : unicode_extension_start, unicode_extension_end - unicode_extension_start);
1308 : return parsed_locale;
1309 : }
1310 :
1311 : // ecma402/#sec-lookupsupportedlocales
1312 585 : std::vector<std::string> LookupSupportedLocales(
1313 : const std::set<std::string>& available_locales,
1314 : const std::vector<std::string>& requested_locales) {
1315 : // 1. Let subset be a new empty List.
1316 : std::vector<std::string> subset;
1317 :
1318 : // 2. For each element locale of requestedLocales in List order, do
1319 1134 : for (const std::string& locale : requested_locales) {
1320 : // 2. a. Let noExtensionsLocale be the String value that is locale
1321 : // with all Unicode locale extension sequences removed.
1322 : std::string no_extension_locale =
1323 1098 : ParseBCP47Locale(locale).no_extensions_locale;
1324 :
1325 : // 2. b. Let availableLocale be
1326 : // BestAvailableLocale(availableLocales, noExtensionsLocale).
1327 : std::string available_locale =
1328 549 : BestAvailableLocale(available_locales, no_extension_locale);
1329 :
1330 : // 2. c. If availableLocale is not undefined, append locale to the
1331 : // end of subset.
1332 549 : if (!available_locale.empty()) {
1333 441 : subset.push_back(locale);
1334 : }
1335 : }
1336 :
1337 : // 3. Return subset.
1338 585 : return subset;
1339 : }
1340 :
1341 : // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1342 : // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
1343 : std::vector<std::string> BestFitSupportedLocales(
1344 : const std::set<std::string>& available_locales,
1345 : const std::vector<std::string>& requested_locales) {
1346 549 : return LookupSupportedLocales(available_locales, requested_locales);
1347 : }
1348 :
1349 : // ecma262 #sec-createarrayfromlist
1350 945 : Handle<JSArray> CreateArrayFromList(Isolate* isolate,
1351 : std::vector<std::string> elements,
1352 : PropertyAttributes attr) {
1353 : Factory* factory = isolate->factory();
1354 : // Let array be ! ArrayCreate(0).
1355 : Handle<JSArray> array = factory->NewJSArray(0);
1356 :
1357 945 : uint32_t length = static_cast<uint32_t>(elements.size());
1358 : // 3. Let n be 0.
1359 : // 4. For each element e of elements, do
1360 2529 : for (uint32_t i = 0; i < length; i++) {
1361 : // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1362 792 : const std::string& part = elements[i];
1363 : Handle<String> value =
1364 1584 : factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
1365 792 : JSObject::AddDataElement(array, i, value, attr);
1366 : }
1367 : // 5. Return array.
1368 945 : return array;
1369 : }
1370 :
1371 : // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1372 : // https://tc39.github.io/ecma402/#sec-supportedlocales
1373 621 : MaybeHandle<JSObject> SupportedLocales(
1374 : Isolate* isolate, const char* method,
1375 : const std::set<std::string>& available_locales,
1376 : const std::vector<std::string>& requested_locales, Handle<Object> options) {
1377 621 : std::vector<std::string> supported_locales;
1378 :
1379 : // 2. Else, let matcher be "best fit".
1380 : Intl::MatcherOption matcher = Intl::MatcherOption::kBestFit;
1381 :
1382 : // 1. If options is not undefined, then
1383 621 : if (!options->IsUndefined(isolate)) {
1384 : // 1. a. Let options be ? ToObject(options).
1385 : Handle<JSReceiver> options_obj;
1386 216 : ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
1387 : Object::ToObject(isolate, options), JSObject);
1388 :
1389 : // 1. b. Let matcher be ? GetOption(options, "localeMatcher", "string",
1390 : // « "lookup", "best fit" », "best fit").
1391 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
1392 108 : Intl::GetLocaleMatcher(isolate, options_obj, method);
1393 108 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1394 : matcher = maybe_locale_matcher.FromJust();
1395 : }
1396 :
1397 : // 3. If matcher is "best fit", then
1398 : // a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
1399 : // requestedLocales).
1400 585 : if (matcher == Intl::MatcherOption::kBestFit) {
1401 : supported_locales =
1402 549 : BestFitSupportedLocales(available_locales, requested_locales);
1403 : } else {
1404 : // 4. Else,
1405 : // a. Let supportedLocales be LookupSupportedLocales(availableLocales,
1406 : // requestedLocales).
1407 : DCHECK_EQ(matcher, Intl::MatcherOption::kLookup);
1408 : supported_locales =
1409 72 : LookupSupportedLocales(available_locales, requested_locales);
1410 : }
1411 :
1412 : // 5. Return CreateArrayFromList(supportedLocales).
1413 : PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1414 585 : return CreateArrayFromList(isolate, supported_locales, attr);
1415 : }
1416 :
1417 : } // namespace
1418 :
1419 : // ecma-402 #sec-intl.getcanonicallocales
1420 369 : MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
1421 : Handle<Object> locales) {
1422 : // 1. Let ll be ? CanonicalizeLocaleList(locales).
1423 : Maybe<std::vector<std::string>> maybe_ll =
1424 369 : CanonicalizeLocaleList(isolate, locales, false);
1425 369 : MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
1426 :
1427 : // 2. Return CreateArrayFromList(ll).
1428 : PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1429 360 : return CreateArrayFromList(isolate, maybe_ll.FromJust(), attr);
1430 : }
1431 :
1432 : // ECMA 402 Intl.*.supportedLocalesOf
1433 657 : MaybeHandle<JSObject> Intl::SupportedLocalesOf(
1434 : Isolate* isolate, const char* method,
1435 : const std::set<std::string>& available_locales, Handle<Object> locales,
1436 : Handle<Object> options) {
1437 : // Let availableLocales be %Collator%.[[AvailableLocales]].
1438 :
1439 : // Let requestedLocales be ? CanonicalizeLocaleList(locales).
1440 : Maybe<std::vector<std::string>> requested_locales =
1441 657 : CanonicalizeLocaleList(isolate, locales, false);
1442 657 : MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
1443 :
1444 : // Return ? SupportedLocales(availableLocales, requestedLocales, options).
1445 : return SupportedLocales(isolate, method, available_locales,
1446 621 : requested_locales.FromJust(), options);
1447 : }
1448 :
1449 : namespace {
1450 : template <typename T>
1451 720 : bool IsValidExtension(const icu::Locale& locale, const char* key,
1452 : const std::string& value) {
1453 720 : UErrorCode status = U_ZERO_ERROR;
1454 : std::unique_ptr<icu::StringEnumeration> enumeration(
1455 : T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
1456 720 : false, status));
1457 720 : if (U_SUCCESS(status)) {
1458 : int32_t length;
1459 720 : std::string legacy_type(uloc_toLegacyType(key, value.c_str()));
1460 5931 : for (const char* item = enumeration->next(&length, status); item != nullptr;
1461 5211 : item = enumeration->next(&length, status)) {
1462 11736 : if (U_SUCCESS(status) && legacy_type == item) {
1463 : return true;
1464 : }
1465 : }
1466 : }
1467 : return false;
1468 : }
1469 :
1470 : bool IsValidCalendar(const icu::Locale& locale, const std::string& value) {
1471 603 : return IsValidExtension<icu::Calendar>(locale, "calendar", value);
1472 : }
1473 :
1474 189 : bool IsValidCollation(const icu::Locale& locale, const std::string& value) {
1475 378 : std::set<std::string> invalid_values = {"standard", "search"};
1476 189 : if (invalid_values.find(value) != invalid_values.end()) return false;
1477 117 : return IsValidExtension<icu::Collator>(locale, "collation", value);
1478 : }
1479 :
1480 1530 : bool IsValidNumberingSystem(const std::string& value) {
1481 3060 : std::set<std::string> invalid_values = {"native", "traditio", "finance"};
1482 1530 : if (invalid_values.find(value) != invalid_values.end()) return false;
1483 1476 : UErrorCode status = U_ZERO_ERROR;
1484 : std::unique_ptr<icu::NumberingSystem> numbering_system(
1485 1476 : icu::NumberingSystem::createInstanceByName(value.c_str(), status));
1486 1476 : return U_SUCCESS(status) && numbering_system.get() != nullptr;
1487 : }
1488 :
1489 14884 : std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
1490 : icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
1491 : std::map<std::string, std::string> extensions;
1492 :
1493 14884 : UErrorCode status = U_ZERO_ERROR;
1494 : std::unique_ptr<icu::StringEnumeration> keywords(
1495 14884 : icu_locale->createKeywords(status));
1496 14884 : if (U_FAILURE(status)) return extensions;
1497 :
1498 14884 : if (!keywords) return extensions;
1499 : char value[ULOC_FULLNAME_CAPACITY];
1500 :
1501 : int32_t length;
1502 3101 : status = U_ZERO_ERROR;
1503 7933 : for (const char* keyword = keywords->next(&length, status);
1504 12765 : keyword != nullptr; keyword = keywords->next(&length, status)) {
1505 : // Ignore failures in ICU and skip to the next keyword.
1506 : //
1507 : // This is fine.™
1508 4832 : if (U_FAILURE(status)) {
1509 0 : status = U_ZERO_ERROR;
1510 0 : continue;
1511 : }
1512 :
1513 4832 : icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
1514 :
1515 : // Ignore failures in ICU and skip to the next keyword.
1516 : //
1517 : // This is fine.™
1518 4832 : if (U_FAILURE(status)) {
1519 0 : status = U_ZERO_ERROR;
1520 0 : continue;
1521 : }
1522 :
1523 4832 : const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
1524 :
1525 19301 : if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
1526 2570 : const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
1527 : bool is_valid_value = false;
1528 : // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
1529 2570 : if (strcmp("ca", bcp47_key) == 0) {
1530 1206 : is_valid_value = IsValidCalendar(*icu_locale, bcp47_value);
1531 1967 : } else if (strcmp("co", bcp47_key) == 0) {
1532 378 : is_valid_value = IsValidCollation(*icu_locale, bcp47_value);
1533 1778 : } else if (strcmp("hc", bcp47_key) == 0) {
1534 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
1535 396 : std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
1536 396 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1537 1580 : } else if (strcmp("lb", bcp47_key) == 0) {
1538 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
1539 0 : std::set<std::string> valid_values = {"strict", "normal", "loose"};
1540 0 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1541 1580 : } else if (strcmp("kn", bcp47_key) == 0) {
1542 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1543 50 : std::set<std::string> valid_values = {"true", "false"};
1544 50 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1545 1555 : } else if (strcmp("kf", bcp47_key) == 0) {
1546 : // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1547 50 : std::set<std::string> valid_values = {"upper", "lower", "false"};
1548 50 : is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1549 1530 : } else if (strcmp("nu", bcp47_key) == 0) {
1550 3060 : is_valid_value = IsValidNumberingSystem(bcp47_value);
1551 : }
1552 2570 : if (is_valid_value) {
1553 : extensions.insert(
1554 2161 : std::pair<std::string, std::string>(bcp47_key, bcp47_value));
1555 : continue;
1556 : }
1557 : }
1558 2671 : status = U_ZERO_ERROR;
1559 5342 : icu_locale->setUnicodeKeywordValue(
1560 2671 : bcp47_key == nullptr ? keyword : bcp47_key, nullptr, status);
1561 2671 : CHECK(U_SUCCESS(status));
1562 : }
1563 :
1564 : return extensions;
1565 : }
1566 :
1567 : // ecma402/#sec-lookupmatcher
1568 14884 : std::string LookupMatcher(Isolate* isolate,
1569 : const std::set<std::string>& available_locales,
1570 : const std::vector<std::string>& requested_locales) {
1571 : // 1. Let result be a new Record.
1572 : std::string result;
1573 :
1574 : // 2. For each element locale of requestedLocales in List order, do
1575 15019 : for (const std::string& locale : requested_locales) {
1576 : // 2. a. Let noExtensionsLocale be the String value that is locale
1577 : // with all Unicode locale extension sequences removed.
1578 13083 : ParsedLocale parsed_locale = ParseBCP47Locale(locale);
1579 : std::string no_extensions_locale = parsed_locale.no_extensions_locale;
1580 :
1581 : // 2. b. Let availableLocale be
1582 : // BestAvailableLocale(availableLocales, noExtensionsLocale).
1583 : std::string available_locale =
1584 12948 : BestAvailableLocale(available_locales, no_extensions_locale);
1585 :
1586 : // 2. c. If availableLocale is not undefined, append locale to the
1587 : // end of subset.
1588 12948 : if (!available_locale.empty()) {
1589 : // Note: The following steps are not performed here because we
1590 : // can use ICU to parse the unicode locale extension sequence
1591 : // as part of Intl::ResolveLocale.
1592 : //
1593 : // There's no need to separate the unicode locale extensions
1594 : // right here. Instead just return the available locale with the
1595 : // extensions.
1596 : //
1597 : // 2. c. i. Set result.[[locale]] to availableLocale.
1598 : // 2. c. ii. If locale and noExtensionsLocale are not the same
1599 : // String value, then
1600 : // 2. c. ii. 1. Let extension be the String value consisting of
1601 : // the first substring of locale that is a Unicode locale
1602 : // extension sequence.
1603 : // 2. c. ii. 2. Set result.[[extension]] to extension.
1604 : // 2. c. iii. Return result.
1605 12813 : return available_locale + parsed_locale.extension;
1606 : }
1607 : }
1608 :
1609 : // 3. Let defLocale be DefaultLocale();
1610 : // 4. Set result.[[locale]] to defLocale.
1611 : // 5. Return result.
1612 2071 : return DefaultLocale(isolate);
1613 : }
1614 :
1615 : } // namespace
1616 :
1617 : // This function doesn't correspond exactly with the spec. Instead
1618 : // we use ICU to do all the string manipulations that the spec
1619 : // peforms.
1620 : //
1621 : // The spec uses this function to normalize values for various
1622 : // relevant extension keys (such as disallowing "search" for
1623 : // collation). Instead of doing this here, we let the callers of
1624 : // this method perform such normalization.
1625 : //
1626 : // ecma402/#sec-resolvelocale
1627 14884 : Intl::ResolvedLocale Intl::ResolveLocale(
1628 : Isolate* isolate, const std::set<std::string>& available_locales,
1629 : const std::vector<std::string>& requested_locales, MatcherOption matcher,
1630 : const std::set<std::string>& relevant_extension_keys) {
1631 : std::string locale;
1632 14884 : if (matcher == Intl::MatcherOption::kLookup) {
1633 29444 : locale = LookupMatcher(isolate, available_locales, requested_locales);
1634 162 : } else if (matcher == Intl::MatcherOption::kBestFit) {
1635 : // TODO(intl): Implement better lookup algorithm.
1636 324 : locale = LookupMatcher(isolate, available_locales, requested_locales);
1637 : }
1638 :
1639 29768 : icu::Locale icu_locale = CreateICULocale(locale);
1640 : std::map<std::string, std::string> extensions =
1641 14884 : LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
1642 :
1643 29768 : std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
1644 :
1645 : // TODO(gsathya): Remove privateuse subtags from extensions.
1646 :
1647 44652 : return Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions};
1648 : }
1649 :
1650 2813 : Managed<icu::UnicodeString> Intl::SetTextToBreakIterator(
1651 : Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
1652 : icu::UnicodeString* u_text =
1653 2813 : (icu::UnicodeString*)(Intl::ToICUUnicodeString(isolate, text).clone());
1654 :
1655 : Handle<Managed<icu::UnicodeString>> new_u_text =
1656 2813 : Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
1657 :
1658 2813 : break_iterator->setText(*u_text);
1659 2813 : return *new_u_text;
1660 : }
1661 :
1662 : // ecma262 #sec-string.prototype.normalize
1663 2340 : MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
1664 : Handle<Object> form_input) {
1665 : const char* form_name;
1666 : UNormalization2Mode form_mode;
1667 2340 : if (form_input->IsUndefined(isolate)) {
1668 : // default is FNC
1669 : form_name = "nfc";
1670 : form_mode = UNORM2_COMPOSE;
1671 : } else {
1672 : Handle<String> form;
1673 3780 : ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
1674 : Object::ToString(isolate, form_input), String);
1675 :
1676 1890 : if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
1677 : form_name = "nfc";
1678 : form_mode = UNORM2_COMPOSE;
1679 1476 : } else if (String::Equals(isolate, form,
1680 : isolate->factory()->NFD_string())) {
1681 : form_name = "nfc";
1682 : form_mode = UNORM2_DECOMPOSE;
1683 1071 : } else if (String::Equals(isolate, form,
1684 : isolate->factory()->NFKC_string())) {
1685 : form_name = "nfkc";
1686 : form_mode = UNORM2_COMPOSE;
1687 657 : } else if (String::Equals(isolate, form,
1688 : isolate->factory()->NFKD_string())) {
1689 : form_name = "nfkc";
1690 : form_mode = UNORM2_DECOMPOSE;
1691 : } else {
1692 : Handle<String> valid_forms =
1693 99 : isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
1694 198 : THROW_NEW_ERROR(
1695 : isolate,
1696 : NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
1697 : String);
1698 : }
1699 : }
1700 :
1701 : int length = string->length();
1702 2241 : string = String::Flatten(isolate, string);
1703 2241 : icu::UnicodeString result;
1704 : std::unique_ptr<uc16[]> sap;
1705 2241 : UErrorCode status = U_ZERO_ERROR;
1706 4482 : icu::UnicodeString input = ToICUUnicodeString(isolate, string);
1707 : // Getting a singleton. Should not free it.
1708 : const icu::Normalizer2* normalizer =
1709 2241 : icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
1710 : DCHECK(U_SUCCESS(status));
1711 2241 : CHECK_NOT_NULL(normalizer);
1712 : int32_t normalized_prefix_length =
1713 2241 : normalizer->spanQuickCheckYes(input, status);
1714 : // Quick return if the input is already normalized.
1715 2241 : if (length == normalized_prefix_length) return string;
1716 : icu::UnicodeString unnormalized =
1717 1926 : input.tempSubString(normalized_prefix_length);
1718 : // Read-only alias of the normalized prefix.
1719 1926 : result.setTo(false, input.getBuffer(), normalized_prefix_length);
1720 : // copy-on-write; normalize the suffix and append to |result|.
1721 963 : normalizer->normalizeSecondAndAppend(result, unnormalized, status);
1722 :
1723 963 : if (U_FAILURE(status)) {
1724 0 : THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
1725 : }
1726 :
1727 963 : return Intl::ToString(isolate, result);
1728 : }
1729 :
1730 : // ICUTimezoneCache calls out to ICU for TimezoneCache
1731 : // functionality in a straightforward way.
1732 : class ICUTimezoneCache : public base::TimezoneCache {
1733 : public:
1734 124834 : ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
1735 :
1736 187206 : ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
1737 :
1738 : const char* LocalTimezone(double time_ms) override;
1739 :
1740 : double DaylightSavingsOffset(double time_ms) override;
1741 :
1742 : double LocalTimeOffset(double time_ms, bool is_utc) override;
1743 :
1744 : void Clear(TimeZoneDetection time_zone_detection) override;
1745 :
1746 : private:
1747 : icu::TimeZone* GetTimeZone();
1748 :
1749 : bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
1750 : int32_t* dst_offset);
1751 :
1752 : icu::TimeZone* timezone_;
1753 :
1754 : std::string timezone_name_;
1755 : std::string dst_timezone_name_;
1756 : };
1757 :
1758 145 : const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
1759 145 : bool is_dst = DaylightSavingsOffset(time_ms) != 0;
1760 145 : std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
1761 145 : if (name->empty()) {
1762 145 : icu::UnicodeString result;
1763 290 : GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
1764 : result += '\0';
1765 :
1766 : icu::StringByteSink<std::string> byte_sink(name);
1767 145 : result.toUTF8(byte_sink);
1768 : }
1769 : DCHECK(!name->empty());
1770 145 : return name->c_str();
1771 : }
1772 :
1773 0 : icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
1774 136221 : if (timezone_ == nullptr) {
1775 235 : timezone_ = icu::TimeZone::createDefault();
1776 : }
1777 136221 : return timezone_;
1778 : }
1779 :
1780 136076 : bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
1781 : int32_t* raw_offset, int32_t* dst_offset) {
1782 136076 : UErrorCode status = U_ZERO_ERROR;
1783 : // TODO(jshin): ICU TimeZone class handles skipped time differently from
1784 : // Ecma 262 (https://github.com/tc39/ecma262/pull/778) and icu::TimeZone
1785 : // class does not expose the necessary API. Fixing
1786 : // http://bugs.icu-project.org/trac/ticket/13268 would make it easy to
1787 : // implement the proposed spec change. A proposed fix for ICU is
1788 : // https://chromium-review.googlesource.com/851265 .
1789 : // In the meantime, use an internal (still public) API of icu::BasicTimeZone.
1790 : // Once it's accepted by the upstream, get rid of cast. Note that casting
1791 : // TimeZone to BasicTimeZone is safe because we know that icu::TimeZone used
1792 : // here is a BasicTimeZone.
1793 136076 : if (is_utc) {
1794 133141 : GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
1795 : } else {
1796 : static_cast<const icu::BasicTimeZone*>(GetTimeZone())
1797 : ->getOffsetFromLocal(time_ms, icu::BasicTimeZone::kFormer,
1798 : icu::BasicTimeZone::kFormer, *raw_offset,
1799 2935 : *dst_offset, status);
1800 : }
1801 :
1802 272152 : return U_SUCCESS(status);
1803 : }
1804 :
1805 19237 : double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
1806 : int32_t raw_offset, dst_offset;
1807 19237 : if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
1808 19237 : return dst_offset;
1809 : }
1810 :
1811 116839 : double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
1812 : int32_t raw_offset, dst_offset;
1813 116839 : if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
1814 116839 : return raw_offset + dst_offset;
1815 : }
1816 :
1817 187235 : void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
1818 187235 : delete timezone_;
1819 187235 : timezone_ = nullptr;
1820 : timezone_name_.clear();
1821 : dst_timezone_name_.clear();
1822 187235 : if (time_zone_detection == TimeZoneDetection::kRedetect) {
1823 0 : icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
1824 : }
1825 187235 : }
1826 :
1827 62427 : base::TimezoneCache* Intl::CreateTimeZoneCache() {
1828 : return FLAG_icu_timezone_data ? new ICUTimezoneCache()
1829 62437 : : base::OS::CreateTimezoneCache();
1830 : }
1831 :
1832 6107 : Maybe<Intl::CaseFirst> Intl::GetCaseFirst(Isolate* isolate,
1833 : Handle<JSReceiver> options,
1834 : const char* method) {
1835 : return Intl::GetStringOption<Intl::CaseFirst>(
1836 : isolate, options, "caseFirst", method, {"upper", "lower", "false"},
1837 : {Intl::CaseFirst::kUpper, Intl::CaseFirst::kLower,
1838 : Intl::CaseFirst::kFalse},
1839 18321 : Intl::CaseFirst::kUndefined);
1840 : }
1841 :
1842 3831 : Maybe<Intl::HourCycle> Intl::GetHourCycle(Isolate* isolate,
1843 : Handle<JSReceiver> options,
1844 : const char* method) {
1845 : return Intl::GetStringOption<Intl::HourCycle>(
1846 : isolate, options, "hourCycle", method, {"h11", "h12", "h23", "h24"},
1847 : {Intl::HourCycle::kH11, Intl::HourCycle::kH12, Intl::HourCycle::kH23,
1848 : Intl::HourCycle::kH24},
1849 11493 : Intl::HourCycle::kUndefined);
1850 : }
1851 :
1852 15028 : Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
1853 : Handle<JSReceiver> options,
1854 : const char* method) {
1855 : return Intl::GetStringOption<Intl::MatcherOption>(
1856 : isolate, options, "localeMatcher", method, {"best fit", "lookup"},
1857 : {Intl::MatcherOption::kLookup, Intl::MatcherOption::kBestFit},
1858 45084 : Intl::MatcherOption::kLookup);
1859 : }
1860 :
1861 108 : Intl::HourCycle Intl::ToHourCycle(const std::string& hc) {
1862 108 : if (hc == "h11") return Intl::HourCycle::kH11;
1863 81 : if (hc == "h12") return Intl::HourCycle::kH12;
1864 54 : if (hc == "h23") return Intl::HourCycle::kH23;
1865 27 : if (hc == "h24") return Intl::HourCycle::kH24;
1866 0 : return Intl::HourCycle::kUndefined;
1867 : }
1868 :
1869 1216 : const std::set<std::string>& Intl::GetAvailableLocalesForLocale() {
1870 : static base::LazyInstance<Intl::AvailableLocales<icu::Locale>>::type
1871 : available_locales = LAZY_INSTANCE_INITIALIZER;
1872 1216 : return available_locales.Pointer()->Get();
1873 : }
1874 :
1875 4759 : const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
1876 : static base::LazyInstance<Intl::AvailableLocales<icu::DateFormat>>::type
1877 : available_locales = LAZY_INSTANCE_INITIALIZER;
1878 4759 : return available_locales.Pointer()->Get();
1879 : }
1880 :
1881 10008 : Handle<String> Intl::NumberFieldToType(Isolate* isolate,
1882 : Handle<Object> numeric_obj,
1883 : int32_t field_id) {
1884 : DCHECK(numeric_obj->IsNumeric());
1885 10008 : switch (static_cast<UNumberFormatFields>(field_id)) {
1886 : case UNUM_INTEGER_FIELD:
1887 5193 : if (numeric_obj->IsBigInt()) {
1888 : // Neither NaN nor Infinite could be stored into BigInt
1889 : // so just return integer.
1890 : return isolate->factory()->integer_string();
1891 : } else {
1892 : double number = numeric_obj->Number();
1893 3006 : if (std::isfinite(number)) return isolate->factory()->integer_string();
1894 27 : if (std::isnan(number)) return isolate->factory()->nan_string();
1895 : return isolate->factory()->infinity_string();
1896 : }
1897 : case UNUM_FRACTION_FIELD:
1898 : return isolate->factory()->fraction_string();
1899 : case UNUM_DECIMAL_SEPARATOR_FIELD:
1900 : return isolate->factory()->decimal_string();
1901 : case UNUM_GROUPING_SEPARATOR_FIELD:
1902 : return isolate->factory()->group_string();
1903 : case UNUM_CURRENCY_FIELD:
1904 : return isolate->factory()->currency_string();
1905 : case UNUM_PERCENT_FIELD:
1906 : return isolate->factory()->percentSign_string();
1907 : case UNUM_SIGN_FIELD:
1908 324 : if (numeric_obj->IsBigInt()) {
1909 : Handle<BigInt> big_int = Handle<BigInt>::cast(numeric_obj);
1910 : return big_int->IsNegative() ? isolate->factory()->minusSign_string()
1911 108 : : isolate->factory()->plusSign_string();
1912 : } else {
1913 : double number = numeric_obj->Number();
1914 : return number < 0 ? isolate->factory()->minusSign_string()
1915 216 : : isolate->factory()->plusSign_string();
1916 : }
1917 : case UNUM_EXPONENT_SYMBOL_FIELD:
1918 : case UNUM_EXPONENT_SIGN_FIELD:
1919 : case UNUM_EXPONENT_FIELD:
1920 : // We should never get these because we're not using any scientific
1921 : // formatter.
1922 0 : UNREACHABLE();
1923 : return Handle<String>();
1924 :
1925 : case UNUM_PERMILL_FIELD:
1926 : // We're not creating any permill formatter, and it's not even clear how
1927 : // that would be possible with the ICU API.
1928 0 : UNREACHABLE();
1929 : return Handle<String>();
1930 :
1931 : default:
1932 0 : UNREACHABLE();
1933 : return Handle<String>();
1934 : }
1935 : }
1936 :
1937 : } // namespace internal
1938 121996 : } // namespace v8
|