Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/builtins/builtins-intl.h"
10 : #include "src/builtins/builtins-utils.h"
11 : #include "src/builtins/builtins.h"
12 : #include "src/intl.h"
13 : #include "src/objects-inl.h"
14 : #include "src/objects/intl-objects.h"
15 :
16 : #include "unicode/decimfmt.h"
17 : #include "unicode/fieldpos.h"
18 : #include "unicode/fpositer.h"
19 : #include "unicode/normalizer2.h"
20 : #include "unicode/numfmt.h"
21 : #include "unicode/ufieldpositer.h"
22 : #include "unicode/unistr.h"
23 : #include "unicode/ustring.h"
24 :
25 : namespace v8 {
26 : namespace internal {
27 :
28 15189 : BUILTIN(StringPrototypeToUpperCaseIntl) {
29 : HandleScope scope(isolate);
30 10486 : TO_THIS_STRING(string, "String.prototype.toUpperCase");
31 4883 : string = String::Flatten(string);
32 4883 : return ConvertCase(string, true, isolate);
33 : }
34 :
35 6975 : BUILTIN(StringPrototypeNormalizeIntl) {
36 : HandleScope handle_scope(isolate);
37 4650 : TO_THIS_STRING(string, "String.prototype.normalize");
38 :
39 : Handle<Object> form_input = args.atOrUndefined(isolate, 1);
40 : const char* form_name;
41 : UNormalization2Mode form_mode;
42 2325 : if (form_input->IsUndefined(isolate)) {
43 : // default is FNC
44 : form_name = "nfc";
45 : form_mode = UNORM2_COMPOSE;
46 : } else {
47 : Handle<String> form;
48 3908 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
49 : Object::ToString(isolate, form_input));
50 :
51 1900 : if (String::Equals(form, isolate->factory()->NFC_string())) {
52 : form_name = "nfc";
53 : form_mode = UNORM2_COMPOSE;
54 1485 : } else if (String::Equals(form, isolate->factory()->NFD_string())) {
55 : form_name = "nfc";
56 : form_mode = UNORM2_DECOMPOSE;
57 1080 : } else if (String::Equals(form, isolate->factory()->NFKC_string())) {
58 : form_name = "nfkc";
59 : form_mode = UNORM2_COMPOSE;
60 666 : } else if (String::Equals(form, isolate->factory()->NFKD_string())) {
61 : form_name = "nfkc";
62 : form_mode = UNORM2_DECOMPOSE;
63 : } else {
64 : Handle<String> valid_forms =
65 108 : isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
66 216 : THROW_NEW_ERROR_RETURN_FAILURE(
67 : isolate,
68 : NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
69 : }
70 : }
71 :
72 : int length = string->length();
73 2217 : string = String::Flatten(string);
74 2217 : icu::UnicodeString result;
75 2217 : std::unique_ptr<uc16[]> sap;
76 2217 : UErrorCode status = U_ZERO_ERROR;
77 : {
78 : DisallowHeapAllocation no_gc;
79 2217 : String::FlatContent flat = string->GetFlatContent();
80 2217 : const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
81 4434 : icu::UnicodeString input(false, src, length);
82 : // Getting a singleton. Should not free it.
83 : const icu::Normalizer2* normalizer =
84 2217 : icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
85 : DCHECK(U_SUCCESS(status));
86 2217 : CHECK_NOT_NULL(normalizer);
87 : int32_t normalized_prefix_length =
88 2217 : normalizer->spanQuickCheckYes(input, status);
89 : // Quick return if the input is already normalized.
90 3471 : if (length == normalized_prefix_length) return *string;
91 : icu::UnicodeString unnormalized =
92 1926 : input.tempSubString(normalized_prefix_length);
93 : // Read-only alias of the normalized prefix.
94 1926 : result.setTo(false, input.getBuffer(), normalized_prefix_length);
95 : // copy-on-write; normalize the suffix and append to |result|.
96 1926 : normalizer->normalizeSecondAndAppend(result, unnormalized, status);
97 : }
98 :
99 963 : if (U_FAILURE(status)) {
100 0 : return isolate->heap()->undefined_value();
101 : }
102 :
103 3852 : RETURN_RESULT_OR_FAILURE(
104 : isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
105 : reinterpret_cast<const uint16_t*>(result.getBuffer()),
106 : result.length())));
107 : }
108 :
109 : namespace {
110 :
111 : // The list comes from third_party/icu/source/i18n/unicode/unum.h.
112 : // They're mapped to NumberFormat part types mentioned throughout
113 : // https://tc39.github.io/ecma402/#sec-partitionnumberpattern .
114 1017 : Handle<String> IcuNumberFieldIdToNumberType(int32_t field_id, double number,
115 : Isolate* isolate) {
116 1017 : switch (static_cast<UNumberFormatFields>(field_id)) {
117 : case UNUM_INTEGER_FIELD:
118 396 : if (std::isfinite(number)) return isolate->factory()->integer_string();
119 27 : if (std::isnan(number)) return isolate->factory()->nan_string();
120 : return isolate->factory()->infinity_string();
121 : case UNUM_FRACTION_FIELD:
122 : return isolate->factory()->fraction_string();
123 : case UNUM_DECIMAL_SEPARATOR_FIELD:
124 : return isolate->factory()->decimal_string();
125 : case UNUM_GROUPING_SEPARATOR_FIELD:
126 : return isolate->factory()->group_string();
127 : case UNUM_CURRENCY_FIELD:
128 : return isolate->factory()->currency_string();
129 : case UNUM_PERCENT_FIELD:
130 : return isolate->factory()->percentSign_string();
131 : case UNUM_SIGN_FIELD:
132 : return number < 0 ? isolate->factory()->minusSign_string()
133 108 : : isolate->factory()->plusSign_string();
134 :
135 : case UNUM_EXPONENT_SYMBOL_FIELD:
136 : case UNUM_EXPONENT_SIGN_FIELD:
137 : case UNUM_EXPONENT_FIELD:
138 : // We should never get these because we're not using any scientific
139 : // formatter.
140 0 : UNREACHABLE();
141 : return Handle<String>();
142 :
143 : case UNUM_PERMILL_FIELD:
144 : // We're not creating any permill formatter, and it's not even clear how
145 : // that would be possible with the ICU API.
146 0 : UNREACHABLE();
147 : return Handle<String>();
148 :
149 : default:
150 0 : UNREACHABLE();
151 : return Handle<String>();
152 : }
153 : }
154 :
155 1062 : bool AddElement(Handle<JSArray> array, int index,
156 : Handle<String> field_type_string,
157 : const icu::UnicodeString& formatted, int32_t begin, int32_t end,
158 : Isolate* isolate) {
159 : HandleScope scope(isolate);
160 : Factory* factory = isolate->factory();
161 1062 : Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
162 : Handle<String> value;
163 : JSObject::AddProperty(element, factory->type_string(), field_type_string,
164 1062 : NONE);
165 :
166 1062 : icu::UnicodeString field(formatted.tempSubStringBetween(begin, end));
167 3186 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
168 : isolate, value,
169 : factory->NewStringFromTwoByte(Vector<const uint16_t>(
170 : reinterpret_cast<const uint16_t*>(field.getBuffer()),
171 : field.length())),
172 : false);
173 :
174 1062 : JSObject::AddProperty(element, factory->value_string(), value, NONE);
175 3186 : RETURN_ON_EXCEPTION_VALUE(
176 : isolate, JSObject::AddDataElement(array, index, element, NONE), false);
177 1062 : return true;
178 : }
179 :
180 2292 : bool cmp_NumberFormatSpan(const NumberFormatSpan& a,
181 : const NumberFormatSpan& b) {
182 : // Regions that start earlier should be encountered earlier.
183 2292 : if (a.begin_pos < b.begin_pos) return true;
184 2112 : if (a.begin_pos > b.begin_pos) return false;
185 : // For regions that start in the same place, regions that last longer should
186 : // be encountered earlier.
187 708 : if (a.end_pos < b.end_pos) return false;
188 204 : if (a.end_pos > b.end_pos) return true;
189 : // For regions that are exactly the same, one of them must be the "literal"
190 : // backdrop we added, which has a field_id of -1, so consider higher field_ids
191 : // to be later.
192 144 : return a.field_id < b.field_id;
193 : }
194 :
195 270 : Object* FormatNumberToParts(Isolate* isolate, icu::NumberFormat* fmt,
196 : double number) {
197 : Factory* factory = isolate->factory();
198 :
199 : icu::UnicodeString formatted;
200 540 : icu::FieldPositionIterator fp_iter;
201 270 : UErrorCode status = U_ZERO_ERROR;
202 270 : fmt->format(number, formatted, &fp_iter, status);
203 270 : if (U_FAILURE(status)) return isolate->heap()->undefined_value();
204 :
205 270 : Handle<JSArray> result = factory->NewJSArray(0);
206 : int32_t length = formatted.length();
207 270 : if (length == 0) return *result;
208 :
209 : std::vector<NumberFormatSpan> regions;
210 : // Add a "literal" backdrop for the entire string. This will be used if no
211 : // other region covers some part of the formatted string. It's possible
212 : // there's another field with exactly the same begin and end as this backdrop,
213 : // in which case the backdrop's field_id of -1 will give it lower priority.
214 270 : regions.push_back(NumberFormatSpan(-1, 0, formatted.length()));
215 :
216 : {
217 : icu::FieldPosition fp;
218 1161 : while (fp_iter.next(fp)) {
219 : regions.push_back(NumberFormatSpan(fp.getField(), fp.getBeginIndex(),
220 1782 : fp.getEndIndex()));
221 270 : }
222 : }
223 :
224 270 : std::vector<NumberFormatSpan> parts = FlattenRegionsToParts(®ions);
225 :
226 : int index = 0;
227 1602 : for (auto it = parts.begin(); it < parts.end(); it++) {
228 1062 : NumberFormatSpan part = *it;
229 : Handle<String> field_type_string =
230 : part.field_id == -1
231 : ? isolate->factory()->literal_string()
232 1107 : : IcuNumberFieldIdToNumberType(part.field_id, number, isolate);
233 1062 : if (!AddElement(result, index, field_type_string, formatted, part.begin_pos,
234 1062 : part.end_pos, isolate)) {
235 0 : return isolate->heap()->undefined_value();
236 : }
237 1062 : ++index;
238 : }
239 270 : JSObject::ValidateElements(*result);
240 :
241 540 : return *result;
242 : }
243 : } // namespace
244 :
245 : // Flattens a list of possibly-overlapping "regions" to a list of
246 : // non-overlapping "parts". At least one of the input regions must span the
247 : // entire space of possible indexes. The regions parameter will sorted in-place
248 : // according to some criteria; this is done for performance to avoid copying the
249 : // input.
250 306 : std::vector<NumberFormatSpan> FlattenRegionsToParts(
251 2670 : std::vector<NumberFormatSpan>* regions) {
252 : // The intention of this algorithm is that it's used to translate ICU "fields"
253 : // to JavaScript "parts" of a formatted string. Each ICU field and JavaScript
254 : // part has an integer field_id, which corresponds to something like "grouping
255 : // separator", "fraction", or "percent sign", and has a begin and end
256 : // position. Here's a diagram of:
257 :
258 : // var nf = new Intl.NumberFormat(['de'], {style:'currency',currency:'EUR'});
259 : // nf.formatToParts(123456.78);
260 :
261 : // : 6
262 : // input regions: 0000000211 7
263 : // ('-' means -1): ------------
264 : // formatted string: "123.456,78 €"
265 : // output parts: 0006000211-7
266 :
267 : // To illustrate the requirements of this algorithm, here's a contrived and
268 : // convoluted example of inputs and expected outputs:
269 :
270 : // : 4
271 : // : 22 33 3
272 : // : 11111 22
273 : // input regions: 0000000 111
274 : // : ------------
275 : // formatted string: "abcdefghijkl"
276 : // output parts: 0221340--231
277 : // (The characters in the formatted string are irrelevant to this function.)
278 :
279 : // We arrange the overlapping input regions like a mountain range where
280 : // smaller regions are "on top" of larger regions, and we output a birds-eye
281 : // view of the mountains, so that smaller regions take priority over larger
282 : // regions.
283 306 : std::sort(regions->begin(), regions->end(), cmp_NumberFormatSpan);
284 : std::vector<size_t> overlapping_region_index_stack;
285 : // At least one item in regions must be a region spanning the entire string.
286 : // Due to the sorting above, the first item in the vector will be one of them.
287 612 : overlapping_region_index_stack.push_back(0);
288 306 : NumberFormatSpan top_region = regions->at(0);
289 : size_t region_iterator = 1;
290 : int32_t entire_size = top_region.end_pos;
291 :
292 : std::vector<NumberFormatSpan> out_parts;
293 :
294 : // The "climber" is a cursor that advances from left to right climbing "up"
295 : // and "down" the mountains. Whenever the climber moves to the right, that
296 : // represents an item of output.
297 : int32_t climber = 0;
298 1947 : while (climber < entire_size) {
299 : int32_t next_region_begin_pos;
300 1335 : if (region_iterator < regions->size()) {
301 1029 : next_region_begin_pos = regions->at(region_iterator).begin_pos;
302 : } else {
303 : // finish off the rest of the input by proceeding to the end.
304 : next_region_begin_pos = entire_size;
305 : }
306 :
307 1335 : if (climber < next_region_begin_pos) {
308 1308 : while (top_region.end_pos < next_region_begin_pos) {
309 309 : if (climber < top_region.end_pos) {
310 : // step down
311 : out_parts.push_back(NumberFormatSpan(top_region.field_id, climber,
312 231 : top_region.end_pos));
313 : climber = top_region.end_pos;
314 : } else {
315 : // drop down
316 : }
317 : overlapping_region_index_stack.pop_back();
318 618 : top_region = regions->at(overlapping_region_index_stack.back());
319 : }
320 999 : if (climber < next_region_begin_pos) {
321 : // cross a plateau/mesa/valley
322 : out_parts.push_back(NumberFormatSpan(top_region.field_id, climber,
323 999 : next_region_begin_pos));
324 : climber = next_region_begin_pos;
325 : }
326 : }
327 1335 : if (region_iterator < regions->size()) {
328 2058 : overlapping_region_index_stack.push_back(region_iterator++);
329 2058 : top_region = regions->at(overlapping_region_index_stack.back());
330 : }
331 : }
332 306 : return out_parts;
333 : }
334 :
335 810 : BUILTIN(NumberFormatPrototypeFormatToParts) {
336 : const char* const method = "Intl.NumberFormat.prototype.formatToParts";
337 : HandleScope handle_scope(isolate);
338 270 : CHECK_RECEIVER(JSObject, number_format_holder, method);
339 :
340 : Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
341 : Handle<Object> tag =
342 270 : JSReceiver::GetDataProperty(number_format_holder, marker);
343 : Handle<String> expected_tag =
344 270 : isolate->factory()->NewStringFromStaticChars("numberformat");
345 540 : if (!(tag->IsString() && String::cast(*tag)->Equals(*expected_tag))) {
346 0 : THROW_NEW_ERROR_RETURN_FAILURE(
347 : isolate,
348 : NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
349 : isolate->factory()->NewStringFromAsciiChecked(method),
350 : number_format_holder));
351 : }
352 :
353 : Handle<Object> x;
354 270 : if (args.length() >= 2) {
355 540 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, x,
356 : Object::ToNumber(args.at(1)));
357 : } else {
358 : x = isolate->factory()->nan_value();
359 : }
360 :
361 : icu::DecimalFormat* number_format =
362 270 : NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
363 270 : CHECK_NOT_NULL(number_format);
364 :
365 270 : Object* result = FormatNumberToParts(isolate, number_format, x->Number());
366 270 : return result;
367 : }
368 :
369 : } // namespace internal
370 : } // namespace v8
|