Line data Source code
1 : // Copyright 2016 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/builtins/builtins-utils-inl.h"
6 : #include "src/builtins/builtins.h"
7 : #include "src/conversions.h"
8 : #include "src/counters.h"
9 : #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
10 : #include "src/objects-inl.h"
11 : #ifdef V8_INTL_SUPPORT
12 : #include "src/objects/intl-objects.h"
13 : #endif
14 : #include "src/regexp/regexp-utils.h"
15 : #include "src/string-builder-inl.h"
16 : #include "src/string-case.h"
17 : #include "src/unicode-inl.h"
18 : #include "src/unicode.h"
19 :
20 : namespace v8 {
21 : namespace internal {
22 :
23 : namespace { // for String.fromCodePoint
24 :
25 3609398 : bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
26 3609398 : if (!value->IsNumber() &&
27 0 : !Object::ToNumber(isolate, value).ToHandle(&value)) {
28 : return false;
29 : }
30 :
31 3609398 : if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
32 : value->Number()) {
33 : return false;
34 : }
35 :
36 7218626 : if (value->Number() < 0 || value->Number() > 0x10FFFF) {
37 : return false;
38 : }
39 :
40 3609255 : return true;
41 : }
42 :
43 3609416 : uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
44 3609416 : Handle<Object> value = args.at(1 + index);
45 7218832 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value,
46 : Object::ToNumber(isolate, value), -1);
47 3609398 : if (!IsValidCodePoint(isolate, value)) {
48 286 : isolate->Throw(*isolate->factory()->NewRangeError(
49 286 : MessageTemplate::kInvalidCodePoint, value));
50 : return -1;
51 : }
52 : return DoubleToUint32(value->Number());
53 : }
54 :
55 : } // namespace
56 :
57 : // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
58 13622935 : BUILTIN(StringFromCodePoint) {
59 : HandleScope scope(isolate);
60 2724587 : int const length = args.length() - 1;
61 2724624 : if (length == 0) return ReadOnlyRoots(isolate).empty_string();
62 : DCHECK_LT(0, length);
63 :
64 : // Optimistically assume that the resulting String contains only one byte
65 : // characters.
66 : std::vector<uint8_t> one_byte_buffer;
67 2724550 : one_byte_buffer.reserve(length);
68 : uc32 code = 0;
69 : int index;
70 3614432 : for (index = 0; index < length; index++) {
71 3166983 : code = NextCodePoint(isolate, args, index);
72 3166983 : if (code < 0) {
73 161 : return ReadOnlyRoots(isolate).exception();
74 : }
75 3166822 : if (code > String::kMaxOneByteCharCode) {
76 : break;
77 : }
78 889882 : one_byte_buffer.push_back(code);
79 : }
80 :
81 2724389 : if (index == length) {
82 5016 : RETURN_RESULT_OR_FAILURE(
83 : isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
84 : one_byte_buffer.data(), one_byte_buffer.size())));
85 : }
86 :
87 : std::vector<uc16> two_byte_buffer;
88 2721881 : two_byte_buffer.reserve(length - index);
89 :
90 : while (true) {
91 3164314 : if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
92 357136 : two_byte_buffer.push_back(code);
93 : } else {
94 8957238 : two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
95 5971492 : two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
96 : }
97 :
98 3164314 : if (++index == length) {
99 : break;
100 : }
101 442433 : code = NextCodePoint(isolate, args, index);
102 442433 : if (code < 0) {
103 0 : return ReadOnlyRoots(isolate).exception();
104 : }
105 : }
106 :
107 : Handle<SeqTwoByteString> result;
108 8165643 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
109 : isolate, result,
110 : isolate->factory()->NewRawTwoByteString(
111 : static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
112 :
113 : DisallowHeapAllocation no_gc;
114 : CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
115 : one_byte_buffer.size());
116 2721881 : CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
117 : two_byte_buffer.data(), two_byte_buffer.size());
118 :
119 2721881 : return *result;
120 : }
121 :
122 : // ES6 section 21.1.3.9
123 : // String.prototype.lastIndexOf ( searchString [ , position ] )
124 8155 : BUILTIN(StringPrototypeLastIndexOf) {
125 : HandleScope handle_scope(isolate);
126 : return String::LastIndexOf(isolate, args.receiver(),
127 : args.atOrUndefined(isolate, 1),
128 3262 : args.atOrUndefined(isolate, 2));
129 : }
130 :
131 : // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
132 : //
133 : // This function is implementation specific. For now, we do not
134 : // do anything locale specific.
135 344795 : BUILTIN(StringPrototypeLocaleCompare) {
136 : HandleScope handle_scope(isolate);
137 :
138 68959 : isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
139 :
140 : #ifdef V8_INTL_SUPPORT
141 138260 : TO_THIS_STRING(str1, "String.prototype.localeCompare");
142 : Handle<String> str2;
143 137576 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
144 : isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
145 137576 : RETURN_RESULT_OR_FAILURE(
146 : isolate, Intl::StringLocaleCompare(isolate, str1, str2,
147 : args.atOrUndefined(isolate, 2),
148 : args.atOrUndefined(isolate, 3)));
149 : #else
150 : DCHECK_EQ(2, args.length());
151 :
152 : TO_THIS_STRING(str1, "String.prototype.localeCompare");
153 : Handle<String> str2;
154 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
155 : Object::ToString(isolate, args.at(1)));
156 :
157 : if (str1.is_identical_to(str2)) return Smi::kZero; // Equal.
158 : int str1_length = str1->length();
159 : int str2_length = str2->length();
160 :
161 : // Decide trivial cases without flattening.
162 : if (str1_length == 0) {
163 : if (str2_length == 0) return Smi::kZero; // Equal.
164 : return Smi::FromInt(-str2_length);
165 : } else {
166 : if (str2_length == 0) return Smi::FromInt(str1_length);
167 : }
168 :
169 : int end = str1_length < str2_length ? str1_length : str2_length;
170 :
171 : // No need to flatten if we are going to find the answer on the first
172 : // character. At this point we know there is at least one character
173 : // in each string, due to the trivial case handling above.
174 : int d = str1->Get(0) - str2->Get(0);
175 : if (d != 0) return Smi::FromInt(d);
176 :
177 : str1 = String::Flatten(isolate, str1);
178 : str2 = String::Flatten(isolate, str2);
179 :
180 : DisallowHeapAllocation no_gc;
181 : String::FlatContent flat1 = str1->GetFlatContent(no_gc);
182 : String::FlatContent flat2 = str2->GetFlatContent(no_gc);
183 :
184 : for (int i = 0; i < end; i++) {
185 : if (flat1.Get(i) != flat2.Get(i)) {
186 : return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
187 : }
188 : }
189 :
190 : return Smi::FromInt(str1_length - str2_length);
191 : #endif // !V8_INTL_SUPPORT
192 : }
193 :
194 : #ifndef V8_INTL_SUPPORT
195 : // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
196 : //
197 : // Simply checks the argument is valid and returns the string itself.
198 : // If internationalization is enabled, then intl.js will override this function
199 : // and provide the proper functionality, so this is just a fallback.
200 : BUILTIN(StringPrototypeNormalize) {
201 : HandleScope handle_scope(isolate);
202 : TO_THIS_STRING(string, "String.prototype.normalize");
203 :
204 : Handle<Object> form_input = args.atOrUndefined(isolate, 1);
205 : if (form_input->IsUndefined(isolate)) return *string;
206 :
207 : Handle<String> form;
208 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
209 : Object::ToString(isolate, form_input));
210 :
211 : if (!(String::Equals(isolate, form,
212 : isolate->factory()->NewStringFromStaticChars("NFC")) ||
213 : String::Equals(isolate, form,
214 : isolate->factory()->NewStringFromStaticChars("NFD")) ||
215 : String::Equals(isolate, form,
216 : isolate->factory()->NewStringFromStaticChars("NFKC")) ||
217 : String::Equals(isolate, form,
218 : isolate->factory()->NewStringFromStaticChars("NFKD")))) {
219 : Handle<String> valid_forms =
220 : isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
221 : THROW_NEW_ERROR_RETURN_FAILURE(
222 : isolate,
223 : NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
224 : }
225 :
226 : return *string;
227 : }
228 : #endif // !V8_INTL_SUPPORT
229 :
230 :
231 : #ifndef V8_INTL_SUPPORT
232 : namespace {
233 :
234 : inline bool ToUpperOverflows(uc32 character) {
235 : // y with umlauts and the micro sign are the only characters that stop
236 : // fitting into one-byte when converting to uppercase.
237 : static const uc32 yuml_code = 0xFF;
238 : static const uc32 micro_code = 0xB5;
239 : return (character == yuml_code || character == micro_code);
240 : }
241 :
// Runs one case-conversion pass over |string| using |mapping|, writing the
// converted characters into |result| (which has room for |result_length|
// characters).
//
// Returns one of:
//  - |result|, if at least one character was changed;
//  - |string|, if the conversion changed nothing;
//  - a Smi holding the exact length the result needs, when |result_length|
//    equals the input length but some character does not fit that assumption.
//    The length is negated when an overflowing character (see
//    ToUpperOverflows) was seen and overflow is not being ignored;
//  - the failure value produced by THROW_NEW_ERROR_RETURN_FAILURE, if the
//    exact length exceeds String::kMaxLength.
//
// |string| must not be empty: the first character is read unconditionally.
242 : template <class Converter>
243 : V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
244 : Isolate* isolate, String string, SeqString result, int result_length,
245 : unibrow::Mapping<Converter, 128>* mapping) {
246 : DisallowHeapAllocation no_gc;
247 : // We try this twice, once with the assumption that the result is no longer
248 : // than the input and, if that assumption breaks, again with the exact
249 : // length. This may not be pretty, but it is nicer than what was here before
250 : // and I hereby claim my vaffel-is.
251 : //
252 : // NOTE: This assumes that the upper/lower case of an ASCII
253 : // character is also ASCII. This is currently the case, but it
254 : // might break in the future if we implement more context and locale
255 : // dependent upper/lower conversions.
256 : bool has_changed_character = false;
257 :
258 : // Convert all characters using |mapping|, assuming that they will fit
259 : // in the buffer.
260 : StringCharacterStream stream(string);
261 : unibrow::uchar chars[Converter::kMaxWidth];
262 : // We can assume that the string is not empty.
263 : uc32 current = stream.GetNext();
// Overflow is ignored when lower-casing or when |result| is a two-byte string.
264 : bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
265 : for (int i = 0; i < result_length;) {
// The mapping is given the following character as well, so read one ahead.
266 : bool has_next = stream.HasMore();
267 : uc32 next = has_next ? stream.GetNext() : 0;
268 : int char_length = mapping->get(current, next, chars);
269 : if (char_length == 0) {
270 : // The case conversion of this character is the character itself.
271 : result->Set(i, current);
272 : i++;
273 : } else if (char_length == 1 &&
274 : (ignore_overflow || !ToUpperOverflows(current))) {
275 : // Common case: converting the letter resulted in one character.
276 : DCHECK(static_cast<uc32>(chars[0]) != current);
277 : result->Set(i, chars[0]);
278 : has_changed_character = true;
279 : i++;
280 : } else if (result_length == string->length()) {
281 : bool overflows = ToUpperOverflows(current);
282 : // We've assumed that the result would be as long as the
283 : // input but here is a character that converts to several
284 : // characters (or overflows). No matter, we calculate the exact
285 : // length of the result and try the whole thing again.
286 : //
287 : // Note that this leaves room for optimization. We could just
288 : // memcpy what we already have to the result string. Also,
289 : // the result string is the last object allocated we could
290 : // "realloc" it and probably, in the vast majority of cases,
291 : // extend the existing string to be able to hold the full
292 : // result.
293 : int next_length = 0;
294 : if (has_next) {
// |next| was already taken from the stream, so account for it here.
295 : next_length = mapping->get(next, 0, chars);
296 : if (next_length == 0) next_length = 1;
297 : }
298 : int current_length = i + char_length + next_length;
299 : while (stream.HasMore()) {
300 : current = stream.GetNext();
301 : overflows |= ToUpperOverflows(current);
302 : // NOTE: we use 0 as the next character here because, while
303 : // the next character may affect what a character converts to,
304 : // it does not in any case affect the length of what it converts
305 : // to.
306 : int char_length = mapping->get(current, 0, chars);
307 : if (char_length == 0) char_length = 1;
308 : current_length += char_length;
309 : if (current_length > String::kMaxLength) {
310 : AllowHeapAllocation allocate_error_and_return;
311 : THROW_NEW_ERROR_RETURN_FAILURE(isolate,
312 : NewInvalidStringLengthError());
313 : }
314 : }
315 : // Try again with the real length. Return a negative length if we need
316 : // to allocate a two-byte string for the conversion to uppercase.
317 : return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
318 : : Smi::FromInt(current_length);
319 : } else {
// Exact-length pass: write out every character of the expansion.
320 : for (int j = 0; j < char_length; j++) {
321 : result->Set(i, chars[j]);
322 : i++;
323 : }
324 : has_changed_character = true;
325 : }
326 : current = next;
327 : }
328 : if (has_changed_character) {
329 : return result;
330 : } else {
331 : // If we didn't actually change anything in doing the conversion
332 : // we simply return the original string and let the converted string
333 : // become garbage; there is no reason to keep two identical strings
334 : // alive.
335 : return string;
336 : }
337 : }
338 :
339 : template <class Converter>
340 : V8_WARN_UNUSED_RESULT static Object ConvertCase(
341 : Handle<String> s, Isolate* isolate,
342 : unibrow::Mapping<Converter, 128>* mapping) {
343 : s = String::Flatten(isolate, s);
344 : int length = s->length();
345 : // Assume that the string is not empty; we need this assumption later
346 : if (length == 0) return *s;
347 :
348 : // Simpler handling of ASCII strings.
349 : //
350 : // NOTE: This assumes that the upper/lower case of an ASCII
351 : // character is also ASCII. This is currently the case, but it
352 : // might break in the future if we implement more context and locale
353 : // dependent upper/lower conversions.
354 : if (String::IsOneByteRepresentationUnderneath(*s)) {
355 : // Same length as input.
356 : Handle<SeqOneByteString> result =
357 : isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
358 : DisallowHeapAllocation no_gc;
359 : String::FlatContent flat_content = s->GetFlatContent(no_gc);
360 : DCHECK(flat_content.IsFlat());
361 : bool has_changed_character = false;
362 : int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
363 : reinterpret_cast<char*>(result->GetChars(no_gc)),
364 : reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
365 : length, &has_changed_character);
366 : // If not ASCII, we discard the result and take the 2 byte path.
367 : if (index_to_first_unprocessed == length)
368 : return has_changed_character ? *result : *s;
369 : }
370 :
371 : Handle<SeqString> result; // Same length as input.
372 : if (s->IsOneByteRepresentation()) {
373 : result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
374 : } else {
375 : result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
376 : }
377 :
378 : Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
379 : if (answer->IsException(isolate) || answer->IsString()) return answer;
380 :
381 : DCHECK(answer->IsSmi());
382 : length = Smi::ToInt(answer);
383 : if (s->IsOneByteRepresentation() && length > 0) {
384 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
385 : isolate, result, isolate->factory()->NewRawOneByteString(length));
386 : } else {
387 : if (length < 0) length = -length;
388 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
389 : isolate, result, isolate->factory()->NewRawTwoByteString(length));
390 : }
391 : return ConvertCaseHelper(isolate, *s, *result, length, mapping);
392 : }
393 :
394 : } // namespace
395 :
396 : BUILTIN(StringPrototypeToLocaleLowerCase) {
397 : HandleScope scope(isolate);
398 : TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
399 : return ConvertCase(string, isolate,
400 : isolate->runtime_state()->to_lower_mapping());
401 : }
402 :
403 : BUILTIN(StringPrototypeToLocaleUpperCase) {
404 : HandleScope scope(isolate);
405 : TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
406 : return ConvertCase(string, isolate,
407 : isolate->runtime_state()->to_upper_mapping());
408 : }
409 :
410 : BUILTIN(StringPrototypeToLowerCase) {
411 : HandleScope scope(isolate);
412 : TO_THIS_STRING(string, "String.prototype.toLowerCase");
413 : return ConvertCase(string, isolate,
414 : isolate->runtime_state()->to_lower_mapping());
415 : }
416 :
417 : BUILTIN(StringPrototypeToUpperCase) {
418 : HandleScope scope(isolate);
419 : TO_THIS_STRING(string, "String.prototype.toUpperCase");
420 : return ConvertCase(string, isolate,
421 : isolate->runtime_state()->to_upper_mapping());
422 : }
423 : #endif // !V8_INTL_SUPPORT
424 :
425 : // ES6 #sec-string.prototype.raw
426 2080 : BUILTIN(StringRaw) {
427 : HandleScope scope(isolate);
428 416 : Handle<Object> templ = args.atOrUndefined(isolate, 1);
429 416 : const uint32_t argc = args.length();
430 : Handle<String> raw_string =
431 416 : isolate->factory()->NewStringFromAsciiChecked("raw");
432 :
433 : Handle<Object> cooked;
434 841 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
435 : Object::ToObject(isolate, templ));
436 :
437 : Handle<Object> raw;
438 814 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
439 : isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
440 823 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
441 : Object::ToObject(isolate, raw));
442 : Handle<Object> raw_len;
443 796 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
444 : isolate, raw_len,
445 : Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
446 :
447 796 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
448 : Object::ToLength(isolate, raw_len));
449 :
450 398 : IncrementalStringBuilder result_builder(isolate);
451 : // Intentional spec violation: we ignore {length} values >= 2^32, because
452 : // assuming non-empty chunks they would generate too-long strings anyway.
453 : const double raw_len_number = raw_len->Number();
454 : const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
455 : ? std::numeric_limits<uint32_t>::max()
456 398 : : static_cast<uint32_t>(raw_len_number);
457 398 : if (length > 0) {
458 : Handle<Object> first_element;
459 553 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
460 : Object::GetElement(isolate, raw, 0));
461 :
462 : Handle<String> first_string;
463 553 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
464 : isolate, first_string, Object::ToString(isolate, first_element));
465 236 : result_builder.AppendString(first_string);
466 :
467 1008 : for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
468 413 : if (arg_i < argc) {
469 : Handle<String> argument_string;
470 861 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
471 : isolate, argument_string,
472 : Object::ToString(isolate, args.at(arg_i)));
473 251 : result_builder.AppendString(argument_string);
474 : }
475 :
476 : Handle<Object> element;
477 772 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
478 : Object::GetElement(isolate, raw, i));
479 :
480 : Handle<String> element_string;
481 772 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
482 : Object::ToString(isolate, element));
483 386 : result_builder.AppendString(element_string);
484 : }
485 : }
486 :
487 670 : RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
488 : }
489 :
490 : } // namespace internal
491 120216 : } // namespace v8
|