Line data Source code
1 : // Copyright 2019 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/objects/string.h"
6 :
7 : #include "src/char-predicates.h"
8 : #include "src/conversions.h"
9 : #include "src/handles-inl.h"
10 : #include "src/heap/heap-inl.h" // For LooksValid implementation.
11 : #include "src/objects/map.h"
12 : #include "src/objects/oddball.h"
13 : #include "src/objects/string-comparator.h"
14 : #include "src/objects/string-inl.h"
15 : #include "src/ostreams.h"
16 : #include "src/string-builder-inl.h"
17 : #include "src/string-hasher.h"
18 : #include "src/string-search.h"
19 : #include "src/string-stream.h"
20 : #include "src/unicode-inl.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 :
25 6031910 : Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
26 : AllocationType allocation) {
27 : DCHECK_NE(cons->second()->length(), 0);
28 :
29 : // TurboFan can create cons strings with empty first parts.
30 6031910 : while (cons->first()->length() == 0) {
31 : // We do not want to call this function recursively. Therefore we call
32 : // String::Flatten only in those cases where String::SlowFlatten is not
33 : // called again.
34 21 : if (cons->second()->IsConsString() && !cons->second()->IsFlat()) {
35 : cons = handle(ConsString::cast(cons->second()), isolate);
36 : } else {
37 21 : return String::Flatten(isolate, handle(cons->second(), isolate));
38 : }
39 : }
40 :
41 : DCHECK(AllowHeapAllocation::IsAllowed());
42 : int length = cons->length();
43 : allocation =
44 6031889 : ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
45 : Handle<SeqString> result;
46 6031889 : if (cons->IsOneByteRepresentation()) {
47 : Handle<SeqOneByteString> flat =
48 : isolate->factory()
49 11824364 : ->NewRawOneByteString(length, allocation)
50 : .ToHandleChecked();
51 : DisallowHeapAllocation no_gc;
52 5912183 : WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
53 : result = flat;
54 : } else {
55 : Handle<SeqTwoByteString> flat =
56 : isolate->factory()
57 239416 : ->NewRawTwoByteString(length, allocation)
58 : .ToHandleChecked();
59 : DisallowHeapAllocation no_gc;
60 119708 : WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
61 : result = flat;
62 : }
63 12063782 : cons->set_first(isolate, *result);
64 : cons->set_second(isolate, ReadOnlyRoots(isolate).empty_string());
65 : DCHECK(result->IsFlat());
66 6031891 : return result;
67 : }
68 :
69 311 : bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
70 : DisallowHeapAllocation no_allocation;
71 : // Externalizing twice leaks the external resource, so it's
72 : // prohibited by the API.
73 : DCHECK(this->SupportsExternalization());
74 : DCHECK(resource->IsCacheable());
75 : #ifdef ENABLE_SLOW_DCHECKS
76 : if (FLAG_enable_slow_asserts) {
77 : // Assert that the resource and the string are equivalent.
78 : DCHECK(static_cast<size_t>(this->length()) == resource->length());
79 : ScopedVector<uc16> smart_chars(this->length());
80 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
81 : DCHECK_EQ(0, memcmp(smart_chars.start(), resource->data(),
82 : resource->length() * sizeof(smart_chars[0])));
83 : }
84 : #endif // DEBUG
85 311 : int size = this->Size(); // Byte size of the original string.
86 : // Abort if size does not allow in-place conversion.
87 311 : if (size < ExternalString::kUncachedSize) return false;
88 : Isolate* isolate;
89 : // Read-only strings cannot be made external, since that would mutate the
90 : // string.
91 311 : if (!GetIsolateFromWritableObject(*this, &isolate)) return false;
92 : Heap* heap = isolate->heap();
93 : bool is_internalized = this->IsInternalizedString();
94 : bool has_pointers = StringShape(*this).IsIndirect();
95 311 : if (has_pointers) {
96 64 : heap->NotifyObjectLayoutChange(*this, size, no_allocation);
97 : }
98 : // Morph the string to an external string by replacing the map and
99 : // reinitializing the fields. This won't work if the space the existing
100 : // string occupies is too small for a regular external string. Instead, we
101 : // resort to an uncached external string instead, omitting the field caching
102 : // the address of the backing store. When we encounter uncached external
103 : // strings in generated code, we need to bailout to runtime.
104 : Map new_map;
105 : ReadOnlyRoots roots(heap);
106 311 : if (size < ExternalString::kSize) {
107 110 : if (is_internalized) {
108 20 : new_map = roots.uncached_external_internalized_string_map();
109 : } else {
110 90 : new_map = roots.uncached_external_string_map();
111 : }
112 : } else {
113 : new_map = is_internalized ? roots.external_internalized_string_map()
114 402 : : roots.external_string_map();
115 : }
116 :
117 : // Byte size of the external String object.
118 311 : int new_size = this->SizeFromMap(new_map);
119 : heap->CreateFillerObjectAt(this->address() + new_size, size - new_size,
120 622 : ClearRecordedSlots::kNo);
121 311 : if (has_pointers) {
122 64 : heap->ClearRecordedSlotRange(this->address(), this->address() + new_size);
123 : }
124 :
125 : // We are storing the new map using release store after creating a filler for
126 : // the left-over space to avoid races with the sweeper thread.
127 311 : this->synchronized_set_map(new_map);
128 :
129 311 : ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
130 311 : self->SetResource(isolate, resource);
131 : heap->RegisterExternalString(*this);
132 311 : if (is_internalized) self->Hash(); // Force regeneration of the hash value.
133 : return true;
134 : }
135 :
136 320 : bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
137 : DisallowHeapAllocation no_allocation;
138 : // Externalizing twice leaks the external resource, so it's
139 : // prohibited by the API.
140 : DCHECK(this->SupportsExternalization());
141 : DCHECK(resource->IsCacheable());
142 : #ifdef ENABLE_SLOW_DCHECKS
143 : if (FLAG_enable_slow_asserts) {
144 : // Assert that the resource and the string are equivalent.
145 : DCHECK(static_cast<size_t>(this->length()) == resource->length());
146 : if (this->IsTwoByteRepresentation()) {
147 : ScopedVector<uint16_t> smart_chars(this->length());
148 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
149 : DCHECK(String::IsOneByte(smart_chars.start(), this->length()));
150 : }
151 : ScopedVector<char> smart_chars(this->length());
152 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
153 : DCHECK_EQ(0, memcmp(smart_chars.start(), resource->data(),
154 : resource->length() * sizeof(smart_chars[0])));
155 : }
156 : #endif // DEBUG
157 320 : int size = this->Size(); // Byte size of the original string.
158 : // Abort if size does not allow in-place conversion.
159 320 : if (size < ExternalString::kUncachedSize) return false;
160 : Isolate* isolate;
161 : // Read-only strings cannot be made external, since that would mutate the
162 : // string.
163 320 : if (!GetIsolateFromWritableObject(*this, &isolate)) return false;
164 : Heap* heap = isolate->heap();
165 : bool is_internalized = this->IsInternalizedString();
166 : bool has_pointers = StringShape(*this).IsIndirect();
167 :
168 320 : if (has_pointers) {
169 29 : heap->NotifyObjectLayoutChange(*this, size, no_allocation);
170 : }
171 :
172 : // Morph the string to an external string by replacing the map and
173 : // reinitializing the fields. This won't work if the space the existing
174 : // string occupies is too small for a regular external string. Instead, we
175 : // resort to an uncached external string instead, omitting the field caching
176 : // the address of the backing store. When we encounter uncached external
177 : // strings in generated code, we need to bailout to runtime.
178 : Map new_map;
179 : ReadOnlyRoots roots(heap);
180 320 : if (size < ExternalString::kSize) {
181 : new_map = is_internalized
182 : ? roots.uncached_external_one_byte_internalized_string_map()
183 38 : : roots.uncached_external_one_byte_string_map();
184 : } else {
185 : new_map = is_internalized
186 : ? roots.external_one_byte_internalized_string_map()
187 602 : : roots.external_one_byte_string_map();
188 : }
189 :
190 : // Byte size of the external String object.
191 320 : int new_size = this->SizeFromMap(new_map);
192 : heap->CreateFillerObjectAt(this->address() + new_size, size - new_size,
193 640 : ClearRecordedSlots::kNo);
194 320 : if (has_pointers) {
195 29 : heap->ClearRecordedSlotRange(this->address(), this->address() + new_size);
196 : }
197 :
198 : // We are storing the new map using release store after creating a filler for
199 : // the left-over space to avoid races with the sweeper thread.
200 320 : this->synchronized_set_map(new_map);
201 :
202 320 : ExternalOneByteString self = ExternalOneByteString::cast(*this);
203 320 : self->SetResource(isolate, resource);
204 : heap->RegisterExternalString(*this);
205 320 : if (is_internalized) self->Hash(); // Force regeneration of the hash value.
206 : return true;
207 : }
208 :
209 1534 : bool String::SupportsExternalization() {
210 1534 : if (this->IsThinString()) {
211 0 : return i::ThinString::cast(*this)->actual()->SupportsExternalization();
212 : }
213 :
214 : Isolate* isolate;
215 : // RO_SPACE strings cannot be externalized.
216 1534 : if (!GetIsolateFromWritableObject(*this, &isolate)) {
217 : return false;
218 : }
219 :
220 : // Already an external string.
221 1516 : if (StringShape(*this).IsExternal()) {
222 : return false;
223 : }
224 :
225 : #ifdef V8_COMPRESS_POINTERS
226 : // Small strings may not be in-place externalizable.
227 : if (this->Size() < ExternalString::kUncachedSize) return false;
228 : #else
229 : DCHECK_LE(ExternalString::kUncachedSize, this->Size());
230 : #endif
231 :
232 1168 : return !isolate->heap()->IsInGCPostProcessing();
233 : }
234 :
235 41378 : void String::StringShortPrint(StringStream* accumulator, bool show_details) {
236 41378 : const char* internalized_marker = this->IsInternalizedString() ? "#" : "";
237 :
238 : int len = length();
239 41378 : if (len > kMaxShortPrintLength) {
240 0 : accumulator->Add("<Very long string[%s%u]>", internalized_marker, len);
241 0 : return;
242 : }
243 :
244 41378 : if (!LooksValid()) {
245 0 : accumulator->Add("<Invalid String>");
246 0 : return;
247 : }
248 :
249 : StringCharacterStream stream(*this);
250 :
251 : bool truncated = false;
252 41378 : if (len > kMaxShortPrintLength) {
253 : len = kMaxShortPrintLength;
254 : truncated = true;
255 : }
256 : bool one_byte = true;
257 705452 : for (int i = 0; i < len; i++) {
258 332037 : uint16_t c = stream.GetNext();
259 :
260 332037 : if (c < 32 || c >= 127) {
261 : one_byte = false;
262 : }
263 : }
264 41378 : stream.Reset(*this);
265 41378 : if (one_byte) {
266 41378 : if (show_details)
267 38961 : accumulator->Add("<String[%s%u]: ", internalized_marker, length());
268 705452 : for (int i = 0; i < len; i++) {
269 332037 : accumulator->Put(static_cast<char>(stream.GetNext()));
270 : }
271 41378 : if (show_details) accumulator->Put('>');
272 : } else {
273 : // Backslash indicates that the string contains control
274 : // characters and that backslashes are therefore escaped.
275 0 : if (show_details)
276 0 : accumulator->Add("<String[%s%u]\\: ", internalized_marker, length());
277 0 : for (int i = 0; i < len; i++) {
278 0 : uint16_t c = stream.GetNext();
279 0 : if (c == '\n') {
280 0 : accumulator->Add("\\n");
281 0 : } else if (c == '\r') {
282 0 : accumulator->Add("\\r");
283 0 : } else if (c == '\\') {
284 0 : accumulator->Add("\\\\");
285 0 : } else if (c < 32 || c > 126) {
286 0 : accumulator->Add("\\x%02x", c);
287 : } else {
288 0 : accumulator->Put(static_cast<char>(c));
289 : }
290 : }
291 0 : if (truncated) {
292 0 : accumulator->Put('.');
293 0 : accumulator->Put('.');
294 0 : accumulator->Put('.');
295 : }
296 0 : if (show_details) accumulator->Put('>');
297 : }
298 : return;
299 : }
300 :
301 40 : void String::PrintUC16(std::ostream& os, int start, int end) { // NOLINT
302 40 : if (end < 0) end = length();
303 : StringCharacterStream stream(*this, start);
304 10312 : for (int i = start; i < end && stream.HasMore(); i++) {
305 10272 : os << AsUC16(stream.GetNext());
306 : }
307 40 : }
308 :
309 : // static
310 36 : Handle<String> String::Trim(Isolate* isolate, Handle<String> string,
311 : TrimMode mode) {
312 36 : string = String::Flatten(isolate, string);
313 : int const length = string->length();
314 :
315 : // Perform left trimming if requested.
316 : int left = 0;
317 36 : if (mode == kTrim || mode == kTrimStart) {
318 2376 : while (left < length && IsWhiteSpaceOrLineTerminator(string->Get(left))) {
319 567 : left++;
320 : }
321 : }
322 :
323 : // Perform right trimming if requested.
324 : int right = length;
325 36 : if (mode == kTrim || mode == kTrimEnd) {
326 72 : while (right > left &&
327 72 : IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
328 0 : right--;
329 : }
330 : }
331 :
332 36 : return isolate->factory()->NewSubString(string, left, right);
333 : }
334 :
335 3695561 : bool String::LooksValid() {
336 : // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
337 : // basically the same logic as the way we access the heap in the first place.
338 : MemoryChunk* chunk = MemoryChunk::FromHeapObject(*this);
339 : // RO_SPACE objects should always be valid.
340 3695561 : if (chunk->owner()->identity() == RO_SPACE) return true;
341 1911588 : if (chunk->heap() == nullptr) return false;
342 1911588 : return chunk->heap()->Contains(*this);
343 : }
344 :
345 : namespace {
346 :
347 : bool AreDigits(const uint8_t* s, int from, int to) {
348 96861 : for (int i = from; i < to; i++) {
349 60409 : if (s[i] < '0' || s[i] > '9') return false;
350 : }
351 :
352 : return true;
353 : }
354 :
355 : int ParseDecimalInteger(const uint8_t* s, int from, int to) {
356 : DCHECK_LT(to - from, 10); // Overflow is not possible.
357 : DCHECK(from < to);
358 1539 : int d = s[from] - '0';
359 :
360 1888 : for (int i = from + 1; i < to; i++) {
361 349 : d = 10 * d + (s[i] - '0');
362 : }
363 :
364 : return d;
365 : }
366 :
367 : } // namespace
368 :
369 : // static
370 3785147 : Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
371 : // Flatten {subject} string first.
372 3785147 : subject = String::Flatten(isolate, subject);
373 :
374 : // Fast array index case.
375 : uint32_t index;
376 3785147 : if (subject->AsArrayIndex(&index)) {
377 2170 : return isolate->factory()->NewNumberFromUint(index);
378 : }
379 :
380 : // Fast case: short integer or some sorts of junk values.
381 3782977 : if (subject->IsSeqOneByteString()) {
382 : int len = subject->length();
383 2601250 : if (len == 0) return handle(Smi::kZero, isolate);
384 :
385 : DisallowHeapAllocation no_gc;
386 : uint8_t const* data =
387 : Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
388 2580893 : bool minus = (data[0] == '-');
389 2580893 : int start_pos = (minus ? 1 : 0);
390 :
391 2580893 : if (start_pos == len) {
392 0 : return isolate->factory()->nan_value();
393 2580893 : } else if (data[start_pos] > '9') {
394 : // Fast check for a junk value. A valid string may start from a
395 : // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
396 : // or the 'I' character ('Infinity'). All of that have codes not greater
397 : // than '9' except 'I' and .
398 2430916 : if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
399 2423993 : return isolate->factory()->nan_value();
400 : }
401 177012 : } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
402 : // The maximal/minimal smi has 10 digits. If the string has less digits
403 : // we know it will fit into the smi-data type.
404 : int d = ParseDecimalInteger(data, start_pos, len);
405 1539 : if (minus) {
406 1745 : if (d == 0) return isolate->factory()->minus_zero_value();
407 1225 : d = -d;
408 108 : } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
409 0 : (len == 1 || data[0] != '0')) {
410 : // String hash is not calculated yet but all the data are present.
411 : // Update the hash field to speed up sequential convertions.
412 0 : uint32_t hash = StringHasher::MakeArrayIndexHash(d, len);
413 : #ifdef DEBUG
414 : subject->Hash(); // Force hash calculation.
415 : DCHECK_EQ(static_cast<int>(subject->hash_field()),
416 : static_cast<int>(hash));
417 : #endif
418 : subject->set_hash_field(hash);
419 : }
420 1279 : return handle(Smi::FromInt(d), isolate);
421 : }
422 : }
423 :
424 : // Slower case.
425 : int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
426 1337088 : return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
427 : }
428 :
429 151613590 : String::FlatContent String::GetFlatContent(
430 : const DisallowHeapAllocation& no_gc) {
431 : USE(no_gc);
432 : int length = this->length();
433 : StringShape shape(*this);
434 151613590 : String string = *this;
435 : int offset = 0;
436 151613590 : if (shape.representation_tag() == kConsStringTag) {
437 : ConsString cons = ConsString::cast(string);
438 0 : if (cons->second()->length() != 0) {
439 0 : return FlatContent();
440 : }
441 : string = cons->first();
442 : shape = StringShape(string);
443 151613590 : } else if (shape.representation_tag() == kSlicedStringTag) {
444 : SlicedString slice = SlicedString::cast(string);
445 : offset = slice->offset();
446 : string = slice->parent();
447 : shape = StringShape(string);
448 : DCHECK(shape.representation_tag() != kConsStringTag &&
449 : shape.representation_tag() != kSlicedStringTag);
450 : }
451 151613590 : if (shape.representation_tag() == kThinStringTag) {
452 : ThinString thin = ThinString::cast(string);
453 : string = thin->actual();
454 : shape = StringShape(string);
455 : DCHECK(!shape.IsCons());
456 : DCHECK(!shape.IsSliced());
457 : }
458 151613590 : if (shape.encoding_tag() == kOneByteStringTag) {
459 : const uint8_t* start;
460 143676564 : if (shape.representation_tag() == kSeqStringTag) {
461 : start = SeqOneByteString::cast(string)->GetChars(no_gc);
462 : } else {
463 : start = ExternalOneByteString::cast(string)->GetChars();
464 : }
465 143676592 : return FlatContent(start + offset, length);
466 : } else {
467 : DCHECK_EQ(shape.encoding_tag(), kTwoByteStringTag);
468 : const uc16* start;
469 7937026 : if (shape.representation_tag() == kSeqStringTag) {
470 : start = SeqTwoByteString::cast(string)->GetChars(no_gc);
471 : } else {
472 : start = ExternalTwoByteString::cast(string)->GetChars();
473 : }
474 7937026 : return FlatContent(start + offset, length);
475 : }
476 : }
477 :
478 5227241 : std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
479 : RobustnessFlag robust_flag,
480 : int offset, int length,
481 : int* length_return) {
482 5227241 : if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
483 : return std::unique_ptr<char[]>();
484 : }
485 : // Negative length means the to the end of the string.
486 5227241 : if (length < 0) length = kMaxInt - offset;
487 :
488 : // Compute the size of the UTF-8 string. Start at the specified offset.
489 : StringCharacterStream stream(*this, offset);
490 : int character_position = offset;
491 : int utf8_bytes = 0;
492 : int last = unibrow::Utf16::kNoPreviousCharacter;
493 63273303 : while (stream.HasMore() && character_position++ < offset + length) {
494 29023034 : uint16_t character = stream.GetNext();
495 58046060 : utf8_bytes += unibrow::Utf8::Length(character, last);
496 29023030 : last = character;
497 : }
498 :
499 5227244 : if (length_return) {
500 3620273 : *length_return = utf8_bytes;
501 : }
502 :
503 5227244 : char* result = NewArray<char>(utf8_bytes + 1);
504 :
505 : // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
506 5227246 : stream.Reset(*this, offset);
507 : character_position = offset;
508 : int utf8_byte_position = 0;
509 : last = unibrow::Utf16::kNoPreviousCharacter;
510 63273311 : while (stream.HasMore() && character_position++ < offset + length) {
511 29023036 : uint16_t character = stream.GetNext();
512 29023027 : if (allow_nulls == DISALLOW_NULLS && character == 0) {
513 : character = ' ';
514 : }
515 : utf8_byte_position +=
516 29023027 : unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
517 29023033 : last = character;
518 : }
519 5227245 : result[utf8_byte_position] = 0;
520 : return std::unique_ptr<char[]>(result);
521 : }
522 :
523 1607583 : std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
524 : RobustnessFlag robust_flag,
525 : int* length_return) {
526 1607583 : return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
527 : }
528 :
529 : template <typename sinkchar>
530 147473616 : void String::WriteToFlat(String src, sinkchar* sink, int f, int t) {
531 : DisallowHeapAllocation no_gc;
532 147473616 : String source = src;
533 : int from = f;
534 : int to = t;
535 : while (true) {
536 : DCHECK(0 <= from && from <= to && to <= source->length());
537 218413891 : switch (StringShape(source).full_representation_tag()) {
538 : case kOneByteStringTag | kExternalStringTag: {
539 853986 : CopyChars(sink, ExternalOneByteString::cast(source)->GetChars() + from,
540 : to - from);
541 : return;
542 : }
543 : case kTwoByteStringTag | kExternalStringTag: {
544 : const uc16* data = ExternalTwoByteString::cast(source)->GetChars();
545 280345 : CopyChars(sink, data + from, to - from);
546 : return;
547 : }
548 : case kOneByteStringTag | kSeqStringTag: {
549 188208904 : CopyChars(sink, SeqOneByteString::cast(source)->GetChars(no_gc) + from,
550 : to - from);
551 : return;
552 : }
553 : case kTwoByteStringTag | kSeqStringTag: {
554 27945850 : CopyChars(sink, SeqTwoByteString::cast(source)->GetChars(no_gc) + from,
555 : to - from);
556 : return;
557 : }
558 : case kOneByteStringTag | kConsStringTag:
559 : case kTwoByteStringTag | kConsStringTag: {
560 : ConsString cons_string = ConsString::cast(source);
561 : String first = cons_string->first();
562 : int boundary = first->length();
563 107965881 : if (to - boundary >= boundary - from) {
564 : // Right hand side is longer. Recurse over left.
565 45406946 : if (from < boundary) {
566 45406946 : WriteToFlat(first, sink, from, boundary);
567 90813893 : if (from == 0 && cons_string->second() == first) {
568 37040066 : CopyChars(sink + boundary, sink, boundary);
569 : return;
570 : }
571 8366881 : sink += boundary - from;
572 : from = 0;
573 : } else {
574 0 : from -= boundary;
575 : }
576 : to -= boundary;
577 : source = cons_string->second();
578 : } else {
579 : // Left hand side is longer. Recurse over right.
580 62558935 : if (to > boundary) {
581 : String second = cons_string->second();
582 : // When repeatedly appending to a string, we get a cons string that
583 : // is unbalanced to the left, a list, essentially. We inline the
584 : // common case of sequential one-byte right child.
585 62184032 : if (to - boundary == 1) {
586 52929586 : sink[boundary - from] = static_cast<sinkchar>(second->Get(0));
587 35719239 : } else if (second->IsSeqOneByteString()) {
588 48466192 : CopyChars(sink + boundary - from,
589 : SeqOneByteString::cast(second)->GetChars(no_gc),
590 : to - boundary);
591 : } else {
592 11486143 : WriteToFlat(second, sink + boundary - from, 0, to - boundary);
593 : }
594 : to = boundary;
595 : }
596 : source = first;
597 : }
598 : break;
599 : }
600 : case kOneByteStringTag | kSlicedStringTag:
601 : case kTwoByteStringTag | kSlicedStringTag: {
602 : SlicedString slice = SlicedString::cast(source);
603 1648839 : unsigned offset = slice->offset();
604 3297678 : WriteToFlat(slice->parent(), sink, from + offset, to + offset);
605 : return;
606 : }
607 : case kOneByteStringTag | kThinStringTag:
608 : case kTwoByteStringTag | kThinStringTag:
609 : source = ThinString::cast(source)->actual();
610 14459 : break;
611 : }
612 : }
613 : }
614 :
615 : template <typename SourceChar>
616 49371 : static void CalculateLineEndsImpl(Isolate* isolate, std::vector<int>* line_ends,
617 : Vector<const SourceChar> src,
618 : bool include_ending_line) {
619 49371 : const int src_len = src.length();
620 170826441 : for (int i = 0; i < src_len - 1; i++) {
621 341554140 : SourceChar current = src[i];
622 341554140 : SourceChar next = src[i + 1];
623 170777070 : if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
624 : }
625 :
626 147545 : if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
627 35852 : line_ends->push_back(src_len - 1);
628 : }
629 49371 : if (include_ending_line) {
630 : // Include one character beyond the end of script. The rewriter uses that
631 : // position for the implicit return statement.
632 46939 : line_ends->push_back(src_len);
633 : }
634 49371 : }
635 :
636 49371 : Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
637 : Handle<String> src,
638 : bool include_ending_line) {
639 49371 : src = Flatten(isolate, src);
640 : // Rough estimate of line count based on a roughly estimated average
641 : // length of (unpacked) code.
642 49371 : int line_count_estimate = src->length() >> 4;
643 : std::vector<int> line_ends;
644 49371 : line_ends.reserve(line_count_estimate);
645 : {
646 : DisallowHeapAllocation no_allocation; // ensure vectors stay valid.
647 : // Dispatch on type of strings.
648 49371 : String::FlatContent content = src->GetFlatContent(no_allocation);
649 : DCHECK(content.IsFlat());
650 49371 : if (content.IsOneByte()) {
651 49195 : CalculateLineEndsImpl(isolate, &line_ends, content.ToOneByteVector(),
652 49195 : include_ending_line);
653 : } else {
654 176 : CalculateLineEndsImpl(isolate, &line_ends, content.ToUC16Vector(),
655 176 : include_ending_line);
656 : }
657 : }
658 49371 : int line_count = static_cast<int>(line_ends.size());
659 49371 : Handle<FixedArray> array = isolate->factory()->NewFixedArray(line_count);
660 19736683 : for (int i = 0; i < line_count; i++) {
661 19687312 : array->set(i, Smi::FromInt(line_ends[i]));
662 : }
663 98742 : return array;
664 : }
665 :
666 17037747 : bool String::SlowEquals(String other) {
667 : DisallowHeapAllocation no_gc;
668 : // Fast check: negative check with lengths.
669 : int len = length();
670 17037747 : if (len != other->length()) return false;
671 9592501 : if (len == 0) return true;
672 :
673 : // Fast check: if at least one ThinString is involved, dereference it/them
674 : // and restart.
675 19123564 : if (this->IsThinString() || other->IsThinString()) {
676 61444 : if (other->IsThinString()) other = ThinString::cast(other)->actual();
677 61442 : if (this->IsThinString()) {
678 61440 : return ThinString::cast(*this)->actual()->Equals(other);
679 : } else {
680 2 : return this->Equals(other);
681 : }
682 : }
683 :
684 : // Fast check: if hash code is computed for both strings
685 : // a fast negative check can be performed.
686 19061686 : if (HasHashCode() && other->HasHashCode()) {
687 : #ifdef ENABLE_SLOW_DCHECKS
688 : if (FLAG_enable_slow_asserts) {
689 : if (Hash() != other->Hash()) {
690 : bool found_difference = false;
691 : for (int i = 0; i < len; i++) {
692 : if (Get(i) != other->Get(i)) {
693 : found_difference = true;
694 : break;
695 : }
696 : }
697 : DCHECK(found_difference);
698 : }
699 : }
700 : #endif
701 9512738 : if (Hash() != other->Hash()) return false;
702 : }
703 :
704 : // We know the strings are both non-empty. Compare the first chars
705 : // before we try to flatten the strings.
706 7354908 : if (this->Get(0) != other->Get(0)) return false;
707 :
708 14634971 : if (IsSeqOneByteString() && other->IsSeqOneByteString()) {
709 : const uint8_t* str1 = SeqOneByteString::cast(*this)->GetChars(no_gc);
710 : const uint8_t* str2 = SeqOneByteString::cast(other)->GetChars(no_gc);
711 7286484 : return CompareRawStringContents(str1, str2, len);
712 : }
713 :
714 : StringComparator comparator;
715 61947 : return comparator.Equals(*this, other);
716 : }
717 :
718 743249 : bool String::SlowEquals(Isolate* isolate, Handle<String> one,
719 : Handle<String> two) {
720 : // Fast check: negative check with lengths.
721 : int one_length = one->length();
722 743249 : if (one_length != two->length()) return false;
723 734722 : if (one_length == 0) return true;
724 :
725 : // Fast check: if at least one ThinString is involved, dereference it/them
726 : // and restart.
727 1469444 : if (one->IsThinString() || two->IsThinString()) {
728 23 : if (one->IsThinString())
729 : one = handle(ThinString::cast(*one)->actual(), isolate);
730 23 : if (two->IsThinString())
731 : two = handle(ThinString::cast(*two)->actual(), isolate);
732 23 : return String::Equals(isolate, one, two);
733 : }
734 :
735 : // Fast check: if hash code is computed for both strings
736 : // a fast negative check can be performed.
737 745879 : if (one->HasHashCode() && two->HasHashCode()) {
738 : #ifdef ENABLE_SLOW_DCHECKS
739 : if (FLAG_enable_slow_asserts) {
740 : if (one->Hash() != two->Hash()) {
741 : bool found_difference = false;
742 : for (int i = 0; i < one_length; i++) {
743 : if (one->Get(i) != two->Get(i)) {
744 : found_difference = true;
745 : break;
746 : }
747 : }
748 : DCHECK(found_difference);
749 : }
750 : }
751 : #endif
752 12944 : if (one->Hash() != two->Hash()) return false;
753 : }
754 :
755 : // We know the strings are both non-empty. Compare the first chars
756 : // before we try to flatten the strings.
757 734689 : if (one->Get(0) != two->Get(0)) return false;
758 :
759 734542 : one = String::Flatten(isolate, one);
760 734542 : two = String::Flatten(isolate, two);
761 :
762 : DisallowHeapAllocation no_gc;
763 734542 : String::FlatContent flat1 = one->GetFlatContent(no_gc);
764 734542 : String::FlatContent flat2 = two->GetFlatContent(no_gc);
765 :
766 734542 : if (flat1.IsOneByte() && flat2.IsOneByte()) {
767 : return CompareRawStringContents(flat1.ToOneByteVector().start(),
768 : flat2.ToOneByteVector().start(),
769 : one_length);
770 : } else {
771 15344182 : for (int i = 0; i < one_length; i++) {
772 7643076 : if (flat1.Get(i) != flat2.Get(i)) return false;
773 : }
774 : return true;
775 : }
776 : }
777 :
778 : // static
779 4813958 : ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
780 : Handle<String> y) {
781 : // A few fast case tests before we flatten.
782 4813958 : if (x.is_identical_to(y)) {
783 : return ComparisonResult::kEqual;
784 4813958 : } else if (y->length() == 0) {
785 : return x->length() == 0 ? ComparisonResult::kEqual
786 0 : : ComparisonResult::kGreaterThan;
787 4813958 : } else if (x->length() == 0) {
788 : return ComparisonResult::kLessThan;
789 : }
790 :
791 9627916 : int const d = x->Get(0) - y->Get(0);
792 4813958 : if (d < 0) {
793 : return ComparisonResult::kLessThan;
794 3933260 : } else if (d > 0) {
795 : return ComparisonResult::kGreaterThan;
796 : }
797 :
798 : // Slow case.
799 392635 : x = String::Flatten(isolate, x);
800 392635 : y = String::Flatten(isolate, y);
801 :
802 : DisallowHeapAllocation no_gc;
803 : ComparisonResult result = ComparisonResult::kEqual;
804 : int prefix_length = x->length();
805 392635 : if (y->length() < prefix_length) {
806 : prefix_length = y->length();
807 : result = ComparisonResult::kGreaterThan;
808 341955 : } else if (y->length() > prefix_length) {
809 : result = ComparisonResult::kLessThan;
810 : }
811 : int r;
812 392635 : String::FlatContent x_content = x->GetFlatContent(no_gc);
813 392635 : String::FlatContent y_content = y->GetFlatContent(no_gc);
814 392635 : if (x_content.IsOneByte()) {
815 : Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
816 388036 : if (y_content.IsOneByte()) {
817 : Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
818 388018 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
819 : } else {
820 : Vector<const uc16> y_chars = y_content.ToUC16Vector();
821 18 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
822 : }
823 : } else {
824 : Vector<const uc16> x_chars = x_content.ToUC16Vector();
825 4599 : if (y_content.IsOneByte()) {
826 : Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
827 18 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
828 : } else {
829 : Vector<const uc16> y_chars = y_content.ToUC16Vector();
830 4581 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
831 : }
832 : }
833 392635 : if (r < 0) {
834 : result = ComparisonResult::kLessThan;
835 155903 : } else if (r > 0) {
836 : result = ComparisonResult::kGreaterThan;
837 : }
838 : return result;
839 : }
840 :
841 816 : Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
842 : Handle<Object> search, Handle<Object> position) {
843 816 : if (receiver->IsNullOrUndefined(isolate)) {
844 540 : THROW_NEW_ERROR_RETURN_FAILURE(
845 : isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
846 : isolate->factory()->NewStringFromAsciiChecked(
847 : "String.prototype.indexOf")));
848 : }
849 : Handle<String> receiver_string;
850 1272 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
851 : Object::ToString(isolate, receiver));
852 :
853 : Handle<String> search_string;
854 1272 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
855 : Object::ToString(isolate, search));
856 :
857 645 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
858 : Object::ToInteger(isolate, position));
859 :
860 627 : uint32_t index = receiver_string->ToValidIndex(*position);
861 : return Smi::FromInt(
862 1254 : String::IndexOf(isolate, receiver_string, search_string, index));
863 : }
864 :
865 : namespace {
866 :
867 : template <typename T>
868 1055698 : int SearchString(Isolate* isolate, String::FlatContent receiver_content,
869 : Vector<T> pat_vector, int start_index) {
870 1055698 : if (receiver_content.IsOneByte()) {
871 : return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
872 1051828 : start_index);
873 : }
874 : return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
875 3870 : start_index);
876 : }
877 :
878 : } // namespace
879 :
880 1058846 : int String::IndexOf(Isolate* isolate, Handle<String> receiver,
881 : Handle<String> search, int start_index) {
882 : DCHECK_LE(0, start_index);
883 : DCHECK(start_index <= receiver->length());
884 :
885 1058846 : uint32_t search_length = search->length();
886 1058846 : if (search_length == 0) return start_index;
887 :
888 1058801 : uint32_t receiver_length = receiver->length();
889 1058801 : if (start_index + search_length > receiver_length) return -1;
890 :
891 1055698 : receiver = String::Flatten(isolate, receiver);
892 1055698 : search = String::Flatten(isolate, search);
893 :
894 : DisallowHeapAllocation no_gc; // ensure vectors stay valid
895 : // Extract flattened substrings of cons strings before getting encoding.
896 1055698 : String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
897 1055698 : String::FlatContent search_content = search->GetFlatContent(no_gc);
898 :
899 : // dispatch on type of strings
900 1055698 : if (search_content.IsOneByte()) {
901 1053943 : Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
902 : return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
903 1053943 : start_index);
904 : }
905 1755 : Vector<const uc16> pat_vector = search_content.ToUC16Vector();
906 : return SearchString<const uc16>(isolate, receiver_content, pat_vector,
907 1755 : start_index);
908 : }
909 :
910 3912 : MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
911 : Handle<String> replacement,
912 : int start_index) {
913 : DCHECK_GE(start_index, 0);
914 :
915 : Factory* factory = isolate->factory();
916 :
917 : const int replacement_length = replacement->length();
918 3912 : const int captures_length = match->CaptureCount();
919 :
920 3912 : replacement = String::Flatten(isolate, replacement);
921 :
922 : Handle<String> dollar_string =
923 3912 : factory->LookupSingleCharacterStringFromCode('$');
924 : int next_dollar_ix =
925 3912 : String::IndexOf(isolate, replacement, dollar_string, start_index);
926 3912 : if (next_dollar_ix < 0) {
927 153 : return replacement;
928 : }
929 :
930 3759 : IncrementalStringBuilder builder(isolate);
931 :
932 3759 : if (next_dollar_ix > 0) {
933 297 : builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
934 : }
935 :
936 : while (true) {
937 7980 : const int peek_ix = next_dollar_ix + 1;
938 7980 : if (peek_ix >= replacement_length) {
939 : builder.AppendCharacter('$');
940 36 : return builder.Finish();
941 : }
942 :
943 : int continue_from_ix = -1;
944 : const uint16_t peek = replacement->Get(peek_ix);
945 7944 : switch (peek) {
946 : case '$': // $$
947 : builder.AppendCharacter('$');
948 54 : continue_from_ix = peek_ix + 1;
949 54 : break;
950 : case '&': // $& - match
951 54 : builder.AppendString(match->GetMatch());
952 54 : continue_from_ix = peek_ix + 1;
953 54 : break;
954 : case '`': // $` - prefix
955 36 : builder.AppendString(match->GetPrefix());
956 36 : continue_from_ix = peek_ix + 1;
957 36 : break;
958 : case '\'': // $' - suffix
959 36 : builder.AppendString(match->GetSuffix());
960 36 : continue_from_ix = peek_ix + 1;
961 36 : break;
962 : case '0':
963 : case '1':
964 : case '2':
965 : case '3':
966 : case '4':
967 : case '5':
968 : case '6':
969 : case '7':
970 : case '8':
971 : case '9': {
972 : // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
973 7278 : int scaled_index = (peek - '0');
974 : int advance = 1;
975 :
976 7278 : if (peek_ix + 1 < replacement_length) {
977 : const uint16_t next_peek = replacement->Get(peek_ix + 1);
978 6054 : if (next_peek >= '0' && next_peek <= '9') {
979 1341 : const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
980 1341 : if (new_scaled_index < captures_length) {
981 : scaled_index = new_scaled_index;
982 : advance = 2;
983 : }
984 : }
985 : }
986 :
987 7278 : if (scaled_index == 0 || scaled_index >= captures_length) {
988 : builder.AppendCharacter('$');
989 : continue_from_ix = peek_ix;
990 7278 : break;
991 : }
992 :
993 : bool capture_exists;
994 : Handle<String> capture;
995 14376 : ASSIGN_RETURN_ON_EXCEPTION(
996 : isolate, capture, match->GetCapture(scaled_index, &capture_exists),
997 : String);
998 7188 : if (capture_exists) builder.AppendString(capture);
999 7188 : continue_from_ix = peek_ix + advance;
1000 7188 : break;
1001 : }
1002 : case '<': { // $<name> - named capture
1003 : using CaptureState = String::Match::CaptureState;
1004 :
1005 459 : if (!match->HasNamedCaptures()) {
1006 : builder.AppendCharacter('$');
1007 : continue_from_ix = peek_ix;
1008 459 : break;
1009 : }
1010 :
1011 : Handle<String> bracket_string =
1012 378 : factory->LookupSingleCharacterStringFromCode('>');
1013 : const int closing_bracket_ix =
1014 378 : String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1015 :
1016 378 : if (closing_bracket_ix == -1) {
1017 : // No closing bracket was found, treat '$<' as a string literal.
1018 : builder.AppendCharacter('$');
1019 : continue_from_ix = peek_ix;
1020 : break;
1021 : }
1022 :
1023 : Handle<String> capture_name =
1024 306 : factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1025 : Handle<String> capture;
1026 : CaptureState capture_state;
1027 612 : ASSIGN_RETURN_ON_EXCEPTION(
1028 : isolate, capture,
1029 : match->GetNamedCapture(capture_name, &capture_state), String);
1030 :
1031 306 : switch (capture_state) {
1032 : case CaptureState::INVALID:
1033 : case CaptureState::UNMATCHED:
1034 : break;
1035 : case CaptureState::MATCHED:
1036 126 : builder.AppendString(capture);
1037 126 : break;
1038 : }
1039 :
1040 306 : continue_from_ix = closing_bracket_ix + 1;
1041 306 : break;
1042 : }
1043 : default:
1044 : builder.AppendCharacter('$');
1045 : continue_from_ix = peek_ix;
1046 : break;
1047 : }
1048 :
1049 : // Go the the next $ in the replacement.
1050 : // TODO(jgruber): Single-char lookups could be much more efficient.
1051 : DCHECK_NE(continue_from_ix, -1);
1052 : next_dollar_ix =
1053 7944 : String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1054 :
1055 : // Return if there are no more $ characters in the replacement. If we
1056 : // haven't reached the end, we need to append the suffix.
1057 7944 : if (next_dollar_ix < 0) {
1058 3723 : if (continue_from_ix < replacement_length) {
1059 1311 : builder.AppendString(factory->NewSubString(
1060 1311 : replacement, continue_from_ix, replacement_length));
1061 : }
1062 3723 : return builder.Finish();
1063 : }
1064 :
1065 : // Append substring between the previous and the next $ character.
1066 4221 : if (next_dollar_ix > continue_from_ix) {
1067 72 : builder.AppendString(
1068 72 : factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1069 : }
1070 : }
1071 :
1072 : UNREACHABLE();
1073 : }
1074 :
1075 : namespace { // for String.Prototype.lastIndexOf
1076 :
1077 : template <typename schar, typename pchar>
1078 6 : int StringMatchBackwards(Vector<const schar> subject,
1079 : Vector<const pchar> pattern, int idx) {
1080 : int pattern_length = pattern.length();
1081 : DCHECK_GE(pattern_length, 1);
1082 : DCHECK(idx + pattern_length <= subject.length());
1083 :
1084 : if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1085 0 : for (int i = 0; i < pattern_length; i++) {
1086 0 : uc16 c = pattern[i];
1087 0 : if (c > String::kMaxOneByteCharCode) {
1088 : return -1;
1089 : }
1090 : }
1091 : }
1092 :
1093 1249 : pchar pattern_first_char = pattern[0];
1094 10497 : for (int i = idx; i >= 0; i--) {
1095 11422 : if (subject[i] != pattern_first_char) continue;
1096 : int j = 1;
1097 1778 : while (j < pattern_length) {
1098 2073 : if (pattern[j] != subject[i + j]) {
1099 : break;
1100 : }
1101 673 : j++;
1102 : }
1103 1105 : if (j == pattern_length) {
1104 : return i;
1105 : }
1106 : }
1107 : return -1;
1108 : }
1109 :
1110 : } // namespace
1111 :
1112 1663 : Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
1113 : Handle<Object> search, Handle<Object> position) {
1114 1663 : if (receiver->IsNullOrUndefined(isolate)) {
1115 486 : THROW_NEW_ERROR_RETURN_FAILURE(
1116 : isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1117 : isolate->factory()->NewStringFromAsciiChecked(
1118 : "String.prototype.lastIndexOf")));
1119 : }
1120 : Handle<String> receiver_string;
1121 3002 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1122 : Object::ToString(isolate, receiver));
1123 :
1124 : Handle<String> search_string;
1125 3002 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1126 : Object::ToString(isolate, search));
1127 :
1128 3002 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1129 : Object::ToNumber(isolate, position));
1130 :
1131 : uint32_t start_index;
1132 :
1133 1501 : if (position->IsNaN()) {
1134 1024 : start_index = receiver_string->length();
1135 : } else {
1136 477 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1137 : Object::ToInteger(isolate, position));
1138 477 : start_index = receiver_string->ToValidIndex(*position);
1139 : }
1140 :
1141 1501 : uint32_t pattern_length = search_string->length();
1142 1501 : uint32_t receiver_length = receiver_string->length();
1143 :
1144 1501 : if (start_index + pattern_length > receiver_length) {
1145 1096 : start_index = receiver_length - pattern_length;
1146 : }
1147 :
1148 1501 : if (pattern_length == 0) {
1149 504 : return Smi::FromInt(start_index);
1150 : }
1151 :
1152 1249 : receiver_string = String::Flatten(isolate, receiver_string);
1153 1249 : search_string = String::Flatten(isolate, search_string);
1154 :
1155 : int last_index = -1;
1156 : DisallowHeapAllocation no_gc; // ensure vectors stay valid
1157 :
1158 1249 : String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1159 1249 : String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1160 :
1161 1249 : if (search_content.IsOneByte()) {
1162 : Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1163 1249 : if (receiver_content.IsOneByte()) {
1164 1243 : last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1165 : pat_vector, start_index);
1166 : } else {
1167 6 : last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1168 6 : pat_vector, start_index);
1169 : }
1170 : } else {
1171 0 : Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1172 0 : if (receiver_content.IsOneByte()) {
1173 0 : last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1174 0 : pat_vector, start_index);
1175 : } else {
1176 0 : last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1177 0 : pat_vector, start_index);
1178 : }
1179 : }
1180 1249 : return Smi::FromInt(last_index);
1181 : }
1182 :
1183 18957525 : bool String::IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match) {
1184 : int slen = length();
1185 : // Can't check exact length equality, but we can check bounds.
1186 : int str_len = str.length();
1187 18957525 : if (!allow_prefix_match &&
1188 16889677 : (str_len < slen ||
1189 16889677 : str_len > slen * static_cast<int>(unibrow::Utf8::kMaxEncodedSize))) {
1190 : return false;
1191 : }
1192 :
1193 : int i = 0;
1194 : unibrow::Utf8Iterator it = unibrow::Utf8Iterator(str);
1195 190364745 : while (i < slen && !it.Done()) {
1196 181635681 : if (Get(i++) != *it) return false;
1197 86759570 : ++it;
1198 : }
1199 :
1200 12787327 : return (allow_prefix_match || i == slen) && it.Done();
1201 : }
1202 :
1203 : template <>
1204 135 : bool String::IsEqualTo(Vector<const uint8_t> str) {
1205 135 : return IsOneByteEqualTo(str);
1206 : }
1207 :
1208 : template <>
1209 0 : bool String::IsEqualTo(Vector<const uc16> str) {
1210 0 : return IsTwoByteEqualTo(str);
1211 : }
1212 :
1213 106271432 : bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
1214 : int slen = length();
1215 106271432 : if (str.length() != slen) return false;
1216 : DisallowHeapAllocation no_gc;
1217 82730038 : FlatContent content = GetFlatContent(no_gc);
1218 82730002 : if (content.IsOneByte()) {
1219 82729819 : return CompareChars(content.ToOneByteVector().start(), str.start(), slen) ==
1220 82729819 : 0;
1221 : }
1222 366 : return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
1223 : }
1224 :
1225 77526 : bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
1226 : int slen = length();
1227 77526 : if (str.length() != slen) return false;
1228 : DisallowHeapAllocation no_gc;
1229 20316 : FlatContent content = GetFlatContent(no_gc);
1230 20316 : if (content.IsOneByte()) {
1231 4403 : return CompareChars(content.ToOneByteVector().start(), str.start(), slen) ==
1232 4403 : 0;
1233 : }
1234 31826 : return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
1235 : }
1236 :
1237 70374831 : uint32_t String::ComputeAndSetHash() {
1238 : DisallowHeapAllocation no_gc;
1239 : // Should only be called if hash code has not yet been computed.
1240 : DCHECK(!HasHashCode());
1241 :
1242 : // Store the hash code in the object.
1243 : uint32_t field =
1244 70374831 : IteratingStringHasher::Hash(*this, HashSeed(GetReadOnlyRoots()));
1245 : set_hash_field(field);
1246 :
1247 : // Check the hash code is there.
1248 : DCHECK(HasHashCode());
1249 70374892 : uint32_t result = field >> kHashShift;
1250 : DCHECK_NE(result, 0); // Ensure that the hash value of 0 is never computed.
1251 70374892 : return result;
1252 : }
1253 :
1254 1542947 : bool String::ComputeArrayIndex(uint32_t* index) {
1255 : int length = this->length();
1256 1542947 : if (length == 0 || length > kMaxArrayIndexSize) return false;
1257 : StringCharacterStream stream(*this);
1258 961446 : return StringToArrayIndex(&stream, index);
1259 : }
1260 :
1261 6810336 : bool String::SlowAsArrayIndex(uint32_t* index) {
1262 : DisallowHeapAllocation no_gc;
1263 6810336 : if (length() <= kMaxCachedArrayIndexLength) {
1264 5267389 : Hash(); // force computation of hash code
1265 : uint32_t field = hash_field();
1266 5267391 : if ((field & kIsNotArrayIndexMask) != 0) return false;
1267 : // Isolate the array index form the full hash field.
1268 587095 : *index = ArrayIndexValueBits::decode(field);
1269 587095 : return true;
1270 : } else {
1271 1542947 : return ComputeArrayIndex(index);
1272 : }
1273 : }
1274 :
1275 41680 : void String::PrintOn(FILE* file) {
1276 : int length = this->length();
1277 6985420 : for (int i = 0; i < length; i++) {
1278 3471870 : PrintF(file, "%c", Get(i));
1279 : }
1280 41680 : }
1281 :
1282 18160178 : Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
1283 18160178 : if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();
1284 :
1285 : int new_size, old_size;
1286 : int old_length = string->length();
1287 13739641 : if (old_length <= new_length) return string;
1288 :
1289 12673982 : if (string->IsSeqOneByteString()) {
1290 : old_size = SeqOneByteString::SizeFor(old_length);
1291 : new_size = SeqOneByteString::SizeFor(new_length);
1292 : } else {
1293 : DCHECK(string->IsSeqTwoByteString());
1294 : old_size = SeqTwoByteString::SizeFor(old_length);
1295 : new_size = SeqTwoByteString::SizeFor(new_length);
1296 : }
1297 :
1298 12673982 : int delta = old_size - new_size;
1299 :
1300 : Address start_of_string = string->address();
1301 : DCHECK(IsAligned(start_of_string, kObjectAlignment));
1302 : DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1303 :
1304 : Heap* heap = Heap::FromWritableHeapObject(*string);
1305 : // Sizes are pointer size aligned, so that we can use filler objects
1306 : // that are a multiple of pointer size.
1307 : heap->CreateFillerObjectAt(start_of_string + new_size, delta,
1308 12673982 : ClearRecordedSlots::kNo);
1309 : // We are storing the new length using release store after creating a filler
1310 : // for the left-over space to avoid races with the sweeper thread.
1311 : string->synchronized_set_length(new_length);
1312 :
1313 12673979 : return string;
1314 : }
1315 :
1316 334335 : void SeqOneByteString::clear_padding() {
1317 334335 : int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1318 668670 : memset(reinterpret_cast<void*>(address() + data_size), 0,
1319 334335 : SizeFor(length()) - data_size);
1320 334335 : }
1321 :
1322 0 : void SeqTwoByteString::clear_padding() {
1323 0 : int data_size = SeqString::kHeaderSize + length() * kUC16Size;
1324 0 : memset(reinterpret_cast<void*>(address() + data_size), 0,
1325 0 : SizeFor(length()) - data_size);
1326 0 : }
1327 :
1328 274099 : uint16_t ConsString::ConsStringGet(int index) {
1329 : DCHECK(index >= 0 && index < this->length());
1330 :
1331 : // Check for a flattened cons string
1332 274099 : if (second()->length() == 0) {
1333 : String left = first();
1334 : return left->Get(index);
1335 : }
1336 :
1337 : String string = String::cast(*this);
1338 :
1339 : while (true) {
1340 1124357 : if (StringShape(string).IsCons()) {
1341 : ConsString cons_string = ConsString::cast(string);
1342 : String left = cons_string->first();
1343 927996 : if (left->length() > index) {
1344 : string = left;
1345 : } else {
1346 407450 : index -= left->length();
1347 : string = cons_string->second();
1348 : }
1349 : } else {
1350 196361 : return string->Get(index);
1351 : }
1352 : }
1353 :
1354 : UNREACHABLE();
1355 : }
1356 :
1357 4212 : uint16_t ThinString::ThinStringGet(int index) { return actual()->Get(index); }
1358 :
1359 1093796 : uint16_t SlicedString::SlicedStringGet(int index) {
1360 2187592 : return parent()->Get(offset() + index);
1361 : }
1362 :
1363 88406 : int ExternalString::ExternalPayloadSize() const {
1364 88406 : int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1365 88406 : return length() * length_multiplier;
1366 : }
1367 :
1368 1494882 : FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
1369 4484646 : : Relocatable(isolate), str_(str.location()), length_(str->length()) {
1370 1494882 : PostGarbageCollection();
1371 1494882 : }
1372 :
1373 1345 : FlatStringReader::FlatStringReader(Isolate* isolate, Vector<const char> input)
1374 : : Relocatable(isolate),
1375 : str_(nullptr),
1376 : is_one_byte_(true),
1377 : length_(input.length()),
1378 4035 : start_(input.start()) {}
1379 :
1380 1495262 : void FlatStringReader::PostGarbageCollection() {
1381 1495262 : if (str_ == nullptr) return;
1382 : Handle<String> str(str_);
1383 : DCHECK(str->IsFlat());
1384 : DisallowHeapAllocation no_gc;
1385 : // This does not actually prevent the vector from being relocated later.
1386 1495262 : String::FlatContent content = str->GetFlatContent(no_gc);
1387 : DCHECK(content.IsFlat());
1388 1495262 : is_one_byte_ = content.IsOneByte();
1389 1495262 : if (is_one_byte_) {
1390 1357683 : start_ = content.ToOneByteVector().start();
1391 : } else {
1392 137579 : start_ = content.ToUC16Vector().start();
1393 : }
1394 : }
1395 :
1396 108701 : void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
1397 : DCHECK(!cons_string.is_null());
1398 124703 : root_ = cons_string;
1399 124703 : consumed_ = offset;
1400 : // Force stack blown condition to trigger restart.
1401 124703 : depth_ = 1;
1402 124703 : maximum_depth_ = kStackSize + depth_;
1403 : DCHECK(StackBlown());
1404 108701 : }
1405 :
1406 211935450 : String ConsStringIterator::Continue(int* offset_out) {
1407 : DCHECK_NE(depth_, 0);
1408 : DCHECK_EQ(0, *offset_out);
1409 211935450 : bool blew_stack = StackBlown();
1410 : String string;
1411 : // Get the next leaf if there is one.
1412 211935450 : if (!blew_stack) string = NextLeaf(&blew_stack);
1413 : // Restart search from root.
1414 211935450 : if (blew_stack) {
1415 : DCHECK(string.is_null());
1416 149088 : string = Search(offset_out);
1417 : }
1418 : // Ensure future calls return null immediately.
1419 211935450 : if (string.is_null()) Reset(ConsString());
1420 211935450 : return string;
1421 : }
1422 :
1423 149088 : String ConsStringIterator::Search(int* offset_out) {
1424 149088 : ConsString cons_string = root_;
1425 : // Reset the stack, pushing the root string.
1426 149088 : depth_ = 1;
1427 149088 : maximum_depth_ = 1;
1428 149088 : frames_[0] = cons_string;
1429 149088 : const int consumed = consumed_;
1430 : int offset = 0;
1431 : while (true) {
1432 : // Loop until the string is found which contains the target offset.
1433 : String string = cons_string->first();
1434 : int length = string->length();
1435 : int32_t type;
1436 34893173 : if (consumed < offset + length) {
1437 : // Target offset is in the left branch.
1438 : // Keep going if we're still in a ConString.
1439 : type = string->map()->instance_type();
1440 33572138 : if ((type & kStringRepresentationMask) == kConsStringTag) {
1441 : cons_string = ConsString::cast(string);
1442 : PushLeft(cons_string);
1443 : continue;
1444 : }
1445 : // Tell the stack we're done descending.
1446 : AdjustMaximumDepth();
1447 : } else {
1448 : // Descend right.
1449 : // Update progress through the string.
1450 : offset += length;
1451 : // Keep going if we're still in a ConString.
1452 : string = cons_string->second();
1453 : type = string->map()->instance_type();
1454 1321035 : if ((type & kStringRepresentationMask) == kConsStringTag) {
1455 : cons_string = ConsString::cast(string);
1456 : PushRight(cons_string);
1457 : continue;
1458 : }
1459 : // Need this to be updated for the current string.
1460 : length = string->length();
1461 : // Account for the possibility of an empty right leaf.
1462 : // This happens only if we have asked for an offset outside the string.
1463 30090 : if (length == 0) {
1464 : // Reset so future operations will return null immediately.
1465 : Reset(ConsString());
1466 145 : return String();
1467 : }
1468 : // Tell the stack we're done descending.
1469 : AdjustMaximumDepth();
1470 : // Pop stack so next iteration is in correct place.
1471 : Pop();
1472 : }
1473 : DCHECK_NE(length, 0);
1474 : // Adjust return values and exit.
1475 148943 : consumed_ = offset + length;
1476 148943 : *offset_out = consumed - offset;
1477 148943 : return string;
1478 : }
1479 : UNREACHABLE();
1480 : }
1481 :
1482 211787057 : String ConsStringIterator::NextLeaf(bool* blew_stack) {
1483 : while (true) {
1484 : // Tree traversal complete.
1485 212205214 : if (depth_ == 0) {
1486 35352 : *blew_stack = false;
1487 35352 : return String();
1488 : }
1489 : // We've lost track of higher nodes.
1490 212169862 : if (StackBlown()) {
1491 695 : *blew_stack = true;
1492 695 : return String();
1493 : }
1494 : // Go right.
1495 424338334 : ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1496 : String string = cons_string->second();
1497 : int32_t type = string->map()->instance_type();
1498 212169167 : if ((type & kStringRepresentationMask) != kConsStringTag) {
1499 : // Pop stack so next iteration is in correct place.
1500 : Pop();
1501 : int length = string->length();
1502 : // Could be a flattened ConsString.
1503 106425521 : if (length == 0) continue;
1504 106007364 : consumed_ += length;
1505 106007364 : return string;
1506 : }
1507 : cons_string = ConsString::cast(string);
1508 : PushRight(cons_string);
1509 : // Need to traverse all the way left.
1510 : while (true) {
1511 : // Continue left.
1512 : string = cons_string->first();
1513 : type = string->map()->instance_type();
1514 211445574 : if ((type & kStringRepresentationMask) != kConsStringTag) {
1515 : AdjustMaximumDepth();
1516 : int length = string->length();
1517 105743646 : if (length == 0) break; // Skip empty left-hand sides of ConsStrings.
1518 105743646 : consumed_ += length;
1519 105743646 : return string;
1520 : }
1521 : cons_string = ConsString::cast(string);
1522 : PushLeft(cons_string);
1523 : }
1524 : }
1525 : UNREACHABLE();
1526 : }
1527 :
1528 : template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
1529 : String source, uint16_t* sink, int from, int to);
1530 :
1531 : } // namespace internal
1532 122036 : } // namespace v8
|