Line data Source code
1 : // Copyright 2019 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/objects/string.h"
6 :
7 : #include "src/char-predicates.h"
8 : #include "src/conversions.h"
9 : #include "src/handles-inl.h"
10 : #include "src/heap/heap-inl.h" // For LooksValid implementation.
11 : #include "src/objects/map.h"
12 : #include "src/objects/oddball.h"
13 : #include "src/objects/string-comparator.h"
14 : #include "src/objects/string-inl.h"
15 : #include "src/ostreams.h"
16 : #include "src/string-builder-inl.h"
17 : #include "src/string-hasher.h"
18 : #include "src/string-search.h"
19 : #include "src/string-stream.h"
20 : #include "src/unicode-inl.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 :
25 6013540 : Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
26 : AllocationType allocation) {
27 : DCHECK_GT(cons->first()->length(), 0);
28 : DCHECK_GT(cons->second()->length(), 0);
29 :
30 : DCHECK(AllowHeapAllocation::IsAllowed());
31 : int length = cons->length();
32 : allocation =
33 6013540 : ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
34 : Handle<SeqString> result;
35 6013540 : if (cons->IsOneByteRepresentation()) {
36 : Handle<SeqOneByteString> flat =
37 : isolate->factory()
38 11787666 : ->NewRawOneByteString(length, allocation)
39 : .ToHandleChecked();
40 : DisallowHeapAllocation no_gc;
41 5893833 : WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
42 : result = flat;
43 : } else {
44 : Handle<SeqTwoByteString> flat =
45 : isolate->factory()
46 239416 : ->NewRawTwoByteString(length, allocation)
47 : .ToHandleChecked();
48 : DisallowHeapAllocation no_gc;
49 119708 : WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
50 : result = flat;
51 : }
52 12027080 : cons->set_first(isolate, *result);
53 6013541 : cons->set_second(isolate, ReadOnlyRoots(isolate).empty_string());
54 : DCHECK(result->IsFlat());
55 6013540 : return result;
56 : }
57 :
58 301 : bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
59 : DisallowHeapAllocation no_allocation;
60 : // Externalizing twice leaks the external resource, so it's
61 : // prohibited by the API.
62 : DCHECK(this->SupportsExternalization());
63 : DCHECK(resource->IsCacheable());
64 : #ifdef ENABLE_SLOW_DCHECKS
65 : if (FLAG_enable_slow_asserts) {
66 : // Assert that the resource and the string are equivalent.
67 : DCHECK(static_cast<size_t>(this->length()) == resource->length());
68 : ScopedVector<uc16> smart_chars(this->length());
69 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
70 : DCHECK_EQ(0, memcmp(smart_chars.start(), resource->data(),
71 : resource->length() * sizeof(smart_chars[0])));
72 : }
73 : #endif // DEBUG
74 301 : int size = this->Size(); // Byte size of the original string.
75 : // Abort if size does not allow in-place conversion.
76 301 : if (size < ExternalString::kUncachedSize) return false;
77 : Isolate* isolate;
78 : // Read-only strings cannot be made external, since that would mutate the
79 : // string.
80 301 : if (!GetIsolateFromWritableObject(*this, &isolate)) return false;
81 : Heap* heap = isolate->heap();
82 : bool is_internalized = this->IsInternalizedString();
83 602 : bool has_pointers = StringShape(*this).IsIndirect();
84 301 : if (has_pointers) {
85 64 : heap->NotifyObjectLayoutChange(*this, size, no_allocation);
86 : }
87 : // Morph the string to an external string by replacing the map and
88 : // reinitializing the fields. This won't work if the space the existing
89 : // string occupies is too small for a regular external string. Instead, we
90 : // resort to an uncached external string instead, omitting the field caching
91 : // the address of the backing store. When we encounter uncached external
92 : // strings in generated code, we need to bailout to runtime.
93 : Map new_map;
94 : ReadOnlyRoots roots(heap);
95 301 : if (size < ExternalString::kSize) {
96 270 : if (is_internalized) {
97 20 : new_map = roots.uncached_external_internalized_string_map();
98 : } else {
99 250 : new_map = roots.uncached_external_string_map();
100 : }
101 : } else {
102 : new_map = is_internalized ? roots.external_internalized_string_map()
103 62 : : roots.external_string_map();
104 : }
105 :
106 : // Byte size of the external String object.
107 301 : int new_size = this->SizeFromMap(new_map);
108 : heap->CreateFillerObjectAt(this->address() + new_size, size - new_size,
109 602 : ClearRecordedSlots::kNo);
110 301 : if (has_pointers) {
111 64 : heap->ClearRecordedSlotRange(this->address(), this->address() + new_size);
112 : }
113 :
114 : // We are storing the new map using release store after creating a filler for
115 : // the left-over space to avoid races with the sweeper thread.
116 301 : this->synchronized_set_map(new_map);
117 :
118 301 : ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
119 301 : self->SetResource(isolate, resource);
120 : heap->RegisterExternalString(*this);
121 301 : if (is_internalized) self->Hash(); // Force regeneration of the hash value.
122 : return true;
123 : }
124 :
125 310 : bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
126 : DisallowHeapAllocation no_allocation;
127 : // Externalizing twice leaks the external resource, so it's
128 : // prohibited by the API.
129 : DCHECK(this->SupportsExternalization());
130 : DCHECK(resource->IsCacheable());
131 : #ifdef ENABLE_SLOW_DCHECKS
132 : if (FLAG_enable_slow_asserts) {
133 : // Assert that the resource and the string are equivalent.
134 : DCHECK(static_cast<size_t>(this->length()) == resource->length());
135 : if (this->IsTwoByteRepresentation()) {
136 : ScopedVector<uint16_t> smart_chars(this->length());
137 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
138 : DCHECK(String::IsOneByte(smart_chars.start(), this->length()));
139 : }
140 : ScopedVector<char> smart_chars(this->length());
141 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
142 : DCHECK_EQ(0, memcmp(smart_chars.start(), resource->data(),
143 : resource->length() * sizeof(smart_chars[0])));
144 : }
145 : #endif // DEBUG
146 310 : int size = this->Size(); // Byte size of the original string.
147 : // Abort if size does not allow in-place conversion.
148 310 : if (size < ExternalString::kUncachedSize) return false;
149 : Isolate* isolate;
150 : // Read-only strings cannot be made external, since that would mutate the
151 : // string.
152 310 : if (!GetIsolateFromWritableObject(*this, &isolate)) return false;
153 : Heap* heap = isolate->heap();
154 : bool is_internalized = this->IsInternalizedString();
155 620 : bool has_pointers = StringShape(*this).IsIndirect();
156 :
157 310 : if (has_pointers) {
158 29 : heap->NotifyObjectLayoutChange(*this, size, no_allocation);
159 : }
160 :
161 : // Morph the string to an external string by replacing the map and
162 : // reinitializing the fields. This won't work if the space the existing
163 : // string occupies is too small for a regular external string. Instead, we
164 : // resort to an uncached external string instead, omitting the field caching
165 : // the address of the backing store. When we encounter uncached external
166 : // strings in generated code, we need to bailout to runtime.
167 : Map new_map;
168 : ReadOnlyRoots roots(heap);
169 310 : if (size < ExternalString::kSize) {
170 : new_map = is_internalized
171 : ? roots.uncached_external_one_byte_internalized_string_map()
172 98 : : roots.uncached_external_one_byte_string_map();
173 : } else {
174 : new_map = is_internalized
175 : ? roots.external_one_byte_internalized_string_map()
176 522 : : roots.external_one_byte_string_map();
177 : }
178 :
179 : // Byte size of the external String object.
180 310 : int new_size = this->SizeFromMap(new_map);
181 : heap->CreateFillerObjectAt(this->address() + new_size, size - new_size,
182 620 : ClearRecordedSlots::kNo);
183 310 : if (has_pointers) {
184 29 : heap->ClearRecordedSlotRange(this->address(), this->address() + new_size);
185 : }
186 :
187 : // We are storing the new map using release store after creating a filler for
188 : // the left-over space to avoid races with the sweeper thread.
189 310 : this->synchronized_set_map(new_map);
190 :
191 310 : ExternalOneByteString self = ExternalOneByteString::cast(*this);
192 310 : self->SetResource(isolate, resource);
193 : heap->RegisterExternalString(*this);
194 310 : if (is_internalized) self->Hash(); // Force regeneration of the hash value.
195 : return true;
196 : }
197 :
198 1499 : bool String::SupportsExternalization() {
199 1499 : if (this->IsThinString()) {
200 0 : return i::ThinString::cast(*this)->actual()->SupportsExternalization();
201 : }
202 :
203 : Isolate* isolate;
204 : // RO_SPACE strings cannot be externalized.
205 1499 : if (!GetIsolateFromWritableObject(*this, &isolate)) {
206 : return false;
207 : }
208 :
209 : // Already an external string.
210 2962 : if (StringShape(*this).IsExternal()) {
211 : return false;
212 : }
213 :
214 : #ifdef V8_COMPRESS_POINTERS
215 : // Small strings may not be in-place externalizable.
216 1218 : if (this->Size() < ExternalString::kUncachedSize) return false;
217 : #else
218 : DCHECK_LE(ExternalString::kUncachedSize, this->Size());
219 : #endif
220 :
221 1118 : return !isolate->heap()->IsInGCPostProcessing();
222 : }
223 :
224 41474 : void String::StringShortPrint(StringStream* accumulator, bool show_details) {
225 41474 : const char* internalized_marker = this->IsInternalizedString() ? "#" : "";
226 :
227 : int len = length();
228 41474 : if (len > kMaxShortPrintLength) {
229 0 : accumulator->Add("<Very long string[%s%u]>", internalized_marker, len);
230 0 : return;
231 : }
232 :
233 41474 : if (!LooksValid()) {
234 0 : accumulator->Add("<Invalid String>");
235 0 : return;
236 : }
237 :
238 : StringCharacterStream stream(*this);
239 :
240 : bool truncated = false;
241 41474 : if (len > kMaxShortPrintLength) {
242 : len = kMaxShortPrintLength;
243 : truncated = true;
244 : }
245 : bool one_byte = true;
246 705740 : for (int i = 0; i < len; i++) {
247 332133 : uint16_t c = stream.GetNext();
248 :
249 332133 : if (c < 32 || c >= 127) {
250 : one_byte = false;
251 : }
252 : }
253 41474 : stream.Reset(*this);
254 41474 : if (one_byte) {
255 41474 : if (show_details)
256 39057 : accumulator->Add("<String[%s%u]: ", internalized_marker, length());
257 705740 : for (int i = 0; i < len; i++) {
258 332133 : accumulator->Put(static_cast<char>(stream.GetNext()));
259 : }
260 41474 : if (show_details) accumulator->Put('>');
261 : } else {
262 : // Backslash indicates that the string contains control
263 : // characters and that backslashes are therefore escaped.
264 0 : if (show_details)
265 0 : accumulator->Add("<String[%s%u]\\: ", internalized_marker, length());
266 0 : for (int i = 0; i < len; i++) {
267 0 : uint16_t c = stream.GetNext();
268 0 : if (c == '\n') {
269 0 : accumulator->Add("\\n");
270 0 : } else if (c == '\r') {
271 0 : accumulator->Add("\\r");
272 0 : } else if (c == '\\') {
273 0 : accumulator->Add("\\\\");
274 0 : } else if (c < 32 || c > 126) {
275 0 : accumulator->Add("\\x%02x", c);
276 : } else {
277 0 : accumulator->Put(static_cast<char>(c));
278 : }
279 : }
280 0 : if (truncated) {
281 0 : accumulator->Put('.');
282 0 : accumulator->Put('.');
283 0 : accumulator->Put('.');
284 : }
285 0 : if (show_details) accumulator->Put('>');
286 : }
287 : return;
288 : }
289 :
290 40 : void String::PrintUC16(std::ostream& os, int start, int end) { // NOLINT
291 40 : if (end < 0) end = length();
292 : StringCharacterStream stream(*this, start);
293 10312 : for (int i = start; i < end && stream.HasMore(); i++) {
294 10272 : os << AsUC16(stream.GetNext());
295 : }
296 40 : }
297 :
298 : // static
299 36 : Handle<String> String::Trim(Isolate* isolate, Handle<String> string,
300 : TrimMode mode) {
301 36 : string = String::Flatten(isolate, string);
302 : int const length = string->length();
303 :
304 : // Perform left trimming if requested.
305 : int left = 0;
306 36 : if (mode == kTrim || mode == kTrimStart) {
307 2376 : while (left < length && IsWhiteSpaceOrLineTerminator(string->Get(left))) {
308 567 : left++;
309 : }
310 : }
311 :
312 : // Perform right trimming if requested.
313 : int right = length;
314 36 : if (mode == kTrim || mode == kTrimEnd) {
315 72 : while (right > left &&
316 72 : IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
317 0 : right--;
318 : }
319 : }
320 :
321 36 : return isolate->factory()->NewSubString(string, left, right);
322 : }
323 :
324 3681646 : bool String::LooksValid() {
325 : // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
326 : // basically the same logic as the way we access the heap in the first place.
327 : MemoryChunk* chunk = MemoryChunk::FromHeapObject(*this);
328 : // RO_SPACE objects should always be valid.
329 3681646 : if (chunk->owner()->identity() == RO_SPACE) return true;
330 1911299 : if (chunk->heap() == nullptr) return false;
331 1911299 : return chunk->heap()->Contains(*this);
332 : }
333 :
334 : namespace {
335 :
336 : bool AreDigits(const uint8_t* s, int from, int to) {
337 96539 : for (int i = from; i < to; i++) {
338 60231 : if (s[i] < '0' || s[i] > '9') return false;
339 : }
340 :
341 : return true;
342 : }
343 :
344 : int ParseDecimalInteger(const uint8_t* s, int from, int to) {
345 : DCHECK_LT(to - from, 10); // Overflow is not possible.
346 : DCHECK(from < to);
347 1539 : int d = s[from] - '0';
348 :
349 1888 : for (int i = from + 1; i < to; i++) {
350 349 : d = 10 * d + (s[i] - '0');
351 : }
352 :
353 : return d;
354 : }
355 :
356 : } // namespace
357 :
358 : // static
359 3766518 : Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
360 : // Flatten {subject} string first.
361 3766518 : subject = String::Flatten(isolate, subject);
362 :
363 : // Fast array index case.
364 : uint32_t index;
365 3766518 : if (subject->AsArrayIndex(&index)) {
366 2167 : return isolate->factory()->NewNumberFromUint(index);
367 : }
368 :
369 : // Fast case: short integer or some sorts of junk values.
370 3764351 : if (subject->IsSeqOneByteString()) {
371 : int len = subject->length();
372 2582624 : if (len == 0) return handle(Smi::kZero, isolate);
373 :
374 : DisallowHeapAllocation no_gc;
375 : uint8_t const* data =
376 : Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
377 2562276 : bool minus = (data[0] == '-');
378 2562276 : int start_pos = (minus ? 1 : 0);
379 :
380 2562276 : if (start_pos == len) {
381 0 : return isolate->factory()->nan_value();
382 2562276 : } else if (data[start_pos] > '9') {
383 : // Fast check for a junk value. A valid string may start from a
384 : // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
385 : // or the 'I' character ('Infinity'). All of that have codes not greater
386 : // than '9' except 'I' and .
387 2412333 : if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
388 2405408 : return isolate->factory()->nan_value();
389 : }
390 176944 : } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
391 : // The maximal/minimal smi has 10 digits. If the string has less digits
392 : // we know it will fit into the smi-data type.
393 : int d = ParseDecimalInteger(data, start_pos, len);
394 1539 : if (minus) {
395 1745 : if (d == 0) return isolate->factory()->minus_zero_value();
396 1225 : d = -d;
397 108 : } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
398 0 : (len == 1 || data[0] != '0')) {
399 : // String hash is not calculated yet but all the data are present.
400 : // Update the hash field to speed up sequential convertions.
401 0 : uint32_t hash = StringHasher::MakeArrayIndexHash(d, len);
402 : #ifdef DEBUG
403 : subject->Hash(); // Force hash calculation.
404 : DCHECK_EQ(static_cast<int>(subject->hash_field()),
405 : static_cast<int>(hash));
406 : #endif
407 : subject->set_hash_field(hash);
408 : }
409 1279 : return handle(Smi::FromInt(d), isolate);
410 : }
411 : }
412 :
413 : // Slower case.
414 : int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
415 1337056 : return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
416 : }
417 :
418 173391468 : String::FlatContent String::GetFlatContent(
419 : const DisallowHeapAllocation& no_gc) {
420 : USE(no_gc);
421 : int length = this->length();
422 173391468 : StringShape shape(*this);
423 173391379 : String string = *this;
424 : int offset = 0;
425 173391379 : if (shape.representation_tag() == kConsStringTag) {
426 : ConsString cons = ConsString::cast(string);
427 0 : if (cons->second()->length() != 0) {
428 0 : return FlatContent();
429 : }
430 : string = cons->first();
431 0 : shape = StringShape(string);
432 173391379 : } else if (shape.representation_tag() == kSlicedStringTag) {
433 : SlicedString slice = SlicedString::cast(string);
434 : offset = slice->offset();
435 : string = slice->parent();
436 1106647 : shape = StringShape(string);
437 : DCHECK(shape.representation_tag() != kConsStringTag &&
438 : shape.representation_tag() != kSlicedStringTag);
439 : }
440 173391379 : if (shape.representation_tag() == kThinStringTag) {
441 : ThinString thin = ThinString::cast(string);
442 : string = thin->actual();
443 6 : shape = StringShape(string);
444 : DCHECK(!shape.IsCons());
445 : DCHECK(!shape.IsSliced());
446 : }
447 173391379 : if (shape.encoding_tag() == kOneByteStringTag) {
448 : const uint8_t* start;
449 165428497 : if (shape.representation_tag() == kSeqStringTag) {
450 : start = SeqOneByteString::cast(string)->GetChars(no_gc);
451 : } else {
452 : start = ExternalOneByteString::cast(string)->GetChars();
453 : }
454 165428505 : return FlatContent(start + offset, length);
455 : } else {
456 : DCHECK_EQ(shape.encoding_tag(), kTwoByteStringTag);
457 : const uc16* start;
458 7962882 : if (shape.representation_tag() == kSeqStringTag) {
459 : start = SeqTwoByteString::cast(string)->GetChars(no_gc);
460 : } else {
461 : start = ExternalTwoByteString::cast(string)->GetChars();
462 : }
463 7962882 : return FlatContent(start + offset, length);
464 : }
465 : }
466 :
467 5210424 : std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
468 : RobustnessFlag robust_flag,
469 : int offset, int length,
470 : int* length_return) {
471 5210424 : if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
472 : return std::unique_ptr<char[]>();
473 : }
474 : // Negative length means the to the end of the string.
475 5210424 : if (length < 0) length = kMaxInt - offset;
476 :
477 : // Compute the size of the UTF-8 string. Start at the specified offset.
478 : StringCharacterStream stream(*this, offset);
479 : int character_position = offset;
480 : int utf8_bytes = 0;
481 : int last = unibrow::Utf16::kNoPreviousCharacter;
482 63012032 : while (stream.HasMore() && character_position++ < offset + length) {
483 28900800 : uint16_t character = stream.GetNext();
484 57801600 : utf8_bytes += unibrow::Utf8::Length(character, last);
485 28900800 : last = character;
486 : }
487 :
488 5210436 : if (length_return) {
489 3606262 : *length_return = utf8_bytes;
490 : }
491 :
492 5210436 : char* result = NewArray<char>(utf8_bytes + 1);
493 :
494 : // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
495 5210435 : stream.Reset(*this, offset);
496 : character_position = offset;
497 : int utf8_byte_position = 0;
498 : last = unibrow::Utf16::kNoPreviousCharacter;
499 63012041 : while (stream.HasMore() && character_position++ < offset + length) {
500 28900806 : uint16_t character = stream.GetNext();
501 28900806 : if (allow_nulls == DISALLOW_NULLS && character == 0) {
502 : character = ' ';
503 : }
504 : utf8_byte_position +=
505 28900806 : unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
506 28900803 : last = character;
507 : }
508 5210434 : result[utf8_byte_position] = 0;
509 : return std::unique_ptr<char[]>(result);
510 : }
511 :
512 1604774 : std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
513 : RobustnessFlag robust_flag,
514 : int* length_return) {
515 1604774 : return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
516 : }
517 :
518 : template <typename sinkchar>
519 105344204 : void String::WriteToFlat(String src, sinkchar* sink, int f, int t) {
520 : DisallowHeapAllocation no_gc;
521 105344204 : String source = src;
522 : int from = f;
523 : int to = t;
524 : while (true) {
525 : DCHECK(0 <= from && from <= to && to <= source->length());
526 339317758 : switch (StringShape(source).full_representation_tag()) {
527 : case kOneByteStringTag | kExternalStringTag: {
528 852858 : CopyChars(sink, ExternalOneByteString::cast(source)->GetChars() + from,
529 : to - from);
530 : return;
531 : }
532 : case kTwoByteStringTag | kExternalStringTag: {
533 : const uc16* data = ExternalTwoByteString::cast(source)->GetChars();
534 279778 : CopyChars(sink, data + from, to - from);
535 : return;
536 : }
537 : case kOneByteStringTag | kSeqStringTag: {
538 156816870 : CopyChars(sink, SeqOneByteString::cast(source)->GetChars(no_gc) + from,
539 : to - from);
540 : return;
541 : }
542 : case kTwoByteStringTag | kSeqStringTag: {
543 27943174 : CopyChars(sink, SeqTwoByteString::cast(source)->GetChars(no_gc) + from,
544 : to - from);
545 : return;
546 : }
547 : case kOneByteStringTag | kConsStringTag:
548 : case kTwoByteStringTag | kConsStringTag: {
549 : ConsString cons_string = ConsString::cast(source);
550 : String first = cons_string->first();
551 : int boundary = first->length();
552 74923572 : if (to - boundary >= boundary - from) {
553 : // Right hand side is longer. Recurse over left.
554 18969176 : if (from < boundary) {
555 18969176 : WriteToFlat(first, sink, from, boundary);
556 37938350 : if (from == 0 && cons_string->second() == first) {
557 10623357 : CopyChars(sink + boundary, sink, boundary);
558 : return;
559 : }
560 8345818 : sink += boundary - from;
561 : from = 0;
562 : } else {
563 0 : from -= boundary;
564 : }
565 : to -= boundary;
566 : source = cons_string->second();
567 : } else {
568 : // Left hand side is longer. Recurse over right.
569 55954396 : if (to > boundary) {
570 : String second = cons_string->second();
571 : // When repeatedly appending to a string, we get a cons string that
572 : // is unbalanced to the left, a list, essentially. We inline the
573 : // common case of sequential one-byte right child.
574 55579463 : if (to - boundary == 1) {
575 52933444 : sink[boundary - from] = static_cast<sinkchar>(second->Get(0));
576 29112742 : } else if (second->IsSeqOneByteString()) {
577 48459682 : CopyChars(sink + boundary - from,
578 : SeqOneByteString::cast(second)->GetChars(no_gc),
579 : to - boundary);
580 : } else {
581 4882901 : WriteToFlat(second, sink + boundary - from, 0, to - boundary);
582 : }
583 : to = boundary;
584 : }
585 : source = first;
586 : }
587 : break;
588 : }
589 : case kOneByteStringTag | kSlicedStringTag:
590 : case kTwoByteStringTag | kSlicedStringTag: {
591 : SlicedString slice = SlicedString::cast(source);
592 1634624 : unsigned offset = slice->offset();
593 3269248 : WriteToFlat(slice->parent(), sink, from + offset, to + offset);
594 : return;
595 : }
596 : case kOneByteStringTag | kThinStringTag:
597 : case kTwoByteStringTag | kThinStringTag:
598 : source = ThinString::cast(source)->actual();
599 14463 : break;
600 : }
601 : }
602 : }
603 :
604 : template <typename SourceChar>
605 49217 : static void CalculateLineEndsImpl(Isolate* isolate, std::vector<int>* line_ends,
606 : Vector<const SourceChar> src,
607 : bool include_ending_line) {
608 49217 : const int src_len = src.length();
609 169595313 : for (int i = 0; i < src_len - 1; i++) {
610 339092192 : SourceChar current = src[i];
611 339092192 : SourceChar next = src[i + 1];
612 169546096 : if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
613 : }
614 :
615 147083 : if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
616 35662 : line_ends->push_back(src_len - 1);
617 : }
618 49217 : if (include_ending_line) {
619 : // Include one character beyond the end of script. The rewriter uses that
620 : // position for the implicit return statement.
621 46785 : line_ends->push_back(src_len);
622 : }
623 49217 : }
624 :
625 49217 : Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
626 : Handle<String> src,
627 : bool include_ending_line) {
628 49217 : src = Flatten(isolate, src);
629 : // Rough estimate of line count based on a roughly estimated average
630 : // length of (unpacked) code.
631 49217 : int line_count_estimate = src->length() >> 4;
632 : std::vector<int> line_ends;
633 49217 : line_ends.reserve(line_count_estimate);
634 : {
635 : DisallowHeapAllocation no_allocation; // ensure vectors stay valid.
636 : // Dispatch on type of strings.
637 49217 : String::FlatContent content = src->GetFlatContent(no_allocation);
638 : DCHECK(content.IsFlat());
639 49217 : if (content.IsOneByte()) {
640 49041 : CalculateLineEndsImpl(isolate, &line_ends, content.ToOneByteVector(),
641 49041 : include_ending_line);
642 : } else {
643 176 : CalculateLineEndsImpl(isolate, &line_ends, content.ToUC16Vector(),
644 176 : include_ending_line);
645 : }
646 : }
647 49217 : int line_count = static_cast<int>(line_ends.size());
648 49217 : Handle<FixedArray> array = isolate->factory()->NewFixedArray(line_count);
649 19665227 : for (int i = 0; i < line_count; i++) {
650 19616010 : array->set(i, Smi::FromInt(line_ends[i]));
651 : }
652 98434 : return array;
653 : }
654 :
655 17491571 : bool String::SlowEquals(String other) {
656 : DisallowHeapAllocation no_gc;
657 : // Fast check: negative check with lengths.
658 : int len = length();
659 17491571 : if (len != other->length()) return false;
660 9623666 : if (len == 0) return true;
661 :
662 : // Fast check: if at least one ThinString is involved, dereference it/them
663 : // and restart.
664 19185885 : if (this->IsThinString() || other->IsThinString()) {
665 61446 : if (other->IsThinString()) other = ThinString::cast(other)->actual();
666 61443 : if (this->IsThinString()) {
667 61440 : return ThinString::cast(*this)->actual()->Equals(other);
668 : } else {
669 3 : return this->Equals(other);
670 : }
671 : }
672 :
673 : // Fast check: if hash code is computed for both strings
674 : // a fast negative check can be performed.
675 19124003 : if (HasHashCode() && other->HasHashCode()) {
676 : #ifdef ENABLE_SLOW_DCHECKS
677 : if (FLAG_enable_slow_asserts) {
678 : if (Hash() != other->Hash()) {
679 : bool found_difference = false;
680 : for (int i = 0; i < len; i++) {
681 : if (Get(i) != other->Get(i)) {
682 : found_difference = true;
683 : break;
684 : }
685 : }
686 : DCHECK(found_difference);
687 : }
688 : }
689 : #endif
690 9544058 : if (Hash() != other->Hash()) return false;
691 : }
692 :
693 : // We know the strings are both non-empty. Compare the first chars
694 : // before we try to flatten the strings.
695 7292653 : if (this->Get(0) != other->Get(0)) return false;
696 :
697 14510554 : if (IsSeqOneByteString() && other->IsSeqOneByteString()) {
698 : const uint8_t* str1 = SeqOneByteString::cast(*this)->GetChars(no_gc);
699 : const uint8_t* str2 = SeqOneByteString::cast(other)->GetChars(no_gc);
700 7224352 : return CompareRawStringContents(str1, str2, len);
701 : }
702 :
703 : StringComparator comparator;
704 61795 : return comparator.Equals(*this, other);
705 : }
706 :
707 742345 : bool String::SlowEquals(Isolate* isolate, Handle<String> one,
708 : Handle<String> two) {
709 : // Fast check: negative check with lengths.
710 : int one_length = one->length();
711 742345 : if (one_length != two->length()) return false;
712 733823 : if (one_length == 0) return true;
713 :
714 : // Fast check: if at least one ThinString is involved, dereference it/them
715 : // and restart.
716 1467646 : if (one->IsThinString() || two->IsThinString()) {
717 23 : if (one->IsThinString())
718 : one = handle(ThinString::cast(*one)->actual(), isolate);
719 23 : if (two->IsThinString())
720 : two = handle(ThinString::cast(*two)->actual(), isolate);
721 23 : return String::Equals(isolate, one, two);
722 : }
723 :
724 : // Fast check: if hash code is computed for both strings
725 : // a fast negative check can be performed.
726 744969 : if (one->HasHashCode() && two->HasHashCode()) {
727 : #ifdef ENABLE_SLOW_DCHECKS
728 : if (FLAG_enable_slow_asserts) {
729 : if (one->Hash() != two->Hash()) {
730 : bool found_difference = false;
731 : for (int i = 0; i < one_length; i++) {
732 : if (one->Get(i) != two->Get(i)) {
733 : found_difference = true;
734 : break;
735 : }
736 : }
737 : DCHECK(found_difference);
738 : }
739 : }
740 : #endif
741 12922 : if (one->Hash() != two->Hash()) return false;
742 : }
743 :
744 : // We know the strings are both non-empty. Compare the first chars
745 : // before we try to flatten the strings.
746 733790 : if (one->Get(0) != two->Get(0)) return false;
747 :
748 733643 : one = String::Flatten(isolate, one);
749 733643 : two = String::Flatten(isolate, two);
750 :
751 : DisallowHeapAllocation no_gc;
752 733643 : String::FlatContent flat1 = one->GetFlatContent(no_gc);
753 733643 : String::FlatContent flat2 = two->GetFlatContent(no_gc);
754 :
755 733643 : if (flat1.IsOneByte() && flat2.IsOneByte()) {
756 : return CompareRawStringContents(flat1.ToOneByteVector().start(),
757 : flat2.ToOneByteVector().start(),
758 : one_length);
759 : } else {
760 15344182 : for (int i = 0; i < one_length; i++) {
761 7643076 : if (flat1.Get(i) != flat2.Get(i)) return false;
762 : }
763 : return true;
764 : }
765 : }
766 :
767 : // static
768 4791004 : ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
769 : Handle<String> y) {
770 : // A few fast case tests before we flatten.
771 4791004 : if (x.is_identical_to(y)) {
772 : return ComparisonResult::kEqual;
773 4791004 : } else if (y->length() == 0) {
774 : return x->length() == 0 ? ComparisonResult::kEqual
775 0 : : ComparisonResult::kGreaterThan;
776 4791004 : } else if (x->length() == 0) {
777 : return ComparisonResult::kLessThan;
778 : }
779 :
780 9582008 : int const d = x->Get(0) - y->Get(0);
781 4791004 : if (d < 0) {
782 : return ComparisonResult::kLessThan;
783 3871570 : } else if (d > 0) {
784 : return ComparisonResult::kGreaterThan;
785 : }
786 :
787 : // Slow case.
788 372363 : x = String::Flatten(isolate, x);
789 372363 : y = String::Flatten(isolate, y);
790 :
791 : DisallowHeapAllocation no_gc;
792 : ComparisonResult result = ComparisonResult::kEqual;
793 : int prefix_length = x->length();
794 372363 : if (y->length() < prefix_length) {
795 : prefix_length = y->length();
796 : result = ComparisonResult::kGreaterThan;
797 330602 : } else if (y->length() > prefix_length) {
798 : result = ComparisonResult::kLessThan;
799 : }
800 : int r;
801 372363 : String::FlatContent x_content = x->GetFlatContent(no_gc);
802 372363 : String::FlatContent y_content = y->GetFlatContent(no_gc);
803 372363 : if (x_content.IsOneByte()) {
804 : Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
805 367764 : if (y_content.IsOneByte()) {
806 : Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
807 367746 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
808 : } else {
809 : Vector<const uc16> y_chars = y_content.ToUC16Vector();
810 18 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
811 : }
812 : } else {
813 : Vector<const uc16> x_chars = x_content.ToUC16Vector();
814 4599 : if (y_content.IsOneByte()) {
815 : Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
816 18 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
817 : } else {
818 : Vector<const uc16> y_chars = y_content.ToUC16Vector();
819 4581 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
820 : }
821 : }
822 372363 : if (r < 0) {
823 : result = ComparisonResult::kLessThan;
824 156236 : } else if (r > 0) {
825 : result = ComparisonResult::kGreaterThan;
826 : }
827 : return result;
828 : }
829 :
830 807 : Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
831 : Handle<Object> search, Handle<Object> position) {
832 807 : if (receiver->IsNullOrUndefined(isolate)) {
833 540 : THROW_NEW_ERROR_RETURN_FAILURE(
834 : isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
835 : isolate->factory()->NewStringFromAsciiChecked(
836 : "String.prototype.indexOf")));
837 : }
838 : Handle<String> receiver_string;
839 1254 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
840 : Object::ToString(isolate, receiver));
841 :
842 : Handle<String> search_string;
843 1254 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
844 : Object::ToString(isolate, search));
845 :
846 636 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
847 : Object::ToInteger(isolate, position));
848 :
849 618 : uint32_t index = receiver_string->ToValidIndex(*position);
850 : return Smi::FromInt(
851 1236 : String::IndexOf(isolate, receiver_string, search_string, index));
852 : }
853 :
854 : namespace {
855 :
856 : template <typename T>
857 1051897 : int SearchString(Isolate* isolate, String::FlatContent receiver_content,
858 : Vector<T> pat_vector, int start_index) {
859 1051897 : if (receiver_content.IsOneByte()) {
860 : return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
861 1047289 : start_index);
862 : }
863 : return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
864 4608 : start_index);
865 : }
866 :
867 : } // namespace
868 :
869 1054991 : int String::IndexOf(Isolate* isolate, Handle<String> receiver,
870 : Handle<String> search, int start_index) {
871 : DCHECK_LE(0, start_index);
872 : DCHECK(start_index <= receiver->length());
873 :
874 1054991 : uint32_t search_length = search->length();
875 1054991 : if (search_length == 0) return start_index;
876 :
877 1054946 : uint32_t receiver_length = receiver->length();
878 1054946 : if (start_index + search_length > receiver_length) return -1;
879 :
880 1051897 : receiver = String::Flatten(isolate, receiver);
881 1051897 : search = String::Flatten(isolate, search);
882 :
883 : DisallowHeapAllocation no_gc; // ensure vectors stay valid
884 : // Extract flattened substrings of cons strings before getting encoding.
885 1051897 : String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
886 1051897 : String::FlatContent search_content = search->GetFlatContent(no_gc);
887 :
888 : // dispatch on type of strings
889 1051897 : if (search_content.IsOneByte()) {
890 1050142 : Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
891 : return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
892 1050142 : start_index);
893 : }
894 1755 : Vector<const uc16> pat_vector = search_content.ToUC16Vector();
895 : return SearchString<const uc16>(isolate, receiver_content, pat_vector,
896 1755 : start_index);
897 : }
898 :
899 3894 : MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
900 : Handle<String> replacement,
901 : int start_index) {
902 : DCHECK_GE(start_index, 0);
903 :
904 : Factory* factory = isolate->factory();
905 :
906 : const int replacement_length = replacement->length();
907 3894 : const int captures_length = match->CaptureCount();
908 :
909 3894 : replacement = String::Flatten(isolate, replacement);
910 :
911 : Handle<String> dollar_string =
912 3894 : factory->LookupSingleCharacterStringFromCode('$');
913 : int next_dollar_ix =
914 3894 : String::IndexOf(isolate, replacement, dollar_string, start_index);
915 3894 : if (next_dollar_ix < 0) {
916 153 : return replacement;
917 : }
918 :
919 3741 : IncrementalStringBuilder builder(isolate);
920 :
921 3741 : if (next_dollar_ix > 0) {
922 279 : builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
923 : }
924 :
925 : while (true) {
926 7962 : const int peek_ix = next_dollar_ix + 1;
927 7962 : if (peek_ix >= replacement_length) {
928 : builder.AppendCharacter('$');
929 18 : return builder.Finish();
930 : }
931 :
932 : int continue_from_ix = -1;
933 : const uint16_t peek = replacement->Get(peek_ix);
934 7944 : switch (peek) {
935 : case '$': // $$
936 : builder.AppendCharacter('$');
937 54 : continue_from_ix = peek_ix + 1;
938 54 : break;
939 : case '&': // $& - match
940 54 : builder.AppendString(match->GetMatch());
941 54 : continue_from_ix = peek_ix + 1;
942 54 : break;
943 : case '`': // $` - prefix
944 36 : builder.AppendString(match->GetPrefix());
945 36 : continue_from_ix = peek_ix + 1;
946 36 : break;
947 : case '\'': // $' - suffix
948 36 : builder.AppendString(match->GetSuffix());
949 36 : continue_from_ix = peek_ix + 1;
950 36 : break;
951 : case '0':
952 : case '1':
953 : case '2':
954 : case '3':
955 : case '4':
956 : case '5':
957 : case '6':
958 : case '7':
959 : case '8':
960 : case '9': {
961 : // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
962 7278 : int scaled_index = (peek - '0');
963 : int advance = 1;
964 :
965 7278 : if (peek_ix + 1 < replacement_length) {
966 : const uint16_t next_peek = replacement->Get(peek_ix + 1);
967 6054 : if (next_peek >= '0' && next_peek <= '9') {
968 1341 : const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
969 1341 : if (new_scaled_index < captures_length) {
970 : scaled_index = new_scaled_index;
971 : advance = 2;
972 : }
973 : }
974 : }
975 :
976 7278 : if (scaled_index == 0 || scaled_index >= captures_length) {
977 : builder.AppendCharacter('$');
978 : continue_from_ix = peek_ix;
979 7278 : break;
980 : }
981 :
982 : bool capture_exists;
983 : Handle<String> capture;
984 14376 : ASSIGN_RETURN_ON_EXCEPTION(
985 : isolate, capture, match->GetCapture(scaled_index, &capture_exists),
986 : String);
987 7188 : if (capture_exists) builder.AppendString(capture);
988 7188 : continue_from_ix = peek_ix + advance;
989 7188 : break;
990 : }
991 : case '<': { // $<name> - named capture
992 : typedef String::Match::CaptureState CaptureState;
993 :
994 459 : if (!match->HasNamedCaptures()) {
995 : builder.AppendCharacter('$');
996 : continue_from_ix = peek_ix;
997 459 : break;
998 : }
999 :
1000 : Handle<String> bracket_string =
1001 378 : factory->LookupSingleCharacterStringFromCode('>');
1002 : const int closing_bracket_ix =
1003 378 : String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1004 :
1005 378 : if (closing_bracket_ix == -1) {
1006 : // No closing bracket was found, treat '$<' as a string literal.
1007 : builder.AppendCharacter('$');
1008 : continue_from_ix = peek_ix;
1009 : break;
1010 : }
1011 :
1012 : Handle<String> capture_name =
1013 306 : factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1014 : Handle<String> capture;
1015 : CaptureState capture_state;
1016 612 : ASSIGN_RETURN_ON_EXCEPTION(
1017 : isolate, capture,
1018 : match->GetNamedCapture(capture_name, &capture_state), String);
1019 :
1020 306 : switch (capture_state) {
1021 : case CaptureState::INVALID:
1022 : case CaptureState::UNMATCHED:
1023 : break;
1024 : case CaptureState::MATCHED:
1025 126 : builder.AppendString(capture);
1026 126 : break;
1027 : }
1028 :
1029 306 : continue_from_ix = closing_bracket_ix + 1;
1030 306 : break;
1031 : }
1032 : default:
1033 : builder.AppendCharacter('$');
1034 : continue_from_ix = peek_ix;
1035 : break;
1036 : }
1037 :
1038 : // Go the the next $ in the replacement.
1039 : // TODO(jgruber): Single-char lookups could be much more efficient.
1040 : DCHECK_NE(continue_from_ix, -1);
1041 : next_dollar_ix =
1042 7944 : String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1043 :
1044 : // Return if there are no more $ characters in the replacement. If we
1045 : // haven't reached the end, we need to append the suffix.
1046 7944 : if (next_dollar_ix < 0) {
1047 3723 : if (continue_from_ix < replacement_length) {
1048 1311 : builder.AppendString(factory->NewSubString(
1049 1311 : replacement, continue_from_ix, replacement_length));
1050 : }
1051 3723 : return builder.Finish();
1052 : }
1053 :
1054 : // Append substring between the previous and the next $ character.
1055 4221 : if (next_dollar_ix > continue_from_ix) {
1056 72 : builder.AppendString(
1057 72 : factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1058 : }
1059 : }
1060 :
1061 : UNREACHABLE();
1062 : }
1063 :
1064 : namespace { // for String.Prototype.lastIndexOf
1065 :
1066 : template <typename schar, typename pchar>
1067 6 : int StringMatchBackwards(Vector<const schar> subject,
1068 : Vector<const pchar> pattern, int idx) {
1069 : int pattern_length = pattern.length();
1070 : DCHECK_GE(pattern_length, 1);
1071 : DCHECK(idx + pattern_length <= subject.length());
1072 :
1073 : if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1074 0 : for (int i = 0; i < pattern_length; i++) {
1075 0 : uc16 c = pattern[i];
1076 0 : if (c > String::kMaxOneByteCharCode) {
1077 : return -1;
1078 : }
1079 : }
1080 : }
1081 :
1082 1222 : pchar pattern_first_char = pattern[0];
1083 10470 : for (int i = idx; i >= 0; i--) {
1084 11422 : if (subject[i] != pattern_first_char) continue;
1085 : int j = 1;
1086 1778 : while (j < pattern_length) {
1087 2073 : if (pattern[j] != subject[i + j]) {
1088 : break;
1089 : }
1090 673 : j++;
1091 : }
1092 1105 : if (j == pattern_length) {
1093 : return i;
1094 : }
1095 : }
1096 : return -1;
1097 : }
1098 :
1099 : } // namespace
1100 :
1101 1636 : Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
1102 : Handle<Object> search, Handle<Object> position) {
1103 1636 : if (receiver->IsNullOrUndefined(isolate)) {
1104 486 : THROW_NEW_ERROR_RETURN_FAILURE(
1105 : isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1106 : isolate->factory()->NewStringFromAsciiChecked(
1107 : "String.prototype.lastIndexOf")));
1108 : }
1109 : Handle<String> receiver_string;
1110 2948 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1111 : Object::ToString(isolate, receiver));
1112 :
1113 : Handle<String> search_string;
1114 2948 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1115 : Object::ToString(isolate, search));
1116 :
1117 2948 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1118 : Object::ToNumber(isolate, position));
1119 :
1120 : uint32_t start_index;
1121 :
1122 1474 : if (position->IsNaN()) {
1123 997 : start_index = receiver_string->length();
1124 : } else {
1125 477 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1126 : Object::ToInteger(isolate, position));
1127 477 : start_index = receiver_string->ToValidIndex(*position);
1128 : }
1129 :
1130 1474 : uint32_t pattern_length = search_string->length();
1131 1474 : uint32_t receiver_length = receiver_string->length();
1132 :
1133 1474 : if (start_index + pattern_length > receiver_length) {
1134 1069 : start_index = receiver_length - pattern_length;
1135 : }
1136 :
1137 1474 : if (pattern_length == 0) {
1138 504 : return Smi::FromInt(start_index);
1139 : }
1140 :
1141 1222 : receiver_string = String::Flatten(isolate, receiver_string);
1142 1222 : search_string = String::Flatten(isolate, search_string);
1143 :
1144 : int last_index = -1;
1145 : DisallowHeapAllocation no_gc; // ensure vectors stay valid
1146 :
1147 1222 : String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1148 1222 : String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1149 :
1150 1222 : if (search_content.IsOneByte()) {
1151 : Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1152 1222 : if (receiver_content.IsOneByte()) {
1153 1216 : last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1154 : pat_vector, start_index);
1155 : } else {
1156 6 : last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1157 6 : pat_vector, start_index);
1158 : }
1159 : } else {
1160 0 : Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1161 0 : if (receiver_content.IsOneByte()) {
1162 0 : last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1163 0 : pat_vector, start_index);
1164 : } else {
1165 0 : last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1166 0 : pat_vector, start_index);
1167 : }
1168 : }
1169 1222 : return Smi::FromInt(last_index);
1170 : }
1171 :
1172 18650791 : bool String::IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match) {
1173 : int slen = length();
1174 : // Can't check exact length equality, but we can check bounds.
1175 : int str_len = str.length();
1176 18650791 : if (!allow_prefix_match &&
1177 16740886 : (str_len < slen ||
1178 16740886 : str_len > slen * static_cast<int>(unibrow::Utf8::kMaxEncodedSize))) {
1179 : return false;
1180 : }
1181 :
1182 : int i = 0;
1183 : unibrow::Utf8Iterator it = unibrow::Utf8Iterator(str);
1184 190433269 : while (i < slen && !it.Done()) {
1185 181412687 : if (Get(i++) != *it) return false;
1186 86966098 : ++it;
1187 : }
1188 :
1189 12760829 : return (allow_prefix_match || i == slen) && it.Done();
1190 : }
1191 :
1192 : template <>
1193 135 : bool String::IsEqualTo(Vector<const uint8_t> str) {
1194 135 : return IsOneByteEqualTo(str);
1195 : }
1196 :
1197 : template <>
1198 0 : bool String::IsEqualTo(Vector<const uc16> str) {
1199 0 : return IsTwoByteEqualTo(str);
1200 : }
1201 :
1202 107958470 : bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
1203 : int slen = length();
1204 107958470 : if (str.length() != slen) return false;
1205 : DisallowHeapAllocation no_gc;
1206 82947637 : FlatContent content = GetFlatContent(no_gc);
1207 82947574 : if (content.IsOneByte()) {
1208 82947423 : return CompareChars(content.ToOneByteVector().start(), str.start(), slen) ==
1209 82947423 : 0;
1210 : }
1211 302 : return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
1212 : }
1213 :
1214 77504 : bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
1215 : int slen = length();
1216 77504 : if (str.length() != slen) return false;
1217 : DisallowHeapAllocation no_gc;
1218 20628 : FlatContent content = GetFlatContent(no_gc);
1219 20628 : if (content.IsOneByte()) {
1220 4609 : return CompareChars(content.ToOneByteVector().start(), str.start(), slen) ==
1221 4609 : 0;
1222 : }
1223 32038 : return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
1224 : }
1225 :
1226 79898725 : uint32_t String::ComputeAndSetHash() {
1227 : DisallowHeapAllocation no_gc;
1228 : // Should only be called if hash code has not yet been computed.
1229 : DCHECK(!HasHashCode());
1230 :
1231 : // Store the hash code in the object.
1232 : uint32_t field =
1233 79898725 : IteratingStringHasher::Hash(*this, HashSeed(GetReadOnlyRoots()));
1234 : set_hash_field(field);
1235 :
1236 : // Check the hash code is there.
1237 : DCHECK(HasHashCode());
1238 79898738 : uint32_t result = field >> kHashShift;
1239 : DCHECK_NE(result, 0); // Ensure that the hash value of 0 is never computed.
1240 79898738 : return result;
1241 : }
1242 :
1243 1474820 : bool String::ComputeArrayIndex(uint32_t* index) {
1244 : int length = this->length();
1245 1474820 : if (length == 0 || length > kMaxArrayIndexSize) return false;
1246 : StringCharacterStream stream(*this);
1247 949363 : return StringToArrayIndex(&stream, index);
1248 : }
1249 :
1250 7186417 : bool String::SlowAsArrayIndex(uint32_t* index) {
1251 : DisallowHeapAllocation no_gc;
1252 7186417 : if (length() <= kMaxCachedArrayIndexLength) {
1253 5711597 : Hash(); // force computation of hash code
1254 : uint32_t field = hash_field();
1255 5711601 : if ((field & kIsNotArrayIndexMask) != 0) return false;
1256 : // Isolate the array index form the full hash field.
1257 1046707 : *index = ArrayIndexValueBits::decode(field);
1258 1046707 : return true;
1259 : } else {
1260 1474820 : return ComputeArrayIndex(index);
1261 : }
1262 : }
1263 :
1264 41680 : void String::PrintOn(FILE* file) {
1265 : int length = this->length();
1266 6985420 : for (int i = 0; i < length; i++) {
1267 3471870 : PrintF(file, "%c", Get(i));
1268 : }
1269 41680 : }
1270 :
1271 18095607 : Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
1272 18095607 : if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();
1273 :
1274 : int new_size, old_size;
1275 : int old_length = string->length();
1276 13715988 : if (old_length <= new_length) return string;
1277 :
1278 12650330 : if (string->IsSeqOneByteString()) {
1279 : old_size = SeqOneByteString::SizeFor(old_length);
1280 : new_size = SeqOneByteString::SizeFor(new_length);
1281 : } else {
1282 : DCHECK(string->IsSeqTwoByteString());
1283 : old_size = SeqTwoByteString::SizeFor(old_length);
1284 : new_size = SeqTwoByteString::SizeFor(new_length);
1285 : }
1286 :
1287 12650330 : int delta = old_size - new_size;
1288 :
1289 : Address start_of_string = string->address();
1290 : DCHECK(IsAligned(start_of_string, kObjectAlignment));
1291 : DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1292 :
1293 : Heap* heap = Heap::FromWritableHeapObject(*string);
1294 : // Sizes are pointer size aligned, so that we can use filler objects
1295 : // that are a multiple of pointer size.
1296 : heap->CreateFillerObjectAt(start_of_string + new_size, delta,
1297 12650330 : ClearRecordedSlots::kNo);
1298 : // We are storing the new length using release store after creating a filler
1299 : // for the left-over space to avoid races with the sweeper thread.
1300 : string->synchronized_set_length(new_length);
1301 :
1302 12650331 : return string;
1303 : }
1304 :
1305 332095 : void SeqOneByteString::clear_padding() {
1306 332095 : int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1307 664190 : memset(reinterpret_cast<void*>(address() + data_size), 0,
1308 332095 : SizeFor(length()) - data_size);
1309 332095 : }
1310 :
1311 0 : void SeqTwoByteString::clear_padding() {
1312 0 : int data_size = SeqString::kHeaderSize + length() * kUC16Size;
1313 0 : memset(reinterpret_cast<void*>(address() + data_size), 0,
1314 0 : SizeFor(length()) - data_size);
1315 0 : }
1316 :
1317 273964 : uint16_t ConsString::ConsStringGet(int index) {
1318 : DCHECK(index >= 0 && index < this->length());
1319 :
1320 : // Check for a flattened cons string
1321 273964 : if (second()->length() == 0) {
1322 : String left = first();
1323 : return left->Get(index);
1324 : }
1325 :
1326 : String string = String::cast(*this);
1327 :
1328 : while (true) {
1329 2247532 : if (StringShape(string).IsCons()) {
1330 : ConsString cons_string = ConsString::cast(string);
1331 : String left = cons_string->first();
1332 927564 : if (left->length() > index) {
1333 : string = left;
1334 : } else {
1335 407450 : index -= left->length();
1336 : string = cons_string->second();
1337 : }
1338 : } else {
1339 196202 : return string->Get(index);
1340 : }
1341 : }
1342 :
1343 : UNREACHABLE();
1344 : }
1345 :
1346 4218 : uint16_t ThinString::ThinStringGet(int index) { return actual()->Get(index); }
1347 :
1348 1094429 : uint16_t SlicedString::SlicedStringGet(int index) {
1349 2188858 : return parent()->Get(offset() + index);
1350 : }
1351 :
1352 87433 : int ExternalString::ExternalPayloadSize() const {
1353 87433 : int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1354 87433 : return length() * length_multiplier;
1355 : }
1356 :
1357 1503180 : FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
1358 4509540 : : Relocatable(isolate), str_(str.location()), length_(str->length()) {
1359 1503180 : PostGarbageCollection();
1360 1503180 : }
1361 :
1362 1345 : FlatStringReader::FlatStringReader(Isolate* isolate, Vector<const char> input)
1363 : : Relocatable(isolate),
1364 : str_(nullptr),
1365 : is_one_byte_(true),
1366 : length_(input.length()),
1367 4035 : start_(input.start()) {}
1368 :
1369 1503228 : void FlatStringReader::PostGarbageCollection() {
1370 1503228 : if (str_ == nullptr) return;
1371 : Handle<String> str(str_);
1372 : DCHECK(str->IsFlat());
1373 : DisallowHeapAllocation no_gc;
1374 : // This does not actually prevent the vector from being relocated later.
1375 1503228 : String::FlatContent content = str->GetFlatContent(no_gc);
1376 : DCHECK(content.IsFlat());
1377 1503228 : is_one_byte_ = content.IsOneByte();
1378 1503228 : if (is_one_byte_) {
1379 1365689 : start_ = content.ToOneByteVector().start();
1380 : } else {
1381 137539 : start_ = content.ToUC16Vector().start();
1382 : }
1383 : }
1384 :
1385 108627 : void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
1386 : DCHECK(!cons_string.is_null());
1387 124635 : root_ = cons_string;
1388 124635 : consumed_ = offset;
1389 : // Force stack blown condition to trigger restart.
1390 124635 : depth_ = 1;
1391 124635 : maximum_depth_ = kStackSize + depth_;
1392 : DCHECK(StackBlown());
1393 108627 : }
1394 :
1395 60940358 : String ConsStringIterator::Continue(int* offset_out) {
1396 : DCHECK_NE(depth_, 0);
1397 : DCHECK_EQ(0, *offset_out);
1398 60940358 : bool blew_stack = StackBlown();
1399 : String string;
1400 : // Get the next leaf if there is one.
1401 60940358 : if (!blew_stack) string = NextLeaf(&blew_stack);
1402 : // Restart search from root.
1403 60940358 : if (blew_stack) {
1404 : DCHECK(string.is_null());
1405 149020 : string = Search(offset_out);
1406 : }
1407 : // Ensure future calls return null immediately.
1408 60940358 : if (string.is_null()) Reset(ConsString());
1409 60940358 : return string;
1410 : }
1411 :
1412 149020 : String ConsStringIterator::Search(int* offset_out) {
1413 149020 : ConsString cons_string = root_;
1414 : // Reset the stack, pushing the root string.
1415 149020 : depth_ = 1;
1416 149020 : maximum_depth_ = 1;
1417 149020 : frames_[0] = cons_string;
1418 149020 : const int consumed = consumed_;
1419 : int offset = 0;
1420 : while (true) {
1421 : // Loop until the string is found which contains the target offset.
1422 : String string = cons_string->first();
1423 : int length = string->length();
1424 : int32_t type;
1425 34893099 : if (consumed < offset + length) {
1426 : // Target offset is in the left branch.
1427 : // Keep going if we're still in a ConString.
1428 : type = string->map()->instance_type();
1429 33572064 : if ((type & kStringRepresentationMask) == kConsStringTag) {
1430 : cons_string = ConsString::cast(string);
1431 : PushLeft(cons_string);
1432 : continue;
1433 : }
1434 : // Tell the stack we're done descending.
1435 : AdjustMaximumDepth();
1436 : } else {
1437 : // Descend right.
1438 : // Update progress through the string.
1439 : offset += length;
1440 : // Keep going if we're still in a ConString.
1441 : string = cons_string->second();
1442 : type = string->map()->instance_type();
1443 1321035 : if ((type & kStringRepresentationMask) == kConsStringTag) {
1444 : cons_string = ConsString::cast(string);
1445 : PushRight(cons_string);
1446 : continue;
1447 : }
1448 : // Need this to be updated for the current string.
1449 : length = string->length();
1450 : // Account for the possibility of an empty right leaf.
1451 : // This happens only if we have asked for an offset outside the string.
1452 30090 : if (length == 0) {
1453 : // Reset so future operations will return null immediately.
1454 : Reset(ConsString());
1455 145 : return String();
1456 : }
1457 : // Tell the stack we're done descending.
1458 : AdjustMaximumDepth();
1459 : // Pop stack so next iteration is in correct place.
1460 : Pop();
1461 : }
1462 : DCHECK_NE(length, 0);
1463 : // Adjust return values and exit.
1464 148875 : consumed_ = offset + length;
1465 148875 : *offset_out = consumed - offset;
1466 148875 : return string;
1467 : }
1468 : UNREACHABLE();
1469 : }
1470 :
1471 60792033 : String ConsStringIterator::NextLeaf(bool* blew_stack) {
1472 : while (true) {
1473 : // Tree traversal complete.
1474 61210215 : if (depth_ == 0) {
1475 35377 : *blew_stack = false;
1476 35377 : return String();
1477 : }
1478 : // We've lost track of higher nodes.
1479 61174838 : if (StackBlown()) {
1480 695 : *blew_stack = true;
1481 695 : return String();
1482 : }
1483 : // Go right.
1484 122348286 : ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1485 : String string = cons_string->second();
1486 : int32_t type = string->map()->instance_type();
1487 61174143 : if ((type & kStringRepresentationMask) != kConsStringTag) {
1488 : // Pop stack so next iteration is in correct place.
1489 : Pop();
1490 : int length = string->length();
1491 : // Could be a flattened ConsString.
1492 30927981 : if (length == 0) continue;
1493 30509799 : consumed_ += length;
1494 30509799 : return string;
1495 : }
1496 : cons_string = ConsString::cast(string);
1497 : PushRight(cons_string);
1498 : // Need to traverse all the way left.
1499 : while (true) {
1500 : // Continue left.
1501 : string = cons_string->first();
1502 : type = string->map()->instance_type();
1503 60450624 : if ((type & kStringRepresentationMask) != kConsStringTag) {
1504 : AdjustMaximumDepth();
1505 : int length = string->length();
1506 30246162 : if (length == 0) break; // Skip empty left-hand sides of ConsStrings.
1507 30246162 : consumed_ += length;
1508 30246162 : return string;
1509 : }
1510 : cons_string = ConsString::cast(string);
1511 : PushLeft(cons_string);
1512 : }
1513 : }
1514 : UNREACHABLE();
1515 : }
1516 :
1517 : } // namespace internal
1518 120216 : } // namespace v8
|