Line data Source code
1 : // Copyright 2019 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/objects/string.h"
6 :
7 : #include "src/char-predicates.h"
8 : #include "src/conversions.h"
9 : #include "src/handles-inl.h"
10 : #include "src/heap/heap-inl.h" // For LooksValid implementation.
11 : #include "src/objects/map.h"
12 : #include "src/objects/oddball.h"
13 : #include "src/objects/string-comparator.h"
14 : #include "src/objects/string-inl.h"
15 : #include "src/ostreams.h"
16 : #include "src/string-builder-inl.h"
17 : #include "src/string-hasher.h"
18 : #include "src/string-search.h"
19 : #include "src/string-stream.h"
20 : #include "src/unicode-inl.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 :
25 5677811 : Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
26 : PretenureFlag pretenure) {
27 : DCHECK_NE(cons->second()->length(), 0);
28 :
29 : // TurboFan can create cons strings with empty first parts.
30 17033433 : while (cons->first()->length() == 0) {
31 : // We do not want to call this function recursively. Therefore we call
32 : // String::Flatten only in those cases where String::SlowFlatten is not
33 : // called again.
34 42 : if (cons->second()->IsConsString() && !cons->second()->IsFlat()) {
35 0 : cons = handle(ConsString::cast(cons->second()), isolate);
36 : } else {
37 42 : return String::Flatten(isolate, handle(cons->second(), isolate));
38 : }
39 : }
40 :
41 : DCHECK(AllowHeapAllocation::IsAllowed());
42 : int length = cons->length();
43 5677790 : PretenureFlag tenure = ObjectInYoungGeneration(*cons) ? pretenure : TENURED;
44 : Handle<SeqString> result;
45 5677790 : if (cons->IsOneByteRepresentation()) {
46 : Handle<SeqOneByteString> flat = isolate->factory()
47 : ->NewRawOneByteString(length, tenure)
48 11116840 : .ToHandleChecked();
49 : DisallowHeapAllocation no_gc;
50 5558420 : WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
51 : result = flat;
52 : } else {
53 : Handle<SeqTwoByteString> flat = isolate->factory()
54 : ->NewRawTwoByteString(length, tenure)
55 238740 : .ToHandleChecked();
56 : DisallowHeapAllocation no_gc;
57 119370 : WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
58 : result = flat;
59 : }
60 11355580 : cons->set_first(isolate, *result);
61 5677790 : cons->set_second(isolate, ReadOnlyRoots(isolate).empty_string());
62 : DCHECK(result->IsFlat());
63 5677790 : return result;
64 : }
65 :
66 306 : bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
67 : DisallowHeapAllocation no_allocation;
68 : // Externalizing twice leaks the external resource, so it's
69 : // prohibited by the API.
70 : DCHECK(this->SupportsExternalization());
71 : DCHECK(resource->IsCacheable());
72 : #ifdef ENABLE_SLOW_DCHECKS
73 : if (FLAG_enable_slow_asserts) {
74 : // Assert that the resource and the string are equivalent.
75 : DCHECK(static_cast<size_t>(this->length()) == resource->length());
76 : ScopedVector<uc16> smart_chars(this->length());
77 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
78 : DCHECK_EQ(0, memcmp(smart_chars.start(), resource->data(),
79 : resource->length() * sizeof(smart_chars[0])));
80 : }
81 : #endif // DEBUG
82 306 : int size = this->Size(); // Byte size of the original string.
83 : // Abort if size does not allow in-place conversion.
84 306 : if (size < ExternalString::kUncachedSize) return false;
85 : Isolate* isolate;
86 : // Read-only strings cannot be made external, since that would mutate the
87 : // string.
88 306 : if (!GetIsolateFromWritableObject(*this, &isolate)) return false;
89 306 : Heap* heap = isolate->heap();
90 306 : bool is_one_byte = this->IsOneByteRepresentation();
91 : bool is_internalized = this->IsInternalizedString();
92 : bool has_pointers = StringShape(*this).IsIndirect();
93 306 : if (has_pointers) {
94 64 : heap->NotifyObjectLayoutChange(*this, size, no_allocation);
95 : }
96 : // Morph the string to an external string by replacing the map and
97 : // reinitializing the fields. This won't work if the space the existing
98 : // string occupies is too small for a regular external string. Instead, we
99 : // resort to an uncached external string instead, omitting the field caching
100 : // the address of the backing store. When we encounter uncached external
101 : // strings in generated code, we need to bailout to runtime.
102 : Map new_map;
103 : ReadOnlyRoots roots(heap);
104 306 : if (size < ExternalString::kSize) {
105 110 : if (is_internalized) {
106 20 : if (is_one_byte) {
107 : new_map =
108 : roots
109 15 : .uncached_external_internalized_string_with_one_byte_data_map();
110 : } else {
111 5 : new_map = roots.uncached_external_internalized_string_map();
112 : }
113 : } else {
114 : new_map = is_one_byte
115 : ? roots.uncached_external_string_with_one_byte_data_map()
116 180 : : roots.uncached_external_string_map();
117 : }
118 : } else {
119 : new_map =
120 : is_internalized
121 : ? (is_one_byte
122 : ? roots.external_internalized_string_with_one_byte_data_map()
123 : : roots.external_internalized_string_map())
124 : : (is_one_byte ? roots.external_string_with_one_byte_data_map()
125 392 : : roots.external_string_map());
126 : }
127 :
128 : // Byte size of the external String object.
129 306 : int new_size = this->SizeFromMap(new_map);
130 : heap->CreateFillerObjectAt(this->address() + new_size, size - new_size,
131 612 : ClearRecordedSlots::kNo);
132 306 : if (has_pointers) {
133 64 : heap->ClearRecordedSlotRange(this->address(), this->address() + new_size);
134 : }
135 :
136 : // We are storing the new map using release store after creating a filler for
137 : // the left-over space to avoid races with the sweeper thread.
138 306 : this->synchronized_set_map(new_map);
139 :
140 306 : ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
141 306 : self->SetResource(isolate, resource);
142 : heap->RegisterExternalString(*this);
143 306 : if (is_internalized) self->Hash(); // Force regeneration of the hash value.
144 : return true;
145 : }
146 :
147 315 : bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
148 : DisallowHeapAllocation no_allocation;
149 : // Externalizing twice leaks the external resource, so it's
150 : // prohibited by the API.
151 : DCHECK(this->SupportsExternalization());
152 : DCHECK(resource->IsCacheable());
153 : #ifdef ENABLE_SLOW_DCHECKS
154 : if (FLAG_enable_slow_asserts) {
155 : // Assert that the resource and the string are equivalent.
156 : DCHECK(static_cast<size_t>(this->length()) == resource->length());
157 : if (this->IsTwoByteRepresentation()) {
158 : ScopedVector<uint16_t> smart_chars(this->length());
159 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
160 : DCHECK(String::IsOneByte(smart_chars.start(), this->length()));
161 : }
162 : ScopedVector<char> smart_chars(this->length());
163 : String::WriteToFlat(*this, smart_chars.start(), 0, this->length());
164 : DCHECK_EQ(0, memcmp(smart_chars.start(), resource->data(),
165 : resource->length() * sizeof(smart_chars[0])));
166 : }
167 : #endif // DEBUG
168 315 : int size = this->Size(); // Byte size of the original string.
169 : // Abort if size does not allow in-place conversion.
170 315 : if (size < ExternalString::kUncachedSize) return false;
171 : Isolate* isolate;
172 : // Read-only strings cannot be made external, since that would mutate the
173 : // string.
174 315 : if (!GetIsolateFromWritableObject(*this, &isolate)) return false;
175 315 : Heap* heap = isolate->heap();
176 : bool is_internalized = this->IsInternalizedString();
177 : bool has_pointers = StringShape(*this).IsIndirect();
178 :
179 315 : if (has_pointers) {
180 29 : heap->NotifyObjectLayoutChange(*this, size, no_allocation);
181 : }
182 :
183 : // Morph the string to an external string by replacing the map and
184 : // reinitializing the fields. This won't work if the space the existing
185 : // string occupies is too small for a regular external string. Instead, we
186 : // resort to an uncached external string instead, omitting the field caching
187 : // the address of the backing store. When we encounter uncached external
188 : // strings in generated code, we need to bailout to runtime.
189 : Map new_map;
190 : ReadOnlyRoots roots(heap);
191 315 : if (size < ExternalString::kSize) {
192 : new_map = is_internalized
193 : ? roots.uncached_external_one_byte_internalized_string_map()
194 38 : : roots.uncached_external_one_byte_string_map();
195 : } else {
196 : new_map = is_internalized
197 : ? roots.external_one_byte_internalized_string_map()
198 592 : : roots.external_one_byte_string_map();
199 : }
200 :
201 : // Byte size of the external String object.
202 315 : int new_size = this->SizeFromMap(new_map);
203 : heap->CreateFillerObjectAt(this->address() + new_size, size - new_size,
204 630 : ClearRecordedSlots::kNo);
205 315 : if (has_pointers) {
206 29 : heap->ClearRecordedSlotRange(this->address(), this->address() + new_size);
207 : }
208 :
209 : // We are storing the new map using release store after creating a filler for
210 : // the left-over space to avoid races with the sweeper thread.
211 315 : this->synchronized_set_map(new_map);
212 :
213 315 : ExternalOneByteString self = ExternalOneByteString::cast(*this);
214 315 : self->SetResource(isolate, resource);
215 : heap->RegisterExternalString(*this);
216 315 : if (is_internalized) self->Hash(); // Force regeneration of the hash value.
217 : return true;
218 : }
219 :
220 1504 : bool String::SupportsExternalization() {
221 3008 : if (this->IsThinString()) {
222 0 : return i::ThinString::cast(*this)->actual()->SupportsExternalization();
223 : }
224 :
225 : Isolate* isolate;
226 : // RO_SPACE strings cannot be externalized.
227 1504 : if (!GetIsolateFromWritableObject(*this, &isolate)) {
228 : return false;
229 : }
230 :
231 : // Already an external string.
232 1486 : if (StringShape(*this).IsExternal()) {
233 : return false;
234 : }
235 :
236 1138 : return !isolate->heap()->IsInGCPostProcessing();
237 : }
238 :
239 38149 : void String::StringShortPrint(StringStream* accumulator, bool show_details) {
240 76298 : const char* internalized_marker = this->IsInternalizedString() ? "#" : "";
241 :
242 : int len = length();
243 38149 : if (len > kMaxShortPrintLength) {
244 0 : accumulator->Add("<Very long string[%s%u]>", internalized_marker, len);
245 0 : return;
246 : }
247 :
248 38149 : if (!LooksValid()) {
249 0 : accumulator->Add("<Invalid String>");
250 0 : return;
251 : }
252 :
253 38149 : StringCharacterStream stream(*this);
254 :
255 : bool truncated = false;
256 38149 : if (len > kMaxShortPrintLength) {
257 : len = kMaxShortPrintLength;
258 : truncated = true;
259 : }
260 : bool one_byte = true;
261 338507 : for (int i = 0; i < len; i++) {
262 300358 : uint16_t c = stream.GetNext();
263 :
264 300358 : if (c < 32 || c >= 127) {
265 : one_byte = false;
266 : }
267 : }
268 38149 : stream.Reset(*this);
269 38149 : if (one_byte) {
270 38149 : if (show_details)
271 36082 : accumulator->Add("<String[%s%u]: ", internalized_marker, length());
272 300358 : for (int i = 0; i < len; i++) {
273 300358 : accumulator->Put(static_cast<char>(stream.GetNext()));
274 : }
275 38149 : if (show_details) accumulator->Put('>');
276 : } else {
277 : // Backslash indicates that the string contains control
278 : // characters and that backslashes are therefore escaped.
279 0 : if (show_details)
280 0 : accumulator->Add("<String[%s%u]\\: ", internalized_marker, length());
281 0 : for (int i = 0; i < len; i++) {
282 0 : uint16_t c = stream.GetNext();
283 0 : if (c == '\n') {
284 0 : accumulator->Add("\\n");
285 0 : } else if (c == '\r') {
286 0 : accumulator->Add("\\r");
287 0 : } else if (c == '\\') {
288 0 : accumulator->Add("\\\\");
289 0 : } else if (c < 32 || c > 126) {
290 0 : accumulator->Add("\\x%02x", c);
291 : } else {
292 0 : accumulator->Put(static_cast<char>(c));
293 : }
294 : }
295 0 : if (truncated) {
296 0 : accumulator->Put('.');
297 0 : accumulator->Put('.');
298 0 : accumulator->Put('.');
299 : }
300 0 : if (show_details) accumulator->Put('>');
301 : }
302 : return;
303 : }
304 :
305 40 : void String::PrintUC16(std::ostream& os, int start, int end) { // NOLINT
306 40 : if (end < 0) end = length();
307 40 : StringCharacterStream stream(*this, start);
308 5176 : for (int i = start; i < end && stream.HasMore(); i++) {
309 10272 : os << AsUC16(stream.GetNext());
310 : }
311 40 : }
312 :
313 : // static
314 36 : Handle<String> String::Trim(Isolate* isolate, Handle<String> string,
315 : TrimMode mode) {
316 36 : string = String::Flatten(isolate, string);
317 : int const length = string->length();
318 :
319 : // Perform left trimming if requested.
320 : int left = 0;
321 36 : if (mode == kTrim || mode == kTrimStart) {
322 2412 : while (left < length && IsWhiteSpaceOrLineTerminator(string->Get(left))) {
323 567 : left++;
324 : }
325 : }
326 :
327 : // Perform right trimming if requested.
328 : int right = length;
329 36 : if (mode == kTrim || mode == kTrimEnd) {
330 108 : while (right > left &&
331 180 : IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
332 0 : right--;
333 : }
334 : }
335 :
336 36 : return isolate->factory()->NewSubString(string, left, right);
337 : }
338 :
339 3652854 : bool String::LooksValid() {
340 : // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
341 : // basically the same logic as the way we access the heap in the first place.
342 1914721 : MemoryChunk* chunk = MemoryChunk::FromHeapObject(*this);
343 : // RO_SPACE objects should always be valid.
344 3652854 : if (chunk->owner()->identity() == RO_SPACE) return true;
345 1914721 : if (chunk->heap() == nullptr) return false;
346 1914721 : return chunk->heap()->Contains(*this);
347 : }
348 :
349 : namespace {
350 :
351 : bool AreDigits(const uint8_t* s, int from, int to) {
352 34769 : for (int i = from; i < to; i++) {
353 60228 : if (s[i] < '0' || s[i] > '9') return false;
354 : }
355 :
356 : return true;
357 : }
358 :
359 : int ParseDecimalInteger(const uint8_t* s, int from, int to) {
360 : DCHECK_LT(to - from, 10); // Overflow is not possible.
361 : DCHECK(from < to);
362 1539 : int d = s[from] - '0';
363 :
364 1888 : for (int i = from + 1; i < to; i++) {
365 349 : d = 10 * d + (s[i] - '0');
366 : }
367 :
368 : return d;
369 : }
370 :
371 : } // namespace
372 :
373 : // static
374 3770471 : Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
375 : // Flatten {subject} string first.
376 3770471 : subject = String::Flatten(isolate, subject);
377 :
378 : // Fast array index case.
379 : uint32_t index;
380 3770471 : if (subject->AsArrayIndex(&index)) {
381 2170 : return isolate->factory()->NewNumberFromUint(index);
382 : }
383 :
384 : // Fast case: short integer or some sorts of junk values.
385 7536602 : if (subject->IsSeqOneByteString()) {
386 : int len = subject->length();
387 2586574 : if (len == 0) return handle(Smi::kZero, isolate);
388 :
389 : DisallowHeapAllocation no_gc;
390 : uint8_t const* data =
391 5132452 : Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
392 2566226 : bool minus = (data[0] == '-');
393 2566226 : int start_pos = (minus ? 1 : 0);
394 :
395 2566226 : if (start_pos == len) {
396 0 : return isolate->factory()->nan_value();
397 2566226 : } else if (data[start_pos] > '9') {
398 : // Fast check for a junk value. A valid string may start from a
399 : // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
400 : // or the 'I' character ('Infinity'). All of that have codes not greater
401 : // than '9' except 'I' and .
402 2416286 : if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
403 2409364 : return isolate->factory()->nan_value();
404 : }
405 176938 : } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
406 : // The maximal/minimal smi has 10 digits. If the string has less digits
407 : // we know it will fit into the smi-data type.
408 : int d = ParseDecimalInteger(data, start_pos, len);
409 1539 : if (minus) {
410 1745 : if (d == 0) return isolate->factory()->minus_zero_value();
411 1225 : d = -d;
412 108 : } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
413 0 : (len == 1 || data[0] != '0')) {
414 : // String hash is not calculated yet but all the data are present.
415 : // Update the hash field to speed up sequential convertions.
416 0 : uint32_t hash = StringHasher::MakeArrayIndexHash(d, len);
417 : #ifdef DEBUG
418 : subject->Hash(); // Force hash calculation.
419 : DCHECK_EQ(static_cast<int>(subject->hash_field()),
420 : static_cast<int>(hash));
421 : #endif
422 : subject->set_hash_field(hash);
423 : }
424 1279 : return handle(Smi::FromInt(d), isolate);
425 : }
426 : }
427 :
428 : // Slower case.
429 : int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
430 1337050 : return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
431 : }
432 :
433 153566962 : String::FlatContent String::GetFlatContent(
434 : const DisallowHeapAllocation& no_gc) {
435 : USE(no_gc);
436 : int length = this->length();
437 : StringShape shape(*this);
438 153566962 : String string = *this;
439 : int offset = 0;
440 153566962 : if (shape.representation_tag() == kConsStringTag) {
441 0 : ConsString cons = ConsString::cast(string);
442 0 : if (cons->second()->length() != 0) {
443 0 : return FlatContent();
444 : }
445 0 : string = cons->first();
446 : shape = StringShape(string);
447 153566962 : } else if (shape.representation_tag() == kSlicedStringTag) {
448 1104384 : SlicedString slice = SlicedString::cast(string);
449 : offset = slice->offset();
450 1104384 : string = slice->parent();
451 : shape = StringShape(string);
452 : DCHECK(shape.representation_tag() != kConsStringTag &&
453 : shape.representation_tag() != kSlicedStringTag);
454 : }
455 153566962 : if (shape.representation_tag() == kThinStringTag) {
456 6 : ThinString thin = ThinString::cast(string);
457 6 : string = thin->actual();
458 : shape = StringShape(string);
459 : DCHECK(!shape.IsCons());
460 : DCHECK(!shape.IsSliced());
461 : }
462 153566962 : if (shape.encoding_tag() == kOneByteStringTag) {
463 : const uint8_t* start;
464 145928590 : if (shape.representation_tag() == kSeqStringTag) {
465 : start = SeqOneByteString::cast(string)->GetChars(no_gc);
466 : } else {
467 : start = ExternalOneByteString::cast(string)->GetChars();
468 : }
469 145928581 : return FlatContent(start + offset, length);
470 : } else {
471 : DCHECK_EQ(shape.encoding_tag(), kTwoByteStringTag);
472 : const uc16* start;
473 7638372 : if (shape.representation_tag() == kSeqStringTag) {
474 : start = SeqTwoByteString::cast(string)->GetChars(no_gc);
475 : } else {
476 : start = ExternalTwoByteString::cast(string)->GetChars();
477 : }
478 7638372 : return FlatContent(start + offset, length);
479 : }
480 : }
481 :
482 5162783 : std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
483 : RobustnessFlag robust_flag,
484 : int offset, int length,
485 : int* length_return) {
486 5162783 : if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
487 : return std::unique_ptr<char[]>();
488 : }
489 : // Negative length means the to the end of the string.
490 5162783 : if (length < 0) length = kMaxInt - offset;
491 :
492 : // Compute the size of the UTF-8 string. Start at the specified offset.
493 5162783 : StringCharacterStream stream(*this, offset);
494 : int character_position = offset;
495 : int utf8_bytes = 0;
496 : int last = unibrow::Utf16::kNoPreviousCharacter;
497 38833348 : while (stream.HasMore() && character_position++ < offset + length) {
498 28507782 : uint16_t character = stream.GetNext();
499 57015564 : utf8_bytes += unibrow::Utf8::Length(character, last);
500 28507782 : last = character;
501 : }
502 :
503 5162783 : if (length_return) {
504 3584725 : *length_return = utf8_bytes;
505 : }
506 :
507 5162783 : char* result = NewArray<char>(utf8_bytes + 1);
508 :
509 : // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
510 5162783 : stream.Reset(*this, offset);
511 : character_position = offset;
512 : int utf8_byte_position = 0;
513 : last = unibrow::Utf16::kNoPreviousCharacter;
514 38833347 : while (stream.HasMore() && character_position++ < offset + length) {
515 28507781 : uint16_t character = stream.GetNext();
516 28507782 : if (allow_nulls == DISALLOW_NULLS && character == 0) {
517 : character = ' ';
518 : }
519 : utf8_byte_position +=
520 28507782 : unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
521 28507781 : last = character;
522 : }
523 5162783 : result[utf8_byte_position] = 0;
524 : return std::unique_ptr<char[]>(result);
525 : }
526 :
527 1578662 : std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
528 : RobustnessFlag robust_flag,
529 : int* length_return) {
530 1578662 : return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
531 : }
532 :
533 : template <typename sinkchar>
534 130268910 : void String::WriteToFlat(String src, sinkchar* sink, int f, int t) {
535 : DisallowHeapAllocation no_gc;
536 130268910 : String source = src;
537 : int from = f;
538 : int to = t;
539 : while (true) {
540 : DCHECK(0 <= from && from <= to && to <= source->length());
541 200755172 : switch (StringShape(source).full_representation_tag()) {
542 : case kOneByteStringTag | kExternalStringTag: {
543 : CopyChars(sink, ExternalOneByteString::cast(source)->GetChars() + from,
544 852092 : to - from);
545 : return;
546 : }
547 : case kTwoByteStringTag | kExternalStringTag: {
548 : const uc16* data = ExternalTwoByteString::cast(source)->GetChars();
549 280345 : CopyChars(sink, data + from, to - from);
550 : return;
551 : }
552 : case kOneByteStringTag | kSeqStringTag: {
553 : CopyChars(sink, SeqOneByteString::cast(source)->GetChars(no_gc) + from,
554 168839700 : to - from);
555 : return;
556 : }
557 : case kTwoByteStringTag | kSeqStringTag: {
558 : CopyChars(sink, SeqTwoByteString::cast(source)->GetChars(no_gc) + from,
559 12942614 : to - from);
560 : return;
561 : }
562 : case kOneByteStringTag | kConsStringTag:
563 : case kTwoByteStringTag | kConsStringTag: {
564 107508690 : ConsString cons_string = ConsString::cast(source);
565 107508690 : String first = cons_string->first();
566 : int boundary = first->length();
567 107508690 : if (to - boundary >= boundary - from) {
568 : // Right hand side is longer. Recurse over left.
569 45100078 : if (from < boundary) {
570 45100078 : WriteToFlat(first, sink, from, boundary);
571 90200156 : if (from == 0 && cons_string->second() == first) {
572 37036881 : CopyChars(sink + boundary, sink, boundary);
573 37036881 : return;
574 : }
575 8063197 : sink += boundary - from;
576 : from = 0;
577 : } else {
578 0 : from -= boundary;
579 : }
580 : to -= boundary;
581 8063197 : source = cons_string->second();
582 : } else {
583 : // Left hand side is longer. Recurse over right.
584 62408612 : if (to > boundary) {
585 62033787 : String second = cons_string->second();
586 : // When repeatedly appending to a string, we get a cons string that
587 : // is unbalanced to the left, a list, essentially. We inline the
588 : // common case of sequential one-byte right child.
589 62033787 : if (to - boundary == 1) {
590 53000806 : sink[boundary - from] = static_cast<sinkchar>(second->Get(0));
591 35533384 : } else if (second->IsSeqOneByteString()) {
592 24001002 : CopyChars(sink + boundary - from,
593 : SeqOneByteString::cast(second)->GetChars(no_gc),
594 48002004 : to - boundary);
595 : } else {
596 11532382 : WriteToFlat(second, sink + boundary - from, 0, to - boundary);
597 : }
598 : to = boundary;
599 : }
600 62408612 : source = first;
601 : }
602 70471809 : break;
603 : }
604 : case kOneByteStringTag | kSlicedStringTag:
605 : case kTwoByteStringTag | kSlicedStringTag: {
606 1634481 : SlicedString slice = SlicedString::cast(source);
607 1634481 : unsigned offset = slice->offset();
608 1634481 : WriteToFlat(slice->parent(), sink, from + offset, to + offset);
609 : return;
610 : }
611 : case kOneByteStringTag | kThinStringTag:
612 : case kTwoByteStringTag | kThinStringTag:
613 14453 : source = ThinString::cast(source)->actual();
614 14453 : break;
615 : }
616 : }
617 : }
618 :
619 : template <typename SourceChar>
620 53431 : static void CalculateLineEndsImpl(Isolate* isolate, std::vector<int>* line_ends,
621 : Vector<const SourceChar> src,
622 : bool include_ending_line) {
623 106862 : const int src_len = src.length();
624 161402575 : for (int i = 0; i < src_len - 1; i++) {
625 322698288 : SourceChar current = src[i];
626 322698288 : SourceChar next = src[i + 1];
627 161349144 : if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
628 : }
629 :
630 159725 : if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
631 35476 : line_ends->push_back(src_len - 1);
632 : }
633 53431 : if (include_ending_line) {
634 : // Include one character beyond the end of script. The rewriter uses that
635 : // position for the implicit return statement.
636 50999 : line_ends->push_back(src_len);
637 : }
638 53431 : }
639 :
640 53431 : Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
641 : Handle<String> src,
642 : bool include_ending_line) {
643 53431 : src = Flatten(isolate, src);
644 : // Rough estimate of line count based on a roughly estimated average
645 : // length of (unpacked) code.
646 53431 : int line_count_estimate = src->length() >> 4;
647 : std::vector<int> line_ends;
648 53431 : line_ends.reserve(line_count_estimate);
649 : {
650 : DisallowHeapAllocation no_allocation; // ensure vectors stay valid.
651 : // Dispatch on type of strings.
652 53431 : String::FlatContent content = src->GetFlatContent(no_allocation);
653 : DCHECK(content.IsFlat());
654 53431 : if (content.IsOneByte()) {
655 : CalculateLineEndsImpl(isolate, &line_ends, content.ToOneByteVector(),
656 106830 : include_ending_line);
657 : } else {
658 : CalculateLineEndsImpl(isolate, &line_ends, content.ToUC16Vector(),
659 32 : include_ending_line);
660 : }
661 : }
662 106862 : int line_count = static_cast<int>(line_ends.size());
663 53431 : Handle<FixedArray> array = isolate->factory()->NewFixedArray(line_count);
664 9847102 : for (int i = 0; i < line_count; i++) {
665 19587342 : array->set(i, Smi::FromInt(line_ends[i]));
666 : }
667 106862 : return array;
668 : }
669 :
670 16846476 : bool String::SlowEquals(String other) {
671 : DisallowHeapAllocation no_gc;
672 : // Fast check: negative check with lengths.
673 : int len = length();
674 16846476 : if (len != other->length()) return false;
675 9435772 : if (len == 0) return true;
676 :
677 : // Fast check: if at least one ThinString is involved, dereference it/them
678 : // and restart.
679 28297075 : if (this->IsThinString() || other->IsThinString()) {
680 10244 : if (other->IsThinString()) other = ThinString::cast(other)->actual();
681 10242 : if (this->IsThinString()) {
682 10240 : return ThinString::cast(*this)->actual()->Equals(other);
683 : } else {
684 2 : return this->Equals(other);
685 : }
686 : }
687 :
688 : // Fast check: if hash code is computed for both strings
689 : // a fast negative check can be performed.
690 18850622 : if (HasHashCode() && other->HasHashCode()) {
691 : #ifdef ENABLE_SLOW_DCHECKS
692 : if (FLAG_enable_slow_asserts) {
693 : if (Hash() != other->Hash()) {
694 : bool found_difference = false;
695 : for (int i = 0; i < len; i++) {
696 : if (Get(i) != other->Get(i)) {
697 : found_difference = true;
698 : break;
699 : }
700 : }
701 : DCHECK(found_difference);
702 : }
703 : }
704 : #endif
705 9407732 : if (Hash() != other->Hash()) return false;
706 : }
707 :
708 : // We know the strings are both non-empty. Compare the first chars
709 : // before we try to flatten the strings.
710 7211699 : if (this->Get(0) != other->Get(0)) return false;
711 :
712 14348706 : if (IsSeqOneByteString() && other->IsSeqOneByteString()) {
713 : const uint8_t* str1 = SeqOneByteString::cast(*this)->GetChars(no_gc);
714 : const uint8_t* str2 = SeqOneByteString::cast(other)->GetChars(no_gc);
715 7143388 : return CompareRawStringContents(str1, str2, len);
716 : }
717 :
718 61878 : StringComparator comparator;
719 61878 : return comparator.Equals(*this, other);
720 : }
721 :
722 733907 : bool String::SlowEquals(Isolate* isolate, Handle<String> one,
723 : Handle<String> two) {
724 : // Fast check: negative check with lengths.
725 : int one_length = one->length();
726 733907 : if (one_length != two->length()) return false;
727 725417 : if (one_length == 0) return true;
728 :
729 : // Fast check: if at least one ThinString is involved, dereference it/them
730 : // and restart.
731 2901668 : if (one->IsThinString() || two->IsThinString()) {
732 46 : if (one->IsThinString())
733 0 : one = handle(ThinString::cast(*one)->actual(), isolate);
734 46 : if (two->IsThinString())
735 46 : two = handle(ThinString::cast(*two)->actual(), isolate);
736 23 : return String::Equals(isolate, one, two);
737 : }
738 :
739 : // Fast check: if hash code is computed for both strings
740 : // a fast negative check can be performed.
741 736538 : if (one->HasHashCode() && two->HasHashCode()) {
742 : #ifdef ENABLE_SLOW_DCHECKS
743 : if (FLAG_enable_slow_asserts) {
744 : if (one->Hash() != two->Hash()) {
745 : bool found_difference = false;
746 : for (int i = 0; i < one_length; i++) {
747 : if (one->Get(i) != two->Get(i)) {
748 : found_difference = true;
749 : break;
750 : }
751 : }
752 : DCHECK(found_difference);
753 : }
754 : }
755 : #endif
756 12864 : if (one->Hash() != two->Hash()) return false;
757 : }
758 :
759 : // We know the strings are both non-empty. Compare the first chars
760 : // before we try to flatten the strings.
761 2176104 : if (one->Get(0) != two->Get(0)) return false;
762 :
763 725226 : one = String::Flatten(isolate, one);
764 725226 : two = String::Flatten(isolate, two);
765 :
766 : DisallowHeapAllocation no_gc;
767 725226 : String::FlatContent flat1 = one->GetFlatContent(no_gc);
768 725226 : String::FlatContent flat2 = two->GetFlatContent(no_gc);
769 :
770 725226 : if (flat1.IsOneByte() && flat2.IsOneByte()) {
771 : return CompareRawStringContents(flat1.ToOneByteVector().start(),
772 : flat2.ToOneByteVector().start(),
773 : one_length);
774 : } else {
775 7643076 : for (int i = 0; i < one_length; i++) {
776 7643076 : if (flat1.Get(i) != flat2.Get(i)) return false;
777 : }
778 : return true;
779 : }
780 : }
781 :
782 : // static
783 4802341 : ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
784 : Handle<String> y) {
785 : // A few fast case tests before we flatten.
786 4802341 : if (x.is_identical_to(y)) {
787 : return ComparisonResult::kEqual;
788 4802341 : } else if (y->length() == 0) {
789 : return x->length() == 0 ? ComparisonResult::kEqual
790 0 : : ComparisonResult::kGreaterThan;
791 4802341 : } else if (x->length() == 0) {
792 : return ComparisonResult::kLessThan;
793 : }
794 :
795 19209364 : int const d = x->Get(0) - y->Get(0);
796 4802341 : if (d < 0) {
797 : return ComparisonResult::kLessThan;
798 3876417 : } else if (d > 0) {
799 : return ComparisonResult::kGreaterThan;
800 : }
801 :
802 : // Slow case.
803 383280 : x = String::Flatten(isolate, x);
804 383280 : y = String::Flatten(isolate, y);
805 :
806 : DisallowHeapAllocation no_gc;
807 : ComparisonResult result = ComparisonResult::kEqual;
808 : int prefix_length = x->length();
809 383280 : if (y->length() < prefix_length) {
810 : prefix_length = y->length();
811 : result = ComparisonResult::kGreaterThan;
812 338675 : } else if (y->length() > prefix_length) {
813 : result = ComparisonResult::kLessThan;
814 : }
815 : int r;
816 383280 : String::FlatContent x_content = x->GetFlatContent(no_gc);
817 383280 : String::FlatContent y_content = y->GetFlatContent(no_gc);
818 383280 : if (x_content.IsOneByte()) {
819 : Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
820 378681 : if (y_content.IsOneByte()) {
821 : Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
822 378663 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
823 : } else {
824 : Vector<const uc16> y_chars = y_content.ToUC16Vector();
825 18 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
826 : }
827 : } else {
828 : Vector<const uc16> x_chars = x_content.ToUC16Vector();
829 4599 : if (y_content.IsOneByte()) {
830 : Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
831 18 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
832 : } else {
833 : Vector<const uc16> y_chars = y_content.ToUC16Vector();
834 4581 : r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
835 : }
836 : }
837 383280 : if (r < 0) {
838 : result = ComparisonResult::kLessThan;
839 155121 : } else if (r > 0) {
840 : result = ComparisonResult::kGreaterThan;
841 : }
842 383280 : return result;
843 : }
844 :
845 789 : Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
846 : Handle<Object> search, Handle<Object> position) {
847 1578 : if (receiver->IsNullOrUndefined(isolate)) {
848 540 : THROW_NEW_ERROR_RETURN_FAILURE(
849 : isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
850 : isolate->factory()->NewStringFromAsciiChecked(
851 : "String.prototype.indexOf")));
852 : }
853 : Handle<String> receiver_string;
854 1218 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
855 : Object::ToString(isolate, receiver));
856 :
857 : Handle<String> search_string;
858 1218 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
859 : Object::ToString(isolate, search));
860 :
861 1227 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
862 : Object::ToInteger(isolate, position));
863 :
864 : uint32_t index = receiver_string->ToValidIndex(*position);
865 : return Smi::FromInt(
866 1200 : String::IndexOf(isolate, receiver_string, search_string, index));
867 : }
868 :
869 : namespace {
870 :
871 : template <typename T>
872 1050980 : int SearchString(Isolate* isolate, String::FlatContent receiver_content,
873 : Vector<T> pat_vector, int start_index) {
874 1050980 : if (receiver_content.IsOneByte()) {
875 : return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
876 1047110 : start_index);
877 : }
878 : return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
879 3870 : start_index);
880 : }
881 :
882 : } // namespace
883 :
884 1054074 : int String::IndexOf(Isolate* isolate, Handle<String> receiver,
885 : Handle<String> search, int start_index) {
886 : DCHECK_LE(0, start_index);
887 : DCHECK(start_index <= receiver->length());
888 :
889 1054074 : uint32_t search_length = search->length();
890 1054074 : if (search_length == 0) return start_index;
891 :
892 1054029 : uint32_t receiver_length = receiver->length();
893 1054029 : if (start_index + search_length > receiver_length) return -1;
894 :
895 1050980 : receiver = String::Flatten(isolate, receiver);
896 1050980 : search = String::Flatten(isolate, search);
897 :
898 : DisallowHeapAllocation no_gc; // ensure vectors stay valid
899 : // Extract flattened substrings of cons strings before getting encoding.
900 1050980 : String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
901 1050980 : String::FlatContent search_content = search->GetFlatContent(no_gc);
902 :
903 : // dispatch on type of strings
904 1050980 : if (search_content.IsOneByte()) {
905 1049225 : Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
906 : return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
907 1049225 : start_index);
908 : }
909 1755 : Vector<const uc16> pat_vector = search_content.ToUC16Vector();
910 : return SearchString<const uc16>(isolate, receiver_content, pat_vector,
911 1755 : start_index);
912 : }
913 :
914 3894 : MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
915 : Handle<String> replacement,
916 : int start_index) {
917 : DCHECK_GE(start_index, 0);
918 :
919 : Factory* factory = isolate->factory();
920 :
921 : const int replacement_length = replacement->length();
922 3894 : const int captures_length = match->CaptureCount();
923 :
924 3894 : replacement = String::Flatten(isolate, replacement);
925 :
926 : Handle<String> dollar_string =
927 3894 : factory->LookupSingleCharacterStringFromCode('$');
928 : int next_dollar_ix =
929 3894 : String::IndexOf(isolate, replacement, dollar_string, start_index);
930 3894 : if (next_dollar_ix < 0) {
931 153 : return replacement;
932 : }
933 :
934 3741 : IncrementalStringBuilder builder(isolate);
935 :
936 3741 : if (next_dollar_ix > 0) {
937 279 : builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
938 : }
939 :
940 : while (true) {
941 7962 : const int peek_ix = next_dollar_ix + 1;
942 7962 : if (peek_ix >= replacement_length) {
943 : builder.AppendCharacter('$');
944 18 : return builder.Finish();
945 : }
946 :
947 : int continue_from_ix = -1;
948 15888 : const uint16_t peek = replacement->Get(peek_ix);
949 7944 : switch (peek) {
950 : case '$': // $$
951 : builder.AppendCharacter('$');
952 54 : continue_from_ix = peek_ix + 1;
953 54 : break;
954 : case '&': // $& - match
955 54 : builder.AppendString(match->GetMatch());
956 54 : continue_from_ix = peek_ix + 1;
957 54 : break;
958 : case '`': // $` - prefix
959 36 : builder.AppendString(match->GetPrefix());
960 36 : continue_from_ix = peek_ix + 1;
961 36 : break;
962 : case '\'': // $' - suffix
963 36 : builder.AppendString(match->GetSuffix());
964 36 : continue_from_ix = peek_ix + 1;
965 36 : break;
966 : case '0':
967 : case '1':
968 : case '2':
969 : case '3':
970 : case '4':
971 : case '5':
972 : case '6':
973 : case '7':
974 : case '8':
975 : case '9': {
976 : // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
977 7278 : int scaled_index = (peek - '0');
978 : int advance = 1;
979 :
980 7278 : if (peek_ix + 1 < replacement_length) {
981 12108 : const uint16_t next_peek = replacement->Get(peek_ix + 1);
982 6054 : if (next_peek >= '0' && next_peek <= '9') {
983 1341 : const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
984 1341 : if (new_scaled_index < captures_length) {
985 : scaled_index = new_scaled_index;
986 : advance = 2;
987 : }
988 : }
989 : }
990 :
991 7278 : if (scaled_index == 0 || scaled_index >= captures_length) {
992 : builder.AppendCharacter('$');
993 : continue_from_ix = peek_ix;
994 7368 : break;
995 : }
996 :
997 : bool capture_exists;
998 : Handle<String> capture;
999 14376 : ASSIGN_RETURN_ON_EXCEPTION(
1000 : isolate, capture, match->GetCapture(scaled_index, &capture_exists),
1001 : String);
1002 7188 : if (capture_exists) builder.AppendString(capture);
1003 7188 : continue_from_ix = peek_ix + advance;
1004 7188 : break;
1005 : }
1006 : case '<': { // $<name> - named capture
1007 : typedef String::Match::CaptureState CaptureState;
1008 :
1009 459 : if (!match->HasNamedCaptures()) {
1010 : builder.AppendCharacter('$');
1011 : continue_from_ix = peek_ix;
1012 540 : break;
1013 : }
1014 :
1015 : Handle<String> bracket_string =
1016 378 : factory->LookupSingleCharacterStringFromCode('>');
1017 : const int closing_bracket_ix =
1018 378 : String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1019 :
1020 378 : if (closing_bracket_ix == -1) {
1021 : // No closing bracket was found, treat '$<' as a string literal.
1022 : builder.AppendCharacter('$');
1023 : continue_from_ix = peek_ix;
1024 72 : break;
1025 : }
1026 :
1027 : Handle<String> capture_name =
1028 306 : factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1029 : Handle<String> capture;
1030 : CaptureState capture_state;
1031 612 : ASSIGN_RETURN_ON_EXCEPTION(
1032 : isolate, capture,
1033 : match->GetNamedCapture(capture_name, &capture_state), String);
1034 :
1035 306 : switch (capture_state) {
1036 : case CaptureState::INVALID:
1037 : case CaptureState::UNMATCHED:
1038 : break;
1039 : case CaptureState::MATCHED:
1040 126 : builder.AppendString(capture);
1041 126 : break;
1042 : }
1043 :
1044 306 : continue_from_ix = closing_bracket_ix + 1;
1045 306 : break;
1046 : }
1047 : default:
1048 : builder.AppendCharacter('$');
1049 : continue_from_ix = peek_ix;
1050 27 : break;
1051 : }
1052 :
1053 : // Go the the next $ in the replacement.
1054 : // TODO(jgruber): Single-char lookups could be much more efficient.
1055 : DCHECK_NE(continue_from_ix, -1);
1056 : next_dollar_ix =
1057 7944 : String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1058 :
1059 : // Return if there are no more $ characters in the replacement. If we
1060 : // haven't reached the end, we need to append the suffix.
1061 7944 : if (next_dollar_ix < 0) {
1062 3723 : if (continue_from_ix < replacement_length) {
1063 : builder.AppendString(factory->NewSubString(
1064 1311 : replacement, continue_from_ix, replacement_length));
1065 : }
1066 3723 : return builder.Finish();
1067 : }
1068 :
1069 : // Append substring between the previous and the next $ character.
1070 4221 : if (next_dollar_ix > continue_from_ix) {
1071 : builder.AppendString(
1072 72 : factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1073 : }
1074 : }
1075 :
1076 : UNREACHABLE();
1077 : }
1078 :
1079 : namespace { // for String.Prototype.lastIndexOf
1080 :
1081 : template <typename schar, typename pchar>
1082 1222 : int StringMatchBackwards(Vector<const schar> subject,
1083 : Vector<const pchar> pattern, int idx) {
1084 1222 : int pattern_length = pattern.length();
1085 : DCHECK_GE(pattern_length, 1);
1086 : DCHECK(idx + pattern_length <= subject.length());
1087 :
1088 : if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1089 0 : for (int i = 0; i < pattern_length; i++) {
1090 0 : uc16 c = pattern[i];
1091 0 : if (c > String::kMaxOneByteCharCode) {
1092 : return -1;
1093 : }
1094 : }
1095 : }
1096 :
1097 1222 : pchar pattern_first_char = pattern[0];
1098 4624 : for (int i = idx; i >= 0; i--) {
1099 11422 : if (subject[i] != pattern_first_char) continue;
1100 : int j = 1;
1101 1778 : while (j < pattern_length) {
1102 2073 : if (pattern[j] != subject[i + j]) {
1103 : break;
1104 : }
1105 673 : j++;
1106 : }
1107 1105 : if (j == pattern_length) {
1108 : return i;
1109 : }
1110 : }
1111 : return -1;
1112 : }
1113 :
1114 : } // namespace
1115 :
1116 1636 : Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
1117 : Handle<Object> search, Handle<Object> position) {
1118 3272 : if (receiver->IsNullOrUndefined(isolate)) {
1119 486 : THROW_NEW_ERROR_RETURN_FAILURE(
1120 : isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1121 : isolate->factory()->NewStringFromAsciiChecked(
1122 : "String.prototype.lastIndexOf")));
1123 : }
1124 : Handle<String> receiver_string;
1125 2948 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1126 : Object::ToString(isolate, receiver));
1127 :
1128 : Handle<String> search_string;
1129 2948 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1130 : Object::ToString(isolate, search));
1131 :
1132 2948 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1133 : Object::ToNumber(isolate, position));
1134 :
1135 : uint32_t start_index;
1136 :
1137 2948 : if (position->IsNaN()) {
1138 997 : start_index = receiver_string->length();
1139 : } else {
1140 954 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1141 : Object::ToInteger(isolate, position));
1142 : start_index = receiver_string->ToValidIndex(*position);
1143 : }
1144 :
1145 1474 : uint32_t pattern_length = search_string->length();
1146 1474 : uint32_t receiver_length = receiver_string->length();
1147 :
1148 1474 : if (start_index + pattern_length > receiver_length) {
1149 1069 : start_index = receiver_length - pattern_length;
1150 : }
1151 :
1152 1474 : if (pattern_length == 0) {
1153 504 : return Smi::FromInt(start_index);
1154 : }
1155 :
1156 1222 : receiver_string = String::Flatten(isolate, receiver_string);
1157 1222 : search_string = String::Flatten(isolate, search_string);
1158 :
1159 : int last_index = -1;
1160 : DisallowHeapAllocation no_gc; // ensure vectors stay valid
1161 :
1162 1222 : String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1163 1222 : String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1164 :
1165 1222 : if (search_content.IsOneByte()) {
1166 1222 : Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1167 1222 : if (receiver_content.IsOneByte()) {
1168 : last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1169 1216 : pat_vector, start_index);
1170 : } else {
1171 : last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1172 6 : pat_vector, start_index);
1173 : }
1174 : } else {
1175 0 : Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1176 0 : if (receiver_content.IsOneByte()) {
1177 : last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1178 0 : pat_vector, start_index);
1179 : } else {
1180 : last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1181 0 : pat_vector, start_index);
1182 : }
1183 : }
1184 1222 : return Smi::FromInt(last_index);
1185 : }
1186 :
1187 18039056 : bool String::IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match) {
1188 : int slen = length();
1189 : // Can't check exact length equality, but we can check bounds.
1190 18039056 : int str_len = str.length();
1191 18039056 : if (!allow_prefix_match &&
1192 16031518 : (str_len < slen ||
1193 16031518 : str_len > slen * static_cast<int>(unibrow::Utf8::kMaxEncodedSize))) {
1194 : return false;
1195 : }
1196 :
1197 : int i = 0;
1198 : unibrow::Utf8Iterator it = unibrow::Utf8Iterator(str);
1199 99892220 : while (i < slen && !it.Done()) {
1200 174839797 : if (Get(i++) != *it) return false;
1201 83924528 : ++it;
1202 : }
1203 :
1204 12472327 : return (allow_prefix_match || i == slen) && it.Done();
1205 : }
1206 :
1207 : template <>
1208 135 : bool String::IsEqualTo(Vector<const uint8_t> str) {
1209 135 : return IsOneByteEqualTo(str);
1210 : }
1211 :
1212 : template <>
1213 0 : bool String::IsEqualTo(Vector<const uc16> str) {
1214 0 : return IsTwoByteEqualTo(str);
1215 : }
1216 :
1217 112845320 : bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
1218 : int slen = length();
1219 225690640 : if (str.length() != slen) return false;
1220 : DisallowHeapAllocation no_gc;
1221 86083028 : FlatContent content = GetFlatContent(no_gc);
1222 86083023 : if (content.IsOneByte()) {
1223 86082840 : return CompareChars(content.ToOneByteVector().start(), str.start(), slen) ==
1224 86082840 : 0;
1225 : }
1226 366 : return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
1227 : }
1228 :
1229 78371 : bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
1230 : int slen = length();
1231 156742 : if (str.length() != slen) return false;
1232 : DisallowHeapAllocation no_gc;
1233 20237 : FlatContent content = GetFlatContent(no_gc);
1234 20237 : if (content.IsOneByte()) {
1235 4722 : return CompareChars(content.ToOneByteVector().start(), str.start(), slen) ==
1236 4722 : 0;
1237 : }
1238 31030 : return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
1239 : }
1240 :
1241 69784733 : uint32_t String::ComputeAndSetHash(Isolate* isolate) {
1242 : DisallowHeapAllocation no_gc;
1243 : // Should only be called if hash code has not yet been computed.
1244 : DCHECK(!HasHashCode());
1245 :
1246 : // Store the hash code in the object.
1247 69784733 : uint32_t field = IteratingStringHasher::Hash(*this, HashSeed(isolate));
1248 : set_hash_field(field);
1249 :
1250 : // Check the hash code is there.
1251 : DCHECK(HasHashCode());
1252 69784738 : uint32_t result = field >> kHashShift;
1253 : DCHECK_NE(result, 0); // Ensure that the hash value of 0 is never computed.
1254 69784738 : return result;
1255 : }
1256 :
1257 1465969 : bool String::ComputeArrayIndex(uint32_t* index) {
1258 : int length = this->length();
1259 1465969 : if (length == 0 || length > kMaxArrayIndexSize) return false;
1260 943994 : StringCharacterStream stream(*this);
1261 943994 : return StringToArrayIndex(&stream, index);
1262 : }
1263 :
1264 8277487 : bool String::SlowAsArrayIndex(uint32_t* index) {
1265 : DisallowHeapAllocation no_gc;
1266 8277487 : if (length() <= kMaxCachedArrayIndexLength) {
1267 6811518 : Hash(); // force computation of hash code
1268 : uint32_t field = hash_field();
1269 6811518 : if ((field & kIsNotArrayIndexMask) != 0) return false;
1270 : // Isolate the array index form the full hash field.
1271 2218937 : *index = ArrayIndexValueBits::decode(field);
1272 2218937 : return true;
1273 : } else {
1274 1465969 : return ComputeArrayIndex(index);
1275 : }
1276 : }
1277 :
1278 41680 : void String::PrintOn(FILE* file) {
1279 : int length = this->length();
1280 3513550 : for (int i = 0; i < length; i++) {
1281 3471870 : PrintF(file, "%c", Get(i));
1282 : }
1283 41680 : }
1284 :
1285 15507984 : Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
1286 20793466 : if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();
1287 :
1288 : int new_size, old_size;
1289 : int old_length = string->length();
1290 12865243 : if (old_length <= new_length) return string;
1291 :
1292 23599174 : if (string->IsSeqOneByteString()) {
1293 : old_size = SeqOneByteString::SizeFor(old_length);
1294 : new_size = SeqOneByteString::SizeFor(new_length);
1295 : } else {
1296 : DCHECK(string->IsSeqTwoByteString());
1297 : old_size = SeqTwoByteString::SizeFor(old_length);
1298 : new_size = SeqTwoByteString::SizeFor(new_length);
1299 : }
1300 :
1301 11799587 : int delta = old_size - new_size;
1302 :
1303 : Address start_of_string = string->address();
1304 : DCHECK(IsAligned(start_of_string, kObjectAlignment));
1305 : DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1306 :
1307 : Heap* heap = Heap::FromWritableHeapObject(*string);
1308 : // Sizes are pointer size aligned, so that we can use filler objects
1309 : // that are a multiple of pointer size.
1310 : heap->CreateFillerObjectAt(start_of_string + new_size, delta,
1311 11799587 : ClearRecordedSlots::kNo);
1312 : // We are storing the new length using release store after creating a filler
1313 : // for the left-over space to avoid races with the sweeper thread.
1314 : string->synchronized_set_length(new_length);
1315 :
1316 11799587 : return string;
1317 : }
1318 :
1319 328623 : void SeqOneByteString::clear_padding() {
1320 328623 : int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1321 328623 : memset(reinterpret_cast<void*>(address() + data_size), 0,
1322 657246 : SizeFor(length()) - data_size);
1323 328623 : }
1324 :
1325 0 : void SeqTwoByteString::clear_padding() {
1326 0 : int data_size = SeqString::kHeaderSize + length() * kUC16Size;
1327 0 : memset(reinterpret_cast<void*>(address() + data_size), 0,
1328 0 : SizeFor(length()) - data_size);
1329 0 : }
1330 :
1331 273197 : uint16_t ConsString::ConsStringGet(int index) {
1332 : DCHECK(index >= 0 && index < this->length());
1333 :
1334 : // Check for a flattened cons string
1335 546394 : if (second()->length() == 0) {
1336 77643 : String left = first();
1337 : return left->Get(index);
1338 : }
1339 :
1340 195554 : String string = String::cast(*this);
1341 :
1342 : while (true) {
1343 1120628 : if (StringShape(string).IsCons()) {
1344 925074 : ConsString cons_string = ConsString::cast(string);
1345 925074 : String left = cons_string->first();
1346 925074 : if (left->length() > index) {
1347 517624 : string = left;
1348 : } else {
1349 407450 : index -= left->length();
1350 407450 : string = cons_string->second();
1351 : }
1352 : } else {
1353 195554 : return string->Get(index);
1354 : }
1355 : }
1356 :
1357 925074 : UNREACHABLE();
1358 : }
1359 :
1360 4236 : uint16_t ThinString::ThinStringGet(int index) { return actual()->Get(index); }
1361 :
1362 1093567 : uint16_t SlicedString::SlicedStringGet(int index) {
1363 2187134 : return parent()->Get(offset() + index);
1364 : }
1365 :
1366 86945 : int ExternalString::ExternalPayloadSize() const {
1367 86945 : int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1368 86945 : return length() * length_multiplier;
1369 : }
1370 :
1371 1550817 : FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
1372 4652451 : : Relocatable(isolate), str_(str.location()), length_(str->length()) {
1373 1550817 : PostGarbageCollection();
1374 1550817 : }
1375 :
1376 1345 : FlatStringReader::FlatStringReader(Isolate* isolate, Vector<const char> input)
1377 : : Relocatable(isolate),
1378 : str_(nullptr),
1379 : is_one_byte_(true),
1380 1345 : length_(input.length()),
1381 4035 : start_(input.start()) {}
1382 :
1383 1551197 : void FlatStringReader::PostGarbageCollection() {
1384 1551197 : if (str_ == nullptr) return;
1385 : Handle<String> str(str_);
1386 : DCHECK(str->IsFlat());
1387 : DisallowHeapAllocation no_gc;
1388 : // This does not actually prevent the vector from being relocated later.
1389 1551197 : String::FlatContent content = str->GetFlatContent(no_gc);
1390 : DCHECK(content.IsFlat());
1391 3102394 : is_one_byte_ = content.IsOneByte();
1392 1551197 : if (is_one_byte_) {
1393 1413669 : start_ = content.ToOneByteVector().start();
1394 : } else {
1395 137528 : start_ = content.ToUC16Vector().start();
1396 : }
1397 : }
1398 :
1399 121817 : void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
1400 : DCHECK(!cons_string.is_null());
1401 137983 : root_ = cons_string;
1402 137983 : consumed_ = offset;
1403 : // Force stack blown condition to trigger restart.
1404 137983 : depth_ = 1;
1405 137983 : maximum_depth_ = kStackSize + depth_;
1406 : DCHECK(StackBlown());
1407 121817 : }
1408 :
1409 10635820 : String ConsStringIterator::Continue(int* offset_out) {
1410 : DCHECK_NE(depth_, 0);
1411 : DCHECK_EQ(0, *offset_out);
1412 10635820 : bool blew_stack = StackBlown();
1413 : String string;
1414 : // Get the next leaf if there is one.
1415 10635820 : if (!blew_stack) string = NextLeaf(&blew_stack);
1416 : // Restart search from root.
1417 10635820 : if (blew_stack) {
1418 : DCHECK(string.is_null());
1419 162358 : string = Search(offset_out);
1420 : }
1421 : // Ensure future calls return null immediately.
1422 10635820 : if (string.is_null()) Reset(ConsString());
1423 10635820 : return string;
1424 : }
1425 :
1426 324571 : String ConsStringIterator::Search(int* offset_out) {
1427 162358 : ConsString cons_string = root_;
1428 : // Reset the stack, pushing the root string.
1429 162358 : depth_ = 1;
1430 162358 : maximum_depth_ = 1;
1431 162358 : frames_[0] = cons_string;
1432 162358 : const int consumed = consumed_;
1433 : int offset = 0;
1434 : while (true) {
1435 : // Loop until the string is found which contains the target offset.
1436 34905794 : String string = cons_string->first();
1437 : int length = string->length();
1438 : int32_t type;
1439 34905794 : if (consumed < offset + length) {
1440 : // Target offset is in the left branch.
1441 : // Keep going if we're still in a ConString.
1442 : type = string->map()->instance_type();
1443 33584769 : if ((type & kStringRepresentationMask) == kConsStringTag) {
1444 33452491 : cons_string = ConsString::cast(string);
1445 : PushLeft(cons_string);
1446 : continue;
1447 : }
1448 : // Tell the stack we're done descending.
1449 : AdjustMaximumDepth();
1450 : } else {
1451 : // Descend right.
1452 : // Update progress through the string.
1453 : offset += length;
1454 : // Keep going if we're still in a ConString.
1455 1321025 : string = cons_string->second();
1456 : type = string->map()->instance_type();
1457 1321025 : if ((type & kStringRepresentationMask) == kConsStringTag) {
1458 1290945 : cons_string = ConsString::cast(string);
1459 : PushRight(cons_string);
1460 : continue;
1461 : }
1462 : // Need this to be updated for the current string.
1463 : length = string->length();
1464 : // Account for the possibility of an empty right leaf.
1465 : // This happens only if we have asked for an offset outside the string.
1466 30080 : if (length == 0) {
1467 : // Reset so future operations will return null immediately.
1468 : Reset(ConsString());
1469 145 : return String();
1470 : }
1471 : // Tell the stack we're done descending.
1472 : AdjustMaximumDepth();
1473 : // Pop stack so next iteration is in correct place.
1474 : Pop();
1475 : }
1476 : DCHECK_NE(length, 0);
1477 : // Adjust return values and exit.
1478 162213 : consumed_ = offset + length;
1479 162213 : *offset_out = consumed - offset;
1480 162213 : return string;
1481 : }
1482 : UNREACHABLE();
1483 : }
1484 :
1485 26412107 : String ConsStringIterator::NextLeaf(bool* blew_stack) {
1486 : while (true) {
1487 : // Tree traversal complete.
1488 10892303 : if (depth_ == 0) {
1489 35341 : *blew_stack = false;
1490 35341 : return String();
1491 : }
1492 : // We've lost track of higher nodes.
1493 10856962 : if (StackBlown()) {
1494 695 : *blew_stack = true;
1495 695 : return String();
1496 : }
1497 : // Go right.
1498 21712534 : ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1499 10856267 : String string = cons_string->second();
1500 : int32_t type = string->map()->instance_type();
1501 10856267 : if ((type & kStringRepresentationMask) != kConsStringTag) {
1502 : // Pop stack so next iteration is in correct place.
1503 : Pop();
1504 : int length = string->length();
1505 : // Could be a flattened ConsString.
1506 6193425 : if (length == 0) continue;
1507 5357133 : consumed_ += length;
1508 5357133 : return string;
1509 : }
1510 5080988 : cons_string = ConsString::cast(string);
1511 : PushRight(cons_string);
1512 : // Need to traverse all the way left.
1513 : while (true) {
1514 : // Continue left.
1515 10119823 : string = cons_string->first();
1516 : type = string->map()->instance_type();
1517 10119823 : if ((type & kStringRepresentationMask) != kConsStringTag) {
1518 : AdjustMaximumDepth();
1519 : int length = string->length();
1520 5080988 : if (length == 0) break; // Skip empty left-hand sides of ConsStrings.
1521 5080988 : consumed_ += length;
1522 5080988 : return string;
1523 : }
1524 5038835 : cons_string = ConsString::cast(string);
1525 : PushLeft(cons_string);
1526 : }
1527 : }
1528 : UNREACHABLE();
1529 : }
1530 :
1531 : } // namespace internal
1532 178779 : } // namespace v8
|