Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_STRING_BUILDER_H_
6 : #define V8_STRING_BUILDER_H_
7 :
8 : #include "src/assert-scope.h"
9 : #include "src/factory.h"
10 : #include "src/handles.h"
11 : #include "src/isolate.h"
12 : #include "src/objects.h"
13 : #include "src/utils.h"
14 :
15 : namespace v8 {
16 : namespace internal {
17 :
18 : const int kStringBuilderConcatHelperLengthBits = 11;
19 : const int kStringBuilderConcatHelperPositionBits = 19;
20 :
21 : typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
22 : StringBuilderSubstringLength;
23 : typedef BitField<int, kStringBuilderConcatHelperLengthBits,
24 : kStringBuilderConcatHelperPositionBits>
25 : StringBuilderSubstringPosition;
26 :
27 :
28 : template <typename sinkchar>
29 59691 : static inline void StringBuilderConcatHelper(String* special, sinkchar* sink,
30 : FixedArray* fixed_array,
31 : int array_length) {
32 : DisallowHeapAllocation no_gc;
33 : int position = 0;
34 23334546 : for (int i = 0; i < array_length; i++) {
35 : Object* element = fixed_array->get(i);
36 23274855 : if (element->IsSmi()) {
37 : // Smi encoding of position and length.
38 : int encoded_slice = Smi::cast(element)->value();
39 : int pos;
40 : int len;
41 457415 : if (encoded_slice > 0) {
42 : // Position and length encoded in one smi.
43 457385 : pos = StringBuilderSubstringPosition::decode(encoded_slice);
44 : len = StringBuilderSubstringLength::decode(encoded_slice);
45 : } else {
46 : // Position and length encoded in two smis.
47 30 : Object* obj = fixed_array->get(++i);
48 : DCHECK(obj->IsSmi());
49 : pos = Smi::cast(obj)->value();
50 30 : len = -encoded_slice;
51 : }
52 457415 : String::WriteToFlat(special, sink + position, pos, pos + len);
53 457415 : position += len;
54 : } else {
55 : String* string = String::cast(element);
56 : int element_length = string->length();
57 22817440 : String::WriteToFlat(string, sink + position, 0, element_length);
58 22817440 : position += element_length;
59 : }
60 : }
61 59691 : }
62 :
63 :
64 : // Returns the result length of the concatenation.
65 : // On illegal argument, -1 is returned.
66 49209 : static inline int StringBuilderConcatLength(int special_length,
67 : FixedArray* fixed_array,
68 : int array_length, bool* one_byte) {
69 : DisallowHeapAllocation no_gc;
70 : int position = 0;
71 3442251 : for (int i = 0; i < array_length; i++) {
72 : int increment = 0;
73 : Object* elt = fixed_array->get(i);
74 3393057 : if (elt->IsSmi()) {
75 : // Smi encoding of position and length.
76 : int smi_value = Smi::cast(elt)->value();
77 : int pos;
78 : int len;
79 96417 : if (smi_value > 0) {
80 : // Position and length encoded in one smi.
81 96417 : pos = StringBuilderSubstringPosition::decode(smi_value);
82 : len = StringBuilderSubstringLength::decode(smi_value);
83 : } else {
84 : // Position and length encoded in two smis.
85 0 : len = -smi_value;
86 : // Get the position and check that it is a positive smi.
87 0 : i++;
88 0 : if (i >= array_length) return -1;
89 : Object* next_smi = fixed_array->get(i);
90 0 : if (!next_smi->IsSmi()) return -1;
91 : pos = Smi::cast(next_smi)->value();
92 0 : if (pos < 0) return -1;
93 : }
94 : DCHECK(pos >= 0);
95 : DCHECK(len >= 0);
96 96417 : if (pos > special_length || len > special_length - pos) return -1;
97 : increment = len;
98 3296640 : } else if (elt->IsString()) {
99 : String* element = String::cast(elt);
100 : int element_length = element->length();
101 : increment = element_length;
102 3296640 : if (*one_byte && !element->HasOnlyOneByteChars()) {
103 222 : *one_byte = false;
104 : }
105 : } else {
106 : return -1;
107 : }
108 3393057 : if (increment > String::kMaxLength - position) {
109 : return kMaxInt; // Provoke throw on allocation.
110 : }
111 3393042 : position += increment;
112 : }
113 : return position;
114 : }
115 :
116 :
117 : class FixedArrayBuilder {
118 : public:
119 : explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity)
120 : : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)),
121 : length_(0),
122 10540 : has_non_smi_elements_(false) {
123 : // Require a non-zero initial size. Ensures that doubling the size to
124 : // extend the array will work.
125 : DCHECK(initial_capacity > 0);
126 : }
127 :
128 : explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
129 141288 : : array_(backing_store), length_(0), has_non_smi_elements_(false) {
130 : // Require a non-zero initial size. Ensures that doubling the size to
131 : // extend the array will work.
132 : DCHECK(backing_store->length() > 0);
133 : }
134 :
135 : bool HasCapacity(int elements) {
136 : int length = array_->length();
137 : int required_length = length_ + elements;
138 : return (length >= required_length);
139 : }
140 :
141 20179262 : void EnsureCapacity(int elements) {
142 : int length = array_->length();
143 20179262 : int required_length = length_ + elements;
144 20179262 : if (length < required_length) {
145 : int new_length = length;
146 20612 : do {
147 20612 : new_length *= 2;
148 : } while (new_length < required_length);
149 : Handle<FixedArray> extended_array =
150 20612 : array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length);
151 41224 : array_->CopyTo(0, *extended_array, 0, length_);
152 20612 : array_ = extended_array;
153 : }
154 20179262 : }
155 :
156 20169615 : void Add(Object* value) {
157 : DCHECK(!value->IsSmi());
158 : DCHECK(length_ < capacity());
159 40339230 : array_->set(length_, value);
160 20169615 : length_++;
161 20169615 : has_non_smi_elements_ = true;
162 20169615 : }
163 :
164 457445 : void Add(Smi* value) {
165 : DCHECK(value->IsSmi());
166 : DCHECK(length_ < capacity());
167 457445 : array_->set(length_, value);
168 457445 : length_++;
169 457445 : }
170 :
171 : Handle<FixedArray> array() { return array_; }
172 :
173 : int length() { return length_; }
174 :
175 : int capacity() { return array_->length(); }
176 :
177 141190 : Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
178 141190 : JSArray::SetContent(target_array, array_);
179 141190 : target_array->set_length(Smi::FromInt(length_));
180 141190 : return target_array;
181 : }
182 :
183 : private:
184 : Handle<FixedArray> array_;
185 : int length_;
186 : bool has_non_smi_elements_;
187 : };
188 :
189 :
190 : class ReplacementStringBuilder {
191 : public:
192 10540 : ReplacementStringBuilder(Heap* heap, Handle<String> subject,
193 : int estimated_part_count)
194 : : heap_(heap),
195 : array_builder_(heap->isolate(), estimated_part_count),
196 : subject_(subject),
197 : character_count_(0),
198 42160 : is_one_byte_(subject->IsOneByteRepresentation()) {
199 : // Require a non-zero initial size. Ensures that doubling the size to
200 : // extend the array will work.
201 : DCHECK(estimated_part_count > 0);
202 10540 : }
203 :
204 457415 : static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
205 : int to) {
206 : DCHECK(from >= 0);
207 457415 : int length = to - from;
208 : DCHECK(length > 0);
209 914800 : if (StringBuilderSubstringLength::is_valid(length) &&
210 : StringBuilderSubstringPosition::is_valid(from)) {
211 457385 : int encoded_slice = StringBuilderSubstringLength::encode(length) |
212 457385 : StringBuilderSubstringPosition::encode(from);
213 457385 : builder->Add(Smi::FromInt(encoded_slice));
214 : } else {
215 : // Otherwise encode as two smis.
216 60 : builder->Add(Smi::FromInt(-length));
217 30 : builder->Add(Smi::FromInt(from));
218 : }
219 457415 : }
220 :
221 :
222 19588751 : void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); }
223 :
224 :
225 360998 : void AddSubjectSlice(int from, int to) {
226 360998 : AddSubjectSlice(&array_builder_, from, to);
227 360998 : IncrementCharacterCount(to - from);
228 360998 : }
229 :
230 :
231 19579104 : void AddString(Handle<String> string) {
232 : int length = string->length();
233 : DCHECK(length > 0);
234 : AddElement(*string);
235 19579104 : if (!string->IsOneByteRepresentation()) {
236 0 : is_one_byte_ = false;
237 : }
238 : IncrementCharacterCount(length);
239 19579104 : }
240 :
241 :
242 : MaybeHandle<String> ToString();
243 :
244 :
245 : void IncrementCharacterCount(int by) {
246 19940102 : if (character_count_ > String::kMaxLength - by) {
247 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
248 53789 : character_count_ = kMaxInt;
249 : } else {
250 19886313 : character_count_ += by;
251 : }
252 : }
253 :
254 : private:
255 : void AddElement(Object* element) {
256 : DCHECK(element->IsSmi() || element->IsString());
257 : DCHECK(array_builder_.capacity() > array_builder_.length());
258 19579104 : array_builder_.Add(element);
259 : }
260 :
261 : Heap* heap_;
262 : FixedArrayBuilder array_builder_;
263 : Handle<String> subject_;
264 : int character_count_;
265 : bool is_one_byte_;
266 : };
267 :
268 :
269 : class IncrementalStringBuilder {
270 : public:
271 : explicit IncrementalStringBuilder(Isolate* isolate);
272 :
273 : INLINE(String::Encoding CurrentEncoding()) { return encoding_; }
274 :
275 : template <typename SrcChar, typename DestChar>
276 : INLINE(void Append(SrcChar c));
277 :
278 : INLINE(void AppendCharacter(uint8_t c)) {
279 89537058 : if (encoding_ == String::ONE_BYTE_ENCODING) {
280 : Append<uint8_t, uint8_t>(c);
281 : } else {
282 : Append<uint8_t, uc16>(c);
283 : }
284 : }
285 :
286 : INLINE(void AppendCString(const char* s)) {
287 : const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
288 45037166 : if (encoding_ == String::ONE_BYTE_ENCODING) {
289 402340781 : while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
290 : } else {
291 3909 : while (*u != '\0') Append<uint8_t, uc16>(*(u++));
292 : }
293 : }
294 :
295 : INLINE(void AppendCString(const uc16* s)) {
296 8169912 : if (encoding_ == String::ONE_BYTE_ENCODING) {
297 24520002 : while (*s != '\0') Append<uc16, uint8_t>(*(s++));
298 : } else {
299 90 : while (*s != '\0') Append<uc16, uc16>(*(s++));
300 : }
301 : }
302 :
303 : INLINE(bool CurrentPartCanFit(int length)) {
304 3709783 : return part_length_ - current_index_ > length;
305 : }
306 :
307 : void AppendString(Handle<String> string);
308 :
309 : MaybeHandle<String> Finish();
310 :
311 : INLINE(bool HasOverflowed()) const { return overflowed_; }
312 :
313 3952 : INLINE(int Length()) const { return accumulator_->length() + current_index_; }
314 :
315 : // Change encoding to two-byte.
316 : void ChangeEncoding() {
317 : DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
318 1129590 : ShrinkCurrentPart();
319 1129590 : encoding_ = String::TWO_BYTE_ENCODING;
320 1129590 : Extend();
321 : }
322 :
323 : template <typename DestChar>
324 : class NoExtend {
325 : public:
326 : explicit NoExtend(Handle<String> string, int offset) {
327 : DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
328 : if (sizeof(DestChar) == 1) {
329 1577714 : start_ = reinterpret_cast<DestChar*>(
330 1577714 : Handle<SeqOneByteString>::cast(string)->GetChars() + offset);
331 : } else {
332 1129497 : start_ = reinterpret_cast<DestChar*>(
333 1129497 : Handle<SeqTwoByteString>::cast(string)->GetChars() + offset);
334 : }
335 2707211 : cursor_ = start_;
336 : }
337 :
338 72017142 : INLINE(void Append(DestChar c)) { *(cursor_++) = c; }
339 : INLINE(void AppendCString(const char* s)) {
340 : const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
341 25607 : while (*u != '\0') Append(*(u++));
342 : }
343 :
344 2707211 : int written() { return static_cast<int>(cursor_ - start_); }
345 :
346 : private:
347 : DestChar* start_;
348 : DestChar* cursor_;
349 : DisallowHeapAllocation no_gc_;
350 : };
351 :
352 : template <typename DestChar>
353 : class NoExtendString : public NoExtend<DestChar> {
354 : public:
355 : NoExtendString(Handle<String> string, int required_length)
356 : : NoExtend<DestChar>(string, 0), string_(string) {
357 : DCHECK(string->length() >= required_length);
358 : }
359 :
360 : Handle<String> Finalize() {
361 : Handle<SeqString> string = Handle<SeqString>::cast(string_);
362 : int length = NoExtend<DestChar>::written();
363 : Handle<String> result = SeqString::Truncate(string, length);
364 : string_ = Handle<String>();
365 : return result;
366 : }
367 :
368 : private:
369 : Handle<String> string_;
370 : };
371 :
372 : template <typename DestChar>
373 : class NoExtendBuilder : public NoExtend<DestChar> {
374 : public:
375 2707211 : NoExtendBuilder(IncrementalStringBuilder* builder, int required_length)
376 : : NoExtend<DestChar>(builder->current_part(), builder->current_index_),
377 5414422 : builder_(builder) {
378 : DCHECK(builder->CurrentPartCanFit(required_length));
379 2707211 : }
380 :
381 2707211 : ~NoExtendBuilder() {
382 5414422 : builder_->current_index_ += NoExtend<DestChar>::written();
383 2707211 : }
384 :
385 : private:
386 : IncrementalStringBuilder* builder_;
387 : };
388 :
389 : private:
390 : Factory* factory() { return isolate_->factory(); }
391 :
392 : INLINE(Handle<String> accumulator()) { return accumulator_; }
393 :
394 : INLINE(void set_accumulator(Handle<String> string)) {
395 53369070 : *accumulator_.location() = *string;
396 : }
397 :
398 : INLINE(Handle<String> current_part()) { return current_part_; }
399 :
400 : INLINE(void set_current_part(Handle<String> string)) {
401 54498660 : *current_part_.location() = *string;
402 : }
403 :
404 : // Add the current part to the accumulator.
405 : void Accumulate(Handle<String> new_part);
406 :
407 : // Finish the current part and allocate a new part.
408 : void Extend();
409 :
410 : // Shrink current part to the right size.
411 33268769 : void ShrinkCurrentPart() {
412 : DCHECK(current_index_ < part_length_);
413 : set_current_part(SeqString::Truncate(
414 33268769 : Handle<SeqString>::cast(current_part()), current_index_));
415 33268769 : }
416 :
417 : static const int kInitialPartLength = 32;
418 : static const int kMaxPartLength = 16 * 1024;
419 : static const int kPartLengthGrowthFactor = 2;
420 :
421 : Isolate* isolate_;
422 : String::Encoding encoding_;
423 : bool overflowed_;
424 : int part_length_;
425 : int current_index_;
426 : Handle<String> accumulator_;
427 : Handle<String> current_part_;
428 : };
429 :
430 :
431 : template <typename SrcChar, typename DestChar>
432 : void IncrementalStringBuilder::Append(SrcChar c) {
433 : DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
434 : if (sizeof(DestChar) == 1) {
435 : DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
436 607099462 : SeqOneByteString::cast(*current_part_)
437 : ->SeqOneByteStringSet(current_index_++, c);
438 : } else {
439 : DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
440 4558342 : SeqTwoByteString::cast(*current_part_)
441 : ->SeqTwoByteStringSet(current_index_++, c);
442 : }
443 305828902 : if (current_index_ == part_length_) Extend();
444 : }
445 : } // namespace internal
446 : } // namespace v8
447 :
448 : #endif // V8_STRING_BUILDER_H_
|