Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_STRING_BUILDER_INL_H_
6 : #define V8_STRING_BUILDER_INL_H_
7 :
8 : #include "src/assert-scope.h"
9 : #include "src/handles-inl.h"
10 : #include "src/heap/factory.h"
11 : #include "src/isolate.h"
12 : #include "src/objects.h"
13 : #include "src/objects/fixed-array.h"
14 : #include "src/objects/string-inl.h"
15 : #include "src/utils.h"
16 :
17 : namespace v8 {
18 : namespace internal {
19 :
20 : const int kStringBuilderConcatHelperLengthBits = 11;
21 : const int kStringBuilderConcatHelperPositionBits = 19;
22 :
23 : typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
24 : StringBuilderSubstringLength;
25 : typedef BitField<int, kStringBuilderConcatHelperLengthBits,
26 : kStringBuilderConcatHelperPositionBits>
27 : StringBuilderSubstringPosition;
28 :
// Declaration only; the definition is not part of this header.
// Templated on |sinkchar|, the character type of the output buffer |sink|.
// NOTE(review): presumably copies the pieces described by the first
// |array_length| entries of |fixed_array| into |sink|, resolving encoded
// slices against |special| -- confirm against the definition.
template <typename sinkchar>
void StringBuilderConcatHelper(String special, sinkchar* sink,
                               FixedArray fixed_array, int array_length);
32 :
// Returns the result length of the concatenation.
// On illegal argument, -1 is returned.
// Declaration only; the definition is not part of this header.
// NOTE(review): |one_byte| is passed by non-const pointer, so it is presumably
// an out-parameter reporting whether the result is one-byte -- confirm against
// the definition.
int StringBuilderConcatLength(int special_length, FixedArray fixed_array,
                              int array_length, bool* one_byte);
37 :
// Helper that pairs a FixedArray handle with a separately tracked element
// count. Only array() and length() are defined inline here; every other member
// is declared here and defined elsewhere.
class FixedArrayBuilder {
 public:
  // Two ways to construct: from an isolate plus an initial capacity, or from
  // an already existing backing store.
  explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
  explicit FixedArrayBuilder(Handle<FixedArray> backing_store);

  // NOTE(review): presumably HasCapacity() tests whether |elements| more
  // entries fit and EnsureCapacity() grows the backing store if they do not --
  // confirm against the definitions.
  bool HasCapacity(int elements);
  void EnsureCapacity(Isolate* isolate, int elements);

  // Overloads for a generic Object and for a Smi (used by
  // ReplacementStringBuilder::AddSubjectSlice with Smi::FromInt values).
  void Add(Object value);
  void Add(Smi value);

  // Returns the backing-store handle.
  Handle<FixedArray> array() { return array_; }

  // Returns the tracked element count.
  int length() { return length_; }

  int capacity();

  Handle<JSArray> ToJSArray(Handle<JSArray> target_array);

 private:
  Handle<FixedArray> array_;
  int length_;
  // NOTE(review): presumably set once a non-Smi value has been added --
  // confirm against the Add() definitions.
  bool has_non_smi_elements_;
};
62 :
63 : class ReplacementStringBuilder {
64 : public:
65 : ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66 : int estimated_part_count);
67 :
68 61401 : static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
69 : int to) {
70 : DCHECK_GE(from, 0);
71 61401 : int length = to - from;
72 : DCHECK_GT(length, 0);
73 122784 : if (StringBuilderSubstringLength::is_valid(length) &&
74 : StringBuilderSubstringPosition::is_valid(from)) {
75 61383 : int encoded_slice = StringBuilderSubstringLength::encode(length) |
76 61383 : StringBuilderSubstringPosition::encode(from);
77 61383 : builder->Add(Smi::FromInt(encoded_slice));
78 : } else {
79 : // Otherwise encode as two smis.
80 36 : builder->Add(Smi::FromInt(-length));
81 18 : builder->Add(Smi::FromInt(from));
82 : }
83 61401 : }
84 :
85 : void EnsureCapacity(int elements);
86 :
87 2664 : void AddSubjectSlice(int from, int to) {
88 2664 : AddSubjectSlice(&array_builder_, from, to);
89 2664 : IncrementCharacterCount(to - from);
90 2664 : }
91 :
92 : void AddString(Handle<String> string);
93 :
94 : MaybeHandle<String> ToString();
95 :
96 : void IncrementCharacterCount(int by) {
97 4014 : if (character_count_ > String::kMaxLength - by) {
98 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
99 0 : character_count_ = kMaxInt;
100 : } else {
101 4014 : character_count_ += by;
102 : }
103 : }
104 :
105 : private:
106 : void AddElement(Object element);
107 :
108 : Heap* heap_;
109 : FixedArrayBuilder array_builder_;
110 : Handle<String> subject_;
111 : int character_count_;
112 : bool is_one_byte_;
113 : };
114 :
115 : class IncrementalStringBuilder {
116 : public:
117 : explicit IncrementalStringBuilder(Isolate* isolate);
118 :
119 : V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
120 :
121 : template <typename SrcChar, typename DestChar>
122 : V8_INLINE void Append(SrcChar c);
123 :
124 : V8_INLINE void AppendCharacter(uint8_t c) {
125 60022443 : if (encoding_ == String::ONE_BYTE_ENCODING) {
126 : Append<uint8_t, uint8_t>(c);
127 : } else {
128 : Append<uint8_t, uc16>(c);
129 : }
130 : }
131 :
132 : V8_INLINE void AppendCString(const char* s) {
133 : const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
134 26797664 : if (encoding_ == String::ONE_BYTE_ENCODING) {
135 248004115 : while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
136 : } else {
137 1267 : while (*u != '\0') Append<uint8_t, uc16>(*(u++));
138 : }
139 : }
140 :
141 : V8_INLINE void AppendCString(const uc16* s) {
142 4902136 : if (encoding_ == String::ONE_BYTE_ENCODING) {
143 14713066 : while (*s != '\0') Append<uc16, uint8_t>(*(s++));
144 : } else {
145 54 : while (*s != '\0') Append<uc16, uc16>(*(s++));
146 : }
147 : }
148 :
149 : V8_INLINE bool CurrentPartCanFit(int length) {
150 2643531 : return part_length_ - current_index_ > length;
151 : }
152 :
153 : // We make a rough estimate to find out if the current string can be
154 : // serialized without allocating a new string part. The worst case length of
155 : // an escaped character is 6. Shifting the remaining string length right by 3
156 : // is a more pessimistic estimate, but faster to calculate.
157 2643531 : V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
158 2643549 : if (length > kMaxPartLength) return 0;
159 : STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
160 : // This shift will not overflow because length is already less than the
161 : // maximum part length.
162 2643531 : int worst_case_length = length << 3;
163 2643531 : return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
164 : }
165 :
166 : void AppendString(Handle<String> string);
167 :
168 : MaybeHandle<String> Finish();
169 :
170 : V8_INLINE bool HasOverflowed() const { return overflowed_; }
171 :
172 : int Length() const;
173 :
174 : // Change encoding to two-byte.
175 : void ChangeEncoding() {
176 : DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
177 719122 : ShrinkCurrentPart();
178 719122 : encoding_ = String::TWO_BYTE_ENCODING;
179 719122 : Extend();
180 : }
181 :
182 : template <typename DestChar>
183 : class NoExtend {
184 : public:
185 1447001 : NoExtend(Handle<String> string, int offset,
186 : const DisallowHeapAllocation& no_gc) {
187 : DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
188 : if (sizeof(DestChar) == 1) {
189 727909 : start_ = reinterpret_cast<DestChar*>(
190 1455818 : Handle<SeqOneByteString>::cast(string)->GetChars(no_gc) + offset);
191 : } else {
192 719092 : start_ = reinterpret_cast<DestChar*>(
193 1438184 : Handle<SeqTwoByteString>::cast(string)->GetChars(no_gc) + offset);
194 : }
195 1447001 : cursor_ = start_;
196 1447001 : }
197 :
198 44264857 : V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
199 : V8_INLINE void AppendCString(const char* s) {
200 : const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
201 798417 : while (*u != '\0') Append(*(u++));
202 : }
203 :
204 1447001 : int written() { return static_cast<int>(cursor_ - start_); }
205 :
206 : private:
207 : DestChar* start_;
208 : DestChar* cursor_;
209 : DISALLOW_HEAP_ALLOCATION(no_gc_)
210 : };
211 :
212 : template <typename DestChar>
213 : class NoExtendString : public NoExtend<DestChar> {
214 : public:
215 : NoExtendString(Handle<String> string, int required_length)
216 : : NoExtend<DestChar>(string, 0), string_(string) {
217 : DCHECK(string->length() >= required_length);
218 : }
219 :
220 : Handle<String> Finalize() {
221 : Handle<SeqString> string = Handle<SeqString>::cast(string_);
222 : int length = NoExtend<DestChar>::written();
223 : Handle<String> result = SeqString::Truncate(string, length);
224 : string_ = Handle<String>();
225 : return result;
226 : }
227 :
228 : private:
229 : Handle<String> string_;
230 : };
231 :
232 : template <typename DestChar>
233 : class NoExtendBuilder : public NoExtend<DestChar> {
234 : public:
235 1447001 : NoExtendBuilder(IncrementalStringBuilder* builder, int required_length,
236 : const DisallowHeapAllocation& no_gc)
237 : : NoExtend<DestChar>(builder->current_part(), builder->current_index_,
238 : no_gc),
239 1447001 : builder_(builder) {
240 : DCHECK(builder->CurrentPartCanFit(required_length));
241 1447001 : }
242 :
243 1447001 : ~NoExtendBuilder() {
244 1447001 : builder_->current_index_ += NoExtend<DestChar>::written();
245 1447001 : }
246 :
247 : private:
248 : IncrementalStringBuilder* builder_;
249 : };
250 :
251 : private:
252 : Factory* factory() { return isolate_->factory(); }
253 :
254 : V8_INLINE Handle<String> accumulator() { return accumulator_; }
255 :
256 : V8_INLINE void set_accumulator(Handle<String> string) {
257 18340935 : *accumulator_.location() = string->ptr();
258 : }
259 :
260 : V8_INLINE Handle<String> current_part() { return current_part_; }
261 :
262 : V8_INLINE void set_current_part(Handle<String> string) {
263 19060056 : *current_part_.location() = string->ptr();
264 : }
265 :
266 : // Add the current part to the accumulator.
267 : void Accumulate(Handle<String> new_part);
268 :
269 : // Finish the current part and allocate a new part.
270 : void Extend();
271 :
272 : // Shrink current part to the right size.
273 11787383 : void ShrinkCurrentPart() {
274 : DCHECK(current_index_ < part_length_);
275 : set_current_part(SeqString::Truncate(
276 11787383 : Handle<SeqString>::cast(current_part()), current_index_));
277 11787382 : }
278 :
279 : static const int kInitialPartLength = 32;
280 : static const int kMaxPartLength = 16 * 1024;
281 : static const int kPartLengthGrowthFactor = 2;
282 :
283 : Isolate* isolate_;
284 : String::Encoding encoding_;
285 : bool overflowed_;
286 : int part_length_;
287 : int current_index_;
288 : Handle<String> accumulator_;
289 : Handle<String> current_part_;
290 : };
291 :
292 : template <typename SrcChar, typename DestChar>
293 : void IncrementalStringBuilder::Append(SrcChar c) {
294 : DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
295 : if (sizeof(DestChar) == 1) {
296 : DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
297 5222793908 : SeqOneByteString::cast(*current_part_)
298 : ->SeqOneByteStringSet(current_index_++, c);
299 : } else {
300 : DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
301 2885586 : SeqTwoByteString::cast(*current_part_)
302 : ->SeqTwoByteStringSet(current_index_++, c);
303 : }
304 2612839747 : if (current_index_ == part_length_) Extend();
305 : }
306 : } // namespace internal
307 : } // namespace v8
308 :
309 : #endif // V8_STRING_BUILDER_INL_H_
|