Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_STRING_BUILDER_INL_H_
6 : #define V8_STRING_BUILDER_INL_H_
7 :
8 : #include "src/assert-scope.h"
9 : #include "src/handles-inl.h"
10 : #include "src/heap/factory.h"
11 : #include "src/isolate.h"
12 : #include "src/objects.h"
13 : #include "src/objects/fixed-array.h"
14 : #include "src/objects/string-inl.h"
15 : #include "src/utils.h"
16 :
17 : namespace v8 {
18 : namespace internal {
19 :
// Bit widths of the two fields that ReplacementStringBuilder::AddSubjectSlice
// packs into a single encoded slice value: a length field and a position
// field.
20 : const int kStringBuilderConcatHelperLengthBits = 11;
21 : const int kStringBuilderConcatHelperPositionBits = 19;
22 :
// Field holding a slice's length. Presumably BitField's arguments are
// (type, shift, size), i.e. this is the low 11 bits -- confirm against the
// BitField definition.
23 : typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
24 : StringBuilderSubstringLength;
// Field holding a slice's start position; its second template argument is the
// length field's width, so it presumably sits directly above the length bits.
25 : typedef BitField<int, kStringBuilderConcatHelperLengthBits,
26 : kStringBuilderConcatHelperPositionBits>
27 : StringBuilderSubstringPosition;
28 :
// Declaration only; the definition is not in this header. Templated on the
// character type of the output buffer `sink`.
// NOTE(review): presumably writes the pieces described by the first
// `array_length` entries of `fixed_array` (with `special` as the subject for
// encoded slices) into `sink` -- confirm against the definition.
29 : template <typename sinkchar>
30 : void StringBuilderConcatHelper(String special, sinkchar* sink,
31 : FixedArray fixed_array, int array_length);
32 :
33 : // Returns the result length of the concatenation.
34 : // On illegal argument, -1 is returned.
// `one_byte` is an out-parameter; its exact meaning is set by the definition,
// which is not visible here (presumably whether the result fits a one-byte
// string -- confirm).
35 : int StringBuilderConcatLength(int special_length, FixedArray fixed_array,
36 : int array_length, bool* one_byte);
37 :
// Helper that appends values to a FixedArray backing store while tracking how
// many elements have been added. Only declarations are visible in this header;
// the member functions are defined elsewhere.
38 : class FixedArrayBuilder {
39 : public:
// Two ways to construct: from an isolate plus an initial capacity, or from an
// existing backing store handle.
40 : explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
41 : explicit FixedArrayBuilder(Handle<FixedArray> backing_store);
42 :
// Capacity queries/growth. NOTE(review): presumably `elements` is the number
// of additional entries about to be added -- confirm against the definition.
43 : bool HasCapacity(int elements);
44 : void EnsureCapacity(Isolate* isolate, int elements);
45 :
// Append a value. ReplacementStringBuilder::AddSubjectSlice calls Add without
// growing the array itself and documents that its callers must ensure
// capacity first.
46 : void Add(Object value);
47 : void Add(Smi value);
48 :
// Returns the backing store handle.
49 : Handle<FixedArray> array() { return array_; }
50 :
// Returns the tracked element count (length_), which is kept separately from
// the backing store.
51 : int length() { return length_; }
52 :
53 : int capacity();
54 :
55 : Handle<JSArray> ToJSArray(Handle<JSArray> target_array);
56 :
57 : private:
58 : Handle<FixedArray> array_;
59 : int length_;
// Presumably set once a non-Smi value has been added via Add(Object) --
// confirm against the definition.
60 : bool has_non_smi_elements_;
61 : };
62 :
// Collects the parts of a replacement result in a FixedArrayBuilder: slices of
// the subject string (stored as encoded smis) and strings added via AddString.
// Also keeps a running, saturating count of the characters added.
63 : class ReplacementStringBuilder {
64 : public:
65 : ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66 : int estimated_part_count);
67 :
// Appends an encoded reference to the subject slice that starts at `from` and
// has length `to - from`.
//  - If both the length and the start position fit their bit fields, a single
//    smi holding length | position is added.
//  - Otherwise two smis are added: the negated length, then the start
//    position.
// Debug checks require from >= 0 and to > from.
68 : // Caution: Callers must ensure the builder has enough capacity.
69 61437 : static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
70 : int to) {
71 : DCHECK_GE(from, 0);
72 61437 : int length = to - from;
73 : DCHECK_GT(length, 0);
74 122856 : if (StringBuilderSubstringLength::is_valid(length) &&
75 : StringBuilderSubstringPosition::is_valid(from)) {
76 61419 : int encoded_slice = StringBuilderSubstringLength::encode(length) |
77 61419 : StringBuilderSubstringPosition::encode(from);
78 61419 : builder->Add(Smi::FromInt(encoded_slice));
79 : } else {
80 : // Otherwise encode as two smis.
81 36 : builder->Add(Smi::FromInt(-length));
82 18 : builder->Add(Smi::FromInt(from));
83 : }
84 61437 : }
85 :
// Instance variant: reserves room for the worst case (two smis), appends the
// encoded slice to this builder's array and adds the slice length to the
// character count.
86 2700 : void AddSubjectSlice(int from, int to) {
87 2700 : EnsureCapacity(2); // Subject slices are encoded with up to two smis.
88 2700 : AddSubjectSlice(&array_builder_, from, to);
89 2700 : IncrementCharacterCount(to - from);
90 2700 : }
91 :
92 : void AddString(Handle<String> string);
93 :
94 : MaybeHandle<String> ToString();
95 :
// Adds `by` to character_count_. If the sum would exceed String::kMaxLength,
// the count is pinned to kMaxInt instead; the STATIC_ASSERT guarantees kMaxInt
// is larger than any valid string length. Assumes `by` is non-negative; this
// is not checked here.
96 : void IncrementCharacterCount(int by) {
97 4113 : if (character_count_ > String::kMaxLength - by) {
98 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
99 0 : character_count_ = kMaxInt;
100 : } else {
101 4113 : character_count_ += by;
102 : }
103 : }
104 :
105 : private:
106 : void AddElement(Handle<Object> element);
107 : void EnsureCapacity(int elements);
108 :
109 : Heap* heap_;
110 : FixedArrayBuilder array_builder_;
111 : Handle<String> subject_;
// Running total maintained by IncrementCharacterCount; kMaxInt marks overflow.
112 : int character_count_;
113 : bool is_one_byte_;
114 : };
115 :
// Builds a string piece by piece. Characters are written into a sequential
// "current part" at current_index_; when the part is full, Extend() is called
// (defined elsewhere) to finish it and allocate a new one. The builder starts
// from whatever encoding the constructor sets and can be switched to two-byte
// via ChangeEncoding().
116 : class IncrementalStringBuilder {
117 : public:
118 : explicit IncrementalStringBuilder(Isolate* isolate);
119 :
120 3294922 : V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
121 :
// Appends one character of type SrcChar to a current part whose character
// type is DestChar. Defined after the class.
122 : template <typename SrcChar, typename DestChar>
123 : V8_INLINE void Append(SrcChar c);
124 :
// Appends a single one-byte character, dispatching on the current encoding.
125 : V8_INLINE void AppendCharacter(uint8_t c) {
126 60997732 : if (encoding_ == String::ONE_BYTE_ENCODING) {
127 : Append<uint8_t, uint8_t>(c);
128 : } else {
129 : Append<uint8_t, uc16>(c);
130 : }
131 : }
132 :
// Appends every character of the NUL-terminated string `s` (terminator
// excluded), reading the bytes as uint8_t.
133 : V8_INLINE void AppendCString(const char* s) {
134 : const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
135 26800025 : if (encoding_ == String::ONE_BYTE_ENCODING) {
136 137396710 : while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
137 : } else {
138 1210 : while (*u != '\0') Append<uint8_t, uc16>(*(u++));
139 : }
140 : }
141 :
// Same as above for a NUL-terminated uc16 string. In one-byte mode the
// characters go through Append<uc16, uint8_t>; no range check is visible
// here, so presumably the caller guarantees they fit in one byte -- confirm.
142 : V8_INLINE void AppendCString(const uc16* s) {
143 4902136 : if (encoding_ == String::ONE_BYTE_ENCODING) {
144 9807592 : while (*s != '\0') Append<uc16, uint8_t>(*(s++));
145 : } else {
146 36 : while (*s != '\0') Append<uc16, uc16>(*(s++));
147 : }
148 : }
149 :
// True when the space left in the current part (part_length_ -
// current_index_) is strictly greater than `length`.
150 : V8_INLINE bool CurrentPartCanFit(int length) {
151 2575781 : return part_length_ - current_index_ > length;
152 : }
153 :
154 : // We make a rough estimate to find out if the current string can be
155 : // serialized without allocating a new string part. The worst case length of
156 : // an escaped character is 6. Shifting the remaining string length left by 3
157 : // (i.e. multiplying by 8) is a more pessimistic estimate, but faster to calculate.
// Returns that worst-case length if it fits in the current part, 0 otherwise
// (including when `length` exceeds kMaxPartLength).
158 : V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
159 2575799 : if (length > kMaxPartLength) return 0;
160 : STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
161 : // This shift will not overflow because length is already no greater than
162 : // the maximum part length.
163 2575781 : int worst_case_length = length << 3;
164 2575781 : return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
165 : }
166 :
167 : void AppendString(Handle<String> string);
168 :
169 : MaybeHandle<String> Finish();
170 :
171 9461472 : V8_INLINE bool HasOverflowed() const { return overflowed_; }
172 :
173 : int Length() const;
174 :
175 : // Change encoding to two-byte.
// Truncates the current part to what has been written, flips the encoding,
// then calls Extend(). Must only be called while in one-byte mode (DCHECK).
176 : void ChangeEncoding() {
177 : DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
178 719132 : ShrinkCurrentPart();
179 719132 : encoding_ = String::TWO_BYTE_ENCODING;
180 719132 : Extend();
181 : }
182 :
// Raw writer over the character storage of a sequential string. Append()
// stores through a bare pointer and never checks bounds or extends the
// string, so the caller must know up front that enough room exists.
183 : template <typename DestChar>
184 : class NoExtend {
185 : public:
// Points start_/cursor_ at `offset` characters into `string`'s character
// data. The storage accessor is picked by sizeof(DestChar): one-byte strings
// for 1, two-byte strings otherwise.
186 : NoExtend(Handle<String> string, int offset,
187 : const DisallowHeapAllocation& no_gc) {
188 : DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
189 : if (sizeof(DestChar) == 1) {
190 705632 : start_ = reinterpret_cast<DestChar*>(
191 705632 : Handle<SeqOneByteString>::cast(string)->GetChars(no_gc) + offset);
192 : } else {
193 719092 : start_ = reinterpret_cast<DestChar*>(
194 719092 : Handle<SeqTwoByteString>::cast(string)->GetChars(no_gc) + offset);
195 : }
196 1424724 : cursor_ = start_;
197 : }
198 :
// Stores `c` at the cursor and advances it.
199 44204720 : V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
// Appends the bytes of a NUL-terminated string (terminator excluded).
200 : V8_INLINE void AppendCString(const char* s) {
201 : const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
202 457552 : while (*u != '\0') Append(*(u++));
203 : }
204 :
// Number of characters appended since construction.
205 1424724 : int written() { return static_cast<int>(cursor_ - start_); }
206 :
207 : private:
208 : DestChar* start_;
209 : DestChar* cursor_;
210 : DISALLOW_HEAP_ALLOCATION(no_gc_)
211 : };
212 :
// NoExtend writer over a standalone string; Finalize() truncates the string
// to the number of characters written.
// NOTE(review): the base initializer below passes two arguments, but the only
// NoExtend constructor visible in this file takes three (string, offset,
// no_gc). This template has no hit counts in this listing, so it is probably
// never instantiated; it looks like it would not compile if it were --
// confirm.
213 : template <typename DestChar>
214 : class NoExtendString : public NoExtend<DestChar> {
215 : public:
216 : NoExtendString(Handle<String> string, int required_length)
217 : : NoExtend<DestChar>(string, 0), string_(string) {
218 : DCHECK(string->length() >= required_length);
219 : }
220 :
// Truncates the string to written() characters, clears the stored handle and
// returns the truncated string.
221 : Handle<String> Finalize() {
222 : Handle<SeqString> string = Handle<SeqString>::cast(string_);
223 : int length = NoExtend<DestChar>::written();
224 : Handle<String> result = SeqString::Truncate(string, length);
225 : string_ = Handle<String>();
226 : return result;
227 : }
228 :
229 : private:
230 : Handle<String> string_;
231 : };
232 :
// NoExtend writer positioned at the builder's current write index inside its
// current part. The destructor adds the number of characters written to the
// builder's current_index_, so the builder only sees the result once this
// object goes out of scope.
233 : template <typename DestChar>
234 : class NoExtendBuilder : public NoExtend<DestChar> {
235 : public:
// `required_length` is only used for the debug check that the current part
// can fit it.
236 1424724 : NoExtendBuilder(IncrementalStringBuilder* builder, int required_length,
237 : const DisallowHeapAllocation& no_gc)
238 : : NoExtend<DestChar>(builder->current_part(), builder->current_index_,
239 : no_gc),
240 2849448 : builder_(builder) {
241 : DCHECK(builder->CurrentPartCanFit(required_length));
242 1424724 : }
243 :
244 : ~NoExtendBuilder() {
245 1424724 : builder_->current_index_ += NoExtend<DestChar>::written();
246 1424724 : }
247 :
248 : private:
249 : IncrementalStringBuilder* builder_;
250 : };
251 :
252 : private:
253 : Factory* factory() { return isolate_->factory(); }
254 :
255 28931229 : V8_INLINE Handle<String> accumulator() { return accumulator_; }
256 :
// Overwrites the value stored at the existing handle location instead of
// assigning a new handle to accumulator_.
257 : V8_INLINE void set_accumulator(Handle<String> string) {
258 22763155 : *accumulator_.location() = string->ptr();
259 : }
260 :
261 31075131 : V8_INLINE Handle<String> current_part() { return current_part_; }
262 :
// Same in-place overwrite as set_accumulator, for current_part_.
263 : V8_INLINE void set_current_part(Handle<String> string) {
264 23482288 : *current_part_.location() = string->ptr();
265 : }
266 :
267 : // Add the current part to the accumulator.
268 : void Accumulate(Handle<String> new_part);
269 :
270 : // Finish the current part and allocate a new part.
271 : void Extend();
272 :
273 : // Shrink current part to the right size.
// Truncates the current part to current_index_ characters and stores the
// result back as the current part.
274 14431996 : void ShrinkCurrentPart() {
275 : DCHECK(current_index_ < part_length_);
276 28863992 : set_current_part(SeqString::Truncate(
277 : Handle<SeqString>::cast(current_part()), current_index_));
278 14431985 : }
279 :
// Part sizing parameters. Only kMaxPartLength is used in this header (by
// EscapedLengthIfCurrentPartFits); the others are presumably consumed by
// Extend() and the constructor -- confirm against the definitions.
280 : static const int kInitialPartLength = 32;
281 : static const int kMaxPartLength = 16 * 1024;
282 : static const int kPartLengthGrowthFactor = 2;
283 :
284 : Isolate* isolate_;
285 : String::Encoding encoding_;
286 : bool overflowed_;
// Length of the current part and the next write position within it.
287 : int part_length_;
288 : int current_index_;
289 : Handle<String> accumulator_;
290 : Handle<String> current_part_;
291 : };
292 :
// Writes `c` into the current part at current_index_ and advances the index.
// DestChar selects the storage type: one-byte when sizeof(DestChar) == 1,
// two-byte otherwise. The debug checks require it to match the builder's
// current encoding. When the write fills the part (current_index_ reaches
// part_length_), Extend() is called.
293 : template <typename SrcChar, typename DestChar>
294 : void IncrementalStringBuilder::Append(SrcChar c) {
295 : DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
296 : if (sizeof(DestChar) == 1) {
297 : DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
298 2614383191 : SeqOneByteString::cast(*current_part_)
299 : ->SeqOneByteStringSet(current_index_++, c);
300 : } else {
301 : DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
302 1443917 : SeqTwoByteString::cast(*current_part_)
303 : ->SeqTwoByteStringSet(current_index_++, c);
304 : }
// Extend as soon as the part is full rather than waiting for the next append.
305 2613168237 : if (current_index_ == part_length_) Extend();
306 : }
307 : } // namespace internal
308 : } // namespace v8
309 :
310 : #endif // V8_STRING_BUILDER_INL_H_
|