/src/serenity/AK/StringBuilder.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> |
3 | | * Copyright (c) 2023, Liav A. <liavalb@hotmail.co.il> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #include <AK/ByteBuffer.h> |
9 | | #include <AK/Checked.h> |
10 | | #include <AK/PrintfImplementation.h> |
11 | | #include <AK/StringBuilder.h> |
12 | | #include <AK/StringView.h> |
13 | | #include <AK/UnicodeUtils.h> |
14 | | #include <AK/Utf32View.h> |
15 | | |
16 | | #ifndef KERNEL |
17 | | # include <AK/ByteString.h> |
18 | | # include <AK/FlyString.h> |
19 | | # include <AK/String.h> |
20 | | # include <AK/Utf16View.h> |
21 | | #endif |
22 | | |
23 | | namespace AK { |
24 | | |
25 | | inline ErrorOr<void> StringBuilder::will_append(size_t size) |
26 | 2.67G | { |
27 | 2.67G | if (m_use_inline_capacity_only == UseInlineCapacityOnly::Yes) { |
28 | 0 | VERIFY(m_buffer.capacity() == StringBuilder::inline_capacity); |
29 | 0 | Checked<size_t> current_pointer = m_buffer.size(); |
30 | 0 | current_pointer += size; |
31 | 0 | VERIFY(!current_pointer.has_overflow()); |
32 | 0 | if (current_pointer <= StringBuilder::inline_capacity) { |
33 | 0 | return {}; |
34 | 0 | } |
35 | 0 | return Error::from_errno(ENOMEM); |
36 | 0 | } |
37 | | |
38 | 2.67G | Checked<size_t> needed_capacity = m_buffer.size(); |
39 | 2.67G | needed_capacity += size; |
40 | 2.67G | VERIFY(!needed_capacity.has_overflow()); |
41 | | // Prefer to completely use the existing capacity first |
42 | 2.67G | if (needed_capacity <= m_buffer.capacity()) |
43 | 2.67G | return {}; |
44 | 3.20M | Checked<size_t> expanded_capacity = needed_capacity; |
45 | 3.20M | expanded_capacity *= 2; |
46 | 3.20M | VERIFY(!expanded_capacity.has_overflow()); |
47 | 3.20M | TRY(m_buffer.try_ensure_capacity(expanded_capacity.value())); |
48 | 3.20M | return {}; |
49 | 3.20M | } |
50 | | |
51 | | ErrorOr<StringBuilder> StringBuilder::create(size_t initial_capacity) |
52 | 0 | { |
53 | 0 | StringBuilder builder; |
54 | 0 | TRY(builder.m_buffer.try_ensure_capacity(initial_capacity)); |
55 | 0 | return builder; |
56 | 0 | } |
57 | | |
58 | | StringBuilder::StringBuilder(size_t initial_capacity) |
59 | 441M | { |
60 | 441M | m_buffer.ensure_capacity(initial_capacity); |
61 | 441M | } |
62 | | |
63 | | StringBuilder::StringBuilder(UseInlineCapacityOnly use_inline_capacity_only) |
64 | 0 | : m_use_inline_capacity_only(use_inline_capacity_only) |
65 | 0 | { |
66 | 0 | } |
67 | | |
68 | | size_t StringBuilder::length() const |
69 | 279M | { |
70 | 279M | return m_buffer.size(); |
71 | 279M | } |
72 | | |
73 | | bool StringBuilder::is_empty() const |
74 | 558M | { |
75 | 558M | return m_buffer.is_empty(); |
76 | 558M | } |
77 | | |
78 | | void StringBuilder::trim(size_t count) |
79 | 0 | { |
80 | 0 | auto decrease_count = min(m_buffer.size(), count); |
81 | 0 | m_buffer.resize(m_buffer.size() - decrease_count); |
82 | 0 | } |
83 | | |
84 | | ErrorOr<void> StringBuilder::try_append(StringView string) |
85 | 309M | { |
86 | 309M | if (string.is_empty()) |
87 | 87.4M | return {}; |
88 | 443M | TRY(will_append(string.length())); |
89 | 443M | TRY(m_buffer.try_append(string.characters_without_null_termination(), string.length())); |
90 | 221M | return {}; |
91 | 443M | } |
92 | | |
93 | | ErrorOr<void> StringBuilder::try_append(char ch) |
94 | 2.36G | { |
95 | 2.36G | TRY(will_append(1)); |
96 | 2.36G | TRY(m_buffer.try_append(ch)); |
97 | 2.36G | return {}; |
98 | 2.36G | } |
99 | | |
100 | | ErrorOr<void> StringBuilder::try_append_repeated(char ch, size_t n) |
101 | 0 | { |
102 | 0 | TRY(will_append(n)); |
103 | 0 | for (size_t i = 0; i < n; ++i) |
104 | 0 | TRY(try_append(ch)); |
105 | 0 | return {}; |
106 | 0 | } |
107 | | |
108 | | void StringBuilder::append(StringView string) |
109 | 250M | { |
110 | 250M | MUST(try_append(string)); |
111 | 250M | } |
112 | | |
113 | | ErrorOr<void> StringBuilder::try_append(char const* characters, size_t length) |
114 | 0 | { |
115 | 0 | return try_append(StringView { characters, length }); |
116 | 0 | } |
117 | | |
118 | | void StringBuilder::append(char const* characters, size_t length) |
119 | 0 | { |
120 | 0 | MUST(try_append(characters, length)); |
121 | 0 | } |
122 | | |
123 | | void StringBuilder::append(char ch) |
124 | 1.04G | { |
125 | 1.04G | MUST(try_append(ch)); |
126 | 1.04G | } |
127 | | |
128 | | void StringBuilder::appendvf(char const* fmt, va_list ap) |
129 | 0 | { |
130 | 0 | printf_internal([this](char*&, char ch) { |
131 | 0 | append(ch); |
132 | 0 | }, |
133 | 0 | nullptr, fmt, ap); |
134 | 0 | } |
135 | | |
136 | | void StringBuilder::append_repeated(char ch, size_t n) |
137 | 0 | { |
138 | 0 | MUST(try_append_repeated(ch, n)); |
139 | 0 | } |
140 | | |
141 | | ErrorOr<ByteBuffer> StringBuilder::to_byte_buffer() const |
142 | 604 | { |
143 | 604 | return ByteBuffer::copy(data(), length()); |
144 | 604 | } |
145 | | |
146 | | #ifndef KERNEL |
147 | | ByteString StringBuilder::to_byte_string() const |
148 | 318M | { |
149 | 318M | if (is_empty()) |
150 | 53.7M | return ByteString::empty(); |
151 | 264M | return ByteString((char const*)data(), length()); |
152 | 318M | } |
153 | | |
154 | | ErrorOr<String> StringBuilder::to_string() const |
155 | 30.4M | { |
156 | 30.4M | return String::from_utf8(string_view()); |
157 | 30.4M | } |
158 | | |
159 | | String StringBuilder::to_string_without_validation() const |
160 | 9.52M | { |
161 | 9.52M | return String::from_utf8_without_validation(string_view().bytes()); |
162 | 9.52M | } |
163 | | |
164 | | FlyString StringBuilder::to_fly_string_without_validation() const |
165 | 0 | { |
166 | 0 | return FlyString::from_utf8_without_validation(string_view().bytes()); |
167 | 0 | } |
168 | | |
169 | | ErrorOr<FlyString> StringBuilder::to_fly_string() const |
170 | 0 | { |
171 | 0 | return FlyString::from_utf8(string_view()); |
172 | 0 | } |
173 | | #endif |
174 | | |
175 | | u8* StringBuilder::data() |
176 | 0 | { |
177 | 0 | return m_buffer.data(); |
178 | 0 | } |
179 | | |
180 | | u8 const* StringBuilder::data() const |
181 | 552M | { |
182 | 552M | return m_buffer.data(); |
183 | 552M | } |
184 | | |
185 | | StringView StringBuilder::string_view() const |
186 | 287M | { |
187 | 287M | return StringView { data(), m_buffer.size() }; |
188 | 287M | } |
189 | | |
190 | | void StringBuilder::clear() |
191 | 125M | { |
192 | 125M | m_buffer.clear(); |
193 | 125M | } |
194 | | |
195 | | ErrorOr<void> StringBuilder::try_append_code_point(u32 code_point) |
196 | 105M | { |
197 | 105M | auto nwritten = TRY(AK::UnicodeUtils::try_code_point_to_utf8(code_point, [this](char c) { return try_append(c); })); |
198 | 105M | if (nwritten < 0) { |
199 | 111k | TRY(try_append(0xef)); |
200 | 111k | TRY(try_append(0xbf)); |
201 | 111k | TRY(try_append(0xbd)); |
202 | 111k | } |
203 | 105M | return {}; |
204 | 105M | } |
205 | | |
206 | | void StringBuilder::append_code_point(u32 code_point) |
207 | 256M | { |
208 | 256M | if (code_point <= 0x7f) { |
209 | 160M | m_buffer.append(static_cast<char>(code_point)); |
210 | 160M | } else if (code_point <= 0x07ff) { |
211 | 270k | (void)will_append(2); |
212 | 270k | m_buffer.append(static_cast<char>((((code_point >> 6) & 0x1f) | 0xc0))); |
213 | 270k | m_buffer.append(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80))); |
214 | 96.1M | } else if (code_point <= 0xffff) { |
215 | 96.0M | (void)will_append(3); |
216 | 96.0M | m_buffer.append(static_cast<char>((((code_point >> 12) & 0x0f) | 0xe0))); |
217 | 96.0M | m_buffer.append(static_cast<char>((((code_point >> 6) & 0x3f) | 0x80))); |
218 | 96.0M | m_buffer.append(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80))); |
219 | 96.0M | } else if (code_point <= 0x10ffff) { |
220 | 122k | (void)will_append(4); |
221 | 122k | m_buffer.append(static_cast<char>((((code_point >> 18) & 0x07) | 0xf0))); |
222 | 122k | m_buffer.append(static_cast<char>((((code_point >> 12) & 0x3f) | 0x80))); |
223 | 122k | m_buffer.append(static_cast<char>((((code_point >> 6) & 0x3f) | 0x80))); |
224 | 122k | m_buffer.append(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80))); |
225 | 122k | } else { |
226 | 0 | (void)will_append(3); |
227 | 0 | m_buffer.append(0xef); |
228 | 0 | m_buffer.append(0xbf); |
229 | 0 | m_buffer.append(0xbd); |
230 | 0 | } |
231 | 256M | } |
232 | | |
233 | | #ifndef KERNEL |
234 | | ErrorOr<void> StringBuilder::try_append(Utf16View const& utf16_view) |
235 | 0 | { |
236 | | // NOTE: This may under-allocate in the presence of surrogate pairs. |
237 | | // That's okay, appending will still grow the buffer as needed. |
238 | 0 | TRY(will_append(utf16_view.length_in_code_units())); |
239 | |
|
240 | 0 | for (size_t i = 0; i < utf16_view.length_in_code_units();) { |
241 | | // OPTIMIZATION: Fast path for ASCII characters. |
242 | 0 | auto code_unit = utf16_view.data()[i]; |
243 | 0 | if (code_unit <= 0x7f) { |
244 | 0 | append(static_cast<char>(code_unit)); |
245 | 0 | ++i; |
246 | 0 | continue; |
247 | 0 | } |
248 | | |
249 | 0 | auto code_point = utf16_view.code_point_at(i); |
250 | 0 | TRY(try_append_code_point(code_point)); |
251 | |
|
252 | 0 | i += (code_point > 0xffff ? 2 : 1); |
253 | 0 | } |
254 | 0 | return {}; |
255 | 0 | } |
256 | | |
257 | | void StringBuilder::append(Utf16View const& utf16_view) |
258 | 0 | { |
259 | 0 | MUST(try_append(utf16_view)); |
260 | 0 | } |
261 | | #endif |
262 | | |
263 | | ErrorOr<void> StringBuilder::try_append(Utf32View const& utf32_view) |
264 | 90.8k | { |
265 | 181k | for (size_t i = 0; i < utf32_view.length(); ++i) { |
266 | 90.8k | auto code_point = utf32_view.code_points()[i]; |
267 | 90.8k | TRY(try_append_code_point(code_point)); |
268 | 90.8k | } |
269 | 90.8k | return {}; |
270 | 90.8k | } |
271 | | |
272 | | void StringBuilder::append(Utf32View const& utf32_view) |
273 | 90.8k | { |
274 | 90.8k | MUST(try_append(utf32_view)); |
275 | 90.8k | } |
276 | | |
277 | | void StringBuilder::append_as_lowercase(char ch) |
278 | 25.8M | { |
279 | 25.8M | if (ch >= 'A' && ch <= 'Z') |
280 | 535k | append(ch + 0x20); |
281 | 25.3M | else |
282 | 25.3M | append(ch); |
283 | 25.8M | } |
284 | | |
285 | | void StringBuilder::append_escaped_for_json(StringView string) |
286 | 0 | { |
287 | 0 | MUST(try_append_escaped_for_json(string)); |
288 | 0 | } |
289 | | |
290 | | ErrorOr<void> StringBuilder::try_append_escaped_for_json(StringView string) |
291 | 0 | { |
292 | 0 | for (auto ch : string) { |
293 | 0 | switch (ch) { |
294 | 0 | case '\b': |
295 | 0 | TRY(try_append("\\b"sv)); |
296 | 0 | break; |
297 | 0 | case '\n': |
298 | 0 | TRY(try_append("\\n"sv)); |
299 | 0 | break; |
300 | 0 | case '\t': |
301 | 0 | TRY(try_append("\\t"sv)); |
302 | 0 | break; |
303 | 0 | case '\"': |
304 | 0 | TRY(try_append("\\\""sv)); |
305 | 0 | break; |
306 | 0 | case '\\': |
307 | 0 | TRY(try_append("\\\\"sv)); |
308 | 0 | break; |
309 | 0 | default: |
310 | 0 | if (bit_cast<u8>(ch) <= 0x1f) |
311 | 0 | TRY(try_appendff("\\u{:04x}", ch)); |
312 | 0 | else |
313 | 0 | TRY(try_append(ch)); |
314 | 0 | } |
315 | 0 | } |
316 | 0 | return {}; |
317 | 0 | } |
318 | | |
319 | | } |