/src/serenity/AK/ByteString.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/ByteBuffer.h> |
8 | | #include <AK/ByteString.h> |
9 | | #include <AK/DeprecatedFlyString.h> |
10 | | #include <AK/Format.h> |
11 | | #include <AK/Function.h> |
12 | | #include <AK/StdLibExtras.h> |
13 | | #include <AK/StringView.h> |
14 | | #include <AK/Utf8View.h> |
15 | | #include <AK/Vector.h> |
16 | | |
17 | | namespace AK { |
18 | | |
19 | | bool ByteString::operator==(DeprecatedFlyString const& fly_string) const |
20 | 0 | { |
21 | 0 | return m_impl == fly_string.impl() || view() == fly_string.view(); |
22 | 0 | } |
23 | | |
24 | | bool ByteString::operator==(ByteString const& other) const |
25 | 5.96M | { |
26 | 5.96M | return m_impl == other.impl() || view() == other.view(); |
27 | 5.96M | } |
28 | | |
29 | | bool ByteString::operator==(StringView other) const |
30 | 392M | { |
31 | 392M | if (other.is_null()) |
32 | 0 | return is_empty(); |
33 | | |
34 | 392M | return view() == other; |
35 | 392M | } |
36 | | |
37 | | bool ByteString::operator<(ByteString const& other) const |
38 | 0 | { |
39 | 0 | return view() < other.view(); |
40 | 0 | } |
41 | | |
42 | | bool ByteString::operator>(ByteString const& other) const |
43 | 0 | { |
44 | 0 | return view() > other.view(); |
45 | 0 | } |
46 | | |
47 | | bool ByteString::copy_characters_to_buffer(char* buffer, size_t buffer_size) const |
48 | 0 | { |
49 | | // We must fit at least the NUL-terminator. |
50 | 0 | VERIFY(buffer_size > 0); |
51 | | |
52 | 0 | size_t characters_to_copy = min(length(), buffer_size - 1); |
53 | 0 | __builtin_memcpy(buffer, characters(), characters_to_copy); |
54 | 0 | buffer[characters_to_copy] = 0; |
55 | |
|
56 | 0 | return characters_to_copy == length(); |
57 | 0 | } |
58 | | |
59 | | ByteString ByteString::isolated_copy() const |
60 | 0 | { |
61 | 0 | if (m_impl->length() == 0) |
62 | 0 | return empty(); |
63 | 0 | char* buffer; |
64 | 0 | auto impl = StringImpl::create_uninitialized(length(), buffer); |
65 | 0 | memcpy(buffer, m_impl->characters(), m_impl->length()); |
66 | 0 | return ByteString(move(*impl)); |
67 | 0 | } |
68 | | |
69 | | ByteString ByteString::substring(size_t start, size_t length) const |
70 | 1.47M | { |
71 | 1.47M | if (!length) |
72 | 815k | return ByteString::empty(); |
73 | 657k | VERIFY(!Checked<size_t>::addition_would_overflow(start, length)); |
74 | 657k | VERIFY(start + length <= m_impl->length()); |
75 | 657k | return { characters() + start, length }; |
76 | 657k | } |
77 | | |
78 | | ByteString ByteString::substring(size_t start) const |
79 | 815k | { |
80 | 815k | VERIFY(start <= length()); |
81 | 815k | return { characters() + start, length() - start }; |
82 | 815k | } |
83 | | |
84 | | StringView ByteString::substring_view(size_t start, size_t length) const& |
85 | 31.2M | { |
86 | 31.2M | VERIFY(!Checked<size_t>::addition_would_overflow(start, length)); |
87 | 31.2M | VERIFY(start + length <= m_impl->length()); |
88 | 31.2M | return { characters() + start, length }; |
89 | 31.2M | } |
90 | | |
91 | | StringView ByteString::substring_view(size_t start) const& |
92 | 32.1M | { |
93 | 32.1M | VERIFY(start <= length()); |
94 | 32.1M | return { characters() + start, length() - start }; |
95 | 32.1M | } |
96 | | |
97 | | Vector<ByteString> ByteString::split(char separator, SplitBehavior split_behavior) const |
98 | 0 | { |
99 | 0 | return split_limit(separator, 0, split_behavior); |
100 | 0 | } |
101 | | |
102 | | Vector<ByteString> ByteString::split_limit(char separator, size_t limit, SplitBehavior split_behavior) const |
103 | 1.20k | { |
104 | 1.20k | if (is_empty()) |
105 | 363 | return {}; |
106 | | |
107 | 837 | Vector<ByteString> v; |
108 | 837 | size_t substart = 0; |
109 | 837 | bool keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty); |
110 | 837 | bool keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator); |
111 | 2.24M | for (size_t i = 0; i < length() && (v.size() + 1) != limit; ++i) { |
112 | 2.24M | char ch = characters()[i]; |
113 | 2.24M | if (ch == separator) { |
114 | 351 | size_t sublen = i - substart; |
115 | 351 | if (sublen != 0 || keep_empty) |
116 | 351 | v.append(substring(substart, keep_separator ? sublen + 1 : sublen)); |
117 | 351 | substart = i + 1; |
118 | 351 | } |
119 | 2.24M | } |
120 | 837 | size_t taillen = length() - substart; |
121 | 837 | if (taillen != 0 || keep_empty) |
122 | 837 | v.append(substring(substart, taillen)); |
123 | 837 | return v; |
124 | 1.20k | } |
125 | | |
126 | | Vector<StringView> ByteString::split_view(Function<bool(char)> separator, SplitBehavior split_behavior) const& |
127 | 1.61M | { |
128 | 1.61M | if (is_empty()) |
129 | 1.00M | return {}; |
130 | | |
131 | 608k | Vector<StringView> v; |
132 | 608k | size_t substart = 0; |
133 | 608k | bool keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty); |
134 | 608k | bool keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator); |
135 | 122M | for (size_t i = 0; i < length(); ++i) { |
136 | 122M | char ch = characters()[i]; |
137 | 122M | if (separator(ch)) { |
138 | 2.23M | size_t sublen = i - substart; |
139 | 2.23M | if (sublen != 0 || keep_empty) |
140 | 2.22M | v.append(substring_view(substart, keep_separator ? sublen + 1 : sublen)); |
141 | 2.23M | substart = i + 1; |
142 | 2.23M | } |
143 | 122M | } |
144 | 608k | size_t taillen = length() - substart; |
145 | 608k | if (taillen != 0 || keep_empty) |
146 | 606k | v.append(substring_view(substart, taillen)); |
147 | 608k | return v; |
148 | 1.61M | } |
149 | | |
150 | | Vector<StringView> ByteString::split_view(char const separator, SplitBehavior split_behavior) const& |
151 | 1.61M | { |
152 | 122M | return split_view([separator](char ch) { return ch == separator; }, split_behavior); |
153 | 1.61M | } |
154 | | |
155 | | ByteBuffer ByteString::to_byte_buffer() const |
156 | 0 | { |
157 | | // FIXME: Handle OOM failure. |
158 | 0 | return ByteBuffer::copy(bytes()).release_value_but_fixme_should_propagate_errors(); |
159 | 0 | } |
160 | | |
161 | | bool ByteString::starts_with(StringView str, CaseSensitivity case_sensitivity) const |
162 | 1.09k | { |
163 | 1.09k | return StringUtils::starts_with(*this, str, case_sensitivity); |
164 | 1.09k | } |
165 | | |
166 | | bool ByteString::starts_with(char ch) const |
167 | 1.80M | { |
168 | 1.80M | if (is_empty()) |
169 | 0 | return false; |
170 | 1.80M | return characters()[0] == ch; |
171 | 1.80M | } |
172 | | |
173 | | bool ByteString::ends_with(StringView str, CaseSensitivity case_sensitivity) const |
174 | 0 | { |
175 | 0 | return StringUtils::ends_with(*this, str, case_sensitivity); |
176 | 0 | } |
177 | | |
178 | | bool ByteString::ends_with(char ch) const |
179 | 11.9M | { |
180 | 11.9M | if (is_empty()) |
181 | 7.95M | return false; |
182 | 4.04M | return characters()[length() - 1] == ch; |
183 | 11.9M | } |
184 | | |
185 | | ByteString ByteString::repeated(char ch, size_t count) |
186 | 0 | { |
187 | 0 | if (!count) |
188 | 0 | return empty(); |
189 | 0 | char* buffer; |
190 | 0 | auto impl = StringImpl::create_uninitialized(count, buffer); |
191 | 0 | memset(buffer, ch, count); |
192 | 0 | return *impl; |
193 | 0 | } |
194 | | |
195 | | ByteString ByteString::repeated(StringView string, size_t count) |
196 | 0 | { |
197 | 0 | if (!count || string.is_empty()) |
198 | 0 | return empty(); |
199 | 0 | char* buffer; |
200 | 0 | auto impl = StringImpl::create_uninitialized(count * string.length(), buffer); |
201 | 0 | for (size_t i = 0; i < count; i++) |
202 | 0 | __builtin_memcpy(buffer + i * string.length(), string.characters_without_null_termination(), string.length()); |
203 | 0 | return *impl; |
204 | 0 | } |
205 | | |
206 | | ByteString ByteString::bijective_base_from(size_t value, unsigned base, StringView map) |
207 | 0 | { |
208 | 0 | value++; |
209 | 0 | if (map.is_null()) |
210 | 0 | map = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv; |
211 | |
|
212 | 0 | VERIFY(base >= 2 && base <= map.length()); |
213 | | |
214 | | // The '8 bits per byte' assumption may need to go? |
215 | 0 | Array<char, round_up_to_power_of_two(sizeof(size_t) * 8 + 1, 2)> buffer; |
216 | 0 | size_t i = 0; |
217 | 0 | do { |
218 | 0 | auto remainder = value % base; |
219 | 0 | auto new_value = value / base; |
220 | 0 | if (remainder == 0) { |
221 | 0 | new_value--; |
222 | 0 | remainder = map.length(); |
223 | 0 | } |
224 | |
|
225 | 0 | buffer[i++] = map[remainder - 1]; |
226 | 0 | value = new_value; |
227 | 0 | } while (value > 0); |
228 | |
|
229 | 0 | for (size_t j = 0; j < i / 2; ++j) |
230 | 0 | swap(buffer[j], buffer[i - j - 1]); |
231 | |
|
232 | 0 | return ByteString { ReadonlyBytes(buffer.data(), i) }; |
233 | 0 | } |
234 | | |
235 | | ByteString ByteString::roman_number_from(size_t value) |
236 | 0 | { |
237 | 0 | if (value > 3999) |
238 | 0 | return ByteString::number(value); |
239 | | |
240 | 0 | StringBuilder builder; |
241 | |
|
242 | 0 | while (value > 0) { |
243 | 0 | if (value >= 1000) { |
244 | 0 | builder.append('M'); |
245 | 0 | value -= 1000; |
246 | 0 | } else if (value >= 900) { |
247 | 0 | builder.append("CM"sv); |
248 | 0 | value -= 900; |
249 | 0 | } else if (value >= 500) { |
250 | 0 | builder.append('D'); |
251 | 0 | value -= 500; |
252 | 0 | } else if (value >= 400) { |
253 | 0 | builder.append("CD"sv); |
254 | 0 | value -= 400; |
255 | 0 | } else if (value >= 100) { |
256 | 0 | builder.append('C'); |
257 | 0 | value -= 100; |
258 | 0 | } else if (value >= 90) { |
259 | 0 | builder.append("XC"sv); |
260 | 0 | value -= 90; |
261 | 0 | } else if (value >= 50) { |
262 | 0 | builder.append('L'); |
263 | 0 | value -= 50; |
264 | 0 | } else if (value >= 40) { |
265 | 0 | builder.append("XL"sv); |
266 | 0 | value -= 40; |
267 | 0 | } else if (value >= 10) { |
268 | 0 | builder.append('X'); |
269 | 0 | value -= 10; |
270 | 0 | } else if (value == 9) { |
271 | 0 | builder.append("IX"sv); |
272 | 0 | value -= 9; |
273 | 0 | } else if (value >= 5 && value <= 8) { |
274 | 0 | builder.append('V'); |
275 | 0 | value -= 5; |
276 | 0 | } else if (value == 4) { |
277 | 0 | builder.append("IV"sv); |
278 | 0 | value -= 4; |
279 | 0 | } else if (value <= 3) { |
280 | 0 | builder.append('I'); |
281 | 0 | value -= 1; |
282 | 0 | } |
283 | 0 | } |
284 | |
|
285 | 0 | return builder.to_byte_string(); |
286 | 0 | } |
287 | | |
288 | | bool ByteString::matches(StringView mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const |
289 | 0 | { |
290 | 0 | return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans); |
291 | 0 | } |
292 | | |
293 | | bool ByteString::matches(StringView mask, CaseSensitivity case_sensitivity) const |
294 | 0 | { |
295 | 0 | return StringUtils::matches(*this, mask, case_sensitivity); |
296 | 0 | } |
297 | | |
298 | | bool ByteString::contains(StringView needle, CaseSensitivity case_sensitivity) const |
299 | 0 | { |
300 | 0 | return StringUtils::contains(*this, needle, case_sensitivity); |
301 | 0 | } |
302 | | |
303 | | bool ByteString::contains(char needle, CaseSensitivity case_sensitivity) const |
304 | 0 | { |
305 | 0 | return StringUtils::contains(*this, StringView(&needle, 1), case_sensitivity); |
306 | 0 | } |
307 | | |
308 | | bool ByteString::equals_ignoring_ascii_case(StringView other) const |
309 | 30.1M | { |
310 | 30.1M | return StringUtils::equals_ignoring_ascii_case(view(), other); |
311 | 30.1M | } |
312 | | |
313 | | ByteString ByteString::reverse() const |
314 | 228k | { |
315 | 228k | StringBuilder reversed_string(length()); |
316 | 1.02M | for (size_t i = length(); i-- > 0;) { |
317 | 799k | reversed_string.append(characters()[i]); |
318 | 799k | } |
319 | 228k | return reversed_string.to_byte_string(); |
320 | 228k | } |
321 | | |
322 | | ByteString escape_html_entities(StringView html) |
323 | 0 | { |
324 | 0 | StringBuilder builder; |
325 | 0 | for (size_t i = 0; i < html.length(); ++i) { |
326 | 0 | if (html[i] == '<') |
327 | 0 | builder.append("<"sv); |
328 | 0 | else if (html[i] == '>') |
329 | 0 | builder.append(">"sv); |
330 | 0 | else if (html[i] == '&') |
331 | 0 | builder.append("&"sv); |
332 | 0 | else if (html[i] == '"') |
333 | 0 | builder.append("""sv); |
334 | 0 | else |
335 | 0 | builder.append(html[i]); |
336 | 0 | } |
337 | 0 | return builder.to_byte_string(); |
338 | 0 | } |
339 | | |
340 | | ByteString::ByteString(DeprecatedFlyString const& string) |
341 | 22.9k | : m_impl(string.impl()) |
342 | 22.9k | { |
343 | 22.9k | } |
344 | | |
345 | | ByteString ByteString::to_lowercase() const |
346 | 0 | { |
347 | 0 | return m_impl->to_lowercase(); |
348 | 0 | } |
349 | | |
350 | | ByteString ByteString::to_uppercase() const |
351 | 0 | { |
352 | 0 | return m_impl->to_uppercase(); |
353 | 0 | } |
354 | | |
355 | | ByteString ByteString::to_snakecase() const |
356 | 0 | { |
357 | 0 | return StringUtils::to_snakecase(*this); |
358 | 0 | } |
359 | | |
360 | | ByteString ByteString::to_titlecase() const |
361 | 0 | { |
362 | 0 | return StringUtils::to_titlecase(*this); |
363 | 0 | } |
364 | | |
365 | | ByteString ByteString::invert_case() const |
366 | 0 | { |
367 | 0 | return StringUtils::invert_case(*this); |
368 | 0 | } |
369 | | |
370 | | bool ByteString::operator==(char const* cstring) const |
371 | 162k | { |
372 | 162k | if (!cstring) |
373 | 0 | return is_empty(); |
374 | | |
375 | 162k | return view() == cstring; |
376 | 162k | } |
377 | | |
378 | | ByteString ByteString::vformatted(StringView fmtstr, TypeErasedFormatParams& params) |
379 | 42.8M | { |
380 | 42.8M | StringBuilder builder; |
381 | 42.8M | MUST(vformat(builder, fmtstr, params)); |
382 | 42.8M | return builder.to_byte_string(); |
383 | 42.8M | } |
384 | | |
385 | | Vector<size_t> ByteString::find_all(StringView needle) const |
386 | 0 | { |
387 | 0 | return StringUtils::find_all(*this, needle); |
388 | 0 | } |
389 | | |
390 | | DeprecatedStringCodePointIterator ByteString::code_points() const |
391 | 0 | { |
392 | 0 | return DeprecatedStringCodePointIterator(*this); |
393 | 0 | } |
394 | | |
395 | | ErrorOr<ByteString> ByteString::from_utf8(ReadonlyBytes bytes) |
396 | 0 | { |
397 | 0 | if (!Utf8View(bytes).validate()) |
398 | 0 | return Error::from_string_literal("ByteString::from_utf8: Input was not valid UTF-8"); |
399 | 0 | return ByteString { *StringImpl::create(bytes) }; |
400 | 0 | } |
401 | | |
402 | | } |