/src/serenity/AK/FlyString.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/DeprecatedFlyString.h> |
8 | | #include <AK/FlyString.h> |
9 | | #include <AK/HashMap.h> |
10 | | #include <AK/Singleton.h> |
11 | | #include <AK/String.h> |
12 | | #include <AK/StringData.h> |
13 | | #include <AK/StringView.h> |
14 | | #include <AK/Utf8View.h> |
15 | | |
16 | | namespace AK { |
17 | | |
18 | | struct FlyStringTableHashTraits : public Traits<Detail::StringData const*> { |
19 | 2.25M | static u32 hash(Detail::StringData const* string) { return string->hash(); } |
20 | 1.14M | static bool equals(Detail::StringData const* a, Detail::StringData const* b) { return *a == *b; } |
21 | | }; |
22 | | |
23 | | static auto& all_fly_strings() |
24 | 6.74M | { |
25 | 6.74M | static Singleton<HashTable<Detail::StringData const*, FlyStringTableHashTraits>> table; |
26 | 6.74M | return *table; |
27 | 6.74M | } |
28 | | |
29 | | ErrorOr<FlyString> FlyString::from_utf8(StringView string) |
30 | 1.16M | { |
31 | 1.16M | if (string.is_empty()) |
32 | 32.6k | return FlyString {}; |
33 | 1.12M | if (string.length() <= Detail::MAX_SHORT_STRING_BYTE_COUNT) |
34 | 8.49k | return FlyString { TRY(String::from_utf8(string)) }; |
35 | 1.12M | if (auto it = all_fly_strings().find(string.hash(), [&](auto& entry) { return entry->bytes_as_string_view() == string; }); it != all_fly_strings().end()) |
36 | 0 | return FlyString { Detail::StringBase(**it) }; |
37 | 1.12M | return FlyString { TRY(String::from_utf8(string)) }; |
38 | 1.12M | } |
39 | | |
40 | | FlyString FlyString::from_utf8_without_validation(ReadonlyBytes string) |
41 | 0 | { |
42 | 0 | if (string.is_empty()) |
43 | 0 | return FlyString {}; |
44 | 0 | if (string.size() <= Detail::MAX_SHORT_STRING_BYTE_COUNT) |
45 | 0 | return FlyString { String::from_utf8_without_validation(string) }; |
46 | 0 | if (auto it = all_fly_strings().find(StringView(string).hash(), [&](auto& entry) { return entry->bytes_as_string_view() == string; }); it != all_fly_strings().end()) |
47 | 0 | return FlyString { Detail::StringBase(**it) }; |
48 | 0 | return FlyString { String::from_utf8_without_validation(string) }; |
49 | 0 | } |
50 | | |
51 | | FlyString::FlyString(String const& string) |
52 | 1.13M | { |
53 | 1.13M | if (string.is_short_string()) { |
54 | 11.0k | m_data = string; |
55 | 11.0k | return; |
56 | 11.0k | } |
57 | | |
58 | 1.12M | if (string.m_data->is_fly_string()) { |
59 | 0 | m_data = string; |
60 | 0 | return; |
61 | 0 | } |
62 | | |
63 | 1.12M | auto it = all_fly_strings().find(string.m_data); |
64 | 1.12M | if (it == all_fly_strings().end()) { |
65 | 1.12M | m_data = string; |
66 | 1.12M | all_fly_strings().set(string.m_data); |
67 | 1.12M | string.m_data->set_fly_string(true); |
68 | 1.12M | } else { |
69 | 541 | m_data.m_data = *it; |
70 | 541 | m_data.m_data->ref(); |
71 | 541 | } |
72 | 1.12M | } |
73 | | |
74 | | FlyString& FlyString::operator=(String const& string) |
75 | 0 | { |
76 | 0 | *this = FlyString { string }; |
77 | 0 | return *this; |
78 | 0 | } |
79 | | |
80 | | bool FlyString::is_empty() const |
81 | 0 | { |
82 | 0 | return bytes_as_string_view().is_empty(); |
83 | 0 | } |
84 | | |
85 | | unsigned FlyString::hash() const |
86 | 0 | { |
87 | 0 | return m_data.hash(); |
88 | 0 | } |
89 | | |
90 | | u32 FlyString::ascii_case_insensitive_hash() const |
91 | 0 | { |
92 | 0 | return case_insensitive_string_hash(reinterpret_cast<char const*>(bytes().data()), bytes().size()); |
93 | 0 | } |
94 | | |
95 | | FlyString::operator String() const |
96 | 0 | { |
97 | 0 | return to_string(); |
98 | 0 | } |
99 | | |
100 | | String FlyString::to_string() const |
101 | 0 | { |
102 | 0 | Detail::StringBase copy = m_data; |
103 | 0 | return String(move(copy)); |
104 | 0 | } |
105 | | |
106 | | Utf8View FlyString::code_points() const |
107 | 0 | { |
108 | 0 | return Utf8View { bytes_as_string_view() }; |
109 | 0 | } |
110 | | |
111 | | ReadonlyBytes FlyString::bytes() const |
112 | 0 | { |
113 | 0 | return bytes_as_string_view().bytes(); |
114 | 0 | } |
115 | | |
116 | | StringView FlyString::bytes_as_string_view() const |
117 | 0 | { |
118 | 0 | return m_data.bytes(); |
119 | 0 | } |
120 | | |
121 | | bool FlyString::operator==(String const& other) const |
122 | 0 | { |
123 | 0 | return m_data == other; |
124 | 0 | } |
125 | | |
126 | | bool FlyString::operator==(StringView string) const |
127 | 0 | { |
128 | 0 | return bytes_as_string_view() == string; |
129 | 0 | } |
130 | | |
131 | | bool FlyString::operator==(char const* string) const |
132 | 0 | { |
133 | 0 | return bytes_as_string_view() == string; |
134 | 0 | } |
135 | | |
136 | | void FlyString::did_destroy_fly_string_data(Badge<Detail::StringData>, Detail::StringData const& string_data) |
137 | 1.12M | { |
138 | 1.12M | all_fly_strings().remove(&string_data); |
139 | 1.12M | } |
140 | | |
141 | | Detail::StringBase FlyString::data(Badge<String>) const |
142 | 0 | { |
143 | 0 | return m_data; |
144 | 0 | } |
145 | | |
146 | | size_t FlyString::number_of_fly_strings() |
147 | 0 | { |
148 | 0 | return all_fly_strings().size(); |
149 | 0 | } |
150 | | |
151 | | DeprecatedFlyString FlyString::to_deprecated_fly_string() const |
152 | 0 | { |
153 | 0 | return DeprecatedFlyString(bytes_as_string_view()); |
154 | 0 | } |
155 | | |
156 | | ErrorOr<FlyString> FlyString::from_deprecated_fly_string(DeprecatedFlyString const& deprecated_fly_string) |
157 | 0 | { |
158 | 0 | return FlyString::from_utf8(deprecated_fly_string.view()); |
159 | 0 | } |
160 | | |
161 | | unsigned Traits<FlyString>::hash(FlyString const& fly_string) |
162 | 0 | { |
163 | 0 | return fly_string.hash(); |
164 | 0 | } |
165 | | |
166 | | int FlyString::operator<=>(FlyString const& other) const |
167 | 0 | { |
168 | 0 | return bytes_as_string_view().compare(other.bytes_as_string_view()); |
169 | 0 | } |
170 | | |
171 | | ErrorOr<void> Formatter<FlyString>::format(FormatBuilder& builder, FlyString const& fly_string) |
172 | 0 | { |
173 | 0 | return Formatter<StringView>::format(builder, fly_string.bytes_as_string_view()); |
174 | 0 | } |
175 | | |
176 | | FlyString FlyString::to_ascii_lowercase() const |
177 | 0 | { |
178 | 0 | bool const has_ascii_uppercase = [&] { |
179 | 0 | for (u8 const byte : bytes()) { |
180 | 0 | if (AK::is_ascii_upper_alpha(byte)) |
181 | 0 | return true; |
182 | 0 | } |
183 | 0 | return false; |
184 | 0 | }(); |
185 | |
|
186 | 0 | if (!has_ascii_uppercase) |
187 | 0 | return *this; |
188 | | |
189 | 0 | Vector<u8> lowercase_bytes; |
190 | 0 | lowercase_bytes.ensure_capacity(bytes().size()); |
191 | 0 | for (u8 const byte : bytes()) { |
192 | 0 | if (AK::is_ascii_upper_alpha(byte)) |
193 | 0 | lowercase_bytes.unchecked_append(AK::to_ascii_lowercase(byte)); |
194 | 0 | else |
195 | 0 | lowercase_bytes.unchecked_append(byte); |
196 | 0 | } |
197 | 0 | return String::from_utf8_without_validation(lowercase_bytes); |
198 | 0 | } |
199 | | |
200 | | FlyString FlyString::to_ascii_uppercase() const |
201 | 0 | { |
202 | 0 | bool const has_ascii_lowercase = [&] { |
203 | 0 | for (u8 const byte : bytes()) { |
204 | 0 | if (AK::is_ascii_lower_alpha(byte)) |
205 | 0 | return true; |
206 | 0 | } |
207 | 0 | return false; |
208 | 0 | }(); |
209 | |
|
210 | 0 | if (!has_ascii_lowercase) |
211 | 0 | return *this; |
212 | | |
213 | 0 | Vector<u8> uppercase_bytes; |
214 | 0 | uppercase_bytes.ensure_capacity(bytes().size()); |
215 | 0 | for (u8 const byte : bytes()) { |
216 | 0 | if (AK::is_ascii_lower_alpha(byte)) |
217 | 0 | uppercase_bytes.unchecked_append(AK::to_ascii_uppercase(byte)); |
218 | 0 | else |
219 | 0 | uppercase_bytes.unchecked_append(byte); |
220 | 0 | } |
221 | 0 | return String::from_utf8_without_validation(uppercase_bytes); |
222 | 0 | } |
223 | | |
224 | | bool FlyString::equals_ignoring_ascii_case(FlyString const& other) const |
225 | 0 | { |
226 | 0 | if (*this == other) |
227 | 0 | return true; |
228 | 0 | return StringUtils::equals_ignoring_ascii_case(bytes_as_string_view(), other.bytes_as_string_view()); |
229 | 0 | } |
230 | | |
231 | | bool FlyString::equals_ignoring_ascii_case(StringView other) const |
232 | 0 | { |
233 | 0 | return StringUtils::equals_ignoring_ascii_case(bytes_as_string_view(), other); |
234 | 0 | } |
235 | | |
236 | | bool FlyString::starts_with_bytes(StringView bytes, CaseSensitivity case_sensitivity) const |
237 | 0 | { |
238 | 0 | return bytes_as_string_view().starts_with(bytes, case_sensitivity); |
239 | 0 | } |
240 | | |
241 | | bool FlyString::ends_with_bytes(StringView bytes, CaseSensitivity case_sensitivity) const |
242 | 0 | { |
243 | 0 | return bytes_as_string_view().ends_with(bytes, case_sensitivity); |
244 | 0 | } |
245 | | |
246 | | } |