/src/serenity/AK/StringUtils.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2018-2022, Andreas Kling <awesomekling@gmail.com> |
3 | | * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #include <AK/CharacterTypes.h> |
9 | | #include <AK/MemMem.h> |
10 | | #include <AK/Optional.h> |
11 | | #include <AK/String.h> |
12 | | #include <AK/StringBuilder.h> |
13 | | #include <AK/StringUtils.h> |
14 | | #include <AK/StringView.h> |
15 | | #include <AK/Vector.h> |
16 | | |
17 | | #if defined(PREKERNEL) |
18 | | # include <Kernel/Library/MiniStdLib.h> |
19 | | #elif defined(KERNEL) |
20 | | # include <Kernel/Library/StdLib.h> |
21 | | #else |
22 | | # include <AK/ByteString.h> |
23 | | # include <AK/FloatingPointStringConversions.h> |
24 | | # include <string.h> |
25 | | #endif |
26 | | |
27 | | namespace AK { |
28 | | |
29 | | namespace StringUtils { |
30 | | |
31 | | bool matches(StringView str, StringView mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans) |
32 | 0 | { |
33 | 0 | auto record_span = [&match_spans](size_t start, size_t length) { |
34 | 0 | if (match_spans) |
35 | 0 | match_spans->append({ start, length }); |
36 | 0 | }; |
37 | |
|
38 | 0 | if (str.is_null() || mask.is_null()) |
39 | 0 | return str.is_null() && mask.is_null(); |
40 | | |
41 | 0 | if (mask == "*"sv) { |
42 | 0 | record_span(0, str.length()); |
43 | 0 | return true; |
44 | 0 | } |
45 | | |
46 | 0 | char const* string_ptr = str.characters_without_null_termination(); |
47 | 0 | char const* string_start = str.characters_without_null_termination(); |
48 | 0 | char const* string_end = string_ptr + str.length(); |
49 | 0 | char const* mask_ptr = mask.characters_without_null_termination(); |
50 | 0 | char const* mask_end = mask_ptr + mask.length(); |
51 | |
|
52 | 0 | while (string_ptr < string_end && mask_ptr < mask_end) { |
53 | 0 | auto string_start_ptr = string_ptr; |
54 | 0 | switch (*mask_ptr) { |
55 | 0 | case '*': |
56 | 0 | if (mask_ptr == mask_end - 1) { |
57 | 0 | record_span(string_ptr - string_start, string_end - string_ptr); |
58 | 0 | return true; |
59 | 0 | } |
60 | 0 | while (string_ptr < string_end && !matches({ string_ptr, static_cast<size_t>(string_end - string_ptr) }, { mask_ptr + 1, static_cast<size_t>(mask_end - mask_ptr - 1) }, case_sensitivity)) |
61 | 0 | ++string_ptr; |
62 | 0 | record_span(string_start_ptr - string_start, string_ptr - string_start_ptr); |
63 | 0 | --string_ptr; |
64 | 0 | break; |
65 | 0 | case '?': |
66 | 0 | record_span(string_ptr - string_start, 1); |
67 | 0 | break; |
68 | 0 | case '\\': |
69 | | // if backslash is last character in mask, just treat it as an exact match |
70 | | // otherwise use it as escape for next character |
71 | 0 | if (mask_ptr + 1 < mask_end) |
72 | 0 | ++mask_ptr; |
73 | 0 | [[fallthrough]]; |
74 | 0 | default: |
75 | 0 | auto p = *mask_ptr; |
76 | 0 | auto ch = *string_ptr; |
77 | 0 | if (case_sensitivity == CaseSensitivity::CaseSensitive ? p != ch : to_ascii_lowercase(p) != to_ascii_lowercase(ch)) |
78 | 0 | return false; |
79 | 0 | break; |
80 | 0 | } |
81 | 0 | ++string_ptr; |
82 | 0 | ++mask_ptr; |
83 | 0 | } |
84 | | |
85 | 0 | if (string_ptr == string_end) { |
86 | | // Allow ending '*' to contain nothing. |
87 | 0 | while (mask_ptr != mask_end && *mask_ptr == '*') { |
88 | 0 | record_span(string_ptr - string_start, 0); |
89 | 0 | ++mask_ptr; |
90 | 0 | } |
91 | 0 | } |
92 | |
|
93 | 0 | return string_ptr == string_end && mask_ptr == mask_end; |
94 | 0 | } |
95 | | |
96 | | template<typename T> |
97 | | Optional<T> convert_to_int(StringView str, TrimWhitespace trim_whitespace) |
98 | 1.67M | { |
99 | 1.67M | auto string = trim_whitespace == TrimWhitespace::Yes |
100 | 1.67M | ? str.trim_whitespace() |
101 | 1.67M | : str; |
102 | 1.67M | if (string.is_empty()) |
103 | 238 | return {}; |
104 | | |
105 | 1.67M | T sign = 1; |
106 | 1.67M | size_t i = 0; |
107 | 1.67M | auto const characters = string.characters_without_null_termination(); |
108 | | |
109 | 1.67M | if (characters[0] == '-' || characters[0] == '+') { |
110 | 135k | if (string.length() == 1) |
111 | 71.2k | return {}; |
112 | 64.6k | i++; |
113 | 64.6k | if (characters[0] == '-') |
114 | 63.9k | sign = -1; |
115 | 64.6k | } |
116 | | |
117 | 1.60M | T value = 0; |
118 | 4.92M | for (; i < string.length(); i++) { |
119 | 3.51M | if (characters[i] < '0' || characters[i] > '9') |
120 | 184k | return {}; |
121 | | |
122 | 3.32M | if (__builtin_mul_overflow(value, 10, &value)) |
123 | 5.95k | return {}; |
124 | | |
125 | 3.32M | if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value)) |
126 | 397 | return {}; |
127 | 3.32M | } |
128 | 1.41M | return value; |
129 | 1.60M | } Unexecuted instantiation: AK::Optional<signed char> AK::StringUtils::convert_to_int<signed char>(AK::StringView, AK::TrimWhitespace) Unexecuted instantiation: AK::Optional<short> AK::StringUtils::convert_to_int<short>(AK::StringView, AK::TrimWhitespace) AK::Optional<int> AK::StringUtils::convert_to_int<int>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 98 | 1.64M | { | 99 | 1.64M | auto string = trim_whitespace == TrimWhitespace::Yes | 100 | 1.64M | ? str.trim_whitespace() | 101 | 1.64M | : str; | 102 | 1.64M | if (string.is_empty()) | 103 | 238 | return {}; | 104 | | | 105 | 1.64M | T sign = 1; | 106 | 1.64M | size_t i = 0; | 107 | 1.64M | auto const characters = string.characters_without_null_termination(); | 108 | | | 109 | 1.64M | if (characters[0] == '-' || characters[0] == '+') { | 110 | 112k | if (string.length() == 1) | 111 | 71.2k | return {}; | 112 | 40.7k | i++; | 113 | 40.7k | if (characters[0] == '-') | 114 | 40.0k | sign = -1; | 115 | 40.7k | } | 116 | | | 117 | 1.57M | T value = 0; | 118 | 4.75M | for (; i < string.length(); i++) { | 119 | 3.36M | if (characters[i] < '0' || characters[i] > '9') | 120 | 184k | return {}; | 121 | | | 122 | 3.18M | if (__builtin_mul_overflow(value, 10, &value)) | 123 | 941 | return {}; | 124 | | | 125 | 3.18M | if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value)) | 126 | 201 | return {}; | 127 | 3.18M | } | 128 | 1.38M | return value; | 129 | 1.57M | } |
AK::Optional<long> AK::StringUtils::convert_to_int<long>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 98 | 28.7k | { | 99 | 28.7k | auto string = trim_whitespace == TrimWhitespace::Yes | 100 | 28.7k | ? str.trim_whitespace() | 101 | 28.7k | : str; | 102 | 28.7k | if (string.is_empty()) | 103 | 0 | return {}; | 104 | | | 105 | 28.7k | T sign = 1; | 106 | 28.7k | size_t i = 0; | 107 | 28.7k | auto const characters = string.characters_without_null_termination(); | 108 | | | 109 | 28.7k | if (characters[0] == '-' || characters[0] == '+') { | 110 | 23.9k | if (string.length() == 1) | 111 | 0 | return {}; | 112 | 23.9k | i++; | 113 | 23.9k | if (characters[0] == '-') | 114 | 23.9k | sign = -1; | 115 | 23.9k | } | 116 | | | 117 | 28.7k | T value = 0; | 118 | 168k | for (; i < string.length(); i++) { | 119 | 145k | if (characters[i] < '0' || characters[i] > '9') | 120 | 0 | return {}; | 121 | | | 122 | 145k | if (__builtin_mul_overflow(value, 10, &value)) | 123 | 5.01k | return {}; | 124 | | | 125 | 140k | if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value)) | 126 | 196 | return {}; | 127 | 140k | } | 128 | 23.5k | return value; | 129 | 28.7k | } |
Unexecuted instantiation: AK::Optional<long long> AK::StringUtils::convert_to_int<long long>(AK::StringView, AK::TrimWhitespace) |
130 | | |
131 | | template Optional<i8> convert_to_int(StringView str, TrimWhitespace); |
132 | | template Optional<i16> convert_to_int(StringView str, TrimWhitespace); |
133 | | template Optional<i32> convert_to_int(StringView str, TrimWhitespace); |
134 | | template Optional<long> convert_to_int(StringView str, TrimWhitespace); |
135 | | template Optional<long long> convert_to_int(StringView str, TrimWhitespace); |
136 | | |
137 | | template<typename T> |
138 | | Optional<T> convert_to_uint(StringView str, TrimWhitespace trim_whitespace) |
139 | 8.05M | { |
140 | 8.05M | auto string = trim_whitespace == TrimWhitespace::Yes |
141 | 8.05M | ? str.trim_whitespace() |
142 | 8.05M | : str; |
143 | 8.05M | if (string.is_empty()) |
144 | 109k | return {}; |
145 | | |
146 | 7.94M | T value = 0; |
147 | 7.94M | auto const characters = string.characters_without_null_termination(); |
148 | | |
149 | 16.5M | for (size_t i = 0; i < string.length(); i++) { |
150 | 9.28M | if (characters[i] < '0' || characters[i] > '9') |
151 | 670k | return {}; |
152 | | |
153 | 8.61M | if (__builtin_mul_overflow(value, 10, &value)) |
154 | 16.3k | return {}; |
155 | | |
156 | 8.59M | if (__builtin_add_overflow(value, characters[i] - '0', &value)) |
157 | 1.38k | return {}; |
158 | 8.59M | } |
159 | 7.25M | return value; |
160 | 7.94M | } Unexecuted instantiation: AK::Optional<unsigned char> AK::StringUtils::convert_to_uint<unsigned char>(AK::StringView, AK::TrimWhitespace) AK::Optional<unsigned short> AK::StringUtils::convert_to_uint<unsigned short>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 139 | 621k | { | 140 | 621k | auto string = trim_whitespace == TrimWhitespace::Yes | 141 | 621k | ? str.trim_whitespace() | 142 | 621k | : str; | 143 | 621k | if (string.is_empty()) | 144 | 968 | return {}; | 145 | | | 146 | 620k | T value = 0; | 147 | 620k | auto const characters = string.characters_without_null_termination(); | 148 | | | 149 | 1.45M | for (size_t i = 0; i < string.length(); i++) { | 150 | 840k | if (characters[i] < '0' || characters[i] > '9') | 151 | 767 | return {}; | 152 | | | 153 | 839k | if (__builtin_mul_overflow(value, 10, &value)) | 154 | 80 | return {}; | 155 | | | 156 | 839k | if (__builtin_add_overflow(value, characters[i] - '0', &value)) | 157 | 6 | return {}; | 158 | 839k | } | 159 | 619k | return value; | 160 | 620k | } |
AK::Optional<unsigned int> AK::StringUtils::convert_to_uint<unsigned int>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 139 | 1.16M | { | 140 | 1.16M | auto string = trim_whitespace == TrimWhitespace::Yes | 141 | 1.16M | ? str.trim_whitespace() | 142 | 1.16M | : str; | 143 | 1.16M | if (string.is_empty()) | 144 | 1.83k | return {}; | 145 | | | 146 | 1.16M | T value = 0; | 147 | 1.16M | auto const characters = string.characters_without_null_termination(); | 148 | | | 149 | 2.42M | for (size_t i = 0; i < string.length(); i++) { | 150 | 1.91M | if (characters[i] < '0' || characters[i] > '9') | 151 | 645k | return {}; | 152 | | | 153 | 1.26M | if (__builtin_mul_overflow(value, 10, &value)) | 154 | 11.8k | return {}; | 155 | | | 156 | 1.25M | if (__builtin_add_overflow(value, characters[i] - '0', &value)) | 157 | 1.00k | return {}; | 158 | 1.25M | } | 159 | 507k | return value; | 160 | 1.16M | } |
AK::Optional<unsigned long> AK::StringUtils::convert_to_uint<unsigned long>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 139 | 6.26M | { | 140 | 6.26M | auto string = trim_whitespace == TrimWhitespace::Yes | 141 | 6.26M | ? str.trim_whitespace() | 142 | 6.26M | : str; | 143 | 6.26M | if (string.is_empty()) | 144 | 106k | return {}; | 145 | | | 146 | 6.15M | T value = 0; | 147 | 6.15M | auto const characters = string.characters_without_null_termination(); | 148 | | | 149 | 12.6M | for (size_t i = 0; i < string.length(); i++) { | 150 | 6.53M | if (characters[i] < '0' || characters[i] > '9') | 151 | 23.9k | return {}; | 152 | | | 153 | 6.50M | if (__builtin_mul_overflow(value, 10, &value)) | 154 | 4.48k | return {}; | 155 | | | 156 | 6.50M | if (__builtin_add_overflow(value, characters[i] - '0', &value)) | 157 | 376 | return {}; | 158 | 6.50M | } | 159 | 6.12M | return value; | 160 | 6.15M | } |
Unexecuted instantiation: AK::Optional<unsigned long long> AK::StringUtils::convert_to_uint<unsigned long long>(AK::StringView, AK::TrimWhitespace) |
161 | | |
162 | | template Optional<u8> convert_to_uint(StringView str, TrimWhitespace); |
163 | | template Optional<u16> convert_to_uint(StringView str, TrimWhitespace); |
164 | | template Optional<u32> convert_to_uint(StringView str, TrimWhitespace); |
165 | | template Optional<unsigned long> convert_to_uint(StringView str, TrimWhitespace); |
166 | | template Optional<unsigned long long> convert_to_uint(StringView str, TrimWhitespace); |
167 | | |
168 | | template<typename T> |
169 | | Optional<T> convert_to_uint_from_hex(StringView str, TrimWhitespace trim_whitespace) |
170 | 1.09M | { |
171 | 1.09M | auto string = trim_whitespace == TrimWhitespace::Yes |
172 | 1.09M | ? str.trim_whitespace() |
173 | 1.09M | : str; |
174 | 1.09M | if (string.is_empty()) |
175 | 2 | return {}; |
176 | | |
177 | 1.09M | T value = 0; |
178 | 1.09M | auto const count = string.length(); |
179 | 1.09M | T const upper_bound = NumericLimits<T>::max(); |
180 | | |
181 | 4.04M | for (size_t i = 0; i < count; i++) { |
182 | 3.51M | char digit = string[i]; |
183 | 3.51M | u8 digit_val; |
184 | 3.51M | if (value > (upper_bound >> 4)) |
185 | 330 | return {}; |
186 | | |
187 | 3.51M | if (digit >= '0' && digit <= '9') { |
188 | 1.36M | digit_val = digit - '0'; |
189 | 2.15M | } else if (digit >= 'a' && digit <= 'f') { |
190 | 542k | digit_val = 10 + (digit - 'a'); |
191 | 1.60M | } else if (digit >= 'A' && digit <= 'F') { |
192 | 1.03M | digit_val = 10 + (digit - 'A'); |
193 | 1.03M | } else { |
194 | 571k | return {}; |
195 | 571k | } |
196 | | |
197 | 2.94M | value = (value << 4) + digit_val; |
198 | 2.94M | } |
199 | 527k | return value; |
200 | 1.09M | } Unexecuted instantiation: AK::Optional<unsigned char> AK::StringUtils::convert_to_uint_from_hex<unsigned char>(AK::StringView, AK::TrimWhitespace) Unexecuted instantiation: AK::Optional<unsigned short> AK::StringUtils::convert_to_uint_from_hex<unsigned short>(AK::StringView, AK::TrimWhitespace) AK::Optional<unsigned int> AK::StringUtils::convert_to_uint_from_hex<unsigned int>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 170 | 1.09M | { | 171 | 1.09M | auto string = trim_whitespace == TrimWhitespace::Yes | 172 | 1.09M | ? str.trim_whitespace() | 173 | 1.09M | : str; | 174 | 1.09M | if (string.is_empty()) | 175 | 2 | return {}; | 176 | | | 177 | 1.09M | T value = 0; | 178 | 1.09M | auto const count = string.length(); | 179 | 1.09M | T const upper_bound = NumericLimits<T>::max(); | 180 | | | 181 | 4.04M | for (size_t i = 0; i < count; i++) { | 182 | 3.51M | char digit = string[i]; | 183 | 3.51M | u8 digit_val; | 184 | 3.51M | if (value > (upper_bound >> 4)) | 185 | 330 | return {}; | 186 | | | 187 | 3.51M | if (digit >= '0' && digit <= '9') { | 188 | 1.36M | digit_val = digit - '0'; | 189 | 2.15M | } else if (digit >= 'a' && digit <= 'f') { | 190 | 542k | digit_val = 10 + (digit - 'a'); | 191 | 1.60M | } else if (digit >= 'A' && digit <= 'F') { | 192 | 1.03M | digit_val = 10 + (digit - 'A'); | 193 | 1.03M | } else { | 194 | 571k | return {}; | 195 | 571k | } | 196 | | | 197 | 2.94M | value = (value << 4) + digit_val; | 198 | 2.94M | } | 199 | 527k | return value; | 200 | 1.09M | } |
Unexecuted instantiation: AK::Optional<unsigned long> AK::StringUtils::convert_to_uint_from_hex<unsigned long>(AK::StringView, AK::TrimWhitespace) |
201 | | |
202 | | template Optional<u8> convert_to_uint_from_hex(StringView str, TrimWhitespace); |
203 | | template Optional<u16> convert_to_uint_from_hex(StringView str, TrimWhitespace); |
204 | | template Optional<u32> convert_to_uint_from_hex(StringView str, TrimWhitespace); |
205 | | template Optional<u64> convert_to_uint_from_hex(StringView str, TrimWhitespace); |
206 | | |
207 | | template<typename T> |
208 | | Optional<T> convert_to_uint_from_octal(StringView str, TrimWhitespace trim_whitespace) |
209 | 75 | { |
210 | 75 | auto string = trim_whitespace == TrimWhitespace::Yes |
211 | 75 | ? str.trim_whitespace() |
212 | 75 | : str; |
213 | 75 | if (string.is_empty()) |
214 | 0 | return {}; |
215 | | |
216 | 75 | T value = 0; |
217 | 75 | auto const count = string.length(); |
218 | 75 | T const upper_bound = NumericLimits<T>::max(); |
219 | | |
220 | 762 | for (size_t i = 0; i < count; i++) { |
221 | 690 | char digit = string[i]; |
222 | 690 | u8 digit_val; |
223 | 690 | if (value > (upper_bound >> 3)) |
224 | 3 | return {}; |
225 | | |
226 | 687 | if (digit >= '0' && digit <= '7') { |
227 | 687 | digit_val = digit - '0'; |
228 | 687 | } else { |
229 | 0 | return {}; |
230 | 0 | } |
231 | | |
232 | 687 | value = (value << 3) + digit_val; |
233 | 687 | } |
234 | 72 | return value; |
235 | 75 | } Unexecuted instantiation: AK::Optional<unsigned char> AK::StringUtils::convert_to_uint_from_octal<unsigned char>(AK::StringView, AK::TrimWhitespace) Unexecuted instantiation: AK::Optional<unsigned short> AK::StringUtils::convert_to_uint_from_octal<unsigned short>(AK::StringView, AK::TrimWhitespace) AK::Optional<unsigned int> AK::StringUtils::convert_to_uint_from_octal<unsigned int>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 209 | 75 | { | 210 | 75 | auto string = trim_whitespace == TrimWhitespace::Yes | 211 | 75 | ? str.trim_whitespace() | 212 | 75 | : str; | 213 | 75 | if (string.is_empty()) | 214 | 0 | return {}; | 215 | | | 216 | 75 | T value = 0; | 217 | 75 | auto const count = string.length(); | 218 | 75 | T const upper_bound = NumericLimits<T>::max(); | 219 | | | 220 | 762 | for (size_t i = 0; i < count; i++) { | 221 | 690 | char digit = string[i]; | 222 | 690 | u8 digit_val; | 223 | 690 | if (value > (upper_bound >> 3)) | 224 | 3 | return {}; | 225 | | | 226 | 687 | if (digit >= '0' && digit <= '7') { | 227 | 687 | digit_val = digit - '0'; | 228 | 687 | } else { | 229 | 0 | return {}; | 230 | 0 | } | 231 | | | 232 | 687 | value = (value << 3) + digit_val; | 233 | 687 | } | 234 | 72 | return value; | 235 | 75 | } |
Unexecuted instantiation: AK::Optional<unsigned long> AK::StringUtils::convert_to_uint_from_octal<unsigned long>(AK::StringView, AK::TrimWhitespace) |
236 | | |
237 | | template Optional<u8> convert_to_uint_from_octal(StringView str, TrimWhitespace); |
238 | | template Optional<u16> convert_to_uint_from_octal(StringView str, TrimWhitespace); |
239 | | template Optional<u32> convert_to_uint_from_octal(StringView str, TrimWhitespace); |
240 | | template Optional<u64> convert_to_uint_from_octal(StringView str, TrimWhitespace); |
241 | | |
242 | | #ifndef KERNEL |
243 | | template<typename T> |
244 | | Optional<T> convert_to_floating_point(StringView str, TrimWhitespace trim_whitespace) |
245 | 454k | { |
246 | 454k | static_assert(IsSame<T, double> || IsSame<T, float>); |
247 | 454k | auto string = trim_whitespace == TrimWhitespace::Yes |
248 | 454k | ? str.trim_whitespace() |
249 | 454k | : str; |
250 | | |
251 | 454k | char const* start = string.characters_without_null_termination(); |
252 | 454k | return parse_floating_point_completely<T>(start, start + string.length()); |
253 | 454k | } AK::Optional<double> AK::StringUtils::convert_to_floating_point<double>(AK::StringView, AK::TrimWhitespace) Line | Count | Source | 245 | 454k | { | 246 | 454k | static_assert(IsSame<T, double> || IsSame<T, float>); | 247 | 454k | auto string = trim_whitespace == TrimWhitespace::Yes | 248 | 454k | ? str.trim_whitespace() | 249 | 454k | : str; | 250 | | | 251 | 454k | char const* start = string.characters_without_null_termination(); | 252 | 454k | return parse_floating_point_completely<T>(start, start + string.length()); | 253 | 454k | } |
Unexecuted instantiation: AK::Optional<float> AK::StringUtils::convert_to_floating_point<float>(AK::StringView, AK::TrimWhitespace) |
254 | | |
255 | | template Optional<double> convert_to_floating_point(StringView str, TrimWhitespace); |
256 | | template Optional<float> convert_to_floating_point(StringView str, TrimWhitespace); |
257 | | #endif |
258 | | |
259 | | bool equals_ignoring_ascii_case(StringView a, StringView b) |
260 | 122M | { |
261 | 122M | if (a.length() != b.length()) |
262 | 103M | return false; |
263 | 52.1M | for (size_t i = 0; i < a.length(); ++i) { |
264 | 33.4M | if (to_ascii_lowercase(a.characters_without_null_termination()[i]) != to_ascii_lowercase(b.characters_without_null_termination()[i])) |
265 | 127k | return false; |
266 | 33.4M | } |
267 | 18.7M | return true; |
268 | 18.8M | } |
269 | | |
270 | | bool ends_with(StringView str, StringView end, CaseSensitivity case_sensitivity) |
271 | 2.26M | { |
272 | 2.26M | if (end.is_empty()) |
273 | 0 | return true; |
274 | 2.26M | if (str.is_empty()) |
275 | 1.92M | return false; |
276 | 334k | if (end.length() > str.length()) |
277 | 34.5k | return false; |
278 | | |
279 | 299k | if (case_sensitivity == CaseSensitivity::CaseSensitive) |
280 | 299k | return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length()); |
281 | | |
282 | 0 | auto str_chars = str.characters_without_null_termination(); |
283 | 0 | auto end_chars = end.characters_without_null_termination(); |
284 | |
|
285 | 0 | size_t si = str.length() - end.length(); |
286 | 0 | for (size_t ei = 0; ei < end.length(); ++si, ++ei) { |
287 | 0 | if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(end_chars[ei])) |
288 | 0 | return false; |
289 | 0 | } |
290 | 0 | return true; |
291 | 0 | } |
292 | | |
293 | | bool starts_with(StringView str, StringView start, CaseSensitivity case_sensitivity) |
294 | 395M | { |
295 | 395M | if (start.is_empty()) |
296 | 437 | return true; |
297 | 395M | if (str.is_empty()) |
298 | 95.9M | return false; |
299 | 299M | if (start.length() > str.length()) |
300 | 29.3M | return false; |
301 | 270M | if (str.characters_without_null_termination() == start.characters_without_null_termination()) |
302 | 0 | return true; |
303 | | |
304 | 270M | if (case_sensitivity == CaseSensitivity::CaseSensitive) |
305 | 270M | return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length()); |
306 | | |
307 | 7.61k | auto str_chars = str.characters_without_null_termination(); |
308 | 7.61k | auto start_chars = start.characters_without_null_termination(); |
309 | | |
310 | 7.61k | size_t si = 0; |
311 | 9.00k | for (size_t starti = 0; starti < start.length(); ++si, ++starti) { |
312 | 8.84k | if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(start_chars[starti])) |
313 | 7.45k | return false; |
314 | 8.84k | } |
315 | 159 | return true; |
316 | 7.61k | } |
317 | | |
318 | | bool contains(StringView str, StringView needle, CaseSensitivity case_sensitivity) |
319 | 21.6M | { |
320 | 21.6M | if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length()) |
321 | 1.67M | return false; |
322 | 20.0M | if (needle.is_empty()) |
323 | 0 | return true; |
324 | 20.0M | auto str_chars = str.characters_without_null_termination(); |
325 | 20.0M | auto needle_chars = needle.characters_without_null_termination(); |
326 | 20.0M | if (case_sensitivity == CaseSensitivity::CaseSensitive) |
327 | 20.0M | return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr; |
328 | | |
329 | 0 | auto needle_first = to_ascii_lowercase(needle_chars[0]); |
330 | 0 | for (size_t si = 0; si < str.length(); si++) { |
331 | 0 | if (to_ascii_lowercase(str_chars[si]) != needle_first) |
332 | 0 | continue; |
333 | 0 | for (size_t ni = 0; si + ni < str.length(); ni++) { |
334 | 0 | if (to_ascii_lowercase(str_chars[si + ni]) != to_ascii_lowercase(needle_chars[ni])) { |
335 | 0 | if (ni > 0) |
336 | 0 | si += ni - 1; |
337 | 0 | break; |
338 | 0 | } |
339 | 0 | if (ni + 1 == needle.length()) |
340 | 0 | return true; |
341 | 0 | } |
342 | 0 | } |
343 | 0 | return false; |
344 | 0 | } |
345 | | |
346 | | bool is_whitespace(StringView str) |
347 | 333M | { |
348 | 333M | return all_of(str, is_ascii_space); |
349 | 333M | } |
350 | | |
351 | | StringView trim(StringView str, StringView characters, TrimMode mode) |
352 | 29.6M | { |
353 | 29.6M | size_t substring_start = 0; |
354 | 29.6M | size_t substring_length = str.length(); |
355 | | |
356 | 29.6M | if (mode == TrimMode::Left || mode == TrimMode::Both) { |
357 | 29.9M | for (size_t i = 0; i < str.length(); ++i) { |
358 | 29.0M | if (substring_length == 0) |
359 | 0 | return ""sv; |
360 | 29.0M | if (!characters.contains(str[i])) |
361 | 28.8M | break; |
362 | 228k | ++substring_start; |
363 | 228k | --substring_length; |
364 | 228k | } |
365 | 29.6M | } |
366 | | |
367 | 29.6M | if (mode == TrimMode::Right || mode == TrimMode::Both) { |
368 | 36.6M | for (size_t i = str.length(); i > 0; --i) { |
369 | 35.8M | if (substring_length == 0) |
370 | 11.0k | return ""sv; |
371 | 35.8M | if (!characters.contains(str[i - 1])) |
372 | 28.8M | break; |
373 | 6.96M | --substring_length; |
374 | 6.96M | } |
375 | 29.6M | } |
376 | | |
377 | 29.6M | return str.substring_view(substring_start, substring_length); |
378 | 29.6M | } |
379 | | |
380 | | StringView trim_whitespace(StringView str, TrimMode mode) |
381 | 26.4M | { |
382 | 26.4M | return trim(str, " \n\t\v\f\r"sv, mode); |
383 | 26.4M | } |
384 | | |
385 | | Optional<size_t> find(StringView haystack, char needle, size_t start) |
386 | 7.68M | { |
387 | 7.68M | if (start >= haystack.length()) |
388 | 3.32k | return {}; |
389 | 2.10G | for (size_t i = start; i < haystack.length(); ++i) { |
390 | 2.10G | if (haystack[i] == needle) |
391 | 7.49M | return i; |
392 | 2.10G | } |
393 | 192k | return {}; |
394 | 7.68M | } |
395 | | |
396 | | Optional<size_t> find(StringView haystack, StringView needle, size_t start) |
397 | 55.4M | { |
398 | 55.4M | if (start > haystack.length()) |
399 | 0 | return {}; |
400 | 55.4M | auto index = AK::memmem_optional( |
401 | 55.4M | haystack.characters_without_null_termination() + start, haystack.length() - start, |
402 | 55.4M | needle.characters_without_null_termination(), needle.length()); |
403 | 55.4M | return index.has_value() ? (*index + start) : index; |
404 | 55.4M | } |
405 | | |
406 | | Optional<size_t> find_last(StringView haystack, char needle) |
407 | 0 | { |
408 | 0 | for (size_t i = haystack.length(); i > 0; --i) { |
409 | 0 | if (haystack[i - 1] == needle) |
410 | 0 | return i - 1; |
411 | 0 | } |
412 | 0 | return {}; |
413 | 0 | } |
414 | | |
415 | | Optional<size_t> find_last(StringView haystack, StringView needle) |
416 | 0 | { |
417 | 0 | if (needle.length() > haystack.length()) |
418 | 0 | return {}; |
419 | | |
420 | 0 | for (size_t i = haystack.length() - needle.length();; --i) { |
421 | 0 | if (haystack.substring_view(i, needle.length()) == needle) |
422 | 0 | return i; |
423 | | |
424 | 0 | if (i == 0) |
425 | 0 | break; |
426 | 0 | } |
427 | | |
428 | 0 | return {}; |
429 | 0 | } |
430 | | |
431 | | Optional<size_t> find_last_not(StringView haystack, char needle) |
432 | 0 | { |
433 | 0 | for (size_t i = haystack.length(); i > 0; --i) { |
434 | 0 | if (haystack[i - 1] != needle) |
435 | 0 | return i - 1; |
436 | 0 | } |
437 | 0 | return {}; |
438 | 0 | } |
439 | | |
440 | | Vector<size_t> find_all(StringView haystack, StringView needle) |
441 | 0 | { |
442 | 0 | Vector<size_t> positions; |
443 | 0 | size_t current_position = 0; |
444 | 0 | while (current_position <= haystack.length()) { |
445 | 0 | auto maybe_position = AK::memmem_optional( |
446 | 0 | haystack.characters_without_null_termination() + current_position, haystack.length() - current_position, |
447 | 0 | needle.characters_without_null_termination(), needle.length()); |
448 | 0 | if (!maybe_position.has_value()) |
449 | 0 | break; |
450 | 0 | positions.append(current_position + *maybe_position); |
451 | 0 | current_position += *maybe_position + 1; |
452 | 0 | } |
453 | 0 | return positions; |
454 | 0 | } |
455 | | |
456 | | Optional<size_t> find_any_of(StringView haystack, StringView needles, SearchDirection direction) |
457 | 11.4k | { |
458 | 11.4k | if (haystack.is_empty() || needles.is_empty()) |
459 | 8.29k | return {}; |
460 | 3.15k | if (direction == SearchDirection::Forward) { |
461 | 3.23M | for (size_t i = 0; i < haystack.length(); ++i) { |
462 | 3.23M | if (needles.contains(haystack[i])) |
463 | 2.29k | return i; |
464 | 3.23M | } |
465 | 3.15k | } else if (direction == SearchDirection::Backward) { |
466 | 0 | for (size_t i = haystack.length(); i > 0; --i) { |
467 | 0 | if (needles.contains(haystack[i - 1])) |
468 | 0 | return i - 1; |
469 | 0 | } |
470 | 0 | } |
471 | 860 | return {}; |
472 | 3.15k | } |
473 | | |
474 | | #ifndef KERNEL |
475 | | ByteString to_snakecase(StringView str) |
476 | 0 | { |
477 | 0 | auto should_insert_underscore = [&](auto i, auto current_char) { |
478 | 0 | if (i == 0) |
479 | 0 | return false; |
480 | 0 | auto previous_ch = str[i - 1]; |
481 | 0 | if (is_ascii_lower_alpha(previous_ch) && is_ascii_upper_alpha(current_char)) |
482 | 0 | return true; |
483 | 0 | if (i >= str.length() - 1) |
484 | 0 | return false; |
485 | 0 | auto next_ch = str[i + 1]; |
486 | 0 | if (is_ascii_upper_alpha(current_char) && is_ascii_lower_alpha(next_ch)) |
487 | 0 | return true; |
488 | 0 | return false; |
489 | 0 | }; |
490 | |
|
491 | 0 | StringBuilder builder; |
492 | 0 | for (size_t i = 0; i < str.length(); ++i) { |
493 | 0 | auto ch = str[i]; |
494 | 0 | if (should_insert_underscore(i, ch)) |
495 | 0 | builder.append('_'); |
496 | 0 | builder.append_as_lowercase(ch); |
497 | 0 | } |
498 | 0 | return builder.to_byte_string(); |
499 | 0 | } |
500 | | |
501 | | ByteString to_titlecase(StringView str) |
502 | 0 | { |
503 | 0 | StringBuilder builder; |
504 | 0 | bool next_is_upper = true; |
505 | |
|
506 | 0 | for (auto ch : str) { |
507 | 0 | if (next_is_upper) |
508 | 0 | builder.append(to_ascii_uppercase(ch)); |
509 | 0 | else |
510 | 0 | builder.append(to_ascii_lowercase(ch)); |
511 | 0 | next_is_upper = ch == ' '; |
512 | 0 | } |
513 | |
|
514 | 0 | return builder.to_byte_string(); |
515 | 0 | } |
516 | | |
517 | | ByteString invert_case(StringView str) |
518 | 0 | { |
519 | 0 | StringBuilder builder(str.length()); |
520 | |
|
521 | 0 | for (auto ch : str) { |
522 | 0 | if (is_ascii_lower_alpha(ch)) |
523 | 0 | builder.append(to_ascii_uppercase(ch)); |
524 | 0 | else |
525 | 0 | builder.append(to_ascii_lowercase(ch)); |
526 | 0 | } |
527 | |
|
528 | 0 | return builder.to_byte_string(); |
529 | 0 | } |
530 | | |
531 | | // Finishes the replacing algorithm once it is known that ita least one |
532 | | // replacemnet is going to be done. Otherwise the caller may want to follow a |
533 | | // different route to construct its output. |
534 | | static StringBuilder replace_into_builder(StringView str, StringView needle, StringView replacement, ReplaceMode replace_mode, size_t first_replacement_position) |
535 | 522 | { |
536 | 522 | StringBuilder replaced_string; |
537 | | |
538 | 522 | replaced_string.append(str.substring_view(0, first_replacement_position)); |
539 | 522 | replaced_string.append(replacement); |
540 | | |
541 | 522 | StringView remaining = str.substring_view(first_replacement_position + needle.length()); |
542 | | |
543 | 522 | switch (replace_mode) { |
544 | 522 | case ReplaceMode::All: |
545 | 1.93M | while (!remaining.is_empty()) { |
546 | 1.93M | auto maybe_pos = remaining.find(needle); |
547 | 1.93M | if (!maybe_pos.has_value()) |
548 | 522 | break; |
549 | 1.93M | replaced_string.append(remaining.substring_view(0, *maybe_pos)); |
550 | 1.93M | replaced_string.append(replacement); |
551 | 1.93M | remaining = remaining.substring_view(*maybe_pos + needle.length()); |
552 | 1.93M | } |
553 | 522 | break; |
554 | 0 | case ReplaceMode::FirstOnly: |
555 | | // We already made the first replacement. |
556 | 0 | break; |
557 | 522 | } |
558 | | |
559 | | // The remaining bits either don't contain the needle or are ignored due to |
560 | | // `replace_mode` being `ReplaceMode::FirstOnly`. |
561 | 522 | replaced_string.append(remaining); |
562 | | |
563 | 522 | return replaced_string; |
564 | 522 | } |
565 | | |
566 | | ByteString replace(StringView str, StringView needle, StringView replacement, |
567 | | ReplaceMode replace_mode) |
568 | 1.21k | { |
569 | 1.21k | if (str.is_empty()) |
570 | 0 | return str; |
571 | | |
572 | 1.21k | auto maybe_first = str.find(needle); |
573 | 1.21k | if (!maybe_first.has_value()) |
574 | 695 | return str; |
575 | | |
576 | 522 | auto resulting_builder = replace_into_builder(str, needle, replacement, replace_mode, *maybe_first); |
577 | 522 | return resulting_builder.to_byte_string(); |
578 | 1.21k | } |
579 | | |
580 | | ErrorOr<String> replace(String const& haystack, StringView needle, StringView replacement, ReplaceMode replace_mode) |
581 | 0 | { |
582 | 0 | if (haystack.is_empty()) |
583 | 0 | return haystack; |
584 | | |
585 | 0 | auto const source_bytes = haystack.bytes_as_string_view(); |
586 | |
|
587 | 0 | auto maybe_first = source_bytes.find(needle); |
588 | 0 | if (!maybe_first.has_value()) |
589 | 0 | return haystack; |
590 | | |
591 | 0 | auto resulting_builder = replace_into_builder(source_bytes, needle, replacement, replace_mode, *maybe_first); |
592 | 0 | return resulting_builder.to_string(); |
593 | 0 | } |
594 | | #endif |
595 | | |
596 | | // TODO: Benchmark against KMP (AK/MemMem.h) and switch over if it's faster for short strings too |
597 | | size_t count(StringView str, StringView needle) |
598 | 0 | { |
599 | 0 | if (needle.is_empty()) |
600 | 0 | return str.length(); |
601 | | |
602 | 0 | size_t count = 0; |
603 | 0 | for (size_t i = 0; i < str.length() - needle.length() + 1; ++i) { |
604 | 0 | if (str.substring_view(i).starts_with(needle)) |
605 | 0 | count++; |
606 | 0 | } |
607 | 0 | return count; |
608 | 0 | } |
609 | | |
610 | | size_t count(StringView str, char needle) |
611 | 0 | { |
612 | 0 | size_t count = 0; |
613 | 0 | for (size_t i = 0; i < str.length(); ++i) { |
614 | 0 | if (str[i] == needle) |
615 | 0 | count++; |
616 | 0 | } |
617 | 0 | return count; |
618 | 0 | } |
619 | | |
620 | | } |
621 | | |
622 | | } |