/src/simdutf/fuzz/roundtrip.cpp
Line | Count | Source (jump to first uncovered line) |
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <fuzzer/FuzzedDataProvider.h>

#include "simdutf.h"
8 | | |
9 | | // useful for debugging |
10 | | static void print_input(const std::string& s, |
11 | 0 | const simdutf::implementation* const e) { |
12 | 0 | printf("We are about to abort on the following input: "); |
13 | 0 | for (auto c : s) { |
14 | 0 | printf("%02x ", (unsigned char)c); |
15 | 0 | } |
16 | 0 | printf("\n"); |
17 | 0 | std::cout << "string length : " << s.size() << " bytes" << std::endl; |
18 | 0 | std::cout << "implementation->name() = " << e->name() << std::endl; |
19 | 0 | } |
20 | | |
21 | | /** |
22 | | * We do round trips from UTF-8 to UTF-16, from UTF-8 to UTF-32, from UTF-16 to |
23 | | * UTF-8. |
24 | | * We do round trips from Latin 1 to UTF-8, from Latin 1 to UTF-16, from Latin 1 |
25 | | * to UTF-32. We test all available kernels. We also try to transcode invalid |
26 | | * inputs. |
27 | | */ |
28 | 5.03k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { |
29 | 5.03k | FuzzedDataProvider fdp(data, size); |
30 | 5.03k | constexpr int kMaxStringSize = 1024; |
31 | 5.03k | std::string source = fdp.ConsumeRandomLengthString(kMaxStringSize); |
32 | 20.1k | for (auto& e : simdutf::get_available_implementations()) { |
33 | 20.1k | if (!e->supported_by_runtime_system()) { |
34 | 5.03k | continue; |
35 | 5.03k | } |
36 | | /** |
37 | | * Transcoding from UTF-8 to UTF-16LE. |
38 | | */ |
39 | 15.0k | bool validutf8 = e->validate_utf8(source.c_str(), source.size()); |
40 | 15.0k | auto rutf8 = e->validate_utf8_with_errors(source.c_str(), source.size()); |
41 | 15.0k | if (validutf8 != (rutf8.error == simdutf::SUCCESS)) { // they should agree |
42 | 0 | print_input(source, e); |
43 | 0 | abort(); |
44 | 0 | } |
45 | 15.0k | if (validutf8) { |
46 | | // We need a buffer of size where to write the UTF-16LE words. |
47 | 13.8k | size_t expected_utf16words = |
48 | 13.8k | e->utf16_length_from_utf8(source.c_str(), source.size()); |
49 | 13.8k | std::unique_ptr<char16_t[]> utf16_output{ |
50 | 13.8k | new char16_t[expected_utf16words]}; |
51 | | // convert to UTF-16LE |
52 | 13.8k | size_t utf16words = e->convert_utf8_to_utf16le( |
53 | 13.8k | source.c_str(), source.size(), utf16_output.get()); |
54 | | // It wrote utf16words * sizeof(char16_t) bytes. |
55 | 13.8k | bool validutf16 = e->validate_utf16le(utf16_output.get(), utf16words); |
56 | 13.8k | if (!validutf16) { |
57 | 0 | print_input(source, e); |
58 | 0 | abort(); |
59 | 0 | } |
60 | | // convert it back: |
61 | | // We need a buffer of size where to write the UTF-8 words. |
62 | 13.8k | size_t expected_utf8words = |
63 | 13.8k | e->utf8_length_from_utf16le(utf16_output.get(), utf16words); |
64 | 13.8k | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
65 | | // convert to UTF-8 |
66 | 13.8k | size_t utf8words = e->convert_utf16le_to_utf8( |
67 | 13.8k | utf16_output.get(), utf16words, utf8_output.get()); |
68 | 13.8k | std::string final_string(utf8_output.get(), utf8words); |
69 | 13.8k | if (final_string != source) { |
70 | 0 | print_input(source, e); |
71 | 0 | abort(); |
72 | 0 | } |
73 | 13.8k | } else { |
74 | | // invalid input!!! |
75 | | // We need a buffer of size where to write the UTF-16LE words. |
76 | 1.24k | size_t expected_utf16words = |
77 | 1.24k | e->utf16_length_from_utf8(source.c_str(), source.size()); |
78 | 1.24k | std::unique_ptr<char16_t[]> utf16_output{ |
79 | 1.24k | new char16_t[expected_utf16words]}; |
80 | | // convert to UTF-16LE |
81 | 1.24k | size_t utf16words = e->convert_utf8_to_utf16le( |
82 | 1.24k | source.c_str(), source.size(), utf16_output.get()); |
83 | 1.24k | if (utf16words != 0) { |
84 | 0 | print_input(source, e); |
85 | 0 | abort(); |
86 | 0 | } |
87 | 1.24k | } |
88 | | |
89 | | /** |
90 | | * Transcoding from UTF-8 to UTF-16BE. |
91 | | */ |
92 | 15.0k | if (validutf8) { |
93 | | // We need a buffer of size where to write the UTF-16BE words. |
94 | 13.8k | size_t expected_utf16words = |
95 | 13.8k | e->utf16_length_from_utf8(source.c_str(), source.size()); |
96 | 13.8k | std::unique_ptr<char16_t[]> utf16_output{ |
97 | 13.8k | new char16_t[expected_utf16words]}; |
98 | | // convert to UTF-16BE |
99 | 13.8k | size_t utf16words = e->convert_utf8_to_utf16be( |
100 | 13.8k | source.c_str(), source.size(), utf16_output.get()); |
101 | | // It wrote utf16words * sizeof(char16_t) bytes. |
102 | 13.8k | bool validutf16 = e->validate_utf16be(utf16_output.get(), utf16words); |
103 | 13.8k | if (!validutf16) { |
104 | 0 | print_input(source, e); |
105 | 0 | abort(); |
106 | 0 | } |
107 | | // convert it back: |
108 | | // We need a buffer of size where to write the UTF-8 words. |
109 | 13.8k | size_t expected_utf8words = |
110 | 13.8k | e->utf8_length_from_utf16be(utf16_output.get(), utf16words); |
111 | 13.8k | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
112 | | // convert to UTF-8 |
113 | 13.8k | size_t utf8words = e->convert_utf16be_to_utf8( |
114 | 13.8k | utf16_output.get(), utf16words, utf8_output.get()); |
115 | 13.8k | std::string final_string(utf8_output.get(), utf8words); |
116 | 13.8k | if (final_string != source) { |
117 | 0 | print_input(source, e); |
118 | 0 | abort(); |
119 | 0 | } |
120 | 13.8k | } else { |
121 | | // invalid input!!! |
122 | | // We need a buffer of size where to write the UTF-16BE words. |
123 | 1.24k | size_t expected_utf16words = |
124 | 1.24k | e->utf16_length_from_utf8(source.c_str(), source.size()); |
125 | 1.24k | std::unique_ptr<char16_t[]> utf16_output{ |
126 | 1.24k | new char16_t[expected_utf16words]}; |
127 | | // convert to UTF-16BE |
128 | 1.24k | size_t utf16words = e->convert_utf8_to_utf16be( |
129 | 1.24k | source.c_str(), source.size(), utf16_output.get()); |
130 | 1.24k | if (utf16words != 0) { |
131 | 0 | print_input(source, e); |
132 | 0 | abort(); |
133 | 0 | } |
134 | 1.24k | } |
135 | | /** |
136 | | * Transcoding from UTF-8 to UTF-32. |
137 | | */ |
138 | 15.0k | if (validutf8) { |
139 | | // We need a buffer of size where to write the UTF-32 words. |
140 | 13.8k | size_t expected_utf32words = |
141 | 13.8k | e->utf32_length_from_utf8(source.c_str(), source.size()); |
142 | 13.8k | std::unique_ptr<char32_t[]> utf32_output{ |
143 | 13.8k | new char32_t[expected_utf32words]}; |
144 | | // convert to UTF-32 |
145 | 13.8k | size_t utf32words = e->convert_utf8_to_utf32( |
146 | 13.8k | source.c_str(), source.size(), utf32_output.get()); |
147 | | // It wrote utf32words * sizeof(char32_t) bytes. |
148 | 13.8k | bool validutf32 = e->validate_utf32(utf32_output.get(), utf32words); |
149 | 13.8k | if (!validutf32) { |
150 | 0 | return -1; |
151 | 0 | } |
152 | | // convert it back: |
153 | | // We need a buffer of size where to write the UTF-8 words. |
154 | 13.8k | size_t expected_utf8words = |
155 | 13.8k | e->utf8_length_from_utf32(utf32_output.get(), utf32words); |
156 | 13.8k | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
157 | | // convert to UTF-8 |
158 | 13.8k | size_t utf8words = e->convert_utf32_to_utf8( |
159 | 13.8k | utf32_output.get(), utf32words, utf8_output.get()); |
160 | 13.8k | std::string final_string(utf8_output.get(), utf8words); |
161 | 13.8k | if (source != final_string) { |
162 | 0 | print_input(source, e); |
163 | 0 | abort(); |
164 | 0 | } |
165 | 13.8k | } else { |
166 | | // invalid input!!! |
167 | 1.24k | size_t expected_utf32words = |
168 | 1.24k | e->utf32_length_from_utf8(source.c_str(), source.size()); |
169 | 1.24k | std::unique_ptr<char32_t[]> utf32_output{ |
170 | 1.24k | new char32_t[expected_utf32words]}; |
171 | | // convert to UTF-32 |
172 | 1.24k | size_t utf32words = e->convert_utf8_to_utf32( |
173 | 1.24k | source.c_str(), source.size(), utf32_output.get()); |
174 | 1.24k | if (utf32words != 0) { |
175 | 0 | print_input(source, e); |
176 | 0 | abort(); |
177 | 0 | } |
178 | 1.24k | } |
179 | | |
180 | | /** |
181 | | * Transcoding from UTF-8 to Latin 1 |
182 | | */ |
183 | 15.0k | if (validutf8) { |
184 | | // We need a buffer of size where to write the UTF-16LE words. |
185 | 13.8k | size_t expected_latin1words = |
186 | 13.8k | e->latin1_length_from_utf8(source.c_str(), source.size()); |
187 | 13.8k | std::unique_ptr<char[]> latin1_output{new char[expected_latin1words]}; |
188 | | // convert to latin1 |
189 | 13.8k | size_t latin1words = e->convert_utf8_to_latin1( |
190 | 13.8k | source.c_str(), source.size(), latin1_output.get()); |
191 | 13.8k | if (latin1words != 0) { |
192 | | // convert it back: |
193 | | // We need a buffer of size where to write the UTF-8 words. |
194 | 676 | size_t expected_utf8words = |
195 | 676 | e->utf8_length_from_latin1(latin1_output.get(), latin1words); |
196 | 676 | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
197 | | // convert to UTF-8 |
198 | 676 | size_t utf8words = e->convert_latin1_to_utf8( |
199 | 676 | latin1_output.get(), latin1words, utf8_output.get()); |
200 | 676 | std::string final_string(utf8_output.get(), utf8words); |
201 | 676 | if (final_string != source) { |
202 | 0 | print_input(source, e); |
203 | 0 | abort(); |
204 | 0 | } |
205 | 676 | } |
206 | 13.8k | } else { |
207 | | // invalid input!!! |
208 | | // We need a buffer of size where to write the Latin 1 words. |
209 | 1.24k | size_t expected_latin1words = |
210 | 1.24k | e->latin1_length_from_utf8(source.c_str(), source.size()); |
211 | 1.24k | std::unique_ptr<char[]> latin1_output{new char[expected_latin1words]}; |
212 | | // convert to Latin 1 |
213 | 1.24k | size_t latin1words = e->convert_utf8_to_latin1( |
214 | 1.24k | source.c_str(), source.size(), latin1_output.get()); |
215 | 1.24k | if (latin1words != 0) { |
216 | 0 | print_input(source, e); |
217 | 0 | abort(); |
218 | 0 | } |
219 | 1.24k | } |
220 | | /** |
221 | | * Transcoding from UTF-16LE to UTF-8. |
222 | | */ |
223 | 15.0k | { |
224 | | // Get new source data here as this will allow the fuzzer to optimize it's |
225 | | // input for UTF16-LE. |
226 | 15.0k | source = fdp.ConsumeRandomLengthString(kMaxStringSize); |
227 | | // We copy to avoid alignment issues. |
228 | 15.0k | std::unique_ptr<char16_t[]> utf16_source{new char16_t[source.size() / 2]}; |
229 | 15.0k | if (source.data() != nullptr) { |
230 | 15.0k | std::memcpy(utf16_source.get(), source.data(), source.size() / 2 * 2); |
231 | 15.0k | } |
232 | 15.0k | bool validutf16le = |
233 | 15.0k | e->validate_utf16le(utf16_source.get(), source.size() / 2); |
234 | 15.0k | auto rutf16le = e->validate_utf16le_with_errors(utf16_source.get(), |
235 | 15.0k | source.size() / 2); |
236 | 15.0k | if (validutf16le != |
237 | 15.0k | (rutf16le.error == simdutf::SUCCESS)) { // they should agree |
238 | 0 | print_input(source, e); |
239 | 0 | abort(); |
240 | 0 | } |
241 | 15.0k | if (validutf16le) { |
242 | | // We need a buffer of size where to write the UTF-16 words. |
243 | 14.8k | size_t expected_utf8words = |
244 | 14.8k | e->utf8_length_from_utf16le(utf16_source.get(), source.size() / 2); |
245 | 14.8k | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
246 | 14.8k | size_t utf8words = e->convert_utf16le_to_utf8( |
247 | 14.8k | utf16_source.get(), source.size() / 2, utf8_output.get()); |
248 | | // It wrote utf16words * sizeof(char16_t) bytes. |
249 | 14.8k | bool validutf8 = e->validate_utf8(utf8_output.get(), utf8words); |
250 | 14.8k | if (!validutf8) { |
251 | 0 | print_input(source, e); |
252 | 0 | abort(); |
253 | 0 | } |
254 | | // convert it back: |
255 | | // We need a buffer of size where to write the UTF-16 words. |
256 | 14.8k | size_t expected_utf16words = |
257 | 14.8k | e->utf16_length_from_utf8(utf8_output.get(), utf8words); |
258 | 14.8k | std::unique_ptr<char16_t[]> utf16_output{ |
259 | 14.8k | new char16_t[expected_utf16words]}; |
260 | | // convert to UTF-8 |
261 | 14.8k | size_t utf16words = e->convert_utf8_to_utf16le( |
262 | 14.8k | utf8_output.get(), utf8words, utf16_output.get()); |
263 | 73.5k | for (size_t i = 0; i < source.size() / 2; i++) { |
264 | 58.6k | if (utf16_output.get()[i] != (utf16_source.get())[i]) { |
265 | 0 | print_input(source, e); |
266 | 0 | abort(); |
267 | 0 | } |
268 | 58.6k | } |
269 | 14.8k | } else { |
270 | | // invalid input!!! |
271 | | // We need a buffer of size where to write the UTF-16 words. |
272 | 191 | size_t expected_utf8words = |
273 | 191 | e->utf8_length_from_utf16le(utf16_source.get(), source.size() / 2); |
274 | 191 | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
275 | 191 | size_t utf8words = e->convert_utf16le_to_utf8( |
276 | 191 | utf16_source.get(), source.size() / 2, utf8_output.get()); |
277 | 191 | if (utf8words != 0) { |
278 | 0 | print_input(source, e); |
279 | 0 | abort(); |
280 | 0 | } |
281 | 191 | } |
282 | 15.0k | } |
283 | | |
284 | | /** |
285 | | * Transcoding from UTF-16BE to UTF-8. |
286 | | */ |
287 | 15.0k | { |
288 | | // Get new source data here as this will allow the fuzzer to optimize it's |
289 | | // input for UTF16-BE. |
290 | 15.0k | source = fdp.ConsumeRandomLengthString(kMaxStringSize); |
291 | 15.0k | std::unique_ptr<char16_t[]> utf16_source{new char16_t[source.size() / 2]}; |
292 | 15.0k | if (source.data() != nullptr) { |
293 | 15.0k | std::memcpy(utf16_source.get(), source.data(), source.size() / 2 * 2); |
294 | 15.0k | } |
295 | 15.0k | bool validutf16be = |
296 | 15.0k | e->validate_utf16be(utf16_source.get(), source.size() / 2); |
297 | 15.0k | auto rutf16be = e->validate_utf16be_with_errors(utf16_source.get(), |
298 | 15.0k | source.size() / 2); |
299 | 15.0k | if (validutf16be != |
300 | 15.0k | (rutf16be.error == simdutf::SUCCESS)) { // they should agree |
301 | 0 | print_input(source, e); |
302 | 0 | abort(); |
303 | 0 | } |
304 | 15.0k | if (validutf16be) { |
305 | | // We need a buffer of size where to write the UTF-16 words. |
306 | 14.8k | size_t expected_utf8words = |
307 | 14.8k | e->utf8_length_from_utf16be(utf16_source.get(), source.size() / 2); |
308 | 14.8k | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
309 | 14.8k | size_t utf8words = e->convert_utf16be_to_utf8( |
310 | 14.8k | utf16_source.get(), source.size() / 2, utf8_output.get()); |
311 | | // It wrote utf16words * sizeof(char16_t) bytes. |
312 | 14.8k | bool validutf8 = e->validate_utf8(utf8_output.get(), utf8words); |
313 | 14.8k | if (!validutf8) { |
314 | 0 | print_input(source, e); |
315 | 0 | abort(); |
316 | 0 | } |
317 | | // convert it back: |
318 | | // We need a buffer of size where to write the UTF-16 words. |
319 | 14.8k | size_t expected_utf16words = |
320 | 14.8k | e->utf16_length_from_utf8(utf8_output.get(), utf8words); |
321 | 14.8k | std::unique_ptr<char16_t[]> utf16_output{ |
322 | 14.8k | new char16_t[expected_utf16words]}; |
323 | | // convert to UTF-8 |
324 | 14.8k | size_t utf16words = e->convert_utf8_to_utf16be( |
325 | 14.8k | utf8_output.get(), utf8words, utf16_output.get()); |
326 | 72.4k | for (size_t i = 0; i < source.size() / 2; i++) { |
327 | 57.5k | if (utf16_output.get()[i] != (utf16_source.get())[i]) { |
328 | 0 | print_input(source, e); |
329 | 0 | abort(); |
330 | 0 | } |
331 | 57.5k | } |
332 | 14.8k | } else { |
333 | | // invalid input!!! |
334 | | // We need a buffer of size where to write the UTF-16 words. |
335 | 194 | size_t expected_utf8words = |
336 | 194 | e->utf8_length_from_utf16be(utf16_source.get(), source.size() / 2); |
337 | 194 | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
338 | 194 | size_t utf8words = e->convert_utf16be_to_utf8( |
339 | 194 | utf16_source.get(), source.size() / 2, utf8_output.get()); |
340 | 194 | if (utf8words != 0) { |
341 | 0 | print_input(source, e); |
342 | 0 | abort(); |
343 | 0 | } |
344 | 194 | } |
345 | 15.0k | } |
346 | | |
347 | | /** |
348 | | * Transcoding from latin1 to UTF-8. |
349 | | */ |
350 | | // Get new source data here as this will allow the fuzzer to optimize it's |
351 | | // input for latin1. |
352 | 15.0k | source = fdp.ConsumeRandomLengthString(kMaxStringSize); |
353 | 15.0k | bool validlatin1 = true; // has to be |
354 | 15.0k | if (validlatin1) { |
355 | | // We need a buffer of size where to write the UTF-8 words. |
356 | 15.0k | size_t expected_utf8words = |
357 | 15.0k | e->utf8_length_from_latin1(source.c_str(), source.size()); |
358 | 15.0k | std::unique_ptr<char[]> utf8_output{new char[expected_utf8words]}; |
359 | 15.0k | size_t utf8words = e->convert_latin1_to_utf8( |
360 | 15.0k | source.c_str(), source.size(), utf8_output.get()); |
361 | | // It wrote utf8words * sizeof(char) bytes. |
362 | 15.0k | bool validutf8 = e->validate_utf8(utf8_output.get(), utf8words); |
363 | 15.0k | if (!validutf8) { |
364 | 0 | print_input(source, e); |
365 | 0 | abort(); |
366 | 0 | } |
367 | | // convert it back: |
368 | | // We need a buffer of size where to write the latin1 words. |
369 | 15.0k | size_t expected_latin1words = |
370 | 15.0k | e->latin1_length_from_utf8(utf8_output.get(), utf8words); |
371 | 15.0k | std::unique_ptr<char[]> latin1_output{new char[expected_latin1words]}; |
372 | | // convert to latin1 |
373 | 15.0k | size_t latin1words = e->convert_utf8_to_latin1( |
374 | 15.0k | utf8_output.get(), utf8words, latin1_output.get()); |
375 | 317k | for (size_t i = 0; i < source.size(); i++) { |
376 | 302k | if (latin1_output.get()[i] != (source.c_str())[i]) { |
377 | 0 | print_input(source, e); |
378 | 0 | abort(); |
379 | 0 | } |
380 | 302k | } |
381 | 15.0k | } |
382 | 15.0k | if (validlatin1) { |
383 | | // We need a buffer of size where to write the UTF-16 words. |
384 | 15.0k | size_t expected_utf16words = e->utf16_length_from_latin1(source.size()); |
385 | 15.0k | std::unique_ptr<char16_t[]> utf16_output{ |
386 | 15.0k | new char16_t[expected_utf16words]}; |
387 | 15.0k | size_t utf16words = e->convert_latin1_to_utf16le( |
388 | 15.0k | source.c_str(), source.size(), utf16_output.get()); |
389 | | // It wrote utf16words * sizeof(char16_t) bytes. |
390 | 15.0k | bool validutf16 = e->validate_utf16le(utf16_output.get(), utf16words); |
391 | 15.0k | if (!validutf16) { |
392 | 0 | print_input(source, e); |
393 | 0 | abort(); |
394 | 0 | } |
395 | | // convert it back: |
396 | | // We need a buffer of size where to write the latin1 words. |
397 | 15.0k | size_t expected_latin1words = e->latin1_length_from_utf16(utf16words); |
398 | 15.0k | std::unique_ptr<char[]> latin1_output{new char[expected_latin1words]}; |
399 | | // convert to latin1 |
400 | 15.0k | size_t latin1words = e->convert_utf16le_to_latin1( |
401 | 15.0k | utf16_output.get(), utf16words, latin1_output.get()); |
402 | 317k | for (size_t i = 0; i < source.size(); i++) { |
403 | 302k | if (latin1_output.get()[i] != (source.c_str())[i]) { |
404 | 0 | print_input(source, e); |
405 | 0 | abort(); |
406 | 0 | } |
407 | 302k | } |
408 | 15.0k | } |
409 | 15.0k | if (validlatin1) { |
410 | | // We need a buffer of size where to write the UTF-16 words. |
411 | 15.0k | size_t expected_utf16words = e->utf16_length_from_latin1(source.size()); |
412 | 15.0k | std::unique_ptr<char16_t[]> utf16_output{ |
413 | 15.0k | new char16_t[expected_utf16words]}; |
414 | 15.0k | size_t utf16words = e->convert_latin1_to_utf16be( |
415 | 15.0k | source.c_str(), source.size(), utf16_output.get()); |
416 | | // It wrote utf16words * sizeof(char16_t) bytes. |
417 | 15.0k | bool validutf16 = e->validate_utf16be(utf16_output.get(), utf16words); |
418 | 15.0k | if (!validutf16) { |
419 | 0 | print_input(source, e); |
420 | 0 | abort(); |
421 | 0 | } |
422 | | // convert it back: |
423 | | // We need a buffer of size where to write the latin1 words. |
424 | 15.0k | size_t expected_latin1words = e->latin1_length_from_utf16(utf16words); |
425 | 15.0k | std::unique_ptr<char[]> latin1_output{new char[expected_latin1words]}; |
426 | | // convert to latin1 |
427 | 15.0k | size_t latin1words = e->convert_utf16be_to_latin1( |
428 | 15.0k | utf16_output.get(), utf16words, latin1_output.get()); |
429 | 317k | for (size_t i = 0; i < source.size(); i++) { |
430 | 302k | if (latin1_output.get()[i] != (source.c_str())[i]) { |
431 | 0 | print_input(source, e); |
432 | 0 | abort(); |
433 | 0 | } |
434 | 302k | } |
435 | 15.0k | } |
436 | | |
437 | 15.0k | if (validlatin1) { |
438 | | // We need a buffer of size where to write the UTF-16 words. |
439 | 15.0k | size_t expected_utf32words = e->utf32_length_from_latin1(source.size()); |
440 | 15.0k | std::unique_ptr<char32_t[]> utf32_output{ |
441 | 15.0k | new char32_t[expected_utf32words]}; |
442 | 15.0k | size_t utf32words = e->convert_latin1_to_utf32( |
443 | 15.0k | source.c_str(), source.size(), utf32_output.get()); |
444 | | // It wrote utf16words * sizeof(char16_t) bytes. |
445 | 15.0k | bool validutf32 = e->validate_utf32(utf32_output.get(), utf32words); |
446 | 15.0k | if (!validutf32) { |
447 | 0 | print_input(source, e); |
448 | 0 | abort(); |
449 | 0 | } |
450 | | // convert it back: |
451 | | // We need a buffer of size where to write the latin1 words. |
452 | 15.0k | size_t expected_latin1words = e->latin1_length_from_utf32(utf32words); |
453 | 15.0k | std::unique_ptr<char[]> latin1_output{new char[expected_latin1words]}; |
454 | | // convert to latin1 |
455 | 15.0k | size_t latin1words = e->convert_utf32_to_latin1( |
456 | 15.0k | utf32_output.get(), utf32words, latin1_output.get()); |
457 | 317k | for (size_t i = 0; i < source.size(); i++) { |
458 | 302k | if (latin1_output.get()[i] != (source.c_str())[i]) { |
459 | 0 | print_input(source, e); |
460 | 0 | abort(); |
461 | 0 | } |
462 | 302k | } |
463 | 15.0k | } |
464 | | |
465 | | /// Base64 tests. We begin by trying to decode the input, even if we |
466 | | /// expect it to fail. |
467 | 15.0k | { |
468 | 15.0k | size_t max_length_needed = |
469 | 15.0k | e->maximal_binary_length_from_base64(source.data(), source.size()); |
470 | 15.0k | std::vector<char> back(max_length_needed); |
471 | 15.0k | simdutf::result r = |
472 | 15.0k | e->base64_to_binary(source.data(), source.size(), back.data()); |
473 | 15.0k | if (r.error == simdutf::error_code::SUCCESS) { |
474 | | // We expect failure but if we succeed, then we should have a roundtrip. |
475 | 13.8k | back.resize(r.count); |
476 | 13.8k | std::vector<char> back2(e->base64_length_from_binary(back.size())); |
477 | 13.8k | size_t base64size = |
478 | 13.8k | e->binary_to_base64(back.data(), back.size(), back2.data()); |
479 | 13.8k | back2.resize(base64size); |
480 | 13.8k | std::vector<char> back3( |
481 | 13.8k | e->maximal_binary_length_from_base64(back2.data(), back2.size())); |
482 | 13.8k | simdutf::result r2 = |
483 | 13.8k | e->base64_to_binary(back2.data(), back2.size(), back3.data()); |
484 | 13.8k | if (r2.error != simdutf::error_code::SUCCESS) { |
485 | 0 | print_input(source, e); |
486 | 0 | return false; |
487 | 0 | } |
488 | 13.8k | if (r2.count != back.size()) { |
489 | 0 | print_input(source, e); |
490 | 0 | return false; |
491 | 0 | } |
492 | 13.8k | if (back3.size() != back.size()) { |
493 | 0 | print_input(source, e); |
494 | 0 | return false; |
495 | 0 | } |
496 | 13.8k | } |
497 | 15.0k | } |
498 | | |
499 | | // Same as above, but we use the safe decoder version. |
500 | 15.0k | { |
501 | 15.0k | size_t max_length_needed = |
502 | 15.0k | e->maximal_binary_length_from_base64(source.data(), source.size()); |
503 | 15.0k | std::vector<char> back(max_length_needed); |
504 | 15.0k | simdutf::result r = simdutf::base64_to_binary_safe( |
505 | 15.0k | source.data(), source.size(), back.data(), max_length_needed); |
506 | 15.0k | if (r.error == simdutf::error_code::SUCCESS) { |
507 | | // We expect failure but if we succeed, then we should have a roundtrip. |
508 | 13.8k | back.resize(max_length_needed); |
509 | 13.8k | std::vector<char> back2(e->base64_length_from_binary(back.size())); |
510 | 13.8k | size_t base64size = |
511 | 13.8k | e->binary_to_base64(back.data(), back.size(), back2.data()); |
512 | 13.8k | back2.resize(base64size); |
513 | 13.8k | size_t max_length_needed2 = |
514 | 13.8k | e->maximal_binary_length_from_base64(back2.data(), back2.size()); |
515 | 13.8k | std::vector<char> back3(max_length_needed2); |
516 | 13.8k | simdutf::result r2 = simdutf::base64_to_binary_safe( |
517 | 13.8k | back2.data(), back2.size(), back3.data(), max_length_needed2); |
518 | 13.8k | if (r2.error != simdutf::error_code::SUCCESS) { |
519 | 0 | print_input(source, e); |
520 | 0 | return false; |
521 | 0 | } |
522 | 13.8k | if (max_length_needed != back.size()) { |
523 | 0 | print_input(source, e); |
524 | 0 | return false; |
525 | 0 | } |
526 | 13.8k | if (back3.size() != back.size()) { |
527 | 0 | print_input(source, e); |
528 | 0 | return false; |
529 | 0 | } |
530 | 13.8k | } |
531 | 15.0k | } |
532 | | /// Base64 tests. We encode the content as binary in base64 and we decode |
533 | | /// it, it should always succeed. |
534 | 15.0k | { |
535 | 15.0k | source = fdp.ConsumeRandomLengthString(kMaxStringSize); |
536 | 15.0k | std::vector<char> base64buffer( |
537 | 15.0k | e->base64_length_from_binary(source.size())); |
538 | 15.0k | size_t base64size = e->binary_to_base64(source.data(), source.size(), |
539 | 15.0k | base64buffer.data()); |
540 | 15.0k | if (base64size != base64buffer.size()) { |
541 | 0 | print_input(source, e); |
542 | 0 | abort(); |
543 | 0 | } |
544 | 15.0k | std::vector<char> back(e->maximal_binary_length_from_base64( |
545 | 15.0k | base64buffer.data(), base64buffer.size())); |
546 | 15.0k | simdutf::result r = e->base64_to_binary(base64buffer.data(), |
547 | 15.0k | base64buffer.size(), back.data()); |
548 | 15.0k | if (r.error != simdutf::error_code::SUCCESS) { |
549 | 0 | print_input(source, e); |
550 | 0 | abort(); |
551 | 0 | } |
552 | 15.0k | if (r.count != source.size()) { |
553 | 0 | print_input(source, e); |
554 | 0 | abort(); |
555 | 0 | } |
556 | 252k | for (size_t i = 0; i < source.size(); i++) { |
557 | 237k | if (back[i] != (source.c_str())[i]) { |
558 | 0 | print_input(source, e); |
559 | 0 | abort(); |
560 | 0 | } |
561 | 237k | } |
562 | 15.0k | size_t max_length = back.size(); |
563 | 15.0k | r = simdutf::base64_to_binary_safe( |
564 | 15.0k | base64buffer.data(), base64buffer.size(), back.data(), max_length); |
565 | 15.0k | if (r.error != simdutf::error_code::SUCCESS) { |
566 | 0 | printf("base64 round trip failed, error code %d\n", r.error); |
567 | 0 | print_input(source, e); |
568 | 0 | return false; |
569 | 0 | } |
570 | 15.0k | if (max_length != source.size()) { |
571 | 0 | printf("base64 safe round trip failed, not the same size %zu %zu\n", |
572 | 0 | max_length, source.size()); |
573 | 0 | print_input(source, e); |
574 | 0 | return false; |
575 | 0 | } |
576 | 252k | for (size_t i = 0; i < source.size(); i++) { |
577 | 237k | if (back[i] != (source.c_str())[i]) { |
578 | 0 | printf("base64 round trip failed, same size, different content\n"); |
579 | 0 | print_input(source, e); |
580 | 0 | return false; |
581 | 0 | } |
582 | 237k | } |
583 | 15.0k | } |
584 | | |
585 | 15.0k | } // for (auto &e : simdutf::get_available_implementations()) { |
586 | | |
587 | 5.03k | return 0; |
588 | 5.03k | } // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |