/src/simdutf/fuzz/base64.cpp
Line | Count | Source |
1 | | #include <cstddef> |
2 | | #include <cstdint> |
3 | | #include <array> |
4 | | |
5 | | #include "helpers/common.h" |
6 | | #include "simdutf.h" |
7 | | |
8 | | constexpr std::array options = { |
9 | | simdutf::base64_default, |
10 | | simdutf::base64_url, |
11 | | simdutf::base64_default_no_padding, |
12 | | simdutf::base64_url_with_padding, |
13 | | }; |
14 | | |
15 | | constexpr std::array last_chunk = { |
16 | | simdutf::last_chunk_handling_options::loose, |
17 | | simdutf::last_chunk_handling_options::strict, |
18 | | simdutf::last_chunk_handling_options::stop_before_partial}; |
19 | | |
20 | | struct decoderesult { |
21 | | std::size_t maxbinarylength{}; |
22 | | simdutf::result convertresult{}; |
23 | | auto operator<=>(const decoderesult&) const = default; |
24 | | }; |
25 | | |
26 | | template <typename FromChar> |
27 | | void decode(std::span<const FromChar> base64_, const auto selected_option, |
28 | 2.21k | const auto last_chunk_option) { |
29 | 2.21k | std::vector<FromChar> base64(begin(base64_), end(base64_)); |
30 | 2.21k | const auto implementations = get_supported_implementations(); |
31 | 2.21k | std::vector<decoderesult> results; |
32 | 2.21k | results.reserve(implementations.size()); |
33 | 6.65k | for (auto impl : implementations) { |
34 | 6.65k | auto& r = results.emplace_back(); |
35 | 6.65k | r.maxbinarylength = |
36 | 6.65k | impl->maximal_binary_length_from_base64(base64.data(), base64.size()); |
37 | 6.65k | std::vector<char> output(r.maxbinarylength); |
38 | 6.65k | r.convertresult = |
39 | 6.65k | impl->base64_to_binary(base64.data(), base64.size(), output.data(), |
40 | 6.65k | selected_option, last_chunk_option); |
41 | 6.65k | } |
42 | 4.43k | auto neq = [](const auto& a, const auto& b) { return a != b; };auto decode<char, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options)::{lambda(auto:1 const&, auto:2 const&)#1}::operator()<decoderesult, {lambda(auto:1 const&, auto:2 const&)#1}::operator()>(decoderesult const&, {lambda(auto:1 const&, auto:2 const&)#1}::operator() const&) constLine | Count | Source | 42 | 1.99k | auto neq = [](const auto& a, const auto& b) { return a != b; }; |
auto decode<char16_t, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char16_t const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options)::{lambda(auto:1 const&, auto:2 const&)#1}::operator()<decoderesult, {lambda(auto:1 const&, auto:2 const&)#1}::operator()>(decoderesult const&, {lambda(auto:1 const&, auto:2 const&)#1}::operator() const&) constLine | Count | Source | 42 | 2.43k | auto neq = [](const auto& a, const auto& b) { return a != b; }; |
|
43 | 2.21k | if (std::ranges::adjacent_find(results, neq) != results.end()) { |
44 | 0 | std::cerr << "output differs between implementations for decode\n"; |
45 | 0 | const auto implementations = get_supported_implementations(); |
46 | 0 | std::size_t i = 0; |
47 | 0 | for (const auto& r : results) { |
48 | 0 | std::cerr << "impl " << implementations[i]->name() |
49 | 0 | << " got maxbinarylength=" << r.maxbinarylength |
50 | 0 | << " convertresult=" << r.convertresult << "\n"; |
51 | 0 | ++i; |
52 | 0 | } |
53 | 0 | std::cerr << "option: " << selected_option << '\n'; |
54 | 0 | std::cerr << "data: " |
55 | 0 | << (std::is_same_v<FromChar, char> ? "char" : "char16_t") << "{"; |
56 | 0 | for (int v : base64) { |
57 | 0 | std::cerr << v << ", "; |
58 | 0 | } |
59 | 0 | std::cerr << "}\n"; |
60 | 0 | std::abort(); |
61 | 0 | } |
62 | 2.21k | } void decode<char, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options) Line | Count | Source | 28 | 999 | const auto last_chunk_option) { | 29 | 999 | std::vector<FromChar> base64(begin(base64_), end(base64_)); | 30 | 999 | const auto implementations = get_supported_implementations(); | 31 | 999 | std::vector<decoderesult> results; | 32 | 999 | results.reserve(implementations.size()); | 33 | 2.99k | for (auto impl : implementations) { | 34 | 2.99k | auto& r = results.emplace_back(); | 35 | 2.99k | r.maxbinarylength = | 36 | 2.99k | impl->maximal_binary_length_from_base64(base64.data(), base64.size()); | 37 | 2.99k | std::vector<char> output(r.maxbinarylength); | 38 | 2.99k | r.convertresult = | 39 | 2.99k | impl->base64_to_binary(base64.data(), base64.size(), output.data(), | 40 | 2.99k | selected_option, last_chunk_option); | 41 | 2.99k | } | 42 | 999 | auto neq = [](const auto& a, const auto& b) { return a != b; }; | 43 | 999 | if (std::ranges::adjacent_find(results, neq) != results.end()) { | 44 | 0 | std::cerr << "output differs between implementations for decode\n"; | 45 | 0 | const auto implementations = get_supported_implementations(); | 46 | 0 | std::size_t i = 0; | 47 | 0 | for (const auto& r : results) { | 48 | 0 | std::cerr << "impl " << implementations[i]->name() | 49 | 0 | << " got maxbinarylength=" << r.maxbinarylength | 50 | 0 | << " convertresult=" << r.convertresult << "\n"; | 51 | 0 | ++i; | 52 | 0 | } | 53 | 0 | std::cerr << "option: " << selected_option << '\n'; | 54 | 0 | std::cerr << "data: " | 55 | 0 | << (std::is_same_v<FromChar, char> ? "char" : "char16_t") << "{"; | 56 | 0 | for (int v : base64) { | 57 | 0 | std::cerr << v << ", "; | 58 | 0 | } | 59 | 0 | std::cerr << "}\n"; | 60 | 0 | std::abort(); | 61 | 0 | } | 62 | 999 | } |
void decode<char16_t, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char16_t const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options) Line | Count | Source | 28 | 1.21k | const auto last_chunk_option) { | 29 | 1.21k | std::vector<FromChar> base64(begin(base64_), end(base64_)); | 30 | 1.21k | const auto implementations = get_supported_implementations(); | 31 | 1.21k | std::vector<decoderesult> results; | 32 | 1.21k | results.reserve(implementations.size()); | 33 | 3.65k | for (auto impl : implementations) { | 34 | 3.65k | auto& r = results.emplace_back(); | 35 | 3.65k | r.maxbinarylength = | 36 | 3.65k | impl->maximal_binary_length_from_base64(base64.data(), base64.size()); | 37 | 3.65k | std::vector<char> output(r.maxbinarylength); | 38 | 3.65k | r.convertresult = | 39 | 3.65k | impl->base64_to_binary(base64.data(), base64.size(), output.data(), | 40 | 3.65k | selected_option, last_chunk_option); | 41 | 3.65k | } | 42 | 1.21k | auto neq = [](const auto& a, const auto& b) { return a != b; }; | 43 | 1.21k | if (std::ranges::adjacent_find(results, neq) != results.end()) { | 44 | 0 | std::cerr << "output differs between implementations for decode\n"; | 45 | 0 | const auto implementations = get_supported_implementations(); | 46 | 0 | std::size_t i = 0; | 47 | 0 | for (const auto& r : results) { | 48 | 0 | std::cerr << "impl " << implementations[i]->name() | 49 | 0 | << " got maxbinarylength=" << r.maxbinarylength | 50 | 0 | << " convertresult=" << r.convertresult << "\n"; | 51 | 0 | ++i; | 52 | 0 | } | 53 | 0 | std::cerr << "option: " << selected_option << '\n'; | 54 | 0 | std::cerr << "data: " | 55 | 0 | << (std::is_same_v<FromChar, char> ? "char" : "char16_t") << "{"; | 56 | 0 | for (int v : base64) { | 57 | 0 | std::cerr << v << ", "; | 58 | 0 | } | 59 | 0 | std::cerr << "}\n"; | 60 | 0 | std::abort(); | 61 | 0 | } | 62 | 1.21k | } |
|
63 | | |
64 | | template <typename FromChar> |
65 | | void decode_safe(std::span<const FromChar> base64_, const auto selected_option, |
66 | | const std::size_t decode_buf_size, |
67 | 984 | const auto last_chunk_option) { |
68 | 984 | std::vector<FromChar> base64(begin(base64_), end(base64_)); |
69 | 984 | std::vector<char> output(decode_buf_size); |
70 | 984 | std::size_t outlen = decode_buf_size; |
71 | 984 | const auto convertresult = simdutf::base64_to_binary_safe( |
72 | 984 | base64.data(), base64.size(), output.data(), outlen, selected_option, |
73 | 984 | last_chunk_option); |
74 | | |
75 | | // the number of written bytes must always be less than the supplied buffer |
76 | 984 | assert(outlen <= decode_buf_size); |
77 | | |
78 | 984 | switch (convertresult.error) { |
79 | 125 | case simdutf::error_code::OUTPUT_BUFFER_TOO_SMALL: { |
80 | 125 | if (!(convertresult.count <= base64.size())) { |
81 | 0 | std::cerr << " decode_buf_size=" << decode_buf_size |
82 | 0 | << " outlen=" << outlen << " and result=" << convertresult |
83 | 0 | << '\n'; |
84 | 0 | std::abort(); |
85 | 0 | } |
86 | 125 | } break; |
87 | 299 | case simdutf::error_code::INVALID_BASE64_CHARACTER: { |
88 | 299 | assert(convertresult.count < base64.size()); |
89 | 299 | } break; |
90 | 299 | case simdutf::error_code::BASE64_INPUT_REMAINDER: { |
91 | 78 | if (!(convertresult.count <= base64.size())) { |
92 | 0 | std::cerr << "on input with size=" << base64.size() |
93 | 0 | << ": got BASE64_INPUT_REMAINDER decode_buf_size=" |
94 | 0 | << decode_buf_size << " outlen=" << outlen |
95 | 0 | << " and result=" << convertresult << '\n'; |
96 | 0 | std::abort(); |
97 | 0 | } |
98 | 78 | } break; |
99 | 471 | case simdutf::error_code::SUCCESS: { |
100 | | // possibility to compare with the normal function |
101 | 471 | } break; |
102 | 11 | default:; |
103 | 984 | } |
104 | 984 | } void decode_safe<char, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char const, 18446744073709551615ul>, simdutf::base64_options, unsigned long, simdutf::last_chunk_handling_options) Line | Count | Source | 67 | 422 | const auto last_chunk_option) { | 68 | 422 | std::vector<FromChar> base64(begin(base64_), end(base64_)); | 69 | 422 | std::vector<char> output(decode_buf_size); | 70 | 422 | std::size_t outlen = decode_buf_size; | 71 | 422 | const auto convertresult = simdutf::base64_to_binary_safe( | 72 | 422 | base64.data(), base64.size(), output.data(), outlen, selected_option, | 73 | 422 | last_chunk_option); | 74 | | | 75 | | // the number of written bytes must always be less than the supplied buffer | 76 | 422 | assert(outlen <= decode_buf_size); | 77 | | | 78 | 422 | switch (convertresult.error) { | 79 | 61 | case simdutf::error_code::OUTPUT_BUFFER_TOO_SMALL: { | 80 | 61 | if (!(convertresult.count <= base64.size())) { | 81 | 0 | std::cerr << " decode_buf_size=" << decode_buf_size | 82 | 0 | << " outlen=" << outlen << " and result=" << convertresult | 83 | 0 | << '\n'; | 84 | 0 | std::abort(); | 85 | 0 | } | 86 | 61 | } break; | 87 | 93 | case simdutf::error_code::INVALID_BASE64_CHARACTER: { | 88 | 93 | assert(convertresult.count < base64.size()); | 89 | 93 | } break; | 90 | 93 | case simdutf::error_code::BASE64_INPUT_REMAINDER: { | 91 | 31 | if (!(convertresult.count <= base64.size())) { | 92 | 0 | std::cerr << "on input with size=" << base64.size() | 93 | 0 | << ": got BASE64_INPUT_REMAINDER decode_buf_size=" | 94 | 0 | << decode_buf_size << " outlen=" << outlen | 95 | 0 | << " and result=" << convertresult << '\n'; | 96 | 0 | std::abort(); | 97 | 0 | } | 98 | 31 | } break; | 99 | 233 | case simdutf::error_code::SUCCESS: { | 100 | | // possibility to compare with the normal function | 101 | 233 | } break; | 102 | 4 | default:; | 103 | 422 | } | 104 | 422 | } |
void decode_safe<char16_t, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char16_t const, 18446744073709551615ul>, simdutf::base64_options, unsigned long, simdutf::last_chunk_handling_options) Line | Count | Source | 67 | 562 | const auto last_chunk_option) { | 68 | 562 | std::vector<FromChar> base64(begin(base64_), end(base64_)); | 69 | 562 | std::vector<char> output(decode_buf_size); | 70 | 562 | std::size_t outlen = decode_buf_size; | 71 | 562 | const auto convertresult = simdutf::base64_to_binary_safe( | 72 | 562 | base64.data(), base64.size(), output.data(), outlen, selected_option, | 73 | 562 | last_chunk_option); | 74 | | | 75 | | // the number of written bytes must always be less than the supplied buffer | 76 | 562 | assert(outlen <= decode_buf_size); | 77 | | | 78 | 562 | switch (convertresult.error) { | 79 | 64 | case simdutf::error_code::OUTPUT_BUFFER_TOO_SMALL: { | 80 | 64 | if (!(convertresult.count <= base64.size())) { | 81 | 0 | std::cerr << " decode_buf_size=" << decode_buf_size | 82 | 0 | << " outlen=" << outlen << " and result=" << convertresult | 83 | 0 | << '\n'; | 84 | 0 | std::abort(); | 85 | 0 | } | 86 | 64 | } break; | 87 | 206 | case simdutf::error_code::INVALID_BASE64_CHARACTER: { | 88 | 206 | assert(convertresult.count < base64.size()); | 89 | 206 | } break; | 90 | 206 | case simdutf::error_code::BASE64_INPUT_REMAINDER: { | 91 | 47 | if (!(convertresult.count <= base64.size())) { | 92 | 0 | std::cerr << "on input with size=" << base64.size() | 93 | 0 | << ": got BASE64_INPUT_REMAINDER decode_buf_size=" | 94 | 0 | << decode_buf_size << " outlen=" << outlen | 95 | 0 | << " and result=" << convertresult << '\n'; | 96 | 0 | std::abort(); | 97 | 0 | } | 98 | 47 | } break; | 99 | 238 | case simdutf::error_code::SUCCESS: { | 100 | | // possibility to compare with the normal function | 101 | 238 | } break; | 102 | 7 | default:; | 103 | 562 | } | 104 | 562 | } |
|
105 | | |
106 | | struct roundtripresult { |
107 | | std::size_t length{}; |
108 | | std::size_t maxbinarylength{}; |
109 | | std::string outputhash; |
110 | | std::size_t written{}; |
111 | | simdutf::result convertbackresult{}; |
112 | | auto operator<=>(const roundtripresult&) const = default; |
113 | | }; |
114 | | |
115 | | /// verifies that base64 with lines is the same as without lines, but with |
116 | | /// newlines every line_length:th byte |
117 | | void verify_lines(std::span<const char> without_lines, |
118 | | std::span<const char> with_lines, |
119 | 3.39k | const std::size_t line_length) { |
120 | | // ensure we get the same as output, with a newline every line_length:th |
121 | | // byte |
122 | 39.6M | for (std::size_t i = 0, j = 0;;) { |
123 | | // check one line |
124 | 270M | for (int count = 0; count < line_length && j < with_lines.size(); ++count) { |
125 | 230M | if (without_lines[i++] != with_lines[j++]) { |
126 | | // unexpected - different content |
127 | 0 | std::abort(); |
128 | 0 | } |
129 | 230M | } |
130 | 39.6M | if (j == with_lines.size()) { |
131 | | // we are at the end of with_lines |
132 | 3.39k | if (i != without_lines.size()) { |
133 | | // unexpected - we are not at the end of without_lines |
134 | 0 | std::abort(); |
135 | 0 | } |
136 | 3.39k | break; |
137 | 3.39k | } |
138 | 39.6M | if (with_lines[j++] != '\n') { |
139 | | // unexpected - not a newline |
140 | 0 | std::abort(); |
141 | 0 | } |
142 | 39.6M | } |
143 | 3.39k | } |
144 | | |
145 | | void roundtrip(std::span<const char> binary, const auto selected_option, |
146 | 1.13k | const auto last_chunk_option, const std::size_t line_length) { |
147 | 1.13k | if (last_chunk_option == |
148 | 1.13k | simdutf::last_chunk_handling_options::stop_before_partial) { |
149 | 1 | return; // this is not a valid option for roundtrip |
150 | 1 | } |
151 | 1.13k | const auto inputhash = FNV1A_hash::as_str(binary); |
152 | 1.13k | const auto implementations = get_supported_implementations(); |
153 | 1.13k | std::vector<roundtripresult> results; |
154 | 1.13k | results.reserve(implementations.size()); |
155 | 3.39k | for (auto impl : implementations) { |
156 | 3.39k | auto& r = results.emplace_back(); |
157 | 3.39k | r.length = impl->base64_length_from_binary(binary.size(), selected_option); |
158 | 3.39k | std::vector<char> output(r.length); |
159 | 3.39k | r.written = impl->binary_to_base64(binary.data(), binary.size(), |
160 | 3.39k | output.data(), selected_option); |
161 | 3.39k | if (r.length != r.written) { |
162 | 0 | std::abort(); |
163 | 0 | } |
164 | | |
165 | | // make sure generating base64 with lines gives the expected result |
166 | 3.39k | const auto length_with_lines = |
167 | 3.39k | simdutf::base64_length_from_binary_with_lines( |
168 | 3.39k | binary.size(), selected_option, line_length); |
169 | 3.39k | assert(length_with_lines >= r.length); |
170 | 3.39k | std::string output_with_lines(length_with_lines, '\0'); |
171 | 3.39k | const auto nwritten_with_lines = impl->binary_to_base64_with_lines( |
172 | 3.39k | binary.data(), binary.size(), output_with_lines.data(), line_length, |
173 | 3.39k | selected_option); |
174 | 3.39k | if (nwritten_with_lines != length_with_lines) { |
175 | 0 | std::cerr << nwritten_with_lines << "!=" << length_with_lines << '\n'; |
176 | 0 | std::abort(); |
177 | 0 | } |
178 | 3.39k | verify_lines(output, output_with_lines, line_length); |
179 | | |
180 | 3.39k | r.outputhash = FNV1A_hash::as_str(output); |
181 | | // convert back to binary |
182 | 3.39k | r.maxbinarylength = |
183 | 3.39k | impl->maximal_binary_length_from_base64(output.data(), output.size()); |
184 | 3.39k | std::vector<char> restored(r.maxbinarylength); |
185 | 3.39k | r.convertbackresult = |
186 | 3.39k | impl->base64_to_binary(output.data(), output.size(), restored.data(), |
187 | 3.39k | selected_option, last_chunk_option); |
188 | 3.39k | if (const auto restoredhash = FNV1A_hash::as_str(restored); |
189 | 3.39k | inputhash != restoredhash) { |
190 | 0 | std::abort(); |
191 | 0 | } |
192 | 3.39k | if (restored.size() != binary.size()) { |
193 | 0 | std::abort(); |
194 | 0 | } |
195 | 3.39k | } |
196 | | |
197 | 2.26k | auto neq = [](const auto& a, const auto& b) { return a != b; }; |
198 | 1.13k | if (std::ranges::adjacent_find(results, neq) != results.end()) { |
199 | 0 | std::cerr << "output differs between implementations\n"; |
200 | 0 | for (const auto& r : results) { |
201 | 0 | std::cout << "written=" << r.written << " maxlength=" << r.maxbinarylength |
202 | 0 | << " length=" << r.length << '\n'; |
203 | 0 | } |
204 | 0 | std::abort(); |
205 | 0 | } |
206 | 1.13k | } |
207 | | |
208 | 4.34k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { |
209 | | // pick one of the function pointers, based on the fuzz data |
210 | | // the first byte is which action to take. step forward |
211 | | // several bytes so the input is aligned. |
212 | 4.34k | constexpr auto optionbytes = 6u; |
213 | 4.34k | static_assert(optionbytes % 2 == 0, |
214 | 4.34k | "optionbytes must be even to avoid misaligned char16 pointers"); |
215 | | |
216 | 4.34k | if (size < optionbytes) { |
217 | 4 | return 0; |
218 | 4 | } |
219 | 4.33k | constexpr auto Ncases = 5u; |
220 | 4.33k | constexpr auto actionmask = std::bit_ceil(Ncases) - 1; |
221 | 4.33k | const auto action = data[0] & actionmask; |
222 | | |
223 | | // pick a random option |
224 | 4.33k | const auto selected_option = [](auto index) { |
225 | 4.33k | if (index >= options.size()) |
226 | 0 | return options[0]; |
227 | 4.33k | else { |
228 | 4.33k | return options[index]; |
229 | 4.33k | } |
230 | 4.33k | }(data[1] & (std::bit_ceil(options.size()) - 1)); |
231 | 4.33k | const auto selected_last_chunk = |
232 | 4.33k | (selected_option == simdutf::base64_url || |
233 | 3.55k | selected_option == simdutf::base64_default_no_padding) |
234 | 4.33k | ? simdutf::last_chunk_handling_options::loose |
235 | 4.33k | : [](auto index) { |
236 | 3.10k | if (index >= last_chunk.size()) |
237 | 340 | return last_chunk[0]; |
238 | 2.76k | else { |
239 | 2.76k | return last_chunk[index]; |
240 | 2.76k | } |
241 | 3.10k | }(data[2] & (std::bit_ceil(last_chunk.size()) - 1)); |
242 | | |
243 | | // decode buffer size |
244 | 4.33k | const std::size_t decode_buffer_size = (data[4] << 8) + data[3]; |
245 | | |
246 | | // line length must be at least 4 |
247 | 4.33k | const std::size_t line_length = unsigned{data[5]} + 4u; |
248 | | |
249 | 4.33k | data += optionbytes; |
250 | 4.33k | size -= optionbytes; |
251 | | |
252 | 4.33k | switch (action) { |
253 | 1.13k | case 0: { |
254 | 1.13k | const std::span<const char> chardata{(const char*)data, size}; |
255 | 1.13k | roundtrip(chardata, selected_option, selected_last_chunk, line_length); |
256 | 1.13k | } break; |
257 | 999 | case 1: { |
258 | 999 | const std::span<const char> chardata{(const char*)data, size}; |
259 | 999 | decode(chardata, selected_option, selected_last_chunk); |
260 | 999 | } break; |
261 | 1.21k | case 2: { |
262 | 1.21k | const std::span<const char16_t> chardata{(const char16_t*)data, size / 2}; |
263 | 1.21k | decode(chardata, selected_option, selected_last_chunk); |
264 | 1.21k | } break; |
265 | 422 | case 3: { |
266 | 422 | const std::span<const char> chardata{(const char*)data, size}; |
267 | 422 | decode_safe(chardata, selected_option, decode_buffer_size, |
268 | 422 | selected_last_chunk); |
269 | 422 | } break; |
270 | 562 | case 4: { |
271 | 562 | const std::span<const char16_t> chardata{(const char16_t*)data, size / 2}; |
272 | 562 | decode_safe(chardata, selected_option, decode_buffer_size, |
273 | 562 | selected_last_chunk); |
274 | 562 | } break; |
275 | 4.33k | } |
276 | | |
277 | 4.33k | return 0; |
278 | 4.33k | } |