Coverage Report

Created: 2025-12-31 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdutf/fuzz/base64.cpp
Line
Count
Source
1
#include <cstddef>
2
#include <cstdint>
3
#include <array>
4
5
#include "helpers/common.h"
6
#include "simdutf.h"
7
8
constexpr std::array options = {
9
    simdutf::base64_default,
10
    simdutf::base64_url,
11
    simdutf::base64_default_no_padding,
12
    simdutf::base64_url_with_padding,
13
};
14
15
constexpr std::array last_chunk = {
16
    simdutf::last_chunk_handling_options::loose,
17
    simdutf::last_chunk_handling_options::strict,
18
    simdutf::last_chunk_handling_options::stop_before_partial};
19
20
struct decoderesult {
21
  std::size_t maxbinarylength{};
22
  simdutf::result convertresult{};
23
  auto operator<=>(const decoderesult&) const = default;
24
};
25
26
template <typename FromChar>
27
void decode(std::span<const FromChar> base64_, const auto selected_option,
28
2.21k
            const auto last_chunk_option) {
29
2.21k
  std::vector<FromChar> base64(begin(base64_), end(base64_));
30
2.21k
  const auto implementations = get_supported_implementations();
31
2.21k
  std::vector<decoderesult> results;
32
2.21k
  results.reserve(implementations.size());
33
6.65k
  for (auto impl : implementations) {
34
6.65k
    auto& r = results.emplace_back();
35
6.65k
    r.maxbinarylength =
36
6.65k
        impl->maximal_binary_length_from_base64(base64.data(), base64.size());
37
6.65k
    std::vector<char> output(r.maxbinarylength);
38
6.65k
    r.convertresult =
39
6.65k
        impl->base64_to_binary(base64.data(), base64.size(), output.data(),
40
6.65k
                               selected_option, last_chunk_option);
41
6.65k
  }
42
4.43k
  auto neq = [](const auto& a, const auto& b) { return a != b; };
auto decode<char, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options)::{lambda(auto:1 const&, auto:2 const&)#1}::operator()<decoderesult, {lambda(auto:1 const&, auto:2 const&)#1}::operator()>(decoderesult const&, {lambda(auto:1 const&, auto:2 const&)#1}::operator() const&) const
Line
Count
Source
42
1.99k
  auto neq = [](const auto& a, const auto& b) { return a != b; };
auto decode<char16_t, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char16_t const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options)::{lambda(auto:1 const&, auto:2 const&)#1}::operator()<decoderesult, {lambda(auto:1 const&, auto:2 const&)#1}::operator()>(decoderesult const&, {lambda(auto:1 const&, auto:2 const&)#1}::operator() const&) const
Line
Count
Source
42
2.43k
  auto neq = [](const auto& a, const auto& b) { return a != b; };
43
2.21k
  if (std::ranges::adjacent_find(results, neq) != results.end()) {
44
0
    std::cerr << "output differs between implementations for decode\n";
45
0
    const auto implementations = get_supported_implementations();
46
0
    std::size_t i = 0;
47
0
    for (const auto& r : results) {
48
0
      std::cerr << "impl " << implementations[i]->name()
49
0
                << " got maxbinarylength=" << r.maxbinarylength
50
0
                << " convertresult=" << r.convertresult << "\n";
51
0
      ++i;
52
0
    }
53
0
    std::cerr << "option: " << selected_option << '\n';
54
0
    std::cerr << "data: "
55
0
              << (std::is_same_v<FromChar, char> ? "char" : "char16_t") << "{";
56
0
    for (int v : base64) {
57
0
      std::cerr << v << ", ";
58
0
    }
59
0
    std::cerr << "}\n";
60
0
    std::abort();
61
0
  }
62
2.21k
}
void decode<char, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options)
Line
Count
Source
28
999
            const auto last_chunk_option) {
29
999
  std::vector<FromChar> base64(begin(base64_), end(base64_));
30
999
  const auto implementations = get_supported_implementations();
31
999
  std::vector<decoderesult> results;
32
999
  results.reserve(implementations.size());
33
2.99k
  for (auto impl : implementations) {
34
2.99k
    auto& r = results.emplace_back();
35
2.99k
    r.maxbinarylength =
36
2.99k
        impl->maximal_binary_length_from_base64(base64.data(), base64.size());
37
2.99k
    std::vector<char> output(r.maxbinarylength);
38
2.99k
    r.convertresult =
39
2.99k
        impl->base64_to_binary(base64.data(), base64.size(), output.data(),
40
2.99k
                               selected_option, last_chunk_option);
41
2.99k
  }
42
999
  auto neq = [](const auto& a, const auto& b) { return a != b; };
43
999
  if (std::ranges::adjacent_find(results, neq) != results.end()) {
44
0
    std::cerr << "output differs between implementations for decode\n";
45
0
    const auto implementations = get_supported_implementations();
46
0
    std::size_t i = 0;
47
0
    for (const auto& r : results) {
48
0
      std::cerr << "impl " << implementations[i]->name()
49
0
                << " got maxbinarylength=" << r.maxbinarylength
50
0
                << " convertresult=" << r.convertresult << "\n";
51
0
      ++i;
52
0
    }
53
0
    std::cerr << "option: " << selected_option << '\n';
54
0
    std::cerr << "data: "
55
0
              << (std::is_same_v<FromChar, char> ? "char" : "char16_t") << "{";
56
0
    for (int v : base64) {
57
0
      std::cerr << v << ", ";
58
0
    }
59
0
    std::cerr << "}\n";
60
0
    std::abort();
61
0
  }
62
999
}
void decode<char16_t, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char16_t const, 18446744073709551615ul>, simdutf::base64_options, simdutf::last_chunk_handling_options)
Line
Count
Source
28
1.21k
            const auto last_chunk_option) {
29
1.21k
  std::vector<FromChar> base64(begin(base64_), end(base64_));
30
1.21k
  const auto implementations = get_supported_implementations();
31
1.21k
  std::vector<decoderesult> results;
32
1.21k
  results.reserve(implementations.size());
33
3.65k
  for (auto impl : implementations) {
34
3.65k
    auto& r = results.emplace_back();
35
3.65k
    r.maxbinarylength =
36
3.65k
        impl->maximal_binary_length_from_base64(base64.data(), base64.size());
37
3.65k
    std::vector<char> output(r.maxbinarylength);
38
3.65k
    r.convertresult =
39
3.65k
        impl->base64_to_binary(base64.data(), base64.size(), output.data(),
40
3.65k
                               selected_option, last_chunk_option);
41
3.65k
  }
42
1.21k
  auto neq = [](const auto& a, const auto& b) { return a != b; };
43
1.21k
  if (std::ranges::adjacent_find(results, neq) != results.end()) {
44
0
    std::cerr << "output differs between implementations for decode\n";
45
0
    const auto implementations = get_supported_implementations();
46
0
    std::size_t i = 0;
47
0
    for (const auto& r : results) {
48
0
      std::cerr << "impl " << implementations[i]->name()
49
0
                << " got maxbinarylength=" << r.maxbinarylength
50
0
                << " convertresult=" << r.convertresult << "\n";
51
0
      ++i;
52
0
    }
53
0
    std::cerr << "option: " << selected_option << '\n';
54
0
    std::cerr << "data: "
55
0
              << (std::is_same_v<FromChar, char> ? "char" : "char16_t") << "{";
56
0
    for (int v : base64) {
57
0
      std::cerr << v << ", ";
58
0
    }
59
0
    std::cerr << "}\n";
60
0
    std::abort();
61
0
  }
62
1.21k
}
63
64
template <typename FromChar>
65
void decode_safe(std::span<const FromChar> base64_, const auto selected_option,
66
                 const std::size_t decode_buf_size,
67
984
                 const auto last_chunk_option) {
68
984
  std::vector<FromChar> base64(begin(base64_), end(base64_));
69
984
  std::vector<char> output(decode_buf_size);
70
984
  std::size_t outlen = decode_buf_size;
71
984
  const auto convertresult = simdutf::base64_to_binary_safe(
72
984
      base64.data(), base64.size(), output.data(), outlen, selected_option,
73
984
      last_chunk_option);
74
75
  // the number of written bytes must always be less than the supplied buffer
76
984
  assert(outlen <= decode_buf_size);
77
78
984
  switch (convertresult.error) {
79
125
  case simdutf::error_code::OUTPUT_BUFFER_TOO_SMALL: {
80
125
    if (!(convertresult.count <= base64.size())) {
81
0
      std::cerr << " decode_buf_size=" << decode_buf_size
82
0
                << " outlen=" << outlen << " and result=" << convertresult
83
0
                << '\n';
84
0
      std::abort();
85
0
    }
86
125
  } break;
87
299
  case simdutf::error_code::INVALID_BASE64_CHARACTER: {
88
299
    assert(convertresult.count < base64.size());
89
299
  } break;
90
299
  case simdutf::error_code::BASE64_INPUT_REMAINDER: {
91
78
    if (!(convertresult.count <= base64.size())) {
92
0
      std::cerr << "on input with size=" << base64.size()
93
0
                << ": got BASE64_INPUT_REMAINDER decode_buf_size="
94
0
                << decode_buf_size << " outlen=" << outlen
95
0
                << " and result=" << convertresult << '\n';
96
0
      std::abort();
97
0
    }
98
78
  } break;
99
471
  case simdutf::error_code::SUCCESS: {
100
    // possibility to compare with the normal function
101
471
  } break;
102
11
  default:;
103
984
  }
104
984
}
void decode_safe<char, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char const, 18446744073709551615ul>, simdutf::base64_options, unsigned long, simdutf::last_chunk_handling_options)
Line
Count
Source
67
422
                 const auto last_chunk_option) {
68
422
  std::vector<FromChar> base64(begin(base64_), end(base64_));
69
422
  std::vector<char> output(decode_buf_size);
70
422
  std::size_t outlen = decode_buf_size;
71
422
  const auto convertresult = simdutf::base64_to_binary_safe(
72
422
      base64.data(), base64.size(), output.data(), outlen, selected_option,
73
422
      last_chunk_option);
74
75
  // the number of written bytes must always be less than the supplied buffer
76
422
  assert(outlen <= decode_buf_size);
77
78
422
  switch (convertresult.error) {
79
61
  case simdutf::error_code::OUTPUT_BUFFER_TOO_SMALL: {
80
61
    if (!(convertresult.count <= base64.size())) {
81
0
      std::cerr << " decode_buf_size=" << decode_buf_size
82
0
                << " outlen=" << outlen << " and result=" << convertresult
83
0
                << '\n';
84
0
      std::abort();
85
0
    }
86
61
  } break;
87
93
  case simdutf::error_code::INVALID_BASE64_CHARACTER: {
88
93
    assert(convertresult.count < base64.size());
89
93
  } break;
90
93
  case simdutf::error_code::BASE64_INPUT_REMAINDER: {
91
31
    if (!(convertresult.count <= base64.size())) {
92
0
      std::cerr << "on input with size=" << base64.size()
93
0
                << ": got BASE64_INPUT_REMAINDER decode_buf_size="
94
0
                << decode_buf_size << " outlen=" << outlen
95
0
                << " and result=" << convertresult << '\n';
96
0
      std::abort();
97
0
    }
98
31
  } break;
99
233
  case simdutf::error_code::SUCCESS: {
100
    // possibility to compare with the normal function
101
233
  } break;
102
4
  default:;
103
422
  }
104
422
}
void decode_safe<char16_t, simdutf::base64_options, simdutf::last_chunk_handling_options>(std::__1::span<char16_t const, 18446744073709551615ul>, simdutf::base64_options, unsigned long, simdutf::last_chunk_handling_options)
Line
Count
Source
67
562
                 const auto last_chunk_option) {
68
562
  std::vector<FromChar> base64(begin(base64_), end(base64_));
69
562
  std::vector<char> output(decode_buf_size);
70
562
  std::size_t outlen = decode_buf_size;
71
562
  const auto convertresult = simdutf::base64_to_binary_safe(
72
562
      base64.data(), base64.size(), output.data(), outlen, selected_option,
73
562
      last_chunk_option);
74
75
  // the number of written bytes must always be less than the supplied buffer
76
562
  assert(outlen <= decode_buf_size);
77
78
562
  switch (convertresult.error) {
79
64
  case simdutf::error_code::OUTPUT_BUFFER_TOO_SMALL: {
80
64
    if (!(convertresult.count <= base64.size())) {
81
0
      std::cerr << " decode_buf_size=" << decode_buf_size
82
0
                << " outlen=" << outlen << " and result=" << convertresult
83
0
                << '\n';
84
0
      std::abort();
85
0
    }
86
64
  } break;
87
206
  case simdutf::error_code::INVALID_BASE64_CHARACTER: {
88
206
    assert(convertresult.count < base64.size());
89
206
  } break;
90
206
  case simdutf::error_code::BASE64_INPUT_REMAINDER: {
91
47
    if (!(convertresult.count <= base64.size())) {
92
0
      std::cerr << "on input with size=" << base64.size()
93
0
                << ": got BASE64_INPUT_REMAINDER decode_buf_size="
94
0
                << decode_buf_size << " outlen=" << outlen
95
0
                << " and result=" << convertresult << '\n';
96
0
      std::abort();
97
0
    }
98
47
  } break;
99
238
  case simdutf::error_code::SUCCESS: {
100
    // possibility to compare with the normal function
101
238
  } break;
102
7
  default:;
103
562
  }
104
562
}
105
106
struct roundtripresult {
107
  std::size_t length{};
108
  std::size_t maxbinarylength{};
109
  std::string outputhash;
110
  std::size_t written{};
111
  simdutf::result convertbackresult{};
112
  auto operator<=>(const roundtripresult&) const = default;
113
};
114
115
/// verifies that base64 with lines is the same as without lines, but with
116
/// newlines every line_length:th byte
117
void verify_lines(std::span<const char> without_lines,
118
                  std::span<const char> with_lines,
119
3.39k
                  const std::size_t line_length) {
120
  // ensure we get the same as output, with a newline every line_length:th
121
  // byte
122
39.6M
  for (std::size_t i = 0, j = 0;;) {
123
    // check one line
124
270M
    for (int count = 0; count < line_length && j < with_lines.size(); ++count) {
125
230M
      if (without_lines[i++] != with_lines[j++]) {
126
        // unexpected - different content
127
0
        std::abort();
128
0
      }
129
230M
    }
130
39.6M
    if (j == with_lines.size()) {
131
      // we are at the end of with_lines
132
3.39k
      if (i != without_lines.size()) {
133
        // unexpected - we are not at the end of without_lines
134
0
        std::abort();
135
0
      }
136
3.39k
      break;
137
3.39k
    }
138
39.6M
    if (with_lines[j++] != '\n') {
139
      // unexpected - not a newline
140
0
      std::abort();
141
0
    }
142
39.6M
  }
143
3.39k
}
144
145
/// Encodes binary to base64 and decodes it again with every supported
/// implementation. Aborts if an implementation does not reproduce the input,
/// if the line-wrapped encoding is inconsistent with the plain encoding, or
/// if the implementations disagree with each other.
///
/// @param binary             bytes to encode
/// @param selected_option    base64 variant used for encoding and decoding
/// @param last_chunk_option  last chunk handling used when decoding;
///                           stop_before_partial makes this a no-op
/// @param line_length        line length used for the line-wrapped encoding
void roundtrip(std::span<const char> binary, const auto selected_option,
               const auto last_chunk_option, const std::size_t line_length) {
  if (last_chunk_option ==
      simdutf::last_chunk_handling_options::stop_before_partial) {
    return; // this is not a valid option for roundtrip
  }
  const auto inputhash = FNV1A_hash::as_str(binary);
  const auto implementations = get_supported_implementations();
  std::vector<roundtripresult> results;
  results.reserve(implementations.size());
  for (auto impl : implementations) {
    auto& r = results.emplace_back();

    // encode; the predicted and the actual length must agree
    r.length = impl->base64_length_from_binary(binary.size(), selected_option);
    std::vector<char> output(r.length);
    r.written = impl->binary_to_base64(binary.data(), binary.size(),
                                       output.data(), selected_option);
    if (r.length != r.written) {
      std::abort();
    }

    // make sure generating base64 with lines gives the expected result
    const auto length_with_lines =
        simdutf::base64_length_from_binary_with_lines(
            binary.size(), selected_option, line_length);
    if (length_with_lines < r.length) {
      // explicit check instead of assert() so it also runs with NDEBUG
      std::cerr << length_with_lines << "<" << r.length << '\n';
      std::abort();
    }
    std::string output_with_lines(length_with_lines, '\0');
    const auto nwritten_with_lines = impl->binary_to_base64_with_lines(
        binary.data(), binary.size(), output_with_lines.data(), line_length,
        selected_option);
    if (nwritten_with_lines != length_with_lines) {
      std::cerr << nwritten_with_lines << "!=" << length_with_lines << '\n';
      std::abort();
    }
    verify_lines(output, output_with_lines, line_length);

    r.outputhash = FNV1A_hash::as_str(output);

    // convert back to binary
    r.maxbinarylength =
        impl->maximal_binary_length_from_base64(output.data(), output.size());
    std::vector<char> restored(r.maxbinarylength);
    r.convertbackresult =
        impl->base64_to_binary(output.data(), output.size(), restored.data(),
                               selected_option, last_chunk_option);
    // the whole restored buffer must hash like the input and have its size
    if (const auto restoredhash = FNV1A_hash::as_str(restored);
        inputhash != restoredhash) {
      std::abort();
    }
    if (restored.size() != binary.size()) {
      std::abort();
    }
  }

  // any adjacent pair that differs means the implementations do not agree
  const auto neq = [](const auto& a, const auto& b) { return a != b; };
  if (std::ranges::adjacent_find(results, neq) != results.end()) {
    std::cerr << "output differs between implementations\n";
    for (const auto& r : results) {
      // diagnostics go to stderr, like every other report in this file
      std::cerr << "written=" << r.written << " maxlength=" << r.maxbinarylength
                << " length=" << r.length << '\n';
    }
    std::abort();
  }
}
207
208
4.34k
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
209
  // pick one of the function pointers, based on the fuzz data
210
  // the first byte is which action to take. step forward
211
  // several bytes so the input is aligned.
212
4.34k
  constexpr auto optionbytes = 6u;
213
4.34k
  static_assert(optionbytes % 2 == 0,
214
4.34k
                "optionbytes must be even to avoid misaligned char16 pointers");
215
216
4.34k
  if (size < optionbytes) {
217
4
    return 0;
218
4
  }
219
4.33k
  constexpr auto Ncases = 5u;
220
4.33k
  constexpr auto actionmask = std::bit_ceil(Ncases) - 1;
221
4.33k
  const auto action = data[0] & actionmask;
222
223
  // pick a random option
224
4.33k
  const auto selected_option = [](auto index) {
225
4.33k
    if (index >= options.size())
226
0
      return options[0];
227
4.33k
    else {
228
4.33k
      return options[index];
229
4.33k
    }
230
4.33k
  }(data[1] & (std::bit_ceil(options.size()) - 1));
231
4.33k
  const auto selected_last_chunk =
232
4.33k
      (selected_option == simdutf::base64_url ||
233
3.55k
       selected_option == simdutf::base64_default_no_padding)
234
4.33k
          ? simdutf::last_chunk_handling_options::loose
235
4.33k
          : [](auto index) {
236
3.10k
              if (index >= last_chunk.size())
237
340
                return last_chunk[0];
238
2.76k
              else {
239
2.76k
                return last_chunk[index];
240
2.76k
              }
241
3.10k
            }(data[2] & (std::bit_ceil(last_chunk.size()) - 1));
242
243
  // decode buffer size
244
4.33k
  const std::size_t decode_buffer_size = (data[4] << 8) + data[3];
245
246
  // line length must be at least 4
247
4.33k
  const std::size_t line_length = unsigned{data[5]} + 4u;
248
249
4.33k
  data += optionbytes;
250
4.33k
  size -= optionbytes;
251
252
4.33k
  switch (action) {
253
1.13k
  case 0: {
254
1.13k
    const std::span<const char> chardata{(const char*)data, size};
255
1.13k
    roundtrip(chardata, selected_option, selected_last_chunk, line_length);
256
1.13k
  } break;
257
999
  case 1: {
258
999
    const std::span<const char> chardata{(const char*)data, size};
259
999
    decode(chardata, selected_option, selected_last_chunk);
260
999
  } break;
261
1.21k
  case 2: {
262
1.21k
    const std::span<const char16_t> chardata{(const char16_t*)data, size / 2};
263
1.21k
    decode(chardata, selected_option, selected_last_chunk);
264
1.21k
  } break;
265
422
  case 3: {
266
422
    const std::span<const char> chardata{(const char*)data, size};
267
422
    decode_safe(chardata, selected_option, decode_buffer_size,
268
422
                selected_last_chunk);
269
422
  } break;
270
562
  case 4: {
271
562
    const std::span<const char16_t> chardata{(const char16_t*)data, size / 2};
272
562
    decode_safe(chardata, selected_option, decode_buffer_size,
273
562
                selected_last_chunk);
274
562
  } break;
275
4.33k
  }
276
277
4.33k
  return 0;
278
4.33k
}