/src/simdutf/fuzz/safe_conversion.cpp
Line | Count | Source |
#include <cassert>
#include <cstdlib>
#include <vector>

#include "simdutf.h"
5 | | |
6 | | void test_latin1_to_utf8(std::span<const uint8_t> input_bytes, |
7 | 736 | std::size_t output_size) { |
8 | 736 | std::vector<char> output(output_size); |
9 | 736 | const auto written_bytes_safe = |
10 | 736 | simdutf::convert_latin1_to_utf8_safe(input_bytes, output); |
11 | 736 | if (written_bytes_safe > output_size) { |
12 | 0 | std::abort(); |
13 | 0 | } |
14 | 736 | const auto needed_size = simdutf::utf8_length_from_latin1(input_bytes); |
15 | 736 | std::vector<char> reference(needed_size); |
16 | 736 | const auto written_bytes_unsafe = |
17 | 736 | simdutf::convert_latin1_to_utf8(input_bytes, reference); |
18 | 736 | if (written_bytes_unsafe != needed_size) { |
19 | 0 | std::abort(); |
20 | 0 | } |
21 | 736 | if (written_bytes_safe > needed_size) { |
22 | | // convert_latin1_to_utf8_safe wrote more output buffer than the unsafe |
23 | | // version needed! |
24 | 0 | std::abort(); |
25 | 0 | } |
26 | | // ensure output is equal to the beginning of reference |
27 | 736 | if (!std::ranges::equal( |
28 | 736 | std::span(output).subspan(0, written_bytes_safe), |
29 | 736 | std::span(reference).subspan(0, written_bytes_safe))) { |
30 | 0 | std::abort(); |
31 | 0 | } |
32 | 736 | } |
33 | | |
34 | | void test_utf16_to_utf8(std::span<const char16_t> input, |
35 | 1.01k | std::size_t output_size) { |
36 | 1.01k | std::vector<char> output(output_size); |
37 | 1.01k | const auto written_bytes_safe = |
38 | 1.01k | simdutf::convert_utf16_to_utf8_safe(input, output); |
39 | 1.01k | if (written_bytes_safe > output_size) { |
40 | 0 | std::abort(); |
41 | 0 | } |
42 | | // result is implementation defined in case of garbage input |
43 | 1.01k | const auto unreliable_needed_size = simdutf::utf8_length_from_utf16(input); |
44 | 1.01k | std::vector<char> reference(unreliable_needed_size); |
45 | 1.01k | const auto written_bytes_unsafe = |
46 | 1.01k | simdutf::convert_utf16_to_utf8(input, reference); |
47 | | |
48 | | // ensure output is equal to the beginning of reference |
49 | 1.01k | const auto Ncompare = std::min(written_bytes_safe, written_bytes_unsafe); |
50 | 1.01k | const auto matches = |
51 | 1.01k | std::ranges::equal(std::span(output).subspan(0, Ncompare), |
52 | 1.01k | std::span(reference).subspan(0, Ncompare)); |
53 | 1.01k | assert(matches); |
54 | 1.01k | if (!matches) { |
55 | 0 | std::abort(); |
56 | 0 | } |
57 | 1.01k | } |
58 | | |
59 | 1.75k | void select_implementation(auto index) { |
60 | 1.75k | static const auto implementations = []() { |
61 | 1 | const auto list = simdutf::get_available_implementations(); |
62 | 1 | using Impl = std::decay_t<decltype(*list.begin())>; |
63 | 1 | std::vector<Impl> ret; |
64 | 4 | for (auto& e : list) { |
65 | 4 | if (e->supported_by_runtime_system()) { |
66 | 3 | ret.push_back(e); |
67 | 3 | } |
68 | 4 | } |
69 | 1 | return ret; |
70 | 1 | }(); |
71 | 1.75k | assert(!implementations.empty()); |
72 | 1.75k | simdutf::get_active_implementation() = |
73 | 1.75k | implementations.at(index % implementations.size()); |
74 | 1.75k | } |
75 | | |
76 | 1.75k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { |
77 | | |
78 | 1.75k | if (size < 4) { |
79 | 2 | return 0; |
80 | 2 | } |
81 | | |
82 | 1.75k | const auto action = data[0] & 0x1; |
83 | 1.75k | const auto output_size = (data[1] << 8 | data[2]); |
84 | 1.75k | const auto implementation_index = data[3] & 0b0111; |
85 | 1.75k | data += 4; |
86 | 1.75k | size -= 4; |
87 | | |
88 | 1.75k | const std::span<const uint8_t> input_bytes{data, data + size}; |
89 | | |
90 | 1.75k | select_implementation(implementation_index); |
91 | | |
92 | 1.75k | switch (action) { |
93 | 736 | case 0: |
94 | 736 | test_latin1_to_utf8(input_bytes, output_size); |
95 | 736 | break; |
96 | 1.01k | case 1: { |
97 | 1.01k | const auto* ptr = reinterpret_cast<const char16_t*>(input_bytes.data()); |
98 | 1.01k | test_utf16_to_utf8(std::span(ptr, ptr + input_bytes.size() / 2), |
99 | 1.01k | output_size); |
100 | 1.01k | } break; |
101 | 1.75k | } |
102 | | |
103 | 1.75k | return 0; |
104 | 1.75k | } |