/src/simdjson/fuzz/FuzzUtils.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef SIMDJSON_FUZZUTILS_H |
2 | | #define SIMDJSON_FUZZUTILS_H |
3 | | |
4 | | #include <cstdint> |
5 | | #include <vector> |
6 | | #include <string_view> |
7 | | #include <cstring> //memcpy |
8 | | |
// Returns the address of `data` retyped as a pointer to read-only bytes.
// Only the pointer type changes; nothing is copied and the address is the same.
template <typename T> inline const std::uint8_t* as_bytes(const T* data) {
  // Go through const void* so that plain static_casts are sufficient.
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
13 | | |
// Returns the address of `data` retyped as a pointer to read-only chars.
// Only the pointer type changes; nothing is copied and the address is the same.
template <typename T> inline const char* as_chars(const T* data) {
  // Go through const void* so that plain static_casts are sufficient.
  const void* const untyped = data;
  return static_cast<const char*>(untyped);
}
18 | | |
19 | | |
20 | | |
21 | | |
22 | | // Splits the input into strings, using a four byte separator which is human |
23 | | // readable. Makes for nicer debugging of fuzz data. |
24 | | // See https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#magic-separator |
25 | | // for background. Note: don't use memmem, it is not standard C++. |
26 | 9.92k | inline std::vector<std::string_view> split(const char* Data, size_t Size) { |
27 | | |
28 | 9.92k | std::vector<std::string_view> ret; |
29 | | |
30 | 9.92k | using namespace std::literals; |
31 | 9.92k | constexpr auto sep="\n~~\n"sv; |
32 | | |
33 | 9.92k | std::string_view all(Data,Size); |
34 | 9.92k | auto pos=all.find(sep); |
35 | 1.21M | while(pos!=std::string_view::npos) { |
36 | 1.20M | ret.push_back(all.substr(0,pos)); |
37 | 1.20M | all=all.substr(pos+sep.size()); |
38 | 1.20M | pos=all.find(sep); |
39 | 1.20M | } |
40 | 9.92k | ret.push_back(all); |
41 | 9.92k | return ret; |
42 | 9.92k | } |
43 | | |
44 | | // Generic helper to split fuzz data into usable parts, like ints etc. |
45 | | // Note that it does not throw, instead it sets the data pointer to null |
46 | | // if the input is exhausted. |
47 | | struct FuzzData { |
48 | | // data may not be null, even if size is zero. |
49 | | FuzzData(const uint8_t* data, |
50 | 9.92k | size_t size) : Data(data),Size(size){} |
51 | | |
52 | | ///range is inclusive |
53 | | template<int Min, int Max> |
54 | | int getInt() { |
55 | | static_assert (Min<Max,"min must be <max"); |
56 | | |
57 | | // make this constexpr, can't overflow because that is UB and is forbidden |
58 | | // in constexpr evaluation |
59 | | constexpr int range=(Max-Min)+1; |
60 | | constexpr unsigned int urange=range; |
61 | | |
62 | | // don't use std::uniform_int_distribution, we don't want to pay for |
63 | | // over consumption of random data. Accept the slightly non-uniform distribution. |
64 | | if(range<256) |
65 | | return Min+static_cast<int>(get<uint8_t>()%urange); |
66 | | if(range<65536) |
67 | | return Min+static_cast<int>(get<uint16_t>()%urange); |
68 | | |
69 | | return Min+static_cast<int>(get<uint32_t>()%urange); |
70 | | } |
71 | | |
72 | | template<typename T> |
73 | | T get() { |
74 | | const auto Nbytes=sizeof(T); |
75 | | T ret{}; |
76 | | if(Size<Nbytes) { |
77 | | //don't throw, signal with null instead. |
78 | | Data=nullptr; |
79 | | Size=0; |
80 | | return ret; |
81 | | } |
82 | | std::memcpy(&ret,Data,Nbytes); |
83 | | Data+=Nbytes; |
84 | | Size-=Nbytes; |
85 | | return ret; |
86 | | } |
87 | | |
88 | | // gets a string view with length in [Min,Max] |
89 | | template<int Min, int Max> |
90 | | std::string_view get_stringview() { |
91 | | static_assert (Min>=0,"Min must be positive"); |
92 | | const int len=getInt<Min,Max>(); |
93 | | const unsigned int ulen=static_cast<unsigned int>(len); |
94 | | if(ulen<Size) { |
95 | | std::string_view ret(chardata(),ulen); |
96 | | Data+=len; |
97 | | Size-=ulen; |
98 | | return ret; |
99 | | } |
100 | | |
101 | | //mark that there is too little data to fulfill the request |
102 | | Data=nullptr; |
103 | | Size=0; |
104 | | |
105 | | return {}; |
106 | | } |
107 | | |
108 | | // consumes the rest of the data as a string view |
109 | 0 | std::string_view remainder_as_stringview() { |
110 | 0 | std::string_view ret{chardata(),Size}; |
111 | 0 | Data+=Size; |
112 | 0 | Size=0; |
113 | 0 | return ret; |
114 | 0 | } |
115 | | |
116 | | // split the remainder of the data into string views, |
117 | 9.92k | std::vector<std::string_view> splitIntoStrings() { |
118 | 9.92k | std::vector<std::string_view> ret; |
119 | 9.92k | if(Size>0) { |
120 | 9.92k | ret=split(chardata(),Size); |
121 | | // all data consumed. |
122 | 9.92k | Data+=Size; |
123 | 9.92k | Size=0; |
124 | 9.92k | } |
125 | 9.92k | return ret; |
126 | 9.92k | } |
127 | | |
128 | | //are we good? |
129 | 0 | explicit operator bool() const { return Data!=nullptr;} |
130 | | |
131 | | //we are a URBG |
132 | | // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator |
133 | | //The type G satisfies UniformRandomBitGenerator if Given |
134 | | // T, the type named by G::result_type |
135 | | // g, a value of type G |
136 | | // |
137 | | // The following expressions must be valid and have their specified effects |
138 | | // Expression Return type Requirements |
139 | | // G::result_type T T is an unsigned integer type |
140 | | using result_type=uint8_t; |
141 | | // G::min() T Returns the smallest value that G's operator() may return. The value is strictly less than G::max(). The function must be constexpr. |
142 | 0 | static constexpr result_type min() {return 0;} |
143 | | // G::max() T Returns the largest value that G's operator() may return. The value is strictly greater than G::min(). The function must be constexpr. |
144 | 0 | static constexpr result_type max() {return 255;} |
145 | | // g() T Returns a value in the closed interval [G::min(), G::max()]. Has amortized constant complexity. |
146 | 0 | result_type operator()() { |
147 | 0 | if(Size==0) { |
148 | 0 | // return something varying, otherwise uniform_int_distribution may get |
149 | 0 | // stuck |
150 | 0 | return failcount++; |
151 | 0 | } |
152 | 0 | const result_type ret=Data[0]; |
153 | 0 | Data++; |
154 | 0 | Size--; |
155 | 0 | return ret; |
156 | 0 | } |
157 | | // returns a pointer to data as const char* to avoid those cstyle casts |
158 | 9.92k | const char* chardata() const {return static_cast<const char*>(static_cast<const void*>(Data));} |
159 | | // members |
160 | | const uint8_t* Data; |
161 | | size_t Size; |
162 | | uint8_t failcount=0; |
163 | | }; |
164 | | |
165 | | |
166 | | #endif // SIMDJSON_FUZZUTILS_H |