/src/flatbuffers/tests/fuzzer/flatbuffers_scalar_fuzzer.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2014 Google Inc. All rights reserved. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <assert.h> |
18 | | #include <stddef.h> |
19 | | #include <stdint.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <clocale> |
23 | | #include <memory> |
24 | | #include <regex> |
25 | | #include <string> |
26 | | |
27 | | #include "flatbuffers/idl.h" |
28 | | #include "test_init.h" |
29 | | |
// Accepted size range (in bytes) of the scalar text extracted from the fuzzer
// input; anything outside of it is skipped by the fuzzer entry point.
static constexpr size_t kMinInputLength{1};
static constexpr size_t kMaxInputLength{3000};

// Bit masks applied to the first input byte ('flags').
static constexpr uint8_t flags_scalar_type{0x0F};  // type of scalar value
static constexpr uint8_t flags_quotes_kind{0x10};  // quote " or '
// reserved for future: json {named} or [unnamed]
// static constexpr uint8_t flags_json_bracer = 0x20;
37 | | |
// Find all 'subj' sub-strings and replace first character of sub-string.
// The search resumes one character after each hit, so overlapping
// occurrences are broken as well.
// BreakSequence("testest","tes", 'X') -> "XesXest".
// BreakSequence("xxx","xx", 'Y') -> "YYx".
// A null or empty 'subj' leaves the string untouched.
static void BreakSequence(std::string& s, const char* subj, char repl) {
  // An empty pattern matches at every offset, including s.size(), where there
  // is no character left to overwrite. Treat it (and nullptr) as "nothing to
  // break" instead of running past the end of the string.
  if (subj == nullptr || *subj == '\0') return;
  for (size_t pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    // 'pos' is the start of a non-empty match, so it is a valid index.
    s[pos] = repl;
  }
}
48 | | |
// Trim every leading and trailing character that belongs to the 'pattern' set
// and return what is left.
// StripString("xy{xy}y", "xy") -> "{xy}"
// If 'pos' is given, it receives the offset of the first kept character in
// 's', or 0 when the whole string consists of 'pattern' characters.
static std::string StripString(const std::string& s, const char* pattern,
                               size_t* pos = nullptr) {
  const size_t head = s.find_first_not_of(pattern);
  const bool nothing_left = (head == std::string::npos);
  if (pos != nullptr) *pos = nothing_left ? 0 : head;
  if (nothing_left) return std::string();

  // At least one character survives, so the backward search must succeed.
  const size_t tail = s.find_last_not_of(pattern);
  assert(tail < s.length());
  assert(head <= tail);
  return std::string(s, head, tail - head + 1);
}
64 | | |
65 | | class RegexMatcher { |
66 | | protected: |
67 | | virtual bool MatchNumber(const std::string& input) const = 0; |
68 | | |
69 | | public: |
70 | 4.85k | virtual ~RegexMatcher() = default; |
71 | | |
72 | | struct MatchResult { |
73 | | size_t pos{0}; |
74 | | size_t len{0}; |
75 | | bool res{false}; |
76 | | bool quoted{false}; |
77 | | }; |
78 | | |
79 | 4.85k | MatchResult Match(const std::string& input) const { |
80 | 4.85k | MatchResult r; |
81 | | // strip leading and trailing "spaces" accepted by flatbuffer |
82 | 4.85k | auto test = StripString(input, "\t\r\n ", &r.pos); |
83 | 4.85k | r.len = test.size(); |
84 | | // check quotes |
85 | 4.85k | if (test.size() >= 2) { |
86 | 4.23k | auto fch = test.front(); |
87 | 4.23k | auto lch = test.back(); |
88 | 4.23k | r.quoted = (fch == lch) && (fch == '\'' || fch == '\"'); |
89 | 4.23k | if (r.quoted) { |
90 | | // remove quotes for regex test |
91 | 106 | test = test.substr(1, test.size() - 2); |
92 | 106 | } |
93 | 4.23k | } |
94 | | // Fast check: |
95 | 4.85k | if (test.empty()) return r; |
96 | | // A string with a valid scalar shouldn't have non-ascii or non-printable |
97 | | // symbols. |
98 | 374k | for (auto c : test) { |
99 | 374k | if ((c < ' ') || (c > '~')) return r; |
100 | 374k | } |
101 | | // Check with regex |
102 | 4.19k | r.res = MatchNumber(test); |
103 | 4.19k | return r; |
104 | 4.78k | } |
105 | | |
106 | | bool MatchRegexList(const std::string& input, |
107 | 4.17k | const std::vector<std::regex>& re_list) const { |
108 | 4.17k | auto str = StripString(input, " "); |
109 | 4.17k | if (str.empty()) return false; |
110 | 8.84k | for (auto& re : re_list) { |
111 | 8.84k | std::smatch match; |
112 | 8.84k | if (std::regex_match(str, match, re)) return true; |
113 | 8.84k | } |
114 | 2.34k | return false; |
115 | 4.17k | } |
116 | | }; |
117 | | |
118 | | class IntegerRegex : public RegexMatcher { |
119 | | protected: |
120 | 1.76k | bool MatchNumber(const std::string& input) const override { |
121 | 1.76k | static const std::vector<std::regex> re_list = { |
122 | 1.76k | std::regex{R"(^[-+]?[0-9]+$)", std::regex_constants::optimize}, |
123 | | |
124 | 1.76k | std::regex{R"(^[-+]?0[xX][0-9a-fA-F]+$)", |
125 | 1.76k | std::regex_constants::optimize}}; |
126 | 1.76k | return MatchRegexList(input, re_list); |
127 | 1.76k | } |
128 | | |
129 | | public: |
130 | 1.99k | IntegerRegex() = default; |
131 | | virtual ~IntegerRegex() = default; |
132 | | }; |
133 | | |
134 | | class UIntegerRegex : public RegexMatcher { |
135 | | protected: |
136 | 1.40k | bool MatchNumber(const std::string& input) const override { |
137 | 1.40k | static const std::vector<std::regex> re_list = { |
138 | 1.40k | std::regex{R"(^[+]?[0-9]+$)", std::regex_constants::optimize}, |
139 | 1.40k | std::regex{R"(^[+]?0[xX][0-9a-fA-F]+$)", |
140 | 1.40k | std::regex_constants::optimize}, |
141 | | // accept -0 number |
142 | 1.40k | std::regex{R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize}}; |
143 | 1.40k | return MatchRegexList(input, re_list); |
144 | 1.40k | } |
145 | | |
146 | | public: |
147 | 1.56k | UIntegerRegex() = default; |
148 | | virtual ~UIntegerRegex() = default; |
149 | | }; |
150 | | |
151 | | class BooleanRegex : public IntegerRegex { |
152 | | protected: |
153 | 302 | bool MatchNumber(const std::string& input) const override { |
154 | 302 | if (input == "true" || input == "false") return true; |
155 | 288 | return IntegerRegex::MatchNumber(input); |
156 | 302 | } |
157 | | |
158 | | public: |
159 | 364 | BooleanRegex() = default; |
160 | | virtual ~BooleanRegex() = default; |
161 | | }; |
162 | | |
163 | | class FloatRegex : public RegexMatcher { |
164 | | protected: |
165 | 1.01k | bool MatchNumber(const std::string& input) const override { |
166 | 1.01k | static const std::vector<std::regex> re_list = { |
167 | | // hex-float |
168 | 1.01k | std::regex{ |
169 | 1.01k | R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)", |
170 | 1.01k | std::regex_constants::optimize}, |
171 | | // dec-float |
172 | 1.01k | std::regex{ |
173 | 1.01k | R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)", |
174 | 1.01k | std::regex_constants::optimize}, |
175 | | |
176 | 1.01k | std::regex{ |
177 | 1.01k | R"(^[-+]?(?:nan|inf|infinity)$)", |
178 | 1.01k | std::regex_constants::optimize | std::regex_constants::icase}}; |
179 | 1.01k | return MatchRegexList(input, re_list); |
180 | 1.01k | } |
181 | | |
182 | | public: |
183 | 1.29k | FloatRegex() = default; |
184 | | virtual ~FloatRegex() = default; |
185 | | }; |
186 | | |
187 | | class ScalarReferenceResult { |
188 | | private: |
189 | | ScalarReferenceResult(const char* _type, RegexMatcher::MatchResult _matched) |
190 | 4.85k | : type(_type), matched(_matched) {} |
191 | | |
192 | | public: |
193 | | // Decode scalar type and check if the input string satisfies the scalar type. |
194 | 4.85k | static ScalarReferenceResult Check(uint8_t code, const std::string& input) { |
195 | 4.85k | switch (code) { |
196 | 432 | case 0x0: |
197 | 432 | return {"double", FloatRegex().Match(input)}; |
198 | 201 | case 0x1: |
199 | 201 | return {"float", FloatRegex().Match(input)}; |
200 | 412 | case 0x2: |
201 | 412 | return {"int8", IntegerRegex().Match(input)}; |
202 | 394 | case 0x3: |
203 | 394 | return {"int16", IntegerRegex().Match(input)}; |
204 | 465 | case 0x4: |
205 | 465 | return {"int32", IntegerRegex().Match(input)}; |
206 | 364 | case 0x5: |
207 | 364 | return {"int64", IntegerRegex().Match(input)}; |
208 | 265 | case 0x6: |
209 | 265 | return {"uint8", UIntegerRegex().Match(input)}; |
210 | 403 | case 0x7: |
211 | 403 | return {"uint16", UIntegerRegex().Match(input)}; |
212 | 412 | case 0x8: |
213 | 412 | return {"uint32", UIntegerRegex().Match(input)}; |
214 | 481 | case 0x9: |
215 | 481 | return {"uint64", UIntegerRegex().Match(input)}; |
216 | 364 | case 0xA: |
217 | 364 | return {"bool", BooleanRegex().Match(input)}; |
218 | 660 | default: |
219 | 660 | return {"float", FloatRegex().Match(input)}; |
220 | 4.85k | }; |
221 | 0 | } |
222 | | |
223 | | const char* type; |
224 | | const RegexMatcher::MatchResult matched; |
225 | | }; |
226 | | |
227 | | bool Parse(flatbuffers::Parser& parser, const std::string& json, |
228 | 659k | std::string* _text) { |
229 | 659k | auto done = parser.ParseJson(json.c_str()); |
230 | 659k | if (done) { |
231 | 187k | TEST_NULL(GenText(parser, parser.builder_.GetBufferPointer(), _text)); |
232 | 471k | } else { |
233 | 471k | *_text = parser.error_; |
234 | 471k | } |
235 | 659k | return done; |
236 | 659k | } |
237 | | |
238 | | // Utility for test run. |
239 | | OneTimeTestInit OneTimeTestInit::one_time_init_; |
240 | | |
241 | | // llvm std::regex have problem with stack overflow, limit maximum length. |
242 | | // ./scalar_fuzzer -max_len=3000 |
243 | 4.90k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { |
244 | | // Reserve one byte for Parser flags and one byte for repetition counter. |
245 | 4.90k | if (size < 3) return 0; |
246 | 4.90k | const uint8_t flags = data[0]; |
247 | | // normalize to ascii alphabet |
248 | 4.90k | const int extra_rep_number = |
249 | 4.90k | std::max(5, (data[1] > '0' ? (data[1] - '0') : 0)); |
250 | 4.90k | data += 2; |
251 | 4.90k | size -= 2; // bypass |
252 | | |
253 | | // Guarantee 0-termination. |
254 | 4.90k | const std::string original(reinterpret_cast<const char*>(data), size); |
255 | 4.90k | auto input = std::string(original.c_str()); // until '\0' |
256 | 4.90k | if (input.size() < kMinInputLength || input.size() > kMaxInputLength) |
257 | 54 | return 0; |
258 | | |
259 | | // Break comments in json to avoid complexity with regex matcher. |
260 | | // The string " 12345 /* text */" will be accepted if insert it to string |
261 | | // expression: "table X { Y: " + " 12345 /* text */" + "; }. |
262 | | // But strings like this will complicate regex matcher. |
263 | | // We reject this by transform "/* text */ 12345" to "@* text */ 12345". |
264 | 4.85k | BreakSequence(input, "//", '@'); // "//" -> "@/" |
265 | 4.85k | BreakSequence(input, "/*", '@'); // "/*" -> "@*" |
266 | | // { "$schema: "text" } is exceptional case. |
267 | | // This key:value ignored by the parser. Numbers can not have $. |
268 | 4.85k | BreakSequence(input, "$schema", '@'); // "$schema" -> "@schema" |
269 | | // Break all known scalar functions (todo: add them to regex?): |
270 | 38.8k | for (auto f : {"deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan"}) { |
271 | 38.8k | BreakSequence(input, f, '_'); // ident -> ident |
272 | 38.8k | } |
273 | | |
274 | | // Extract type of scalar from 'flags' and check if the input string satisfies |
275 | | // the scalar type. |
276 | 4.85k | const auto ref_res = |
277 | 4.85k | ScalarReferenceResult::Check(flags & flags_scalar_type, input); |
278 | 4.85k | auto& recheck = ref_res.matched; |
279 | | |
280 | | // Create parser |
281 | 4.85k | flatbuffers::IDLOptions opts; |
282 | 4.85k | opts.force_defaults = true; |
283 | 4.85k | opts.output_default_scalars_in_json = true; |
284 | 4.85k | opts.indent_step = -1; |
285 | 4.85k | opts.strict_json = true; |
286 | | |
287 | 4.85k | flatbuffers::Parser parser(opts); |
288 | 4.85k | auto schema = |
289 | 4.85k | "table X { Y: " + std::string(ref_res.type) + "; } root_type X;"; |
290 | 4.85k | TEST_EQ_FUNC(parser.Parse(schema.c_str()), true); |
291 | | |
292 | | // The fuzzer can adjust the number repetition if a side-effects have found. |
293 | | // Each test should pass at least two times to ensure that the parser doesn't |
294 | | // have any hidden-states or locale-depended effects. |
295 | 340k | for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) { |
296 | | // Each even run (0,2,4..) will test locale independed code. |
297 | 335k | auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2)); |
298 | | // Set new locale. |
299 | 335k | if (use_locale) { |
300 | 169k | FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale())); |
301 | 169k | } |
302 | | |
303 | | // Parse original input as-is. |
304 | 335k | auto orig_scalar = "{\"Y\" : " + input + "}"; |
305 | 335k | std::string orig_back; |
306 | 335k | auto orig_done = Parse(parser, orig_scalar, &orig_back); |
307 | | |
308 | 335k | if (recheck.res != orig_done) { |
309 | | // look for "does not fit" or "doesn't fit" or "out of range" |
310 | 38.7k | auto not_fit = |
311 | 38.7k | (true == recheck.res) |
312 | 38.7k | ? ((orig_back.find("does not fit") != std::string::npos) || |
313 | 0 | (orig_back.find("out of range") != std::string::npos)) |
314 | 38.7k | : false; |
315 | | |
316 | 38.7k | if (false == not_fit) { |
317 | 0 | TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done, |
318 | 0 | recheck.res); |
319 | 0 | TEST_EQ_STR(orig_back.c_str(), |
320 | 0 | input.substr(recheck.pos, recheck.len).c_str()); |
321 | 0 | TEST_EQ_FUNC(orig_done, recheck.res); |
322 | 0 | } |
323 | 38.7k | } |
324 | | |
325 | | // Try to make quoted string and test it. |
326 | 335k | std::string qouted_input; |
327 | 335k | if (true == recheck.quoted) { |
328 | | // we can't simply remove quotes, they may be nested "'12'". |
329 | | // Original string "\'12\'" converted to "'12'". |
330 | | // The string can be an invalid string by JSON rules, but after quotes |
331 | | // removed can transform to valid. |
332 | 11.3k | assert(recheck.len >= 2); |
333 | 323k | } else { |
334 | 323k | const auto quote = (flags & flags_quotes_kind) ? '\"' : '\''; |
335 | 323k | qouted_input = input; // copy |
336 | 323k | qouted_input.insert(recheck.pos + recheck.len, 1, quote); |
337 | 323k | qouted_input.insert(recheck.pos, 1, quote); |
338 | 323k | } |
339 | | |
340 | | // Test quoted version of the string |
341 | 335k | if (!qouted_input.empty()) { |
342 | 323k | auto fix_scalar = "{\"Y\" : " + qouted_input + "}"; |
343 | 323k | std::string fix_back; |
344 | 323k | auto fix_done = Parse(parser, fix_scalar, &fix_back); |
345 | | |
346 | 323k | if (orig_done != fix_done) { |
347 | 0 | TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done, |
348 | 0 | orig_done); |
349 | 0 | TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); |
350 | 0 | } |
351 | 323k | if (orig_done) { |
352 | 90.4k | TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); |
353 | 90.4k | } |
354 | 323k | TEST_EQ_FUNC(fix_done, orig_done); |
355 | 323k | } |
356 | | |
357 | | // Create new parser and test default value |
358 | 335k | if (true == orig_done) { |
359 | 97.3k | flatbuffers::Parser def_parser(opts); // re-use options |
360 | 97.3k | auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " + |
361 | 97.3k | input + "; } root_type X;" + |
362 | 97.3k | "{}"; // <- with empty json {}! |
363 | | |
364 | 97.3k | auto def_done = def_parser.Parse(def_schema.c_str()); |
365 | 97.3k | if (false == def_done) { |
366 | 0 | TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s", |
367 | 0 | def_parser.error_.c_str()); |
368 | 0 | FLATBUFFERS_ASSERT(false); |
369 | 0 | } |
370 | | // Compare with print. |
371 | 97.3k | std::string ref_string, def_string; |
372 | 97.3k | FLATBUFFERS_ASSERT( |
373 | 97.3k | !GenText(parser, parser.builder_.GetBufferPointer(), &ref_string)); |
374 | 97.3k | FLATBUFFERS_ASSERT(!GenText( |
375 | 97.3k | def_parser, def_parser.builder_.GetBufferPointer(), &def_string)); |
376 | 97.3k | if (ref_string != def_string) { |
377 | 0 | TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(), |
378 | 0 | ref_string.c_str()); |
379 | 0 | FLATBUFFERS_ASSERT(false); |
380 | 0 | } |
381 | 97.3k | } |
382 | | |
383 | | // Restore locale. |
384 | 335k | if (use_locale) { |
385 | 169k | FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); |
386 | 169k | } |
387 | 335k | } |
388 | 4.85k | return 0; |
389 | 4.85k | } |