/src/wuffs/fuzz/c/std/json_fuzzer.cc
Line | Count | Source |
1 | | // Copyright 2020 The Wuffs Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
6 | | // option. This file may not be copied, modified, or distributed |
7 | | // except according to those terms. |
8 | | // |
9 | | // SPDX-License-Identifier: Apache-2.0 OR MIT |
10 | | |
11 | | // ---------------- |
12 | | |
13 | | // Silence the nested slash-star warning for the next comment's command line. |
14 | | #pragma clang diagnostic push |
15 | | #pragma clang diagnostic ignored "-Wcomment" |
16 | | |
17 | | /* |
18 | | This fuzzer (the fuzz function) is typically run indirectly, by a framework |
19 | | such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput. |
20 | | |
21 | | When working on the fuzz implementation, or as a coherence check, defining |
22 | | WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files: |
23 | | |
24 | | gcc -DWUFFS_CONFIG__FUZZLIB_MAIN json_fuzzer.c |
25 | | ./a.out ../../../test/data/*.json |
26 | | rm -f ./a.out |
27 | | |
28 | | It should print "PASS", amongst other information, and exit(0). |
29 | | */ |
30 | | |
31 | | #pragma clang diagnostic pop |
32 | | |
33 | | // Wuffs ships as a "single file C library" or "header file library" as per |
34 | | // https://github.com/nothings/stb/blob/master/docs/stb_howto.txt |
35 | | // |
36 | | // To use that single file as a "foo.c"-like implementation, instead of a |
37 | | // "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or |
38 | | // compiling it. |
39 | | #define WUFFS_IMPLEMENTATION |
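 | | // A minimal sketch of that pattern for a hypothetical multi-file project |
 | | // (the file names below are illustrative, not part of this fuzzer): |
 | | // |
 | | //   // wuffs_impl.c: the one file that builds the implementation. |
 | | //   #define WUFFS_IMPLEMENTATION |
 | | //   #include "wuffs-unsupported-snapshot.c" |
 | | // |
 | | //   // every_other_file.c: include it as an ordinary header. |
 | | //   #include "wuffs-unsupported-snapshot.c" |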
40 | | |
41 | | #if defined(WUFFS_CONFIG__FUZZLIB_MAIN) |
42 | | // Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when |
43 | | // combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs' |
44 | | // functions have static storage. |
45 | | // |
46 | | // This can help the compiler ignore or discard unused code, which can produce |
47 | | // faster compiles and smaller binaries. Other motivations are discussed in the |
48 | | // "ALLOW STATIC IMPLEMENTATION" section of |
49 | | // https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt |
50 | | #define WUFFS_CONFIG__STATIC_FUNCTIONS |
51 | | #endif // defined(WUFFS_CONFIG__FUZZLIB_MAIN) |
52 | | |
53 | | // Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of |
54 | | // release/c/etc.c choose which parts of Wuffs to build. That file contains the |
55 | | // entire Wuffs standard library, implementing a variety of codecs and file |
56 | | // formats. Without this macro definition, an optimizing compiler or linker may |
57 | | // very well discard Wuffs code for unused codecs, but listing the Wuffs |
58 | | // modules we use makes that process explicit. Preprocessing means that such |
59 | | // code simply isn't compiled. |
60 | | #define WUFFS_CONFIG__MODULES |
61 | | #define WUFFS_CONFIG__MODULE__AUX__BASE |
62 | | #define WUFFS_CONFIG__MODULE__AUX__JSON |
63 | | #define WUFFS_CONFIG__MODULE__BASE |
64 | | #define WUFFS_CONFIG__MODULE__JSON |
65 | | |
66 | | // If building this program in an environment that doesn't easily accommodate |
67 | | // relative includes, you can use the script/inline-c-relative-includes.go |
68 | | // program to generate a stand-alone C file. |
69 | | #include "../../../release/c/wuffs-unsupported-snapshot.c" |
70 | | #include "../fuzzlib/fuzzlib.c" |
71 | | |
72 | 11.9k | #define TOK_BUFFER_ARRAY_SIZE 4096 |
73 | 244k | #define STACK_SIZE (WUFFS_JSON__DECODER_DEPTH_MAX_INCL + 1) |
74 | | |
75 | | // Wuffs allows either statically or dynamically allocated work buffers. This |
76 | | // program exercises static allocation. |
77 | | #define WORK_BUFFER_ARRAY_SIZE \ |
78 | 2.44M | WUFFS_JSON__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE |
79 | | #if WORK_BUFFER_ARRAY_SIZE > 0 |
80 | | uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE]; |
81 | | #else |
82 | | // Not all C/C++ compilers support 0-length arrays. |
83 | | uint8_t g_work_buffer_array[1]; |
84 | | #endif |
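 | | // For comparison, a dynamically allocated work buffer is a run-time query |
 | | // plus a malloc. This is only a sketch (this fuzzer keeps the static array |
 | | // above): given an initialized wuffs_json__decoder named dec, and assuming |
 | | // <stdlib.h> is available for malloc/free, it would look roughly like: |
 | | // |
 | | //   wuffs_base__range_ii_u64 r = wuffs_json__decoder__workbuf_len(&dec); |
 | | //   uint8_t* ptr = (uint8_t*)malloc(r.max_incl ? r.max_incl : 1); |
 | | //   wuffs_base__slice_u8 workbuf = |
 | | //       wuffs_base__make_slice_u8(ptr, ptr ? r.max_incl : 0); |
 | | //   // ... pass workbuf to decode_tokens, then free(ptr) afterwards. |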
85 | | |
86 | | // Each stack element is 1 byte. The low 7 bits denote the container: |
87 | | // - 0x01 means no container: we are at the top level. |
88 | | // - 0x02 means a [] list. |
89 | | // - 0x04 means a {} dictionary. |
90 | | // |
91 | | // The high 0x80 bit holds the even/odd-ness of the number of elements in that |
92 | | // container. A valid dictionary contains key-value pairs and should therefore |
93 | | // contain an even number of elements. |
94 | | typedef uint8_t stack_element; |
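 | | // As a worked example (not code), decoding {"k":[1,2]} moves the stack and |
 | | // depth roughly like this. Each complete value (not each token) toggles the |
 | | // 0x80 parity bit of its enclosing container: |
 | | // |
 | | //   start            depth=0  stack[0]=0x01 |
 | | //   {   push dict    depth=1  stack[1]=0x04 |
 | | //   "k" value        depth=1  stack[1]=0x84  (1 element in the dict: odd) |
 | | //   [   push list    depth=2  stack[2]=0x02 |
 | | //   1   value        depth=2  stack[2]=0x82 |
 | | //   2   value        depth=2  stack[2]=0x02 |
 | | //   ]   pop to dict  depth=1  stack[1]=0x04  (2 elements in the dict: even) |
 | | //   }   pop to top   depth=0  stack[0]=0x81  (1 top-level value: odd) |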
95 | | |
96 | | const char* // |
97 | | fuzz_one_token(wuffs_base__token t, |
98 | | wuffs_base__token prev_token, |
99 | | wuffs_base__io_buffer* src, |
100 | | size_t* ti, |
101 | | stack_element* stack, |
102 | 5.41M | size_t* depth) { |
103 | 5.41M | uint64_t len = wuffs_base__token__length(&t); |
104 | 5.41M | if (len > 0xFFFF) { |
105 | 0 | return "fuzz: internal error: length too long (vs 0xFFFF)"; |
106 | 5.41M | } else if (len > (src->meta.wi - *ti)) { |
107 | 0 | return "fuzz: internal error: length too long (vs wi - ti)"; |
108 | 0 | } |
109 | 5.41M | *ti += len; |
110 | | |
111 | 5.41M | if ((wuffs_base__token__value_extension(&t) >= 0) && |
112 | 5.41M | !wuffs_base__token__continued(&prev_token)) { |
113 | 0 | return "fuzz: internal error: extended token not after continued token"; |
114 | 0 | } |
115 | | |
116 | 5.41M | int64_t vbc = wuffs_base__token__value_base_category(&t); |
117 | 5.41M | uint64_t vbd = wuffs_base__token__value_base_detail(&t); |
118 | | |
119 | 5.41M | switch (vbc) { |
120 | 434k | case WUFFS_BASE__TOKEN__VBC__STRUCTURE: { |
121 | 434k | bool from_consistent = false; |
122 | 434k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) { |
123 | 1.91k | from_consistent = stack[*depth] & 0x01; |
124 | 432k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) { |
125 | 427k | from_consistent = stack[*depth] & 0x02; |
126 | 427k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) { |
127 | 5.10k | from_consistent = stack[*depth] & 0x04; |
128 | 5.10k | } |
129 | 434k | if (!from_consistent) { |
130 | 0 | return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC"; |
131 | 0 | } |
132 | | |
133 | 434k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) { |
134 | 244k | (*depth)++; |
135 | 244k | if ((*depth >= STACK_SIZE) || (*depth == 0)) { |
136 | 0 | return "fuzz: internal error: depth too large"; |
137 | 0 | } |
138 | | |
139 | 244k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) { |
140 | 0 | return "fuzz: internal error: push to the 'none' container"; |
141 | 244k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) { |
142 | 239k | stack[*depth] = 0x02; |
143 | 239k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) { |
144 | 4.96k | stack[*depth] = 0x04; |
145 | 4.96k | } else { |
146 | 0 | return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC"; |
147 | 0 | } |
148 | | |
149 | 244k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) { |
150 | 189k | if ((vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) && |
151 | 189k | (0 != (0x80 & stack[*depth]))) { |
152 | 0 | return "fuzz: internal error: dictionary had an incomplete key/value " |
153 | 0 | "pair"; |
154 | 0 | } |
155 | | |
156 | 189k | if (*depth <= 0) { |
157 | 0 | return "fuzz: internal error: depth too small"; |
158 | 0 | } |
159 | 189k | (*depth)--; |
160 | | |
161 | 189k | bool to_consistent = false; |
162 | 189k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) { |
163 | 74 | to_consistent = stack[*depth] & 0x01; |
164 | 189k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) { |
165 | 188k | to_consistent = stack[*depth] & 0x02; |
166 | 188k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) { |
167 | 607 | to_consistent = stack[*depth] & 0x04; |
168 | 607 | } |
169 | 189k | if (!to_consistent) { |
170 | 0 | return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC"; |
171 | 0 | } |
172 | | |
173 | 189k | } else { |
174 | 0 | return "fuzz: internal error: unrecognized VBC__STRUCTURE"; |
175 | 0 | } |
176 | 434k | break; |
177 | 434k | } |
178 | | |
179 | 434k | case WUFFS_BASE__TOKEN__VBC__STRING: { |
180 | 126k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) { |
181 | 99.1k | wuffs_base__slice_u8 s = |
182 | 99.1k | wuffs_base__make_slice_u8(src->data.ptr + *ti - len, len); |
183 | 99.1k | if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) && |
184 | 99.1k | (s.len != wuffs_base__utf_8__longest_valid_prefix(s.ptr, s.len))) { |
185 | 0 | return "fuzz: internal error: invalid UTF-8"; |
186 | 0 | } |
187 | 99.1k | if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) && |
188 | 99.1k | (s.len != wuffs_base__ascii__longest_valid_prefix(s.ptr, s.len))) { |
189 | 0 | return "fuzz: internal error: invalid ASCII"; |
190 | 0 | } |
191 | 99.1k | } |
192 | 126k | break; |
193 | 126k | } |
194 | | |
195 | 3.78M | case WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT: { |
196 | 3.78M | if ((WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL <= vbd) && |
197 | 3.78M | (vbd <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) { |
198 | 0 | return "fuzz: internal error: invalid Unicode surrogate"; |
199 | 3.78M | } else if (WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL < vbd) { |
200 | 0 | return "fuzz: internal error: invalid Unicode code point"; |
201 | 0 | } |
202 | 3.78M | break; |
203 | 3.78M | } |
204 | | |
205 | 3.78M | default: |
206 | 1.07M | break; |
207 | 5.41M | } |
208 | | |
209 | | // After a complete JSON value, update the parity (even/odd count) of the |
210 | | // container. |
211 | 5.41M | if (!wuffs_base__token__continued(&t) && |
212 | 5.41M | (vbc != WUFFS_BASE__TOKEN__VBC__FILLER) && |
213 | 5.41M | ((vbc != WUFFS_BASE__TOKEN__VBC__STRUCTURE) || |
214 | 735k | (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP))) { |
215 | 490k | stack[*depth] ^= 0x80; |
216 | 490k | } |
217 | | |
218 | 5.41M | return NULL; |
219 | 5.41M | } |
220 | | |
221 | | uint64_t // |
222 | 11.7k | buffer_limit(uint64_t hash, uint64_t min, uint64_t max) { |
223 | 11.7k | hash &= 0x3F; |
224 | 11.7k | uint64_t n; |
225 | 11.7k | if (hash < 0x20) { |
226 | 5.98k | n = min + hash; |
227 | 5.98k | } else { |
228 | 5.77k | n = max - (0x3F - hash); |
229 | 5.77k | } |
230 | 11.7k | if (n < min) { |
231 | 0 | return min; |
232 | 11.7k | } else if (n > max) { |
233 | 0 | return max; |
234 | 0 | } |
235 | 11.7k | return n; |
236 | 11.7k | } |
237 | | |
238 | | uint32_t g_quirks[] = { |
239 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_A, |
240 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_CAPITAL_U, |
241 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_E, |
242 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_QUESTION_MARK, |
243 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE, |
244 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_V, |
245 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_X_AS_CODE_POINTS, |
246 | | WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_ZERO, |
247 | | WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK, |
248 | | WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, |
249 | | WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA, |
250 | | WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS, |
251 | | WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR, |
252 | | WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK, |
253 | | WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, |
254 | | WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_N_TILDE_R_TILDE_T, |
255 | | WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE, |
256 | | 0, |
257 | | }; |
258 | | |
259 | 5.88k | void set_quirks(wuffs_json__decoder* dec, uint64_t hash) { |
260 | 105k | for (uint32_t i = 0; g_quirks[i]; i++) { |
261 | 99.9k | uint64_t bit = ((uint64_t)1) << (i & 63); |
262 | 99.9k | if (hash & bit) { |
263 | 49.4k | wuffs_json__decoder__set_quirk(dec, g_quirks[i], 1); |
264 | 49.4k | } |
265 | 99.9k | } |
266 | 5.88k | } |
267 | | |
268 | | const char* // |
269 | 5.88k | fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) { |
270 | 5.88k | uint64_t tok_limit = buffer_limit( |
271 | 5.88k | hash & 0x3F, WUFFS_JSON__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL, |
272 | 5.88k | TOK_BUFFER_ARRAY_SIZE); |
273 | 5.88k | hash = wuffs_base__u64__rotate_right(hash, 6); |
274 | | |
275 | 5.88k | uint64_t src_limit = buffer_limit( |
276 | 5.88k | hash & 0x3F, WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096); |
277 | 5.88k | hash = wuffs_base__u64__rotate_right(hash, 6); |
278 | | |
279 | | // ---- |
280 | | |
281 | 5.88k | wuffs_json__decoder dec; |
282 | 5.88k | wuffs_base__status status = wuffs_json__decoder__initialize( |
283 | 5.88k | &dec, sizeof dec, WUFFS_VERSION, |
284 | 5.88k | WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED); |
285 | 5.88k | if (!wuffs_base__status__is_ok(&status)) { |
286 | 0 | return wuffs_base__status__message(&status); |
287 | 0 | } |
288 | 5.88k | set_quirks(&dec, hash); |
289 | | |
290 | 5.88k | wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE]; |
291 | 5.88k | wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){ |
292 | 5.88k | .data = ((wuffs_base__slice_token){ |
293 | 5.88k | .ptr = tok_array, |
294 | 5.88k | .len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE) |
295 | 5.88k | ? tok_limit |
296 | 5.88k | : TOK_BUFFER_ARRAY_SIZE), |
297 | 5.88k | }), |
298 | 5.88k | }); |
299 | | |
300 | 5.88k | wuffs_base__token prev_token = wuffs_base__make_token(0); |
301 | 5.88k | uint32_t no_progress_count = 0; |
302 | | |
303 | 5.88k | stack_element stack[STACK_SIZE]; |
304 | 5.88k | stack[0] = 0x01; // We start in the 'none' container. |
305 | 5.88k | size_t depth = 0; |
306 | | |
307 | | // ---- |
308 | | |
309 | 2.44M | while (true) { // Outer loop. |
310 | 2.44M | wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit); |
311 | | |
312 | 2.44M | size_t old_tok_wi = tok.meta.wi; |
313 | 2.44M | size_t old_tok_ri = tok.meta.ri; |
314 | 2.44M | size_t old_src_wi = src.meta.wi; |
315 | 2.44M | size_t old_src_ri = src.meta.ri; |
316 | 2.44M | size_t ti = old_src_ri; |
317 | | |
318 | 2.44M | status = wuffs_json__decoder__decode_tokens( |
319 | 2.44M | &dec, &tok, &src, |
320 | 2.44M | wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE)); |
321 | 2.44M | if ((tok.data.len < tok.meta.wi) || // |
322 | 2.44M | (tok.meta.wi < tok.meta.ri) || // |
323 | 2.44M | (tok.meta.ri != old_tok_ri)) { |
324 | 0 | return "fuzz: internal error: inconsistent tok indexes"; |
325 | 2.44M | } else if ((src.data.len < src.meta.wi) || // |
326 | 2.44M | (src.meta.wi < src.meta.ri) || // |
327 | 2.44M | (src.meta.wi != old_src_wi)) { |
328 | 0 | return "fuzz: internal error: inconsistent src indexes"; |
329 | 0 | } |
330 | 2.44M | full_src->meta.ri += src.meta.ri - old_src_ri; |
331 | | |
332 | 2.44M | if ((tok.meta.wi > old_tok_wi) || (src.meta.ri > old_src_ri) || |
333 | 2.44M | !wuffs_base__status__is_suspension(&status)) { |
334 | 322k | no_progress_count = 0; |
335 | 2.12M | } else if (no_progress_count < 999) { |
336 | 2.12M | no_progress_count++; |
337 | 2.12M | } else if (!full_src->meta.closed && |
338 | 2.11k | (status.repr == wuffs_base__suspension__short_read)) { |
339 | 2.11k | return wuffs_base__status__message(&status); |
340 | 2.11k | } else { |
341 | 0 | return "fuzz: internal error: no progress"; |
342 | 0 | } |
343 | | |
344 | | // ---- |
345 | | |
346 | 7.86M | while (tok.meta.ri < tok.meta.wi) { // Inner loop. |
347 | 5.41M | wuffs_base__token t = tok.data.ptr[tok.meta.ri++]; |
348 | 5.41M | const char* z = |
349 | 5.41M | fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth); |
350 | 5.41M | if (z != NULL) { |
351 | 0 | return z; |
352 | 0 | } |
353 | 5.41M | prev_token = t; |
354 | 5.41M | } // Inner loop. |
355 | | |
356 | | // ---- |
357 | | |
358 | | // Check that, starting from old_src_ri, summing the token lengths brings |
359 | | // us to the new src.meta.ri. |
360 | 2.44M | if (ti != src.meta.ri) { |
361 | 0 | return "fuzz: internal error: ti != ri"; |
362 | 0 | } |
363 | | |
364 | 2.44M | if (status.repr == NULL) { |
365 | 1.18k | break; |
366 | | |
367 | 2.44M | } else if (status.repr == wuffs_base__suspension__short_read) { |
368 | 2.26M | if (src.meta.closed) { |
369 | 0 | return "fuzz: internal error: short read on a closed io_reader"; |
370 | 0 | } |
371 | | // We don't compact full_src as it may be mmap'ed read-only. |
372 | 2.26M | continue; |
373 | | |
374 | 2.26M | } else if (status.repr == wuffs_base__suspension__short_write) { |
375 | 173k | wuffs_base__token_buffer__compact(&tok); |
376 | 173k | continue; |
377 | 173k | } |
378 | | |
379 | 2.57k | return wuffs_base__status__message(&status); |
380 | 2.44M | } // Outer loop. |
381 | | |
382 | | // ---- |
383 | | |
384 | 1.18k | if (depth != 0) { |
385 | 0 | return "fuzz: internal error: decoded OK but final depth was not zero"; |
386 | 1.18k | } else if (wuffs_base__token__continued(&prev_token)) { |
387 | 0 | return "fuzz: internal error: decoded OK but final token was continued"; |
388 | 0 | } |
389 | 1.18k | return NULL; |
390 | 1.18k | } |
391 | | |
392 | | const char* // |
393 | 100 | fuzz_simple(wuffs_base__io_buffer* full_src) { |
394 | 100 | wuffs_json__decoder dec; |
395 | 100 | wuffs_base__status status = |
396 | 100 | wuffs_json__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0); |
397 | 100 | if (!wuffs_base__status__is_ok(&status)) { |
398 | 0 | return wuffs_base__status__message(&status); |
399 | 0 | } |
400 | | |
401 | 100 | wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE]; |
402 | 100 | wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){ |
403 | 100 | .data = ((wuffs_base__slice_token){ |
404 | 100 | .ptr = tok_array, |
405 | 100 | .len = TOK_BUFFER_ARRAY_SIZE, |
406 | 100 | }), |
407 | 100 | }); |
408 | | |
409 | 295 | while (true) { |
410 | 295 | status = wuffs_json__decoder__decode_tokens( |
411 | 295 | &dec, &tok, full_src, |
412 | 295 | wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE)); |
413 | 295 | if (status.repr == NULL) { |
414 | 6 | break; |
415 | | |
416 | 289 | } else if (status.repr == wuffs_base__suspension__short_write) { |
417 | 195 | tok.meta.ri = tok.meta.wi; |
418 | 195 | wuffs_base__token_buffer__compact(&tok); |
419 | 195 | continue; |
420 | 195 | } |
421 | | |
422 | 94 | return wuffs_base__status__message(&status); |
423 | 295 | } |
424 | | |
425 | 6 | return NULL; |
426 | 100 | } |
427 | | |
428 | | #if defined(__cplusplus) |
429 | | #include <vector> |
430 | | |
431 | | class Callbacks : public wuffs_aux::DecodeJsonCallbacks { |
432 | | public: |
433 | 5.98k | Callbacks() : m_depth(0) {} |
434 | | |
435 | 2.95k | std::string AppendNull() override { return ""; } |
436 | | |
437 | 3.37k | std::string AppendBool(bool val) override { return ""; } |
438 | | |
439 | 354k | std::string AppendI64(int64_t val) override { return ""; } |
440 | | |
441 | 58.1k | std::string AppendF64(double val) override { return ""; } |
442 | | |
443 | 4.71k | std::string AppendTextString(std::string&& val) override { return ""; } |
444 | | |
445 | 90.2k | std::string Push(uint32_t flags) override { |
446 | 90.2k | m_depth++; |
447 | 90.2k | return ""; |
448 | 90.2k | } |
449 | | |
450 | 60.7k | std::string Pop(uint32_t flags) override { |
451 | 60.7k | m_depth--; |
452 | 60.7k | if (m_depth < 0) { |
453 | 0 | fprintf(stderr, "negative depth\n"); |
454 | 0 | intentional_segfault(); |
455 | 0 | } |
456 | 60.7k | return ""; |
457 | 60.7k | } |
458 | | |
459 | | void Done(wuffs_aux::DecodeJsonResult& result, |
460 | | wuffs_aux::sync_io::Input& input, |
461 | 5.98k | wuffs_aux::IOBuffer& buffer) override { |
462 | 5.98k | if (result.error_message.empty()) { |
463 | 1.94k | if (m_depth != 0) { |
464 | 0 | fprintf(stderr, "no error message but final depth is non-zero\n"); |
465 | 0 | intentional_segfault(); |
466 | 0 | } |
467 | 4.03k | } else if (result.error_message.find("internal error:") != |
468 | 4.03k | std::string::npos) { |
469 | 0 | fprintf(stderr, "internal errors shouldn't occur: \"%s\"\n", |
470 | 0 | result.error_message.c_str()); |
471 | 0 | intentional_segfault(); |
472 | 0 | } |
473 | 5.98k | } |
474 | | |
475 | | private: |
476 | | int64_t m_depth; |
477 | | }; |
478 | | |
479 | | void // |
480 | 5.98k | fuzz_cpp(const uint8_t* in_ptr, size_t in_len, uint64_t hash) { |
481 | 5.98k | static const char* json_pointers[16] = { |
482 | 5.98k | "", // |
483 | 5.98k | "", // |
484 | 5.98k | "", // |
485 | 5.98k | "", // |
486 | 5.98k | "", // |
487 | 5.98k | "", // |
488 | 5.98k | "", // |
489 | 5.98k | "", // |
490 | 5.98k | "", // |
491 | 5.98k | "", // |
492 | 5.98k | "/", // |
493 | 5.98k | "/2/3/4/5", // |
494 | 5.98k | "/k0", // |
495 | 5.98k | "/k0/1", // |
496 | 5.98k | "/x/y", // |
497 | 5.98k | "/~0/~1/~n", // |
498 | 5.98k | }; |
499 | 5.98k | const char* json_pointer = json_pointers[hash & 15]; |
500 | 5.98k | hash = wuffs_base__u64__rotate_right(hash, 4); |
501 | | |
502 | 5.98k | std::vector<wuffs_aux::QuirkKeyValuePair> quirks; |
503 | 107k | for (uint32_t i = 0; g_quirks[i]; i++) { |
504 | 101k | uint64_t bit = ((uint64_t)1) << (i & 63); |
505 | 101k | if (hash & bit) { |
506 | 50.1k | quirks.push_back({g_quirks[i], 1}); |
507 | 50.1k | } |
508 | 101k | } |
509 | | |
510 | 5.98k | Callbacks callbacks; |
511 | 5.98k | wuffs_aux::sync_io::MemoryInput input(in_ptr, in_len); |
512 | 5.98k | wuffs_aux::DecodeJson( |
513 | 5.98k | callbacks, input, |
514 | 5.98k | wuffs_aux::DecodeJsonArgQuirks(quirks.data(), quirks.size()), |
515 | 5.98k | wuffs_aux::DecodeJsonArgJsonPointer(json_pointer)); |
516 | 5.98k | } |
517 | | #endif // defined(__cplusplus) |
518 | | |
519 | | const char* // |
520 | 5.98k | fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) { |
521 | 5.98k | #if defined(__cplusplus) |
522 | 5.98k | fuzz_cpp(full_src->reader_pointer(), full_src->reader_length(), |
523 | 5.98k | wuffs_base__u64__rotate_right(hash, 32)); |
524 | 5.98k | #endif // defined(__cplusplus) |
525 | | |
526 | | // Send 99.6% of inputs to fuzz_complex and the remainder to fuzz_simple. The |
527 | | // 0xA5 constant is arbitrary but non-zero. If the hash function maps the |
528 | | // empty input to 0, this still sends the empty input to fuzz_complex. |
529 | | // |
530 | | // The fuzz_simple implementation shows how easy decoding with Wuffs is when |
531 | | // all you want is to run LLVMFuzzerTestOneInput's built-in (Wuffs API |
532 | | // independent) checks (e.g. the ASan address sanitizer) and you don't really |
533 | | // care what the output is, just that it doesn't crash. |
534 | | // |
535 | | // The fuzz_complex implementation adds many more Wuffs API specific checks |
536 | | // (e.g. that the sum of the tokens' lengths does not exceed the input length). |
537 | 5.98k | if ((hash & 0xFF) != 0xA5) { |
538 | 5.88k | return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8)); |
539 | 5.88k | } |
540 | 100 | return fuzz_simple(full_src); |
541 | 5.98k | } |