/src/wuffs/fuzz/c/std/cbor_fuzzer.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2020 The Wuffs Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
6 | | // option. This file may not be copied, modified, or distributed |
7 | | // except according to those terms. |
8 | | // |
9 | | // SPDX-License-Identifier: Apache-2.0 OR MIT |
10 | | |
11 | | // ---------------- |
12 | | |
13 | | // Silence the nested slash-star warning for the next comment's command line. |
14 | | #pragma clang diagnostic push |
15 | | #pragma clang diagnostic ignored "-Wcomment" |
16 | | |
17 | | /* |
18 | | This fuzzer (the fuzz function) is typically run indirectly, by a framework |
19 | | such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput. |
20 | | |
21 | | When working on the fuzz implementation, or as a coherence check, defining |
22 | | WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files: |
23 | | |
24 | | gcc -DWUFFS_CONFIG__FUZZLIB_MAIN cbor_fuzzer.c |
25 | | ./a.out ../../../test/data/*.cbor |
26 | | rm -f ./a.out |
27 | | |
28 | | It should print "PASS", amongst other information, and exit(0). |
29 | | */ |
30 | | |
31 | | #pragma clang diagnostic pop |
32 | | |
33 | | // Wuffs ships as a "single file C library" or "header file library" as per |
34 | | // https://github.com/nothings/stb/blob/master/docs/stb_howto.txt |
35 | | // |
36 | | // To use that single file as a "foo.c"-like implementation, instead of a |
37 | | // "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or |
38 | | // compiling it. |
39 | | #define WUFFS_IMPLEMENTATION |
40 | | |
41 | | #if defined(WUFFS_CONFIG__FUZZLIB_MAIN) |
42 | | // Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when |
43 | | // combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs' |
44 | | // functions have static storage. |
45 | | // |
46 | | // This can help the compiler ignore or discard unused code, which can produce |
47 | | // faster compiles and smaller binaries. Other motivations are discussed in the |
48 | | // "ALLOW STATIC IMPLEMENTATION" section of |
49 | | // https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt |
50 | | #define WUFFS_CONFIG__STATIC_FUNCTIONS |
51 | | #endif // defined(WUFFS_CONFIG__FUZZLIB_MAIN) |
52 | | |
53 | | // Defining the WUFFS_CONFIG__MODULE* macros are optional, but it lets users of |
54 | | // release/c/etc.c choose which parts of Wuffs to build. That file contains the |
55 | | // entire Wuffs standard library, implementing a variety of codecs and file |
56 | | // formats. Without this macro definition, an optimizing compiler or linker may |
57 | | // very well discard Wuffs code for unused codecs, but listing the Wuffs |
58 | | // modules we use makes that process explicit. Preprocessing means that such |
59 | | // code simply isn't compiled. |
60 | | #define WUFFS_CONFIG__MODULES |
61 | | #define WUFFS_CONFIG__MODULE__BASE |
62 | | #define WUFFS_CONFIG__MODULE__CBOR |
63 | | |
64 | | // If building this program in an environment that doesn't easily accommodate |
65 | | // relative includes, you can use the script/inline-c-relative-includes.go |
66 | | // program to generate a stand-alone C file. |
67 | | #include "../../../release/c/wuffs-unsupported-snapshot.c" |
68 | | #include "../fuzzlib/fuzzlib.c" |
69 | | |
70 | 4.07k | #define TOK_BUFFER_ARRAY_SIZE 4096 |
71 | 82.0k | #define STACK_SIZE (WUFFS_CBOR__DECODER_DEPTH_MAX_INCL + 1) |
72 | | |
73 | | // Wuffs allows either statically or dynamically allocated work buffers. This |
74 | | // program exercises static allocation. |
75 | | #define WORK_BUFFER_ARRAY_SIZE \ |
76 | 706k | WUFFS_CBOR__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE |
77 | | #if WORK_BUFFER_ARRAY_SIZE > 0 |
78 | | uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE]; |
79 | | #else |
80 | | // Not all C/C++ compilers support 0-length arrays. |
81 | | uint8_t g_work_buffer_array[1]; |
82 | | #endif |
83 | | |
84 | | // Each stack element is 1 byte. The low 7 bits denote the container: |
85 | | // - 0x01 means no container: we are at the top level. |
86 | | // - 0x02 means a [] list. |
87 | | // - 0x04 means a {} dictionary. |
88 | | // |
89 | | // The high 0x80 bit holds the even/odd-ness of the number of elements in that |
90 | | // container. A valid dictionary contains key-value pairs and should therefore |
91 | | // contain an even number of elements. |
92 | | typedef uint8_t stack_element; |
93 | | |
94 | | bool // |
95 | 250k | token_is_cbor_tag(wuffs_base__token t) { |
96 | 250k | return (wuffs_base__token__value_major(&t) == |
97 | 250k | WUFFS_CBOR__TOKEN_VALUE_MAJOR) && |
98 | 250k | (wuffs_base__token__value_minor(&t) & |
99 | 3.06k | WUFFS_CBOR__TOKEN_VALUE_MINOR__TAG); |
100 | 250k | } |
101 | | |
102 | | const char* // |
103 | | fuzz_one_token(wuffs_base__token t, |
104 | | wuffs_base__token prev_token, |
105 | | wuffs_base__io_buffer* src, |
106 | | size_t* ti, |
107 | | stack_element* stack, |
108 | 248k | size_t* depth) { |
109 | 248k | uint64_t len = wuffs_base__token__length(&t); |
110 | 248k | if (len > 0xFFFF) { |
111 | 0 | return "fuzz: internal error: length too long (vs 0xFFFF)"; |
112 | 248k | } else if (len > (src->meta.wi - *ti)) { |
113 | 0 | return "fuzz: internal error: length too long (vs wi - ti)"; |
114 | 0 | } |
115 | 248k | *ti += len; |
116 | | |
117 | 248k | bool is_cbor_tag = token_is_cbor_tag(t); |
118 | 248k | if (wuffs_base__token__value_extension(&t) >= 0) { |
119 | 1.84k | if (!wuffs_base__token__continued(&prev_token)) { |
120 | 0 | return "fuzz: internal error: extended token not after continued token"; |
121 | 0 | } |
122 | 1.84k | is_cbor_tag = token_is_cbor_tag(prev_token); |
123 | 1.84k | } |
124 | | |
125 | 248k | int64_t vbc = wuffs_base__token__value_base_category(&t); |
126 | 248k | uint64_t vbd = wuffs_base__token__value_base_detail(&t); |
127 | | |
128 | 248k | switch (vbc) { |
129 | 127k | case WUFFS_BASE__TOKEN__VBC__STRUCTURE: { |
130 | 127k | bool from_consistent = false; |
131 | 127k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) { |
132 | 722 | from_consistent = stack[*depth] & 0x01; |
133 | 126k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) { |
134 | 84.4k | from_consistent = stack[*depth] & 0x02; |
135 | 84.4k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) { |
136 | 42.1k | from_consistent = stack[*depth] & 0x04; |
137 | 42.1k | } |
138 | 127k | if (!from_consistent) { |
139 | 0 | return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC"; |
140 | 0 | } |
141 | | |
142 | 127k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) { |
143 | 82.0k | (*depth)++; |
144 | 82.0k | if ((*depth >= STACK_SIZE) || (*depth == 0)) { |
145 | 0 | return "fuzz: internal error: depth too large"; |
146 | 0 | } |
147 | | |
148 | 82.0k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) { |
149 | 0 | return "fuzz: internal error: push to the 'none' container"; |
150 | 82.0k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) { |
151 | 75.2k | stack[*depth] = 0x02; |
152 | 75.2k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) { |
153 | 6.76k | stack[*depth] = 0x04; |
154 | 6.76k | } else { |
155 | 0 | return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC"; |
156 | 0 | } |
157 | | |
158 | 82.0k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) { |
159 | 45.2k | if ((vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) && |
160 | 45.2k | (0 != (0x80 & stack[*depth]))) { |
161 | 0 | return "fuzz: internal error: dictionary had an incomplete key/value " |
162 | 0 | "pair"; |
163 | 0 | } |
164 | | |
165 | 45.2k | if (*depth <= 0) { |
166 | 0 | return "fuzz: internal error: depth too small"; |
167 | 0 | } |
168 | 45.2k | (*depth)--; |
169 | | |
170 | 45.2k | bool to_consistent = false; |
171 | 45.2k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) { |
172 | 69 | to_consistent = stack[*depth] & 0x01; |
173 | 45.1k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) { |
174 | 9.57k | to_consistent = stack[*depth] & 0x02; |
175 | 35.6k | } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) { |
176 | 35.6k | to_consistent = stack[*depth] & 0x04; |
177 | 35.6k | } |
178 | 45.2k | if (!to_consistent) { |
179 | 0 | return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC"; |
180 | 0 | } |
181 | | |
182 | 45.2k | } else { |
183 | 0 | return "fuzz: internal error: unrecognized VBC__STRUCTURE"; |
184 | 0 | } |
185 | 127k | break; |
186 | 127k | } |
187 | | |
188 | 127k | case WUFFS_BASE__TOKEN__VBC__STRING: { |
189 | 31.4k | if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) { |
190 | 23.3k | wuffs_base__slice_u8 s = |
191 | 23.3k | wuffs_base__make_slice_u8(src->data.ptr + *ti - len, len); |
192 | 23.3k | if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) && |
193 | 23.3k | (s.len != wuffs_base__utf_8__longest_valid_prefix(s.ptr, s.len))) { |
194 | 0 | return "fuzz: internal error: invalid UTF-8"; |
195 | 0 | } |
196 | 23.3k | if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) && |
197 | 23.3k | (s.len != wuffs_base__ascii__longest_valid_prefix(s.ptr, s.len))) { |
198 | 0 | return "fuzz: internal error: invalid ASCII"; |
199 | 0 | } |
200 | 23.3k | } |
201 | 31.4k | break; |
202 | 31.4k | } |
203 | | |
204 | 31.4k | case WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT: { |
205 | 0 | if ((WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL <= vbd) && |
206 | 0 | (vbd <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) { |
207 | 0 | return "fuzz: internal error: invalid Unicode surrogate"; |
208 | 0 | } else if (WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL < vbd) { |
209 | 0 | return "fuzz: internal error: invalid Unicode code point"; |
210 | 0 | } |
211 | 0 | break; |
212 | 0 | } |
213 | | |
214 | 89.5k | default: |
215 | 89.5k | break; |
216 | 248k | } |
217 | | |
218 | | // After a complete CBOR value, update the parity (even/odd count) of the |
219 | | // container. |
220 | 248k | if (!wuffs_base__token__continued(&t) && |
221 | 248k | (vbc != WUFFS_BASE__TOKEN__VBC__FILLER) && |
222 | 248k | ((vbc != WUFFS_BASE__TOKEN__VBC__STRUCTURE) || |
223 | 219k | (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP)) && |
224 | 248k | !is_cbor_tag) { |
225 | 136k | stack[*depth] ^= 0x80; |
226 | 136k | } |
227 | | |
228 | 248k | return NULL; |
229 | 248k | } |
230 | | |
231 | | uint64_t // |
232 | 3.89k | buffer_limit(uint64_t hash, uint64_t min, uint64_t max) { |
233 | 3.89k | hash &= 0x3F; |
234 | 3.89k | uint64_t n; |
235 | 3.89k | if (hash < 0x20) { |
236 | 2.00k | n = min + hash; |
237 | 2.00k | } else { |
238 | 1.89k | n = max - (0x3F - hash); |
239 | 1.89k | } |
240 | 3.89k | if (n < min) { |
241 | 0 | return min; |
242 | 3.89k | } else if (n > max) { |
243 | 0 | return max; |
244 | 0 | } |
245 | 3.89k | return n; |
246 | 3.89k | } |
247 | | |
248 | | const char* // |
249 | 1.94k | fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) { |
250 | 1.94k | uint64_t tok_limit = buffer_limit( |
251 | 1.94k | hash & 0x3F, WUFFS_CBOR__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL, |
252 | 1.94k | TOK_BUFFER_ARRAY_SIZE); |
253 | 1.94k | hash = wuffs_base__u64__rotate_right(hash, 6); |
254 | | |
255 | 1.94k | uint64_t src_limit = buffer_limit( |
256 | 1.94k | hash & 0x3F, WUFFS_CBOR__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096); |
257 | 1.94k | hash = wuffs_base__u64__rotate_right(hash, 6); |
258 | | |
259 | | // ---- |
260 | | |
261 | 1.94k | wuffs_cbor__decoder dec; |
262 | 1.94k | wuffs_base__status status = wuffs_cbor__decoder__initialize( |
263 | 1.94k | &dec, sizeof dec, WUFFS_VERSION, |
264 | 1.94k | WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED); |
265 | 1.94k | if (!wuffs_base__status__is_ok(&status)) { |
266 | 0 | return wuffs_base__status__message(&status); |
267 | 0 | } |
268 | | |
269 | 1.94k | wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE]; |
270 | 1.94k | wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){ |
271 | 1.94k | .data = ((wuffs_base__slice_token){ |
272 | 1.94k | .ptr = tok_array, |
273 | 1.94k | .len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE) |
274 | 1.94k | ? tok_limit |
275 | 1.94k | : TOK_BUFFER_ARRAY_SIZE), |
276 | 1.94k | }), |
277 | 1.94k | }); |
278 | | |
279 | 1.94k | wuffs_base__token prev_token = wuffs_base__make_token(0); |
280 | 1.94k | uint32_t no_progress_count = 0; |
281 | | |
282 | 1.94k | stack_element stack[STACK_SIZE]; |
283 | 1.94k | stack[0] = 0x01; // We start in the 'none' container. |
284 | 1.94k | size_t depth = 0; |
285 | | |
286 | | // ---- |
287 | | |
288 | 706k | while (true) { // Outer loop. |
289 | 706k | wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit); |
290 | | |
291 | 706k | size_t old_tok_wi = tok.meta.wi; |
292 | 706k | size_t old_tok_ri = tok.meta.ri; |
293 | 706k | size_t old_src_wi = src.meta.wi; |
294 | 706k | size_t old_src_ri = src.meta.ri; |
295 | 706k | size_t ti = old_src_ri; |
296 | | |
297 | 706k | status = wuffs_cbor__decoder__decode_tokens( |
298 | 706k | &dec, &tok, &src, |
299 | 706k | wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE)); |
300 | 706k | if ((tok.data.len < tok.meta.wi) || // |
301 | 706k | (tok.meta.wi < tok.meta.ri) || // |
302 | 706k | (tok.meta.ri != old_tok_ri)) { |
303 | 0 | return "fuzz: internal error: inconsistent tok indexes"; |
304 | 706k | } else if ((src.data.len < src.meta.wi) || // |
305 | 706k | (src.meta.wi < src.meta.ri) || // |
306 | 706k | (src.meta.wi != old_src_wi)) { |
307 | 0 | return "fuzz: internal error: inconsistent src indexes"; |
308 | 0 | } |
309 | 706k | full_src->meta.ri += src.meta.ri - old_src_ri; |
310 | | |
311 | 706k | if ((tok.meta.wi > old_tok_wi) || (src.meta.ri > old_src_ri) || |
312 | 706k | !wuffs_base__status__is_suspension(&status)) { |
313 | 110k | no_progress_count = 0; |
314 | 596k | } else if (no_progress_count < 999) { |
315 | 595k | no_progress_count++; |
316 | 595k | } else if (!full_src->meta.closed && |
317 | 596 | (status.repr == wuffs_base__suspension__short_read)) { |
318 | 596 | return wuffs_base__status__message(&status); |
319 | 596 | } else { |
320 | 0 | return "fuzz: internal error: no progress"; |
321 | 0 | } |
322 | | |
323 | | // ---- |
324 | | |
325 | 954k | while (tok.meta.ri < tok.meta.wi) { // Inner loop. |
326 | 248k | wuffs_base__token t = tok.data.ptr[tok.meta.ri++]; |
327 | 248k | const char* z = |
328 | 248k | fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth); |
329 | 248k | if (z != NULL) { |
330 | 0 | return z; |
331 | 0 | } |
332 | 248k | prev_token = t; |
333 | 248k | } // Inner loop. |
334 | | |
335 | | // ---- |
336 | | |
337 | | // Check that, starting from old_src_ri, summing the token lengths brings |
338 | | // us to the new src.meta.ri. |
339 | 705k | if (ti != src.meta.ri) { |
340 | 0 | return "fuzz: internal error: ti != ri"; |
341 | 0 | } |
342 | | |
343 | 705k | if (status.repr == NULL) { |
344 | 303 | break; |
345 | | |
346 | 705k | } else if (status.repr == wuffs_base__suspension__short_read) { |
347 | 615k | if (src.meta.closed) { |
348 | 0 | return "fuzz: internal error: short read on a closed io_reader"; |
349 | 0 | } |
350 | | // We don't compact full_src as it may be mmap'ed read-only. |
351 | 615k | continue; |
352 | | |
353 | 615k | } else if (status.repr == wuffs_base__suspension__short_write) { |
354 | 88.7k | wuffs_base__token_buffer__compact(&tok); |
355 | 88.7k | continue; |
356 | 88.7k | } |
357 | | |
358 | 1.04k | return wuffs_base__status__message(&status); |
359 | 705k | } // Outer loop. |
360 | | |
361 | | // ---- |
362 | | |
363 | 303 | if (depth != 0) { |
364 | 0 | return "fuzz: internal error: decoded OK but final depth was not zero"; |
365 | 303 | } else if (wuffs_base__token__continued(&prev_token)) { |
366 | 0 | return "fuzz: internal error: decoded OK but final token was continued"; |
367 | 0 | } |
368 | 303 | return NULL; |
369 | 303 | } |
370 | | |
371 | | const char* // |
372 | 153 | fuzz_simple(wuffs_base__io_buffer* full_src) { |
373 | 153 | wuffs_cbor__decoder dec; |
374 | 153 | wuffs_base__status status = |
375 | 153 | wuffs_cbor__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0); |
376 | 153 | if (!wuffs_base__status__is_ok(&status)) { |
377 | 0 | return wuffs_base__status__message(&status); |
378 | 0 | } |
379 | | |
380 | 153 | wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE]; |
381 | 153 | wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){ |
382 | 153 | .data = ((wuffs_base__slice_token){ |
383 | 153 | .ptr = tok_array, |
384 | 153 | .len = TOK_BUFFER_ARRAY_SIZE, |
385 | 153 | }), |
386 | 153 | }); |
387 | | |
388 | 354 | while (true) { |
389 | 354 | status = wuffs_cbor__decoder__decode_tokens( |
390 | 354 | &dec, &tok, full_src, |
391 | 354 | wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE)); |
392 | 354 | if (status.repr == NULL) { |
393 | 23 | break; |
394 | | |
395 | 331 | } else if (status.repr == wuffs_base__suspension__short_write) { |
396 | 201 | tok.meta.ri = tok.meta.wi; |
397 | 201 | wuffs_base__token_buffer__compact(&tok); |
398 | 201 | continue; |
399 | 201 | } |
400 | | |
401 | 130 | return wuffs_base__status__message(&status); |
402 | 354 | } |
403 | | |
404 | 23 | return NULL; |
405 | 153 | } |
406 | | |
407 | | const char* // |
408 | 2.10k | fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) { |
409 | | // Send 99.6% of inputs to fuzz_complex and the remainder to fuzz_simple. The |
410 | | // 0xA5 constant is arbitrary but non-zero. If the hash function maps the |
411 | | // empty input to 0, this still sends the empty input to fuzz_complex. |
412 | | // |
413 | | // The fuzz_simple implementation shows how easy decoding with Wuffs is when |
414 | | // all you want is to run LLVMFuzzerTestOneInput's built-in (Wuffs API |
415 | | // independent) checks (e.g. the ASan address sanitizer) and you don't really |
416 | | // care what the output is, just that it doesn't crash. |
417 | | // |
418 | | // The fuzz_complex implementation adds many more Wuffs API specific checks |
419 | | // (e.g. that the sum of the tokens' lengths do not exceed the input length). |
420 | 2.10k | if ((hash & 0xFF) != 0xA5) { |
421 | 1.94k | return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8)); |
422 | 1.94k | } |
423 | 153 | return fuzz_simple(full_src); |
424 | 2.10k | } |