Coverage Report

Created: 2024-10-11 15:22

/src/wuffs/fuzz/c/std/cbor_fuzzer.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2020 The Wuffs Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
//
9
// SPDX-License-Identifier: Apache-2.0 OR MIT
10
11
// ----------------
12
13
// Silence the nested slash-star warning for the next comment's command line.
14
#pragma clang diagnostic push
15
#pragma clang diagnostic ignored "-Wcomment"
16
17
/*
18
This fuzzer (the fuzz function) is typically run indirectly, by a framework
19
such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput.
20
21
When working on the fuzz implementation, or as a coherence check, defining
22
WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files:
23
24
gcc -DWUFFS_CONFIG__FUZZLIB_MAIN cbor_fuzzer.c
25
./a.out ../../../test/data/*.cbor
26
rm -f ./a.out
27
28
It should print "PASS", amongst other information, and exit(0).
29
*/
30
31
#pragma clang diagnostic pop
32
33
// Wuffs ships as a "single file C library" or "header file library" as per
34
// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
35
//
36
// To use that single file as a "foo.c"-like implementation, instead of a
37
// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
38
// compiling it.
39
#define WUFFS_IMPLEMENTATION
40
41
#if defined(WUFFS_CONFIG__FUZZLIB_MAIN)
42
// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
43
// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
44
// functions have static storage.
45
//
46
// This can help the compiler ignore or discard unused code, which can produce
47
// faster compiles and smaller binaries. Other motivations are discussed in the
48
// "ALLOW STATIC IMPLEMENTATION" section of
49
// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
50
#define WUFFS_CONFIG__STATIC_FUNCTIONS
51
#endif  // defined(WUFFS_CONFIG__FUZZLIB_MAIN)
52
53
// Defining the WUFFS_CONFIG__MODULE* macros are optional, but it lets users of
54
// release/c/etc.c choose which parts of Wuffs to build. That file contains the
55
// entire Wuffs standard library, implementing a variety of codecs and file
56
// formats. Without this macro definition, an optimizing compiler or linker may
57
// very well discard Wuffs code for unused codecs, but listing the Wuffs
58
// modules we use makes that process explicit. Preprocessing means that such
59
// code simply isn't compiled.
60
#define WUFFS_CONFIG__MODULES
61
#define WUFFS_CONFIG__MODULE__BASE
62
#define WUFFS_CONFIG__MODULE__CBOR
63
64
// If building this program in an environment that doesn't easily accommodate
65
// relative includes, you can use the script/inline-c-relative-includes.go
66
// program to generate a stand-alone C file.
67
#include "../../../release/c/wuffs-unsupported-snapshot.c"
68
#include "../fuzzlib/fuzzlib.c"
69
70
4.07k
#define TOK_BUFFER_ARRAY_SIZE 4096
71
82.0k
#define STACK_SIZE (WUFFS_CBOR__DECODER_DEPTH_MAX_INCL + 1)
72
73
// Wuffs allows either statically or dynamically allocated work buffers. This
74
// program exercises static allocation.
75
#define WORK_BUFFER_ARRAY_SIZE \
76
706k
  WUFFS_CBOR__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE
77
#if WORK_BUFFER_ARRAY_SIZE > 0
78
uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
79
#else
80
// Not all C/C++ compilers support 0-length arrays.
81
uint8_t g_work_buffer_array[1];
82
#endif
83
84
// Each stack element is 1 byte. The low 7 bits denote the container:
85
//  - 0x01 means no container: we are at the top level.
86
//  - 0x02 means a [] list.
87
//  - 0x04 means a {} dictionary.
88
//
89
// The high 0x80 bit holds the even/odd-ness of the number of elements in that
90
// container. A valid dictionary contains key-value pairs and should therefore
91
// contain an even number of elements.
92
typedef uint8_t stack_element;
93
94
bool  //
95
250k
token_is_cbor_tag(wuffs_base__token t) {
96
250k
  return (wuffs_base__token__value_major(&t) ==
97
250k
          WUFFS_CBOR__TOKEN_VALUE_MAJOR) &&
98
250k
         (wuffs_base__token__value_minor(&t) &
99
3.06k
          WUFFS_CBOR__TOKEN_VALUE_MINOR__TAG);
100
250k
}
101
102
const char*  //
103
fuzz_one_token(wuffs_base__token t,
104
               wuffs_base__token prev_token,
105
               wuffs_base__io_buffer* src,
106
               size_t* ti,
107
               stack_element* stack,
108
248k
               size_t* depth) {
109
248k
  uint64_t len = wuffs_base__token__length(&t);
110
248k
  if (len > 0xFFFF) {
111
0
    return "fuzz: internal error: length too long (vs 0xFFFF)";
112
248k
  } else if (len > (src->meta.wi - *ti)) {
113
0
    return "fuzz: internal error: length too long (vs wi - ti)";
114
0
  }
115
248k
  *ti += len;
116
117
248k
  bool is_cbor_tag = token_is_cbor_tag(t);
118
248k
  if (wuffs_base__token__value_extension(&t) >= 0) {
119
1.84k
    if (!wuffs_base__token__continued(&prev_token)) {
120
0
      return "fuzz: internal error: extended token not after continued token";
121
0
    }
122
1.84k
    is_cbor_tag = token_is_cbor_tag(prev_token);
123
1.84k
  }
124
125
248k
  int64_t vbc = wuffs_base__token__value_base_category(&t);
126
248k
  uint64_t vbd = wuffs_base__token__value_base_detail(&t);
127
128
248k
  switch (vbc) {
129
127k
    case WUFFS_BASE__TOKEN__VBC__STRUCTURE: {
130
127k
      bool from_consistent = false;
131
127k
      if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) {
132
722
        from_consistent = stack[*depth] & 0x01;
133
126k
      } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) {
134
84.4k
        from_consistent = stack[*depth] & 0x02;
135
84.4k
      } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) {
136
42.1k
        from_consistent = stack[*depth] & 0x04;
137
42.1k
      }
138
127k
      if (!from_consistent) {
139
0
        return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC";
140
0
      }
141
142
127k
      if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) {
143
82.0k
        (*depth)++;
144
82.0k
        if ((*depth >= STACK_SIZE) || (*depth == 0)) {
145
0
          return "fuzz: internal error: depth too large";
146
0
        }
147
148
82.0k
        if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
149
0
          return "fuzz: internal error: push to the 'none' container";
150
82.0k
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
151
75.2k
          stack[*depth] = 0x02;
152
75.2k
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
153
6.76k
          stack[*depth] = 0x04;
154
6.76k
        } else {
155
0
          return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC";
156
0
        }
157
158
82.0k
      } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) {
159
45.2k
        if ((vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) &&
160
45.2k
            (0 != (0x80 & stack[*depth]))) {
161
0
          return "fuzz: internal error: dictionary had an incomplete key/value "
162
0
                 "pair";
163
0
        }
164
165
45.2k
        if (*depth <= 0) {
166
0
          return "fuzz: internal error: depth too small";
167
0
        }
168
45.2k
        (*depth)--;
169
170
45.2k
        bool to_consistent = false;
171
45.2k
        if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
172
69
          to_consistent = stack[*depth] & 0x01;
173
45.1k
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
174
9.57k
          to_consistent = stack[*depth] & 0x02;
175
35.6k
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
176
35.6k
          to_consistent = stack[*depth] & 0x04;
177
35.6k
        }
178
45.2k
        if (!to_consistent) {
179
0
          return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC";
180
0
        }
181
182
45.2k
      } else {
183
0
        return "fuzz: internal error: unrecognized VBC__STRUCTURE";
184
0
      }
185
127k
      break;
186
127k
    }
187
188
127k
    case WUFFS_BASE__TOKEN__VBC__STRING: {
189
31.4k
      if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) {
190
23.3k
        wuffs_base__slice_u8 s =
191
23.3k
            wuffs_base__make_slice_u8(src->data.ptr + *ti - len, len);
192
23.3k
        if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) &&
193
23.3k
            (s.len != wuffs_base__utf_8__longest_valid_prefix(s.ptr, s.len))) {
194
0
          return "fuzz: internal error: invalid UTF-8";
195
0
        }
196
23.3k
        if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) &&
197
23.3k
            (s.len != wuffs_base__ascii__longest_valid_prefix(s.ptr, s.len))) {
198
0
          return "fuzz: internal error: invalid ASCII";
199
0
        }
200
23.3k
      }
201
31.4k
      break;
202
31.4k
    }
203
204
31.4k
    case WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT: {
205
0
      if ((WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL <= vbd) &&
206
0
          (vbd <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) {
207
0
        return "fuzz: internal error: invalid Unicode surrogate";
208
0
      } else if (WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL < vbd) {
209
0
        return "fuzz: internal error: invalid Unicode code point";
210
0
      }
211
0
      break;
212
0
    }
213
214
89.5k
    default:
215
89.5k
      break;
216
248k
  }
217
218
  // After a complete CBOR value, update the parity (even/odd count) of the
219
  // container.
220
248k
  if (!wuffs_base__token__continued(&t) &&
221
248k
      (vbc != WUFFS_BASE__TOKEN__VBC__FILLER) &&
222
248k
      ((vbc != WUFFS_BASE__TOKEN__VBC__STRUCTURE) ||
223
219k
       (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP)) &&
224
248k
      !is_cbor_tag) {
225
136k
    stack[*depth] ^= 0x80;
226
136k
  }
227
228
248k
  return NULL;
229
248k
}
230
231
uint64_t  //
232
3.89k
buffer_limit(uint64_t hash, uint64_t min, uint64_t max) {
233
3.89k
  hash &= 0x3F;
234
3.89k
  uint64_t n;
235
3.89k
  if (hash < 0x20) {
236
2.00k
    n = min + hash;
237
2.00k
  } else {
238
1.89k
    n = max - (0x3F - hash);
239
1.89k
  }
240
3.89k
  if (n < min) {
241
0
    return min;
242
3.89k
  } else if (n > max) {
243
0
    return max;
244
0
  }
245
3.89k
  return n;
246
3.89k
}
247
248
const char*  //
249
1.94k
fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) {
250
1.94k
  uint64_t tok_limit = buffer_limit(
251
1.94k
      hash & 0x3F, WUFFS_CBOR__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL,
252
1.94k
      TOK_BUFFER_ARRAY_SIZE);
253
1.94k
  hash = wuffs_base__u64__rotate_right(hash, 6);
254
255
1.94k
  uint64_t src_limit = buffer_limit(
256
1.94k
      hash & 0x3F, WUFFS_CBOR__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096);
257
1.94k
  hash = wuffs_base__u64__rotate_right(hash, 6);
258
259
  // ----
260
261
1.94k
  wuffs_cbor__decoder dec;
262
1.94k
  wuffs_base__status status = wuffs_cbor__decoder__initialize(
263
1.94k
      &dec, sizeof dec, WUFFS_VERSION,
264
1.94k
      WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED);
265
1.94k
  if (!wuffs_base__status__is_ok(&status)) {
266
0
    return wuffs_base__status__message(&status);
267
0
  }
268
269
1.94k
  wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
270
1.94k
  wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
271
1.94k
      .data = ((wuffs_base__slice_token){
272
1.94k
          .ptr = tok_array,
273
1.94k
          .len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE)
274
1.94k
                              ? tok_limit
275
1.94k
                              : TOK_BUFFER_ARRAY_SIZE),
276
1.94k
      }),
277
1.94k
  });
278
279
1.94k
  wuffs_base__token prev_token = wuffs_base__make_token(0);
280
1.94k
  uint32_t no_progress_count = 0;
281
282
1.94k
  stack_element stack[STACK_SIZE];
283
1.94k
  stack[0] = 0x01;  // We start in the 'none' container.
284
1.94k
  size_t depth = 0;
285
286
  // ----
287
288
706k
  while (true) {  // Outer loop.
289
706k
    wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit);
290
291
706k
    size_t old_tok_wi = tok.meta.wi;
292
706k
    size_t old_tok_ri = tok.meta.ri;
293
706k
    size_t old_src_wi = src.meta.wi;
294
706k
    size_t old_src_ri = src.meta.ri;
295
706k
    size_t ti = old_src_ri;
296
297
706k
    status = wuffs_cbor__decoder__decode_tokens(
298
706k
        &dec, &tok, &src,
299
706k
        wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
300
706k
    if ((tok.data.len < tok.meta.wi) ||  //
301
706k
        (tok.meta.wi < tok.meta.ri) ||   //
302
706k
        (tok.meta.ri != old_tok_ri)) {
303
0
      return "fuzz: internal error: inconsistent tok indexes";
304
706k
    } else if ((src.data.len < src.meta.wi) ||  //
305
706k
               (src.meta.wi < src.meta.ri) ||   //
306
706k
               (src.meta.wi != old_src_wi)) {
307
0
      return "fuzz: internal error: inconsistent src indexes";
308
0
    }
309
706k
    full_src->meta.ri += src.meta.ri - old_src_ri;
310
311
706k
    if ((tok.meta.wi > old_tok_wi) || (src.meta.ri > old_src_ri) ||
312
706k
        !wuffs_base__status__is_suspension(&status)) {
313
110k
      no_progress_count = 0;
314
596k
    } else if (no_progress_count < 999) {
315
595k
      no_progress_count++;
316
595k
    } else if (!full_src->meta.closed &&
317
596
               (status.repr == wuffs_base__suspension__short_read)) {
318
596
      return wuffs_base__status__message(&status);
319
596
    } else {
320
0
      return "fuzz: internal error: no progress";
321
0
    }
322
323
    // ----
324
325
954k
    while (tok.meta.ri < tok.meta.wi) {  // Inner loop.
326
248k
      wuffs_base__token t = tok.data.ptr[tok.meta.ri++];
327
248k
      const char* z =
328
248k
          fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth);
329
248k
      if (z != NULL) {
330
0
        return z;
331
0
      }
332
248k
      prev_token = t;
333
248k
    }  // Inner loop.
334
335
    // ----
336
337
    // Check that, starting from old_src_ri, summing the token lengths brings
338
    // us to the new src.meta.ri.
339
705k
    if (ti != src.meta.ri) {
340
0
      return "fuzz: internal error: ti != ri";
341
0
    }
342
343
705k
    if (status.repr == NULL) {
344
303
      break;
345
346
705k
    } else if (status.repr == wuffs_base__suspension__short_read) {
347
615k
      if (src.meta.closed) {
348
0
        return "fuzz: internal error: short read on a closed io_reader";
349
0
      }
350
      // We don't compact full_src as it may be mmap'ed read-only.
351
615k
      continue;
352
353
615k
    } else if (status.repr == wuffs_base__suspension__short_write) {
354
88.7k
      wuffs_base__token_buffer__compact(&tok);
355
88.7k
      continue;
356
88.7k
    }
357
358
1.04k
    return wuffs_base__status__message(&status);
359
705k
  }  // Outer loop.
360
361
  // ----
362
363
303
  if (depth != 0) {
364
0
    return "fuzz: internal error: decoded OK but final depth was not zero";
365
303
  } else if (wuffs_base__token__continued(&prev_token)) {
366
0
    return "fuzz: internal error: decoded OK but final token was continued";
367
0
  }
368
303
  return NULL;
369
303
}
370
371
const char*  //
372
153
fuzz_simple(wuffs_base__io_buffer* full_src) {
373
153
  wuffs_cbor__decoder dec;
374
153
  wuffs_base__status status =
375
153
      wuffs_cbor__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
376
153
  if (!wuffs_base__status__is_ok(&status)) {
377
0
    return wuffs_base__status__message(&status);
378
0
  }
379
380
153
  wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
381
153
  wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
382
153
      .data = ((wuffs_base__slice_token){
383
153
          .ptr = tok_array,
384
153
          .len = TOK_BUFFER_ARRAY_SIZE,
385
153
      }),
386
153
  });
387
388
354
  while (true) {
389
354
    status = wuffs_cbor__decoder__decode_tokens(
390
354
        &dec, &tok, full_src,
391
354
        wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
392
354
    if (status.repr == NULL) {
393
23
      break;
394
395
331
    } else if (status.repr == wuffs_base__suspension__short_write) {
396
201
      tok.meta.ri = tok.meta.wi;
397
201
      wuffs_base__token_buffer__compact(&tok);
398
201
      continue;
399
201
    }
400
401
130
    return wuffs_base__status__message(&status);
402
354
  }
403
404
23
  return NULL;
405
153
}
406
407
const char*  //
408
2.10k
fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) {
409
  // Send 99.6% of inputs to fuzz_complex and the remainder to fuzz_simple. The
410
  // 0xA5 constant is arbitrary but non-zero. If the hash function maps the
411
  // empty input to 0, this still sends the empty input to fuzz_complex.
412
  //
413
  // The fuzz_simple implementation shows how easy decoding with Wuffs is when
414
  // all you want is to run LLVMFuzzerTestOneInput's built-in (Wuffs API
415
  // independent) checks (e.g. the ASan address sanitizer) and you don't really
416
  // care what the output is, just that it doesn't crash.
417
  //
418
  // The fuzz_complex implementation adds many more Wuffs API specific checks
419
  // (e.g. that the sum of the tokens' lengths do not exceed the input length).
420
2.10k
  if ((hash & 0xFF) != 0xA5) {
421
1.94k
    return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8));
422
1.94k
  }
423
153
  return fuzz_simple(full_src);
424
2.10k
}