/src/wuffs/fuzz/c/std/cbor_fuzzer.c

Source (jump to first uncovered line)
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// SPDX-License-Identifier: Apache-2.0 OR MIT

// ----------------

// Silence the nested slash-star warning for the next comment's command line.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcomment"

/*
This fuzzer (the fuzz function) is typically run indirectly, by a framework
such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput.

When working on the fuzz implementation, or as a coherence check, defining
WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files:

gcc -DWUFFS_CONFIG__FUZZLIB_MAIN cbor_fuzzer.c
./a.out ../../../test/data/*.cbor
rm -f ./a.out

It should print "PASS", amongst other information, and exit(0).
*/

#pragma clang diagnostic pop

// Wuffs ships as a "single file C library" or "header file library" as per
// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
//
// To use that single file as a "foo.c"-like implementation, instead of a
// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
// compiling it.
#define WUFFS_IMPLEMENTATION

#if defined(WUFFS_CONFIG__FUZZLIB_MAIN)
// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
// functions have static storage.
//
// This can help the compiler ignore or discard unused code, which can produce
// faster compiles and smaller binaries. Other motivations are discussed in the
// "ALLOW STATIC IMPLEMENTATION" section of
// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
#define WUFFS_CONFIG__STATIC_FUNCTIONS
#endif  // defined(WUFFS_CONFIG__FUZZLIB_MAIN)

// Defining the WUFFS_CONFIG__MODULE* macros are optional, but it lets users of
// release/c/etc.c choose which parts of Wuffs to build. That file contains the
// entire Wuffs standard library, implementing a variety of codecs and file
// formats. Without this macro definition, an optimizing compiler or linker may
// very well discard Wuffs code for unused codecs, but listing the Wuffs
// modules we use makes that process explicit. Preprocessing means that such
// code simply isn't compiled.
#define WUFFS_CONFIG__MODULES
#define WUFFS_CONFIG__MODULE__BASE
#define WUFFS_CONFIG__MODULE__CBOR

// If building this program in an environment that doesn't easily accommodate
// relative includes, you can use the script/inline-c-relative-includes.go
// program to generate a stand-alone C file.
#include "../../../release/c/wuffs-unsupported-snapshot.c"
#include "../fuzzlib/fuzzlib.c"

// TOK_BUFFER_ARRAY_SIZE is the element count of the stack-allocated token
// arrays in fuzz_complex and fuzz_simple. It is also the upper bound that
// fuzz_complex passes to buffer_limit when choosing its token buffer length.
#define TOK_BUFFER_ARRAY_SIZE 4096
// STACK_SIZE is the element count of the container stack in fuzz_complex.
// Index 0 is the top-level 'none' container, hence the "+ 1" on top of the
// decoder's inclusive maximum depth.
#define STACK_SIZE (WUFFS_CBOR__DECODER_DEPTH_MAX_INCL + 1)

// Wuffs allows either statically or dynamically allocated work buffers. This
// program exercises static allocation.
#define WORK_BUFFER_ARRAY_SIZE \
  WUFFS_CBOR__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE
// g_work_buffer_array is the work buffer handed to every decode_tokens call
// in this file (sliced to WORK_BUFFER_ARRAY_SIZE bytes at the call sites).
#if WORK_BUFFER_ARRAY_SIZE > 0
uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
#else
// Not all C/C++ compilers support 0-length arrays.
uint8_t g_work_buffer_array[1];
#endif

// Each stack element is 1 byte. The low 7 bits denote the container:
//  - 0x01 means no container: we are at the top level.
//  - 0x02 means a [] list.
//  - 0x04 means a {} dictionary.
//
// The high 0x80 bit holds the even/odd-ness of the number of elements in that
// container. A valid dictionary contains key-value pairs and should therefore
// contain an even number of elements.
//
// fuzz_one_token toggles the 0x80 bit of the current container after each
// complete value and, when a dictionary is popped, checks that the bit is
// clear.
typedef uint8_t stack_element;

// token_is_cbor_tag returns whether t carries the CBOR value_major and has
// the TAG bit set in its value_minor.
bool  //
token_is_cbor_tag(wuffs_base__token t) {
  if (wuffs_base__token__value_major(&t) != WUFFS_CBOR__TOKEN_VALUE_MAJOR) {
    return false;
  }
  return (wuffs_base__token__value_minor(&t) &
          WUFFS_CBOR__TOKEN_VALUE_MINOR__TAG) != 0;
}

// fuzz_one_token validates a single decoded token against the fuzzer's own
// bookkeeping and updates that bookkeeping.
//
//  - t is the token to check and prev_token is the token that preceded it.
//  - src is the io_buffer the token was decoded from. *ti is the running
//    source offset; it is advanced by the token's length.
//  - stack and *depth track the open containers (see stack_element).
//
// It returns NULL when the token is consistent, or a "fuzz: internal error"
// message otherwise.
const char*  //
fuzz_one_token(wuffs_base__token t,
               wuffs_base__token prev_token,
               wuffs_base__io_buffer* src,
               size_t* ti,
               stack_element* stack,
               size_t* depth) {
  // The token's length must be small and must fit in the bytes written so
  // far, counting from the running offset.
  const uint64_t n = wuffs_base__token__length(&t);
  if (n > 0xFFFF) {
    return "fuzz: internal error: length too long (vs 0xFFFF)";
  }
  if (n > (src->meta.wi - *ti)) {
    return "fuzz: internal error: length too long (vs wi - ti)";
  }
  *ti += n;

  // An extended token takes its "is a CBOR tag" property from the continued
  // token that it must follow.
  bool tagged;
  if (wuffs_base__token__value_extension(&t) < 0) {
    tagged = token_is_cbor_tag(t);
  } else if (wuffs_base__token__continued(&prev_token)) {
    tagged = token_is_cbor_tag(prev_token);
  } else {
    return "fuzz: internal error: extended token not after continued token";
  }

  const int64_t category = wuffs_base__token__value_base_category(&t);
  const uint64_t detail = wuffs_base__token__value_base_detail(&t);

  if (category == WUFFS_BASE__TOKEN__VBC__STRUCTURE) {
    // The FROM_ETC bit has to agree with the container we are currently in.
    stack_element from_bit = 0;
    if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) {
      from_bit = 0x01;
    } else if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) {
      from_bit = 0x02;
    } else if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) {
      from_bit = 0x04;
    }
    if ((from_bit == 0) || ((stack[*depth] & from_bit) == 0)) {
      return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC";
    }

    if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) {
      // Enter a new container. The zero test catches size_t wrap-around.
      (*depth)++;
      if ((*depth == 0) || (*depth >= STACK_SIZE)) {
        return "fuzz: internal error: depth too large";
      }

      if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
        return "fuzz: internal error: push to the 'none' container";
      }
      if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
        stack[*depth] = 0x02;
      } else if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
        stack[*depth] = 0x04;
      } else {
        return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC";
      }

    } else if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) {
      // A dictionary being closed must have seen an even number of elements.
      if ((detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) &&
          ((stack[*depth] & 0x80) != 0)) {
        return "fuzz: internal error: dictionary had an incomplete key/value "
               "pair";
      }

      if (*depth == 0) {
        return "fuzz: internal error: depth too small";
      }
      (*depth)--;

      // The TO_ETC bit has to agree with the container we return to.
      stack_element to_bit = 0;
      if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
        to_bit = 0x01;
      } else if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
        to_bit = 0x02;
      } else if (detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
        to_bit = 0x04;
      }
      if ((to_bit == 0) || ((stack[*depth] & to_bit) == 0)) {
        return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC";
      }

    } else {
      return "fuzz: internal error: unrecognized VBC__STRUCTURE";
    }

  } else if (category == WUFFS_BASE__TOKEN__VBC__STRING) {
    if (detail & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) {
      // The token's bytes are the n source bytes that end at the (already
      // advanced) running offset.
      uint8_t* bytes_ptr = src->data.ptr + *ti - n;
      const size_t bytes_len = (size_t)n;
      if ((detail & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) &&
          (wuffs_base__utf_8__longest_valid_prefix(bytes_ptr, bytes_len) !=
           bytes_len)) {
        return "fuzz: internal error: invalid UTF-8";
      }
      if ((detail & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) &&
          (wuffs_base__ascii__longest_valid_prefix(bytes_ptr, bytes_len) !=
           bytes_len)) {
        return "fuzz: internal error: invalid ASCII";
      }
    }

  } else if (category == WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT) {
    if ((detail >= WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL) &&
        (detail <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) {
      return "fuzz: internal error: invalid Unicode surrogate";
    }
    if (detail > WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL) {
      return "fuzz: internal error: invalid Unicode code point";
    }
  }

  // Only a complete CBOR value flips the container's even/odd parity bit.
  // Continued tokens, filler, non-pop structure tokens and tags do not count.
  if (wuffs_base__token__continued(&t)) {
    return NULL;
  }
  if (category == WUFFS_BASE__TOKEN__VBC__FILLER) {
    return NULL;
  }
  if ((category == WUFFS_BASE__TOKEN__VBC__STRUCTURE) &&
      !(detail & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP)) {
    return NULL;
  }
  if (tagged) {
    return NULL;
  }
  stack[*depth] ^= 0x80;
  return NULL;
}

// buffer_limit maps the low 6 bits of hash to a length in the range
// [min, max], biased towards the two ends of that range:
//  - hash values 0x00 ..= 0x1F yield min + hash, saturating at max.
//  - hash values 0x20 ..= 0x3F yield max - (0x3F - hash), saturating at min.
//
// The arithmetic is checked before it is performed so that unsigned
// wrap-around can never cause the wrong bound to be returned (e.g. a tiny
// max must not wrap "max - k" to a huge value that then clamps to max).
//
// If min >= max, the low half returns max and the high half returns min.
uint64_t  //
buffer_limit(uint64_t hash, uint64_t min, uint64_t max) {
  hash &= 0x3F;

  if (hash < 0x20) {
    // Step up from min, unless that would reach or pass max.
    if ((min >= max) || (hash >= (max - min))) {
      return max;
    }
    return min + hash;
  }

  // Step down from max, unless that would reach or pass min.
  uint64_t step_down = 0x3F - hash;
  if ((min >= max) || (step_down >= (max - min))) {
    return min;
  }
  return max - step_down;
}

// fuzz_complex decodes full_src as CBOR tokens, repeatedly calling the
// decoder with hash-derived buffer limits, and cross-checks the decoder's
// output after every call: buffer index invariants, forward progress, each
// token (via fuzz_one_token) and that the token lengths sum to the bytes
// consumed.
//
// It returns NULL if decoding finished OK and all checks passed. Otherwise it
// returns a message: either the decoder's own status message or one of this
// file's "fuzz: internal error" strings.
const char*  //
fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) {
  // Successive 6-bit slices of hash pick the token buffer length and the
  // per-call source limit.
  uint64_t tok_limit = buffer_limit(
      hash & 0x3F, WUFFS_CBOR__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL,
      TOK_BUFFER_ARRAY_SIZE);
  hash = wuffs_base__u64__rotate_right(hash, 6);

  uint64_t src_limit = buffer_limit(
      hash & 0x3F, WUFFS_CBOR__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096);
  // hash is not read again in this function after this rotation.
  hash = wuffs_base__u64__rotate_right(hash, 6);

  // ----

  wuffs_cbor__decoder dec;
  wuffs_base__status status = wuffs_cbor__decoder__initialize(
      &dec, sizeof dec, WUFFS_VERSION,
      WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED);
  if (!wuffs_base__status__is_ok(&status)) {
    return wuffs_base__status__message(&status);
  }

  // The token buffer is backed by tok_array but only exposes the first
  // tok_limit elements (never more than the array holds).
  wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
  wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
      .data = ((wuffs_base__slice_token){
          .ptr = tok_array,
          .len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE)
                              ? tok_limit
                              : TOK_BUFFER_ARRAY_SIZE),
      }),
  });

  // prev_token carries across decode_tokens calls so that a continued token
  // at the end of one batch is paired with the first token of the next.
  wuffs_base__token prev_token = wuffs_base__make_token(0);
  // Counts consecutive decode_tokens calls that made no progress.
  uint32_t no_progress_count = 0;

  stack_element stack[STACK_SIZE];
  stack[0] = 0x01;  // We start in the 'none' container.
  size_t depth = 0;

  // ----

  while (true) {  // Outer loop.
    // NOTE(review): make_limited_reader is not defined in this file
    // (presumably it comes from fuzzlib). It appears to yield a view of
    // full_src restricted by src_limit - confirm against its definition.
    wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit);

    // Snapshot the indexes so that we can tell what the decoder changed.
    size_t old_tok_wi = tok.meta.wi;
    size_t old_tok_ri = tok.meta.ri;
    size_t old_src_wi = src.meta.wi;
    size_t old_src_ri = src.meta.ri;
    // ti is the running source offset that fuzz_one_token advances by each
    // token's length.
    size_t ti = old_src_ri;

    status = wuffs_cbor__decoder__decode_tokens(
        &dec, &tok, &src,
        wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
    // The decoder may only advance tok's write index and src's read index.
    // It must leave tok.meta.ri and src.meta.wi alone and keep
    // ri <= wi <= len for both buffers.
    if ((tok.data.len < tok.meta.wi) ||  //
        (tok.meta.wi < tok.meta.ri) ||   //
        (tok.meta.ri != old_tok_ri)) {
      return "fuzz: internal error: inconsistent tok indexes";
    } else if ((src.data.len < src.meta.wi) ||  //
               (src.meta.wi < src.meta.ri) ||   //
               (src.meta.wi != old_src_wi)) {
      return "fuzz: internal error: inconsistent src indexes";
    }
    // src is a by-value copy, so propagate the bytes consumed back to the
    // caller's buffer.
    full_src->meta.ri += src.meta.ri - old_src_ri;

    // Progress means: tokens were written, source bytes were read or the
    // status is not a suspension. After 999 consecutive calls without
    // progress, the next such call gives up.
    if ((tok.meta.wi > old_tok_wi) || (src.meta.ri > old_src_ri) ||
        !wuffs_base__status__is_suspension(&status)) {
      no_progress_count = 0;
    } else if (no_progress_count < 999) {
      no_progress_count++;
    } else if (!full_src->meta.closed &&
               (status.repr == wuffs_base__suspension__short_read)) {
      // The decoder keeps asking for more input from a source that is not
      // marked closed: report the suspension rather than an internal error.
      return wuffs_base__status__message(&status);
    } else {
      return "fuzz: internal error: no progress";
    }

    // ----

    // Drain and validate every token written so far.
    while (tok.meta.ri < tok.meta.wi) {  // Inner loop.
      wuffs_base__token t = tok.data.ptr[tok.meta.ri++];
      const char* z =
          fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth);
      if (z != NULL) {
        return z;
      }
      prev_token = t;
    }  // Inner loop.

    // ----

    // Check that, starting from old_src_ri, summing the token lengths brings
    // us to the new src.meta.ri.
    if (ti != src.meta.ri) {
      return "fuzz: internal error: ti != ri";
    }

    // A NULL repr is the OK status: decoding is complete.
    if (status.repr == NULL) {
      break;

    } else if (status.repr == wuffs_base__suspension__short_read) {
      if (src.meta.closed) {
        return "fuzz: internal error: short read on a closed io_reader";
      }
      // We don't compact full_src as it may be mmap'ed read-only.
      continue;

    } else if (status.repr == wuffs_base__suspension__short_write) {
      // All tokens were consumed by the inner loop, so compacting makes room
      // for the next batch.
      wuffs_base__token_buffer__compact(&tok);
      continue;
    }

    // Any other status is returned to the caller as its message.
    return wuffs_base__status__message(&status);
  }  // Outer loop.

  // ----

  // A successful decode must have closed every container and must not end on
  // a token that promises a continuation.
  if (depth != 0) {
    return "fuzz: internal error: decoded OK but final depth was not zero";
  } else if (wuffs_base__token__continued(&prev_token)) {
    return "fuzz: internal error: decoded OK but final token was continued";
  }
  return NULL;
}

// fuzz_simple decodes full_src as CBOR tokens and discards them, performing
// no checks of its own beyond looking at the returned status.
//
// It returns NULL if decoding finished OK. Otherwise it returns the message
// of the first status that is neither OK nor a short-write suspension.
const char*  //
fuzz_simple(wuffs_base__io_buffer* full_src) {
  wuffs_cbor__decoder dec;
  wuffs_base__status status =
      wuffs_cbor__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
  if (!wuffs_base__status__is_ok(&status)) {
    return wuffs_base__status__message(&status);
  }

  wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
  wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
      .data = ((wuffs_base__slice_token){
          .ptr = tok_array,
          .len = TOK_BUFFER_ARRAY_SIZE,
      }),
  });

  wuffs_base__slice_u8 workbuf =
      wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE);

  // Keep decoding for as long as the only obstacle is a full token buffer.
  for (;;) {
    status = wuffs_cbor__decoder__decode_tokens(&dec, &tok, full_src, workbuf);
    if (status.repr != wuffs_base__suspension__short_write) {
      break;
    }
    // Throw away every token written so far to make room for more.
    tok.meta.ri = tok.meta.wi;
    wuffs_base__token_buffer__compact(&tok);
  }

  if (status.repr == NULL) {
    return NULL;
  }
  return wuffs_base__status__message(&status);
}

// fuzz is the entry point called with the input and a hash of that input. It
// dispatches to one of two implementations based on the low 8 bits of hash.
const char*  //
fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) {
  // Exactly one of the 256 possible low bytes (0xA5) selects fuzz_simple, so
  // 99.6% of inputs go to fuzz_complex. The 0xA5 constant is arbitrary but
  // non-zero: if the hash function maps the empty input to 0, the empty input
  // still goes to fuzz_complex.
  //
  // fuzz_simple shows how easy decoding with Wuffs is when all you want is to
  // run LLVMFuzzerTestOneInput's built-in (Wuffs API independent) checks
  // (e.g. the ASan address sanitizer) and you don't really care what the
  // output is, just that it doesn't crash.
  //
  // fuzz_complex adds many more Wuffs API specific checks (e.g. that the sum
  // of the tokens' lengths do not exceed the input length).
  const bool use_simple = ((hash & 0xFF) == 0xA5);
  if (use_simple) {
    return fuzz_simple(full_src);
  }
  // The low 8 bits have been used for dispatch; rotate them out of the way.
  return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8));
}

Coverage Report

Created: 2024-10-11 15:22

Line	Count	Source (jump to first uncovered line)
1		// Copyright 2020 The Wuffs Authors.
2		//
3		// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4		// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5		// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6		// option. This file may not be copied, modified, or distributed
7		// except according to those terms.
8		//
9		// SPDX-License-Identifier: Apache-2.0 OR MIT
10
11		// ----------------
12
13		// Silence the nested slash-star warning for the next comment's command line.
14		#pragma clang diagnostic push
15		#pragma clang diagnostic ignored "-Wcomment"
16
17		/*
18		This fuzzer (the fuzz function) is typically run indirectly, by a framework
19		such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput.
20
21		When working on the fuzz implementation, or as a coherence check, defining
22		WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files:
23
24		gcc -DWUFFS_CONFIG__FUZZLIB_MAIN cbor_fuzzer.c
25		./a.out ../../../test/data/*.cbor
26		rm -f ./a.out
27
28		It should print "PASS", amongst other information, and exit(0).
29		*/
30
31		#pragma clang diagnostic pop
32
33		// Wuffs ships as a "single file C library" or "header file library" as per
34		// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
35		//
36		// To use that single file as a "foo.c"-like implementation, instead of a
37		// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
38		// compiling it.
39		#define WUFFS_IMPLEMENTATION
40
41		#if defined(WUFFS_CONFIG__FUZZLIB_MAIN)
42		// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
43		// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
44		// functions have static storage.
45		//
46		// This can help the compiler ignore or discard unused code, which can produce
47		// faster compiles and smaller binaries. Other motivations are discussed in the
48		// "ALLOW STATIC IMPLEMENTATION" section of
49		// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
50		#define WUFFS_CONFIG__STATIC_FUNCTIONS
51		#endif // defined(WUFFS_CONFIG__FUZZLIB_MAIN)
52
53		// Defining the WUFFS_CONFIG__MODULE* macros are optional, but it lets users of
54		// release/c/etc.c choose which parts of Wuffs to build. That file contains the
55		// entire Wuffs standard library, implementing a variety of codecs and file
56		// formats. Without this macro definition, an optimizing compiler or linker may
57		// very well discard Wuffs code for unused codecs, but listing the Wuffs
58		// modules we use makes that process explicit. Preprocessing means that such
59		// code simply isn't compiled.
60		#define WUFFS_CONFIG__MODULES
61		#define WUFFS_CONFIG__MODULE__BASE
62		#define WUFFS_CONFIG__MODULE__CBOR
63
64		// If building this program in an environment that doesn't easily accommodate
65		// relative includes, you can use the script/inline-c-relative-includes.go
66		// program to generate a stand-alone C file.
67		#include "../../../release/c/wuffs-unsupported-snapshot.c"
68		#include "../fuzzlib/fuzzlib.c"
69
70	4.07k	#define TOK_BUFFER_ARRAY_SIZE 4096
71	82.0k	#define STACK_SIZE (WUFFS_CBOR__DECODER_DEPTH_MAX_INCL + 1)
72
73		// Wuffs allows either statically or dynamically allocated work buffers. This
74		// program exercises static allocation.
75		#define WORK_BUFFER_ARRAY_SIZE \
76	706k	WUFFS_CBOR__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE
77		#if WORK_BUFFER_ARRAY_SIZE > 0
78		uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
79		#else
80		// Not all C/C++ compilers support 0-length arrays.
81		uint8_t g_work_buffer_array[1];
82		#endif
83
84		// Each stack element is 1 byte. The low 7 bits denote the container:
85		// - 0x01 means no container: we are at the top level.
86		// - 0x02 means a [] list.
87		// - 0x04 means a {} dictionary.
88		//
89		// The high 0x80 bit holds the even/odd-ness of the number of elements in that
90		// container. A valid dictionary contains key-value pairs and should therefore
91		// contain an even number of elements.
92		typedef uint8_t stack_element;
93
94		bool //
95	250k	token_is_cbor_tag(wuffs_base__token t) {
96	250k	return (wuffs_base__token__value_major(&t) ==
97	250k	WUFFS_CBOR__TOKEN_VALUE_MAJOR) &&
98	250k	(wuffs_base__token__value_minor(&t) &
99	3.06k	WUFFS_CBOR__TOKEN_VALUE_MINOR__TAG);
100	250k	}
101
102		const char* //
103		fuzz_one_token(wuffs_base__token t,
104		wuffs_base__token prev_token,
105		wuffs_base__io_buffer* src,
106		size_t* ti,
107		stack_element* stack,
108	248k	size_t* depth) {
109	248k	uint64_t len = wuffs_base__token__length(&t);
110	248k	if (len > 0xFFFF) {
111	0	return "fuzz: internal error: length too long (vs 0xFFFF)";
112	248k	} else if (len > (src->meta.wi - *ti)) {
113	0	return "fuzz: internal error: length too long (vs wi - ti)";
114	0	}
115	248k	*ti += len;
116
117	248k	bool is_cbor_tag = token_is_cbor_tag(t);
118	248k	if (wuffs_base__token__value_extension(&t) >= 0) {
119	1.84k	if (!wuffs_base__token__continued(&prev_token)) {
120	0	return "fuzz: internal error: extended token not after continued token";
121	0	}
122	1.84k	is_cbor_tag = token_is_cbor_tag(prev_token);
123	1.84k	}
124
125	248k	int64_t vbc = wuffs_base__token__value_base_category(&t);
126	248k	uint64_t vbd = wuffs_base__token__value_base_detail(&t);
127
128	248k	switch (vbc) {
129	127k	case WUFFS_BASE__TOKEN__VBC__STRUCTURE: {
130	127k	bool from_consistent = false;
131	127k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) {
132	722	from_consistent = stack[*depth] & 0x01;
133	126k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) {
134	84.4k	from_consistent = stack[*depth] & 0x02;
135	84.4k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) {
136	42.1k	from_consistent = stack[*depth] & 0x04;
137	42.1k	}
138	127k	if (!from_consistent) {
139	0	return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC";
140	0	}
141
142	127k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) {
143	82.0k	(*depth)++;
144	82.0k	if ((*depth >= STACK_SIZE) || (*depth == 0)) {
145	0	return "fuzz: internal error: depth too large";
146	0	}
147
148	82.0k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
149	0	return "fuzz: internal error: push to the 'none' container";
150	82.0k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
151	75.2k	stack[*depth] = 0x02;
152	75.2k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
153	6.76k	stack[*depth] = 0x04;
154	6.76k	} else {
155	0	return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC";
156	0	}
157
158	82.0k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) {
159	45.2k	if ((vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) &&
160	45.2k	(0 != (0x80 & stack[*depth]))) {
161	0	return "fuzz: internal error: dictionary had an incomplete key/value "
162	0	"pair";
163	0	}
164
165	45.2k	if (*depth <= 0) {
166	0	return "fuzz: internal error: depth too small";
167	0	}
168	45.2k	(*depth)--;
169
170	45.2k	bool to_consistent = false;
171	45.2k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
172	69	to_consistent = stack[*depth] & 0x01;
173	45.1k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
174	9.57k	to_consistent = stack[*depth] & 0x02;
175	35.6k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
176	35.6k	to_consistent = stack[*depth] & 0x04;
177	35.6k	}
178	45.2k	if (!to_consistent) {
179	0	return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC";
180	0	}
181
182	45.2k	} else {
183	0	return "fuzz: internal error: unrecognized VBC__STRUCTURE";
184	0	}
185	127k	break;
186	127k	}
187
188	127k	case WUFFS_BASE__TOKEN__VBC__STRING: {
189	31.4k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) {
190	23.3k	wuffs_base__slice_u8 s =
191	23.3k	wuffs_base__make_slice_u8(src->data.ptr + *ti - len, len);
192	23.3k	if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) &&
193	23.3k	(s.len != wuffs_base__utf_8__longest_valid_prefix(s.ptr, s.len))) {
194	0	return "fuzz: internal error: invalid UTF-8";
195	0	}
196	23.3k	if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) &&
197	23.3k	(s.len != wuffs_base__ascii__longest_valid_prefix(s.ptr, s.len))) {
198	0	return "fuzz: internal error: invalid ASCII";
199	0	}
200	23.3k	}
201	31.4k	break;
202	31.4k	}
203
204	31.4k	case WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT: {
205	0	if ((WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL <= vbd) &&
206	0	(vbd <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) {
207	0	return "fuzz: internal error: invalid Unicode surrogate";
208	0	} else if (WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL < vbd) {
209	0	return "fuzz: internal error: invalid Unicode code point";
210	0	}
211	0	break;
212	0	}
213
214	89.5k	default:
215	89.5k	break;
216	248k	}
217
218		// After a complete CBOR value, update the parity (even/odd count) of the
219		// container.
220	248k	if (!wuffs_base__token__continued(&t) &&
221	248k	(vbc != WUFFS_BASE__TOKEN__VBC__FILLER) &&
222	248k	((vbc != WUFFS_BASE__TOKEN__VBC__STRUCTURE) ||
223	219k	(vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP)) &&
224	248k	!is_cbor_tag) {
225	136k	stack[*depth] ^= 0x80;
226	136k	}
227
228	248k	return NULL;
229	248k	}
230
231		uint64_t //
232	3.89k	buffer_limit(uint64_t hash, uint64_t min, uint64_t max) {
233	3.89k	hash &= 0x3F;
234	3.89k	uint64_t n;
235	3.89k	if (hash < 0x20) {
236	2.00k	n = min + hash;
237	2.00k	} else {
238	1.89k	n = max - (0x3F - hash);
239	1.89k	}
240	3.89k	if (n < min) {
241	0	return min;
242	3.89k	} else if (n > max) {
243	0	return max;
244	0	}
245	3.89k	return n;
246	3.89k	}
247
248		const char* //
249	1.94k	fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) {
250	1.94k	uint64_t tok_limit = buffer_limit(
251	1.94k	hash & 0x3F, WUFFS_CBOR__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL,
252	1.94k	TOK_BUFFER_ARRAY_SIZE);
253	1.94k	hash = wuffs_base__u64__rotate_right(hash, 6);
254
255	1.94k	uint64_t src_limit = buffer_limit(
256	1.94k	hash & 0x3F, WUFFS_CBOR__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096);
257	1.94k	hash = wuffs_base__u64__rotate_right(hash, 6);
258
259		// ----
260
261	1.94k	wuffs_cbor__decoder dec;
262	1.94k	wuffs_base__status status = wuffs_cbor__decoder__initialize(
263	1.94k	&dec, sizeof dec, WUFFS_VERSION,
264	1.94k	WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED);
265	1.94k	if (!wuffs_base__status__is_ok(&status)) {
266	0	return wuffs_base__status__message(&status);
267	0	}
268
269	1.94k	wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
270	1.94k	wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
271	1.94k	.data = ((wuffs_base__slice_token){
272	1.94k	.ptr = tok_array,
273	1.94k	.len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE)
274	1.94k	? tok_limit
275	1.94k	: TOK_BUFFER_ARRAY_SIZE),
276	1.94k	}),
277	1.94k	});
278
279	1.94k	wuffs_base__token prev_token = wuffs_base__make_token(0);
280	1.94k	uint32_t no_progress_count = 0;
281
282	1.94k	stack_element stack[STACK_SIZE];
283	1.94k	stack[0] = 0x01; // We start in the 'none' container.
284	1.94k	size_t depth = 0;
285
286		// ----
287
288	706k	while (true) { // Outer loop.
289	706k	wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit);
290
291	706k	size_t old_tok_wi = tok.meta.wi;
292	706k	size_t old_tok_ri = tok.meta.ri;
293	706k	size_t old_src_wi = src.meta.wi;
294	706k	size_t old_src_ri = src.meta.ri;
295	706k	size_t ti = old_src_ri;
296
297	706k	status = wuffs_cbor__decoder__decode_tokens(
298	706k	&dec, &tok, &src,
299	706k	wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
300	706k	if ((tok.data.len < tok.meta.wi) || //
301	706k	(tok.meta.wi < tok.meta.ri) || //
302	706k	(tok.meta.ri != old_tok_ri)) {
303	0	return "fuzz: internal error: inconsistent tok indexes";
304	706k	} else if ((src.data.len < src.meta.wi) || //
305	706k	(src.meta.wi < src.meta.ri) || //
306	706k	(src.meta.wi != old_src_wi)) {
307	0	return "fuzz: internal error: inconsistent src indexes";
308	0	}
309	706k	full_src->meta.ri += src.meta.ri - old_src_ri;
310
311	706k	if ((tok.meta.wi > old_tok_wi) || (src.meta.ri > old_src_ri) ||
312	706k	!wuffs_base__status__is_suspension(&status)) {
313	110k	no_progress_count = 0;
314	596k	} else if (no_progress_count < 999) {
315	595k	no_progress_count++;
316	595k	} else if (!full_src->meta.closed &&
317	596	(status.repr == wuffs_base__suspension__short_read)) {
318	596	return wuffs_base__status__message(&status);
319	596	} else {
320	0	return "fuzz: internal error: no progress";
321	0	}
322
323		// ----
324
325	954k	while (tok.meta.ri < tok.meta.wi) { // Inner loop.
326	248k	wuffs_base__token t = tok.data.ptr[tok.meta.ri++];
327	248k	const char* z =
328	248k	fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth);
329	248k	if (z != NULL) {
330	0	return z;
331	0	}
332	248k	prev_token = t;
333	248k	} // Inner loop.
334
335		// ----
336
337		// Check that, starting from old_src_ri, summing the token lengths brings
338		// us to the new src.meta.ri.
339	705k	if (ti != src.meta.ri) {
340	0	return "fuzz: internal error: ti != ri";
341	0	}
342
343	705k	if (status.repr == NULL) {
344	303	break;
345
346	705k	} else if (status.repr == wuffs_base__suspension__short_read) {
347	615k	if (src.meta.closed) {
348	0	return "fuzz: internal error: short read on a closed io_reader";
349	0	}
350		// We don't compact full_src as it may be mmap'ed read-only.
351	615k	continue;
352
353	615k	} else if (status.repr == wuffs_base__suspension__short_write) {
354	88.7k	wuffs_base__token_buffer__compact(&tok);
355	88.7k	continue;
356	88.7k	}
357
358	1.04k	return wuffs_base__status__message(&status);
359	705k	} // Outer loop.
360
361		// ----
362
363	303	if (depth != 0) {
364	0	return "fuzz: internal error: decoded OK but final depth was not zero";
365	303	} else if (wuffs_base__token__continued(&prev_token)) {
366	0	return "fuzz: internal error: decoded OK but final token was continued";
367	0	}
368	303	return NULL;
369	303	}
370
371		const char* //
372	153	fuzz_simple(wuffs_base__io_buffer* full_src) {
373	153	wuffs_cbor__decoder dec;
374	153	wuffs_base__status status =
375	153	wuffs_cbor__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
376	153	if (!wuffs_base__status__is_ok(&status)) {
377	0	return wuffs_base__status__message(&status);
378	0	}
379
380	153	wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
381	153	wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
382	153	.data = ((wuffs_base__slice_token){
383	153	.ptr = tok_array,
384	153	.len = TOK_BUFFER_ARRAY_SIZE,
385	153	}),
386	153	});
387
388	354	while (true) {
389	354	status = wuffs_cbor__decoder__decode_tokens(
390	354	&dec, &tok, full_src,
391	354	wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
392	354	if (status.repr == NULL) {
393	23	break;
394
395	331	} else if (status.repr == wuffs_base__suspension__short_write) {
396	201	tok.meta.ri = tok.meta.wi;
397	201	wuffs_base__token_buffer__compact(&tok);
398	201	continue;
399	201	}
400
401	130	return wuffs_base__status__message(&status);
402	354	}
403
404	23	return NULL;
405	153	}
406
407		const char* //
408	2.10k	fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) {
409		// Send 99.6% of inputs to fuzz_complex and the remainder to fuzz_simple. The
410		// 0xA5 constant is arbitrary but non-zero. If the hash function maps the
411		// empty input to 0, this still sends the empty input to fuzz_complex.
412		//
413		// The fuzz_simple implementation shows how easy decoding with Wuffs is when
414		// all you want is to run LLVMFuzzerTestOneInput's built-in (Wuffs API
415		// independent) checks (e.g. the ASan address sanitizer) and you don't really
416		// care what the output is, just that it doesn't crash.
417		//
418		// The fuzz_complex implementation adds many more Wuffs API specific checks
419		// (e.g. that the sum of the tokens' lengths do not exceed the input length).
420	2.10k	if ((hash & 0xFF) != 0xA5) {
421	1.94k	return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8));
422	1.94k	}
423	153	return fuzz_simple(full_src);
424	2.10k	}