/src/wuffs/fuzz/c/std/json_fuzzer.cc

Source
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// SPDX-License-Identifier: Apache-2.0 OR MIT

// ----------------

// Silence the nested slash-star warning for the next comment's command line.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcomment"

/*
This fuzzer (the fuzz function) is typically run indirectly, by a framework
such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput.

When working on the fuzz implementation, or as a coherence check, defining
WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files:

g++ -DWUFFS_CONFIG__FUZZLIB_MAIN json_fuzzer.cc
./a.out ../../../test/data/*.json
rm -f ./a.out

It should print "PASS", amongst other information, and exit(0).
*/

#pragma clang diagnostic pop

// Wuffs ships as a "single file C library" or "header file library" as per
// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
//
// To use that single file as a "foo.c"-like implementation, instead of a
// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
// compiling it.
#define WUFFS_IMPLEMENTATION

#if defined(WUFFS_CONFIG__FUZZLIB_MAIN)
// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
// functions have static storage.
//
// This can help the compiler ignore or discard unused code, which can produce
// faster compiles and smaller binaries. Other motivations are discussed in the
// "ALLOW STATIC IMPLEMENTATION" section of
// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
#define WUFFS_CONFIG__STATIC_FUNCTIONS
#endif  // defined(WUFFS_CONFIG__FUZZLIB_MAIN)

// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
// release/c/etc.c choose which parts of Wuffs to build. That file contains the
// entire Wuffs standard library, implementing a variety of codecs and file
// formats. Without this macro definition, an optimizing compiler or linker may
// very well discard Wuffs code for unused codecs, but listing the Wuffs
// modules we use makes that process explicit. Preprocessing means that such
// code simply isn't compiled.
#define WUFFS_CONFIG__MODULES
#define WUFFS_CONFIG__MODULE__AUX__BASE
#define WUFFS_CONFIG__MODULE__AUX__JSON
#define WUFFS_CONFIG__MODULE__BASE
#define WUFFS_CONFIG__MODULE__JSON

// If building this program in an environment that doesn't easily accommodate
// relative includes, you can use the script/inline-c-relative-includes.go
// program to generate a stand-alone C file.
#include "../../../release/c/wuffs-unsupported-snapshot.c"
#include "../fuzzlib/fuzzlib.c"

// TOK_BUFFER_ARRAY_SIZE is the capacity, in tokens, of the local token arrays
// that fuzz_complex and fuzz_simple hand to the decoder. It is also the upper
// bound that fuzz_complex passes to buffer_limit when picking a token limit.
#define TOK_BUFFER_ARRAY_SIZE 4096
// STACK_SIZE is the length of fuzz_complex's container stack. It is one more
// than WUFFS_JSON__DECODER_DEPTH_MAX_INCL (by its name, the decoder's
// inclusive maximum nesting depth) because stack[0] always holds the
// top-level 'none' container.
#define STACK_SIZE (WUFFS_JSON__DECODER_DEPTH_MAX_INCL + 1)

// Wuffs allows either statically or dynamically allocated work buffers. This
// program exercises static allocation.
#define WORK_BUFFER_ARRAY_SIZE \
  WUFFS_JSON__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE
#if WORK_BUFFER_ARRAY_SIZE > 0
uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
#else
// Not all C/C++ compilers support 0-length arrays.
uint8_t g_work_buffer_array[1];
#endif

// Each stack element is 1 byte. The low 7 bits denote the container:
//  - 0x01 means no container: we are at the top level.
//  - 0x02 means a [] list.
//  - 0x04 means a {} dictionary.
//
// The high 0x80 bit holds the even/odd-ness of the number of elements in that
// container. A valid dictionary contains key-value pairs and should therefore
// contain an even number of elements.
typedef uint8_t stack_element;

// fuzz_one_token checks a single decoded token for internal consistency and
// updates the caller's bookkeeping. It returns NULL if the token passes every
// check, or a static "fuzz: internal error: etc" message otherwise.
//
// Parameters:
//  - t is the token to check.
//  - prev_token is the token that was checked immediately before t.
//  - src is the io_buffer that t was decoded from. Only its meta.wi and
//    data.ptr fields are read here.
//  - ti is an in/out index into src's data. On entry it is the position where
//    t's bytes start. It is advanced by t's length once the length checks
//    pass.
//  - stack and depth are the in/out container stack (see the stack_element
//    comment) and the index of its current top element.
//
// On an error return, *ti, *depth and stack may already have been partially
// updated. The caller in this file stops processing as soon as it sees a
// non-NULL result.
const char*  //
fuzz_one_token(wuffs_base__token t,
               wuffs_base__token prev_token,
               wuffs_base__io_buffer* src,
               size_t* ti,
               stack_element* stack,
               size_t* depth) {
  // The token's length must fit in 16 bits and must not run past the bytes
  // written to src so far.
  uint64_t len = wuffs_base__token__length(&t);
  if (len > 0xFFFF) {
    return "fuzz: internal error: length too long (vs 0xFFFF)";
  } else if (len > (src->meta.wi - *ti)) {
    return "fuzz: internal error: length too long (vs wi - ti)";
  }
  *ti += len;

  // A token with a non-negative value_extension is only allowed directly
  // after a token that was marked as continued.
  if ((wuffs_base__token__value_extension(&t) >= 0) &&
      !wuffs_base__token__continued(&prev_token)) {
    return "fuzz: internal error: extended token not after continued token";
  }

  int64_t vbc = wuffs_base__token__value_base_category(&t);
  uint64_t vbd = wuffs_base__token__value_base_detail(&t);

  switch (vbc) {
    case WUFFS_BASE__TOKEN__VBC__STRUCTURE: {
      // The token's FROM_ETC bit must name the container type that is
      // currently on top of our stack. If no FROM_ETC bit is set,
      // from_consistent stays false and the token is rejected.
      bool from_consistent = false;
      if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) {
        from_consistent = stack[*depth] & 0x01;
      } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) {
        from_consistent = stack[*depth] & 0x02;
      } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) {
        from_consistent = stack[*depth] & 0x04;
      }
      if (!from_consistent) {
        return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC";
      }

      if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) {
        // Entering a container: grow the stack. The bounds check happens
        // before the new slot is written. The (*depth == 0) test catches the
        // increment wrapping around.
        (*depth)++;
        if ((*depth >= STACK_SIZE) || (*depth == 0)) {
          return "fuzz: internal error: depth too large";
        }

        // Record the new container's type. Assigning 0x02 or 0x04 also leaves
        // the new element's 0x80 parity bit clear.
        if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
          return "fuzz: internal error: push to the 'none' container";
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
          stack[*depth] = 0x02;
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
          stack[*depth] = 0x04;
        } else {
          return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC";
        }

      } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) {
        // Leaving a dictionary with the parity bit set means that it held an
        // odd number of elements: a key without a value.
        if ((vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) &&
            (0 != (0x80 & stack[*depth]))) {
          return "fuzz: internal error: dictionary had an incomplete key/value "
                 "pair";
        }

        if (*depth <= 0) {
          return "fuzz: internal error: depth too small";
        }
        (*depth)--;

        // After popping, the token's TO_ETC bit must name the container type
        // that is now on top of our stack.
        bool to_consistent = false;
        if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
          to_consistent = stack[*depth] & 0x01;
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
          to_consistent = stack[*depth] & 0x02;
        } else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
          to_consistent = stack[*depth] & 0x04;
        }
        if (!to_consistent) {
          return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC";
        }

      } else {
        return "fuzz: internal error: unrecognized VBC__STRUCTURE";
      }
      break;
    }

    case WUFFS_BASE__TOKEN__VBC__STRING: {
      if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) {
        // *ti was already advanced past this token, so the token's bytes are
        // the len bytes that end at src->data.ptr + *ti. Re-validate them
        // against whatever encoding the token claims.
        wuffs_base__slice_u8 s =
            wuffs_base__make_slice_u8(src->data.ptr + *ti - len, len);
        if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) &&
            (s.len != wuffs_base__utf_8__longest_valid_prefix(s.ptr, s.len))) {
          return "fuzz: internal error: invalid UTF-8";
        }
        if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) &&
            (s.len != wuffs_base__ascii__longest_valid_prefix(s.ptr, s.len))) {
          return "fuzz: internal error: invalid ASCII";
        }
      }
      break;
    }

    case WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT: {
      // For this category, vbd is treated as the code point itself. It must
      // be neither a surrogate nor above the maximum code point.
      if ((WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL <= vbd) &&
          (vbd <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) {
        return "fuzz: internal error: invalid Unicode surrogate";
      } else if (WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL < vbd) {
        return "fuzz: internal error: invalid Unicode code point";
      }
      break;
    }

    default:
      break;
  }

  // After a complete JSON value, update the parity (even/odd count) of the
  // container.
  //
  // Continued tokens, filler tokens and structure tokens that are not a POP
  // do not complete a value. For a POP, *depth was already decremented above,
  // so the bit that flips belongs to the enclosing container.
  if (!wuffs_base__token__continued(&t) &&
      (vbc != WUFFS_BASE__TOKEN__VBC__FILLER) &&
      ((vbc != WUFFS_BASE__TOKEN__VBC__STRUCTURE) ||
       (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP))) {
    stack[*depth] ^= 0x80;
  }

  return NULL;
}

// buffer_limit maps the low 6 bits of hash to a buffer length in the range
// [min, max], favoring lengths near either end of that range:
//  - low-6-bit values 0x00 ..= 0x1F yield min + 0 ..= min + 31.
//  - low-6-bit values 0x20 ..= 0x3F yield max - 31 ..= max - 0.
//
// The candidate is computed with (wrapping) unsigned arithmetic and then
// clamped, so the result never falls outside [min, max] when min <= max.
uint64_t  //
buffer_limit(uint64_t hash, uint64_t min, uint64_t max) {
  const uint64_t low6 = hash & 0x3F;
  const uint64_t candidate =
      (low6 < 0x20) ? (min + low6) : (max - (0x3F - low6));

  if (candidate < min) {
    return min;
  }
  if (candidate > max) {
    return max;
  }
  return candidate;
}

// g_quirks lists the JSON decoder quirks that the fuzzer may enable.
//
// The list is terminated by a 0 element: set_quirks and fuzz_cpp both iterate
// until they reach it. An entry's index in this array is also the index of the
// hash bit that decides whether that quirk is enabled, so reordering entries
// changes which quirks a given input exercises.
uint32_t g_quirks[] = {
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_A,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_CAPITAL_U,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_E,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_QUESTION_MARK,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_V,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_X_AS_CODE_POINTS,
    WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_ZERO,
    WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK,
    WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE,
    WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA,
    WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
    WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
    WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
    WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
    WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_N_TILDE_R_TILDE_T,
    WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
    0,
};

// set_quirks enables a hash-selected subset of g_quirks on dec.
//
// Bit i of hash (counting from the least significant bit) decides whether
// g_quirks[i] is enabled. Iteration stops at g_quirks' 0 terminator.
void set_quirks(wuffs_json__decoder* dec, uint64_t hash) {
  for (uint32_t i = 0; g_quirks[i]; i++) {
    // Shift a 64-bit one, not a plain "1". The literal 1 has type int, so
    // "1 << k" is undefined behavior once k reaches 31 (for a 32-bit int),
    // which would not match the 64-bit hash or the "& 63" mask. This only
    // matters if g_quirks grows past 31 entries.
    uint64_t bit = ((uint64_t)1) << (i & 63);
    if (hash & bit) {
      wuffs_json__decoder__set_quirk(dec, g_quirks[i], 1);
    }
  }
}

// fuzz_complex decodes full_src as JSON, in small hash-determined pieces, and
// cross-checks the decoder's output after every decode_tokens call.
//
// Parameters:
//  - full_src holds the complete input. Its meta.ri is advanced as input is
//    consumed.
//  - hash supplies pseudo-random configuration. The low 6 bits pick the token
//    buffer length, the next 6 bits pick the per-call source length limit and
//    the remaining (rotated) bits pick which quirks to enable.
//
// It returns NULL if decoding succeeded and every check passed. Otherwise it
// returns either the decoder's own status message or a static "fuzz: internal
// error: etc" message describing the failed check.
const char*  //
fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) {
  uint64_t tok_limit = buffer_limit(
      hash & 0x3F, WUFFS_JSON__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL,
      TOK_BUFFER_ARRAY_SIZE);
  hash = wuffs_base__u64__rotate_right(hash, 6);

  uint64_t src_limit = buffer_limit(
      hash & 0x3F, WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096);
  hash = wuffs_base__u64__rotate_right(hash, 6);

  // ----

  wuffs_json__decoder dec;
  wuffs_base__status status = wuffs_json__decoder__initialize(
      &dec, sizeof dec, WUFFS_VERSION,
      WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED);
  if (!wuffs_base__status__is_ok(&status)) {
    return wuffs_base__status__message(&status);
  }
  set_quirks(&dec, hash);

  // The token buffer is backed by tok_array but only exposes the first
  // tok_limit elements (clamped again to the array's capacity). The meta
  // fields are not named in the initializer and so start out zeroed.
  wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
  wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
      .data = ((wuffs_base__slice_token){
          .ptr = tok_array,
          .len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE)
                              ? tok_limit
                              : TOK_BUFFER_ARRAY_SIZE),
      }),
  });

  // State that persists across decode_tokens calls: the most recently checked
  // token, a counter of consecutive calls that made no progress, and the
  // container stack that fuzz_one_token maintains.
  wuffs_base__token prev_token = wuffs_base__make_token(0);
  uint32_t no_progress_count = 0;

  stack_element stack[STACK_SIZE];
  stack[0] = 0x01;  // We start in the 'none' container.
  size_t depth = 0;

  // ----

  while (true) {  // Outer loop.
    // NOTE(review): make_limited_reader comes from fuzzlib. Presumably it
    // returns a view of full_src that exposes at most src_limit unread bytes;
    // confirm against fuzzlib.c.
    wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit);

    // Snapshot the indexes so that we can tell what the decoder changed.
    size_t old_tok_wi = tok.meta.wi;
    size_t old_tok_ri = tok.meta.ri;
    size_t old_src_wi = src.meta.wi;
    size_t old_src_ri = src.meta.ri;
    size_t ti = old_src_ri;

    status = wuffs_json__decoder__decode_tokens(
        &dec, &tok, &src,
        wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
    // The decoder writes tokens and reads source bytes. It must keep
    // ri <= wi <= len for both buffers and must not touch tok's read index or
    // src's write index.
    if ((tok.data.len < tok.meta.wi) ||  //
        (tok.meta.wi < tok.meta.ri) ||   //
        (tok.meta.ri != old_tok_ri)) {
      return "fuzz: internal error: inconsistent tok indexes";
    } else if ((src.data.len < src.meta.wi) ||  //
               (src.meta.wi < src.meta.ri) ||   //
               (src.meta.wi != old_src_wi)) {
      return "fuzz: internal error: inconsistent src indexes";
    }
    // Propagate the number of bytes consumed from the limited view back to
    // the full source.
    full_src->meta.ri += src.meta.ri - old_src_ri;

    // Progress means that a token was written, a source byte was read or the
    // status was not a suspension. We tolerate up to 999 consecutive calls
    // without progress. After that, a short read on a not-yet-closed full_src
    // is reported using the decoder's own message and anything else is an
    // internal error.
    if ((tok.meta.wi > old_tok_wi) || (src.meta.ri > old_src_ri) ||
        !wuffs_base__status__is_suspension(&status)) {
      no_progress_count = 0;
    } else if (no_progress_count < 999) {
      no_progress_count++;
    } else if (!full_src->meta.closed &&
               (status.repr == wuffs_base__suspension__short_read)) {
      return wuffs_base__status__message(&status);
    } else {
      return "fuzz: internal error: no progress";
    }

    // ----

    // Drain and validate every token that the decoder just produced.
    while (tok.meta.ri < tok.meta.wi) {  // Inner loop.
      wuffs_base__token t = tok.data.ptr[tok.meta.ri++];
      const char* z =
          fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth);
      if (z != NULL) {
        return z;
      }
      prev_token = t;
    }  // Inner loop.

    // ----

    // Check that, starting from old_src_ri, summing the token lengths brings
    // us to the new src.meta.ri.
    if (ti != src.meta.ri) {
      return "fuzz: internal error: ti != ri";
    }

    // A NULL repr means that decoding completed OK. The two suspensions are
    // retried: a short read loops around to build a fresh limited reader and a
    // short write first compacts the (fully drained) token buffer. Any other
    // status ends the run with the decoder's message.
    if (status.repr == NULL) {
      break;

    } else if (status.repr == wuffs_base__suspension__short_read) {
      if (src.meta.closed) {
        return "fuzz: internal error: short read on a closed io_reader";
      }
      // We don't compact full_src as it may be mmap'ed read-only.
      continue;

    } else if (status.repr == wuffs_base__suspension__short_write) {
      wuffs_base__token_buffer__compact(&tok);
      continue;
    }

    return wuffs_base__status__message(&status);
  }  // Outer loop.

  // ----

  // A successful decode must leave us back at the top level and must not end
  // on a token that promised a continuation.
  if (depth != 0) {
    return "fuzz: internal error: decoded OK but final depth was not zero";
  } else if (wuffs_base__token__continued(&prev_token)) {
    return "fuzz: internal error: decoded OK but final token was continued";
  }
  return NULL;
}

// fuzz_simple decodes full_src as JSON with a default-configured decoder and
// throws away every token. It performs no checks of its own: the point is to
// let external tooling (e.g. sanitizers) watch the decoder run.
//
// It returns NULL when decoding completes OK, or the decoder's status message
// for any status other than OK or a short write.
const char*  //
fuzz_simple(wuffs_base__io_buffer* full_src) {
  wuffs_json__decoder dec;
  wuffs_base__status status =
      wuffs_json__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
  if (!wuffs_base__status__is_ok(&status)) {
    return wuffs_base__status__message(&status);
  }

  // The token buffer spans all of tok_array. Its meta fields are not named in
  // the initializer and so start out zeroed.
  wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
  wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
      .data = ((wuffs_base__slice_token){
          .ptr = tok_array,
          .len = TOK_BUFFER_ARRAY_SIZE,
      }),
  });

  for (;;) {
    status = wuffs_json__decoder__decode_tokens(
        &dec, &tok, full_src,
        wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));

    if (status.repr == NULL) {
      // Decoding finished OK.
      return NULL;
    }
    if (status.repr != wuffs_base__suspension__short_write) {
      // Anything other than "the token buffer is full" ends the run.
      return wuffs_base__status__message(&status);
    }

    // The token buffer is full. Mark every token as already read, so that
    // compacting frees up the whole buffer, and then decode some more.
    tok.meta.ri = tok.meta.wi;
    wuffs_base__token_buffer__compact(&tok);
  }
}

#if defined(__cplusplus)
#include <vector>

class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
 public:
  Callbacks() : m_depth(0) {}

  std::string AppendNull() override { return ""; }

  std::string AppendBool(bool val) override { return ""; }

  std::string AppendI64(int64_t val) override { return ""; }

  std::string AppendF64(double val) override { return ""; }

  std::string AppendTextString(std::string&& val) override { return ""; }

  std::string Push(uint32_t flags) override {
    m_depth++;
    return "";
  }

  std::string Pop(uint32_t flags) override {
    m_depth--;
    if (m_depth < 0) {
      fprintf(stderr, "negative depth\n");
      intentional_segfault();
    }
    return "";
  }

  void Done(wuffs_aux::DecodeJsonResult& result,
            wuffs_aux::sync_io::Input& input,
            wuffs_aux::IOBuffer& buffer) override {
    if (result.error_message.empty()) {
      if (m_depth != 0) {
        fprintf(stderr, "no error message but final depth is non-zero\n");
        intentional_segfault();
      }
    } else if (result.error_message.find("internal error:") !=
               std::string::npos) {
      fprintf(stderr, "internal errors shouldn't occur: \"%s\"\n",
              result.error_message.c_str());
      intentional_segfault();
    }
  }

 private:
  int64_t m_depth;
};

// fuzz_cpp runs the C++ wuffs_aux::DecodeJson API over the in_len bytes
// starting at in_ptr. It returns nothing: failures are detected by the
// Callbacks object, which crashes the process on an inconsistency.
//
// hash supplies pseudo-random configuration. Its low 4 bits pick a JSON
// Pointer from a fixed table (10 of the 16 entries are the empty pointer) and,
// after rotating those bits away, bit i decides whether g_quirks[i] is
// enabled.
void  //
fuzz_cpp(const uint8_t* in_ptr, size_t in_len, uint64_t hash) {
  static const char* json_pointers[16] = {
      "",           //
      "",           //
      "",           //
      "",           //
      "",           //
      "",           //
      "",           //
      "",           //
      "",           //
      "",           //
      "/",          //
      "/2/3/4/5",   //
      "/k0",        //
      "/k0/1",      //
      "/x/y",       //
      "/~0/~1/~n",  //
  };
  const char* json_pointer = json_pointers[hash & 15];
  hash = wuffs_base__u64__rotate_right(hash, 4);

  std::vector<wuffs_aux::QuirkKeyValuePair> quirks;
  for (uint32_t i = 0; g_quirks[i]; i++) {
    // Shift a 64-bit one, not a plain "1". The literal 1 has type int, so
    // "1 << k" overflows once k reaches 31 (for a 32-bit int), which would not
    // match the 64-bit hash or the "& 63" mask. This only matters if g_quirks
    // grows past 31 entries.
    uint64_t bit = static_cast<uint64_t>(1) << (i & 63);
    if (hash & bit) {
      quirks.push_back({g_quirks[i], 1});
    }
  }

  Callbacks callbacks;
  wuffs_aux::sync_io::MemoryInput input(in_ptr, in_len);
  wuffs_aux::DecodeJson(
      callbacks, input,
      wuffs_aux::DecodeJsonArgQuirks(quirks.data(), quirks.size()),
      wuffs_aux::DecodeJsonArgJsonPointer(json_pointer));
}
#endif  // defined(__cplusplus)

// fuzz is the entry point that the fuzzing harness calls for each input. It
// returns NULL or a message string, as produced by fuzz_complex or
// fuzz_simple.
//
// When compiled as C++, the input is first run through fuzz_cpp, using the
// hash rotated by 32 bits so that it is configured from different hash bits.
//
// The input then goes to exactly one of two C API implementations, chosen by
// the low 8 bits of hash:
//  - fuzz_simple gets the 1-in-256 inputs whose low byte is 0xA5. That
//    constant is arbitrary but non-zero, so that a hash function that maps the
//    empty input to 0 still sends the empty input to fuzz_complex.
//    fuzz_simple shows how easy decoding with Wuffs is when all you want is
//    to run LLVMFuzzerTestOneInput's built-in (Wuffs API independent) checks
//    (e.g. the ASan address sanitizer) and you don't really care what the
//    output is, just that it doesn't crash.
//  - fuzz_complex gets the remaining 99.6% of inputs, along with the hash
//    rotated by 8 bits. It adds many more Wuffs API specific checks (e.g. that
//    the sum of the tokens' lengths do not exceed the input length).
const char*  //
fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) {
#if defined(__cplusplus)
  fuzz_cpp(full_src->reader_pointer(), full_src->reader_length(),
           wuffs_base__u64__rotate_right(hash, 32));
#endif  // defined(__cplusplus)

  const uint64_t selector = hash & 0xFF;
  if (selector == 0xA5) {
    return fuzz_simple(full_src);
  }
  return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8));
}

Coverage Report

Created: 2025-10-28 06:37

Line	Count	Source
1		// Copyright 2020 The Wuffs Authors.
2		//
3		// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4		// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5		// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6		// option. This file may not be copied, modified, or distributed
7		// except according to those terms.
8		//
9		// SPDX-License-Identifier: Apache-2.0 OR MIT
10
11		// ----------------
12
13		// Silence the nested slash-star warning for the next comment's command line.
14		#pragma clang diagnostic push
15		#pragma clang diagnostic ignored "-Wcomment"
16
17		/*
18		This fuzzer (the fuzz function) is typically run indirectly, by a framework
19		such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput.
20
21		When working on the fuzz implementation, or as a coherence check, defining
22		WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files:
23
24		gcc -DWUFFS_CONFIG__FUZZLIB_MAIN json_fuzzer.c
25		./a.out ../../../test/data/*.json
26		rm -f ./a.out
27
28		It should print "PASS", amongst other information, and exit(0).
29		*/
30
31		#pragma clang diagnostic pop
32
33		// Wuffs ships as a "single file C library" or "header file library" as per
34		// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
35		//
36		// To use that single file as a "foo.c"-like implementation, instead of a
37		// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
38		// compiling it.
39		#define WUFFS_IMPLEMENTATION
40
41		#if defined(WUFFS_CONFIG__FUZZLIB_MAIN)
42		// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
43		// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
44		// functions have static storage.
45		//
46		// This can help the compiler ignore or discard unused code, which can produce
47		// faster compiles and smaller binaries. Other motivations are discussed in the
48		// "ALLOW STATIC IMPLEMENTATION" section of
49		// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
50		#define WUFFS_CONFIG__STATIC_FUNCTIONS
51		#endif // defined(WUFFS_CONFIG__FUZZLIB_MAIN)
52
53		// Defining the WUFFS_CONFIG__MODULE* macros are optional, but it lets users of
54		// release/c/etc.c choose which parts of Wuffs to build. That file contains the
55		// entire Wuffs standard library, implementing a variety of codecs and file
56		// formats. Without this macro definition, an optimizing compiler or linker may
57		// very well discard Wuffs code for unused codecs, but listing the Wuffs
58		// modules we use makes that process explicit. Preprocessing means that such
59		// code simply isn't compiled.
60		#define WUFFS_CONFIG__MODULES
61		#define WUFFS_CONFIG__MODULE__AUX__BASE
62		#define WUFFS_CONFIG__MODULE__AUX__JSON
63		#define WUFFS_CONFIG__MODULE__BASE
64		#define WUFFS_CONFIG__MODULE__JSON
65
66		// If building this program in an environment that doesn't easily accommodate
67		// relative includes, you can use the script/inline-c-relative-includes.go
68		// program to generate a stand-alone C file.
69		#include "../../../release/c/wuffs-unsupported-snapshot.c"
70		#include "../fuzzlib/fuzzlib.c"
71
72	11.8k	#define TOK_BUFFER_ARRAY_SIZE 4096
73	236k	#define STACK_SIZE (WUFFS_JSON__DECODER_DEPTH_MAX_INCL + 1)
74
75		// Wuffs allows either statically or dynamically allocated work buffers. This
76		// program exercises static allocation.
77		#define WORK_BUFFER_ARRAY_SIZE \
78	2.56M	WUFFS_JSON__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE
79		#if WORK_BUFFER_ARRAY_SIZE > 0
80		uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
81		#else
82		// Not all C/C++ compilers support 0-length arrays.
83		uint8_t g_work_buffer_array[1];
84		#endif
85
86		// Each stack element is 1 byte. The low 7 bits denote the container:
87		// - 0x01 means no container: we are at the top level.
88		// - 0x02 means a [] list.
89		// - 0x04 means a {} dictionary.
90		//
91		// The high 0x80 bit holds the even/odd-ness of the number of elements in that
92		// container. A valid dictionary contains key-value pairs and should therefore
93		// contain an even number of elements.
94		typedef uint8_t stack_element;
95
96		const char* //
97		fuzz_one_token(wuffs_base__token t,
98		wuffs_base__token prev_token,
99		wuffs_base__io_buffer* src,
100		size_t* ti,
101		stack_element* stack,
102	7.76M	size_t* depth) {
103	7.76M	uint64_t len = wuffs_base__token__length(&t);
104	7.76M	if (len > 0xFFFF) {
105	0	return "fuzz: internal error: length too long (vs 0xFFFF)";
106	7.76M	} else if (len > (src->meta.wi - *ti)) {
107	0	return "fuzz: internal error: length too long (vs wi - ti)";
108	0	}
109	7.76M	*ti += len;
110
111	7.76M	if ((wuffs_base__token__value_extension(&t) >= 0) &&
112	0	!wuffs_base__token__continued(&prev_token)) {
113	0	return "fuzz: internal error: extended token not after continued token";
114	0	}
115
116	7.76M	int64_t vbc = wuffs_base__token__value_base_category(&t);
117	7.76M	uint64_t vbd = wuffs_base__token__value_base_detail(&t);
118
119	7.76M	switch (vbc) {
120	413k	case WUFFS_BASE__TOKEN__VBC__STRUCTURE: {
121	413k	bool from_consistent = false;
122	413k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE) {
123	1.91k	from_consistent = stack[*depth] & 0x01;
124	411k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST) {
125	407k	from_consistent = stack[*depth] & 0x02;
126	407k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) {
127	4.67k	from_consistent = stack[*depth] & 0x04;
128	4.67k	}
129	413k	if (!from_consistent) {
130	0	return "fuzz: internal error: inconsistent VBD__STRUCTURE__FROM_ETC";
131	0	}
132
133	413k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH) {
134	236k	(*depth)++;
135	236k	if ((*depth >= STACK_SIZE) \|\| (*depth == 0)) {
136	0	return "fuzz: internal error: depth too large";
137	0	}
138
139	236k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
140	0	return "fuzz: internal error: push to the 'none' container";
141	236k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
142	232k	stack[*depth] = 0x02;
143	232k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
144	4.50k	stack[*depth] = 0x04;
145	4.50k	} else {
146	0	return "fuzz: internal error: unrecognized VBD__STRUCTURE__TO_ETC";
147	0	}
148
149	236k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP) {
150	176k	if ((vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT) &&
151	2.13k	(0 != (0x80 & stack[*depth]))) {
152	0	return "fuzz: internal error: dictionary had an incomplete key/value "
153	0	"pair";
154	0	}
155
156	176k	if (*depth <= 0) {
157	0	return "fuzz: internal error: depth too small";
158	0	}
159	176k	(*depth)--;
160
161	176k	bool to_consistent = false;
162	176k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE) {
163	78	to_consistent = stack[*depth] & 0x01;
164	176k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
165	176k	to_consistent = stack[*depth] & 0x02;
166	176k	} else if (vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
167	675	to_consistent = stack[*depth] & 0x04;
168	675	}
169	176k	if (!to_consistent) {
170	0	return "fuzz: internal error: inconsistent VBD__STRUCTURE__TO_ETC";
171	0	}
172
173	176k	} else {
174	0	return "fuzz: internal error: unrecognized VBC__STRUCTURE";
175	0	}
176	413k	break;
177	413k	}
178
179	413k	case WUFFS_BASE__TOKEN__VBC__STRING: {
180	121k	if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) {
181	93.8k	wuffs_base__slice_u8 s =
182	93.8k	wuffs_base__make_slice_u8(src->data.ptr + *ti - len, len);
183	93.8k	if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_UTF_8) &&
184	93.8k	(s.len != wuffs_base__utf_8__longest_valid_prefix(s.ptr, s.len))) {
185	0	return "fuzz: internal error: invalid UTF-8";
186	0	}
187	93.8k	if ((vbd & WUFFS_BASE__TOKEN__VBD__STRING__DEFINITELY_ASCII) &&
188	0	(s.len != wuffs_base__ascii__longest_valid_prefix(s.ptr, s.len))) {
189	0	return "fuzz: internal error: invalid ASCII";
190	0	}
191	93.8k	}
192	121k	break;
193	121k	}
194
195	4.03M	case WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT: {
196	4.03M	if ((WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL <= vbd) &&
197	4.01M	(vbd <= WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL)) {
198	0	return "fuzz: internal error: invalid Unicode surrogate";
199	4.03M	} else if (WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL < vbd) {
200	0	return "fuzz: internal error: invalid Unicode code point";
201	0	}
202	4.03M	break;
203	4.03M	}
204
205	4.03M	default:
206	3.19M	break;
207	7.76M	}
208
209		// After a complete JSON value, update the parity (even/odd count) of the
210		// container.
211	7.76M	if (!wuffs_base__token__continued(&t) &&
212	3.51M	(vbc != WUFFS_BASE__TOKEN__VBC__FILLER) &&
213	1.74M	((vbc != WUFFS_BASE__TOKEN__VBC__STRUCTURE) \|\|
214	1.50M	(vbd & WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP))) {
215	1.50M	stack[*depth] ^= 0x80;
216	1.50M	}
217
218	7.76M	return NULL;
219	7.76M	}
220
221		uint64_t //
222	11.6k	buffer_limit(uint64_t hash, uint64_t min, uint64_t max) {
223	11.6k	hash &= 0x3F;
224	11.6k	uint64_t n;
225	11.6k	if (hash < 0x20) {
226	5.95k	n = min + hash;
227	5.95k	} else {
228	5.66k	n = max - (0x3F - hash);
229	5.66k	}
230	11.6k	if (n < min) {
231	0	return min;
232	11.6k	} else if (n > max) {
233	0	return max;
234	0	}
235	11.6k	return n;
236	11.6k	}
237
238		uint32_t g_quirks[] = {
239		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_A,
240		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_CAPITAL_U,
241		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_E,
242		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_QUESTION_MARK,
243		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE,
244		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_V,
245		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_X_AS_CODE_POINTS,
246		WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_ZERO,
247		WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK,
248		WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE,
249		WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA,
250		WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
251		WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
252		WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
253		WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
254		WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_N_TILDE_R_TILDE_T,
255		WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
256		0,
257		};
258
259	5.80k	void set_quirks(wuffs_json__decoder* dec, uint64_t hash) {
260	104k	for (uint32_t i = 0; g_quirks[i]; i++) {
261	98.7k	uint64_t bit = 1 << (i & 63);
262	98.7k	if (hash & bit) {
263	48.6k	wuffs_json__decoder__set_quirk(dec, g_quirks[i], 1);
264	48.6k	}
265	98.7k	}
266	5.80k	}
267
268		const char* //
269	5.80k	fuzz_complex(wuffs_base__io_buffer* full_src, uint64_t hash) {
270	5.80k	uint64_t tok_limit = buffer_limit(
271	5.80k	hash & 0x3F, WUFFS_JSON__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL,
272	5.80k	TOK_BUFFER_ARRAY_SIZE);
273	5.80k	hash = wuffs_base__u64__rotate_right(hash, 6);
274
275	5.80k	uint64_t src_limit = buffer_limit(
276	5.80k	hash & 0x3F, WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL, 4096);
277	5.80k	hash = wuffs_base__u64__rotate_right(hash, 6);
278
279		// ----
280
281	5.80k	wuffs_json__decoder dec;
282	5.80k	wuffs_base__status status = wuffs_json__decoder__initialize(
283	5.80k	&dec, sizeof dec, WUFFS_VERSION,
284	5.80k	WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED);
285	5.80k	if (!wuffs_base__status__is_ok(&status)) {
286	0	return wuffs_base__status__message(&status);
287	0	}
288	5.80k	set_quirks(&dec, hash);
289
290	5.80k	wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
291	5.80k	wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
292	5.80k	.data = ((wuffs_base__slice_token){
293	5.80k	.ptr = tok_array,
294	5.80k	.len = (size_t)((tok_limit < TOK_BUFFER_ARRAY_SIZE)
295	5.80k	? tok_limit
296	5.80k	: TOK_BUFFER_ARRAY_SIZE),
297	5.80k	}),
298	5.80k	});
299
300	5.80k	wuffs_base__token prev_token = wuffs_base__make_token(0);
301	5.80k	uint32_t no_progress_count = 0;
302
303	5.80k	stack_element stack[STACK_SIZE];
304	5.80k	stack[0] = 0x01; // We start in the 'none' container.
305	5.80k	size_t depth = 0;
306
307		// ----
308
309	2.56M	while (true) { // Outer loop.
310	2.56M	wuffs_base__io_buffer src = make_limited_reader(*full_src, src_limit);
311
312	2.56M	size_t old_tok_wi = tok.meta.wi;
313	2.56M	size_t old_tok_ri = tok.meta.ri;
314	2.56M	size_t old_src_wi = src.meta.wi;
315	2.56M	size_t old_src_ri = src.meta.ri;
316	2.56M	size_t ti = old_src_ri;
317
318	2.56M	status = wuffs_json__decoder__decode_tokens(
319	2.56M	&dec, &tok, &src,
320	2.56M	wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
321	2.56M	if ((tok.data.len < tok.meta.wi) \|\| //
322	2.56M	(tok.meta.wi < tok.meta.ri) \|\| //
323	2.56M	(tok.meta.ri != old_tok_ri)) {
324	0	return "fuzz: internal error: inconsistent tok indexes";
325	2.56M	} else if ((src.data.len < src.meta.wi) \|\| //
326	2.56M	(src.meta.wi < src.meta.ri) \|\| //
327	2.56M	(src.meta.wi != old_src_wi)) {
328	0	return "fuzz: internal error: inconsistent src indexes";
329	0	}
330	2.56M	full_src->meta.ri += src.meta.ri - old_src_ri;
331
332	2.56M	if ((tok.meta.wi > old_tok_wi) \|\| (src.meta.ri > old_src_ri) \|\|
333	2.11M	!wuffs_base__status__is_suspension(&status)) {
334	457k	no_progress_count = 0;
335	2.11M	} else if (no_progress_count < 999) {
336	2.10M	no_progress_count++;
337	2.10M	} else if (!full_src->meta.closed &&
338	2.09k	(status.repr == wuffs_base__suspension__short_read)) {
339	2.09k	return wuffs_base__status__message(&status);
340	2.09k	} else {
341	0	return "fuzz: internal error: no progress";
342	0	}
343
344		// ----
345
346	10.3M	while (tok.meta.ri < tok.meta.wi) { // Inner loop.
347	7.76M	wuffs_base__token t = tok.data.ptr[tok.meta.ri++];
348	7.76M	const char* z =
349	7.76M	fuzz_one_token(t, prev_token, &src, &ti, &stack[0], &depth);
350	7.76M	if (z != NULL) {
351	0	return z;
352	0	}
353	7.76M	prev_token = t;
354	7.76M	} // Inner loop.
355
356		// ----
357
358		// Check that, starting from old_src_ri, summing the token lengths brings
359		// us to the new src.meta.ri.
360	2.56M	if (ti != src.meta.ri) {
361	0	return "fuzz: internal error: ti != ri";
362	0	}
363
364	2.56M	if (status.repr == NULL) {
365	1.22k	break;
366
367	2.56M	} else if (status.repr == wuffs_base__suspension__short_read) {
368	2.32M	if (src.meta.closed) {
369	0	return "fuzz: internal error: short read on a closed io_reader";
370	0	}
371		// We don't compact full_src as it may be mmap'ed read-only.
372	2.32M	continue;
373
374	2.32M	} else if (status.repr == wuffs_base__suspension__short_write) {
375	237k	wuffs_base__token_buffer__compact(&tok);
376	237k	continue;
377	237k	}
378
379	2.48k	return wuffs_base__status__message(&status);
380	2.56M	} // Outer loop.
381
382		// ----
383
384	1.22k	if (depth != 0) {
385	0	return "fuzz: internal error: decoded OK but final depth was not zero";
386	1.22k	} else if (wuffs_base__token__continued(&prev_token)) {
387	0	return "fuzz: internal error: decoded OK but final token was continued";
388	0	}
389	1.22k	return NULL;
390	1.22k	}
391
392		const char* //
393	94	fuzz_simple(wuffs_base__io_buffer* full_src) {
394	94	wuffs_json__decoder dec;
395	94	wuffs_base__status status =
396	94	wuffs_json__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
397	94	if (!wuffs_base__status__is_ok(&status)) {
398	0	return wuffs_base__status__message(&status);
399	0	}
400
401	94	wuffs_base__token tok_array[TOK_BUFFER_ARRAY_SIZE];
402	94	wuffs_base__token_buffer tok = ((wuffs_base__token_buffer){
403	94	.data = ((wuffs_base__slice_token){
404	94	.ptr = tok_array,
405	94	.len = TOK_BUFFER_ARRAY_SIZE,
406	94	}),
407	94	});
408
409	289	while (true) {
410	289	status = wuffs_json__decoder__decode_tokens(
411	289	&dec, &tok, full_src,
412	289	wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE));
413	289	if (status.repr == NULL) {
414	5	break;
415
416	284	} else if (status.repr == wuffs_base__suspension__short_write) {
417	195	tok.meta.ri = tok.meta.wi;
418	195	wuffs_base__token_buffer__compact(&tok);
419	195	continue;
420	195	}
421
422	89	return wuffs_base__status__message(&status);
423	289	}
424
425	5	return NULL;
426	94	}
427
428		#if defined(__cplusplus)
429		#include <vector>
430
431		class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
432		public:
433	5.90k	Callbacks() : m_depth(0) {}
434
435	3.20k	std::string AppendNull() override { return ""; }
436
437	3.62k	std::string AppendBool(bool val) override { return ""; }
438
439	1.41M	std::string AppendI64(int64_t val) override { return ""; }
440
441	74.9k	std::string AppendF64(double val) override { return ""; }
442
443	5.40k	std::string AppendTextString(std::string&& val) override { return ""; }
444
445	115k	std::string Push(uint32_t flags) override {
446	115k	m_depth++;
447	115k	return "";
448	115k	}
449
450	78.1k	std::string Pop(uint32_t flags) override {
451	78.1k	m_depth--;
452	78.1k	if (m_depth < 0) {
453	0	fprintf(stderr, "negative depth\n");
454	0	intentional_segfault();
455	0	}
456	78.1k	return "";
457	78.1k	}
458
459		void Done(wuffs_aux::DecodeJsonResult& result,
460		wuffs_aux::sync_io::Input& input,
461	5.90k	wuffs_aux::IOBuffer& buffer) override {
462	5.90k	if (result.error_message.empty()) {
463	1.93k	if (m_depth != 0) {
464	0	fprintf(stderr, "no error message but final depth is non-zero\n");
465	0	intentional_segfault();
466	0	}
467	3.96k	} else if (result.error_message.find("internal error:") !=
468	3.96k	std::string::npos) {
469	0	fprintf(stderr, "internal errors shouldn't occur: \"%s\"\n",
470	0	result.error_message.c_str());
471	0	intentional_segfault();
472	0	}
473	5.90k	}
474
475		private:
476		int64_t m_depth;
477		};
478
479		void //
480	5.90k	fuzz_cpp(const uint8_t* in_ptr, size_t in_len, uint64_t hash) {
481	5.90k	static const char* json_pointers[16] = {
482	5.90k	"", //
483	5.90k	"", //
484	5.90k	"", //
485	5.90k	"", //
486	5.90k	"", //
487	5.90k	"", //
488	5.90k	"", //
489	5.90k	"", //
490	5.90k	"", //
491	5.90k	"", //
492	5.90k	"/", //
493	5.90k	"/2/3/4/5", //
494	5.90k	"/k0", //
495	5.90k	"/k0/1", //
496	5.90k	"/x/y", //
497	5.90k	"/~0/~1/~n", //
498	5.90k	};
499	5.90k	const char* json_pointer = json_pointers[hash & 15];
500	5.90k	hash = wuffs_base__u64__rotate_right(hash, 4);
501
502	5.90k	std::vector<wuffs_aux::QuirkKeyValuePair> quirks;
503	106k	for (uint32_t i = 0; g_quirks[i]; i++) {
504	100k	uint64_t bit = 1 << (i & 63);
505	100k	if (hash & bit) {
506	49.7k	quirks.push_back({g_quirks[i], 1});
507	49.7k	}
508	100k	}
509
510	5.90k	Callbacks callbacks;
511	5.90k	wuffs_aux::sync_io::MemoryInput input(in_ptr, in_len);
512	5.90k	wuffs_aux::DecodeJson(
513	5.90k	callbacks, input,
514	5.90k	wuffs_aux::DecodeJsonArgQuirks(quirks.data(), quirks.size()),
515	5.90k	wuffs_aux::DecodeJsonArgJsonPointer(json_pointer));
516	5.90k	}
517		#endif // defined(__cplusplus)
518
519		const char* //
520	5.90k	fuzz(wuffs_base__io_buffer* full_src, uint64_t hash) {
521	5.90k	#if defined(__cplusplus)
522	5.90k	fuzz_cpp(full_src->reader_pointer(), full_src->reader_length(),
523	5.90k	wuffs_base__u64__rotate_right(hash, 32));
524	5.90k	#endif // defined(__cplusplus)
525
526		// Send 99.6% of inputs to fuzz_complex and the remainder to fuzz_simple. The
527		// 0xA5 constant is arbitrary but non-zero. If the hash function maps the
528		// empty input to 0, this still sends the empty input to fuzz_complex.
529		//
530		// The fuzz_simple implementation shows how easy decoding with Wuffs is when
531		// all you want is to run LLVMFuzzerTestOneInput's built-in (Wuffs API
532		// independent) checks (e.g. the ASan address sanitizer) and you don't really
533		// care what the output is, just that it doesn't crash.
534		//
535		// The fuzz_complex implementation adds many more Wuffs API specific checks
536		// (e.g. that the sum of the tokens' lengths do not exceed the input length).
537	5.90k	if ((hash & 0xFF) != 0xA5) {
538	5.80k	return fuzz_complex(full_src, wuffs_base__u64__rotate_right(hash, 8));
539	5.80k	}
540	94	return fuzz_simple(full_src);
541	5.90k	}