/src/zstd/tests/fuzz/sequence_compression_api.c

Source (jump to first uncovered line)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/**
 * This fuzz target performs a zstd round-trip test by generating an arbitrary
 * array of sequences, generating the associated source buffer, calling
 * ZSTD_compressSequences(), and then decompresses and compares the result with
 * the original generated source buffer.
 */

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd_errors.h"

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

static ZSTD_CCtx* cctx = NULL;
static ZSTD_DCtx* dctx = NULL;
static void* literalsBuffer = NULL;
static void* generatedSrc = NULL;
static ZSTD_Sequence* generatedSequences = NULL;

static void* dictBuffer = NULL;
static ZSTD_CDict* cdict = NULL;
static ZSTD_DDict* ddict = NULL;

#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */

/* Deterministic random number generator */

/* Rotates the 32-bit value `x` left by `r` bits (intended for 0 < r < 32).
 * Both arguments and the whole expansion are parenthesized, so that
 * expressions can be passed safely. Note that `x` is evaluated twice. */
#define FUZZ_RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))

/**
 * Advances the generator state and returns the next pseudo-random value.
 *
 * @param src  in/out generator state; updated in place on every call.
 * @return     the new state shifted right by 5 bits (i.e. a 27-bit value).
 *
 * The sequence only depends on the initial value of `*src`,
 * which makes every run reproducible from its seed.
 */
static uint32_t FUZZ_RDG_rand(uint32_t* src)
{
    static const uint32_t prime1 = 2654435761U;
    static const uint32_t prime2 = 2246822519U;
    uint32_t rand32 = *src;
    rand32 *= prime1;   /* unsigned multiplication : wraps modulo 2^32 */
    rand32 ^= prime2;
    rand32  = FUZZ_RDG_rotl32(rand32, 13);
    *src = rand32;
    return rand32 >> 5;
}

/* Fills `str` with `size` pseudo-random characters picked from a fixed
 * alphabet, and returns `str`. No terminating '\0' is written.
 * The generator seed is one uint32 drawn from `producer`
 * (it is drawn even when `size` is 0).
 * This simple function exists to avoid taking a dependency on datagen.h
 * to have RDG_genBuffer().
 */
static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer)
{
    const char alphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
    const uint32_t alphabetLen = (uint32_t)(sizeof alphabet - 1);  /* exclude trailing '\0' */
    uint32_t state = FUZZ_dataProducer_uint32(producer);
    char* out = str;
    size_t remaining = size;

    while (remaining > 0) {
        const uint32_t pick = FUZZ_RDG_rand(&state) % alphabetLen;
        *out = alphabet[pick];
        out++;
        remaining--;
    }
    return str;
}

/**
 * Rebuilds the source buffer described by the first `nbSequences` entries of
 * the global `generatedSequences` array, and returns the size of that source.
 *
 * For each sequence, `litLength` bytes are copied from the global
 * `literalsBuffer` (read sequentially), followed by `matchLength` bytes
 * copied from `offset` bytes back. When a dictionary is provided and the
 * offset reaches further back than the bytes produced so far, the beginning
 * of the match is read from the tail of `dict`.
 *
 * dst          : output buffer, written as a ZSTD_FUZZ_GENERATED_SRC_MAXSIZE bytes area
 * nbSequences  : nb of entries of `generatedSequences` to decode
 * literalsSize : nb of readable bytes in `literalsBuffer`
 * dict         : dictionary content (only read when dictSize != 0)
 * dictSize     : size of `dict`, 0 means no dictionary
 * mode         : in ZSTD_sf_noBlockDelimiters mode, the unread remainder of
 *                the literals buffer is appended as last literals when it fits
 *
 * Any copy that would not fit in its buffer triggers FUZZ_ASSERT().
 */
static size_t decodeSequences(void* dst, size_t nbSequences,
                              size_t literalsSize,
                              const void* dict, size_t dictSize,
                              ZSTD_SequenceFormat_e mode)
{
    const uint8_t* litPtr = literalsBuffer;
    const uint8_t* const litBegin = literalsBuffer;
    const uint8_t* const litEnd = litBegin + literalsSize;
    const uint8_t* const dictPtr = dict;
    uint8_t* op = dst;
    const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
    size_t generatedSrcBufferSize = 0;
    size_t bytesWritten = 0;

    for (size_t i = 0; i < nbSequences; ++i) {
        const size_t litLength = generatedSequences[i].litLength;
        const size_t offset = generatedSequences[i].offset;

        /* block boundary : a delimiter carries literals only */
        if (offset == 0) {
            FUZZ_ASSERT(generatedSequences[i].matchLength == 0);
        }

        /* Restart from the beginning of the literals buffer when not enough
         * literals remain. Lengths are compared (rather than pointers) so
         * that no pointer beyond the end of the buffer is ever formed. */
        if (litLength > (size_t)(litEnd - litPtr)) {
            litPtr = litBegin;
        }
        FUZZ_ASSERT(litLength <= (size_t)(litEnd - litPtr));
        FUZZ_ASSERT(litLength <= (size_t)(oend - op));
        memcpy(op, litPtr, litLength);
        bytesWritten += litLength;
        op += litLength;
        litPtr += litLength;

        /* Copy over the match */
        {   size_t matchLength = generatedSequences[i].matchLength;
            size_t j = 0;   /* nb of match bytes copied from previous output */
            size_t k = 0;   /* nb of match bytes copied from the dictionary */
            FUZZ_ASSERT(matchLength <= (size_t)(oend - op));
            if (dictSize != 0) {
                if (offset > bytesWritten) { /* Offset goes into the dictionary */
                    const size_t dictOffset = offset - bytesWritten;
                    const size_t matchInDict = MIN(matchLength, dictOffset);
                    for (; k < matchInDict; ++k) {
                        op[k] = dictPtr[dictSize - dictOffset + k];
                    }
                    matchLength -= matchInDict;
                    op += matchInDict;
                }
            }
            /* byte-by-byte copy on purpose : source and destination overlap
             * whenever offset < matchLength */
            for (; j < matchLength; ++j) {
                op[j] = op[(ptrdiff_t)j - (ptrdiff_t)offset];
            }
            op += j;
            FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
            bytesWritten += generatedSequences[i].matchLength;
        }
    }
    generatedSrcBufferSize = bytesWritten;
    FUZZ_ASSERT(litPtr <= litEnd);
    if (mode == ZSTD_sf_noBlockDelimiters) {
        /* without delimiters, whatever literals remain become the last literals,
         * provided they fit into the remaining output space */
        const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
        if (lastLLSize <= (uint32_t)(oend - op)) {
            memcpy(op, litPtr, lastLLSize);
            generatedSrcBufferSize += lastLLSize;
        }
    }
    return generatedSrcBufferSize;
}

/* Fills the global `generatedSequences` array with sequences drawn from
 * `producer`, and returns the nb of sequences generated.
 *
 * producer          : source of fuzzer-provided values
 * literalsSizeLimit : upper bound for the literals length of one sequence
 * dictSize          : size of the dictionary that offsets may reach into (0 : none)
 * windowLog         : window size is (1 << windowLog), it bounds offsets
 * mode              : with ZSTD_sf_explicitBlockDelimiters, block delimiters
 *                     (offset == 0, matchLength == 0) are inserted, and the
 *                     array always ends with one
 *
 * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
 * However, it can fail with ZSTD_sf_explicitBlockDelimiters,
 * due to potential lack of space in the destination buffer :
 * many small blocks can make the compressed size exceed ZSTD_compressBound().
 */
static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
                                      size_t literalsSizeLimit, size_t dictSize,
                                      size_t windowLog, ZSTD_SequenceFormat_e mode)
{
    const uint32_t repCode = 0;  /* not used by sequence ingestion api */
    size_t windowSize = 1ULL << windowLog;
    /* a block is bounded by both ZSTD_BLOCKSIZE_MAX and the window size */
    size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
    uint32_t bytesGenerated = 0;   /* total nb of source bytes described so far */
    uint32_t nbSeqGenerated = 0;   /* next free slot in generatedSequences */
    uint32_t isFirstSequence = 1;
    uint32_t blockSize = 0;        /* size of current block, explicit delimiters mode only */

    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        /* ensure that no sequence can be larger than one block */
        literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
        matchLengthMax = MIN(matchLengthMax, (uint32_t)blockSizeMax/2);
    }

    /* One iteration draws one sequence, and can store up to 3 entries :
     * a delimiter for a full block, a delimiter for a split, and the sequence itself. */
    while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
         && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
         && !FUZZ_dataProducer_empty(producer)) {
        uint32_t matchLength;
        uint32_t matchBound = matchLengthMax;
        uint32_t offset;
        uint32_t offsetBound;
        /* without dictionary, the very first sequence needs at least 1 literal,
         * otherwise offsetBound below would be 0 and no offset >= 1 could be drawn */
        const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
        const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
        bytesGenerated += litLength;
        if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
            /* would exceed the generated source budget : drop this sequence and stop */
            break;
        }
        /* Once more than a window has been generated, offsets are limited to the window.
         * Before that, they can reach all generated bytes plus the dictionary. */
        offsetBound = (bytesGenerated > windowSize) ? (uint32_t)windowSize : bytesGenerated + (uint32_t)dictSize;
        offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
        if (dictSize > 0 && bytesGenerated <= windowSize) {
            /* Prevent match length from being such that it would be associated with an offset too large
             * from the decoder's perspective. If not possible (match would be too small),
             * then reduce the offset if necessary.
             */
            const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
            if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
                /* note : this draws a second offset value from the producer, replacing the first one */
                const uint32_t newOffsetBound = offsetBound > windowSize ? (uint32_t)windowSize : offsetBound;
                offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
            } else {
                matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
            }
        }
        matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
        bytesGenerated += matchLength;
        if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
            /* same as above : the sequence is dropped */
            break;
        }
        {   ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
            /* lastLits and split are drawn in both modes,
             * so the nb of values consumed from the producer does not depend on mode */
            const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
            #define SPLITPROB 6000
            #define SPLITMARK 5234
            /* a split is requested when the drawn value is exactly SPLITMARK */
            const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
            if (mode == ZSTD_sf_explicitBlockDelimiters) {
                const size_t seqSize = seq.litLength + seq.matchLength;
                if (blockSize + seqSize > blockSizeMax) {  /* reaching limit : must end block now */
                    const ZSTD_Sequence endBlock = {0, 0, 0, 0};
                    generatedSequences[nbSeqGenerated++] = endBlock;
                    blockSize = (uint32_t)seqSize;
                }
                if (split) {
                    /* end the current block with the first lastLits literals of this sequence ;
                     * the sequence keeps the remaining literals and starts the next block */
                    const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
                    generatedSequences[nbSeqGenerated++] = endBlock;
                    assert(lastLits <= seq.litLength);
                    seq.litLength -= lastLits;
                    blockSize = (uint32_t)(seqSize - lastLits);
                } else {
                    /* NOTE(review): when the block was just ended above, blockSize already
                     * equals seqSize, so seqSize is counted twice here. This over-estimates
                     * the block size (blocks end earlier than required) - confirm intent. */
                    blockSize += seqSize;
                }
            }
            generatedSequences[nbSeqGenerated++] = seq;
            isFirstSequence = 0;
        }
    }

    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        /* always end sequences with a block delimiter */
        const ZSTD_Sequence endBlock = {0, 0, 0, 0};
        assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
        generatedSequences[nbSeqGenerated++] = endBlock;
    }
    return nbSeqGenerated;
}

/* Extracts the literals of `src` into `dst`, following `seqs` :
 * for each sequence, its litLength bytes are copied, then its matchLength
 * bytes are skipped in `src`.
 * `dst` must keep at least 8 unused bytes after the last literal (checked by assert).
 * @return : nb of literal bytes written into `dst`.
 */
static size_t
transferLiterals(void* dst, size_t dstCapacity, const ZSTD_Sequence* seqs, size_t nbSeqs, const void* src, size_t srcSize)
{
    char* const litStart = dst;
    const char* const srcStart = src;
    size_t litPos = 0;   /* write position within dst */
    size_t srcPos = 0;   /* read position within src */
    size_t seqNb = 0;

    /* only referenced by assert() : avoid unused warnings when asserts are disabled */
    (void)dstCapacity;
    (void)srcSize;

    while (seqNb < nbSeqs) {
        const size_t nbLits = seqs[seqNb].litLength;
        const size_t nbMatchBytes = seqs[seqNb].matchLength;
        assert(litPos + nbLits < dstCapacity);
        assert(srcPos + nbLits + nbMatchBytes <= srcSize);
        memcpy(litStart + litPos, srcStart + srcPos, nbLits);
        litPos += nbLits;
        srcPos += nbLits + nbMatchBytes;
        seqNb++;
    }
    assert(dstCapacity - litPos >= 8);
    return litPos;
}

/**
 * Round-trip test of ZSTD_compressSequencesAndLiterals(), using the global
 * `cctx` and `dctx`.
 *
 * The literals of `src` are first extracted into a temporary buffer
 * (see transferLiterals()), then compressed together with `seqs`.
 * On success, the frame is decompressed into `result` and compared with `src`.
 *
 * result / resultCapacity         : destination of the decompression
 * compressed / compressedCapacity : destination of the compression
 * src / srcSize                   : original content described by `seqs`
 * seqs / nbSeqs                   : sequences to compress
 *
 * @return : 0 when compression fails with ZSTD_error_cannotProduce_uncompressedBlock
 *           or ZSTD_error_dstSize_tooSmall (both accepted, no round-trip performed),
 *           otherwise the decompressed size, which is checked to equal srcSize.
 *           Any other error, or any mismatch, triggers a FUZZ assertion.
 */
static size_t roundTripTest_compressSequencesAndLiterals(
                    void* result, size_t resultCapacity,
                    void* compressed, size_t compressedCapacity,
                    const void* src, size_t srcSize,
                    const ZSTD_Sequence* seqs, size_t nbSeqs)
{
    /* 8 extra bytes : transferLiterals() requires that much slack after the last literal */
    size_t const litCapacity = srcSize + 8;
    void* const literals = malloc(litCapacity);
    size_t cSize, litSize;

    /* assert() is compiled out when NDEBUG is defined, which would leave the
     * allocation unchecked : use FUZZ_ASSERT(), like the other allocation
     * checks of this file. */
    FUZZ_ASSERT(literals);
    litSize = transferLiterals(literals, litCapacity, seqs, nbSeqs, src, srcSize);

    cSize = ZSTD_compressSequencesAndLiterals(cctx,
                                compressed, compressedCapacity,
                                   seqs, nbSeqs,
                                   literals, litSize, litCapacity, srcSize);
    /* literals are no longer needed once compression is done, whatever its outcome */
    free(literals);
    if (ZSTD_getErrorCode(cSize) == ZSTD_error_cannotProduce_uncompressedBlock) {
        /* Valid scenario : ZSTD_compressSequencesAndLiterals cannot generate uncompressed blocks */
        return 0;
    }
    if (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall) {
        /* Valid scenario : in explicit delimiter mode,
         * it might be possible for the compressed size to outgrow dstCapacity.
         * In which case, it's still a valid fuzzer scenario,
         * but no roundtrip shall be possible */
        return 0;
    }

    /* round-trip */
    FUZZ_ZASSERT(cSize);
    {   size_t const dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
        FUZZ_ZASSERT(dSize);
        FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
        FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
        return dSize;
    }
}

/* Compresses `src` from its sequences with ZSTD_compressSequences(),
 * then decompresses the frame into `result` and checks it matches `src`.
 * Uses the global cctx / dctx, and references the global cdict / ddict when hasDict != 0.
 * When the cctx is set to explicit block delimiters without sequence validation,
 * roundTripTest_compressSequencesAndLiterals() is run first.
 * @return : decompressed size (checked to equal srcSize),
 *           or 0 when, in explicit delimiters mode, the compressed data
 *           did not fit into `compressed` (no round-trip in this case).
 */
static size_t roundTripTest(void* result, size_t resultCapacity,
                            void* compressed, size_t compressedCapacity,
                            const void* src, size_t srcSize,
                            const ZSTD_Sequence* seqs, size_t nbSeqs,
                            unsigned hasDict,
                            ZSTD_SequenceFormat_e mode)
{
    int delimiterMode;
    int validateSeqs;

    if (hasDict) {
        FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
        FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
    }

    /* compressSequencesAndLiterals() only supports explicitBlockDelimiters and no validation */
    FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_blockDelimiters, &delimiterMode));
    FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_validateSequences, &validateSeqs));
    if (!validateSeqs && (delimiterMode == ZSTD_sf_explicitBlockDelimiters)) {
        FUZZ_ZASSERT(roundTripTest_compressSequencesAndLiterals(result, resultCapacity, compressed, compressedCapacity, src, srcSize, seqs, nbSeqs));
    }

    {   size_t const compressedSize = ZSTD_compressSequences(cctx,
                                            compressed, compressedCapacity,
                                            seqs, nbSeqs,
                                            src, srcSize);
        if ( (mode == ZSTD_sf_explicitBlockDelimiters)
          && (ZSTD_getErrorCode(compressedSize) == ZSTD_error_dstSize_tooSmall) ) {
            /* Accepted outcome : with explicit delimiters, the compressed size
             * may outgrow the destination capacity.
             * The input remains a valid fuzzer scenario, it just can't round-trip. */
            return 0;
        }

        /* round-trip */
        FUZZ_ZASSERT(compressedSize);
        {   size_t const regenSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, compressedSize);
            FUZZ_ZASSERT(regenSize);
            FUZZ_ASSERT_MSG(regenSize == srcSize, "Incorrect regenerated size");
            FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
            return regenSize;
        }
    }
}

/**
 * Fuzzer entry point.
 *
 * Uses the fuzzer input (`src`, `size`) as a stream of decisions to :
 * 1. select a window log, a compression level and a sequence format,
 * 2. generate an array of sequences, and the source buffer they describe,
 * 3. compress that source from its sequences, decompress, and compare
 *    (see roundTripTest()).
 *
 * Contexts and work buffers are kept in file-scope globals and created on
 * first use. Unless STATEFUL_FUZZING is defined, the contexts, the sequences,
 * the generated source and the literals buffer are released before returning.
 * The dictionary buffer, cdict and ddict are not released by this function.
 *
 * Always returns 0 ; failures are reported through FUZZ assertions.
 */
int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
{
    FUZZ_SEQ_PROD_SETUP();

    void* rBuf;
    size_t rBufSize;
    void* cBuf;
    size_t cBufSize;
    size_t generatedSrcSize;
    size_t nbSequences;
    size_t dictSize = 0;
    unsigned hasDict;
    unsigned wLog;
    int cLevel;
    ZSTD_SequenceFormat_e mode;

    FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
    FUZZ_ASSERT(producer);

    /* contexts are created lazily, and can be reused across calls */
    if (!cctx) {
        cctx = ZSTD_createCCtx();
        FUZZ_ASSERT(cctx);
    }
    if (!dctx) {
        dctx = ZSTD_createDCtx();
        FUZZ_ASSERT(dctx);
    }

    /* Generate window log first so we don't generate offsets too large */
    wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
    cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
    mode = (ZSTD_SequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);

    /* start from a clean context, then apply the parameters of this run */
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)wLog);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
    /* NOTE(review): validation is always enabled here, while roundTripTest() only
     * runs roundTripTest_compressSequencesAndLiterals() when validation is disabled :
     * that path is therefore never exercised by this target - confirm intent. */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, (int)mode);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);

    /* the literals buffer is only (re)generated when it does not exist yet ;
     * in that case, its seed is drawn from the producer */
    if (!literalsBuffer) {
        literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
        FUZZ_ASSERT(literalsBuffer);
        literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
    }

    if (!dictBuffer) { /* Generate global dictionary buffer */
        ZSTD_compressionParameters cParams;

        /* Generate a large dictionary buffer */
        /* calloc() : the dictionary content is all zeroes */
        dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
        FUZZ_ASSERT(dictBuffer);

        /* Create global cdict and ddict */
        cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
        cParams.minMatch = ZSTD_MINMATCH_MIN;
        cParams.hashLog = ZSTD_HASHLOG_MIN;
        cParams.chainLog = ZSTD_CHAINLOG_MIN;

        /* both dictionaries reference dictBuffer (ZSTD_dlm_byRef) as raw content :
         * dictBuffer must therefore remain allocated as long as they are in use */
        cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
        ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
        FUZZ_ASSERT(cdict);
        FUZZ_ASSERT(ddict);
    }

    FUZZ_ASSERT(cdict);
    FUZZ_ASSERT(ddict);

    /* dictionary usage is decided per input ; dictSize stays 0 when no dictionary is used */
    hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    if (hasDict) {
        dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
    }

    if (!generatedSequences) {
        generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
    }
    if (!generatedSrc) {
        generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
    }

    /* sequences are generated first, then "decoded" to build the source they describe */
    nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
    generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);

    /* Note : in explicit block delimiters mode,
     * the fuzzer might generate a lot of small blocks.
     * In which case, the final compressed size might be > ZSTD_compressBound().
     * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
     * The test just doesn't roundtrip. */
    cBufSize = ZSTD_compressBound(generatedSrcSize);
    cBuf = FUZZ_malloc(cBufSize);

    /* the regenerated content is expected to have exactly the size of the generated source */
    rBufSize = generatedSrcSize;
    rBuf = FUZZ_malloc(rBufSize);

    {   const size_t result = roundTripTest(rBuf, rBufSize,
                                        cBuf, cBufSize,
                                        generatedSrc, generatedSrcSize,
                                        generatedSequences, nbSequences,
                                        hasDict, mode);
        FUZZ_ASSERT(result <= generatedSrcSize);  /* can be 0 when no round-trip */
    }

    /* per-call buffers are always released */
    free(rBuf);
    free(cBuf);
    FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
    /* stateless mode : drop contexts and work buffers, so that the next call re-creates them */
    ZSTD_freeCCtx(cctx); cctx = NULL;
    ZSTD_freeDCtx(dctx); dctx = NULL;
    free(generatedSequences); generatedSequences = NULL;
    free(generatedSrc); generatedSrc = NULL;
    free(literalsBuffer); literalsBuffer = NULL;
#endif
    FUZZ_SEQ_PROD_TEARDOWN();
    return 0;
}

Coverage Report

Created: 2025-08-26 06:15

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) Meta Platforms, Inc. and affiliates.
3		* All rights reserved.
4		*
5		* This source code is licensed under both the BSD-style license (found in the
6		* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7		* in the COPYING file in the root directory of this source tree).
8		* You may select, at your option, one of the above-listed licenses.
9		*/
10
11		/**
12		* This fuzz target performs a zstd round-trip test by generating an arbitrary
13		* array of sequences, generating the associated source buffer, calling
14		* ZSTD_compressSequences(), and then decompresses and compares the result with
15		* the original generated source buffer.
16		*/
17
18		#define ZSTD_STATIC_LINKING_ONLY
19		#include "zstd_errors.h"
20
21		#include <stddef.h>
22		#include <stdlib.h>
23		#include <stdio.h>
24		#include <string.h>
25		#include <time.h>
26		#include "fuzz_helpers.h"
27		#include "zstd_helpers.h"
28		#include "fuzz_data_producer.h"
29		#include "fuzz_third_party_seq_prod.h"
30
31		static ZSTD_CCtx* cctx = NULL;
32		static ZSTD_DCtx* dctx = NULL;
33		static void* literalsBuffer = NULL;
34		static void* generatedSrc = NULL;
35		static ZSTD_Sequence* generatedSequences = NULL;
36
37		static void* dictBuffer = NULL;
38		static ZSTD_CDict* cdict = NULL;
39		static ZSTD_DDict* ddict = NULL;
40
41	18.8M	#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
42	26.1k	#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
43	6.53k	#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
44	2.67k	#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
45	4.72M	#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
46
47		/* Deterministic random number generator */
48	6.84G	#define FUZZ_RDG_rotl32(x,r) ((x << r) \| (x >> (32 - r)))
49		static uint32_t FUZZ_RDG_rand(uint32_t* src)
50	6.84G	{
51	6.84G	static const uint32_t prime1 = 2654435761U;
52	6.84G	static const uint32_t prime2 = 2246822519U;
53	6.84G	uint32_t rand32 = *src;
54	6.84G	rand32 *= prime1;
55	6.84G	rand32 ^= prime2;
56	6.84G	rand32 = FUZZ_RDG_rotl32(rand32, 13);
57	6.84G	*src = rand32;
58	6.84G	return rand32 >> 5;
59	6.84G	}
60
61		/* Make a pseudorandom string - this simple function exists to avoid
62		* taking a dependency on datagen.h to have RDG_genBuffer().
63		*/
64	6.53k	static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
65	6.53k	const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
66	6.53k	uint32_t seed = FUZZ_dataProducer_uint32(producer);
67	6.53k	if (size) {
68	6.84G	for (size_t n = 0; n < size; n++) {
69	6.84G	int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
70	6.84G	str[n] = charset[key];
71	6.84G	}
72	6.53k	}
73	6.53k	return str;
74	6.53k	}
75
76		/* Returns size of source buffer */
77		static size_t decodeSequences(void* dst, size_t nbSequences,
78		size_t literalsSize,
79		const void* dict, size_t dictSize,
80		ZSTD_SequenceFormat_e mode)
81	6.53k	{
82	6.53k	const uint8_t* litPtr = literalsBuffer;
83	6.53k	const uint8_t* const litBegin = literalsBuffer;
84	6.53k	const uint8_t* const litEnd = litBegin + literalsSize;
85	6.53k	const uint8_t* dictPtr = dict;
86	6.53k	uint8_t* op = dst;
87	6.53k	const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
88	6.53k	size_t generatedSrcBufferSize = 0;
89	6.53k	size_t bytesWritten = 0;
90
91	6.38M	for (size_t i = 0; i < nbSequences; ++i) {
92		/* block boundary */
93	6.37M	if (generatedSequences[i].offset == 0)
94	6.37M	FUZZ_ASSERT(generatedSequences[i].matchLength == 0);
95
96	6.37M	if (litPtr + generatedSequences[i].litLength > litEnd) {
97	0	litPtr = litBegin;
98	0	}
99	6.37M	memcpy(op, litPtr, generatedSequences[i].litLength);
100	6.37M	bytesWritten += generatedSequences[i].litLength;
101	6.37M	op += generatedSequences[i].litLength;
102	6.37M	litPtr += generatedSequences[i].litLength;
103
104		/* Copy over the match */
105	6.37M	{ size_t matchLength = generatedSequences[i].matchLength;
106	6.37M	size_t j = 0;
107	6.37M	size_t k = 0;
108	6.37M	if (dictSize != 0) {
109	3.63M	if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
110	22.5k	size_t dictOffset = generatedSequences[i].offset - bytesWritten;
111	22.5k	size_t matchInDict = MIN(matchLength, dictOffset);
112	90.4M	for (; k < matchInDict; ++k) {
113	90.4M	op[k] = dictPtr[dictSize - dictOffset + k];
114	90.4M	}
115	22.5k	matchLength -= matchInDict;
116	22.5k	op += matchInDict;
117	22.5k	}
118	3.63M	}
119	308M	for (; j < matchLength; ++j) {
120	302M	op[j] = op[(ptrdiff_t)(j - generatedSequences[i].offset)];
121	302M	}
122	6.37M	op += j;
123	6.37M	FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
124	6.37M	bytesWritten += generatedSequences[i].matchLength;
125	6.37M	}
126	6.37M	}
127	6.53k	generatedSrcBufferSize = bytesWritten;
128	6.53k	FUZZ_ASSERT(litPtr <= litEnd);
129	6.53k	if (mode == ZSTD_sf_noBlockDelimiters) {
130	2.84k	const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
131	2.84k	if (lastLLSize <= (uint32_t)(oend - op)) {
132	417	memcpy(op, litPtr, lastLLSize);
133	417	generatedSrcBufferSize += lastLLSize;
134	417	} }
135	6.53k	return generatedSrcBufferSize;
136	6.53k	}
137
138		/* Returns nb sequences generated
139		* Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
140		* However, it can fail with ZSTD_sf_explicitBlockDelimiters,
141		* due to potential lack of space in
142		*/
143		static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
144		size_t literalsSizeLimit, size_t dictSize,
145		size_t windowLog, ZSTD_SequenceFormat_e mode)
146	6.53k	{
147	6.53k	const uint32_t repCode = 0; /* not used by sequence ingestion api */
148	6.53k	size_t windowSize = 1ULL << windowLog;
149	6.53k	size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
150	6.53k	uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
151	6.53k	uint32_t bytesGenerated = 0;
152	6.53k	uint32_t nbSeqGenerated = 0;
153	6.53k	uint32_t isFirstSequence = 1;
154	6.53k	uint32_t blockSize = 0;
155
156	6.53k	if (mode == ZSTD_sf_explicitBlockDelimiters) {
157		/* ensure that no sequence can be larger than one block */
158	3.68k	literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
159	3.68k	matchLengthMax = MIN(matchLengthMax, (uint32_t)blockSizeMax/2);
160	3.68k	}
161
162	4.72M	while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
163	4.72M	&& bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
164	4.72M	&& !FUZZ_dataProducer_empty(producer)) {
165	4.71M	uint32_t matchLength;
166	4.71M	uint32_t matchBound = matchLengthMax;
167	4.71M	uint32_t offset;
168	4.71M	uint32_t offsetBound;
169	4.71M	const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
170	4.71M	const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
171	4.71M	bytesGenerated += litLength;
172	4.71M	if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
173	51	break;
174	51	}
175	4.71M	offsetBound = (bytesGenerated > windowSize) ? (uint32_t)windowSize : bytesGenerated + (uint32_t)dictSize;
176	4.71M	offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
177	4.71M	if (dictSize > 0 && bytesGenerated <= windowSize) {
178		/* Prevent match length from being such that it would be associated with an offset too large
179		* from the decoder's perspective. If not possible (match would be too small),
180		* then reduce the offset if necessary.
181		*/
182	187k	const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
183	187k	if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
184	23	const uint32_t newOffsetBound = offsetBound > windowSize ? (uint32_t)windowSize : offsetBound;
185	23	offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
186	187k	} else {
187	187k	matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
188	187k	}
189	187k	}
190	4.71M	matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
191	4.71M	bytesGenerated += matchLength;
192	4.71M	if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
193	33	break;
194	33	}
195	4.71M	{ ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
196	4.71M	const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
197	4.71M	#define SPLITPROB 6000
198	4.71M	#define SPLITMARK 5234
199	4.71M	const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
200	4.71M	if (mode == ZSTD_sf_explicitBlockDelimiters) {
201	4.17M	const size_t seqSize = seq.litLength + seq.matchLength;
202	4.17M	if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */
203	44.9k	const ZSTD_Sequence endBlock = {0, 0, 0, 0};
204	44.9k	generatedSequences[nbSeqGenerated++] = endBlock;
205	44.9k	blockSize = (uint32_t)seqSize;
206	44.9k	}
207	4.17M	if (split) {
208	1.61M	const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
209	1.61M	generatedSequences[nbSeqGenerated++] = endBlock;
210	1.61M	assert(lastLits <= seq.litLength);
211	1.61M	seq.litLength -= lastLits;
212	1.61M	blockSize = (uint32_t)(seqSize - lastLits);
213	2.55M	} else {
214	2.55M	blockSize += seqSize;
215	2.55M	}
216	4.17M	}
217	4.71M	generatedSequences[nbSeqGenerated++] = seq;
218	4.71M	isFirstSequence = 0;
219	4.71M	}
220	4.71M	}
221
222	6.53k	if (mode == ZSTD_sf_explicitBlockDelimiters) {
223		/* always end sequences with a block delimiter */
224	3.68k	const ZSTD_Sequence endBlock = {0, 0, 0, 0};
225	3.68k	assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
226	3.68k	generatedSequences[nbSeqGenerated++] = endBlock;
227	3.68k	}
228	6.53k	return nbSeqGenerated;
229	6.53k	}
230
231		static size_t
232		transferLiterals(void* dst, size_t dstCapacity, const ZSTD_Sequence* seqs, size_t nbSeqs, const void* src, size_t srcSize)
233	0	{
234	0	size_t n;
235	0	char* op = dst;
236	0	char* const oend = op + dstCapacity;
237	0	const char* ip = src;
238	0	const char* const iend = ip + srcSize;
239	0	for (n=0; n<nbSeqs; n++) {
240	0	size_t litLen = seqs[n].litLength;
241	0	size_t mlen = seqs[n].matchLength;
242	0	assert(op + litLen < oend); (void)oend;
243	0	assert(ip + litLen + mlen <= iend); (void)iend;
244	0	memcpy(op, ip, litLen);
245	0	op += litLen;
246	0	ip += litLen + mlen;
247	0	}
248	0	assert(oend - op >= 8);
249	0	return (size_t)(op - (char*)dst);
250	0	}
251
252		static size_t roundTripTest_compressSequencesAndLiterals(
253		void* result, size_t resultCapacity,
254		void* compressed, size_t compressedCapacity,
255		const void* src, size_t srcSize,
256		const ZSTD_Sequence* seqs, size_t nbSeqs)
257	0	{
258	0	size_t const litCapacity = srcSize + 8;
259	0	void* literals = malloc(litCapacity);
260	0	size_t cSize, litSize;
261
262	0	assert(literals);
263	0	litSize = transferLiterals(literals, litCapacity, seqs, nbSeqs, src, srcSize);
264
265	0	cSize = ZSTD_compressSequencesAndLiterals(cctx,
266	0	compressed, compressedCapacity,
267	0	seqs, nbSeqs,
268	0	literals, litSize, litCapacity, srcSize);
269	0	free(literals);
270	0	if (ZSTD_getErrorCode(cSize) == ZSTD_error_cannotProduce_uncompressedBlock) {
271		/* Valid scenario : ZSTD_compressSequencesAndLiterals cannot generate uncompressed blocks */
272	0	return 0;
273	0	}
274	0	if (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall) {
275		/* Valid scenario : in explicit delimiter mode,
276		* it might be possible for the compressed size to outgrow dstCapacity.
277		* In which case, it's still a valid fuzzer scenario,
278		* but no roundtrip shall be possible */
279	0	return 0;
280	0	}
281
282		/* round-trip */
283	0	FUZZ_ZASSERT(cSize);
284	0	{ size_t const dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
285	0	FUZZ_ZASSERT(dSize);
286	0	FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
287	0	FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
288	0	return dSize;
289	0	}
290	0	}
291
292		static size_t roundTripTest(void* result, size_t resultCapacity,
293		void* compressed, size_t compressedCapacity,
294		const void* src, size_t srcSize,
295		const ZSTD_Sequence* seqs, size_t nbSeqs,
296		unsigned hasDict,
297		ZSTD_SequenceFormat_e mode)
298	6.53k	{
299	6.53k	size_t cSize;
300	6.53k	size_t dSize;
301
302	6.53k	if (hasDict) {
303	2.66k	FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
304	2.66k	FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
305	2.66k	}
306
307	6.53k	{ int blockMode, validation;
308		/* compressSequencesAndLiterals() only supports explicitBlockDelimiters and no validation */
309	6.53k	FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_blockDelimiters, &blockMode));
310	6.53k	FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_validateSequences, &validation));
311	6.53k	if ((blockMode == ZSTD_sf_explicitBlockDelimiters) && (!validation)) {
312	0	FUZZ_ZASSERT(roundTripTest_compressSequencesAndLiterals(result, resultCapacity, compressed, compressedCapacity, src, srcSize, seqs, nbSeqs));
313	0	}
314	6.53k	}
315
316	6.53k	cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
317	6.53k	seqs, nbSeqs,
318	6.53k	src, srcSize);
319	6.53k	if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall)
320	6.53k	&& (mode == ZSTD_sf_explicitBlockDelimiters) ) {
321		/* Valid scenario : in explicit delimiter mode,
322		* it might be possible for the compressed size to outgrow dstCapacity.
323		* In which case, it's still a valid fuzzer scenario,
324		* but no roundtrip shall be possible */
325	161	return 0;
326	161	}
327		/* round-trip */
328	6.37k	FUZZ_ZASSERT(cSize);
329	6.37k	dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
330	6.37k	FUZZ_ZASSERT(dSize);
331	6.37k	FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
332	6.37k	FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
333	6.37k	return dSize;
334	6.37k	}
335
336		int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
337	6.53k	{
338	6.53k	FUZZ_SEQ_PROD_SETUP();
339
340	6.53k	void* rBuf;
341	6.53k	size_t rBufSize;
342	6.53k	void* cBuf;
343	6.53k	size_t cBufSize;
344	6.53k	size_t generatedSrcSize;
345	6.53k	size_t nbSequences;
346	6.53k	size_t dictSize = 0;
347	6.53k	unsigned hasDict;
348	6.53k	unsigned wLog;
349	6.53k	int cLevel;
350	6.53k	ZSTD_SequenceFormat_e mode;
351
352	6.53k	FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
353	6.53k	FUZZ_ASSERT(producer);
354
355	6.53k	if (!cctx) {
356	6.53k	cctx = ZSTD_createCCtx();
357	6.53k	FUZZ_ASSERT(cctx);
358	6.53k	}
359	6.53k	if (!dctx) {
360	6.53k	dctx = ZSTD_createDCtx();
361	6.53k	FUZZ_ASSERT(dctx);
362	6.53k	}
363
364		/* Generate window log first so we don't generate offsets too large */
365	6.53k	wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
366	6.53k	cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
367	6.53k	mode = (ZSTD_SequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
368
369	6.53k	ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
370	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
371	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
372	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)wLog);
373	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
374	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
375	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, (int)mode);
376	6.53k	ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
377
378	6.53k	if (!literalsBuffer) {
379	6.53k	literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
380	6.53k	FUZZ_ASSERT(literalsBuffer);
381	6.53k	literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
382	6.53k	}
383
384	6.53k	if (!dictBuffer) { /* Generate global dictionary buffer */
385	1	ZSTD_compressionParameters cParams;
386
387		/* Generate a large dictionary buffer */
388	1	dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
389	1	FUZZ_ASSERT(dictBuffer);
390
391		/* Create global cdict and ddict */
392	1	cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
393	1	cParams.minMatch = ZSTD_MINMATCH_MIN;
394	1	cParams.hashLog = ZSTD_HASHLOG_MIN;
395	1	cParams.chainLog = ZSTD_CHAINLOG_MIN;
396
397	1	cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
398	1	ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
399	1	FUZZ_ASSERT(cdict);
400	1	FUZZ_ASSERT(ddict);
401	1	}
402
403	6.53k	FUZZ_ASSERT(cdict);
404	6.53k	FUZZ_ASSERT(ddict);
405
406	6.53k	hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
407	6.53k	if (hasDict) {
408	2.66k	dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
409	2.66k	}
410
411	6.53k	if (!generatedSequences) {
412	6.53k	generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
413	6.53k	}
414	6.53k	if (!generatedSrc) {
415	6.53k	generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
416	6.53k	}
417
418	6.53k	nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
419	6.53k	generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
420
421		/* Note : in explicit block delimiters mode,
422		* the fuzzer might generate a lot of small blocks.
423		* In which case, the final compressed size might be > ZSTD_compressBound().
424		* This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
425		* The test just doesn't roundtrip. */
426	6.53k	cBufSize = ZSTD_compressBound(generatedSrcSize);
427	6.53k	cBuf = FUZZ_malloc(cBufSize);
428
429	6.53k	rBufSize = generatedSrcSize;
430	6.53k	rBuf = FUZZ_malloc(rBufSize);
431
432	6.53k	{ const size_t result = roundTripTest(rBuf, rBufSize,
433	6.53k	cBuf, cBufSize,
434	6.53k	generatedSrc, generatedSrcSize,
435	6.53k	generatedSequences, nbSequences,
436	6.53k	hasDict, mode);
437	6.53k	FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */
438	6.53k	}
439
440	0	free(rBuf);
441	6.53k	free(cBuf);
442	6.53k	FUZZ_dataProducer_free(producer);
443	6.53k	#ifndef STATEFUL_FUZZING
444	6.53k	ZSTD_freeCCtx(cctx); cctx = NULL;
445	6.53k	ZSTD_freeDCtx(dctx); dctx = NULL;
446	6.53k	free(generatedSequences); generatedSequences = NULL;
447	6.53k	free(generatedSrc); generatedSrc = NULL;
448	6.53k	free(literalsBuffer); literalsBuffer = NULL;
449	6.53k	#endif
450	6.53k	FUZZ_SEQ_PROD_TEARDOWN();
451	6.53k	return 0;
452	6.53k	}