Coverage Report

Created: 2025-07-12 06:54

/src/zstd/tests/fuzz/zstd_helpers.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#define ZSTD_STATIC_LINKING_ONLY
12
#define ZDICT_STATIC_LINKING_ONLY
13
14
#include <string.h>
15
16
#include "zstd_helpers.h"
17
#include "fuzz_helpers.h"
18
#include "zstd.h"
19
#include "zdict.h"
20
#include "sequence_producer.h"
21
#include "fuzz_third_party_seq_prod.h"
22
23
/* Compression-level bounds exported by this helper file.
 * NOTE(review): presumably the lowest/highest levels the fuzz targets
 * exercise -- confirm against the callers. */
const int kMinClevel = -3;
24
const int kMaxClevel = 19;
25
26
/* Opaque state passed to ZSTD_registerSequenceProducer() when
 * FUZZ_THIRD_PARTY_SEQ_PROD is defined (see setSequenceProducerParams()).
 * It is NULL until something outside this file assigns it. */
void* FUZZ_seqProdState = NULL;
27
28
/**
 * set() :
 * Stores `value` for `param` in `cctx`.
 * The result of ZSTD_CCtx_setParameter() is checked with FUZZ_ZASSERT(),
 * so a rejected parameter is treated as a fuzzer failure rather than
 * being silently ignored.
 */
static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
{
    /* The call is kept inside the macro on purpose: the macro receives the
     * exact same expression as before. */
    FUZZ_ZASSERT(
        ZSTD_CCtx_setParameter(cctx, param, value)
    );
}
32
33
static unsigned produceParamValue(unsigned min, unsigned max,
34
5.26M
                                  FUZZ_dataProducer_t *producer) {
35
5.26M
    return FUZZ_dataProducer_uint32Range(producer, min, max);
36
5.26M
}
37
38
/**
 * setRand() :
 * Draws a value for the range [`min`, `max`] from `producer`
 * (via produceParamValue()) and applies it to `param` on `cctx`
 * (via set(), which asserts that the parameter was accepted).
 */
static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min,
                    unsigned max, FUZZ_dataProducer_t *producer)
{
    /* The unsigned draw is converted to set()'s `int value` parameter. */
    set(cctx, param, produceParamValue(min, max, producer));
}
43
44
/**
 * FUZZ_randomCParams() :
 * Builds a set of compression parameters from fuzzer entropy.
 *
 * Each field is drawn from `producer` with FUZZ_dataProducer_uint32Range(),
 * one draw per field. The draws happen in the order written below; that
 * order is part of the contract, since the same input must reproduce the
 * same parameters.
 *
 * @param srcSize   forwarded to ZSTD_adjustCParams()
 * @param producer  entropy source
 * @return the drawn parameters after ZSTD_adjustCParams(.., srcSize, 0)
 */
ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer)
{
    ZSTD_compressionParameters picked;

    /* Table sizes: lower bounds come from zstd, upper bounds are fixed here. */
    picked.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15);
    picked.hashLog   = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15);
    picked.chainLog  = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16);

    /* Match-search settings. */
    picked.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9);
    picked.minMatch  = FUZZ_dataProducer_uint32Range(producer,
                                                     ZSTD_MINMATCH_MIN,
                                                     ZSTD_MINMATCH_MAX);
    picked.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512);

    /* Any strategy between the two bounds zstd defines. */
    picked.strategy = FUZZ_dataProducer_uint32Range(producer,
                                                    ZSTD_STRATEGY_MIN,
                                                    ZSTD_STRATEGY_MAX);

    return ZSTD_adjustCParams(picked, srcSize, 0);
}
58
59
/**
 * FUZZ_randomFParams() :
 * Builds frame parameters from fuzzer entropy.
 * Three values are drawn from `producer`, each with bounds 0 and 1, in this
 * order: contentSizeFlag, checksumFlag, noDictIDFlag.
 *
 * @return the frame parameters holding the three drawn flags
 */
ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer)
{
    /* Each initializer is fully evaluated before the next declaration,
     * so the draw order matches the declaration order. */
    unsigned const wantContentSize = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    unsigned const wantChecksum    = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    unsigned const dropDictID      = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    ZSTD_frameParameters flags;

    flags.contentSizeFlag = wantContentSize;
    flags.checksumFlag = wantChecksum;
    flags.noDictIDFlag = dropDictID;
    return flags;
}
68
69
/**
 * FUZZ_randomParams() :
 * Builds a full ZSTD_parameters value from fuzzer entropy.
 * Compression parameters are drawn first (FUZZ_randomCParams()), then frame
 * parameters (FUZZ_randomFParams()); both consume from the same `producer`.
 *
 * @param srcSize   forwarded to FUZZ_randomCParams()
 * @param producer  entropy source
 */
ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
{
    ZSTD_parameters combined;

    /* Keep this order: it fixes how the input bytes map onto parameters. */
    combined.cParams = FUZZ_randomCParams(srcSize, producer);
    combined.fParams = FUZZ_randomFParams(producer);

    return combined;
}
76
77
4.35k
/**
 * setSequenceProducerParams() :
 * Registers an external sequence producer on `cctx` and sets the parameters
 * this file pairs with it.
 *
 * - With FUZZ_THIRD_PARTY_SEQ_PROD defined: registers FUZZ_thirdPartySeqProd
 *   with FUZZ_seqProdState and sets ZSTD_c_enableSeqProducerFallback to 1.
 *   `producer` is not consulted in this configuration.
 * - Otherwise: registers simpleSequenceProducer with a NULL state and draws
 *   ZSTD_c_enableSeqProducerFallback (bounds 0 and 1) from `producer`.
 *
 * In both configurations ZSTD_c_nbWorkers is then set to 0 and long distance
 * matching is set to ZSTD_ps_disable.
 */
static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer)
{
#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
    ZSTD_registerSequenceProducer(cctx, FUZZ_seqProdState, FUZZ_thirdPartySeqProd);
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1));
#else
    ZSTD_registerSequenceProducer(cctx, NULL, simpleSequenceProducer);
    setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer);
#endif

    /* Applied after registration, whichever producer was installed. */
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable));
}
100
101
void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer)
102
224k
{
103
224k
    ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer);
104
224k
    set(cctx, ZSTD_c_windowLog, cParams.windowLog);
105
224k
    set(cctx, ZSTD_c_hashLog, cParams.hashLog);
106
224k
    set(cctx, ZSTD_c_chainLog, cParams.chainLog);
107
224k
    set(cctx, ZSTD_c_searchLog, cParams.searchLog);
108
224k
    set(cctx, ZSTD_c_minMatch, cParams.minMatch);
109
224k
    set(cctx, ZSTD_c_targetLength, cParams.targetLength);
110
224k
    set(cctx, ZSTD_c_strategy, cParams.strategy);
111
    /* Select frame parameters */
112
224k
    setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer);
113
224k
    setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer);
114
224k
    setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer);
115
    /* Select long distance matching parameters */
116
224k
    setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer);
117
224k
    setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer);
118
224k
    setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN,
119
224k
            ZSTD_LDM_MINMATCH_MAX, producer);
120
224k
    setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX,
121
224k
            producer);
122
224k
    setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
123
224k
            ZSTD_LDM_HASHRATELOG_MAX, producer);
124
    /* Set misc parameters */
125
#ifndef ZSTD_MULTITHREAD
126
    // To reproduce with or without ZSTD_MULTITHREAD, we are going to use
127
    // the same amount of entropy.
128
    unsigned const nbWorkers_value = produceParamValue(0, 2, producer);
129
    unsigned const rsyncable_value = produceParamValue(0, 1, producer);
130
    (void)nbWorkers_value;
131
    (void)rsyncable_value;
132
    set(cctx, ZSTD_c_nbWorkers, 0);
133
    set(cctx, ZSTD_c_rsyncable, 0);
134
#else
135
224k
    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
136
224k
    setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
137
224k
#endif
138
224k
    setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
139
224k
    setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer);
140
224k
    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
141
224k
    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
142
224k
    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
143
224k
    setRand(cctx, ZSTD_c_blockSplitterLevel, 0, ZSTD_BLOCKSPLITTER_LEVEL_MAX, producer);
144
224k
    setRand(cctx, ZSTD_c_splitAfterSequences, 0, 2, producer);
145
224k
    setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
146
224k
    setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
147
224k
    setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
148
224k
    setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
149
224k
    setRand(cctx, ZSTD_c_repcodeResolution, 0, 2, producer);
150
224k
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
151
166k
      setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
152
166k
    }
153
224k
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
154
162k
      setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer);
155
162k
    }
156
157
#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
158
    setSequenceProducerParams(cctx, producer);
159
#else
160
224k
    if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) {
161
4.35k
        setSequenceProducerParams(cctx, producer);
162
219k
    } else {
163
219k
        ZSTD_registerSequenceProducer(cctx, NULL, NULL);
164
219k
    }
165
224k
#endif
166
224k
}
167
168
/**
 * FUZZ_train() :
 * Trains a dictionary on samples cut out of `src`.
 *
 * - Dictionary capacity is MAX(srcSize / 8, 1024) bytes; the sample buffer is
 *   11 times that size.
 * - 100 samples are taken. Each one starts at an offset drawn from `producer`
 *   with bounds 0 and MAX(srcSize, 1) - 1, and its length is capped by the
 *   bytes left in `src`, by the space left in the sample buffer, and by an
 *   even share of that space among the samples still to be taken.
 * - The unused tail of the sample buffer is zeroed before training with
 *   ZDICT_trainFromBuffer_fastCover() (accel 5, k 40, d 8, f 14, level 1).
 *
 * @param src       input bytes to sample from
 * @param srcSize   number of bytes readable at `src`; may be 0
 * @param producer  entropy source for the sample offsets
 * @return the dictionary buffer and its trained size. If training fails the
 *         buffer is freed here and a zeroed FUZZ_dict_t is returned.
 *         NOTE(review): on success the caller presumably releases `buff` with
 *         free(), as the error path here does -- confirm against callers.
 */
FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
{
    size_t const dictSize = MAX(srcSize / 8, 1024);
    size_t const totalSampleSize = dictSize * 11;
    FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize };
    char* const samples = (char*)FUZZ_malloc(totalSampleSize);
    unsigned const nbSamples = 100;
    size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples);
    size_t pos = 0;
    size_t sample;
    ZDICT_fastCover_params_t params;

    for (sample = 0; sample < nbSamples; ++sample) {
        size_t const remaining = totalSampleSize - pos;
        size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
        size_t const limit = MIN(srcSize - offset, remaining);
        size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
        /* With srcSize == 0, `src` may be NULL and toCopy is 0. Neither
         * NULL pointer arithmetic nor memcpy() with a null argument is
         * defined, even for a zero length, so skip empty copies. */
        if (toCopy > 0) {
            memcpy(samples + pos, (const char*)src + offset, toCopy);
        }
        pos += toCopy;
        samplesSizes[sample] = toCopy;
    }
    /* Zero whatever part of the sample buffer the samples did not fill. */
    memset(samples + pos, 0, totalSampleSize - pos);

    memset(&params, 0, sizeof(params));
    params.accel = 5;
    params.k = 40;
    params.d = 8;
    params.f = 14;
    params.zParams.compressionLevel = 1;
    dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
        samples, samplesSizes, nbSamples, params);
    if (ZSTD_isError(dict.size)) {
        /* Training failed: hand back an empty dictionary. */
        free(dict.buff);
        memset(&dict, 0, sizeof(dict));
    }

    free(samplesSizes);
    free(samples);

    return dict;
}