/src/zstd/tests/fuzz/zstd_helpers.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | #define ZSTD_STATIC_LINKING_ONLY |
12 | | #define ZDICT_STATIC_LINKING_ONLY |
13 | | |
14 | | #include <string.h> |
15 | | |
16 | | #include "zstd_helpers.h" |
17 | | #include "fuzz_helpers.h" |
18 | | #include "zstd.h" |
19 | | #include "zdict.h" |
20 | | #include "sequence_producer.h" |
21 | | #include "fuzz_third_party_seq_prod.h" |
22 | | |
/* Compression-level bounds exported by the fuzz helpers.
 * NOTE(review): presumably the range fuzz targets draw levels from - confirm
 * against the users of these constants. */
const int kMinClevel = -3;
const int kMaxClevel = 19;

/* Opaque state passed to ZSTD_registerSequenceProducer() when the build
 * defines FUZZ_THIRD_PARTY_SEQ_PROD; starts out NULL. */
void* FUZZ_seqProdState = NULL;
27 | | |
28 | | static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) |
29 | 6.83M | { |
30 | 6.83M | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); |
31 | 6.83M | } |
32 | | |
33 | | static unsigned produceParamValue(unsigned min, unsigned max, |
34 | 5.26M | FUZZ_dataProducer_t *producer) { |
35 | 5.26M | return FUZZ_dataProducer_uint32Range(producer, min, max); |
36 | 5.26M | } |
37 | | |
38 | | static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, |
39 | 5.26M | unsigned max, FUZZ_dataProducer_t *producer) { |
40 | 5.26M | unsigned const value = produceParamValue(min, max, producer); |
41 | 5.26M | set(cctx, param, value); |
42 | 5.26M | } |
43 | | |
44 | | ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) |
45 | 224k | { |
46 | | /* Select compression parameters */ |
47 | 224k | ZSTD_compressionParameters cParams; |
48 | 224k | cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); |
49 | 224k | cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); |
50 | 224k | cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); |
51 | 224k | cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); |
52 | 224k | cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, |
53 | 224k | ZSTD_MINMATCH_MAX); |
54 | 224k | cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); |
55 | 224k | cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); |
56 | 224k | return ZSTD_adjustCParams(cParams, srcSize, 0); |
57 | 224k | } |
58 | | |
59 | | ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) |
60 | 0 | { |
61 | | /* Select frame parameters */ |
62 | 0 | ZSTD_frameParameters fParams; |
63 | 0 | fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
64 | 0 | fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
65 | 0 | fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
66 | 0 | return fParams; |
67 | 0 | } |
68 | | |
69 | | ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) |
70 | 0 | { |
71 | 0 | ZSTD_parameters params; |
72 | 0 | params.cParams = FUZZ_randomCParams(srcSize, producer); |
73 | 0 | params.fParams = FUZZ_randomFParams(producer); |
74 | 0 | return params; |
75 | 0 | } |
76 | | |
77 | 4.35k | static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) { |
78 | | #ifdef FUZZ_THIRD_PARTY_SEQ_PROD |
79 | | ZSTD_registerSequenceProducer( |
80 | | cctx, |
81 | | FUZZ_seqProdState, |
82 | | FUZZ_thirdPartySeqProd |
83 | | ); |
84 | | #else |
85 | 4.35k | ZSTD_registerSequenceProducer( |
86 | 4.35k | cctx, |
87 | 4.35k | NULL, |
88 | 4.35k | simpleSequenceProducer |
89 | 4.35k | ); |
90 | 4.35k | #endif |
91 | | |
92 | | #ifdef FUZZ_THIRD_PARTY_SEQ_PROD |
93 | | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1)); |
94 | | #else |
95 | 4.35k | setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer); |
96 | 4.35k | #endif |
97 | 4.35k | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0)); |
98 | 4.35k | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable)); |
99 | 4.35k | } |
100 | | |
101 | | void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) |
102 | 224k | { |
103 | 224k | ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); |
104 | 224k | set(cctx, ZSTD_c_windowLog, cParams.windowLog); |
105 | 224k | set(cctx, ZSTD_c_hashLog, cParams.hashLog); |
106 | 224k | set(cctx, ZSTD_c_chainLog, cParams.chainLog); |
107 | 224k | set(cctx, ZSTD_c_searchLog, cParams.searchLog); |
108 | 224k | set(cctx, ZSTD_c_minMatch, cParams.minMatch); |
109 | 224k | set(cctx, ZSTD_c_targetLength, cParams.targetLength); |
110 | 224k | set(cctx, ZSTD_c_strategy, cParams.strategy); |
111 | | /* Select frame parameters */ |
112 | 224k | setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); |
113 | 224k | setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); |
114 | 224k | setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); |
115 | | /* Select long distance matching parameters */ |
116 | 224k | setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer); |
117 | 224k | setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); |
118 | 224k | setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, |
119 | 224k | ZSTD_LDM_MINMATCH_MAX, producer); |
120 | 224k | setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, |
121 | 224k | producer); |
122 | 224k | setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, |
123 | 224k | ZSTD_LDM_HASHRATELOG_MAX, producer); |
124 | | /* Set misc parameters */ |
125 | | #ifndef ZSTD_MULTITHREAD |
126 | | // To reproduce with or without ZSTD_MULTITHREAD, we are going to use |
127 | | // the same amount of entropy. |
128 | | unsigned const nbWorkers_value = produceParamValue(0, 2, producer); |
129 | | unsigned const rsyncable_value = produceParamValue(0, 1, producer); |
130 | | (void)nbWorkers_value; |
131 | | (void)rsyncable_value; |
132 | | set(cctx, ZSTD_c_nbWorkers, 0); |
133 | | set(cctx, ZSTD_c_rsyncable, 0); |
134 | | #else |
135 | 224k | setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); |
136 | 224k | setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); |
137 | 224k | #endif |
138 | 224k | setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer); |
139 | 224k | setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer); |
140 | 224k | setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); |
141 | 224k | setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); |
142 | 224k | setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); |
143 | 224k | setRand(cctx, ZSTD_c_blockSplitterLevel, 0, ZSTD_BLOCKSPLITTER_LEVEL_MAX, producer); |
144 | 224k | setRand(cctx, ZSTD_c_splitAfterSequences, 0, 2, producer); |
145 | 224k | setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); |
146 | 224k | setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer); |
147 | 224k | setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer); |
148 | 224k | setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer); |
149 | 224k | setRand(cctx, ZSTD_c_repcodeResolution, 0, 2, producer); |
150 | 224k | if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { |
151 | 166k | setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); |
152 | 166k | } |
153 | 224k | if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { |
154 | 162k | setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer); |
155 | 162k | } |
156 | | |
157 | | #ifdef FUZZ_THIRD_PARTY_SEQ_PROD |
158 | | setSequenceProducerParams(cctx, producer); |
159 | | #else |
160 | 224k | if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) { |
161 | 4.35k | setSequenceProducerParams(cctx, producer); |
162 | 219k | } else { |
163 | 219k | ZSTD_registerSequenceProducer(cctx, NULL, NULL); |
164 | 219k | } |
165 | 224k | #endif |
166 | 224k | } |
167 | | |
168 | | FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer) |
169 | 89.5k | { |
170 | 89.5k | size_t const dictSize = MAX(srcSize / 8, 1024); |
171 | 89.5k | size_t const totalSampleSize = dictSize * 11; |
172 | 89.5k | FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize }; |
173 | 89.5k | char* const samples = (char*)FUZZ_malloc(totalSampleSize); |
174 | 89.5k | unsigned nbSamples = 100; |
175 | 89.5k | size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples); |
176 | 89.5k | size_t pos = 0; |
177 | 89.5k | size_t sample = 0; |
178 | 89.5k | ZDICT_fastCover_params_t params; |
179 | | |
180 | 9.04M | for (sample = 0; sample < nbSamples; ++sample) { |
181 | 8.95M | size_t const remaining = totalSampleSize - pos; |
182 | 8.95M | size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); |
183 | 8.95M | size_t const limit = MIN(srcSize - offset, remaining); |
184 | 8.95M | size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); |
185 | 8.95M | memcpy(samples + pos, (const char*)src + offset, toCopy); |
186 | 8.95M | pos += toCopy; |
187 | 8.95M | samplesSizes[sample] = toCopy; |
188 | 8.95M | } |
189 | 89.5k | memset(samples + pos, 0, totalSampleSize - pos); |
190 | | |
191 | 89.5k | memset(¶ms, 0, sizeof(params)); |
192 | 89.5k | params.accel = 5; |
193 | 89.5k | params.k = 40; |
194 | 89.5k | params.d = 8; |
195 | 89.5k | params.f = 14; |
196 | 89.5k | params.zParams.compressionLevel = 1; |
197 | 89.5k | dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, |
198 | 89.5k | samples, samplesSizes, nbSamples, params); |
199 | 89.5k | if (ZSTD_isError(dict.size)) { |
200 | 273 | free(dict.buff); |
201 | 273 | memset(&dict, 0, sizeof(dict)); |
202 | 273 | } |
203 | | |
204 | 89.5k | free(samplesSizes); |
205 | 89.5k | free(samples); |
206 | | |
207 | 89.5k | return dict; |
208 | 89.5k | } |