/src/zstd-rs/zstd-safe/zstd-sys/zstd/lib/dictBuilder/fastcover.c
Line  | Count  | Source  | 
1  |  | /*  | 
2  |  |  * Copyright (c) Meta Platforms, Inc. and affiliates.  | 
3  |  |  * All rights reserved.  | 
4  |  |  *  | 
5  |  |  * This source code is licensed under both the BSD-style license (found in the  | 
6  |  |  * LICENSE file in the root directory of this source tree) and the GPLv2 (found  | 
7  |  |  * in the COPYING file in the root directory of this source tree).  | 
8  |  |  * You may select, at your option, one of the above-listed licenses.  | 
9  |  |  */  | 
10  |  |  | 
11  |  | /*-*************************************  | 
12  |  | *  Dependencies  | 
13  |  | ***************************************/  | 
14  |  | #include <stdio.h>  /* fprintf */  | 
15  |  | #include <stdlib.h> /* malloc, free, qsort */  | 
16  |  | #include <string.h> /* memset */  | 
17  |  | #include <time.h>   /* clock */  | 
18  |  |  | 
19  |  | #ifndef ZDICT_STATIC_LINKING_ONLY  | 
20  |  | #  define ZDICT_STATIC_LINKING_ONLY  | 
21  |  | #endif  | 
22  |  |  | 
23  |  | #include "../common/mem.h" /* read */  | 
24  |  | #include "../common/pool.h"  | 
25  |  | #include "../common/threading.h"  | 
26  |  | #include "../common/zstd_internal.h" /* includes zstd.h */  | 
27  |  | #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */  | 
28  |  | #include "../zdict.h"  | 
29  |  | #include "cover.h"  | 
30  |  |  | 
31  |  |  | 
32  |  | /*-*************************************  | 
33  |  | *  Constants  | 
34  |  | ***************************************/  | 
35  |  | /**  | 
36  |  | * There are 32bit indexes used to ref samples, so limit samples size to 4GB  | 
37  |  | * on 64bit builds.  | 
38  |  | * For 32bit builds we choose 1 GB.  | 
39  |  | * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large  | 
40  |  | * contiguous buffer, so 1GB is already a high limit.  | 
41  |  | */  | 
42  | 0  | #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))  | 
43  | 0  | #define FASTCOVER_MAX_F 31  | 
44  | 0  | #define FASTCOVER_MAX_ACCEL 10  | 
45  | 0  | #define FASTCOVER_DEFAULT_SPLITPOINT 0.75  | 
46  | 0  | #define DEFAULT_F 20  | 
47  | 0  | #define DEFAULT_ACCEL 1  | 
48  |  |  | 
49  |  |  | 
50  |  | /*-*************************************  | 
51  |  | *  Console display  | 
52  |  | ***************************************/  | 
53  |  | #ifndef LOCALDISPLAYLEVEL  | 
54  |  | static int g_displayLevel = 0;  | 
55  |  | #endif  | 
56  |  | #undef  DISPLAY  | 
57  |  | #define DISPLAY(...)                                                           \  | 
58  | 0  |   {                                                                            \ | 
59  | 0  |     fprintf(stderr, __VA_ARGS__);                                              \  | 
60  | 0  |     fflush(stderr);                                                            \  | 
61  | 0  |   }  | 
62  |  | #undef  LOCALDISPLAYLEVEL  | 
63  |  | #define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \  | 
64  | 0  |   if (displayLevel >= l) {                                                     \ | 
65  | 0  |     DISPLAY(__VA_ARGS__);                                                      \  | 
66  | 0  |   } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */  | 
67  |  | #undef  DISPLAYLEVEL  | 
68  | 0  | #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)  | 
69  |  |  | 
70  |  | #ifndef LOCALDISPLAYUPDATE  | 
71  |  | static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;  | 
72  |  | static clock_t g_time = 0;  | 
73  |  | #endif  | 
74  |  | #undef  LOCALDISPLAYUPDATE  | 
75  |  | #define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \  | 
76  | 0  |   if (displayLevel >= l) {                                                     \ | 
77  | 0  |     if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {             \ | 
78  | 0  |       g_time = clock();                                                        \  | 
79  | 0  |       DISPLAY(__VA_ARGS__);                                                    \  | 
80  | 0  |     }                                                                          \  | 
81  | 0  |   }  | 
82  |  | #undef  DISPLAYUPDATE  | 
83  | 0  | #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)  | 
84  |  |  | 
85  |  |  | 
86  |  | /*-*************************************  | 
87  |  | * Hash Functions  | 
88  |  | ***************************************/  | 
89  |  | /**  | 
90  |  |  * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector  | 
91  |  |  */  | 
92  | 0  | static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) { | 
93  | 0  |   if (d == 6) { | 
94  | 0  |     return ZSTD_hash6Ptr(p, f);  | 
95  | 0  |   }  | 
96  | 0  |   return ZSTD_hash8Ptr(p, f);  | 
97  | 0  | }  | 
98  |  |  | 
99  |  |  | 
100  |  | /*-*************************************  | 
101  |  | * Acceleration  | 
102  |  | ***************************************/  | 
103  |  | typedef struct { | 
104  |  |   unsigned finalize;    /* Percentage of training samples used for ZDICT_finalizeDictionary */  | 
105  |  |   unsigned skip;        /* Number of dmer skipped between each dmer counted in computeFrequency */  | 
106  |  | } FASTCOVER_accel_t;  | 
107  |  |  | 
108  |  |  | 
109  |  | static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { | 
110  |  |   { 100, 0 },   /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ | 
111  |  |   { 100, 0 },   /* accel = 1 */ | 
112  |  |   { 50, 1 },   /* accel = 2 */ | 
113  |  |   { 34, 2 },   /* accel = 3 */ | 
114  |  |   { 25, 3 },   /* accel = 4 */ | 
115  |  |   { 20, 4 },   /* accel = 5 */ | 
116  |  |   { 17, 5 },   /* accel = 6 */ | 
117  |  |   { 14, 6 },   /* accel = 7 */ | 
118  |  |   { 13, 7 },   /* accel = 8 */ | 
119  |  |   { 11, 8 },   /* accel = 9 */ | 
120  |  |   { 10, 9 },   /* accel = 10 */ | 
121  |  | };  | 
122  |  |  | 
123  |  |  | 
124  |  | /*-*************************************  | 
125  |  | * Context  | 
126  |  | ***************************************/  | 
127  |  | typedef struct { | 
128  |  |   const BYTE *samples;  | 
129  |  |   size_t *offsets;  | 
130  |  |   const size_t *samplesSizes;  | 
131  |  |   size_t nbSamples;  | 
132  |  |   size_t nbTrainSamples;  | 
133  |  |   size_t nbTestSamples;  | 
134  |  |   size_t nbDmers;  | 
135  |  |   U32 *freqs;  | 
136  |  |   unsigned d;  | 
137  |  |   unsigned f;  | 
138  |  |   FASTCOVER_accel_t accelParams;  | 
139  |  | } FASTCOVER_ctx_t;  | 
140  |  |  | 
141  |  |  | 
142  |  | /*-*************************************  | 
143  |  | *  Helper functions  | 
144  |  | ***************************************/  | 
145  |  | /**  | 
146  |  |  * Selects the best segment in an epoch.  | 
147  |  |  * Segments of are scored according to the function:  | 
148  |  |  *  | 
149  |  |  * Let F(d) be the frequency of all dmers with hash value d.  | 
150  |  |  * Let S_i be hash value of the dmer at position i of segment S which has length k.  | 
151  |  |  *  | 
152  |  |  *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) | 
153  |  |  *  | 
154  |  |  * Once the dmer with hash value d is in the dictionary we set F(d) = 0.  | 
155  |  |  */  | 
156  |  | static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,  | 
157  |  |                                               U32 *freqs, U32 begin, U32 end,  | 
158  |  |                                               ZDICT_cover_params_t parameters,  | 
159  | 0  |                                               U16* segmentFreqs) { | 
160  |  |   /* Constants */  | 
161  | 0  |   const U32 k = parameters.k;  | 
162  | 0  |   const U32 d = parameters.d;  | 
163  | 0  |   const U32 f = ctx->f;  | 
164  | 0  |   const U32 dmersInK = k - d + 1;  | 
165  |  |  | 
166  |  |   /* Try each segment (activeSegment) and save the best (bestSegment) */  | 
167  | 0  |   COVER_segment_t bestSegment = {0, 0, 0}; | 
168  | 0  |   COVER_segment_t activeSegment;  | 
169  |  |  | 
170  |  |   /* Reset the activeDmers in the segment */  | 
171  |  |   /* The activeSegment starts at the beginning of the epoch. */  | 
172  | 0  |   activeSegment.begin = begin;  | 
173  | 0  |   activeSegment.end = begin;  | 
174  | 0  |   activeSegment.score = 0;  | 
175  |  |  | 
176  |  |   /* Slide the activeSegment through the whole epoch.  | 
177  |  |    * Save the best segment in bestSegment.  | 
178  |  |    */  | 
179  | 0  |   while (activeSegment.end < end) { | 
180  |  |     /* Get hash value of current dmer */  | 
181  | 0  |     const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);  | 
182  |  |  | 
183  |  |     /* Add frequency of this index to score if this is the first occurrence of index in active segment */  | 
184  | 0  |     if (segmentFreqs[idx] == 0) { | 
185  | 0  |       activeSegment.score += freqs[idx];  | 
186  | 0  |     }  | 
187  |  |     /* Increment end of segment and segmentFreqs*/  | 
188  | 0  |     activeSegment.end += 1;  | 
189  | 0  |     segmentFreqs[idx] += 1;  | 
190  |  |     /* If the window is now too large, drop the first position */  | 
191  | 0  |     if (activeSegment.end - activeSegment.begin == dmersInK + 1) { | 
192  |  |       /* Get hash value of the dmer to be eliminated from active segment */  | 
193  | 0  |       const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);  | 
194  | 0  |       segmentFreqs[delIndex] -= 1;  | 
195  |  |       /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */  | 
196  | 0  |       if (segmentFreqs[delIndex] == 0) { | 
197  | 0  |         activeSegment.score -= freqs[delIndex];  | 
198  | 0  |       }  | 
199  |  |       /* Increment start of segment */  | 
200  | 0  |       activeSegment.begin += 1;  | 
201  | 0  |     }  | 
202  |  |  | 
203  |  |     /* If this segment is the best so far save it */  | 
204  | 0  |     if (activeSegment.score > bestSegment.score) { | 
205  | 0  |       bestSegment = activeSegment;  | 
206  | 0  |     }  | 
207  | 0  |   }  | 
208  |  |  | 
209  |  |   /* Zero out rest of segmentFreqs array */  | 
210  | 0  |   while (activeSegment.begin < end) { | 
211  | 0  |     const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);  | 
212  | 0  |     segmentFreqs[delIndex] -= 1;  | 
213  | 0  |     activeSegment.begin += 1;  | 
214  | 0  |   }  | 
215  |  | 
  | 
216  | 0  |   { | 
217  |  |     /*  Zero the frequency of hash value of each dmer covered by the chosen segment. */  | 
218  | 0  |     U32 pos;  | 
219  | 0  |     for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { | 
220  | 0  |       const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);  | 
221  | 0  |       freqs[i] = 0;  | 
222  | 0  |     }  | 
223  | 0  |   }  | 
224  |  | 
  | 
225  | 0  |   return bestSegment;  | 
226  | 0  | }  | 
227  |  |  | 
228  |  |  | 
229  |  | static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters,  | 
230  |  |                                      size_t maxDictSize, unsigned f,  | 
231  | 0  |                                      unsigned accel) { | 
232  |  |   /* k, d, and f are required parameters */  | 
233  | 0  |   if (parameters.d == 0 || parameters.k == 0) { | 
234  | 0  |     return 0;  | 
235  | 0  |   }  | 
236  |  |   /* d has to be 6 or 8 */  | 
237  | 0  |   if (parameters.d != 6 && parameters.d != 8) { | 
238  | 0  |     return 0;  | 
239  | 0  |   }  | 
240  |  |   /* k <= maxDictSize */  | 
241  | 0  |   if (parameters.k > maxDictSize) { | 
242  | 0  |     return 0;  | 
243  | 0  |   }  | 
244  |  |   /* d <= k */  | 
245  | 0  |   if (parameters.d > parameters.k) { | 
246  | 0  |     return 0;  | 
247  | 0  |   }  | 
248  |  |   /* 0 < f <= FASTCOVER_MAX_F*/  | 
249  | 0  |   if (f > FASTCOVER_MAX_F || f == 0) { | 
250  | 0  |     return 0;  | 
251  | 0  |   }  | 
252  |  |   /* 0 < splitPoint <= 1 */  | 
253  | 0  |   if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { | 
254  | 0  |     return 0;  | 
255  | 0  |   }  | 
256  |  |   /* 0 < accel <= 10 */  | 
257  | 0  |   if (accel > 10 || accel == 0) { | 
258  | 0  |     return 0;  | 
259  | 0  |   }  | 
260  | 0  |   return 1;  | 
261  | 0  | }  | 
262  |  |  | 
263  |  |  | 
264  |  | /**  | 
265  |  |  * Clean up a context initialized with `FASTCOVER_ctx_init()`.  | 
266  |  |  */  | 
267  |  | static void  | 
268  |  | FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx)  | 
269  | 0  | { | 
270  | 0  |     if (!ctx) return;  | 
271  |  |  | 
272  | 0  |     free(ctx->freqs);  | 
273  | 0  |     ctx->freqs = NULL;  | 
274  |  | 
  | 
275  | 0  |     free(ctx->offsets);  | 
276  | 0  |     ctx->offsets = NULL;  | 
277  | 0  | }  | 
278  |  |  | 
279  |  |  | 
280  |  | /**  | 
281  |  |  * Calculate for frequency of hash value of each dmer in ctx->samples  | 
282  |  |  */  | 
283  |  | static void  | 
284  |  | FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)  | 
285  | 0  | { | 
286  | 0  |     const unsigned f = ctx->f;  | 
287  | 0  |     const unsigned d = ctx->d;  | 
288  | 0  |     const unsigned skip = ctx->accelParams.skip;  | 
289  | 0  |     const unsigned readLength = MAX(d, 8);  | 
290  | 0  |     size_t i;  | 
291  | 0  |     assert(ctx->nbTrainSamples >= 5);  | 
292  | 0  |     assert(ctx->nbTrainSamples <= ctx->nbSamples);  | 
293  | 0  |     for (i = 0; i < ctx->nbTrainSamples; i++) { | 
294  | 0  |         size_t start = ctx->offsets[i];  /* start of current dmer */  | 
295  | 0  |         size_t const currSampleEnd = ctx->offsets[i+1];  | 
296  | 0  |         while (start + readLength <= currSampleEnd) { | 
297  | 0  |             const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d);  | 
298  | 0  |             freqs[dmerIndex]++;  | 
299  | 0  |             start = start + skip + 1;  | 
300  | 0  |         }  | 
301  | 0  |     }  | 
302  | 0  | }  | 
303  |  |  | 
304  |  |  | 
305  |  | /**  | 
306  |  |  * Prepare a context for dictionary building.  | 
307  |  |  * The context is only dependent on the parameter `d` and can be used multiple  | 
308  |  |  * times.  | 
309  |  |  * Returns 0 on success or error code on error.  | 
310  |  |  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.  | 
311  |  |  */  | 
312  |  | static size_t  | 
313  |  | FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,  | 
314  |  |                    const void* samplesBuffer,  | 
315  |  |                    const size_t* samplesSizes, unsigned nbSamples,  | 
316  |  |                    unsigned d, double splitPoint, unsigned f,  | 
317  |  |                    FASTCOVER_accel_t accelParams)  | 
318  | 0  | { | 
319  | 0  |     const BYTE* const samples = (const BYTE*)samplesBuffer;  | 
320  | 0  |     const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);  | 
321  |  |     /* Split samples into testing and training sets */  | 
322  | 0  |     const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;  | 
323  | 0  |     const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;  | 
324  | 0  |     const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;  | 
325  | 0  |     const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;  | 
326  |  |  | 
327  |  |     /* Checks */  | 
328  | 0  |     if (totalSamplesSize < MAX(d, sizeof(U64)) ||  | 
329  | 0  |         totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { | 
330  | 0  |         DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",  | 
331  | 0  |                     (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));  | 
332  | 0  |         return ERROR(srcSize_wrong);  | 
333  | 0  |     }  | 
334  |  |  | 
335  |  |     /* Check if there are at least 5 training samples */  | 
336  | 0  |     if (nbTrainSamples < 5) { | 
337  | 0  |         DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);  | 
338  | 0  |         return ERROR(srcSize_wrong);  | 
339  | 0  |     }  | 
340  |  |  | 
341  |  |     /* Check if there's testing sample */  | 
342  | 0  |     if (nbTestSamples < 1) { | 
343  | 0  |         DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);  | 
344  | 0  |         return ERROR(srcSize_wrong);  | 
345  | 0  |     }  | 
346  |  |  | 
347  |  |     /* Zero the context */  | 
348  | 0  |     memset(ctx, 0, sizeof(*ctx));  | 
349  | 0  |     DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,  | 
350  | 0  |                     (unsigned)trainingSamplesSize);  | 
351  | 0  |     DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,  | 
352  | 0  |                     (unsigned)testSamplesSize);  | 
353  |  | 
  | 
354  | 0  |     ctx->samples = samples;  | 
355  | 0  |     ctx->samplesSizes = samplesSizes;  | 
356  | 0  |     ctx->nbSamples = nbSamples;  | 
357  | 0  |     ctx->nbTrainSamples = nbTrainSamples;  | 
358  | 0  |     ctx->nbTestSamples = nbTestSamples;  | 
359  | 0  |     ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;  | 
360  | 0  |     ctx->d = d;  | 
361  | 0  |     ctx->f = f;  | 
362  | 0  |     ctx->accelParams = accelParams;  | 
363  |  |  | 
364  |  |     /* The offsets of each file */  | 
365  | 0  |     ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));  | 
366  | 0  |     if (ctx->offsets == NULL) { | 
367  | 0  |         DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");  | 
368  | 0  |         FASTCOVER_ctx_destroy(ctx);  | 
369  | 0  |         return ERROR(memory_allocation);  | 
370  | 0  |     }  | 
371  |  |  | 
372  |  |     /* Fill offsets from the samplesSizes */  | 
373  | 0  |     {   U32 i; | 
374  | 0  |         ctx->offsets[0] = 0;  | 
375  | 0  |         assert(nbSamples >= 5);  | 
376  | 0  |         for (i = 1; i <= nbSamples; ++i) { | 
377  | 0  |             ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];  | 
378  | 0  |         }  | 
379  | 0  |     }  | 
380  |  |  | 
381  |  |     /* Initialize frequency array of size 2^f */  | 
382  | 0  |     ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32));  | 
383  | 0  |     if (ctx->freqs == NULL) { | 
384  | 0  |         DISPLAYLEVEL(1, "Failed to allocate frequency table \n");  | 
385  | 0  |         FASTCOVER_ctx_destroy(ctx);  | 
386  | 0  |         return ERROR(memory_allocation);  | 
387  | 0  |     }  | 
388  |  |  | 
389  | 0  |     DISPLAYLEVEL(2, "Computing frequencies\n");  | 
390  | 0  |     FASTCOVER_computeFrequency(ctx->freqs, ctx);  | 
391  |  | 
  | 
392  | 0  |     return 0;  | 
393  | 0  | }  | 
394  |  |  | 
395  |  |  | 
396  |  | /**  | 
397  |  |  * Given the prepared context build the dictionary.  | 
398  |  |  */  | 
399  |  | static size_t  | 
400  |  | FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,  | 
401  |  |                           U32* freqs,  | 
402  |  |                           void* dictBuffer, size_t dictBufferCapacity,  | 
403  |  |                           ZDICT_cover_params_t parameters,  | 
404  |  |                           U16* segmentFreqs)  | 
405  | 0  | { | 
406  | 0  |   BYTE *const dict = (BYTE *)dictBuffer;  | 
407  | 0  |   size_t tail = dictBufferCapacity;  | 
408  |  |   /* Divide the data into epochs. We will select one segment from each epoch. */  | 
409  | 0  |   const COVER_epoch_info_t epochs = COVER_computeEpochs(  | 
410  | 0  |       (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);  | 
411  | 0  |   const size_t maxZeroScoreRun = 10;  | 
412  | 0  |   size_t zeroScoreRun = 0;  | 
413  | 0  |   size_t epoch;  | 
414  | 0  |   DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",  | 
415  | 0  |                 (U32)epochs.num, (U32)epochs.size);  | 
416  |  |   /* Loop through the epochs until there are no more segments or the dictionary  | 
417  |  |    * is full.  | 
418  |  |    */  | 
419  | 0  |   for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { | 
420  | 0  |     const U32 epochBegin = (U32)(epoch * epochs.size);  | 
421  | 0  |     const U32 epochEnd = epochBegin + epochs.size;  | 
422  | 0  |     size_t segmentSize;  | 
423  |  |     /* Select a segment */  | 
424  | 0  |     COVER_segment_t segment = FASTCOVER_selectSegment(  | 
425  | 0  |         ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);  | 
426  |  |  | 
427  |  |     /* If the segment covers no dmers, then we are out of content.  | 
428  |  |      * There may be new content in other epochs, for continue for some time.  | 
429  |  |      */  | 
430  | 0  |     if (segment.score == 0) { | 
431  | 0  |       if (++zeroScoreRun >= maxZeroScoreRun) { | 
432  | 0  |           break;  | 
433  | 0  |       }  | 
434  | 0  |       continue;  | 
435  | 0  |     }  | 
436  | 0  |     zeroScoreRun = 0;  | 
437  |  |  | 
438  |  |     /* Trim the segment if necessary and if it is too small then we are done */  | 
439  | 0  |     segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);  | 
440  | 0  |     if (segmentSize < parameters.d) { | 
441  | 0  |       break;  | 
442  | 0  |     }  | 
443  |  |  | 
444  |  |     /* We fill the dictionary from the back to allow the best segments to be  | 
445  |  |      * referenced with the smallest offsets.  | 
446  |  |      */  | 
447  | 0  |     tail -= segmentSize;  | 
448  | 0  |     memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);  | 
449  | 0  |     DISPLAYUPDATE(  | 
450  | 0  |         2, "\r%u%%       ",  | 
451  | 0  |         (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));  | 
452  | 0  |   }  | 
453  | 0  |   DISPLAYLEVEL(2, "\r%79s\r", "");  | 
454  | 0  |   return tail;  | 
455  | 0  | }  | 
456  |  |  | 
457  |  | /**  | 
458  |  |  * Parameters for FASTCOVER_tryParameters().  | 
459  |  |  */  | 
460  |  | typedef struct FASTCOVER_tryParameters_data_s { | 
461  |  |     const FASTCOVER_ctx_t* ctx;  | 
462  |  |     COVER_best_t* best;  | 
463  |  |     size_t dictBufferCapacity;  | 
464  |  |     ZDICT_cover_params_t parameters;  | 
465  |  | } FASTCOVER_tryParameters_data_t;  | 
466  |  |  | 
467  |  |  | 
468  |  | /**  | 
469  |  |  * Tries a set of parameters and updates the COVER_best_t with the results.  | 
470  |  |  * This function is thread safe if zstd is compiled with multithreaded support.  | 
471  |  |  * It takes its parameters as an *OWNING* opaque pointer to support threading.  | 
472  |  |  */  | 
473  |  | static void FASTCOVER_tryParameters(void* opaque)  | 
474  | 0  | { | 
475  |  |   /* Save parameters as local variables */  | 
476  | 0  |   FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;  | 
477  | 0  |   const FASTCOVER_ctx_t *const ctx = data->ctx;  | 
478  | 0  |   const ZDICT_cover_params_t parameters = data->parameters;  | 
479  | 0  |   size_t dictBufferCapacity = data->dictBufferCapacity;  | 
480  | 0  |   size_t totalCompressedSize = ERROR(GENERIC);  | 
481  |  |   /* Initialize array to keep track of frequency of dmer within activeSegment */  | 
482  | 0  |   U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));  | 
483  |  |   /* Allocate space for hash table, dict, and freqs */  | 
484  | 0  |   BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);  | 
485  | 0  |   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));  | 
486  | 0  |   U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));  | 
487  | 0  |   if (!segmentFreqs || !dict || !freqs) { | 
488  | 0  |     DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");  | 
489  | 0  |     goto _cleanup;  | 
490  | 0  |   }  | 
491  |  |   /* Copy the frequencies because we need to modify them */  | 
492  | 0  |   memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));  | 
493  |  |   /* Build the dictionary */  | 
494  | 0  |   { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, | 
495  | 0  |                                                     parameters, segmentFreqs);  | 
496  |  | 
  | 
497  | 0  |     const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);  | 
498  | 0  |     selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,  | 
499  | 0  |          ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,  | 
500  | 0  |          totalCompressedSize);  | 
501  |  | 
  | 
502  | 0  |     if (COVER_dictSelectionIsError(selection)) { | 
503  | 0  |       DISPLAYLEVEL(1, "Failed to select dictionary\n");  | 
504  | 0  |       goto _cleanup;  | 
505  | 0  |     }  | 
506  | 0  |   }  | 
507  | 0  | _cleanup:  | 
508  | 0  |   free(dict);  | 
509  | 0  |   COVER_best_finish(data->best, parameters, selection);  | 
510  | 0  |   free(data);  | 
511  | 0  |   free(segmentFreqs);  | 
512  | 0  |   COVER_dictSelectionFree(selection);  | 
513  | 0  |   free(freqs);  | 
514  | 0  | }  | 
515  |  |  | 
516  |  |  | 
517  |  | static void  | 
518  |  | FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,  | 
519  |  |                                ZDICT_cover_params_t* coverParams)  | 
520  | 0  | { | 
521  | 0  |     coverParams->k = fastCoverParams.k;  | 
522  | 0  |     coverParams->d = fastCoverParams.d;  | 
523  | 0  |     coverParams->steps = fastCoverParams.steps;  | 
524  | 0  |     coverParams->nbThreads = fastCoverParams.nbThreads;  | 
525  | 0  |     coverParams->splitPoint = fastCoverParams.splitPoint;  | 
526  | 0  |     coverParams->zParams = fastCoverParams.zParams;  | 
527  | 0  |     coverParams->shrinkDict = fastCoverParams.shrinkDict;  | 
528  | 0  | }  | 
529  |  |  | 
530  |  |  | 
531  |  | static void  | 
532  |  | FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,  | 
533  |  |                                    ZDICT_fastCover_params_t* fastCoverParams,  | 
534  |  |                                    unsigned f, unsigned accel)  | 
535  | 0  | { | 
536  | 0  |     fastCoverParams->k = coverParams.k;  | 
537  | 0  |     fastCoverParams->d = coverParams.d;  | 
538  | 0  |     fastCoverParams->steps = coverParams.steps;  | 
539  | 0  |     fastCoverParams->nbThreads = coverParams.nbThreads;  | 
540  | 0  |     fastCoverParams->splitPoint = coverParams.splitPoint;  | 
541  | 0  |     fastCoverParams->f = f;  | 
542  | 0  |     fastCoverParams->accel = accel;  | 
543  | 0  |     fastCoverParams->zParams = coverParams.zParams;  | 
544  | 0  |     fastCoverParams->shrinkDict = coverParams.shrinkDict;  | 
545  | 0  | }  | 
546  |  |  | 
547  |  |  | 
548  |  | ZDICTLIB_STATIC_API size_t  | 
549  |  | ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,  | 
550  |  |                                 const void* samplesBuffer,  | 
551  |  |                                 const size_t* samplesSizes, unsigned nbSamples,  | 
552  |  |                                 ZDICT_fastCover_params_t parameters)  | 
553  | 0  | { | 
554  | 0  |     BYTE* const dict = (BYTE*)dictBuffer;  | 
555  | 0  |     FASTCOVER_ctx_t ctx;  | 
556  | 0  |     ZDICT_cover_params_t coverParams;  | 
557  | 0  |     FASTCOVER_accel_t accelParams;  | 
558  |  |     /* Initialize global data */  | 
559  | 0  |     g_displayLevel = (int)parameters.zParams.notificationLevel;  | 
560  |  |     /* Assign splitPoint and f if not provided */  | 
561  | 0  |     parameters.splitPoint = 1.0;  | 
562  | 0  |     parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;  | 
563  | 0  |     parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel;  | 
564  |  |     /* Convert to cover parameter */  | 
565  | 0  |     memset(&coverParams, 0 , sizeof(coverParams));  | 
566  | 0  |     FASTCOVER_convertToCoverParams(parameters, &coverParams);  | 
567  |  |     /* Checks */  | 
568  | 0  |     if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,  | 
569  | 0  |                                    parameters.accel)) { | 
570  | 0  |       DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");  | 
571  | 0  |       return ERROR(parameter_outOfBound);  | 
572  | 0  |     }  | 
573  | 0  |     if (nbSamples == 0) { | 
574  | 0  |       DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");  | 
575  | 0  |       return ERROR(srcSize_wrong);  | 
576  | 0  |     }  | 
577  | 0  |     if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | 
578  | 0  |       DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",  | 
579  | 0  |                    ZDICT_DICTSIZE_MIN);  | 
580  | 0  |       return ERROR(dstSize_tooSmall);  | 
581  | 0  |     }  | 
582  |  |     /* Assign corresponding FASTCOVER_accel_t to accelParams*/  | 
583  | 0  |     accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];  | 
584  |  |     /* Initialize context */  | 
585  | 0  |     { | 
586  | 0  |       size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,  | 
587  | 0  |                             coverParams.d, parameters.splitPoint, parameters.f,  | 
588  | 0  |                             accelParams);  | 
589  | 0  |       if (ZSTD_isError(initVal)) { | 
590  | 0  |         DISPLAYLEVEL(1, "Failed to initialize context\n");  | 
591  | 0  |         return initVal;  | 
592  | 0  |       }  | 
593  | 0  |     }  | 
594  | 0  |     COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);  | 
595  |  |     /* Build the dictionary */  | 
596  | 0  |     DISPLAYLEVEL(2, "Building dictionary\n");  | 
597  | 0  |     { | 
598  |  |       /* Initialize array to keep track of frequency of dmer within activeSegment */  | 
599  | 0  |       U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16));  | 
600  | 0  |       const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,  | 
601  | 0  |                                                 dictBufferCapacity, coverParams, segmentFreqs);  | 
602  | 0  |       const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);  | 
603  | 0  |       const size_t dictionarySize = ZDICT_finalizeDictionary(  | 
604  | 0  |           dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,  | 
605  | 0  |           samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);  | 
606  | 0  |       if (!ZSTD_isError(dictionarySize)) { | 
607  | 0  |           DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",  | 
608  | 0  |                       (unsigned)dictionarySize);  | 
609  | 0  |       }  | 
610  | 0  |       FASTCOVER_ctx_destroy(&ctx);  | 
611  | 0  |       free(segmentFreqs);  | 
612  | 0  |       return dictionarySize;  | 
613  | 0  |     }  | 
614  | 0  | }  | 
615  |  |  | 
616  |  |  | 
617  |  | ZDICTLIB_STATIC_API size_t  | 
618  |  | ZDICT_optimizeTrainFromBuffer_fastCover(  | 
619  |  |                     void* dictBuffer, size_t dictBufferCapacity,  | 
620  |  |                     const void* samplesBuffer,  | 
621  |  |                     const size_t* samplesSizes, unsigned nbSamples,  | 
622  |  |                     ZDICT_fastCover_params_t* parameters)  | 
623  | 0  | { | 
624  | 0  |     ZDICT_cover_params_t coverParams;  | 
625  | 0  |     FASTCOVER_accel_t accelParams;  | 
626  |  |     /* constants */  | 
627  | 0  |     const unsigned nbThreads = parameters->nbThreads;  | 
628  | 0  |     const double splitPoint =  | 
629  | 0  |         parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;  | 
630  | 0  |     const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;  | 
631  | 0  |     const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;  | 
632  | 0  |     const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;  | 
633  | 0  |     const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;  | 
634  | 0  |     const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;  | 
635  | 0  |     const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);  | 
636  | 0  |     const unsigned kIterations =  | 
637  | 0  |         (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);  | 
638  | 0  |     const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;  | 
639  | 0  |     const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;  | 
640  | 0  |     const unsigned shrinkDict = 0;  | 
641  |  |     /* Local variables */  | 
642  | 0  |     const int displayLevel = (int)parameters->zParams.notificationLevel;  | 
643  | 0  |     unsigned iteration = 1;  | 
644  | 0  |     unsigned d;  | 
645  | 0  |     unsigned k;  | 
646  | 0  |     COVER_best_t best;  | 
647  | 0  |     POOL_ctx *pool = NULL;  | 
648  | 0  |     int warned = 0;  | 
649  |  |     /* Checks */  | 
650  | 0  |     if (splitPoint <= 0 || splitPoint > 1) { | 
651  | 0  |       LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");  | 
652  | 0  |       return ERROR(parameter_outOfBound);  | 
653  | 0  |     }  | 
654  | 0  |     if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { | 
655  | 0  |       LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");  | 
656  | 0  |       return ERROR(parameter_outOfBound);  | 
657  | 0  |     }  | 
658  | 0  |     if (kMinK < kMaxD || kMaxK < kMinK) { | 
659  | 0  |       LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");  | 
660  | 0  |       return ERROR(parameter_outOfBound);  | 
661  | 0  |     }  | 
662  | 0  |     if (nbSamples == 0) { | 
663  | 0  |       LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");  | 
664  | 0  |       return ERROR(srcSize_wrong);  | 
665  | 0  |     }  | 
666  | 0  |     if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | 
667  | 0  |       LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",  | 
668  | 0  |                    ZDICT_DICTSIZE_MIN);  | 
669  | 0  |       return ERROR(dstSize_tooSmall);  | 
670  | 0  |     }  | 
671  | 0  |     if (nbThreads > 1) { | 
672  | 0  |       pool = POOL_create(nbThreads, 1);  | 
673  | 0  |       if (!pool) { | 
674  | 0  |         return ERROR(memory_allocation);  | 
675  | 0  |       }  | 
676  | 0  |     }  | 
677  |  |     /* Initialization */  | 
678  | 0  |     COVER_best_init(&best);  | 
679  | 0  |     memset(&coverParams, 0 , sizeof(coverParams));  | 
680  | 0  |     FASTCOVER_convertToCoverParams(*parameters, &coverParams);  | 
681  | 0  |     accelParams = FASTCOVER_defaultAccelParameters[accel];  | 
682  |  |     /* Turn down global display level to clean up display at level 2 and below */  | 
683  | 0  |     g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;  | 
684  |  |     /* Loop through d first because each new value needs a new context */  | 
685  | 0  |     LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",  | 
686  | 0  |                       kIterations);  | 
687  | 0  |     for (d = kMinD; d <= kMaxD; d += 2) { | 
688  |  |       /* Initialize the context for this value of d */  | 
689  | 0  |       FASTCOVER_ctx_t ctx;  | 
690  | 0  |       LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);  | 
691  | 0  |       { | 
692  | 0  |         size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);  | 
693  | 0  |         if (ZSTD_isError(initVal)) { | 
694  | 0  |           LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");  | 
695  | 0  |           COVER_best_destroy(&best);  | 
696  | 0  |           POOL_free(pool);  | 
697  | 0  |           return initVal;  | 
698  | 0  |         }  | 
699  | 0  |       }  | 
700  | 0  |       if (!warned) { | 
701  | 0  |         COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);  | 
702  | 0  |         warned = 1;  | 
703  | 0  |       }  | 
704  |  |       /* Loop through k reusing the same context */  | 
705  | 0  |       for (k = kMinK; k <= kMaxK; k += kStepSize) { | 
706  |  |         /* Prepare the arguments */  | 
707  | 0  |         FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(  | 
708  | 0  |             sizeof(FASTCOVER_tryParameters_data_t));  | 
709  | 0  |         LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);  | 
710  | 0  |         if (!data) { | 
711  | 0  |           LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");  | 
712  | 0  |           COVER_best_destroy(&best);  | 
713  | 0  |           FASTCOVER_ctx_destroy(&ctx);  | 
714  | 0  |           POOL_free(pool);  | 
715  | 0  |           return ERROR(memory_allocation);  | 
716  | 0  |         }  | 
717  | 0  |         data->ctx = &ctx;  | 
718  | 0  |         data->best = &best;  | 
719  | 0  |         data->dictBufferCapacity = dictBufferCapacity;  | 
720  | 0  |         data->parameters = coverParams;  | 
721  | 0  |         data->parameters.k = k;  | 
722  | 0  |         data->parameters.d = d;  | 
723  | 0  |         data->parameters.splitPoint = splitPoint;  | 
724  | 0  |         data->parameters.steps = kSteps;  | 
725  | 0  |         data->parameters.shrinkDict = shrinkDict;  | 
726  | 0  |         data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;  | 
727  |  |         /* Check the parameters */  | 
728  | 0  |         if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,  | 
729  | 0  |                                        data->ctx->f, accel)) { | 
730  | 0  |           DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");  | 
731  | 0  |           free(data);  | 
732  | 0  |           continue;  | 
733  | 0  |         }  | 
734  |  |         /* Call the function and pass ownership of data to it */  | 
735  | 0  |         COVER_best_start(&best);  | 
736  | 0  |         if (pool) { | 
737  | 0  |           POOL_add(pool, &FASTCOVER_tryParameters, data);  | 
738  | 0  |         } else { | 
739  | 0  |           FASTCOVER_tryParameters(data);  | 
740  | 0  |         }  | 
741  |  |         /* Print status */  | 
742  | 0  |         LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",  | 
743  | 0  |                            (unsigned)((iteration * 100) / kIterations));  | 
744  | 0  |         ++iteration;  | 
745  | 0  |       }  | 
746  | 0  |       COVER_best_wait(&best);  | 
747  | 0  |       FASTCOVER_ctx_destroy(&ctx);  | 
748  | 0  |     }  | 
749  | 0  |     LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");  | 
750  |  |     /* Fill the output buffer and parameters with output of the best parameters */  | 
751  | 0  |     { | 
752  | 0  |       const size_t dictSize = best.dictSize;  | 
753  | 0  |       if (ZSTD_isError(best.compressedSize)) { | 
754  | 0  |         const size_t compressedSize = best.compressedSize;  | 
755  | 0  |         COVER_best_destroy(&best);  | 
756  | 0  |         POOL_free(pool);  | 
757  | 0  |         return compressedSize;  | 
758  | 0  |       }  | 
759  | 0  |       FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);  | 
760  | 0  |       memcpy(dictBuffer, best.dict, dictSize);  | 
761  | 0  |       COVER_best_destroy(&best);  | 
762  | 0  |       POOL_free(pool);  | 
763  | 0  |       return dictSize;  | 
764  | 0  |     }  | 
765  |  | 
  | 
766  | 0  | }  |