Coverage Report

Created: 2024-05-21 06:17

/src/zstd/lib/compress/fse_compress.c
Line
Count
Source (jump to first uncovered line)
1
/* ******************************************************************
2
 * FSE : Finite State Entropy encoder
3
 * Copyright (c) Meta Platforms, Inc. and affiliates.
4
 *
5
 *  You can contact the author at :
6
 *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
 *
9
 * This source code is licensed under both the BSD-style license (found in the
10
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
 * in the COPYING file in the root directory of this source tree).
12
 * You may select, at your option, one of the above-listed licenses.
13
****************************************************************** */
14
15
/* **************************************************************
16
*  Includes
17
****************************************************************/
18
#include "../common/compiler.h"
19
#include "../common/mem.h"        /* U32, U16, etc. */
20
#include "../common/debug.h"      /* assert, DEBUGLOG */
21
#include "hist.h"       /* HIST_count_wksp */
22
#include "../common/bitstream.h"
23
#define FSE_STATIC_LINKING_ONLY
24
#include "../common/fse.h"
25
#include "../common/error_private.h"
26
#define ZSTD_DEPS_NEED_MALLOC
27
#define ZSTD_DEPS_NEED_MATH64
28
#include "../common/zstd_deps.h"  /* ZSTD_memset */
29
#include "../common/bits.h" /* ZSTD_highbit32 */
30
31
32
/* **************************************************************
33
*  Error Management
34
****************************************************************/
35
4.92M
#define FSE_isError ERR_isError
36
37
38
/* **************************************************************
39
*  Templates
40
****************************************************************/
41
/*
42
  designed to be included
43
  for type-specific functions (template emulation in C)
44
  Objective is to write these functions only once, for improved maintenance
45
*/
46
47
/* safety checks :
 * this file is a template ; both parameters must be selected before reaching this point */
#if !defined(FSE_FUNCTION_EXTENSION)
#  error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#if !defined(FSE_FUNCTION_TYPE)
#  error "FSE_FUNCTION_TYPE must be defined"
#endif

/* Function names :
 * pasting goes through FSE_CAT() so that macro arguments get expanded first */
#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
59
60
61
/* Function templates */
62
63
/* FSE_buildCTable_wksp() :
64
 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
65
 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
66
 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
67
 */
68
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
69
                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
70
                            void* workSpace, size_t wkspSize)
71
20.8M
{
72
20.8M
    U32 const tableSize = 1 << tableLog;
73
20.8M
    U32 const tableMask = tableSize - 1;
74
20.8M
    void* const ptr = ct;
75
20.8M
    U16* const tableU16 = ( (U16*) ptr) + 2;
76
18.4E
    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
77
20.8M
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
78
20.8M
    U32 const step = FSE_TABLESTEP(tableSize);
79
20.8M
    U32 const maxSV1 = maxSymbolValue+1;
80
81
20.8M
    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
82
20.8M
    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */
83
84
20.8M
    U32 highThreshold = tableSize-1;
85
86
20.8M
    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
87
20.8M
    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
88
    /* CTable header */
89
20.8M
    tableU16[-2] = (U16) tableLog;
90
20.8M
    tableU16[-1] = (U16) maxSymbolValue;
91
20.8M
    assert(tableLog < 16);   /* required for threshold strategy to work */
92
93
    /* For explanations on how to distribute symbol values over the table :
94
     * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
95
96
     #ifdef __clang_analyzer__
97
     ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
98
     #endif
99
100
    /* symbol start positions */
101
20.8M
    {   U32 u;
102
20.8M
        cumul[0] = 0;
103
603M
        for (u=1; u <= maxSV1; u++) {
104
583M
            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
105
64.1M
                cumul[u] = cumul[u-1] + 1;
106
64.1M
                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
107
518M
            } else {
108
518M
                assert(normalizedCounter[u-1] >= 0);
109
518M
                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
110
518M
                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
111
518M
        }   }
112
20.8M
        cumul[maxSV1] = (U16)(tableSize+1);
113
20.8M
    }
114
115
    /* Spread symbols */
116
20.8M
    if (highThreshold == tableSize - 1) {
117
        /* Case for no low prob count symbols. Lay down 8 bytes at a time
118
         * to reduce branch misses since we are operating on a small block
119
         */
120
9.20M
        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
121
9.20M
        {   U64 const add = 0x0101010101010101ull;
122
9.20M
            size_t pos = 0;
123
9.20M
            U64 sv = 0;
124
9.20M
            U32 s;
125
121M
            for (s=0; s<maxSV1; ++s, sv += add) {
126
112M
                int i;
127
112M
                int const n = normalizedCounter[s];
128
112M
                MEM_write64(spread + pos, sv);
129
149M
                for (i = 8; i < n; i += 8) {
130
37.0M
                    MEM_write64(spread + pos + i, sv);
131
37.0M
                }
132
112M
                assert(n>=0);
133
112M
                pos += (size_t)n;
134
112M
            }
135
9.20M
        }
136
        /* Spread symbols across the table. Lack of lowprob symbols means that
137
         * we don't need variable sized inner loop, so we can unroll the loop and
138
         * reduce branch misses.
139
         */
140
9.20M
        {   size_t position = 0;
141
9.20M
            size_t s;
142
9.20M
            size_t const unroll = 2; /* Experimentally determined optimal unroll */
143
9.20M
            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
144
259M
            for (s = 0; s < (size_t)tableSize; s += unroll) {
145
249M
                size_t u;
146
749M
                for (u = 0; u < unroll; ++u) {
147
499M
                    size_t const uPosition = (position + (u * step)) & tableMask;
148
499M
                    tableSymbol[uPosition] = spread[s + u];
149
499M
                }
150
249M
                position = (position + (unroll * step)) & tableMask;
151
249M
            }
152
9.19M
            assert(position == 0);   /* Must have initialized all positions */
153
9.19M
        }
154
11.6M
    } else {
155
11.6M
        U32 position = 0;
156
11.6M
        U32 symbol;
157
482M
        for (symbol=0; symbol<maxSV1; symbol++) {
158
470M
            int nbOccurrences;
159
470M
            int const freq = normalizedCounter[symbol];
160
1.08G
            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
161
612M
                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
162
612M
                position = (position + step) & tableMask;
163
677M
                while (position > highThreshold)
164
64.1M
                    position = (position + step) & tableMask;   /* Low proba area */
165
612M
        }   }
166
11.6M
        assert(position==0);  /* Must have initialized all positions */
167
11.6M
    }
168
169
    /* Build table */
170
1.19G
    {   U32 u; for (u=0; u<tableSize; u++) {
171
1.17G
        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
172
1.17G
        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
173
1.17G
    }   }
174
175
    /* Build Symbol Transformation Table */
176
20.8M
    {   unsigned total = 0;
177
20.8M
        unsigned s;
178
603M
        for (s=0; s<=maxSymbolValue; s++) {
179
583M
            switch (normalizedCounter[s])
180
583M
            {
181
36.6M
            case  0:
182
                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
183
36.6M
                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
184
36.6M
                break;
185
186
64.1M
            case -1:
187
367M
            case  1:
188
367M
                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
189
367M
                assert(total <= INT_MAX);
190
367M
                symbolTT[s].deltaFindState = (int)(total - 1);
191
367M
                total ++;
192
367M
                break;
193
179M
            default :
194
179M
                assert(normalizedCounter[s] > 1);
195
179M
                {   U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
196
179M
                    U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
197
179M
                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
198
179M
                    symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
199
179M
                    total +=  (unsigned)normalizedCounter[s];
200
179M
    }   }   }   }
201
202
#if 0  /* debug : symbol costs */
203
    DEBUGLOG(5, "\n --- table statistics : ");
204
    {   U32 symbol;
205
        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
206
            DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
207
                symbol, normalizedCounter[symbol],
208
                FSE_getMaxNbBits(symbolTT, symbol),
209
                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
210
    }   }
211
#endif
212
213
20.8M
    return 0;
214
20.8M
}
215
216
217
218
#ifndef FSE_COMMONDEFS_ONLY
219
220
/*-**************************************************************
221
*  FSE NCount encoding
222
****************************************************************/
223
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
224
16.6M
{
225
16.6M
    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
226
16.6M
                                   + 4 /* bitCount initialized at 4 */
227
16.6M
                                   + 2 /* first two symbols may use one additional bit each */) / 8)
228
16.6M
                                   + 1 /* round up to whole nb bytes */
229
16.6M
                                   + 2 /* additional two bytes for bitstream flush */;
230
18.4E
    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
231
16.6M
}
232
233
static size_t
234
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
235
                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
236
                         unsigned writeIsSafe)
237
16.6M
{
238
16.6M
    BYTE* const ostart = (BYTE*) header;
239
16.6M
    BYTE* out = ostart;
240
16.6M
    BYTE* const oend = ostart + headerBufferSize;
241
16.6M
    int nbBits;
242
16.6M
    const int tableSize = 1 << tableLog;
243
16.6M
    int remaining;
244
16.6M
    int threshold;
245
16.6M
    U32 bitStream = 0;
246
16.6M
    int bitCount = 0;
247
16.6M
    unsigned symbol = 0;
248
16.6M
    unsigned const alphabetSize = maxSymbolValue + 1;
249
16.6M
    int previousIs0 = 0;
250
251
    /* Table Size */
252
16.6M
    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
253
16.6M
    bitCount  += 4;
254
255
    /* Init */
256
16.6M
    remaining = tableSize+1;   /* +1 for extra accuracy */
257
16.6M
    threshold = tableSize;
258
16.6M
    nbBits = (int)tableLog+1;
259
260
198M
    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
261
182M
        if (previousIs0) {
262
32.3M
            unsigned start = symbol;
263
106M
            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
264
32.3M
            if (symbol == alphabetSize) break;   /* incorrect distribution */
265
32.8M
            while (symbol >= start+24) {
266
505k
                start+=24;
267
505k
                bitStream += 0xFFFFU << bitCount;
268
505k
                if ((!writeIsSafe) && (out > oend-2))
269
3
                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
270
505k
                out[0] = (BYTE) bitStream;
271
505k
                out[1] = (BYTE)(bitStream>>8);
272
505k
                out+=2;
273
505k
                bitStream>>=16;
274
505k
            }
275
47.1M
            while (symbol >= start+3) {
276
14.7M
                start+=3;
277
14.7M
                bitStream += 3U << bitCount;
278
14.7M
                bitCount += 2;
279
14.7M
            }
280
32.3M
            bitStream += (symbol-start) << bitCount;
281
32.3M
            bitCount += 2;
282
32.3M
            if (bitCount>16) {
283
5.44M
                if ((!writeIsSafe) && (out > oend - 2))
284
6
                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
285
5.44M
                out[0] = (BYTE)bitStream;
286
5.44M
                out[1] = (BYTE)(bitStream>>8);
287
5.44M
                out += 2;
288
5.44M
                bitStream >>= 16;
289
5.44M
                bitCount -= 16;
290
5.44M
        }   }
291
182M
        {   int count = normalizedCounter[symbol++];
292
182M
            int const max = (2*threshold-1) - remaining;
293
182M
            remaining -= count < 0 ? -count : count;
294
182M
            count++;   /* +1 for extra accuracy */
295
182M
            if (count>=threshold)
296
30.2M
                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
297
182M
            bitStream += (U32)count << bitCount;
298
182M
            bitCount  += nbBits;
299
182M
            bitCount  -= (count<max);
300
182M
            previousIs0  = (count==1);
301
182M
            if (remaining<1) return ERROR(GENERIC);
302
276M
            while (remaining<threshold) { nbBits--; threshold>>=1; }
303
182M
        }
304
182M
        if (bitCount>16) {
305
40.8M
            if ((!writeIsSafe) && (out > oend - 2))
306
30
                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
307
40.8M
            out[0] = (BYTE)bitStream;
308
40.8M
            out[1] = (BYTE)(bitStream>>8);
309
40.8M
            out += 2;
310
40.8M
            bitStream >>= 16;
311
40.8M
            bitCount -= 16;
312
40.8M
    }   }
313
314
16.6M
    if (remaining != 1)
315
0
        return ERROR(GENERIC);  /* incorrect normalized distribution */
316
16.6M
    assert(symbol <= alphabetSize);
317
318
    /* flush remaining bitStream */
319
16.6M
    if ((!writeIsSafe) && (out > oend - 2))
320
38
        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
321
16.6M
    out[0] = (BYTE)bitStream;
322
16.6M
    out[1] = (BYTE)(bitStream>>8);
323
16.6M
    out+= (bitCount+7) /8;
324
325
16.6M
    assert(out >= ostart);
326
16.6M
    return (size_t)(out-ostart);
327
16.6M
}
328
329
330
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
331
                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
332
16.6M
{
333
16.6M
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
334
16.6M
    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
335
336
16.6M
    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
337
294
        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
338
339
16.6M
    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
340
16.6M
}
341
342
343
/*-**************************************************************
344
*  FSE Compression Code
345
****************************************************************/
346
347
/* provides the minimum logSize to safely represent a distribution */
348
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
349
34.9M
{
350
34.9M
    U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
351
34.9M
    U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
352
34.9M
    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
353
34.9M
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
354
34.9M
    return minBits;
355
34.9M
}
356
357
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
358
18.3M
{
359
18.3M
    U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
360
18.3M
    U32 tableLog = maxTableLog;
361
18.3M
    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
362
18.3M
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
363
18.3M
    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
364
18.3M
    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
365
18.3M
    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
366
18.3M
    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
367
18.3M
    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
368
18.3M
    return tableLog;
369
18.3M
}
370
371
/* FSE_optimalTableLog() :
 * same as FSE_optimalTableLog_internal(), with `minus` fixed to 2 */
unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    unsigned const minus = 2;
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, minus);
}
375
376
/* Secondary normalization method.
377
   To be used when primary method fails. */
378
379
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
380
180k
{
381
180k
    short const NOT_YET_ASSIGNED = -2;
382
180k
    U32 s;
383
180k
    U32 distributed = 0;
384
180k
    U32 ToDistribute;
385
386
    /* Init */
387
180k
    U32 const lowThreshold = (U32)(total >> tableLog);
388
180k
    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
389
390
6.16M
    for (s=0; s<=maxSymbolValue; s++) {
391
5.98M
        if (count[s] == 0) {
392
3.05M
            norm[s]=0;
393
3.05M
            continue;
394
3.05M
        }
395
2.92M
        if (count[s] <= lowThreshold) {
396
299k
            norm[s] = lowProbCount;
397
299k
            distributed++;
398
299k
            total -= count[s];
399
299k
            continue;
400
299k
        }
401
2.62M
        if (count[s] <= lowOne) {
402
384k
            norm[s] = 1;
403
384k
            distributed++;
404
384k
            total -= count[s];
405
384k
            continue;
406
384k
        }
407
408
2.23M
        norm[s]=NOT_YET_ASSIGNED;
409
2.23M
    }
410
180k
    ToDistribute = (1 << tableLog) - distributed;
411
412
180k
    if (ToDistribute == 0)
413
0
        return 0;
414
415
180k
    if ((total / ToDistribute) > lowOne) {
416
        /* risk of rounding to zero */
417
108
        lowOne = (U32)((total * 3) / (ToDistribute * 2));
418
2.54k
        for (s=0; s<=maxSymbolValue; s++) {
419
2.43k
            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
420
94
                norm[s] = 1;
421
94
                distributed++;
422
94
                total -= count[s];
423
94
                continue;
424
94
        }   }
425
108
        ToDistribute = (1 << tableLog) - distributed;
426
108
    }
427
428
180k
    if (distributed == maxSymbolValue+1) {
429
        /* all values are pretty poor;
430
           probably incompressible data (should have already been detected);
431
           find max, then give all remaining points to max */
432
0
        U32 maxV = 0, maxC = 0;
433
0
        for (s=0; s<=maxSymbolValue; s++)
434
0
            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
435
0
        norm[maxV] += (short)ToDistribute;
436
0
        return 0;
437
0
    }
438
439
180k
    if (total == 0) {
440
        /* all of the symbols were low enough for the lowOne or lowThreshold */
441
1.27k
        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
442
1.20k
            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
443
71
        return 0;
444
71
    }
445
446
180k
    {   U64 const vStepLog = 62 - tableLog;
447
180k
        U64 const mid = (1ULL << (vStepLog-1)) - 1;
448
180k
        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
449
180k
        U64 tmpTotal = mid;
450
6.16M
        for (s=0; s<=maxSymbolValue; s++) {
451
5.97M
            if (norm[s]==NOT_YET_ASSIGNED) {
452
2.23M
                U64 const end = tmpTotal + (count[s] * rStep);
453
2.23M
                U32 const sStart = (U32)(tmpTotal >> vStepLog);
454
2.23M
                U32 const sEnd = (U32)(end >> vStepLog);
455
2.23M
                U32 const weight = sEnd - sStart;
456
2.23M
                if (weight < 1)
457
0
                    return ERROR(GENERIC);
458
2.23M
                norm[s] = (short)weight;
459
2.23M
                tmpTotal = end;
460
2.23M
    }   }   }
461
462
180k
    return 0;
463
180k
}
464
465
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
466
                           const unsigned* count, size_t total,
467
                           unsigned maxSymbolValue, unsigned useLowProbCount)
468
16.6M
{
469
    /* Sanity checks */
470
16.6M
    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
471
16.6M
    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
472
16.6M
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
473
16.6M
    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
474
475
16.6M
    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
476
16.6M
        short const lowProbCount = useLowProbCount ? -1 : 1;
477
16.6M
        U64 const scale = 62 - tableLog;
478
16.6M
        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
479
16.6M
        U64 const vStep = 1ULL<<(scale-20);
480
16.6M
        int stillToDistribute = 1<<tableLog;
481
16.6M
        unsigned s;
482
16.6M
        unsigned largest=0;
483
16.6M
        short largestP=0;
484
16.6M
        U32 lowThreshold = (U32)(total >> tableLog);
485
486
272M
        for (s=0; s<=maxSymbolValue; s++) {
487
256M
            if (count[s] == total) return 0;   /* rle special case */
488
256M
            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
489
149M
            if (count[s] <= lowThreshold) {
490
40.0M
                normalizedCounter[s] = lowProbCount;
491
40.0M
                stillToDistribute--;
492
109M
            } else {
493
109M
                short proba = (short)((count[s]*step) >> scale);
494
109M
                if (proba<8) {
495
80.8M
                    U64 restToBeat = vStep * rtbTable[proba];
496
80.8M
                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
497
80.8M
                }
498
109M
                if (proba > largestP) { largestP=proba; largest=s; }
499
109M
                normalizedCounter[s] = proba;
500
109M
                stillToDistribute -= proba;
501
109M
        }   }
502
16.6M
        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
503
            /* corner case, need another normalization method */
504
180k
            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
505
180k
            if (FSE_isError(errorCode)) return errorCode;
506
180k
        }
507
16.4M
        else normalizedCounter[largest] += (short)stillToDistribute;
508
16.6M
    }
509
510
#if 0
511
    {   /* Print Table (debug) */
512
        U32 s;
513
        U32 nTotal = 0;
514
        for (s=0; s<=maxSymbolValue; s++)
515
            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
516
        for (s=0; s<=maxSymbolValue; s++)
517
            nTotal += abs(normalizedCounter[s]);
518
        if (nTotal != (1U<<tableLog))
519
            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
520
        getchar();
521
    }
522
#endif
523
524
16.6M
    return tableLog;
525
16.6M
}
526
527
/* fake FSE_CTable, for rle input (always same symbol) */
528
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
529
959k
{
530
959k
    void* ptr = ct;
531
959k
    U16* tableU16 = ( (U16*) ptr) + 2;
532
959k
    void* FSCTptr = (U32*)ptr + 2;
533
959k
    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
534
535
    /* header */
536
959k
    tableU16[-2] = (U16) 0;
537
959k
    tableU16[-1] = (U16) symbolValue;
538
539
    /* Build table */
540
959k
    tableU16[0] = 0;
541
959k
    tableU16[1] = 0;   /* just in case */
542
543
    /* Build Symbol Transformation Table */
544
959k
    symbolTT[symbolValue].deltaNbBits = 0;
545
959k
    symbolTT[symbolValue].deltaFindState = 0;
546
547
959k
    return 0;
548
959k
}
549
550
551
static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
552
                           const void* src, size_t srcSize,
553
                           const FSE_CTable* ct, const unsigned fast)
554
4.74M
{
555
4.74M
    const BYTE* const istart = (const BYTE*) src;
556
4.74M
    const BYTE* const iend = istart + srcSize;
557
4.74M
    const BYTE* ip=iend;
558
559
4.74M
    BIT_CStream_t bitC;
560
4.74M
    FSE_CState_t CState1, CState2;
561
562
    /* init */
563
4.74M
    if (srcSize <= 2) return 0;
564
4.74M
    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
565
4.74M
      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
566
567
251M
#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
568
569
4.74M
    if (srcSize & 1) {
570
3.45M
        FSE_initCState2(&CState1, ct, *--ip);
571
3.45M
        FSE_initCState2(&CState2, ct, *--ip);
572
3.45M
        FSE_encodeSymbol(&bitC, &CState1, *--ip);
573
3.45M
        FSE_FLUSHBITS(&bitC);
574
3.45M
    } else {
575
1.29M
        FSE_initCState2(&CState2, ct, *--ip);
576
1.29M
        FSE_initCState2(&CState1, ct, *--ip);
577
1.29M
    }
578
579
    /* join to mod 4 */
580
4.74M
    srcSize -= 2;
581
4.74M
    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
582
771k
        FSE_encodeSymbol(&bitC, &CState2, *--ip);
583
771k
        FSE_encodeSymbol(&bitC, &CState1, *--ip);
584
771k
        FSE_FLUSHBITS(&bitC);
585
771k
    }
586
587
    /* 2 or 4 encoding per loop */
588
252M
    while ( ip>istart ) {
589
590
247M
        FSE_encodeSymbol(&bitC, &CState2, *--ip);
591
592
247M
        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
593
0
            FSE_FLUSHBITS(&bitC);
594
595
247M
        FSE_encodeSymbol(&bitC, &CState1, *--ip);
596
597
247M
        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
598
247M
            FSE_encodeSymbol(&bitC, &CState2, *--ip);
599
247M
            FSE_encodeSymbol(&bitC, &CState1, *--ip);
600
247M
        }
601
602
247M
        FSE_FLUSHBITS(&bitC);
603
247M
    }
604
605
4.74M
    FSE_flushCState(&bitC, &CState2);
606
4.74M
    FSE_flushCState(&bitC, &CState1);
607
4.74M
    return BIT_closeCStream(&bitC);
608
4.74M
}
609
610
size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
611
                           const void* src, size_t srcSize,
612
                           const FSE_CTable* ct)
613
4.74M
{
614
4.74M
    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
615
616
4.74M
    if (fast)
617
3.38M
        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
618
1.36M
    else
619
1.36M
        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
620
4.74M
}
621
622
623
0
/* FSE_compressBound() :
 * function form of the FSE_COMPRESSBOUND() macro */
size_t FSE_compressBound(size_t size)
{
    return FSE_COMPRESSBOUND(size);
}
624
625
#endif   /* FSE_COMMONDEFS_ONLY */