Coverage Report

Created: 2025-07-23 08:18

/src/zstd/lib/compress/zstd_compress_literals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
 /*-*************************************
12
 *  Dependencies
13
 ***************************************/
14
#include "zstd_compress_literals.h"
15
16
17
/* **************************************************************
18
*  Debug Traces
19
****************************************************************/
20
#if DEBUGLEVEL >= 2
21
22
static size_t showHexa(const void* src, size_t srcSize)
23
{
24
    const BYTE* const ip = (const BYTE*)src;
25
    size_t u;
26
    for (u=0; u<srcSize; u++) {
27
        RAWLOG(5, " %02X", ip[u]); (void)ip;
28
    }
29
    RAWLOG(5, " \n");
30
    return srcSize;
31
}
32
33
#endif
34
35
36
/* **************************************************************
37
*  Literals compression - special cases
38
****************************************************************/
39
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
40
35.0k
{
41
35.0k
    BYTE* const ostart = (BYTE*)dst;
42
35.0k
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
43
44
35.0k
    DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
45
46
35.0k
    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
47
48
35.0k
    switch(flSize)
49
35.0k
    {
50
30.3k
        case 1: /* 2 - 1 - 5 */
51
30.3k
            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
52
30.3k
            break;
53
4.41k
        case 2: /* 2 - 2 - 12 */
54
4.41k
            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
55
4.41k
            break;
56
257
        case 3: /* 2 - 2 - 20 */
57
257
            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
58
257
            break;
59
0
        default:   /* not necessary : flSize is {1,2,3} */
60
0
            assert(0);
61
35.0k
    }
62
63
35.0k
    ZSTD_memcpy(ostart + flSize, src, srcSize);
64
35.0k
    DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
65
35.0k
    return srcSize + flSize;
66
35.0k
}
67
68
static int allBytesIdentical(const void* src, size_t srcSize)
69
0
{
70
0
    assert(srcSize >= 1);
71
0
    assert(src != NULL);
72
0
    {   const BYTE b = ((const BYTE*)src)[0];
73
0
        size_t p;
74
0
        for (p=1; p<srcSize; p++) {
75
0
            if (((const BYTE*)src)[p] != b) return 0;
76
0
        }
77
0
        return 1;
78
0
    }
79
0
}
80
81
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
82
382
{
83
382
    BYTE* const ostart = (BYTE*)dst;
84
382
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
85
86
382
    assert(dstCapacity >= 4); (void)dstCapacity;
87
382
    assert(allBytesIdentical(src, srcSize));
88
89
382
    switch(flSize)
90
382
    {
91
0
        case 1: /* 2 - 1 - 5 */
92
0
            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
93
0
            break;
94
382
        case 2: /* 2 - 2 - 12 */
95
382
            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
96
382
            break;
97
0
        case 3: /* 2 - 2 - 20 */
98
0
            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
99
0
            break;
100
0
        default:   /* not necessary : flSize is {1,2,3} */
101
0
            assert(0);
102
382
    }
103
104
382
    ostart[flSize] = *(const BYTE*)src;
105
382
    DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
106
382
    return flSize+1;
107
382
}
108
109
/* ZSTD_minLiteralsToCompress() :
110
 * returns minimal amount of literals
111
 * for literal compression to even be attempted.
112
 * Minimum is made tighter as compression strategy increases.
113
 */
114
static size_t
115
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
116
53.6k
{
117
53.6k
    assert((int)strategy >= 0);
118
53.6k
    assert((int)strategy <= 9);
119
    /* btultra2 : min 8 bytes;
120
     * then 2x larger for each successive compression strategy
121
     * max threshold 64 bytes */
122
53.6k
    {   int const shift = MIN(9-(int)strategy, 3);
123
53.6k
        size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124
53.6k
        DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
125
53.6k
        return mintc;
126
53.6k
    }
127
53.6k
}
128
129
size_t ZSTD_compressLiterals (
130
                  void* dst, size_t dstCapacity,
131
            const void* src, size_t srcSize,
132
                  void* entropyWorkspace, size_t entropyWorkspaceSize,
133
            const ZSTD_hufCTables_t* prevHuf,
134
                  ZSTD_hufCTables_t* nextHuf,
135
                  ZSTD_strategy strategy,
136
                  int disableLiteralCompression,
137
                  int suspectUncompressible,
138
                  int bmi2)
139
53.6k
{
140
53.6k
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
141
53.6k
    BYTE*  const ostart = (BYTE*)dst;
142
53.6k
    U32 singleStream = srcSize < 256;
143
53.6k
    SymbolEncodingType_e hType = set_compressed;
144
53.6k
    size_t cLitSize;
145
146
53.6k
    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147
53.6k
                disableLiteralCompression, (U32)srcSize, dstCapacity);
148
149
53.6k
    DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
150
151
    /* Prepare nextEntropy assuming reusing the existing table */
152
53.6k
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
153
154
53.6k
    if (disableLiteralCompression)
155
0
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
156
157
    /* if too small, don't even attempt compression (speed opt) */
158
53.6k
    if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159
32.9k
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
160
161
20.6k
    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
162
20.6k
    {   HUF_repeat repeat = prevHuf->repeatMode;
163
20.6k
        int const flags = 0
164
20.6k
            | (bmi2 ? HUF_flags_bmi2 : 0)
165
20.6k
            | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
166
20.6k
            | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
167
20.6k
            | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
168
169
20.6k
        typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170
20.6k
        huf_compress_f huf_compress;
171
20.6k
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
172
20.6k
        huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173
20.6k
        cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
174
20.6k
                                src, srcSize,
175
20.6k
                                HUF_SYMBOLVALUE_MAX, LitHufLog,
176
20.6k
                                entropyWorkspace, entropyWorkspaceSize,
177
20.6k
                                (HUF_CElt*)nextHuf->CTable,
178
20.6k
                                &repeat, flags);
179
20.6k
        DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
180
20.6k
        if (repeat != HUF_repeat_none) {
181
            /* reused the existing table */
182
8.15k
            DEBUGLOG(5, "reusing statistics from previous huffman block");
183
8.15k
            hType = set_repeat;
184
8.15k
        }
185
20.6k
    }
186
187
20.6k
    {   size_t const minGain = ZSTD_minGain(srcSize, strategy);
188
20.6k
        if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189
2.09k
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190
2.09k
            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
191
2.09k
    }   }
192
18.5k
    if (cLitSize==1) {
193
        /* A return value of 1 signals that the alphabet consists of a single symbol.
194
         * However, in some rare circumstances, it could be the compressed size (a single byte).
195
         * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196
         * (it's also necessary to not generate statistics).
197
         * Therefore, in such a case, actively check that all bytes are identical. */
198
382
        if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199
382
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200
382
            return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
201
382
    }   }
202
203
18.1k
    if (hType == set_compressed) {
204
        /* using a newly constructed table */
205
10.4k
        nextHuf->repeatMode = HUF_repeat_check;
206
10.4k
    }
207
208
    /* Build header */
209
18.1k
    switch(lhSize)
210
18.1k
    {
211
11.1k
    case 3: /* 2 - 2 - 10 - 10 */
212
11.1k
        if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213
11.1k
        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
214
11.1k
            MEM_writeLE24(ostart, lhc);
215
11.1k
            break;
216
0
        }
217
6.07k
    case 4: /* 2 - 2 - 14 - 14 */
218
6.07k
        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
219
6.07k
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
220
6.07k
            MEM_writeLE32(ostart, lhc);
221
6.07k
            break;
222
0
        }
223
907
    case 5: /* 2 - 2 - 18 - 18 */
224
907
        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
225
907
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
226
907
            MEM_writeLE32(ostart, lhc);
227
907
            ostart[4] = (BYTE)(cLitSize >> 10);
228
907
            break;
229
0
        }
230
0
    default:  /* not possible : lhSize is {3,4,5} */
231
0
        assert(0);
232
18.1k
    }
233
18.1k
    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
234
18.1k
    return lhSize+cLitSize;
235
18.1k
}