Coverage Report

Created: 2025-03-15 06:58

/src/zstd/lib/compress/zstd_compress_literals.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
 /*-*************************************
12
 *  Dependencies
13
 ***************************************/
14
#include "zstd_compress_literals.h"
15
16
17
/* **************************************************************
18
*  Debug Traces
19
****************************************************************/
20
#if DEBUGLEVEL >= 2
21
22
static size_t showHexa(const void* src, size_t srcSize)
23
{
24
    const BYTE* const ip = (const BYTE*)src;
25
    size_t u;
26
    for (u=0; u<srcSize; u++) {
27
        RAWLOG(5, " %02X", ip[u]); (void)ip;
28
    }
29
    RAWLOG(5, " \n");
30
    return srcSize;
31
}
32
33
#endif
34
35
36
/* **************************************************************
37
*  Literals compression - special cases
38
****************************************************************/
39
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
40
0
{
41
0
    BYTE* const ostart = (BYTE*)dst;
42
0
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
43
44
0
    DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
45
46
0
    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
47
48
0
    switch(flSize)
49
0
    {
50
0
        case 1: /* 2 - 1 - 5 */
51
0
            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
52
0
            break;
53
0
        case 2: /* 2 - 2 - 12 */
54
0
            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
55
0
            break;
56
0
        case 3: /* 2 - 2 - 20 */
57
0
            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
58
0
            break;
59
0
        default:   /* not necessary : flSize is {1,2,3} */
60
0
            assert(0);
61
0
    }
62
63
0
    ZSTD_memcpy(ostart + flSize, src, srcSize);
64
0
    DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
65
0
    return srcSize + flSize;
66
0
}
67
68
static int allBytesIdentical(const void* src, size_t srcSize)
69
0
{
70
0
    assert(srcSize >= 1);
71
0
    assert(src != NULL);
72
0
    {   const BYTE b = ((const BYTE*)src)[0];
73
0
        size_t p;
74
0
        for (p=1; p<srcSize; p++) {
75
0
            if (((const BYTE*)src)[p] != b) return 0;
76
0
        }
77
0
        return 1;
78
0
    }
79
0
}
80
81
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
82
0
{
83
0
    BYTE* const ostart = (BYTE*)dst;
84
0
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
85
86
0
    assert(dstCapacity >= 4); (void)dstCapacity;
87
0
    assert(allBytesIdentical(src, srcSize));
88
89
0
    switch(flSize)
90
0
    {
91
0
        case 1: /* 2 - 1 - 5 */
92
0
            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
93
0
            break;
94
0
        case 2: /* 2 - 2 - 12 */
95
0
            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
96
0
            break;
97
0
        case 3: /* 2 - 2 - 20 */
98
0
            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
99
0
            break;
100
0
        default:   /* not necessary : flSize is {1,2,3} */
101
0
            assert(0);
102
0
    }
103
104
0
    ostart[flSize] = *(const BYTE*)src;
105
0
    DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
106
0
    return flSize+1;
107
0
}
108
109
/* ZSTD_minLiteralsToCompress() :
110
 * returns minimal amount of literals
111
 * for literal compression to even be attempted.
112
 * Minimum is made tighter as compression strategy increases.
113
 */
114
static size_t
115
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
116
0
{
117
0
    assert((int)strategy >= 0);
118
0
    assert((int)strategy <= 9);
119
    /* btultra2 : min 8 bytes;
120
     * then 2x larger for each successive compression strategy
121
     * max threshold 64 bytes */
122
0
    {   int const shift = MIN(9-(int)strategy, 3);
123
0
        size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124
0
        DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
125
0
        return mintc;
126
0
    }
127
0
}
128
129
size_t ZSTD_compressLiterals (
130
                  void* dst, size_t dstCapacity,
131
            const void* src, size_t srcSize,
132
                  void* entropyWorkspace, size_t entropyWorkspaceSize,
133
            const ZSTD_hufCTables_t* prevHuf,
134
                  ZSTD_hufCTables_t* nextHuf,
135
                  ZSTD_strategy strategy,
136
                  int disableLiteralCompression,
137
                  int suspectUncompressible,
138
                  int bmi2)
139
0
{
140
0
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
141
0
    BYTE*  const ostart = (BYTE*)dst;
142
0
    U32 singleStream = srcSize < 256;
143
0
    SymbolEncodingType_e hType = set_compressed;
144
0
    size_t cLitSize;
145
146
0
    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147
0
                disableLiteralCompression, (U32)srcSize, dstCapacity);
148
149
0
    DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
150
151
    /* Prepare nextEntropy assuming reusing the existing table */
152
0
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
153
154
0
    if (disableLiteralCompression)
155
0
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
156
157
    /* if too small, don't even attempt compression (speed opt) */
158
0
    if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159
0
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
160
161
0
    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
162
0
    {   HUF_repeat repeat = prevHuf->repeatMode;
163
0
        int const flags = 0
164
0
            | (bmi2 ? HUF_flags_bmi2 : 0)
165
0
            | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
166
0
            | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
167
0
            | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
168
169
0
        typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170
0
        huf_compress_f huf_compress;
171
0
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
172
0
        huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173
0
        cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
174
0
                                src, srcSize,
175
0
                                HUF_SYMBOLVALUE_MAX, LitHufLog,
176
0
                                entropyWorkspace, entropyWorkspaceSize,
177
0
                                (HUF_CElt*)nextHuf->CTable,
178
0
                                &repeat, flags);
179
0
        DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
180
0
        if (repeat != HUF_repeat_none) {
181
            /* reused the existing table */
182
0
            DEBUGLOG(5, "reusing statistics from previous huffman block");
183
0
            hType = set_repeat;
184
0
        }
185
0
    }
186
187
0
    {   size_t const minGain = ZSTD_minGain(srcSize, strategy);
188
0
        if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189
0
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190
0
            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
191
0
    }   }
192
0
    if (cLitSize==1) {
193
        /* A return value of 1 signals that the alphabet consists of a single symbol.
194
         * However, in some rare circumstances, it could be the compressed size (a single byte).
195
         * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196
         * (it's also necessary to not generate statistics).
197
         * Therefore, in such a case, actively check that all bytes are identical. */
198
0
        if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199
0
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200
0
            return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
201
0
    }   }
202
203
0
    if (hType == set_compressed) {
204
        /* using a newly constructed table */
205
0
        nextHuf->repeatMode = HUF_repeat_check;
206
0
    }
207
208
    /* Build header */
209
0
    switch(lhSize)
210
0
    {
211
0
    case 3: /* 2 - 2 - 10 - 10 */
212
0
        if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213
0
        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
214
0
            MEM_writeLE24(ostart, lhc);
215
0
            break;
216
0
        }
217
0
    case 4: /* 2 - 2 - 14 - 14 */
218
0
        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
219
0
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
220
0
            MEM_writeLE32(ostart, lhc);
221
0
            break;
222
0
        }
223
0
    case 5: /* 2 - 2 - 18 - 18 */
224
0
        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
225
0
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
226
0
            MEM_writeLE32(ostart, lhc);
227
0
            ostart[4] = (BYTE)(cLitSize >> 10);
228
0
            break;
229
0
        }
230
0
    default:  /* not possible : lhSize is {3,4,5} */
231
0
        assert(0);
232
0
    }
233
0
    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
234
0
    return lhSize+cLitSize;
235
0
}