/src/zstd/lib/compress/zstd_compress_literals.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | /*-************************************* |
12 | | * Dependencies |
13 | | ***************************************/ |
14 | | #include "zstd_compress_literals.h" |
15 | | |
16 | | |
17 | | /* ************************************************************** |
18 | | * Debug Traces |
19 | | ****************************************************************/ |
20 | | #if DEBUGLEVEL >= 2 |
21 | | |
22 | | static size_t showHexa(const void* src, size_t srcSize) |
23 | | { |
24 | | const BYTE* const ip = (const BYTE*)src; |
25 | | size_t u; |
26 | | for (u=0; u<srcSize; u++) { |
27 | | RAWLOG(5, " %02X", ip[u]); (void)ip; |
28 | | } |
29 | | RAWLOG(5, " \n"); |
30 | | return srcSize; |
31 | | } |
32 | | |
33 | | #endif |
34 | | |
35 | | |
36 | | /* ************************************************************** |
37 | | * Literals compression - special cases |
38 | | ****************************************************************/ |
39 | | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
40 | 0 | { |
41 | 0 | BYTE* const ostart = (BYTE*)dst; |
42 | 0 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); |
43 | |
|
44 | 0 | DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity); |
45 | |
|
46 | 0 | RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); |
47 | | |
48 | 0 | switch(flSize) |
49 | 0 | { |
50 | 0 | case 1: /* 2 - 1 - 5 */ |
51 | 0 | ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); |
52 | 0 | break; |
53 | 0 | case 2: /* 2 - 2 - 12 */ |
54 | 0 | MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); |
55 | 0 | break; |
56 | 0 | case 3: /* 2 - 2 - 20 */ |
57 | 0 | MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); |
58 | 0 | break; |
59 | 0 | default: /* not necessary : flSize is {1,2,3} */ |
60 | 0 | assert(0); |
61 | 0 | } |
62 | | |
63 | 0 | ZSTD_memcpy(ostart + flSize, src, srcSize); |
64 | 0 | DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); |
65 | 0 | return srcSize + flSize; |
66 | 0 | } |
67 | | |
68 | | static int allBytesIdentical(const void* src, size_t srcSize) |
69 | 0 | { |
70 | 0 | assert(srcSize >= 1); |
71 | 0 | assert(src != NULL); |
72 | 0 | { const BYTE b = ((const BYTE*)src)[0]; |
73 | 0 | size_t p; |
74 | 0 | for (p=1; p<srcSize; p++) { |
75 | 0 | if (((const BYTE*)src)[p] != b) return 0; |
76 | 0 | } |
77 | 0 | return 1; |
78 | 0 | } |
79 | 0 | } |
80 | | |
81 | | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
82 | 0 | { |
83 | 0 | BYTE* const ostart = (BYTE*)dst; |
84 | 0 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); |
85 | |
|
86 | 0 | assert(dstCapacity >= 4); (void)dstCapacity; |
87 | 0 | assert(allBytesIdentical(src, srcSize)); |
88 | |
|
89 | 0 | switch(flSize) |
90 | 0 | { |
91 | 0 | case 1: /* 2 - 1 - 5 */ |
92 | 0 | ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); |
93 | 0 | break; |
94 | 0 | case 2: /* 2 - 2 - 12 */ |
95 | 0 | MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); |
96 | 0 | break; |
97 | 0 | case 3: /* 2 - 2 - 20 */ |
98 | 0 | MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); |
99 | 0 | break; |
100 | 0 | default: /* not necessary : flSize is {1,2,3} */ |
101 | 0 | assert(0); |
102 | 0 | } |
103 | | |
104 | 0 | ostart[flSize] = *(const BYTE*)src; |
105 | 0 | DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1); |
106 | 0 | return flSize+1; |
107 | 0 | } |
108 | | |
109 | | /* ZSTD_minLiteralsToCompress() : |
110 | | * returns minimal amount of literals |
111 | | * for literal compression to even be attempted. |
112 | | * Minimum is made tighter as compression strategy increases. |
113 | | */ |
114 | | static size_t |
115 | | ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) |
116 | 0 | { |
117 | 0 | assert((int)strategy >= 0); |
118 | 0 | assert((int)strategy <= 9); |
119 | | /* btultra2 : min 8 bytes; |
120 | | * then 2x larger for each successive compression strategy |
121 | | * max threshold 64 bytes */ |
122 | 0 | { int const shift = MIN(9-(int)strategy, 3); |
123 | 0 | size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift; |
124 | 0 | DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); |
125 | 0 | return mintc; |
126 | 0 | } |
127 | 0 | } |
128 | | |
129 | | size_t ZSTD_compressLiterals ( |
130 | | void* dst, size_t dstCapacity, |
131 | | const void* src, size_t srcSize, |
132 | | void* entropyWorkspace, size_t entropyWorkspaceSize, |
133 | | const ZSTD_hufCTables_t* prevHuf, |
134 | | ZSTD_hufCTables_t* nextHuf, |
135 | | ZSTD_strategy strategy, |
136 | | int disableLiteralCompression, |
137 | | int suspectUncompressible, |
138 | | int bmi2) |
139 | 0 | { |
140 | 0 | size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); |
141 | 0 | BYTE* const ostart = (BYTE*)dst; |
142 | 0 | U32 singleStream = srcSize < 256; |
143 | 0 | SymbolEncodingType_e hType = set_compressed; |
144 | 0 | size_t cLitSize; |
145 | |
|
146 | 0 | DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", |
147 | 0 | disableLiteralCompression, (U32)srcSize, dstCapacity); |
148 | |
|
149 | 0 | DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); |
150 | | |
151 | | /* Prepare nextEntropy assuming reusing the existing table */ |
152 | 0 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
153 | |
|
154 | 0 | if (disableLiteralCompression) |
155 | 0 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
156 | | |
157 | | /* if too small, don't even attempt compression (speed opt) */ |
158 | 0 | if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) |
159 | 0 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
160 | | |
161 | 0 | RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); |
162 | 0 | { HUF_repeat repeat = prevHuf->repeatMode; |
163 | 0 | int const flags = 0 |
164 | 0 | | (bmi2 ? HUF_flags_bmi2 : 0) |
165 | 0 | | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) |
166 | 0 | | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) |
167 | 0 | | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); |
168 | |
|
169 | 0 | typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); |
170 | 0 | huf_compress_f huf_compress; |
171 | 0 | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; |
172 | 0 | huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; |
173 | 0 | cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, |
174 | 0 | src, srcSize, |
175 | 0 | HUF_SYMBOLVALUE_MAX, LitHufLog, |
176 | 0 | entropyWorkspace, entropyWorkspaceSize, |
177 | 0 | (HUF_CElt*)nextHuf->CTable, |
178 | 0 | &repeat, flags); |
179 | 0 | DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); |
180 | 0 | if (repeat != HUF_repeat_none) { |
181 | | /* reused the existing table */ |
182 | 0 | DEBUGLOG(5, "reusing statistics from previous huffman block"); |
183 | 0 | hType = set_repeat; |
184 | 0 | } |
185 | 0 | } |
186 | |
|
187 | 0 | { size_t const minGain = ZSTD_minGain(srcSize, strategy); |
188 | 0 | if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { |
189 | 0 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
190 | 0 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
191 | 0 | } } |
192 | 0 | if (cLitSize==1) { |
193 | | /* A return value of 1 signals that the alphabet consists of a single symbol. |
194 | | * However, in some rare circumstances, it could be the compressed size (a single byte). |
195 | | * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. |
196 | | * (it's also necessary to not generate statistics). |
197 | | * Therefore, in such a case, actively check that all bytes are identical. */ |
198 | 0 | if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { |
199 | 0 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
200 | 0 | return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); |
201 | 0 | } } |
202 | | |
203 | 0 | if (hType == set_compressed) { |
204 | | /* using a newly constructed table */ |
205 | 0 | nextHuf->repeatMode = HUF_repeat_check; |
206 | 0 | } |
207 | | |
208 | | /* Build header */ |
209 | 0 | switch(lhSize) |
210 | 0 | { |
211 | 0 | case 3: /* 2 - 2 - 10 - 10 */ |
212 | 0 | if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); |
213 | 0 | { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); |
214 | 0 | MEM_writeLE24(ostart, lhc); |
215 | 0 | break; |
216 | 0 | } |
217 | 0 | case 4: /* 2 - 2 - 14 - 14 */ |
218 | 0 | assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); |
219 | 0 | { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); |
220 | 0 | MEM_writeLE32(ostart, lhc); |
221 | 0 | break; |
222 | 0 | } |
223 | 0 | case 5: /* 2 - 2 - 18 - 18 */ |
224 | 0 | assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); |
225 | 0 | { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); |
226 | 0 | MEM_writeLE32(ostart, lhc); |
227 | 0 | ostart[4] = (BYTE)(cLitSize >> 10); |
228 | 0 | break; |
229 | 0 | } |
230 | 0 | default: /* not possible : lhSize is {3,4,5} */ |
231 | 0 | assert(0); |
232 | 0 | } |
233 | 0 | DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); |
234 | 0 | return lhSize+cLitSize; |
235 | 0 | } |