/src/zstd/lib/compress/zstd_compress_literals.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | /*-************************************* |
12 | | * Dependencies |
13 | | ***************************************/ |
14 | | #include "zstd_compress_literals.h" |
15 | | |
16 | | |
17 | | /* ************************************************************** |
18 | | * Debug Traces |
19 | | ****************************************************************/ |
20 | | #if DEBUGLEVEL >= 2 |
21 | | |
22 | | static size_t showHexa(const void* src, size_t srcSize) |
23 | | { |
24 | | const BYTE* const ip = (const BYTE*)src; |
25 | | size_t u; |
26 | | for (u=0; u<srcSize; u++) { |
27 | | RAWLOG(5, " %02X", ip[u]); (void)ip; |
28 | | } |
29 | | RAWLOG(5, " \n"); |
30 | | return srcSize; |
31 | | } |
32 | | |
33 | | #endif |
34 | | |
35 | | |
36 | | /* ************************************************************** |
37 | | * Literals compression - special cases |
38 | | ****************************************************************/ |
39 | | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
40 | 35.0k | { |
41 | 35.0k | BYTE* const ostart = (BYTE*)dst; |
42 | 35.0k | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); |
43 | | |
44 | 35.0k | DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity); |
45 | | |
46 | 35.0k | RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); |
47 | | |
48 | 35.0k | switch(flSize) |
49 | 35.0k | { |
50 | 30.3k | case 1: /* 2 - 1 - 5 */ |
51 | 30.3k | ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); |
52 | 30.3k | break; |
53 | 4.41k | case 2: /* 2 - 2 - 12 */ |
54 | 4.41k | MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); |
55 | 4.41k | break; |
56 | 257 | case 3: /* 2 - 2 - 20 */ |
57 | 257 | MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); |
58 | 257 | break; |
59 | 0 | default: /* not necessary : flSize is {1,2,3} */ |
60 | 0 | assert(0); |
61 | 35.0k | } |
62 | | |
63 | 35.0k | ZSTD_memcpy(ostart + flSize, src, srcSize); |
64 | 35.0k | DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); |
65 | 35.0k | return srcSize + flSize; |
66 | 35.0k | } |
67 | | |
68 | | static int allBytesIdentical(const void* src, size_t srcSize) |
69 | 0 | { |
70 | 0 | assert(srcSize >= 1); |
71 | 0 | assert(src != NULL); |
72 | 0 | { const BYTE b = ((const BYTE*)src)[0]; |
73 | 0 | size_t p; |
74 | 0 | for (p=1; p<srcSize; p++) { |
75 | 0 | if (((const BYTE*)src)[p] != b) return 0; |
76 | 0 | } |
77 | 0 | return 1; |
78 | 0 | } |
79 | 0 | } |
80 | | |
81 | | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
82 | 382 | { |
83 | 382 | BYTE* const ostart = (BYTE*)dst; |
84 | 382 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); |
85 | | |
86 | 382 | assert(dstCapacity >= 4); (void)dstCapacity; |
87 | 382 | assert(allBytesIdentical(src, srcSize)); |
88 | | |
89 | 382 | switch(flSize) |
90 | 382 | { |
91 | 0 | case 1: /* 2 - 1 - 5 */ |
92 | 0 | ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); |
93 | 0 | break; |
94 | 382 | case 2: /* 2 - 2 - 12 */ |
95 | 382 | MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); |
96 | 382 | break; |
97 | 0 | case 3: /* 2 - 2 - 20 */ |
98 | 0 | MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); |
99 | 0 | break; |
100 | 0 | default: /* not necessary : flSize is {1,2,3} */ |
101 | 0 | assert(0); |
102 | 382 | } |
103 | | |
104 | 382 | ostart[flSize] = *(const BYTE*)src; |
105 | 382 | DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1); |
106 | 382 | return flSize+1; |
107 | 382 | } |
108 | | |
109 | | /* ZSTD_minLiteralsToCompress() : |
110 | | * returns minimal amount of literals |
111 | | * for literal compression to even be attempted. |
112 | | * Minimum is made tighter as compression strategy increases. |
113 | | */ |
114 | | static size_t |
115 | | ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) |
116 | 53.6k | { |
117 | 53.6k | assert((int)strategy >= 0); |
118 | 53.6k | assert((int)strategy <= 9); |
119 | | /* btultra2 : min 8 bytes; |
120 | | * then 2x larger for each successive compression strategy |
121 | | * max threshold 64 bytes */ |
122 | 53.6k | { int const shift = MIN(9-(int)strategy, 3); |
123 | 53.6k | size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift; |
124 | 53.6k | DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); |
125 | 53.6k | return mintc; |
126 | 53.6k | } |
127 | 53.6k | } |
128 | | |
129 | | size_t ZSTD_compressLiterals ( |
130 | | void* dst, size_t dstCapacity, |
131 | | const void* src, size_t srcSize, |
132 | | void* entropyWorkspace, size_t entropyWorkspaceSize, |
133 | | const ZSTD_hufCTables_t* prevHuf, |
134 | | ZSTD_hufCTables_t* nextHuf, |
135 | | ZSTD_strategy strategy, |
136 | | int disableLiteralCompression, |
137 | | int suspectUncompressible, |
138 | | int bmi2) |
139 | 53.6k | { |
140 | 53.6k | size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); |
141 | 53.6k | BYTE* const ostart = (BYTE*)dst; |
142 | 53.6k | U32 singleStream = srcSize < 256; |
143 | 53.6k | SymbolEncodingType_e hType = set_compressed; |
144 | 53.6k | size_t cLitSize; |
145 | | |
146 | 53.6k | DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", |
147 | 53.6k | disableLiteralCompression, (U32)srcSize, dstCapacity); |
148 | | |
149 | 53.6k | DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); |
150 | | |
151 | | /* Prepare nextEntropy assuming reusing the existing table */ |
152 | 53.6k | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
153 | | |
154 | 53.6k | if (disableLiteralCompression) |
155 | 0 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
156 | | |
157 | | /* if too small, don't even attempt compression (speed opt) */ |
158 | 53.6k | if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) |
159 | 32.9k | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
160 | | |
161 | 20.6k | RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); |
162 | 20.6k | { HUF_repeat repeat = prevHuf->repeatMode; |
163 | 20.6k | int const flags = 0 |
164 | 20.6k | | (bmi2 ? HUF_flags_bmi2 : 0) |
165 | 20.6k | | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) |
166 | 20.6k | | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) |
167 | 20.6k | | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); |
168 | | |
169 | 20.6k | typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); |
170 | 20.6k | huf_compress_f huf_compress; |
171 | 20.6k | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; |
172 | 20.6k | huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; |
173 | 20.6k | cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, |
174 | 20.6k | src, srcSize, |
175 | 20.6k | HUF_SYMBOLVALUE_MAX, LitHufLog, |
176 | 20.6k | entropyWorkspace, entropyWorkspaceSize, |
177 | 20.6k | (HUF_CElt*)nextHuf->CTable, |
178 | 20.6k | &repeat, flags); |
179 | 20.6k | DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); |
180 | 20.6k | if (repeat != HUF_repeat_none) { |
181 | | /* reused the existing table */ |
182 | 8.15k | DEBUGLOG(5, "reusing statistics from previous huffman block"); |
183 | 8.15k | hType = set_repeat; |
184 | 8.15k | } |
185 | 20.6k | } |
186 | | |
187 | 20.6k | { size_t const minGain = ZSTD_minGain(srcSize, strategy); |
188 | 20.6k | if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { |
189 | 2.09k | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
190 | 2.09k | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
191 | 2.09k | } } |
192 | 18.5k | if (cLitSize==1) { |
193 | | /* A return value of 1 signals that the alphabet consists of a single symbol. |
194 | | * However, in some rare circumstances, it could be the compressed size (a single byte). |
195 | | * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. |
196 | | * (it's also necessary to not generate statistics). |
197 | | * Therefore, in such a case, actively check that all bytes are identical. */ |
198 | 382 | if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { |
199 | 382 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
200 | 382 | return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); |
201 | 382 | } } |
202 | | |
203 | 18.1k | if (hType == set_compressed) { |
204 | | /* using a newly constructed table */ |
205 | 10.4k | nextHuf->repeatMode = HUF_repeat_check; |
206 | 10.4k | } |
207 | | |
208 | | /* Build header */ |
209 | 18.1k | switch(lhSize) |
210 | 18.1k | { |
211 | 11.1k | case 3: /* 2 - 2 - 10 - 10 */ |
212 | 11.1k | if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); |
213 | 11.1k | { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); |
214 | 11.1k | MEM_writeLE24(ostart, lhc); |
215 | 11.1k | break; |
216 | 0 | } |
217 | 6.07k | case 4: /* 2 - 2 - 14 - 14 */ |
218 | 6.07k | assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); |
219 | 6.07k | { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); |
220 | 6.07k | MEM_writeLE32(ostart, lhc); |
221 | 6.07k | break; |
222 | 0 | } |
223 | 907 | case 5: /* 2 - 2 - 18 - 18 */ |
224 | 907 | assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); |
225 | 907 | { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); |
226 | 907 | MEM_writeLE32(ostart, lhc); |
227 | 907 | ostart[4] = (BYTE)(cLitSize >> 10); |
228 | 907 | break; |
229 | 0 | } |
230 | 0 | default: /* not possible : lhSize is {3,4,5} */ |
231 | 0 | assert(0); |
232 | 18.1k | } |
233 | 18.1k | DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); |
234 | 18.1k | return lhSize+cLitSize; |
235 | 18.1k | } |