/src/tdengine/contrib/TSZ/zstd/compress/huf_compress.c
Line | Count | Source |
1 | | /* ****************************************************************** |
2 | | Huffman encoder, part of New Generation Entropy library |
3 | | Copyright (C) 2013-2016, Yann Collet. |
4 | | |
5 | | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
6 | | |
7 | | Redistribution and use in source and binary forms, with or without |
8 | | modification, are permitted provided that the following conditions are |
9 | | met: |
10 | | |
11 | | * Redistributions of source code must retain the above copyright |
12 | | notice, this list of conditions and the following disclaimer. |
13 | | * Redistributions in binary form must reproduce the above |
14 | | copyright notice, this list of conditions and the following disclaimer |
15 | | in the documentation and/or other materials provided with the |
16 | | distribution. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | | |
30 | | You can contact the author at : |
31 | | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy |
32 | | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
33 | | ****************************************************************** */ |
34 | | |
35 | | /* ************************************************************** |
36 | | * Compiler specifics |
37 | | ****************************************************************/ |
38 | | #ifdef _MSC_VER /* Visual Studio */ |
39 | | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ |
40 | | #endif |
41 | | |
42 | | |
43 | | /* ************************************************************** |
44 | | * Includes |
45 | | ****************************************************************/ |
46 | | #include <string.h> /* memcpy, memset */ |
47 | | #include <stdio.h> /* printf (debug) */ |
48 | | #include "compiler.h" |
49 | | #include "bitstream.h" |
50 | | #include "hist.h" |
51 | | #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ |
52 | | #include "fse.h" /* header compression */ |
53 | | #define HUF_STATIC_LINKING_ONLY |
54 | | #include "huf.h" |
55 | | #include "error_private.h" |
56 | | |
57 | | |
58 | | /* ************************************************************** |
59 | | * Error Management |
60 | | ****************************************************************/ |
/* HUF_isError : alias of ERR_isError. */
#define HUF_isError ERR_isError
/* HUF_STATIC_ASSERT() : compile-time check; use only *after* variable declarations. */
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
/* CHECK_V_F() : declare `e` as a const size_t holding the result of `f`;
 * if that result is an error code, return it from the enclosing function.
 * Note : expands to a declaration followed by a statement, so it must sit where a declaration is allowed. */
#define CHECK_V_F(e, f) size_t const e = (f); if (ERR_isError(e)) return e
/* CHECK_F() : same as CHECK_V_F(), with the result confined to its own block (discarded when not an error). */
#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
65 | | |
66 | | |
67 | | /* ************************************************************** |
68 | | * Utils |
69 | | ****************************************************************/ |
/* HUF_optimalTableLog() :
 * Forwards to FSE_optimalTableLog_internal(), with that function's last argument fixed to 1.
 * @return : whatever FSE_optimalTableLog_internal() returns for these inputs. */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    unsigned const tableLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
    return tableLog;
}
74 | | |
75 | | |
76 | | /* ******************************************************* |
77 | | * HUF : Huffman block compression |
78 | | *********************************************************/ |
79 | | /* HUF_compressWeights() : |
80 | | * Same as FSE_compress(), but dedicated to huff0's weights compression. |
81 | | * The use case needs much less stack memory. |
82 | | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. |
83 | | */ |
84 | 0 | #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 |
85 | | size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) |
86 | 0 | { |
87 | 0 | BYTE* const ostart = (BYTE*) dst; |
88 | 0 | BYTE* op = ostart; |
89 | 0 | BYTE* const oend = ostart + dstSize; |
90 | |
|
91 | 0 | U32 maxSymbolValue = HUF_TABLELOG_MAX; |
92 | 0 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; |
93 | |
|
94 | 0 | FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; |
95 | 0 | BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; |
96 | |
|
97 | 0 | U32 count[HUF_TABLELOG_MAX+1]; |
98 | 0 | S16 norm[HUF_TABLELOG_MAX+1]; |
99 | | |
100 | | /* init conditions */ |
101 | 0 | if (wtSize <= 1) return 0; /* Not compressible */ |
102 | | |
103 | | /* Scan input and build symbol stats */ |
104 | 0 | { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ |
105 | 0 | if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ |
106 | 0 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
107 | 0 | } |
108 | | |
109 | 0 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); |
110 | 0 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); |
111 | | |
112 | | /* Write table description header */ |
113 | 0 | { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); |
114 | 0 | op += hSize; |
115 | 0 | } |
116 | | |
117 | | /* Compress */ |
118 | 0 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); |
119 | 0 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); |
120 | 0 | if (cSize == 0) return 0; /* not enough space for compressed data */ |
121 | 0 | op += cSize; |
122 | 0 | } |
123 | | |
124 | 0 | return op-ostart; |
125 | 0 | } |
126 | | |
127 | | |
128 | | struct HUF_CElt_s { |
129 | | U16 val; |
130 | | BYTE nbBits; |
131 | | }; /* typedef'd to HUF_CElt within "huf.h" */ |
132 | | |
133 | | /*! HUF_writeCTable() : |
134 | | `CTable` : Huffman tree to save, using huf representation. |
135 | | @return : size of saved CTable */ |
136 | | size_t HUF_writeCTable (void* dst, size_t maxDstSize, |
137 | | const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) |
138 | 0 | { |
139 | 0 | BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ |
140 | 0 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; |
141 | 0 | BYTE* op = (BYTE*)dst; |
142 | 0 | U32 n; |
143 | | |
144 | | /* check conditions */ |
145 | 0 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
146 | | |
147 | | /* convert to weight */ |
148 | 0 | bitsToWeight[0] = 0; |
149 | 0 | for (n=1; n<huffLog+1; n++) |
150 | 0 | bitsToWeight[n] = (BYTE)(huffLog + 1 - n); |
151 | 0 | for (n=0; n<maxSymbolValue; n++) |
152 | 0 | huffWeight[n] = bitsToWeight[CTable[n].nbBits]; |
153 | | |
154 | | /* attempt weights compression by FSE */ |
155 | 0 | { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); |
156 | 0 | if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ |
157 | 0 | op[0] = (BYTE)hSize; |
158 | 0 | return hSize+1; |
159 | 0 | } } |
160 | | |
161 | | /* write raw values as 4-bits (max : 15) */ |
162 | 0 | if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ |
163 | 0 | if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ |
164 | 0 | op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); |
165 | 0 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ |
166 | 0 | for (n=0; n<maxSymbolValue; n+=2) |
167 | 0 | op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); |
168 | 0 | return ((maxSymbolValue+1)/2) + 1; |
169 | 0 | } |
170 | | |
171 | | |
172 | | size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize) |
173 | 0 | { |
174 | 0 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ |
175 | 0 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ |
176 | 0 | U32 tableLog = 0; |
177 | 0 | U32 nbSymbols = 0; |
178 | | |
179 | | /* get symbol weights */ |
180 | 0 | CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); |
181 | | |
182 | | /* check result */ |
183 | 0 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
184 | 0 | if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); |
185 | | |
186 | | /* Prepare base value per rank */ |
187 | 0 | { U32 n, nextRankStart = 0; |
188 | 0 | for (n=1; n<=tableLog; n++) { |
189 | 0 | U32 current = nextRankStart; |
190 | 0 | nextRankStart += (rankVal[n] << (n-1)); |
191 | 0 | rankVal[n] = current; |
192 | 0 | } } |
193 | | |
194 | | /* fill nbBits */ |
195 | 0 | { U32 n; for (n=0; n<nbSymbols; n++) { |
196 | 0 | const U32 w = huffWeight[n]; |
197 | 0 | CTable[n].nbBits = (BYTE)(tableLog + 1 - w); |
198 | 0 | } } |
199 | | |
200 | | /* fill val */ |
201 | 0 | { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ |
202 | 0 | U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; |
203 | 0 | { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } |
204 | | /* determine stating value per rank */ |
205 | 0 | valPerRank[tableLog+1] = 0; /* for w==0 */ |
206 | 0 | { U16 min = 0; |
207 | 0 | U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ |
208 | 0 | valPerRank[n] = min; /* get starting value within each rank */ |
209 | 0 | min += nbPerRank[n]; |
210 | 0 | min >>= 1; |
211 | 0 | } } |
212 | | /* assign value within rank, symbol order */ |
213 | 0 | { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } |
214 | 0 | } |
215 | |
|
216 | 0 | *maxSymbolValuePtr = nbSymbols - 1; |
217 | 0 | return readSize; |
218 | 0 | } |
219 | | |
220 | | U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) |
221 | 0 | { |
222 | 0 | const HUF_CElt* table = (const HUF_CElt*)symbolTable; |
223 | 0 | assert(symbolValue <= HUF_SYMBOLVALUE_MAX); |
224 | 0 | return table[symbolValue].nbBits; |
225 | 0 | } |
226 | | |
227 | | |
228 | | typedef struct nodeElt_s { |
229 | | U32 count; |
230 | | U16 parent; |
231 | | BYTE byte; |
232 | | BYTE nbBits; |
233 | | } nodeElt; |
234 | | |
235 | | static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) |
236 | 0 | { |
237 | 0 | const U32 largestBits = huffNode[lastNonNull].nbBits; |
238 | 0 | if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ |
239 | | |
240 | | /* there are several too large elements (at least >= 2) */ |
241 | 0 | { int totalCost = 0; |
242 | 0 | const U32 baseCost = 1 << (largestBits - maxNbBits); |
243 | 0 | U32 n = lastNonNull; |
244 | |
|
245 | 0 | while (huffNode[n].nbBits > maxNbBits) { |
246 | 0 | totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); |
247 | 0 | huffNode[n].nbBits = (BYTE)maxNbBits; |
248 | 0 | n --; |
249 | 0 | } /* n stops at huffNode[n].nbBits <= maxNbBits */ |
250 | 0 | while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ |
251 | | |
252 | | /* renorm totalCost */ |
253 | 0 | totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ |
254 | | |
255 | | /* repay normalized cost */ |
256 | 0 | { U32 const noSymbol = 0xF0F0F0F0; |
257 | 0 | U32 rankLast[HUF_TABLELOG_MAX+2]; |
258 | 0 | int pos; |
259 | | |
260 | | /* Get pos of last (smallest) symbol per rank */ |
261 | 0 | memset(rankLast, 0xF0, sizeof(rankLast)); |
262 | 0 | { U32 currentNbBits = maxNbBits; |
263 | 0 | for (pos=n ; pos >= 0; pos--) { |
264 | 0 | if (huffNode[pos].nbBits >= currentNbBits) continue; |
265 | 0 | currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ |
266 | 0 | rankLast[maxNbBits-currentNbBits] = pos; |
267 | 0 | } } |
268 | |
|
269 | 0 | while (totalCost > 0) { |
270 | 0 | U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; |
271 | 0 | for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { |
272 | 0 | U32 highPos = rankLast[nBitsToDecrease]; |
273 | 0 | U32 lowPos = rankLast[nBitsToDecrease-1]; |
274 | 0 | if (highPos == noSymbol) continue; |
275 | 0 | if (lowPos == noSymbol) break; |
276 | 0 | { U32 const highTotal = huffNode[highPos].count; |
277 | 0 | U32 const lowTotal = 2 * huffNode[lowPos].count; |
278 | 0 | if (highTotal <= lowTotal) break; |
279 | 0 | } } |
280 | | /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ |
281 | | /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ |
282 | 0 | while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) |
283 | 0 | nBitsToDecrease ++; |
284 | 0 | totalCost -= 1 << (nBitsToDecrease-1); |
285 | 0 | if (rankLast[nBitsToDecrease-1] == noSymbol) |
286 | 0 | rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ |
287 | 0 | huffNode[rankLast[nBitsToDecrease]].nbBits ++; |
288 | 0 | if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ |
289 | 0 | rankLast[nBitsToDecrease] = noSymbol; |
290 | 0 | else { |
291 | 0 | rankLast[nBitsToDecrease]--; |
292 | 0 | if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) |
293 | 0 | rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ |
294 | 0 | } } /* while (totalCost > 0) */ |
295 | |
|
296 | 0 | while (totalCost < 0) { /* Sometimes, cost correction overshoot */ |
297 | 0 | if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ |
298 | 0 | while (huffNode[n].nbBits == maxNbBits) n--; |
299 | 0 | huffNode[n+1].nbBits--; |
300 | 0 | rankLast[1] = n+1; |
301 | 0 | totalCost++; |
302 | 0 | continue; |
303 | 0 | } |
304 | 0 | huffNode[ rankLast[1] + 1 ].nbBits--; |
305 | 0 | rankLast[1]++; |
306 | 0 | totalCost ++; |
307 | 0 | } } } /* there are several too large elements (at least >= 2) */ |
308 | |
|
309 | 0 | return maxNbBits; |
310 | 0 | } |
311 | | |
312 | | |
313 | | typedef struct { |
314 | | U32 base; |
315 | | U32 current; |
316 | | } rankPos; |
317 | | |
318 | | static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) |
319 | 0 | { |
320 | 0 | rankPos rank[32]; |
321 | 0 | U32 n; |
322 | |
|
323 | 0 | memset(rank, 0, sizeof(rank)); |
324 | 0 | for (n=0; n<=maxSymbolValue; n++) { |
325 | 0 | U32 r = BIT_highbit32(count[n] + 1); |
326 | 0 | rank[r].base ++; |
327 | 0 | } |
328 | 0 | for (n=30; n>0; n--) rank[n-1].base += rank[n].base; |
329 | 0 | for (n=0; n<32; n++) rank[n].current = rank[n].base; |
330 | 0 | for (n=0; n<=maxSymbolValue; n++) { |
331 | 0 | U32 const c = count[n]; |
332 | 0 | U32 const r = BIT_highbit32(c+1) + 1; |
333 | 0 | U32 pos = rank[r].current++; |
334 | 0 | while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { |
335 | 0 | huffNode[pos] = huffNode[pos-1]; |
336 | 0 | pos--; |
337 | 0 | } |
338 | 0 | huffNode[pos].count = c; |
339 | 0 | huffNode[pos].byte = (BYTE)n; |
340 | 0 | } |
341 | 0 | } |
342 | | |
343 | | |
344 | | /** HUF_buildCTable_wksp() : |
345 | | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. |
346 | | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. |
347 | | */ |
348 | 0 | #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) |
349 | | typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; |
350 | | size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) |
351 | 0 | { |
352 | 0 | nodeElt* const huffNode0 = (nodeElt*)workSpace; |
353 | 0 | nodeElt* const huffNode = huffNode0+1; |
354 | 0 | U32 n, nonNullRank; |
355 | 0 | int lowS, lowN; |
356 | 0 | U16 nodeNb = STARTNODE; |
357 | 0 | U32 nodeRoot; |
358 | | |
359 | | /* safety checks */ |
360 | 0 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
361 | 0 | if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); |
362 | 0 | if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; |
363 | 0 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
364 | 0 | memset(huffNode0, 0, sizeof(huffNodeTable)); |
365 | | |
366 | | /* sort, decreasing order */ |
367 | 0 | HUF_sort(huffNode, count, maxSymbolValue); |
368 | | |
369 | | /* init for parents */ |
370 | 0 | nonNullRank = maxSymbolValue; |
371 | 0 | while(huffNode[nonNullRank].count == 0) nonNullRank--; |
372 | 0 | lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; |
373 | 0 | huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; |
374 | 0 | huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; |
375 | 0 | nodeNb++; lowS-=2; |
376 | 0 | for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); |
377 | 0 | huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ |
378 | | |
379 | | /* create parents */ |
380 | 0 | while (nodeNb <= nodeRoot) { |
381 | 0 | U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
382 | 0 | U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
383 | 0 | huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; |
384 | 0 | huffNode[n1].parent = huffNode[n2].parent = nodeNb; |
385 | 0 | nodeNb++; |
386 | 0 | } |
387 | | |
388 | | /* distribute weights (unlimited tree height) */ |
389 | 0 | huffNode[nodeRoot].nbBits = 0; |
390 | 0 | for (n=nodeRoot-1; n>=STARTNODE; n--) |
391 | 0 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; |
392 | 0 | for (n=0; n<=nonNullRank; n++) |
393 | 0 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; |
394 | | |
395 | | /* enforce maxTableLog */ |
396 | 0 | maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); |
397 | | |
398 | | /* fill result into tree (val, nbBits) */ |
399 | 0 | { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; |
400 | 0 | U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; |
401 | 0 | if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ |
402 | 0 | for (n=0; n<=nonNullRank; n++) |
403 | 0 | nbPerRank[huffNode[n].nbBits]++; |
404 | | /* determine stating value per rank */ |
405 | 0 | { U16 min = 0; |
406 | 0 | for (n=maxNbBits; n>0; n--) { |
407 | 0 | valPerRank[n] = min; /* get starting value within each rank */ |
408 | 0 | min += nbPerRank[n]; |
409 | 0 | min >>= 1; |
410 | 0 | } } |
411 | 0 | for (n=0; n<=maxSymbolValue; n++) |
412 | 0 | tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ |
413 | 0 | for (n=0; n<=maxSymbolValue; n++) |
414 | 0 | tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ |
415 | 0 | } |
416 | | |
417 | 0 | return maxNbBits; |
418 | 0 | } |
419 | | |
420 | | /** HUF_buildCTable() : |
421 | | * @return : maxNbBits |
422 | | * Note : count is used before tree is written, so they can safely overlap |
423 | | */ |
424 | | size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) |
425 | 0 | { |
426 | 0 | huffNodeTable nodeTable; |
427 | 0 | return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); |
428 | 0 | } |
429 | | |
430 | | static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) |
431 | 0 | { |
432 | 0 | size_t nbBits = 0; |
433 | 0 | int s; |
434 | 0 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
435 | 0 | nbBits += CTable[s].nbBits * count[s]; |
436 | 0 | } |
437 | 0 | return nbBits >> 3; |
438 | 0 | } |
439 | | |
440 | 0 | static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { |
441 | 0 | int bad = 0; |
442 | 0 | int s; |
443 | 0 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
444 | 0 | bad |= (count[s] != 0) & (CTable[s].nbBits == 0); |
445 | 0 | } |
446 | 0 | return !bad; |
447 | 0 | } |
448 | | |
/* HUF_compressBound() : function form of the HUF_COMPRESSBOUND() macro. */
size_t HUF_compressBound(size_t size)
{
    return HUF_COMPRESSBOUND(size);
}
450 | | |
451 | | FORCE_INLINE_TEMPLATE void |
452 | | HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) |
453 | 0 | { |
454 | 0 | BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); |
455 | 0 | } |
456 | | |
/* HUF_FLUSHBITS() : unconditional flush of the bit container. */
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* HUF_FLUSHBITS_1() : flush only when the bit container is narrower than HUF_TABLELOG_MAX*2+7 bits.
 * The test only involves compile-time constants. */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

/* HUF_FLUSHBITS_2() : flush only when the bit container is narrower than HUF_TABLELOG_MAX*4+7 bits. */
#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464 | | |
465 | | FORCE_INLINE_TEMPLATE size_t |
466 | | HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, |
467 | | const void* src, size_t srcSize, |
468 | | const HUF_CElt* CTable) |
469 | 0 | { |
470 | 0 | const BYTE* ip = (const BYTE*) src; |
471 | 0 | BYTE* const ostart = (BYTE*)dst; |
472 | 0 | BYTE* const oend = ostart + dstSize; |
473 | 0 | BYTE* op = ostart; |
474 | 0 | size_t n; |
475 | 0 | BIT_CStream_t bitC; |
476 | | |
477 | | /* init */ |
478 | 0 | if (dstSize < 8) return 0; /* not enough space to compress */ |
479 | 0 | { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); |
480 | 0 | if (HUF_isError(initErr)) return 0; } |
481 | | |
482 | 0 | n = srcSize & ~3; /* join to mod 4 */ |
483 | 0 | switch (srcSize & 3) |
484 | 0 | { |
485 | 0 | case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); |
486 | 0 | HUF_FLUSHBITS_2(&bitC); |
487 | | /* fall-through */ |
488 | 0 | case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); |
489 | 0 | HUF_FLUSHBITS_1(&bitC); |
490 | | /* fall-through */ |
491 | 0 | case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); |
492 | 0 | HUF_FLUSHBITS(&bitC); |
493 | | /* fall-through */ |
494 | 0 | case 0 : /* fall-through */ |
495 | 0 | default: break; |
496 | 0 | } |
497 | | |
498 | 0 | for (; n>0; n-=4) { /* note : n&3==0 at this stage */ |
499 | 0 | HUF_encodeSymbol(&bitC, ip[n- 1], CTable); |
500 | 0 | HUF_FLUSHBITS_1(&bitC); |
501 | 0 | HUF_encodeSymbol(&bitC, ip[n- 2], CTable); |
502 | 0 | HUF_FLUSHBITS_2(&bitC); |
503 | 0 | HUF_encodeSymbol(&bitC, ip[n- 3], CTable); |
504 | 0 | HUF_FLUSHBITS_1(&bitC); |
505 | 0 | HUF_encodeSymbol(&bitC, ip[n- 4], CTable); |
506 | 0 | HUF_FLUSHBITS(&bitC); |
507 | 0 | } |
508 | |
|
509 | 0 | return BIT_closeCStream(&bitC); |
510 | 0 | } |
511 | | |
512 | | #if DYNAMIC_BMI2 |
513 | | |
514 | | static TARGET_ATTRIBUTE("bmi2") size_t |
515 | | HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, |
516 | | const void* src, size_t srcSize, |
517 | | const HUF_CElt* CTable) |
518 | 0 | { |
519 | 0 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
520 | 0 | } |
521 | | |
522 | | static size_t |
523 | | HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, |
524 | | const void* src, size_t srcSize, |
525 | | const HUF_CElt* CTable) |
526 | 0 | { |
527 | 0 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
528 | 0 | } |
529 | | |
530 | | static size_t |
531 | | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, |
532 | | const void* src, size_t srcSize, |
533 | | const HUF_CElt* CTable, const int bmi2) |
534 | 0 | { |
535 | 0 | if (bmi2) { |
536 | 0 | return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); |
537 | 0 | } |
538 | 0 | return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); |
539 | 0 | } |
540 | | |
541 | | #else |
542 | | |
543 | | static size_t |
544 | | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, |
545 | | const void* src, size_t srcSize, |
546 | | const HUF_CElt* CTable, const int bmi2) |
547 | | { |
548 | | (void)bmi2; |
549 | | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
550 | | } |
551 | | |
552 | | #endif |
553 | | |
554 | | size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
555 | 0 | { |
556 | 0 | return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
557 | 0 | } |
558 | | |
559 | | |
560 | | static size_t |
561 | | HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, |
562 | | const void* src, size_t srcSize, |
563 | | const HUF_CElt* CTable, int bmi2) |
564 | 0 | { |
565 | 0 | size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ |
566 | 0 | const BYTE* ip = (const BYTE*) src; |
567 | 0 | const BYTE* const iend = ip + srcSize; |
568 | 0 | BYTE* const ostart = (BYTE*) dst; |
569 | 0 | BYTE* const oend = ostart + dstSize; |
570 | 0 | BYTE* op = ostart; |
571 | |
|
572 | 0 | if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ |
573 | 0 | if (srcSize < 12) return 0; /* no saving possible : too small input */ |
574 | 0 | op += 6; /* jumpTable */ |
575 | |
|
576 | 0 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
577 | 0 | if (cSize==0) return 0; |
578 | 0 | assert(cSize <= 65535); |
579 | 0 | MEM_writeLE16(ostart, (U16)cSize); |
580 | 0 | op += cSize; |
581 | 0 | } |
582 | | |
583 | 0 | ip += segmentSize; |
584 | 0 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
585 | 0 | if (cSize==0) return 0; |
586 | 0 | assert(cSize <= 65535); |
587 | 0 | MEM_writeLE16(ostart+2, (U16)cSize); |
588 | 0 | op += cSize; |
589 | 0 | } |
590 | | |
591 | 0 | ip += segmentSize; |
592 | 0 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
593 | 0 | if (cSize==0) return 0; |
594 | 0 | assert(cSize <= 65535); |
595 | 0 | MEM_writeLE16(ostart+4, (U16)cSize); |
596 | 0 | op += cSize; |
597 | 0 | } |
598 | | |
599 | 0 | ip += segmentSize; |
600 | 0 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); |
601 | 0 | if (cSize==0) return 0; |
602 | 0 | op += cSize; |
603 | 0 | } |
604 | | |
605 | 0 | return op-ostart; |
606 | 0 | } |
607 | | |
608 | | size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
609 | 0 | { |
610 | 0 | return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
611 | 0 | } |
612 | | |
613 | | |
614 | | static size_t HUF_compressCTable_internal( |
615 | | BYTE* const ostart, BYTE* op, BYTE* const oend, |
616 | | const void* src, size_t srcSize, |
617 | | unsigned singleStream, const HUF_CElt* CTable, const int bmi2) |
618 | 0 | { |
619 | 0 | size_t const cSize = singleStream ? |
620 | 0 | HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : |
621 | 0 | HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); |
622 | 0 | if (HUF_isError(cSize)) { return cSize; } |
623 | 0 | if (cSize==0) { return 0; } /* uncompressible */ |
624 | 0 | op += cSize; |
625 | | /* check compressibility */ |
626 | 0 | if ((size_t)(op-ostart) >= srcSize-1) { return 0; } |
627 | 0 | return op-ostart; |
628 | 0 | } |
629 | | |
630 | | typedef struct { |
631 | | U32 count[HUF_SYMBOLVALUE_MAX + 1]; |
632 | | HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; |
633 | | huffNodeTable nodeTable; |
634 | | } HUF_compress_tables_t; |
635 | | |
636 | | /* HUF_compress_internal() : |
637 | | * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ |
638 | | static size_t HUF_compress_internal ( |
639 | | void* dst, size_t dstSize, |
640 | | const void* src, size_t srcSize, |
641 | | unsigned maxSymbolValue, unsigned huffLog, |
642 | | unsigned singleStream, |
643 | | void* workSpace, size_t wkspSize, |
644 | | HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, |
645 | | const int bmi2) |
646 | 0 | { |
647 | 0 | HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; |
648 | 0 | BYTE* const ostart = (BYTE*)dst; |
649 | 0 | BYTE* const oend = ostart + dstSize; |
650 | 0 | BYTE* op = ostart; |
651 | | |
652 | | /* checks & inits */ |
653 | 0 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
654 | 0 | if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); |
655 | 0 | if (!srcSize) return 0; /* Uncompressed */ |
656 | 0 | if (!dstSize) return 0; /* cannot fit anything within dst budget */ |
657 | 0 | if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ |
658 | 0 | if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
659 | 0 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
660 | 0 | if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
661 | 0 | if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; |
662 | | |
663 | | /* Heuristic : If old table is valid, use it for small inputs */ |
664 | 0 | if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { |
665 | 0 | return HUF_compressCTable_internal(ostart, op, oend, |
666 | 0 | src, srcSize, |
667 | 0 | singleStream, oldHufTable, bmi2); |
668 | 0 | } |
669 | | |
670 | | /* Scan input and build symbol stats */ |
671 | 0 | { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); |
672 | 0 | if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ |
673 | 0 | if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ |
674 | 0 | } |
675 | | |
676 | | /* Check validity of previous table */ |
677 | 0 | if ( repeat |
678 | 0 | && *repeat == HUF_repeat_check |
679 | 0 | && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { |
680 | 0 | *repeat = HUF_repeat_none; |
681 | 0 | } |
682 | | /* Heuristic : use existing table for small inputs */ |
683 | 0 | if (preferRepeat && repeat && *repeat != HUF_repeat_none) { |
684 | 0 | return HUF_compressCTable_internal(ostart, op, oend, |
685 | 0 | src, srcSize, |
686 | 0 | singleStream, oldHufTable, bmi2); |
687 | 0 | } |
688 | | |
689 | | /* Build Huffman Tree */ |
690 | 0 | huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); |
691 | 0 | { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count, |
692 | 0 | maxSymbolValue, huffLog, |
693 | 0 | table->nodeTable, sizeof(table->nodeTable)) ); |
694 | 0 | huffLog = (U32)maxBits; |
695 | | /* Zero unused symbols in CTable, so we can check it for validity */ |
696 | 0 | memset(table->CTable + (maxSymbolValue + 1), 0, |
697 | 0 | sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); |
698 | 0 | } |
699 | | |
700 | | /* Write table description header */ |
701 | 0 | { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); |
702 | | /* Check if using previous huffman table is beneficial */ |
703 | 0 | if (repeat && *repeat != HUF_repeat_none) { |
704 | 0 | size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); |
705 | 0 | size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); |
706 | 0 | if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { |
707 | 0 | return HUF_compressCTable_internal(ostart, op, oend, |
708 | 0 | src, srcSize, |
709 | 0 | singleStream, oldHufTable, bmi2); |
710 | 0 | } } |
711 | | |
712 | | /* Use the new huffman table */ |
713 | 0 | if (hSize + 12ul >= srcSize) { return 0; } |
714 | 0 | op += hSize; |
715 | 0 | if (repeat) { *repeat = HUF_repeat_none; } |
716 | 0 | if (oldHufTable) |
717 | 0 | memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ |
718 | 0 | } |
719 | 0 | return HUF_compressCTable_internal(ostart, op, oend, |
720 | 0 | src, srcSize, |
721 | 0 | singleStream, table->CTable, bmi2); |
722 | 0 | } |
723 | | |
724 | | |
725 | | size_t HUF_compress1X_wksp (void* dst, size_t dstSize, /* HUF_compress1X_wksp(): single-stream compression of src into dst; the caller supplies the scratch workspace */ |
726 | | const void* src, size_t srcSize, |
727 | | unsigned maxSymbolValue, unsigned huffLog, |
728 | | void* workSpace, size_t wkspSize) /* workSpace and wkspSize are forwarded unchanged; HUF_compress1X passes sizeof(workSpace), i.e. a size in bytes */ |
729 | 0 | { |
730 | 0 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
731 | 0 | maxSymbolValue, huffLog, 1 /*single stream*/, |
732 | 0 | workSpace, wkspSize, |
733 | 0 | NULL, NULL, 0, 0 /*bmi2*/); /* these four slots are hufTable, repeat, preferRepeat, bmi2 in HUF_compress1X_repeat: no table is reused or saved here; the result of HUF_compress_internal is returned as-is */ |
734 | 0 | } |
735 | | |
736 | | size_t HUF_compress1X_repeat (void* dst, size_t dstSize, /* HUF_compress1X_repeat(): single-stream compression that may reuse the table held in hufTable */ |
737 | | const void* src, size_t srcSize, |
738 | | unsigned maxSymbolValue, unsigned huffLog, |
739 | | void* workSpace, size_t wkspSize, |
740 | | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) /* hufTable and *repeat are in/out: the visible tail of HUF_compress_internal can reset *repeat to HUF_repeat_none and copy a newly built table into a non-NULL table pointer */ |
741 | 0 | { |
742 | 0 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
743 | 0 | maxSymbolValue, huffLog, 1 /*single stream*/, |
744 | 0 | workSpace, wkspSize, hufTable, |
745 | 0 | repeat, preferRepeat, bmi2); /* all reuse-related arguments are passed through untouched; the result is returned as-is */ |
746 | 0 | } |
747 | | |
748 | | size_t HUF_compress1X (void* dst, size_t dstSize, /* HUF_compress1X(): single-stream compression for callers that do not want to manage a workspace */ |
749 | | const void* src, size_t srcSize, |
750 | | unsigned maxSymbolValue, unsigned huffLog) |
751 | 0 | { |
752 | 0 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; /* local (automatic) scratch array; it only lives for the duration of this call */ |
753 | 0 | return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); /* size is given in bytes via sizeof */ |
754 | 0 | } |
755 | | |
756 | | /* HUF_compress4X_wksp(): |
757 | | * compress input using 4 streams; no previously built table is passed in or saved. |
758 | | * the caller provides the workspace (workSpace, wkspSize bytes) used to generate compression tables */ |
759 | | size_t HUF_compress4X_wksp (void* dst, size_t dstSize, |
760 | | const void* src, size_t srcSize, |
761 | | unsigned maxSymbolValue, unsigned huffLog, |
762 | | void* workSpace, size_t wkspSize) |
763 | 0 | { |
764 | 0 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
765 | 0 | maxSymbolValue, huffLog, 0 /*4 streams*/, |
766 | 0 | workSpace, wkspSize, |
767 | 0 | NULL, NULL, 0, 0 /*bmi2*/); /* these four slots are hufTable, repeat, preferRepeat, bmi2 in HUF_compress4X_repeat; the result of HUF_compress_internal is returned as-is */ |
768 | 0 | } |
769 | | |
770 | | /* HUF_compress4X_repeat(): |
771 | | * compress input using 4 streams. |
772 | | * may re-use the existing huffman compression table in hufTable, as steered by *repeat and preferRepeat; |
773 | | * hufTable and *repeat are in/out: the visible tail of HUF_compress_internal can reset *repeat to HUF_repeat_none and copy a newly built table into a non-NULL table pointer */ |
774 | | size_t HUF_compress4X_repeat (void* dst, size_t dstSize, |
775 | | const void* src, size_t srcSize, |
776 | | unsigned maxSymbolValue, unsigned huffLog, |
777 | | void* workSpace, size_t wkspSize, |
778 | | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
779 | 0 | { |
780 | 0 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
781 | 0 | maxSymbolValue, huffLog, 0 /* 4 streams */, |
782 | 0 | workSpace, wkspSize, |
783 | 0 | hufTable, repeat, preferRepeat, bmi2); /* all reuse-related arguments are passed through untouched; the result is returned as-is */ |
784 | 0 | } |
784 | | |
785 | | size_t HUF_compress2 (void* dst, size_t dstSize, /* HUF_compress2(): 4-stream compression for callers that do not want to manage a workspace */ |
786 | | const void* src, size_t srcSize, |
787 | | unsigned maxSymbolValue, unsigned huffLog) |
788 | 0 | { |
789 | 0 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; /* local (automatic) scratch array; it only lives for the duration of this call */ |
790 | 0 | return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); /* size is given in bytes via sizeof */ |
791 | 0 | } |
792 | | |
793 | | size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) /* HUF_compress(): simplest entry point, with symbol range and table log fixed by this wrapper */ |
794 | 0 | { |
795 | 0 | return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); /* maxSymbolValue = 255, huffLog = HUF_TABLELOG_DEFAULT; HUF_compress2 takes the 4-stream path with its own local workspace */ |
796 | 0 | } |