/src/bzip2-1.0.8/huffman.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | /*-------------------------------------------------------------*/ |
3 | | /*--- Huffman coding low-level stuff ---*/ |
4 | | /*--- huffman.c ---*/ |
5 | | /*-------------------------------------------------------------*/ |
6 | | |
7 | | /* ------------------------------------------------------------------ |
8 | | This file is part of bzip2/libbzip2, a program and library for |
9 | | lossless, block-sorting data compression. |
10 | | |
11 | | bzip2/libbzip2 version 1.0.8 of 13 July 2019 |
12 | | Copyright (C) 1996-2019 Julian Seward <jseward@acm.org> |
13 | | |
14 | | Please read the WARNING, DISCLAIMER and PATENTS sections in the |
15 | | README file. |
16 | | |
17 | | This program is released under the terms of the license contained |
18 | | in the file LICENSE. |
19 | | ------------------------------------------------------------------ */ |
20 | | |
21 | | |
22 | | #include "bzlib_private.h" |
23 | | |
24 | | /*---------------------------------------------------*/ |
25 | 0 | #define WEIGHTOF(zz0) ((zz0) & 0xffffff00) |
26 | | #define DEPTHOF(zz1) ((zz1) & 0x000000ff) |
27 | 0 | #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) |
28 | | |
29 | | #define ADDWEIGHTS(zw1,zw2) \ |
30 | 0 | (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ |
31 | 0 | (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) |
32 | | |
33 | 0 | #define UPHEAP(z) \ |
34 | 0 | { \ |
35 | 0 | Int32 zz, tmp; \ |
36 | 0 | zz = z; tmp = heap[zz]; \ |
37 | 0 | while (weight[tmp] < weight[heap[zz >> 1]]) { \ |
38 | 0 | heap[zz] = heap[zz >> 1]; \ |
39 | 0 | zz >>= 1; \ |
40 | 0 | } \ |
41 | 0 | heap[zz] = tmp; \ |
42 | 0 | } |
43 | | |
44 | 0 | #define DOWNHEAP(z) \ |
45 | 0 | { \ |
46 | 0 | Int32 zz, yy, tmp; \ |
47 | 0 | zz = z; tmp = heap[zz]; \ |
48 | 0 | while (True) { \ |
49 | 0 | yy = zz << 1; \ |
50 | 0 | if (yy > nHeap) break; \ |
51 | 0 | if (yy < nHeap && \ |
52 | 0 | weight[heap[yy+1]] < weight[heap[yy]]) \ |
53 | 0 | yy++; \ |
54 | 0 | if (weight[tmp] < weight[heap[yy]]) break; \ |
55 | 0 | heap[zz] = heap[yy]; \ |
56 | 0 | zz = yy; \ |
57 | 0 | } \ |
58 | 0 | heap[zz] = tmp; \ |
59 | 0 | } |
60 | | |
61 | | |
62 | | /*---------------------------------------------------*/ |
63 | | void BZ2_hbMakeCodeLengths ( UChar *len, |
64 | | Int32 *freq, |
65 | | Int32 alphaSize, |
66 | | Int32 maxLen ) |
67 | 0 | { |
68 | | /*-- |
69 | | Nodes and heap entries run from 1. Entry 0 |
70 | | for both the heap and nodes is a sentinel. |
71 | | --*/ |
72 | 0 | Int32 nNodes, nHeap, n1, n2, i, j, k; |
73 | 0 | Bool tooLong; |
74 | |
|
75 | 0 | Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; |
76 | 0 | Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; |
77 | 0 | Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; |
78 | |
|
79 | 0 | for (i = 0; i < alphaSize; i++) |
80 | 0 | weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; |
81 | |
|
82 | 0 | while (True) { |
83 | |
|
84 | 0 | nNodes = alphaSize; |
85 | 0 | nHeap = 0; |
86 | |
|
87 | 0 | heap[0] = 0; |
88 | 0 | weight[0] = 0; |
89 | 0 | parent[0] = -2; |
90 | |
|
91 | 0 | for (i = 1; i <= alphaSize; i++) { |
92 | 0 | parent[i] = -1; |
93 | 0 | nHeap++; |
94 | 0 | heap[nHeap] = i; |
95 | 0 | UPHEAP(nHeap); |
96 | 0 | } |
97 | |
|
98 | 0 | AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); |
99 | | |
100 | 0 | while (nHeap > 1) { |
101 | 0 | n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); |
102 | 0 | n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); |
103 | 0 | nNodes++; |
104 | 0 | parent[n1] = parent[n2] = nNodes; |
105 | 0 | weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); |
106 | 0 | parent[nNodes] = -1; |
107 | 0 | nHeap++; |
108 | 0 | heap[nHeap] = nNodes; |
109 | 0 | UPHEAP(nHeap); |
110 | 0 | } |
111 | |
|
112 | 0 | AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); |
113 | |
|
114 | 0 | tooLong = False; |
115 | 0 | for (i = 1; i <= alphaSize; i++) { |
116 | 0 | j = 0; |
117 | 0 | k = i; |
118 | 0 | while (parent[k] >= 0) { k = parent[k]; j++; } |
119 | 0 | len[i-1] = j; |
120 | 0 | if (j > maxLen) tooLong = True; |
121 | 0 | } |
122 | | |
123 | 0 | if (! tooLong) break; |
124 | | |
125 | | /* 17 Oct 04: keep-going condition for the following loop used |
126 | | to be 'i < alphaSize', which missed the last element, |
127 | | theoretically leading to the possibility of the compressor |
128 | | looping. However, this count-scaling step is only needed if |
129 | | one of the generated Huffman code words is longer than |
130 | | maxLen, which up to and including version 1.0.2 was 20 bits, |
131 | | which is extremely unlikely. In version 1.0.3 maxLen was |
132 | | changed to 17 bits, which has minimal effect on compression |
133 | | ratio, but does mean this scaling step is used from time to |
134 | | time, enough to verify that it works. |
135 | | |
136 | | This means that bzip2-1.0.3 and later will only produce |
137 | | Huffman codes with a maximum length of 17 bits. However, in |
138 | | order to preserve backwards compatibility with bitstreams |
139 | | produced by versions pre-1.0.3, the decompressor must still |
140 | | handle lengths of up to 20. */ |
141 | | |
142 | 0 | for (i = 1; i <= alphaSize; i++) { |
143 | 0 | j = weight[i] >> 8; |
144 | 0 | j = 1 + (j / 2); |
145 | 0 | weight[i] = j << 8; |
146 | 0 | } |
147 | 0 | } |
148 | 0 | } |
149 | | |
150 | | |
151 | | /*---------------------------------------------------*/ |
152 | | void BZ2_hbAssignCodes ( Int32 *code, |
153 | | UChar *length, |
154 | | Int32 minLen, |
155 | | Int32 maxLen, |
156 | | Int32 alphaSize ) |
157 | 0 | { |
158 | 0 | Int32 n, vec, i; |
159 | |
|
160 | 0 | vec = 0; |
161 | 0 | for (n = minLen; n <= maxLen; n++) { |
162 | 0 | for (i = 0; i < alphaSize; i++) |
163 | 0 | if (length[i] == n) { code[i] = vec; vec++; }; |
164 | 0 | vec <<= 1; |
165 | 0 | } |
166 | 0 | } |
167 | | |
168 | | |
169 | | /*---------------------------------------------------*/ |
170 | | void BZ2_hbCreateDecodeTables ( Int32 *limit, |
171 | | Int32 *base, |
172 | | Int32 *perm, |
173 | | UChar *length, |
174 | | Int32 minLen, |
175 | | Int32 maxLen, |
176 | | Int32 alphaSize ) |
177 | 49.1k | { |
178 | 49.1k | Int32 pp, i, j, vec; |
179 | | |
180 | 49.1k | pp = 0; |
181 | 146k | for (i = minLen; i <= maxLen; i++) |
182 | 3.46M | for (j = 0; j < alphaSize; j++) |
183 | 3.36M | if (length[j] == i) { perm[pp] = j; pp++; }; |
184 | | |
185 | 1.17M | for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; |
186 | 725k | for (i = 0; i < alphaSize; i++) base[length[i]+1]++; |
187 | | |
188 | 1.12M | for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; |
189 | | |
190 | 1.17M | for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; |
191 | 49.1k | vec = 0; |
192 | | |
193 | 146k | for (i = minLen; i <= maxLen; i++) { |
194 | 97.7k | vec += (base[i+1] - base[i]); |
195 | 97.7k | limit[i] = vec-1; |
196 | 97.7k | vec <<= 1; |
197 | 97.7k | } |
198 | 97.7k | for (i = minLen + 1; i <= maxLen; i++) |
199 | 48.6k | base[i] = ((limit[i-1] + 1) << 1) - base[i]; |
200 | 49.1k | } |
201 | | |
202 | | |
203 | | /*-------------------------------------------------------------*/ |
204 | | /*--- end huffman.c ---*/ |
205 | | /*-------------------------------------------------------------*/ |