/src/zopfli/src/zopfli/katajainen.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright 2011 Google Inc. All Rights Reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | |
16 | | Author: lode.vandevenne@gmail.com (Lode Vandevenne) |
17 | | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) |
18 | | */ |
19 | | |
20 | | /* |
21 | | Bounded package merge algorithm, based on the paper |
22 | | "A Fast and Space-Economical Algorithm for Length-Limited Coding |
23 | | Jyrki Katajainen, Alistair Moffat, Andrew Turpin". |
24 | | */ |
25 | | |
26 | | #include "katajainen.h" |
27 | | #include <assert.h> |
28 | | #include <stdlib.h> |
29 | | #include <limits.h> |
30 | | |
31 | | typedef struct Node Node; |
32 | | |
33 | | /* |
34 | | Nodes forming chains. Also used to represent leaves. |
35 | | */ |
36 | | struct Node { |
37 | | size_t weight; /* Total weight (symbol count) of this chain. */ |
38 | | Node* tail; /* Previous node(s) of this chain, or 0 if none. */ |
39 | | int count; /* Leaf symbol index, or number of leaves before this chain. */ |
40 | | }; |
41 | | |
42 | | /* |
43 | | Memory pool for nodes. |
44 | | */ |
45 | | typedef struct NodePool { |
46 | | Node* next; /* Pointer to a free node in the pool. */ |
47 | | } NodePool; |
48 | | |
49 | | /* |
50 | | Initializes a chain node with the given values and marks it as in use. |
51 | | */ |
52 | 10.5G | static void InitNode(size_t weight, int count, Node* tail, Node* node) { |
53 | 10.5G | node->weight = weight; |
54 | 10.5G | node->count = count; |
55 | 10.5G | node->tail = tail; |
56 | 10.5G | } |
57 | | |
58 | | /* |
59 | | Performs a Boundary Package-Merge step. Puts a new chain in the given list. The |
60 | | new chain is, depending on the weights, a leaf or a combination of two chains |
61 | | from the previous list. |
62 | | lists: The lists of chains. |
63 | | maxbits: Number of lists. |
64 | | leaves: The leaves, one per symbol. |
65 | | numsymbols: Number of leaves. |
66 | | pool: the node memory pool. |
67 | | index: The index of the list in which a new chain or leaf is required. |
68 | | */ |
69 | | static void BoundaryPM(Node* (*lists)[2], Node* leaves, int numsymbols, |
70 | 10.5G | NodePool* pool, int index) { |
71 | 10.5G | Node* newchain; |
72 | 10.5G | Node* oldchain; |
73 | 10.5G | int lastcount = lists[index][1]->count; /* Count of last chain of list. */ |
74 | | |
75 | 10.5G | if (index == 0 && lastcount >= numsymbols) return; |
76 | | |
77 | 10.4G | newchain = pool->next++; |
78 | 10.4G | oldchain = lists[index][1]; |
79 | | |
80 | | /* These are set up before the recursive calls below, so that there is a list |
81 | | pointing to the new node, to let the garbage collection know it's in use. */ |
82 | 10.4G | lists[index][0] = oldchain; |
83 | 10.4G | lists[index][1] = newchain; |
84 | | |
85 | 10.4G | if (index == 0) { |
86 | | /* New leaf node in list 0. */ |
87 | 219M | InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain); |
88 | 10.2G | } else { |
89 | 10.2G | size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight; |
90 | 10.2G | if (lastcount < numsymbols && sum > leaves[lastcount].weight) { |
91 | | /* New leaf inserted in list, so count is incremented. */ |
92 | 5.68G | InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail, |
93 | 5.68G | newchain); |
94 | 5.68G | } else { |
95 | 4.56G | InitNode(sum, lastcount, lists[index - 1][1], newchain); |
96 | | /* Two lookahead chains of previous list used up, create new ones. */ |
97 | 4.56G | BoundaryPM(lists, leaves, numsymbols, pool, index - 1); |
98 | 4.56G | BoundaryPM(lists, leaves, numsymbols, pool, index - 1); |
99 | 4.56G | } |
100 | 10.2G | } |
101 | 10.4G | } |
102 | | |
103 | | static void BoundaryPMFinal(Node* (*lists)[2], |
104 | 50.0M | Node* leaves, int numsymbols, NodePool* pool, int index) { |
105 | 50.0M | int lastcount = lists[index][1]->count; /* Count of last chain of list. */ |
106 | | |
107 | 50.0M | size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight; |
108 | | |
109 | 50.0M | if (lastcount < numsymbols && sum > leaves[lastcount].weight) { |
110 | 19.1M | Node* newchain = pool->next; |
111 | 19.1M | Node* oldchain = lists[index][1]->tail; |
112 | | |
113 | 19.1M | lists[index][1] = newchain; |
114 | 19.1M | newchain->count = lastcount + 1; |
115 | 19.1M | newchain->tail = oldchain; |
116 | 30.9M | } else { |
117 | 30.9M | lists[index][1]->tail = lists[index - 1][1]; |
118 | 30.9M | } |
119 | 50.0M | } |
120 | | |
121 | | /* |
122 | | Initializes each list with as lookahead chains the two leaves with lowest |
123 | | weights. |
124 | | */ |
125 | | static void InitLists( |
126 | 50.0M | NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) { |
127 | 50.0M | int i; |
128 | 50.0M | Node* node0 = pool->next++; |
129 | 50.0M | Node* node1 = pool->next++; |
130 | 50.0M | InitNode(leaves[0].weight, 1, 0, node0); |
131 | 50.0M | InitNode(leaves[1].weight, 2, 0, node1); |
132 | 387M | for (i = 0; i < maxbits; i++) { |
133 | 337M | lists[i][0] = node0; |
134 | 337M | lists[i][1] = node1; |
135 | 337M | } |
136 | 50.0M | } |
137 | | |
138 | | /* |
139 | | Converts result of boundary package-merge to the bitlengths. The result in the |
140 | | last chain of the last list contains the amount of active leaves in each list. |
141 | | chain: Chain to extract the bit length from (last chain from last list). |
142 | | */ |
143 | 50.0M | static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) { |
144 | 50.0M | int counts[16] = {0}; |
145 | 50.0M | unsigned end = 16; |
146 | 50.0M | unsigned ptr = 15; |
147 | 50.0M | unsigned value = 1; |
148 | 50.0M | Node* node; |
149 | 50.0M | int val; |
150 | | |
151 | 317M | for (node = chain; node; node = node->tail) { |
152 | 267M | counts[--end] = node->count; |
153 | 267M | } |
154 | | |
155 | 50.0M | val = counts[15]; |
156 | 317M | while (ptr >= end) { |
157 | 1.11G | for (; val > counts[ptr - 1]; val--) { |
158 | 846M | bitlengths[leaves[val - 1].count] = value; |
159 | 846M | } |
160 | 267M | ptr--; |
161 | 267M | value++; |
162 | 267M | } |
163 | 50.0M | } |
164 | | |
165 | | /* |
166 | | Comparator for sorting the leaves. Has the function signature for qsort. |
167 | | */ |
168 | 3.20G | static int LeafComparator(const void* a, const void* b) { |
169 | 3.20G | return ((const Node*)a)->weight - ((const Node*)b)->weight; |
170 | 3.20G | } |
171 | | |
172 | | int ZopfliLengthLimitedCodeLengths( |
173 | 54.2M | const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) { |
174 | 54.2M | NodePool pool; |
175 | 54.2M | int i; |
176 | 54.2M | int numsymbols = 0; /* Amount of symbols with frequency > 0. */ |
177 | 54.2M | int numBoundaryPMRuns; |
178 | 54.2M | Node* nodes; |
179 | | |
180 | | /* Array of lists of chains. Each list requires only two lookahead chains at |
181 | | a time, so each list is a array of two Node*'s. */ |
182 | 54.2M | Node* (*lists)[2]; |
183 | | |
184 | | /* One leaf per symbol. Only numsymbols leaves will be used. */ |
185 | 54.2M | Node* leaves = (Node*)malloc(n * sizeof(*leaves)); |
186 | | |
187 | | /* Initialize all bitlengths at 0. */ |
188 | 2.61G | for (i = 0; i < n; i++) { |
189 | 2.55G | bitlengths[i] = 0; |
190 | 2.55G | } |
191 | | |
192 | | /* Count used symbols and place them in the leaves. */ |
193 | 2.61G | for (i = 0; i < n; i++) { |
194 | 2.55G | if (frequencies[i]) { |
195 | 848M | leaves[numsymbols].weight = frequencies[i]; |
196 | 848M | leaves[numsymbols].count = i; /* Index of symbol this leaf represents. */ |
197 | 848M | numsymbols++; |
198 | 848M | } |
199 | 2.55G | } |
200 | | |
201 | | /* Check special cases and error conditions. */ |
202 | 54.2M | if ((1 << maxbits) < numsymbols) { |
203 | 0 | free(leaves); |
204 | 0 | return 1; /* Error, too few maxbits to represent symbols. */ |
205 | 0 | } |
206 | 54.2M | if (numsymbols == 0) { |
207 | 2.18M | free(leaves); |
208 | 2.18M | return 0; /* No symbols at all. OK. */ |
209 | 2.18M | } |
210 | 52.0M | if (numsymbols == 1) { |
211 | 970k | bitlengths[leaves[0].count] = 1; |
212 | 970k | free(leaves); |
213 | 970k | return 0; /* Only one symbol, give it bitlength 1, not 0. OK. */ |
214 | 970k | } |
215 | 51.0M | if (numsymbols == 2) { |
216 | 963k | bitlengths[leaves[0].count]++; |
217 | 963k | bitlengths[leaves[1].count]++; |
218 | 963k | free(leaves); |
219 | 963k | return 0; |
220 | 963k | } |
221 | | |
222 | | /* Sort the leaves from lightest to heaviest. Add count into the same |
223 | | variable for stable sorting. */ |
224 | 896M | for (i = 0; i < numsymbols; i++) { |
225 | 846M | if (leaves[i].weight >= |
226 | 846M | ((size_t)1 << (sizeof(leaves[0].weight) * CHAR_BIT - 9))) { |
227 | 0 | free(leaves); |
228 | 0 | return 1; /* Error, we need 9 bits for the count. */ |
229 | 0 | } |
230 | 846M | leaves[i].weight = (leaves[i].weight << 9) | leaves[i].count; |
231 | 846M | } |
232 | 50.0M | qsort(leaves, numsymbols, sizeof(Node), LeafComparator); |
233 | 896M | for (i = 0; i < numsymbols; i++) { |
234 | 846M | leaves[i].weight >>= 9; |
235 | 846M | } |
236 | | |
237 | 50.0M | if (numsymbols - 1 < maxbits) { |
238 | 26.6M | maxbits = numsymbols - 1; |
239 | 26.6M | } |
240 | | |
241 | | /* Initialize node memory pool. */ |
242 | 50.0M | nodes = (Node*)malloc(maxbits * 2 * numsymbols * sizeof(Node)); |
243 | 50.0M | pool.next = nodes; |
244 | | |
245 | 50.0M | lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists)); |
246 | 50.0M | InitLists(&pool, leaves, maxbits, lists); |
247 | | |
248 | | /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two |
249 | | are already created in the initialization. Each BoundaryPM run creates one. */ |
250 | 50.0M | numBoundaryPMRuns = 2 * numsymbols - 4; |
251 | 1.49G | for (i = 0; i < numBoundaryPMRuns - 1; i++) { |
252 | 1.44G | BoundaryPM(lists, leaves, numsymbols, &pool, maxbits - 1); |
253 | 1.44G | } |
254 | 50.0M | BoundaryPMFinal(lists, leaves, numsymbols, &pool, maxbits - 1); |
255 | | |
256 | 50.0M | ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths); |
257 | | |
258 | 50.0M | free(lists); |
259 | 50.0M | free(leaves); |
260 | 50.0M | free(nodes); |
261 | 50.0M | return 0; /* OK. */ |
262 | 50.0M | } |