Coverage Report

Created: 2023-03-26 06:36

/src/zopfli/src/zopfli/katajainen.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright 2011 Google Inc. All Rights Reserved.
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
    http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
16
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
*/
19
20
/*
21
Bounded package merge algorithm, based on the paper
22
"A Fast and Space-Economical Algorithm for Length-Limited Coding
23
Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
24
*/
25
26
#include "katajainen.h"
27
#include <assert.h>
28
#include <stdlib.h>
29
#include <limits.h>
30
31
typedef struct Node Node;
32
33
/*
34
Nodes forming chains. Also used to represent leaves.
35
*/
36
struct Node {
37
  size_t weight;  /* Total weight (symbol count) of this chain. */
38
  Node* tail;  /* Previous node(s) of this chain, or 0 if none. */
39
  int count;  /* Leaf symbol index, or number of leaves before this chain. */
40
};
41
42
/*
43
Memory pool for nodes.
44
*/
45
typedef struct NodePool {
46
  Node* next;  /* Pointer to a free node in the pool. */
47
} NodePool;
48
49
/*
50
Initializes a chain node with the given values and marks it as in use.
51
*/
52
10.5G
static void InitNode(size_t weight, int count, Node* tail, Node* node) {
53
10.5G
  node->weight = weight;
54
10.5G
  node->count = count;
55
10.5G
  node->tail = tail;
56
10.5G
}
57
58
/*
59
Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
60
new chain is, depending on the weights, a leaf or a combination of two chains
61
from the previous list.
62
lists: The lists of chains.
63
maxbits: Number of lists.
64
leaves: The leaves, one per symbol.
65
numsymbols: Number of leaves.
66
pool: the node memory pool.
67
index: The index of the list in which a new chain or leaf is required.
68
*/
69
static void BoundaryPM(Node* (*lists)[2], Node* leaves, int numsymbols,
70
10.5G
                       NodePool* pool, int index) {
71
10.5G
  Node* newchain;
72
10.5G
  Node* oldchain;
73
10.5G
  int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
74
75
10.5G
  if (index == 0 && lastcount >= numsymbols) return;
76
77
10.4G
  newchain = pool->next++;
78
10.4G
  oldchain = lists[index][1];
79
80
  /* These are set up before the recursive calls below, so that there is a list
81
  pointing to the new node, to let the garbage collection know it's in use. */
82
10.4G
  lists[index][0] = oldchain;
83
10.4G
  lists[index][1] = newchain;
84
85
10.4G
  if (index == 0) {
86
    /* New leaf node in list 0. */
87
219M
    InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain);
88
10.2G
  } else {
89
10.2G
    size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
90
10.2G
    if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
91
      /* New leaf inserted in list, so count is incremented. */
92
5.68G
      InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail,
93
5.68G
          newchain);
94
5.68G
    } else {
95
4.56G
      InitNode(sum, lastcount, lists[index - 1][1], newchain);
96
      /* Two lookahead chains of previous list used up, create new ones. */
97
4.56G
      BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
98
4.56G
      BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
99
4.56G
    }
100
10.2G
  }
101
10.4G
}
102
103
static void BoundaryPMFinal(Node* (*lists)[2],
104
50.0M
    Node* leaves, int numsymbols, NodePool* pool, int index) {
105
50.0M
  int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
106
107
50.0M
  size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
108
109
50.0M
  if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
110
19.1M
    Node* newchain = pool->next;
111
19.1M
    Node* oldchain = lists[index][1]->tail;
112
113
19.1M
    lists[index][1] = newchain;
114
19.1M
    newchain->count = lastcount + 1;
115
19.1M
    newchain->tail = oldchain;
116
30.9M
  } else {
117
30.9M
    lists[index][1]->tail = lists[index - 1][1];
118
30.9M
  }
119
50.0M
}
120
121
/*
122
Initializes each list with as lookahead chains the two leaves with lowest
123
weights.
124
*/
125
static void InitLists(
126
50.0M
    NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
127
50.0M
  int i;
128
50.0M
  Node* node0 = pool->next++;
129
50.0M
  Node* node1 = pool->next++;
130
50.0M
  InitNode(leaves[0].weight, 1, 0, node0);
131
50.0M
  InitNode(leaves[1].weight, 2, 0, node1);
132
387M
  for (i = 0; i < maxbits; i++) {
133
337M
    lists[i][0] = node0;
134
337M
    lists[i][1] = node1;
135
337M
  }
136
50.0M
}
137
138
/*
139
Converts result of boundary package-merge to the bitlengths. The result in the
140
last chain of the last list contains the amount of active leaves in each list.
141
chain: Chain to extract the bit length from (last chain from last list).
142
*/
143
50.0M
static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
144
50.0M
  int counts[16] = {0};
145
50.0M
  unsigned end = 16;
146
50.0M
  unsigned ptr = 15;
147
50.0M
  unsigned value = 1;
148
50.0M
  Node* node;
149
50.0M
  int val;
150
151
317M
  for (node = chain; node; node = node->tail) {
152
267M
    counts[--end] = node->count;
153
267M
  }
154
155
50.0M
  val = counts[15];
156
317M
  while (ptr >= end) {
157
1.11G
    for (; val > counts[ptr - 1]; val--) {
158
846M
      bitlengths[leaves[val - 1].count] = value;
159
846M
    }
160
267M
    ptr--;
161
267M
    value++;
162
267M
  }
163
50.0M
}
164
165
/*
166
Comparator for sorting the leaves. Has the function signature for qsort.
167
*/
168
3.20G
static int LeafComparator(const void* a, const void* b) {
169
3.20G
  return ((const Node*)a)->weight - ((const Node*)b)->weight;
170
3.20G
}
171
172
int ZopfliLengthLimitedCodeLengths(
173
54.2M
    const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) {
174
54.2M
  NodePool pool;
175
54.2M
  int i;
176
54.2M
  int numsymbols = 0;  /* Amount of symbols with frequency > 0. */
177
54.2M
  int numBoundaryPMRuns;
178
54.2M
  Node* nodes;
179
180
  /* Array of lists of chains. Each list requires only two lookahead chains at
181
  a time, so each list is a array of two Node*'s. */
182
54.2M
  Node* (*lists)[2];
183
184
  /* One leaf per symbol. Only numsymbols leaves will be used. */
185
54.2M
  Node* leaves = (Node*)malloc(n * sizeof(*leaves));
186
187
  /* Initialize all bitlengths at 0. */
188
2.61G
  for (i = 0; i < n; i++) {
189
2.55G
    bitlengths[i] = 0;
190
2.55G
  }
191
192
  /* Count used symbols and place them in the leaves. */
193
2.61G
  for (i = 0; i < n; i++) {
194
2.55G
    if (frequencies[i]) {
195
848M
      leaves[numsymbols].weight = frequencies[i];
196
848M
      leaves[numsymbols].count = i;  /* Index of symbol this leaf represents. */
197
848M
      numsymbols++;
198
848M
    }
199
2.55G
  }
200
201
  /* Check special cases and error conditions. */
202
54.2M
  if ((1 << maxbits) < numsymbols) {
203
0
    free(leaves);
204
0
    return 1;  /* Error, too few maxbits to represent symbols. */
205
0
  }
206
54.2M
  if (numsymbols == 0) {
207
2.18M
    free(leaves);
208
2.18M
    return 0;  /* No symbols at all. OK. */
209
2.18M
  }
210
52.0M
  if (numsymbols == 1) {
211
970k
    bitlengths[leaves[0].count] = 1;
212
970k
    free(leaves);
213
970k
    return 0;  /* Only one symbol, give it bitlength 1, not 0. OK. */
214
970k
  }
215
51.0M
  if (numsymbols == 2) {
216
963k
    bitlengths[leaves[0].count]++;
217
963k
    bitlengths[leaves[1].count]++;
218
963k
    free(leaves);
219
963k
    return 0;
220
963k
  }
221
222
  /* Sort the leaves from lightest to heaviest. Add count into the same
223
  variable for stable sorting. */
224
896M
  for (i = 0; i < numsymbols; i++) {
225
846M
    if (leaves[i].weight >=
226
846M
        ((size_t)1 << (sizeof(leaves[0].weight) * CHAR_BIT - 9))) {
227
0
      free(leaves);
228
0
      return 1;  /* Error, we need 9 bits for the count. */
229
0
    }
230
846M
    leaves[i].weight = (leaves[i].weight << 9) | leaves[i].count;
231
846M
  }
232
50.0M
  qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
233
896M
  for (i = 0; i < numsymbols; i++) {
234
846M
    leaves[i].weight >>= 9;
235
846M
  }
236
237
50.0M
  if (numsymbols - 1 < maxbits) {
238
26.6M
    maxbits = numsymbols - 1;
239
26.6M
  }
240
241
  /* Initialize node memory pool. */
242
50.0M
  nodes = (Node*)malloc(maxbits * 2 * numsymbols * sizeof(Node));
243
50.0M
  pool.next = nodes;
244
245
50.0M
  lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
246
50.0M
  InitLists(&pool, leaves, maxbits, lists);
247
248
  /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
249
  are already created in the initialization. Each BoundaryPM run creates one. */
250
50.0M
  numBoundaryPMRuns = 2 * numsymbols - 4;
251
1.49G
  for (i = 0; i < numBoundaryPMRuns - 1; i++) {
252
1.44G
    BoundaryPM(lists, leaves, numsymbols, &pool, maxbits - 1);
253
1.44G
  }
254
50.0M
  BoundaryPMFinal(lists, leaves, numsymbols, &pool, maxbits - 1);
255
256
50.0M
  ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);
257
258
50.0M
  free(lists);
259
50.0M
  free(leaves);
260
50.0M
  free(nodes);
261
50.0M
  return 0;  /* OK. */
262
50.0M
}