Coverage Report

Created: 2023-03-26 06:33

/src/zopfli/src/zopfli/blocksplitter.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright 2011 Google Inc. All Rights Reserved.
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
    http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
16
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
*/
19
20
#include "blocksplitter.h"
21
22
#include <assert.h>
23
#include <stdio.h>
24
#include <stdlib.h>
25
26
#include "deflate.h"
27
#include "squeeze.h"
28
#include "tree.h"
29
#include "util.h"
30
31
/*
32
The "f" for the FindMinimum function below.
33
i: the current parameter of f(i)
34
context: for your implementation
35
*/
36
typedef double FindMinimumFun(size_t i, void* context);
37
38
/*
39
Finds minimum of function f(i) where is is of type size_t, f(i) is of type
40
double, i is in range start-end (excluding end).
41
Outputs the minimum value in *smallest and returns the index of this value.
42
*/
43
static size_t FindMinimum(FindMinimumFun f, void* context,
44
10.8k
                          size_t start, size_t end, double* smallest) {
45
10.8k
  if (end - start < 1024) {
46
10.0k
    double best = ZOPFLI_LARGE_FLOAT;
47
10.0k
    size_t result = start;
48
10.0k
    size_t i;
49
1.40M
    for (i = start; i < end; i++) {
50
1.39M
      double v = f(i, context);
51
1.39M
      if (v < best) {
52
45.0k
        best = v;
53
45.0k
        result = i;
54
45.0k
      }
55
1.39M
    }
56
10.0k
    *smallest = best;
57
10.0k
    return result;
58
10.0k
  } else {
59
    /* Try to find minimum faster by recursively checking multiple points. */
60
90.1k
#define NUM 9  /* Good value: 9. */
61
823
    size_t i;
62
823
    size_t p[NUM];
63
823
    double vp[NUM];
64
823
    size_t besti;
65
823
    double best;
66
823
    double lastbest = ZOPFLI_LARGE_FLOAT;
67
823
    size_t pos = start;
68
69
3.68k
    for (;;) {
70
3.68k
      if (end - start <= NUM) break;
71
72
29.8k
      for (i = 0; i < NUM; i++) {
73
26.8k
        p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
74
26.8k
        vp[i] = f(p[i], context);
75
26.8k
      }
76
2.98k
      besti = 0;
77
2.98k
      best = vp[0];
78
26.8k
      for (i = 1; i < NUM; i++) {
79
23.8k
        if (vp[i] < best) {
80
6.01k
          best = vp[i];
81
6.01k
          besti = i;
82
6.01k
        }
83
23.8k
      }
84
2.98k
      if (best > lastbest) break;
85
86
2.86k
      start = besti == 0 ? start : p[besti - 1];
87
2.86k
      end = besti == NUM - 1 ? end : p[besti + 1];
88
89
2.86k
      pos = p[besti];
90
2.86k
      lastbest = best;
91
2.86k
    }
92
823
    *smallest = lastbest;
93
823
    return pos;
94
823
#undef NUM
95
823
  }
96
10.8k
}
97
98
/*
99
Returns estimated cost of a block in bits.  It includes the size to encode the
100
tree and the size to encode all literal, length and distance symbols and their
101
extra bits.
102
103
litlens: lz77 lit/lengths
104
dists: ll77 distances
105
lstart: start of block
106
lend: end of block (not inclusive)
107
*/
108
static double EstimateCost(const ZopfliLZ77Store* lz77,
109
2.86M
                           size_t lstart, size_t lend) {
110
2.86M
  return ZopfliCalculateBlockSizeAutoType(lz77, lstart, lend);
111
2.86M
}
112
113
typedef struct SplitCostContext {
114
  const ZopfliLZ77Store* lz77;
115
  size_t start;
116
  size_t end;
117
} SplitCostContext;
118
119
120
/*
121
Gets the cost which is the sum of the cost of the left and the right section
122
of the data.
123
type: FindMinimumFun
124
*/
125
1.42M
static double SplitCost(size_t i, void* context) {
126
1.42M
  SplitCostContext* c = (SplitCostContext*)context;
127
1.42M
  return EstimateCost(c->lz77, c->start, i) + EstimateCost(c->lz77, i, c->end);
128
1.42M
}
129
130
5.54k
static void AddSorted(size_t value, size_t** out, size_t* outsize) {
131
5.54k
  size_t i;
132
5.54k
  ZOPFLI_APPEND_DATA(value, out, outsize);
133
12.0k
  for (i = 0; i + 1 < *outsize; i++) {
134
9.17k
    if ((*out)[i] > value) {
135
2.65k
      size_t j;
136
9.97k
      for (j = *outsize - 1; j > i; j--) {
137
7.32k
        (*out)[j] = (*out)[j - 1];
138
7.32k
      }
139
2.65k
      (*out)[i] = value;
140
2.65k
      break;
141
2.65k
    }
142
9.17k
  }
143
5.54k
}
144
145
/*
146
Prints the block split points as decimal and hex values in the terminal.
147
*/
148
static void PrintBlockSplitPoints(const ZopfliLZ77Store* lz77,
149
                                  const size_t* lz77splitpoints,
150
0
                                  size_t nlz77points) {
151
0
  size_t* splitpoints = 0;
152
0
  size_t npoints = 0;
153
0
  size_t i;
154
  /* The input is given as lz77 indices, but we want to see the uncompressed
155
  index values. */
156
0
  size_t pos = 0;
157
0
  if (nlz77points > 0) {
158
0
    for (i = 0; i < lz77->size; i++) {
159
0
      size_t length = lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
160
0
      if (lz77splitpoints[npoints] == i) {
161
0
        ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
162
0
        if (npoints == nlz77points) break;
163
0
      }
164
0
      pos += length;
165
0
    }
166
0
  }
167
0
  assert(npoints == nlz77points);
168
169
0
  fprintf(stderr, "block split points: ");
170
0
  for (i = 0; i < npoints; i++) {
171
0
    fprintf(stderr, "%d ", (int)splitpoints[i]);
172
0
  }
173
0
  fprintf(stderr, "(hex:");
174
0
  for (i = 0; i < npoints; i++) {
175
0
    fprintf(stderr, " %x", (int)splitpoints[i]);
176
0
  }
177
0
  fprintf(stderr, ")\n");
178
179
0
  free(splitpoints);
180
0
}
181
182
/*
183
Finds next block to try to split, the largest of the available ones.
184
The largest is chosen to make sure that if only a limited amount of blocks is
185
requested, their sizes are spread evenly.
186
lz77size: the size of the LL77 data, which is the size of the done array here.
187
done: array indicating which blocks starting at that position are no longer
188
    splittable (splitting them increases rather than decreases cost).
189
splitpoints: the splitpoints found so far.
190
npoints: the amount of splitpoints found so far.
191
lstart: output variable, giving start of block.
192
lend: output variable, giving end of block.
193
returns 1 if a block was found, 0 if no block found (all are done).
194
*/
195
static int FindLargestSplittableBlock(
196
    size_t lz77size, const unsigned char* done,
197
    const size_t* splitpoints, size_t npoints,
198
10.8k
    size_t* lstart, size_t* lend) {
199
10.8k
  size_t longest = 0;
200
10.8k
  int found = 0;
201
10.8k
  size_t i;
202
60.3k
  for (i = 0; i <= npoints; i++) {
203
49.4k
    size_t start = i == 0 ? 0 : splitpoints[i - 1];
204
49.4k
    size_t end = i == npoints ? lz77size - 1 : splitpoints[i];
205
49.4k
    if (!done[start] && end - start > longest) {
206
17.4k
      *lstart = start;
207
17.4k
      *lend = end;
208
17.4k
      found = 1;
209
17.4k
      longest = end - start;
210
17.4k
    }
211
49.4k
  }
212
10.8k
  return found;
213
10.8k
}
214
215
void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
216
                          const ZopfliLZ77Store* lz77, size_t maxblocks,
217
3.61k
                          size_t** splitpoints, size_t* npoints) {
218
3.61k
  size_t lstart, lend;
219
3.61k
  size_t i;
220
3.61k
  size_t llpos = 0;
221
3.61k
  size_t numblocks = 1;
222
3.61k
  unsigned char* done;
223
3.61k
  double splitcost, origcost;
224
225
3.61k
  if (lz77->size < 10) return;  /* This code fails on tiny files. */
226
227
2.69k
  done = (unsigned char*)malloc(lz77->size);
228
2.69k
  if (!done) exit(-1); /* Allocation failed. */
229
1.44M
  for (i = 0; i < lz77->size; i++) done[i] = 0;
230
231
2.69k
  lstart = 0;
232
2.69k
  lend = lz77->size;
233
10.8k
  for (;;) {
234
10.8k
    SplitCostContext c;
235
236
10.8k
    if (maxblocks > 0 && numblocks >= maxblocks) {
237
43
      break;
238
43
    }
239
240
10.8k
    c.lz77 = lz77;
241
10.8k
    c.start = lstart;
242
10.8k
    c.end = lend;
243
10.8k
    assert(lstart < lend);
244
0
    llpos = FindMinimum(SplitCost, &c, lstart + 1, lend, &splitcost);
245
246
10.8k
    assert(llpos > lstart);
247
0
    assert(llpos < lend);
248
249
0
    origcost = EstimateCost(lz77, lstart, lend);
250
251
10.8k
    if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
252
5.30k
      done[lstart] = 1;
253
5.54k
    } else {
254
5.54k
      AddSorted(llpos, splitpoints, npoints);
255
5.54k
      numblocks++;
256
5.54k
    }
257
258
10.8k
    if (!FindLargestSplittableBlock(
259
10.8k
        lz77->size, done, *splitpoints, *npoints, &lstart, &lend)) {
260
1.48k
      break;  /* No further split will probably reduce compression. */
261
1.48k
    }
262
263
9.36k
    if (lend - lstart < 10) {
264
1.17k
      break;
265
1.17k
    }
266
9.36k
  }
267
268
2.69k
  if (options->verbose) {
269
0
    PrintBlockSplitPoints(lz77, *splitpoints, *npoints);
270
0
  }
271
272
2.69k
  free(done);
273
2.69k
}
274
275
void ZopfliBlockSplit(const ZopfliOptions* options,
276
                      const unsigned char* in, size_t instart, size_t inend,
277
2.88k
                      size_t maxblocks, size_t** splitpoints, size_t* npoints) {
278
2.88k
  size_t pos = 0;
279
2.88k
  size_t i;
280
2.88k
  ZopfliBlockState s;
281
2.88k
  size_t* lz77splitpoints = 0;
282
2.88k
  size_t nlz77points = 0;
283
2.88k
  ZopfliLZ77Store store;
284
2.88k
  ZopfliHash hash;
285
2.88k
  ZopfliHash* h = &hash;
286
287
2.88k
  ZopfliInitLZ77Store(in, &store);
288
2.88k
  ZopfliInitBlockState(options, instart, inend, 0, &s);
289
2.88k
  ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
290
291
2.88k
  *npoints = 0;
292
2.88k
  *splitpoints = 0;
293
294
  /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
295
  results in better blocks. */
296
2.88k
  ZopfliLZ77Greedy(&s, in, instart, inend, &store, h);
297
298
2.88k
  ZopfliBlockSplitLZ77(options,
299
2.88k
                       &store, maxblocks,
300
2.88k
                       &lz77splitpoints, &nlz77points);
301
302
  /* Convert LZ77 positions to positions in the uncompressed input. */
303
2.88k
  pos = instart;
304
2.88k
  if (nlz77points > 0) {
305
398k
    for (i = 0; i < store.size; i++) {
306
398k
      size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
307
398k
      if (lz77splitpoints[*npoints] == i) {
308
3.23k
        ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
309
3.23k
        if (*npoints == nlz77points) break;
310
3.23k
      }
311
396k
      pos += length;
312
396k
    }
313
1.01k
  }
314
2.88k
  assert(*npoints == nlz77points);
315
316
0
  free(lz77splitpoints);
317
2.88k
  ZopfliCleanBlockState(&s);
318
2.88k
  ZopfliCleanLZ77Store(&store);
319
2.88k
  ZopfliCleanHash(h);
320
2.88k
}
321
322
void ZopfliBlockSplitSimple(const unsigned char* in,
323
                            size_t instart, size_t inend,
324
                            size_t blocksize,
325
0
                            size_t** splitpoints, size_t* npoints) {
326
0
  size_t i = instart;
327
0
  while (i < inend) {
328
0
    ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
329
0
    i += blocksize;
330
0
  }
331
0
  (void)in;
332
0
}