Coverage Report

Created: 2025-08-28 07:58

/src/duckdb/third_party/brotli/enc/bit_cost.cpp
Line
Count
Source (jump to first uncovered line)
1
/* Copyright 2013 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* Functions to estimate the bit cost of Huffman trees. */
8
9
#include "bit_cost.h"
10
11
#include <brotli/types.h>
12
13
#include "../common/brotli_constants.h"
14
#include "../common/brotli_platform.h"
15
#include "fast_log.h"
16
#include "histogram.h"
17
18
using namespace duckdb_brotli;
19
20
0
#define FN(X) duckdb_brotli:: X ## Literal
21
/* NOLINT(build/header_guard) */
22
/* Copyright 2013 Google Inc. All Rights Reserved.
23
24
   Distributed under MIT license.
25
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
26
*/
27
28
/* template parameters: FN */
29
30
#define HistogramType FN(Histogram)
31
32
0
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
33
0
  static const double kOneSymbolHistogramCost = 12;
34
0
  static const double kTwoSymbolHistogramCost = 20;
35
0
  static const double kThreeSymbolHistogramCost = 28;
36
0
  static const double kFourSymbolHistogramCost = 37;
37
0
  const size_t data_size = FN(HistogramDataSize)();
38
0
  int count = 0;
39
0
  size_t s[5];
40
0
  double bits = 0.0;
41
0
  size_t i;
42
0
  if (histogram->total_count_ == 0) {
43
0
    return kOneSymbolHistogramCost;
44
0
  }
45
0
  for (i = 0; i < data_size; ++i) {
46
0
    if (histogram->data_[i] > 0) {
47
0
      s[count] = i;
48
0
      ++count;
49
0
      if (count > 4) break;
50
0
    }
51
0
  }
52
0
  if (count == 1) {
53
0
    return kOneSymbolHistogramCost;
54
0
  }
55
0
  if (count == 2) {
56
0
    return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
57
0
  }
58
0
  if (count == 3) {
59
0
    const uint32_t histo0 = histogram->data_[s[0]];
60
0
    const uint32_t histo1 = histogram->data_[s[1]];
61
0
    const uint32_t histo2 = histogram->data_[s[2]];
62
0
    const uint32_t histomax =
63
0
        BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
64
0
    return (kThreeSymbolHistogramCost +
65
0
            2 * (histo0 + histo1 + histo2) - histomax);
66
0
  }
67
0
  if (count == 4) {
68
0
    uint32_t histo[4];
69
0
    uint32_t h23;
70
0
    uint32_t histomax;
71
0
    for (i = 0; i < 4; ++i) {
72
0
      histo[i] = histogram->data_[s[i]];
73
0
    }
74
    /* Sort */
75
0
    for (i = 0; i < 4; ++i) {
76
0
      size_t j;
77
0
      for (j = i + 1; j < 4; ++j) {
78
0
        if (histo[j] > histo[i]) {
79
0
          BROTLI_SWAP(uint32_t, histo, j, i);
80
0
        }
81
0
      }
82
0
    }
83
0
    h23 = histo[2] + histo[3];
84
0
    histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
85
0
    return (kFourSymbolHistogramCost +
86
0
            3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
87
0
  }
88
89
0
  {
90
    /* In this loop we compute the entropy of the histogram and simultaneously
91
       build a simplified histogram of the code length codes where we use the
92
       zero repeat code 17, but we don't use the non-zero repeat code 16. */
93
0
    size_t max_depth = 1;
94
0
    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
95
0
    const double log2total = FastLog2(histogram->total_count_);
96
0
    for (i = 0; i < data_size;) {
97
0
      if (histogram->data_[i] > 0) {
98
        /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
99
                                    = log2(total_count) - log2(count(symbol)) */
100
0
        double log2p = log2total - FastLog2(histogram->data_[i]);
101
        /* Approximate the bit depth by round(-log2(P(symbol))) */
102
0
        size_t depth = (size_t)(log2p + 0.5);
103
0
        bits += histogram->data_[i] * log2p;
104
0
        if (depth > 15) {
105
0
          depth = 15;
106
0
        }
107
0
        if (depth > max_depth) {
108
0
          max_depth = depth;
109
0
        }
110
0
        ++depth_histo[depth];
111
0
        ++i;
112
0
      } else {
113
        /* Compute the run length of zeros and add the appropriate number of 0
114
           and 17 code length codes to the code length code histogram. */
115
0
        uint32_t reps = 1;
116
0
        size_t k;
117
0
        for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
118
0
          ++reps;
119
0
        }
120
0
        i += reps;
121
0
        if (i == data_size) {
122
          /* Don't add any cost for the last zero run, since these are encoded
123
             only implicitly. */
124
0
          break;
125
0
        }
126
0
        if (reps < 3) {
127
0
          depth_histo[0] += reps;
128
0
        } else {
129
0
          reps -= 2;
130
0
          while (reps > 0) {
131
0
            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
132
            /* Add the 3 extra bits for the 17 code length code. */
133
0
            bits += 3;
134
0
            reps >>= 3;
135
0
          }
136
0
        }
137
0
      }
138
0
    }
139
    /* Add the estimated encoding cost of the code length code histogram. */
140
0
    bits += (double)(18 + 2 * max_depth);
141
    /* Add the entropy of the code length code histogram. */
142
0
    bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
143
0
  }
144
0
  return bits;
145
0
}
146
147
#undef HistogramType
148
#undef FN
149
150
0
#define FN(X) duckdb_brotli:: X ## Command
151
/* NOLINT(build/header_guard) */
152
/* Copyright 2013 Google Inc. All Rights Reserved.
153
154
   Distributed under MIT license.
155
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
156
*/
157
158
/* template parameters: FN */
159
160
#define HistogramType FN(Histogram)
161
162
0
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
163
0
  static const double kOneSymbolHistogramCost = 12;
164
0
  static const double kTwoSymbolHistogramCost = 20;
165
0
  static const double kThreeSymbolHistogramCost = 28;
166
0
  static const double kFourSymbolHistogramCost = 37;
167
0
  const size_t data_size = FN(HistogramDataSize)();
168
0
  int count = 0;
169
0
  size_t s[5];
170
0
  double bits = 0.0;
171
0
  size_t i;
172
0
  if (histogram->total_count_ == 0) {
173
0
    return kOneSymbolHistogramCost;
174
0
  }
175
0
  for (i = 0; i < data_size; ++i) {
176
0
    if (histogram->data_[i] > 0) {
177
0
      s[count] = i;
178
0
      ++count;
179
0
      if (count > 4) break;
180
0
    }
181
0
  }
182
0
  if (count == 1) {
183
0
    return kOneSymbolHistogramCost;
184
0
  }
185
0
  if (count == 2) {
186
0
    return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
187
0
  }
188
0
  if (count == 3) {
189
0
    const uint32_t histo0 = histogram->data_[s[0]];
190
0
    const uint32_t histo1 = histogram->data_[s[1]];
191
0
    const uint32_t histo2 = histogram->data_[s[2]];
192
0
    const uint32_t histomax =
193
0
        BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
194
0
    return (kThreeSymbolHistogramCost +
195
0
            2 * (histo0 + histo1 + histo2) - histomax);
196
0
  }
197
0
  if (count == 4) {
198
0
    uint32_t histo[4];
199
0
    uint32_t h23;
200
0
    uint32_t histomax;
201
0
    for (i = 0; i < 4; ++i) {
202
0
      histo[i] = histogram->data_[s[i]];
203
0
    }
204
    /* Sort */
205
0
    for (i = 0; i < 4; ++i) {
206
0
      size_t j;
207
0
      for (j = i + 1; j < 4; ++j) {
208
0
        if (histo[j] > histo[i]) {
209
0
          BROTLI_SWAP(uint32_t, histo, j, i);
210
0
        }
211
0
      }
212
0
    }
213
0
    h23 = histo[2] + histo[3];
214
0
    histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
215
0
    return (kFourSymbolHistogramCost +
216
0
            3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
217
0
  }
218
219
0
  {
220
    /* In this loop we compute the entropy of the histogram and simultaneously
221
       build a simplified histogram of the code length codes where we use the
222
       zero repeat code 17, but we don't use the non-zero repeat code 16. */
223
0
    size_t max_depth = 1;
224
0
    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
225
0
    const double log2total = FastLog2(histogram->total_count_);
226
0
    for (i = 0; i < data_size;) {
227
0
      if (histogram->data_[i] > 0) {
228
        /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
229
                                    = log2(total_count) - log2(count(symbol)) */
230
0
        double log2p = log2total - FastLog2(histogram->data_[i]);
231
        /* Approximate the bit depth by round(-log2(P(symbol))) */
232
0
        size_t depth = (size_t)(log2p + 0.5);
233
0
        bits += histogram->data_[i] * log2p;
234
0
        if (depth > 15) {
235
0
          depth = 15;
236
0
        }
237
0
        if (depth > max_depth) {
238
0
          max_depth = depth;
239
0
        }
240
0
        ++depth_histo[depth];
241
0
        ++i;
242
0
      } else {
243
        /* Compute the run length of zeros and add the appropriate number of 0
244
           and 17 code length codes to the code length code histogram. */
245
0
        uint32_t reps = 1;
246
0
        size_t k;
247
0
        for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
248
0
          ++reps;
249
0
        }
250
0
        i += reps;
251
0
        if (i == data_size) {
252
          /* Don't add any cost for the last zero run, since these are encoded
253
             only implicitly. */
254
0
          break;
255
0
        }
256
0
        if (reps < 3) {
257
0
          depth_histo[0] += reps;
258
0
        } else {
259
0
          reps -= 2;
260
0
          while (reps > 0) {
261
0
            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
262
            /* Add the 3 extra bits for the 17 code length code. */
263
0
            bits += 3;
264
0
            reps >>= 3;
265
0
          }
266
0
        }
267
0
      }
268
0
    }
269
    /* Add the estimated encoding cost of the code length code histogram. */
270
0
    bits += (double)(18 + 2 * max_depth);
271
    /* Add the entropy of the code length code histogram. */
272
0
    bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
273
0
  }
274
0
  return bits;
275
0
}
276
277
#undef HistogramType
278
#undef FN
279
280
0
#define FN(X) duckdb_brotli:: X ## Distance
281
/* NOLINT(build/header_guard) */
282
/* Copyright 2013 Google Inc. All Rights Reserved.
283
284
   Distributed under MIT license.
285
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
286
*/
287
288
/* template parameters: FN */
289
290
#define HistogramType FN(Histogram)
291
292
0
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
293
0
  static const double kOneSymbolHistogramCost = 12;
294
0
  static const double kTwoSymbolHistogramCost = 20;
295
0
  static const double kThreeSymbolHistogramCost = 28;
296
0
  static const double kFourSymbolHistogramCost = 37;
297
0
  const size_t data_size = FN(HistogramDataSize)();
298
0
  int count = 0;
299
0
  size_t s[5];
300
0
  double bits = 0.0;
301
0
  size_t i;
302
0
  if (histogram->total_count_ == 0) {
303
0
    return kOneSymbolHistogramCost;
304
0
  }
305
0
  for (i = 0; i < data_size; ++i) {
306
0
    if (histogram->data_[i] > 0) {
307
0
      s[count] = i;
308
0
      ++count;
309
0
      if (count > 4) break;
310
0
    }
311
0
  }
312
0
  if (count == 1) {
313
0
    return kOneSymbolHistogramCost;
314
0
  }
315
0
  if (count == 2) {
316
0
    return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
317
0
  }
318
0
  if (count == 3) {
319
0
    const uint32_t histo0 = histogram->data_[s[0]];
320
0
    const uint32_t histo1 = histogram->data_[s[1]];
321
0
    const uint32_t histo2 = histogram->data_[s[2]];
322
0
    const uint32_t histomax =
323
0
        BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
324
0
    return (kThreeSymbolHistogramCost +
325
0
            2 * (histo0 + histo1 + histo2) - histomax);
326
0
  }
327
0
  if (count == 4) {
328
0
    uint32_t histo[4];
329
0
    uint32_t h23;
330
0
    uint32_t histomax;
331
0
    for (i = 0; i < 4; ++i) {
332
0
      histo[i] = histogram->data_[s[i]];
333
0
    }
334
    /* Sort */
335
0
    for (i = 0; i < 4; ++i) {
336
0
      size_t j;
337
0
      for (j = i + 1; j < 4; ++j) {
338
0
        if (histo[j] > histo[i]) {
339
0
          BROTLI_SWAP(uint32_t, histo, j, i);
340
0
        }
341
0
      }
342
0
    }
343
0
    h23 = histo[2] + histo[3];
344
0
    histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
345
0
    return (kFourSymbolHistogramCost +
346
0
            3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
347
0
  }
348
349
0
  {
350
    /* In this loop we compute the entropy of the histogram and simultaneously
351
       build a simplified histogram of the code length codes where we use the
352
       zero repeat code 17, but we don't use the non-zero repeat code 16. */
353
0
    size_t max_depth = 1;
354
0
    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
355
0
    const double log2total = FastLog2(histogram->total_count_);
356
0
    for (i = 0; i < data_size;) {
357
0
      if (histogram->data_[i] > 0) {
358
        /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
359
                                    = log2(total_count) - log2(count(symbol)) */
360
0
        double log2p = log2total - FastLog2(histogram->data_[i]);
361
        /* Approximate the bit depth by round(-log2(P(symbol))) */
362
0
        size_t depth = (size_t)(log2p + 0.5);
363
0
        bits += histogram->data_[i] * log2p;
364
0
        if (depth > 15) {
365
0
          depth = 15;
366
0
        }
367
0
        if (depth > max_depth) {
368
0
          max_depth = depth;
369
0
        }
370
0
        ++depth_histo[depth];
371
0
        ++i;
372
0
      } else {
373
        /* Compute the run length of zeros and add the appropriate number of 0
374
           and 17 code length codes to the code length code histogram. */
375
0
        uint32_t reps = 1;
376
0
        size_t k;
377
0
        for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
378
0
          ++reps;
379
0
        }
380
0
        i += reps;
381
0
        if (i == data_size) {
382
          /* Don't add any cost for the last zero run, since these are encoded
383
             only implicitly. */
384
0
          break;
385
0
        }
386
0
        if (reps < 3) {
387
0
          depth_histo[0] += reps;
388
0
        } else {
389
0
          reps -= 2;
390
0
          while (reps > 0) {
391
0
            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
392
            /* Add the 3 extra bits for the 17 code length code. */
393
0
            bits += 3;
394
0
            reps >>= 3;
395
0
          }
396
0
        }
397
0
      }
398
0
    }
399
    /* Add the estimated encoding cost of the code length code histogram. */
400
0
    bits += (double)(18 + 2 * max_depth);
401
    /* Add the entropy of the code length code histogram. */
402
0
    bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
403
0
  }
404
0
  return bits;
405
0
}
406
407
#undef HistogramType
408
#undef FN
409
410