Coverage Report

Created: 2025-11-15 07:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/duckdb/third_party/brotli/enc/block_splitter.cpp
Line
Count
Source
1
/* Copyright 2013 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* Block split point selection utilities. */
8
9
#include "block_splitter.h"
10
11
#include <string.h>  /* memcpy, memset */
12
13
#include "../common/brotli_platform.h"
14
#include "bit_cost.h"
15
#include "cluster.h"
16
#include "command.h"
17
#include "fast_log.h"
18
#include "histogram.h"
19
#include "memory.h"
20
#include "quality.h"
21
22
using namespace duckdb_brotli;
23
24
static const size_t kMaxLiteralHistograms = 100;
25
static const size_t kMaxCommandHistograms = 50;
26
static const double kLiteralBlockSwitchCost = 28.1;
27
static const double kCommandBlockSwitchCost = 13.5;
28
static const double kDistanceBlockSwitchCost = 14.6;
29
static const size_t kLiteralStrideLength = 70;
30
static const size_t kCommandStrideLength = 40;
31
static const size_t kDistanceStrideLength = 40;
32
static const size_t kSymbolsPerLiteralHistogram = 544;
33
static const size_t kSymbolsPerCommandHistogram = 530;
34
static const size_t kSymbolsPerDistanceHistogram = 544;
35
static const size_t kMinLengthForBlockSplitting = 128;
36
static const size_t kIterMulForRefining = 2;
37
static const size_t kMinItersForRefining = 100;
38
39
0
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
40
  /* Count how many we have. */
41
0
  size_t total_length = 0;
42
0
  size_t i;
43
0
  for (i = 0; i < num_commands; ++i) {
44
0
    total_length += cmds[i].insert_len_;
45
0
  }
46
0
  return total_length;
47
0
}
48
49
static void CopyLiteralsToByteArray(const Command* cmds,
50
                                    const size_t num_commands,
51
                                    const uint8_t* data,
52
                                    const size_t offset,
53
                                    const size_t mask,
54
0
                                    uint8_t* literals) {
55
0
  size_t pos = 0;
56
0
  size_t from_pos = offset & mask;
57
0
  size_t i;
58
0
  for (i = 0; i < num_commands; ++i) {
59
0
    size_t insert_len = cmds[i].insert_len_;
60
0
    if (from_pos + insert_len > mask) {
61
0
      size_t head_size = mask + 1 - from_pos;
62
0
      memcpy(literals + pos, data + from_pos, head_size);
63
0
      from_pos = 0;
64
0
      pos += head_size;
65
0
      insert_len -= head_size;
66
0
    }
67
0
    if (insert_len > 0) {
68
0
      memcpy(literals + pos, data + from_pos, insert_len);
69
0
      pos += insert_len;
70
0
    }
71
0
    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
72
0
  }
73
0
}
74
75
0
static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
76
  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
77
0
  *seed *= 16807U;
78
0
  return *seed;
79
0
}
80
81
0
static BROTLI_INLINE double BitCost(size_t count) {
82
0
  return count == 0 ? -2.0 : FastLog2(count);
83
0
}
84
85
0
#define HISTOGRAMS_PER_BATCH 64
86
0
#define CLUSTERS_PER_BATCH 16
87
88
0
#define FN(X) X ## Literal
89
#define DataType uint8_t
90
/* NOLINTNEXTLINE(build/include) */
91
/* NOLINT(build/header_guard) */
92
/* Copyright 2013 Google Inc. All Rights Reserved.
93
94
   Distributed under MIT license.
95
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
96
*/
97
98
/* template parameters: FN, DataType */
99
100
0
#define HistogramType FN(Histogram)
101
102
static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
103
                                    size_t stride,
104
                                    size_t num_histograms,
105
0
                                    HistogramType* histograms) {
106
0
  uint32_t seed = 7;
107
0
  size_t block_length = length / num_histograms;
108
0
  size_t i;
109
0
  FN(ClearHistograms)(histograms, num_histograms);
110
0
  for (i = 0; i < num_histograms; ++i) {
111
0
    size_t pos = length * i / num_histograms;
112
0
    if (i != 0) {
113
0
      pos += MyRand(&seed) % block_length;
114
0
    }
115
0
    if (pos + stride >= length) {
116
0
      pos = length - stride - 1;
117
0
    }
118
0
    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
119
0
  }
120
0
}
121
122
static void FN(RandomSample)(uint32_t* seed,
123
                             const DataType* data,
124
                             size_t length,
125
                             size_t stride,
126
0
                             HistogramType* sample) {
127
0
  size_t pos = 0;
128
0
  if (stride >= length) {
129
0
    stride = length;
130
0
  } else {
131
0
    pos = MyRand(seed) % (length - stride + 1);
132
0
  }
133
0
  FN(HistogramAddVector)(sample, data + pos, stride);
134
0
}
135
136
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
137
                                   size_t stride,
138
                                   size_t num_histograms,
139
                                   HistogramType* histograms,
140
0
                                   HistogramType* tmp) {
141
0
  size_t iters =
142
0
      kIterMulForRefining * length / stride + kMinItersForRefining;
143
0
  uint32_t seed = 7;
144
0
  size_t iter;
145
0
  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
146
0
  for (iter = 0; iter < iters; ++iter) {
147
0
    FN(HistogramClear)(tmp);
148
0
    FN(RandomSample)(&seed, data, length, stride, tmp);
149
0
    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
150
0
  }
151
0
}
152
153
/* Assigns a block id from the range [0, num_histograms) to each data element
154
   in data[0..length) and fills in block_id[0..length) with the assigned values.
155
   Returns the number of blocks, i.e. one plus the number of block switches. */
156
static size_t FN(FindBlocks)(const DataType* data, const size_t length,
157
                             const double block_switch_bitcost,
158
                             const size_t num_histograms,
159
                             const HistogramType* histograms,
160
                             double* insert_cost,
161
                             double* cost,
162
                             uint8_t* switch_signal,
163
0
                             uint8_t* block_id) {
164
0
  const size_t alphabet_size = FN(HistogramDataSize)();
165
0
  const size_t bitmap_len = (num_histograms + 7) >> 3;
166
0
  size_t num_blocks = 1;
167
0
  size_t byte_ix;
168
0
  size_t i;
169
0
  size_t j;
170
0
  BROTLI_DCHECK(num_histograms <= 256);
171
172
  /* Trivial case: single historgram -> single block type. */
173
0
  if (num_histograms <= 1) {
174
0
    for (i = 0; i < length; ++i) {
175
0
      block_id[i] = 0;
176
0
    }
177
0
    return 1;
178
0
  }
179
180
  /* Fill bitcost for each symbol of all histograms.
181
   * Non-existing symbol cost: 2 + log2(total_count).
182
   * Regular symbol cost: -log2(symbol_count / total_count). */
183
0
  memset(insert_cost, 0,
184
0
         sizeof(insert_cost[0]) * alphabet_size * num_histograms);
185
0
  for (i = 0; i < num_histograms; ++i) {
186
0
    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
187
0
  }
188
0
  for (i = alphabet_size; i != 0;) {
189
    /* Reverse order to use the 0-th row as a temporary storage. */
190
0
    --i;
191
0
    for (j = 0; j < num_histograms; ++j) {
192
0
      insert_cost[i * num_histograms + j] =
193
0
          insert_cost[j] - BitCost(histograms[j].data_[i]);
194
0
    }
195
0
  }
196
197
  /* After each iteration of this loop, cost[k] will contain the difference
198
     between the minimum cost of arriving at the current byte position using
199
     entropy code k, and the minimum cost of arriving at the current byte
200
     position. This difference is capped at the block switch cost, and if it
201
     reaches block switch cost, it means that when we trace back from the last
202
     position, we need to switch here. */
203
0
  memset(cost, 0, sizeof(cost[0]) * num_histograms);
204
0
  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
205
0
  for (byte_ix = 0; byte_ix < length; ++byte_ix) {
206
0
    size_t ix = byte_ix * bitmap_len;
207
0
    size_t symbol = data[byte_ix];
208
0
    size_t insert_cost_ix = symbol * num_histograms;
209
0
    double min_cost = 1e99;
210
0
    double block_switch_cost = block_switch_bitcost;
211
0
    size_t k;
212
0
    for (k = 0; k < num_histograms; ++k) {
213
      /* We are coding the symbol with entropy code k. */
214
0
      cost[k] += insert_cost[insert_cost_ix + k];
215
0
      if (cost[k] < min_cost) {
216
0
        min_cost = cost[k];
217
0
        block_id[byte_ix] = (uint8_t)k;
218
0
      }
219
0
    }
220
    /* More blocks for the beginning. */
221
0
    if (byte_ix < 2000) {
222
0
      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
223
0
    }
224
0
    for (k = 0; k < num_histograms; ++k) {
225
0
      cost[k] -= min_cost;
226
0
      if (cost[k] >= block_switch_cost) {
227
0
        const uint8_t mask = (uint8_t)(1u << (k & 7));
228
0
        cost[k] = block_switch_cost;
229
0
        BROTLI_DCHECK((k >> 3) < bitmap_len);
230
0
        switch_signal[ix + (k >> 3)] |= mask;
231
0
      }
232
0
    }
233
0
  }
234
235
0
  byte_ix = length - 1;
236
0
  {  /* Trace back from the last position and switch at the marked places. */
237
0
    size_t ix = byte_ix * bitmap_len;
238
0
    uint8_t cur_id = block_id[byte_ix];
239
0
    while (byte_ix > 0) {
240
0
      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
241
0
      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
242
0
      --byte_ix;
243
0
      ix -= bitmap_len;
244
0
      if (switch_signal[ix + (cur_id >> 3)] & mask) {
245
0
        if (cur_id != block_id[byte_ix]) {
246
0
          cur_id = block_id[byte_ix];
247
0
          ++num_blocks;
248
0
        }
249
0
      }
250
0
      block_id[byte_ix] = cur_id;
251
0
    }
252
0
  }
253
0
  return num_blocks;
254
0
}
255
256
static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
257
0
                                uint16_t* new_id, const size_t num_histograms) {
258
0
  static const uint16_t kInvalidId = 256;
259
0
  uint16_t next_id = 0;
260
0
  size_t i;
261
0
  for (i = 0; i < num_histograms; ++i) {
262
0
    new_id[i] = kInvalidId;
263
0
  }
264
0
  for (i = 0; i < length; ++i) {
265
0
    BROTLI_DCHECK(block_ids[i] < num_histograms);
266
0
    if (new_id[block_ids[i]] == kInvalidId) {
267
0
      new_id[block_ids[i]] = next_id++;
268
0
    }
269
0
  }
270
0
  for (i = 0; i < length; ++i) {
271
0
    block_ids[i] = (uint8_t)new_id[block_ids[i]];
272
0
    BROTLI_DCHECK(block_ids[i] < num_histograms);
273
0
  }
274
0
  BROTLI_DCHECK(next_id <= num_histograms);
275
0
  return next_id;
276
0
}
277
278
static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
279
                                     const uint8_t* block_ids,
280
                                     const size_t num_histograms,
281
0
                                     HistogramType* histograms) {
282
0
  size_t i;
283
0
  FN(ClearHistograms)(histograms, num_histograms);
284
0
  for (i = 0; i < length; ++i) {
285
0
    FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
286
0
  }
287
0
}
288
289
/* Given the initial partitioning build partitioning with limited number
290
 * of histograms (and block types). */
291
static void FN(ClusterBlocks)(MemoryManager* m,
292
                              const DataType* data, const size_t length,
293
                              const size_t num_blocks,
294
                              uint8_t* block_ids,
295
0
                              BlockSplit* split) {
296
0
  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
297
0
  uint32_t* u32 =
298
0
      BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
299
0
  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
300
0
      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
301
0
  size_t all_histograms_size = 0;
302
0
  size_t all_histograms_capacity = expected_num_clusters;
303
0
  HistogramType* all_histograms =
304
0
      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
305
0
  size_t cluster_size_size = 0;
306
0
  size_t cluster_size_capacity = expected_num_clusters;
307
0
  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
308
0
  size_t num_clusters = 0;
309
0
  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
310
0
      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
311
0
  size_t max_num_pairs =
312
0
      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
313
0
  size_t pairs_capacity = max_num_pairs + 1;
314
0
  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
315
0
  size_t pos = 0;
316
0
  uint32_t* clusters;
317
0
  size_t num_final_clusters;
318
0
  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
319
0
  uint32_t* new_index;
320
0
  size_t i;
321
0
  uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
322
0
  uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
323
0
  uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
324
0
  uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
325
0
  uint32_t* BROTLI_RESTRICT const block_lengths =
326
0
      u32 + 4 * HISTOGRAMS_PER_BATCH;
327
  /* TODO(eustas): move to arena? */
328
0
  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);
329
330
0
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
331
0
      BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
332
0
      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
333
0
      BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
334
0
    return;
335
0
  }
336
337
0
  memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));
338
339
  /* Calculate block lengths (convert repeating values -> series length). */
340
0
  {
341
0
    size_t block_idx = 0;
342
0
    for (i = 0; i < length; ++i) {
343
0
      BROTLI_DCHECK(block_idx < num_blocks);
344
0
      ++block_lengths[block_idx];
345
0
      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
346
0
        ++block_idx;
347
0
      }
348
0
    }
349
0
    BROTLI_DCHECK(block_idx == num_blocks);
350
0
  }
351
352
  /* Pre-cluster blocks (cluster batches). */
353
0
  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
354
0
    const size_t num_to_combine =
355
0
        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
356
0
    size_t num_new_clusters;
357
0
    size_t j;
358
0
    for (j = 0; j < num_to_combine; ++j) {
359
0
      size_t k;
360
0
      size_t block_length = block_lengths[i + j];
361
0
      FN(HistogramClear)(&histograms[j]);
362
0
      for (k = 0; k < block_length; ++k) {
363
0
        FN(HistogramAdd)(&histograms[j], data[pos++]);
364
0
      }
365
0
      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
366
0
      new_clusters[j] = (uint32_t)j;
367
0
      symbols[j] = (uint32_t)j;
368
0
      sizes[j] = 1;
369
0
    }
370
0
    num_new_clusters = FN(BrotliHistogramCombine)(
371
0
        histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
372
0
        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
373
0
    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
374
0
        all_histograms_capacity, all_histograms_size + num_new_clusters);
375
0
    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
376
0
        cluster_size_capacity, cluster_size_size + num_new_clusters);
377
0
    if (BROTLI_IS_OOM(m)) return;
378
0
    for (j = 0; j < num_new_clusters; ++j) {
379
0
      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
380
0
      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
381
0
      remap[new_clusters[j]] = (uint32_t)j;
382
0
    }
383
0
    for (j = 0; j < num_to_combine; ++j) {
384
0
      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
385
0
    }
386
0
    num_clusters += num_new_clusters;
387
0
    BROTLI_DCHECK(num_clusters == cluster_size_size);
388
0
    BROTLI_DCHECK(num_clusters == all_histograms_size);
389
0
  }
390
0
  BROTLI_FREE(m, histograms);
391
392
  /* Final clustering. */
393
0
  max_num_pairs =
394
0
      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
395
0
  if (pairs_capacity < max_num_pairs + 1) {
396
0
    BROTLI_FREE(m, pairs);
397
0
    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
398
0
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
399
0
  }
400
0
  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
401
0
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
402
0
  for (i = 0; i < num_clusters; ++i) {
403
0
    clusters[i] = (uint32_t)i;
404
0
  }
405
0
  num_final_clusters = FN(BrotliHistogramCombine)(
406
0
      all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
407
0
      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
408
0
      max_num_pairs);
409
0
  BROTLI_FREE(m, pairs);
410
0
  BROTLI_FREE(m, cluster_size);
411
412
  /* Assign blocks to final histograms. */
413
0
  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
414
0
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
415
0
  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
416
0
  pos = 0;
417
0
  {
418
0
    uint32_t next_index = 0;
419
0
    for (i = 0; i < num_blocks; ++i) {
420
0
      size_t j;
421
0
      uint32_t best_out;
422
0
      double best_bits;
423
0
      FN(HistogramClear)(tmp);
424
0
      for (j = 0; j < block_lengths[i]; ++j) {
425
0
        FN(HistogramAdd)(tmp, data[pos++]);
426
0
      }
427
      /* Among equally good histograms prefer last used. */
428
      /* TODO(eustas): should we give a block-switch discount here? */
429
0
      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
430
0
      best_bits = FN(BrotliHistogramBitCostDistance)(
431
0
          tmp, &all_histograms[best_out], tmp + 1);
432
0
      for (j = 0; j < num_final_clusters; ++j) {
433
0
        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
434
0
            tmp, &all_histograms[clusters[j]], tmp + 1);
435
0
        if (cur_bits < best_bits) {
436
0
          best_bits = cur_bits;
437
0
          best_out = clusters[j];
438
0
        }
439
0
      }
440
0
      histogram_symbols[i] = best_out;
441
0
      if (new_index[best_out] == kInvalidIndex) {
442
0
        new_index[best_out] = next_index++;
443
0
      }
444
0
    }
445
0
  }
446
0
  BROTLI_FREE(m, tmp);
447
0
  BROTLI_FREE(m, clusters);
448
0
  BROTLI_FREE(m, all_histograms);
449
0
  BROTLI_ENSURE_CAPACITY(
450
0
      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
451
0
  BROTLI_ENSURE_CAPACITY(
452
0
      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
453
0
  if (BROTLI_IS_OOM(m)) return;
454
455
  /* Rewrite final assignment to block-split. There might be less blocks
456
   * than |num_blocks| due to clustering. */
457
0
  {
458
0
    uint32_t cur_length = 0;
459
0
    size_t block_idx = 0;
460
0
    uint8_t max_type = 0;
461
0
    for (i = 0; i < num_blocks; ++i) {
462
0
      cur_length += block_lengths[i];
463
0
      if (i + 1 == num_blocks ||
464
0
          histogram_symbols[i] != histogram_symbols[i + 1]) {
465
0
        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
466
0
        split->types[block_idx] = id;
467
0
        split->lengths[block_idx] = cur_length;
468
0
        max_type = BROTLI_MAX(uint8_t, max_type, id);
469
0
        cur_length = 0;
470
0
        ++block_idx;
471
0
      }
472
0
    }
473
0
    split->num_blocks = block_idx;
474
0
    split->num_types = (size_t)max_type + 1;
475
0
  }
476
0
  BROTLI_FREE(m, new_index);
477
0
  BROTLI_FREE(m, u32);
478
0
  BROTLI_FREE(m, histogram_symbols);
479
0
}
480
481
/* Create BlockSplit (partitioning) given the limits, estimates and "effort"
482
 * parameters.
483
 *
484
 * NB: max_histograms is often less than number of histograms allowed by format;
485
 *     this is done intentionally, to save some "space" for context-aware
486
 *     clustering (here entropy is estimated for context-free symbols). */
487
static void FN(SplitByteVector)(MemoryManager* m,
488
                                const DataType* data, const size_t length,
489
                                const size_t symbols_per_histogram,
490
                                const size_t max_histograms,
491
                                const size_t sampling_stride_length,
492
                                const double block_switch_cost,
493
                                const BrotliEncoderParams* params,
494
0
                                BlockSplit* split) {
495
0
  const size_t data_size = FN(HistogramDataSize)();
496
0
  HistogramType* histograms;
497
0
  HistogramType* tmp;
498
  /* Calculate number of histograms; initial estimate is one histogram per
499
   * specified amount of symbols; however, this value is capped. */
500
0
  size_t num_histograms = length / symbols_per_histogram + 1;
501
0
  if (num_histograms > max_histograms) {
502
0
    num_histograms = max_histograms;
503
0
  }
504
505
  /* Corner case: no input. */
506
0
  if (length == 0) {
507
0
    split->num_types = 1;
508
0
    return;
509
0
  }
510
511
0
  if (length < kMinLengthForBlockSplitting) {
512
0
    BROTLI_ENSURE_CAPACITY(m, uint8_t,
513
0
        split->types, split->types_alloc_size, split->num_blocks + 1);
514
0
    BROTLI_ENSURE_CAPACITY(m, uint32_t,
515
0
        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
516
0
    if (BROTLI_IS_OOM(m)) return;
517
0
    split->num_types = 1;
518
0
    split->types[split->num_blocks] = 0;
519
0
    split->lengths[split->num_blocks] = (uint32_t)length;
520
0
    split->num_blocks++;
521
0
    return;
522
0
  }
523
0
  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
524
0
  tmp = histograms + num_histograms;
525
0
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
526
  /* Find good entropy codes. */
527
0
  FN(InitialEntropyCodes)(data, length,
528
0
                          sampling_stride_length,
529
0
                          num_histograms, histograms);
530
0
  FN(RefineEntropyCodes)(data, length,
531
0
                         sampling_stride_length,
532
0
                         num_histograms, histograms, tmp);
533
0
  {
534
    /* Find a good path through literals with the good entropy codes. */
535
0
    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
536
0
    size_t num_blocks = 0;
537
0
    const size_t bitmaplen = (num_histograms + 7) >> 3;
538
0
    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
539
0
    double* cost = BROTLI_ALLOC(m, double, num_histograms);
540
0
    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
541
0
    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
542
0
    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
543
0
    size_t i;
544
0
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
545
0
        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
546
0
        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
547
0
      return;
548
0
    }
549
0
    for (i = 0; i < iters; ++i) {
550
0
      num_blocks = FN(FindBlocks)(data, length,
551
0
                                  block_switch_cost,
552
0
                                  num_histograms, histograms,
553
0
                                  insert_cost, cost, switch_signal,
554
0
                                  block_ids);
555
0
      num_histograms = FN(RemapBlockIds)(block_ids, length,
556
0
                                         new_id, num_histograms);
557
0
      FN(BuildBlockHistograms)(data, length, block_ids,
558
0
                               num_histograms, histograms);
559
0
    }
560
0
    BROTLI_FREE(m, insert_cost);
561
0
    BROTLI_FREE(m, cost);
562
0
    BROTLI_FREE(m, switch_signal);
563
0
    BROTLI_FREE(m, new_id);
564
0
    BROTLI_FREE(m, histograms);
565
0
    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
566
0
    if (BROTLI_IS_OOM(m)) return;
567
0
    BROTLI_FREE(m, block_ids);
568
0
  }
569
0
}
570
571
#undef HistogramType
572
#undef DataType
573
#undef FN
574
575
0
#define FN(X) X ## Command
576
#define DataType uint16_t
577
/* NOLINTNEXTLINE(build/include) */
578
/* NOLINT(build/header_guard) */
579
/* Copyright 2013 Google Inc. All Rights Reserved.
580
581
   Distributed under MIT license.
582
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
583
*/
584
585
/* template parameters: FN, DataType */
586
587
0
#define HistogramType FN(Histogram)
588
589
static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
590
                                    size_t stride,
591
                                    size_t num_histograms,
592
0
                                    HistogramType* histograms) {
593
0
  uint32_t seed = 7;
594
0
  size_t block_length = length / num_histograms;
595
0
  size_t i;
596
0
  FN(ClearHistograms)(histograms, num_histograms);
597
0
  for (i = 0; i < num_histograms; ++i) {
598
0
    size_t pos = length * i / num_histograms;
599
0
    if (i != 0) {
600
0
      pos += MyRand(&seed) % block_length;
601
0
    }
602
0
    if (pos + stride >= length) {
603
0
      pos = length - stride - 1;
604
0
    }
605
0
    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
606
0
  }
607
0
}
608
609
static void FN(RandomSample)(uint32_t* seed,
610
                             const DataType* data,
611
                             size_t length,
612
                             size_t stride,
613
0
                             HistogramType* sample) {
614
0
  size_t pos = 0;
615
0
  if (stride >= length) {
616
0
    stride = length;
617
0
  } else {
618
0
    pos = MyRand(seed) % (length - stride + 1);
619
0
  }
620
0
  FN(HistogramAddVector)(sample, data + pos, stride);
621
0
}
622
623
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
624
                                   size_t stride,
625
                                   size_t num_histograms,
626
                                   HistogramType* histograms,
627
0
                                   HistogramType* tmp) {
628
0
  size_t iters =
629
0
      kIterMulForRefining * length / stride + kMinItersForRefining;
630
0
  uint32_t seed = 7;
631
0
  size_t iter;
632
0
  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
633
0
  for (iter = 0; iter < iters; ++iter) {
634
0
    FN(HistogramClear)(tmp);
635
0
    FN(RandomSample)(&seed, data, length, stride, tmp);
636
0
    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
637
0
  }
638
0
}
639
640
/* Assigns a block id from the range [0, num_histograms) to each data element
641
   in data[0..length) and fills in block_id[0..length) with the assigned values.
642
   Returns the number of blocks, i.e. one plus the number of block switches. */
643
static size_t FN(FindBlocks)(const DataType* data, const size_t length,
644
                             const double block_switch_bitcost,
645
                             const size_t num_histograms,
646
                             const HistogramType* histograms,
647
                             double* insert_cost,
648
                             double* cost,
649
                             uint8_t* switch_signal,
650
0
                             uint8_t* block_id) {
651
0
  const size_t alphabet_size = FN(HistogramDataSize)();
652
0
  const size_t bitmap_len = (num_histograms + 7) >> 3;
653
0
  size_t num_blocks = 1;
654
0
  size_t byte_ix;
655
0
  size_t i;
656
0
  size_t j;
657
0
  BROTLI_DCHECK(num_histograms <= 256);
658
659
  /* Trivial case: single historgram -> single block type. */
660
0
  if (num_histograms <= 1) {
661
0
    for (i = 0; i < length; ++i) {
662
0
      block_id[i] = 0;
663
0
    }
664
0
    return 1;
665
0
  }
666
667
  /* Fill bitcost for each symbol of all histograms.
668
   * Non-existing symbol cost: 2 + log2(total_count).
669
   * Regular symbol cost: -log2(symbol_count / total_count). */
670
0
  memset(insert_cost, 0,
671
0
         sizeof(insert_cost[0]) * alphabet_size * num_histograms);
672
0
  for (i = 0; i < num_histograms; ++i) {
673
0
    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
674
0
  }
675
0
  for (i = alphabet_size; i != 0;) {
676
    /* Reverse order to use the 0-th row as a temporary storage. */
677
0
    --i;
678
0
    for (j = 0; j < num_histograms; ++j) {
679
0
      insert_cost[i * num_histograms + j] =
680
0
          insert_cost[j] - BitCost(histograms[j].data_[i]);
681
0
    }
682
0
  }
683
684
  /* After each iteration of this loop, cost[k] will contain the difference
685
     between the minimum cost of arriving at the current byte position using
686
     entropy code k, and the minimum cost of arriving at the current byte
687
     position. This difference is capped at the block switch cost, and if it
688
     reaches block switch cost, it means that when we trace back from the last
689
     position, we need to switch here. */
690
0
  memset(cost, 0, sizeof(cost[0]) * num_histograms);
691
0
  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
692
0
  for (byte_ix = 0; byte_ix < length; ++byte_ix) {
693
0
    size_t ix = byte_ix * bitmap_len;
694
0
    size_t symbol = data[byte_ix];
695
0
    size_t insert_cost_ix = symbol * num_histograms;
696
0
    double min_cost = 1e99;
697
0
    double block_switch_cost = block_switch_bitcost;
698
0
    size_t k;
699
0
    for (k = 0; k < num_histograms; ++k) {
700
      /* We are coding the symbol with entropy code k. */
701
0
      cost[k] += insert_cost[insert_cost_ix + k];
702
0
      if (cost[k] < min_cost) {
703
0
        min_cost = cost[k];
704
0
        block_id[byte_ix] = (uint8_t)k;
705
0
      }
706
0
    }
707
    /* More blocks for the beginning. */
708
0
    if (byte_ix < 2000) {
709
0
      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
710
0
    }
711
0
    for (k = 0; k < num_histograms; ++k) {
712
0
      cost[k] -= min_cost;
713
0
      if (cost[k] >= block_switch_cost) {
714
0
        const uint8_t mask = (uint8_t)(1u << (k & 7));
715
0
        cost[k] = block_switch_cost;
716
0
        BROTLI_DCHECK((k >> 3) < bitmap_len);
717
0
        switch_signal[ix + (k >> 3)] |= mask;
718
0
      }
719
0
    }
720
0
  }
721
722
0
  byte_ix = length - 1;
723
0
  {  /* Trace back from the last position and switch at the marked places. */
724
0
    size_t ix = byte_ix * bitmap_len;
725
0
    uint8_t cur_id = block_id[byte_ix];
726
0
    while (byte_ix > 0) {
727
0
      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
728
0
      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
729
0
      --byte_ix;
730
0
      ix -= bitmap_len;
731
0
      if (switch_signal[ix + (cur_id >> 3)] & mask) {
732
0
        if (cur_id != block_id[byte_ix]) {
733
0
          cur_id = block_id[byte_ix];
734
0
          ++num_blocks;
735
0
        }
736
0
      }
737
0
      block_id[byte_ix] = cur_id;
738
0
    }
739
0
  }
740
0
  return num_blocks;
741
0
}
742
743
static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
744
0
                                uint16_t* new_id, const size_t num_histograms) {
745
0
  static const uint16_t kInvalidId = 256;
746
0
  uint16_t next_id = 0;
747
0
  size_t i;
748
0
  for (i = 0; i < num_histograms; ++i) {
749
0
    new_id[i] = kInvalidId;
750
0
  }
751
0
  for (i = 0; i < length; ++i) {
752
0
    BROTLI_DCHECK(block_ids[i] < num_histograms);
753
0
    if (new_id[block_ids[i]] == kInvalidId) {
754
0
      new_id[block_ids[i]] = next_id++;
755
0
    }
756
0
  }
757
0
  for (i = 0; i < length; ++i) {
758
0
    block_ids[i] = (uint8_t)new_id[block_ids[i]];
759
0
    BROTLI_DCHECK(block_ids[i] < num_histograms);
760
0
  }
761
0
  BROTLI_DCHECK(next_id <= num_histograms);
762
0
  return next_id;
763
0
}
764
765
static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
766
                                     const uint8_t* block_ids,
767
                                     const size_t num_histograms,
768
0
                                     HistogramType* histograms) {
769
0
  size_t i;
770
0
  FN(ClearHistograms)(histograms, num_histograms);
771
0
  for (i = 0; i < length; ++i) {
772
0
    FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
773
0
  }
774
0
}
775
776
/* Given the initial partitioning build partitioning with limited number
777
 * of histograms (and block types). */
778
static void FN(ClusterBlocks)(MemoryManager* m,
                              const DataType* data, const size_t length,
                              const size_t num_blocks,
                              uint8_t* block_ids,
                              BlockSplit* split) {
  /* Cluster index chosen for each of the |num_blocks| blocks. */
  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
  /* Shared uint32 workspace: four batch-sized scratch arrays followed by
     per-block lengths (see the sizes..block_lengths aliases below). */
  uint32_t* u32 =
      BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
  size_t all_histograms_size = 0;
  size_t all_histograms_capacity = expected_num_clusters;
  HistogramType* all_histograms =
      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
  size_t cluster_size_size = 0;
  size_t cluster_size_capacity = expected_num_clusters;
  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
  size_t num_clusters = 0;
  /* Per-batch histograms; at most HISTOGRAMS_PER_BATCH live at a time. */
  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
  size_t max_num_pairs =
      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
  size_t pairs_capacity = max_num_pairs + 1;
  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
  size_t pos = 0;
  uint32_t* clusters;
  size_t num_final_clusters;
  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
  uint32_t* new_index;
  size_t i;
  uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const block_lengths =
      u32 + 4 * HISTOGRAMS_PER_BATCH;
  /* TODO(eustas): move to arena? */
  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);

  /* On OOM the memory manager records the failure; bail out immediately
     (brotli's convention: caller checks BROTLI_IS_OOM, no partial frees). */
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
      BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
      BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
    return;
  }

  memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));

  /* Calculate block lengths (convert repeating values -> series length). */
  {
    size_t block_idx = 0;
    for (i = 0; i < length; ++i) {
      BROTLI_DCHECK(block_idx < num_blocks);
      ++block_lengths[block_idx];
      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
        ++block_idx;
      }
    }
    BROTLI_DCHECK(block_idx == num_blocks);
  }

  /* Pre-cluster blocks (cluster batches). */
  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
    const size_t num_to_combine =
        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
    size_t num_new_clusters;
    size_t j;
    /* Build one histogram per block in this batch; |pos| walks |data|. */
    for (j = 0; j < num_to_combine; ++j) {
      size_t k;
      size_t block_length = block_lengths[i + j];
      FN(HistogramClear)(&histograms[j]);
      for (k = 0; k < block_length; ++k) {
        FN(HistogramAdd)(&histograms[j], data[pos++]);
      }
      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
      new_clusters[j] = (uint32_t)j;
      symbols[j] = (uint32_t)j;
      sizes[j] = 1;
    }
    num_new_clusters = FN(BrotliHistogramCombine)(
        histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
        all_histograms_capacity, all_histograms_size + num_new_clusters);
    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
        cluster_size_capacity, cluster_size_size + num_new_clusters);
    if (BROTLI_IS_OOM(m)) return;
    /* Append the surviving batch clusters to the global lists. */
    for (j = 0; j < num_new_clusters; ++j) {
      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
      remap[new_clusters[j]] = (uint32_t)j;
    }
    /* Record, per block, the global index of the cluster it was merged into. */
    for (j = 0; j < num_to_combine; ++j) {
      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
    }
    num_clusters += num_new_clusters;
    BROTLI_DCHECK(num_clusters == cluster_size_size);
    BROTLI_DCHECK(num_clusters == all_histograms_size);
  }
  BROTLI_FREE(m, histograms);

  /* Final clustering. */
  max_num_pairs =
      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
  if (pairs_capacity < max_num_pairs + 1) {
    BROTLI_FREE(m, pairs);
    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
  }
  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
  for (i = 0; i < num_clusters; ++i) {
    clusters[i] = (uint32_t)i;
  }
  /* Merge batch clusters down to at most BROTLI_MAX_NUMBER_OF_BLOCK_TYPES. */
  num_final_clusters = FN(BrotliHistogramCombine)(
      all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
      max_num_pairs);
  BROTLI_FREE(m, pairs);
  BROTLI_FREE(m, cluster_size);

  /* Assign blocks to final histograms. */
  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
  pos = 0;  /* Re-scan |data| from the start for the assignment pass. */
  {
    uint32_t next_index = 0;
    for (i = 0; i < num_blocks; ++i) {
      size_t j;
      uint32_t best_out;
      double best_bits;
      FN(HistogramClear)(tmp);
      for (j = 0; j < block_lengths[i]; ++j) {
        FN(HistogramAdd)(tmp, data[pos++]);
      }
      /* Among equally good histograms prefer last used. */
      /* TODO(eustas): should we give a block-switch discount here? */
      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
      best_bits = FN(BrotliHistogramBitCostDistance)(
          tmp, &all_histograms[best_out], tmp + 1);
      for (j = 0; j < num_final_clusters; ++j) {
        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
            tmp, &all_histograms[clusters[j]], tmp + 1);
        if (cur_bits < best_bits) {
          best_bits = cur_bits;
          best_out = clusters[j];
        }
      }
      histogram_symbols[i] = best_out;
      /* Renumber final clusters densely, in order of first use. */
      if (new_index[best_out] == kInvalidIndex) {
        new_index[best_out] = next_index++;
      }
    }
  }
  BROTLI_FREE(m, tmp);
  BROTLI_FREE(m, clusters);
  BROTLI_FREE(m, all_histograms);
  BROTLI_ENSURE_CAPACITY(
      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
  BROTLI_ENSURE_CAPACITY(
      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
  if (BROTLI_IS_OOM(m)) return;

  /* Rewrite final assignment to block-split. There might be less blocks
   * than |num_blocks| due to clustering. */
  {
    uint32_t cur_length = 0;
    size_t block_idx = 0;
    uint8_t max_type = 0;
    for (i = 0; i < num_blocks; ++i) {
      cur_length += block_lengths[i];
      /* Merge adjacent blocks that ended up with the same cluster. */
      if (i + 1 == num_blocks ||
          histogram_symbols[i] != histogram_symbols[i + 1]) {
        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
        split->types[block_idx] = id;
        split->lengths[block_idx] = cur_length;
        max_type = BROTLI_MAX(uint8_t, max_type, id);
        cur_length = 0;
        ++block_idx;
      }
    }
    split->num_blocks = block_idx;
    split->num_types = (size_t)max_type + 1;
  }
  BROTLI_FREE(m, new_index);
  BROTLI_FREE(m, u32);
  BROTLI_FREE(m, histogram_symbols);
}
967
968
/* Create BlockSplit (partitioning) given the limits, estimates and "effort"
969
 * parameters.
970
 *
971
 * NB: max_histograms is often less than number of histograms allowed by format;
972
 *     this is done intentionally, to save some "space" for context-aware
973
 *     clustering (here entropy is estimated for context-free symbols). */
974
static void FN(SplitByteVector)(MemoryManager* m,
                                const DataType* data, const size_t length,
                                const size_t symbols_per_histogram,
                                const size_t max_histograms,
                                const size_t sampling_stride_length,
                                const double block_switch_cost,
                                const BrotliEncoderParams* params,
                                BlockSplit* split) {
  const size_t data_size = FN(HistogramDataSize)();
  HistogramType* histograms;
  HistogramType* tmp;
  /* Calculate number of histograms; initial estimate is one histogram per
   * specified amount of symbols; however, this value is capped. */
  size_t num_histograms = length / symbols_per_histogram + 1;
  if (num_histograms > max_histograms) {
    num_histograms = max_histograms;
  }

  /* Corner case: no input. */
  if (length == 0) {
    split->num_types = 1;
    return;
  }

  /* Short input: skip the search; append one block of type 0 covering it. */
  if (length < kMinLengthForBlockSplitting) {
    BROTLI_ENSURE_CAPACITY(m, uint8_t,
        split->types, split->types_alloc_size, split->num_blocks + 1);
    BROTLI_ENSURE_CAPACITY(m, uint32_t,
        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
    if (BROTLI_IS_OOM(m)) return;
    split->num_types = 1;
    split->types[split->num_blocks] = 0;
    split->lengths[split->num_blocks] = (uint32_t)length;
    split->num_blocks++;
    return;
  }
  /* The extra (num_histograms-th) slot is scratch space for refinement. */
  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
  tmp = histograms + num_histograms;
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
  /* Find good entropy codes. */
  FN(InitialEntropyCodes)(data, length,
                          sampling_stride_length,
                          num_histograms, histograms);
  FN(RefineEntropyCodes)(data, length,
                         sampling_stride_length,
                         num_histograms, histograms, tmp);
  {
    /* Find a good path through literals with the good entropy codes. */
    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
    size_t num_blocks = 0;
    /* One bit per histogram, rounded up to whole bytes. */
    const size_t bitmaplen = (num_histograms + 7) >> 3;
    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
    double* cost = BROTLI_ALLOC(m, double, num_histograms);
    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
    /* Higher qualities buy more find/remap/rebuild iterations. */
    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
    size_t i;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
      return;
    }
    /* Iterate: assign block ids, compact them (num_histograms may shrink),
       then rebuild the histograms from the new assignment. */
    for (i = 0; i < iters; ++i) {
      num_blocks = FN(FindBlocks)(data, length,
                                  block_switch_cost,
                                  num_histograms, histograms,
                                  insert_cost, cost, switch_signal,
                                  block_ids);
      num_histograms = FN(RemapBlockIds)(block_ids, length,
                                         new_id, num_histograms);
      FN(BuildBlockHistograms)(data, length, block_ids,
                               num_histograms, histograms);
    }
    BROTLI_FREE(m, insert_cost);
    BROTLI_FREE(m, cost);
    BROTLI_FREE(m, switch_signal);
    BROTLI_FREE(m, new_id);
    BROTLI_FREE(m, histograms);
    /* Collapse the per-symbol assignment into the final BlockSplit. */
    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, block_ids);
  }
}
1057
1058
#undef HistogramType
1059
#undef FN
1060
1061
0
#define FN(X) X ## Distance
1062
/* NOLINTNEXTLINE(build/include) */
1063
/* NOLINT(build/header_guard) */
1064
/* Copyright 2013 Google Inc. All Rights Reserved.
1065
1066
   Distributed under MIT license.
1067
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
1068
*/
1069
1070
/* template parameters: FN, DataType */
1071
1072
0
#define HistogramType FN(Histogram)
1073
1074
static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
1075
                                    size_t stride,
1076
                                    size_t num_histograms,
1077
0
                                    HistogramType* histograms) {
1078
0
  uint32_t seed = 7;
1079
0
  size_t block_length = length / num_histograms;
1080
0
  size_t i;
1081
0
  FN(ClearHistograms)(histograms, num_histograms);
1082
0
  for (i = 0; i < num_histograms; ++i) {
1083
0
    size_t pos = length * i / num_histograms;
1084
0
    if (i != 0) {
1085
0
      pos += MyRand(&seed) % block_length;
1086
0
    }
1087
0
    if (pos + stride >= length) {
1088
0
      pos = length - stride - 1;
1089
0
    }
1090
0
    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
1091
0
  }
1092
0
}
1093
1094
static void FN(RandomSample)(uint32_t* seed,
1095
                             const DataType* data,
1096
                             size_t length,
1097
                             size_t stride,
1098
0
                             HistogramType* sample) {
1099
0
  size_t pos = 0;
1100
0
  if (stride >= length) {
1101
0
    stride = length;
1102
0
  } else {
1103
0
    pos = MyRand(seed) % (length - stride + 1);
1104
0
  }
1105
0
  FN(HistogramAddVector)(sample, data + pos, stride);
1106
0
}
1107
1108
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
1109
                                   size_t stride,
1110
                                   size_t num_histograms,
1111
                                   HistogramType* histograms,
1112
0
                                   HistogramType* tmp) {
1113
0
  size_t iters =
1114
0
      kIterMulForRefining * length / stride + kMinItersForRefining;
1115
0
  uint32_t seed = 7;
1116
0
  size_t iter;
1117
0
  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
1118
0
  for (iter = 0; iter < iters; ++iter) {
1119
0
    FN(HistogramClear)(tmp);
1120
0
    FN(RandomSample)(&seed, data, length, stride, tmp);
1121
0
    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
1122
0
  }
1123
0
}
1124
1125
/* Assigns a block id from the range [0, num_histograms) to each data element
1126
   in data[0..length) and fills in block_id[0..length) with the assigned values.
1127
   Returns the number of blocks, i.e. one plus the number of block switches. */
1128
static size_t FN(FindBlocks)(const DataType* data, const size_t length,
                             const double block_switch_bitcost,
                             const size_t num_histograms,
                             const HistogramType* histograms,
                             double* insert_cost,
                             double* cost,
                             uint8_t* switch_signal,
                             uint8_t* block_id) {
  const size_t alphabet_size = FN(HistogramDataSize)();
  /* One bit per histogram, rounded up to whole bytes. */
  const size_t bitmap_len = (num_histograms + 7) >> 3;
  size_t num_blocks = 1;
  size_t byte_ix;
  size_t i;
  size_t j;
  BROTLI_DCHECK(num_histograms <= 256);

  /* Trivial case: single histogram -> single block type. */
  if (num_histograms <= 1) {
    for (i = 0; i < length; ++i) {
      block_id[i] = 0;
    }
    return 1;
  }

  /* Fill bitcost for each symbol of all histograms.
   * Non-existing symbol cost: 2 + log2(total_count).
   * Regular symbol cost: -log2(symbol_count / total_count). */
  memset(insert_cost, 0,
         sizeof(insert_cost[0]) * alphabet_size * num_histograms);
  for (i = 0; i < num_histograms; ++i) {
    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
  }
  for (i = alphabet_size; i != 0;) {
    /* Reverse order to use the 0-th row as a temporary storage. */
    --i;
    for (j = 0; j < num_histograms; ++j) {
      insert_cost[i * num_histograms + j] =
          insert_cost[j] - BitCost(histograms[j].data_[i]);
    }
  }

  /* After each iteration of this loop, cost[k] will contain the difference
     between the minimum cost of arriving at the current byte position using
     entropy code k, and the minimum cost of arriving at the current byte
     position. This difference is capped at the block switch cost, and if it
     reaches block switch cost, it means that when we trace back from the last
     position, we need to switch here. */
  memset(cost, 0, sizeof(cost[0]) * num_histograms);
  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
  for (byte_ix = 0; byte_ix < length; ++byte_ix) {
    size_t ix = byte_ix * bitmap_len;
    size_t symbol = data[byte_ix];
    size_t insert_cost_ix = symbol * num_histograms;
    double min_cost = 1e99;
    double block_switch_cost = block_switch_bitcost;
    size_t k;
    for (k = 0; k < num_histograms; ++k) {
      /* We are coding the symbol with entropy code k. */
      cost[k] += insert_cost[insert_cost_ix + k];
      if (cost[k] < min_cost) {
        min_cost = cost[k];
        /* Record the cheapest code so far; final answer fixed by traceback. */
        block_id[byte_ix] = (uint8_t)k;
      }
    }
    /* More blocks for the beginning. */
    if (byte_ix < 2000) {
      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
    }
    for (k = 0; k < num_histograms; ++k) {
      cost[k] -= min_cost;
      if (cost[k] >= block_switch_cost) {
        /* Cap the deficit and mark position byte_ix as a switch point for
           code k in the per-position bitmap. */
        const uint8_t mask = (uint8_t)(1u << (k & 7));
        cost[k] = block_switch_cost;
        BROTLI_DCHECK((k >> 3) < bitmap_len);
        switch_signal[ix + (k >> 3)] |= mask;
      }
    }
  }

  byte_ix = length - 1;
  {  /* Trace back from the last position and switch at the marked places. */
    size_t ix = byte_ix * bitmap_len;
    uint8_t cur_id = block_id[byte_ix];
    while (byte_ix > 0) {
      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
      --byte_ix;
      ix -= bitmap_len;
      if (switch_signal[ix + (cur_id >> 3)] & mask) {
        if (cur_id != block_id[byte_ix]) {
          cur_id = block_id[byte_ix];
          ++num_blocks;
        }
      }
      /* Propagate the chosen id backwards until the next switch point. */
      block_id[byte_ix] = cur_id;
    }
  }
  return num_blocks;
}
1227
1228
static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
1229
0
                                uint16_t* new_id, const size_t num_histograms) {
1230
0
  static const uint16_t kInvalidId = 256;
1231
0
  uint16_t next_id = 0;
1232
0
  size_t i;
1233
0
  for (i = 0; i < num_histograms; ++i) {
1234
0
    new_id[i] = kInvalidId;
1235
0
  }
1236
0
  for (i = 0; i < length; ++i) {
1237
0
    BROTLI_DCHECK(block_ids[i] < num_histograms);
1238
0
    if (new_id[block_ids[i]] == kInvalidId) {
1239
0
      new_id[block_ids[i]] = next_id++;
1240
0
    }
1241
0
  }
1242
0
  for (i = 0; i < length; ++i) {
1243
0
    block_ids[i] = (uint8_t)new_id[block_ids[i]];
1244
0
    BROTLI_DCHECK(block_ids[i] < num_histograms);
1245
0
  }
1246
0
  BROTLI_DCHECK(next_id <= num_histograms);
1247
0
  return next_id;
1248
0
}
1249
1250
static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
1251
                                     const uint8_t* block_ids,
1252
                                     const size_t num_histograms,
1253
0
                                     HistogramType* histograms) {
1254
0
  size_t i;
1255
0
  FN(ClearHistograms)(histograms, num_histograms);
1256
0
  for (i = 0; i < length; ++i) {
1257
0
    FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
1258
0
  }
1259
0
}
1260
1261
/* Given the initial partitioning build partitioning with limited number
1262
 * of histograms (and block types). */
1263
static void FN(ClusterBlocks)(MemoryManager* m,
                              const DataType* data, const size_t length,
                              const size_t num_blocks,
                              uint8_t* block_ids,
                              BlockSplit* split) {
  /* Cluster index chosen for each of the |num_blocks| blocks. */
  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
  /* Shared uint32 workspace: four batch-sized scratch arrays followed by
     per-block lengths (see the sizes..block_lengths aliases below). */
  uint32_t* u32 =
      BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
  size_t all_histograms_size = 0;
  size_t all_histograms_capacity = expected_num_clusters;
  HistogramType* all_histograms =
      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
  size_t cluster_size_size = 0;
  size_t cluster_size_capacity = expected_num_clusters;
  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
  size_t num_clusters = 0;
  /* Per-batch histograms; at most HISTOGRAMS_PER_BATCH live at a time. */
  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
  size_t max_num_pairs =
      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
  size_t pairs_capacity = max_num_pairs + 1;
  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
  size_t pos = 0;
  uint32_t* clusters;
  size_t num_final_clusters;
  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
  uint32_t* new_index;
  size_t i;
  uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
  uint32_t* BROTLI_RESTRICT const block_lengths =
      u32 + 4 * HISTOGRAMS_PER_BATCH;
  /* TODO(eustas): move to arena? */
  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);

  /* On OOM the memory manager records the failure; bail out immediately
     (brotli's convention: caller checks BROTLI_IS_OOM, no partial frees). */
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
      BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
      BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
    return;
  }

  memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));

  /* Calculate block lengths (convert repeating values -> series length). */
  {
    size_t block_idx = 0;
    for (i = 0; i < length; ++i) {
      BROTLI_DCHECK(block_idx < num_blocks);
      ++block_lengths[block_idx];
      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
        ++block_idx;
      }
    }
    BROTLI_DCHECK(block_idx == num_blocks);
  }

  /* Pre-cluster blocks (cluster batches). */
  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
    const size_t num_to_combine =
        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
    size_t num_new_clusters;
    size_t j;
    /* Build one histogram per block in this batch; |pos| walks |data|. */
    for (j = 0; j < num_to_combine; ++j) {
      size_t k;
      size_t block_length = block_lengths[i + j];
      FN(HistogramClear)(&histograms[j]);
      for (k = 0; k < block_length; ++k) {
        FN(HistogramAdd)(&histograms[j], data[pos++]);
      }
      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
      new_clusters[j] = (uint32_t)j;
      symbols[j] = (uint32_t)j;
      sizes[j] = 1;
    }
    num_new_clusters = FN(BrotliHistogramCombine)(
        histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
        all_histograms_capacity, all_histograms_size + num_new_clusters);
    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
        cluster_size_capacity, cluster_size_size + num_new_clusters);
    if (BROTLI_IS_OOM(m)) return;
    /* Append the surviving batch clusters to the global lists. */
    for (j = 0; j < num_new_clusters; ++j) {
      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
      remap[new_clusters[j]] = (uint32_t)j;
    }
    /* Record, per block, the global index of the cluster it was merged into. */
    for (j = 0; j < num_to_combine; ++j) {
      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
    }
    num_clusters += num_new_clusters;
    BROTLI_DCHECK(num_clusters == cluster_size_size);
    BROTLI_DCHECK(num_clusters == all_histograms_size);
  }
  BROTLI_FREE(m, histograms);

  /* Final clustering. */
  max_num_pairs =
      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
  if (pairs_capacity < max_num_pairs + 1) {
    BROTLI_FREE(m, pairs);
    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
  }
  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
  for (i = 0; i < num_clusters; ++i) {
    clusters[i] = (uint32_t)i;
  }
  /* Merge batch clusters down to at most BROTLI_MAX_NUMBER_OF_BLOCK_TYPES. */
  num_final_clusters = FN(BrotliHistogramCombine)(
      all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
      max_num_pairs);
  BROTLI_FREE(m, pairs);
  BROTLI_FREE(m, cluster_size);

  /* Assign blocks to final histograms. */
  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
  pos = 0;  /* Re-scan |data| from the start for the assignment pass. */
  {
    uint32_t next_index = 0;
    for (i = 0; i < num_blocks; ++i) {
      size_t j;
      uint32_t best_out;
      double best_bits;
      FN(HistogramClear)(tmp);
      for (j = 0; j < block_lengths[i]; ++j) {
        FN(HistogramAdd)(tmp, data[pos++]);
      }
      /* Among equally good histograms prefer last used. */
      /* TODO(eustas): should we give a block-switch discount here? */
      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
      best_bits = FN(BrotliHistogramBitCostDistance)(
          tmp, &all_histograms[best_out], tmp + 1);
      for (j = 0; j < num_final_clusters; ++j) {
        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
            tmp, &all_histograms[clusters[j]], tmp + 1);
        if (cur_bits < best_bits) {
          best_bits = cur_bits;
          best_out = clusters[j];
        }
      }
      histogram_symbols[i] = best_out;
      /* Renumber final clusters densely, in order of first use. */
      if (new_index[best_out] == kInvalidIndex) {
        new_index[best_out] = next_index++;
      }
    }
  }
  BROTLI_FREE(m, tmp);
  BROTLI_FREE(m, clusters);
  BROTLI_FREE(m, all_histograms);
  BROTLI_ENSURE_CAPACITY(
      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
  BROTLI_ENSURE_CAPACITY(
      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
  if (BROTLI_IS_OOM(m)) return;

  /* Rewrite final assignment to block-split. There might be less blocks
   * than |num_blocks| due to clustering. */
  {
    uint32_t cur_length = 0;
    size_t block_idx = 0;
    uint8_t max_type = 0;
    for (i = 0; i < num_blocks; ++i) {
      cur_length += block_lengths[i];
      /* Merge adjacent blocks that ended up with the same cluster. */
      if (i + 1 == num_blocks ||
          histogram_symbols[i] != histogram_symbols[i + 1]) {
        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
        split->types[block_idx] = id;
        split->lengths[block_idx] = cur_length;
        max_type = BROTLI_MAX(uint8_t, max_type, id);
        cur_length = 0;
        ++block_idx;
      }
    }
    split->num_blocks = block_idx;
    split->num_types = (size_t)max_type + 1;
  }
  BROTLI_FREE(m, new_index);
  BROTLI_FREE(m, u32);
  BROTLI_FREE(m, histogram_symbols);
}
1452
1453
/* Create BlockSplit (partitioning) given the limits, estimates and "effort"
 * parameters.
 *
 * NB: max_histograms is often less than number of histograms allowed by format;
 *     this is done intentionally, to save some "space" for context-aware
 *     clustering (here entropy is estimated for context-free symbols).
 *
 * Macro-templated: FN / DataType / HistogramType are bound by the including
 * translation unit (literal / command / distance instantiations).
 * On OOM the function returns early; per brotli convention the MemoryManager
 * records the error and the caller is responsible for wholesale cleanup. */
static void FN(SplitByteVector)(MemoryManager* m,
                                const DataType* data, const size_t length,
                                const size_t symbols_per_histogram,
                                const size_t max_histograms,
                                const size_t sampling_stride_length,
                                const double block_switch_cost,
                                const BrotliEncoderParams* params,
                                BlockSplit* split) {
  const size_t data_size = FN(HistogramDataSize)();
  HistogramType* histograms;
  HistogramType* tmp;
  /* Calculate number of histograms; initial estimate is one histogram per
   * specified amount of symbols; however, this value is capped. */
  size_t num_histograms = length / symbols_per_histogram + 1;
  if (num_histograms > max_histograms) {
    num_histograms = max_histograms;
  }

  /* Corner case: no input. */
  if (length == 0) {
    split->num_types = 1;
    return;
  }

  /* Short input: not worth splitting; emit a single block of one type. */
  if (length < kMinLengthForBlockSplitting) {
    BROTLI_ENSURE_CAPACITY(m, uint8_t,
        split->types, split->types_alloc_size, split->num_blocks + 1);
    BROTLI_ENSURE_CAPACITY(m, uint32_t,
        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
    if (BROTLI_IS_OOM(m)) return;
    split->num_types = 1;
    split->types[split->num_blocks] = 0;
    split->lengths[split->num_blocks] = (uint32_t)length;
    split->num_blocks++;
    return;
  }
  /* One extra histogram is allocated as scratch space (|tmp|). */
  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
  tmp = histograms + num_histograms;
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
  /* Find good entropy codes. */
  FN(InitialEntropyCodes)(data, length,
                          sampling_stride_length,
                          num_histograms, histograms);
  FN(RefineEntropyCodes)(data, length,
                         sampling_stride_length,
                         num_histograms, histograms, tmp);
  {
    /* Find a good path through literals with the good entropy codes. */
    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
    size_t num_blocks = 0;
    /* Bitmap width: one bit per histogram, rounded up to whole bytes. */
    const size_t bitmaplen = (num_histograms + 7) >> 3;
    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
    double* cost = BROTLI_ALLOC(m, double, num_histograms);
    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
    /* Higher (zopfli-level) qualities spend more refinement iterations. */
    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
    size_t i;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
      return;
    }
    /* Iteratively: assign symbols to blocks, compact the id space, then
     * rebuild histograms from the new assignment. */
    for (i = 0; i < iters; ++i) {
      num_blocks = FN(FindBlocks)(data, length,
                                  block_switch_cost,
                                  num_histograms, histograms,
                                  insert_cost, cost, switch_signal,
                                  block_ids);
      num_histograms = FN(RemapBlockIds)(block_ids, length,
                                         new_id, num_histograms);
      FN(BuildBlockHistograms)(data, length, block_ids,
                               num_histograms, histograms);
    }
    BROTLI_FREE(m, insert_cost);
    BROTLI_FREE(m, cost);
    BROTLI_FREE(m, switch_signal);
    BROTLI_FREE(m, new_id);
    BROTLI_FREE(m, histograms);
    /* Final clustering pass writes the resulting partition into |split|. */
    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, block_ids);
  }
}
1542
1543
#undef HistogramType
1544
#undef DataType
1545
#undef FN
1546
1547
0
/* Put a BlockSplit into its pristine empty state: zero counters and no
 * backing arrays. Does not free anything; intended for first-time init. */
void duckdb_brotli::BrotliInitBlockSplit(BlockSplit* self) {
  /* Clear the arrays and their recorded capacities... */
  self->types = 0;
  self->types_alloc_size = 0;
  self->lengths = 0;
  self->lengths_alloc_size = 0;
  /* ...then the logical block/type counts. */
  self->num_blocks = 0;
  self->num_types = 0;
}
1555
1556
0
/* Release the arrays owned by a BlockSplit via the given MemoryManager.
 * Counterpart of BrotliInitBlockSplit; safe on zero-initialized splits. */
void duckdb_brotli::BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->lengths);
  BROTLI_FREE(m, self->types);
}
1560
1561
/* Extracts literals, command and distance prefix codes, then applies
 * SplitByteVector to create a partitioning for each of the three streams.
 *
 * |m|      memory manager; on OOM this returns early and the caller is
 *          expected to detect the error via BROTLI_IS_OOM.
 * |cmds|   the |num_commands| encoder commands to split.
 * |data|   ring buffer of input bytes, addressed via |pos| and |mask|.
 * Results are written into |literal_split|, |insert_and_copy_split| and
 * |dist_split| respectively. */
void duckdb_brotli::BrotliSplitBlock(MemoryManager* m,
                      const Command* cmds,
                      const size_t num_commands,
                      const uint8_t* data,
                      const size_t pos,
                      const size_t mask,
                      const BrotliEncoderParams* params,
                      BlockSplit* literal_split,
                      BlockSplit* insert_and_copy_split,
                      BlockSplit* dist_split) {
  {
    size_t literals_count = CountLiterals(cmds, num_commands);
    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
    /* Create a continuous array of literals. */
    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
    /* Create the block split on the array of literals.
     * Literal histograms can have alphabet size up to 256.
     * Though, to accommodate context modeling, less than half of maximum size
     * is allowed. */
    SplitByteVectorLiteral(
        m, literals, literals_count,
        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
        literal_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, literals);
    /* NB: this might be a good place for injecting extra splitting without
     *     increasing encoder complexity; however, output parition would be less
     *     optimal than one produced with forced splitting inside
     *     SplitByteVector (FindBlocks / ClusterBlocks). */
  }

  {
    /* Compute prefix codes for commands. */
    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t i;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
    for (i = 0; i < num_commands; ++i) {
      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
    }
    /* Create the block split on the array of command prefixes. */
    SplitByteVectorCommand(
        m, insert_and_copy_codes, num_commands,
        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kCommandBlockSwitchCost, params,
        insert_and_copy_split);
    if (BROTLI_IS_OOM(m)) return;
    /* TODO(eustas): reuse for distances? */
    BROTLI_FREE(m, insert_and_copy_codes);
  }

  {
    /* Create a continuous array of distance prefixes. Only commands that
     * actually copy (non-zero copy length) and carry an explicit distance
     * (cmd_prefix_ >= 128) contribute an entry; |j| counts them. */
    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t j = 0;
    size_t i;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
    for (i = 0; i < num_commands; ++i) {
      const Command* cmd = &cmds[i];
      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
        /* Mask off extra-bit payload; keep the 10-bit distance code. */
        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
      }
    }
    /* Create the block split on the array of distance prefixes.
     * NOTE(review): kMaxCommandHistograms (not a distance-specific cap) is
     * used here in the original code — appears intentional upstream. */
    SplitByteVectorDistance(
        m, distance_prefixes, j,
        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
        kDistanceStrideLength, kDistanceBlockSwitchCost, params,
        dist_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, distance_prefixes);
  }
}
1637
1638
#if defined(BROTLI_TEST)
1639
size_t CountLiteralsForTest(const Command*, const size_t);
1640
size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
1641
  return CountLiterals(cmds, num_commands);
1642
}
1643
1644
void CopyLiteralsToByteArrayForTest(const Command*,
1645
    const size_t, const uint8_t*, const size_t, const size_t, uint8_t*);
1646
void CopyLiteralsToByteArrayForTest(const Command* cmds,
1647
    const size_t num_commands, const uint8_t* data, const size_t offset,
1648
    const size_t mask, uint8_t* literals) {
1649
  CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
1650
}
1651
#endif
1652
1653