/src/duckdb/third_party/brotli/enc/block_splitter.cpp
Line | Count | Source |
1 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | /* Block split point selection utilities. */ |
8 | | |
9 | | #include "block_splitter.h" |
10 | | |
11 | | #include <string.h> /* memcpy, memset */ |
12 | | |
13 | | #include "../common/brotli_platform.h" |
14 | | #include "bit_cost.h" |
15 | | #include "cluster.h" |
16 | | #include "command.h" |
17 | | #include "fast_log.h" |
18 | | #include "histogram.h" |
19 | | #include "memory.h" |
20 | | #include "quality.h" |
21 | | |
22 | | using namespace duckdb_brotli; |
23 | | |
24 | | static const size_t kMaxLiteralHistograms = 100; |
25 | | static const size_t kMaxCommandHistograms = 50; |
26 | | static const double kLiteralBlockSwitchCost = 28.1; |
27 | | static const double kCommandBlockSwitchCost = 13.5; |
28 | | static const double kDistanceBlockSwitchCost = 14.6; |
29 | | static const size_t kLiteralStrideLength = 70; |
30 | | static const size_t kCommandStrideLength = 40; |
31 | | static const size_t kDistanceStrideLength = 40; |
32 | | static const size_t kSymbolsPerLiteralHistogram = 544; |
33 | | static const size_t kSymbolsPerCommandHistogram = 530; |
34 | | static const size_t kSymbolsPerDistanceHistogram = 544; |
35 | | static const size_t kMinLengthForBlockSplitting = 128; |
36 | | static const size_t kIterMulForRefining = 2; |
37 | | static const size_t kMinItersForRefining = 100; |
38 | | |
39 | 0 | static size_t CountLiterals(const Command* cmds, const size_t num_commands) { |
40 | | /* Count the total number of literals across all commands. */ |
41 | 0 | size_t total_length = 0; |
42 | 0 | size_t i; |
43 | 0 | for (i = 0; i < num_commands; ++i) { |
44 | 0 | total_length += cmds[i].insert_len_; |
45 | 0 | } |
46 | 0 | return total_length; |
47 | 0 | } |
48 | | |
49 | | static void CopyLiteralsToByteArray(const Command* cmds, |
50 | | const size_t num_commands, |
51 | | const uint8_t* data, |
52 | | const size_t offset, |
53 | | const size_t mask, |
54 | 0 | uint8_t* literals) { |
55 | 0 | size_t pos = 0; |
56 | 0 | size_t from_pos = offset & mask; |
57 | 0 | size_t i; |
58 | 0 | for (i = 0; i < num_commands; ++i) { |
59 | 0 | size_t insert_len = cmds[i].insert_len_; |
60 | 0 | if (from_pos + insert_len > mask) { |
61 | 0 | size_t head_size = mask + 1 - from_pos; |
62 | 0 | memcpy(literals + pos, data + from_pos, head_size); |
63 | 0 | from_pos = 0; |
64 | 0 | pos += head_size; |
65 | 0 | insert_len -= head_size; |
66 | 0 | } |
67 | 0 | if (insert_len > 0) { |
68 | 0 | memcpy(literals + pos, data + from_pos, insert_len); |
69 | 0 | pos += insert_len; |
70 | 0 | } |
71 | 0 | from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask; |
72 | 0 | } |
73 | 0 | } |
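The copy above reads from a power-of-two ring buffer addressed through `mask`; an insert run that crosses the physical end of the buffer is emitted in two pieces. A minimal standalone sketch of that wraparound pattern (illustrative, not part of the listing):

    #include <stdint.h>
    #include <string.h>

    /* Copy `len` bytes that logically start at `from` out of a ring buffer
       whose size is mask + 1 (a power of two). */
    static void ring_copy(const uint8_t* ring, size_t mask,
                          size_t from, size_t len, uint8_t* out) {
      from &= mask;
      if (from + len > mask + 1) {            /* Run wraps past the end. */
        size_t head = mask + 1 - from;
        memcpy(out, ring + from, head);       /* Head: up to the physical end. */
        memcpy(out + head, ring, len - head); /* Tail: restart at offset 0. */
      } else {
        memcpy(out, ring + from, len);
      }
    }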
74 | | |
75 | 0 | static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) { |
76 | | /* Initial seed should be 7. In this case, loop length is (1 << 29). */ |
77 | 0 | *seed *= 16807U; |
78 | 0 | return *seed; |
79 | 0 | } |
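MyRand is a multiplicative congruential generator with the classic 16807 multiplier, reduced implicitly mod 2^32 by uint32_t wraparound; per the comment above, seeding with 7 yields a cycle of length (1 << 29). A tiny sketch of how the splitter uses it to jitter sample positions (toy values assumed):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t toy_rand(uint32_t* seed) {
      *seed *= 16807U;  /* Same update as MyRand; wraps mod 2^32. */
      return *seed;
    }

    int main(void) {
      uint32_t seed = 7;          /* The splitter's fixed initial seed. */
      size_t block_length = 1000; /* Toy value. */
      for (int i = 0; i < 3; ++i) {
        printf("jitter %u\n", (unsigned)(toy_rand(&seed) % block_length));
      }
      return 0;
    }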
80 | | |
81 | 0 | static BROTLI_INLINE double BitCost(size_t count) { |
82 | 0 | return count == 0 ? -2.0 : FastLog2(count); |
83 | 0 | } |
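BitCost returns log2(count) for present symbols and the sentinel -2.0 for absent ones. Combined with the initialization in FindBlocks below (each insert_cost row starts at log2(total_count) and BitCost is subtracted), this yields the Shannon cost -log2(count/total) for a present symbol and a log2(total) + 2 penalty for an absent one. A small check of that reading (my arithmetic, not covered code):

    #include <math.h>
    #include <stdio.h>

    static double symbol_cost(size_t count, size_t total) {
      double bit_cost = (count == 0) ? -2.0 : log2((double)count);
      return log2((double)total) - bit_cost; /* Mirrors the insert_cost fill. */
    }

    int main(void) {
      printf("%.2f\n", symbol_cost(25, 100)); /* -log2(0.25) = 2.00 bits */
      printf("%.2f\n", symbol_cost(0, 100));  /* log2(100) + 2 = 8.64 bits */
      return 0;
    }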
84 | | |
85 | 0 | #define HISTOGRAMS_PER_BATCH 64 |
86 | 0 | #define CLUSTERS_PER_BATCH 16 |
87 | | |
88 | 0 | #define FN(X) X ## Literal |
89 | | #define DataType uint8_t |
90 | | /* NOLINTNEXTLINE(build/include) */ |
91 | | /* NOLINT(build/header_guard) */ |
92 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
93 | | |
94 | | Distributed under MIT license. |
95 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
96 | | */ |
97 | | |
98 | | /* template parameters: FN, DataType */ |
99 | | |
100 | 0 | #define HistogramType FN(Histogram) |
101 | | |
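A note on the templating mechanism: with `#define FN(X) X ## Literal` in effect, `FN(Histogram)` token-pastes to `HistogramLiteral` and `FN(SplitByteVector)` to `SplitByteVectorLiteral`. The same function bodies are textually repeated twice more below, under `FN(X) X ## Command` (with DataType redefined to uint16_t) and `FN(X) X ## Distance` (which reuses uint16_t), stamping out three variants from one template.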
102 | | static void FN(InitialEntropyCodes)(const DataType* data, size_t length, |
103 | | size_t stride, |
104 | | size_t num_histograms, |
105 | 0 | HistogramType* histograms) { |
106 | 0 | uint32_t seed = 7; |
107 | 0 | size_t block_length = length / num_histograms; |
108 | 0 | size_t i; |
109 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
110 | 0 | for (i = 0; i < num_histograms; ++i) { |
111 | 0 | size_t pos = length * i / num_histograms; |
112 | 0 | if (i != 0) { |
113 | 0 | pos += MyRand(&seed) % block_length; |
114 | 0 | } |
115 | 0 | if (pos + stride >= length) { |
116 | 0 | pos = length - stride - 1; |
117 | 0 | } |
118 | 0 | FN(HistogramAddVector)(&histograms[i], data + pos, stride); |
119 | 0 | } |
120 | 0 | } |
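A worked example of the sampling above, with assumed toy values: for length = 10000 and num_histograms = 50, block_length = 200, so histogram i is seeded from `stride` consecutive symbols starting near the even anchor i * 10000 / 50 = i * 200, jittered forward by MyRand(&seed) % 200 (except histogram 0) and clamped so a full stride fits inside the data.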
121 | | |
122 | | static void FN(RandomSample)(uint32_t* seed, |
123 | | const DataType* data, |
124 | | size_t length, |
125 | | size_t stride, |
126 | 0 | HistogramType* sample) { |
127 | 0 | size_t pos = 0; |
128 | 0 | if (stride >= length) { |
129 | 0 | stride = length; |
130 | 0 | } else { |
131 | 0 | pos = MyRand(seed) % (length - stride + 1); |
132 | 0 | } |
133 | 0 | FN(HistogramAddVector)(sample, data + pos, stride); |
134 | 0 | } |
135 | | |
136 | | static void FN(RefineEntropyCodes)(const DataType* data, size_t length, |
137 | | size_t stride, |
138 | | size_t num_histograms, |
139 | | HistogramType* histograms, |
140 | 0 | HistogramType* tmp) { |
141 | 0 | size_t iters = |
142 | 0 | kIterMulForRefining * length / stride + kMinItersForRefining; |
143 | 0 | uint32_t seed = 7; |
144 | 0 | size_t iter; |
145 | 0 | iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; |
146 | 0 | for (iter = 0; iter < iters; ++iter) { |
147 | 0 | FN(HistogramClear)(tmp); |
148 | 0 | FN(RandomSample)(&seed, data, length, stride, tmp); |
149 | 0 | FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp); |
150 | 0 | } |
151 | 0 | } |
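A quick sizing example for the refinement loop above (toy values): with length = 7000 and stride = 70 (the literal stride), iters starts at 2 * 7000 / 70 + 100 = 300; rounding up to a multiple of num_histograms = 64 gives 320 iterations, i.e. exactly 5 random stride samples merged into each histogram by the `iter % num_histograms` round-robin.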
152 | | |
153 | | /* Assigns a block id from the range [0, num_histograms) to each data element |
154 | | in data[0..length) and fills in block_id[0..length) with the assigned values. |
155 | | Returns the number of blocks, i.e. one plus the number of block switches. */ |
156 | | static size_t FN(FindBlocks)(const DataType* data, const size_t length, |
157 | | const double block_switch_bitcost, |
158 | | const size_t num_histograms, |
159 | | const HistogramType* histograms, |
160 | | double* insert_cost, |
161 | | double* cost, |
162 | | uint8_t* switch_signal, |
163 | 0 | uint8_t* block_id) { |
164 | 0 | const size_t alphabet_size = FN(HistogramDataSize)(); |
165 | 0 | const size_t bitmap_len = (num_histograms + 7) >> 3; |
166 | 0 | size_t num_blocks = 1; |
167 | 0 | size_t byte_ix; |
168 | 0 | size_t i; |
169 | 0 | size_t j; |
170 | 0 | BROTLI_DCHECK(num_histograms <= 256); |
171 | | |
172 | | /* Trivial case: single histogram -> single block type. */ |
173 | 0 | if (num_histograms <= 1) { |
174 | 0 | for (i = 0; i < length; ++i) { |
175 | 0 | block_id[i] = 0; |
176 | 0 | } |
177 | 0 | return 1; |
178 | 0 | } |
179 | | |
180 | | /* Fill bitcost for each symbol of all histograms. |
181 | | * Non-existing symbol cost: 2 + log2(total_count). |
182 | | * Regular symbol cost: -log2(symbol_count / total_count). */ |
183 | 0 | memset(insert_cost, 0, |
184 | 0 | sizeof(insert_cost[0]) * alphabet_size * num_histograms); |
185 | 0 | for (i = 0; i < num_histograms; ++i) { |
186 | 0 | insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); |
187 | 0 | } |
188 | 0 | for (i = alphabet_size; i != 0;) { |
189 | | /* Reverse order to use the 0-th row as a temporary storage. */ |
190 | 0 | --i; |
191 | 0 | for (j = 0; j < num_histograms; ++j) { |
192 | 0 | insert_cost[i * num_histograms + j] = |
193 | 0 | insert_cost[j] - BitCost(histograms[j].data_[i]); |
194 | 0 | } |
195 | 0 | } |
196 | | |
197 | | /* After each iteration of this loop, cost[k] will contain the difference |
198 | | between the minimum cost of arriving at the current byte position using |
199 | | entropy code k, and the minimum cost of arriving at the current byte |
200 | | position. This difference is capped at the block switch cost, and if it |
201 | | reaches block switch cost, it means that when we trace back from the last |
202 | | position, we need to switch here. */ |
203 | 0 | memset(cost, 0, sizeof(cost[0]) * num_histograms); |
204 | 0 | memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len); |
205 | 0 | for (byte_ix = 0; byte_ix < length; ++byte_ix) { |
206 | 0 | size_t ix = byte_ix * bitmap_len; |
207 | 0 | size_t symbol = data[byte_ix]; |
208 | 0 | size_t insert_cost_ix = symbol * num_histograms; |
209 | 0 | double min_cost = 1e99; |
210 | 0 | double block_switch_cost = block_switch_bitcost; |
211 | 0 | size_t k; |
212 | 0 | for (k = 0; k < num_histograms; ++k) { |
213 | | /* We are coding the symbol with entropy code k. */ |
214 | 0 | cost[k] += insert_cost[insert_cost_ix + k]; |
215 | 0 | if (cost[k] < min_cost) { |
216 | 0 | min_cost = cost[k]; |
217 | 0 | block_id[byte_ix] = (uint8_t)k; |
218 | 0 | } |
219 | 0 | } |
220 | | /* More blocks for the beginning. */ |
221 | 0 | if (byte_ix < 2000) { |
222 | 0 | block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; |
223 | 0 | } |
224 | 0 | for (k = 0; k < num_histograms; ++k) { |
225 | 0 | cost[k] -= min_cost; |
226 | 0 | if (cost[k] >= block_switch_cost) { |
227 | 0 | const uint8_t mask = (uint8_t)(1u << (k & 7)); |
228 | 0 | cost[k] = block_switch_cost; |
229 | 0 | BROTLI_DCHECK((k >> 3) < bitmap_len); |
230 | 0 | switch_signal[ix + (k >> 3)] |= mask; |
231 | 0 | } |
232 | 0 | } |
233 | 0 | } |
234 | |
235 | 0 | byte_ix = length - 1; |
236 | 0 | { /* Trace back from the last position and switch at the marked places. */ |
237 | 0 | size_t ix = byte_ix * bitmap_len; |
238 | 0 | uint8_t cur_id = block_id[byte_ix]; |
239 | 0 | while (byte_ix > 0) { |
240 | 0 | const uint8_t mask = (uint8_t)(1u << (cur_id & 7)); |
241 | 0 | BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len); |
242 | 0 | --byte_ix; |
243 | 0 | ix -= bitmap_len; |
244 | 0 | if (switch_signal[ix + (cur_id >> 3)] & mask) { |
245 | 0 | if (cur_id != block_id[byte_ix]) { |
246 | 0 | cur_id = block_id[byte_ix]; |
247 | 0 | ++num_blocks; |
248 | 0 | } |
249 | 0 | } |
250 | 0 | block_id[byte_ix] = cur_id; |
251 | 0 | } |
252 | 0 | } |
253 | 0 | return num_blocks; |
254 | 0 | } |
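The `switch_signal` array above is a bitmap with one row of bitmap_len = ceil(num_histograms / 8) bytes per input position; bit k of row i records that entropy code k hit the switch-cost cap at position i, which the traceback then reads in reverse. A minimal sketch of that layout (illustrative helper names):

    #include <stddef.h>
    #include <stdint.h>

    static void set_switch(uint8_t* bitmap, size_t bitmap_len,
                           size_t pos, size_t k) {
      bitmap[pos * bitmap_len + (k >> 3)] |= (uint8_t)(1u << (k & 7));
    }

    static int test_switch(const uint8_t* bitmap, size_t bitmap_len,
                           size_t pos, size_t k) {
      return (bitmap[pos * bitmap_len + (k >> 3)] >> (k & 7)) & 1;
    }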
255 | | |
256 | | static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length, |
257 | 0 | uint16_t* new_id, const size_t num_histograms) { |
258 | 0 | static const uint16_t kInvalidId = 256; |
259 | 0 | uint16_t next_id = 0; |
260 | 0 | size_t i; |
261 | 0 | for (i = 0; i < num_histograms; ++i) { |
262 | 0 | new_id[i] = kInvalidId; |
263 | 0 | } |
264 | 0 | for (i = 0; i < length; ++i) { |
265 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
266 | 0 | if (new_id[block_ids[i]] == kInvalidId) { |
267 | 0 | new_id[block_ids[i]] = next_id++; |
268 | 0 | } |
269 | 0 | } |
270 | 0 | for (i = 0; i < length; ++i) { |
271 | 0 | block_ids[i] = (uint8_t)new_id[block_ids[i]]; |
272 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
273 | 0 | } |
274 | 0 | BROTLI_DCHECK(next_id <= num_histograms); |
275 | 0 | return next_id; |
276 | 0 | } |
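A minimal self-check of the remapping contract above, under my reading of the code: ids are renumbered densely in order of first appearance, and the return value is the number of ids actually used.

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    int main(void) {
      uint8_t ids[5] = {5, 5, 2, 7, 2};
      uint16_t new_id[8];
      uint16_t next = 0;
      /* Same remap logic as above, inlined for a toy input. */
      for (size_t i = 0; i < 8; ++i) new_id[i] = 256;
      for (size_t i = 0; i < 5; ++i) {
        if (new_id[ids[i]] == 256) new_id[ids[i]] = next++;
      }
      for (size_t i = 0; i < 5; ++i) ids[i] = (uint8_t)new_id[ids[i]];
      assert(next == 3);                        /* Three distinct ids used. */
      assert(ids[0] == 0 && ids[2] == 1 && ids[3] == 2);
      return 0;
    }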
277 | | |
278 | | static void FN(BuildBlockHistograms)(const DataType* data, const size_t length, |
279 | | const uint8_t* block_ids, |
280 | | const size_t num_histograms, |
281 | 0 | HistogramType* histograms) { |
282 | 0 | size_t i; |
283 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
284 | 0 | for (i = 0; i < length; ++i) { |
285 | 0 | FN(HistogramAdd)(&histograms[block_ids[i]], data[i]); |
286 | 0 | } |
287 | 0 | } |
288 | | |
289 | | /* Given the initial partitioning, build a partitioning with a limited |
290 | | * number of histograms (and block types). */ |
291 | | static void FN(ClusterBlocks)(MemoryManager* m, |
292 | | const DataType* data, const size_t length, |
293 | | const size_t num_blocks, |
294 | | uint8_t* block_ids, |
295 | 0 | BlockSplit* split) { |
296 | 0 | uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks); |
297 | 0 | uint32_t* u32 = |
298 | 0 | BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH); |
299 | 0 | const size_t expected_num_clusters = CLUSTERS_PER_BATCH * |
300 | 0 | (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH; |
301 | 0 | size_t all_histograms_size = 0; |
302 | 0 | size_t all_histograms_capacity = expected_num_clusters; |
303 | 0 | HistogramType* all_histograms = |
304 | 0 | BROTLI_ALLOC(m, HistogramType, all_histograms_capacity); |
305 | 0 | size_t cluster_size_size = 0; |
306 | 0 | size_t cluster_size_capacity = expected_num_clusters; |
307 | 0 | uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity); |
308 | 0 | size_t num_clusters = 0; |
309 | 0 | HistogramType* histograms = BROTLI_ALLOC(m, HistogramType, |
310 | 0 | BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH)); |
311 | 0 | size_t max_num_pairs = |
312 | 0 | HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2; |
313 | 0 | size_t pairs_capacity = max_num_pairs + 1; |
314 | 0 | HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity); |
315 | 0 | size_t pos = 0; |
316 | 0 | uint32_t* clusters; |
317 | 0 | size_t num_final_clusters; |
318 | 0 | static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; |
319 | 0 | uint32_t* new_index; |
320 | 0 | size_t i; |
321 | 0 | uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH; |
322 | 0 | uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH; |
323 | 0 | uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH; |
324 | 0 | uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH; |
325 | 0 | uint32_t* BROTLI_RESTRICT const block_lengths = |
326 | 0 | u32 + 4 * HISTOGRAMS_PER_BATCH; |
327 | | /* TODO(eustas): move to arena? */ |
328 | 0 | HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2); |
329 | |
330 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) || |
331 | 0 | BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) || |
332 | 0 | BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) || |
333 | 0 | BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) { |
334 | 0 | return; |
335 | 0 | } |
336 | | |
337 | 0 | memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t)); |
338 | | |
339 | | /* Calculate block lengths (convert repeating values -> series length). */ |
340 | 0 | { |
341 | 0 | size_t block_idx = 0; |
342 | 0 | for (i = 0; i < length; ++i) { |
343 | 0 | BROTLI_DCHECK(block_idx < num_blocks); |
344 | 0 | ++block_lengths[block_idx]; |
345 | 0 | if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { |
346 | 0 | ++block_idx; |
347 | 0 | } |
348 | 0 | } |
349 | 0 | BROTLI_DCHECK(block_idx == num_blocks); |
350 | 0 | } |
351 | | |
352 | | /* Pre-cluster blocks (cluster batches). */ |
353 | 0 | for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) { |
354 | 0 | const size_t num_to_combine = |
355 | 0 | BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH); |
356 | 0 | size_t num_new_clusters; |
357 | 0 | size_t j; |
358 | 0 | for (j = 0; j < num_to_combine; ++j) { |
359 | 0 | size_t k; |
360 | 0 | size_t block_length = block_lengths[i + j]; |
361 | 0 | FN(HistogramClear)(&histograms[j]); |
362 | 0 | for (k = 0; k < block_length; ++k) { |
363 | 0 | FN(HistogramAdd)(&histograms[j], data[pos++]); |
364 | 0 | } |
365 | 0 | histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]); |
366 | 0 | new_clusters[j] = (uint32_t)j; |
367 | 0 | symbols[j] = (uint32_t)j; |
368 | 0 | sizes[j] = 1; |
369 | 0 | } |
370 | 0 | num_new_clusters = FN(BrotliHistogramCombine)( |
371 | 0 | histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine, |
372 | 0 | num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs); |
373 | 0 | BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms, |
374 | 0 | all_histograms_capacity, all_histograms_size + num_new_clusters); |
375 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size, |
376 | 0 | cluster_size_capacity, cluster_size_size + num_new_clusters); |
377 | 0 | if (BROTLI_IS_OOM(m)) return; |
378 | 0 | for (j = 0; j < num_new_clusters; ++j) { |
379 | 0 | all_histograms[all_histograms_size++] = histograms[new_clusters[j]]; |
380 | 0 | cluster_size[cluster_size_size++] = sizes[new_clusters[j]]; |
381 | 0 | remap[new_clusters[j]] = (uint32_t)j; |
382 | 0 | } |
383 | 0 | for (j = 0; j < num_to_combine; ++j) { |
384 | 0 | histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]]; |
385 | 0 | } |
386 | 0 | num_clusters += num_new_clusters; |
387 | 0 | BROTLI_DCHECK(num_clusters == cluster_size_size); |
388 | 0 | BROTLI_DCHECK(num_clusters == all_histograms_size); |
389 | 0 | } |
390 | 0 | BROTLI_FREE(m, histograms); |
391 | | |
392 | | /* Final clustering. */ |
393 | 0 | max_num_pairs = |
394 | 0 | BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters); |
395 | 0 | if (pairs_capacity < max_num_pairs + 1) { |
396 | 0 | BROTLI_FREE(m, pairs); |
397 | 0 | pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1); |
398 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return; |
399 | 0 | } |
400 | 0 | clusters = BROTLI_ALLOC(m, uint32_t, num_clusters); |
401 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return; |
402 | 0 | for (i = 0; i < num_clusters; ++i) { |
403 | 0 | clusters[i] = (uint32_t)i; |
404 | 0 | } |
405 | 0 | num_final_clusters = FN(BrotliHistogramCombine)( |
406 | 0 | all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs, |
407 | 0 | num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES, |
408 | 0 | max_num_pairs); |
409 | 0 | BROTLI_FREE(m, pairs); |
410 | 0 | BROTLI_FREE(m, cluster_size); |
411 | | |
412 | | /* Assign blocks to final histograms. */ |
413 | 0 | new_index = BROTLI_ALLOC(m, uint32_t, num_clusters); |
414 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return; |
415 | 0 | for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex; |
416 | 0 | pos = 0; |
417 | 0 | { |
418 | 0 | uint32_t next_index = 0; |
419 | 0 | for (i = 0; i < num_blocks; ++i) { |
420 | 0 | size_t j; |
421 | 0 | uint32_t best_out; |
422 | 0 | double best_bits; |
423 | 0 | FN(HistogramClear)(tmp); |
424 | 0 | for (j = 0; j < block_lengths[i]; ++j) { |
425 | 0 | FN(HistogramAdd)(tmp, data[pos++]); |
426 | 0 | } |
427 | | /* Among equally good histograms prefer last used. */ |
428 | | /* TODO(eustas): should we give a block-switch discount here? */ |
429 | 0 | best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1]; |
430 | 0 | best_bits = FN(BrotliHistogramBitCostDistance)( |
431 | 0 | tmp, &all_histograms[best_out], tmp + 1); |
432 | 0 | for (j = 0; j < num_final_clusters; ++j) { |
433 | 0 | const double cur_bits = FN(BrotliHistogramBitCostDistance)( |
434 | 0 | tmp, &all_histograms[clusters[j]], tmp + 1); |
435 | 0 | if (cur_bits < best_bits) { |
436 | 0 | best_bits = cur_bits; |
437 | 0 | best_out = clusters[j]; |
438 | 0 | } |
439 | 0 | } |
440 | 0 | histogram_symbols[i] = best_out; |
441 | 0 | if (new_index[best_out] == kInvalidIndex) { |
442 | 0 | new_index[best_out] = next_index++; |
443 | 0 | } |
444 | 0 | } |
445 | 0 | } |
446 | 0 | BROTLI_FREE(m, tmp); |
447 | 0 | BROTLI_FREE(m, clusters); |
448 | 0 | BROTLI_FREE(m, all_histograms); |
449 | 0 | BROTLI_ENSURE_CAPACITY( |
450 | 0 | m, uint8_t, split->types, split->types_alloc_size, num_blocks); |
451 | 0 | BROTLI_ENSURE_CAPACITY( |
452 | 0 | m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks); |
453 | 0 | if (BROTLI_IS_OOM(m)) return; |
454 | | |
455 | | /* Rewrite final assignment to block-split. There might be fewer blocks |
456 | | * than |num_blocks| due to clustering. */ |
457 | 0 | { |
458 | 0 | uint32_t cur_length = 0; |
459 | 0 | size_t block_idx = 0; |
460 | 0 | uint8_t max_type = 0; |
461 | 0 | for (i = 0; i < num_blocks; ++i) { |
462 | 0 | cur_length += block_lengths[i]; |
463 | 0 | if (i + 1 == num_blocks || |
464 | 0 | histogram_symbols[i] != histogram_symbols[i + 1]) { |
465 | 0 | const uint8_t id = (uint8_t)new_index[histogram_symbols[i]]; |
466 | 0 | split->types[block_idx] = id; |
467 | 0 | split->lengths[block_idx] = cur_length; |
468 | 0 | max_type = BROTLI_MAX(uint8_t, max_type, id); |
469 | 0 | cur_length = 0; |
470 | 0 | ++block_idx; |
471 | 0 | } |
472 | 0 | } |
473 | 0 | split->num_blocks = block_idx; |
474 | 0 | split->num_types = (size_t)max_type + 1; |
475 | 0 | } |
476 | 0 | BROTLI_FREE(m, new_index); |
477 | 0 | BROTLI_FREE(m, u32); |
478 | 0 | BROTLI_FREE(m, histogram_symbols); |
479 | 0 | } |
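A back-of-envelope check of the capacity estimate at the top of ClusterBlocks (toy values): blocks are pre-clustered in batches of HISTOGRAMS_PER_BATCH = 64, and capacity is reserved assuming at most CLUSTERS_PER_BATCH = 16 survivors per batch before the final cross-batch combine.

    #include <stdio.h>

    int main(void) {
      size_t num_blocks = 1000;                    /* Toy value. */
      size_t batches = (num_blocks + 64 - 1) / 64; /* 16 batches. */
      size_t expected_num_clusters = 16 * batches; /* 256, as in the code. */
      printf("batches=%zu expected=%zu\n", batches, expected_num_clusters);
      return 0;
    }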
480 | | |
481 | | /* Create BlockSplit (partitioning) given the limits, estimates and "effort" |
482 | | * parameters. |
483 | | * |
484 | | * NB: max_histograms is often less than the number of histograms allowed by the format; |
485 | | * this is done intentionally, to save some "space" for context-aware |
486 | | * clustering (here entropy is estimated for context-free symbols). */ |
487 | | static void FN(SplitByteVector)(MemoryManager* m, |
488 | | const DataType* data, const size_t length, |
489 | | const size_t symbols_per_histogram, |
490 | | const size_t max_histograms, |
491 | | const size_t sampling_stride_length, |
492 | | const double block_switch_cost, |
493 | | const BrotliEncoderParams* params, |
494 | 0 | BlockSplit* split) { |
495 | 0 | const size_t data_size = FN(HistogramDataSize)(); |
496 | 0 | HistogramType* histograms; |
497 | 0 | HistogramType* tmp; |
498 | | /* Calculate the number of histograms; the initial estimate is one histogram |
499 | | * per specified number of symbols; however, this value is capped. */ |
500 | 0 | size_t num_histograms = length / symbols_per_histogram + 1; |
501 | 0 | if (num_histograms > max_histograms) { |
502 | 0 | num_histograms = max_histograms; |
503 | 0 | } |
504 | | |
505 | | /* Corner case: no input. */ |
506 | 0 | if (length == 0) { |
507 | 0 | split->num_types = 1; |
508 | 0 | return; |
509 | 0 | } |
510 | | |
511 | 0 | if (length < kMinLengthForBlockSplitting) { |
512 | 0 | BROTLI_ENSURE_CAPACITY(m, uint8_t, |
513 | 0 | split->types, split->types_alloc_size, split->num_blocks + 1); |
514 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, |
515 | 0 | split->lengths, split->lengths_alloc_size, split->num_blocks + 1); |
516 | 0 | if (BROTLI_IS_OOM(m)) return; |
517 | 0 | split->num_types = 1; |
518 | 0 | split->types[split->num_blocks] = 0; |
519 | 0 | split->lengths[split->num_blocks] = (uint32_t)length; |
520 | 0 | split->num_blocks++; |
521 | 0 | return; |
522 | 0 | } |
523 | 0 | histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1); |
524 | 0 | tmp = histograms + num_histograms; |
525 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return; |
526 | | /* Find good entropy codes. */ |
527 | 0 | FN(InitialEntropyCodes)(data, length, |
528 | 0 | sampling_stride_length, |
529 | 0 | num_histograms, histograms); |
530 | 0 | FN(RefineEntropyCodes)(data, length, |
531 | 0 | sampling_stride_length, |
532 | 0 | num_histograms, histograms, tmp); |
533 | 0 | { |
534 | | /* Find a good path through the data with the refined entropy codes. */ |
535 | 0 | uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length); |
536 | 0 | size_t num_blocks = 0; |
537 | 0 | const size_t bitmaplen = (num_histograms + 7) >> 3; |
538 | 0 | double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms); |
539 | 0 | double* cost = BROTLI_ALLOC(m, double, num_histograms); |
540 | 0 | uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen); |
541 | 0 | uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms); |
542 | 0 | const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10; |
543 | 0 | size_t i; |
544 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) || |
545 | 0 | BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) || |
546 | 0 | BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) { |
547 | 0 | return; |
548 | 0 | } |
549 | 0 | for (i = 0; i < iters; ++i) { |
550 | 0 | num_blocks = FN(FindBlocks)(data, length, |
551 | 0 | block_switch_cost, |
552 | 0 | num_histograms, histograms, |
553 | 0 | insert_cost, cost, switch_signal, |
554 | 0 | block_ids); |
555 | 0 | num_histograms = FN(RemapBlockIds)(block_ids, length, |
556 | 0 | new_id, num_histograms); |
557 | 0 | FN(BuildBlockHistograms)(data, length, block_ids, |
558 | 0 | num_histograms, histograms); |
559 | 0 | } |
560 | 0 | BROTLI_FREE(m, insert_cost); |
561 | 0 | BROTLI_FREE(m, cost); |
562 | 0 | BROTLI_FREE(m, switch_signal); |
563 | 0 | BROTLI_FREE(m, new_id); |
564 | 0 | BROTLI_FREE(m, histograms); |
565 | 0 | FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split); |
566 | 0 | if (BROTLI_IS_OOM(m)) return; |
567 | 0 | BROTLI_FREE(m, block_ids); |
568 | 0 | } |
569 | 0 | } |
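For orientation, a hedged sketch of how this Literal instantiation is typically driven; the actual driver (BrotliSplitBlock in upstream Brotli) lies outside this excerpt, so the sequence below is a reconstruction from this file's helpers and constants, not covered code:

    /* Reconstruction; not part of the listing. */
    size_t literals_count = CountLiterals(cmds, num_commands);
    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
    SplitByteVectorLiteral(m, literals, literals_count,
        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
        kLiteralStrideLength, kLiteralBlockSwitchCost,
        params, literal_split);
    BROTLI_FREE(m, literals);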
570 | | |
571 | | #undef HistogramType |
572 | | #undef DataType |
573 | | #undef FN |
574 | | |
575 | 0 | #define FN(X) X ## Command |
576 | | #define DataType uint16_t |
577 | | /* NOLINTNEXTLINE(build/include) */ |
578 | | /* NOLINT(build/header_guard) */ |
579 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
580 | | |
581 | | Distributed under MIT license. |
582 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
583 | | */ |
584 | | |
585 | | /* template parameters: FN, DataType */ |
586 | | |
587 | 0 | #define HistogramType FN(Histogram) |
588 | | |
589 | | static void FN(InitialEntropyCodes)(const DataType* data, size_t length, |
590 | | size_t stride, |
591 | | size_t num_histograms, |
592 | 0 | HistogramType* histograms) { |
593 | 0 | uint32_t seed = 7; |
594 | 0 | size_t block_length = length / num_histograms; |
595 | 0 | size_t i; |
596 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
597 | 0 | for (i = 0; i < num_histograms; ++i) { |
598 | 0 | size_t pos = length * i / num_histograms; |
599 | 0 | if (i != 0) { |
600 | 0 | pos += MyRand(&seed) % block_length; |
601 | 0 | } |
602 | 0 | if (pos + stride >= length) { |
603 | 0 | pos = length - stride - 1; |
604 | 0 | } |
605 | 0 | FN(HistogramAddVector)(&histograms[i], data + pos, stride); |
606 | 0 | } |
607 | 0 | } |
608 | | |
609 | | static void FN(RandomSample)(uint32_t* seed, |
610 | | const DataType* data, |
611 | | size_t length, |
612 | | size_t stride, |
613 | 0 | HistogramType* sample) { |
614 | 0 | size_t pos = 0; |
615 | 0 | if (stride >= length) { |
616 | 0 | stride = length; |
617 | 0 | } else { |
618 | 0 | pos = MyRand(seed) % (length - stride + 1); |
619 | 0 | } |
620 | 0 | FN(HistogramAddVector)(sample, data + pos, stride); |
621 | 0 | } |
622 | | |
623 | | static void FN(RefineEntropyCodes)(const DataType* data, size_t length, |
624 | | size_t stride, |
625 | | size_t num_histograms, |
626 | | HistogramType* histograms, |
627 | 0 | HistogramType* tmp) { |
628 | 0 | size_t iters = |
629 | 0 | kIterMulForRefining * length / stride + kMinItersForRefining; |
630 | 0 | uint32_t seed = 7; |
631 | 0 | size_t iter; |
632 | 0 | iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; |
633 | 0 | for (iter = 0; iter < iters; ++iter) { |
634 | 0 | FN(HistogramClear)(tmp); |
635 | 0 | FN(RandomSample)(&seed, data, length, stride, tmp); |
636 | 0 | FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp); |
637 | 0 | } |
638 | 0 | } |
639 | | |
640 | | /* Assigns a block id from the range [0, num_histograms) to each data element |
641 | | in data[0..length) and fills in block_id[0..length) with the assigned values. |
642 | | Returns the number of blocks, i.e. one plus the number of block switches. */ |
643 | | static size_t FN(FindBlocks)(const DataType* data, const size_t length, |
644 | | const double block_switch_bitcost, |
645 | | const size_t num_histograms, |
646 | | const HistogramType* histograms, |
647 | | double* insert_cost, |
648 | | double* cost, |
649 | | uint8_t* switch_signal, |
650 | 0 | uint8_t* block_id) { |
651 | 0 | const size_t alphabet_size = FN(HistogramDataSize)(); |
652 | 0 | const size_t bitmap_len = (num_histograms + 7) >> 3; |
653 | 0 | size_t num_blocks = 1; |
654 | 0 | size_t byte_ix; |
655 | 0 | size_t i; |
656 | 0 | size_t j; |
657 | 0 | BROTLI_DCHECK(num_histograms <= 256); |
658 | | |
659 | | /* Trivial case: single histogram -> single block type. */ |
660 | 0 | if (num_histograms <= 1) { |
661 | 0 | for (i = 0; i < length; ++i) { |
662 | 0 | block_id[i] = 0; |
663 | 0 | } |
664 | 0 | return 1; |
665 | 0 | } |
666 | | |
667 | | /* Fill bitcost for each symbol of all histograms. |
668 | | * Non-existing symbol cost: 2 + log2(total_count). |
669 | | * Regular symbol cost: -log2(symbol_count / total_count). */ |
670 | 0 | memset(insert_cost, 0, |
671 | 0 | sizeof(insert_cost[0]) * alphabet_size * num_histograms); |
672 | 0 | for (i = 0; i < num_histograms; ++i) { |
673 | 0 | insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); |
674 | 0 | } |
675 | 0 | for (i = alphabet_size; i != 0;) { |
676 | | /* Reverse order to use the 0-th row as a temporary storage. */ |
677 | 0 | --i; |
678 | 0 | for (j = 0; j < num_histograms; ++j) { |
679 | 0 | insert_cost[i * num_histograms + j] = |
680 | 0 | insert_cost[j] - BitCost(histograms[j].data_[i]); |
681 | 0 | } |
682 | 0 | } |
683 | | |
684 | | /* After each iteration of this loop, cost[k] will contain the difference |
685 | | between the minimum cost of arriving at the current byte position using |
686 | | entropy code k, and the minimum cost of arriving at the current byte |
687 | | position. This difference is capped at the block switch cost, and if it |
688 | | reaches block switch cost, it means that when we trace back from the last |
689 | | position, we need to switch here. */ |
690 | 0 | memset(cost, 0, sizeof(cost[0]) * num_histograms); |
691 | 0 | memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len); |
692 | 0 | for (byte_ix = 0; byte_ix < length; ++byte_ix) { |
693 | 0 | size_t ix = byte_ix * bitmap_len; |
694 | 0 | size_t symbol = data[byte_ix]; |
695 | 0 | size_t insert_cost_ix = symbol * num_histograms; |
696 | 0 | double min_cost = 1e99; |
697 | 0 | double block_switch_cost = block_switch_bitcost; |
698 | 0 | size_t k; |
699 | 0 | for (k = 0; k < num_histograms; ++k) { |
700 | | /* We are coding the symbol with entropy code k. */ |
701 | 0 | cost[k] += insert_cost[insert_cost_ix + k]; |
702 | 0 | if (cost[k] < min_cost) { |
703 | 0 | min_cost = cost[k]; |
704 | 0 | block_id[byte_ix] = (uint8_t)k; |
705 | 0 | } |
706 | 0 | } |
707 | | /* More blocks for the beginning. */ |
708 | 0 | if (byte_ix < 2000) { |
709 | 0 | block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; |
710 | 0 | } |
711 | 0 | for (k = 0; k < num_histograms; ++k) { |
712 | 0 | cost[k] -= min_cost; |
713 | 0 | if (cost[k] >= block_switch_cost) { |
714 | 0 | const uint8_t mask = (uint8_t)(1u << (k & 7)); |
715 | 0 | cost[k] = block_switch_cost; |
716 | 0 | BROTLI_DCHECK((k >> 3) < bitmap_len); |
717 | 0 | switch_signal[ix + (k >> 3)] |= mask; |
718 | 0 | } |
719 | 0 | } |
720 | 0 | } |
721 | |
722 | 0 | byte_ix = length - 1; |
723 | 0 | { /* Trace back from the last position and switch at the marked places. */ |
724 | 0 | size_t ix = byte_ix * bitmap_len; |
725 | 0 | uint8_t cur_id = block_id[byte_ix]; |
726 | 0 | while (byte_ix > 0) { |
727 | 0 | const uint8_t mask = (uint8_t)(1u << (cur_id & 7)); |
728 | 0 | BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len); |
729 | 0 | --byte_ix; |
730 | 0 | ix -= bitmap_len; |
731 | 0 | if (switch_signal[ix + (cur_id >> 3)] & mask) { |
732 | 0 | if (cur_id != block_id[byte_ix]) { |
733 | 0 | cur_id = block_id[byte_ix]; |
734 | 0 | ++num_blocks; |
735 | 0 | } |
736 | 0 | } |
737 | 0 | block_id[byte_ix] = cur_id; |
738 | 0 | } |
739 | 0 | } |
740 | 0 | return num_blocks; |
741 | 0 | } |
742 | | |
743 | | static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length, |
744 | 0 | uint16_t* new_id, const size_t num_histograms) { |
745 | 0 | static const uint16_t kInvalidId = 256; |
746 | 0 | uint16_t next_id = 0; |
747 | 0 | size_t i; |
748 | 0 | for (i = 0; i < num_histograms; ++i) { |
749 | 0 | new_id[i] = kInvalidId; |
750 | 0 | } |
751 | 0 | for (i = 0; i < length; ++i) { |
752 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
753 | 0 | if (new_id[block_ids[i]] == kInvalidId) { |
754 | 0 | new_id[block_ids[i]] = next_id++; |
755 | 0 | } |
756 | 0 | } |
757 | 0 | for (i = 0; i < length; ++i) { |
758 | 0 | block_ids[i] = (uint8_t)new_id[block_ids[i]]; |
759 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
760 | 0 | } |
761 | 0 | BROTLI_DCHECK(next_id <= num_histograms); |
762 | 0 | return next_id; |
763 | 0 | } |
764 | | |
765 | | static void FN(BuildBlockHistograms)(const DataType* data, const size_t length, |
766 | | const uint8_t* block_ids, |
767 | | const size_t num_histograms, |
768 | 0 | HistogramType* histograms) { |
769 | 0 | size_t i; |
770 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
771 | 0 | for (i = 0; i < length; ++i) { |
772 | 0 | FN(HistogramAdd)(&histograms[block_ids[i]], data[i]); |
773 | 0 | } |
774 | 0 | } |
775 | | |
776 | | /* Given the initial partitioning, build a partitioning with a limited |
777 | | * number of histograms (and block types). */ |
778 | | static void FN(ClusterBlocks)(MemoryManager* m, |
779 | | const DataType* data, const size_t length, |
780 | | const size_t num_blocks, |
781 | | uint8_t* block_ids, |
782 | 0 | BlockSplit* split) { |
783 | 0 | uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks); |
784 | 0 | uint32_t* u32 = |
785 | 0 | BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH); |
786 | 0 | const size_t expected_num_clusters = CLUSTERS_PER_BATCH * |
787 | 0 | (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH; |
788 | 0 | size_t all_histograms_size = 0; |
789 | 0 | size_t all_histograms_capacity = expected_num_clusters; |
790 | 0 | HistogramType* all_histograms = |
791 | 0 | BROTLI_ALLOC(m, HistogramType, all_histograms_capacity); |
792 | 0 | size_t cluster_size_size = 0; |
793 | 0 | size_t cluster_size_capacity = expected_num_clusters; |
794 | 0 | uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity); |
795 | 0 | size_t num_clusters = 0; |
796 | 0 | HistogramType* histograms = BROTLI_ALLOC(m, HistogramType, |
797 | 0 | BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH)); |
798 | 0 | size_t max_num_pairs = |
799 | 0 | HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2; |
800 | 0 | size_t pairs_capacity = max_num_pairs + 1; |
801 | 0 | HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity); |
802 | 0 | size_t pos = 0; |
803 | 0 | uint32_t* clusters; |
804 | 0 | size_t num_final_clusters; |
805 | 0 | static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; |
806 | 0 | uint32_t* new_index; |
807 | 0 | size_t i; |
808 | 0 | uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH; |
809 | 0 | uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH; |
810 | 0 | uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH; |
811 | 0 | uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH; |
812 | 0 | uint32_t* BROTLI_RESTRICT const block_lengths = |
813 | 0 | u32 + 4 * HISTOGRAMS_PER_BATCH; |
814 | | /* TODO(eustas): move to arena? */ |
815 | 0 | HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2); |
816 | |
817 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) || |
818 | 0 | BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) || |
819 | 0 | BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) || |
820 | 0 | BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) { |
821 | 0 | return; |
822 | 0 | } |
823 | | |
824 | 0 | memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t)); |
825 | | |
826 | | /* Calculate block lengths (convert repeating values -> series length). */ |
827 | 0 | { |
828 | 0 | size_t block_idx = 0; |
829 | 0 | for (i = 0; i < length; ++i) { |
830 | 0 | BROTLI_DCHECK(block_idx < num_blocks); |
831 | 0 | ++block_lengths[block_idx]; |
832 | 0 | if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { |
833 | 0 | ++block_idx; |
834 | 0 | } |
835 | 0 | } |
836 | 0 | BROTLI_DCHECK(block_idx == num_blocks); |
837 | 0 | } |
838 | | |
839 | | /* Pre-cluster blocks (cluster batches). */ |
840 | 0 | for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) { |
841 | 0 | const size_t num_to_combine = |
842 | 0 | BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH); |
843 | 0 | size_t num_new_clusters; |
844 | 0 | size_t j; |
845 | 0 | for (j = 0; j < num_to_combine; ++j) { |
846 | 0 | size_t k; |
847 | 0 | size_t block_length = block_lengths[i + j]; |
848 | 0 | FN(HistogramClear)(&histograms[j]); |
849 | 0 | for (k = 0; k < block_length; ++k) { |
850 | 0 | FN(HistogramAdd)(&histograms[j], data[pos++]); |
851 | 0 | } |
852 | 0 | histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]); |
853 | 0 | new_clusters[j] = (uint32_t)j; |
854 | 0 | symbols[j] = (uint32_t)j; |
855 | 0 | sizes[j] = 1; |
856 | 0 | } |
857 | 0 | num_new_clusters = FN(BrotliHistogramCombine)( |
858 | 0 | histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine, |
859 | 0 | num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs); |
860 | 0 | BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms, |
861 | 0 | all_histograms_capacity, all_histograms_size + num_new_clusters); |
862 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size, |
863 | 0 | cluster_size_capacity, cluster_size_size + num_new_clusters); |
864 | 0 | if (BROTLI_IS_OOM(m)) return; |
865 | 0 | for (j = 0; j < num_new_clusters; ++j) { |
866 | 0 | all_histograms[all_histograms_size++] = histograms[new_clusters[j]]; |
867 | 0 | cluster_size[cluster_size_size++] = sizes[new_clusters[j]]; |
868 | 0 | remap[new_clusters[j]] = (uint32_t)j; |
869 | 0 | } |
870 | 0 | for (j = 0; j < num_to_combine; ++j) { |
871 | 0 | histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]]; |
872 | 0 | } |
873 | 0 | num_clusters += num_new_clusters; |
874 | 0 | BROTLI_DCHECK(num_clusters == cluster_size_size); |
875 | 0 | BROTLI_DCHECK(num_clusters == all_histograms_size); |
876 | 0 | } |
877 | 0 | BROTLI_FREE(m, histograms); |
878 | | |
879 | | /* Final clustering. */ |
880 | 0 | max_num_pairs = |
881 | 0 | BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters); |
882 | 0 | if (pairs_capacity < max_num_pairs + 1) { |
883 | 0 | BROTLI_FREE(m, pairs); |
884 | 0 | pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1); |
885 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return; |
886 | 0 | } |
887 | 0 | clusters = BROTLI_ALLOC(m, uint32_t, num_clusters); |
888 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return; |
889 | 0 | for (i = 0; i < num_clusters; ++i) { |
890 | 0 | clusters[i] = (uint32_t)i; |
891 | 0 | } |
892 | 0 | num_final_clusters = FN(BrotliHistogramCombine)( |
893 | 0 | all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs, |
894 | 0 | num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES, |
895 | 0 | max_num_pairs); |
896 | 0 | BROTLI_FREE(m, pairs); |
897 | 0 | BROTLI_FREE(m, cluster_size); |
898 | | |
899 | | /* Assign blocks to final histograms. */ |
900 | 0 | new_index = BROTLI_ALLOC(m, uint32_t, num_clusters); |
901 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return; |
902 | 0 | for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex; |
903 | 0 | pos = 0; |
904 | 0 | { |
905 | 0 | uint32_t next_index = 0; |
906 | 0 | for (i = 0; i < num_blocks; ++i) { |
907 | 0 | size_t j; |
908 | 0 | uint32_t best_out; |
909 | 0 | double best_bits; |
910 | 0 | FN(HistogramClear)(tmp); |
911 | 0 | for (j = 0; j < block_lengths[i]; ++j) { |
912 | 0 | FN(HistogramAdd)(tmp, data[pos++]); |
913 | 0 | } |
914 | | /* Among equally good histograms prefer last used. */ |
915 | | /* TODO(eustas): should we give a block-switch discount here? */ |
916 | 0 | best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1]; |
917 | 0 | best_bits = FN(BrotliHistogramBitCostDistance)( |
918 | 0 | tmp, &all_histograms[best_out], tmp + 1); |
919 | 0 | for (j = 0; j < num_final_clusters; ++j) { |
920 | 0 | const double cur_bits = FN(BrotliHistogramBitCostDistance)( |
921 | 0 | tmp, &all_histograms[clusters[j]], tmp + 1); |
922 | 0 | if (cur_bits < best_bits) { |
923 | 0 | best_bits = cur_bits; |
924 | 0 | best_out = clusters[j]; |
925 | 0 | } |
926 | 0 | } |
927 | 0 | histogram_symbols[i] = best_out; |
928 | 0 | if (new_index[best_out] == kInvalidIndex) { |
929 | 0 | new_index[best_out] = next_index++; |
930 | 0 | } |
931 | 0 | } |
932 | 0 | } |
933 | 0 | BROTLI_FREE(m, tmp); |
934 | 0 | BROTLI_FREE(m, clusters); |
935 | 0 | BROTLI_FREE(m, all_histograms); |
936 | 0 | BROTLI_ENSURE_CAPACITY( |
937 | 0 | m, uint8_t, split->types, split->types_alloc_size, num_blocks); |
938 | 0 | BROTLI_ENSURE_CAPACITY( |
939 | 0 | m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks); |
940 | 0 | if (BROTLI_IS_OOM(m)) return; |
941 | | |
942 | | /* Rewrite final assignment to block-split. There might be fewer blocks |
943 | | * than |num_blocks| due to clustering. */ |
944 | 0 | { |
945 | 0 | uint32_t cur_length = 0; |
946 | 0 | size_t block_idx = 0; |
947 | 0 | uint8_t max_type = 0; |
948 | 0 | for (i = 0; i < num_blocks; ++i) { |
949 | 0 | cur_length += block_lengths[i]; |
950 | 0 | if (i + 1 == num_blocks || |
951 | 0 | histogram_symbols[i] != histogram_symbols[i + 1]) { |
952 | 0 | const uint8_t id = (uint8_t)new_index[histogram_symbols[i]]; |
953 | 0 | split->types[block_idx] = id; |
954 | 0 | split->lengths[block_idx] = cur_length; |
955 | 0 | max_type = BROTLI_MAX(uint8_t, max_type, id); |
956 | 0 | cur_length = 0; |
957 | 0 | ++block_idx; |
958 | 0 | } |
959 | 0 | } |
960 | 0 | split->num_blocks = block_idx; |
961 | 0 | split->num_types = (size_t)max_type + 1; |
962 | 0 | } |
963 | 0 | BROTLI_FREE(m, new_index); |
964 | 0 | BROTLI_FREE(m, u32); |
965 | 0 | BROTLI_FREE(m, histogram_symbols); |
966 | 0 | } |
967 | | |
968 | | /* Create BlockSplit (partitioning) given the limits, estimates and "effort" |
969 | | * parameters. |
970 | | * |
971 | | * NB: max_histograms is often less than the number of histograms allowed by the format; |
972 | | * this is done intentionally, to save some "space" for context-aware |
973 | | * clustering (here entropy is estimated for context-free symbols). */ |
974 | | static void FN(SplitByteVector)(MemoryManager* m, |
975 | | const DataType* data, const size_t length, |
976 | | const size_t symbols_per_histogram, |
977 | | const size_t max_histograms, |
978 | | const size_t sampling_stride_length, |
979 | | const double block_switch_cost, |
980 | | const BrotliEncoderParams* params, |
981 | 0 | BlockSplit* split) { |
982 | 0 | const size_t data_size = FN(HistogramDataSize)(); |
983 | 0 | HistogramType* histograms; |
984 | 0 | HistogramType* tmp; |
985 | | /* Calculate the number of histograms; the initial estimate is one histogram |
986 | | * per specified number of symbols; however, this value is capped. */ |
987 | 0 | size_t num_histograms = length / symbols_per_histogram + 1; |
988 | 0 | if (num_histograms > max_histograms) { |
989 | 0 | num_histograms = max_histograms; |
990 | 0 | } |
991 | | |
992 | | /* Corner case: no input. */ |
993 | 0 | if (length == 0) { |
994 | 0 | split->num_types = 1; |
995 | 0 | return; |
996 | 0 | } |
997 | | |
998 | 0 | if (length < kMinLengthForBlockSplitting) { |
999 | 0 | BROTLI_ENSURE_CAPACITY(m, uint8_t, |
1000 | 0 | split->types, split->types_alloc_size, split->num_blocks + 1); |
1001 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, |
1002 | 0 | split->lengths, split->lengths_alloc_size, split->num_blocks + 1); |
1003 | 0 | if (BROTLI_IS_OOM(m)) return; |
1004 | 0 | split->num_types = 1; |
1005 | 0 | split->types[split->num_blocks] = 0; |
1006 | 0 | split->lengths[split->num_blocks] = (uint32_t)length; |
1007 | 0 | split->num_blocks++; |
1008 | 0 | return; |
1009 | 0 | } |
1010 | 0 | histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1); |
1011 | 0 | tmp = histograms + num_histograms; |
1012 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return; |
1013 | | /* Find good entropy codes. */ |
1014 | 0 | FN(InitialEntropyCodes)(data, length, |
1015 | 0 | sampling_stride_length, |
1016 | 0 | num_histograms, histograms); |
1017 | 0 | FN(RefineEntropyCodes)(data, length, |
1018 | 0 | sampling_stride_length, |
1019 | 0 | num_histograms, histograms, tmp); |
1020 | 0 | { |
1021 | | /* Find a good path through the data with the refined entropy codes. */ |
1022 | 0 | uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length); |
1023 | 0 | size_t num_blocks = 0; |
1024 | 0 | const size_t bitmaplen = (num_histograms + 7) >> 3; |
1025 | 0 | double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms); |
1026 | 0 | double* cost = BROTLI_ALLOC(m, double, num_histograms); |
1027 | 0 | uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen); |
1028 | 0 | uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms); |
1029 | 0 | const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10; |
1030 | 0 | size_t i; |
1031 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) || |
1032 | 0 | BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) || |
1033 | 0 | BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) { |
1034 | 0 | return; |
1035 | 0 | } |
1036 | 0 | for (i = 0; i < iters; ++i) { |
1037 | 0 | num_blocks = FN(FindBlocks)(data, length, |
1038 | 0 | block_switch_cost, |
1039 | 0 | num_histograms, histograms, |
1040 | 0 | insert_cost, cost, switch_signal, |
1041 | 0 | block_ids); |
1042 | 0 | num_histograms = FN(RemapBlockIds)(block_ids, length, |
1043 | 0 | new_id, num_histograms); |
1044 | 0 | FN(BuildBlockHistograms)(data, length, block_ids, |
1045 | 0 | num_histograms, histograms); |
1046 | 0 | } |
1047 | 0 | BROTLI_FREE(m, insert_cost); |
1048 | 0 | BROTLI_FREE(m, cost); |
1049 | 0 | BROTLI_FREE(m, switch_signal); |
1050 | 0 | BROTLI_FREE(m, new_id); |
1051 | 0 | BROTLI_FREE(m, histograms); |
1052 | 0 | FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split); |
1053 | 0 | if (BROTLI_IS_OOM(m)) return; |
1054 | 0 | BROTLI_FREE(m, block_ids); |
1055 | 0 | } |
1056 | 0 | } |
1057 | | |
1058 | | #undef HistogramType |
1059 | | #undef FN |
1060 | | |
1061 | 0 | #define FN(X) X ## Distance |
1062 | | /* NOLINTNEXTLINE(build/include) */ |
1063 | | /* NOLINT(build/header_guard) */ |
1064 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
1065 | | |
1066 | | Distributed under MIT license. |
1067 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
1068 | | */ |
1069 | | |
1070 | | /* template parameters: FN, DataType */ |
1071 | | |
1072 | 0 | #define HistogramType FN(Histogram) |
1073 | | |
1074 | | static void FN(InitialEntropyCodes)(const DataType* data, size_t length, |
1075 | | size_t stride, |
1076 | | size_t num_histograms, |
1077 | 0 | HistogramType* histograms) { |
1078 | 0 | uint32_t seed = 7; |
1079 | 0 | size_t block_length = length / num_histograms; |
1080 | 0 | size_t i; |
1081 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
1082 | 0 | for (i = 0; i < num_histograms; ++i) { |
1083 | 0 | size_t pos = length * i / num_histograms; |
1084 | 0 | if (i != 0) { |
1085 | 0 | pos += MyRand(&seed) % block_length; |
1086 | 0 | } |
1087 | 0 | if (pos + stride >= length) { |
1088 | 0 | pos = length - stride - 1; |
1089 | 0 | } |
1090 | 0 | FN(HistogramAddVector)(&histograms[i], data + pos, stride); |
1091 | 0 | } |
1092 | 0 | } |
1093 | | |
1094 | | static void FN(RandomSample)(uint32_t* seed, |
1095 | | const DataType* data, |
1096 | | size_t length, |
1097 | | size_t stride, |
1098 | 0 | HistogramType* sample) { |
1099 | 0 | size_t pos = 0; |
1100 | 0 | if (stride >= length) { |
1101 | 0 | stride = length; |
1102 | 0 | } else { |
1103 | 0 | pos = MyRand(seed) % (length - stride + 1); |
1104 | 0 | } |
1105 | 0 | FN(HistogramAddVector)(sample, data + pos, stride); |
1106 | 0 | } |
1107 | | |
1108 | | static void FN(RefineEntropyCodes)(const DataType* data, size_t length, |
1109 | | size_t stride, |
1110 | | size_t num_histograms, |
1111 | | HistogramType* histograms, |
1112 | 0 | HistogramType* tmp) { |
1113 | 0 | size_t iters = |
1114 | 0 | kIterMulForRefining * length / stride + kMinItersForRefining; |
1115 | 0 | uint32_t seed = 7; |
1116 | 0 | size_t iter; |
1117 | 0 | iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; |
1118 | 0 | for (iter = 0; iter < iters; ++iter) { |
1119 | 0 | FN(HistogramClear)(tmp); |
1120 | 0 | FN(RandomSample)(&seed, data, length, stride, tmp); |
1121 | 0 | FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp); |
1122 | 0 | } |
1123 | 0 | } |
1124 | | |
1125 | | /* Assigns a block id from the range [0, num_histograms) to each data element |
1126 | | in data[0..length) and fills in block_id[0..length) with the assigned values. |
1127 | | Returns the number of blocks, i.e. one plus the number of block switches. */ |
1128 | | static size_t FN(FindBlocks)(const DataType* data, const size_t length, |
1129 | | const double block_switch_bitcost, |
1130 | | const size_t num_histograms, |
1131 | | const HistogramType* histograms, |
1132 | | double* insert_cost, |
1133 | | double* cost, |
1134 | | uint8_t* switch_signal, |
1135 | 0 | uint8_t* block_id) { |
1136 | 0 | const size_t alphabet_size = FN(HistogramDataSize)(); |
1137 | 0 | const size_t bitmap_len = (num_histograms + 7) >> 3; |
1138 | 0 | size_t num_blocks = 1; |
1139 | 0 | size_t byte_ix; |
1140 | 0 | size_t i; |
1141 | 0 | size_t j; |
1142 | 0 | BROTLI_DCHECK(num_histograms <= 256); |
1143 | | |
1144 | | /* Trivial case: single histogram -> single block type. */ |
1145 | 0 | if (num_histograms <= 1) { |
1146 | 0 | for (i = 0; i < length; ++i) { |
1147 | 0 | block_id[i] = 0; |
1148 | 0 | } |
1149 | 0 | return 1; |
1150 | 0 | } |
1151 | | |
1152 | | /* Fill bitcost for each symbol of all histograms. |
1153 | | * Non-existing symbol cost: 2 + log2(total_count). |
1154 | | * Regular symbol cost: -log2(symbol_count / total_count). */ |
1155 | 0 | memset(insert_cost, 0, |
1156 | 0 | sizeof(insert_cost[0]) * alphabet_size * num_histograms); |
1157 | 0 | for (i = 0; i < num_histograms; ++i) { |
1158 | 0 | insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); |
1159 | 0 | } |
1160 | 0 | for (i = alphabet_size; i != 0;) { |
1161 | | /* Reverse order to use the 0-th row as a temporary storage. */ |
1162 | 0 | --i; |
1163 | 0 | for (j = 0; j < num_histograms; ++j) { |
1164 | 0 | insert_cost[i * num_histograms + j] = |
1165 | 0 | insert_cost[j] - BitCost(histograms[j].data_[i]); |
1166 | 0 | } |
1167 | 0 | } |
1168 | | |
1169 | | /* After each iteration of this loop, cost[k] will contain the difference |
1170 | | between the minimum cost of arriving at the current byte position using |
1171 | | entropy code k, and the minimum cost of arriving at the current byte |
1172 | | position. This difference is capped at the block switch cost, and if it |
1173 | | reaches block switch cost, it means that when we trace back from the last |
1174 | | position, we need to switch here. */ |
1175 | 0 | memset(cost, 0, sizeof(cost[0]) * num_histograms); |
1176 | 0 | memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len); |
1177 | 0 | for (byte_ix = 0; byte_ix < length; ++byte_ix) { |
1178 | 0 | size_t ix = byte_ix * bitmap_len; |
1179 | 0 | size_t symbol = data[byte_ix]; |
1180 | 0 | size_t insert_cost_ix = symbol * num_histograms; |
1181 | 0 | double min_cost = 1e99; |
1182 | 0 | double block_switch_cost = block_switch_bitcost; |
1183 | 0 | size_t k; |
1184 | 0 | for (k = 0; k < num_histograms; ++k) { |
1185 | | /* We are coding the symbol with entropy code k. */ |
1186 | 0 | cost[k] += insert_cost[insert_cost_ix + k]; |
1187 | 0 | if (cost[k] < min_cost) { |
1188 | 0 | min_cost = cost[k]; |
1189 | 0 | block_id[byte_ix] = (uint8_t)k; |
1190 | 0 | } |
1191 | 0 | } |
1192 | | /* More blocks for the beginning. */ |
1193 | 0 | if (byte_ix < 2000) { |
1194 | 0 | block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; |
1195 | 0 | } |
1196 | 0 | for (k = 0; k < num_histograms; ++k) { |
1197 | 0 | cost[k] -= min_cost; |
1198 | 0 | if (cost[k] >= block_switch_cost) { |
1199 | 0 | const uint8_t mask = (uint8_t)(1u << (k & 7)); |
1200 | 0 | cost[k] = block_switch_cost; |
1201 | 0 | BROTLI_DCHECK((k >> 3) < bitmap_len); |
1202 | 0 | switch_signal[ix + (k >> 3)] |= mask; |
1203 | 0 | } |
1204 | 0 | } |
1205 | 0 | } |
1206 | |
1207 | 0 | byte_ix = length - 1; |
1208 | 0 | { /* Trace back from the last position and switch at the marked places. */ |
1209 | 0 | size_t ix = byte_ix * bitmap_len; |
1210 | 0 | uint8_t cur_id = block_id[byte_ix]; |
1211 | 0 | while (byte_ix > 0) { |
1212 | 0 | const uint8_t mask = (uint8_t)(1u << (cur_id & 7)); |
1213 | 0 | BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len); |
1214 | 0 | --byte_ix; |
1215 | 0 | ix -= bitmap_len; |
1216 | 0 | if (switch_signal[ix + (cur_id >> 3)] & mask) { |
1217 | 0 | if (cur_id != block_id[byte_ix]) { |
1218 | 0 | cur_id = block_id[byte_ix]; |
1219 | 0 | ++num_blocks; |
1220 | 0 | } |
1221 | 0 | } |
1222 | 0 | block_id[byte_ix] = cur_id; |
1223 | 0 | } |
1224 | 0 | } |
1225 | 0 | return num_blocks; |
1226 | 0 | } |
1227 | | |
1228 | | static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length, |
1229 | 0 | uint16_t* new_id, const size_t num_histograms) { |
1230 | 0 | static const uint16_t kInvalidId = 256; |
1231 | 0 | uint16_t next_id = 0; |
1232 | 0 | size_t i; |
1233 | 0 | for (i = 0; i < num_histograms; ++i) { |
1234 | 0 | new_id[i] = kInvalidId; |
1235 | 0 | } |
1236 | 0 | for (i = 0; i < length; ++i) { |
1237 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
1238 | 0 | if (new_id[block_ids[i]] == kInvalidId) { |
1239 | 0 | new_id[block_ids[i]] = next_id++; |
1240 | 0 | } |
1241 | 0 | } |
1242 | 0 | for (i = 0; i < length; ++i) { |
1243 | 0 | block_ids[i] = (uint8_t)new_id[block_ids[i]]; |
1244 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
1245 | 0 | } |
1246 | 0 | BROTLI_DCHECK(next_id <= num_histograms); |
1247 | 0 | return next_id; |
1248 | 0 | } |
1249 | | |
1250 | | static void FN(BuildBlockHistograms)(const DataType* data, const size_t length, |
1251 | | const uint8_t* block_ids, |
1252 | | const size_t num_histograms, |
1253 | 0 | HistogramType* histograms) { |
1254 | 0 | size_t i; |
1255 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
1256 | 0 | for (i = 0; i < length; ++i) { |
1257 | 0 | FN(HistogramAdd)(&histograms[block_ids[i]], data[i]); |
1258 | 0 | } |
1259 | 0 | } |
1260 | | |
1261 | | /* Given the initial partitioning, build a partitioning with a limited |
1262 | | * number of histograms (and block types). */ |
1263 | | static void FN(ClusterBlocks)(MemoryManager* m, |
1264 | | const DataType* data, const size_t length, |
1265 | | const size_t num_blocks, |
1266 | | uint8_t* block_ids, |
1267 | 0 | BlockSplit* split) { |
1268 | 0 | uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks); |
1269 | 0 | uint32_t* u32 = |
1270 | 0 | BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH); |
1271 | 0 | const size_t expected_num_clusters = CLUSTERS_PER_BATCH * |
1272 | 0 | (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH; |
1273 | 0 | size_t all_histograms_size = 0; |
1274 | 0 | size_t all_histograms_capacity = expected_num_clusters; |
1275 | 0 | HistogramType* all_histograms = |
1276 | 0 | BROTLI_ALLOC(m, HistogramType, all_histograms_capacity); |
1277 | 0 | size_t cluster_size_size = 0; |
1278 | 0 | size_t cluster_size_capacity = expected_num_clusters; |
1279 | 0 | uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity); |
1280 | 0 | size_t num_clusters = 0; |
1281 | 0 | HistogramType* histograms = BROTLI_ALLOC(m, HistogramType, |
1282 | 0 | BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH)); |
1283 | 0 | size_t max_num_pairs = |
1284 | 0 | HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2; |
1285 | 0 | size_t pairs_capacity = max_num_pairs + 1; |
1286 | 0 | HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity); |
1287 | 0 | size_t pos = 0; |
1288 | 0 | uint32_t* clusters; |
1289 | 0 | size_t num_final_clusters; |
1290 | 0 | static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; |
1291 | 0 | uint32_t* new_index; |
1292 | 0 | size_t i; |
1293 | 0 | uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH; |
1294 | 0 | uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH; |
1295 | 0 | uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH; |
1296 | 0 | uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH; |
1297 | 0 | uint32_t* BROTLI_RESTRICT const block_lengths = |
1298 | 0 | u32 + 4 * HISTOGRAMS_PER_BATCH; |
1299 | | /* TODO(eustas): move to arena? */ |
1300 | 0 | HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2); |
1301 | |
1302 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) || |
1303 | 0 | BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) || |
1304 | 0 | BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) || |
1305 | 0 | BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) { |
1306 | 0 | return; |
1307 | 0 | } |
1308 | | |
1309 | 0 | memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t)); |
1310 | | |
1311 | | /* Calculate block lengths (convert repeating values -> series length). */ |
1312 | 0 | { |
1313 | 0 | size_t block_idx = 0; |
1314 | 0 | for (i = 0; i < length; ++i) { |
1315 | 0 | BROTLI_DCHECK(block_idx < num_blocks); |
1316 | 0 | ++block_lengths[block_idx]; |
1317 | 0 | if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { |
1318 | 0 | ++block_idx; |
1319 | 0 | } |
1320 | 0 | } |
1321 | 0 | BROTLI_DCHECK(block_idx == num_blocks); |
1322 | 0 | } |
1323 | | |
1324 | | /* Pre-cluster blocks (cluster batches). */ |
1325 | 0 | for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) { |
1326 | 0 | const size_t num_to_combine = |
1327 | 0 | BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH); |
1328 | 0 | size_t num_new_clusters; |
1329 | 0 | size_t j; |
1330 | 0 | for (j = 0; j < num_to_combine; ++j) { |
1331 | 0 | size_t k; |
1332 | 0 | size_t block_length = block_lengths[i + j]; |
1333 | 0 | FN(HistogramClear)(&histograms[j]); |
1334 | 0 | for (k = 0; k < block_length; ++k) { |
1335 | 0 | FN(HistogramAdd)(&histograms[j], data[pos++]); |
1336 | 0 | } |
1337 | 0 | histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]); |
1338 | 0 | new_clusters[j] = (uint32_t)j; |
1339 | 0 | symbols[j] = (uint32_t)j; |
1340 | 0 | sizes[j] = 1; |
1341 | 0 | } |
1342 | 0 | num_new_clusters = FN(BrotliHistogramCombine)( |
1343 | 0 | histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine, |
1344 | 0 | num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs); |
1345 | 0 | BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms, |
1346 | 0 | all_histograms_capacity, all_histograms_size + num_new_clusters); |
1347 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size, |
1348 | 0 | cluster_size_capacity, cluster_size_size + num_new_clusters); |
1349 | 0 | if (BROTLI_IS_OOM(m)) return; |
1350 | 0 | for (j = 0; j < num_new_clusters; ++j) { |
1351 | 0 | all_histograms[all_histograms_size++] = histograms[new_clusters[j]]; |
1352 | 0 | cluster_size[cluster_size_size++] = sizes[new_clusters[j]]; |
1353 | 0 | remap[new_clusters[j]] = (uint32_t)j; |
1354 | 0 | } |
1355 | 0 | for (j = 0; j < num_to_combine; ++j) { |
1356 | 0 | histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]]; |
1357 | 0 | } |
1358 | 0 | num_clusters += num_new_clusters; |
1359 | 0 | BROTLI_DCHECK(num_clusters == cluster_size_size); |
1360 | 0 | BROTLI_DCHECK(num_clusters == all_histograms_size); |
1361 | 0 | } |
1362 | 0 | BROTLI_FREE(m, histograms); |
1363 | | |
1364 | | /* Final clustering. */ |
1365 | 0 | max_num_pairs = |
1366 | 0 | BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters); |
1367 | 0 | if (pairs_capacity < max_num_pairs + 1) { |
1368 | 0 | BROTLI_FREE(m, pairs); |
1369 | 0 | pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1); |
1370 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return; |
1371 | 0 | } |
1372 | 0 | clusters = BROTLI_ALLOC(m, uint32_t, num_clusters); |
1373 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return; |
1374 | 0 | for (i = 0; i < num_clusters; ++i) { |
1375 | 0 | clusters[i] = (uint32_t)i; |
1376 | 0 | } |
1377 | 0 | num_final_clusters = FN(BrotliHistogramCombine)( |
1378 | 0 | all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs, |
1379 | 0 | num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES, |
1380 | 0 | max_num_pairs); |
1381 | 0 | BROTLI_FREE(m, pairs); |
1382 | 0 | BROTLI_FREE(m, cluster_size); |
1383 | | |
1384 | | /* Assign blocks to final histograms. */ |
1385 | 0 | new_index = BROTLI_ALLOC(m, uint32_t, num_clusters); |
1386 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return; |
1387 | 0 | for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex; |
1388 | 0 | pos = 0; |
1389 | 0 | { |
1390 | 0 | uint32_t next_index = 0; |
1391 | 0 | for (i = 0; i < num_blocks; ++i) { |
1392 | 0 | size_t j; |
1393 | 0 | uint32_t best_out; |
1394 | 0 | double best_bits; |
1395 | 0 | FN(HistogramClear)(tmp); |
1396 | 0 | for (j = 0; j < block_lengths[i]; ++j) { |
1397 | 0 | FN(HistogramAdd)(tmp, data[pos++]); |
1398 | 0 | } |
1399 | | /* Among equally good histograms prefer last used. */ |
1400 | | /* TODO(eustas): should we give a block-switch discount here? */ |
1401 | 0 | best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1]; |
1402 | 0 | best_bits = FN(BrotliHistogramBitCostDistance)( |
1403 | 0 | tmp, &all_histograms[best_out], tmp + 1); |
1404 | 0 | for (j = 0; j < num_final_clusters; ++j) { |
1405 | 0 | const double cur_bits = FN(BrotliHistogramBitCostDistance)( |
1406 | 0 | tmp, &all_histograms[clusters[j]], tmp + 1); |
1407 | 0 | if (cur_bits < best_bits) { |
1408 | 0 | best_bits = cur_bits; |
1409 | 0 | best_out = clusters[j]; |
1410 | 0 | } |
1411 | 0 | } |
1412 | 0 | histogram_symbols[i] = best_out; |
1413 | 0 | if (new_index[best_out] == kInvalidIndex) { |
1414 | 0 | new_index[best_out] = next_index++; |
1415 | 0 | } |
1416 | 0 | } |
1417 | 0 | } |
1418 | 0 | BROTLI_FREE(m, tmp); |
1419 | 0 | BROTLI_FREE(m, clusters); |
1420 | 0 | BROTLI_FREE(m, all_histograms); |
1421 | 0 | BROTLI_ENSURE_CAPACITY( |
1422 | 0 | m, uint8_t, split->types, split->types_alloc_size, num_blocks); |
1423 | 0 | BROTLI_ENSURE_CAPACITY( |
1424 | 0 | m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks); |
1425 | 0 | if (BROTLI_IS_OOM(m)) return; |
1426 | | |
1427 | | /* Rewrite final assignment to block-split. There might be fewer blocks
1428 | | * than |num_blocks| due to clustering. */ |
1429 | 0 | { |
1430 | 0 | uint32_t cur_length = 0; |
1431 | 0 | size_t block_idx = 0; |
1432 | 0 | uint8_t max_type = 0; |
1433 | 0 | for (i = 0; i < num_blocks; ++i) { |
1434 | 0 | cur_length += block_lengths[i]; |
1435 | 0 | if (i + 1 == num_blocks || |
1436 | 0 | histogram_symbols[i] != histogram_symbols[i + 1]) { |
1437 | 0 | const uint8_t id = (uint8_t)new_index[histogram_symbols[i]]; |
1438 | 0 | split->types[block_idx] = id; |
1439 | 0 | split->lengths[block_idx] = cur_length; |
1440 | 0 | max_type = BROTLI_MAX(uint8_t, max_type, id); |
1441 | 0 | cur_length = 0; |
1442 | 0 | ++block_idx; |
1443 | 0 | } |
1444 | 0 | } |
1445 | 0 | split->num_blocks = block_idx; |
1446 | 0 | split->num_types = (size_t)max_type + 1; |
1447 | 0 | } |
1448 | 0 | BROTLI_FREE(m, new_index); |
1449 | 0 | BROTLI_FREE(m, u32); |
1450 | 0 | BROTLI_FREE(m, histogram_symbols); |
1451 | 0 | } |
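
ClusterBlocks above is a two-stage greedy agglomeration: histograms of up to 64 blocks are merged within each batch, the surviving clusters are then merged globally down to at most 256 block types, and finally every block is reassigned to whichever remaining histogram codes it most cheaply. The sketch below illustrates only the merge criterion, with plain Shannon entropy standing in for BrotliPopulationCost (hypothetical names; the real model also charges for transmitting the code tables, which is what makes merging near-identical histograms strictly profitable rather than merely free):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define ALPHA 256

typedef struct { uint32_t count[ALPHA]; uint32_t total; } Hist;

/* Entropy-based cost, in bits, of coding a histogram's symbols. */
static double HistCost(const Hist* h) {
  double bits = 0.0;
  for (int s = 0; s < ALPHA; ++s) {
    if (h->count[s]) {
      double p = (double)h->count[s] / (double)h->total;
      bits -= (double)h->count[s] * log2(p);
    }
  }
  return bits;
}

/* Cost delta of merging a and b: merged cost minus separate costs.
   A non-positive delta means the merge does not hurt; the batched
   pair queue in BrotliHistogramCombine ranks merges by this idea. */
static double MergeDelta(const Hist* a, const Hist* b) {
  Hist m = *a;
  for (int s = 0; s < ALPHA; ++s) m.count[s] += b->count[s];
  m.total += b->total;
  return HistCost(&m) - HistCost(a) - HistCost(b);
}

int main(void) {
  Hist a = {{0}, 0}, b = {{0}, 0}, c = {{0}, 0};
  for (int i = 0; i < 100; ++i) { ++a.count['x']; ++a.total; }
  for (int i = 0; i < 100; ++i) { ++b.count['y']; ++b.total; }
  for (int i = 0; i < 100; ++i) { ++c.count['x']; ++c.total; }
  printf("x+y: %+.1f bits\n", MergeDelta(&a, &b));  /* +200.0: keep apart */
  printf("x+x: %+.1f bits\n", MergeDelta(&a, &c));  /* +0.0: safe to merge */
  return 0;
}
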
1452 | | |
1453 | | /* Create BlockSplit (partitioning) given the limits, estimates and "effort"
1454 | | * parameters; a toy version of the core FindBlocks pass follows the
1455 | | * function below.
1456 | | * NB: max_histograms is often less than the number of histograms allowed by
1457 | | * the format; this is done intentionally, to save some "space" for
1458 | | * context-aware clustering (here entropy is estimated for context-free symbols). */
1459 | | static void FN(SplitByteVector)(MemoryManager* m, |
1460 | | const DataType* data, const size_t length, |
1461 | | const size_t symbols_per_histogram, |
1462 | | const size_t max_histograms, |
1463 | | const size_t sampling_stride_length, |
1464 | | const double block_switch_cost, |
1465 | | const BrotliEncoderParams* params, |
1466 | 0 | BlockSplit* split) { |
1467 | 0 | const size_t data_size = FN(HistogramDataSize)(); |
1468 | 0 | HistogramType* histograms; |
1469 | 0 | HistogramType* tmp; |
1470 | | /* Calculate number of histograms; initial estimate is one histogram per |
1471 | | * specified number of symbols; however, this value is capped. */
1472 | 0 | size_t num_histograms = length / symbols_per_histogram + 1; |
1473 | 0 | if (num_histograms > max_histograms) { |
1474 | 0 | num_histograms = max_histograms; |
1475 | 0 | } |
1476 | | |
1477 | | /* Corner case: no input. */ |
1478 | 0 | if (length == 0) { |
1479 | 0 | split->num_types = 1; |
1480 | 0 | return; |
1481 | 0 | } |
1482 | | |
1483 | 0 | if (length < kMinLengthForBlockSplitting) { |
1484 | 0 | BROTLI_ENSURE_CAPACITY(m, uint8_t, |
1485 | 0 | split->types, split->types_alloc_size, split->num_blocks + 1); |
1486 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, |
1487 | 0 | split->lengths, split->lengths_alloc_size, split->num_blocks + 1); |
1488 | 0 | if (BROTLI_IS_OOM(m)) return; |
1489 | 0 | split->num_types = 1; |
1490 | 0 | split->types[split->num_blocks] = 0; |
1491 | 0 | split->lengths[split->num_blocks] = (uint32_t)length; |
1492 | 0 | split->num_blocks++; |
1493 | 0 | return; |
1494 | 0 | } |
1495 | 0 | histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1); |
1496 | 0 | tmp = histograms + num_histograms; |
1497 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return; |
1498 | | /* Find good entropy codes. */ |
1499 | 0 | FN(InitialEntropyCodes)(data, length, |
1500 | 0 | sampling_stride_length, |
1501 | 0 | num_histograms, histograms); |
1502 | 0 | FN(RefineEntropyCodes)(data, length, |
1503 | 0 | sampling_stride_length, |
1504 | 0 | num_histograms, histograms, tmp); |
1505 | 0 | { |
1506 | | /* Find a good path through the data with the good entropy codes. */
1507 | 0 | uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length); |
1508 | 0 | size_t num_blocks = 0; |
1509 | 0 | const size_t bitmaplen = (num_histograms + 7) >> 3; |
1510 | 0 | double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms); |
1511 | 0 | double* cost = BROTLI_ALLOC(m, double, num_histograms); |
1512 | 0 | uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen); |
1513 | 0 | uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms); |
1514 | 0 | const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10; |
1515 | 0 | size_t i; |
1516 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) || |
1517 | 0 | BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) || |
1518 | 0 | BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) { |
1519 | 0 | return; |
1520 | 0 | } |
1521 | 0 | for (i = 0; i < iters; ++i) { |
1522 | 0 | num_blocks = FN(FindBlocks)(data, length, |
1523 | 0 | block_switch_cost, |
1524 | 0 | num_histograms, histograms, |
1525 | 0 | insert_cost, cost, switch_signal, |
1526 | 0 | block_ids); |
1527 | 0 | num_histograms = FN(RemapBlockIds)(block_ids, length, |
1528 | 0 | new_id, num_histograms); |
1529 | 0 | FN(BuildBlockHistograms)(data, length, block_ids, |
1530 | 0 | num_histograms, histograms); |
1531 | 0 | } |
1532 | 0 | BROTLI_FREE(m, insert_cost); |
1533 | 0 | BROTLI_FREE(m, cost); |
1534 | 0 | BROTLI_FREE(m, switch_signal); |
1535 | 0 | BROTLI_FREE(m, new_id); |
1536 | 0 | BROTLI_FREE(m, histograms); |
1537 | 0 | FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split); |
1538 | 0 | if (BROTLI_IS_OOM(m)) return; |
1539 | 0 | BROTLI_FREE(m, block_ids); |
1540 | 0 | } |
1541 | 0 | } |
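
As promised above, here is a toy, runnable version of the FindBlocks idea that drives this function: a Viterbi-style pass that tracks, per histogram, the cheapest cost of coding the prefix so far, charging a switch cost whenever the chosen histogram changes. Two fixed cost tables stand in for the real per-symbol bit costs (all numbers are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Toy FindBlocks: 2 histograms over a 2-symbol alphabet.
   cost[h][s] = bits to code symbol s with histogram h (made-up numbers). */
int main(void) {
  const double cost[2][2] = { {1.0, 4.0}, {4.0, 1.0} };
  const double switch_cost = 3.0;
  const uint8_t data[] = {0, 0, 0, 1, 1, 1, 0, 0};
  const size_t n = sizeof(data) / sizeof(data[0]);
  double total[2] = {0.0, 0.0};  /* best prefix cost ending in histogram h */
  uint8_t choice[8][2];          /* backpointers for the two states */
  for (size_t i = 0; i < n; ++i) {
    for (int h = 0; h < 2; ++h) {
      double stay = total[h];
      double sw = total[1 - h] + switch_cost;
      /* Reads total[] from step i-1; both entries are updated below. */
      choice[i][h] = (uint8_t)(sw < stay ? 1 - h : h);
    }
    double t0 = (choice[i][0] == 0 ? total[0] : total[1] + switch_cost)
                + cost[0][data[i]];
    double t1 = (choice[i][1] == 1 ? total[1] : total[0] + switch_cost)
                + cost[1][data[i]];
    total[0] = t0; total[1] = t1;
  }
  /* Trace back from the cheaper final state to recover block ids. */
  uint8_t ids[8];
  int h = total[1] < total[0] ? 1 : 0;
  for (size_t i = n; i-- > 0;) { ids[i] = (uint8_t)h; h = choice[i][h]; }
  for (size_t i = 0; i < n; ++i) printf("%u", ids[i]);  /* 00011100 */
  printf("\n");
  return 0;
}

The switch cost is what keeps the segmentation from flipping histograms on every symbol; the constants kLiteralBlockSwitchCost etc. near the top of this file play that role for the real alphabets.
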
1542 | | |
1543 | | #undef HistogramType |
1544 | | #undef DataType |
1545 | | #undef FN |
1546 | | |
1547 | 0 | void duckdb_brotli::BrotliInitBlockSplit(BlockSplit* self) { |
1548 | 0 | self->num_types = 0; |
1549 | 0 | self->num_blocks = 0; |
1550 | 0 | self->types = 0; |
1551 | 0 | self->lengths = 0; |
1552 | 0 | self->types_alloc_size = 0; |
1553 | 0 | self->lengths_alloc_size = 0; |
1554 | 0 | } |
1555 | | |
1556 | 0 | void duckdb_brotli::BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) { |
1557 | 0 | BROTLI_FREE(m, self->types); |
1558 | 0 | BROTLI_FREE(m, self->lengths); |
1559 | 0 | } |
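
The two functions above frame the BlockSplit lifecycle: the struct starts zeroed, the splitter (re)grows its arrays through the MemoryManager, and the same manager frees them. A hedged usage sketch, assuming |m|, |cmds|, |num_commands|, |data|, |pos|, |mask| and |params| are set up elsewhere in the encoder (illustrative, not compiled here):

/* init -> fill -> destroy, all through the same MemoryManager. */
BlockSplit literal_split, cmd_split, dist_split;
BrotliInitBlockSplit(&literal_split);
BrotliInitBlockSplit(&cmd_split);
BrotliInitBlockSplit(&dist_split);
BrotliSplitBlock(m, cmds, num_commands, data, pos, mask, params,
                 &literal_split, &cmd_split, &dist_split);
if (!BROTLI_IS_OOM(m)) {
  /* ... consume split->types / split->lengths ... */
}
BrotliDestroyBlockSplit(m, &literal_split);
BrotliDestroyBlockSplit(m, &cmd_split);
BrotliDestroyBlockSplit(m, &dist_split);
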
1560 | | |
1561 | | /* Extracts literals, command prefix codes and distance prefix codes, then
1562 | | * applies SplitByteVector to create each partitioning. */
1563 | | void duckdb_brotli::BrotliSplitBlock(MemoryManager* m, |
1564 | | const Command* cmds, |
1565 | | const size_t num_commands, |
1566 | | const uint8_t* data, |
1567 | | const size_t pos, |
1568 | | const size_t mask, |
1569 | | const BrotliEncoderParams* params, |
1570 | | BlockSplit* literal_split, |
1571 | | BlockSplit* insert_and_copy_split, |
1572 | 0 | BlockSplit* dist_split) { |
1573 | 0 | { |
1574 | 0 | size_t literals_count = CountLiterals(cmds, num_commands); |
1575 | 0 | uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count); |
1576 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return; |
1577 | | /* Create a continuous array of literals. */ |
1578 | 0 | CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals); |
1579 | | /* Create the block split on the array of literals. |
1580 | | * Literal histograms can have alphabet size up to 256. |
1581 | | * However, to accommodate context modeling, less than half of the maximum
1582 | | * size is allowed. */
1583 | 0 | SplitByteVectorLiteral( |
1584 | 0 | m, literals, literals_count, |
1585 | 0 | kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, |
1586 | 0 | kLiteralStrideLength, kLiteralBlockSwitchCost, params, |
1587 | 0 | literal_split); |
1588 | 0 | if (BROTLI_IS_OOM(m)) return; |
1589 | 0 | BROTLI_FREE(m, literals); |
1590 | | /* NB: this might be a good place for injecting extra splitting without |
1591 | | * increasing encoder complexity; however, the output partition would be
1592 | | * less optimal than one produced with forced splitting inside
1593 | | * SplitByteVector (FindBlocks / ClusterBlocks). */ |
1594 | 0 | } |
1595 | | |
1596 | 0 | { |
1597 | | /* Compute prefix codes for commands. */ |
1598 | 0 | uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands); |
1599 | 0 | size_t i; |
1600 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return; |
1601 | 0 | for (i = 0; i < num_commands; ++i) { |
1602 | 0 | insert_and_copy_codes[i] = cmds[i].cmd_prefix_; |
1603 | 0 | } |
1604 | | /* Create the block split on the array of command prefixes. */ |
1605 | 0 | SplitByteVectorCommand( |
1606 | 0 | m, insert_and_copy_codes, num_commands, |
1607 | 0 | kSymbolsPerCommandHistogram, kMaxCommandHistograms, |
1608 | 0 | kCommandStrideLength, kCommandBlockSwitchCost, params, |
1609 | 0 | insert_and_copy_split); |
1610 | 0 | if (BROTLI_IS_OOM(m)) return; |
1611 | | /* TODO(eustas): reuse for distances? */ |
1612 | 0 | BROTLI_FREE(m, insert_and_copy_codes); |
1613 | 0 | } |
1614 | | |
1615 | 0 | { |
1616 | | /* Create a continuous array of distance prefixes. */ |
1617 | 0 | uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands); |
1618 | 0 | size_t j = 0; |
1619 | 0 | size_t i; |
1620 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return; |
1621 | 0 | for (i = 0; i < num_commands; ++i) { |
1622 | 0 | const Command* cmd = &cmds[i]; |
1623 | 0 | if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { |
1624 | 0 | distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; |
1625 | 0 | } |
1626 | 0 | } |
1627 | | /* Create the block split on the array of distance prefixes. */ |
1628 | 0 | SplitByteVectorDistance( |
1629 | 0 | m, distance_prefixes, j, |
1630 | 0 | kSymbolsPerDistanceHistogram, kMaxCommandHistograms, |
1631 | 0 | kDistanceStrideLength, kDistanceBlockSwitchCost, params, |
1632 | 0 | dist_split); |
1633 | 0 | if (BROTLI_IS_OOM(m)) return; |
1634 | 0 | BROTLI_FREE(m, distance_prefixes); |
1635 | 0 | } |
1636 | 0 | } |
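
Note the filter in the distance branch above: insert-only commands (zero copy length) emit no distance at all, and commands with cmd_prefix_ below 128 reuse the last distance, so they code no distance symbol either; the low 10 bits of dist_prefix_ hold the symbol, while the high bits cache its extra-bit count. A minimal standalone sketch of that filter (hypothetical Cmd type, not the encoder's real Command):

#include <stddef.h>
#include <stdint.h>

/* Hypothetical, flattened stand-in for the encoder's Command. */
typedef struct {
  uint32_t copy_len;     /* 0 -> insert-only command, no distance */
  uint16_t cmd_prefix_;  /* < 128 -> implicit "last distance", no symbol */
  uint16_t dist_prefix_; /* low 10 bits: distance symbol; high: extra bits */
} Cmd;

/* Collect the distance symbols that are actually coded; returns how many. */
static size_t ExtractDistanceSymbols(const Cmd* cmds, size_t n,
                                     uint16_t* out) {
  size_t j = 0;
  for (size_t i = 0; i < n; ++i) {
    if (cmds[i].copy_len && cmds[i].cmd_prefix_ >= 128) {
      out[j++] = (uint16_t)(cmds[i].dist_prefix_ & 0x3FF);
    }
  }
  return j;
}

This is why SplitByteVectorDistance is called with |j|, not |num_commands|: the distance partitioning only ever sees commands that emit a distance symbol.
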
1637 | | |
1638 | | #if defined(BROTLI_TEST) |
1639 | | size_t CountLiteralsForTest(const Command*, const size_t); |
1640 | | size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) { |
1641 | | return CountLiterals(cmds, num_commands); |
1642 | | } |
1643 | | |
1644 | | void CopyLiteralsToByteArrayForTest(const Command*, |
1645 | | const size_t, const uint8_t*, const size_t, const size_t, uint8_t*); |
1646 | | void CopyLiteralsToByteArrayForTest(const Command* cmds, |
1647 | | const size_t num_commands, const uint8_t* data, const size_t offset, |
1648 | | const size_t mask, uint8_t* literals) { |
1649 | | CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals); |
1650 | | } |
1651 | | #endif |
1652 | | |
1653 | | |