/src/ghostpdl/brotli/c/enc/block_splitter_inc.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* NOLINT(build/header_guard) */ |
2 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
3 | | |
4 | | Distributed under MIT license. |
5 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
6 | | */ |
7 | | |
8 | | /* template parameters: FN, DataType */ |
9 | | |
10 | 0 | #define HistogramType FN(Histogram) |
11 | | |
12 | | static void FN(InitialEntropyCodes)(const DataType* data, size_t length, |
13 | | size_t stride, |
14 | | size_t num_histograms, |
15 | 0 | HistogramType* histograms) { |
16 | 0 | uint32_t seed = 7; |
17 | 0 | size_t block_length = length / num_histograms; |
18 | 0 | size_t i; |
19 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
20 | 0 | for (i = 0; i < num_histograms; ++i) { |
21 | 0 | size_t pos = length * i / num_histograms; |
22 | 0 | if (i != 0) { |
23 | 0 | pos += MyRand(&seed) % block_length; |
24 | 0 | } |
25 | 0 | if (pos + stride >= length) { |
26 | 0 | pos = length - stride - 1; |
27 | 0 | } |
28 | 0 | FN(HistogramAddVector)(&histograms[i], data + pos, stride); |
29 | 0 | } |
30 | 0 | } Unexecuted instantiation: block_splitter.c:InitialEntropyCodesLiteral Unexecuted instantiation: block_splitter.c:InitialEntropyCodesCommand Unexecuted instantiation: block_splitter.c:InitialEntropyCodesDistance |
31 | | |
32 | | static void FN(RandomSample)(uint32_t* seed, |
33 | | const DataType* data, |
34 | | size_t length, |
35 | | size_t stride, |
36 | 0 | HistogramType* sample) { |
37 | 0 | size_t pos = 0; |
38 | 0 | if (stride >= length) { |
39 | 0 | stride = length; |
40 | 0 | } else { |
41 | 0 | pos = MyRand(seed) % (length - stride + 1); |
42 | 0 | } |
43 | 0 | FN(HistogramAddVector)(sample, data + pos, stride); |
44 | 0 | } Unexecuted instantiation: block_splitter.c:RandomSampleLiteral Unexecuted instantiation: block_splitter.c:RandomSampleCommand Unexecuted instantiation: block_splitter.c:RandomSampleDistance |
45 | | |
46 | | static void FN(RefineEntropyCodes)(const DataType* data, size_t length, |
47 | | size_t stride, |
48 | | size_t num_histograms, |
49 | | HistogramType* histograms, |
50 | 0 | HistogramType* tmp) { |
51 | 0 | size_t iters = |
52 | 0 | kIterMulForRefining * length / stride + kMinItersForRefining; |
53 | 0 | uint32_t seed = 7; |
54 | 0 | size_t iter; |
55 | 0 | iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; |
56 | 0 | for (iter = 0; iter < iters; ++iter) { |
57 | 0 | FN(HistogramClear)(tmp); |
58 | 0 | FN(RandomSample)(&seed, data, length, stride, tmp); |
59 | 0 | FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp); |
60 | 0 | } |
61 | 0 | } Unexecuted instantiation: block_splitter.c:RefineEntropyCodesLiteral Unexecuted instantiation: block_splitter.c:RefineEntropyCodesCommand Unexecuted instantiation: block_splitter.c:RefineEntropyCodesDistance |
62 | | |
63 | | /* Assigns a block id from the range [0, num_histograms) to each data element |
64 | | in data[0..length) and fills in block_id[0..length) with the assigned values. |
65 | | Returns the number of blocks, i.e. one plus the number of block switches. */ |
66 | | static size_t FN(FindBlocks)(const DataType* data, const size_t length, |
67 | | const double block_switch_bitcost, |
68 | | const size_t num_histograms, |
69 | | const HistogramType* histograms, |
70 | | double* insert_cost, |
71 | | double* cost, |
72 | | uint8_t* switch_signal, |
73 | 0 | uint8_t* block_id) { |
74 | 0 | const size_t alphabet_size = FN(HistogramDataSize)(); |
75 | 0 | const size_t bitmap_len = (num_histograms + 7) >> 3; |
76 | 0 | size_t num_blocks = 1; |
77 | 0 | size_t byte_ix; |
78 | 0 | size_t i; |
79 | 0 | size_t j; |
80 | 0 | BROTLI_DCHECK(num_histograms <= 256); |
81 | | |
82 | | /* Trivial case: single historgram -> single block type. */ |
83 | 0 | if (num_histograms <= 1) { |
84 | 0 | for (i = 0; i < length; ++i) { |
85 | 0 | block_id[i] = 0; |
86 | 0 | } |
87 | 0 | return 1; |
88 | 0 | } |
89 | | |
90 | | /* Fill bitcost for each symbol of all histograms. |
91 | | * Non-existing symbol cost: 2 + log2(total_count). |
92 | | * Regular symbol cost: -log2(symbol_count / total_count). */ |
93 | 0 | memset(insert_cost, 0, |
94 | 0 | sizeof(insert_cost[0]) * alphabet_size * num_histograms); |
95 | 0 | for (i = 0; i < num_histograms; ++i) { |
96 | 0 | insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); |
97 | 0 | } |
98 | 0 | for (i = alphabet_size; i != 0;) { |
99 | | /* Reverse order to use the 0-th row as a temporary storage. */ |
100 | 0 | --i; |
101 | 0 | for (j = 0; j < num_histograms; ++j) { |
102 | 0 | insert_cost[i * num_histograms + j] = |
103 | 0 | insert_cost[j] - BitCost(histograms[j].data_[i]); |
104 | 0 | } |
105 | 0 | } |
106 | | |
107 | | /* After each iteration of this loop, cost[k] will contain the difference |
108 | | between the minimum cost of arriving at the current byte position using |
109 | | entropy code k, and the minimum cost of arriving at the current byte |
110 | | position. This difference is capped at the block switch cost, and if it |
111 | | reaches block switch cost, it means that when we trace back from the last |
112 | | position, we need to switch here. */ |
113 | 0 | memset(cost, 0, sizeof(cost[0]) * num_histograms); |
114 | 0 | memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len); |
115 | 0 | for (byte_ix = 0; byte_ix < length; ++byte_ix) { |
116 | 0 | size_t ix = byte_ix * bitmap_len; |
117 | 0 | size_t symbol = data[byte_ix]; |
118 | 0 | size_t insert_cost_ix = symbol * num_histograms; |
119 | 0 | double min_cost = 1e99; |
120 | 0 | double block_switch_cost = block_switch_bitcost; |
121 | 0 | size_t k; |
122 | 0 | for (k = 0; k < num_histograms; ++k) { |
123 | | /* We are coding the symbol with entropy code k. */ |
124 | 0 | cost[k] += insert_cost[insert_cost_ix + k]; |
125 | 0 | if (cost[k] < min_cost) { |
126 | 0 | min_cost = cost[k]; |
127 | 0 | block_id[byte_ix] = (uint8_t)k; |
128 | 0 | } |
129 | 0 | } |
130 | | /* More blocks for the beginning. */ |
131 | 0 | if (byte_ix < 2000) { |
132 | 0 | block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; |
133 | 0 | } |
134 | 0 | for (k = 0; k < num_histograms; ++k) { |
135 | 0 | cost[k] -= min_cost; |
136 | 0 | if (cost[k] >= block_switch_cost) { |
137 | 0 | const uint8_t mask = (uint8_t)(1u << (k & 7)); |
138 | 0 | cost[k] = block_switch_cost; |
139 | 0 | BROTLI_DCHECK((k >> 3) < bitmap_len); |
140 | 0 | switch_signal[ix + (k >> 3)] |= mask; |
141 | 0 | } |
142 | 0 | } |
143 | 0 | } |
144 | |
|
145 | 0 | byte_ix = length - 1; |
146 | 0 | { /* Trace back from the last position and switch at the marked places. */ |
147 | 0 | size_t ix = byte_ix * bitmap_len; |
148 | 0 | uint8_t cur_id = block_id[byte_ix]; |
149 | 0 | while (byte_ix > 0) { |
150 | 0 | const uint8_t mask = (uint8_t)(1u << (cur_id & 7)); |
151 | 0 | BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len); |
152 | 0 | --byte_ix; |
153 | 0 | ix -= bitmap_len; |
154 | 0 | if (switch_signal[ix + (cur_id >> 3)] & mask) { |
155 | 0 | if (cur_id != block_id[byte_ix]) { |
156 | 0 | cur_id = block_id[byte_ix]; |
157 | 0 | ++num_blocks; |
158 | 0 | } |
159 | 0 | } |
160 | 0 | block_id[byte_ix] = cur_id; |
161 | 0 | } |
162 | 0 | } |
163 | 0 | return num_blocks; |
164 | 0 | } Unexecuted instantiation: block_splitter.c:FindBlocksLiteral Unexecuted instantiation: block_splitter.c:FindBlocksCommand Unexecuted instantiation: block_splitter.c:FindBlocksDistance |
165 | | |
166 | | static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length, |
167 | 0 | uint16_t* new_id, const size_t num_histograms) { |
168 | 0 | static const uint16_t kInvalidId = 256; |
169 | 0 | uint16_t next_id = 0; |
170 | 0 | size_t i; |
171 | 0 | for (i = 0; i < num_histograms; ++i) { |
172 | 0 | new_id[i] = kInvalidId; |
173 | 0 | } |
174 | 0 | for (i = 0; i < length; ++i) { |
175 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
176 | 0 | if (new_id[block_ids[i]] == kInvalidId) { |
177 | 0 | new_id[block_ids[i]] = next_id++; |
178 | 0 | } |
179 | 0 | } |
180 | 0 | for (i = 0; i < length; ++i) { |
181 | 0 | block_ids[i] = (uint8_t)new_id[block_ids[i]]; |
182 | 0 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
183 | 0 | } |
184 | 0 | BROTLI_DCHECK(next_id <= num_histograms); |
185 | 0 | return next_id; |
186 | 0 | } Unexecuted instantiation: block_splitter.c:RemapBlockIdsLiteral Unexecuted instantiation: block_splitter.c:RemapBlockIdsCommand Unexecuted instantiation: block_splitter.c:RemapBlockIdsDistance |
187 | | |
188 | | static void FN(BuildBlockHistograms)(const DataType* data, const size_t length, |
189 | | const uint8_t* block_ids, |
190 | | const size_t num_histograms, |
191 | 0 | HistogramType* histograms) { |
192 | 0 | size_t i; |
193 | 0 | FN(ClearHistograms)(histograms, num_histograms); |
194 | 0 | for (i = 0; i < length; ++i) { |
195 | 0 | FN(HistogramAdd)(&histograms[block_ids[i]], data[i]); |
196 | 0 | } |
197 | 0 | } Unexecuted instantiation: block_splitter.c:BuildBlockHistogramsLiteral Unexecuted instantiation: block_splitter.c:BuildBlockHistogramsCommand Unexecuted instantiation: block_splitter.c:BuildBlockHistogramsDistance |
198 | | |
199 | | /* Given the initial partitioning build partitioning with limited number |
200 | | * of histograms (and block types). */ |
201 | | static void FN(ClusterBlocks)(MemoryManager* m, |
202 | | const DataType* data, const size_t length, |
203 | | const size_t num_blocks, |
204 | | uint8_t* block_ids, |
205 | 0 | BlockSplit* split) { |
206 | 0 | uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks); |
207 | 0 | uint32_t* u32 = |
208 | 0 | BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH); |
209 | 0 | const size_t expected_num_clusters = CLUSTERS_PER_BATCH * |
210 | 0 | (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH; |
211 | 0 | size_t all_histograms_size = 0; |
212 | 0 | size_t all_histograms_capacity = expected_num_clusters; |
213 | 0 | HistogramType* all_histograms = |
214 | 0 | BROTLI_ALLOC(m, HistogramType, all_histograms_capacity); |
215 | 0 | size_t cluster_size_size = 0; |
216 | 0 | size_t cluster_size_capacity = expected_num_clusters; |
217 | 0 | uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity); |
218 | 0 | size_t num_clusters = 0; |
219 | 0 | HistogramType* histograms = BROTLI_ALLOC(m, HistogramType, |
220 | 0 | BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH)); |
221 | 0 | size_t max_num_pairs = |
222 | 0 | HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2; |
223 | 0 | size_t pairs_capacity = max_num_pairs + 1; |
224 | 0 | HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity); |
225 | 0 | size_t pos = 0; |
226 | 0 | uint32_t* clusters; |
227 | 0 | size_t num_final_clusters; |
228 | 0 | static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; |
229 | 0 | uint32_t* new_index; |
230 | 0 | size_t i; |
231 | 0 | uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH; |
232 | 0 | uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH; |
233 | 0 | uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH; |
234 | 0 | uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH; |
235 | 0 | uint32_t* BROTLI_RESTRICT const block_lengths = |
236 | 0 | u32 + 4 * HISTOGRAMS_PER_BATCH; |
237 | | /* TODO(eustas): move to arena? */ |
238 | 0 | HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2); |
239 | |
|
240 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) || |
241 | 0 | BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) || |
242 | 0 | BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) || |
243 | 0 | BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) { |
244 | 0 | return; |
245 | 0 | } |
246 | | |
247 | 0 | memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t)); |
248 | | |
249 | | /* Calculate block lengths (convert repeating values -> series length). */ |
250 | 0 | { |
251 | 0 | size_t block_idx = 0; |
252 | 0 | for (i = 0; i < length; ++i) { |
253 | 0 | BROTLI_DCHECK(block_idx < num_blocks); |
254 | 0 | ++block_lengths[block_idx]; |
255 | 0 | if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { |
256 | 0 | ++block_idx; |
257 | 0 | } |
258 | 0 | } |
259 | 0 | BROTLI_DCHECK(block_idx == num_blocks); |
260 | 0 | } |
261 | | |
262 | | /* Pre-cluster blocks (cluster batches). */ |
263 | 0 | for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) { |
264 | 0 | const size_t num_to_combine = |
265 | 0 | BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH); |
266 | 0 | size_t num_new_clusters; |
267 | 0 | size_t j; |
268 | 0 | for (j = 0; j < num_to_combine; ++j) { |
269 | 0 | size_t k; |
270 | 0 | size_t block_length = block_lengths[i + j]; |
271 | 0 | FN(HistogramClear)(&histograms[j]); |
272 | 0 | for (k = 0; k < block_length; ++k) { |
273 | 0 | FN(HistogramAdd)(&histograms[j], data[pos++]); |
274 | 0 | } |
275 | 0 | histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]); |
276 | 0 | new_clusters[j] = (uint32_t)j; |
277 | 0 | symbols[j] = (uint32_t)j; |
278 | 0 | sizes[j] = 1; |
279 | 0 | } |
280 | 0 | num_new_clusters = FN(BrotliHistogramCombine)( |
281 | 0 | histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine, |
282 | 0 | num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs); |
283 | 0 | BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms, |
284 | 0 | all_histograms_capacity, all_histograms_size + num_new_clusters); |
285 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size, |
286 | 0 | cluster_size_capacity, cluster_size_size + num_new_clusters); |
287 | 0 | if (BROTLI_IS_OOM(m)) return; |
288 | 0 | for (j = 0; j < num_new_clusters; ++j) { |
289 | 0 | all_histograms[all_histograms_size++] = histograms[new_clusters[j]]; |
290 | 0 | cluster_size[cluster_size_size++] = sizes[new_clusters[j]]; |
291 | 0 | remap[new_clusters[j]] = (uint32_t)j; |
292 | 0 | } |
293 | 0 | for (j = 0; j < num_to_combine; ++j) { |
294 | 0 | histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]]; |
295 | 0 | } |
296 | 0 | num_clusters += num_new_clusters; |
297 | 0 | BROTLI_DCHECK(num_clusters == cluster_size_size); |
298 | 0 | BROTLI_DCHECK(num_clusters == all_histograms_size); |
299 | 0 | } |
300 | 0 | BROTLI_FREE(m, histograms); |
301 | | |
302 | | /* Final clustering. */ |
303 | 0 | max_num_pairs = |
304 | 0 | BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters); |
305 | 0 | if (pairs_capacity < max_num_pairs + 1) { |
306 | 0 | BROTLI_FREE(m, pairs); |
307 | 0 | pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1); |
308 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return; |
309 | 0 | } |
310 | 0 | clusters = BROTLI_ALLOC(m, uint32_t, num_clusters); |
311 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return; |
312 | 0 | for (i = 0; i < num_clusters; ++i) { |
313 | 0 | clusters[i] = (uint32_t)i; |
314 | 0 | } |
315 | 0 | num_final_clusters = FN(BrotliHistogramCombine)( |
316 | 0 | all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs, |
317 | 0 | num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES, |
318 | 0 | max_num_pairs); |
319 | 0 | BROTLI_FREE(m, pairs); |
320 | 0 | BROTLI_FREE(m, cluster_size); |
321 | | |
322 | | /* Assign blocks to final histograms. */ |
323 | 0 | new_index = BROTLI_ALLOC(m, uint32_t, num_clusters); |
324 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return; |
325 | 0 | for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex; |
326 | 0 | pos = 0; |
327 | 0 | { |
328 | 0 | uint32_t next_index = 0; |
329 | 0 | for (i = 0; i < num_blocks; ++i) { |
330 | 0 | size_t j; |
331 | 0 | uint32_t best_out; |
332 | 0 | double best_bits; |
333 | 0 | FN(HistogramClear)(tmp); |
334 | 0 | for (j = 0; j < block_lengths[i]; ++j) { |
335 | 0 | FN(HistogramAdd)(tmp, data[pos++]); |
336 | 0 | } |
337 | | /* Among equally good histograms prefer last used. */ |
338 | | /* TODO(eustas): should we give a block-switch discount here? */ |
339 | 0 | best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1]; |
340 | 0 | best_bits = FN(BrotliHistogramBitCostDistance)( |
341 | 0 | tmp, &all_histograms[best_out], tmp + 1); |
342 | 0 | for (j = 0; j < num_final_clusters; ++j) { |
343 | 0 | const double cur_bits = FN(BrotliHistogramBitCostDistance)( |
344 | 0 | tmp, &all_histograms[clusters[j]], tmp + 1); |
345 | 0 | if (cur_bits < best_bits) { |
346 | 0 | best_bits = cur_bits; |
347 | 0 | best_out = clusters[j]; |
348 | 0 | } |
349 | 0 | } |
350 | 0 | histogram_symbols[i] = best_out; |
351 | 0 | if (new_index[best_out] == kInvalidIndex) { |
352 | 0 | new_index[best_out] = next_index++; |
353 | 0 | } |
354 | 0 | } |
355 | 0 | } |
356 | 0 | BROTLI_FREE(m, tmp); |
357 | 0 | BROTLI_FREE(m, clusters); |
358 | 0 | BROTLI_FREE(m, all_histograms); |
359 | 0 | BROTLI_ENSURE_CAPACITY( |
360 | 0 | m, uint8_t, split->types, split->types_alloc_size, num_blocks); |
361 | 0 | BROTLI_ENSURE_CAPACITY( |
362 | 0 | m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks); |
363 | 0 | if (BROTLI_IS_OOM(m)) return; |
364 | | |
365 | | /* Rewrite final assignment to block-split. There might be less blocks |
366 | | * than |num_blocks| due to clustering. */ |
367 | 0 | { |
368 | 0 | uint32_t cur_length = 0; |
369 | 0 | size_t block_idx = 0; |
370 | 0 | uint8_t max_type = 0; |
371 | 0 | for (i = 0; i < num_blocks; ++i) { |
372 | 0 | cur_length += block_lengths[i]; |
373 | 0 | if (i + 1 == num_blocks || |
374 | 0 | histogram_symbols[i] != histogram_symbols[i + 1]) { |
375 | 0 | const uint8_t id = (uint8_t)new_index[histogram_symbols[i]]; |
376 | 0 | split->types[block_idx] = id; |
377 | 0 | split->lengths[block_idx] = cur_length; |
378 | 0 | max_type = BROTLI_MAX(uint8_t, max_type, id); |
379 | 0 | cur_length = 0; |
380 | 0 | ++block_idx; |
381 | 0 | } |
382 | 0 | } |
383 | 0 | split->num_blocks = block_idx; |
384 | 0 | split->num_types = (size_t)max_type + 1; |
385 | 0 | } |
386 | 0 | BROTLI_FREE(m, new_index); |
387 | 0 | BROTLI_FREE(m, u32); |
388 | 0 | BROTLI_FREE(m, histogram_symbols); |
389 | 0 | } Unexecuted instantiation: block_splitter.c:ClusterBlocksLiteral Unexecuted instantiation: block_splitter.c:ClusterBlocksCommand Unexecuted instantiation: block_splitter.c:ClusterBlocksDistance |
390 | | |
391 | | /* Create BlockSplit (partitioning) given the limits, estimates and "effort" |
392 | | * parameters. |
393 | | * |
394 | | * NB: max_histograms is often less than number of histograms allowed by format; |
395 | | * this is done intentionally, to save some "space" for context-aware |
396 | | * clustering (here entropy is estimated for context-free symbols). */ |
397 | | static void FN(SplitByteVector)(MemoryManager* m, |
398 | | const DataType* data, const size_t length, |
399 | | const size_t symbols_per_histogram, |
400 | | const size_t max_histograms, |
401 | | const size_t sampling_stride_length, |
402 | | const double block_switch_cost, |
403 | | const BrotliEncoderParams* params, |
404 | 0 | BlockSplit* split) { |
405 | 0 | const size_t data_size = FN(HistogramDataSize)(); |
406 | 0 | HistogramType* histograms; |
407 | 0 | HistogramType* tmp; |
408 | | /* Calculate number of histograms; initial estimate is one histogram per |
409 | | * specified amount of symbols; however, this value is capped. */ |
410 | 0 | size_t num_histograms = length / symbols_per_histogram + 1; |
411 | 0 | if (num_histograms > max_histograms) { |
412 | 0 | num_histograms = max_histograms; |
413 | 0 | } |
414 | | |
415 | | /* Corner case: no input. */ |
416 | 0 | if (length == 0) { |
417 | 0 | split->num_types = 1; |
418 | 0 | return; |
419 | 0 | } |
420 | | |
421 | 0 | if (length < kMinLengthForBlockSplitting) { |
422 | 0 | BROTLI_ENSURE_CAPACITY(m, uint8_t, |
423 | 0 | split->types, split->types_alloc_size, split->num_blocks + 1); |
424 | 0 | BROTLI_ENSURE_CAPACITY(m, uint32_t, |
425 | 0 | split->lengths, split->lengths_alloc_size, split->num_blocks + 1); |
426 | 0 | if (BROTLI_IS_OOM(m)) return; |
427 | 0 | split->num_types = 1; |
428 | 0 | split->types[split->num_blocks] = 0; |
429 | 0 | split->lengths[split->num_blocks] = (uint32_t)length; |
430 | 0 | split->num_blocks++; |
431 | 0 | return; |
432 | 0 | } |
433 | 0 | histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1); |
434 | 0 | tmp = histograms + num_histograms; |
435 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return; |
436 | | /* Find good entropy codes. */ |
437 | 0 | FN(InitialEntropyCodes)(data, length, |
438 | 0 | sampling_stride_length, |
439 | 0 | num_histograms, histograms); |
440 | 0 | FN(RefineEntropyCodes)(data, length, |
441 | 0 | sampling_stride_length, |
442 | 0 | num_histograms, histograms, tmp); |
443 | 0 | { |
444 | | /* Find a good path through literals with the good entropy codes. */ |
445 | 0 | uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length); |
446 | 0 | size_t num_blocks = 0; |
447 | 0 | const size_t bitmaplen = (num_histograms + 7) >> 3; |
448 | 0 | double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms); |
449 | 0 | double* cost = BROTLI_ALLOC(m, double, num_histograms); |
450 | 0 | uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen); |
451 | 0 | uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms); |
452 | 0 | const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10; |
453 | 0 | size_t i; |
454 | 0 | if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) || |
455 | 0 | BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) || |
456 | 0 | BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) { |
457 | 0 | return; |
458 | 0 | } |
459 | 0 | for (i = 0; i < iters; ++i) { |
460 | 0 | num_blocks = FN(FindBlocks)(data, length, |
461 | 0 | block_switch_cost, |
462 | 0 | num_histograms, histograms, |
463 | 0 | insert_cost, cost, switch_signal, |
464 | 0 | block_ids); |
465 | 0 | num_histograms = FN(RemapBlockIds)(block_ids, length, |
466 | 0 | new_id, num_histograms); |
467 | 0 | FN(BuildBlockHistograms)(data, length, block_ids, |
468 | 0 | num_histograms, histograms); |
469 | 0 | } |
470 | 0 | BROTLI_FREE(m, insert_cost); |
471 | 0 | BROTLI_FREE(m, cost); |
472 | 0 | BROTLI_FREE(m, switch_signal); |
473 | 0 | BROTLI_FREE(m, new_id); |
474 | 0 | BROTLI_FREE(m, histograms); |
475 | 0 | FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split); |
476 | 0 | if (BROTLI_IS_OOM(m)) return; |
477 | 0 | BROTLI_FREE(m, block_ids); |
478 | 0 | } |
479 | 0 | } Unexecuted instantiation: block_splitter.c:SplitByteVectorLiteral Unexecuted instantiation: block_splitter.c:SplitByteVectorCommand Unexecuted instantiation: block_splitter.c:SplitByteVectorDistance |
480 | | |
481 | | #undef HistogramType |