/src/ghostpdl/brotli/c/enc/encoder_dict.c
Line | Count | Source |
1 | | /* Copyright 2017 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | #include "encoder_dict.h" |
8 | | |
9 | | #include <stdlib.h> /* malloc, free */ |
10 | | |
11 | | #include "../common/dictionary.h" |
12 | | #include "../common/platform.h" |
13 | | #include "../common/shared_dictionary_internal.h" |
14 | | #include "../common/transform.h" |
15 | | #include "compound_dictionary.h" |
16 | | #include "dictionary_hash.h" |
17 | | #include "memory.h" |
18 | | #include "quality.h" |
19 | | #include "hash.h" |
20 | | |
21 | | #if defined(__cplusplus) || defined(c_plusplus) |
22 | | extern "C" { |
23 | | #endif |
24 | | |
25 | 0 | #define NUM_HASH_BITS 15u |
26 | 0 | #define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS) |
27 | | |
28 | 0 | static void BrotliTrieInit(BrotliTrie* trie) { |
29 | 0 | trie->pool_capacity = 0; |
30 | 0 | trie->pool_size = 0; |
31 | 0 | trie->pool = 0; |
32 | | |
33 | | /* Set up the root node */ |
34 | 0 | trie->root.single = 0; |
35 | 0 | trie->root.len_ = 0; |
36 | 0 | trie->root.idx_ = 0; |
37 | 0 | trie->root.sub = 0; |
38 | 0 | } |
39 | | |
40 | 0 | static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) { |
41 | 0 | BrotliFree(m, trie->pool); |
42 | 0 | } |
43 | | |
44 | | /* Initializes to RFC 7932 static dictionary / transforms. */ |
45 | 0 | static void InitEncoderDictionary(BrotliEncoderDictionary* dict) { |
46 | 0 | dict->words = BrotliGetDictionary(); |
47 | 0 | dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms; |
48 | |
49 | 0 | dict->hash_table_words = kStaticDictionaryHashWords; |
50 | 0 | dict->hash_table_lengths = kStaticDictionaryHashLengths; |
51 | 0 | dict->buckets = kStaticDictionaryBuckets; |
52 | 0 | dict->dict_words = kStaticDictionaryWords; |
53 | |
54 | 0 | dict->cutoffTransformsCount = kCutoffTransformsCount; |
55 | 0 | dict->cutoffTransforms = kCutoffTransforms; |
56 | |
57 | 0 | dict->parent = 0; |
58 | |
59 | 0 | dict->hash_table_data_words_ = 0; |
60 | 0 | dict->hash_table_data_lengths_ = 0; |
61 | 0 | dict->buckets_alloc_size_ = 0; |
62 | 0 | dict->buckets_data_ = 0; |
63 | 0 | dict->dict_words_alloc_size_ = 0; |
64 | 0 | dict->dict_words_data_ = 0; |
65 | 0 | dict->words_instance_ = 0; |
66 | 0 | dict->has_words_heavy = BROTLI_FALSE; |
67 | 0 | BrotliTrieInit(&dict->trie); |
68 | 0 | } |
69 | | |
70 | | static void BrotliDestroyEncoderDictionary(MemoryManager* m, |
71 | 0 | BrotliEncoderDictionary* dict) { |
72 | 0 | BrotliFree(m, dict->hash_table_data_words_); |
73 | 0 | BrotliFree(m, dict->hash_table_data_lengths_); |
74 | 0 | BrotliFree(m, dict->buckets_data_); |
75 | 0 | BrotliFree(m, dict->dict_words_data_); |
76 | 0 | BrotliFree(m, dict->words_instance_); |
77 | 0 | BrotliTrieFree(m, &dict->trie); |
78 | 0 | } |
79 | | |
80 | | /* Word length must be at least 4 bytes */ |
81 | 0 | static uint32_t Hash(const uint8_t* data, int bits) { |
82 | 0 | uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; |
83 | | /* The higher bits contain more mixture from the multiplication, |
84 | | so we take our results from there. */ |
85 | 0 | return h >> (32 - bits); |
86 | 0 | } |
87 | | |
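/* Illustrative sketch, not part of this file's code: the same multiplicative
   hash as Hash() above, written with an explicit little-endian load instead
   of BROTLI_UNALIGNED_LOAD32LE so the data flow is visible. The multiplier
   literal is an assumption standing in for kHashMul32. */
static uint32_t ExampleHashSketch(const uint8_t* data, int bits) {
  uint32_t v = (uint32_t)data[0] | ((uint32_t)data[1] << 8) |
               ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 24);
  v *= 0x1E35A7BDu;          /* assumed value of kHashMul32 */
  return v >> (32 - bits);   /* the high bits carry the best mixing */
}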
88 | | /* Theoretical max possible word size after transform */ |
89 | | #define kTransformedBufferSize \ |
90 | | (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) |
91 | | |
92 | | /* To be safe buffer must have at least kTransformedBufferSize */ |
93 | | static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform, |
94 | | const BrotliTransforms* transforms, |
95 | | const BrotliEncoderDictionary* dict, |
96 | 0 | uint8_t* buffer, size_t* size) { |
97 | 0 | const uint8_t* dict_word = &dict->words->data[ |
98 | 0 | dict->words->offsets_by_length[len] + (uint32_t)len * word_idx]; |
99 | 0 | *size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len, |
100 | 0 | transforms, transform); |
101 | 0 | } |
102 | | |
103 | 0 | static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) { |
104 | 0 | DictWord result; |
105 | 0 | result.len = len; |
106 | 0 | result.transform = transform; |
107 | 0 | result.idx = idx; |
108 | 0 | return result; |
109 | 0 | } |
110 | | |
111 | | static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie, |
112 | 0 | BrotliTrieNode** keep) { |
113 | 0 | uint32_t result; |
114 | 0 | uint32_t keep_index = 0; |
115 | 0 | if (keep && *keep != &trie->root) { |
116 | | /* Optional node to keep, since address may change after re-allocating */ |
117 | 0 | keep_index = (uint32_t)(*keep - trie->pool); |
118 | 0 | } |
119 | 0 | if (trie->pool_size == 0) { |
120 | | /* Have a dummy node in the front. We do not want the result to be 0, it |
121 | | must be at least 1, 0 represents "null pointer" */ |
122 | 0 | trie->pool_size = 1; |
123 | 0 | } |
124 | 0 | BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity, |
125 | 0 | trie->pool_size + num); |
126 | 0 | if (BROTLI_IS_OOM(m)) return 0; |
127 | | /* Init the new nodes to empty */ |
128 | 0 | memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num); |
129 | 0 | result = (uint32_t)trie->pool_size; |
130 | 0 | trie->pool_size += num; |
131 | 0 | if (keep && *keep != &trie->root) { |
132 | 0 | *keep = trie->pool + keep_index; |
133 | 0 | } |
134 | 0 | return result; |
135 | 0 | } |
136 | | |
137 | | /** |
138 | | * len and idx: payload for last node |
139 | | * word, size: the string |
140 | | * index: position in the string |
141 | | */ |
142 | | static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len, |
143 | | uint32_t idx, const uint8_t* word, size_t size, int index, |
144 | 0 | BrotliTrieNode* node, BrotliTrie* trie) { |
145 | 0 | BrotliTrieNode* child = 0; |
146 | 0 | uint8_t c; |
147 | 0 | if ((size_t)index == size) { |
148 | 0 | if (!node->len_ || idx < node->idx_) { |
149 | 0 | node->len_ = len; |
150 | 0 | node->idx_ = idx; |
151 | 0 | } |
152 | 0 | return BROTLI_TRUE; |
153 | 0 | } |
154 | 0 | c = word[index]; |
155 | 0 | if (node->single && c != node->c) { |
156 | 0 | BrotliTrieNode old = trie->pool[node->sub]; |
157 | 0 | uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node); |
158 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
159 | 0 | node->single = 0; |
160 | 0 | node->sub = new_nodes; |
161 | 0 | trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16; |
162 | 0 | trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] = |
163 | 0 | old; |
164 | 0 | } |
165 | 0 | if (!node->sub) { |
166 | 0 | uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node); |
167 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
168 | 0 | node->single = 1; |
169 | 0 | node->c = c; |
170 | 0 | node->sub = new_node; |
171 | 0 | } |
172 | 0 | if (node->single) { |
173 | 0 | child = &trie->pool[node->sub]; |
174 | 0 | } else { |
175 | 0 | if (!trie->pool[node->sub + (c >> 4)].sub) { |
176 | 0 | uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node); |
177 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
178 | 0 | trie->pool[node->sub + (c >> 4)].sub = new_nodes; |
179 | 0 | } |
180 | 0 | child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)]; |
181 | 0 | } |
182 | 0 | return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie); |
183 | 0 | } |
184 | | |
185 | | static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx, |
186 | 0 | const uint8_t* word, size_t size, BrotliTrie* trie) { |
187 | 0 | return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie); |
188 | 0 | } |
189 | | |
190 | | const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie, |
191 | 0 | const BrotliTrieNode* node, uint8_t c) { |
192 | 0 | BrotliTrieNode* temp_node; |
193 | 0 | if (node->single) { |
194 | 0 | if (node->c == c) return &trie->pool[node->sub]; |
195 | 0 | return 0; |
196 | 0 | } |
197 | 0 | if (!node->sub) return 0; |
198 | 0 | temp_node = &trie->pool[node->sub + (c >> 4)]; |
199 | 0 | if (!temp_node->sub) return 0; |
200 | 0 | return &trie->pool[temp_node->sub + (c & 15)]; |
201 | 0 | } |
202 | | |
203 | | static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie, |
204 | 0 | const uint8_t* word, size_t size) { |
205 | 0 | const BrotliTrieNode* node = &trie->root; |
206 | 0 | size_t i; |
207 | 0 | for (i = 0; i < size; i++) { |
208 | 0 | node = BrotliTrieSub(trie, node, word[i]); |
209 | 0 | if (!node) return 0; |
210 | 0 | } |
211 | 0 | return node; |
212 | 0 | } |
213 | | |
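/* Illustrative sketch, not part of this file's code: how a consumer of the
   "heavy" trie built by GenerateWordsHeavy below could find the longest
   transformed dictionary word that prefixes `data`. It assumes payload nodes
   are marked by a non-zero len_, which holds for that trie because every
   word is added with its real length. */
static const BrotliTrieNode* ExampleLongestPrefixSketch(
    const BrotliTrie* trie, const uint8_t* data, size_t size) {
  const BrotliTrieNode* node = &trie->root;
  const BrotliTrieNode* best = 0;
  size_t i;
  for (i = 0; i < size; i++) {
    node = BrotliTrieSub(trie, node, data[i]);
    if (!node) break;            /* no edge for this byte: stop descending */
    if (node->len_) best = node; /* payload node: remember deepest match */
  }
  return best;
}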
214 | | static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m, |
215 | | const BrotliTransforms* transforms, |
216 | 0 | BrotliEncoderDictionary* dict) { |
217 | 0 | uint32_t i; |
218 | 0 | DictWord* dict_words; |
219 | 0 | uint16_t* buckets; |
220 | 0 | DictWord** words_by_hash; |
221 | 0 | size_t* words_by_hash_size; |
222 | 0 | size_t* words_by_hash_capacity; |
223 | 0 | BrotliTrie dedup; |
224 | 0 | uint8_t word[kTransformedBufferSize]; |
225 | 0 | size_t word_size; |
226 | 0 | size_t total = 0; |
227 | 0 | uint8_t l; |
228 | 0 | uint16_t idx; |
229 | |
230 | 0 | BrotliTrieInit(&dedup); |
231 | |
232 | 0 | words_by_hash = (DictWord**)BrotliAllocate(m, |
233 | 0 | sizeof(*words_by_hash) * NUM_HASH_BUCKETS); |
234 | 0 | words_by_hash_size = (size_t*)BrotliAllocate(m, |
235 | 0 | sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); |
236 | 0 | words_by_hash_capacity = (size_t*)BrotliAllocate(m, |
237 | 0 | sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); |
238 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
239 | 0 | memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS); |
240 | 0 | memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); |
241 | 0 | memset(words_by_hash_capacity, 0, |
242 | 0 | sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); |
243 | |
244 | 0 | if (transforms->num_transforms > 0) { |
245 | 0 | for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; |
246 | 0 | l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { |
247 | 0 | uint16_t n = dict->words->size_bits_by_length[l] ? |
248 | 0 | (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; |
249 | 0 | for (idx = 0; idx < n; ++idx) { |
250 | 0 | uint32_t key; |
251 | | /* First transform (usually identity) */ |
252 | 0 | TransformedDictionaryWord(idx, l, 0, transforms, dict, word, |
253 | 0 | &word_size); |
254 | | /* Cannot hash words smaller than 4 bytes */ |
255 | 0 | if (word_size < 4) { |
256 | | /* Break instead of continue, all next words of this length will have |
257 | | same length after transform */ |
258 | 0 | break; |
259 | 0 | } |
260 | 0 | if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) { |
261 | 0 | return BROTLI_FALSE; |
262 | 0 | } |
263 | 0 | key = Hash(word, NUM_HASH_BITS); |
264 | 0 | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
265 | 0 | words_by_hash_capacity[key], words_by_hash_size[key], |
266 | 0 | MakeDictWord(l, 0, idx)); |
267 | 0 | ++total; |
268 | 0 | } |
269 | 0 | } |
270 | 0 | } |
271 | | |
272 | | /* These LUT transforms only supported if no custom transforms. This is |
273 | | ok, we will use the heavy trie instead. */ |
274 | 0 | if (transforms == BrotliGetTransforms()) { |
275 | 0 | for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; |
276 | 0 | l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { |
277 | 0 | uint16_t n = dict->words->size_bits_by_length[l] ? |
278 | 0 | (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; |
279 | 0 | for (idx = 0; idx < n; ++idx) { |
280 | 0 | int k; |
281 | 0 | BROTLI_BOOL is_ascii = BROTLI_TRUE; |
282 | 0 | size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx; |
283 | 0 | const uint8_t* data = &dict->words->data[offset]; |
284 | 0 | for (k = 0; k < l; ++k) { |
285 | 0 | if (data[k] >= 128) is_ascii = BROTLI_FALSE; |
286 | 0 | } |
287 | 0 | if (data[0] < 128) { |
288 | 0 | int transform = 9; /* {empty, uppercase first, empty} */ |
289 | 0 | uint32_t ix = idx + (uint32_t)transform * n; |
290 | 0 | const BrotliTrieNode* it; |
291 | 0 | TransformedDictionaryWord(idx, l, transform, transforms, |
292 | 0 | dict, word, &word_size); |
293 | 0 | it = BrotliTrieFind(&dedup, word, word_size); |
294 | 0 | if (!it || it->idx_ > ix) { |
295 | 0 | uint32_t key = Hash(word, NUM_HASH_BITS); |
296 | 0 | if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { |
297 | 0 | return BROTLI_FALSE; |
298 | 0 | } |
299 | 0 | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
300 | 0 | words_by_hash_capacity[key], words_by_hash_size[key], |
301 | 0 | MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx)); |
302 | 0 | ++total; |
303 | 0 | } |
304 | 0 | } |
305 | 0 | if (is_ascii) { |
306 | 0 | int transform = 44; /* {empty, uppercase all, empty} */ |
307 | 0 | uint32_t ix = idx + (uint32_t)transform * n; |
308 | 0 | const BrotliTrieNode* it; |
309 | 0 | TransformedDictionaryWord(idx, l, transform, transforms, |
310 | 0 | dict, word, &word_size); |
311 | 0 | it = BrotliTrieFind(&dedup, word, word_size); |
312 | 0 | if (!it || it->idx_ > ix) { |
313 | 0 | uint32_t key = Hash(word, NUM_HASH_BITS); |
314 | 0 | if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { |
315 | 0 | return BROTLI_FALSE; |
316 | 0 | } |
317 | 0 | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
318 | 0 | words_by_hash_capacity[key], words_by_hash_size[key], |
319 | 0 | MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx)); |
320 | 0 | ++total; |
321 | 0 | } |
322 | 0 | } |
323 | 0 | } |
324 | 0 | } |
325 | 0 | } |
326 | | |
327 | 0 | dict_words = (DictWord*)BrotliAllocate(m, |
328 | 0 | sizeof(*dict->dict_words) * (total + 1)); |
329 | 0 | buckets = (uint16_t*)BrotliAllocate(m, |
330 | 0 | sizeof(*dict->buckets) * NUM_HASH_BUCKETS); |
331 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
332 | 0 | dict->dict_words_alloc_size_ = total + 1; |
333 | 0 | dict->dict_words = dict->dict_words_data_ = dict_words; |
334 | 0 | dict->buckets_alloc_size_ = NUM_HASH_BUCKETS; |
335 | 0 | dict->buckets = dict->buckets_data_ = buckets; |
336 | | |
337 | | /* Unused; makes offsets start from 1. */ |
338 | 0 | dict_words[0] = MakeDictWord(0, 0, 0); |
339 | 0 | total = 1; |
340 | 0 | for (i = 0; i < NUM_HASH_BUCKETS; ++i) { |
341 | 0 | size_t num_words = words_by_hash_size[i]; |
342 | 0 | if (num_words > 0) { |
343 | 0 | buckets[i] = (uint16_t)(total); |
344 | 0 | memcpy(&dict_words[total], &words_by_hash[i][0], |
345 | 0 | sizeof(dict_words[0]) * num_words); |
346 | 0 | total += num_words; |
347 | 0 | dict_words[total - 1].len |= 0x80; |
348 | 0 | } else { |
349 | 0 | buckets[i] = 0; |
350 | 0 | } |
351 | 0 | } |
352 | |
353 | 0 | for (i = 0; i < NUM_HASH_BUCKETS; ++i) { |
354 | 0 | BrotliFree(m, words_by_hash[i]); |
355 | 0 | } |
356 | 0 | BrotliFree(m, words_by_hash); |
357 | 0 | BrotliFree(m, words_by_hash_size); |
358 | 0 | BrotliFree(m, words_by_hash_capacity); |
359 | 0 | BrotliTrieFree(m, &dedup); |
360 | |
361 | 0 | return BROTLI_TRUE; |
362 | 0 | } |
363 | | |
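/* Illustrative sketch, not part of this file's code: how the LUT filled in by
   BuildDictionaryLut above is meant to be read (an assumption about the
   consumer side, mirroring the flags written above). buckets[key] is a
   1-based offset into dict_words, 0 marks an empty bucket, and the 0x80 bit
   on .len marks the last entry of a bucket. */
static void ExampleWalkBucketSketch(const BrotliEncoderDictionary* dict,
                                    uint32_t key) {
  size_t offset = dict->buckets[key];  /* key must be < NUM_HASH_BUCKETS */
  if (offset == 0) return;             /* no words hash to this bucket */
  for (;;) {
    DictWord w = dict->dict_words[offset++];
    uint8_t len = (uint8_t)(w.len & 0x7F);  /* strip end-of-bucket flag */
    /* ... try word (w.idx, len) with transform w.transform here ... */
    (void)len;
    if (w.len & 0x80) break;                /* last word of this bucket */
  }
}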
364 | | static void BuildDictionaryHashTable(uint16_t* hash_table_words, |
365 | 0 | uint8_t* hash_table_lengths, const BrotliDictionary* dict) { |
366 | 0 | int j, len; |
367 | | /* The order of the loops is such that in case of collision, words with |
368 | | shorter length are preferred, and in case of same length, words with |
369 | | smaller index. There is only a single word per bucket. */ |
370 | | /* TODO(lode): consider adding optional user-supplied frequency_map to use |
371 | | for preferred words instead, this can make the encoder better for |
372 | | quality 9 and below without affecting the decoder */ |
373 | 0 | memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords)); |
374 | 0 | memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths)); |
375 | 0 | for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; |
376 | 0 | len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) { |
377 | 0 | const size_t num_words = dict->size_bits_by_length[len] ? |
378 | 0 | (1u << dict->size_bits_by_length[len]) : 0; |
379 | 0 | for (j = (int)num_words - 1; j >= 0; --j) { |
380 | 0 | size_t offset = dict->offsets_by_length[len] + |
381 | 0 | (size_t)len * (size_t)j; |
382 | 0 | const uint8_t* word = &dict->data[offset]; |
383 | 0 | const uint32_t key = Hash(word, 14); |
384 | 0 | int idx = (int)(key << 1) + (len < 8 ? 1 : 0); |
385 | 0 | BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS); |
386 | 0 | hash_table_words[idx] = (uint16_t)j; |
387 | 0 | hash_table_lengths[idx] = (uint8_t)len; |
388 | 0 | } |
389 | 0 | } |
390 | 0 | } |
391 | | |
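/* Illustrative sketch, not part of this file's code: a probe matching the
   layout produced by BuildDictionaryHashTable above (an assumption about the
   lookup side). Each 14-bit hash owns two slots: the even one for words of
   length >= 8, the odd one for shorter words; a stored length of 0 means the
   slot is empty. `data` must have at least 4 readable bytes. */
static void ExampleProbeSketch(const BrotliEncoderDictionary* dict,
                               const uint8_t* data) {
  uint32_t key = Hash(data, 14);
  int k;
  for (k = 0; k < 2; ++k) {
    int slot = (int)(key << 1) + k;
    uint8_t len = dict->hash_table_lengths[slot];
    uint16_t word_idx = dict->hash_table_words[slot];
    if (len == 0) continue;  /* empty slot */
    /* ... compare `data` against dictionary word (word_idx, len) ... */
    (void)word_idx;
  }
}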
392 | | static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m, |
393 | | const BrotliTransforms* transforms, |
394 | 0 | BrotliEncoderDictionary* dict) { |
395 | 0 | int i, j, l; |
396 | 0 | for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) { |
397 | 0 | for (l = 0; l < 32; l++) { |
398 | 0 | int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u); |
399 | 0 | for (i = 0; i < num; i++) { |
400 | 0 | uint8_t transformed[kTransformedBufferSize]; |
401 | 0 | size_t size; |
402 | 0 | TransformedDictionaryWord( |
403 | 0 | (uint32_t)i, l, j, transforms, dict, transformed, &size); |
404 | 0 | if (size < 4) continue; |
405 | 0 | if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j), |
406 | 0 | transformed, size, &dict->trie)) { |
407 | 0 | return BROTLI_FALSE; |
408 | 0 | } |
409 | 0 | } |
410 | 0 | } |
411 | 0 | } |
412 | 0 | return BROTLI_TRUE; |
413 | 0 | } |
414 | | |
415 | | /* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for |
416 | | the custom transforms, where possible within the limits of the |
417 | | cutoffTransforms encoding. The fast encoder uses this to do fast lookup for |
418 | | transforms that remove the N last characters (OmitLast). */ |
419 | | static void ComputeCutoffTransforms( |
420 | | const BrotliTransforms* transforms, |
421 | 0 | uint32_t* count, uint64_t* data) { |
422 | 0 | int i; |
423 | | /* The encoding in a 64-bit integer of transform N in the data is: (N << 2) + |
424 | | ((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity |
425 | | transform code must be 0-63, for N=1 the transform code must be 4-67, ..., |
426 | | for N=9 it must be 36-99. |
427 | | TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t |
428 | | for the cutoff transforms, so that shared dictionaries can have the |
429 | | OmitLast transforms anywhere without loss. */ |
430 | 0 | *count = 0; |
431 | 0 | *data = 0; |
432 | 0 | for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) { |
433 | 0 | int idx = transforms->cutOffTransforms[i]; |
434 | 0 | if (idx == -1) break; /* Not found */ |
435 | 0 | if (idx < (i << 2)) break; /* Too small for the encoding */ |
436 | 0 | if (idx >= (i << 2) + 64) break; /* Too large for the encoding */ |
437 | 0 | (*count)++; |
438 | 0 | *data |= (uint64_t)(((uint64_t)idx - |
439 | 0 | ((uint64_t)i << 2u)) << ((uint64_t)i * 6u)); |
440 | 0 | } |
441 | 0 | } |
442 | | |
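/* Illustrative sketch, not part of this file's code: the inverse of the
   packing computed by ComputeCutoffTransforms above. For n < count, the id of
   the transform that removes the last n characters is recovered as
   (n << 2) plus 6-bit field n of `data`, matching the encoding described in
   the comment inside that function. */
static int ExampleCutoffTransformIdSketch(uint64_t data, uint32_t count,
                                          int n) {
  if ((uint32_t)n >= count) return -1;  /* OmitLast(n) not representable */
  return (n << 2) + (int)((data >> (n * 6)) & 0x3F);
}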
443 | | static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality, |
444 | | const BrotliTransforms* transforms, |
445 | 0 | BrotliEncoderDictionary* current) { |
446 | 0 | int default_words = current->words == BrotliGetDictionary(); |
447 | 0 | int default_transforms = transforms == BrotliGetTransforms(); |
448 | |
449 | 0 | if (default_words && default_transforms) { |
450 | | /* hashes are already set to Brotli defaults */ |
451 | 0 | return BROTLI_TRUE; |
452 | 0 | } |
453 | | |
454 | 0 | current->hash_table_data_words_ = (uint16_t*)BrotliAllocate( |
455 | 0 | m, sizeof(kStaticDictionaryHashWords)); |
456 | 0 | current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate( |
457 | 0 | m, sizeof(kStaticDictionaryHashLengths)); |
458 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
459 | 0 | current->hash_table_words = current->hash_table_data_words_; |
460 | 0 | current->hash_table_lengths = current->hash_table_data_lengths_; |
461 | |
462 | 0 | BuildDictionaryHashTable(current->hash_table_data_words_, |
463 | 0 | current->hash_table_data_lengths_, current->words); |
464 | |
465 | 0 | ComputeCutoffTransforms(transforms, |
466 | 0 | &current->cutoffTransformsCount, &current->cutoffTransforms); |
467 | | |
468 | | /* Only compute the data for slow encoder if the requested quality is high |
469 | | enough to need it */ |
470 | 0 | if (quality >= ZOPFLIFICATION_QUALITY) { |
471 | 0 | if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE; |
472 | | |
473 | | /* For the built-in Brotli transforms, there is a hard-coded function to |
474 | | handle all transforms, but for custom transforms, we use the following |
475 | | large hammer instead */ |
476 | 0 | current->has_words_heavy = !default_transforms; |
477 | 0 | if (current->has_words_heavy) { |
478 | 0 | if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE; |
479 | 0 | } |
480 | 0 | } |
481 | | |
482 | 0 | return BROTLI_TRUE; |
483 | 0 | } |
484 | | |
485 | 0 | void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) { |
486 | 0 | dict->magic = kSharedDictionaryMagic; |
487 | |
488 | 0 | dict->compound.num_chunks = 0; |
489 | 0 | dict->compound.total_size = 0; |
490 | 0 | dict->compound.chunk_offsets[0] = 0; |
491 | 0 | dict->compound.num_prepared_instances_ = 0; |
492 | |
493 | 0 | dict->contextual.context_based = 0; |
494 | 0 | dict->contextual.num_dictionaries = 1; |
495 | 0 | dict->contextual.instances_ = 0; |
496 | 0 | dict->contextual.num_instances_ = 1; /* The instance_ field */ |
497 | 0 | dict->contextual.dict[0] = &dict->contextual.instance_; |
498 | 0 | InitEncoderDictionary(&dict->contextual.instance_); |
499 | 0 | dict->contextual.instance_.parent = &dict->contextual; |
500 | |
501 | 0 | dict->max_quality = BROTLI_MAX_QUALITY; |
502 | 0 | } |
503 | | |
504 | | /* TODO(eustas): make sure that tooling will warn user if not all the cutoff |
505 | | transforms are available (for low-quality encoder). */ |
506 | | static BROTLI_BOOL InitCustomSharedEncoderDictionary( |
507 | | MemoryManager* m, const BrotliSharedDictionary* decoded_dict, |
508 | 0 | int quality, SharedEncoderDictionary* dict) { |
509 | 0 | ContextualEncoderDictionary* contextual; |
510 | 0 | CompoundDictionary* compound; |
511 | 0 | BrotliEncoderDictionary* instances; |
512 | 0 | int i; |
513 | 0 | BrotliInitSharedEncoderDictionary(dict); |
514 | |
515 | 0 | contextual = &dict->contextual; |
516 | 0 | compound = &dict->compound; |
517 | |
518 | 0 | for (i = 0; i < (int)decoded_dict->num_prefix; i++) { |
519 | 0 | PreparedDictionary* prepared = CreatePreparedDictionary(m, |
520 | 0 | decoded_dict->prefix[i], decoded_dict->prefix_size[i]); |
521 | 0 | AttachPreparedDictionary(compound, prepared); |
522 | | /* remember for cleanup */ |
523 | 0 | compound->prepared_instances_[ |
524 | 0 | compound->num_prepared_instances_++] = prepared; |
525 | 0 | } |
526 | |
527 | 0 | dict->max_quality = quality; |
528 | 0 | contextual->context_based = decoded_dict->context_based; |
529 | 0 | if (decoded_dict->context_based) { |
530 | 0 | memcpy(contextual->context_map, decoded_dict->context_map, |
531 | 0 | SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS); |
532 | 0 | } |
533 | |
534 | 0 | contextual->num_dictionaries = decoded_dict->num_dictionaries; |
535 | 0 | contextual->num_instances_ = decoded_dict->num_dictionaries; |
536 | 0 | if (contextual->num_instances_ == 1) { |
537 | 0 | instances = &contextual->instance_; |
538 | 0 | } else { |
539 | 0 | contextual->instances_ = (BrotliEncoderDictionary*) |
540 | 0 | BrotliAllocate(m, sizeof(*contextual->instances_) * |
541 | 0 | contextual->num_instances_); |
542 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
543 | 0 | instances = contextual->instances_; |
544 | 0 | } |
545 | 0 | for (i = 0; i < (int)contextual->num_instances_; i++) { |
546 | 0 | BrotliEncoderDictionary* current = &instances[i]; |
547 | 0 | InitEncoderDictionary(current); |
548 | 0 | current->parent = &dict->contextual; |
549 | 0 | if (decoded_dict->words[i] == BrotliGetDictionary()) { |
550 | 0 | current->words = BrotliGetDictionary(); |
551 | 0 | } else { |
552 | 0 | current->words_instance_ = (BrotliDictionary*)BrotliAllocate( |
553 | 0 | m, sizeof(BrotliDictionary)); |
554 | 0 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
555 | 0 | *current->words_instance_ = *decoded_dict->words[i]; |
556 | 0 | current->words = current->words_instance_; |
557 | 0 | } |
558 | 0 | current->num_transforms = |
559 | 0 | (uint32_t)decoded_dict->transforms[i]->num_transforms; |
560 | 0 | if (!ComputeDictionary( |
561 | 0 | m, quality, decoded_dict->transforms[i], current)) { |
562 | 0 | return BROTLI_FALSE; |
563 | 0 | } |
564 | | |
565 | 0 | contextual->dict[i] = current; |
566 | 0 | } |
567 | | |
568 | 0 | return BROTLI_TRUE; /* success */ |
569 | 0 | } |
570 | | |
571 | | BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary( |
572 | | MemoryManager* m, const uint8_t* encoded_dict, size_t size, |
573 | 0 | int quality, SharedEncoderDictionary* dict) { |
574 | 0 | BROTLI_BOOL success = BROTLI_FALSE; |
575 | 0 | BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance( |
576 | 0 | m->alloc_func, m->free_func, m->opaque); |
577 | 0 | if (!decoded_dict) { /* OOM */ |
578 | 0 | return BROTLI_FALSE; |
579 | 0 | } |
580 | 0 | success = BrotliSharedDictionaryAttach( |
581 | 0 | decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict); |
582 | 0 | if (success) { |
583 | 0 | success = InitCustomSharedEncoderDictionary(m, |
584 | 0 | decoded_dict, quality, dict); |
585 | 0 | } |
586 | 0 | BrotliSharedDictionaryDestroyInstance(decoded_dict); |
587 | 0 | return success; |
588 | 0 | } |
589 | | |
590 | | void BrotliCleanupSharedEncoderDictionary(MemoryManager* m, |
591 | 0 | SharedEncoderDictionary* dict) { |
592 | 0 | size_t i; |
593 | 0 | for (i = 0; i < dict->compound.num_prepared_instances_; i++) { |
594 | 0 | DestroyPreparedDictionary(m, |
595 | 0 | (PreparedDictionary*)dict->compound.prepared_instances_[i]); |
596 | 0 | } |
597 | 0 | if (dict->contextual.num_instances_ == 1) { |
598 | 0 | BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_); |
599 | 0 | } else if (dict->contextual.num_instances_ > 1) { |
600 | 0 | for (i = 0; i < dict->contextual.num_instances_; i++) { |
601 | 0 | BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]); |
602 | 0 | } |
603 | 0 | BrotliFree(m, dict->contextual.instances_); |
604 | 0 | } |
605 | 0 | } |
606 | | |
607 | | ManagedDictionary* BrotliCreateManagedDictionary( |
608 | 0 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { |
609 | 0 | ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc( |
610 | 0 | sizeof(ManagedDictionary), alloc_func, free_func, opaque); |
611 | 0 | if (result == NULL) return NULL; |
612 | | |
613 | 0 | result->magic = kManagedDictionaryMagic; |
614 | 0 | BrotliInitMemoryManager( |
615 | 0 | &result->memory_manager_, alloc_func, free_func, opaque); |
616 | 0 | result->dictionary = NULL; |
617 | |
618 | 0 | return result; |
619 | 0 | } |
620 | | |
621 | 0 | void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) { |
622 | 0 | if (!dictionary) return; |
623 | 0 | BrotliBootstrapFree(dictionary, &dictionary->memory_manager_); |
624 | 0 | } |
625 | | |
626 | | /* Escalate internal functions visibility; for testing purposes only. */ |
627 | | #if defined(BROTLI_TEST) |
628 | | void InitEncoderDictionaryForTest(BrotliEncoderDictionary*); |
629 | | void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) { |
630 | | InitEncoderDictionary(d); |
631 | | } |
632 | | #endif |
633 | | |
634 | | #if defined(__cplusplus) || defined(c_plusplus) |
635 | | } /* extern "C" */ |
636 | | #endif |