/src/mupdf/thirdparty/brotli/c/enc/encoder_dict.c
Line | Count | Source |
1 | | /* Copyright 2017 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | #include "encoder_dict.h" |
8 | | |
9 | | #include <stdlib.h> /* malloc, free */ |
10 | | |
11 | | #include "../common/dictionary.h" |
12 | | #include "../common/platform.h" |
13 | | #include "../common/shared_dictionary_internal.h" |
14 | | #include "../common/transform.h" |
15 | | #include "compound_dictionary.h" |
16 | | #include "dictionary_hash.h" |
17 | | #include "memory.h" |
18 | | #include "quality.h" |
19 | | #include "hash.h" |
20 | | |
21 | | #if defined(__cplusplus) || defined(c_plusplus) |
22 | | extern "C" { |
23 | | #endif |
24 | | |
25 | | #define NUM_HASH_BITS 15u |
26 | | #define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS) |
27 | | |
28 | 0 | static void BrotliTrieInit(BrotliTrie* trie) { |
29 | 0 | trie->pool_capacity = 0; |
30 | 0 | trie->pool_size = 0; |
31 | 0 | trie->pool = 0; |
32 | | |
33 | | /* Set up the root node */ |
34 | 0 | trie->root.single = 0; |
35 | 0 | trie->root.len_ = 0; |
36 | 0 | trie->root.idx_ = 0; |
37 | 0 | trie->root.sub = 0; |
38 | 0 | } |
39 | | |
40 | 0 | static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) { |
41 | 0 | BrotliFree(m, trie->pool); |
42 | 0 | } |
43 | | |
44 | | /* Initializes to RFC 7932 static dictionary / transforms. */ |
45 | 0 | static void InitEncoderDictionary(BrotliEncoderDictionary* dict) { |
46 | 0 | dict->words = BrotliGetDictionary(); |
47 | 0 | dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms; |
48 | |
|
49 | 0 | dict->hash_table_words = kStaticDictionaryHashWords; |
50 | 0 | dict->hash_table_lengths = kStaticDictionaryHashLengths; |
51 | 0 | dict->buckets = kStaticDictionaryBuckets; |
52 | 0 | dict->dict_words = kStaticDictionaryWords; |
53 | |
|
54 | 0 | dict->cutoffTransformsCount = kCutoffTransformsCount; |
55 | 0 | dict->cutoffTransforms = kCutoffTransforms; |
56 | |
|
57 | 0 | dict->parent = 0; |
58 | |
|
59 | 0 | dict->hash_table_data_words_ = 0; |
60 | 0 | dict->hash_table_data_lengths_ = 0; |
61 | 0 | dict->buckets_alloc_size_ = 0; |
62 | 0 | dict->buckets_data_ = 0; |
63 | 0 | dict->dict_words_alloc_size_ = 0; |
64 | 0 | dict->dict_words_data_ = 0; |
65 | 0 | dict->words_instance_ = 0; |
66 | 0 | dict->has_words_heavy = BROTLI_FALSE; |
67 | 0 | BrotliTrieInit(&dict->trie); |
68 | 0 | } |
69 | | |
70 | | static void BrotliDestroyEncoderDictionary(MemoryManager* m, |
71 | 0 | BrotliEncoderDictionary* dict) { |
72 | 0 | BrotliFree(m, dict->hash_table_data_words_); |
73 | 0 | BrotliFree(m, dict->hash_table_data_lengths_); |
74 | 0 | BrotliFree(m, dict->buckets_data_); |
75 | 0 | BrotliFree(m, dict->dict_words_data_); |
76 | 0 | BrotliFree(m, dict->words_instance_); |
77 | 0 | BrotliTrieFree(m, &dict->trie); |
78 | 0 | } |
79 | | |
80 | | #if defined(BROTLI_EXPERIMENTAL) |
81 | | /* Word length must be at least 4 bytes */ |
82 | | static uint32_t Hash(const uint8_t* data, int bits) { |
83 | | uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; |
84 | | /* The higher bits contain more mixture from the multiplication, |
85 | | so we take our results from there. */ |
86 | | return h >> (32 - bits); |
87 | | } |
88 | | |
89 | | /* Theoretical max possible word size after transform */ |
90 | | #define kTransformedBufferSize \ |
91 | | (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) |
92 | | |
93 | | /* To be safe buffer must have at least kTransformedBufferSize */ |
94 | | static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform, |
95 | | const BrotliTransforms* transforms, |
96 | | const BrotliEncoderDictionary* dict, |
97 | | uint8_t* buffer, size_t* size) { |
98 | | const uint8_t* dict_word = &dict->words->data[ |
99 | | dict->words->offsets_by_length[len] + (uint32_t)len * word_idx]; |
100 | | *size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len, |
101 | | transforms, transform); |
102 | | } |
103 | | |
104 | | static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) { |
105 | | DictWord result; |
106 | | result.len = len; |
107 | | result.transform = transform; |
108 | | result.idx = idx; |
109 | | return result; |
110 | | } |
111 | | |
112 | | static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie, |
113 | | BrotliTrieNode** keep) { |
114 | | uint32_t result; |
115 | | uint32_t keep_index = 0; |
116 | | if (keep && *keep != &trie->root) { |
117 | | /* Optional node to keep, since address may change after re-allocating */ |
118 | | keep_index = (uint32_t)(*keep - trie->pool); |
119 | | } |
120 | | if (trie->pool_size == 0) { |
121 | | /* Have a placeholder node in the front. We do not want the result to be 0, |
122 | | it must be at least 1, 0 represents "null pointer" */ |
123 | | trie->pool_size = 1; |
124 | | } |
125 | | BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity, |
126 | | trie->pool_size + num); |
127 | | if (BROTLI_IS_OOM(m)) return 0; |
128 | | /* Init the new nodes to empty */ |
129 | | memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num); |
130 | | result = (uint32_t)trie->pool_size; |
131 | | trie->pool_size += num; |
132 | | if (keep && *keep != &trie->root) { |
133 | | *keep = trie->pool + keep_index; |
134 | | } |
135 | | return result; |
136 | | } |
137 | | |
138 | | /** |
139 | | * len and idx: payload for last node |
140 | | * word, size: the string |
141 | | * index: position in the string |
142 | | */ |
143 | | static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len, |
144 | | uint32_t idx, const uint8_t* word, size_t size, int index, |
145 | | BrotliTrieNode* node, BrotliTrie* trie) { |
146 | | BrotliTrieNode* child = 0; |
147 | | uint8_t c; |
148 | | if ((size_t)index == size) { |
149 | | if (!node->len_ || idx < node->idx_) { |
150 | | node->len_ = len; |
151 | | node->idx_ = idx; |
152 | | } |
153 | | return BROTLI_TRUE; |
154 | | } |
155 | | c = word[index]; |
156 | | if (node->single && c != node->c) { |
157 | | BrotliTrieNode old = trie->pool[node->sub]; |
158 | | uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node); |
159 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
160 | | node->single = 0; |
161 | | node->sub = new_nodes; |
162 | | trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16; |
163 | | trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] = |
164 | | old; |
165 | | } |
166 | | if (!node->sub) { |
167 | | uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node); |
168 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
169 | | node->single = 1; |
170 | | node->c = c; |
171 | | node->sub = new_node; |
172 | | } |
173 | | if (node->single) { |
174 | | child = &trie->pool[node->sub]; |
175 | | } else { |
176 | | if (!trie->pool[node->sub + (c >> 4)].sub) { |
177 | | uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node); |
178 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
179 | | trie->pool[node->sub + (c >> 4)].sub = new_nodes; |
180 | | } |
181 | | child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)]; |
182 | | } |
183 | | return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie); |
184 | | } |
185 | | |
186 | | static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx, |
187 | | const uint8_t* word, size_t size, BrotliTrie* trie) { |
188 | | return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie); |
189 | | } |
190 | | |
191 | | const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie, |
192 | | const BrotliTrieNode* node, uint8_t c) { |
193 | | BrotliTrieNode* temp_node; |
194 | | if (node->single) { |
195 | | if (node->c == c) return &trie->pool[node->sub]; |
196 | | return 0; |
197 | | } |
198 | | if (!node->sub) return 0; |
199 | | temp_node = &trie->pool[node->sub + (c >> 4)]; |
200 | | if (!temp_node->sub) return 0; |
201 | | return &trie->pool[temp_node->sub + (c & 15)]; |
202 | | } |
203 | | |
204 | | static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie, |
205 | | const uint8_t* word, size_t size) { |
206 | | const BrotliTrieNode* node = &trie->root; |
207 | | size_t i; |
208 | | for (i = 0; i < size; i++) { |
209 | | node = BrotliTrieSub(trie, node, word[i]); |
210 | | if (!node) return 0; |
211 | | } |
212 | | return node; |
213 | | } |
214 | | |
215 | | static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m, |
216 | | const BrotliTransforms* transforms, |
217 | | BrotliEncoderDictionary* dict) { |
218 | | uint32_t i; |
219 | | DictWord* dict_words; |
220 | | uint16_t* buckets; |
221 | | DictWord** words_by_hash; |
222 | | size_t* words_by_hash_size; |
223 | | size_t* words_by_hash_capacity; |
224 | | BrotliTrie dedup; |
225 | | uint8_t word[kTransformedBufferSize]; |
226 | | size_t word_size; |
227 | | size_t total = 0; |
228 | | uint8_t l; |
229 | | uint16_t idx; |
230 | | |
231 | | BrotliTrieInit(&dedup); |
232 | | |
233 | | words_by_hash = (DictWord**)BrotliAllocate(m, |
234 | | sizeof(*words_by_hash) * NUM_HASH_BUCKETS); |
235 | | words_by_hash_size = (size_t*)BrotliAllocate(m, |
236 | | sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); |
237 | | words_by_hash_capacity = (size_t*)BrotliAllocate(m, |
238 | | sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); |
239 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
240 | | memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS); |
241 | | memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); |
242 | | memset(words_by_hash_capacity, 0, |
243 | | sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); |
244 | | |
245 | | if (transforms->num_transforms > 0) { |
246 | | for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; |
247 | | l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { |
248 | | uint16_t n = dict->words->size_bits_by_length[l] ? |
249 | | (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; |
250 | | for (idx = 0; idx < n; ++idx) { |
251 | | uint32_t key; |
252 | | /* First transform (usually identity) */ |
253 | | TransformedDictionaryWord(idx, l, 0, transforms, dict, word, |
254 | | &word_size); |
255 | | /* Cannot hash words smaller than 4 bytes */ |
256 | | if (word_size < 4) { |
257 | | /* Break instead of continue, all next words of this length will have |
258 | | same length after transform */ |
259 | | break; |
260 | | } |
261 | | if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) { |
262 | | return BROTLI_FALSE; |
263 | | } |
264 | | key = Hash(word, NUM_HASH_BITS); |
265 | | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
266 | | words_by_hash_capacity[key], words_by_hash_size[key], |
267 | | MakeDictWord(l, 0, idx)); |
268 | | ++total; |
269 | | } |
270 | | } |
271 | | } |
272 | | |
273 | | /* These LUT transforms only supported if no custom transforms. This is |
274 | | ok, we will use the heavy trie instead. */ |
275 | | if (transforms == BrotliGetTransforms()) { |
276 | | for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; |
277 | | l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { |
278 | | uint16_t n = dict->words->size_bits_by_length[l] ? |
279 | | (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; |
280 | | for (idx = 0; idx < n; ++idx) { |
281 | | int k; |
282 | | BROTLI_BOOL is_ascii = BROTLI_TRUE; |
283 | | size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx; |
284 | | const uint8_t* data = &dict->words->data[offset]; |
285 | | for (k = 0; k < l; ++k) { |
286 | | if (data[k] >= 128) is_ascii = BROTLI_FALSE; |
287 | | } |
288 | | if (data[0] < 128) { |
289 | | int transform = 9; /* {empty, uppercase first, empty} */ |
290 | | uint32_t ix = idx + (uint32_t)transform * n; |
291 | | const BrotliTrieNode* it; |
292 | | TransformedDictionaryWord(idx, l, transform, transforms, |
293 | | dict, word, &word_size); |
294 | | it = BrotliTrieFind(&dedup, word, word_size); |
295 | | if (!it || it->idx_ > ix) { |
296 | | uint32_t key = Hash(word, NUM_HASH_BITS); |
297 | | if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { |
298 | | return BROTLI_FALSE; |
299 | | } |
300 | | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
301 | | words_by_hash_capacity[key], words_by_hash_size[key], |
302 | | MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx)); |
303 | | ++total; |
304 | | } |
305 | | } |
306 | | if (is_ascii) { |
307 | | int transform = 44; /* {empty, uppercase all, empty} */ |
308 | | uint32_t ix = idx + (uint32_t)transform * n; |
309 | | const BrotliTrieNode* it; |
310 | | TransformedDictionaryWord(idx, l, transform, transforms, |
311 | | dict, word, &word_size); |
312 | | it = BrotliTrieFind(&dedup, word, word_size); |
313 | | if (!it || it->idx_ > ix) { |
314 | | uint32_t key = Hash(word, NUM_HASH_BITS); |
315 | | if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { |
316 | | return BROTLI_FALSE; |
317 | | } |
318 | | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
319 | | words_by_hash_capacity[key], words_by_hash_size[key], |
320 | | MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx)); |
321 | | ++total; |
322 | | } |
323 | | } |
324 | | } |
325 | | } |
326 | | } |
327 | | |
328 | | dict_words = (DictWord*)BrotliAllocate(m, |
329 | | sizeof(*dict->dict_words) * (total + 1)); |
330 | | buckets = (uint16_t*)BrotliAllocate(m, |
331 | | sizeof(*dict->buckets) * NUM_HASH_BUCKETS); |
332 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
333 | | dict->dict_words_alloc_size_ = total + 1; |
334 | | dict->dict_words = dict->dict_words_data_ = dict_words; |
335 | | dict->buckets_alloc_size_ = NUM_HASH_BUCKETS; |
336 | | dict->buckets = dict->buckets_data_ = buckets; |
337 | | |
338 | | /* Unused; makes offsets start from 1. */ |
339 | | dict_words[0] = MakeDictWord(0, 0, 0); |
340 | | total = 1; |
341 | | for (i = 0; i < NUM_HASH_BUCKETS; ++i) { |
342 | | size_t num_words = words_by_hash_size[i]; |
343 | | if (num_words > 0) { |
344 | | buckets[i] = (uint16_t)(total); |
345 | | memcpy(&dict_words[total], &words_by_hash[i][0], |
346 | | sizeof(dict_words[0]) * num_words); |
347 | | total += num_words; |
348 | | dict_words[total - 1].len |= 0x80; |
349 | | } else { |
350 | | buckets[i] = 0; |
351 | | } |
352 | | } |
353 | | |
354 | | for (i = 0; i < NUM_HASH_BUCKETS; ++i) { |
355 | | BrotliFree(m, words_by_hash[i]); |
356 | | } |
357 | | BrotliFree(m, words_by_hash); |
358 | | BrotliFree(m, words_by_hash_size); |
359 | | BrotliFree(m, words_by_hash_capacity); |
360 | | BrotliTrieFree(m, &dedup); |
361 | | |
362 | | return BROTLI_TRUE; |
363 | | } |
364 | | |
365 | | static void BuildDictionaryHashTable(uint16_t* hash_table_words, |
366 | | uint8_t* hash_table_lengths, const BrotliDictionary* dict) { |
367 | | int j, len; |
368 | | /* The order of the loops is such that in case of collision, words with |
369 | | shorter length are preferred, and in case of same length, words with |
370 | | smaller index. There is only a single word per bucket. */ |
371 | | /* TODO(lode): consider adding optional user-supplied frequency_map to use |
372 | | for preferred words instead, this can make the encoder better for |
373 | | quality 9 and below without affecting the decoder */ |
374 | | memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords)); |
375 | | memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths)); |
376 | | for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; |
377 | | len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) { |
378 | | const size_t num_words = dict->size_bits_by_length[len] ? |
379 | | (1u << dict->size_bits_by_length[len]) : 0; |
380 | | for (j = (int)num_words - 1; j >= 0; --j) { |
381 | | size_t offset = dict->offsets_by_length[len] + |
382 | | (size_t)len * (size_t)j; |
383 | | const uint8_t* word = &dict->data[offset]; |
384 | | const uint32_t key = Hash(word, 14); |
385 | | int idx = (int)(key << 1) + (len < 8 ? 1 : 0); |
386 | | BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS); |
387 | | hash_table_words[idx] = (uint16_t)j; |
388 | | hash_table_lengths[idx] = (uint8_t)len; |
389 | | } |
390 | | } |
391 | | } |
392 | | |
393 | | static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m, |
394 | | const BrotliTransforms* transforms, |
395 | | BrotliEncoderDictionary* dict) { |
396 | | int i, j, l; |
397 | | for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) { |
398 | | for (l = 0; l < 32; l++) { |
399 | | int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u); |
400 | | for (i = 0; i < num; i++) { |
401 | | uint8_t transformed[kTransformedBufferSize]; |
402 | | size_t size; |
403 | | TransformedDictionaryWord( |
404 | | (uint32_t)i, l, j, transforms, dict, transformed, &size); |
405 | | if (size < 4) continue; |
406 | | if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j), |
407 | | transformed, size, &dict->trie)) { |
408 | | return BROTLI_FALSE; |
409 | | } |
410 | | } |
411 | | } |
412 | | } |
413 | | return BROTLI_TRUE; |
414 | | } |
415 | | |
416 | | /* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for |
417 | | the custom transforms, where possible within the limits of the |
418 | | cutoffTransforms encoding. The fast encoder uses this to do fast lookup for |
419 | | transforms that remove the N last characters (OmitLast). */ |
420 | | static void ComputeCutoffTransforms( |
421 | | const BrotliTransforms* transforms, |
422 | | uint32_t* count, uint64_t* data) { |
423 | | int i; |
424 | | /* The encoding in a 64-bit integer of transform N in the data is: (N << 2) + |
425 | | ((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity |
426 | | transform code must be 0-63, for N=1 the transform code must be 4-67, ..., |
427 | | for N=9 it must be 36-99. |
428 | | TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t |
429 | | for the cutoff transforms, so that shared dictionaries can have the |
430 | | OmitLast transforms anywhere without loss. */ |
431 | | *count = 0; |
432 | | *data = 0; |
433 | | for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) { |
434 | | int idx = transforms->cutOffTransforms[i]; |
435 | | if (idx == -1) break; /* Not found */ |
436 | | if (idx < (i << 2)) break; /* Too small for the encoding */ |
437 | | if (idx >= (i << 2) + 64) break; /* Too large for the encoding */ |
438 | | (*count)++; |
439 | | *data |= (uint64_t)(((uint64_t)idx - |
440 | | ((uint64_t)i << 2u)) << ((uint64_t)i * 6u)); |
441 | | } |
442 | | } |
443 | | |
444 | | static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality, |
445 | | const BrotliTransforms* transforms, |
446 | | BrotliEncoderDictionary* current) { |
447 | | int default_words = current->words == BrotliGetDictionary(); |
448 | | int default_transforms = transforms == BrotliGetTransforms(); |
449 | | |
450 | | if (default_words && default_transforms) { |
451 | | /* hashes are already set to Brotli defaults */ |
452 | | return BROTLI_TRUE; |
453 | | } |
454 | | |
455 | | current->hash_table_data_words_ = (uint16_t*)BrotliAllocate( |
456 | | m, sizeof(kStaticDictionaryHashWords)); |
457 | | current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate( |
458 | | m, sizeof(kStaticDictionaryHashLengths)); |
459 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
460 | | current->hash_table_words = current->hash_table_data_words_; |
461 | | current->hash_table_lengths = current->hash_table_data_lengths_; |
462 | | |
463 | | BuildDictionaryHashTable(current->hash_table_data_words_, |
464 | | current->hash_table_data_lengths_, current->words); |
465 | | |
466 | | ComputeCutoffTransforms(transforms, |
467 | | ¤t->cutoffTransformsCount, ¤t->cutoffTransforms); |
468 | | |
469 | | /* Only compute the data for slow encoder if the requested quality is high |
470 | | enough to need it */ |
471 | | if (quality >= ZOPFLIFICATION_QUALITY) { |
472 | | if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE; |
473 | | |
474 | | /* For the built-in Brotli transforms, there is a hard-coded function to |
475 | | handle all transforms, but for custom transforms, we use the following |
476 | | large hammer instead */ |
477 | | current->has_words_heavy = !default_transforms; |
478 | | if (current->has_words_heavy) { |
479 | | if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE; |
480 | | } |
481 | | } |
482 | | |
483 | | return BROTLI_TRUE; |
484 | | } |
485 | | #endif /* BROTLI_EXPERIMENTAL */ |
486 | | |
487 | 0 | void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) { |
488 | 0 | dict->magic = kSharedDictionaryMagic; |
489 | |
|
490 | 0 | dict->compound.num_chunks = 0; |
491 | 0 | dict->compound.total_size = 0; |
492 | 0 | dict->compound.chunk_offsets[0] = 0; |
493 | 0 | dict->compound.num_prepared_instances_ = 0; |
494 | |
|
495 | 0 | dict->contextual.context_based = 0; |
496 | 0 | dict->contextual.num_dictionaries = 1; |
497 | 0 | dict->contextual.instances_ = 0; |
498 | 0 | dict->contextual.num_instances_ = 1; /* The instance_ field */ |
499 | 0 | dict->contextual.dict[0] = &dict->contextual.instance_; |
500 | 0 | InitEncoderDictionary(&dict->contextual.instance_); |
501 | 0 | dict->contextual.instance_.parent = &dict->contextual; |
502 | |
|
503 | 0 | dict->max_quality = BROTLI_MAX_QUALITY; |
504 | 0 | } |
505 | | |
506 | | #if defined(BROTLI_EXPERIMENTAL) |
507 | | /* TODO(eustas): make sure that tooling will warn user if not all the cutoff |
508 | | transforms are available (for low-quality encoder). */ |
509 | | static BROTLI_BOOL InitCustomSharedEncoderDictionary( |
510 | | MemoryManager* m, const BrotliSharedDictionary* decoded_dict, |
511 | | int quality, SharedEncoderDictionary* dict) { |
512 | | ContextualEncoderDictionary* contextual; |
513 | | CompoundDictionary* compound; |
514 | | BrotliEncoderDictionary* instances; |
515 | | int i; |
516 | | BrotliInitSharedEncoderDictionary(dict); |
517 | | |
518 | | contextual = &dict->contextual; |
519 | | compound = &dict->compound; |
520 | | |
521 | | for (i = 0; i < (int)decoded_dict->num_prefix; i++) { |
522 | | PreparedDictionary* prepared = CreatePreparedDictionary(m, |
523 | | decoded_dict->prefix[i], decoded_dict->prefix_size[i]); |
524 | | AttachPreparedDictionary(compound, prepared); |
525 | | /* remember for cleanup */ |
526 | | compound->prepared_instances_[ |
527 | | compound->num_prepared_instances_++] = prepared; |
528 | | } |
529 | | |
530 | | dict->max_quality = quality; |
531 | | contextual->context_based = decoded_dict->context_based; |
532 | | if (decoded_dict->context_based) { |
533 | | memcpy(contextual->context_map, decoded_dict->context_map, |
534 | | SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS); |
535 | | } |
536 | | |
537 | | contextual->num_dictionaries = decoded_dict->num_dictionaries; |
538 | | contextual->num_instances_ = decoded_dict->num_dictionaries; |
539 | | if (contextual->num_instances_ == 1) { |
540 | | instances = &contextual->instance_; |
541 | | } else { |
542 | | contextual->instances_ = (BrotliEncoderDictionary*) |
543 | | BrotliAllocate(m, sizeof(*contextual->instances_) * |
544 | | contextual->num_instances_); |
545 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
546 | | instances = contextual->instances_; |
547 | | } |
548 | | for (i = 0; i < (int)contextual->num_instances_; i++) { |
549 | | BrotliEncoderDictionary* current = &instances[i]; |
550 | | InitEncoderDictionary(current); |
551 | | current->parent = &dict->contextual; |
552 | | if (decoded_dict->words[i] == BrotliGetDictionary()) { |
553 | | current->words = BrotliGetDictionary(); |
554 | | } else { |
555 | | current->words_instance_ = (BrotliDictionary*)BrotliAllocate( |
556 | | m, sizeof(BrotliDictionary)); |
557 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
558 | | *current->words_instance_ = *decoded_dict->words[i]; |
559 | | current->words = current->words_instance_; |
560 | | } |
561 | | current->num_transforms = |
562 | | (uint32_t)decoded_dict->transforms[i]->num_transforms; |
563 | | if (!ComputeDictionary( |
564 | | m, quality, decoded_dict->transforms[i], current)) { |
565 | | return BROTLI_FALSE; |
566 | | } |
567 | | |
568 | | contextual->dict[i] = current; |
569 | | } |
570 | | |
571 | | return BROTLI_TRUE; /* success */ |
572 | | } |
573 | | |
574 | | BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary( |
575 | | MemoryManager* m, const uint8_t* encoded_dict, size_t size, |
576 | | int quality, SharedEncoderDictionary* dict) { |
577 | | BROTLI_BOOL success = BROTLI_FALSE; |
578 | | BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance( |
579 | | m->alloc_func, m->free_func, m->opaque); |
580 | | if (!decoded_dict) { /* OOM */ |
581 | | return BROTLI_FALSE; |
582 | | } |
583 | | success = BrotliSharedDictionaryAttach( |
584 | | decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict); |
585 | | if (success) { |
586 | | success = InitCustomSharedEncoderDictionary(m, |
587 | | decoded_dict, quality, dict); |
588 | | } |
589 | | BrotliSharedDictionaryDestroyInstance(decoded_dict); |
590 | | return success; |
591 | | } |
592 | | #endif /* BROTLI_EXPERIMENTAL */ |
593 | | |
594 | | void BrotliCleanupSharedEncoderDictionary(MemoryManager* m, |
595 | 0 | SharedEncoderDictionary* dict) { |
596 | 0 | size_t i; |
597 | 0 | for (i = 0; i < dict->compound.num_prepared_instances_; i++) { |
598 | 0 | DestroyPreparedDictionary(m, |
599 | 0 | (PreparedDictionary*)dict->compound.prepared_instances_[i]); |
600 | 0 | } |
601 | 0 | if (dict->contextual.num_instances_ == 1) { |
602 | 0 | BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_); |
603 | 0 | } else if (dict->contextual.num_instances_ > 1) { |
604 | 0 | for (i = 0; i < dict->contextual.num_instances_; i++) { |
605 | 0 | BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]); |
606 | 0 | } |
607 | 0 | BrotliFree(m, dict->contextual.instances_); |
608 | 0 | } |
609 | 0 | } |
610 | | |
611 | | ManagedDictionary* BrotliCreateManagedDictionary( |
612 | 0 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { |
613 | 0 | ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc( |
614 | 0 | sizeof(ManagedDictionary), alloc_func, free_func, opaque); |
615 | 0 | if (result == NULL) return NULL; |
616 | | |
617 | 0 | result->magic = kManagedDictionaryMagic; |
618 | 0 | BrotliInitMemoryManager( |
619 | 0 | &result->memory_manager_, alloc_func, free_func, opaque); |
620 | 0 | result->dictionary = NULL; |
621 | |
|
622 | 0 | return result; |
623 | 0 | } |
624 | | |
625 | 0 | void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) { |
626 | 0 | if (!dictionary) return; |
627 | 0 | BrotliBootstrapFree(dictionary, &dictionary->memory_manager_); |
628 | 0 | } |
629 | | |
630 | | /* Escalate internal functions visibility; for testing purposes only. */ |
631 | | #if defined(BROTLI_TEST) |
632 | | void InitEncoderDictionaryForTest(BrotliEncoderDictionary*); |
633 | | void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) { |
634 | | InitEncoderDictionary(d); |
635 | | } |
636 | | #endif |
637 | | |
638 | | #if defined(__cplusplus) || defined(c_plusplus) |
639 | | } /* extern "C" */ |
640 | | #endif |