/src/ghostpdl/brotli/c/enc/encoder_dict.c
Line | Count | Source |
1 | | /* Copyright 2017 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | #include "encoder_dict.h" |
8 | | |
9 | | #include "../common/dictionary.h" |
10 | | #include "../common/platform.h" |
11 | | #include <brotli/shared_dictionary.h> |
12 | | #include "../common/shared_dictionary_internal.h" |
13 | | #include "../common/transform.h" |
14 | | #include <brotli/encode.h> |
15 | | #include "compound_dictionary.h" |
16 | | #include "dictionary_hash.h" |
17 | | #include "hash_base.h" |
18 | | #include "hash.h" |
19 | | #include "memory.h" |
20 | | #include "quality.h" |
21 | | #include "static_dict_lut.h" |
22 | | |
23 | | #if defined(__cplusplus) || defined(c_plusplus) |
24 | | extern "C" { |
25 | | #endif |
26 | | |
27 | | #define NUM_HASH_BITS 15u |
28 | | #define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS) |
29 | | |
30 | 0 | static void BrotliTrieInit(BrotliTrie* trie) { |
31 | 0 | trie->pool_capacity = 0; |
32 | 0 | trie->pool_size = 0; |
33 | 0 | trie->pool = 0; |
34 | | |
35 | | /* Set up the root node */ |
36 | 0 | trie->root.single = 0; |
37 | 0 | trie->root.len_ = 0; |
38 | 0 | trie->root.idx_ = 0; |
39 | 0 | trie->root.sub = 0; |
40 | 0 | } |
41 | | |
42 | 0 | static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) { |
43 | 0 | BrotliFree(m, trie->pool); |
44 | 0 | } |
45 | | |
46 | | /* Initializes to RFC 7932 static dictionary / transforms. */ |
47 | 0 | static void InitEncoderDictionary(BrotliEncoderDictionary* dict) { |
48 | 0 | dict->words = BrotliGetDictionary(); |
49 | 0 | dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms; |
50 | |
|
51 | 0 | dict->hash_table_words = kStaticDictionaryHashWords; |
52 | 0 | dict->hash_table_lengths = kStaticDictionaryHashLengths; |
53 | 0 | dict->buckets = kStaticDictionaryBuckets; |
54 | 0 | dict->dict_words = kStaticDictionaryWords; |
55 | |
|
56 | 0 | dict->cutoffTransformsCount = kCutoffTransformsCount; |
57 | 0 | dict->cutoffTransforms = kCutoffTransforms; |
58 | |
|
59 | 0 | dict->parent = 0; |
60 | |
|
61 | 0 | dict->hash_table_data_words_ = 0; |
62 | 0 | dict->hash_table_data_lengths_ = 0; |
63 | 0 | dict->buckets_alloc_size_ = 0; |
64 | 0 | dict->buckets_data_ = 0; |
65 | 0 | dict->dict_words_alloc_size_ = 0; |
66 | 0 | dict->dict_words_data_ = 0; |
67 | 0 | dict->words_instance_ = 0; |
68 | 0 | dict->has_words_heavy = BROTLI_FALSE; |
69 | 0 | BrotliTrieInit(&dict->trie); |
70 | 0 | } |
71 | | |
72 | | static void BrotliDestroyEncoderDictionary(MemoryManager* m, |
73 | 0 | BrotliEncoderDictionary* dict) { |
74 | 0 | BrotliFree(m, dict->hash_table_data_words_); |
75 | 0 | BrotliFree(m, dict->hash_table_data_lengths_); |
76 | 0 | BrotliFree(m, dict->buckets_data_); |
77 | 0 | BrotliFree(m, dict->dict_words_data_); |
78 | 0 | BrotliFree(m, dict->words_instance_); |
79 | 0 | BrotliTrieFree(m, &dict->trie); |
80 | 0 | } |
81 | | |
82 | | #if defined(BROTLI_EXPERIMENTAL) |
83 | | /* Word length must be at least 4 bytes */ |
84 | | static uint32_t Hash(const uint8_t* data, int bits) { |
85 | | uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; |
86 | | /* The higher bits contain more mixture from the multiplication, |
87 | | so we take our results from there. */ |
88 | | return h >> (32 - bits); |
89 | | } |
90 | | |
91 | | /* Theoretical max possible word size after transform */ |
92 | | #define kTransformedBufferSize \ |
93 | | (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) |
94 | | |
95 | | /* To be safe buffer must have at least kTransformedBufferSize */ |
96 | | static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform, |
97 | | const BrotliTransforms* transforms, |
98 | | const BrotliEncoderDictionary* dict, |
99 | | uint8_t* buffer, size_t* size) { |
100 | | const uint8_t* dict_word = &dict->words->data[ |
101 | | dict->words->offsets_by_length[len] + (uint32_t)len * word_idx]; |
102 | | *size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len, |
103 | | transforms, transform); |
104 | | } |
105 | | |
106 | | static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) { |
107 | | DictWord result; |
108 | | result.len = len; |
109 | | result.transform = transform; |
110 | | result.idx = idx; |
111 | | return result; |
112 | | } |
113 | | |
114 | | static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie, |
115 | | BrotliTrieNode** keep) { |
116 | | uint32_t result; |
117 | | uint32_t keep_index = 0; |
118 | | if (keep && *keep != &trie->root) { |
119 | | /* Optional node to keep, since address may change after re-allocating */ |
120 | | keep_index = (uint32_t)(*keep - trie->pool); |
121 | | } |
122 | | if (trie->pool_size == 0) { |
123 | | /* Have a placeholder node in the front. We do not want the result to be 0, |
124 | | it must be at least 1, 0 represents "null pointer" */ |
125 | | trie->pool_size = 1; |
126 | | } |
127 | | BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity, |
128 | | trie->pool_size + num); |
129 | | if (BROTLI_IS_OOM(m)) return 0; |
130 | | /* Init the new nodes to empty */ |
131 | | memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num); |
132 | | result = (uint32_t)trie->pool_size; |
133 | | trie->pool_size += num; |
134 | | if (keep && *keep != &trie->root) { |
135 | | *keep = trie->pool + keep_index; |
136 | | } |
137 | | return result; |
138 | | } |
139 | | |
140 | | /** |
141 | | * len and idx: payload for last node |
142 | | * word, size: the string |
143 | | * index: position in the string |
144 | | */ |
145 | | static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len, |
146 | | uint32_t idx, const uint8_t* word, size_t size, int index, |
147 | | BrotliTrieNode* node, BrotliTrie* trie) { |
148 | | BrotliTrieNode* child = 0; |
149 | | uint8_t c; |
150 | | if ((size_t)index == size) { |
151 | | if (!node->len_ || idx < node->idx_) { |
152 | | node->len_ = len; |
153 | | node->idx_ = idx; |
154 | | } |
155 | | return BROTLI_TRUE; |
156 | | } |
157 | | c = word[index]; |
158 | | if (node->single && c != node->c) { |
159 | | BrotliTrieNode old = trie->pool[node->sub]; |
160 | | uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node); |
161 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
162 | | node->single = 0; |
163 | | node->sub = new_nodes; |
164 | | trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16; |
165 | | trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] = |
166 | | old; |
167 | | } |
168 | | if (!node->sub) { |
169 | | uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node); |
170 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
171 | | node->single = 1; |
172 | | node->c = c; |
173 | | node->sub = new_node; |
174 | | } |
175 | | if (node->single) { |
176 | | child = &trie->pool[node->sub]; |
177 | | } else { |
178 | | if (!trie->pool[node->sub + (c >> 4)].sub) { |
179 | | uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node); |
180 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
181 | | trie->pool[node->sub + (c >> 4)].sub = new_nodes; |
182 | | } |
183 | | child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)]; |
184 | | } |
185 | | return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie); |
186 | | } |
187 | | |
188 | | static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx, |
189 | | const uint8_t* word, size_t size, BrotliTrie* trie) { |
190 | | return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie); |
191 | | } |
192 | | |
193 | | const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie, |
194 | | const BrotliTrieNode* node, uint8_t c) { |
195 | | BrotliTrieNode* temp_node; |
196 | | if (node->single) { |
197 | | if (node->c == c) return &trie->pool[node->sub]; |
198 | | return 0; |
199 | | } |
200 | | if (!node->sub) return 0; |
201 | | temp_node = &trie->pool[node->sub + (c >> 4)]; |
202 | | if (!temp_node->sub) return 0; |
203 | | return &trie->pool[temp_node->sub + (c & 15)]; |
204 | | } |
205 | | |
206 | | static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie, |
207 | | const uint8_t* word, size_t size) { |
208 | | const BrotliTrieNode* node = &trie->root; |
209 | | size_t i; |
210 | | for (i = 0; i < size; i++) { |
211 | | node = BrotliTrieSub(trie, node, word[i]); |
212 | | if (!node) return 0; |
213 | | } |
214 | | return node; |
215 | | } |
216 | | |
217 | | static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m, |
218 | | const BrotliTransforms* transforms, |
219 | | BrotliEncoderDictionary* dict) { |
220 | | uint32_t i; |
221 | | DictWord* dict_words; |
222 | | uint16_t* buckets; |
223 | | DictWord** words_by_hash; |
224 | | size_t* words_by_hash_size; |
225 | | size_t* words_by_hash_capacity; |
226 | | BrotliTrie dedup; |
227 | | uint8_t word[kTransformedBufferSize]; |
228 | | size_t word_size; |
229 | | size_t total = 0; |
230 | | uint8_t l; |
231 | | uint16_t idx; |
232 | | |
233 | | BrotliTrieInit(&dedup); |
234 | | |
235 | | words_by_hash = (DictWord**)BrotliAllocate(m, |
236 | | sizeof(*words_by_hash) * NUM_HASH_BUCKETS); |
237 | | words_by_hash_size = (size_t*)BrotliAllocate(m, |
238 | | sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); |
239 | | words_by_hash_capacity = (size_t*)BrotliAllocate(m, |
240 | | sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); |
241 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
242 | | memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS); |
243 | | memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); |
244 | | memset(words_by_hash_capacity, 0, |
245 | | sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); |
246 | | |
247 | | if (transforms->num_transforms > 0) { |
248 | | for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; |
249 | | l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { |
250 | | uint16_t n = dict->words->size_bits_by_length[l] ? |
251 | | (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; |
252 | | for (idx = 0; idx < n; ++idx) { |
253 | | uint32_t key; |
254 | | /* First transform (usually identity) */ |
255 | | TransformedDictionaryWord(idx, l, 0, transforms, dict, word, |
256 | | &word_size); |
257 | | /* Cannot hash words smaller than 4 bytes */ |
258 | | if (word_size < 4) { |
259 | | /* Break instead of continue, all next words of this length will have |
260 | | same length after transform */ |
261 | | break; |
262 | | } |
263 | | if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) { |
264 | | return BROTLI_FALSE; |
265 | | } |
266 | | key = Hash(word, NUM_HASH_BITS); |
267 | | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
268 | | words_by_hash_capacity[key], words_by_hash_size[key], |
269 | | MakeDictWord(l, 0, idx)); |
270 | | ++total; |
271 | | } |
272 | | } |
273 | | } |
274 | | |
275 | | /* These LUT transforms only supported if no custom transforms. This is |
276 | | ok, we will use the heavy trie instead. */ |
277 | | if (transforms == BrotliGetTransforms()) { |
278 | | for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; |
279 | | l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { |
280 | | uint16_t n = dict->words->size_bits_by_length[l] ? |
281 | | (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; |
282 | | for (idx = 0; idx < n; ++idx) { |
283 | | int k; |
284 | | BROTLI_BOOL is_ascii = BROTLI_TRUE; |
285 | | size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx; |
286 | | const uint8_t* data = &dict->words->data[offset]; |
287 | | for (k = 0; k < l; ++k) { |
288 | | if (data[k] >= 128) is_ascii = BROTLI_FALSE; |
289 | | } |
290 | | if (data[0] < 128) { |
291 | | int transform = 9; /* {empty, uppercase first, empty} */ |
292 | | uint32_t ix = idx + (uint32_t)transform * n; |
293 | | const BrotliTrieNode* it; |
294 | | TransformedDictionaryWord(idx, l, transform, transforms, |
295 | | dict, word, &word_size); |
296 | | it = BrotliTrieFind(&dedup, word, word_size); |
297 | | if (!it || it->idx_ > ix) { |
298 | | uint32_t key = Hash(word, NUM_HASH_BITS); |
299 | | if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { |
300 | | return BROTLI_FALSE; |
301 | | } |
302 | | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
303 | | words_by_hash_capacity[key], words_by_hash_size[key], |
304 | | MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx)); |
305 | | ++total; |
306 | | } |
307 | | } |
308 | | if (is_ascii) { |
309 | | int transform = 44; /* {empty, uppercase all, empty} */ |
310 | | uint32_t ix = idx + (uint32_t)transform * n; |
311 | | const BrotliTrieNode* it; |
312 | | TransformedDictionaryWord(idx, l, transform, transforms, |
313 | | dict, word, &word_size); |
314 | | it = BrotliTrieFind(&dedup, word, word_size); |
315 | | if (!it || it->idx_ > ix) { |
316 | | uint32_t key = Hash(word, NUM_HASH_BITS); |
317 | | if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { |
318 | | return BROTLI_FALSE; |
319 | | } |
320 | | BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], |
321 | | words_by_hash_capacity[key], words_by_hash_size[key], |
322 | | MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx)); |
323 | | ++total; |
324 | | } |
325 | | } |
326 | | } |
327 | | } |
328 | | } |
329 | | |
330 | | dict_words = (DictWord*)BrotliAllocate(m, |
331 | | sizeof(*dict->dict_words) * (total + 1)); |
332 | | buckets = (uint16_t*)BrotliAllocate(m, |
333 | | sizeof(*dict->buckets) * NUM_HASH_BUCKETS); |
334 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
335 | | dict->dict_words_alloc_size_ = total + 1; |
336 | | dict->dict_words = dict->dict_words_data_ = dict_words; |
337 | | dict->buckets_alloc_size_ = NUM_HASH_BUCKETS; |
338 | | dict->buckets = dict->buckets_data_ = buckets; |
339 | | |
340 | | /* Unused; makes offsets start from 1. */ |
341 | | dict_words[0] = MakeDictWord(0, 0, 0); |
342 | | total = 1; |
343 | | for (i = 0; i < NUM_HASH_BUCKETS; ++i) { |
344 | | size_t num_words = words_by_hash_size[i]; |
345 | | if (num_words > 0) { |
346 | | buckets[i] = (uint16_t)(total); |
347 | | memcpy(&dict_words[total], &words_by_hash[i][0], |
348 | | sizeof(dict_words[0]) * num_words); |
349 | | total += num_words; |
350 | | dict_words[total - 1].len |= 0x80; |
351 | | } else { |
352 | | buckets[i] = 0; |
353 | | } |
354 | | } |
355 | | |
356 | | for (i = 0; i < NUM_HASH_BUCKETS; ++i) { |
357 | | BrotliFree(m, words_by_hash[i]); |
358 | | } |
359 | | BrotliFree(m, words_by_hash); |
360 | | BrotliFree(m, words_by_hash_size); |
361 | | BrotliFree(m, words_by_hash_capacity); |
362 | | BrotliTrieFree(m, &dedup); |
363 | | |
364 | | return BROTLI_TRUE; |
365 | | } |
366 | | |
367 | | static void BuildDictionaryHashTable(uint16_t* hash_table_words, |
368 | | uint8_t* hash_table_lengths, const BrotliDictionary* dict) { |
369 | | int j, len; |
370 | | /* The order of the loops is such that in case of collision, words with |
371 | | shorter length are preferred, and in case of same length, words with |
372 | | smaller index. There is only a single word per bucket. */ |
373 | | /* TODO(lode): consider adding optional user-supplied frequency_map to use |
374 | | for preferred words instead, this can make the encoder better for |
375 | | quality 9 and below without affecting the decoder */ |
376 | | memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords)); |
377 | | memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths)); |
378 | | for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; |
379 | | len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) { |
380 | | const size_t num_words = dict->size_bits_by_length[len] ? |
381 | | (1u << dict->size_bits_by_length[len]) : 0; |
382 | | for (j = (int)num_words - 1; j >= 0; --j) { |
383 | | size_t offset = dict->offsets_by_length[len] + |
384 | | (size_t)len * (size_t)j; |
385 | | const uint8_t* word = &dict->data[offset]; |
386 | | const uint32_t key = Hash(word, 14); |
387 | | int idx = (int)(key << 1) + (len < 8 ? 1 : 0); |
388 | | BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS); |
389 | | hash_table_words[idx] = (uint16_t)j; |
390 | | hash_table_lengths[idx] = (uint8_t)len; |
391 | | } |
392 | | } |
393 | | } |
394 | | |
395 | | static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m, |
396 | | const BrotliTransforms* transforms, |
397 | | BrotliEncoderDictionary* dict) { |
398 | | int i, j, l; |
399 | | for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) { |
400 | | for (l = 0; l < 32; l++) { |
401 | | int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u); |
402 | | for (i = 0; i < num; i++) { |
403 | | uint8_t transformed[kTransformedBufferSize]; |
404 | | size_t size; |
405 | | TransformedDictionaryWord( |
406 | | (uint32_t)i, l, j, transforms, dict, transformed, &size); |
407 | | if (size < 4) continue; |
408 | | if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j), |
409 | | transformed, size, &dict->trie)) { |
410 | | return BROTLI_FALSE; |
411 | | } |
412 | | } |
413 | | } |
414 | | } |
415 | | return BROTLI_TRUE; |
416 | | } |
417 | | |
418 | | /* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for |
419 | | the custom transforms, where possible within the limits of the |
420 | | cutoffTransforms encoding. The fast encoder uses this to do fast lookup for |
421 | | transforms that remove the N last characters (OmitLast). */ |
422 | | static void ComputeCutoffTransforms( |
423 | | const BrotliTransforms* transforms, |
424 | | uint32_t* count, uint64_t* data) { |
425 | | int i; |
426 | | /* The encoding in a 64-bit integer of transform N in the data is: (N << 2) + |
427 | | ((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity |
428 | | transform code must be 0-63, for N=1 the transform code must be 4-67, ..., |
429 | | for N=9 it must be 36-99. |
430 | | TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t |
431 | | for the cutoff transforms, so that shared dictionaries can have the |
432 | | OmitLast transforms anywhere without loss. */ |
433 | | *count = 0; |
434 | | *data = 0; |
435 | | for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) { |
436 | | int idx = transforms->cutOffTransforms[i]; |
437 | | if (idx == -1) break; /* Not found */ |
438 | | if (idx < (i << 2)) break; /* Too small for the encoding */ |
439 | | if (idx >= (i << 2) + 64) break; /* Too large for the encoding */ |
440 | | (*count)++; |
441 | | *data |= (uint64_t)(((uint64_t)idx - |
442 | | ((uint64_t)i << 2u)) << ((uint64_t)i * 6u)); |
443 | | } |
444 | | } |
445 | | |
446 | | static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality, |
447 | | const BrotliTransforms* transforms, |
448 | | BrotliEncoderDictionary* current) { |
449 | | int default_words = current->words == BrotliGetDictionary(); |
450 | | int default_transforms = transforms == BrotliGetTransforms(); |
451 | | |
452 | | if (default_words && default_transforms) { |
453 | | /* hashes are already set to Brotli defaults */ |
454 | | return BROTLI_TRUE; |
455 | | } |
456 | | |
457 | | current->hash_table_data_words_ = (uint16_t*)BrotliAllocate( |
458 | | m, sizeof(kStaticDictionaryHashWords)); |
459 | | current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate( |
460 | | m, sizeof(kStaticDictionaryHashLengths)); |
461 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
462 | | current->hash_table_words = current->hash_table_data_words_; |
463 | | current->hash_table_lengths = current->hash_table_data_lengths_; |
464 | | |
465 | | BuildDictionaryHashTable(current->hash_table_data_words_, |
466 | | current->hash_table_data_lengths_, current->words); |
467 | | |
468 | | ComputeCutoffTransforms(transforms, |
469 | | ¤t->cutoffTransformsCount, ¤t->cutoffTransforms); |
470 | | |
471 | | /* Only compute the data for slow encoder if the requested quality is high |
472 | | enough to need it */ |
473 | | if (quality >= ZOPFLIFICATION_QUALITY) { |
474 | | if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE; |
475 | | |
476 | | /* For the built-in Brotli transforms, there is a hard-coded function to |
477 | | handle all transforms, but for custom transforms, we use the following |
478 | | large hammer instead */ |
479 | | current->has_words_heavy = !default_transforms; |
480 | | if (current->has_words_heavy) { |
481 | | if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE; |
482 | | } |
483 | | } |
484 | | |
485 | | return BROTLI_TRUE; |
486 | | } |
487 | | #endif /* BROTLI_EXPERIMENTAL */ |
488 | | |
489 | 0 | void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) { |
490 | 0 | dict->magic = kSharedDictionaryMagic; |
491 | |
|
492 | 0 | dict->compound.num_chunks = 0; |
493 | 0 | dict->compound.total_size = 0; |
494 | 0 | dict->compound.chunk_offsets[0] = 0; |
495 | 0 | dict->compound.num_prepared_instances_ = 0; |
496 | |
|
497 | 0 | dict->contextual.context_based = 0; |
498 | 0 | dict->contextual.num_dictionaries = 1; |
499 | 0 | dict->contextual.instances_ = 0; |
500 | 0 | dict->contextual.num_instances_ = 1; /* The instance_ field */ |
501 | 0 | dict->contextual.dict[0] = &dict->contextual.instance_; |
502 | 0 | InitEncoderDictionary(&dict->contextual.instance_); |
503 | 0 | dict->contextual.instance_.parent = &dict->contextual; |
504 | |
|
505 | 0 | dict->max_quality = BROTLI_MAX_QUALITY; |
506 | 0 | } |
507 | | |
508 | | #if defined(BROTLI_EXPERIMENTAL) |
509 | | /* TODO(eustas): make sure that tooling will warn user if not all the cutoff |
510 | | transforms are available (for low-quality encoder). */ |
511 | | static BROTLI_BOOL InitCustomSharedEncoderDictionary( |
512 | | MemoryManager* m, const BrotliSharedDictionary* decoded_dict, |
513 | | int quality, SharedEncoderDictionary* dict) { |
514 | | ContextualEncoderDictionary* contextual; |
515 | | CompoundDictionary* compound; |
516 | | BrotliEncoderDictionary* instances; |
517 | | int i; |
518 | | BrotliInitSharedEncoderDictionary(dict); |
519 | | |
520 | | contextual = &dict->contextual; |
521 | | compound = &dict->compound; |
522 | | |
523 | | for (i = 0; i < (int)decoded_dict->num_prefix; i++) { |
524 | | PreparedDictionary* prepared = CreatePreparedDictionary(m, |
525 | | decoded_dict->prefix[i], decoded_dict->prefix_size[i]); |
526 | | AttachPreparedDictionary(compound, prepared); |
527 | | /* remember for cleanup */ |
528 | | compound->prepared_instances_[ |
529 | | compound->num_prepared_instances_++] = prepared; |
530 | | } |
531 | | |
532 | | dict->max_quality = quality; |
533 | | contextual->context_based = decoded_dict->context_based; |
534 | | if (decoded_dict->context_based) { |
535 | | memcpy(contextual->context_map, decoded_dict->context_map, |
536 | | SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS); |
537 | | } |
538 | | |
539 | | contextual->num_dictionaries = decoded_dict->num_dictionaries; |
540 | | contextual->num_instances_ = decoded_dict->num_dictionaries; |
541 | | if (contextual->num_instances_ == 1) { |
542 | | instances = &contextual->instance_; |
543 | | } else { |
544 | | contextual->instances_ = (BrotliEncoderDictionary*) |
545 | | BrotliAllocate(m, sizeof(*contextual->instances_) * |
546 | | contextual->num_instances_); |
547 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
548 | | instances = contextual->instances_; |
549 | | } |
550 | | for (i = 0; i < (int)contextual->num_instances_; i++) { |
551 | | BrotliEncoderDictionary* current = &instances[i]; |
552 | | InitEncoderDictionary(current); |
553 | | current->parent = &dict->contextual; |
554 | | if (decoded_dict->words[i] == BrotliGetDictionary()) { |
555 | | current->words = BrotliGetDictionary(); |
556 | | } else { |
557 | | current->words_instance_ = (BrotliDictionary*)BrotliAllocate( |
558 | | m, sizeof(BrotliDictionary)); |
559 | | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
560 | | *current->words_instance_ = *decoded_dict->words[i]; |
561 | | current->words = current->words_instance_; |
562 | | } |
563 | | current->num_transforms = |
564 | | (uint32_t)decoded_dict->transforms[i]->num_transforms; |
565 | | if (!ComputeDictionary( |
566 | | m, quality, decoded_dict->transforms[i], current)) { |
567 | | return BROTLI_FALSE; |
568 | | } |
569 | | |
570 | | contextual->dict[i] = current; |
571 | | } |
572 | | |
573 | | return BROTLI_TRUE; /* success */ |
574 | | } |
575 | | |
576 | | BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary( |
577 | | MemoryManager* m, const uint8_t* encoded_dict, size_t size, |
578 | | int quality, SharedEncoderDictionary* dict) { |
579 | | BROTLI_BOOL success = BROTLI_FALSE; |
580 | | BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance( |
581 | | m->alloc_func, m->free_func, m->opaque); |
582 | | if (!decoded_dict) { /* OOM */ |
583 | | return BROTLI_FALSE; |
584 | | } |
585 | | success = BrotliSharedDictionaryAttach( |
586 | | decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict); |
587 | | if (success) { |
588 | | success = InitCustomSharedEncoderDictionary(m, |
589 | | decoded_dict, quality, dict); |
590 | | } |
591 | | BrotliSharedDictionaryDestroyInstance(decoded_dict); |
592 | | return success; |
593 | | } |
594 | | #endif /* BROTLI_EXPERIMENTAL */ |
595 | | |
596 | | void BrotliCleanupSharedEncoderDictionary(MemoryManager* m, |
597 | 0 | SharedEncoderDictionary* dict) { |
598 | 0 | size_t i; |
599 | 0 | for (i = 0; i < dict->compound.num_prepared_instances_; i++) { |
600 | 0 | DestroyPreparedDictionary(m, |
601 | 0 | (PreparedDictionary*)dict->compound.prepared_instances_[i]); |
602 | 0 | } |
603 | 0 | if (dict->contextual.num_instances_ == 1) { |
604 | 0 | BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_); |
605 | 0 | } else if (dict->contextual.num_instances_ > 1) { |
606 | 0 | for (i = 0; i < dict->contextual.num_instances_; i++) { |
607 | 0 | BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]); |
608 | 0 | } |
609 | 0 | BrotliFree(m, dict->contextual.instances_); |
610 | 0 | } |
611 | 0 | } |
612 | | |
613 | | ManagedDictionary* BrotliCreateManagedDictionary( |
614 | 0 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { |
615 | 0 | ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc( |
616 | 0 | sizeof(ManagedDictionary), alloc_func, free_func, opaque); |
617 | 0 | if (result == NULL) return NULL; |
618 | | |
619 | 0 | result->magic = kManagedDictionaryMagic; |
620 | 0 | BrotliInitMemoryManager( |
621 | 0 | &result->memory_manager_, alloc_func, free_func, opaque); |
622 | 0 | result->dictionary = NULL; |
623 | |
|
624 | 0 | return result; |
625 | 0 | } |
626 | | |
627 | 0 | void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) { |
628 | 0 | if (!dictionary) return; |
629 | 0 | BrotliBootstrapFree(dictionary, &dictionary->memory_manager_); |
630 | 0 | } |
631 | | |
632 | | /* Escalate internal functions visibility; for testing purposes only. */ |
633 | | #if defined(BROTLI_TEST) |
634 | | void BrotliInitEncoderDictionaryForTest(BrotliEncoderDictionary*); |
635 | | void BrotliInitEncoderDictionaryForTest(BrotliEncoderDictionary* d) { |
636 | | InitEncoderDictionary(d); |
637 | | } |
638 | | #endif |
639 | | |
640 | | #if defined(__cplusplus) || defined(c_plusplus) |
641 | | } /* extern "C" */ |
642 | | #endif |