/src/brunsli/c/dec/huffman_decode.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) Google LLC 2019 |
2 | | // |
3 | | // Use of this source code is governed by an MIT-style |
4 | | // license that can be found in the LICENSE file or at |
5 | | // https://opensource.org/licenses/MIT. |
6 | | |
7 | | #include "./huffman_decode.h" |
8 | | |
9 | | #include <cstring> /* for memset */ |
10 | | #include <vector> |
11 | | |
12 | | #include "../common/constants.h" |
13 | | #include "../common/platform.h" |
14 | | #include <brunsli/types.h> |
15 | | #include "./bit_reader.h" |
16 | | #include "./huffman_table.h" |
17 | | |
18 | | namespace brunsli { |
19 | | |
20 | | static const int kCodeLengthCodes = 18; |
21 | | static const uint8_t kCodeLengthCodeOrder[kCodeLengthCodes] = { |
22 | | 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
23 | | }; |
24 | | static const uint8_t kDefaultCodeLength = 8; |
25 | | static const uint8_t kCodeLengthRepeatCode = 16; |
26 | | |
27 | | bool ReadHuffmanCodeLengths(const uint8_t* code_length_code_lengths, |
28 | | size_t num_symbols, uint8_t* code_lengths, |
29 | 606 | BrunsliBitReader* br) { |
30 | 606 | size_t symbol = 0; |
31 | 606 | uint8_t prev_code_len = kDefaultCodeLength; |
32 | 606 | size_t repeat = 0; |
33 | 606 | uint8_t repeat_code_len = 0; |
34 | 606 | const int kFullSpace = 1 << 15; |
35 | 606 | int space = kFullSpace; |
36 | 606 | HuffmanCode table[32]; |
37 | | |
38 | 606 | uint16_t counts[16] = {0}; |
39 | 11.5k | for (int i = 0; i < kCodeLengthCodes; ++i) { |
40 | 10.9k | ++counts[code_length_code_lengths[i]]; |
41 | 10.9k | } |
42 | 606 | if (!BuildHuffmanTable(table, 5, code_length_code_lengths, kCodeLengthCodes, |
43 | 606 | &counts[0])) { |
44 | 0 | return false; |
45 | 0 | } |
46 | | |
47 | 17.3k | while (symbol < num_symbols && space > 0) { |
48 | 16.8k | const HuffmanCode* p = table; |
49 | 16.8k | uint8_t code_len; |
50 | 16.8k | p += BrunsliBitReaderGet(br, 5); |
51 | 16.8k | BrunsliBitReaderDrop(br, p->bits); |
52 | 16.8k | code_len = (uint8_t)p->value; |
53 | 16.8k | if (code_len < kCodeLengthRepeatCode) { |
54 | 15.0k | repeat = 0; |
55 | 15.0k | code_lengths[symbol++] = code_len; |
56 | 15.0k | if (code_len != 0) { |
57 | 11.9k | prev_code_len = code_len; |
58 | 11.9k | space -= kFullSpace >> code_len; |
59 | 11.9k | } |
60 | 15.0k | } else { |
61 | 1.78k | uint32_t extra_bits = code_len - 14; // >= 2 |
62 | 1.78k | size_t old_repeat; |
63 | 1.78k | size_t repeat_delta; |
64 | 1.78k | uint8_t new_len = 0; |
65 | 1.78k | if (code_len == kCodeLengthRepeatCode) { |
66 | 1.26k | new_len = prev_code_len; |
67 | 1.26k | } |
68 | 1.78k | if (repeat_code_len != new_len) { |
69 | 543 | repeat = 0; |
70 | 543 | repeat_code_len = new_len; |
71 | 543 | } |
72 | 1.78k | old_repeat = repeat; |
73 | 1.78k | if (repeat > 0) { // >= 3 |
74 | 547 | repeat -= 2; |
75 | 547 | repeat <<= extra_bits; |
76 | 547 | } |
77 | 1.78k | repeat += BrunsliBitReaderRead(br, extra_bits) + 3u; |
78 | 1.78k | repeat_delta = repeat - old_repeat; |
79 | 1.78k | if (symbol + repeat_delta > num_symbols) { |
80 | 89 | return false; |
81 | 89 | } |
82 | 1.69k | memset(&code_lengths[symbol], repeat_code_len, (size_t)repeat_delta); |
83 | 1.69k | symbol += repeat_delta; |
84 | 1.69k | if (repeat_code_len != 0) { |
85 | 1.22k | space -= static_cast<int>(repeat_delta * kFullSpace) >> repeat_code_len; |
86 | 1.22k | } |
87 | 1.69k | } |
88 | 16.8k | } |
89 | 517 | if (space != 0) { |
90 | 171 | return false; |
91 | 171 | } |
92 | 346 | memset(&code_lengths[symbol], 0, (size_t)(num_symbols - symbol)); |
93 | 346 | return BrunsliBitReaderIsHealthy(br); |
94 | 517 | } |
95 | | |
96 | | static BRUNSLI_INLINE bool ReadSimpleCode(uint16_t alphabet_size, |
97 | | BrunsliBitReader* br, |
98 | 1.30k | HuffmanCode* table) { |
99 | 1.30k | uint32_t max_bits = |
100 | 1.30k | (alphabet_size > 1u) ? Log2FloorNonZero(alphabet_size - 1u) + 1 : 0; |
101 | | |
102 | 1.30k | size_t num_symbols = BrunsliBitReaderRead(br, 2) + 1; |
103 | | |
104 | 1.30k | uint16_t symbols[4] = {0}; |
105 | 3.36k | for (size_t i = 0; i < num_symbols; ++i) { |
106 | 2.13k | uint16_t symbol = BrunsliBitReaderRead(br, max_bits); |
107 | 2.13k | if (symbol >= alphabet_size) { |
108 | 70 | return false; |
109 | 70 | } |
110 | 2.06k | symbols[i] = symbol; |
111 | 2.06k | } |
112 | | |
113 | 1.84k | for (size_t i = 0; i < num_symbols - 1; ++i) { |
114 | 1.74k | for (size_t j = i + 1; j < num_symbols; ++j) { |
115 | 1.13k | if (symbols[i] == symbols[j]) return false; |
116 | 1.13k | } |
117 | 667 | } |
118 | | |
119 | | // 4 symbols have to option to encode. |
120 | 1.17k | if (num_symbols == 4) num_symbols += BrunsliBitReaderRead(br, 1); |
121 | | |
122 | 1.17k | const auto swap_symbols = [&symbols] (size_t i, size_t j) { |
123 | 441 | uint16_t t = symbols[j]; |
124 | 441 | symbols[j] = symbols[i]; |
125 | 441 | symbols[i] = t; |
126 | 441 | }; |
127 | | |
128 | 1.17k | size_t table_size = 1; |
129 | 1.17k | switch (num_symbols) { |
130 | 887 | case 1: |
131 | 887 | table[0] = {0, symbols[0]}; |
132 | 887 | break; |
133 | 112 | case 2: |
134 | 112 | if (symbols[0] > symbols[1]) swap_symbols(0, 1); |
135 | 112 | table[0] = {1, symbols[0]}; |
136 | 112 | table[1] = {1, symbols[1]}; |
137 | 112 | table_size = 2; |
138 | 112 | break; |
139 | 74 | case 3: |
140 | 74 | if (symbols[1] > symbols[2]) swap_symbols(1, 2); |
141 | 74 | table[0] = {1, symbols[0]}; |
142 | 74 | table[2] = {1, symbols[0]}; |
143 | 74 | table[1] = {2, symbols[1]}; |
144 | 74 | table[3] = {2, symbols[2]}; |
145 | 74 | table_size = 4; |
146 | 74 | break; |
147 | 85 | case 4: { |
148 | 340 | for (size_t i = 0; i < 3; ++i) { |
149 | 765 | for (size_t j = i + 1; j < 4; ++j) { |
150 | 510 | if (symbols[i] > symbols[j]) swap_symbols(i, j); |
151 | 510 | } |
152 | 255 | } |
153 | 85 | table[0] = {2, symbols[0]}; |
154 | 85 | table[2] = {2, symbols[1]}; |
155 | 85 | table[1] = {2, symbols[2]}; |
156 | 85 | table[3] = {2, symbols[3]}; |
157 | 85 | table_size = 4; |
158 | 85 | break; |
159 | 0 | } |
160 | 19 | case 5: { |
161 | 19 | if (symbols[2] > symbols[3]) swap_symbols(2, 3); |
162 | 19 | table[0] = {1, symbols[0]}; |
163 | 19 | table[1] = {2, symbols[1]}; |
164 | 19 | table[2] = {1, symbols[0]}; |
165 | 19 | table[3] = {3, symbols[2]}; |
166 | 19 | table[4] = {1, symbols[0]}; |
167 | 19 | table[5] = {2, symbols[1]}; |
168 | 19 | table[6] = {1, symbols[0]}; |
169 | 19 | table[7] = {3, symbols[3]}; |
170 | 19 | table_size = 8; |
171 | 19 | break; |
172 | 0 | } |
173 | 0 | default: { |
174 | | // Unreachable. |
175 | 0 | return false; |
176 | 0 | } |
177 | 1.17k | } |
178 | | |
179 | 1.17k | const uint32_t goal_size = 1u << kHuffmanTableBits; |
180 | 10.1k | while (table_size != goal_size) { |
181 | 8.92k | memcpy(&table[table_size], &table[0], |
182 | 8.92k | (size_t)table_size * sizeof(table[0])); |
183 | 8.92k | table_size <<= 1; |
184 | 8.92k | } |
185 | | |
186 | 1.17k | return BrunsliBitReaderIsHealthy(br); |
187 | 1.17k | } |
188 | | |
189 | | bool HuffmanDecodingData::ReadFromBitStream( |
190 | | size_t alphabet_size, BrunsliBitReader* br, |
191 | 2.04k | Arena<HuffmanCode>* arena) { |
192 | 2.04k | Arena<HuffmanCode> local_arena; |
193 | 2.04k | if (arena == nullptr) arena = &local_arena; |
194 | | |
195 | 2.04k | if (alphabet_size > (1 << kMaxHuffmanBits)) return false; |
196 | | |
197 | 2.04k | std::vector<uint8_t> code_lengths(alphabet_size, 0); |
198 | | /* simple_code_or_skip is used as follows: |
199 | | 1 for simple code; |
200 | | 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */ |
201 | 2.04k | uint32_t simple_code_or_skip = BrunsliBitReaderRead(br, 2); |
202 | 2.04k | if (simple_code_or_skip == 1u) { |
203 | 1.30k | table_.resize(1u << kHuffmanTableBits); |
204 | 1.30k | return ReadSimpleCode(static_cast<uint16_t>(alphabet_size), br, |
205 | 1.30k | table_.data()); |
206 | 1.30k | } |
207 | | |
208 | 743 | uint8_t code_length_code_lengths[kCodeLengthCodes] = {0}; |
209 | 743 | int space = 32; |
210 | 743 | int num_codes = 0; |
211 | | /* Static Huffman code for the code length code lengths */ |
212 | 743 | static const HuffmanCode huff[16] = { |
213 | 743 | {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1}, |
214 | 743 | {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5}, |
215 | 743 | }; |
216 | 9.74k | for (size_t i = simple_code_or_skip; i < kCodeLengthCodes && space > 0; ++i) { |
217 | 9.00k | const int code_len_idx = kCodeLengthCodeOrder[i]; |
218 | 9.00k | const HuffmanCode* p = huff; |
219 | 9.00k | uint8_t v; |
220 | 9.00k | p += BrunsliBitReaderGet(br, 4); |
221 | 9.00k | BrunsliBitReaderDrop(br, p->bits); |
222 | 9.00k | v = (uint8_t)p->value; |
223 | 9.00k | code_length_code_lengths[code_len_idx] = v; |
224 | 9.00k | if (v != 0) { |
225 | 4.31k | space -= (32u >> v); |
226 | 4.31k | ++num_codes; |
227 | 4.31k | } |
228 | 9.00k | } |
229 | 743 | bool ok = (num_codes == 1 || space == 0) && |
230 | 743 | ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size, |
231 | 606 | &code_lengths[0], br); |
232 | | |
233 | 743 | if (!ok || !BrunsliBitReaderIsHealthy(br)) return false; |
234 | 324 | uint16_t counts[16] = {0}; |
235 | 36.5k | for (size_t i = 0; i < alphabet_size; ++i) { |
236 | 36.2k | ++counts[code_lengths[i]]; |
237 | 36.2k | } |
238 | 324 | arena->reserve(alphabet_size + 376); |
239 | 324 | uint32_t table_size = |
240 | 324 | BuildHuffmanTable(arena->data(), kHuffmanTableBits, &code_lengths[0], |
241 | 324 | alphabet_size, &counts[0]); |
242 | 324 | table_ = std::vector<HuffmanCode>(arena->data(), arena->data() + table_size); |
243 | 324 | return (table_size > 0); |
244 | 743 | } |
245 | | |
246 | | // Decodes the next Huffman coded symbol from the bit-stream. |
247 | 1.48M | uint16_t HuffmanDecodingData::ReadSymbol(BrunsliBitReader* br) const { |
248 | 1.48M | uint32_t n_bits; |
249 | 1.48M | const HuffmanCode* table = table_.data(); |
250 | 1.48M | table += BrunsliBitReaderGet(br, kHuffmanTableBits); |
251 | 1.48M | n_bits = table->bits; |
252 | 1.48M | if (n_bits > kHuffmanTableBits) { |
253 | 1.24k | BrunsliBitReaderDrop(br, kHuffmanTableBits); |
254 | 1.24k | n_bits -= kHuffmanTableBits; |
255 | 1.24k | table += table->value; |
256 | 1.24k | table += BrunsliBitReaderGet(br, n_bits); |
257 | 1.24k | } |
258 | 1.48M | BrunsliBitReaderDrop(br, table->bits); |
259 | 1.48M | return table->value; |
260 | 1.48M | } |
261 | | |
262 | | } // namespace brunsli |