/rust/registry/src/index.crates.io-6f17d22bba15001f/fdeflate-0.3.7/src/huffman.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use crate::decompress::{EXCEPTIONAL_ENTRY, LITERAL_ENTRY, SECONDARY_TABLE_ENTRY}; |
2 | | |
3 | | /// Return the next code, or if the codeword is already all ones (which is the final code), return |
4 | | /// the same code again. |
5 | 160k | fn next_codeword(mut codeword: u16, table_size: u16) -> u16 { |
6 | 160k | if codeword == table_size - 1 { |
7 | 15.1k | return codeword; |
8 | 145k | } |
9 | 145k | |
10 | 145k | let adv = (u16::BITS - 1) - (codeword ^ (table_size - 1)).leading_zeros(); |
11 | 145k | let bit = 1 << adv; |
12 | 145k | codeword &= bit - 1; |
13 | 145k | codeword |= bit; |
14 | 145k | codeword |
15 | 160k | } |
16 | | |
17 | | #[allow(clippy::needless_range_loop)] |
18 | 17.3k | pub fn build_table( |
19 | 17.3k | lengths: &[u8], |
20 | 17.3k | entries: &[u32], |
21 | 17.3k | codes: &mut [u16], |
22 | 17.3k | primary_table: &mut [u32], |
23 | 17.3k | secondary_table: &mut Vec<u16>, |
24 | 17.3k | is_distance_table: bool, |
25 | 17.3k | double_literal: bool, |
26 | 17.3k | ) -> bool { |
27 | 17.3k | // Count the number of symbols with each code length. |
28 | 17.3k | let mut histogram = [0; 16]; |
29 | 2.22M | for &length in lengths { |
30 | 2.20M | histogram[length as usize] += 1; |
31 | 2.20M | } |
32 | | |
33 | | // Determine the maximum code length. |
34 | 17.3k | let mut max_length = 15; |
35 | 213k | while max_length > 1 && histogram[max_length] == 0 { |
36 | 195k | max_length -= 1; |
37 | 195k | } |
38 | | |
39 | | // Handle zero and one symbol huffman codes (which are only allowed for distance codes). |
40 | 17.3k | if is_distance_table { |
41 | 3.30k | if max_length == 0 { |
42 | 0 | primary_table.fill(0); |
43 | 0 | secondary_table.clear(); |
44 | 0 | return true; |
45 | 3.30k | } else if max_length == 1 && histogram[1] == 1 { |
46 | 3.21k | let symbol = lengths.iter().position(|&l| l == 1).unwrap(); |
47 | 2.07k | codes[symbol] = 0; |
48 | 2.07k | let entry = entries |
49 | 2.07k | .get(symbol) |
50 | 2.07k | .cloned() |
51 | 2.07k | .unwrap_or((symbol as u32) << 16) |
52 | 2.07k | | 1; |
53 | 530k | for chunk in primary_table.chunks_mut(2) { |
54 | 530k | chunk[0] = entry; |
55 | 530k | chunk[1] = 0; |
56 | 530k | } |
57 | 2.07k | return true; |
58 | 1.23k | } |
59 | 14.0k | } |
60 | | |
61 | | // Sort symbols by code length. Given the histogram, we can determine the starting offset |
62 | | // for each code length. |
63 | 15.3k | let mut offsets = [0; 16]; |
64 | 15.3k | let mut codespace_used = 0; |
65 | 15.3k | offsets[1] = histogram[0]; |
66 | 47.5k | for i in 1..max_length { |
67 | 47.5k | offsets[i + 1] = offsets[i] + histogram[i]; |
68 | 47.5k | codespace_used = (codespace_used << 1) + histogram[i]; |
69 | 47.5k | } |
70 | 15.3k | codespace_used = (codespace_used << 1) + histogram[max_length]; |
71 | 15.3k | |
72 | 15.3k | // Check that the provided lengths form a valid Huffman tree. |
73 | 15.3k | if codespace_used != (1 << max_length) { |
74 | 201 | return false; |
75 | 15.1k | } |
76 | 15.1k | |
77 | 15.1k | // Sort the symbols by code length. |
78 | 15.1k | let mut next_index = offsets; |
79 | 15.1k | let mut sorted_symbols = [0; 288]; |
80 | 2.11M | for symbol in 0..lengths.len() { |
81 | 2.11M | let length = lengths[symbol]; |
82 | 2.11M | sorted_symbols[next_index[length as usize]] = symbol; |
83 | 2.11M | next_index[length as usize] += 1; |
84 | 2.11M | } |
85 | | |
86 | 15.1k | let mut codeword = 0u16; |
87 | 15.1k | let mut i = histogram[0]; |
88 | 15.1k | |
89 | 15.1k | // Populate the primary decoding table |
90 | 15.1k | let primary_table_bits = primary_table.len().ilog2() as usize; |
91 | 15.1k | let primary_table_mask = (1 << primary_table_bits) - 1; |
92 | 142k | for length in 1..=primary_table_bits { |
93 | 142k | let current_table_end = 1 << length; |
94 | 142k | |
95 | 142k | // Loop over all symbols with the current code length and set their table entries. |
96 | 151k | for _ in 0..histogram[length] { |
97 | 151k | let symbol = sorted_symbols[i]; |
98 | 151k | i += 1; |
99 | 151k | |
100 | 151k | primary_table[codeword as usize] = entries |
101 | 151k | .get(symbol) |
102 | 151k | .cloned() |
103 | 151k | .unwrap_or((symbol as u32) << 16) |
104 | 151k | | length as u32; |
105 | 151k | |
106 | 151k | codes[symbol] = codeword; |
107 | 151k | codeword = next_codeword(codeword, current_table_end as u16); |
108 | 151k | } |
109 | | |
110 | 142k | if double_literal { |
111 | 377k | for len1 in 1..(length - 1) { |
112 | 377k | let len2 = length - len1; |
113 | 377k | for sym1_index in offsets[len1]..next_index[len1] { |
114 | 631k | for sym2_index in offsets[len2]..next_index[len2] { |
115 | 631k | let sym1 = sorted_symbols[sym1_index]; |
116 | 631k | let sym2 = sorted_symbols[sym2_index]; |
117 | 631k | if sym1 < 256 && sym2 < 256 { |
118 | 376k | let codeword1 = codes[sym1]; |
119 | 376k | let codeword2 = codes[sym2]; |
120 | 376k | let codeword = codeword1 | (codeword2 << len1); |
121 | 376k | let entry = (sym1 as u32) << 16 |
122 | 376k | | (sym2 as u32) << 24 |
123 | 376k | | LITERAL_ENTRY |
124 | 376k | | (2 << 8); |
125 | 376k | primary_table[codeword as usize] = entry | (length as u32); |
126 | 376k | } |
127 | | } |
128 | | } |
129 | | } |
130 | 60.0k | } |
131 | | |
132 | | // If we aren't at the maximum table size, double the size of the table. |
133 | 142k | if length < primary_table_bits { |
134 | 127k | primary_table.copy_within(0..current_table_end, current_table_end); |
135 | 127k | } |
136 | | } |
137 | | |
138 | | // Populate the secondary decoding table. |
139 | 15.1k | secondary_table.clear(); |
140 | 15.1k | if max_length > primary_table_bits { |
141 | 205 | let mut subtable_start = 0; |
142 | 205 | let mut subtable_prefix = !0; |
143 | 208 | for length in (primary_table_bits + 1)..=max_length { |
144 | 208 | let subtable_size = 1 << (length - primary_table_bits); |
145 | 208 | for _ in 0..histogram[length] { |
146 | | // If the codeword's prefix doesn't match the current subtable, create a new |
147 | | // subtable. |
148 | 9.22k | if codeword & primary_table_mask != subtable_prefix { |
149 | 4.61k | subtable_prefix = codeword & primary_table_mask; |
150 | 4.61k | subtable_start = secondary_table.len(); |
151 | 4.61k | primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) |
152 | 4.61k | | EXCEPTIONAL_ENTRY |
153 | 4.61k | | SECONDARY_TABLE_ENTRY |
154 | 4.61k | | (subtable_size as u32 - 1); |
155 | 4.61k | secondary_table.resize(subtable_start + subtable_size, 0); |
156 | 4.61k | } |
157 | | |
158 | | // Lookup the symbol. |
159 | 9.22k | let symbol = sorted_symbols[i]; |
160 | 9.22k | i += 1; |
161 | 9.22k | |
162 | 9.22k | // Insert the symbol into the secondary table and advance to the next codeword. |
163 | 9.22k | codes[symbol] = codeword; |
164 | 9.22k | secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] = |
165 | 9.22k | ((symbol as u16) << 4) | (length as u16); |
166 | 9.22k | codeword = next_codeword(codeword, 1 << length); |
167 | | } |
168 | | |
169 | | // If there are more codes with the same subtable prefix, extend the subtable. |
170 | 208 | if length < max_length && codeword & primary_table_mask == subtable_prefix { |
171 | 2 | secondary_table.extend_from_within(subtable_start..); |
172 | 2 | let subtable_size = secondary_table.len() - subtable_start; |
173 | 2 | primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) |
174 | 2 | | EXCEPTIONAL_ENTRY |
175 | 2 | | SECONDARY_TABLE_ENTRY |
176 | 2 | | (subtable_size as u32 - 1); |
177 | 206 | } |
178 | | } |
179 | 14.8k | } |
180 | | |
181 | 15.1k | true |
182 | 17.3k | } |