/rust/registry/src/index.crates.io-6f17d22bba15001f/deflate-1.0.0/src/length_encode.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::clone::Clone; |
2 | | use std::iter::Iterator; |
3 | | |
/// An enum representing the different types in the run-length encoded data used to encode
/// Huffman table lengths
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedLength {
    /// An actual length value
    Length(u8),
    /// Copy the previous value n times
    CopyPrevious(u8),
    /// Repeat zero n times (with n represented by 3 bits)
    RepeatZero3Bits(u8),
    /// Repeat zero n times (with n represented by 7 bits)
    RepeatZero7Bits(u8),
}

impl EncodedLength {
    /// Pick the appropriate repeat code for `repeat` repetitions of the
    /// previous length value `prev`.
    ///
    /// Zero lengths have dedicated repeat codes (3-bit form for short runs,
    /// 7-bit form for longer ones); non-zero lengths use the copy-previous code.
    ///
    /// # Panics
    /// Panics if `prev` is not a valid deflate code length (0-15).
    fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
        match prev {
            0 => {
                if repeat <= 10 {
                    EncodedLength::RepeatZero3Bits(repeat)
                } else {
                    EncodedLength::RepeatZero7Bits(repeat)
                }
            }
            1..=15 => EncodedLength::CopyPrevious(repeat),
            _ => panic!("Invalid Huffman code length value: {}!", prev),
        }
    }
}
33 | | |
// Frequency-table indices for the three repeat codes (the deflate code-length
// alphabet uses symbols 16-18 for these; 0-15 are literal lengths).
pub const COPY_PREVIOUS: usize = 16;
pub const REPEAT_ZERO_3_BITS: usize = 17;
pub const REPEAT_ZERO_7_BITS: usize = 18;

// The minimum run length before a repeat code is emitted instead of literals.
const MIN_REPEAT: u8 = 3;
39 | | |
40 | | /// Push an `EncodedLength` to the vector and update the frequency table. |
41 | 0 | fn update_out_and_freq( |
42 | 0 | encoded: EncodedLength, |
43 | 0 | output: &mut Vec<EncodedLength>, |
44 | 0 | frequencies: &mut [u16; 19], |
45 | 0 | ) { |
46 | 0 | let index = match encoded { |
47 | 0 | EncodedLength::Length(l) => usize::from(l), |
48 | 0 | EncodedLength::CopyPrevious(_) => COPY_PREVIOUS, |
49 | 0 | EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS, |
50 | 0 | EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS, |
51 | | }; |
52 | | |
53 | 0 | frequencies[index] += 1; |
54 | 0 |
|
55 | 0 | output.push(encoded); |
56 | 0 | } |
57 | | |
/// Convenience function to check if the repeat counter should be incremented further.
///
/// Runs of zero can be encoded with up to 138 repetitions, while runs of any
/// other length value max out at 6 (the copy-previous code's upper bound).
fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
    if length_value == 0 {
        repeats < 138
    } else {
        repeats < 6
    }
}
62 | | |
/// Convenience version for unit tests: allocates the output buffers instead of
/// taking them as parameters, and returns them as a tuple.
#[cfg(test)]
pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
where
    I: Iterator<Item = &'a u8> + Clone,
{
    let mut frequencies = [0u16; 19];
    let mut output: Vec<EncodedLength> = Vec::new();
    encode_lengths_m(lengths, &mut output, &mut frequencies);
    (output, frequencies)
}
74 | | |
75 | | /// Run-length encodes the lengths of the values in `lengths` according to the deflate |
76 | | /// specification. This is used for writing the code lengths for the Huffman tables for |
77 | | /// the deflate stream. |
78 | | /// |
79 | | /// Populates the supplied array with the frequency of the different encoded length values |
80 | | /// The frequency array is taken as a parameter rather than returned to avoid |
81 | | /// excessive `memcpy`-ing. |
82 | 0 | pub fn encode_lengths_m<'a, I>( |
83 | 0 | lengths: I, |
84 | 0 | mut out: &mut Vec<EncodedLength>, |
85 | 0 | mut frequencies: &mut [u16; 19], |
86 | 0 | ) where |
87 | 0 | I: Iterator<Item = &'a u8> + Clone, |
88 | 0 | { |
89 | 0 | out.clear(); |
90 | 0 | // Number of repetitions of the current value |
91 | 0 | let mut repeat = 0; |
92 | 0 | let mut iter = lengths.clone().enumerate().peekable(); |
93 | 0 | // Previous value |
94 | 0 | // We set it to the compliment of the first falue to simplify the code. |
95 | 0 | let mut prev = !iter.peek().expect("No length values!").1; |
96 | | |
97 | 0 | while let Some((n, &l)) = iter.next() { |
98 | 0 | if l == prev && not_max_repetitions(l, repeat) { |
99 | 0 | repeat += 1; |
100 | 0 | } |
101 | 0 | if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) { |
102 | 0 | if repeat >= MIN_REPEAT { |
103 | | // The previous value has been repeated enough times to write out a repeat code. |
104 | | |
105 | 0 | let val = EncodedLength::from_prev_and_repeat(prev, repeat); |
106 | 0 | update_out_and_freq(val, &mut out, &mut frequencies); |
107 | 0 | repeat = 0; |
108 | 0 | // If we have a new length value, output l unless the last value is 0 or l is the |
109 | 0 | // last byte. |
110 | 0 | if l != prev { |
111 | 0 | if l != 0 || iter.peek().is_none() { |
112 | 0 | update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies); |
113 | 0 | repeat = 0; |
114 | 0 | } else { |
115 | 0 | // If we have a zero, we start repeat at one instead of outputting, as |
116 | 0 | // there are separate codes for repeats of zero so we don't need a literal |
117 | 0 | // to define what byte to repeat. |
118 | 0 | repeat = 1; |
119 | 0 | } |
120 | 0 | } |
121 | | } else { |
122 | | // There haven't been enough repetitions of the previous value, |
123 | | // so just we output the lengths directly. |
124 | | |
125 | | // If we are at the end, and we have a value that is repeated, we need to |
126 | | // skip a byte and output the last one. |
127 | 0 | let extra_skip = if iter.peek().is_none() && l == prev { |
128 | 0 | 1 |
129 | | } else { |
130 | 0 | 0 |
131 | | }; |
132 | | |
133 | | // Get to the position of the next byte to output by starting at zero and skipping. |
134 | 0 | let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize); |
135 | | |
136 | | // As repeats of zeroes have separate codes, we don't need to output a literal here |
137 | | // if we have a zero (unless we are at the end). |
138 | 0 | let extra = if l != 0 || iter.peek().is_none() { |
139 | 0 | 1 |
140 | | } else { |
141 | 0 | 0 |
142 | | }; |
143 | | |
144 | 0 | for &i in b_iter.take(repeat as usize + extra) { |
145 | 0 | update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies); |
146 | 0 | } |
147 | | |
148 | | // If the current byte is zero we start repeat at 1 as we didn't output the literal |
149 | | // directly. |
150 | 0 | repeat = 1 - extra as u8; |
151 | | } |
152 | 0 | } |
153 | 0 | prev = l; |
154 | | } |
155 | 0 | } |
156 | | |
/// Convenience wrapper for unit tests: generates the lengths into a freshly
/// allocated `Vec` instead of reusing caller-provided buffers.
#[cfg(test)]
pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
    in_place::gen_lengths(frequencies, max_len)
}
161 | | |
// Reusable buffer of tree leaves/nodes used by the in-place length algorithm.
pub type LeafVec = Vec<in_place::Node>;

/// Generate a set of canonical huffman lengths from the given frequencies, with a maximum length
/// of `max_len`. The lengths are put in the lens slice parameter. Unused lengths are set to 0.
///
/// The leaf buffer is passed in to avoid allocating it every time this function is called.
/// The existing data contained in it is not preserved.
pub fn huffman_lengths_from_frequency_m(
    frequencies: &[u16],
    max_len: usize,
    leaf_buffer: &mut LeafVec,
    lens: &mut [u8],
) {
    in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
}
177 | | |
178 | | mod in_place { |
    // Node weights (and, later, parent indices / depths) are stored as u32.
    type WeightType = u32;
180 | | |
181 | | #[cfg(debug)] |
182 | | pub fn validate_lengths(lengths: &[u8]) -> bool { |
183 | | // Avoid issue with floating point on mips: https://github.com/image-rs/deflate-rs/issues/23 |
184 | | if cfg!(any( |
185 | | target_arch = "mips", |
186 | | target_arch = "mipsel", |
187 | | target_arch = "mips64", |
188 | | target_arch = "mipsel64" |
189 | | )) { |
190 | | true |
191 | | } else { |
192 | | let v = lengths.iter().fold(0f64, |acc, &n| { |
193 | | acc + if n != 0 { |
194 | | 2f64.powi(-(i32::from(n))) |
195 | | } else { |
196 | | 0f64 |
197 | | } |
198 | | }); |
199 | | |
200 | | match v.partial_cmp(&1.0) { |
201 | | Some(std::cmp::Ordering::Greater) => false, |
202 | | _ => true, |
203 | | } |
204 | | } |
205 | | } |
206 | | |
207 | | #[cfg(not(debug))] |
208 | 0 | pub fn validate_lengths(_: &[u8]) -> bool { |
209 | 0 | true |
210 | 0 | } |
211 | | |
    /// A leaf/node of the huffman tree used by the in-place algorithm.
    #[derive(Eq, PartialEq, Debug)]
    pub struct Node {
        // Initially the symbol's weight (frequency); `step_1`/`step_2` reuse
        // this field to hold parent indices and finally code depths.
        value: WeightType,
        // Index of the symbol in the frequency table this leaf represents.
        symbol: u16,
    }
217 | | |
    /// First pass of the in-place Moffat–Katajainen algorithm: builds the
    /// huffman tree within the `leaves` array itself (which must be sorted by
    /// ascending weight). Internal-node slots end up holding the index of
    /// their parent; `root` points at the next unmerged internal node and
    /// `leaf` at the next unmerged leaf.
    fn step_1(leaves: &mut [Node]) {
        // If there are less than 2 non-zero frequencies, this function should not have been
        // called and we should not have gotten to this point.
        debug_assert!(leaves.len() >= 2);
        let mut root = 0;
        let mut leaf = 2;

        // The first internal node is always the merge of the two lightest leaves.
        leaves[0].value += leaves[1].value;

        for next in 1..leaves.len() - 1 {
            // Twice per internal node: take the lighter of the next internal
            // node and the next leaf; a consumed internal node's slot is
            // overwritten with its parent's index (`next`).
            if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
                leaves[next].value = leaves[root].value;
                leaves[root].value = next as WeightType;
                root += 1;
            } else {
                leaves[next].value = leaves[leaf].value;
                leaf += 1;
            }

            if (leaf >= leaves.len()) || (root < next && (leaves[root].value < leaves[leaf].value))
            {
                leaves[next].value += leaves[root].value;
                leaves[root].value = next as WeightType;
                root += 1;
            } else {
                leaves[next].value += leaves[leaf].value;
                leaf += 1;
            }
        }
    }
248 | | |
    /// Second pass of the in-place algorithm: converts the parent indices left
    /// by `step_1` into code depths (lengths), writing each leaf's final depth
    /// into its `value` field, deepest codes at the end of the slice.
    fn step_2(leaves: &mut [Node]) {
        debug_assert!(leaves.len() >= 2);
        let n = leaves.len();

        // The tree root has depth 0; every other internal node is one level
        // deeper than its parent (looked up via the stored parent index).
        leaves[n - 2].value = 0;
        for t in (0..(n + 1 - 3)).rev() {
            leaves[t].value = leaves[leaves[t].value as usize].value + 1;
        }

        let mut available = 1_usize;
        let mut used = 0;
        let mut depth = 0;
        let mut root = n as isize - 2;
        let mut next = n as isize - 1;

        // Walk depths top-down: at each depth, slots occupied by internal
        // nodes spawn two slots one level deeper; the remaining available
        // slots become leaves of the current depth.
        while available > 0 {
            // Count internal nodes sitting at this depth.
            while root >= 0 && leaves[root as usize].value == depth {
                used += 1;
                root -= 1;
            }
            // Remaining slots at this depth are leaves — record their depth.
            while available > used {
                leaves[next as usize].value = depth;
                next -= 1;
                available -= 1;
            }
            available = 2 * used;
            depth += 1;
            used = 0;
        }
    }
279 | | |
    // Upper bound on the code lengths tracked by the per-length count table.
    const MAX_NUMBER_OF_CODES: usize = 32;
    // One counter slot per possible length (0..=MAX_NUMBER_OF_CODES).
    const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
282 | | |
    /// Checks if any of the lengths exceed `max_len`, and if that is the case, alters the length
    /// table so that no codes exceed `max_len`.
    /// This is ported from miniz (which is released as public domain by Rich Geldreich
    /// https://github.com/richgel999/miniz/blob/master/miniz.c)
    ///
    /// This will not generate optimal (minimum-redundancy) codes, however in most cases
    /// this won't make a large difference.
    pub fn enforce_max_code_lengths(
        num_codes: &mut [u16; NUM_CODES_LENGTH],
        num_used: usize,
        max_len: usize,
    ) {
        debug_assert!(max_len <= 15);

        if num_used > 1 {
            // Move every code longer than max_len down to max_len.
            let mut num_above_max = 0u16;
            for &l in num_codes[(max_len as usize + 1)..].iter() {
                num_above_max += l;
            }

            num_codes[max_len] += num_above_max;

            // Compute the Kraft sum scaled by 2^max_len; a complete prefix code
            // has total == 1 << max_len.
            let mut total = 0u32;
            for i in (1..=max_len).rev() {
                // This should be safe as max_len won't be higher than 15, and num_codes[i] can't
                // be higher than 288,
                // and 288 << 15 will not be anywhere close to overflowing 32 bits
                total += (u32::from(num_codes[i])) << (max_len - i);
            }

            // miniz uses unsigned long here. 32-bits should be sufficient though,
            // as max_len won't be longer than 15 anyhow.
            while total != 1u32 << max_len {
                // The code is over-subscribed: drop one code from max_len and
                // push a shorter code one level deeper (splitting it into two)
                // until the Kraft sum balances.
                num_codes[max_len] -= 1;
                for i in (1..max_len).rev() {
                    if num_codes[i] != 0 {
                        num_codes[i] -= 1;
                        num_codes[i + 1] += 2;
                        break;
                    }
                }
                total -= 1;
            }
        }
    }
328 | | |
    #[cfg(test)]
    /// Convenience wrapper for tests: allocates fresh buffers per call, whereas
    /// production callers reuse buffers via `in_place_lengths` directly.
    pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
        let mut lens = vec![0u8; frequencies.len()];
        let mut leaves = Vec::new();
        in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
        lens
    }
337 | | |
338 | | /// Generate huffman code lengths, using the algorithm described by |
339 | | /// Moffat and Katajainen in In-Place Calculation of Minimum-Redundancy Codes |
340 | | /// https://people.eng.unimelb.edu.au/ammoffat/abstracts/mk95wads.html |
341 | | /// and it's implementation. |
342 | | /// |
343 | | /// This is significantly faster, and seems to generally create lengths that result in length |
344 | | /// tables that are better compressible than the algorithm used previously. The downside of this |
345 | | /// algorithm is that it's not length-limited, so if too long code lengths are generated, |
346 | | /// it might result in a sub-optimal tables as the length-restricting function isn't optimal. |
347 | 0 | pub fn in_place_lengths( |
348 | 0 | frequencies: &[u16], |
349 | 0 | max_len: usize, |
350 | 0 | mut leaves: &mut Vec<Node>, |
351 | 0 | lengths: &mut [u8], |
352 | 0 | ) { |
353 | 0 | debug_assert!(lengths.len() >= frequencies.len()); |
354 | | |
355 | 0 | for l in lengths.iter_mut() { |
356 | 0 | *l = 0; |
357 | 0 | } |
358 | | |
359 | | // Clear any previous leaves in the leaf buffer. |
360 | 0 | leaves.clear(); |
361 | 0 |
|
362 | 0 | // Discard zero length nodes as they won't be given a code and thus don't need to |
363 | 0 | // participate in code length generation and create a new vec of the remaining |
364 | 0 | // symbols and weights. |
365 | 0 | leaves.extend(frequencies.iter().enumerate().filter_map(|(n, f)| { |
366 | 0 | if *f > 0 { |
367 | 0 | Some(Node { |
368 | 0 | value: u32::from(*f), |
369 | 0 | symbol: n as u16, |
370 | 0 | }) |
371 | | } else { |
372 | 0 | None |
373 | | } |
374 | 0 | })); |
375 | 0 |
|
376 | 0 | // Special cases with zero or 1 value having a non-zero frequency |
377 | 0 | if leaves.len() == 1 { |
378 | 0 | lengths[leaves[0].symbol as usize] = 1; |
379 | 0 | return; |
380 | 0 | } else if leaves.is_empty() { |
381 | 0 | return; |
382 | 0 | } |
383 | 0 |
|
384 | 0 | // Sort the leaves by value. As the sort in the standard library is stable, we don't |
385 | 0 | // have to worry about the symbol code here. |
386 | 0 | leaves.sort_by(|a, b| a.value.cmp(&b.value)); |
387 | 0 |
|
388 | 0 | step_1(&mut leaves); |
389 | 0 | step_2(&mut leaves); |
390 | 0 |
|
391 | 0 | // Count how many codes of each length used, for usage in the next section. |
392 | 0 | let mut num_codes = [0u16; NUM_CODES_LENGTH]; |
393 | 0 | for l in leaves.iter() { |
394 | 0 | num_codes[l.value as usize] += 1; |
395 | 0 | } |
396 | | |
397 | | // As the algorithm used here doesn't limit the maximum length that can be generated |
398 | | // we need to make sure none of the lengths exceed `max_len` |
399 | 0 | enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len); |
400 | 0 |
|
401 | 0 | // Output the actual lengths |
402 | 0 | let mut leaf_it = leaves.iter().rev(); |
403 | | // Start at 1 since the length table is already filled with zeroes. |
404 | 0 | for (&n_codes, i) in num_codes[1..=max_len].iter().zip(1..=(max_len as u8)) { |
405 | 0 | for _ in 0..n_codes { |
406 | 0 | lengths[leaf_it.next().unwrap().symbol as usize] = i; |
407 | 0 | } |
408 | | } |
409 | | |
410 | 0 | debug_assert_eq!(leaf_it.next(), None); |
411 | 0 | debug_assert!( |
412 | 0 | validate_lengths(lengths), |
413 | | "The generated length codes were not valid!" |
414 | | ); |
415 | 0 | } |
416 | | } |
417 | | |
418 | | #[cfg(test)] |
419 | | mod test { |
420 | | use super::*; |
421 | | use crate::huffman_table::NUM_LITERALS_AND_LENGTHS; |
422 | | use std::u16; |
423 | | |
424 | | fn lit(value: u8) -> EncodedLength { |
425 | | EncodedLength::Length(value) |
426 | | } |
427 | | |
428 | | fn zero(repeats: u8) -> EncodedLength { |
429 | | match repeats { |
430 | | 0..=1 => EncodedLength::Length(0), |
431 | | 2..=10 => EncodedLength::RepeatZero3Bits(repeats), |
432 | | _ => EncodedLength::RepeatZero7Bits(repeats), |
433 | | } |
434 | | } |
435 | | |
436 | | fn copy(copies: u8) -> EncodedLength { |
437 | | EncodedLength::CopyPrevious(copies) |
438 | | } |
439 | | |
    #[test]
    fn test_encode_lengths() {
        use crate::huffman_table::FIXED_CODE_LENGTHS;
        let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
        // There are no lengths lower than 6 in the fixed table
        assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
        // Neither are there any lengths above 9
        assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
        // Also there are no zero-length codes so there shouldn't be any repetitions of zero
        assert_eq!(enc.1[17..19], [0, 0]);

        // Mix of literals, a zero run and copy-previous runs.
        let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
        let enc = encode_lengths(test_lengths.iter()).0;
        assert_eq!(
            enc,
            vec![
                lit(0),
                lit(0),
                lit(5),
                lit(0),
                lit(15),
                lit(1),
                zero(3),
                lit(2),
                lit(4),
                copy(3),
                lit(3),
                lit(5),
                copy(3),
            ]
        );
        // Zero runs at both ends of the input.
        let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
        let enc = encode_lengths(test_lengths.iter()).0;
        assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);

        // A trailing pair of zeros is too short for a repeat code and must be
        // emitted as literals.
        let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
        let enc = encode_lengths(test_lengths.iter()).0;
        assert_eq!(
            enc,
            vec![
                zero(3),
                lit(3),
                lit(3),
                lit(3),
                lit(5),
                lit(4),
                copy(3),
                lit(0),
                lit(0),
            ]
        );

        // Regression input (mostly zeros): encoding must not panic.
        let lens = [
            0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        ];

        let _ = encode_lengths(lens.iter()).0;

        // A realistic litlen + distance length table from an actual stream.
        let lens = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0, 7, 6, 7, 8, 7, 7,
            8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10, 8, 8, 10, 8, 0, 8, 7, 0,
            10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6, 7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0,
            7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4,
            3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9, /* litlen */
            0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 3,
            4,
        ];

        let enc = encode_lengths(lens.iter()).0;

        assert_eq!(
            &enc[..10],
            &[
                zero(10),
                lit(9),
                lit(0),
                lit(0),
                lit(9),
                zero(18),
                lit(6),
                zero(3),
                lit(8),
                zero(4)
            ]
        );
        assert_eq!(
            &enc[10..20],
            &[
                lit(8),
                lit(0),
                lit(0),
                lit(7),
                lit(8),
                lit(7),
                lit(8),
                lit(6),
                lit(6),
                lit(8)
            ]
        );

        // Short edge-case inputs.
        let enc = encode_lengths([1, 1, 1, 2].iter()).0;
        assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
        let enc = encode_lengths([0, 0, 3].iter()).0;
        assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
        let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
        assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);

        // A trailing zero after a literal must not be swallowed into the literal.
        let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
        assert!(*enc.last().unwrap() != lit(5));

        let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
        assert_eq!(*enc.last().unwrap(), zero(0));
    }
568 | | |
    #[test]
    fn test_lengths_from_frequencies() {
        // Higher frequencies should receive shorter codes.
        let frequencies = [1, 1, 5, 7, 10, 14];

        let expected = [4, 4, 3, 2, 2, 2];
        let res = huffman_lengths_from_frequency(&frequencies, 4);

        assert_eq!(expected, res.as_slice());

        // Same frequencies permuted: lengths follow the frequencies.
        let frequencies = [1, 5, 1, 7, 10, 14];
        let expected = [4, 3, 4, 2, 2, 2];

        let res = huffman_lengths_from_frequency(&frequencies, 4);

        assert_eq!(expected, res.as_slice());

        // Zero-frequency symbols must get length 0 (no code).
        let frequencies = [0, 25, 0, 10, 2, 4];

        let res = huffman_lengths_from_frequency(&frequencies, 4);
        assert_eq!(res[0], 0);
        assert_eq!(res[2], 0);
        assert!(res[1] < 4);

        // Only one value
        let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
        let res = huffman_lengths_from_frequency(&frequencies, 5);
        let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
        assert_eq!(expected, res.as_slice());

        // No values
        let frequencies = [0; 30];
        let res = huffman_lengths_from_frequency(&frequencies, 5);
        for (a, b) in frequencies.iter().zip(res.iter()) {
            assert_eq!(*a, (*b).into());
        }
        // assert_eq!(frequencies, res.as_slice());

        // Dominant frequencies must still produce short codes even when the
        // rest of the table is nearly flat.
        let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
        frequencies[55] = u16::MAX / 3;
        frequencies[125] = u16::MAX / 3;

        let res = huffman_lengths_from_frequency(&frequencies, 15);
        assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
        assert!(res[55] < 3);
        assert!(res[125] < 3);
    }
615 | | |
    #[test]
    /// Test if the bit lengths for a set of frequencies are optimal (give the best compression
    /// give the provided frequencies).
    fn optimal_lengths() {
        let freqs = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 68, 0, 14, 0, 0, 0, 0, 3, 7, 6, 1, 0, 12, 14, 9, 2, 6, 9, 4, 1, 1, 4, 1, 1, 0,
            0, 1, 3, 0, 6, 0, 0, 0, 4, 4, 1, 2, 5, 3, 2, 2, 9, 0, 0, 3, 1, 5, 5, 8, 0, 6, 10, 5, 2,
            0, 0, 1, 2, 0, 8, 11, 4, 0, 1, 3, 31, 13, 23, 22, 56, 22, 8, 11, 43, 0, 7, 33, 15, 45,
            40, 16, 1, 28, 37, 35, 26, 3, 7, 11, 9, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 1, 126, 114, 66, 31, 41, 25, 15, 21, 20, 16, 15, 10, 7, 5, 1, 1,
        ];

        let lens = huffman_lengths_from_frequency(&freqs, 15);

        // Lengths produced by miniz for this frequency table for comparison
        // the number of total bits encoded with these huffman codes is 7701
        // NOTE: There can be more than one set of optimal lengths for a set of frequencies,
        // (though there may be a difference in how well the table itself can be represented)
        // so testing for a specific length table is not ideal since different algorithms
        // may produce different length tables.
        // let lens3 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 0, 0, 0, 9, 8, 8, 10, 0, 7, 7, 7, 10, 8, 7, 8,
        // 10, 10, 8, 10, 10, 0, 0, 10, 9, 0, 8, 0, 0, 0, 8, 8, 10, 9, 8, 9, 9, 9, 7, 0,
        // 0, 9, 10, 8, 8, 7, 0, 8, 7, 8, 9, 0, 0, 10, 9, 0, 7, 7, 8, 0, 10, 9, 6, 7, 6,
        // 6, 5, 6, 7, 7, 5, 0, 8, 5, 7, 5, 5, 6, 10, 6, 5, 5, 6, 9, 8, 7, 7, 10, 10, 0,
        // 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 10, 4, 4, 4, 5, 5, 6, 7, 6, 6, 6, 6, 7, 8, 8, 10, 10];
        //

        // Compare the total encoded bit count against the known-optimal 7701.
        // (Note: the closure binds `f` to the length and `l` to the frequency;
        // the product is the same either way.)
        let num_bits = lens
            .iter()
            .zip(freqs.iter())
            .fold(0, |a, (&f, &l)| a + (f as u16 * l));
        assert_eq!(num_bits, 7701);
    }
661 | | } |