Coverage Report

Created: 2024-12-17 06:15

/rust/registry/src/index.crates.io-6f17d22bba15001f/deflate-1.0.0/src/length_encode.rs
Line
Count
Source (jump to first uncovered line)
1
use std::clone::Clone;
2
use std::iter::Iterator;
3
4
/// An enum representing the different types in the run-length encoded data used to encode
5
/// Huffman table lengths
6
#[derive(Debug, PartialEq, Eq)]
7
pub enum EncodedLength {
8
    // An actual length value
9
    Length(u8),
10
    // Copy the previous value n times
11
    CopyPrevious(u8),
12
    // Repeat zero n times (with n represented by 3 bits)
13
    RepeatZero3Bits(u8),
14
    // Repeat zero n times (with n represented by 7 bits)
15
    RepeatZero7Bits(u8),
16
}
17
18
impl EncodedLength {
19
0
    fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
20
0
        match prev {
21
            0 => {
22
0
                if repeat <= 10 {
23
0
                    EncodedLength::RepeatZero3Bits(repeat)
24
                } else {
25
0
                    EncodedLength::RepeatZero7Bits(repeat)
26
                }
27
            }
28
0
            1..=15 => EncodedLength::CopyPrevious(repeat),
29
0
            _ => panic!(),
30
        }
31
0
    }
32
}
33
34
pub const COPY_PREVIOUS: usize = 16;
35
pub const REPEAT_ZERO_3_BITS: usize = 17;
36
pub const REPEAT_ZERO_7_BITS: usize = 18;
37
38
const MIN_REPEAT: u8 = 3;
39
40
/// Push an `EncodedLength` to the vector and update the frequency table.
41
0
fn update_out_and_freq(
42
0
    encoded: EncodedLength,
43
0
    output: &mut Vec<EncodedLength>,
44
0
    frequencies: &mut [u16; 19],
45
0
) {
46
0
    let index = match encoded {
47
0
        EncodedLength::Length(l) => usize::from(l),
48
0
        EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
49
0
        EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
50
0
        EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
51
    };
52
53
0
    frequencies[index] += 1;
54
0
55
0
    output.push(encoded);
56
0
}
57
58
/// Convenience function to check if the repeat counter should be incremented further
59
0
fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
60
0
    (length_value == 0 && repeats < 138) || repeats < 6
61
0
}
62
63
///Convenience version for unit tests.
64
#[cfg(test)]
65
pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
66
where
67
    I: Iterator<Item = &'a u8> + Clone,
68
{
69
    let mut freqs = [0u16; 19];
70
    let mut encoded: Vec<EncodedLength> = Vec::new();
71
    encode_lengths_m(lengths, &mut encoded, &mut freqs);
72
    (encoded, freqs)
73
}
74
75
/// Run-length encodes the lengths of the values in `lengths` according to the deflate
76
/// specification. This is used for writing the code lengths for the Huffman tables for
77
/// the deflate stream.
78
///
79
/// Populates the supplied array with the frequency of the different encoded length values
80
/// The frequency array is taken as a parameter rather than returned to avoid
81
/// excessive `memcpy`-ing.
82
0
pub fn encode_lengths_m<'a, I>(
83
0
    lengths: I,
84
0
    mut out: &mut Vec<EncodedLength>,
85
0
    mut frequencies: &mut [u16; 19],
86
0
) where
87
0
    I: Iterator<Item = &'a u8> + Clone,
88
0
{
89
0
    out.clear();
90
0
    // Number of repetitions of the current value
91
0
    let mut repeat = 0;
92
0
    let mut iter = lengths.clone().enumerate().peekable();
93
0
    // Previous value
94
0
    // We set it to the compliment of the first falue to simplify the code.
95
0
    let mut prev = !iter.peek().expect("No length values!").1;
96
97
0
    while let Some((n, &l)) = iter.next() {
98
0
        if l == prev && not_max_repetitions(l, repeat) {
99
0
            repeat += 1;
100
0
        }
101
0
        if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
102
0
            if repeat >= MIN_REPEAT {
103
                // The previous value has been repeated enough times to write out a repeat code.
104
105
0
                let val = EncodedLength::from_prev_and_repeat(prev, repeat);
106
0
                update_out_and_freq(val, &mut out, &mut frequencies);
107
0
                repeat = 0;
108
0
                // If we have a new length value, output l unless the last value is 0 or l is the
109
0
                // last byte.
110
0
                if l != prev {
111
0
                    if l != 0 || iter.peek().is_none() {
112
0
                        update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
113
0
                        repeat = 0;
114
0
                    } else {
115
0
                        // If we have a zero, we start repeat at one instead of outputting, as
116
0
                        // there are separate codes for repeats of zero so we don't need a literal
117
0
                        // to define what byte to repeat.
118
0
                        repeat = 1;
119
0
                    }
120
0
                }
121
            } else {
122
                // There haven't been enough repetitions of the previous value,
123
                // so just we output the lengths directly.
124
125
                // If we are at the end, and we have a value that is repeated, we need to
126
                // skip a byte and output the last one.
127
0
                let extra_skip = if iter.peek().is_none() && l == prev {
128
0
                    1
129
                } else {
130
0
                    0
131
                };
132
133
                // Get to the position of the next byte to output by starting at zero and skipping.
134
0
                let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
135
136
                // As repeats of zeroes have separate codes, we don't need to output a literal here
137
                // if we have a zero (unless we are at the end).
138
0
                let extra = if l != 0 || iter.peek().is_none() {
139
0
                    1
140
                } else {
141
0
                    0
142
                };
143
144
0
                for &i in b_iter.take(repeat as usize + extra) {
145
0
                    update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
146
0
                }
147
148
                // If the current byte is zero we start repeat at 1 as we didn't output the literal
149
                // directly.
150
0
                repeat = 1 - extra as u8;
151
            }
152
0
        }
153
0
        prev = l;
154
    }
155
0
}
156
157
#[cfg(test)]
158
pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
159
    in_place::gen_lengths(frequencies, max_len)
160
}
161
162
pub type LeafVec = Vec<in_place::Node>;
163
164
/// Generate a set of canonical huffman lengths from the given frequencies, with a maximum length
165
/// of `max_len`. The lengths are put in the lens slice parameter. Unused lengths are set to 0.
166
///
167
/// The leaf buffer is passed in to avoid allocating it every time this function is called.
168
/// The existing data contained in it is not preserved.
169
0
pub fn huffman_lengths_from_frequency_m(
170
0
    frequencies: &[u16],
171
0
    max_len: usize,
172
0
    leaf_buffer: &mut LeafVec,
173
0
    lens: &mut [u8],
174
0
) {
175
0
    in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
176
0
}
177
178
mod in_place {
179
    type WeightType = u32;
180
181
    #[cfg(debug)]
182
    pub fn validate_lengths(lengths: &[u8]) -> bool {
183
        // Avoid issue with floating point on mips: https://github.com/image-rs/deflate-rs/issues/23
184
        if cfg!(any(
185
            target_arch = "mips",
186
            target_arch = "mipsel",
187
            target_arch = "mips64",
188
            target_arch = "mipsel64"
189
        )) {
190
            true
191
        } else {
192
            let v = lengths.iter().fold(0f64, |acc, &n| {
193
                acc + if n != 0 {
194
                    2f64.powi(-(i32::from(n)))
195
                } else {
196
                    0f64
197
                }
198
            });
199
200
            match v.partial_cmp(&1.0) {
201
                Some(std::cmp::Ordering::Greater) => false,
202
                _ => true,
203
            }
204
        }
205
    }
206
207
    #[cfg(not(debug))]
208
0
    pub fn validate_lengths(_: &[u8]) -> bool {
209
0
        true
210
0
    }
211
212
    #[derive(Eq, PartialEq, Debug)]
213
    pub struct Node {
214
        value: WeightType,
215
        symbol: u16,
216
    }
217
218
0
    fn step_1(leaves: &mut [Node]) {
219
0
        // If there are less than 2 non-zero frequencies, this function should not have been
220
0
        // called and we should not have gotten to this point.
221
0
        debug_assert!(leaves.len() >= 2);
222
0
        let mut root = 0;
223
0
        let mut leaf = 2;
224
0
225
0
        leaves[0].value += leaves[1].value;
226
227
0
        for next in 1..leaves.len() - 1 {
228
0
            if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
229
0
                leaves[next].value = leaves[root].value;
230
0
                leaves[root].value = next as WeightType;
231
0
                root += 1;
232
0
            } else {
233
0
                leaves[next].value = leaves[leaf].value;
234
0
                leaf += 1;
235
0
            }
236
237
0
            if (leaf >= leaves.len()) || (root < next && (leaves[root].value < leaves[leaf].value))
238
0
            {
239
0
                leaves[next].value += leaves[root].value;
240
0
                leaves[root].value = next as WeightType;
241
0
                root += 1;
242
0
            } else {
243
0
                leaves[next].value += leaves[leaf].value;
244
0
                leaf += 1;
245
0
            }
246
        }
247
0
    }
248
249
0
    fn step_2(leaves: &mut [Node]) {
250
0
        debug_assert!(leaves.len() >= 2);
251
0
        let n = leaves.len();
252
0
253
0
        leaves[n - 2].value = 0;
254
0
        for t in (0..(n + 1 - 3)).rev() {
255
0
            leaves[t].value = leaves[leaves[t].value as usize].value + 1;
256
0
        }
257
258
0
        let mut available = 1_usize;
259
0
        let mut used = 0;
260
0
        let mut depth = 0;
261
0
        let mut root = n as isize - 2;
262
0
        let mut next = n as isize - 1;
263
264
0
        while available > 0 {
265
0
            while root >= 0 && leaves[root as usize].value == depth {
266
0
                used += 1;
267
0
                root -= 1;
268
0
            }
269
0
            while available > used {
270
0
                leaves[next as usize].value = depth;
271
0
                next -= 1;
272
0
                available -= 1;
273
0
            }
274
0
            available = 2 * used;
275
0
            depth += 1;
276
0
            used = 0;
277
        }
278
0
    }
279
280
    const MAX_NUMBER_OF_CODES: usize = 32;
281
    const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
282
283
    /// Checks if any of the lengths exceed `max_len`, and if that is the case, alters the length
284
    /// table so that no codes exceed `max_len`.
285
    /// This is ported from miniz (which is released as public domain by Rich Geldreich
286
    /// https://github.com/richgel999/miniz/blob/master/miniz.c)
287
    ///
288
    /// This will not generate optimal (minimim-redundancy) codes, however in most cases
289
    /// this won't make a large difference.
290
0
    pub fn enforce_max_code_lengths(
291
0
        num_codes: &mut [u16; NUM_CODES_LENGTH],
292
0
        num_used: usize,
293
0
        max_len: usize,
294
0
    ) {
295
0
        debug_assert!(max_len <= 15);
296
297
0
        if num_used > 1 {
298
0
            let mut num_above_max = 0u16;
299
0
            for &l in num_codes[(max_len as usize + 1)..].iter() {
300
0
                num_above_max += l;
301
0
            }
302
303
0
            num_codes[max_len] += num_above_max;
304
0
305
0
            let mut total = 0u32;
306
0
            for i in (1..=max_len).rev() {
307
0
                // This should be safe as max_len won't be higher than 15, and num_codes[i] can't
308
0
                // be higher than 288,
309
0
                // and 288 << 15 will not be anywhere close to overflowing 32 bits
310
0
                total += (u32::from(num_codes[i])) << (max_len - i);
311
0
            }
312
313
            // miniz uses unsigned long here. 32-bits should be sufficient though,
314
            // as max_len won't be longer than 15 anyhow.
315
0
            while total != 1u32 << max_len {
316
0
                num_codes[max_len] -= 1;
317
0
                for i in (1..max_len).rev() {
318
0
                    if num_codes[i] != 0 {
319
0
                        num_codes[i] -= 1;
320
0
                        num_codes[i + 1] += 2;
321
0
                        break;
322
0
                    }
323
                }
324
0
                total -= 1;
325
            }
326
0
        }
327
0
    }
328
329
    #[cfg(test)]
330
    /// Convenience wrapper for tests.
331
    pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
332
        let mut lens = vec![0u8; frequencies.len()];
333
        let mut leaves = Vec::new();
334
        in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
335
        lens
336
    }
337
338
    /// Generate huffman code lengths, using the algorithm described by
339
    /// Moffat and Katajainen in In-Place Calculation of Minimum-Redundancy Codes
340
    /// https://people.eng.unimelb.edu.au/ammoffat/abstracts/mk95wads.html
341
    /// and it's implementation.
342
    ///
343
    /// This is significantly faster, and seems to generally create lengths that result in length
344
    /// tables that are better compressible than the algorithm used previously. The downside of this
345
    /// algorithm is that it's not length-limited, so if too long code lengths are generated,
346
    /// it might result in a sub-optimal tables as the length-restricting function isn't optimal.
347
0
    pub fn in_place_lengths(
348
0
        frequencies: &[u16],
349
0
        max_len: usize,
350
0
        mut leaves: &mut Vec<Node>,
351
0
        lengths: &mut [u8],
352
0
    ) {
353
0
        debug_assert!(lengths.len() >= frequencies.len());
354
355
0
        for l in lengths.iter_mut() {
356
0
            *l = 0;
357
0
        }
358
359
        // Clear any previous leaves in the leaf buffer.
360
0
        leaves.clear();
361
0
362
0
        // Discard zero length nodes as they won't be given a code and thus don't need to
363
0
        // participate in code length generation and create a new vec of the remaining
364
0
        // symbols and weights.
365
0
        leaves.extend(frequencies.iter().enumerate().filter_map(|(n, f)| {
366
0
            if *f > 0 {
367
0
                Some(Node {
368
0
                    value: u32::from(*f),
369
0
                    symbol: n as u16,
370
0
                })
371
            } else {
372
0
                None
373
            }
374
0
        }));
375
0
376
0
        // Special cases with zero or 1 value having a non-zero frequency
377
0
        if leaves.len() == 1 {
378
0
            lengths[leaves[0].symbol as usize] = 1;
379
0
            return;
380
0
        } else if leaves.is_empty() {
381
0
            return;
382
0
        }
383
0
384
0
        // Sort the leaves by value. As the sort in the standard library is stable, we don't
385
0
        // have to worry about the symbol code here.
386
0
        leaves.sort_by(|a, b| a.value.cmp(&b.value));
387
0
388
0
        step_1(&mut leaves);
389
0
        step_2(&mut leaves);
390
0
391
0
        // Count how many codes of each length used, for usage in the next section.
392
0
        let mut num_codes = [0u16; NUM_CODES_LENGTH];
393
0
        for l in leaves.iter() {
394
0
            num_codes[l.value as usize] += 1;
395
0
        }
396
397
        // As the algorithm used here doesn't limit the maximum length that can be generated
398
        // we need to make sure none of the lengths exceed `max_len`
399
0
        enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);
400
0
401
0
        // Output the actual lengths
402
0
        let mut leaf_it = leaves.iter().rev();
403
        // Start at 1 since the length table is already filled with zeroes.
404
0
        for (&n_codes, i) in num_codes[1..=max_len].iter().zip(1..=(max_len as u8)) {
405
0
            for _ in 0..n_codes {
406
0
                lengths[leaf_it.next().unwrap().symbol as usize] = i;
407
0
            }
408
        }
409
410
0
        debug_assert_eq!(leaf_it.next(), None);
411
0
        debug_assert!(
412
0
            validate_lengths(lengths),
413
            "The generated length codes were not valid!"
414
        );
415
0
    }
416
}
417
418
#[cfg(test)]
419
mod test {
420
    use super::*;
421
    use crate::huffman_table::NUM_LITERALS_AND_LENGTHS;
422
    use std::u16;
423
424
    fn lit(value: u8) -> EncodedLength {
425
        EncodedLength::Length(value)
426
    }
427
428
    fn zero(repeats: u8) -> EncodedLength {
429
        match repeats {
430
            0..=1 => EncodedLength::Length(0),
431
            2..=10 => EncodedLength::RepeatZero3Bits(repeats),
432
            _ => EncodedLength::RepeatZero7Bits(repeats),
433
        }
434
    }
435
436
    fn copy(copies: u8) -> EncodedLength {
437
        EncodedLength::CopyPrevious(copies)
438
    }
439
440
    #[test]
441
    fn test_encode_lengths() {
442
        use crate::huffman_table::FIXED_CODE_LENGTHS;
443
        let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
444
        // There are no lengths lower than 6 in the fixed table
445
        assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
446
        // Neither are there any lengths above 9
447
        assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
448
        // Also there are no zero-length codes so there shouldn't be any repetitions of zero
449
        assert_eq!(enc.1[17..19], [0, 0]);
450
451
        let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
452
        let enc = encode_lengths(test_lengths.iter()).0;
453
        assert_eq!(
454
            enc,
455
            vec![
456
                lit(0),
457
                lit(0),
458
                lit(5),
459
                lit(0),
460
                lit(15),
461
                lit(1),
462
                zero(3),
463
                lit(2),
464
                lit(4),
465
                copy(3),
466
                lit(3),
467
                lit(5),
468
                copy(3),
469
            ]
470
        );
471
        let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
472
        let enc = encode_lengths(test_lengths.iter()).0;
473
        assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
474
475
        let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
476
        let enc = encode_lengths(test_lengths.iter()).0;
477
        assert_eq!(
478
            enc,
479
            vec![
480
                zero(3),
481
                lit(3),
482
                lit(3),
483
                lit(3),
484
                lit(5),
485
                lit(4),
486
                copy(3),
487
                lit(0),
488
                lit(0),
489
            ]
490
        );
491
492
        let lens = [
493
            0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
494
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
495
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
502
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
503
        ];
504
505
        let _ = encode_lengths(lens.iter()).0;
506
507
        let lens = [
508
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509
            0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0, 7, 6, 7, 8, 7, 7,
510
            8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10, 8, 8, 10, 8, 0, 8, 7, 0,
511
            10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6, 7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0,
512
            7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4,
517
            3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9, /* litlen */
518
            0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 3,
519
            4,
520
        ];
521
522
        let enc = encode_lengths(lens.iter()).0;
523
524
        assert_eq!(
525
            &enc[..10],
526
            &[
527
                zero(10),
528
                lit(9),
529
                lit(0),
530
                lit(0),
531
                lit(9),
532
                zero(18),
533
                lit(6),
534
                zero(3),
535
                lit(8),
536
                zero(4)
537
            ]
538
        );
539
        assert_eq!(
540
            &enc[10..20],
541
            &[
542
                lit(8),
543
                lit(0),
544
                lit(0),
545
                lit(7),
546
                lit(8),
547
                lit(7),
548
                lit(8),
549
                lit(6),
550
                lit(6),
551
                lit(8)
552
            ]
553
        );
554
555
        let enc = encode_lengths([1, 1, 1, 2].iter()).0;
556
        assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
557
        let enc = encode_lengths([0, 0, 3].iter()).0;
558
        assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
559
        let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
560
        assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
561
562
        let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
563
        assert!(*enc.last().unwrap() != lit(5));
564
565
        let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
566
        assert_eq!(*enc.last().unwrap(), zero(0));
567
    }
568
569
    #[test]
570
    fn test_lengths_from_frequencies() {
571
        let frequencies = [1, 1, 5, 7, 10, 14];
572
573
        let expected = [4, 4, 3, 2, 2, 2];
574
        let res = huffman_lengths_from_frequency(&frequencies, 4);
575
576
        assert_eq!(expected, res.as_slice());
577
578
        let frequencies = [1, 5, 1, 7, 10, 14];
579
        let expected = [4, 3, 4, 2, 2, 2];
580
581
        let res = huffman_lengths_from_frequency(&frequencies, 4);
582
583
        assert_eq!(expected, res.as_slice());
584
585
        let frequencies = [0, 25, 0, 10, 2, 4];
586
587
        let res = huffman_lengths_from_frequency(&frequencies, 4);
588
        assert_eq!(res[0], 0);
589
        assert_eq!(res[2], 0);
590
        assert!(res[1] < 4);
591
592
        // Only one value
593
        let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
594
        let res = huffman_lengths_from_frequency(&frequencies, 5);
595
        let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
596
        assert_eq!(expected, res.as_slice());
597
598
        // No values
599
        let frequencies = [0; 30];
600
        let res = huffman_lengths_from_frequency(&frequencies, 5);
601
        for (a, b) in frequencies.iter().zip(res.iter()) {
602
            assert_eq!(*a, (*b).into());
603
        }
604
        // assert_eq!(frequencies, res.as_slice());
605
606
        let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
607
        frequencies[55] = u16::MAX / 3;
608
        frequencies[125] = u16::MAX / 3;
609
610
        let res = huffman_lengths_from_frequency(&frequencies, 15);
611
        assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
612
        assert!(res[55] < 3);
613
        assert!(res[125] < 3);
614
    }
615
616
    #[test]
617
    /// Test if the bit lengths for a set of frequencies are optimal (give the best compression
618
    /// give the provided frequencies).
619
    fn optimal_lengths() {
620
        let freqs = [
621
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622
            0, 0, 0, 68, 0, 14, 0, 0, 0, 0, 3, 7, 6, 1, 0, 12, 14, 9, 2, 6, 9, 4, 1, 1, 4, 1, 1, 0,
623
            0, 1, 3, 0, 6, 0, 0, 0, 4, 4, 1, 2, 5, 3, 2, 2, 9, 0, 0, 3, 1, 5, 5, 8, 0, 6, 10, 5, 2,
624
            0, 0, 1, 2, 0, 8, 11, 4, 0, 1, 3, 31, 13, 23, 22, 56, 22, 8, 11, 43, 0, 7, 33, 15, 45,
625
            40, 16, 1, 28, 37, 35, 26, 3, 7, 11, 9, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
627
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630
            0, 0, 1, 126, 114, 66, 31, 41, 25, 15, 21, 20, 16, 15, 10, 7, 5, 1, 1,
631
        ];
632
633
        let lens = huffman_lengths_from_frequency(&freqs, 15);
634
635
        // Lengths produced by miniz for this frequency table for comparison
636
        // the number of total bits encoded with these huffman codes is 7701
637
        // NOTE: There can be more than one set of optimal lengths for a set of frequencies,
638
        // (though there may be a difference in how well the table itself can be represented)
639
        // so testing for a specific length table is not ideal since different algorithms
640
        // may produce different length tables.
641
        // let lens3 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
642
        // 0, 0, 0, 0, 0,
643
        // 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 0, 0, 0, 9, 8, 8, 10, 0, 7, 7, 7, 10, 8, 7, 8,
644
        // 10, 10, 8, 10, 10, 0, 0, 10, 9, 0, 8, 0, 0, 0, 8, 8, 10, 9, 8, 9, 9, 9, 7, 0,
645
        // 0, 9, 10, 8, 8, 7, 0, 8, 7, 8, 9, 0, 0, 10, 9, 0, 7, 7, 8, 0, 10, 9, 6, 7, 6,
646
        // 6, 5, 6, 7, 7, 5, 0, 8, 5, 7, 5, 5, 6, 10, 6, 5, 5, 6, 9, 8, 7, 7, 10, 10, 0,
647
        // 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
652
        // 0, 0, 10, 4, 4, 4, 5, 5, 6, 7, 6, 6, 6, 6, 7, 8, 8, 10, 10];
653
        //
654
655
        let num_bits = lens
656
            .iter()
657
            .zip(freqs.iter())
658
            .fold(0, |a, (&f, &l)| a + (f as u16 * l));
659
        assert_eq!(num_bits, 7701);
660
    }
661
}