/src/serenity/Userland/Libraries/LibCompress/Huffman.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2024, the SerenityOS developers. |
3 | | * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #pragma once |
9 | | |
10 | | #include <AK/Array.h> |
11 | | #include <AK/BinaryHeap.h> |
12 | | |
13 | | namespace Compress { |
14 | | |
15 | | inline void generate_huffman_lengths(Bytes lengths, ReadonlySpan<u16> frequencies, size_t max_bit_length, u16 shift = 0) |
16 | 55.8k | { |
17 | 55.8k | VERIFY(lengths.size() == frequencies.size()); |
18 | 55.8k | auto const size = lengths.size(); |
19 | 55.8k | VERIFY((1u << max_bit_length) >= size); |
20 | 55.8k | u16 heap_keys[size]; // Used for O(n) heap construction |
21 | 55.8k | u16 heap_values[size]; |
22 | | |
23 | 55.8k | u16 huffman_links[size * 2]; |
24 | 55.8k | size_t non_zero_freqs = 0; |
25 | 6.04M | for (auto frequency : frequencies) { |
26 | 6.04M | if (frequency == 0) |
27 | 3.18M | continue; |
28 | | |
29 | 2.85M | frequency = max(1, frequency >> shift); |
30 | 2.85M | heap_keys[non_zero_freqs] = frequency; // sort symbols by frequency |
31 | 2.85M | heap_values[non_zero_freqs] = size + non_zero_freqs; // huffman_links "links" |
32 | 2.85M | non_zero_freqs++; |
33 | 2.85M | } |
34 | | |
35 | | // special case for only 1 used symbol |
36 | 55.8k | if (non_zero_freqs < 2) { |
37 | 295k | for (size_t i = 0; i < size; i++) |
38 | 286k | lengths[i] = (frequencies[i] == 0) ? 0 : 1; |
39 | 8.96k | return; |
40 | 8.96k | } |
41 | | |
42 | 46.9k | BinaryHeap<u16, u16, 288> heap { heap_keys, heap_values, non_zero_freqs }; |
43 | | |
44 | | // build the huffman tree - binary heap is used for efficient frequency comparisons |
45 | 2.85M | while (heap.size() > 1) { |
46 | 2.80M | u16 lowest_frequency = heap.peek_min_key(); |
47 | 2.80M | u16 lowest_link = heap.pop_min(); |
48 | 2.80M | u16 second_lowest_frequency = heap.peek_min_key(); |
49 | 2.80M | u16 second_lowest_link = heap.pop_min(); |
50 | | |
51 | 2.80M | u16 new_link = heap.size() + 1; |
52 | | |
53 | 2.80M | u32 sum = lowest_frequency + second_lowest_frequency; |
54 | 2.80M | sum = min(sum, UINT16_MAX); |
55 | 2.80M | heap.insert(sum, new_link); |
56 | | |
57 | 2.80M | huffman_links[lowest_link] = new_link; |
58 | 2.80M | huffman_links[second_lowest_link] = new_link; |
59 | 2.80M | } |
60 | | |
61 | 46.9k | non_zero_freqs = 0; |
62 | 5.72M | for (size_t i = 0; i < size; i++) { |
63 | 5.68M | if (frequencies[i] == 0) { |
64 | 2.87M | lengths[i] = 0; |
65 | 2.87M | continue; |
66 | 2.87M | } |
67 | | |
68 | 2.80M | u16 link = huffman_links[size + non_zero_freqs]; |
69 | 2.80M | non_zero_freqs++; |
70 | | |
71 | 2.80M | size_t bit_length = 1; |
72 | 22.1M | while (link != 1) { |
73 | 19.3M | bit_length++; |
74 | 19.3M | link = huffman_links[link]; |
75 | 19.3M | } |
76 | | |
77 | 2.80M | if (bit_length > max_bit_length) { |
78 | 3.59k | VERIFY(shift < 15); |
79 | 3.59k | return generate_huffman_lengths(lengths, frequencies, max_bit_length, shift + 1); |
80 | 3.59k | } |
81 | | |
82 | 2.80M | lengths[i] = bit_length; |
83 | 2.80M | } |
84 | 46.9k | } |
85 | | |
86 | | } |