Coverage Report

Created: 2026-05-16 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibCompress/Lzw.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
3
 * Copyright (c) 2022, the SerenityOS developers.
4
 *
5
 * SPDX-License-Identifier: BSD-2-Clause
6
 */
7
8
#pragma once
9
10
#include <AK/BitStream.h>
11
#include <AK/Concepts.h>
12
#include <AK/Debug.h>
13
#include <AK/Format.h>
14
#include <AK/IntegralMath.h>
15
#include <AK/MemoryStream.h>
16
#include <AK/Vector.h>
17
18
namespace Compress {
19
20
namespace Details {
21
22
class LzwState {
23
public:
24
    u16 add_control_code()
25
987k
    {
26
987k
        u16 const control_code = m_code_table.size();
27
987k
        m_code_table.append(Vector<u8> {});
28
987k
        m_original_code_table.append(Vector<u8> {});
29
987k
        if (m_code_table.size() >= m_table_capacity && m_code_size < max_code_size) {
30
981k
            ++m_code_size;
31
981k
            ++m_original_code_size;
32
981k
            m_table_capacity *= 2;
33
981k
        }
34
987k
        return control_code;
35
987k
    }
36
37
    void reset()
38
195k
    {
39
195k
        m_code_table.clear();
40
195k
        m_code_table.extend(m_original_code_table);
41
195k
        m_code_size = m_original_code_size;
42
195k
        m_table_capacity = AK::exp2<u32>(m_code_size);
43
195k
    }
44
45
protected:
46
    static constexpr int max_code_size = 12;
47
    static constexpr int max_table_size = 1 << max_code_size;
48
49
    LzwState(u8 min_code_size, i32 offset_for_size_change)
50
493k
        : m_code_size(min_code_size)
51
493k
        , m_original_code_size(min_code_size)
52
493k
        , m_table_capacity(AK::exp2<u32>(min_code_size))
53
493k
        , m_offset_for_size_change(offset_for_size_change)
54
493k
    {
55
493k
        init_code_table();
56
493k
    }
57
58
    void init_code_table()
59
493k
    {
60
493k
        m_code_table.ensure_capacity(m_table_capacity);
61
1.93M
        for (u16 i = 0; i < m_table_capacity; ++i) {
62
1.44M
            m_code_table.unchecked_append({ (u8)i });
63
1.44M
        }
64
493k
        m_original_code_table = m_code_table;
65
493k
    }
66
67
    void extend_code_table(Vector<u8> const& entry)
68
7.78M
    {
69
7.78M
        if (entry.size() > 1 && m_code_table.size() < max_table_size) {
70
1.97M
            m_code_table.append(entry);
71
1.97M
            if (m_code_table.size() >= (m_table_capacity + m_offset_for_size_change) && m_code_size < max_code_size) {
72
24.0k
                ++m_code_size;
73
24.0k
                m_table_capacity *= 2;
74
24.0k
            }
75
1.97M
        }
76
7.78M
    }
77
78
    Vector<Vector<u8>> m_code_table {};
79
    Vector<Vector<u8>> m_original_code_table {};
80
81
    u8 m_code_size { 0 };
82
    u8 m_original_code_size { 0 };
83
84
    u32 m_table_capacity { 0 };
85
    i32 m_offset_for_size_change {};
86
};
87
88
}
89
90
template<InputBitStream InputStream>
91
class LzwDecompressor : private Details::LzwState {
92
public:
93
    explicit LzwDecompressor(MaybeOwned<InputStream> lzw_stream, u8 min_code_size, i32 offset_for_size_change = 0)
94
493k
        : LzwState(min_code_size, offset_for_size_change)
95
493k
        , m_bit_stream(move(lzw_stream))
96
97
493k
    {
98
493k
    }
Compress::LzwDecompressor<AK::LittleEndianInputBitStream>::LzwDecompressor(AK::MaybeOwned<AK::LittleEndianInputBitStream>, unsigned char, int)
Line
Count
Source
94
490k
        : LzwState(min_code_size, offset_for_size_change)
95
490k
        , m_bit_stream(move(lzw_stream))
96
97
490k
    {
98
490k
    }
Compress::LzwDecompressor<AK::BigEndianInputBitStream>::LzwDecompressor(AK::MaybeOwned<AK::BigEndianInputBitStream>, unsigned char, int)
Line
Count
Source
94
2.97k
        : LzwState(min_code_size, offset_for_size_change)
95
2.97k
        , m_bit_stream(move(lzw_stream))
96
97
2.97k
    {
98
2.97k
    }
99
100
    static ErrorOr<ByteBuffer> decompress_all(ReadonlyBytes bytes, u8 initial_code_size, i32 offset_for_size_change = 0)
101
493k
    {
102
493k
        auto memory_stream = make<FixedMemoryStream>(bytes);
103
493k
        auto lzw_stream = make<InputStream>(MaybeOwned<Stream>(move(memory_stream)));
104
493k
        LzwDecompressor lzw_decompressor { MaybeOwned<InputStream> { move(lzw_stream) }, initial_code_size, offset_for_size_change };
105
106
493k
        ByteBuffer decompressed;
107
108
493k
        u16 const clear_code = lzw_decompressor.add_control_code();
109
493k
        u16 const end_of_data_code = lzw_decompressor.add_control_code();
110
111
8.47M
        while (true) {
112
8.47M
            auto const code = TRY(lzw_decompressor.next_code());
113
114
8.47M
            if (code == clear_code) {
115
195k
                lzw_decompressor.reset();
116
195k
                continue;
117
195k
            }
118
119
8.28M
            if (code == end_of_data_code)
120
493k
                break;
121
122
15.5M
            TRY(decompressed.try_append(lzw_decompressor.get_output()));
123
15.5M
        }
124
125
493k
        return decompressed;
126
493k
    }
Compress::LzwDecompressor<AK::LittleEndianInputBitStream>::decompress_all(AK::Span<unsigned char const>, unsigned char, int)
Line
Count
Source
101
490k
    {
102
490k
        auto memory_stream = make<FixedMemoryStream>(bytes);
103
490k
        auto lzw_stream = make<InputStream>(MaybeOwned<Stream>(move(memory_stream)));
104
490k
        LzwDecompressor lzw_decompressor { MaybeOwned<InputStream> { move(lzw_stream) }, initial_code_size, offset_for_size_change };
105
106
490k
        ByteBuffer decompressed;
107
108
490k
        u16 const clear_code = lzw_decompressor.add_control_code();
109
490k
        u16 const end_of_data_code = lzw_decompressor.add_control_code();
110
111
7.68M
        while (true) {
112
7.68M
            auto const code = TRY(lzw_decompressor.next_code());
113
114
7.68M
            if (code == clear_code) {
115
194k
                lzw_decompressor.reset();
116
194k
                continue;
117
194k
            }
118
119
7.48M
            if (code == end_of_data_code)
120
490k
                break;
121
122
13.9M
            TRY(decompressed.try_append(lzw_decompressor.get_output()));
123
13.9M
        }
124
125
490k
        return decompressed;
126
490k
    }
Compress::LzwDecompressor<AK::BigEndianInputBitStream>::decompress_all(AK::Span<unsigned char const>, unsigned char, int)
Line
Count
Source
101
2.97k
    {
102
2.97k
        auto memory_stream = make<FixedMemoryStream>(bytes);
103
2.97k
        auto lzw_stream = make<InputStream>(MaybeOwned<Stream>(move(memory_stream)));
104
2.97k
        LzwDecompressor lzw_decompressor { MaybeOwned<InputStream> { move(lzw_stream) }, initial_code_size, offset_for_size_change };
105
106
2.97k
        ByteBuffer decompressed;
107
108
2.97k
        u16 const clear_code = lzw_decompressor.add_control_code();
109
2.97k
        u16 const end_of_data_code = lzw_decompressor.add_control_code();
110
111
791k
        while (true) {
112
791k
            auto const code = TRY(lzw_decompressor.next_code());
113
114
791k
            if (code == clear_code) {
115
472
                lzw_decompressor.reset();
116
472
                continue;
117
472
            }
118
119
790k
            if (code == end_of_data_code)
120
2.96k
                break;
121
122
1.57M
            TRY(decompressed.try_append(lzw_decompressor.get_output()));
123
1.57M
        }
124
125
2.96k
        return decompressed;
126
2.97k
    }
127
128
    void reset()
129
195k
    {
130
195k
        LzwState::reset();
131
195k
        m_output.clear();
132
195k
    }
Compress::LzwDecompressor<AK::LittleEndianInputBitStream>::reset()
Line
Count
Source
129
194k
    {
130
194k
        LzwState::reset();
131
194k
        m_output.clear();
132
194k
    }
Compress::LzwDecompressor<AK::BigEndianInputBitStream>::reset()
Line
Count
Source
129
472
    {
130
472
        LzwState::reset();
131
472
        m_output.clear();
132
472
    }
133
134
    ErrorOr<u16> next_code()
135
8.47M
    {
136
8.47M
        m_current_code = TRY(m_bit_stream->template read_bits<u16>(m_code_size));
137
138
8.47M
        if (m_current_code > m_code_table.size()) {
139
97
            dbgln_if(LZW_DEBUG, "Corrupted LZW stream, invalid code: {}, code table size: {}",
140
97
                m_current_code,
141
97
                m_code_table.size());
142
97
            return Error::from_string_literal("Corrupted LZW stream, invalid code");
143
8.47M
        } else if (m_current_code == m_code_table.size() && m_output.is_empty()) {
144
10
            dbgln_if(LZW_DEBUG, "Corrupted LZW stream, valid new code but output buffer is empty: {}, code table size: {}",
145
10
                m_current_code,
146
10
                m_code_table.size());
147
10
            return Error::from_string_literal("Corrupted LZW stream, valid new code but output buffer is empty");
148
10
        }
149
150
8.47M
        return m_current_code;
151
8.47M
    }
Compress::LzwDecompressor<AK::LittleEndianInputBitStream>::next_code()
Line
Count
Source
135
7.68M
    {
136
7.68M
        m_current_code = TRY(m_bit_stream->template read_bits<u16>(m_code_size));
137
138
7.68M
        if (m_current_code > m_code_table.size()) {
139
88
            dbgln_if(LZW_DEBUG, "Corrupted LZW stream, invalid code: {}, code table size: {}",
140
88
                m_current_code,
141
88
                m_code_table.size());
142
88
            return Error::from_string_literal("Corrupted LZW stream, invalid code");
143
7.68M
        } else if (m_current_code == m_code_table.size() && m_output.is_empty()) {
144
10
            dbgln_if(LZW_DEBUG, "Corrupted LZW stream, valid new code but output buffer is empty: {}, code table size: {}",
145
10
                m_current_code,
146
10
                m_code_table.size());
147
10
            return Error::from_string_literal("Corrupted LZW stream, valid new code but output buffer is empty");
148
10
        }
149
150
7.68M
        return m_current_code;
151
7.68M
    }
Compress::LzwDecompressor<AK::BigEndianInputBitStream>::next_code()
Line
Count
Source
135
791k
    {
136
791k
        m_current_code = TRY(m_bit_stream->template read_bits<u16>(m_code_size));
137
138
791k
        if (m_current_code > m_code_table.size()) {
139
9
            dbgln_if(LZW_DEBUG, "Corrupted LZW stream, invalid code: {}, code table size: {}",
140
9
                m_current_code,
141
9
                m_code_table.size());
142
9
            return Error::from_string_literal("Corrupted LZW stream, invalid code");
143
791k
        } else if (m_current_code == m_code_table.size() && m_output.is_empty()) {
144
0
            dbgln_if(LZW_DEBUG, "Corrupted LZW stream, valid new code but output buffer is empty: {}, code table size: {}",
145
0
                m_current_code,
146
0
                m_code_table.size());
147
0
            return Error::from_string_literal("Corrupted LZW stream, valid new code but output buffer is empty");
148
0
        }
149
150
791k
        return m_current_code;
151
791k
    }
152
153
    Vector<u8>& get_output()
154
7.78M
    {
155
7.78M
        VERIFY(m_current_code <= m_code_table.size());
156
7.78M
        if (m_current_code < m_code_table.size()) {
157
7.69M
            Vector<u8> new_entry = m_output;
158
7.69M
            m_output = m_code_table.at(m_current_code);
159
7.69M
            new_entry.append(m_output[0]);
160
7.69M
            extend_code_table(new_entry);
161
7.69M
        } else if (m_current_code == m_code_table.size()) {
162
89.5k
            VERIFY(!m_output.is_empty());
163
89.5k
            m_output.append(m_output[0]);
164
89.5k
            extend_code_table(m_output);
165
89.5k
        }
166
7.78M
        return m_output;
167
7.78M
    }
Compress::LzwDecompressor<AK::LittleEndianInputBitStream>::get_output()
Line
Count
Source
154
6.99M
    {
155
6.99M
        VERIFY(m_current_code <= m_code_table.size());
156
6.99M
        if (m_current_code < m_code_table.size()) {
157
6.92M
            Vector<u8> new_entry = m_output;
158
6.92M
            m_output = m_code_table.at(m_current_code);
159
6.92M
            new_entry.append(m_output[0]);
160
6.92M
            extend_code_table(new_entry);
161
6.92M
        } else if (m_current_code == m_code_table.size()) {
162
70.1k
            VERIFY(!m_output.is_empty());
163
70.1k
            m_output.append(m_output[0]);
164
70.1k
            extend_code_table(m_output);
165
70.1k
        }
166
6.99M
        return m_output;
167
6.99M
    }
Compress::LzwDecompressor<AK::BigEndianInputBitStream>::get_output()
Line
Count
Source
154
787k
    {
155
787k
        VERIFY(m_current_code <= m_code_table.size());
156
787k
        if (m_current_code < m_code_table.size()) {
157
768k
            Vector<u8> new_entry = m_output;
158
768k
            m_output = m_code_table.at(m_current_code);
159
768k
            new_entry.append(m_output[0]);
160
768k
            extend_code_table(new_entry);
161
768k
        } else if (m_current_code == m_code_table.size()) {
162
19.4k
            VERIFY(!m_output.is_empty());
163
19.4k
            m_output.append(m_output[0]);
164
19.4k
            extend_code_table(m_output);
165
19.4k
        }
166
787k
        return m_output;
167
787k
    }
168
169
private:
170
    MaybeOwned<InputStream> m_bit_stream;
171
172
    u16 m_current_code { 0 };
173
    Vector<u8> m_output {};
174
};
175
176
class LzwCompressor : private Details::LzwState {
177
public:
178
    static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, u8 initial_code_size)
179
0
    {
180
0
        LzwCompressor compressor { initial_code_size };
181
0
        AllocatingMemoryStream buffer;
182
0
        LittleEndianOutputBitStream output_stream { MaybeOwned<Stream>(buffer) };
183
0
184
0
        u16 const clear_code = compressor.add_control_code();
185
0
        u16 const end_of_data_code = compressor.add_control_code();
186
0
187
0
        TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
188
0
189
0
        u32 last_offset = 0;
190
0
191
0
        while (last_offset < bytes.size()) {
192
0
            ReadonlyBytes current_symbol {};
193
0
            u16 current_code {};
194
0
195
0
            if (compressor.m_code_table.size() == max_table_size - 2) {
196
0
                TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
197
0
                compressor.reset();
198
0
            }
199
0
200
0
            bool found_symbol = false;
201
0
202
0
            for (u32 symbol_size = 1; last_offset + symbol_size <= bytes.size(); ++symbol_size) {
203
0
                current_symbol = bytes.slice(last_offset, symbol_size);
204
0
                auto const new_code = compressor.code_for_symbol(current_symbol);
205
0
206
0
                if (new_code.has_value()) {
207
0
                    current_code = *new_code;
208
0
                } else {
209
0
                    found_symbol = true;
210
0
                    break;
211
0
                }
212
0
            }
213
0
214
0
            TRY(output_stream.write_bits(current_code, compressor.m_code_size));
215
0
216
0
            if (found_symbol) {
217
0
                compressor.extend_code_table(Vector(current_symbol));
218
0
                current_symbol = current_symbol.trim(current_symbol.size() - 1);
219
0
            }
220
0
            last_offset += current_symbol.size();
221
0
        }
222
0
223
0
        TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
224
0
        TRY(output_stream.align_to_byte_boundary());
225
0
        TRY(output_stream.flush_buffer_to_stream());
226
0
227
0
        return TRY(buffer.read_until_eof());
228
0
    }
229
230
private:
231
    LzwCompressor(u8 initial_code_size)
232
        : Details::LzwState(initial_code_size, 1)
233
0
    {
234
0
    }
235
236
    Optional<u16> code_for_symbol(ReadonlyBytes bytes)
237
0
    {
238
0
        for (u16 i = 0; i < m_code_table.size(); ++i) {
239
0
            if (m_code_table[i].span() == bytes)
240
0
                return i;
241
0
        }
242
0
243
0
        return OptionalNone {};
244
0
    }
245
};
246
247
}