Coverage Report

Created: 2026-06-07 07:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibCompress/Deflate.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2020, the SerenityOS developers.
3
 * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
4
 *
5
 * SPDX-License-Identifier: BSD-2-Clause
6
 */
7
8
#pragma once
9
10
#include <AK/BitStream.h>
11
#include <AK/ByteBuffer.h>
12
#include <AK/CircularBuffer.h>
13
#include <AK/Endian.h>
14
#include <AK/Forward.h>
15
#include <AK/MaybeOwned.h>
16
#include <AK/Stream.h>
17
#include <AK/Vector.h>
18
#include <LibCompress/DeflateTables.h>
19
20
namespace Compress {
21
22
class CanonicalCode {
23
public:
24
175k
    CanonicalCode() = default;
25
    ErrorOr<u32> read_symbol(LittleEndianInputBitStream&) const;
26
    ErrorOr<void> write_symbol(LittleEndianOutputBitStream&, u32) const;
27
28
    static CanonicalCode const& fixed_literal_codes();
29
    static CanonicalCode const& fixed_distance_codes();
30
31
    static ErrorOr<CanonicalCode> from_bytes(ReadonlyBytes);
32
33
private:
34
    static constexpr size_t max_allowed_prefixed_code_length = 8;
35
36
    struct PrefixTableEntry {
37
        u16 symbol_value { 0 };
38
        u16 code_length { 0 };
39
    };
40
41
    // Decompression - indexed by code
42
    Vector<u16, 286> m_symbol_values;
43
44
    Vector<u32, 16> m_first_symbol_of_length_after;
45
    Vector<u16, 16> m_offset_to_first_symbol_index;
46
47
    Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {};
48
    size_t m_max_prefixed_code_length { 0 };
49
50
    // Compression - indexed by symbol
51
    // Deflate uses a maximum of 288 symbols (maximum of 32 for distances),
52
    // but this is also used by webp, which can use up to 256 + 24 + (1 << 11) == 2328 symbols.
53
    Vector<u16, 288> m_bit_codes {};
54
    Vector<u16, 288> m_bit_code_lengths {};
55
};
56
57
ALWAYS_INLINE ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const
58
98.1M
{
59
98.1M
    auto code = m_bit_codes[symbol];
60
98.1M
    auto length = m_bit_code_lengths[symbol];
61
98.1M
    TRY(stream.write_bits(code, length));
62
98.1M
    return {};
63
98.1M
}
64
65
class DeflateDecompressor final : public Stream {
66
private:
67
    class CompressedBlock {
68
    public:
69
        CompressedBlock(DeflateDecompressor&, CanonicalCode literal_codes, Optional<CanonicalCode> distance_codes);
70
71
        ErrorOr<bool> try_read_more();
72
73
    private:
74
        bool m_eof { false };
75
76
        DeflateDecompressor& m_decompressor;
77
        CanonicalCode m_literal_codes;
78
        Optional<CanonicalCode> m_distance_codes;
79
    };
80
81
    class UncompressedBlock {
82
    public:
83
        UncompressedBlock(DeflateDecompressor&, size_t);
84
85
        ErrorOr<bool> try_read_more();
86
87
    private:
88
        DeflateDecompressor& m_decompressor;
89
        size_t m_bytes_remaining;
90
    };
91
92
    enum class State {
93
        Idle,
94
        ReadingCompressedBlock,
95
        ReadingUncompressedBlock
96
    };
97
98
public:
99
    friend CompressedBlock;
100
    friend UncompressedBlock;
101
102
    static ErrorOr<NonnullOwnPtr<DeflateDecompressor>> construct(MaybeOwned<LittleEndianInputBitStream> stream);
103
    ~DeflateDecompressor();
104
105
    virtual ErrorOr<Bytes> read_some(Bytes) override;
106
    virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
107
    virtual bool is_eof() const override;
108
    virtual bool is_open() const override;
109
    virtual void close() override;
110
111
    static ErrorOr<ByteBuffer> decompress_all(ReadonlyBytes);
112
113
private:
114
    DeflateDecompressor(MaybeOwned<LittleEndianInputBitStream> stream, CircularBuffer buffer);
115
116
    ErrorOr<u32> decode_length(u32);
117
    ErrorOr<u32> decode_distance(u32);
118
    ErrorOr<void> decode_codes(CanonicalCode& literal_code, Optional<CanonicalCode>& distance_code);
119
120
    static constexpr u16 max_back_reference_length = 258;
121
122
    bool m_read_final_block { false };
123
124
    State m_state { State::Idle };
125
    union {
126
        CompressedBlock m_compressed_block;
127
        UncompressedBlock m_uncompressed_block;
128
    };
129
130
    MaybeOwned<LittleEndianInputBitStream> m_input_stream;
131
    CircularBuffer m_output_buffer;
132
};
133
134
class DeflateCompressor final : public Stream {
135
public:
136
    static constexpr size_t block_size = 32 * KiB - 1; // TODO: this can theoretically be increased to 64 KiB - 2
137
    static constexpr size_t window_size = block_size * 2;
138
    static constexpr size_t hash_bits = 15;
139
    static constexpr size_t max_huffman_literals = 288;
140
    static constexpr size_t max_huffman_distances = 32;
141
    static constexpr size_t min_match_length = 4;   // matches smaller than these are not worth the size of the back reference
142
    static constexpr size_t max_match_length = 258; // matches longer than these cannot be encoded using huffman codes
143
    static constexpr u16 empty_slot = UINT16_MAX;
144
145
    struct CompressionConstants {
146
        size_t good_match_length;  // Once we find a match of at least this length (a good enough match) we reduce max_chain to lower processing time
147
        size_t max_lazy_length;    // If the match is at least this long we dont defer matching to the next byte (which takes time) as its good enough
148
        size_t great_match_length; // Once we find a match of at least this length (a great match) we can just stop searching for longer ones
149
        size_t max_chain;          // We only check the actual length of the max_chain closest matches
150
    };
151
152
    // These constants were shamelessly "borrowed" from zlib
153
    static constexpr CompressionConstants compression_constants[] = {
154
        { 0, 0, 0, 0 },
155
        { 4, 4, 8, 4 },
156
        { 8, 16, 128, 128 },
157
        { 32, 258, 258, 4096 },
158
        { max_match_length, max_match_length, max_match_length, 1 << hash_bits } // disable all limits
159
    };
160
161
    enum class CompressionLevel : int {
162
        STORE = 0,
163
        FAST,
164
        GOOD,
165
        GREAT,
166
        BEST // WARNING: this one can take an unreasonable amount of time!
167
    };
168
169
    static ErrorOr<NonnullOwnPtr<DeflateCompressor>> construct(MaybeOwned<Stream>, CompressionLevel = CompressionLevel::GOOD);
170
    ~DeflateCompressor();
171
172
    virtual ErrorOr<Bytes> read_some(Bytes) override;
173
    virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
174
    virtual bool is_eof() const override;
175
    virtual bool is_open() const override;
176
    virtual void close() override;
177
    ErrorOr<void> final_flush();
178
179
    static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, CompressionLevel = CompressionLevel::GOOD);
180
181
private:
182
    DeflateCompressor(NonnullOwnPtr<LittleEndianOutputBitStream>, CompressionLevel = CompressionLevel::GOOD);
183
184
39.2k
    Bytes pending_block() { return { m_rolling_window + block_size, block_size }; }
185
186
    // LZ77 Compression
187
    static u16 hash_sequence(u8 const* bytes);
188
    size_t compare_match_candidate(size_t start, size_t candidate, size_t prev_match_length, size_t max_match_length);
189
    size_t find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t max_match_length, size_t& match_position);
190
    void lz77_compress_block();
191
192
    // Huffman Coding
193
    struct code_length_symbol {
194
        u8 symbol;
195
        u8 count; // used for special symbols 16-18
196
    };
197
    static u8 distance_to_base(u16 distance);
198
    size_t huffman_block_length(Array<u8, max_huffman_literals> const& literal_bit_lengths, Array<u8, max_huffman_distances> const& distance_bit_lengths);
199
    ErrorOr<void> write_huffman(CanonicalCode const& literal_code, Optional<CanonicalCode> const& distance_code);
200
    static size_t encode_huffman_lengths(ReadonlyBytes lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths);
201
    size_t encode_block_lengths(Array<u8, max_huffman_literals> const& literal_bit_lengths, Array<u8, max_huffman_distances> const& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count);
202
    ErrorOr<void> write_dynamic_huffman(CanonicalCode const& literal_code, size_t literal_code_count, Optional<CanonicalCode> const& distance_code, size_t distance_code_count, Array<u8, 19> const& code_lengths_bit_lengths, size_t code_length_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances> const& encoded_lengths, size_t encoded_lengths_count);
203
204
    size_t uncompressed_block_length();
205
    size_t fixed_block_length();
206
    size_t dynamic_block_length(Array<u8, max_huffman_literals> const& literal_bit_lengths, Array<u8, max_huffman_distances> const& distance_bit_lengths, Array<u8, 19> const& code_lengths_bit_lengths, Array<u16, 19> const& code_lengths_frequencies, size_t code_lengths_count);
207
    ErrorOr<void> flush();
208
209
    bool m_finished { false };
210
    CompressionLevel m_compression_level;
211
    CompressionConstants m_compression_constants;
212
    NonnullOwnPtr<LittleEndianOutputBitStream> m_output_stream;
213
214
    u8 m_rolling_window[window_size];
215
    size_t m_pending_block_size { 0 };
216
217
    struct [[gnu::packed]] {
218
        u16 distance; // back reference length
219
        union {
220
            u16 literal; // literal byte or on of block symbol
221
            u16 length;  // back reference length (if distance != 0)
222
        };
223
    } m_symbol_buffer[block_size + 1];
224
    size_t m_pending_symbol_size { 0 };
225
    Array<u16, max_huffman_literals> m_symbol_frequencies;    // there are 286 valid symbol values (symbols 286-287 never occur)
226
    Array<u16, max_huffman_distances> m_distance_frequencies; // there are 30 valid distance values (distances 30-31 never occur)
227
228
    // LZ77 Chained hash table
229
    u16 m_hash_head[1 << hash_bits];
230
    u16 m_hash_prev[window_size];
231
};
232
233
}