Coverage Report

Created: 2025-12-14 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/lz4_flex/src/frame/header.rs
Line
Count
Source
1
use twox_hash::XxHash32;
2
3
use super::Error;
4
use std::{
5
    fmt::Debug,
6
    hash::Hasher,
7
    io,
8
    io::{Read, Write},
9
};
10
11
/// FLG bit 1: reserved, must be zero.
const FLG_RESERVED_MASK: u8 = 1 << 1;
/// FLG bits 7-6: format version.
const FLG_VERSION_MASK: u8 = 0b11 << 6;
/// The only supported value of the version bits (`01`).
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01 << 6;

/// FLG bit 5: blocks are compressed independently of each other.
const FLG_INDEPENDENT_BLOCKS: u8 = 1 << 5;
/// FLG bit 4: every block is followed by a checksum.
const FLG_BLOCK_CHECKSUMS: u8 = 1 << 4;
/// FLG bit 3: the frame descriptor carries the 8 byte content size.
const FLG_CONTENT_SIZE: u8 = 1 << 3;
/// FLG bit 2: the frame ends with a content checksum.
const FLG_CONTENT_CHECKSUM: u8 = 1 << 2;
/// FLG bit 0: the frame descriptor carries a 4 byte dictionary id.
const FLG_DICTIONARY_ID: u8 = 1 << 0;

/// Every BD bit that is not part of the block size field is reserved.
const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
/// BD bits 6-5-4: block maximum size code.
const BD_BLOCK_SIZE_MASK: u8 = 0b111 << BD_BLOCK_SIZE_MASK_RSHIFT;
/// Shift that moves the block size code to/from the low bits of the BD byte.
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;

/// Highest bit of a block size word; set when the block is stored uncompressed.
const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 1 << 31;

/// Magic number that opens a regular LZ4 frame.
const LZ4F_MAGIC_NUMBER: u32 = 0x184D_2204;
/// Magic number that opens a legacy LZ4 frame.
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C_2102;
/// Magic numbers that open a skippable frame.
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D_2A50..=0x184D_2A5F;

/// Size in bytes of the magic number.
pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
/// Smallest frame header: magic number + FLG + BD + header checksum.
pub(crate) const MIN_FRAME_INFO_SIZE: usize = MAGIC_NUMBER_SIZE + 3;
/// Largest frame header: the minimum plus content size (8) and dictionary id (4).
pub(crate) const MAX_FRAME_INFO_SIZE: usize = MIN_FRAME_INFO_SIZE + 8 + 4;
/// Size in bytes of the size word that precedes every block.
pub(crate) const BLOCK_INFO_SIZE: usize = 4;
35
36
/// The predefined block sizes that can be chosen when compressing data.
///
/// The discriminant of each sized variant is the block size code stored in
/// the BD byte of the frame descriptor.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum BlockSize {
    /// Detect a suitable block size from the size of the first write call.
    #[default]
    Auto = 0,
    /// 64KB block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Picks a block size for a buffer of `buf_len` bytes.
    ///
    /// Buffers of up to 64KB map to [`BlockSize::Max64KB`], buffers of up to
    /// 256KB map to [`BlockSize::Max256KB`], and anything larger maps to
    /// [`BlockSize::Max4MB`].
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        if buf_len > BlockSize::Max256KB.get_size() {
            BlockSize::Max4MB
        } else if buf_len > BlockSize::Max64KB.get_size() {
            BlockSize::Max256KB
        } else {
            BlockSize::Max64KB
        }
    }

    /// Returns the block size in bytes.
    ///
    /// # Panics
    ///
    /// Panics when called on [`BlockSize::Auto`], which has no fixed size.
    pub(crate) fn get_size(&self) -> usize {
        const KIB: usize = 1024;
        const MIB: usize = 1024 * KIB;

        match *self {
            BlockSize::Max64KB => 64 * KIB,
            BlockSize::Max256KB => 256 * KIB,
            BlockSize::Max1MB => MIB,
            BlockSize::Max4MB => 4 * MIB,
            BlockSize::Max8MB => 8 * MIB,
            BlockSize::Auto => unreachable!(),
        }
    }
}
79
80
/// The two block modes that can be set on a [`FrameInfo`].
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum BlockMode {
    /// Each block is compressed on its own. This is the default.
    #[default]
    Independent,
    /// A block may reference data from the blocks before it.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}
92
93
// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
94
//
95
// General Structure of LZ4 Frame format
96
// -------------------------------------
97
//
98
// | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
99
// |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
100
// | 4 bytes |  3-15 bytes   |       |       | 4 bytes | 0-4 bytes   |
101
//
102
// Frame Descriptor
103
// ----------------
104
//
105
// | FLG     | BD      | (Content Size) | (Dictionary ID) | HC      |
106
// | ------- | ------- |:--------------:|:---------------:| ------- |
107
// | 1 byte  | 1 byte  |  0 - 8 bytes   |   0 - 4 bytes   | 1 byte  |
108
//
109
// __FLG byte__
110
//
111
// |  BitNb  |  7-6  |   5   |    4     |  3   |    2     |    1     |   0  |
112
// | ------- |-------|-------|----------|------|----------|----------|------|
113
// |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
114
//
115
// __BD byte__
116
//
117
// |  BitNb  |     7    |     6-5-4     |  3-2-1-0 |
118
// | ------- | -------- | ------------- | -------- |
119
// |FieldName|*Reserved*| Block MaxSize |*Reserved*|
120
//
121
// Data Blocks
122
// -----------
123
//
124
// | Block Size |  data  | (Block Checksum) |
125
// |:----------:| ------ |:----------------:|
126
// |  4 bytes   |        |   0 - 4 bytes    |
127
//
128
#[derive(Debug, Default, Clone)]
129
/// The metadata for de/compressing with lz4 frame format.
130
pub struct FrameInfo {
131
    /// If set, includes the total uncompressed size of data in the frame.
132
    pub content_size: Option<u64>,
133
    /// The identifier for the dictionary that must be used to correctly decode data.
134
    /// The compressor and the decompressor must use exactly the same dictionary.
135
    ///
136
    /// Note that this is currently unsupported and for this reason it's not pub.
137
    pub(crate) dict_id: Option<u32>,
138
    /// The maximum uncompressed size of each data block.
139
    pub block_size: BlockSize,
140
    /// The block mode.
141
    pub block_mode: BlockMode,
142
    /// If set, includes a checksum for each data block in the frame.
143
    pub block_checksums: bool,
144
    /// If set, includes a content checksum to verify that the full frame contents have been
145
    /// decoded correctly.
146
    pub content_checksum: bool,
147
    /// If set, use the legacy frame format
148
    pub legacy_frame: bool,
149
}
150
151
impl FrameInfo {
152
    /// Create a new `FrameInfo`.
153
0
    pub fn new() -> Self {
154
0
        Self::default()
155
0
    }
156
157
    /// Whether to include the total uncompressed size of data in the frame.
158
0
    pub fn content_size(mut self, content_size: Option<u64>) -> Self {
159
0
        self.content_size = content_size;
160
0
        self
161
0
    }
162
163
    /// The maximum uncompressed size of each data block.
164
0
    pub fn block_size(mut self, block_size: BlockSize) -> Self {
165
0
        self.block_size = block_size;
166
0
        self
167
0
    }
168
169
    /// The block mode.
170
0
    pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
171
0
        self.block_mode = block_mode;
172
0
        self
173
0
    }
174
175
    /// If set, includes a checksum for each data block in the frame.
176
0
    pub fn block_checksums(mut self, block_checksums: bool) -> Self {
177
0
        self.block_checksums = block_checksums;
178
0
        self
179
0
    }
180
181
    /// If set, includes a content checksum to verify that the full frame contents have been
182
    /// decoded correctly.
183
0
    pub fn content_checksum(mut self, content_checksum: bool) -> Self {
184
0
        self.content_checksum = content_checksum;
185
0
        self
186
0
    }
187
188
    /// If set, use the legacy frame format.
189
0
    pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
190
0
        self.legacy_frame = legacy_frame;
191
0
        self
192
0
    }
193
194
38.5k
    pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
195
38.5k
        let mut required = MIN_FRAME_INFO_SIZE;
196
38.5k
        let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
197
38.5k
        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
198
308
            return Ok(MAGIC_NUMBER_SIZE);
199
38.2k
        }
200
201
38.2k
        if input.len() < required {
202
0
            return Ok(required);
203
38.2k
        }
204
205
38.2k
        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
206
16
            return Ok(8);
207
38.2k
        }
208
38.2k
        if magic_num != LZ4F_MAGIC_NUMBER {
209
4.41k
            return Err(Error::WrongMagicNumber);
210
33.8k
        }
211
212
33.8k
        if input[4] & FLG_CONTENT_SIZE != 0 {
213
1.27k
            required += 8;
214
32.5k
        }
215
33.8k
        if input[4] & FLG_DICTIONARY_ID != 0 {
216
1.20k
            required += 4
217
32.6k
        }
218
33.8k
        Ok(required)
219
38.5k
    }
220
221
16.9k
    pub(crate) fn write_size(&self) -> usize {
222
16.9k
        let mut required = MIN_FRAME_INFO_SIZE;
223
16.9k
        if self.content_size.is_some() {
224
0
            required += 8;
225
16.9k
        }
226
16.9k
        if self.dict_id.is_some() {
227
0
            required += 4;
228
16.9k
        }
229
16.9k
        required
230
16.9k
    }
231
232
16.9k
    pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
233
16.9k
        let write_size = self.write_size();
234
16.9k
        if output.len() < write_size {
235
0
            return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
236
16.9k
        }
237
16.9k
        let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
238
16.9k
        assert!(write_size <= buffer.len());
239
16.9k
        buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
240
16.9k
        buffer[4] = FLG_SUPPORTED_VERSION_BITS;
241
16.9k
        if self.block_checksums {
242
8.49k
            buffer[4] |= FLG_BLOCK_CHECKSUMS;
243
8.49k
        }
244
16.9k
        if self.content_checksum {
245
8.49k
            buffer[4] |= FLG_CONTENT_CHECKSUM;
246
8.49k
        }
247
16.9k
        if self.block_mode == BlockMode::Independent {
248
8.49k
            buffer[4] |= FLG_INDEPENDENT_BLOCKS;
249
8.49k
        }
250
16.9k
        buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;
251
252
        // Optional section
253
16.9k
        let mut offset = 6;
254
16.9k
        if let Some(size) = self.content_size {
255
0
            buffer[4] |= FLG_CONTENT_SIZE;
256
0
            buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
257
0
            offset += 8;
258
16.9k
        }
259
16.9k
        if let Some(dict_id) = self.dict_id {
260
0
            buffer[4] |= FLG_DICTIONARY_ID;
261
0
            buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
262
0
            offset += 4;
263
16.9k
        }
264
265
        // Header checksum
266
16.9k
        let mut hasher = XxHash32::with_seed(0);
267
16.9k
        hasher.write(&buffer[4..offset]);
268
16.9k
        let header_checksum = (hasher.finish() >> 8) as u8;
269
16.9k
        buffer[offset] = header_checksum;
270
16.9k
        offset += 1;
271
272
16.9k
        debug_assert_eq!(offset, write_size);
273
16.9k
        output[..write_size].copy_from_slice(&buffer[..write_size]);
274
16.9k
        Ok(write_size)
275
16.9k
    }
276
277
33.8k
    pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
278
33.8k
        let original_input = input;
279
        // 4 byte Magic
280
33.8k
        let magic_num = {
281
33.8k
            let mut buffer = [0u8; 4];
282
33.8k
            input.read_exact(&mut buffer)?;
283
33.8k
            u32::from_le_bytes(buffer)
284
        };
285
33.8k
        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
286
308
            return Ok(FrameInfo {
287
308
                block_size: BlockSize::Max8MB,
288
308
                legacy_frame: true,
289
308
                ..FrameInfo::default()
290
308
            });
291
33.5k
        }
292
33.5k
        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
293
14
            let mut buffer = [0u8; 4];
294
14
            input.read_exact(&mut buffer)?;
295
14
            let user_data_len = u32::from_le_bytes(buffer);
296
14
            return Err(Error::SkippableFrame(user_data_len));
297
33.5k
        }
298
33.5k
        if magic_num != LZ4F_MAGIC_NUMBER {
299
0
            return Err(Error::WrongMagicNumber);
300
33.5k
        }
301
302
        // fixed size section
303
33.5k
        let [flg_byte, bd_byte] = {
304
33.5k
            let mut buffer = [0u8, 0];
305
33.5k
            input.read_exact(&mut buffer)?;
306
33.5k
            buffer
307
        };
308
309
33.5k
        if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
310
            // version is always 01
311
1.90k
            return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
312
31.6k
        }
313
314
31.6k
        if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
315
60
            return Err(Error::ReservedBitsSet);
316
31.5k
        }
317
318
31.5k
        let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
319
15.4k
            BlockMode::Independent
320
        } else {
321
16.0k
            BlockMode::Linked
322
        };
323
31.5k
        let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
324
31.5k
        let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;
325
326
31.5k
        let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
327
31.5k
            i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
328
17.2k
            4 => BlockSize::Max64KB,
329
5.11k
            5 => BlockSize::Max256KB,
330
4.55k
            6 => BlockSize::Max1MB,
331
4.57k
            7 => BlockSize::Max4MB,
332
0
            _ => unreachable!(),
333
        };
334
335
        // var len section
336
31.5k
        let mut content_size = None;
337
31.5k
        if flg_byte & FLG_CONTENT_SIZE != 0 {
338
645
            let mut buffer = [0u8; 8];
339
645
            input.read_exact(&mut buffer).unwrap();
340
645
            content_size = Some(u64::from_le_bytes(buffer));
341
30.8k
        }
342
343
31.5k
        let mut dict_id = None;
344
31.5k
        if flg_byte & FLG_DICTIONARY_ID != 0 {
345
12
            let mut buffer = [0u8; 4];
346
12
            input.read_exact(&mut buffer)?;
347
12
            dict_id = Some(u32::from_le_bytes(buffer));
348
31.5k
        }
349
350
        // 1 byte header checksum
351
31.5k
        let expected_checksum = {
352
31.5k
            let mut buffer = [0u8; 1];
353
31.5k
            input.read_exact(&mut buffer)?;
354
31.5k
            buffer[0]
355
        };
356
31.5k
        let mut hasher = XxHash32::with_seed(0);
357
31.5k
        hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
358
31.5k
        let header_hash = (hasher.finish() >> 8) as u8;
359
31.5k
        if header_hash != expected_checksum {
360
38
            return Err(Error::HeaderChecksumError);
361
31.5k
        }
362
363
31.5k
        Ok(FrameInfo {
364
31.5k
            content_size,
365
31.5k
            dict_id,
366
31.5k
            block_size,
367
31.5k
            block_mode,
368
31.5k
            block_checksums,
369
31.5k
            content_checksum,
370
31.5k
            legacy_frame: false,
371
31.5k
        })
372
33.8k
    }
373
}
374
375
/// The decoded form of the 4 byte size word that precedes every block.
#[derive(Debug)]
pub(crate) enum BlockInfo {
    /// A compressed block; the payload is the size taken from the size word.
    Compressed(u32),
    /// An uncompressed block; the payload is the size taken from the size word.
    Uncompressed(u32),
    /// The end mark: a size word of zero.
    EndMark,
}
381
382
impl BlockInfo {
383
223M
    pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
384
223M
        let mut size_buffer = [0u8; 4];
385
223M
        input.read_exact(&mut size_buffer)?;
386
223M
        let size = u32::from_le_bytes(size_buffer);
387
223M
        if size == 0 {
388
17.2k
            Ok(BlockInfo::EndMark)
389
222M
        } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
390
124M
            Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
391
        } else {
392
98.4M
            Ok(BlockInfo::Compressed(size))
393
        }
394
223M
    }
395
396
222M
    pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
397
222M
        let value = match self {
398
98.2M
            BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
399
124M
            BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
400
98.2M
                if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
401
            {
402
0
                return Err(Error::InvalidBlockInfo)
403
            }
404
98.2M
            BlockInfo::Compressed(len) => *len,
405
124M
            BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
406
16.9k
            BlockInfo::EndMark => 0,
407
        };
408
222M
        output.write_all(&value.to_le_bytes())?;
409
222M
        Ok(4)
410
222M
    }
411
}