/src/lz4_flex/src/frame/header.rs
Line | Count | Source |
1 | | use twox_hash::XxHash32; |
2 | | |
3 | | use super::Error; |
4 | | use std::{ |
5 | | fmt::Debug, |
6 | | hash::Hasher, |
7 | | io, |
8 | | io::{Read, Write}, |
9 | | }; |
10 | | |
// --- FLG byte layout -------------------------------------------------------

/// FLG bit 1 is reserved; a header with this bit set is rejected.
const FLG_RESERVED_MASK: u8 = 1 << 1;
/// FLG bits 7-6 carry the frame format version.
const FLG_VERSION_MASK: u8 = 0b11 << 6;
/// The only version value accepted (`01` in bits 7-6).
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01 << 6;

/// FLG bit 5: blocks are independent of each other.
const FLG_INDEPENDENT_BLOCKS: u8 = 1 << 5;
/// FLG bit 4: every block is followed by a checksum.
const FLG_BLOCK_CHECKSUMS: u8 = 1 << 4;
/// FLG bit 3: the descriptor contains the 8-byte content size.
const FLG_CONTENT_SIZE: u8 = 1 << 3;
/// FLG bit 2: the frame ends with a content checksum.
const FLG_CONTENT_CHECKSUM: u8 = 1 << 2;
/// FLG bit 0: the descriptor contains the 4-byte dictionary id.
const FLG_DICTIONARY_ID: u8 = 1 << 0;

// --- BD byte layout --------------------------------------------------------

/// Number of bits the block size field is shifted left inside the BD byte.
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;
/// BD bits 6-5-4 carry the block max size.
const BD_BLOCK_SIZE_MASK: u8 = 0b111 << BD_BLOCK_SIZE_MASK_RSHIFT;
/// Every BD bit outside the block size field is reserved.
const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;

/// Highest bit of a block size word; set when the block is stored uncompressed.
const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 1 << 31;

// --- Magic numbers ---------------------------------------------------------

/// Magic number opening a regular LZ4 frame.
const LZ4F_MAGIC_NUMBER: u32 = 0x184D_2204;
/// Magic number opening a legacy LZ4 frame.
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C_2102;
/// Magic numbers that open a skippable frame.
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D_2A50..=0x184D_2A5F;

// --- Sizes in bytes --------------------------------------------------------

/// Size of the magic number.
pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
/// Smallest frame header: magic number, FLG, BD and header checksum.
pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
/// Largest frame header: the minimum plus content size (8) and dictionary id (4).
pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
/// Size of the block size word that precedes every block.
pub(crate) const BLOCK_INFO_SIZE: usize = 4;
35 | | |
/// The predefined block sizes that can be chosen when compressing data.
///
/// The explicit discriminants are the values stored in the block size field of
/// the frame header's BD byte.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum BlockSize {
    /// Will detect optimal frame size based on the size of the first write call.
    #[default]
    Auto = 0,
    /// 64KB block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Picks a block size for a buffer of `buf_len` bytes.
    ///
    /// Buffers larger than 256KB map to `Max4MB`, buffers larger than 64KB map
    /// to `Max256KB`, and everything else maps to `Max64KB`.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        if buf_len > BlockSize::Max256KB.get_size() {
            BlockSize::Max4MB
        } else if buf_len > BlockSize::Max64KB.get_size() {
            BlockSize::Max256KB
        } else {
            BlockSize::Max64KB
        }
    }

    /// Returns the block size in bytes.
    ///
    /// # Panics
    ///
    /// Panics when called on `BlockSize::Auto`, which has no size of its own.
    pub(crate) fn get_size(&self) -> usize {
        const KB: usize = 1024;
        const MB: usize = 1024 * KB;

        match *self {
            BlockSize::Auto => unreachable!(),
            BlockSize::Max64KB => 64 * KB,
            BlockSize::Max256KB => 256 * KB,
            BlockSize::Max1MB => MB,
            BlockSize::Max4MB => 4 * MB,
            BlockSize::Max8MB => 8 * MB,
        }
    }
}
79 | | |
/// The two block modes that can be set on [`FrameInfo`].
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum BlockMode {
    /// Every block is compressed independently. The default.
    #[default]
    Independent,
    /// Blocks can reference data from previous blocks.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}
92 | | |
93 | | // From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md |
94 | | // |
95 | | // General Structure of LZ4 Frame format |
96 | | // ------------------------------------- |
97 | | // |
98 | | // | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum | |
99 | | // |:-------:|:-------------:| ----- | ----- | ------- | ----------- | |
100 | | // | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes | |
101 | | // |
102 | | // Frame Descriptor |
103 | | // ---------------- |
104 | | // |
105 | | // | FLG | BD | (Content Size) | (Dictionary ID) | HC | |
106 | | // | ------- | ------- |:--------------:|:---------------:| ------- | |
107 | | // | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte | |
108 | | // |
109 | | // __FLG byte__ |
110 | | // |
111 | | // | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 | |
112 | | // | ------- |-------|-------|----------|------|----------|----------|------| |
113 | | // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID| |
114 | | // |
115 | | // __BD byte__ |
116 | | // |
117 | | // | BitNb | 7 | 6-5-4 | 3-2-1-0 | |
118 | | // | ------- | -------- | ------------- | -------- | |
119 | | // |FieldName|*Reserved*| Block MaxSize |*Reserved*| |
120 | | // |
121 | | // Data Blocks |
122 | | // ----------- |
123 | | // |
124 | | // | Block Size | data | (Block Checksum) | |
125 | | // |:----------:| ------ |:----------------:| |
126 | | // | 4 bytes | | 0 - 4 bytes | |
127 | | // |
128 | | #[derive(Debug, Default, Clone)] |
129 | | /// The metadata for de/compressing with lz4 frame format. |
130 | | pub struct FrameInfo { |
131 | | /// If set, includes the total uncompressed size of data in the frame. |
132 | | pub content_size: Option<u64>, |
133 | | /// The identifier for the dictionary that must be used to correctly decode data. |
134 | | /// The compressor and the decompressor must use exactly the same dictionary. |
135 | | /// |
136 | | /// Note that this is currently unsupported and for this reason it's not pub. |
137 | | pub(crate) dict_id: Option<u32>, |
138 | | /// The maximum uncompressed size of each data block. |
139 | | pub block_size: BlockSize, |
140 | | /// The block mode. |
141 | | pub block_mode: BlockMode, |
142 | | /// If set, includes a checksum for each data block in the frame. |
143 | | pub block_checksums: bool, |
144 | | /// If set, includes a content checksum to verify that the full frame contents have been |
145 | | /// decoded correctly. |
146 | | pub content_checksum: bool, |
147 | | /// If set, use the legacy frame format |
148 | | pub legacy_frame: bool, |
149 | | } |
150 | | |
151 | | impl FrameInfo { |
152 | | /// Create a new `FrameInfo`. |
153 | 0 | pub fn new() -> Self { |
154 | 0 | Self::default() |
155 | 0 | } |
156 | | |
157 | | /// Whether to include the total uncompressed size of data in the frame. |
158 | 0 | pub fn content_size(mut self, content_size: Option<u64>) -> Self { |
159 | 0 | self.content_size = content_size; |
160 | 0 | self |
161 | 0 | } |
162 | | |
163 | | /// The maximum uncompressed size of each data block. |
164 | 0 | pub fn block_size(mut self, block_size: BlockSize) -> Self { |
165 | 0 | self.block_size = block_size; |
166 | 0 | self |
167 | 0 | } |
168 | | |
169 | | /// The block mode. |
170 | 0 | pub fn block_mode(mut self, block_mode: BlockMode) -> Self { |
171 | 0 | self.block_mode = block_mode; |
172 | 0 | self |
173 | 0 | } |
174 | | |
175 | | /// If set, includes a checksum for each data block in the frame. |
176 | 0 | pub fn block_checksums(mut self, block_checksums: bool) -> Self { |
177 | 0 | self.block_checksums = block_checksums; |
178 | 0 | self |
179 | 0 | } |
180 | | |
181 | | /// If set, includes a content checksum to verify that the full frame contents have been |
182 | | /// decoded correctly. |
183 | 0 | pub fn content_checksum(mut self, content_checksum: bool) -> Self { |
184 | 0 | self.content_checksum = content_checksum; |
185 | 0 | self |
186 | 0 | } |
187 | | |
188 | | /// If set, use the legacy frame format. |
189 | 0 | pub fn legacy_frame(mut self, legacy_frame: bool) -> Self { |
190 | 0 | self.legacy_frame = legacy_frame; |
191 | 0 | self |
192 | 0 | } |
193 | | |
194 | 38.5k | pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> { |
195 | 38.5k | let mut required = MIN_FRAME_INFO_SIZE; |
196 | 38.5k | let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap()); |
197 | 38.5k | if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { |
198 | 308 | return Ok(MAGIC_NUMBER_SIZE); |
199 | 38.2k | } |
200 | | |
201 | 38.2k | if input.len() < required { |
202 | 0 | return Ok(required); |
203 | 38.2k | } |
204 | | |
205 | 38.2k | if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { |
206 | 16 | return Ok(8); |
207 | 38.2k | } |
208 | 38.2k | if magic_num != LZ4F_MAGIC_NUMBER { |
209 | 4.41k | return Err(Error::WrongMagicNumber); |
210 | 33.8k | } |
211 | | |
212 | 33.8k | if input[4] & FLG_CONTENT_SIZE != 0 { |
213 | 1.27k | required += 8; |
214 | 32.5k | } |
215 | 33.8k | if input[4] & FLG_DICTIONARY_ID != 0 { |
216 | 1.20k | required += 4 |
217 | 32.6k | } |
218 | 33.8k | Ok(required) |
219 | 38.5k | } |
220 | | |
221 | 16.9k | pub(crate) fn write_size(&self) -> usize { |
222 | 16.9k | let mut required = MIN_FRAME_INFO_SIZE; |
223 | 16.9k | if self.content_size.is_some() { |
224 | 0 | required += 8; |
225 | 16.9k | } |
226 | 16.9k | if self.dict_id.is_some() { |
227 | 0 | required += 4; |
228 | 16.9k | } |
229 | 16.9k | required |
230 | 16.9k | } |
231 | | |
232 | 16.9k | pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> { |
233 | 16.9k | let write_size = self.write_size(); |
234 | 16.9k | if output.len() < write_size { |
235 | 0 | return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into())); |
236 | 16.9k | } |
237 | 16.9k | let mut buffer = [0u8; MAX_FRAME_INFO_SIZE]; |
238 | 16.9k | assert!(write_size <= buffer.len()); |
239 | 16.9k | buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes()); |
240 | 16.9k | buffer[4] = FLG_SUPPORTED_VERSION_BITS; |
241 | 16.9k | if self.block_checksums { |
242 | 8.49k | buffer[4] |= FLG_BLOCK_CHECKSUMS; |
243 | 8.49k | } |
244 | 16.9k | if self.content_checksum { |
245 | 8.49k | buffer[4] |= FLG_CONTENT_CHECKSUM; |
246 | 8.49k | } |
247 | 16.9k | if self.block_mode == BlockMode::Independent { |
248 | 8.49k | buffer[4] |= FLG_INDEPENDENT_BLOCKS; |
249 | 8.49k | } |
250 | 16.9k | buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT; |
251 | | |
252 | | // Optional section |
253 | 16.9k | let mut offset = 6; |
254 | 16.9k | if let Some(size) = self.content_size { |
255 | 0 | buffer[4] |= FLG_CONTENT_SIZE; |
256 | 0 | buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes()); |
257 | 0 | offset += 8; |
258 | 16.9k | } |
259 | 16.9k | if let Some(dict_id) = self.dict_id { |
260 | 0 | buffer[4] |= FLG_DICTIONARY_ID; |
261 | 0 | buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes()); |
262 | 0 | offset += 4; |
263 | 16.9k | } |
264 | | |
265 | | // Header checksum |
266 | 16.9k | let mut hasher = XxHash32::with_seed(0); |
267 | 16.9k | hasher.write(&buffer[4..offset]); |
268 | 16.9k | let header_checksum = (hasher.finish() >> 8) as u8; |
269 | 16.9k | buffer[offset] = header_checksum; |
270 | 16.9k | offset += 1; |
271 | | |
272 | 16.9k | debug_assert_eq!(offset, write_size); |
273 | 16.9k | output[..write_size].copy_from_slice(&buffer[..write_size]); |
274 | 16.9k | Ok(write_size) |
275 | 16.9k | } |
276 | | |
277 | 33.8k | pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> { |
278 | 33.8k | let original_input = input; |
279 | | // 4 byte Magic |
280 | 33.8k | let magic_num = { |
281 | 33.8k | let mut buffer = [0u8; 4]; |
282 | 33.8k | input.read_exact(&mut buffer)?; |
283 | 33.8k | u32::from_le_bytes(buffer) |
284 | | }; |
285 | 33.8k | if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { |
286 | 308 | return Ok(FrameInfo { |
287 | 308 | block_size: BlockSize::Max8MB, |
288 | 308 | legacy_frame: true, |
289 | 308 | ..FrameInfo::default() |
290 | 308 | }); |
291 | 33.5k | } |
292 | 33.5k | if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { |
293 | 14 | let mut buffer = [0u8; 4]; |
294 | 14 | input.read_exact(&mut buffer)?; |
295 | 14 | let user_data_len = u32::from_le_bytes(buffer); |
296 | 14 | return Err(Error::SkippableFrame(user_data_len)); |
297 | 33.5k | } |
298 | 33.5k | if magic_num != LZ4F_MAGIC_NUMBER { |
299 | 0 | return Err(Error::WrongMagicNumber); |
300 | 33.5k | } |
301 | | |
302 | | // fixed size section |
303 | 33.5k | let [flg_byte, bd_byte] = { |
304 | 33.5k | let mut buffer = [0u8, 0]; |
305 | 33.5k | input.read_exact(&mut buffer)?; |
306 | 33.5k | buffer |
307 | | }; |
308 | | |
309 | 33.5k | if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS { |
310 | | // version is always 01 |
311 | 1.90k | return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK)); |
312 | 31.6k | } |
313 | | |
314 | 31.6k | if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 { |
315 | 60 | return Err(Error::ReservedBitsSet); |
316 | 31.5k | } |
317 | | |
318 | 31.5k | let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 { |
319 | 15.4k | BlockMode::Independent |
320 | | } else { |
321 | 16.0k | BlockMode::Linked |
322 | | }; |
323 | 31.5k | let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0; |
324 | 31.5k | let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0; |
325 | | |
326 | 31.5k | let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT { |
327 | 31.5k | i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)), |
328 | 17.2k | 4 => BlockSize::Max64KB, |
329 | 5.11k | 5 => BlockSize::Max256KB, |
330 | 4.55k | 6 => BlockSize::Max1MB, |
331 | 4.57k | 7 => BlockSize::Max4MB, |
332 | 0 | _ => unreachable!(), |
333 | | }; |
334 | | |
335 | | // var len section |
336 | 31.5k | let mut content_size = None; |
337 | 31.5k | if flg_byte & FLG_CONTENT_SIZE != 0 { |
338 | 645 | let mut buffer = [0u8; 8]; |
339 | 645 | input.read_exact(&mut buffer).unwrap(); |
340 | 645 | content_size = Some(u64::from_le_bytes(buffer)); |
341 | 30.8k | } |
342 | | |
343 | 31.5k | let mut dict_id = None; |
344 | 31.5k | if flg_byte & FLG_DICTIONARY_ID != 0 { |
345 | 12 | let mut buffer = [0u8; 4]; |
346 | 12 | input.read_exact(&mut buffer)?; |
347 | 12 | dict_id = Some(u32::from_le_bytes(buffer)); |
348 | 31.5k | } |
349 | | |
350 | | // 1 byte header checksum |
351 | 31.5k | let expected_checksum = { |
352 | 31.5k | let mut buffer = [0u8; 1]; |
353 | 31.5k | input.read_exact(&mut buffer)?; |
354 | 31.5k | buffer[0] |
355 | | }; |
356 | 31.5k | let mut hasher = XxHash32::with_seed(0); |
357 | 31.5k | hasher.write(&original_input[4..original_input.len() - input.len() - 1]); |
358 | 31.5k | let header_hash = (hasher.finish() >> 8) as u8; |
359 | 31.5k | if header_hash != expected_checksum { |
360 | 38 | return Err(Error::HeaderChecksumError); |
361 | 31.5k | } |
362 | | |
363 | 31.5k | Ok(FrameInfo { |
364 | 31.5k | content_size, |
365 | 31.5k | dict_id, |
366 | 31.5k | block_size, |
367 | 31.5k | block_mode, |
368 | 31.5k | block_checksums, |
369 | 31.5k | content_checksum, |
370 | 31.5k | legacy_frame: false, |
371 | 31.5k | }) |
372 | 33.8k | } |
373 | | } |
374 | | |
/// The decoded form of the 4-byte size word that precedes every block.
#[derive(Debug)]
pub(crate) enum BlockInfo {
    /// A compressed block; the payload is its size word.
    Compressed(u32),
    /// An uncompressed block; the payload is its size word without the
    /// "uncompressed" marker bit.
    Uncompressed(u32),
    /// A size word of zero.
    EndMark,
}
381 | | |
382 | | impl BlockInfo { |
383 | 223M | pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> { |
384 | 223M | let mut size_buffer = [0u8; 4]; |
385 | 223M | input.read_exact(&mut size_buffer)?; |
386 | 223M | let size = u32::from_le_bytes(size_buffer); |
387 | 223M | if size == 0 { |
388 | 17.2k | Ok(BlockInfo::EndMark) |
389 | 222M | } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 { |
390 | 124M | Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT)) |
391 | | } else { |
392 | 98.4M | Ok(BlockInfo::Compressed(size)) |
393 | | } |
394 | 223M | } |
395 | | |
396 | 222M | pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> { |
397 | 222M | let value = match self { |
398 | 98.2M | BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo), |
399 | 124M | BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len) |
400 | 98.2M | if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 => |
401 | | { |
402 | 0 | return Err(Error::InvalidBlockInfo) |
403 | | } |
404 | 98.2M | BlockInfo::Compressed(len) => *len, |
405 | 124M | BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT, |
406 | 16.9k | BlockInfo::EndMark => 0, |
407 | | }; |
408 | 222M | output.write_all(&value.to_le_bytes())?; |
409 | 222M | Ok(4) |
410 | 222M | } |
411 | | } |