/rust/registry/src/index.crates.io-1949cf8c6b5b557f/ruzstd-0.8.2/src/decoding/dictionary.rs
Line | Count | Source |
1 | | use alloc::vec::Vec; |
2 | | use core::convert::TryInto; |
3 | | |
4 | | use crate::decoding::errors::DictionaryDecodeError; |
5 | | use crate::decoding::scratch::FSEScratch; |
6 | | use crate::decoding::scratch::HuffmanScratch; |
7 | | |
/// A parsed Zstandard dictionary.
///
/// Zstandard includes support for "raw content" dictionaries, which store bytes
/// that may optionally be referenced during sequence execution. Besides that
/// content, this struct also carries the entropy tables and the repeat offsets
/// that [`Dictionary::decode_dict`] reads from the serialized dictionary.
///
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format>
pub struct Dictionary {
    /// A 4 byte value used by decoders to check if they can use
    /// the correct dictionary. This value must not be zero.
    ///
    /// `decode_dict` reads it as a little-endian `u32` from the four bytes
    /// that directly follow the magic number.
    pub id: u32,
    /// The FSE side of the dictionary's entropy tables. `decode_dict` builds
    /// its `offsets`, `match_lengths` and `literal_lengths` decoders, in that
    /// order, from the bytes that follow the Huffman table.
    pub fse: FSEScratch,
    /// The Huffman side of the dictionary's entropy tables. `decode_dict`
    /// builds `huf.table` from the bytes that directly follow the dictionary id.
    pub huf: HuffmanScratch,
    /// The content of a dictionary acts as a "past" in front of data
    /// to compress or decompress,
    /// so it can be referenced in sequence commands.
    /// As long as the amount of data decoded from this frame is less than or
    /// equal to Window_Size, sequence commands may specify offsets longer than
    /// the total length of decoded output so far to reference back to the
    /// dictionary, even parts of the dictionary with offsets larger than Window_Size.
    /// After the total output has surpassed Window_Size however,
    /// this is no longer allowed and the dictionary is no longer accessible.
    pub dict_content: Vec<u8>,
    /// The 3 most recent offsets are stored so that they can be used
    /// during sequence execution, see
    /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets>
    /// for more.
    ///
    /// `decode_dict` fills these from three consecutive little-endian `u32`
    /// values stored right after the entropy tables.
    pub offset_hist: [u32; 3],
}
38 | | |
/// The four bytes that mark the start of a serialized dictionary: the magic
/// number `0xEC30A437` in little-endian byte order.
///
/// [`Dictionary::decode_dict`] compares the first four input bytes against
/// this value and rejects the input when they differ.
pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];
41 | | |
42 | | impl Dictionary { |
43 | | /// Parses the dictionary from `raw` and set the tables |
44 | | /// it returns the dict_id for checking with the frame's `dict_id`` |
45 | 0 | pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> { |
46 | 0 | let mut new_dict = Dictionary { |
47 | 0 | id: 0, |
48 | 0 | fse: FSEScratch::new(), |
49 | 0 | huf: HuffmanScratch::new(), |
50 | 0 | dict_content: Vec::new(), |
51 | 0 | offset_hist: [2, 4, 8], |
52 | 0 | }; |
53 | | |
54 | 0 | let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away"); |
55 | 0 | if magic_num != MAGIC_NUM { |
56 | 0 | return Err(DictionaryDecodeError::BadMagicNum { got: magic_num }); |
57 | 0 | } |
58 | | |
59 | 0 | let dict_id = raw[4..8].try_into().expect("optimized away"); |
60 | 0 | let dict_id = u32::from_le_bytes(dict_id); |
61 | 0 | new_dict.id = dict_id; |
62 | | |
63 | 0 | let raw_tables = &raw[8..]; |
64 | | |
65 | 0 | let huf_size = new_dict.huf.table.build_decoder(raw_tables)?; |
66 | 0 | let raw_tables = &raw_tables[huf_size as usize..]; |
67 | | |
68 | 0 | let of_size = new_dict.fse.offsets.build_decoder( |
69 | 0 | raw_tables, |
70 | | crate::decoding::sequence_section_decoder::OF_MAX_LOG, |
71 | 0 | )?; |
72 | 0 | let raw_tables = &raw_tables[of_size..]; |
73 | | |
74 | 0 | let ml_size = new_dict.fse.match_lengths.build_decoder( |
75 | 0 | raw_tables, |
76 | | crate::decoding::sequence_section_decoder::ML_MAX_LOG, |
77 | 0 | )?; |
78 | 0 | let raw_tables = &raw_tables[ml_size..]; |
79 | | |
80 | 0 | let ll_size = new_dict.fse.literal_lengths.build_decoder( |
81 | 0 | raw_tables, |
82 | | crate::decoding::sequence_section_decoder::LL_MAX_LOG, |
83 | 0 | )?; |
84 | 0 | let raw_tables = &raw_tables[ll_size..]; |
85 | | |
86 | 0 | let offset1 = raw_tables[0..4].try_into().expect("optimized away"); |
87 | 0 | let offset1 = u32::from_le_bytes(offset1); |
88 | | |
89 | 0 | let offset2 = raw_tables[4..8].try_into().expect("optimized away"); |
90 | 0 | let offset2 = u32::from_le_bytes(offset2); |
91 | | |
92 | 0 | let offset3 = raw_tables[8..12].try_into().expect("optimized away"); |
93 | 0 | let offset3 = u32::from_le_bytes(offset3); |
94 | | |
95 | 0 | new_dict.offset_hist[0] = offset1; |
96 | 0 | new_dict.offset_hist[1] = offset2; |
97 | 0 | new_dict.offset_hist[2] = offset3; |
98 | | |
99 | 0 | let raw_content = &raw_tables[12..]; |
100 | 0 | new_dict.dict_content.extend(raw_content); |
101 | | |
102 | 0 | Ok(new_dict) |
103 | 0 | } |
104 | | } |