/rust/registry/src/index.crates.io-1949cf8c6b5b557f/ruzstd-0.8.2/src/encoding/mod.rs
Line | Count | Source |
1 | | //! Structures and utilities used for compressing/encoding data into the Zstd format. |
2 | | |
3 | | pub(crate) mod block_header; |
4 | | pub(crate) mod blocks; |
5 | | pub(crate) mod frame_header; |
6 | | pub(crate) mod match_generator; |
7 | | pub(crate) mod util; |
8 | | |
9 | | mod frame_compressor; |
10 | | mod levels; |
11 | | pub use frame_compressor::FrameCompressor; |
12 | | pub use match_generator::MatchGeneratorDriver; |
13 | | |
14 | | use crate::io::{Read, Write}; |
15 | | use alloc::vec::Vec; |
16 | | |
17 | | /// Convenience function to compress some source into a target without reusing any resources of the compressor |
18 | | /// ```rust |
19 | | /// use ruzstd::encoding::{compress, CompressionLevel}; |
20 | | /// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0]; |
21 | | /// let mut target = Vec::new(); |
22 | | /// compress(data, &mut target, CompressionLevel::Fastest); |
23 | | /// ``` |
24 | 0 | pub fn compress<R: Read, W: Write>(source: R, target: W, level: CompressionLevel) { |
25 | 0 | let mut frame_enc = FrameCompressor::new(level); |
26 | 0 | frame_enc.set_source(source); |
27 | 0 | frame_enc.set_drain(target); |
28 | 0 | frame_enc.compress(); |
29 | 0 | } |
30 | | |
31 | | /// Convenience function to compress some source into a Vec without reusing any resources of the compressor |
32 | | /// ```rust |
33 | | /// use ruzstd::encoding::{compress_to_vec, CompressionLevel}; |
34 | | /// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0]; |
35 | | /// let compressed = compress_to_vec(data, CompressionLevel::Fastest); |
36 | | /// ``` |
37 | 0 | pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> { |
38 | 0 | let mut vec = Vec::new(); |
39 | 0 | compress(source, &mut vec, level); |
40 | 0 | vec |
41 | 0 | } |
42 | | |
/// The compression mode used impacts the speed of compression,
/// and resulting compression ratios. Faster compression will result
/// in worse compression ratios, and vice versa.
///
/// The type is a plain, field-less enum: it can be copied freely, compared
/// for equality and printed with `{:?}`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CompressionLevel {
    /// This level does not compress the data at all, and simply wraps
    /// it in a Zstandard frame.
    Uncompressed,
    /// This level is roughly equivalent to Zstd compression level 1.
    Fastest,
    /// This level is roughly equivalent to Zstd level 3,
    /// or the one used by the official compressor when no level
    /// is specified.
    ///
    /// UNIMPLEMENTED
    Default,
    /// This level is roughly equivalent to Zstd level 7.
    ///
    /// UNIMPLEMENTED
    Better,
    /// This level is roughly equivalent to Zstd level 11.
    ///
    /// UNIMPLEMENTED
    Best,
}
68 | | |
69 | | /// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm |
70 | | /// making their own tradeoffs between runtime, memory usage and compression ratio |
71 | | /// |
72 | | /// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on. |
73 | | /// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window |
74 | | /// the decoder will need to decode the data again. |
75 | | /// |
76 | | /// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the |
77 | | /// window of data that is being used for matching. |
78 | | /// |
79 | | /// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`. |
80 | | /// |
81 | | /// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called. |
82 | | /// |
83 | | /// This is repeated until no more data is left to be compressed. |
84 | | pub trait Matcher { |
85 | | /// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB. |
86 | | fn get_next_space(&mut self) -> alloc::vec::Vec<u8>; |
87 | | /// Get a reference to the last commited space |
88 | | fn get_last_space(&mut self) -> &[u8]; |
89 | | /// Commit a space to the matcher so it can be matched against |
90 | | fn commit_space(&mut self, space: alloc::vec::Vec<u8>); |
91 | | /// Just process the data in the last commited space for future matching |
92 | | fn skip_matching(&mut self); |
93 | | /// Process the data in the last commited space for future matching AND generate matches for the data |
94 | | fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>)); |
95 | | /// Reset this matcher so it can be used for the next new frame |
96 | | fn reset(&mut self, level: CompressionLevel); |
97 | | /// The size of the window the decoder will need to execute all sequences produced by this matcher |
98 | | /// |
99 | | /// May change after a call to reset with a different compression level |
100 | | fn window_size(&self) -> u64; |
101 | | } |
102 | | |
/// Sequences that a [`Matcher`] can produce
///
/// Both variants only borrow their literals and otherwise hold plain integers,
/// so the type is `Copy` and can be duplicated or stored without extra allocation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Sequence<'data> {
    /// Is encoded as a sequence for the decoder sequence execution.
    ///
    /// First the literals will be copied to the decoded data,
    /// then `match_len` bytes are copied from `offset` bytes back in the decoded data
    Triple {
        /// Bytes that are copied to the decoded data before the match is executed
        literals: &'data [u8],
        /// How many bytes back in the decoded data the match starts
        offset: usize,
        /// Number of bytes copied from the match position
        match_len: usize,
    },
    /// This is returned as the last sequence in a block
    ///
    /// These literals will just be copied at the end of the sequence execution by the decoder
    Literals {
        /// Bytes that are copied to the decoded data as they are
        literals: &'data [u8],
    },
}