/rust/registry/src/index.crates.io-1949cf8c6b5b557f/weezl-0.1.12/src/encode.rs
Line | Count | Source |
1 | | //! A module for all encoding needs. |
2 | | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; |
3 | | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; |
4 | | |
5 | | use crate::alloc::{boxed::Box, vec::Vec}; |
6 | | #[cfg(feature = "std")] |
7 | | use crate::error::StreamResult; |
8 | | #[cfg(feature = "std")] |
9 | | use std::io::{self, BufRead, Write}; |
10 | | |
11 | | /// The state for encoding data with an LZW algorithm. |
12 | | /// |
13 | | /// The same structure can be utilized with streams as well as your own buffers and driver logic. |
14 | | /// It may even be possible to mix them if you are sufficiently careful not to lose any written |
15 | | /// data in the process. |
16 | | /// |
17 | | /// This is a sans-IO implementation, meaning that it only contains the state of the encoder and |
18 | | /// the caller will provide buffers for input and output data when calling the basic |
19 | | /// [`encode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*` |
20 | | /// methods for encoding with a particular style of common IO. |
21 | | /// |
22 | | /// * [`encode`] for encoding once without any IO-loop. |
23 | | /// * [`into_async`] for encoding with the `futures` traits for asynchronous IO. |
24 | | /// * [`into_stream`] for encoding with the standard `io` traits. |
25 | | /// * [`into_vec`] for in-memory encoding. |
26 | | /// |
27 | | /// [`encode_bytes`]: #method.encode_bytes |
28 | | /// [`encode`]: #method.encode |
29 | | /// [`into_async`]: #method.into_async |
30 | | /// [`into_stream`]: #method.into_stream |
31 | | /// [`into_vec`]: #method.into_vec |
32 | | pub struct Encoder { |
33 | | /// Internally dispatch via a dynamic trait object. This did not have any significant |
34 | | /// performance impact as we batch data internally and this pointer does not change after |
35 | | /// creation! |
36 | | state: Box<dyn Stateful + Send + 'static>, |
37 | | } |
38 | | |
39 | | /// An encoding stream sink. |
40 | | /// |
41 | | /// See [`Encoder::into_stream`] on how to create this type. |
42 | | /// |
43 | | /// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream |
44 | | #[cfg_attr( |
45 | | not(feature = "std"), |
46 | | deprecated = "This type is only useful with the `std` feature." |
47 | | )] |
48 | | #[cfg_attr(not(feature = "std"), allow(dead_code))] |
49 | | pub struct IntoStream<'d, W> { |
50 | | encoder: &'d mut Encoder, |
51 | | writer: W, |
52 | | buffer: Option<StreamBuf<'d>>, |
53 | | default_size: usize, |
54 | | } |
55 | | |
56 | | /// An async encoding sink. |
57 | | /// |
58 | | /// See [`Encoder::into_async`] on how to create this type. |
59 | | /// |
60 | | /// [`Encoder::into_async`]: struct.Encoder.html#method.into_async |
61 | | #[cfg(feature = "async")] |
62 | | pub struct IntoAsync<'d, W> { |
63 | | encoder: &'d mut Encoder, |
64 | | writer: W, |
65 | | buffer: Option<StreamBuf<'d>>, |
66 | | default_size: usize, |
67 | | } |
68 | | |
69 | | /// An encoding sink into a vector. |
70 | | /// |
71 | | /// See [`Encoder::into_vec`] on how to create this type. |
72 | | /// |
73 | | /// [`Encoder::into_vec`]: struct.Encoder.html#method.into_vec |
74 | | pub struct IntoVec<'d> { |
75 | | encoder: &'d mut Encoder, |
76 | | vector: &'d mut Vec<u8>, |
77 | | } |
78 | | |
79 | | trait Stateful { |
80 | | fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; |
81 | | fn mark_ended(&mut self) -> bool; |
82 | | /// Reset the state tracking if end code has been written. |
83 | | fn restart(&mut self); |
84 | | /// Reset the encoder to the beginning, dropping all buffers etc. |
85 | | fn reset(&mut self); |
86 | | } |
87 | | |
88 | | struct EncodeState<B: Buffer> { |
89 | | /// The configured minimal code size. |
90 | | min_size: u8, |
91 | | /// The current encoding symbol tree. |
92 | | tree: Tree, |
93 | | /// If we have pushed the end code. |
94 | | has_ended: bool, |
95 | | /// If tiff then bumps are a single code sooner. |
96 | | is_tiff: bool, |
97 | | /// The code corresponding to the currently read characters. |
98 | | current_code: Code, |
99 | | /// The clear code for resetting the dictionary. |
100 | | clear_code: Code, |
101 | | /// The bit buffer for encoding. |
102 | | buffer: B, |
103 | | } |
104 | | |
105 | | struct MsbBuffer { |
106 | | /// The current code length. |
107 | | code_size: u8, |
108 | | /// The buffer bits. |
109 | | buffer: u64, |
110 | | /// The number of valid buffer bits. |
111 | | bits_in_buffer: u8, |
112 | | } |
113 | | |
114 | | struct LsbBuffer { |
115 | | /// The current code length. |
116 | | code_size: u8, |
117 | | /// The buffer bits. |
118 | | buffer: u64, |
119 | | /// The number of valid buffer bits. |
120 | | bits_in_buffer: u8, |
121 | | } |
122 | | |
123 | | trait Buffer { |
124 | | fn new(size: u8) -> Self; |
125 | | /// Reset the code size in the buffer. |
126 | | fn reset(&mut self, min_size: u8); |
127 | | /// Apply effects of a Clear Code. |
128 | | fn clear(&mut self, min_size: u8); |
129 | | /// Insert a code into the buffer. |
130 | | fn buffer_code(&mut self, code: Code); |
131 | | /// Push bytes if the buffer space is getting small. |
132 | | fn push_out(&mut self, out: &mut &mut [u8]) -> bool; |
133 | | /// Flush all full bytes, returning if at least one more byte remains. |
134 | | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool; |
135 | | /// Pad the buffer to a full byte. |
136 | | fn buffer_pad(&mut self); |
137 | | /// Increase the maximum code size. |
138 | | fn bump_code_size(&mut self); |
139 | | /// Return the maximum code with the current code size. |
140 | | fn max_code(&self) -> Code; |
141 | | /// Return the current code size in bits. |
142 | | fn code_size(&self) -> u8; |
143 | | } |
144 | | |
145 | | /// One tree node for at most each code. |
146 | | /// To avoid using too much memory we keep nodes with few successors in optimized form. This form |
147 | | /// doesn't offer lookup by indexing but instead does a linear search. |
148 | | #[derive(Default)] |
149 | | struct Tree { |
150 | | simples: Vec<Simple>, |
151 | | complex: Vec<Full>, |
152 | | keys: Vec<CompressedKey>, |
153 | | } |
154 | | |
155 | | #[derive(Clone, Copy)] |
156 | | enum FullKey { |
157 | | NoSuccessor, |
158 | | Simple(u16), |
159 | | Full(u16), |
160 | | } |
161 | | |
162 | | #[derive(Clone, Copy)] |
163 | | struct CompressedKey(u16); |
164 | | |
165 | | const SHORT: usize = 16; |
166 | | |
167 | | #[derive(Clone, Copy)] |
168 | | struct Simple { |
169 | | codes: [Code; SHORT], |
170 | | chars: [u8; SHORT], |
171 | | count: u8, |
172 | | } |
173 | | |
174 | | #[derive(Clone, Copy)] |
175 | | struct Full { |
176 | | char_continuation: [Code; 256], |
177 | | } |
178 | | |
179 | | /// Describes the static parameters for creating an encoder. |
180 | | #[derive(Clone, Debug)] |
181 | | pub struct Configuration { |
182 | | order: BitOrder, |
183 | | size: u8, |
184 | | tiff: bool, |
185 | | } |
186 | | |
187 | | impl Configuration { |
188 | | /// Create a configuration to encode with the specified bit order and symbol size. |
189 | | /// |
190 | | /// # Panics |
191 | | /// |
192 | | /// The `size` needs to be in the interval `2..=12`. |
193 | 0 | pub fn new(order: BitOrder, size: u8) -> Self { |
194 | 0 | super::assert_encode_size(size); |
195 | 0 | Configuration { |
196 | 0 | order, |
197 | 0 | size, |
198 | 0 | tiff: false, |
199 | 0 | } |
200 | 0 | } |
201 | | |
202 | | /// Create a configuration for a TIFF compatible encoder. |
203 | | /// |
204 | | /// # Panics |
205 | | /// |
206 | | /// The `size` needs to be in the interval `2..=12`. |
207 | 0 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { |
208 | 0 | super::assert_encode_size(size); |
209 | 0 | Configuration { |
210 | 0 | order, |
211 | 0 | size, |
212 | 0 | tiff: true, |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | | /// Create a new encoder with the defined configuration. |
217 | 0 | pub fn build(self) -> Encoder { |
218 | 0 | Encoder { |
219 | 0 | state: Encoder::from_configuration(&self), |
220 | 0 | } |
221 | 0 | } |
222 | | } |
223 | | |
224 | | impl Encoder { |
225 | | /// Create a new encoder with the specified bit order and symbol size. |
226 | | /// |
227 | | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the |
228 | | /// original specification. In particular you will need to specify an `Lsb` bit order to encode |
229 | | /// the data portion of a compressed `gif` image. |
230 | | /// |
231 | | /// # Panics |
232 | | /// |
233 | | /// The `size` needs to be in the interval `2..=12`. |
234 | 0 | pub fn new(order: BitOrder, size: u8) -> Self { |
235 | 0 | Configuration::new(order, size).build() |
236 | 0 | } |
237 | | |
238 | | /// Create a TIFF compatible encoder with the specified bit order and symbol size. |
239 | | /// |
240 | | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the |
241 | | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing |
242 | | /// the code size. It switches one symbol sooner. |
243 | | /// |
244 | | /// # Panics |
245 | | /// |
246 | | /// The `size` needs to be in the interval `2..=12`. |
247 | 0 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { |
248 | 0 | Configuration::with_tiff_size_switch(order, size).build() |
249 | 0 | } |
250 | | |
251 | 0 | fn from_configuration(cfg: &Configuration) -> Box<dyn Stateful + Send + 'static> { |
252 | 0 | match cfg.order { |
253 | | BitOrder::Lsb => { |
254 | 0 | let mut state = EncodeState::<LsbBuffer>::new(cfg.size); |
255 | 0 | state.is_tiff = cfg.tiff; |
256 | 0 | Box::new(state) |
257 | | } |
258 | | BitOrder::Msb => { |
259 | 0 | let mut state = EncodeState::<MsbBuffer>::new(cfg.size); |
260 | 0 | state.is_tiff = cfg.tiff; |
261 | 0 | Box::new(state) |
262 | | } |
263 | | } |
264 | 0 | } |
265 | | |
266 | | /// Encode some bytes from `inp` into `out`. |
267 | | /// |
268 | | /// See [`into_stream`] for high-level functions (this interface is only available with the |
269 | | /// `std` feature) and [`finish`] for marking the input data as complete. |
270 | | /// |
271 | | /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and |
272 | | /// all following ones will _not_ be consumed and the `status` of the result will signal an |
273 | | /// error. The result will also indicate that all bytes up to but not including the offending |
274 | | /// byte have been consumed. You may try again with a fixed byte. |
275 | | /// |
276 | | /// [`into_stream`]: #method.into_stream |
277 | | /// [`finish`]: #method.finish |
278 | 0 | pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { |
279 | 0 | self.state.advance(inp, out) |
280 | 0 | } |
281 | | |
282 | | /// Encode a single chunk of data. |
283 | | /// |
284 | | /// This method will add an end marker to the encoded chunk. |
285 | | /// |
286 | | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize |
287 | | /// buffer size, to supply an existing vector, to control whether an end marker is required, or |
288 | | /// to preserve partial data in the case of an encoding error. |
289 | | /// |
290 | | /// [`into_vec`]: #method.into_vec |
291 | | /// |
292 | | /// # Example |
293 | | /// |
294 | | /// ``` |
295 | | /// use weezl::{BitOrder, encode::Encoder}; |
296 | | /// |
297 | | /// let data = b"Hello, world"; |
298 | | /// let encoded = Encoder::new(BitOrder::Msb, 9) |
299 | | /// .encode(data) |
300 | | /// .expect("All bytes valid for code size"); |
301 | | /// ``` |
302 | 0 | pub fn encode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> { |
303 | 0 | let mut output = Vec::new(); |
304 | 0 | self.into_vec(&mut output).encode_all(data).status?; |
305 | 0 | Ok(output) |
306 | 0 | } |
307 | | |
308 | | /// Construct an encoder into a writer. |
309 | | #[cfg(feature = "std")] |
310 | 0 | pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> { |
311 | 0 | IntoStream { |
312 | 0 | encoder: self, |
313 | 0 | writer, |
314 | 0 | buffer: None, |
315 | 0 | default_size: STREAM_BUF_SIZE, |
316 | 0 | } |
317 | 0 | } Unexecuted instantiation: <weezl::encode::Encoder>::into_stream::<_> Unexecuted instantiation: <weezl::encode::Encoder>::into_stream::<&mut &mut alloc::vec::Vec<u8>> Unexecuted instantiation: <weezl::encode::Encoder>::into_stream::<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>> |
318 | | |
319 | | /// Construct an encoder into an async writer. |
320 | | #[cfg(feature = "async")] |
321 | | pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> { |
322 | | IntoAsync { |
323 | | encoder: self, |
324 | | writer, |
325 | | buffer: None, |
326 | | default_size: STREAM_BUF_SIZE, |
327 | | } |
328 | | } |
329 | | |
330 | | /// Construct an encoder into a vector. |
331 | | /// |
332 | | /// All encoded data is appended and the vector is __not__ cleared. |
333 | | /// |
334 | | /// Compared to `into_stream` this interface allows high-level access to encoding without |
335 | | /// requiring the `std` feature. Also, it can make full use of the extra buffer control that the |
336 | | /// special target exposes. |
337 | 0 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> { |
338 | 0 | IntoVec { |
339 | 0 | encoder: self, |
340 | 0 | vector: vec, |
341 | 0 | } |
342 | 0 | } |
343 | | |
344 | | /// Mark the encoding as in the process of finishing. |
345 | | /// |
346 | | /// The next following call to `encode_bytes` which is able to consume the complete input will |
347 | | /// also try to emit an end code. It's not recommended, but also not unsound, to use different |
348 | | /// byte slices in different calls from this point forward and thus to 'delay' the actual end |
349 | | /// of the data stream. The behaviour after the end marker has been written is unspecified but |
350 | | /// sound. |
351 | 0 | pub fn finish(&mut self) { |
352 | 0 | self.state.mark_ended(); |
353 | 0 | } |
354 | | |
355 | | /// Undo marking this data stream as ending. |
356 | | /// FIXME: clarify how this interacts with padding introduced after end code. |
357 | | #[allow(dead_code)] |
358 | 0 | pub(crate) fn restart(&mut self) { |
359 | 0 | self.state.restart() |
360 | 0 | } |
361 | | |
362 | | /// Reset all internal state. |
363 | | /// |
364 | | /// This produces an encoder as if just constructed with `new` but taking slightly less work. In |
365 | | /// particular it will not deallocate any internal allocations. It will also avoid some |
366 | | /// duplicate setup work. |
367 | 0 | pub fn reset(&mut self) { |
368 | 0 | self.state.reset() |
369 | 0 | } |
370 | | } |
371 | | |
372 | | #[cfg(feature = "std")] |
373 | | impl<'d, W: Write> IntoStream<'d, W> { |
374 | | /// Encode data from a reader. |
375 | | /// |
376 | | /// This will drain the supplied reader. It will not encode an end marker after all data has |
377 | | /// been processed. |
378 | 0 | pub fn encode(&mut self, read: impl BufRead) -> StreamResult { |
379 | 0 | self.encode_part(read, false) |
380 | 0 | } |
381 | | |
382 | | /// Encode data from a reader and an end marker. |
383 | 0 | pub fn encode_all(mut self, read: impl BufRead) -> StreamResult { |
384 | 0 | self.encode_part(read, true) |
385 | 0 | } Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_all::<_> Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_all::<&[u8]> Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_all::<&[u8]> |
386 | | |
387 | | /// Set the size of the intermediate encode buffer. |
388 | | /// |
389 | | /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is |
390 | | /// available and any encoding method is called. No buffer is allocated if `set_buffer` has |
391 | | /// been called. The buffer is reused. |
392 | | /// |
393 | | /// # Panics |
394 | | /// This method panics if `size` is `0`. |
395 | 0 | pub fn set_buffer_size(&mut self, size: usize) { |
396 | 0 | assert_ne!(size, 0, "Attempted to set empty buffer"); |
397 | 0 | self.default_size = size; |
398 | 0 | } |
399 | | |
400 | | /// Use a particular buffer as an intermediate encode buffer. |
401 | | /// |
402 | | /// Calling this sets or replaces the buffer. When a buffer has been set then it is used |
403 | | /// instead of dynamically allocating a buffer. Note that the size of the buffer is relevant |
404 | | /// for efficient encoding as there is additional overhead from `write` calls each time the |
405 | | /// buffer has been filled. |
406 | | /// |
407 | | /// # Panics |
408 | | /// This method panics if the `buffer` is empty. |
409 | 0 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { |
410 | 0 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); |
411 | 0 | self.buffer = Some(StreamBuf::Borrowed(buffer)); |
412 | 0 | } |
413 | | |
414 | 0 | fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult { |
415 | | let IntoStream { |
416 | 0 | encoder, |
417 | 0 | writer, |
418 | 0 | buffer, |
419 | 0 | default_size, |
420 | 0 | } = self; |
421 | | enum Progress { |
422 | | Ok, |
423 | | Done, |
424 | | } |
425 | | |
426 | 0 | let mut bytes_read = 0; |
427 | 0 | let mut bytes_written = 0; |
428 | | |
429 | 0 | let read_bytes = &mut bytes_read; |
430 | 0 | let write_bytes = &mut bytes_written; |
431 | | |
432 | 0 | let outbuf: &mut [u8] = |
433 | 0 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#0}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#0}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#0} |
434 | 0 | StreamBuf::Borrowed(slice) => &mut *slice, |
435 | 0 | StreamBuf::Owned(vec) => &mut *vec, |
436 | | }; |
437 | 0 | assert!(!outbuf.is_empty()); |
438 | | |
439 | 0 | let once = move || { |
440 | 0 | let data = read.fill_buf()?; |
441 | | |
442 | 0 | if data.is_empty() { |
443 | 0 | if finish { |
444 | 0 | encoder.finish(); |
445 | 0 | } else { |
446 | 0 | return Ok(Progress::Done); |
447 | | } |
448 | 0 | } |
449 | | |
450 | 0 | let result = encoder.encode_bytes(data, &mut outbuf[..]); |
451 | 0 | *read_bytes += result.consumed_in; |
452 | 0 | *write_bytes += result.consumed_out; |
453 | 0 | read.consume(result.consumed_in); |
454 | | |
455 | 0 | let done = result.status.map_err(|err| { |
456 | 0 | io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err)) |
457 | 0 | })?; Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#1}::{closure#0}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#1}::{closure#0}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#1}::{closure#0} |
458 | | |
459 | 0 | if let LzwStatus::Done = done { |
460 | 0 | writer.write_all(&outbuf[..result.consumed_out])?; |
461 | 0 | return Ok(Progress::Done); |
462 | 0 | } |
463 | | |
464 | 0 | if let LzwStatus::NoProgress = done { |
465 | 0 | return Err(io::Error::new( |
466 | 0 | io::ErrorKind::UnexpectedEof, |
467 | 0 | "No more data but no end marker detected", |
468 | 0 | )); |
469 | 0 | } |
470 | | |
471 | 0 | writer.write_all(&outbuf[..result.consumed_out])?; |
472 | 0 | Ok(Progress::Ok) |
473 | 0 | }; Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#1}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#1}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#1} |
474 | | |
475 | 0 | let status = core::iter::repeat_with(once) |
476 | | // scan+fuse can be replaced with map_while |
477 | 0 | .scan((), |(), result| match result { |
478 | 0 | Ok(Progress::Ok) => Some(Ok(())), |
479 | 0 | Err(err) => Some(Err(err)), |
480 | 0 | Ok(Progress::Done) => None, |
481 | 0 | }) Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#2}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#2}Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#2} |
482 | 0 | .fuse() |
483 | 0 | .collect(); |
484 | | |
485 | 0 | StreamResult { |
486 | 0 | bytes_read, |
487 | 0 | bytes_written, |
488 | 0 | status, |
489 | 0 | } |
490 | 0 | } Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_> Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]> Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]> |
491 | | } |
492 | | |
493 | | impl IntoVec<'_> { |
494 | | /// Encode data from a slice. |
495 | 0 | pub fn encode(&mut self, read: &[u8]) -> VectorResult { |
496 | 0 | self.encode_part(read, false) |
497 | 0 | } |
498 | | |
499 | | /// Encode data from a slice, adding an end marker. |
500 | 0 | pub fn encode_all(mut self, read: &[u8]) -> VectorResult { |
501 | 0 | self.encode_part(read, true) |
502 | 0 | } |
503 | | |
504 | 0 | fn grab_buffer(&mut self) -> (&mut [u8], &mut Encoder) { |
505 | | const CHUNK_SIZE: usize = 1 << 12; |
506 | 0 | let decoder = &mut self.encoder; |
507 | 0 | let length = self.vector.len(); |
508 | | |
509 | | // Use the vector to do overflow checks and w/e. |
510 | 0 | self.vector.reserve(CHUNK_SIZE); |
511 | | // FIXME: encoding into uninit buffer? |
512 | 0 | self.vector.resize(length + CHUNK_SIZE, 0u8); |
513 | | |
514 | 0 | (&mut self.vector[length..], decoder) |
515 | 0 | } |
516 | | |
517 | 0 | fn encode_part(&mut self, part: &[u8], finish: bool) -> VectorResult { |
518 | 0 | let mut result = VectorResult { |
519 | 0 | consumed_in: 0, |
520 | 0 | consumed_out: 0, |
521 | 0 | status: Ok(LzwStatus::Ok), |
522 | 0 | }; |
523 | | |
524 | | enum Progress { |
525 | | Ok, |
526 | | Done, |
527 | | } |
528 | | |
529 | | // Converting to mutable refs to move into the `once` closure. |
530 | 0 | let read_bytes = &mut result.consumed_in; |
531 | 0 | let write_bytes = &mut result.consumed_out; |
532 | 0 | let mut data = part; |
533 | | |
534 | | // A 64 MB buffer is quite large but should get alloc_zeroed. |
535 | | // Note that the decoded size can be up to quadratic in code block. |
536 | 0 | let once = move || { |
537 | | // Grab a new output buffer. |
538 | 0 | let (outbuf, encoder) = self.grab_buffer(); |
539 | | |
540 | 0 | if finish { |
541 | 0 | encoder.finish(); |
542 | 0 | } |
543 | | |
544 | | // Encode as much of the input as fits. |
545 | 0 | let result = encoder.encode_bytes(data, &mut outbuf[..]); |
546 | | // Do the bookkeeping and consume the buffer. |
547 | 0 | *read_bytes += result.consumed_in; |
548 | 0 | *write_bytes += result.consumed_out; |
549 | 0 | data = &data[result.consumed_in..]; |
550 | | |
551 | 0 | let unfilled = outbuf.len() - result.consumed_out; |
552 | 0 | let filled = self.vector.len() - unfilled; |
553 | 0 | self.vector.truncate(filled); |
554 | | |
555 | | // Handle the status in the result. |
556 | 0 | let done = result.status?; |
557 | 0 | if let LzwStatus::Done = done { |
558 | 0 | Ok(Progress::Done) |
559 | | } else { |
560 | 0 | Ok(Progress::Ok) |
561 | | } |
562 | 0 | }; |
563 | | |
564 | | // Encode chunks of input data until we're done. |
565 | 0 | let status: Result<(), _> = core::iter::repeat_with(once) |
566 | | // scan+fuse can be replaced with map_while |
567 | 0 | .scan((), |(), result| match result { |
568 | 0 | Ok(Progress::Ok) => Some(Ok(())), |
569 | 0 | Err(err) => Some(Err(err)), |
570 | 0 | Ok(Progress::Done) => None, |
571 | 0 | }) |
572 | 0 | .fuse() |
573 | 0 | .collect(); |
574 | | |
575 | 0 | if let Err(err) = status { |
576 | 0 | result.status = Err(err); |
577 | 0 | } |
578 | | |
579 | 0 | result |
580 | 0 | } |
581 | | } |
582 | | |
583 | | // This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would |
584 | | // trip over the usage of await, which is a reserved keyword in that edition/version. It only |
585 | | // contains an impl block. |
586 | | #[cfg(feature = "async")] |
587 | | #[path = "encode_into_async.rs"] |
588 | | mod impl_encode_into_async; |
589 | | |
590 | | impl<B: Buffer> EncodeState<B> { |
591 | 0 | fn new(min_size: u8) -> Self { |
592 | 0 | let clear_code = 1 << min_size; |
593 | 0 | let mut tree = Tree::default(); |
594 | 0 | tree.init(min_size); |
595 | 0 | let mut state = EncodeState { |
596 | 0 | min_size, |
597 | 0 | tree, |
598 | 0 | has_ended: false, |
599 | 0 | is_tiff: false, |
600 | 0 | current_code: clear_code, |
601 | 0 | clear_code, |
602 | 0 | buffer: B::new(min_size), |
603 | 0 | }; |
604 | 0 | state.buffer_code(clear_code); |
605 | 0 | state |
606 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::new Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::new |
607 | | } |
608 | | |
609 | | impl<B: Buffer> Stateful for EncodeState<B> { |
610 | 0 | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { |
611 | 0 | let c_in = inp.len(); |
612 | 0 | let c_out = out.len(); |
613 | 0 | let mut status = Ok(LzwStatus::Ok); |
614 | | |
615 | | 'encoding: loop { |
616 | 0 | if self.push_out(&mut out) { |
617 | 0 | break; |
618 | 0 | } |
619 | | |
620 | 0 | if inp.is_empty() && self.has_ended { |
621 | 0 | let end = self.end_code(); |
622 | 0 | if self.current_code != end { |
623 | 0 | if self.current_code != self.clear_code { |
624 | 0 | self.buffer_code(self.current_code); |
625 | | |
626 | | // When reading this code, the decoder will add an extra entry to its table |
627 | | // before reading the end code. Thus, it may increase its code size based |
628 | | // on this additional entry. |
629 | 0 | if self.tree.keys.len() + usize::from(self.is_tiff) |
630 | 0 | > usize::from(self.buffer.max_code()) |
631 | 0 | && self.buffer.code_size() < MAX_CODESIZE |
632 | 0 | { |
633 | 0 | self.buffer.bump_code_size(); |
634 | 0 | } |
635 | 0 | } |
636 | 0 | self.buffer_code(end); |
637 | 0 | self.current_code = end; |
638 | 0 | self.buffer_pad(); |
639 | 0 | } |
640 | | |
641 | 0 | break; |
642 | 0 | } |
643 | | |
644 | 0 | let mut next_code = None; |
645 | 0 | let mut bytes = inp.iter(); |
646 | 0 | while let Some(&byte) = bytes.next() { |
647 | 0 | if self.min_size < 8 && byte >= 1 << self.min_size { |
648 | 0 | status = Err(LzwError::InvalidCode); |
649 | 0 | break 'encoding; |
650 | 0 | } |
651 | | |
652 | 0 | inp = bytes.as_slice(); |
653 | 0 | match self.tree.iterate(self.current_code, byte) { |
654 | 0 | Ok(code) => self.current_code = code, |
655 | | Err(_) => { |
656 | 0 | next_code = Some(self.current_code); |
657 | | |
658 | 0 | self.current_code = u16::from(byte); |
659 | 0 | break; |
660 | | } |
661 | | } |
662 | | } |
663 | | |
664 | 0 | match next_code { |
665 | | // No more bytes, no code produced. |
666 | 0 | None => break, |
667 | 0 | Some(code) => { |
668 | 0 | self.buffer_code(code); |
669 | | |
670 | 0 | if self.tree.keys.len() + usize::from(self.is_tiff) |
671 | 0 | > usize::from(self.buffer.max_code()) + 1 |
672 | 0 | && self.buffer.code_size() < MAX_CODESIZE |
673 | 0 | { |
674 | 0 | self.buffer.bump_code_size(); |
675 | 0 | } |
676 | | |
677 | 0 | if self.tree.keys.len() > MAX_ENTRIES { |
678 | 0 | self.buffer_code(self.clear_code); |
679 | 0 | self.tree.reset(self.min_size); |
680 | 0 | self.buffer.clear(self.min_size); |
681 | 0 | } |
682 | | } |
683 | | } |
684 | | } |
685 | | |
686 | 0 | if inp.is_empty() && self.current_code == self.end_code() { |
687 | 0 | if !self.flush_out(&mut out) { |
688 | 0 | status = Ok(LzwStatus::Done); |
689 | 0 | } |
690 | 0 | } |
691 | | |
692 | 0 | BufferResult { |
693 | 0 | consumed_in: c_in - inp.len(), |
694 | 0 | consumed_out: c_out - out.len(), |
695 | 0 | status, |
696 | 0 | } |
697 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::advance Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::advance |
698 | | |
699 | 0 | fn mark_ended(&mut self) -> bool { |
700 | 0 | core::mem::replace(&mut self.has_ended, true) |
701 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::mark_ended Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::mark_ended |
702 | | |
703 | 0 | fn restart(&mut self) { |
704 | 0 | self.has_ended = false; |
705 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::restart Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::restart |
706 | | |
707 | 0 | fn reset(&mut self) { |
708 | 0 | self.restart(); |
709 | 0 | self.current_code = self.clear_code; |
710 | 0 | self.tree.reset(self.min_size); |
711 | 0 | self.buffer.reset(self.min_size); |
712 | 0 | self.buffer_code(self.clear_code); |
713 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::reset Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::reset |
714 | | } |
715 | | |
716 | | impl<B: Buffer> EncodeState<B> { |
717 | 0 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { |
718 | 0 | self.buffer.push_out(out) |
719 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::push_out Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::push_out |
720 | | |
721 | 0 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { |
722 | 0 | self.buffer.flush_out(out) |
723 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::flush_out Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::flush_out |
724 | | |
725 | 0 | fn end_code(&self) -> Code { |
726 | 0 | self.clear_code + 1 |
727 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::end_code Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::end_code |
728 | | |
729 | 0 | fn buffer_pad(&mut self) { |
730 | 0 | self.buffer.buffer_pad(); |
731 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::buffer_pad Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::buffer_pad |
732 | | |
733 | 0 | fn buffer_code(&mut self, code: Code) { |
734 | 0 | self.buffer.buffer_code(code); |
735 | 0 | } Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::buffer_code Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::buffer_code |
736 | | } |
737 | | |
738 | | impl Buffer for MsbBuffer { |
739 | 0 | fn new(min_size: u8) -> Self { |
740 | 0 | MsbBuffer { |
741 | 0 | code_size: min_size + 1, |
742 | 0 | buffer: 0, |
743 | 0 | bits_in_buffer: 0, |
744 | 0 | } |
745 | 0 | } |
746 | | |
747 | 0 | fn reset(&mut self, min_size: u8) { |
748 | 0 | self.code_size = min_size + 1; |
749 | 0 | self.buffer = 0; |
750 | 0 | self.bits_in_buffer = 0; |
751 | 0 | } |
752 | | |
753 | 0 | fn clear(&mut self, min_size: u8) { |
754 | 0 | self.code_size = min_size + 1; |
755 | 0 | } |
756 | | |
757 | 0 | fn buffer_code(&mut self, code: Code) { |
758 | 0 | let shift = 64 - self.bits_in_buffer - self.code_size; |
759 | 0 | self.buffer |= u64::from(code) << shift; |
760 | 0 | self.bits_in_buffer += self.code_size; |
761 | 0 | } |
762 | | |
763 | 0 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { |
764 | 0 | if self.bits_in_buffer + 2 * self.code_size < 64 { |
765 | 0 | return false; |
766 | 0 | } |
767 | | |
768 | 0 | self.flush_out(out) |
769 | 0 | } |
770 | | |
771 | 0 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { |
772 | 0 | let want = usize::from(self.bits_in_buffer / 8); |
773 | 0 | let count = want.min((*out).len()); |
774 | 0 | let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); |
775 | 0 | *out = tail; |
776 | | |
777 | 0 | for b in bytes { |
778 | 0 | *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8; |
779 | 0 | self.buffer <<= 8; |
780 | 0 | self.bits_in_buffer -= 8; |
781 | 0 | } |
782 | | |
783 | 0 | count < want |
784 | 0 | } |
785 | | |
786 | 0 | fn buffer_pad(&mut self) { |
787 | 0 | let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7; |
788 | 0 | self.bits_in_buffer += to_byte; |
789 | 0 | } |
790 | | |
791 | 0 | fn bump_code_size(&mut self) { |
792 | 0 | self.code_size += 1; |
793 | 0 | } |
794 | | |
795 | 0 | fn max_code(&self) -> Code { |
796 | 0 | (1 << self.code_size) - 1 |
797 | 0 | } |
798 | | |
799 | 0 | fn code_size(&self) -> u8 { |
800 | 0 | self.code_size |
801 | 0 | } |
802 | | } |
803 | | |
804 | | impl Buffer for LsbBuffer { |
805 | 0 | fn new(min_size: u8) -> Self { |
806 | 0 | LsbBuffer { |
807 | 0 | code_size: min_size + 1, |
808 | 0 | buffer: 0, |
809 | 0 | bits_in_buffer: 0, |
810 | 0 | } |
811 | 0 | } |
812 | | |
813 | 0 | fn reset(&mut self, min_size: u8) { |
814 | 0 | self.code_size = min_size + 1; |
815 | 0 | self.buffer = 0; |
816 | 0 | self.bits_in_buffer = 0; |
817 | 0 | } |
818 | | |
819 | 0 | fn clear(&mut self, min_size: u8) { |
820 | 0 | self.code_size = min_size + 1; |
821 | 0 | } |
822 | | |
823 | 0 | fn buffer_code(&mut self, code: Code) { |
824 | 0 | self.buffer |= u64::from(code) << self.bits_in_buffer; |
825 | 0 | self.bits_in_buffer += self.code_size; |
826 | 0 | } |
827 | | |
828 | 0 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { |
829 | 0 | if self.bits_in_buffer + 2 * self.code_size < 64 { |
830 | 0 | return false; |
831 | 0 | } |
832 | | |
833 | 0 | self.flush_out(out) |
834 | 0 | } |
835 | | |
836 | 0 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { |
837 | 0 | let want = usize::from(self.bits_in_buffer / 8); |
838 | 0 | let count = want.min((*out).len()); |
839 | 0 | let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); |
840 | 0 | *out = tail; |
841 | | |
842 | 0 | for b in bytes { |
843 | 0 | *b = (self.buffer & 0x0000_0000_0000_00ff) as u8; |
844 | 0 | self.buffer >>= 8; |
845 | 0 | self.bits_in_buffer -= 8; |
846 | 0 | } |
847 | | |
848 | 0 | count < want |
849 | 0 | } |
850 | | |
851 | 0 | fn buffer_pad(&mut self) { |
852 | 0 | let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7; |
853 | 0 | self.bits_in_buffer += to_byte; |
854 | 0 | } |
855 | | |
856 | 0 | fn bump_code_size(&mut self) { |
857 | 0 | self.code_size += 1; |
858 | 0 | } |
859 | | |
860 | 0 | fn max_code(&self) -> Code { |
861 | 0 | (1 << self.code_size) - 1 |
862 | 0 | } |
863 | | |
864 | 0 | fn code_size(&self) -> u8 { |
865 | 0 | self.code_size |
866 | 0 | } |
867 | | } |
868 | | |
869 | | impl Tree { |
870 | 0 | fn init(&mut self, min_size: u8) { |
871 | | // We need a way to represent the state of a currently empty buffer. We use the clear code |
872 | | // for this, thus create one complex mapping that leads to the one-char base codes. |
873 | 0 | self.keys |
874 | 0 | .resize((1 << min_size) + 2, FullKey::NoSuccessor.into()); |
875 | 0 | self.complex.push(Full { |
876 | 0 | char_continuation: [0; 256], |
877 | 0 | }); |
878 | 0 | let map_of_begin = self.complex.last_mut().unwrap(); |
879 | 0 | for ch in 0u16..256 { |
880 | 0 | map_of_begin.char_continuation[usize::from(ch)] = ch; |
881 | 0 | } |
882 | 0 | self.keys[1 << min_size] = FullKey::Full(0).into(); |
883 | 0 | } |
884 | | |
885 | 0 | fn reset(&mut self, min_size: u8) { |
886 | 0 | self.simples.clear(); |
887 | 0 | self.keys.truncate((1 << min_size) + 2); |
888 | | // Keep entry for clear code. |
889 | 0 | self.complex.truncate(1); |
890 | | // The first complex is not changed.. |
891 | 0 | for k in self.keys[..(1 << min_size) + 2].iter_mut() { |
892 | 0 | *k = FullKey::NoSuccessor.into(); |
893 | 0 | } |
894 | 0 | self.keys[1 << min_size] = FullKey::Full(0).into(); |
895 | 0 | } |
896 | | |
897 | 0 | fn at_key(&self, code: Code, ch: u8) -> Option<Code> { |
898 | 0 | let key = self.keys[usize::from(code)]; |
899 | 0 | match FullKey::from(key) { |
900 | 0 | FullKey::NoSuccessor => None, |
901 | 0 | FullKey::Simple(idx) => { |
902 | 0 | let nexts = &self.simples[usize::from(idx)]; |
903 | 0 | let successors = nexts |
904 | 0 | .codes |
905 | 0 | .iter() |
906 | 0 | .zip(nexts.chars.iter()) |
907 | 0 | .take(usize::from(nexts.count)); |
908 | 0 | for (&scode, &sch) in successors { |
909 | 0 | if sch == ch { |
910 | 0 | return Some(scode); |
911 | 0 | } |
912 | | } |
913 | | |
914 | 0 | None |
915 | | } |
916 | 0 | FullKey::Full(idx) => { |
917 | 0 | let full = &self.complex[usize::from(idx)]; |
918 | 0 | let precode = full.char_continuation[usize::from(ch)]; |
919 | 0 | if usize::from(precode) < MAX_ENTRIES { |
920 | 0 | Some(precode) |
921 | | } else { |
922 | 0 | None |
923 | | } |
924 | | } |
925 | | } |
926 | 0 | } |
927 | | |
928 | | /// Iterate to the next char. |
929 | | /// Return Ok when it was already in the tree or creates a new entry for it and returns Err. |
930 | 0 | fn iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code> { |
931 | 0 | if let Some(next) = self.at_key(code, ch) { |
932 | 0 | Ok(next) |
933 | | } else { |
934 | 0 | Err(self.append(code, ch)) |
935 | | } |
936 | 0 | } |
937 | | |
938 | 0 | fn append(&mut self, code: Code, ch: u8) -> Code { |
939 | 0 | let next: Code = self.keys.len() as u16; |
940 | 0 | let key = self.keys[usize::from(code)]; |
941 | | // TODO: with debug assertions, check for non-existence |
942 | 0 | match FullKey::from(key) { |
943 | 0 | FullKey::NoSuccessor => { |
944 | 0 | let new_key = FullKey::Simple(self.simples.len() as u16); |
945 | 0 | self.simples.push(Simple::default()); |
946 | 0 | let simples = self.simples.last_mut().unwrap(); |
947 | 0 | simples.codes[0] = next; |
948 | 0 | simples.chars[0] = ch; |
949 | 0 | simples.count = 1; |
950 | 0 | self.keys[usize::from(code)] = new_key.into(); |
951 | 0 | } |
952 | 0 | FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => { |
953 | 0 | let nexts = &mut self.simples[usize::from(idx)]; |
954 | 0 | let nidx = usize::from(nexts.count); |
955 | 0 | nexts.chars[nidx] = ch; |
956 | 0 | nexts.codes[nidx] = next; |
957 | 0 | nexts.count += 1; |
958 | 0 | } |
959 | 0 | FullKey::Simple(idx) => { |
960 | 0 | let new_key = FullKey::Full(self.complex.len() as u16); |
961 | 0 | let simples = &self.simples[usize::from(idx)]; |
962 | 0 | self.complex.push(Full { |
963 | 0 | char_continuation: [Code::max_value(); 256], |
964 | 0 | }); |
965 | 0 | let full = self.complex.last_mut().unwrap(); |
966 | 0 | for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) { |
967 | 0 | full.char_continuation[usize::from(pch)] = pcont; |
968 | 0 | } |
969 | 0 | self.keys[usize::from(code)] = new_key.into(); |
970 | | } |
971 | 0 | FullKey::Full(idx) => { |
972 | 0 | let full = &mut self.complex[usize::from(idx)]; |
973 | 0 | full.char_continuation[usize::from(ch)] = next; |
974 | 0 | } |
975 | | } |
976 | 0 | self.keys.push(FullKey::NoSuccessor.into()); |
977 | 0 | next |
978 | 0 | } |
979 | | } |
980 | | |
981 | | impl Default for FullKey { |
982 | 0 | fn default() -> Self { |
983 | 0 | FullKey::NoSuccessor |
984 | 0 | } |
985 | | } |
986 | | |
987 | | impl Default for Simple { |
988 | 0 | fn default() -> Self { |
989 | 0 | Simple { |
990 | 0 | codes: [0; SHORT], |
991 | 0 | chars: [0; SHORT], |
992 | 0 | count: 0, |
993 | 0 | } |
994 | 0 | } |
995 | | } |
996 | | |
997 | | impl From<CompressedKey> for FullKey { |
998 | 0 | fn from(CompressedKey(key): CompressedKey) -> Self { |
999 | 0 | match (key >> MAX_CODESIZE) & 0xf { |
1000 | 0 | 0 => FullKey::Full(key & 0xfff), |
1001 | 0 | 1 => FullKey::Simple(key & 0xfff), |
1002 | 0 | _ => FullKey::NoSuccessor, |
1003 | | } |
1004 | 0 | } |
1005 | | } |
1006 | | |
1007 | | impl From<FullKey> for CompressedKey { |
1008 | 0 | fn from(full: FullKey) -> Self { |
1009 | 0 | CompressedKey(match full { |
1010 | 0 | FullKey::NoSuccessor => 0x2000, |
1011 | 0 | FullKey::Simple(code) => 0x1000 | code, |
1012 | 0 | FullKey::Full(code) => code, |
1013 | | }) |
1014 | 0 | } |
1015 | | } |
1016 | | |
#[cfg(test)]
mod tests {
    use super::{BitOrder, Encoder, LzwError, LzwStatus};
    use crate::alloc::vec::Vec;
    use crate::decode::Decoder;
    #[cfg(feature = "std")]
    use crate::StreamBuf;

    /// A symbol outside the configured bit width is reported as `InvalidCode`, and encoding can
    /// continue afterwards with corrected input.
    #[test]
    fn invalid_input_rejected() {
        const BIT_LEN: u8 = 2;
        let input: [u8; 3] = [0, 1 << BIT_LEN /* invalid */, 0];
        let mut target = [0u8; 128];
        let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN);

        encoder.finish();
        // We require simulation of normality, that is byte-for-byte compression.
        let result = encoder.encode_bytes(&input, &mut target);
        let rejected = match result.status {
            Err(LzwError::InvalidCode) => true,
            _ => false,
        };
        assert!(rejected);
        assert_eq!(result.consumed_in, 1);

        let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]);
        let finished = match fixed.status {
            Ok(LzwStatus::Done) => true,
            _ => false,
        };
        assert!(finished);
        assert_eq!(fixed.consumed_in, 2);

        // Okay, now test we actually fixed it.
        let mut compare = [0u8; 4];
        let written = result.consumed_out + fixed.consumed_out;
        let mut todo = &target[..written];
        let mut free = &mut compare[..];
        let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN);

        // Decode with up to 16 rounds, far too much but inconsequential.
        let mut rounds = 0;
        while rounds < 16 && !decoder.has_ended() {
            let result = decoder.decode_bytes(todo, free);
            assert!(result.status.is_ok());
            todo = &todo[result.consumed_in..];
            free = &mut free[result.consumed_out..];
            rounds += 1;
        }

        // Moving `free` into the block ends its borrow of `compare`.
        let remaining = { free }.len();
        let len = compare.len() - remaining;
        assert_eq!(todo, &[]);
        assert_eq!(compare[..len], [0, 1, 0]);
    }

    /// Constructing an encoder with a code size of 1 must panic.
    #[test]
    #[should_panic]
    fn invalid_code_size_low() {
        let _ = Encoder::new(BitOrder::Msb, 1);
    }

    /// Constructing an encoder with a code size of 14 must panic.
    #[test]
    #[should_panic]
    fn invalid_code_size_high() {
        let _ = Encoder::new(BitOrder::Msb, 14);
    }

    /// Sample payload for the stream tests: the bytes of this crate's `Cargo.lock`.
    fn make_decoded() -> Vec<u8> {
        const FILE: &'static [u8] =
            include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock"));
        Vec::from(FILE)
    }

    /// A caller-provided buffer must be used as-is, without the stream allocating its own.
    #[test]
    #[cfg(feature = "std")]
    fn into_stream_buffer_no_alloc() {
        let encoded = make_decoded();
        let mut encoder = Encoder::new(BitOrder::Msb, 8);

        let mut output = vec![];
        let mut buffer = [0; 512];
        let mut istream = encoder.into_stream(&mut output);
        istream.set_buffer(&mut buffer[..]);
        istream.encode(&encoded[..]).status.unwrap();

        match istream.buffer {
            None => panic!("Decoded without buffer??"),
            Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
            Some(StreamBuf::Borrowed(_)) => {}
        }
    }

    /// A requested buffer size bounds both the allocated buffer and every write to the sink.
    #[test]
    #[cfg(feature = "std")]
    fn into_stream_buffer_small_alloc() {
        const BUF_SIZE: usize = 512;

        /// Writer wrapper that checks the size of every write before forwarding it.
        struct WriteTap<W: std::io::Write>(W);

        impl<W: std::io::Write> std::io::Write for WriteTap<W> {
            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
                assert!(buf.len() <= BUF_SIZE);
                self.0.write(buf)
            }
            fn flush(&mut self) -> std::io::Result<()> {
                self.0.flush()
            }
        }

        let encoded = make_decoded();
        let mut encoder = Encoder::new(BitOrder::Msb, 8);

        let mut output = vec![];
        let mut istream = encoder.into_stream(WriteTap(&mut output));
        istream.set_buffer_size(BUF_SIZE);
        istream.encode(&encoded[..]).status.unwrap();

        match istream.buffer {
            None => panic!("Decoded without buffer??"),
            Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
            Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
        }
    }

    /// Encoding the same data again after `reset` must produce identical output.
    #[test]
    #[cfg(feature = "std")]
    fn reset() {
        let encoded = make_decoded();
        let mut encoder = Encoder::new(BitOrder::Msb, 8);
        let mut reference = None;

        for _ in 0..2 {
            let mut output = vec![];
            let mut buffer = [0; 512];
            let mut istream = encoder.into_stream(&mut output);
            istream.set_buffer(&mut buffer[..]);
            istream.encode_all(&encoded[..]).status.unwrap();

            encoder.reset();
            match reference.take() {
                // Second round: compare against the first output and keep it as the reference.
                Some(expected) => {
                    assert_eq!(output, expected);
                    reference = Some(expected);
                }
                // First round: remember the output.
                None => reference = Some(output),
            }
        }
    }
}