/rust/registry/src/index.crates.io-1949cf8c6b5b557f/weezl-0.1.12/src/decode.rs
Line | Count | Source |
1 | | //! A module for all decoding needs. |
2 | | #[cfg(feature = "std")] |
3 | | use crate::error::StreamResult; |
4 | | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; |
5 | | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; |
6 | | |
7 | | use crate::alloc::{boxed::Box, vec, vec::Vec}; |
8 | | #[cfg(feature = "std")] |
9 | | use std::io::{self, BufRead, Write}; |
10 | | |
11 | | /// The state for decoding data with an LZW algorithm. |
12 | | /// |
13 | | /// The same structure can be utilized with streams as well as your own buffers and driver logic. |
14 | | /// It may even be possible to mix them if you are sufficiently careful not to lose or skip any |
15 | | /// already decode data in the process. |
16 | | /// |
17 | | /// This is a sans-IO implementation, meaning that it only contains the state of the decoder and |
18 | | /// the caller will provide buffers for input and output data when calling the basic |
19 | | /// [`decode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*` |
20 | | /// methods for decoding with a particular style of common IO. |
21 | | /// |
22 | | /// * [`decode`] for decoding once without any IO-loop. |
23 | | /// * [`into_async`] for decoding with the `futures` traits for asynchronous IO. |
24 | | /// * [`into_stream`] for decoding with the standard `io` traits. |
25 | | /// * [`into_vec`] for in-memory decoding. |
26 | | /// |
27 | | /// [`decode_bytes`]: #method.decode_bytes |
28 | | /// [`decode`]: #method.decode |
29 | | /// [`into_async`]: #method.into_async |
30 | | /// [`into_stream`]: #method.into_stream |
31 | | /// [`into_vec`]: #method.into_vec |
32 | | pub struct Decoder { |
33 | | state: Box<dyn Stateful + Send + 'static>, |
34 | | } |
35 | | |
36 | | /// A decoding stream sink. |
37 | | /// |
38 | | /// See [`Decoder::into_stream`] on how to create this type. |
39 | | /// |
40 | | /// [`Decoder::into_stream`]: struct.Decoder.html#method.into_stream |
41 | | #[cfg_attr( |
42 | | not(feature = "std"), |
43 | | deprecated = "This type is only useful with the `std` feature." |
44 | | )] |
45 | | #[cfg_attr(not(feature = "std"), allow(dead_code))] |
46 | | pub struct IntoStream<'d, W> { |
47 | | decoder: &'d mut Decoder, |
48 | | writer: W, |
49 | | buffer: Option<StreamBuf<'d>>, |
50 | | default_size: usize, |
51 | | } |
52 | | |
53 | | /// An async decoding sink. |
54 | | /// |
55 | | /// See [`Decoder::into_async`] on how to create this type. |
56 | | /// |
57 | | /// [`Decoder::into_async`]: struct.Decoder.html#method.into_async |
58 | | #[cfg(feature = "async")] |
59 | | pub struct IntoAsync<'d, W> { |
60 | | decoder: &'d mut Decoder, |
61 | | writer: W, |
62 | | buffer: Option<StreamBuf<'d>>, |
63 | | default_size: usize, |
64 | | } |
65 | | |
66 | | /// A decoding sink into a vector. |
67 | | /// |
68 | | /// See [`Decoder::into_vec`] on how to create this type. |
69 | | /// |
70 | | /// [`Decoder::into_vec`]: struct.Decoder.html#method.into_vec |
71 | | pub struct IntoVec<'d> { |
72 | | decoder: &'d mut Decoder, |
73 | | vector: &'d mut Vec<u8>, |
74 | | } |
75 | | |
76 | | trait Stateful { |
77 | | fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; |
78 | | fn has_ended(&self) -> bool; |
79 | | /// Ignore an end code and continue decoding (no implied reset). |
80 | | fn restart(&mut self); |
81 | | /// Reset the decoder to the beginning, dropping all buffers etc. |
82 | | fn reset(&mut self); |
83 | | } |
84 | | |
85 | | #[derive(Clone)] |
86 | | struct Link { |
87 | | prev: Code, |
88 | | byte: u8, |
89 | | first: u8, |
90 | | } |
91 | | |
92 | | #[derive(Clone)] |
93 | | struct DerivationBase { |
94 | | code: Code, |
95 | | first: u8, |
96 | | } |
97 | | |
98 | | #[derive(Default)] |
99 | | struct MsbBuffer { |
100 | | /// A buffer of individual bits. The oldest code is kept in the high-order bits. |
101 | | bit_buffer: u64, |
102 | | /// A precomputed mask for this code. |
103 | | code_mask: u16, |
104 | | /// The current code size. |
105 | | code_size: u8, |
106 | | /// The number of bits in the buffer. |
107 | | bits: u8, |
108 | | } |
109 | | |
110 | | #[derive(Default)] |
111 | | struct LsbBuffer { |
112 | | /// A buffer of individual bits. The oldest code is kept in the high-order bits. |
113 | | bit_buffer: u64, |
114 | | /// A precomputed mask for this code. |
115 | | code_mask: u16, |
116 | | /// The current code size. |
117 | | code_size: u8, |
118 | | /// The number of bits in the buffer. |
119 | | bits: u8, |
120 | | } |
121 | | |
122 | | trait CodeBuffer { |
123 | | fn new(min_size: u8) -> Self; |
124 | | fn reset(&mut self, min_size: u8); |
125 | | fn bump_code_size(&mut self); |
126 | | |
127 | | /// Retrieve the next symbol, refilling if necessary. |
128 | | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code>; |
129 | | /// Refill the internal buffer. |
130 | | fn refill_bits(&mut self, inp: &mut &[u8]); |
131 | | |
132 | | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize; |
133 | | fn consume_bits(&mut self, code_cnt: u8); |
134 | | |
135 | | fn max_code(&self) -> Code; |
136 | | fn code_size(&self) -> u8; |
137 | | } |
138 | | |
139 | | trait CodegenConstants { |
140 | | const YIELD_ON_FULL: bool; |
141 | | } |
142 | | |
143 | | struct DecodeState<CodeBuffer, Constants: CodegenConstants> { |
144 | | /// The original minimum code size. |
145 | | min_size: u8, |
146 | | /// The table of decoded codes. |
147 | | table: Table, |
148 | | /// The buffer of decoded data. |
149 | | buffer: Buffer, |
150 | | /// The link which we are still decoding and its original code. |
151 | | last: Option<DerivationBase>, |
152 | | /// The next code entry. |
153 | | next_code: Code, |
154 | | /// Code to reset all tables. |
155 | | clear_code: Code, |
156 | | /// Code to signal the end of the stream. |
157 | | end_code: Code, |
158 | | /// A stored flag if the end code has already appeared. |
159 | | has_ended: bool, |
160 | | /// If tiff then bumps are a single code sooner. |
161 | | is_tiff: bool, |
162 | | /// Do we allow stream to start without an explicit reset code? |
163 | | implicit_reset: bool, |
164 | | /// The buffer for decoded words. |
165 | | code_buffer: CodeBuffer, |
166 | | #[allow(dead_code)] |
167 | | constants: core::marker::PhantomData<Constants>, |
168 | | } |
169 | | |
170 | | // We have a buffer of 64 bits. So at max size at most 5 units can be read at once without |
171 | | // refilling the buffer. At smaller code sizes there are more. We tune for 6 here, by slight |
172 | | // experimentation. This may be an architecture dependent constant. |
173 | | const BURST: usize = 6; |
174 | | |
175 | | struct Buffer { |
176 | | bytes: Box<[u8]>, |
177 | | read_mark: usize, |
178 | | write_mark: usize, |
179 | | } |
180 | | |
181 | | struct Table { |
182 | | inner: Vec<Link>, |
183 | | depths: Vec<u16>, |
184 | | } |
185 | | |
186 | | /// Describes the static parameters for creating a decoder. |
187 | | #[derive(Clone, Debug)] |
188 | | pub struct Configuration { |
189 | | order: BitOrder, |
190 | | size: u8, |
191 | | tiff: bool, |
192 | | yield_on_full: bool, |
193 | | } |
194 | | |
195 | | impl Configuration { |
196 | | /// Create a configuration to decode with the specified bit order and symbol size. |
197 | 2.42k | pub fn new(order: BitOrder, size: u8) -> Self { |
198 | 2.42k | super::assert_decode_size(size); |
199 | 2.42k | Configuration { |
200 | 2.42k | order, |
201 | 2.42k | size, |
202 | 2.42k | tiff: false, |
203 | 2.42k | yield_on_full: false, |
204 | 2.42k | } |
205 | 2.42k | } |
206 | | |
207 | | /// Create a configuration for a TIFF compatible decoder. |
208 | 2.33k | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { |
209 | 2.33k | super::assert_decode_size(size); |
210 | 2.33k | Configuration { |
211 | 2.33k | order, |
212 | 2.33k | size, |
213 | 2.33k | tiff: true, |
214 | 2.33k | yield_on_full: false, |
215 | 2.33k | } |
216 | 2.33k | } |
217 | | |
218 | | /// Immediately yield to the caller when the decoder buffer is full. |
219 | | /// |
220 | | /// This can be used for `libtiff` compatibility. It will use a "relaxed" stream interpretation |
221 | | /// that need not contain an explicit EOF. Instead, the decoder is expected to stop fetching |
222 | | /// symbols when some out-of-band specified length of the decoded text has been reached. The |
223 | | /// caller indicates this maximum length through the available output buffer space. |
224 | | /// |
225 | | /// Symbols afterwards must not be expected to be valid. On filling the output buffer space |
226 | | /// completely, the decoder will return immediately to the caller instead of potentially |
227 | | /// interpreting the following bit-stream (and returning an error on doing so). |
228 | | /// |
229 | | /// Default: `false`. |
230 | 2.33k | pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self { |
231 | 2.33k | Configuration { |
232 | 2.33k | yield_on_full: do_yield, |
233 | 2.33k | ..self |
234 | 2.33k | } |
235 | 2.33k | } |
236 | | |
237 | | /// Create a new decoder with the define configuration. |
238 | 4.75k | pub fn build(self) -> Decoder { |
239 | 4.75k | Decoder { |
240 | 4.75k | state: Decoder::from_configuration(&self), |
241 | 4.75k | } |
242 | 4.75k | } |
243 | | } |
244 | | |
245 | | impl Decoder { |
246 | | /// Create a new decoder with the specified bit order and symbol size. |
247 | | /// |
248 | | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the |
249 | | /// original specification. In particular you will need to specify an `Lsb` bit oder to decode |
250 | | /// the data portion of a compressed `gif` image. |
251 | | /// |
252 | | /// # Panics |
253 | | /// |
254 | | /// The `size` needs to be in the interval `0..=12`. |
255 | 2.42k | pub fn new(order: BitOrder, size: u8) -> Self { |
256 | 2.42k | Configuration::new(order, size).build() |
257 | 2.42k | } |
258 | | |
259 | | /// Create a TIFF compatible decoder with the specified bit order and symbol size. |
260 | | /// |
261 | | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the |
262 | | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing |
263 | | /// the code size. It switches one symbol sooner. |
264 | | /// |
265 | | /// # Panics |
266 | | /// |
267 | | /// The `size` needs to be in the interval `0..=12`. |
268 | 0 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { |
269 | 0 | Configuration::with_tiff_size_switch(order, size).build() |
270 | 0 | } |
271 | | |
272 | 4.75k | fn from_configuration(configuration: &Configuration) -> Box<dyn Stateful + Send + 'static> { |
273 | | struct NoYield; |
274 | | struct YieldOnFull; |
275 | | |
276 | | impl CodegenConstants for NoYield { |
277 | | const YIELD_ON_FULL: bool = false; |
278 | | } |
279 | | |
280 | | impl CodegenConstants for YieldOnFull { |
281 | | const YIELD_ON_FULL: bool = true; |
282 | | } |
283 | | |
284 | | type Boxed = Box<dyn Stateful + Send + 'static>; |
285 | 4.75k | match (configuration.order, configuration.yield_on_full) { |
286 | | (BitOrder::Lsb, false) => { |
287 | 2.42k | let mut state = |
288 | 2.42k | Box::new(DecodeState::<LsbBuffer, NoYield>::new(configuration.size)); |
289 | 2.42k | state.is_tiff = configuration.tiff; |
290 | 2.42k | state as Boxed |
291 | | } |
292 | | (BitOrder::Lsb, true) => { |
293 | 0 | let mut state = Box::new(DecodeState::<LsbBuffer, YieldOnFull>::new( |
294 | 0 | configuration.size, |
295 | | )); |
296 | 0 | state.is_tiff = configuration.tiff; |
297 | 0 | state as Boxed |
298 | | } |
299 | | (BitOrder::Msb, false) => { |
300 | 0 | let mut state = |
301 | 0 | Box::new(DecodeState::<MsbBuffer, NoYield>::new(configuration.size)); |
302 | 0 | state.is_tiff = configuration.tiff; |
303 | 0 | state as Boxed |
304 | | } |
305 | | (BitOrder::Msb, true) => { |
306 | 2.33k | let mut state = Box::new(DecodeState::<MsbBuffer, YieldOnFull>::new( |
307 | 2.33k | configuration.size, |
308 | | )); |
309 | 2.33k | state.is_tiff = configuration.tiff; |
310 | 2.33k | state as Boxed |
311 | | } |
312 | | } |
313 | 4.75k | } |
314 | | |
315 | | /// Decode some bytes from `inp` and write result to `out`. |
316 | | /// |
317 | | /// This will consume a prefix of the input buffer and write decoded output into a prefix of |
318 | | /// the output buffer. See the respective fields of the return value for the count of consumed |
319 | | /// and written bytes. For the next call You should have adjusted the inputs accordingly. |
320 | | /// |
321 | | /// The call will try to decode and write as many bytes of output as available. It will be |
322 | | /// much more optimized (and avoid intermediate buffering) if it is allowed to write a large |
323 | | /// contiguous chunk at once. |
324 | | /// |
325 | | /// See [`into_stream`] for high-level functions (that are only available with the `std` |
326 | | /// feature). |
327 | | /// |
328 | | /// [`into_stream`]: #method.into_stream |
329 | 722k | pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { |
330 | 722k | self.state.advance(inp, out) |
331 | 722k | } |
332 | | |
333 | | /// Decode a single chunk of lzw encoded data. |
334 | | /// |
335 | | /// This method requires the data to contain an end marker, and returns an error otherwise. |
336 | | /// |
337 | | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize |
338 | | /// buffer size, to supply an existing vector, to control whether an end marker is required, or |
339 | | /// to preserve partial data in the case of a decoding error. |
340 | | /// |
341 | | /// [`into_vec`]: #into_vec |
342 | | /// |
343 | | /// # Example |
344 | | /// |
345 | | /// ``` |
346 | | /// use weezl::{BitOrder, decode::Decoder}; |
347 | | /// |
348 | | /// // Encoded that was created with an encoder. |
349 | | /// let data = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10"; |
350 | | /// let decoded = Decoder::new(BitOrder::Msb, 9) |
351 | | /// .decode(data) |
352 | | /// .unwrap(); |
353 | | /// assert_eq!(decoded, b"Hello, world"); |
354 | | /// ``` |
355 | 0 | pub fn decode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> { |
356 | 0 | let mut output = vec![]; |
357 | 0 | self.into_vec(&mut output).decode_all(data).status?; |
358 | 0 | Ok(output) |
359 | 0 | } |
360 | | |
361 | | /// Construct a decoder into a writer. |
362 | | #[cfg(feature = "std")] |
363 | 0 | pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> { |
364 | 0 | IntoStream { |
365 | 0 | decoder: self, |
366 | 0 | writer, |
367 | 0 | buffer: None, |
368 | 0 | default_size: STREAM_BUF_SIZE, |
369 | 0 | } |
370 | 0 | } |
371 | | |
372 | | /// Construct a decoder into an async writer. |
373 | | #[cfg(feature = "async")] |
374 | | pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> { |
375 | | IntoAsync { |
376 | | decoder: self, |
377 | | writer, |
378 | | buffer: None, |
379 | | default_size: STREAM_BUF_SIZE, |
380 | | } |
381 | | } |
382 | | |
383 | | /// Construct a decoder into a vector. |
384 | | /// |
385 | | /// All decoded data is appended and the vector is __not__ cleared. |
386 | | /// |
387 | | /// Compared to `into_stream` this interface allows a high-level access to decoding without |
388 | | /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the |
389 | | /// special target exposes. |
390 | 0 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> { |
391 | 0 | IntoVec { |
392 | 0 | decoder: self, |
393 | 0 | vector: vec, |
394 | 0 | } |
395 | 0 | } |
396 | | |
397 | | /// Check if the decoding has finished. |
398 | | /// |
399 | | /// No more output is produced beyond the end code that marked the finish of the stream. The |
400 | | /// decoder may have read additional bytes, including padding bits beyond the last code word |
401 | | /// but also excess bytes provided. |
402 | 559k | pub fn has_ended(&self) -> bool { |
403 | 559k | self.state.has_ended() |
404 | 559k | } |
405 | | |
406 | | /// Ignore an end code and continue. |
407 | | /// |
408 | | /// This will _not_ reset any of the inner code tables and not have the effect of a clear code. |
409 | | /// It will instead continue as if the end code had not been present. If no end code has |
410 | | /// occurred then this is a no-op. |
411 | | /// |
412 | | /// You can test if an end code has occurred with [`has_ended`](#method.has_ended). |
413 | | /// FIXME: clarify how this interacts with padding introduced after end code. |
414 | | #[allow(dead_code)] |
415 | 0 | pub(crate) fn restart(&mut self) { |
416 | 0 | self.state.restart(); |
417 | 0 | } |
418 | | |
419 | | /// Reset all internal state. |
420 | | /// |
421 | | /// This produce a decoder as if just constructed with `new` but taking slightly less work. In |
422 | | /// particular it will not deallocate any internal allocations. It will also avoid some |
423 | | /// duplicate setup work. |
424 | 0 | pub fn reset(&mut self) { |
425 | 0 | self.state.reset(); |
426 | 0 | } |
427 | | } |
428 | | |
429 | | #[cfg(feature = "std")] |
430 | | impl<'d, W: Write> IntoStream<'d, W> { |
431 | | /// Decode data from a reader. |
432 | | /// |
433 | | /// This will read data until the stream is empty or an end marker is reached. |
434 | 0 | pub fn decode(&mut self, read: impl BufRead) -> StreamResult { |
435 | 0 | self.decode_part(read, false) |
436 | 0 | } |
437 | | |
438 | | /// Decode data from a reader, requiring an end marker. |
439 | 0 | pub fn decode_all(mut self, read: impl BufRead) -> StreamResult { |
440 | 0 | self.decode_part(read, true) |
441 | 0 | } |
442 | | |
443 | | /// Set the size of the intermediate decode buffer. |
444 | | /// |
445 | | /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is |
446 | | /// available and any decoding method is called. No buffer is allocated if `set_buffer` has |
447 | | /// been called. The buffer is reused. |
448 | | /// |
449 | | /// # Panics |
450 | | /// This method panics if `size` is `0`. |
451 | 0 | pub fn set_buffer_size(&mut self, size: usize) { |
452 | 0 | assert_ne!(size, 0, "Attempted to set empty buffer"); |
453 | 0 | self.default_size = size; |
454 | 0 | } |
455 | | |
456 | | /// Use a particular buffer as an intermediate decode buffer. |
457 | | /// |
458 | | /// Calling this sets or replaces the buffer. When a buffer has been set then it is used |
459 | | /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical |
460 | | /// for efficient decoding. Some optimization techniques require the buffer to hold one or more |
461 | | /// previous decoded words. There is also additional overhead from `write` calls each time the |
462 | | /// buffer has been filled. |
463 | | /// |
464 | | /// # Panics |
465 | | /// This method panics if the `buffer` is empty. |
466 | 0 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { |
467 | 0 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); |
468 | 0 | self.buffer = Some(StreamBuf::Borrowed(buffer)); |
469 | 0 | } |
470 | | |
471 | 0 | fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult { |
472 | | let IntoStream { |
473 | 0 | decoder, |
474 | 0 | writer, |
475 | 0 | buffer, |
476 | 0 | default_size, |
477 | 0 | } = self; |
478 | | |
479 | | enum Progress { |
480 | | Ok, |
481 | | Done, |
482 | | } |
483 | | |
484 | 0 | let mut bytes_read = 0; |
485 | 0 | let mut bytes_written = 0; |
486 | | |
487 | | // Converting to mutable refs to move into the `once` closure. |
488 | 0 | let read_bytes = &mut bytes_read; |
489 | 0 | let write_bytes = &mut bytes_written; |
490 | | |
491 | 0 | let outbuf: &mut [u8] = |
492 | 0 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { |
493 | 0 | StreamBuf::Borrowed(slice) => &mut *slice, |
494 | 0 | StreamBuf::Owned(vec) => &mut *vec, |
495 | | }; |
496 | 0 | assert!(!outbuf.is_empty()); |
497 | | |
498 | 0 | let once = move || { |
499 | | // Try to grab one buffer of input data. |
500 | 0 | let data = read.fill_buf()?; |
501 | | |
502 | | // Decode as much of the buffer as fits. |
503 | 0 | let result = decoder.decode_bytes(data, &mut outbuf[..]); |
504 | | // Do the bookkeeping and consume the buffer. |
505 | 0 | *read_bytes += result.consumed_in; |
506 | 0 | *write_bytes += result.consumed_out; |
507 | 0 | read.consume(result.consumed_in); |
508 | | |
509 | | // Handle the status in the result. |
510 | 0 | let done = result.status.map_err(|err| { |
511 | 0 | io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err)) |
512 | 0 | })?; |
513 | | |
514 | | // Check if we had any new data at all. |
515 | 0 | if let LzwStatus::NoProgress = done { |
516 | 0 | debug_assert_eq!( |
517 | | result.consumed_out, 0, |
518 | 0 | "No progress means we have not decoded any data" |
519 | | ); |
520 | | // In particular we did not finish decoding. |
521 | 0 | if must_finish { |
522 | 0 | return Err(io::Error::new( |
523 | 0 | io::ErrorKind::UnexpectedEof, |
524 | 0 | "No more data but no end marker detected", |
525 | 0 | )); |
526 | | } else { |
527 | 0 | return Ok(Progress::Done); |
528 | | } |
529 | 0 | } |
530 | | |
531 | | // And finish by writing our result. |
532 | | // TODO: we may lose data on error (also on status error above) which we might want to |
533 | | // deterministically handle so that we don't need to restart everything from scratch as |
534 | | // the only recovery strategy. Any changes welcome. |
535 | 0 | writer.write_all(&outbuf[..result.consumed_out])?; |
536 | | |
537 | 0 | Ok(if let LzwStatus::Done = done { |
538 | 0 | Progress::Done |
539 | | } else { |
540 | 0 | Progress::Ok |
541 | | }) |
542 | 0 | }; |
543 | | |
544 | | // Decode chunks of input data until we're done. |
545 | 0 | let status = core::iter::repeat_with(once) |
546 | | // scan+fuse can be replaced with map_while |
547 | 0 | .scan((), |(), result| match result { |
548 | 0 | Ok(Progress::Ok) => Some(Ok(())), |
549 | 0 | Err(err) => Some(Err(err)), |
550 | 0 | Ok(Progress::Done) => None, |
551 | 0 | }) |
552 | 0 | .fuse() |
553 | 0 | .collect(); |
554 | | |
555 | 0 | StreamResult { |
556 | 0 | bytes_read, |
557 | 0 | bytes_written, |
558 | 0 | status, |
559 | 0 | } |
560 | 0 | } |
561 | | } |
562 | | |
563 | | impl IntoVec<'_> { |
564 | | /// Decode data from a slice. |
565 | | /// |
566 | | /// This will read data until the slice is empty or an end marker is reached. |
567 | 0 | pub fn decode(&mut self, read: &[u8]) -> VectorResult { |
568 | 0 | self.decode_part(read, false) |
569 | 0 | } |
570 | | |
571 | | /// Decode data from a slice, requiring an end marker. |
572 | 0 | pub fn decode_all(mut self, read: &[u8]) -> VectorResult { |
573 | 0 | self.decode_part(read, true) |
574 | 0 | } |
575 | | |
576 | 0 | fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) { |
577 | | const CHUNK_SIZE: usize = 1 << 12; |
578 | 0 | let decoder = &mut self.decoder; |
579 | 0 | let length = self.vector.len(); |
580 | | |
581 | | // Use the vector to do overflow checks and w/e. |
582 | 0 | self.vector.reserve(CHUNK_SIZE); |
583 | | // FIXME: decoding into uninit buffer? |
584 | 0 | self.vector.resize(length + CHUNK_SIZE, 0u8); |
585 | | |
586 | 0 | (&mut self.vector[length..], decoder) |
587 | 0 | } |
588 | | |
589 | 0 | fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult { |
590 | 0 | let mut result = VectorResult { |
591 | 0 | consumed_in: 0, |
592 | 0 | consumed_out: 0, |
593 | 0 | status: Ok(LzwStatus::Ok), |
594 | 0 | }; |
595 | | |
596 | | enum Progress { |
597 | | Ok, |
598 | | Done, |
599 | | } |
600 | | |
601 | | // Converting to mutable refs to move into the `once` closure. |
602 | 0 | let read_bytes = &mut result.consumed_in; |
603 | 0 | let write_bytes = &mut result.consumed_out; |
604 | 0 | let mut data = part; |
605 | | |
606 | | // A 64 MB buffer is quite large but should get alloc_zeroed. |
607 | | // Note that the decoded size can be up to quadratic in code block. |
608 | 0 | let once = move || { |
609 | | // Grab a new output buffer. |
610 | 0 | let (outbuf, decoder) = self.grab_buffer(); |
611 | | |
612 | | // Decode as much of the buffer as fits. |
613 | 0 | let result = decoder.decode_bytes(data, &mut outbuf[..]); |
614 | | // Do the bookkeeping and consume the buffer. |
615 | 0 | *read_bytes += result.consumed_in; |
616 | 0 | *write_bytes += result.consumed_out; |
617 | 0 | data = &data[result.consumed_in..]; |
618 | | |
619 | 0 | let unfilled = outbuf.len() - result.consumed_out; |
620 | 0 | let filled = self.vector.len() - unfilled; |
621 | 0 | self.vector.truncate(filled); |
622 | | |
623 | | // Handle the status in the result. |
624 | 0 | match result.status { |
625 | 0 | Err(err) => Err(err), |
626 | 0 | Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode), |
627 | 0 | Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done), |
628 | 0 | Ok(LzwStatus::Ok) => Ok(Progress::Ok), |
629 | | } |
630 | 0 | }; |
631 | | |
632 | | // Decode chunks of input data until we're done. |
633 | 0 | let status: Result<(), _> = core::iter::repeat_with(once) |
634 | | // scan+fuse can be replaced with map_while |
635 | 0 | .scan((), |(), result| match result { |
636 | 0 | Ok(Progress::Ok) => Some(Ok(())), |
637 | 0 | Err(err) => Some(Err(err)), |
638 | 0 | Ok(Progress::Done) => None, |
639 | 0 | }) |
640 | 0 | .fuse() |
641 | 0 | .collect(); |
642 | | |
643 | 0 | if let Err(err) = status { |
644 | 0 | result.status = Err(err); |
645 | 0 | } |
646 | | |
647 | 0 | result |
648 | 0 | } |
649 | | } |
650 | | |
651 | | // This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would |
652 | | // trip over the usage of await, which is a reserved keyword in that edition/version. It only |
653 | | // contains an impl block. |
654 | | #[cfg(feature = "async")] |
655 | | #[path = "decode_into_async.rs"] |
656 | | mod impl_decode_into_async; |
657 | | |
658 | | impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> { |
659 | 4.75k | fn new(min_size: u8) -> Self { |
660 | 4.75k | DecodeState { |
661 | 4.75k | min_size, |
662 | 4.75k | table: Table::new(), |
663 | 4.75k | buffer: Buffer::new(), |
664 | 4.75k | last: None, |
665 | 4.75k | clear_code: 1 << min_size, |
666 | 4.75k | end_code: (1 << min_size) + 1, |
667 | 4.75k | next_code: (1 << min_size) + 2, |
668 | 4.75k | has_ended: false, |
669 | 4.75k | is_tiff: false, |
670 | 4.75k | implicit_reset: true, |
671 | 4.75k | code_buffer: CodeBuffer::new(min_size), |
672 | 4.75k | constants: core::marker::PhantomData, |
673 | 4.75k | } |
674 | 4.75k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new Line | Count | Source | 659 | 2.42k | fn new(min_size: u8) -> Self { | 660 | 2.42k | DecodeState { | 661 | 2.42k | min_size, | 662 | 2.42k | table: Table::new(), | 663 | 2.42k | buffer: Buffer::new(), | 664 | 2.42k | last: None, | 665 | 2.42k | clear_code: 1 << min_size, | 666 | 2.42k | end_code: (1 << min_size) + 1, | 667 | 2.42k | next_code: (1 << min_size) + 2, | 668 | 2.42k | has_ended: false, | 669 | 2.42k | is_tiff: false, | 670 | 2.42k | implicit_reset: true, | 671 | 2.42k | code_buffer: CodeBuffer::new(min_size), | 672 | 2.42k | constants: core::marker::PhantomData, | 673 | 2.42k | } | 674 | 2.42k | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new Line | Count | Source | 659 | 2.33k | fn new(min_size: u8) -> Self { | 660 | 2.33k | DecodeState { | 661 | 2.33k | min_size, | 662 | 2.33k | table: Table::new(), | 663 | 2.33k | buffer: Buffer::new(), | 664 | 2.33k | last: None, | 665 | 2.33k | clear_code: 1 << min_size, | 666 | 2.33k | end_code: (1 << min_size) + 1, | 667 | 2.33k | next_code: (1 << min_size) + 2, | 668 | 2.33k | has_ended: false, | 669 | 2.33k | is_tiff: false, | 670 | 2.33k | implicit_reset: true, | 671 | 2.33k | code_buffer: CodeBuffer::new(min_size), | 672 | 2.33k | constants: core::marker::PhantomData, | 673 | 2.33k | } | 674 | 2.33k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new |
675 | | |
676 | 14.7k | fn init_tables(&mut self) { |
677 | 14.7k | self.code_buffer.reset(self.min_size); |
678 | 14.7k | self.next_code = (1 << self.min_size) + 2; |
679 | 14.7k | self.table.init(self.min_size); |
680 | 14.7k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables Line | Count | Source | 676 | 11.9k | fn init_tables(&mut self) { | 677 | 11.9k | self.code_buffer.reset(self.min_size); | 678 | 11.9k | self.next_code = (1 << self.min_size) + 2; | 679 | 11.9k | self.table.init(self.min_size); | 680 | 11.9k | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables Line | Count | Source | 676 | 2.74k | fn init_tables(&mut self) { | 677 | 2.74k | self.code_buffer.reset(self.min_size); | 678 | 2.74k | self.next_code = (1 << self.min_size) + 2; | 679 | 2.74k | self.table.init(self.min_size); | 680 | 2.74k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables |
681 | | |
682 | 68.1k | fn reset_tables(&mut self) { |
683 | 68.1k | self.code_buffer.reset(self.min_size); |
684 | 68.1k | self.next_code = (1 << self.min_size) + 2; |
685 | 68.1k | self.table.clear(self.min_size); |
686 | 68.1k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables Line | Count | Source | 682 | 41.0k | fn reset_tables(&mut self) { | 683 | 41.0k | self.code_buffer.reset(self.min_size); | 684 | 41.0k | self.next_code = (1 << self.min_size) + 2; | 685 | 41.0k | self.table.clear(self.min_size); | 686 | 41.0k | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables Line | Count | Source | 682 | 27.1k | fn reset_tables(&mut self) { | 683 | 27.1k | self.code_buffer.reset(self.min_size); | 684 | 27.1k | self.next_code = (1 << self.min_size) + 2; | 685 | 27.1k | self.table.clear(self.min_size); | 686 | 27.1k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables |
687 | | } |
688 | | |
689 | | impl<C: CodeBuffer, CgC: CodegenConstants> Stateful for DecodeState<C, CgC> { |
690 | 559k | fn has_ended(&self) -> bool { |
691 | 559k | self.has_ended |
692 | 559k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::has_ended <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::has_ended Line | Count | Source | 690 | 559k | fn has_ended(&self) -> bool { | 691 | 559k | self.has_ended | 692 | 559k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::has_ended Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::has_ended |
693 | | |
694 | 0 | fn restart(&mut self) { |
695 | 0 | self.has_ended = false; |
696 | 0 | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart |
697 | | |
698 | 0 | fn reset(&mut self) { |
699 | 0 | self.table.init(self.min_size); |
700 | 0 | self.next_code = (1 << self.min_size) + 2; |
701 | 0 | self.buffer.read_mark = 0; |
702 | 0 | self.buffer.write_mark = 0; |
703 | 0 | self.last = None; |
704 | 0 | self.restart(); |
705 | 0 | self.code_buffer = CodeBuffer::new(self.min_size); |
706 | 0 | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset |
707 | | |
708 | 722k | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { |
709 | | // Skip everything if there is nothing to do. |
710 | 722k | if self.has_ended { |
711 | 79 | return BufferResult { |
712 | 79 | consumed_in: 0, |
713 | 79 | consumed_out: 0, |
714 | 79 | status: Ok(LzwStatus::Done), |
715 | 79 | }; |
716 | 722k | } |
717 | | |
718 | | // Rough description: |
719 | | // We will fill the output slice as much as possible until either there is no more symbols |
720 | | // to decode or an end code has been reached. This requires an internal buffer to hold a |
721 | | // potential tail of the word corresponding to the last symbol. This tail will then be |
722 | | // decoded first before continuing with the regular decoding. The same buffer is required |
723 | | // to persist some symbol state across calls. |
724 | | // |
725 | | // We store the words corresponding to code symbols in an index chain, bytewise, where we |
726 | | // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain |
727 | | // is traversed for each symbol when it is decoded and bytes are placed directly into the |
728 | | // output slice. In the special case (new_code == next_code) we use an existing decoded |
729 | | // version that is present in either the out bytes of this call or in buffer to copy the |
730 | | // repeated prefix slice. |
731 | | // TODO: I played with a 'decoding cache' to remember the position of long symbols and |
732 | | // avoid traversing the chain, doing a copy of memory instead. It did however not lead to |
733 | | // a serious improvement. It's just unlikely to both have a long symbol and have that |
734 | | // repeated twice in the same output buffer. |
735 | | // |
736 | | // You will also find the (to my knowledge novel) concept of a _decoding burst_ which |
737 | | // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order |
738 | | // execution as much as possible and for this reason have the least possible stress on |
739 | | // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but |
740 | | // only for re-used codes, not novel ones. This lookahead also makes the loop termination |
741 | | // when restoring each byte of the code word perfectly predictable! So a burst is a chunk |
742 | | // of code words which are all independent of each other, have known lengths _and_ are |
743 | | // guaranteed to fit into the out slice without requiring a buffer. One burst can be |
744 | | // decoded in an extremely tight loop. |
745 | | // |
746 | | // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid |
747 | | // that intermediate buffer at the expense of not always filling the output buffer |
748 | | // completely. Alternatively we might follow its chain of precursor states twice. This may |
749 | | // be even cheaper if we store more than one byte per link so it really should be |
750 | | // evaluated. |
751 | | // TODO: if the caller was required to provide the previous last word we could also avoid |
752 | | // the buffer for cases where we need it to restore the next code! This could be built |
753 | | // backwards compatible by only doing it after an opt-in call that enables the behaviour. |
754 | | |
755 | | // Record initial lengths for the result that is returned. |
756 | 722k | let o_in = inp.len(); |
757 | 722k | let o_out = out.len(); |
758 | | |
759 | | // The code_link is the previously decoded symbol. |
760 | | // It's used to link the new code back to its predecessor. |
761 | 722k | let mut code_link = None; |
762 | | // The status, which is written to on an invalid code. |
763 | 722k | let mut status = Ok(LzwStatus::Ok); |
764 | | |
765 | 722k | match self.last.take() { |
766 | | // No last state? This is the first code after a reset? |
767 | | None => { |
768 | 85.0k | match self.next_symbol(&mut inp) { |
769 | | // Plainly invalid code. |
770 | 84.5k | Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode), |
771 | | // next_code would require an actual predecessor. |
772 | 84.5k | Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode), |
773 | | // No more symbols available and nothing decoded yet. |
774 | | // Assume that we didn't make progress, this may get reset to Done if we read |
775 | | // some bytes from the input. |
776 | 407 | None => status = Ok(LzwStatus::NoProgress), |
777 | | // Handle a valid code. |
778 | 84.5k | Some(init_code) => { |
779 | 84.5k | if init_code == self.clear_code { |
780 | 12.2k | self.init_tables(); |
781 | 72.2k | } else if init_code == self.end_code { |
782 | 34 | self.has_ended = true; |
783 | 34 | status = Ok(LzwStatus::Done); |
784 | 72.2k | } else if self.table.is_empty() { |
785 | 2.41k | if self.implicit_reset { |
786 | 2.41k | self.init_tables(); |
787 | 2.41k | |
788 | 2.41k | self.buffer.fill_reconstruct(&self.table, init_code); |
789 | 2.41k | let link = self.table.at(init_code).clone(); |
790 | 2.41k | code_link = Some(DerivationBase { |
791 | 2.41k | code: init_code, |
792 | 2.41k | first: link.first, |
793 | 2.41k | }); |
794 | 2.41k | } else { |
795 | 0 | // We require an explicit reset. |
796 | 0 | status = Err(LzwError::InvalidCode); |
797 | 0 | } |
798 | 69.8k | } else { |
799 | 69.8k | // Reconstruct the first code in the buffer. |
800 | 69.8k | self.buffer.fill_reconstruct(&self.table, init_code); |
801 | 69.8k | let link = self.table.at(init_code).clone(); |
802 | 69.8k | code_link = Some(DerivationBase { |
803 | 69.8k | code: init_code, |
804 | 69.8k | first: link.first, |
805 | 69.8k | }); |
806 | 69.8k | } |
807 | | } |
808 | | } |
809 | | } |
810 | | // Move the tracking state to the stack. |
811 | 637k | Some(tup) => code_link = Some(tup), |
812 | | }; |
813 | | |
814 | | // Track an empty `burst` (see below) means we made no progress. |
815 | 722k | let mut have_yet_to_decode_data = false; |
816 | | |
817 | | // Restore the previous state, if any. |
818 | 722k | if code_link.is_some() { |
819 | 709k | let remain = self.buffer.buffer(); |
820 | | // Check if we can fully finish the buffer. |
821 | 709k | if remain.len() > out.len() { |
822 | 287k | if out.is_empty() { |
823 | 0 | // This also implies the buffer is _not_ empty and we will not enter any |
824 | 0 | // decoding loop. |
825 | 0 | status = Ok(LzwStatus::NoProgress); |
826 | 287k | } else { |
827 | 287k | out.copy_from_slice(&remain[..out.len()]); |
828 | 287k | self.buffer.consume(out.len()); |
829 | 287k | out = &mut []; |
830 | 287k | } |
831 | 422k | } else if remain.is_empty() { |
832 | 259k | status = Ok(LzwStatus::NoProgress); |
833 | 259k | have_yet_to_decode_data = true; |
834 | 259k | } else { |
835 | 163k | let consumed = remain.len(); |
836 | 163k | out[..consumed].copy_from_slice(remain); |
837 | 163k | self.buffer.consume(consumed); |
838 | 163k | out = &mut out[consumed..]; |
839 | 163k | have_yet_to_decode_data = false; |
840 | 163k | } |
841 | 12.7k | } |
842 | | |
843 | | // A special reference to out slice which holds the last decoded symbol. |
844 | 722k | let mut last_decoded: Option<&[u8]> = None; |
845 | | |
846 | 722k | if self.buffer.buffer().is_empty() { |
847 | | // Hot loop that writes data to the output as long as we can do so directly from the |
848 | | // input stream. As an invariant of this block we did not need to use the buffer to |
849 | | // store a decoded code word. Testing the condition ahead of time avoids a test in the |
850 | | // loop body since every code path where the buffer is filled already breaks. |
851 | | // |
852 | | // In a previous iteration of the code we trusted compiler optimization to work this |
853 | | // out but it seems that it does not. Another edit hidden behind some performance work |
854 | | // then edited out the check, inadvertently changing the behavior for callers that |
855 | | // relied on being able to provide an empty output buffer and still receiving a useful |
856 | | // signal about the state of the stream. |
857 | | |
858 | | // A burst is a sequence of code words that are independently decoded, i.e. they do not |
859 | | // change the state of the decoder in ways that would influence the interpretation of |
860 | | // each other. That is: they are not special symbols, they do not make us increase the |
861 | | // code size, they are each codes already in the tree before the burst. |
862 | | // |
863 | | // The tracking state for a burst. These are actually initialized later but compiler |
864 | | // wasn't smart enough to fully optimize out the init code so that appears outside the |
865 | | // loop. |
866 | 435k | let mut burst = [0; BURST]; |
867 | 435k | let mut burst_byte_len = [0u16; BURST]; |
868 | 435k | let mut burst_byte = [0u8; BURST]; |
869 | 435k | let mut target: [&mut [u8]; BURST] = Default::default(); |
870 | | |
871 | | loop { |
872 | | // In particular, we *also* break if the output buffer is still empty. Especially |
873 | | // when the output parameter was an empty slice, we must try to fetch at least one |
874 | | // code but with YIELD_ON_FULL we do not. |
875 | 4.46M | if CgC::YIELD_ON_FULL && out.is_empty() { |
876 | 1.05k | break; |
877 | 4.46M | } |
878 | | |
879 | 4.46M | let mut deriv = match code_link.take() { |
880 | 4.44M | Some(link) => link, |
881 | | None => { |
882 | | // TODO: we do not need to break here. This does not indicate that the buffer |
883 | | // has been filled, rather it indicates we have reset the state. The next code |
884 | | // should be part of the initial alphabet. However the first code is special in |
885 | | // the sense of not creating a new code itself. This is handled correctly in |
886 | | // the initialization prior to the loop; and in particular that handling as |
887 | | // written currently relies on putting it into the buffer; so handling it we |
888 | | // would need to ensure that either the buffer is fully cleared after its use, |
889 | | // or use another implementation of handling that first code. |
890 | 12.7k | break; |
891 | | } |
892 | | }; |
893 | | |
894 | | // Ensure the code buffer is full, we're about to request some codes. |
895 | | // Note that this also ensures at least one code is in the buffer if any input is left. |
896 | 4.44M | self.refill_bits(&mut inp); |
897 | 4.44M | let cnt = self.code_buffer.peek_bits(&mut burst); |
898 | | |
899 | | // No code left in the buffer, and no more bytes to refill the buffer. |
900 | 4.44M | if cnt == 0 { |
901 | 237k | if have_yet_to_decode_data { |
902 | 64.1k | status = Ok(LzwStatus::NoProgress); |
903 | 172k | } |
904 | | |
905 | 237k | code_link = Some(deriv); |
906 | 237k | break; |
907 | 4.21M | } |
908 | | |
909 | 4.21M | debug_assert!( |
910 | | // When the table is full, we have a max code above the size switch. |
911 | 0 | self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff) |
912 | | // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the |
913 | | // computed next_code is 4 which already exceeds the bit width from the start. |
914 | | // Then we will immediately switch code size after this code. |
915 | | // |
916 | | // TODO: this is the reason for some saturating and non-sharp comparisons in |
917 | | // the code below. Maybe it makes sense to revisit turning this into a compile |
918 | | // time choice? |
919 | 0 | || (self.code_buffer.code_size() == 1 && self.next_code < 4) |
920 | 0 | || (self.code_buffer.code_size() == 2 && self.next_code == 4) |
921 | 0 | || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code, |
922 | 0 | "Table: {}, code_size: {}, next_code: {}, table_condition: {}", |
923 | 0 | self.table.is_full(), |
924 | 0 | self.code_buffer.code_size(), |
925 | | self.next_code, |
926 | 0 | self.code_buffer.max_code() - Code::from(self.is_tiff), |
927 | | ); |
928 | | |
929 | 4.21M | let mut burst_size = 0; |
930 | 4.21M | let left_before_size_switch = (self.code_buffer.max_code() |
931 | 4.21M | - Code::from(self.is_tiff)) |
932 | 4.21M | .saturating_sub(self.next_code); |
933 | | |
934 | | // A burst is a sequence of decodes that are completely independent of each other. This |
935 | | // is the case if neither is an end code, a clear code, or a next code, i.e. we have |
936 | | // all of them in the decoding table and thus known their depths, and additionally if |
937 | | // we can decode them directly into the output buffer. |
938 | 8.26M | for b in &burst[..cnt] { |
939 | | // We can commit the previous burst code, and will take a slice from the output |
940 | | // buffer. This also avoids the bounds check in the tight loop later. |
941 | 8.26M | if burst_size > 0 { |
942 | 4.05M | let len = burst_byte_len[burst_size - 1]; |
943 | 4.05M | let (into, tail) = out.split_at_mut(usize::from(len)); |
944 | 4.05M | target[burst_size - 1] = into; |
945 | 4.05M | out = tail; |
946 | 4.21M | } |
947 | | |
948 | | // Check that we don't overflow the code size with all codes we burst decode. |
949 | 8.26M | burst_size += 1; |
950 | | |
951 | 8.26M | if burst_size > usize::from(left_before_size_switch) { |
952 | 2.85M | break; |
953 | 5.40M | } |
954 | | |
955 | 5.40M | let read_code = *b; |
956 | | |
957 | | // A burst code can't be special. |
958 | 5.40M | if read_code == self.clear_code |
959 | 5.34M | || read_code == self.end_code |
960 | 5.34M | || read_code >= self.next_code |
961 | | { |
962 | 279k | break; |
963 | 5.12M | } |
964 | | |
965 | | // Read the code length and check that we can decode directly into the out slice. |
966 | 5.12M | let len = self.table.depths[usize::from(read_code)]; |
967 | | |
968 | 5.12M | if out.len() < usize::from(len) { |
969 | 35.9k | break; |
970 | 5.08M | } |
971 | | |
972 | | // We do exactly one more code (the one being inspected in the current iteration) |
973 | | // after the 'burst'. When we want to break decoding precisely on the supplied |
974 | | // buffer, we check if this is the last code to be decoded into it. |
975 | 5.08M | if CgC::YIELD_ON_FULL { |
976 | 2.72M | if out.len() == usize::from(len) { |
977 | 17.0k | break; |
978 | 2.70M | } |
979 | 2.36M | } |
980 | | |
981 | 5.07M | burst_byte_len[burst_size - 1] = len; |
982 | | } |
983 | | |
984 | 4.21M | self.code_buffer.consume_bits(burst_size as u8); |
985 | 4.21M | have_yet_to_decode_data = false; |
986 | | |
987 | | // Note that the very last code in the burst buffer doesn't actually belong to the |
988 | | // burst itself. TODO: sometimes it could, we just don't differentiate between the |
989 | | // breaks and a loop end condition above. That may be a speed advantage? |
990 | 4.21M | let (&new_code, burst) = burst[..burst_size].split_last().unwrap(); |
991 | | |
992 | | // The very tight loop for restoring the actual burst. These can be reconstructed in |
993 | | // parallel since none of them depend on a prior constructed. Only the derivation of |
994 | | // new codes is not parallel. There are no size changes here either. |
995 | 4.21M | let burst_targets = &mut target[..burst_size - 1]; |
996 | | |
997 | 4.21M | if !self.table.is_full() { |
998 | 1.41M | self.next_code += burst_targets.len() as u16; |
999 | 2.79M | } |
1000 | | |
1001 | 4.05M | for ((&burst, target), byte) in |
1002 | 4.21M | burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte) |
1003 | 4.05M | { |
1004 | 4.05M | *byte = self.table.reconstruct(burst, target); |
1005 | 4.05M | } |
1006 | | |
1007 | 4.21M | self.table.derive_burst(&mut deriv, burst, &burst_byte[..]); |
1008 | | |
1009 | | // Now handle the special codes. |
1010 | 4.21M | if new_code == self.clear_code { |
1011 | 68.1k | self.reset_tables(); |
1012 | 68.1k | last_decoded = None; |
1013 | | // Restarts in the next call to the entry point. |
1014 | 68.1k | break; |
1015 | 4.14M | } |
1016 | | |
1017 | 4.14M | if new_code == self.end_code { |
1018 | 144 | self.has_ended = true; |
1019 | 144 | status = Ok(LzwStatus::Done); |
1020 | 144 | last_decoded = None; |
1021 | 144 | break; |
1022 | 4.14M | } |
1023 | | |
1024 | 4.14M | if new_code > self.next_code { |
1025 | 921 | status = Err(LzwError::InvalidCode); |
1026 | 921 | last_decoded = None; |
1027 | 921 | break; |
1028 | 4.14M | } |
1029 | | |
1030 | 4.14M | let required_len = if new_code == self.next_code { |
1031 | 185k | self.table.depths[usize::from(deriv.code)] + 1 |
1032 | | } else { |
1033 | 3.95M | self.table.depths[usize::from(new_code)] |
1034 | | }; |
1035 | | |
1036 | | // We need the decoded data of the new code if it is the `next_code`. This is the |
1037 | | // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In |
1038 | | // all other cases we only need the first character of the decoded data. |
1039 | 4.14M | let have_next_code = new_code == self.next_code; |
1040 | | |
1041 | | // Update the slice holding the last decoded word. |
1042 | 4.14M | if have_next_code { |
1043 | | // If we did not have any burst code, we still hold that slice in the buffer. |
1044 | 185k | if let Some(new_last) = target[..burst_size - 1].last_mut() { |
1045 | 40.5k | let slice = core::mem::replace(new_last, &mut []); |
1046 | 40.5k | last_decoded = Some(&*slice); |
1047 | 144k | } |
1048 | 3.95M | } |
1049 | | |
1050 | | let cha; |
1051 | 4.14M | let is_in_buffer = usize::from(required_len) > out.len(); |
1052 | | // Check if we will need to store our current state into the buffer. |
1053 | 4.14M | if is_in_buffer { |
1054 | 91.6k | if have_next_code { |
1055 | | // last_decoded will be Some if we have restored any code into the out slice. |
1056 | | // Otherwise it will still be present in the buffer. |
1057 | 49.1k | if let Some(last) = last_decoded.take() { |
1058 | 15.7k | self.buffer.bytes[..last.len()].copy_from_slice(last); |
1059 | 15.7k | self.buffer.write_mark = last.len(); |
1060 | 15.7k | self.buffer.read_mark = last.len(); |
1061 | 33.4k | } |
1062 | | |
1063 | 49.1k | cha = self.buffer.fill_cscsc(); |
1064 | 42.5k | } else { |
1065 | 42.5k | // Restore the decoded word into the buffer. |
1066 | 42.5k | last_decoded = None; |
1067 | 42.5k | cha = self.buffer.fill_reconstruct(&self.table, new_code); |
1068 | 42.5k | } |
1069 | | } else { |
1070 | 4.05M | let (target, tail) = out.split_at_mut(usize::from(required_len)); |
1071 | 4.05M | out = tail; |
1072 | | |
1073 | 4.05M | if have_next_code { |
1074 | | // Reconstruct high. |
1075 | 136k | let source = match last_decoded.take() { |
1076 | 117k | Some(last) => last, |
1077 | 18.5k | None => &self.buffer.bytes[..self.buffer.write_mark], |
1078 | | }; |
1079 | | |
1080 | | // We don't *actually* expect the unwrap to happen. Each source is at least 1 |
1081 | | // byte long. But llvm doesn't know this (too much indirect loads and cases). |
1082 | 136k | cha = source.get(0).map(|x| *x).unwrap_or(0); |
1083 | 136k | target[..source.len()].copy_from_slice(source); |
1084 | 136k | target[source.len()..][0] = cha; |
1085 | 3.91M | } else { |
1086 | 3.91M | cha = self.table.reconstruct(new_code, target); |
1087 | 3.91M | } |
1088 | | |
1089 | | // A new decoded word. |
1090 | 4.05M | last_decoded = Some(target); |
1091 | | } |
1092 | | |
1093 | | // Each newly read code creates one new code/link based on the preceding code if we |
1094 | | // have enough space to put it there. |
1095 | 4.14M | if !self.table.is_full() { |
1096 | 1.34M | self.table.derive(&deriv, cha); |
1097 | | |
1098 | 1.34M | if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff) |
1099 | 54.2k | && self.code_buffer.code_size() < MAX_CODESIZE |
1100 | 53.7k | { |
1101 | 53.7k | self.bump_code_size(); |
1102 | 1.28M | } |
1103 | | |
1104 | 1.34M | self.next_code += 1; |
1105 | 2.79M | } |
1106 | | |
1107 | | // store the information on the decoded word. |
1108 | 4.14M | code_link = Some(DerivationBase { |
1109 | 4.14M | code: new_code, |
1110 | 4.14M | first: cha, |
1111 | 4.14M | }); |
1112 | | |
1113 | | // Can't make any more progress with decoding. |
1114 | | // |
1115 | | // We have more data buffered but not enough space to put it? We want fetch a next |
1116 | | // symbol if possible as in the case of it being a new symbol we can refer to the |
1117 | | // buffered output as the source for that symbol's meaning and do a memcpy. |
1118 | | // |
1119 | | // Since this test is after decoding at least one code, we can now check for an |
1120 | | // empty buffer and still guarantee progress when one was passed as a parameter. |
1121 | 4.14M | if is_in_buffer || out.is_empty() { |
1122 | 115k | break; |
1123 | 4.02M | } |
1124 | | } |
1125 | 287k | } |
1126 | | |
1127 | | // We need to store the last word into the buffer in case the first code in the next |
1128 | | // iteration is the next_code. |
1129 | 722k | if let Some(tail) = last_decoded { |
1130 | 196k | self.buffer.bytes[..tail.len()].copy_from_slice(tail); |
1131 | 196k | self.buffer.write_mark = tail.len(); |
1132 | 196k | // Mark the full buffer as having been consumed. |
1133 | 196k | self.buffer.read_mark = tail.len(); |
1134 | 526k | } |
1135 | | |
1136 | | // Ensure we don't indicate that no progress was made if we read some bytes from the input |
1137 | | // (which is progress). |
1138 | 722k | if o_in > inp.len() { |
1139 | 415k | if let Ok(LzwStatus::NoProgress) = status { |
1140 | 258k | status = Ok(LzwStatus::Ok); |
1141 | 258k | } |
1142 | 305k | } |
1143 | | |
1144 | | // Store the code/link state. |
1145 | 722k | self.last = code_link; |
1146 | | |
1147 | 722k | BufferResult { |
1148 | 722k | consumed_in: o_in.wrapping_sub(inp.len()), |
1149 | 722k | consumed_out: o_out.wrapping_sub(out.len()), |
1150 | 722k | status, |
1151 | 722k | } |
1152 | 722k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance Line | Count | Source | 708 | 558k | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { | 709 | | // Skip everything if there is nothing to do. | 710 | 558k | if self.has_ended { | 711 | 7 | return BufferResult { | 712 | 7 | consumed_in: 0, | 713 | 7 | consumed_out: 0, | 714 | 7 | status: Ok(LzwStatus::Done), | 715 | 7 | }; | 716 | 558k | } | 717 | | | 718 | | // Rough description: | 719 | | // We will fill the output slice as much as possible until either there is no more symbols | 720 | | // to decode or an end code has been reached. This requires an internal buffer to hold a | 721 | | // potential tail of the word corresponding to the last symbol. This tail will then be | 722 | | // decoded first before continuing with the regular decoding. The same buffer is required | 723 | | // to persist some symbol state across calls. | 724 | | // | 725 | | // We store the words corresponding to code symbols in an index chain, bytewise, where we | 726 | | // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain | 727 | | // is traversed for each symbol when it is decoded and bytes are placed directly into the | 728 | | // output slice. In the special case (new_code == next_code) we use an existing decoded | 729 | | // version that is present in either the out bytes of this call or in buffer to copy the | 730 | | // repeated prefix slice. | 731 | | // TODO: I played with a 'decoding cache' to remember the position of long symbols and | 732 | | // avoid traversing the chain, doing a copy of memory instead. It did however not lead to | 733 | | // a serious improvement. It's just unlikely to both have a long symbol and have that | 734 | | // repeated twice in the same output buffer. | 735 | | // | 736 | | // You will also find the (to my knowledge novel) concept of a _decoding burst_ which | 737 | | // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order | 738 | | // execution as much as possible and for this reason have the least possible stress on | 739 | | // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but | 740 | | // only for re-used codes, not novel ones. This lookahead also makes the loop termination | 741 | | // when restoring each byte of the code word perfectly predictable! So a burst is a chunk | 742 | | // of code words which are all independent of each other, have known lengths _and_ are | 743 | | // guaranteed to fit into the out slice without requiring a buffer. One burst can be | 744 | | // decoded in an extremely tight loop. | 745 | | // | 746 | | // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid | 747 | | // that intermediate buffer at the expense of not always filling the output buffer | 748 | | // completely. Alternatively we might follow its chain of precursor states twice. This may | 749 | | // be even cheaper if we store more than one byte per link so it really should be | 750 | | // evaluated. | 751 | | // TODO: if the caller was required to provide the previous last word we could also avoid | 752 | | // the buffer for cases where we need it to restore the next code! This could be built | 753 | | // backwards compatible by only doing it after an opt-in call that enables the behaviour. | 754 | | | 755 | | // Record initial lengths for the result that is returned. | 756 | 558k | let o_in = inp.len(); | 757 | 558k | let o_out = out.len(); | 758 | | | 759 | | // The code_link is the previously decoded symbol. | 760 | | // It's used to link the new code back to its predecessor. | 761 | 558k | let mut code_link = None; | 762 | | // The status, which is written to on an invalid code. | 763 | 558k | let mut status = Ok(LzwStatus::Ok); | 764 | | | 765 | 558k | match self.last.take() { | 766 | | // No last state? This is the first code after a reset? | 767 | | None => { | 768 | 53.6k | match self.next_symbol(&mut inp) { | 769 | | // Plainly invalid code. | 770 | 53.2k | Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode), | 771 | | // next_code would require an actual predecessor. | 772 | 53.2k | Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode), | 773 | | // No more symbols available and nothing decoded yet. | 774 | | // Assume that we didn't make progress, this may get reset to Done if we read | 775 | | // some bytes from the input. | 776 | 334 | None => status = Ok(LzwStatus::NoProgress), | 777 | | // Handle a valid code. | 778 | 53.2k | Some(init_code) => { | 779 | 53.2k | if init_code == self.clear_code { | 780 | 10.3k | self.init_tables(); | 781 | 42.9k | } else if init_code == self.end_code { | 782 | 29 | self.has_ended = true; | 783 | 29 | status = Ok(LzwStatus::Done); | 784 | 42.8k | } else if self.table.is_empty() { | 785 | 1.61k | if self.implicit_reset { | 786 | 1.61k | self.init_tables(); | 787 | 1.61k | | 788 | 1.61k | self.buffer.fill_reconstruct(&self.table, init_code); | 789 | 1.61k | let link = self.table.at(init_code).clone(); | 790 | 1.61k | code_link = Some(DerivationBase { | 791 | 1.61k | code: init_code, | 792 | 1.61k | first: link.first, | 793 | 1.61k | }); | 794 | 1.61k | } else { | 795 | 0 | // We require an explicit reset. | 796 | 0 | status = Err(LzwError::InvalidCode); | 797 | 0 | } | 798 | 41.2k | } else { | 799 | 41.2k | // Reconstruct the first code in the buffer. | 800 | 41.2k | self.buffer.fill_reconstruct(&self.table, init_code); | 801 | 41.2k | let link = self.table.at(init_code).clone(); | 802 | 41.2k | code_link = Some(DerivationBase { | 803 | 41.2k | code: init_code, | 804 | 41.2k | first: link.first, | 805 | 41.2k | }); | 806 | 41.2k | } | 807 | | } | 808 | | } | 809 | | } | 810 | | // Move the tracking state to the stack. | 811 | 505k | Some(tup) => code_link = Some(tup), | 812 | | }; | 813 | | | 814 | | // Track an empty `burst` (see below) means we made no progress. | 815 | 558k | let mut have_yet_to_decode_data = false; | 816 | | | 817 | | // Restore the previous state, if any. | 818 | 558k | if code_link.is_some() { | 819 | 548k | let remain = self.buffer.buffer(); | 820 | | // Check if we can fully finish the buffer. | 821 | 548k | if remain.len() > out.len() { | 822 | 224k | if out.is_empty() { | 823 | 0 | // This also implies the buffer is _not_ empty and we will not enter any | 824 | 0 | // decoding loop. | 825 | 0 | status = Ok(LzwStatus::NoProgress); | 826 | 224k | } else { | 827 | 224k | out.copy_from_slice(&remain[..out.len()]); | 828 | 224k | self.buffer.consume(out.len()); | 829 | 224k | out = &mut []; | 830 | 224k | } | 831 | 323k | } else if remain.is_empty() { | 832 | 241k | status = Ok(LzwStatus::NoProgress); | 833 | 241k | have_yet_to_decode_data = true; | 834 | 241k | } else { | 835 | 82.1k | let consumed = remain.len(); | 836 | 82.1k | out[..consumed].copy_from_slice(remain); | 837 | 82.1k | self.buffer.consume(consumed); | 838 | 82.1k | out = &mut out[consumed..]; | 839 | 82.1k | have_yet_to_decode_data = false; | 840 | 82.1k | } | 841 | 10.7k | } | 842 | | | 843 | | // A special reference to out slice which holds the last decoded symbol. | 844 | 558k | let mut last_decoded: Option<&[u8]> = None; | 845 | | | 846 | 558k | if self.buffer.buffer().is_empty() { | 847 | | // Hot loop that writes data to the output as long as we can do so directly from the | 848 | | // input stream. As an invariant of this block we did not need to use the buffer to | 849 | | // store a decoded code word. Testing the condition ahead of time avoids a test in the | 850 | | // loop body since every code path where the buffer is filled already breaks. | 851 | | // | 852 | | // In a previous iteration of the code we trusted compiler optimization to work this | 853 | | // out but it seems that it does not. Another edit hidden behind some performance work | 854 | | // then edited out the check, inadvertently changing the behavior for callers that | 855 | | // relied on being able to provide an empty output buffer and still receiving a useful | 856 | | // signal about the state of the stream. | 857 | | | 858 | | // A burst is a sequence of code words that are independently decoded, i.e. they do not | 859 | | // change the state of the decoder in ways that would influence the interpretation of | 860 | | // each other. That is: they are not special symbols, they do not make us increase the | 861 | | // code size, they are each codes already in the tree before the burst. | 862 | | // | 863 | | // The tracking state for a burst. These are actually initialized later but compiler | 864 | | // wasn't smart enough to fully optimize out the init code so that appears outside the | 865 | | // loop. | 866 | 333k | let mut burst = [0; BURST]; | 867 | 333k | let mut burst_byte_len = [0u16; BURST]; | 868 | 333k | let mut burst_byte = [0u8; BURST]; | 869 | 333k | let mut target: [&mut [u8]; BURST] = Default::default(); | 870 | | | 871 | | loop { | 872 | | // In particular, we *also* break if the output buffer is still empty. Especially | 873 | | // when the output parameter was an empty slice, we must try to fetch at least one | 874 | | // code but with YIELD_ON_FULL we do not. | 875 | 1.37M | if CgC::YIELD_ON_FULL && out.is_empty() { | 876 | 0 | break; | 877 | 1.37M | } | 878 | | | 879 | 1.37M | let mut deriv = match code_link.take() { | 880 | 1.35M | Some(link) => link, | 881 | | None => { | 882 | | // TODO: we do not need to break here. This does not indicate that the buffer | 883 | | // has been filled, rather it indicates we have reset the state. The next code | 884 | | // should be part of the initial alphabet. However the first code is special in | 885 | | // the sense of not creating a new code itself. This is handled correctly in | 886 | | // the initialization prior to the loop; and in particular that handling as | 887 | | // written currently relies on putting it into the buffer; so handling it we | 888 | | // would need to ensure that either the buffer is fully cleared after its use, | 889 | | // or use another implementation of handling that first code. | 890 | 10.7k | break; | 891 | | } | 892 | | }; | 893 | | | 894 | | // Ensure the code buffer is full, we're about to request some codes. | 895 | | // Note that this also ensures at least one code is in the buffer if any input is left. | 896 | 1.35M | self.refill_bits(&mut inp); | 897 | 1.35M | let cnt = self.code_buffer.peek_bits(&mut burst); | 898 | | | 899 | | // No code left in the buffer, and no more bytes to refill the buffer. | 900 | 1.35M | if cnt == 0 { | 901 | 236k | if have_yet_to_decode_data { | 902 | 63.8k | status = Ok(LzwStatus::NoProgress); | 903 | 172k | } | 904 | | | 905 | 236k | code_link = Some(deriv); | 906 | 236k | break; | 907 | 1.12M | } | 908 | | | 909 | 1.12M | debug_assert!( | 910 | | // When the table is full, we have a max code above the size switch. | 911 | 0 | self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff) | 912 | | // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the | 913 | | // computed next_code is 4 which already exceeds the bit width from the start. | 914 | | // Then we will immediately switch code size after this code. | 915 | | // | 916 | | // TODO: this is the reason for some saturating and non-sharp comparisons in | 917 | | // the code below. Maybe it makes sense to revisit turning this into a compile | 918 | | // time choice? | 919 | 0 | || (self.code_buffer.code_size() == 1 && self.next_code < 4) | 920 | 0 | || (self.code_buffer.code_size() == 2 && self.next_code == 4) | 921 | 0 | || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code, | 922 | 0 | "Table: {}, code_size: {}, next_code: {}, table_condition: {}", | 923 | 0 | self.table.is_full(), | 924 | 0 | self.code_buffer.code_size(), | 925 | | self.next_code, | 926 | 0 | self.code_buffer.max_code() - Code::from(self.is_tiff), | 927 | | ); | 928 | | | 929 | 1.12M | let mut burst_size = 0; | 930 | 1.12M | let left_before_size_switch = (self.code_buffer.max_code() | 931 | 1.12M | - Code::from(self.is_tiff)) | 932 | 1.12M | .saturating_sub(self.next_code); | 933 | | | 934 | | // A burst is a sequence of decodes that are completely independent of each other. This | 935 | | // is the case if neither is an end code, a clear code, or a next code, i.e. we have | 936 | | // all of them in the decoding table and thus known their depths, and additionally if | 937 | | // we can decode them directly into the output buffer. | 938 | 2.93M | for b in &burst[..cnt] { | 939 | | // We can commit the previous burst code, and will take a slice from the output | 940 | | // buffer. This also avoids the bounds check in the tight loop later. | 941 | 2.93M | if burst_size > 0 { | 942 | 1.81M | let len = burst_byte_len[burst_size - 1]; | 943 | 1.81M | let (into, tail) = out.split_at_mut(usize::from(len)); | 944 | 1.81M | target[burst_size - 1] = into; | 945 | 1.81M | out = tail; | 946 | 1.81M | } | 947 | | | 948 | | // Check that we don't overflow the code size with all codes we burst decode. | 949 | 2.93M | burst_size += 1; | 950 | | | 951 | 2.93M | if burst_size > usize::from(left_before_size_switch) { | 952 | 417k | break; | 953 | 2.51M | } | 954 | | | 955 | 2.51M | let read_code = *b; | 956 | | | 957 | | // A burst code can't be special. | 958 | 2.51M | if read_code == self.clear_code | 959 | 2.48M | || read_code == self.end_code | 960 | 2.48M | || read_code >= self.next_code | 961 | | { | 962 | 127k | break; | 963 | 2.38M | } | 964 | | | 965 | | // Read the code length and check that we can decode directly into the out slice. | 966 | 2.38M | let len = self.table.depths[usize::from(read_code)]; | 967 | | | 968 | 2.38M | if out.len() < usize::from(len) { | 969 | 25.3k | break; | 970 | 2.36M | } | 971 | | | 972 | | // We do exactly one more code (the one being inspected in the current iteration) | 973 | | // after the 'burst'. When we want to break decoding precisely on the supplied | 974 | | // buffer, we check if this is the last code to be decoded into it. | 975 | 2.36M | if CgC::YIELD_ON_FULL { | 976 | 0 | if out.len() == usize::from(len) { | 977 | 0 | break; | 978 | 0 | } | 979 | 2.36M | } | 980 | | | 981 | 2.36M | burst_byte_len[burst_size - 1] = len; | 982 | | } | 983 | | | 984 | 1.12M | self.code_buffer.consume_bits(burst_size as u8); | 985 | 1.12M | have_yet_to_decode_data = false; | 986 | | | 987 | | // Note that the very last code in the burst buffer doesn't actually belong to the | 988 | | // burst itself. TODO: sometimes it could, we just don't differentiate between the | 989 | | // breaks and a loop end condition above. That may be a speed advantage? | 990 | 1.12M | let (&new_code, burst) = burst[..burst_size].split_last().unwrap(); | 991 | | | 992 | | // The very tight loop for restoring the actual burst. These can be reconstructed in | 993 | | // parallel since none of them depend on a prior constructed. Only the derivation of | 994 | | // new codes is not parallel. There are no size changes here either. | 995 | 1.12M | let burst_targets = &mut target[..burst_size - 1]; | 996 | | | 997 | 1.12M | if !self.table.is_full() { | 998 | 759k | self.next_code += burst_targets.len() as u16; | 999 | 759k | } | 1000 | | | 1001 | 1.81M | for ((&burst, target), byte) in | 1002 | 1.12M | burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte) | 1003 | 1.81M | { | 1004 | 1.81M | *byte = self.table.reconstruct(burst, target); | 1005 | 1.81M | } | 1006 | | | 1007 | 1.12M | self.table.derive_burst(&mut deriv, burst, &burst_byte[..]); | 1008 | | | 1009 | | // Now handle the special codes. | 1010 | 1.12M | if new_code == self.clear_code { | 1011 | 41.0k | self.reset_tables(); | 1012 | 41.0k | last_decoded = None; | 1013 | | // Restarts in the next call to the entry point. | 1014 | 41.0k | break; | 1015 | 1.08M | } | 1016 | | | 1017 | 1.08M | if new_code == self.end_code { | 1018 | 78 | self.has_ended = true; | 1019 | 78 | status = Ok(LzwStatus::Done); | 1020 | 78 | last_decoded = None; | 1021 | 78 | break; | 1022 | 1.08M | } | 1023 | | | 1024 | 1.08M | if new_code > self.next_code { | 1025 | 351 | status = Err(LzwError::InvalidCode); | 1026 | 351 | last_decoded = None; | 1027 | 351 | break; | 1028 | 1.08M | } | 1029 | | | 1030 | 1.08M | let required_len = if new_code == self.next_code { | 1031 | 78.3k | self.table.depths[usize::from(deriv.code)] + 1 | 1032 | | } else { | 1033 | 1.00M | self.table.depths[usize::from(new_code)] | 1034 | | }; | 1035 | | | 1036 | | // We need the decoded data of the new code if it is the `next_code`. This is the | 1037 | | // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In | 1038 | | // all other cases we only need the first character of the decoded data. | 1039 | 1.08M | let have_next_code = new_code == self.next_code; | 1040 | | | 1041 | | // Update the slice holding the last decoded word. | 1042 | 1.08M | if have_next_code { | 1043 | | // If we did not have any burst code, we still hold that slice in the buffer. | 1044 | 78.3k | if let Some(new_last) = target[..burst_size - 1].last_mut() { | 1045 | 20.0k | let slice = core::mem::replace(new_last, &mut []); | 1046 | 20.0k | last_decoded = Some(&*slice); | 1047 | 58.3k | } | 1048 | 1.00M | } | 1049 | | | 1050 | | let cha; | 1051 | 1.08M | let is_in_buffer = usize::from(required_len) > out.len(); | 1052 | | // Check if we will need to store our current state into the buffer. | 1053 | 1.08M | if is_in_buffer { | 1054 | 39.5k | if have_next_code { | 1055 | | // last_decoded will be Some if we have restored any code into the out slice. | 1056 | | // Otherwise it will still be present in the buffer. | 1057 | 9.55k | if let Some(last) = last_decoded.take() { | 1058 | 4.72k | self.buffer.bytes[..last.len()].copy_from_slice(last); | 1059 | 4.72k | self.buffer.write_mark = last.len(); | 1060 | 4.72k | self.buffer.read_mark = last.len(); | 1061 | 4.82k | } | 1062 | | | 1063 | 9.55k | cha = self.buffer.fill_cscsc(); | 1064 | 29.9k | } else { | 1065 | 29.9k | // Restore the decoded word into the buffer. | 1066 | 29.9k | last_decoded = None; | 1067 | 29.9k | cha = self.buffer.fill_reconstruct(&self.table, new_code); | 1068 | 29.9k | } | 1069 | | } else { | 1070 | 1.04M | let (target, tail) = out.split_at_mut(usize::from(required_len)); | 1071 | 1.04M | out = tail; | 1072 | | | 1073 | 1.04M | if have_next_code { | 1074 | | // Reconstruct high. | 1075 | 68.8k | let source = match last_decoded.take() { | 1076 | 62.4k | Some(last) => last, | 1077 | 6.35k | None => &self.buffer.bytes[..self.buffer.write_mark], | 1078 | | }; | 1079 | | | 1080 | | // We don't *actually* expect the unwrap to happen. Each source is at least 1 | 1081 | | // byte long. But llvm doesn't know this (too much indirect loads and cases). | 1082 | 68.8k | cha = source.get(0).map(|x| *x).unwrap_or(0); | 1083 | 68.8k | target[..source.len()].copy_from_slice(source); | 1084 | 68.8k | target[source.len()..][0] = cha; | 1085 | 973k | } else { | 1086 | 973k | cha = self.table.reconstruct(new_code, target); | 1087 | 973k | } | 1088 | | | 1089 | | // A new decoded word. | 1090 | 1.04M | last_decoded = Some(target); | 1091 | | } | 1092 | | | 1093 | | // Each newly read code creates one new code/link based on the preceding code if we | 1094 | | // have enough space to put it there. | 1095 | 1.08M | if !self.table.is_full() { | 1096 | 717k | self.table.derive(&deriv, cha); | 1097 | | | 1098 | 717k | if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff) | 1099 | 49.9k | && self.code_buffer.code_size() < MAX_CODESIZE | 1100 | 49.7k | { | 1101 | 49.7k | self.bump_code_size(); | 1102 | 668k | } | 1103 | | | 1104 | 717k | self.next_code += 1; | 1105 | 363k | } | 1106 | | | 1107 | | // store the information on the decoded word. | 1108 | 1.08M | code_link = Some(DerivationBase { | 1109 | 1.08M | code: new_code, | 1110 | 1.08M | first: cha, | 1111 | 1.08M | }); | 1112 | | | 1113 | | // Can't make any more progress with decoding. | 1114 | | // | 1115 | | // We have more data buffered but not enough space to put it? We want fetch a next | 1116 | | // symbol if possible as in the case of it being a new symbol we can refer to the | 1117 | | // buffered output as the source for that symbol's meaning and do a memcpy. | 1118 | | // | 1119 | | // Since this test is after decoding at least one code, we can now check for an | 1120 | | // empty buffer and still guarantee progress when one was passed as a parameter. | 1121 | 1.08M | if is_in_buffer || out.is_empty() { | 1122 | 45.5k | break; | 1123 | 1.03M | } | 1124 | | } | 1125 | 224k | } | 1126 | | | 1127 | | // We need to store the last word into the buffer in case the first code in the next | 1128 | | // iteration is the next_code. | 1129 | 558k | if let Some(tail) = last_decoded { | 1130 | 178k | self.buffer.bytes[..tail.len()].copy_from_slice(tail); | 1131 | 178k | self.buffer.write_mark = tail.len(); | 1132 | 178k | // Mark the full buffer as having been consumed. | 1133 | 178k | self.buffer.read_mark = tail.len(); | 1134 | 380k | } | 1135 | | | 1136 | | // Ensure we don't indicate that no progress was made if we read some bytes from the input | 1137 | | // (which is progress). | 1138 | 558k | if o_in > inp.len() { | 1139 | 318k | if let Ok(LzwStatus::NoProgress) = status { | 1140 | 240k | status = Ok(LzwStatus::Ok); | 1141 | 240k | } | 1142 | 239k | } | 1143 | | | 1144 | | // Store the code/link state. | 1145 | 558k | self.last = code_link; | 1146 | | | 1147 | 558k | BufferResult { | 1148 | 558k | consumed_in: o_in.wrapping_sub(inp.len()), | 1149 | 558k | consumed_out: o_out.wrapping_sub(out.len()), | 1150 | 558k | status, | 1151 | 558k | } | 1152 | 558k | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance Line | Count | Source | 708 | 164k | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { | 709 | | // Skip everything if there is nothing to do. | 710 | 164k | if self.has_ended { | 711 | 72 | return BufferResult { | 712 | 72 | consumed_in: 0, | 713 | 72 | consumed_out: 0, | 714 | 72 | status: Ok(LzwStatus::Done), | 715 | 72 | }; | 716 | 163k | } | 717 | | | 718 | | // Rough description: | 719 | | // We will fill the output slice as much as possible until either there is no more symbols | 720 | | // to decode or an end code has been reached. This requires an internal buffer to hold a | 721 | | // potential tail of the word corresponding to the last symbol. This tail will then be | 722 | | // decoded first before continuing with the regular decoding. The same buffer is required | 723 | | // to persist some symbol state across calls. | 724 | | // | 725 | | // We store the words corresponding to code symbols in an index chain, bytewise, where we | 726 | | // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain | 727 | | // is traversed for each symbol when it is decoded and bytes are placed directly into the | 728 | | // output slice. In the special case (new_code == next_code) we use an existing decoded | 729 | | // version that is present in either the out bytes of this call or in buffer to copy the | 730 | | // repeated prefix slice. | 731 | | // TODO: I played with a 'decoding cache' to remember the position of long symbols and | 732 | | // avoid traversing the chain, doing a copy of memory instead. It did however not lead to | 733 | | // a serious improvement. It's just unlikely to both have a long symbol and have that | 734 | | // repeated twice in the same output buffer. | 735 | | // | 736 | | // You will also find the (to my knowledge novel) concept of a _decoding burst_ which | 737 | | // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order | 738 | | // execution as much as possible and for this reason have the least possible stress on | 739 | | // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but | 740 | | // only for re-used codes, not novel ones. This lookahead also makes the loop termination | 741 | | // when restoring each byte of the code word perfectly predictable! So a burst is a chunk | 742 | | // of code words which are all independent of each other, have known lengths _and_ are | 743 | | // guaranteed to fit into the out slice without requiring a buffer. One burst can be | 744 | | // decoded in an extremely tight loop. | 745 | | // | 746 | | // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid | 747 | | // that intermediate buffer at the expense of not always filling the output buffer | 748 | | // completely. Alternatively we might follow its chain of precursor states twice. This may | 749 | | // be even cheaper if we store more than one byte per link so it really should be | 750 | | // evaluated. | 751 | | // TODO: if the caller was required to provide the previous last word we could also avoid | 752 | | // the buffer for cases where we need it to restore the next code! This could be built | 753 | | // backwards compatible by only doing it after an opt-in call that enables the behaviour. | 754 | | | 755 | | // Record initial lengths for the result that is returned. | 756 | 163k | let o_in = inp.len(); | 757 | 163k | let o_out = out.len(); | 758 | | | 759 | | // The code_link is the previously decoded symbol. | 760 | | // It's used to link the new code back to its predecessor. | 761 | 163k | let mut code_link = None; | 762 | | // The status, which is written to on an invalid code. | 763 | 163k | let mut status = Ok(LzwStatus::Ok); | 764 | | | 765 | 163k | match self.last.take() { | 766 | | // No last state? This is the first code after a reset? | 767 | | None => { | 768 | 31.3k | match self.next_symbol(&mut inp) { | 769 | | // Plainly invalid code. | 770 | 31.3k | Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode), | 771 | | // next_code would require an actual predecessor. | 772 | 31.2k | Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode), | 773 | | // No more symbols available and nothing decoded yet. | 774 | | // Assume that we didn't make progress, this may get reset to Done if we read | 775 | | // some bytes from the input. | 776 | 73 | None => status = Ok(LzwStatus::NoProgress), | 777 | | // Handle a valid code. | 778 | 31.2k | Some(init_code) => { | 779 | 31.2k | if init_code == self.clear_code { | 780 | 1.93k | self.init_tables(); | 781 | 29.3k | } else if init_code == self.end_code { | 782 | 5 | self.has_ended = true; | 783 | 5 | status = Ok(LzwStatus::Done); | 784 | 29.3k | } else if self.table.is_empty() { | 785 | 805 | if self.implicit_reset { | 786 | 805 | self.init_tables(); | 787 | 805 | | 788 | 805 | self.buffer.fill_reconstruct(&self.table, init_code); | 789 | 805 | let link = self.table.at(init_code).clone(); | 790 | 805 | code_link = Some(DerivationBase { | 791 | 805 | code: init_code, | 792 | 805 | first: link.first, | 793 | 805 | }); | 794 | 805 | } else { | 795 | 0 | // We require an explicit reset. | 796 | 0 | status = Err(LzwError::InvalidCode); | 797 | 0 | } | 798 | 28.5k | } else { | 799 | 28.5k | // Reconstruct the first code in the buffer. | 800 | 28.5k | self.buffer.fill_reconstruct(&self.table, init_code); | 801 | 28.5k | let link = self.table.at(init_code).clone(); | 802 | 28.5k | code_link = Some(DerivationBase { | 803 | 28.5k | code: init_code, | 804 | 28.5k | first: link.first, | 805 | 28.5k | }); | 806 | 28.5k | } | 807 | | } | 808 | | } | 809 | | } | 810 | | // Move the tracking state to the stack. | 811 | 132k | Some(tup) => code_link = Some(tup), | 812 | | }; | 813 | | | 814 | | // Track an empty `burst` (see below) means we made no progress. | 815 | 163k | let mut have_yet_to_decode_data = false; | 816 | | | 817 | | // Restore the previous state, if any. | 818 | 163k | if code_link.is_some() { | 819 | 161k | let remain = self.buffer.buffer(); | 820 | | // Check if we can fully finish the buffer. | 821 | 161k | if remain.len() > out.len() { | 822 | 62.2k | if out.is_empty() { | 823 | 0 | // This also implies the buffer is _not_ empty and we will not enter any | 824 | 0 | // decoding loop. | 825 | 0 | status = Ok(LzwStatus::NoProgress); | 826 | 62.2k | } else { | 827 | 62.2k | out.copy_from_slice(&remain[..out.len()]); | 828 | 62.2k | self.buffer.consume(out.len()); | 829 | 62.2k | out = &mut []; | 830 | 62.2k | } | 831 | 99.6k | } else if remain.is_empty() { | 832 | 18.3k | status = Ok(LzwStatus::NoProgress); | 833 | 18.3k | have_yet_to_decode_data = true; | 834 | 81.3k | } else { | 835 | 81.3k | let consumed = remain.len(); | 836 | 81.3k | out[..consumed].copy_from_slice(remain); | 837 | 81.3k | self.buffer.consume(consumed); | 838 | 81.3k | out = &mut out[consumed..]; | 839 | 81.3k | have_yet_to_decode_data = false; | 840 | 81.3k | } | 841 | 2.04k | } | 842 | | | 843 | | // A special reference to out slice which holds the last decoded symbol. | 844 | 163k | let mut last_decoded: Option<&[u8]> = None; | 845 | | | 846 | 163k | if self.buffer.buffer().is_empty() { | 847 | | // Hot loop that writes data to the output as long as we can do so directly from the | 848 | | // input stream. As an invariant of this block we did not need to use the buffer to | 849 | | // store a decoded code word. Testing the condition ahead of time avoids a test in the | 850 | | // loop body since every code path where the buffer is filled already breaks. | 851 | | // | 852 | | // In a previous iteration of the code we trusted compiler optimization to work this | 853 | | // out but it seems that it does not. Another edit hidden behind some performance work | 854 | | // then edited out the check, inadvertently changing the behavior for callers that | 855 | | // relied on being able to provide an empty output buffer and still receiving a useful | 856 | | // signal about the state of the stream. | 857 | | | 858 | | // A burst is a sequence of code words that are independently decoded, i.e. they do not | 859 | | // change the state of the decoder in ways that would influence the interpretation of | 860 | | // each other. That is: they are not special symbols, they do not make us increase the | 861 | | // code size, they are each codes already in the tree before the burst. | 862 | | // | 863 | | // The tracking state for a burst. These are actually initialized later but compiler | 864 | | // wasn't smart enough to fully optimize out the init code so that appears outside the | 865 | | // loop. | 866 | 101k | let mut burst = [0; BURST]; | 867 | 101k | let mut burst_byte_len = [0u16; BURST]; | 868 | 101k | let mut burst_byte = [0u8; BURST]; | 869 | 101k | let mut target: [&mut [u8]; BURST] = Default::default(); | 870 | | | 871 | | loop { | 872 | | // In particular, we *also* break if the output buffer is still empty. Especially | 873 | | // when the output parameter was an empty slice, we must try to fetch at least one | 874 | | // code but with YIELD_ON_FULL we do not. | 875 | 3.09M | if CgC::YIELD_ON_FULL && out.is_empty() { | 876 | 1.05k | break; | 877 | 3.09M | } | 878 | | | 879 | 3.09M | let mut deriv = match code_link.take() { | 880 | 3.08M | Some(link) => link, | 881 | | None => { | 882 | | // TODO: we do not need to break here. This does not indicate that the buffer | 883 | | // has been filled, rather it indicates we have reset the state. The next code | 884 | | // should be part of the initial alphabet. However the first code is special in | 885 | | // the sense of not creating a new code itself. This is handled correctly in | 886 | | // the initialization prior to the loop; and in particular that handling as | 887 | | // written currently relies on putting it into the buffer; so handling it we | 888 | | // would need to ensure that either the buffer is fully cleared after its use, | 889 | | // or use another implementation of handling that first code. | 890 | 2.04k | break; | 891 | | } | 892 | | }; | 893 | | | 894 | | // Ensure the code buffer is full, we're about to request some codes. | 895 | | // Note that this also ensures at least one code is in the buffer if any input is left. | 896 | 3.08M | self.refill_bits(&mut inp); | 897 | 3.08M | let cnt = self.code_buffer.peek_bits(&mut burst); | 898 | | | 899 | | // No code left in the buffer, and no more bytes to refill the buffer. | 900 | 3.08M | if cnt == 0 { | 901 | 889 | if have_yet_to_decode_data { | 902 | 365 | status = Ok(LzwStatus::NoProgress); | 903 | 524 | } | 904 | | | 905 | 889 | code_link = Some(deriv); | 906 | 889 | break; | 907 | 3.08M | } | 908 | | | 909 | 3.08M | debug_assert!( | 910 | | // When the table is full, we have a max code above the size switch. | 911 | 0 | self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff) | 912 | | // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the | 913 | | // computed next_code is 4 which already exceeds the bit width from the start. | 914 | | // Then we will immediately switch code size after this code. | 915 | | // | 916 | | // TODO: this is the reason for some saturating and non-sharp comparisons in | 917 | | // the code below. Maybe it makes sense to revisit turning this into a compile | 918 | | // time choice? | 919 | 0 | || (self.code_buffer.code_size() == 1 && self.next_code < 4) | 920 | 0 | || (self.code_buffer.code_size() == 2 && self.next_code == 4) | 921 | 0 | || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code, | 922 | 0 | "Table: {}, code_size: {}, next_code: {}, table_condition: {}", | 923 | 0 | self.table.is_full(), | 924 | 0 | self.code_buffer.code_size(), | 925 | | self.next_code, | 926 | 0 | self.code_buffer.max_code() - Code::from(self.is_tiff), | 927 | | ); | 928 | | | 929 | 3.08M | let mut burst_size = 0; | 930 | 3.08M | let left_before_size_switch = (self.code_buffer.max_code() | 931 | 3.08M | - Code::from(self.is_tiff)) | 932 | 3.08M | .saturating_sub(self.next_code); | 933 | | | 934 | | // A burst is a sequence of decodes that are completely independent of each other. This | 935 | | // is the case if neither is an end code, a clear code, or a next code, i.e. we have | 936 | | // all of them in the decoding table and thus known their depths, and additionally if | 937 | | // we can decode them directly into the output buffer. | 938 | 5.32M | for b in &burst[..cnt] { | 939 | | // We can commit the previous burst code, and will take a slice from the output | 940 | | // buffer. This also avoids the bounds check in the tight loop later. | 941 | 5.32M | if burst_size > 0 { | 942 | 2.23M | let len = burst_byte_len[burst_size - 1]; | 943 | 2.23M | let (into, tail) = out.split_at_mut(usize::from(len)); | 944 | 2.23M | target[burst_size - 1] = into; | 945 | 2.23M | out = tail; | 946 | 3.08M | } | 947 | | | 948 | | // Check that we don't overflow the code size with all codes we burst decode. | 949 | 5.32M | burst_size += 1; | 950 | | | 951 | 5.32M | if burst_size > usize::from(left_before_size_switch) { | 952 | 2.44M | break; | 953 | 2.88M | } | 954 | | | 955 | 2.88M | let read_code = *b; | 956 | | | 957 | | // A burst code can't be special. | 958 | 2.88M | if read_code == self.clear_code | 959 | 2.86M | || read_code == self.end_code | 960 | 2.86M | || read_code >= self.next_code | 961 | | { | 962 | 151k | break; | 963 | 2.73M | } | 964 | | | 965 | | // Read the code length and check that we can decode directly into the out slice. | 966 | 2.73M | let len = self.table.depths[usize::from(read_code)]; | 967 | | | 968 | 2.73M | if out.len() < usize::from(len) { | 969 | 10.5k | break; | 970 | 2.72M | } | 971 | | | 972 | | // We do exactly one more code (the one being inspected in the current iteration) | 973 | | // after the 'burst'. When we want to break decoding precisely on the supplied | 974 | | // buffer, we check if this is the last code to be decoded into it. | 975 | 2.72M | if CgC::YIELD_ON_FULL { | 976 | 2.72M | if out.len() == usize::from(len) { | 977 | 17.0k | break; | 978 | 2.70M | } | 979 | 0 | } | 980 | | | 981 | 2.70M | burst_byte_len[burst_size - 1] = len; | 982 | | } | 983 | | | 984 | 3.08M | self.code_buffer.consume_bits(burst_size as u8); | 985 | 3.08M | have_yet_to_decode_data = false; | 986 | | | 987 | | // Note that the very last code in the burst buffer doesn't actually belong to the | 988 | | // burst itself. TODO: sometimes it could, we just don't differentiate between the | 989 | | // breaks and a loop end condition above. That may be a speed advantage? | 990 | 3.08M | let (&new_code, burst) = burst[..burst_size].split_last().unwrap(); | 991 | | | 992 | | // The very tight loop for restoring the actual burst. These can be reconstructed in | 993 | | // parallel since none of them depend on a prior constructed. Only the derivation of | 994 | | // new codes is not parallel. There are no size changes here either. | 995 | 3.08M | let burst_targets = &mut target[..burst_size - 1]; | 996 | | | 997 | 3.08M | if !self.table.is_full() { | 998 | 652k | self.next_code += burst_targets.len() as u16; | 999 | 2.43M | } | 1000 | | | 1001 | 2.23M | for ((&burst, target), byte) in | 1002 | 3.08M | burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte) | 1003 | 2.23M | { | 1004 | 2.23M | *byte = self.table.reconstruct(burst, target); | 1005 | 2.23M | } | 1006 | | | 1007 | 3.08M | self.table.derive_burst(&mut deriv, burst, &burst_byte[..]); | 1008 | | | 1009 | | // Now handle the special codes. | 1010 | 3.08M | if new_code == self.clear_code { | 1011 | 27.1k | self.reset_tables(); | 1012 | 27.1k | last_decoded = None; | 1013 | | // Restarts in the next call to the entry point. | 1014 | 27.1k | break; | 1015 | 3.06M | } | 1016 | | | 1017 | 3.06M | if new_code == self.end_code { | 1018 | 66 | self.has_ended = true; | 1019 | 66 | status = Ok(LzwStatus::Done); | 1020 | 66 | last_decoded = None; | 1021 | 66 | break; | 1022 | 3.06M | } | 1023 | | | 1024 | 3.06M | if new_code > self.next_code { | 1025 | 570 | status = Err(LzwError::InvalidCode); | 1026 | 570 | last_decoded = None; | 1027 | 570 | break; | 1028 | 3.06M | } | 1029 | | | 1030 | 3.06M | let required_len = if new_code == self.next_code { | 1031 | 107k | self.table.depths[usize::from(deriv.code)] + 1 | 1032 | | } else { | 1033 | 2.95M | self.table.depths[usize::from(new_code)] | 1034 | | }; | 1035 | | | 1036 | | // We need the decoded data of the new code if it is the `next_code`. This is the | 1037 | | // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In | 1038 | | // all other cases we only need the first character of the decoded data. | 1039 | 3.06M | let have_next_code = new_code == self.next_code; | 1040 | | | 1041 | | // Update the slice holding the last decoded word. | 1042 | 3.06M | if have_next_code { | 1043 | | // If we did not have any burst code, we still hold that slice in the buffer. | 1044 | 107k | if let Some(new_last) = target[..burst_size - 1].last_mut() { | 1045 | 20.5k | let slice = core::mem::replace(new_last, &mut []); | 1046 | 20.5k | last_decoded = Some(&*slice); | 1047 | 86.5k | } | 1048 | 2.95M | } | 1049 | | | 1050 | | let cha; | 1051 | 3.06M | let is_in_buffer = usize::from(required_len) > out.len(); | 1052 | | // Check if we will need to store our current state into the buffer. | 1053 | 3.06M | if is_in_buffer { | 1054 | 52.1k | if have_next_code { | 1055 | | // last_decoded will be Some if we have restored any code into the out slice. | 1056 | | // Otherwise it will still be present in the buffer. | 1057 | 39.6k | if let Some(last) = last_decoded.take() { | 1058 | 10.9k | self.buffer.bytes[..last.len()].copy_from_slice(last); | 1059 | 10.9k | self.buffer.write_mark = last.len(); | 1060 | 10.9k | self.buffer.read_mark = last.len(); | 1061 | 28.6k | } | 1062 | | | 1063 | 39.6k | cha = self.buffer.fill_cscsc(); | 1064 | 12.5k | } else { | 1065 | 12.5k | // Restore the decoded word into the buffer. | 1066 | 12.5k | last_decoded = None; | 1067 | 12.5k | cha = self.buffer.fill_reconstruct(&self.table, new_code); | 1068 | 12.5k | } | 1069 | | } else { | 1070 | 3.00M | let (target, tail) = out.split_at_mut(usize::from(required_len)); | 1071 | 3.00M | out = tail; | 1072 | | | 1073 | 3.00M | if have_next_code { | 1074 | | // Reconstruct high. | 1075 | 67.4k | let source = match last_decoded.take() { | 1076 | 55.2k | Some(last) => last, | 1077 | 12.1k | None => &self.buffer.bytes[..self.buffer.write_mark], | 1078 | | }; | 1079 | | | 1080 | | // We don't *actually* expect the unwrap to happen. Each source is at least 1 | 1081 | | // byte long. But llvm doesn't know this (too much indirect loads and cases). | 1082 | 67.4k | cha = source.get(0).map(|x| *x).unwrap_or(0); | 1083 | 67.4k | target[..source.len()].copy_from_slice(source); | 1084 | 67.4k | target[source.len()..][0] = cha; | 1085 | 2.94M | } else { | 1086 | 2.94M | cha = self.table.reconstruct(new_code, target); | 1087 | 2.94M | } | 1088 | | | 1089 | | // A new decoded word. | 1090 | 3.00M | last_decoded = Some(target); | 1091 | | } | 1092 | | | 1093 | | // Each newly read code creates one new code/link based on the preceding code if we | 1094 | | // have enough space to put it there. | 1095 | 3.06M | if !self.table.is_full() { | 1096 | 625k | self.table.derive(&deriv, cha); | 1097 | | | 1098 | 625k | if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff) | 1099 | 4.28k | && self.code_buffer.code_size() < MAX_CODESIZE | 1100 | 4.09k | { | 1101 | 4.09k | self.bump_code_size(); | 1102 | 621k | } | 1103 | | | 1104 | 625k | self.next_code += 1; | 1105 | 2.43M | } | 1106 | | | 1107 | | // store the information on the decoded word. | 1108 | 3.06M | code_link = Some(DerivationBase { | 1109 | 3.06M | code: new_code, | 1110 | 3.06M | first: cha, | 1111 | 3.06M | }); | 1112 | | | 1113 | | // Can't make any more progress with decoding. | 1114 | | // | 1115 | | // We have more data buffered but not enough space to put it? We want fetch a next | 1116 | | // symbol if possible as in the case of it being a new symbol we can refer to the | 1117 | | // buffered output as the source for that symbol's meaning and do a memcpy. | 1118 | | // | 1119 | | // Since this test is after decoding at least one code, we can now check for an | 1120 | | // empty buffer and still guarantee progress when one was passed as a parameter. | 1121 | 3.06M | if is_in_buffer || out.is_empty() { | 1122 | 70.0k | break; | 1123 | 2.99M | } | 1124 | | } | 1125 | 62.2k | } | 1126 | | | 1127 | | // We need to store the last word into the buffer in case the first code in the next | 1128 | | // iteration is the next_code. | 1129 | 163k | if let Some(tail) = last_decoded { | 1130 | 18.3k | self.buffer.bytes[..tail.len()].copy_from_slice(tail); | 1131 | 18.3k | self.buffer.write_mark = tail.len(); | 1132 | 18.3k | // Mark the full buffer as having been consumed. | 1133 | 18.3k | self.buffer.read_mark = tail.len(); | 1134 | 145k | } | 1135 | | | 1136 | | // Ensure we don't indicate that no progress was made if we read some bytes from the input | 1137 | | // (which is progress). | 1138 | 163k | if o_in > inp.len() { | 1139 | 97.4k | if let Ok(LzwStatus::NoProgress) = status { | 1140 | 17.7k | status = Ok(LzwStatus::Ok); | 1141 | 80.2k | } | 1142 | 65.9k | } | 1143 | | | 1144 | | // Store the code/link state. | 1145 | 163k | self.last = code_link; | 1146 | | | 1147 | 163k | BufferResult { | 1148 | 163k | consumed_in: o_in.wrapping_sub(inp.len()), | 1149 | 163k | consumed_out: o_out.wrapping_sub(out.len()), | 1150 | 163k | status, | 1151 | 163k | } | 1152 | 164k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance |
1153 | | } |
1154 | | |
1155 | | impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> { |
1156 | 85.0k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { |
1157 | 85.0k | self.code_buffer.next_symbol(inp) |
1158 | 85.0k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol Line | Count | Source | 1156 | 53.6k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { | 1157 | 53.6k | self.code_buffer.next_symbol(inp) | 1158 | 53.6k | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol Line | Count | Source | 1156 | 31.3k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { | 1157 | 31.3k | self.code_buffer.next_symbol(inp) | 1158 | 31.3k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol |
1159 | | |
1160 | 53.7k | fn bump_code_size(&mut self) { |
1161 | 53.7k | self.code_buffer.bump_code_size() |
1162 | 53.7k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size Line | Count | Source | 1160 | 49.7k | fn bump_code_size(&mut self) { | 1161 | 49.7k | self.code_buffer.bump_code_size() | 1162 | 49.7k | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size Line | Count | Source | 1160 | 4.09k | fn bump_code_size(&mut self) { | 1161 | 4.09k | self.code_buffer.bump_code_size() | 1162 | 4.09k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size |
1163 | | |
1164 | 4.44M | fn refill_bits(&mut self, inp: &mut &[u8]) { |
1165 | 4.44M | self.code_buffer.refill_bits(inp) |
1166 | 4.44M | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits Line | Count | Source | 1164 | 1.35M | fn refill_bits(&mut self, inp: &mut &[u8]) { | 1165 | 1.35M | self.code_buffer.refill_bits(inp) | 1166 | 1.35M | } |
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits Line | Count | Source | 1164 | 3.08M | fn refill_bits(&mut self, inp: &mut &[u8]) { | 1165 | 3.08M | self.code_buffer.refill_bits(inp) | 1166 | 3.08M | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits |
1167 | | } |
1168 | | |
1169 | | impl CodeBuffer for MsbBuffer { |
1170 | 2.33k | fn new(min_size: u8) -> Self { |
1171 | 2.33k | MsbBuffer { |
1172 | 2.33k | code_size: min_size + 1, |
1173 | 2.33k | code_mask: (1u16 << (min_size + 1)) - 1, |
1174 | 2.33k | bit_buffer: 0, |
1175 | 2.33k | bits: 0, |
1176 | 2.33k | } |
1177 | 2.33k | } |
1178 | | |
1179 | 29.8k | fn reset(&mut self, min_size: u8) { |
1180 | 29.8k | self.code_size = min_size + 1; |
1181 | 29.8k | self.code_mask = (1 << self.code_size) - 1; |
1182 | 29.8k | } |
1183 | | |
1184 | 31.3k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { |
1185 | 31.3k | if self.bits < self.code_size { |
1186 | 4.23k | self.refill_bits(inp); |
1187 | 27.1k | } |
1188 | | |
1189 | 31.3k | if self.bits < self.code_size { |
1190 | 73 | return None; |
1191 | 31.3k | } |
1192 | | |
1193 | 31.3k | let mask = u64::from(self.code_mask); |
1194 | 31.3k | let rotbuf = self.bit_buffer.rotate_left(self.code_size.into()); |
1195 | 31.3k | self.bit_buffer = rotbuf & !mask; |
1196 | 31.3k | self.bits -= self.code_size; |
1197 | 31.3k | Some((rotbuf & mask) as u16) |
1198 | 31.3k | } |
1199 | | |
1200 | 4.09k | fn bump_code_size(&mut self) { |
1201 | 4.09k | self.code_size += 1; |
1202 | 4.09k | self.code_mask = (self.code_mask << 1) | 1; |
1203 | 4.09k | } |
1204 | | |
1205 | 3.09M | fn refill_bits(&mut self, inp: &mut &[u8]) { |
1206 | 3.09M | let wish_count = (64 - self.bits) / 8; |
1207 | 3.09M | let mut buffer = [0u8; 8]; |
1208 | 3.09M | let new_bits = match inp.get(..usize::from(wish_count)) { |
1209 | 3.08M | Some(bytes) => { |
1210 | 3.08M | buffer[..usize::from(wish_count)].copy_from_slice(bytes); |
1211 | 3.08M | *inp = &inp[usize::from(wish_count)..]; |
1212 | 3.08M | wish_count * 8 |
1213 | | } |
1214 | | None => { |
1215 | 4.42k | let new_bits = inp.len() * 8; |
1216 | 4.42k | buffer[..inp.len()].copy_from_slice(inp); |
1217 | 4.42k | *inp = &[]; |
1218 | 4.42k | new_bits as u8 |
1219 | | } |
1220 | | }; |
1221 | 3.09M | self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits; |
1222 | 3.09M | self.bits += new_bits; |
1223 | 3.09M | } |
1224 | | |
1225 | 3.08M | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize { |
1226 | 3.08M | let mut bit_buffer = self.bit_buffer; |
1227 | 3.08M | let mask = u64::from(self.code_mask); |
1228 | 3.08M | let mut consumed = 0; |
1229 | 3.08M | let mut cnt = 0; |
1230 | | |
1231 | 17.8M | for b in code { |
1232 | 17.4M | let consumed_after = consumed + self.code_size; |
1233 | 17.4M | if consumed_after > self.bits { |
1234 | 2.68M | break; |
1235 | 14.7M | } |
1236 | | |
1237 | 14.7M | cnt += 1; |
1238 | 14.7M | consumed = consumed_after; |
1239 | | |
1240 | 14.7M | let rotbuf = bit_buffer.rotate_left(self.code_size.into()); |
1241 | 14.7M | *b = (rotbuf & mask) as u16; |
1242 | | // The read bits are 'appended' but we never interpret those appended bits. |
1243 | 14.7M | bit_buffer = rotbuf; |
1244 | | } |
1245 | | |
1246 | 3.08M | cnt |
1247 | 3.08M | } |
1248 | | |
1249 | 3.08M | fn consume_bits(&mut self, code_cnt: u8) { |
1250 | 3.08M | let bits = self.code_size * code_cnt; |
1251 | 3.08M | debug_assert!(bits <= self.bits); |
1252 | | |
1253 | 3.08M | if bits >= self.bits { |
1254 | 18.0k | self.bit_buffer = 0; |
1255 | 3.07M | } else { |
1256 | 3.07M | // bits < self.bits so this must be smaller than the number size. |
1257 | 3.07M | self.bit_buffer = self.bit_buffer << bits; |
1258 | 3.07M | } |
1259 | | |
1260 | 3.08M | self.bits = self.bits.wrapping_sub(bits); |
1261 | 3.08M | } |
1262 | | |
1263 | 3.71M | fn max_code(&self) -> Code { |
1264 | 3.71M | self.code_mask |
1265 | 3.71M | } |
1266 | | |
1267 | 4.28k | fn code_size(&self) -> u8 { |
1268 | 4.28k | self.code_size |
1269 | 4.28k | } |
1270 | | } |
1271 | | |
1272 | | impl CodeBuffer for LsbBuffer { |
1273 | 2.42k | fn new(min_size: u8) -> Self { |
1274 | 2.42k | LsbBuffer { |
1275 | 2.42k | code_size: min_size + 1, |
1276 | 2.42k | code_mask: (1u16 << (min_size + 1)) - 1, |
1277 | 2.42k | bit_buffer: 0, |
1278 | 2.42k | bits: 0, |
1279 | 2.42k | } |
1280 | 2.42k | } |
1281 | | |
1282 | 53.0k | fn reset(&mut self, min_size: u8) { |
1283 | 53.0k | self.code_size = min_size + 1; |
1284 | 53.0k | self.code_mask = (1 << self.code_size) - 1; |
1285 | 53.0k | } |
1286 | | |
1287 | 53.6k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { |
1288 | 53.6k | if self.bits < self.code_size { |
1289 | 3.21k | self.refill_bits(inp); |
1290 | 50.4k | } |
1291 | | |
1292 | 53.6k | if self.bits < self.code_size { |
1293 | 334 | return None; |
1294 | 53.2k | } |
1295 | | |
1296 | 53.2k | let mask = u64::from(self.code_mask); |
1297 | 53.2k | let code = self.bit_buffer & mask; |
1298 | 53.2k | self.bit_buffer >>= self.code_size; |
1299 | 53.2k | self.bits -= self.code_size; |
1300 | 53.2k | Some(code as u16) |
1301 | 53.6k | } |
1302 | | |
1303 | 49.7k | fn bump_code_size(&mut self) { |
1304 | 49.7k | self.code_size += 1; |
1305 | 49.7k | self.code_mask = (self.code_mask << 1) | 1; |
1306 | 49.7k | } |
1307 | | |
1308 | 1.36M | fn refill_bits(&mut self, inp: &mut &[u8]) { |
1309 | 1.36M | let wish_count = (64 - self.bits) / 8; |
1310 | 1.36M | let mut buffer = [0u8; 8]; |
1311 | 1.36M | let new_bits = match inp.get(..usize::from(wish_count)) { |
1312 | 900k | Some(bytes) => { |
1313 | 900k | buffer[..usize::from(wish_count)].copy_from_slice(bytes); |
1314 | 900k | *inp = &inp[usize::from(wish_count)..]; |
1315 | 900k | wish_count * 8 |
1316 | | } |
1317 | | None => { |
1318 | 461k | let new_bits = inp.len() * 8; |
1319 | 461k | buffer[..inp.len()].copy_from_slice(inp); |
1320 | 461k | *inp = &[]; |
1321 | 461k | new_bits as u8 |
1322 | | } |
1323 | | }; |
1324 | 1.36M | self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits; |
1325 | 1.36M | self.bits += new_bits; |
1326 | 1.36M | } |
1327 | | |
1328 | 1.35M | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize { |
1329 | 1.35M | let mut bit_buffer = self.bit_buffer; |
1330 | 1.35M | let mask = u64::from(self.code_mask); |
1331 | 1.35M | let mut consumed = 0; |
1332 | 1.35M | let mut cnt = 0; |
1333 | | |
1334 | 6.21M | for b in code { |
1335 | 6.00M | let consumed_after = consumed + self.code_size; |
1336 | 6.00M | if consumed_after > self.bits { |
1337 | 1.15M | break; |
1338 | 4.85M | } |
1339 | | |
1340 | 4.85M | cnt += 1; |
1341 | 4.85M | consumed = consumed_after; |
1342 | | |
1343 | 4.85M | *b = (bit_buffer & mask) as u16; |
1344 | 4.85M | bit_buffer = bit_buffer >> self.code_size; |
1345 | | } |
1346 | | |
1347 | 1.35M | cnt |
1348 | 1.35M | } |
1349 | | |
1350 | 1.12M | fn consume_bits(&mut self, code_cnt: u8) { |
1351 | 1.12M | let bits = self.code_size * code_cnt; |
1352 | 1.12M | debug_assert!(bits <= self.bits); |
1353 | | |
1354 | 1.12M | if bits >= self.bits { |
1355 | 134k | self.bit_buffer = 0; |
1356 | 988k | } else { |
1357 | 988k | // bits < self.bits so this must be smaller than the number size. |
1358 | 988k | self.bit_buffer = self.bit_buffer >> bits; |
1359 | 988k | } |
1360 | | |
1361 | 1.12M | self.bits = self.bits.wrapping_sub(bits); |
1362 | 1.12M | } |
1363 | | |
1364 | 1.84M | fn max_code(&self) -> Code { |
1365 | 1.84M | self.code_mask |
1366 | 1.84M | } |
1367 | | |
1368 | 49.9k | fn code_size(&self) -> u8 { |
1369 | 49.9k | self.code_size |
1370 | 49.9k | } |
1371 | | } |
1372 | | |
1373 | | impl Buffer { |
1374 | 4.75k | fn new() -> Self { |
1375 | 4.75k | Buffer { |
1376 | 4.75k | bytes: vec![0; MAX_ENTRIES].into_boxed_slice(), |
1377 | 4.75k | read_mark: 0, |
1378 | 4.75k | write_mark: 0, |
1379 | 4.75k | } |
1380 | 4.75k | } |
1381 | | |
1382 | | /// When encoding a sequence `cScSc` where `c` is any character and `S` is any string |
1383 | | /// this results in two codes `AB`, `A` encoding `cS` and `B` encoding `cSc`. Supposing |
1384 | | /// the buffer is already filled with the reconstruction of `A`, we can easily fill it |
1385 | | /// with the reconstruction of `B`. |
1386 | 49.1k | fn fill_cscsc(&mut self) -> u8 { |
1387 | 49.1k | self.bytes[self.write_mark] = self.bytes[0]; |
1388 | 49.1k | self.write_mark += 1; |
1389 | 49.1k | self.read_mark = 0; |
1390 | 49.1k | self.bytes[0] |
1391 | 49.1k | } |
1392 | | |
1393 | | // Fill the buffer by decoding from the table |
1394 | 114k | fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 { |
1395 | 114k | self.write_mark = 0; |
1396 | 114k | self.read_mark = 0; |
1397 | 114k | let depth = table.depths[usize::from(code)]; |
1398 | 114k | let mut memory = core::mem::replace(&mut self.bytes, Box::default()); |
1399 | | |
1400 | 114k | let out = &mut memory[..usize::from(depth)]; |
1401 | 114k | let last = table.reconstruct(code, out); |
1402 | | |
1403 | 114k | self.bytes = memory; |
1404 | 114k | self.write_mark = usize::from(depth); |
1405 | 114k | last |
1406 | 114k | } |
1407 | | |
1408 | 1.43M | fn buffer(&self) -> &[u8] { |
1409 | 1.43M | &self.bytes[self.read_mark..self.write_mark] |
1410 | 1.43M | } |
1411 | | |
1412 | 450k | fn consume(&mut self, amt: usize) { |
1413 | 450k | self.read_mark += amt; |
1414 | 450k | } |
1415 | | } |
1416 | | |
1417 | | impl Table { |
1418 | 4.75k | fn new() -> Self { |
1419 | 4.75k | Table { |
1420 | 4.75k | inner: Vec::with_capacity(MAX_ENTRIES), |
1421 | 4.75k | depths: Vec::with_capacity(MAX_ENTRIES), |
1422 | 4.75k | } |
1423 | 4.75k | } |
1424 | | |
1425 | 68.1k | fn clear(&mut self, min_size: u8) { |
1426 | 68.1k | let static_count = usize::from(1u16 << u16::from(min_size)) + 2; |
1427 | 68.1k | self.inner.truncate(static_count); |
1428 | 68.1k | self.depths.truncate(static_count); |
1429 | 68.1k | } |
1430 | | |
1431 | 14.7k | fn init(&mut self, min_size: u8) { |
1432 | 14.7k | self.inner.clear(); |
1433 | 14.7k | self.depths.clear(); |
1434 | 1.16M | for i in 0..(1u16 << u16::from(min_size)) { |
1435 | 1.16M | self.inner.push(Link::base(i as u8)); |
1436 | 1.16M | self.depths.push(1); |
1437 | 1.16M | } |
1438 | | // Clear code. |
1439 | 14.7k | self.inner.push(Link::base(0)); |
1440 | 14.7k | self.depths.push(0); |
1441 | | // End code. |
1442 | 14.7k | self.inner.push(Link::base(0)); |
1443 | 14.7k | self.depths.push(0); |
1444 | 14.7k | } |
1445 | | |
1446 | 72.2k | fn at(&self, code: Code) -> &Link { |
1447 | 72.2k | &self.inner[usize::from(code)] |
1448 | 72.2k | } |
1449 | | |
1450 | 72.2k | fn is_empty(&self) -> bool { |
1451 | 72.2k | self.inner.is_empty() |
1452 | 72.2k | } |
1453 | | |
1454 | 8.35M | fn is_full(&self) -> bool { |
1455 | 8.35M | self.inner.len() >= MAX_ENTRIES |
1456 | 8.35M | } |
1457 | | |
1458 | 1.34M | fn derive(&mut self, from: &DerivationBase, byte: u8) { |
1459 | 1.34M | let link = from.derive(byte); |
1460 | 1.34M | let depth = self.depths[usize::from(from.code)] + 1; |
1461 | 1.34M | self.inner.push(link); |
1462 | 1.34M | self.depths.push(depth); |
1463 | 1.34M | } |
1464 | | |
1465 | | // Derive multiple codes in a row, where each base is guaranteed to already exist. |
1466 | 4.21M | fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) { |
1467 | 4.21M | let mut depth_of = from.code; |
1468 | | // Note that false data dependency we want to get rid of! |
1469 | | // TODO: this pushes into a Vec, maybe we can make this cleaner. |
1470 | 8.26M | for &code in burst { |
1471 | 4.05M | let depth = self.depths[usize::from(depth_of)] + 1; |
1472 | 4.05M | self.depths.push(depth); |
1473 | 4.05M | depth_of = code; |
1474 | 4.05M | } |
1475 | | |
1476 | | // Llvm tends to be flaky with code layout for the case of requiring an allocation. It's |
1477 | | // not clear if that can occur in practice but it relies on iterator size hint.. |
1478 | 4.21M | let extensions = burst.iter().zip(first); |
1479 | 4.21M | self.inner.extend(extensions.map(|(&code, &first)| { |
1480 | 4.05M | let link = from.derive(first); |
1481 | 4.05M | from.code = code; |
1482 | 4.05M | from.first = first; |
1483 | 4.05M | link |
1484 | 4.05M | })); |
1485 | 4.21M | } |
1486 | | |
1487 | 8.08M | fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 { |
1488 | 8.08M | let mut code_iter = code; |
1489 | 8.08M | let table = &self.inner[..=usize::from(code)]; |
1490 | 8.08M | let first = table[usize::from(code)].first; |
1491 | | |
1492 | 8.08M | let len = code_iter; |
1493 | 36.9M | for ch in out.iter_mut().rev() { |
1494 | 36.9M | //(code, cha) = self.table[k as usize]; |
1495 | 36.9M | // Note: This could possibly be replaced with an unchecked array access if |
1496 | 36.9M | // - value is asserted to be < self.next_code() in push |
1497 | 36.9M | // - min_size is asserted to be < MAX_CODESIZE |
1498 | 36.9M | let entry = &table[usize::from(code_iter)]; |
1499 | 36.9M | code_iter = core::cmp::min(len, entry.prev); |
1500 | 36.9M | *ch = entry.byte; |
1501 | 36.9M | } |
1502 | | |
1503 | 8.08M | first |
1504 | 8.08M | } |
1505 | | } |
1506 | | |
1507 | | impl Link { |
1508 | 1.19M | fn base(byte: u8) -> Self { |
1509 | 1.19M | Link { |
1510 | 1.19M | prev: 0, |
1511 | 1.19M | byte, |
1512 | 1.19M | first: byte, |
1513 | 1.19M | } |
1514 | 1.19M | } |
1515 | | } |
1516 | | |
1517 | | impl DerivationBase { |
1518 | | // TODO: this has self type to make it clear we might depend on the old in a future |
1519 | | // optimization. However, that has no practical purpose right now. |
1520 | 5.39M | fn derive(&self, byte: u8) -> Link { |
1521 | 5.39M | Link { |
1522 | 5.39M | prev: self.code, |
1523 | 5.39M | byte, |
1524 | 5.39M | first: self.first, |
1525 | 5.39M | } |
1526 | 5.39M | } |
1527 | | } |
1528 | | |
1529 | | #[cfg(test)] |
1530 | | mod tests { |
1531 | | use crate::alloc::vec::Vec; |
1532 | | #[cfg(feature = "std")] |
1533 | | use crate::StreamBuf; |
1534 | | use crate::{decode::Decoder, BitOrder}; |
1535 | | |
1536 | | #[test] |
1537 | | fn invalid_code_size_low() { |
1538 | | let _ = Decoder::new(BitOrder::Msb, 0); |
1539 | | let _ = Decoder::new(BitOrder::Msb, 1); |
1540 | | } |
1541 | | |
1542 | | #[test] |
1543 | | #[should_panic] |
1544 | | fn invalid_code_size_high() { |
1545 | | let _ = Decoder::new(BitOrder::Msb, 14); |
1546 | | } |
1547 | | |
1548 | | fn make_encoded() -> Vec<u8> { |
1549 | | const FILE: &'static [u8] = include_bytes!(concat!( |
1550 | | env!("CARGO_MANIFEST_DIR"), |
1551 | | "/benches/binary-8-msb.lzw" |
1552 | | )); |
1553 | | return Vec::from(FILE); |
1554 | | } |
1555 | | |
1556 | | #[test] |
1557 | | #[cfg(feature = "std")] |
1558 | | fn into_stream_buffer_no_alloc() { |
1559 | | let encoded = make_encoded(); |
1560 | | let mut decoder = Decoder::new(BitOrder::Msb, 8); |
1561 | | |
1562 | | let mut output = vec![]; |
1563 | | let mut buffer = [0; 512]; |
1564 | | let mut istream = decoder.into_stream(&mut output); |
1565 | | istream.set_buffer(&mut buffer[..]); |
1566 | | istream.decode(&encoded[..]).status.unwrap(); |
1567 | | |
1568 | | match istream.buffer { |
1569 | | Some(StreamBuf::Borrowed(_)) => {} |
1570 | | None => panic!("Decoded without buffer??"), |
1571 | | Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), |
1572 | | } |
1573 | | } |
1574 | | |
1575 | | #[test] |
1576 | | #[cfg(feature = "std")] |
1577 | | fn into_stream_buffer_small_alloc() { |
1578 | | struct WriteTap<W: std::io::Write>(W); |
1579 | | const BUF_SIZE: usize = 512; |
1580 | | |
1581 | | impl<W: std::io::Write> std::io::Write for WriteTap<W> { |
1582 | | fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { |
1583 | | assert!(buf.len() <= BUF_SIZE); |
1584 | | self.0.write(buf) |
1585 | | } |
1586 | | fn flush(&mut self) -> std::io::Result<()> { |
1587 | | self.0.flush() |
1588 | | } |
1589 | | } |
1590 | | |
1591 | | let encoded = make_encoded(); |
1592 | | let mut decoder = Decoder::new(BitOrder::Msb, 8); |
1593 | | |
1594 | | let mut output = vec![]; |
1595 | | let mut istream = decoder.into_stream(WriteTap(&mut output)); |
1596 | | istream.set_buffer_size(512); |
1597 | | istream.decode(&encoded[..]).status.unwrap(); |
1598 | | |
1599 | | match istream.buffer { |
1600 | | Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), |
1601 | | Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), |
1602 | | None => panic!("Decoded without buffer??"), |
1603 | | } |
1604 | | } |
1605 | | |
1606 | | #[test] |
1607 | | #[cfg(feature = "std")] |
1608 | | fn reset() { |
1609 | | let encoded = make_encoded(); |
1610 | | let mut decoder = Decoder::new(BitOrder::Msb, 8); |
1611 | | let mut reference = None; |
1612 | | |
1613 | | for _ in 0..2 { |
1614 | | let mut output = vec![]; |
1615 | | let mut buffer = [0; 512]; |
1616 | | let mut istream = decoder.into_stream(&mut output); |
1617 | | istream.set_buffer(&mut buffer[..]); |
1618 | | istream.decode_all(&encoded[..]).status.unwrap(); |
1619 | | |
1620 | | decoder.reset(); |
1621 | | if let Some(reference) = &reference { |
1622 | | assert_eq!(output, *reference); |
1623 | | } else { |
1624 | | reference = Some(output); |
1625 | | } |
1626 | | } |
1627 | | } |
1628 | | } |