/rust/registry/src/index.crates.io-6f17d22bba15001f/weezl-0.1.10/src/decode.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! A module for all decoding needs. |
2 | | #[cfg(feature = "std")] |
3 | | use crate::error::StreamResult; |
4 | | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; |
5 | | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; |
6 | | |
7 | | use crate::alloc::{boxed::Box, vec, vec::Vec}; |
8 | | #[cfg(feature = "std")] |
9 | | use std::io::{self, BufRead, Write}; |
10 | | |
/// The state for decoding data with an LZW algorithm.
///
/// The same structure can be utilized with streams as well as your own buffers and driver logic.
/// It may even be possible to mix them if you are sufficiently careful not to lose or skip any
/// already decoded data in the process.
///
/// This is a sans-IO implementation, meaning that it only contains the state of the decoder and
/// the caller will provide buffers for input and output data when calling the basic
/// [`decode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*`
/// methods for decoding with a particular style of common IO.
///
/// * [`decode`] for decoding once without any IO-loop.
/// * [`into_async`] for decoding with the `futures` traits for asynchronous IO.
/// * [`into_stream`] for decoding with the standard `io` traits.
/// * [`into_vec`] for in-memory decoding.
///
/// [`decode_bytes`]: #method.decode_bytes
/// [`decode`]: #method.decode
/// [`into_async`]: #method.into_async
/// [`into_stream`]: #method.into_stream
/// [`into_vec`]: #method.into_vec
pub struct Decoder {
    /// Virtual dispatch over the concrete `DecodeState` instantiation selected at
    /// construction time (bit order × yield-on-full codegen constant).
    state: Box<dyn Stateful + Send + 'static>,
}
35 | | |
/// A decoding stream sink.
///
/// See [`Decoder::into_stream`] on how to create this type.
///
/// [`Decoder::into_stream`]: struct.Decoder.html#method.into_stream
#[cfg_attr(
    not(feature = "std"),
    deprecated = "This type is only useful with the `std` feature."
)]
#[cfg_attr(not(feature = "std"), allow(dead_code))]
pub struct IntoStream<'d, W> {
    /// The decoder that drives the conversion.
    decoder: &'d mut Decoder,
    /// The sink every decoded chunk is written to.
    writer: W,
    /// Intermediate decode buffer; lazily allocated on first use unless one was
    /// supplied via `set_buffer`.
    buffer: Option<StreamBuf<'d>>,
    /// Size used when the intermediate buffer has to be allocated
    /// (see `set_buffer_size`).
    default_size: usize,
}
52 | | |
/// An async decoding sink.
///
/// See [`Decoder::into_async`] on how to create this type.
///
/// [`Decoder::into_async`]: struct.Decoder.html#method.into_async
#[cfg(feature = "async")]
pub struct IntoAsync<'d, W> {
    /// The decoder that drives the conversion.
    decoder: &'d mut Decoder,
    /// The async sink for decoded data.
    writer: W,
    /// Intermediate decode buffer — presumably managed like `IntoStream`'s;
    /// the impl lives in `decode_into_async.rs` (confirm there).
    buffer: Option<StreamBuf<'d>>,
    /// Size used when the intermediate buffer has to be allocated.
    default_size: usize,
}
65 | | |
/// A decoding sink into a vector.
///
/// See [`Decoder::into_vec`] on how to create this type.
///
/// [`Decoder::into_vec`]: struct.Decoder.html#method.into_vec
pub struct IntoVec<'d> {
    /// The decoder that drives the conversion.
    decoder: &'d mut Decoder,
    /// Output vector; decoded data is appended, existing contents are preserved.
    vector: &'d mut Vec<u8>,
}
75 | | |
/// Object-safe interface implemented by every concrete `DecodeState`
/// instantiation, allowing `Decoder` to erase bit order and codegen flags.
trait Stateful {
    /// Decode a prefix of `inp` into a prefix of `out`, reporting consumed
    /// counts and status in the returned `BufferResult`.
    fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
    /// Whether an end code has been encountered.
    fn has_ended(&self) -> bool;
    /// Ignore an end code and continue decoding (no implied reset).
    fn restart(&mut self);
    /// Reset the decoder to the beginning, dropping all buffers etc.
    fn reset(&mut self);
}
84 | | |
/// One entry of the decode table, deriving a word from a previous code.
#[derive(Clone)]
struct Link {
    /// Code of the prefix word this entry extends.
    prev: Code,
    /// The byte appended to the prefix word.
    byte: u8,
    /// Cached first byte of the whole word — presumably mirrors
    /// `DerivationBase::first`; confirm against the `Table` impl.
    first: u8,
}
91 | | |
/// The code that the next dictionary entry will be derived from, together with
/// the cached first byte of its word.
#[derive(Clone)]
struct DerivationBase {
    /// The previously decoded code.
    code: Code,
    /// First byte of the word for `code`.
    first: u8,
}
97 | | |
/// Code reader for MSB-first bit order.
#[derive(Default)]
struct MsbBuffer {
    /// A buffer of individual bits. The oldest code is kept in the high-order bits.
    bit_buffer: u64,
    /// A precomputed mask for this code.
    code_mask: u16,
    /// The current code size.
    code_size: u8,
    /// The number of bits in the buffer.
    bits: u8,
}
109 | | |
/// Code reader for LSB-first bit order.
#[derive(Default)]
struct LsbBuffer {
    /// A buffer of individual bits.
    /// NOTE(review): the previous comment ("oldest code kept in the high-order
    /// bits") appears copy-pasted from `MsbBuffer`; for LSB-first order the
    /// oldest code is presumably in the *low*-order bits — confirm against the
    /// `CodeBuffer` impl for this type.
    bit_buffer: u64,
    /// A precomputed mask for this code.
    code_mask: u16,
    /// The current code size.
    code_size: u8,
    /// The number of bits in the buffer.
    bits: u8,
}
121 | | |
/// Bit-order dependent extraction of code words from the input byte stream.
trait CodeBuffer {
    /// Create a buffer for codes starting at `min_size` bits.
    fn new(min_size: u8) -> Self;
    /// Return to the initial `min_size`-bit state.
    fn reset(&mut self, min_size: u8);
    /// Grow the current code size (presumably by one bit — confirm in impls).
    fn bump_code_size(&mut self);

    /// Retrieve the next symbol, refilling if necessary.
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code>;
    /// Refill the internal buffer.
    fn refill_bits(&mut self, inp: &mut &[u8]);

    /// Peek up to `BURST` codes out of the buffered bits; the return value is
    /// presumably the number of valid entries written — confirm in impls.
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize;
    /// Drop `code_cnt` previously peeked codes from the buffer.
    fn consume_bits(&mut self, code_cnt: u8);

    /// Largest code representable at the current code size.
    fn max_code(&self) -> Code;
    /// The current code size in bits.
    fn code_size(&self) -> u8;
}
138 | | |
/// Compile-time flags that select specialized codegen variants of the decode loop.
trait CodegenConstants {
    /// If true, yield to the caller as soon as the output buffer is full
    /// (see `Configuration::with_yield_on_full_buffer`).
    const YIELD_ON_FULL: bool;
}
142 | | |
/// The complete sans-IO decoder state, generic over the bit-order specific
/// `CodeBuffer` and compile-time `Constants`.
struct DecodeState<CodeBuffer, Constants: CodegenConstants> {
    /// The original minimum code size.
    min_size: u8,
    /// The table of decoded codes.
    table: Table,
    /// The buffer of decoded data.
    buffer: Buffer,
    /// The link which we are still decoding and its original code.
    last: Option<DerivationBase>,
    /// The next code entry.
    next_code: Code,
    /// Code to reset all tables.
    clear_code: Code,
    /// Code to signal the end of the stream.
    end_code: Code,
    /// A stored flag if the end code has already appeared.
    has_ended: bool,
    /// If tiff then bumps are a single code sooner.
    is_tiff: bool,
    /// Do we allow stream to start without an explicit reset code?
    implicit_reset: bool,
    /// The buffer for decoded words.
    code_buffer: CodeBuffer,
    /// Zero-sized marker carrying the codegen constants into the type.
    #[allow(dead_code)]
    constants: core::marker::PhantomData<Constants>,
}
169 | | |
// We have a buffer of 64 bits. So at the maximum code size at most 5 codes can be read at once
// without refilling the buffer. At smaller code sizes more fit. We tune for 6 here, by slight
// experimentation. This may be an architecture dependent constant.
const BURST: usize = 6;
174 | | |
/// Intermediate store for decoded bytes that did not fit the caller's output.
struct Buffer {
    /// Backing storage for buffered decoded bytes.
    bytes: Box<[u8]>,
    /// Read position — presumably how far the caller has drained `bytes`;
    /// `reset` zeroes it. Confirm against the `Buffer` impl.
    read_mark: usize,
    /// Write position — presumably how far `bytes` has been filled;
    /// `reset` zeroes it. Confirm against the `Buffer` impl.
    write_mark: usize,
}
180 | | |
/// The decode dictionary mapping codes to derived words.
struct Table {
    /// One `Link` per code, deriving each word from its prefix.
    inner: Vec<Link>,
    /// Per-code word lengths — presumably the depth of each derivation chain;
    /// confirm against the `Table` impl.
    depths: Vec<u16>,
}
185 | | |
/// Describes the static parameters for creating a decoder.
#[derive(Clone, Debug)]
pub struct Configuration {
    /// Bit packing order of code words in the input stream.
    order: BitOrder,
    /// Minimum code (symbol) size in bits.
    size: u8,
    /// Use the TIFF early code-size switch (see `with_tiff_size_switch`).
    tiff: bool,
    /// Yield to the caller when the output buffer fills
    /// (see `with_yield_on_full_buffer`).
    yield_on_full: bool,
}
194 | | |
195 | | impl Configuration { |
196 | | /// Create a configuration to decode with the specified bit order and symbol size. |
197 | 1.38k | pub fn new(order: BitOrder, size: u8) -> Self { |
198 | 1.38k | super::assert_decode_size(size); |
199 | 1.38k | Configuration { |
200 | 1.38k | order, |
201 | 1.38k | size, |
202 | 1.38k | tiff: false, |
203 | 1.38k | yield_on_full: false, |
204 | 1.38k | } |
205 | 1.38k | } |
206 | | |
207 | | /// Create a configuration for a TIFF compatible decoder. |
208 | 549 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { |
209 | 549 | super::assert_decode_size(size); |
210 | 549 | Configuration { |
211 | 549 | order, |
212 | 549 | size, |
213 | 549 | tiff: true, |
214 | 549 | yield_on_full: false, |
215 | 549 | } |
216 | 549 | } |
217 | | |
218 | | /// Immediately yield to the caller when the decoder buffer is full. |
219 | | /// |
220 | | /// This can be used for `libtiff` compatibility. It will use a "relaxed" stream interpretation |
221 | | /// that need not contain an explicit EOF. Instead, the decoder is expected to stop fetching |
222 | | /// symbols when some out-of-band specified length of the decoded text has been reached. The |
223 | | /// caller indicates this maximum length through the available output buffer space. |
224 | | /// |
225 | | /// Symbols afterwards must not be expected to be valid. On filling the output buffer space |
226 | | /// completely, the decoder will return immediately to the caller instead of potentially |
227 | | /// interpreting the following bit-stream (and returning an error on doing so). |
228 | | /// |
229 | | /// Default: `false`. |
230 | 0 | pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self { |
231 | 0 | Configuration { |
232 | 0 | yield_on_full: do_yield, |
233 | 0 | ..self |
234 | 0 | } |
235 | 0 | } |
236 | | |
237 | | /// Create a new decoder with the define configuration. |
238 | 1.93k | pub fn build(self) -> Decoder { |
239 | 1.93k | Decoder { |
240 | 1.93k | state: Decoder::from_configuration(&self), |
241 | 1.93k | } |
242 | 1.93k | } |
243 | | } |
244 | | |
245 | | impl Decoder { |
246 | | /// Create a new decoder with the specified bit order and symbol size. |
247 | | /// |
248 | | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the |
249 | | /// original specification. In particular you will need to specify an `Lsb` bit oder to decode |
250 | | /// the data portion of a compressed `gif` image. |
251 | | /// |
252 | | /// # Panics |
253 | | /// |
254 | | /// The `size` needs to be in the interval `0..=12`. |
255 | 1.38k | pub fn new(order: BitOrder, size: u8) -> Self { |
256 | 1.38k | Configuration::new(order, size).build() |
257 | 1.38k | } |
258 | | |
259 | | /// Create a TIFF compatible decoder with the specified bit order and symbol size. |
260 | | /// |
261 | | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the |
262 | | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing |
263 | | /// the code size. It switches one symbol sooner. |
264 | | /// |
265 | | /// # Panics |
266 | | /// |
267 | | /// The `size` needs to be in the interval `0..=12`. |
268 | 549 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { |
269 | 549 | Configuration::with_tiff_size_switch(order, size).build() |
270 | 549 | } |
271 | | |
272 | 1.93k | fn from_configuration(configuration: &Configuration) -> Box<dyn Stateful + Send + 'static> { |
273 | | struct NoYield; |
274 | | struct YieldOnFull; |
275 | | |
276 | | impl CodegenConstants for NoYield { |
277 | | const YIELD_ON_FULL: bool = false; |
278 | | } |
279 | | |
280 | | impl CodegenConstants for YieldOnFull { |
281 | | const YIELD_ON_FULL: bool = true; |
282 | | } |
283 | | |
284 | | type Boxed = Box<dyn Stateful + Send + 'static>; |
285 | 1.93k | match (configuration.order, configuration.yield_on_full) { |
286 | | (BitOrder::Lsb, false) => { |
287 | 1.38k | let mut state = |
288 | 1.38k | Box::new(DecodeState::<LsbBuffer, NoYield>::new(configuration.size)); |
289 | 1.38k | state.is_tiff = configuration.tiff; |
290 | 1.38k | state as Boxed |
291 | | } |
292 | | (BitOrder::Lsb, true) => { |
293 | 0 | let mut state = Box::new(DecodeState::<LsbBuffer, YieldOnFull>::new( |
294 | 0 | configuration.size, |
295 | 0 | )); |
296 | 0 | state.is_tiff = configuration.tiff; |
297 | 0 | state as Boxed |
298 | | } |
299 | | (BitOrder::Msb, false) => { |
300 | 549 | let mut state = |
301 | 549 | Box::new(DecodeState::<MsbBuffer, NoYield>::new(configuration.size)); |
302 | 549 | state.is_tiff = configuration.tiff; |
303 | 549 | state as Boxed |
304 | | } |
305 | | (BitOrder::Msb, true) => { |
306 | 0 | let mut state = Box::new(DecodeState::<MsbBuffer, YieldOnFull>::new( |
307 | 0 | configuration.size, |
308 | 0 | )); |
309 | 0 | state.is_tiff = configuration.tiff; |
310 | 0 | state as Boxed |
311 | | } |
312 | | } |
313 | 1.93k | } |
314 | | |
315 | | /// Decode some bytes from `inp` and write result to `out`. |
316 | | /// |
317 | | /// This will consume a prefix of the input buffer and write decoded output into a prefix of |
318 | | /// the output buffer. See the respective fields of the return value for the count of consumed |
319 | | /// and written bytes. For the next call You should have adjusted the inputs accordingly. |
320 | | /// |
321 | | /// The call will try to decode and write as many bytes of output as available. It will be |
322 | | /// much more optimized (and avoid intermediate buffering) if it is allowed to write a large |
323 | | /// contiguous chunk at once. |
324 | | /// |
325 | | /// See [`into_stream`] for high-level functions (that are only available with the `std` |
326 | | /// feature). |
327 | | /// |
328 | | /// [`into_stream`]: #method.into_stream |
329 | 467k | pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { |
330 | 467k | self.state.advance(inp, out) |
331 | 467k | } |
332 | | |
333 | | /// Decode a single chunk of lzw encoded data. |
334 | | /// |
335 | | /// This method requires the data to contain an end marker, and returns an error otherwise. |
336 | | /// |
337 | | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize |
338 | | /// buffer size, to supply an existing vector, to control whether an end marker is required, or |
339 | | /// to preserve partial data in the case of a decoding error. |
340 | | /// |
341 | | /// [`into_vec`]: #into_vec |
342 | | /// |
343 | | /// # Example |
344 | | /// |
345 | | /// ``` |
346 | | /// use weezl::{BitOrder, decode::Decoder}; |
347 | | /// |
348 | | /// // Encoded that was created with an encoder. |
349 | | /// let data = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10"; |
350 | | /// let decoded = Decoder::new(BitOrder::Msb, 9) |
351 | | /// .decode(data) |
352 | | /// .unwrap(); |
353 | | /// assert_eq!(decoded, b"Hello, world"); |
354 | | /// ``` |
355 | 0 | pub fn decode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> { |
356 | 0 | let mut output = vec![]; |
357 | 0 | self.into_vec(&mut output).decode_all(data).status?; |
358 | 0 | Ok(output) |
359 | 0 | } |
360 | | |
361 | | /// Construct a decoder into a writer. |
362 | | #[cfg(feature = "std")] |
363 | 0 | pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> { |
364 | 0 | IntoStream { |
365 | 0 | decoder: self, |
366 | 0 | writer, |
367 | 0 | buffer: None, |
368 | 0 | default_size: STREAM_BUF_SIZE, |
369 | 0 | } |
370 | 0 | } |
371 | | |
372 | | /// Construct a decoder into an async writer. |
373 | | #[cfg(feature = "async")] |
374 | | pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> { |
375 | | IntoAsync { |
376 | | decoder: self, |
377 | | writer, |
378 | | buffer: None, |
379 | | default_size: STREAM_BUF_SIZE, |
380 | | } |
381 | | } |
382 | | |
383 | | /// Construct a decoder into a vector. |
384 | | /// |
385 | | /// All decoded data is appended and the vector is __not__ cleared. |
386 | | /// |
387 | | /// Compared to `into_stream` this interface allows a high-level access to decoding without |
388 | | /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the |
389 | | /// special target exposes. |
390 | 0 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> { |
391 | 0 | IntoVec { |
392 | 0 | decoder: self, |
393 | 0 | vector: vec, |
394 | 0 | } |
395 | 0 | } |
396 | | |
397 | | /// Check if the decoding has finished. |
398 | | /// |
399 | | /// No more output is produced beyond the end code that marked the finish of the stream. The |
400 | | /// decoder may have read additional bytes, including padding bits beyond the last code word |
401 | | /// but also excess bytes provided. |
402 | 400k | pub fn has_ended(&self) -> bool { |
403 | 400k | self.state.has_ended() |
404 | 400k | } |
405 | | |
406 | | /// Ignore an end code and continue. |
407 | | /// |
408 | | /// This will _not_ reset any of the inner code tables and not have the effect of a clear code. |
409 | | /// It will instead continue as if the end code had not been present. If no end code has |
410 | | /// occurred then this is a no-op. |
411 | | /// |
412 | | /// You can test if an end code has occurred with [`has_ended`](#method.has_ended). |
413 | | /// FIXME: clarify how this interacts with padding introduced after end code. |
414 | | #[allow(dead_code)] |
415 | 0 | pub(crate) fn restart(&mut self) { |
416 | 0 | self.state.restart(); |
417 | 0 | } |
418 | | |
419 | | /// Reset all internal state. |
420 | | /// |
421 | | /// This produce a decoder as if just constructed with `new` but taking slightly less work. In |
422 | | /// particular it will not deallocate any internal allocations. It will also avoid some |
423 | | /// duplicate setup work. |
424 | 0 | pub fn reset(&mut self) { |
425 | 0 | self.state.reset(); |
426 | 0 | } |
427 | | } |
428 | | |
#[cfg(feature = "std")]
impl<'d, W: Write> IntoStream<'d, W> {
    /// Decode data from a reader.
    ///
    /// This will read data until the stream is empty or an end marker is reached.
    pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
        self.decode_part(read, false)
    }

    /// Decode data from a reader, requiring an end marker.
    pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
        self.decode_part(read, true)
    }

    /// Set the size of the intermediate decode buffer.
    ///
    /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is
    /// available and any decoding method is called. No buffer is allocated if `set_buffer` has
    /// been called. The buffer is reused.
    ///
    /// # Panics
    /// This method panics if `size` is `0`.
    pub fn set_buffer_size(&mut self, size: usize) {
        assert_ne!(size, 0, "Attempted to set empty buffer");
        self.default_size = size;
    }

    /// Use a particular buffer as an intermediate decode buffer.
    ///
    /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
    /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical
    /// for efficient decoding. Some optimization techniques require the buffer to hold one or more
    /// previous decoded words. There is also additional overhead from `write` calls each time the
    /// buffer has been filled.
    ///
    /// # Panics
    /// This method panics if the `buffer` is empty.
    pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
        assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
        self.buffer = Some(StreamBuf::Borrowed(buffer));
    }

    /// Shared IO loop behind `decode` and `decode_all`.
    ///
    /// Repeatedly fills from `read`, decodes one chunk into the intermediate
    /// buffer, and writes it to the sink. With `must_finish`, running out of
    /// input before an end marker is an `UnexpectedEof` error.
    fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
        let IntoStream {
            decoder,
            writer,
            buffer,
            default_size,
        } = self;

        // Per-iteration outcome of the `once` closure below.
        enum Progress {
            Ok,
            Done,
        }

        let mut bytes_read = 0;
        let mut bytes_written = 0;

        // Converting to mutable refs to move into the `once` closure.
        let read_bytes = &mut bytes_read;
        let write_bytes = &mut bytes_written;

        // Lazily allocate the intermediate buffer unless one was set.
        let outbuf: &mut [u8] =
            match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
                StreamBuf::Borrowed(slice) => &mut *slice,
                StreamBuf::Owned(vec) => &mut *vec,
            };
        assert!(!outbuf.is_empty());

        // One fill-decode-write round; returns whether to keep looping.
        let once = move || {
            // Try to grab one buffer of input data.
            let data = read.fill_buf()?;

            // Decode as much of the buffer as fits.
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
            // Do the bookkeeping and consume the buffer.
            *read_bytes += result.consumed_in;
            *write_bytes += result.consumed_out;
            read.consume(result.consumed_in);

            // Handle the status in the result.
            let done = result.status.map_err(|err| {
                io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
            })?;

            // Check if we had any new data at all.
            if let LzwStatus::NoProgress = done {
                debug_assert_eq!(
                    result.consumed_out, 0,
                    "No progress means we have not decoded any data"
                );
                // In particular we did not finish decoding.
                if must_finish {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "No more data but no end marker detected",
                    ));
                } else {
                    return Ok(Progress::Done);
                }
            }

            // And finish by writing our result.
            // TODO: we may lose data on error (also on status error above) which we might want to
            // deterministically handle so that we don't need to restart everything from scratch as
            // the only recovery strategy. Any changes welcome.
            writer.write_all(&outbuf[..result.consumed_out])?;

            Ok(if let LzwStatus::Done = done {
                Progress::Done
            } else {
                Progress::Ok
            })
        };

        // Decode chunks of input data until we're done.
        let status = core::iter::repeat_with(once)
            // scan+fuse can be replaced with map_while
            .scan((), |(), result| match result {
                Ok(Progress::Ok) => Some(Ok(())),
                Err(err) => Some(Err(err)),
                Ok(Progress::Done) => None,
            })
            .fuse()
            .collect();

        StreamResult {
            bytes_read,
            bytes_written,
            status,
        }
    }
}
562 | | |
impl IntoVec<'_> {
    /// Decode data from a slice.
    ///
    /// This will read data until the slice is empty or an end marker is reached.
    pub fn decode(&mut self, read: &[u8]) -> VectorResult {
        self.decode_part(read, false)
    }

    /// Decode data from a slice, requiring an end marker.
    pub fn decode_all(mut self, read: &[u8]) -> VectorResult {
        self.decode_part(read, true)
    }

    /// Grow the output vector by one chunk and return the writable tail
    /// together with the decoder.
    fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) {
        const CHUNK_SIZE: usize = 1 << 12;
        let decoder = &mut self.decoder;
        let length = self.vector.len();

        // Use the vector to do overflow checks and w/e.
        self.vector.reserve(CHUNK_SIZE);
        // FIXME: decoding into uninit buffer?
        self.vector.resize(length + CHUNK_SIZE, 0u8);

        (&mut self.vector[length..], decoder)
    }

    /// Shared loop behind `decode` and `decode_all`: decode `part` chunk by
    /// chunk into the vector; with `must_finish`, a stall without an end
    /// marker becomes `LzwError::InvalidCode`.
    fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult {
        let mut result = VectorResult {
            consumed_in: 0,
            consumed_out: 0,
            status: Ok(LzwStatus::Ok),
        };

        // Per-iteration outcome of the `once` closure below.
        enum Progress {
            Ok,
            Done,
        }

        // Converting to mutable refs to move into the `once` closure.
        let read_bytes = &mut result.consumed_in;
        let write_bytes = &mut result.consumed_out;
        let mut data = part;

        // Each round extends the vector by one CHUNK_SIZE (4 KiB) chunk via
        // `grab_buffer`, then truncates back to the bytes actually decoded.
        // Note that the decoded size can be up to quadratic in code block.
        let once = move || {
            // Grab a new output buffer.
            let (outbuf, decoder) = self.grab_buffer();

            // Decode as much of the buffer as fits.
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
            // Do the bookkeeping and consume the buffer.
            *read_bytes += result.consumed_in;
            *write_bytes += result.consumed_out;
            data = &data[result.consumed_in..];

            // Shrink the vector back down to the actually-filled prefix.
            let unfilled = outbuf.len() - result.consumed_out;
            let filled = self.vector.len() - unfilled;
            self.vector.truncate(filled);

            // Handle the status in the result.
            match result.status {
                Err(err) => Err(err),
                Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode),
                Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done),
                Ok(LzwStatus::Ok) => Ok(Progress::Ok),
            }
        };

        // Decode chunks of input data until we're done.
        let status: Result<(), _> = core::iter::repeat_with(once)
            // scan+fuse can be replaced with map_while
            .scan((), |(), result| match result {
                Ok(Progress::Ok) => Some(Ok(())),
                Err(err) => Some(Err(err)),
                Ok(Progress::Done) => None,
            })
            .fuse()
            .collect();

        if let Err(err) = status {
            result.status = Err(err);
        }

        result
    }
}
650 | | |
// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
// trip over the usage of await, which is a reserved keyword in that edition/version. It only
// contains an impl block (for `IntoAsync`).
#[cfg(feature = "async")]
#[path = "decode_into_async.rs"]
mod impl_decode_into_async;
657 | | |
658 | | impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> { |
659 | 1.93k | fn new(min_size: u8) -> Self { |
660 | 1.93k | DecodeState { |
661 | 1.93k | min_size, |
662 | 1.93k | table: Table::new(), |
663 | 1.93k | buffer: Buffer::new(), |
664 | 1.93k | last: None, |
665 | 1.93k | clear_code: 1 << min_size, |
666 | 1.93k | end_code: (1 << min_size) + 1, |
667 | 1.93k | next_code: (1 << min_size) + 2, |
668 | 1.93k | has_ended: false, |
669 | 1.93k | is_tiff: false, |
670 | 1.93k | implicit_reset: true, |
671 | 1.93k | code_buffer: CodeBuffer::new(min_size), |
672 | 1.93k | constants: core::marker::PhantomData, |
673 | 1.93k | } |
674 | 1.93k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new Line | Count | Source | 659 | 1.38k | fn new(min_size: u8) -> Self { | 660 | 1.38k | DecodeState { | 661 | 1.38k | min_size, | 662 | 1.38k | table: Table::new(), | 663 | 1.38k | buffer: Buffer::new(), | 664 | 1.38k | last: None, | 665 | 1.38k | clear_code: 1 << min_size, | 666 | 1.38k | end_code: (1 << min_size) + 1, | 667 | 1.38k | next_code: (1 << min_size) + 2, | 668 | 1.38k | has_ended: false, | 669 | 1.38k | is_tiff: false, | 670 | 1.38k | implicit_reset: true, | 671 | 1.38k | code_buffer: CodeBuffer::new(min_size), | 672 | 1.38k | constants: core::marker::PhantomData, | 673 | 1.38k | } | 674 | 1.38k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new Line | Count | Source | 659 | 549 | fn new(min_size: u8) -> Self { | 660 | 549 | DecodeState { | 661 | 549 | min_size, | 662 | 549 | table: Table::new(), | 663 | 549 | buffer: Buffer::new(), | 664 | 549 | last: None, | 665 | 549 | clear_code: 1 << min_size, | 666 | 549 | end_code: (1 << min_size) + 1, | 667 | 549 | next_code: (1 << min_size) + 2, | 668 | 549 | has_ended: false, | 669 | 549 | is_tiff: false, | 670 | 549 | implicit_reset: true, | 671 | 549 | code_buffer: CodeBuffer::new(min_size), | 672 | 549 | constants: core::marker::PhantomData, | 673 | 549 | } | 674 | 549 | } |
|
675 | | |
676 | 7.95k | fn init_tables(&mut self) { |
677 | 7.95k | self.code_buffer.reset(self.min_size); |
678 | 7.95k | self.next_code = (1 << self.min_size) + 2; |
679 | 7.95k | self.table.init(self.min_size); |
680 | 7.95k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables Line | Count | Source | 676 | 7.33k | fn init_tables(&mut self) { | 677 | 7.33k | self.code_buffer.reset(self.min_size); | 678 | 7.33k | self.next_code = (1 << self.min_size) + 2; | 679 | 7.33k | self.table.init(self.min_size); | 680 | 7.33k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables Line | Count | Source | 676 | 616 | fn init_tables(&mut self) { | 677 | 616 | self.code_buffer.reset(self.min_size); | 678 | 616 | self.next_code = (1 << self.min_size) + 2; | 679 | 616 | self.table.init(self.min_size); | 680 | 616 | } |
|
681 | | |
682 | 25.1k | fn reset_tables(&mut self) { |
683 | 25.1k | self.code_buffer.reset(self.min_size); |
684 | 25.1k | self.next_code = (1 << self.min_size) + 2; |
685 | 25.1k | self.table.clear(self.min_size); |
686 | 25.1k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables Line | Count | Source | 682 | 15.3k | fn reset_tables(&mut self) { | 683 | 15.3k | self.code_buffer.reset(self.min_size); | 684 | 15.3k | self.next_code = (1 << self.min_size) + 2; | 685 | 15.3k | self.table.clear(self.min_size); | 686 | 15.3k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables Line | Count | Source | 682 | 9.80k | fn reset_tables(&mut self) { | 683 | 9.80k | self.code_buffer.reset(self.min_size); | 684 | 9.80k | self.next_code = (1 << self.min_size) + 2; | 685 | 9.80k | self.table.clear(self.min_size); | 686 | 9.80k | } |
|
687 | | } |
688 | | |
689 | | impl<C: CodeBuffer, CgC: CodegenConstants> Stateful for DecodeState<C, CgC> { |
    /// Report whether the end code has been seen (and not cleared by `restart`).
    fn has_ended(&self) -> bool {
        self.has_ended
    }
693 | | |
694 | 0 | fn restart(&mut self) { |
695 | 0 | self.has_ended = false; |
696 | 0 | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart |
697 | | |
698 | 0 | fn reset(&mut self) { |
699 | 0 | self.table.init(self.min_size); |
700 | 0 | self.next_code = (1 << self.min_size) + 2; |
701 | 0 | self.buffer.read_mark = 0; |
702 | 0 | self.buffer.write_mark = 0; |
703 | 0 | self.last = None; |
704 | 0 | self.restart(); |
705 | 0 | self.code_buffer = CodeBuffer::new(self.min_size); |
706 | 0 | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset |
707 | | |
708 | 467k | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { |
709 | 467k | // Skip everything if there is nothing to do. |
710 | 467k | if self.has_ended { |
711 | 28 | return BufferResult { |
712 | 28 | consumed_in: 0, |
713 | 28 | consumed_out: 0, |
714 | 28 | status: Ok(LzwStatus::Done), |
715 | 28 | }; |
716 | 467k | } |
717 | 467k | |
718 | 467k | // Rough description: |
719 | 467k | // We will fill the output slice as much as possible until either there is no more symbols |
720 | 467k | // to decode or an end code has been reached. This requires an internal buffer to hold a |
721 | 467k | // potential tail of the word corresponding to the last symbol. This tail will then be |
722 | 467k | // decoded first before continuing with the regular decoding. The same buffer is required |
723 | 467k | // to persist some symbol state across calls. |
724 | 467k | // |
725 | 467k | // We store the words corresponding to code symbols in an index chain, bytewise, where we |
726 | 467k | // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain |
727 | 467k | // is traversed for each symbol when it is decoded and bytes are placed directly into the |
728 | 467k | // output slice. In the special case (new_code == next_code) we use an existing decoded |
729 | 467k | // version that is present in either the out bytes of this call or in buffer to copy the |
730 | 467k | // repeated prefix slice. |
731 | 467k | // TODO: I played with a 'decoding cache' to remember the position of long symbols and |
732 | 467k | // avoid traversing the chain, doing a copy of memory instead. It did however not lead to |
733 | 467k | // a serious improvement. It's just unlikely to both have a long symbol and have that |
734 | 467k | // repeated twice in the same output buffer. |
735 | 467k | // |
736 | 467k | // You will also find the (to my knowledge novel) concept of a _decoding burst_ which |
737 | 467k | // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order |
738 | 467k | // execution as much as possible and for this reason have the least possible stress on |
739 | 467k | // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but |
740 | 467k | // only for re-used codes, not novel ones. This lookahead also makes the loop termination |
741 | 467k | // when restoring each byte of the code word perfectly predictable! So a burst is a chunk |
742 | 467k | // of code words which are all independent of each other, have known lengths _and_ are |
743 | 467k | // guaranteed to fit into the out slice without requiring a buffer. One burst can be |
744 | 467k | // decoded in an extremely tight loop. |
745 | 467k | // |
746 | 467k | // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid |
747 | 467k | // that intermediate buffer at the expense of not always filling the output buffer |
748 | 467k | // completely. Alternatively we might follow its chain of precursor states twice. This may |
749 | 467k | // be even cheaper if we store more than one byte per link so it really should be |
750 | 467k | // evaluated. |
751 | 467k | // TODO: if the caller was required to provide the previous last word we could also avoid |
752 | 467k | // the buffer for cases where we need it to restore the next code! This could be built |
753 | 467k | // backwards compatible by only doing it after an opt-in call that enables the behaviour. |
754 | 467k | |
755 | 467k | // Record initial lengths for the result that is returned. |
756 | 467k | let o_in = inp.len(); |
757 | 467k | let o_out = out.len(); |
758 | 467k | |
759 | 467k | // The code_link is the previously decoded symbol. |
760 | 467k | // It's used to link the new code back to its predecessor. |
761 | 467k | let mut code_link = None; |
762 | 467k | // The status, which is written to on an invalid code. |
763 | 467k | let mut status = Ok(LzwStatus::Ok); |
764 | 467k | |
765 | 467k | match self.last.take() { |
766 | | // No last state? This is the first code after a reset? |
767 | | None => { |
768 | 34.1k | match self.next_symbol(&mut inp) { |
769 | | // Plainly invalid code. |
770 | 33.7k | Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode), |
771 | | // next_code would require an actual predecessor. |
772 | 33.7k | Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode), |
773 | | // No more symbols available and nothing decoded yet. |
774 | | // Assume that we didn't make progress, this may get reset to Done if we read |
775 | | // some bytes from the input. |
776 | 332 | None => status = Ok(LzwStatus::NoProgress), |
777 | | // Handle a valid code. |
778 | 33.7k | Some(init_code) => { |
779 | 33.7k | if init_code == self.clear_code { |
780 | 6.99k | self.init_tables(); |
781 | 26.7k | } else if init_code == self.end_code { |
782 | 13 | self.has_ended = true; |
783 | 13 | status = Ok(LzwStatus::Done); |
784 | 26.7k | } else if self.table.is_empty() { |
785 | 957 | if self.implicit_reset { |
786 | 957 | self.init_tables(); |
787 | 957 | |
788 | 957 | self.buffer.fill_reconstruct(&self.table, init_code); |
789 | 957 | let link = self.table.at(init_code).clone(); |
790 | 957 | code_link = Some(DerivationBase { |
791 | 957 | code: init_code, |
792 | 957 | first: link.first, |
793 | 957 | }); |
794 | 957 | } else { |
795 | 0 | // We require an explicit reset. |
796 | 0 | status = Err(LzwError::InvalidCode); |
797 | 0 | } |
798 | 25.7k | } else { |
799 | 25.7k | // Reconstruct the first code in the buffer. |
800 | 25.7k | self.buffer.fill_reconstruct(&self.table, init_code); |
801 | 25.7k | let link = self.table.at(init_code).clone(); |
802 | 25.7k | code_link = Some(DerivationBase { |
803 | 25.7k | code: init_code, |
804 | 25.7k | first: link.first, |
805 | 25.7k | }); |
806 | 25.7k | } |
807 | | } |
808 | | } |
809 | | } |
810 | | // Move the tracking state to the stack. |
811 | 433k | Some(tup) => code_link = Some(tup), |
812 | | }; |
813 | | |
814 | | // Track an empty `burst` (see below) means we made no progress. |
815 | 467k | let mut have_yet_to_decode_data = false; |
816 | 467k | |
817 | 467k | // Restore the previous state, if any. |
818 | 467k | if code_link.is_some() { |
819 | 460k | let remain = self.buffer.buffer(); |
820 | 460k | // Check if we can fully finish the buffer. |
821 | 460k | if remain.len() > out.len() { |
822 | 161k | if out.is_empty() { |
823 | 0 | // This also implies the buffer is _not_ empty and we will not enter any |
824 | 0 | // decoding loop. |
825 | 0 | status = Ok(LzwStatus::NoProgress); |
826 | 161k | } else { |
827 | 161k | out.copy_from_slice(&remain[..out.len()]); |
828 | 161k | self.buffer.consume(out.len()); |
829 | 161k | out = &mut []; |
830 | 161k | } |
831 | 298k | } else if remain.is_empty() { |
832 | 220k | status = Ok(LzwStatus::NoProgress); |
833 | 220k | have_yet_to_decode_data = true; |
834 | 220k | } else { |
835 | 77.9k | let consumed = remain.len(); |
836 | 77.9k | out[..consumed].copy_from_slice(remain); |
837 | 77.9k | self.buffer.consume(consumed); |
838 | 77.9k | out = &mut out[consumed..]; |
839 | 77.9k | have_yet_to_decode_data = false; |
840 | 77.9k | } |
841 | 7.36k | } |
842 | | |
843 | | // A special reference to out slice which holds the last decoded symbol. |
844 | 467k | let mut last_decoded: Option<&[u8]> = None; |
845 | 467k | |
846 | 467k | if self.buffer.buffer().is_empty() { |
847 | | // Hot loop that writes data to the output as long as we can do so directly from the |
848 | | // input stream. As an invariant of this block we did not need to use the buffer to |
849 | | // store a decoded code word. Testing the condition ahead of time avoids a test in the |
850 | | // loop body since every code path where the buffer is filled already breaks. |
851 | | // |
852 | | // In a previous iteration of the code we trusted compiler optimization to work this |
853 | | // out but it seems that it does not. Another edit hidden behind some performance work |
854 | | // then edited out the check, inadvertently changing the behavior for callers that |
855 | | // relied on being able to provide an empty output buffer and still receiving a useful |
856 | | // signal about the state of the stream. |
857 | | |
858 | | // A burst is a sequence of code words that are independently decoded, i.e. they do not |
859 | | // change the state of the decoder in ways that would influence the interpretation of |
860 | | // each other. That is: they are not special symbols, they do not make us increase the |
861 | | // code size, they are each codes already in the tree before the burst. |
862 | | // |
863 | | // The tracking state for a burst. These are actually initialized later but compiler |
864 | | // wasn't smart enough to fully optimize out the init code so that appears outside the |
865 | | // loop. |
866 | 306k | let mut burst = [0; BURST]; |
867 | 306k | let mut burst_byte_len = [0u16; BURST]; |
868 | 306k | let mut burst_byte = [0u8; BURST]; |
869 | 306k | let mut target: [&mut [u8]; BURST] = Default::default(); |
870 | | |
871 | | loop { |
872 | | // In particular, we *also* break if the output buffer is still empty. Especially |
873 | | // when the output parameter was an empty slice, we must try to fetch at least one |
874 | | // code but with YIELD_ON_FULL we do not. |
875 | 3.50M | if CgC::YIELD_ON_FULL && out.is_empty() { |
876 | 0 | break; |
877 | 3.50M | } |
878 | | |
879 | 3.50M | let mut deriv = match code_link.take() { |
880 | 3.49M | Some(link) => link, |
881 | | None => { |
882 | | // TODO: we do not need to break here. This does not indicate that the buffer |
883 | | // has been filled, rather it indicates we have reset the state. The next code |
884 | | // should be part of the initial alphabet. However the first code is special in |
885 | | // the sense of not creating a new code itself. This is handled correctly in |
886 | | // the initialization prior to the loop; and in particular that handling as |
887 | | // written currently relies on putting it into the buffer; so handling it we |
888 | | // would need to ensure that either the buffer is fully cleared after its use, |
889 | | // or use another implementation of handling that first code. |
890 | 7.36k | break; |
891 | | } |
892 | | }; |
893 | | |
894 | | // Ensure the code buffer is full, we're about to request some codes. |
895 | | // Note that this also ensures at least one code is in the buffer if any input is left. |
896 | 3.49M | self.refill_bits(&mut inp); |
897 | 3.49M | let cnt = self.code_buffer.peek_bits(&mut burst); |
898 | 3.49M | |
899 | 3.49M | // No code left in the buffer, and no more bytes to refill the buffer. |
900 | 3.49M | if cnt == 0 { |
901 | 218k | if have_yet_to_decode_data { |
902 | 65.0k | status = Ok(LzwStatus::NoProgress); |
903 | 153k | } |
904 | | |
905 | 218k | code_link = Some(deriv); |
906 | 218k | break; |
907 | 3.27M | } |
908 | 3.27M | |
909 | 3.27M | debug_assert!( |
910 | | // When the table is full, we have a max code one above the mask. |
911 | 0 | self.table.is_full() |
912 | | // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the |
913 | | // computed next_code is 4 which already exceeds the bit width from the start. |
914 | | // Then we will immediately switch code size after this code. |
915 | | // |
916 | | // TODO: this is the reason for some saturating and non-sharp comparisons in |
917 | | // the code below. Maybe it makes sense to revisit turning this into a compile |
918 | | // time choice? |
919 | 0 | || (self.code_buffer.code_size() == 2 && self.next_code == 4) |
920 | 0 | || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code, |
921 | | ); |
922 | | |
923 | 3.27M | let mut burst_size = 0; |
924 | 3.27M | let left_before_size_switch = (self.code_buffer.max_code() |
925 | 3.27M | - Code::from(self.is_tiff)) |
926 | 3.27M | .saturating_sub(self.next_code); |
927 | | |
928 | | // A burst is a sequence of decodes that are completely independent of each other. This |
929 | | // is the case if neither is an end code, a clear code, or a next code, i.e. we have |
930 | | // all of them in the decoding table and thus known their depths, and additionally if |
931 | | // we can decode them directly into the output buffer. |
932 | 4.56M | for b in &burst[..cnt] { |
933 | | // We can commit the previous burst code, and will take a slice from the output |
934 | | // buffer. This also avoids the bounds check in the tight loop later. |
935 | 4.56M | if burst_size > 0 { |
936 | 1.28M | let len = burst_byte_len[burst_size - 1]; |
937 | 1.28M | let (into, tail) = out.split_at_mut(usize::from(len)); |
938 | 1.28M | target[burst_size - 1] = into; |
939 | 1.28M | out = tail; |
940 | 3.27M | } |
941 | | |
942 | | // Check that we don't overflow the code size with all codes we burst decode. |
943 | 4.56M | burst_size += 1; |
944 | 4.56M | |
945 | 4.56M | if burst_size > usize::from(left_before_size_switch) { |
946 | 2.73M | break; |
947 | 1.82M | } |
948 | 1.82M | |
949 | 1.82M | let read_code = *b; |
950 | 1.82M | |
951 | 1.82M | // A burst code can't be special. |
952 | 1.82M | if read_code == self.clear_code |
953 | 1.80M | || read_code == self.end_code |
954 | 1.79M | || read_code >= self.next_code |
955 | | { |
956 | 131k | break; |
957 | 1.69M | } |
958 | 1.69M | |
959 | 1.69M | // Read the code length and check that we can decode directly into the out slice. |
960 | 1.69M | let len = self.table.depths[usize::from(read_code)]; |
961 | 1.69M | |
962 | 1.69M | if out.len() < usize::from(len) { |
963 | 21.9k | break; |
964 | 1.67M | } |
965 | 1.67M | |
966 | 1.67M | // We do exactly one more code (the one being inspected in the current iteration) |
967 | 1.67M | // after the 'burst'. When we want to break decoding precisely on the supplied |
968 | 1.67M | // buffer, we check if this is the last code to be decoded into it. |
969 | 1.67M | if CgC::YIELD_ON_FULL { |
970 | 0 | if out.len() == usize::from(len) { |
971 | 0 | break; |
972 | 0 | } |
973 | 1.67M | } |
974 | | |
975 | 1.67M | burst_byte_len[burst_size - 1] = len; |
976 | | } |
977 | | |
978 | 3.27M | self.code_buffer.consume_bits(burst_size as u8); |
979 | 3.27M | have_yet_to_decode_data = false; |
980 | 3.27M | |
981 | 3.27M | // Note that the very last code in the burst buffer doesn't actually belong to the |
982 | 3.27M | // burst itself. TODO: sometimes it could, we just don't differentiate between the |
983 | 3.27M | // breaks and a loop end condition above. That may be a speed advantage? |
984 | 3.27M | let (&new_code, burst) = burst[..burst_size].split_last().unwrap(); |
985 | 3.27M | |
986 | 3.27M | // The very tight loop for restoring the actual burst. These can be reconstructed in |
987 | 3.27M | // parallel since none of them depend on a prior constructed. Only the derivation of |
988 | 3.27M | // new codes is not parallel. There are no size changes here either. |
989 | 3.27M | let burst_targets = &mut target[..burst_size - 1]; |
990 | 3.27M | |
991 | 3.27M | if !self.table.is_full() { |
992 | 562k | self.next_code += burst_targets.len() as u16; |
993 | 2.71M | } |
994 | | |
995 | 1.28M | for ((&burst, target), byte) in |
996 | 3.27M | burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte) |
997 | 1.28M | { |
998 | 1.28M | *byte = self.table.reconstruct(burst, target); |
999 | 1.28M | } |
1000 | | |
1001 | 3.27M | self.table.derive_burst(&mut deriv, burst, &burst_byte[..]); |
1002 | 3.27M | |
1003 | 3.27M | // Now handle the special codes. |
1004 | 3.27M | if new_code == self.clear_code { |
1005 | 25.1k | self.reset_tables(); |
1006 | 25.1k | last_decoded = None; |
1007 | 25.1k | // Restarts in the next call to the entry point. |
1008 | 25.1k | break; |
1009 | 3.24M | } |
1010 | 3.24M | |
1011 | 3.24M | if new_code == self.end_code { |
1012 | 303 | self.has_ended = true; |
1013 | 303 | status = Ok(LzwStatus::Done); |
1014 | 303 | last_decoded = None; |
1015 | 303 | break; |
1016 | 3.24M | } |
1017 | 3.24M | |
1018 | 3.24M | if new_code > self.next_code { |
1019 | 307 | status = Err(LzwError::InvalidCode); |
1020 | 307 | last_decoded = None; |
1021 | 307 | break; |
1022 | 3.24M | } |
1023 | | |
1024 | 3.24M | let required_len = if new_code == self.next_code { |
1025 | 99.4k | self.table.depths[usize::from(deriv.code)] + 1 |
1026 | | } else { |
1027 | 3.14M | self.table.depths[usize::from(new_code)] |
1028 | | }; |
1029 | | |
1030 | | // We need the decoded data of the new code if it is the `next_code`. This is the |
1031 | | // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In |
1032 | | // all other cases we only need the first character of the decoded data. |
1033 | 3.24M | let have_next_code = new_code == self.next_code; |
1034 | 3.24M | |
1035 | 3.24M | // Update the slice holding the last decoded word. |
1036 | 3.24M | if have_next_code { |
1037 | | // If we did not have any burst code, we still hold that slice in the buffer. |
1038 | 99.4k | if let Some(new_last) = target[..burst_size - 1].last_mut() { |
1039 | 11.6k | let slice = core::mem::replace(new_last, &mut []); |
1040 | 11.6k | last_decoded = Some(&*slice); |
1041 | 87.7k | } |
1042 | 3.14M | } |
1043 | | |
1044 | | let cha; |
1045 | 3.24M | let is_in_buffer = usize::from(required_len) > out.len(); |
1046 | 3.24M | // Check if we will need to store our current state into the buffer. |
1047 | 3.24M | if is_in_buffer { |
1048 | 51.4k | if have_next_code { |
1049 | | // last_decoded will be Some if we have restored any code into the out slice. |
1050 | | // Otherwise it will still be present in the buffer. |
1051 | 26.7k | if let Some(last) = last_decoded.take() { |
1052 | 9.28k | self.buffer.bytes[..last.len()].copy_from_slice(last); |
1053 | 9.28k | self.buffer.write_mark = last.len(); |
1054 | 9.28k | self.buffer.read_mark = last.len(); |
1055 | 17.4k | } |
1056 | | |
1057 | 26.7k | cha = self.buffer.fill_cscsc(); |
1058 | 24.6k | } else { |
1059 | 24.6k | // Restore the decoded word into the buffer. |
1060 | 24.6k | last_decoded = None; |
1061 | 24.6k | cha = self.buffer.fill_reconstruct(&self.table, new_code); |
1062 | 24.6k | } |
1063 | | } else { |
1064 | 3.19M | let (target, tail) = out.split_at_mut(usize::from(required_len)); |
1065 | 3.19M | out = tail; |
1066 | 3.19M | |
1067 | 3.19M | if have_next_code { |
1068 | | // Reconstruct high. |
1069 | 72.6k | let source = match last_decoded.take() { |
1070 | 61.6k | Some(last) => last, |
1071 | 10.9k | None => &self.buffer.bytes[..self.buffer.write_mark], |
1072 | | }; |
1073 | | |
1074 | | // We don't *actually* expect the unwrap to happen. Each source is at least 1 |
1075 | | // byte long. But llvm doesn't know this (too much indirect loads and cases). |
1076 | 72.6k | cha = source.get(0).map(|x| *x).unwrap_or(0); Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance::{closure#0} <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance::{closure#0} Line | Count | Source | 1076 | 53.0k | cha = source.get(0).map(|x| *x).unwrap_or(0); |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance::{closure#0} <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance::{closure#0} Line | Count | Source | 1076 | 19.5k | cha = source.get(0).map(|x| *x).unwrap_or(0); |
|
1077 | 72.6k | target[..source.len()].copy_from_slice(source); |
1078 | 72.6k | target[source.len()..][0] = cha; |
1079 | 3.12M | } else { |
1080 | 3.12M | cha = self.table.reconstruct(new_code, target); |
1081 | 3.12M | } |
1082 | | |
1083 | | // A new decoded word. |
1084 | 3.19M | last_decoded = Some(target); |
1085 | | } |
1086 | | |
1087 | | // Each newly read code creates one new code/link based on the preceding code if we |
1088 | | // have enough space to put it there. |
1089 | 3.24M | if !self.table.is_full() { |
1090 | 536k | self.table.derive(&deriv, cha); |
1091 | 536k | |
1092 | 536k | if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff) |
1093 | 23.2k | && self.code_buffer.code_size() < MAX_CODESIZE |
1094 | 22.8k | { |
1095 | 22.8k | self.bump_code_size(); |
1096 | 514k | } |
1097 | | |
1098 | 536k | self.next_code += 1; |
1099 | 2.71M | } |
1100 | | |
1101 | | // store the information on the decoded word. |
1102 | 3.24M | code_link = Some(DerivationBase { |
1103 | 3.24M | code: new_code, |
1104 | 3.24M | first: cha, |
1105 | 3.24M | }); |
1106 | 3.24M | |
1107 | 3.24M | // Can't make any more progress with decoding. |
1108 | 3.24M | // |
1109 | 3.24M | // We have more data buffered but not enough space to put it? We want fetch a next |
1110 | 3.24M | // symbol if possible as in the case of it being a new symbol we can refer to the |
1111 | 3.24M | // buffered output as the source for that symbol's meaning and do a memcpy. |
1112 | 3.24M | // |
1113 | 3.24M | // Since this test is after decoding at least one code, we can now check for an |
1114 | 3.24M | // empty buffer and still guarantee progress when one was passed as a parameter. |
1115 | 3.24M | if is_in_buffer || out.is_empty() { |
1116 | 54.2k | break; |
1117 | 3.19M | } |
1118 | | } |
1119 | 161k | } |
1120 | | |
1121 | | // We need to store the last word into the buffer in case the first code in the next |
1122 | | // iteration is the next_code. |
1123 | 467k | if let Some(tail) = last_decoded { |
1124 | 156k | self.buffer.bytes[..tail.len()].copy_from_slice(tail); |
1125 | 156k | self.buffer.write_mark = tail.len(); |
1126 | 156k | // Mark the full buffer as having been consumed. |
1127 | 156k | self.buffer.read_mark = tail.len(); |
1128 | 311k | } |
1129 | | |
1130 | | // Ensure we don't indicate that no progress was made if we read some bytes from the input |
1131 | | // (which is progress). |
1132 | 467k | if o_in > inp.len() { |
1133 | 295k | if let Ok(LzwStatus::NoProgress) = status { |
1134 | 220k | status = Ok(LzwStatus::Ok); |
1135 | 220k | } |
1136 | 171k | } |
1137 | | |
1138 | | // Store the code/link state. |
1139 | 467k | self.last = code_link; |
1140 | 467k | |
1141 | 467k | BufferResult { |
1142 | 467k | consumed_in: o_in.wrapping_sub(inp.len()), |
1143 | 467k | consumed_out: o_out.wrapping_sub(out.len()), |
1144 | 467k | status, |
1145 | 467k | } |
1146 | 467k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance Line | Count | Source | 708 | 399k | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { | 709 | 399k | // Skip everything if there is nothing to do. | 710 | 399k | if self.has_ended { | 711 | 4 | return BufferResult { | 712 | 4 | consumed_in: 0, | 713 | 4 | consumed_out: 0, | 714 | 4 | status: Ok(LzwStatus::Done), | 715 | 4 | }; | 716 | 399k | } | 717 | 399k | | 718 | 399k | // Rough description: | 719 | 399k | // We will fill the output slice as much as possible until either there is no more symbols | 720 | 399k | // to decode or an end code has been reached. This requires an internal buffer to hold a | 721 | 399k | // potential tail of the word corresponding to the last symbol. This tail will then be | 722 | 399k | // decoded first before continuing with the regular decoding. The same buffer is required | 723 | 399k | // to persist some symbol state across calls. | 724 | 399k | // | 725 | 399k | // We store the words corresponding to code symbols in an index chain, bytewise, where we | 726 | 399k | // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain | 727 | 399k | // is traversed for each symbol when it is decoded and bytes are placed directly into the | 728 | 399k | // output slice. In the special case (new_code == next_code) we use an existing decoded | 729 | 399k | // version that is present in either the out bytes of this call or in buffer to copy the | 730 | 399k | // repeated prefix slice. 
| 731 | 399k | // TODO: I played with a 'decoding cache' to remember the position of long symbols and | 732 | 399k | // avoid traversing the chain, doing a copy of memory instead. It did however not lead to | 733 | 399k | // a serious improvement. It's just unlikely to both have a long symbol and have that | 734 | 399k | // repeated twice in the same output buffer. | 735 | 399k | // | 736 | 399k | // You will also find the (to my knowledge novel) concept of a _decoding burst_ which | 737 | 399k | // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order | 738 | 399k | // execution as much as possible and for this reason have the least possible stress on | 739 | 399k | // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but | 740 | 399k | // only for re-used codes, not novel ones. This lookahead also makes the loop termination | 741 | 399k | // when restoring each byte of the code word perfectly predictable! So a burst is a chunk | 742 | 399k | // of code words which are all independent of each other, have known lengths _and_ are | 743 | 399k | // guaranteed to fit into the out slice without requiring a buffer. One burst can be | 744 | 399k | // decoded in an extremely tight loop. | 745 | 399k | // | 746 | 399k | // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid | 747 | 399k | // that intermediate buffer at the expense of not always filling the output buffer | 748 | 399k | // completely. Alternatively we might follow its chain of precursor states twice. This may | 749 | 399k | // be even cheaper if we store more than one byte per link so it really should be | 750 | 399k | // evaluated. | 751 | 399k | // TODO: if the caller was required to provide the previous last word we could also avoid | 752 | 399k | // the buffer for cases where we need it to restore the next code! 
This could be built | 753 | 399k | // backwards compatible by only doing it after an opt-in call that enables the behaviour. | 754 | 399k | | 755 | 399k | // Record initial lengths for the result that is returned. | 756 | 399k | let o_in = inp.len(); | 757 | 399k | let o_out = out.len(); | 758 | 399k | | 759 | 399k | // The code_link is the previously decoded symbol. | 760 | 399k | // It's used to link the new code back to its predecessor. | 761 | 399k | let mut code_link = None; | 762 | 399k | // The status, which is written to on an invalid code. | 763 | 399k | let mut status = Ok(LzwStatus::Ok); | 764 | 399k | | 765 | 399k | match self.last.take() { | 766 | | // No last state? This is the first code after a reset? | 767 | | None => { | 768 | 23.2k | match self.next_symbol(&mut inp) { | 769 | | // Plainly invalid code. | 770 | 22.9k | Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode), | 771 | | // next_code would require an actual predecessor. | 772 | 22.9k | Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode), | 773 | | // No more symbols available and nothing decoded yet. | 774 | | // Assume that we didn't make progress, this may get reset to Done if we read | 775 | | // some bytes from the input. | 776 | 326 | None => status = Ok(LzwStatus::NoProgress), | 777 | | // Handle a valid code. 
| 778 | 22.9k | Some(init_code) => { | 779 | 22.9k | if init_code == self.clear_code { | 780 | 6.52k | self.init_tables(); | 781 | 16.4k | } else if init_code == self.end_code { | 782 | 13 | self.has_ended = true; | 783 | 13 | status = Ok(LzwStatus::Done); | 784 | 16.4k | } else if self.table.is_empty() { | 785 | 815 | if self.implicit_reset { | 786 | 815 | self.init_tables(); | 787 | 815 | | 788 | 815 | self.buffer.fill_reconstruct(&self.table, init_code); | 789 | 815 | let link = self.table.at(init_code).clone(); | 790 | 815 | code_link = Some(DerivationBase { | 791 | 815 | code: init_code, | 792 | 815 | first: link.first, | 793 | 815 | }); | 794 | 815 | } else { | 795 | 0 | // We require an explicit reset. | 796 | 0 | status = Err(LzwError::InvalidCode); | 797 | 0 | } | 798 | 15.5k | } else { | 799 | 15.5k | // Reconstruct the first code in the buffer. | 800 | 15.5k | self.buffer.fill_reconstruct(&self.table, init_code); | 801 | 15.5k | let link = self.table.at(init_code).clone(); | 802 | 15.5k | code_link = Some(DerivationBase { | 803 | 15.5k | code: init_code, | 804 | 15.5k | first: link.first, | 805 | 15.5k | }); | 806 | 15.5k | } | 807 | | } | 808 | | } | 809 | | } | 810 | | // Move the tracking state to the stack. | 811 | 376k | Some(tup) => code_link = Some(tup), | 812 | | }; | 813 | | | 814 | | // Track an empty `burst` (see below) means we made no progress. | 815 | 399k | let mut have_yet_to_decode_data = false; | 816 | 399k | | 817 | 399k | // Restore the previous state, if any. | 818 | 399k | if code_link.is_some() { | 819 | 392k | let remain = self.buffer.buffer(); | 820 | 392k | // Check if we can fully finish the buffer. | 821 | 392k | if remain.len() > out.len() { | 822 | 134k | if out.is_empty() { | 823 | 0 | // This also implies the buffer is _not_ empty and we will not enter any | 824 | 0 | // decoding loop. 
| 825 | 0 | status = Ok(LzwStatus::NoProgress); | 826 | 134k | } else { | 827 | 134k | out.copy_from_slice(&remain[..out.len()]); | 828 | 134k | self.buffer.consume(out.len()); | 829 | 134k | out = &mut []; | 830 | 134k | } | 831 | 257k | } else if remain.is_empty() { | 832 | 219k | status = Ok(LzwStatus::NoProgress); | 833 | 219k | have_yet_to_decode_data = true; | 834 | 219k | } else { | 835 | 38.3k | let consumed = remain.len(); | 836 | 38.3k | out[..consumed].copy_from_slice(remain); | 837 | 38.3k | self.buffer.consume(consumed); | 838 | 38.3k | out = &mut out[consumed..]; | 839 | 38.3k | have_yet_to_decode_data = false; | 840 | 38.3k | } | 841 | 6.88k | } | 842 | | | 843 | | // A special reference to out slice which holds the last decoded symbol. | 844 | 399k | let mut last_decoded: Option<&[u8]> = None; | 845 | 399k | | 846 | 399k | if self.buffer.buffer().is_empty() { | 847 | | // Hot loop that writes data to the output as long as we can do so directly from the | 848 | | // input stream. As an invariant of this block we did not need to use the buffer to | 849 | | // store a decoded code word. Testing the condition ahead of time avoids a test in the | 850 | | // loop body since every code path where the buffer is filled already breaks. | 851 | | // | 852 | | // In a previous iteration of the code we trusted compiler optimization to work this | 853 | | // out but it seems that it does not. Another edit hidden behind some performance work | 854 | | // then edited out the check, inadvertently changing the behavior for callers that | 855 | | // relied on being able to provide an empty output buffer and still receiving a useful | 856 | | // signal about the state of the stream. | 857 | | | 858 | | // A burst is a sequence of code words that are independently decoded, i.e. they do not | 859 | | // change the state of the decoder in ways that would influence the interpretation of | 860 | | // each other. 
That is: they are not special symbols, they do not make us increase the | 861 | | // code size, they are each codes already in the tree before the burst. | 862 | | // | 863 | | // The tracking state for a burst. These are actually initialized later but compiler | 864 | | // wasn't smart enough to fully optimize out the init code so that appears outside the | 865 | | // loop. | 866 | 264k | let mut burst = [0; BURST]; | 867 | 264k | let mut burst_byte_len = [0u16; BURST]; | 868 | 264k | let mut burst_byte = [0u8; BURST]; | 869 | 264k | let mut target: [&mut [u8]; BURST] = Default::default(); | 870 | | | 871 | | loop { | 872 | | // In particular, we *also* break if the output buffer is still empty. Especially | 873 | | // when the output parameter was an empty slice, we must try to fetch at least one | 874 | | // code but with YIELD_ON_FULL we do not. | 875 | 791k | if CgC::YIELD_ON_FULL && out.is_empty() { | 876 | 0 | break; | 877 | 791k | } | 878 | | | 879 | 791k | let mut deriv = match code_link.take() { | 880 | 784k | Some(link) => link, | 881 | | None => { | 882 | | // TODO: we do not need to break here. This does not indicate that the buffer | 883 | | // has been filled, rather it indicates we have reset the state. The next code | 884 | | // should be part of the initial alphabet. However the first code is special in | 885 | | // the sense of not creating a new code itself. This is handled correctly in | 886 | | // the initialization prior to the loop; and in particular that handling as | 887 | | // written currently relies on putting it into the buffer; so handling it we | 888 | | // would need to ensure that either the buffer is fully cleared after its use, | 889 | | // or use another implementation of handling that first code. | 890 | 6.88k | break; | 891 | | } | 892 | | }; | 893 | | | 894 | | // Ensure the code buffer is full, we're about to request some codes. | 895 | | // Note that this also ensures at least one code is in the buffer if any input is left. 
| 896 | 784k | self.refill_bits(&mut inp); | 897 | 784k | let cnt = self.code_buffer.peek_bits(&mut burst); | 898 | 784k | | 899 | 784k | // No code left in the buffer, and no more bytes to refill the buffer. | 900 | 784k | if cnt == 0 { | 901 | 218k | if have_yet_to_decode_data { | 902 | 64.9k | status = Ok(LzwStatus::NoProgress); | 903 | 153k | } | 904 | | | 905 | 218k | code_link = Some(deriv); | 906 | 218k | break; | 907 | 565k | } | 908 | 565k | | 909 | 565k | debug_assert!( | 910 | | // When the table is full, we have a max code one above the mask. | 911 | 0 | self.table.is_full() | 912 | | // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the | 913 | | // computed next_code is 4 which already exceeds the bit width from the start. | 914 | | // Then we will immediately switch code size after this code. | 915 | | // | 916 | | // TODO: this is the reason for some saturating and non-sharp comparisons in | 917 | | // the code below. Maybe it makes sense to revisit turning this into a compile | 918 | | // time choice? | 919 | 0 | || (self.code_buffer.code_size() == 2 && self.next_code == 4) | 920 | 0 | || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code, | 921 | | ); | 922 | | | 923 | 565k | let mut burst_size = 0; | 924 | 565k | let left_before_size_switch = (self.code_buffer.max_code() | 925 | 565k | - Code::from(self.is_tiff)) | 926 | 565k | .saturating_sub(self.next_code); | 927 | | | 928 | | // A burst is a sequence of decodes that are completely independent of each other. This | 929 | | // is the case if neither is an end code, a clear code, or a next code, i.e. we have | 930 | | // all of them in the decoding table and thus known their depths, and additionally if | 931 | | // we can decode them directly into the output buffer. | 932 | 969k | for b in &burst[..cnt] { | 933 | | // We can commit the previous burst code, and will take a slice from the output | 934 | | // buffer. 
This also avoids the bounds check in the tight loop later. | 935 | 969k | if burst_size > 0 { | 936 | 403k | let len = burst_byte_len[burst_size - 1]; | 937 | 403k | let (into, tail) = out.split_at_mut(usize::from(len)); | 938 | 403k | target[burst_size - 1] = into; | 939 | 403k | out = tail; | 940 | 565k | } | 941 | | | 942 | | // Check that we don't overflow the code size with all codes we burst decode. | 943 | 969k | burst_size += 1; | 944 | 969k | | 945 | 969k | if burst_size > usize::from(left_before_size_switch) { | 946 | 274k | break; | 947 | 694k | } | 948 | 694k | | 949 | 694k | let read_code = *b; | 950 | 694k | | 951 | 694k | // A burst code can't be special. | 952 | 694k | if read_code == self.clear_code | 953 | 680k | || read_code == self.end_code | 954 | 680k | || read_code >= self.next_code | 955 | | { | 956 | 77.8k | break; | 957 | 616k | } | 958 | 616k | | 959 | 616k | // Read the code length and check that we can decode directly into the out slice. | 960 | 616k | let len = self.table.depths[usize::from(read_code)]; | 961 | 616k | | 962 | 616k | if out.len() < usize::from(len) { | 963 | 13.5k | break; | 964 | 603k | } | 965 | 603k | | 966 | 603k | // We do exactly one more code (the one being inspected in the current iteration) | 967 | 603k | // after the 'burst'. When we want to break decoding precisely on the supplied | 968 | 603k | // buffer, we check if this is the last code to be decoded into it. | 969 | 603k | if CgC::YIELD_ON_FULL { | 970 | 0 | if out.len() == usize::from(len) { | 971 | 0 | break; | 972 | 0 | } | 973 | 603k | } | 974 | | | 975 | 603k | burst_byte_len[burst_size - 1] = len; | 976 | | } | 977 | | | 978 | 565k | self.code_buffer.consume_bits(burst_size as u8); | 979 | 565k | have_yet_to_decode_data = false; | 980 | 565k | | 981 | 565k | // Note that the very last code in the burst buffer doesn't actually belong to the | 982 | 565k | // burst itself. 
TODO: sometimes it could, we just don't differentiate between the | 983 | 565k | // breaks and a loop end condition above. That may be a speed advantage? | 984 | 565k | let (&new_code, burst) = burst[..burst_size].split_last().unwrap(); | 985 | 565k | | 986 | 565k | // The very tight loop for restoring the actual burst. These can be reconstructed in | 987 | 565k | // parallel since none of them depend on a prior constructed. Only the derivation of | 988 | 565k | // new codes is not parallel. There are no size changes here either. | 989 | 565k | let burst_targets = &mut target[..burst_size - 1]; | 990 | 565k | | 991 | 565k | if !self.table.is_full() { | 992 | 312k | self.next_code += burst_targets.len() as u16; | 993 | 312k | } | 994 | | | 995 | 403k | for ((&burst, target), byte) in | 996 | 565k | burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte) | 997 | 403k | { | 998 | 403k | *byte = self.table.reconstruct(burst, target); | 999 | 403k | } | 1000 | | | 1001 | 565k | self.table.derive_burst(&mut deriv, burst, &burst_byte[..]); | 1002 | 565k | | 1003 | 565k | // Now handle the special codes. | 1004 | 565k | if new_code == self.clear_code { | 1005 | 15.3k | self.reset_tables(); | 1006 | 15.3k | last_decoded = None; | 1007 | 15.3k | // Restarts in the next call to the entry point. 
| 1008 | 15.3k | break; | 1009 | 550k | } | 1010 | 550k | | 1011 | 550k | if new_code == self.end_code { | 1012 | 43 | self.has_ended = true; | 1013 | 43 | status = Ok(LzwStatus::Done); | 1014 | 43 | last_decoded = None; | 1015 | 43 | break; | 1016 | 550k | } | 1017 | 550k | | 1018 | 550k | if new_code > self.next_code { | 1019 | 179 | status = Err(LzwError::InvalidCode); | 1020 | 179 | last_decoded = None; | 1021 | 179 | break; | 1022 | 550k | } | 1023 | | | 1024 | 550k | let required_len = if new_code == self.next_code { | 1025 | 59.8k | self.table.depths[usize::from(deriv.code)] + 1 | 1026 | | } else { | 1027 | 490k | self.table.depths[usize::from(new_code)] | 1028 | | }; | 1029 | | | 1030 | | // We need the decoded data of the new code if it is the `next_code`. This is the | 1031 | | // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In | 1032 | | // all other cases we only need the first character of the decoded data. | 1033 | 550k | let have_next_code = new_code == self.next_code; | 1034 | 550k | | 1035 | 550k | // Update the slice holding the last decoded word. | 1036 | 550k | if have_next_code { | 1037 | | // If we did not have any burst code, we still hold that slice in the buffer. | 1038 | 59.8k | if let Some(new_last) = target[..burst_size - 1].last_mut() { | 1039 | 8.62k | let slice = core::mem::replace(new_last, &mut []); | 1040 | 8.62k | last_decoded = Some(&*slice); | 1041 | 51.2k | } | 1042 | 490k | } | 1043 | | | 1044 | | let cha; | 1045 | 550k | let is_in_buffer = usize::from(required_len) > out.len(); | 1046 | 550k | // Check if we will need to store our current state into the buffer. | 1047 | 550k | if is_in_buffer { | 1048 | 22.1k | if have_next_code { | 1049 | | // last_decoded will be Some if we have restored any code into the out slice. | 1050 | | // Otherwise it will still be present in the buffer. 
| 1051 | 6.82k | if let Some(last) = last_decoded.take() { | 1052 | 3.57k | self.buffer.bytes[..last.len()].copy_from_slice(last); | 1053 | 3.57k | self.buffer.write_mark = last.len(); | 1054 | 3.57k | self.buffer.read_mark = last.len(); | 1055 | 3.57k | } | 1056 | | | 1057 | 6.82k | cha = self.buffer.fill_cscsc(); | 1058 | 15.2k | } else { | 1059 | 15.2k | // Restore the decoded word into the buffer. | 1060 | 15.2k | last_decoded = None; | 1061 | 15.2k | cha = self.buffer.fill_reconstruct(&self.table, new_code); | 1062 | 15.2k | } | 1063 | | } else { | 1064 | 527k | let (target, tail) = out.split_at_mut(usize::from(required_len)); | 1065 | 527k | out = tail; | 1066 | 527k | | 1067 | 527k | if have_next_code { | 1068 | | // Reconstruct high. | 1069 | 53.0k | let source = match last_decoded.take() { | 1070 | 48.7k | Some(last) => last, | 1071 | 4.29k | None => &self.buffer.bytes[..self.buffer.write_mark], | 1072 | | }; | 1073 | | | 1074 | | // We don't *actually* expect the unwrap to happen. Each source is at least 1 | 1075 | | // byte long. But llvm doesn't know this (too much indirect loads and cases). | 1076 | 53.0k | cha = source.get(0).map(|x| *x).unwrap_or(0); | 1077 | 53.0k | target[..source.len()].copy_from_slice(source); | 1078 | 53.0k | target[source.len()..][0] = cha; | 1079 | 474k | } else { | 1080 | 474k | cha = self.table.reconstruct(new_code, target); | 1081 | 474k | } | 1082 | | | 1083 | | // A new decoded word. | 1084 | 527k | last_decoded = Some(target); | 1085 | | } | 1086 | | | 1087 | | // Each newly read code creates one new code/link based on the preceding code if we | 1088 | | // have enough space to put it there. 
| 1089 | 550k | if !self.table.is_full() { | 1090 | 297k | self.table.derive(&deriv, cha); | 1091 | 297k | | 1092 | 297k | if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff) | 1093 | 21.2k | && self.code_buffer.code_size() < MAX_CODESIZE | 1094 | 21.0k | { | 1095 | 21.0k | self.bump_code_size(); | 1096 | 276k | } | 1097 | | | 1098 | 297k | self.next_code += 1; | 1099 | 252k | } | 1100 | | | 1101 | | // store the information on the decoded word. | 1102 | 550k | code_link = Some(DerivationBase { | 1103 | 550k | code: new_code, | 1104 | 550k | first: cha, | 1105 | 550k | }); | 1106 | 550k | | 1107 | 550k | // Can't make any more progress with decoding. | 1108 | 550k | // | 1109 | 550k | // We have more data buffered but not enough space to put it? We want fetch a next | 1110 | 550k | // symbol if possible as in the case of it being a new symbol we can refer to the | 1111 | 550k | // buffered output as the source for that symbol's meaning and do a memcpy. | 1112 | 550k | // | 1113 | 550k | // Since this test is after decoding at least one code, we can now check for an | 1114 | 550k | // empty buffer and still guarantee progress when one was passed as a parameter. | 1115 | 550k | if is_in_buffer || out.is_empty() { | 1116 | 23.9k | break; | 1117 | 526k | } | 1118 | | } | 1119 | 134k | } | 1120 | | | 1121 | | // We need to store the last word into the buffer in case the first code in the next | 1122 | | // iteration is the next_code. | 1123 | 399k | if let Some(tail) = last_decoded { | 1124 | 155k | self.buffer.bytes[..tail.len()].copy_from_slice(tail); | 1125 | 155k | self.buffer.write_mark = tail.len(); | 1126 | 155k | // Mark the full buffer as having been consumed. | 1127 | 155k | self.buffer.read_mark = tail.len(); | 1128 | 244k | } | 1129 | | | 1130 | | // Ensure we don't indicate that no progress was made if we read some bytes from the input | 1131 | | // (which is progress). 
| 1132 | 399k | if o_in > inp.len() { | 1133 | 255k | if let Ok(LzwStatus::NoProgress) = status { | 1134 | 219k | status = Ok(LzwStatus::Ok); | 1135 | 219k | } | 1136 | 143k | } | 1137 | | | 1138 | | // Store the code/link state. | 1139 | 399k | self.last = code_link; | 1140 | 399k | | 1141 | 399k | BufferResult { | 1142 | 399k | consumed_in: o_in.wrapping_sub(inp.len()), | 1143 | 399k | consumed_out: o_out.wrapping_sub(out.len()), | 1144 | 399k | status, | 1145 | 399k | } | 1146 | 399k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance Line | Count | Source | 708 | 68.2k | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { | 709 | 68.2k | // Skip everything if there is nothing to do. | 710 | 68.2k | if self.has_ended { | 711 | 24 | return BufferResult { | 712 | 24 | consumed_in: 0, | 713 | 24 | consumed_out: 0, | 714 | 24 | status: Ok(LzwStatus::Done), | 715 | 24 | }; | 716 | 68.2k | } | 717 | 68.2k | | 718 | 68.2k | // Rough description: | 719 | 68.2k | // We will fill the output slice as much as possible until either there is no more symbols | 720 | 68.2k | // to decode or an end code has been reached. This requires an internal buffer to hold a | 721 | 68.2k | // potential tail of the word corresponding to the last symbol. This tail will then be | 722 | 68.2k | // decoded first before continuing with the regular decoding. The same buffer is required | 723 | 68.2k | // to persist some symbol state across calls. | 724 | 68.2k | // | 725 | 68.2k | // We store the words corresponding to code symbols in an index chain, bytewise, where we | 726 | 68.2k | // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain | 727 | 68.2k | // is traversed for each symbol when it is decoded and bytes are placed directly into the | 728 | 68.2k | // output slice. In the special case (new_code == next_code) we use an existing decoded | 729 | 68.2k | // version that is present in either the out bytes of this call or in buffer to copy the | 730 | 68.2k | // repeated prefix slice. 
| 731 | 68.2k | // TODO: I played with a 'decoding cache' to remember the position of long symbols and | 732 | 68.2k | // avoid traversing the chain, doing a copy of memory instead. It did however not lead to | 733 | 68.2k | // a serious improvement. It's just unlikely to both have a long symbol and have that | 734 | 68.2k | // repeated twice in the same output buffer. | 735 | 68.2k | // | 736 | 68.2k | // You will also find the (to my knowledge novel) concept of a _decoding burst_ which | 737 | 68.2k | // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order | 738 | 68.2k | // execution as much as possible and for this reason have the least possible stress on | 739 | 68.2k | // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but | 740 | 68.2k | // only for re-used codes, not novel ones. This lookahead also makes the loop termination | 741 | 68.2k | // when restoring each byte of the code word perfectly predictable! So a burst is a chunk | 742 | 68.2k | // of code words which are all independent of each other, have known lengths _and_ are | 743 | 68.2k | // guaranteed to fit into the out slice without requiring a buffer. One burst can be | 744 | 68.2k | // decoded in an extremely tight loop. | 745 | 68.2k | // | 746 | 68.2k | // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid | 747 | 68.2k | // that intermediate buffer at the expense of not always filling the output buffer | 748 | 68.2k | // completely. Alternatively we might follow its chain of precursor states twice. This may | 749 | 68.2k | // be even cheaper if we store more than one byte per link so it really should be | 750 | 68.2k | // evaluated. | 751 | 68.2k | // TODO: if the caller was required to provide the previous last word we could also avoid | 752 | 68.2k | // the buffer for cases where we need it to restore the next code! 
This could be built | 753 | 68.2k | // backwards compatible by only doing it after an opt-in call that enables the behaviour. | 754 | 68.2k | | 755 | 68.2k | // Record initial lengths for the result that is returned. | 756 | 68.2k | let o_in = inp.len(); | 757 | 68.2k | let o_out = out.len(); | 758 | 68.2k | | 759 | 68.2k | // The code_link is the previously decoded symbol. | 760 | 68.2k | // It's used to link the new code back to its predecessor. | 761 | 68.2k | let mut code_link = None; | 762 | 68.2k | // The status, which is written to on an invalid code. | 763 | 68.2k | let mut status = Ok(LzwStatus::Ok); | 764 | 68.2k | | 765 | 68.2k | match self.last.take() { | 766 | | // No last state? This is the first code after a reset? | 767 | | None => { | 768 | 10.8k | match self.next_symbol(&mut inp) { | 769 | | // Plainly invalid code. | 770 | 10.8k | Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode), | 771 | | // next_code would require an actual predecessor. | 772 | 10.8k | Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode), | 773 | | // No more symbols available and nothing decoded yet. | 774 | | // Assume that we didn't make progress, this may get reset to Done if we read | 775 | | // some bytes from the input. | 776 | 6 | None => status = Ok(LzwStatus::NoProgress), | 777 | | // Handle a valid code. 
| 778 | 10.8k | Some(init_code) => { | 779 | 10.8k | if init_code == self.clear_code { | 780 | 474 | self.init_tables(); | 781 | 10.3k | } else if init_code == self.end_code { | 782 | 0 | self.has_ended = true; | 783 | 0 | status = Ok(LzwStatus::Done); | 784 | 10.3k | } else if self.table.is_empty() { | 785 | 142 | if self.implicit_reset { | 786 | 142 | self.init_tables(); | 787 | 142 | | 788 | 142 | self.buffer.fill_reconstruct(&self.table, init_code); | 789 | 142 | let link = self.table.at(init_code).clone(); | 790 | 142 | code_link = Some(DerivationBase { | 791 | 142 | code: init_code, | 792 | 142 | first: link.first, | 793 | 142 | }); | 794 | 142 | } else { | 795 | 0 | // We require an explicit reset. | 796 | 0 | status = Err(LzwError::InvalidCode); | 797 | 0 | } | 798 | 10.2k | } else { | 799 | 10.2k | // Reconstruct the first code in the buffer. | 800 | 10.2k | self.buffer.fill_reconstruct(&self.table, init_code); | 801 | 10.2k | let link = self.table.at(init_code).clone(); | 802 | 10.2k | code_link = Some(DerivationBase { | 803 | 10.2k | code: init_code, | 804 | 10.2k | first: link.first, | 805 | 10.2k | }); | 806 | 10.2k | } | 807 | | } | 808 | | } | 809 | | } | 810 | | // Move the tracking state to the stack. | 811 | 57.3k | Some(tup) => code_link = Some(tup), | 812 | | }; | 813 | | | 814 | | // Track an empty `burst` (see below) means we made no progress. | 815 | 68.2k | let mut have_yet_to_decode_data = false; | 816 | 68.2k | | 817 | 68.2k | // Restore the previous state, if any. | 818 | 68.2k | if code_link.is_some() { | 819 | 67.7k | let remain = self.buffer.buffer(); | 820 | 67.7k | // Check if we can fully finish the buffer. | 821 | 67.7k | if remain.len() > out.len() { | 822 | 26.9k | if out.is_empty() { | 823 | 0 | // This also implies the buffer is _not_ empty and we will not enter any | 824 | 0 | // decoding loop. 
| 825 | 0 | status = Ok(LzwStatus::NoProgress); | 826 | 26.9k | } else { | 827 | 26.9k | out.copy_from_slice(&remain[..out.len()]); | 828 | 26.9k | self.buffer.consume(out.len()); | 829 | 26.9k | out = &mut []; | 830 | 26.9k | } | 831 | 40.8k | } else if remain.is_empty() { | 832 | 1.23k | status = Ok(LzwStatus::NoProgress); | 833 | 1.23k | have_yet_to_decode_data = true; | 834 | 39.5k | } else { | 835 | 39.5k | let consumed = remain.len(); | 836 | 39.5k | out[..consumed].copy_from_slice(remain); | 837 | 39.5k | self.buffer.consume(consumed); | 838 | 39.5k | out = &mut out[consumed..]; | 839 | 39.5k | have_yet_to_decode_data = false; | 840 | 39.5k | } | 841 | 483 | } | 842 | | | 843 | | // A special reference to out slice which holds the last decoded symbol. | 844 | 68.2k | let mut last_decoded: Option<&[u8]> = None; | 845 | 68.2k | | 846 | 68.2k | if self.buffer.buffer().is_empty() { | 847 | | // Hot loop that writes data to the output as long as we can do so directly from the | 848 | | // input stream. As an invariant of this block we did not need to use the buffer to | 849 | | // store a decoded code word. Testing the condition ahead of time avoids a test in the | 850 | | // loop body since every code path where the buffer is filled already breaks. | 851 | | // | 852 | | // In a previous iteration of the code we trusted compiler optimization to work this | 853 | | // out but it seems that it does not. Another edit hidden behind some performance work | 854 | | // then edited out the check, inadvertently changing the behavior for callers that | 855 | | // relied on being able to provide an empty output buffer and still receiving a useful | 856 | | // signal about the state of the stream. | 857 | | | 858 | | // A burst is a sequence of code words that are independently decoded, i.e. they do not | 859 | | // change the state of the decoder in ways that would influence the interpretation of | 860 | | // each other. 
That is: they are not special symbols, they do not make us increase the | 861 | | // code size, they are each codes already in the tree before the burst. | 862 | | // | 863 | | // The tracking state for a burst. These are actually initialized later but compiler | 864 | | // wasn't smart enough to fully optimize out the init code so that appears outside the | 865 | | // loop. | 866 | 41.3k | let mut burst = [0; BURST]; | 867 | 41.3k | let mut burst_byte_len = [0u16; BURST]; | 868 | 41.3k | let mut burst_byte = [0u8; BURST]; | 869 | 41.3k | let mut target: [&mut [u8]; BURST] = Default::default(); | 870 | | | 871 | | loop { | 872 | | // In particular, we *also* break if the output buffer is still empty. Especially | 873 | | // when the output parameter was an empty slice, we must try to fetch at least one | 874 | | // code but with YIELD_ON_FULL we do not. | 875 | 2.70M | if CgC::YIELD_ON_FULL && out.is_empty() { | 876 | 0 | break; | 877 | 2.70M | } | 878 | | | 879 | 2.70M | let mut deriv = match code_link.take() { | 880 | 2.70M | Some(link) => link, | 881 | | None => { | 882 | | // TODO: we do not need to break here. This does not indicate that the buffer | 883 | | // has been filled, rather it indicates we have reset the state. The next code | 884 | | // should be part of the initial alphabet. However the first code is special in | 885 | | // the sense of not creating a new code itself. This is handled correctly in | 886 | | // the initialization prior to the loop; and in particular that handling as | 887 | | // written currently relies on putting it into the buffer; so handling it we | 888 | | // would need to ensure that either the buffer is fully cleared after its use, | 889 | | // or use another implementation of handling that first code. | 890 | 483 | break; | 891 | | } | 892 | | }; | 893 | | | 894 | | // Ensure the code buffer is full, we're about to request some codes. 
| 895 | | // Note that this also ensures at least one code is in the buffer if any input is left. | 896 | 2.70M | self.refill_bits(&mut inp); | 897 | 2.70M | let cnt = self.code_buffer.peek_bits(&mut burst); | 898 | 2.70M | | 899 | 2.70M | // No code left in the buffer, and no more bytes to refill the buffer. | 900 | 2.70M | if cnt == 0 { | 901 | 312 | if have_yet_to_decode_data { | 902 | 95 | status = Ok(LzwStatus::NoProgress); | 903 | 217 | } | 904 | | | 905 | 312 | code_link = Some(deriv); | 906 | 312 | break; | 907 | 2.70M | } | 908 | 2.70M | | 909 | 2.70M | debug_assert!( | 910 | | // When the table is full, we have a max code one above the mask. | 911 | 0 | self.table.is_full() | 912 | | // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the | 913 | | // computed next_code is 4 which already exceeds the bit width from the start. | 914 | | // Then we will immediately switch code size after this code. | 915 | | // | 916 | | // TODO: this is the reason for some saturating and non-sharp comparisons in | 917 | | // the code below. Maybe it makes sense to revisit turning this into a compile | 918 | | // time choice? | 919 | 0 | || (self.code_buffer.code_size() == 2 && self.next_code == 4) | 920 | 0 | || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code, | 921 | | ); | 922 | | | 923 | 2.70M | let mut burst_size = 0; | 924 | 2.70M | let left_before_size_switch = (self.code_buffer.max_code() | 925 | 2.70M | - Code::from(self.is_tiff)) | 926 | 2.70M | .saturating_sub(self.next_code); | 927 | | | 928 | | // A burst is a sequence of decodes that are completely independent of each other. This | 929 | | // is the case if neither is an end code, a clear code, or a next code, i.e. we have | 930 | | // all of them in the decoding table and thus known their depths, and additionally if | 931 | | // we can decode them directly into the output buffer. 
| 932 | 3.59M | for b in &burst[..cnt] { | 933 | | // We can commit the previous burst code, and will take a slice from the output | 934 | | // buffer. This also avoids the bounds check in the tight loop later. | 935 | 3.59M | if burst_size > 0 { | 936 | 882k | let len = burst_byte_len[burst_size - 1]; | 937 | 882k | let (into, tail) = out.split_at_mut(usize::from(len)); | 938 | 882k | target[burst_size - 1] = into; | 939 | 882k | out = tail; | 940 | 2.70M | } | 941 | | | 942 | | // Check that we don't overflow the code size with all codes we burst decode. | 943 | 3.59M | burst_size += 1; | 944 | 3.59M | | 945 | 3.59M | if burst_size > usize::from(left_before_size_switch) { | 946 | 2.46M | break; | 947 | 1.12M | } | 948 | 1.12M | | 949 | 1.12M | let read_code = *b; | 950 | 1.12M | | 951 | 1.12M | // A burst code can't be special. | 952 | 1.12M | if read_code == self.clear_code | 953 | 1.12M | || read_code == self.end_code | 954 | 1.11M | || read_code >= self.next_code | 955 | | { | 956 | 53.4k | break; | 957 | 1.07M | } | 958 | 1.07M | | 959 | 1.07M | // Read the code length and check that we can decode directly into the out slice. | 960 | 1.07M | let len = self.table.depths[usize::from(read_code)]; | 961 | 1.07M | | 962 | 1.07M | if out.len() < usize::from(len) { | 963 | 8.47k | break; | 964 | 1.06M | } | 965 | 1.06M | | 966 | 1.06M | // We do exactly one more code (the one being inspected in the current iteration) | 967 | 1.06M | // after the 'burst'. When we want to break decoding precisely on the supplied | 968 | 1.06M | // buffer, we check if this is the last code to be decoded into it. 
| 969 | 1.06M | if CgC::YIELD_ON_FULL { | 970 | 0 | if out.len() == usize::from(len) { | 971 | 0 | break; | 972 | 0 | } | 973 | 1.06M | } | 974 | | | 975 | 1.06M | burst_byte_len[burst_size - 1] = len; | 976 | | } | 977 | | | 978 | 2.70M | self.code_buffer.consume_bits(burst_size as u8); | 979 | 2.70M | have_yet_to_decode_data = false; | 980 | 2.70M | | 981 | 2.70M | // Note that the very last code in the burst buffer doesn't actually belong to the | 982 | 2.70M | // burst itself. TODO: sometimes it could, we just don't differentiate between the | 983 | 2.70M | // breaks and a loop end condition above. That may be a speed advantage? | 984 | 2.70M | let (&new_code, burst) = burst[..burst_size].split_last().unwrap(); | 985 | 2.70M | | 986 | 2.70M | // The very tight loop for restoring the actual burst. These can be reconstructed in | 987 | 2.70M | // parallel since none of them depend on a prior constructed. Only the derivation of | 988 | 2.70M | // new codes is not parallel. There are no size changes here either. | 989 | 2.70M | let burst_targets = &mut target[..burst_size - 1]; | 990 | 2.70M | | 991 | 2.70M | if !self.table.is_full() { | 992 | 249k | self.next_code += burst_targets.len() as u16; | 993 | 2.45M | } | 994 | | | 995 | 882k | for ((&burst, target), byte) in | 996 | 2.70M | burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte) | 997 | 882k | { | 998 | 882k | *byte = self.table.reconstruct(burst, target); | 999 | 882k | } | 1000 | | | 1001 | 2.70M | self.table.derive_burst(&mut deriv, burst, &burst_byte[..]); | 1002 | 2.70M | | 1003 | 2.70M | // Now handle the special codes. | 1004 | 2.70M | if new_code == self.clear_code { | 1005 | 9.80k | self.reset_tables(); | 1006 | 9.80k | last_decoded = None; | 1007 | 9.80k | // Restarts in the next call to the entry point. 
| 1008 | 9.80k | break; | 1009 | 2.69M | } | 1010 | 2.69M | | 1011 | 2.69M | if new_code == self.end_code { | 1012 | 260 | self.has_ended = true; | 1013 | 260 | status = Ok(LzwStatus::Done); | 1014 | 260 | last_decoded = None; | 1015 | 260 | break; | 1016 | 2.69M | } | 1017 | 2.69M | | 1018 | 2.69M | if new_code > self.next_code { | 1019 | 128 | status = Err(LzwError::InvalidCode); | 1020 | 128 | last_decoded = None; | 1021 | 128 | break; | 1022 | 2.69M | } | 1023 | | | 1024 | 2.69M | let required_len = if new_code == self.next_code { | 1025 | 39.5k | self.table.depths[usize::from(deriv.code)] + 1 | 1026 | | } else { | 1027 | 2.65M | self.table.depths[usize::from(new_code)] | 1028 | | }; | 1029 | | | 1030 | | // We need the decoded data of the new code if it is the `next_code`. This is the | 1031 | | // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In | 1032 | | // all other cases we only need the first character of the decoded data. | 1033 | 2.69M | let have_next_code = new_code == self.next_code; | 1034 | 2.69M | | 1035 | 2.69M | // Update the slice holding the last decoded word. | 1036 | 2.69M | if have_next_code { | 1037 | | // If we did not have any burst code, we still hold that slice in the buffer. | 1038 | 39.5k | if let Some(new_last) = target[..burst_size - 1].last_mut() { | 1039 | 3.03k | let slice = core::mem::replace(new_last, &mut []); | 1040 | 3.03k | last_decoded = Some(&*slice); | 1041 | 36.4k | } | 1042 | 2.65M | } | 1043 | | | 1044 | | let cha; | 1045 | 2.69M | let is_in_buffer = usize::from(required_len) > out.len(); | 1046 | 2.69M | // Check if we will need to store our current state into the buffer. | 1047 | 2.69M | if is_in_buffer { | 1048 | 29.2k | if have_next_code { | 1049 | | // last_decoded will be Some if we have restored any code into the out slice. | 1050 | | // Otherwise it will still be present in the buffer. 
| 1051 | 19.9k | if let Some(last) = last_decoded.take() { | 1052 | 5.70k | self.buffer.bytes[..last.len()].copy_from_slice(last); | 1053 | 5.70k | self.buffer.write_mark = last.len(); | 1054 | 5.70k | self.buffer.read_mark = last.len(); | 1055 | 14.2k | } | 1056 | | | 1057 | 19.9k | cha = self.buffer.fill_cscsc(); | 1058 | 9.33k | } else { | 1059 | 9.33k | // Restore the decoded word into the buffer. | 1060 | 9.33k | last_decoded = None; | 1061 | 9.33k | cha = self.buffer.fill_reconstruct(&self.table, new_code); | 1062 | 9.33k | } | 1063 | | } else { | 1064 | 2.66M | let (target, tail) = out.split_at_mut(usize::from(required_len)); | 1065 | 2.66M | out = tail; | 1066 | 2.66M | | 1067 | 2.66M | if have_next_code { | 1068 | | // Reconstruct high. | 1069 | 19.5k | let source = match last_decoded.take() { | 1070 | 12.8k | Some(last) => last, | 1071 | 6.67k | None => &self.buffer.bytes[..self.buffer.write_mark], | 1072 | | }; | 1073 | | | 1074 | | // We don't *actually* expect the unwrap to happen. Each source is at least 1 | 1075 | | // byte long. But llvm doesn't know this (too much indirect loads and cases). | 1076 | 19.5k | cha = source.get(0).map(|x| *x).unwrap_or(0); | 1077 | 19.5k | target[..source.len()].copy_from_slice(source); | 1078 | 19.5k | target[source.len()..][0] = cha; | 1079 | 2.64M | } else { | 1080 | 2.64M | cha = self.table.reconstruct(new_code, target); | 1081 | 2.64M | } | 1082 | | | 1083 | | // A new decoded word. | 1084 | 2.66M | last_decoded = Some(target); | 1085 | | } | 1086 | | | 1087 | | // Each newly read code creates one new code/link based on the preceding code if we | 1088 | | // have enough space to put it there. 
| 1089 | 2.69M | if !self.table.is_full() { | 1090 | 239k | self.table.derive(&deriv, cha); | 1091 | 239k | | 1092 | 239k | if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff) | 1093 | 1.92k | && self.code_buffer.code_size() < MAX_CODESIZE | 1094 | 1.78k | { | 1095 | 1.78k | self.bump_code_size(); | 1096 | 237k | } | 1097 | | | 1098 | 239k | self.next_code += 1; | 1099 | 2.45M | } | 1100 | | | 1101 | | // store the information on the decoded word. | 1102 | 2.69M | code_link = Some(DerivationBase { | 1103 | 2.69M | code: new_code, | 1104 | 2.69M | first: cha, | 1105 | 2.69M | }); | 1106 | 2.69M | | 1107 | 2.69M | // Can't make any more progress with decoding. | 1108 | 2.69M | // | 1109 | 2.69M | // We have more data buffered but not enough space to put it? We want fetch a next | 1110 | 2.69M | // symbol if possible as in the case of it being a new symbol we can refer to the | 1111 | 2.69M | // buffered output as the source for that symbol's meaning and do a memcpy. | 1112 | 2.69M | // | 1113 | 2.69M | // Since this test is after decoding at least one code, we can now check for an | 1114 | 2.69M | // empty buffer and still guarantee progress when one was passed as a parameter. | 1115 | 2.69M | if is_in_buffer || out.is_empty() { | 1116 | 30.3k | break; | 1117 | 2.66M | } | 1118 | | } | 1119 | 26.9k | } | 1120 | | | 1121 | | // We need to store the last word into the buffer in case the first code in the next | 1122 | | // iteration is the next_code. | 1123 | 68.2k | if let Some(tail) = last_decoded { | 1124 | 1.25k | self.buffer.bytes[..tail.len()].copy_from_slice(tail); | 1125 | 1.25k | self.buffer.write_mark = tail.len(); | 1126 | 1.25k | // Mark the full buffer as having been consumed. | 1127 | 1.25k | self.buffer.read_mark = tail.len(); | 1128 | 66.9k | } | 1129 | | | 1130 | | // Ensure we don't indicate that no progress was made if we read some bytes from the input | 1131 | | // (which is progress). 
| 1132 | 68.2k | if o_in > inp.len() { | 1133 | 40.1k | if let Ok(LzwStatus::NoProgress) = status { | 1134 | 1.13k | status = Ok(LzwStatus::Ok); | 1135 | 39.1k | } | 1136 | 27.9k | } | 1137 | | | 1138 | | // Store the code/link state. | 1139 | 68.2k | self.last = code_link; | 1140 | 68.2k | | 1141 | 68.2k | BufferResult { | 1142 | 68.2k | consumed_in: o_in.wrapping_sub(inp.len()), | 1143 | 68.2k | consumed_out: o_out.wrapping_sub(out.len()), | 1144 | 68.2k | status, | 1145 | 68.2k | } | 1146 | 68.2k | } |
|
1147 | | } |
1148 | | |
1149 | | impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> { |
1150 | 34.1k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { |
1151 | 34.1k | self.code_buffer.next_symbol(inp) |
1152 | 34.1k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol Line | Count | Source | 1150 | 23.2k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { | 1151 | 23.2k | self.code_buffer.next_symbol(inp) | 1152 | 23.2k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol Line | Count | Source | 1150 | 10.8k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { | 1151 | 10.8k | self.code_buffer.next_symbol(inp) | 1152 | 10.8k | } |
|
1153 | | |
1154 | 22.8k | fn bump_code_size(&mut self) { |
1155 | 22.8k | self.code_buffer.bump_code_size() |
1156 | 22.8k | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size Line | Count | Source | 1154 | 21.0k | fn bump_code_size(&mut self) { | 1155 | 21.0k | self.code_buffer.bump_code_size() | 1156 | 21.0k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size Line | Count | Source | 1154 | 1.78k | fn bump_code_size(&mut self) { | 1155 | 1.78k | self.code_buffer.bump_code_size() | 1156 | 1.78k | } |
|
1157 | | |
1158 | 3.49M | fn refill_bits(&mut self, inp: &mut &[u8]) { |
1159 | 3.49M | self.code_buffer.refill_bits(inp) |
1160 | 3.49M | } Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits Line | Count | Source | 1158 | 784k | fn refill_bits(&mut self, inp: &mut &[u8]) { | 1159 | 784k | self.code_buffer.refill_bits(inp) | 1160 | 784k | } |
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits Line | Count | Source | 1158 | 2.70M | fn refill_bits(&mut self, inp: &mut &[u8]) { | 1159 | 2.70M | self.code_buffer.refill_bits(inp) | 1160 | 2.70M | } |
|
1161 | | } |
1162 | | |
1163 | | impl CodeBuffer for MsbBuffer { |
1164 | 549 | fn new(min_size: u8) -> Self { |
1165 | 549 | MsbBuffer { |
1166 | 549 | code_size: min_size + 1, |
1167 | 549 | code_mask: (1u16 << (min_size + 1)) - 1, |
1168 | 549 | bit_buffer: 0, |
1169 | 549 | bits: 0, |
1170 | 549 | } |
1171 | 549 | } |
1172 | | |
1173 | 10.4k | fn reset(&mut self, min_size: u8) { |
1174 | 10.4k | self.code_size = min_size + 1; |
1175 | 10.4k | self.code_mask = (1 << self.code_size) - 1; |
1176 | 10.4k | } |
1177 | | |
1178 | 10.8k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { |
1179 | 10.8k | if self.bits < self.code_size { |
1180 | 1.43k | self.refill_bits(inp); |
1181 | 9.39k | } |
1182 | | |
1183 | 10.8k | if self.bits < self.code_size { |
1184 | 6 | return None; |
1185 | 10.8k | } |
1186 | 10.8k | |
1187 | 10.8k | let mask = u64::from(self.code_mask); |
1188 | 10.8k | let rotbuf = self.bit_buffer.rotate_left(self.code_size.into()); |
1189 | 10.8k | self.bit_buffer = rotbuf & !mask; |
1190 | 10.8k | self.bits -= self.code_size; |
1191 | 10.8k | Some((rotbuf & mask) as u16) |
1192 | 10.8k | } |
1193 | | |
1194 | 1.78k | fn bump_code_size(&mut self) { |
1195 | 1.78k | self.code_size += 1; |
1196 | 1.78k | self.code_mask = (self.code_mask << 1) | 1; |
1197 | 1.78k | } |
1198 | | |
1199 | 2.71M | fn refill_bits(&mut self, inp: &mut &[u8]) { |
1200 | 2.71M | let wish_count = (64 - self.bits) / 8; |
1201 | 2.71M | let mut buffer = [0u8; 8]; |
1202 | 2.71M | let new_bits = match inp.get(..usize::from(wish_count)) { |
1203 | 2.70M | Some(bytes) => { |
1204 | 2.70M | buffer[..usize::from(wish_count)].copy_from_slice(bytes); |
1205 | 2.70M | *inp = &inp[usize::from(wish_count)..]; |
1206 | 2.70M | wish_count * 8 |
1207 | | } |
1208 | | None => { |
1209 | 1.92k | let new_bits = inp.len() * 8; |
1210 | 1.92k | buffer[..inp.len()].copy_from_slice(inp); |
1211 | 1.92k | *inp = &[]; |
1212 | 1.92k | new_bits as u8 |
1213 | | } |
1214 | | }; |
1215 | 2.71M | self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits; |
1216 | 2.71M | self.bits += new_bits; |
1217 | 2.71M | } |
1218 | | |
1219 | 2.70M | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize { |
1220 | 2.70M | let mut bit_buffer = self.bit_buffer; |
1221 | 2.70M | let mask = u64::from(self.code_mask); |
1222 | 2.70M | let mut consumed = 0; |
1223 | 2.70M | let mut cnt = 0; |
1224 | | |
1225 | 15.5M | for b in code { |
1226 | 15.3M | let consumed_after = consumed + self.code_size; |
1227 | 15.3M | if consumed_after > self.bits { |
1228 | 2.54M | break; |
1229 | 12.8M | } |
1230 | 12.8M | |
1231 | 12.8M | cnt += 1; |
1232 | 12.8M | consumed = consumed_after; |
1233 | 12.8M | |
1234 | 12.8M | let rotbuf = bit_buffer.rotate_left(self.code_size.into()); |
1235 | 12.8M | *b = (rotbuf & mask) as u16; |
1236 | 12.8M | // The read bits are 'appended' but we never interpret those appended bits. |
1237 | 12.8M | bit_buffer = rotbuf; |
1238 | | } |
1239 | | |
1240 | 2.70M | cnt |
1241 | 2.70M | } |
1242 | | |
1243 | 2.70M | fn consume_bits(&mut self, code_cnt: u8) { |
1244 | 2.70M | let bits = self.code_size * code_cnt; |
1245 | 2.70M | debug_assert!(bits <= self.bits); |
1246 | | |
1247 | 2.70M | if bits >= self.bits { |
1248 | 11.6k | self.bit_buffer = 0; |
1249 | 2.69M | } else { |
1250 | 2.69M | // bits < self.bits so this must be smaller than the number size. |
1251 | 2.69M | self.bit_buffer = self.bit_buffer << bits; |
1252 | 2.69M | } |
1253 | | |
1254 | 2.70M | self.bits = self.bits.wrapping_sub(bits); |
1255 | 2.70M | } |
1256 | | |
1257 | 2.94M | fn max_code(&self) -> Code { |
1258 | 2.94M | self.code_mask |
1259 | 2.94M | } |
1260 | | |
1261 | 1.92k | fn code_size(&self) -> u8 { |
1262 | 1.92k | self.code_size |
1263 | 1.92k | } |
1264 | | } |
1265 | | |
1266 | | impl CodeBuffer for LsbBuffer { |
1267 | 1.38k | fn new(min_size: u8) -> Self { |
1268 | 1.38k | LsbBuffer { |
1269 | 1.38k | code_size: min_size + 1, |
1270 | 1.38k | code_mask: (1u16 << (min_size + 1)) - 1, |
1271 | 1.38k | bit_buffer: 0, |
1272 | 1.38k | bits: 0, |
1273 | 1.38k | } |
1274 | 1.38k | } |
1275 | | |
1276 | 22.6k | fn reset(&mut self, min_size: u8) { |
1277 | 22.6k | self.code_size = min_size + 1; |
1278 | 22.6k | self.code_mask = (1 << self.code_size) - 1; |
1279 | 22.6k | } |
1280 | | |
1281 | 23.2k | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> { |
1282 | 23.2k | if self.bits < self.code_size { |
1283 | 2.06k | self.refill_bits(inp); |
1284 | 21.2k | } |
1285 | | |
1286 | 23.2k | if self.bits < self.code_size { |
1287 | 326 | return None; |
1288 | 22.9k | } |
1289 | 22.9k | |
1290 | 22.9k | let mask = u64::from(self.code_mask); |
1291 | 22.9k | let code = self.bit_buffer & mask; |
1292 | 22.9k | self.bit_buffer >>= self.code_size; |
1293 | 22.9k | self.bits -= self.code_size; |
1294 | 22.9k | Some(code as u16) |
1295 | 23.2k | } |
1296 | | |
1297 | 21.0k | fn bump_code_size(&mut self) { |
1298 | 21.0k | self.code_size += 1; |
1299 | 21.0k | self.code_mask = (self.code_mask << 1) | 1; |
1300 | 21.0k | } |
1301 | | |
1302 | 786k | fn refill_bits(&mut self, inp: &mut &[u8]) { |
1303 | 786k | let wish_count = (64 - self.bits) / 8; |
1304 | 786k | let mut buffer = [0u8; 8]; |
1305 | 786k | let new_bits = match inp.get(..usize::from(wish_count)) { |
1306 | 382k | Some(bytes) => { |
1307 | 382k | buffer[..usize::from(wish_count)].copy_from_slice(bytes); |
1308 | 382k | *inp = &inp[usize::from(wish_count)..]; |
1309 | 382k | wish_count * 8 |
1310 | | } |
1311 | | None => { |
1312 | 403k | let new_bits = inp.len() * 8; |
1313 | 403k | buffer[..inp.len()].copy_from_slice(inp); |
1314 | 403k | *inp = &[]; |
1315 | 403k | new_bits as u8 |
1316 | | } |
1317 | | }; |
1318 | 786k | self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits; |
1319 | 786k | self.bits += new_bits; |
1320 | 786k | } |
1321 | | |
1322 | 784k | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize { |
1323 | 784k | let mut bit_buffer = self.bit_buffer; |
1324 | 784k | let mask = u64::from(self.code_mask); |
1325 | 784k | let mut consumed = 0; |
1326 | 784k | let mut cnt = 0; |
1327 | | |
1328 | 2.99M | for b in code { |
1329 | 2.90M | let consumed_after = consumed + self.code_size; |
1330 | 2.90M | if consumed_after > self.bits { |
1331 | 696k | break; |
1332 | 2.20M | } |
1333 | 2.20M | |
1334 | 2.20M | cnt += 1; |
1335 | 2.20M | consumed = consumed_after; |
1336 | 2.20M | |
1337 | 2.20M | *b = (bit_buffer & mask) as u16; |
1338 | 2.20M | bit_buffer = bit_buffer >> self.code_size; |
1339 | | } |
1340 | | |
1341 | 784k | cnt |
1342 | 784k | } |
1343 | | |
1344 | 565k | fn consume_bits(&mut self, code_cnt: u8) { |
1345 | 565k | let bits = self.code_size * code_cnt; |
1346 | 565k | debug_assert!(bits <= self.bits); |
1347 | | |
1348 | 565k | if bits >= self.bits { |
1349 | 101k | self.bit_buffer = 0; |
1350 | 464k | } else { |
1351 | 464k | // bits < self.bits so this must be smaller than the number size. |
1352 | 464k | self.bit_buffer = self.bit_buffer >> bits; |
1353 | 464k | } |
1354 | | |
1355 | 565k | self.bits = self.bits.wrapping_sub(bits); |
1356 | 565k | } |
1357 | | |
1358 | 863k | fn max_code(&self) -> Code { |
1359 | 863k | self.code_mask |
1360 | 863k | } |
1361 | | |
1362 | 21.2k | fn code_size(&self) -> u8 { |
1363 | 21.2k | self.code_size |
1364 | 21.2k | } |
1365 | | } |
1366 | | |
1367 | | impl Buffer { |
1368 | 1.93k | fn new() -> Self { |
1369 | 1.93k | Buffer { |
1370 | 1.93k | bytes: vec![0; MAX_ENTRIES].into_boxed_slice(), |
1371 | 1.93k | read_mark: 0, |
1372 | 1.93k | write_mark: 0, |
1373 | 1.93k | } |
1374 | 1.93k | } |
1375 | | |
1376 | | /// When encoding a sequence `cScSc` where `c` is any character and `S` is any string |
1377 | | /// this results in two codes `AB`, `A` encoding `cS` and `B` encoding `cSc`. Supposing |
1378 | | /// the buffer is already filled with the reconstruction of `A`, we can easily fill it |
1379 | | /// with the reconstruction of `B`. |
1380 | 26.7k | fn fill_cscsc(&mut self) -> u8 { |
1381 | 26.7k | self.bytes[self.write_mark] = self.bytes[0]; |
1382 | 26.7k | self.write_mark += 1; |
1383 | 26.7k | self.read_mark = 0; |
1384 | 26.7k | self.bytes[0] |
1385 | 26.7k | } |
1386 | | |
1387 | | // Fill the buffer by decoding from the table |
1388 | 51.3k | fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 { |
1389 | 51.3k | self.write_mark = 0; |
1390 | 51.3k | self.read_mark = 0; |
1391 | 51.3k | let depth = table.depths[usize::from(code)]; |
1392 | 51.3k | let mut memory = core::mem::replace(&mut self.bytes, Box::default()); |
1393 | 51.3k | |
1394 | 51.3k | let out = &mut memory[..usize::from(depth)]; |
1395 | 51.3k | let last = table.reconstruct(code, out); |
1396 | 51.3k | |
1397 | 51.3k | self.bytes = memory; |
1398 | 51.3k | self.write_mark = usize::from(depth); |
1399 | 51.3k | last |
1400 | 51.3k | } |
1401 | | |
1402 | 927k | fn buffer(&self) -> &[u8] { |
1403 | 927k | &self.bytes[self.read_mark..self.write_mark] |
1404 | 927k | } |
1405 | | |
1406 | 239k | fn consume(&mut self, amt: usize) { |
1407 | 239k | self.read_mark += amt; |
1408 | 239k | } |
1409 | | } |
1410 | | |
1411 | | impl Table { |
1412 | 1.93k | fn new() -> Self { |
1413 | 1.93k | Table { |
1414 | 1.93k | inner: Vec::with_capacity(MAX_ENTRIES), |
1415 | 1.93k | depths: Vec::with_capacity(MAX_ENTRIES), |
1416 | 1.93k | } |
1417 | 1.93k | } |
1418 | | |
1419 | 25.1k | fn clear(&mut self, min_size: u8) { |
1420 | 25.1k | let static_count = usize::from(1u16 << u16::from(min_size)) + 2; |
1421 | 25.1k | self.inner.truncate(static_count); |
1422 | 25.1k | self.depths.truncate(static_count); |
1423 | 25.1k | } |
1424 | | |
1425 | 7.95k | fn init(&mut self, min_size: u8) { |
1426 | 7.95k | self.inner.clear(); |
1427 | 7.95k | self.depths.clear(); |
1428 | 436k | for i in 0..(1u16 << u16::from(min_size)) { |
1429 | 436k | self.inner.push(Link::base(i as u8)); |
1430 | 436k | self.depths.push(1); |
1431 | 436k | } |
1432 | | // Clear code. |
1433 | 7.95k | self.inner.push(Link::base(0)); |
1434 | 7.95k | self.depths.push(0); |
1435 | 7.95k | // End code. |
1436 | 7.95k | self.inner.push(Link::base(0)); |
1437 | 7.95k | self.depths.push(0); |
1438 | 7.95k | } |
1439 | | |
1440 | 26.7k | fn at(&self, code: Code) -> &Link { |
1441 | 26.7k | &self.inner[usize::from(code)] |
1442 | 26.7k | } |
1443 | | |
1444 | 26.7k | fn is_empty(&self) -> bool { |
1445 | 26.7k | self.inner.is_empty() |
1446 | 26.7k | } |
1447 | | |
1448 | 6.52M | fn is_full(&self) -> bool { |
1449 | 6.52M | self.inner.len() >= MAX_ENTRIES |
1450 | 6.52M | } |
1451 | | |
1452 | 536k | fn derive(&mut self, from: &DerivationBase, byte: u8) { |
1453 | 536k | let link = from.derive(byte); |
1454 | 536k | let depth = self.depths[usize::from(from.code)] + 1; |
1455 | 536k | self.inner.push(link); |
1456 | 536k | self.depths.push(depth); |
1457 | 536k | } |
1458 | | |
1459 | | // Derive multiple codes in a row, where each base is guaranteed to already exist. |
1460 | 3.27M | fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) { |
1461 | 3.27M | let mut depth_of = from.code; |
1462 | | // Note that false data dependency we want to get rid of! |
1463 | | // TODO: this pushes into a Vec, maybe we can make this cleaner. |
1464 | 4.56M | for &code in burst { |
1465 | 1.28M | let depth = self.depths[usize::from(depth_of)] + 1; |
1466 | 1.28M | self.depths.push(depth); |
1467 | 1.28M | depth_of = code; |
1468 | 1.28M | } |
1469 | | |
1470 | | // Llvm tends to be flaky with code layout for the case of requiring an allocation. It's |
1471 | | // not clear if that can occur in practice but it relies on iterator size hint.. |
1472 | 3.27M | let extensions = burst.iter().zip(first); |
1473 | 3.27M | self.inner.extend(extensions.map(|(&code, &first)| { |
1474 | 1.28M | let link = from.derive(first); |
1475 | 1.28M | from.code = code; |
1476 | 1.28M | from.first = first; |
1477 | 1.28M | link |
1478 | 3.27M | })); |
1479 | 3.27M | } |
1480 | | |
1481 | 4.46M | fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 { |
1482 | 4.46M | let mut code_iter = code; |
1483 | 4.46M | let table = &self.inner[..=usize::from(code)]; |
1484 | 4.46M | let first = table[usize::from(code)].first; |
1485 | 4.46M | |
1486 | 4.46M | let len = code_iter; |
1487 | 20.4M | for ch in out.iter_mut().rev() { |
1488 | 20.4M | //(code, cha) = self.table[k as usize]; |
1489 | 20.4M | // Note: This could possibly be replaced with an unchecked array access if |
1490 | 20.4M | // - value is asserted to be < self.next_code() in push |
1491 | 20.4M | // - min_size is asserted to be < MAX_CODESIZE |
1492 | 20.4M | let entry = &table[usize::from(code_iter)]; |
1493 | 20.4M | code_iter = core::cmp::min(len, entry.prev); |
1494 | 20.4M | *ch = entry.byte; |
1495 | 20.4M | } |
1496 | | |
1497 | 4.46M | first |
1498 | 4.46M | } |
1499 | | } |
1500 | | |
1501 | | impl Link { |
1502 | 452k | fn base(byte: u8) -> Self { |
1503 | 452k | Link { |
1504 | 452k | prev: 0, |
1505 | 452k | byte, |
1506 | 452k | first: byte, |
1507 | 452k | } |
1508 | 452k | } |
1509 | | } |
1510 | | |
1511 | | impl DerivationBase { |
1512 | | // TODO: this has self type to make it clear we might depend on the old in a future |
1513 | | // optimization. However, that has no practical purpose right now. |
1514 | 1.82M | fn derive(&self, byte: u8) -> Link { |
1515 | 1.82M | Link { |
1516 | 1.82M | prev: self.code, |
1517 | 1.82M | byte, |
1518 | 1.82M | first: self.first, |
1519 | 1.82M | } |
1520 | 1.82M | } |
1521 | | } |
1522 | | |
1523 | | #[cfg(test)] |
1524 | | mod tests { |
1525 | | use crate::alloc::vec::Vec; |
1526 | | #[cfg(feature = "std")] |
1527 | | use crate::StreamBuf; |
1528 | | use crate::{decode::Decoder, BitOrder}; |
1529 | | |
1530 | | #[test] |
1531 | | fn invalid_code_size_low() { |
1532 | | let _ = Decoder::new(BitOrder::Msb, 0); |
1533 | | let _ = Decoder::new(BitOrder::Msb, 1); |
1534 | | } |
1535 | | |
1536 | | #[test] |
1537 | | #[should_panic] |
1538 | | fn invalid_code_size_high() { |
1539 | | let _ = Decoder::new(BitOrder::Msb, 14); |
1540 | | } |
1541 | | |
1542 | | fn make_encoded() -> Vec<u8> { |
1543 | | const FILE: &'static [u8] = include_bytes!(concat!( |
1544 | | env!("CARGO_MANIFEST_DIR"), |
1545 | | "/benches/binary-8-msb.lzw" |
1546 | | )); |
1547 | | return Vec::from(FILE); |
1548 | | } |
1549 | | |
1550 | | #[test] |
1551 | | #[cfg(feature = "std")] |
1552 | | fn into_stream_buffer_no_alloc() { |
1553 | | let encoded = make_encoded(); |
1554 | | let mut decoder = Decoder::new(BitOrder::Msb, 8); |
1555 | | |
1556 | | let mut output = vec![]; |
1557 | | let mut buffer = [0; 512]; |
1558 | | let mut istream = decoder.into_stream(&mut output); |
1559 | | istream.set_buffer(&mut buffer[..]); |
1560 | | istream.decode(&encoded[..]).status.unwrap(); |
1561 | | |
1562 | | match istream.buffer { |
1563 | | Some(StreamBuf::Borrowed(_)) => {} |
1564 | | None => panic!("Decoded without buffer??"), |
1565 | | Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), |
1566 | | } |
1567 | | } |
1568 | | |
1569 | | #[test] |
1570 | | #[cfg(feature = "std")] |
1571 | | fn into_stream_buffer_small_alloc() { |
1572 | | struct WriteTap<W: std::io::Write>(W); |
1573 | | const BUF_SIZE: usize = 512; |
1574 | | |
1575 | | impl<W: std::io::Write> std::io::Write for WriteTap<W> { |
1576 | | fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { |
1577 | | assert!(buf.len() <= BUF_SIZE); |
1578 | | self.0.write(buf) |
1579 | | } |
1580 | | fn flush(&mut self) -> std::io::Result<()> { |
1581 | | self.0.flush() |
1582 | | } |
1583 | | } |
1584 | | |
1585 | | let encoded = make_encoded(); |
1586 | | let mut decoder = Decoder::new(BitOrder::Msb, 8); |
1587 | | |
1588 | | let mut output = vec![]; |
1589 | | let mut istream = decoder.into_stream(WriteTap(&mut output)); |
1590 | | istream.set_buffer_size(512); |
1591 | | istream.decode(&encoded[..]).status.unwrap(); |
1592 | | |
1593 | | match istream.buffer { |
1594 | | Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), |
1595 | | Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), |
1596 | | None => panic!("Decoded without buffer??"), |
1597 | | } |
1598 | | } |
1599 | | |
1600 | | #[test] |
1601 | | #[cfg(feature = "std")] |
1602 | | fn reset() { |
1603 | | let encoded = make_encoded(); |
1604 | | let mut decoder = Decoder::new(BitOrder::Msb, 8); |
1605 | | let mut reference = None; |
1606 | | |
1607 | | for _ in 0..2 { |
1608 | | let mut output = vec![]; |
1609 | | let mut buffer = [0; 512]; |
1610 | | let mut istream = decoder.into_stream(&mut output); |
1611 | | istream.set_buffer(&mut buffer[..]); |
1612 | | istream.decode_all(&encoded[..]).status.unwrap(); |
1613 | | |
1614 | | decoder.reset(); |
1615 | | if let Some(reference) = &reference { |
1616 | | assert_eq!(output, *reference); |
1617 | | } else { |
1618 | | reference = Some(output); |
1619 | | } |
1620 | | } |
1621 | | } |
1622 | | } |