Coverage Report

Created: 2026-04-12 07:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/weezl-0.1.12/src/decode.rs
Line
Count
Source
1
//! A module for all decoding needs.
2
#[cfg(feature = "std")]
3
use crate::error::StreamResult;
4
use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult};
5
use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
6
7
use crate::alloc::{boxed::Box, vec, vec::Vec};
8
#[cfg(feature = "std")]
9
use std::io::{self, BufRead, Write};
10
11
/// The state for decoding data with an LZW algorithm.
12
///
13
/// The same structure can be utilized with streams as well as your own buffers and driver logic.
14
/// It may even be possible to mix them if you are sufficiently careful not to lose or skip any
15
/// already decode data in the process.
16
///
17
/// This is a sans-IO implementation, meaning that it only contains the state of the decoder and
18
/// the caller will provide buffers for input and output data when calling the basic
19
/// [`decode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*`
20
/// methods for decoding with a particular style of common IO.
21
///
22
/// * [`decode`] for decoding once without any IO-loop.
23
/// * [`into_async`] for decoding with the `futures` traits for asynchronous IO.
24
/// * [`into_stream`] for decoding with the standard `io` traits.
25
/// * [`into_vec`] for in-memory decoding.
26
///
27
/// [`decode_bytes`]: #method.decode_bytes
28
/// [`decode`]: #method.decode
29
/// [`into_async`]: #method.into_async
30
/// [`into_stream`]: #method.into_stream
31
/// [`into_vec`]: #method.into_vec
32
pub struct Decoder {
33
    state: Box<dyn Stateful + Send + 'static>,
34
}
35
36
/// A decoding stream sink.
37
///
38
/// See [`Decoder::into_stream`] on how to create this type.
39
///
40
/// [`Decoder::into_stream`]: struct.Decoder.html#method.into_stream
41
#[cfg_attr(
42
    not(feature = "std"),
43
    deprecated = "This type is only useful with the `std` feature."
44
)]
45
#[cfg_attr(not(feature = "std"), allow(dead_code))]
46
pub struct IntoStream<'d, W> {
47
    decoder: &'d mut Decoder,
48
    writer: W,
49
    buffer: Option<StreamBuf<'d>>,
50
    default_size: usize,
51
}
52
53
/// An async decoding sink.
54
///
55
/// See [`Decoder::into_async`] on how to create this type.
56
///
57
/// [`Decoder::into_async`]: struct.Decoder.html#method.into_async
58
#[cfg(feature = "async")]
59
pub struct IntoAsync<'d, W> {
60
    decoder: &'d mut Decoder,
61
    writer: W,
62
    buffer: Option<StreamBuf<'d>>,
63
    default_size: usize,
64
}
65
66
/// A decoding sink into a vector.
67
///
68
/// See [`Decoder::into_vec`] on how to create this type.
69
///
70
/// [`Decoder::into_vec`]: struct.Decoder.html#method.into_vec
71
pub struct IntoVec<'d> {
72
    decoder: &'d mut Decoder,
73
    vector: &'d mut Vec<u8>,
74
}
75
76
trait Stateful {
77
    fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
78
    fn has_ended(&self) -> bool;
79
    /// Ignore an end code and continue decoding (no implied reset).
80
    fn restart(&mut self);
81
    /// Reset the decoder to the beginning, dropping all buffers etc.
82
    fn reset(&mut self);
83
}
84
85
#[derive(Clone)]
86
struct Link {
87
    prev: Code,
88
    byte: u8,
89
    first: u8,
90
}
91
92
#[derive(Clone)]
93
struct DerivationBase {
94
    code: Code,
95
    first: u8,
96
}
97
98
#[derive(Default)]
99
struct MsbBuffer {
100
    /// A buffer of individual bits. The oldest code is kept in the high-order bits.
101
    bit_buffer: u64,
102
    /// A precomputed mask for this code.
103
    code_mask: u16,
104
    /// The current code size.
105
    code_size: u8,
106
    /// The number of bits in the buffer.
107
    bits: u8,
108
}
109
110
#[derive(Default)]
111
struct LsbBuffer {
112
    /// A buffer of individual bits. The oldest code is kept in the high-order bits.
113
    bit_buffer: u64,
114
    /// A precomputed mask for this code.
115
    code_mask: u16,
116
    /// The current code size.
117
    code_size: u8,
118
    /// The number of bits in the buffer.
119
    bits: u8,
120
}
121
122
trait CodeBuffer {
123
    fn new(min_size: u8) -> Self;
124
    fn reset(&mut self, min_size: u8);
125
    fn bump_code_size(&mut self);
126
127
    /// Retrieve the next symbol, refilling if necessary.
128
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code>;
129
    /// Refill the internal buffer.
130
    fn refill_bits(&mut self, inp: &mut &[u8]);
131
132
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize;
133
    fn consume_bits(&mut self, code_cnt: u8);
134
135
    fn max_code(&self) -> Code;
136
    fn code_size(&self) -> u8;
137
}
138
139
trait CodegenConstants {
140
    const YIELD_ON_FULL: bool;
141
}
142
143
struct DecodeState<CodeBuffer, Constants: CodegenConstants> {
144
    /// The original minimum code size.
145
    min_size: u8,
146
    /// The table of decoded codes.
147
    table: Table,
148
    /// The buffer of decoded data.
149
    buffer: Buffer,
150
    /// The link which we are still decoding and its original code.
151
    last: Option<DerivationBase>,
152
    /// The next code entry.
153
    next_code: Code,
154
    /// Code to reset all tables.
155
    clear_code: Code,
156
    /// Code to signal the end of the stream.
157
    end_code: Code,
158
    /// A stored flag if the end code has already appeared.
159
    has_ended: bool,
160
    /// If tiff then bumps are a single code sooner.
161
    is_tiff: bool,
162
    /// Do we allow stream to start without an explicit reset code?
163
    implicit_reset: bool,
164
    /// The buffer for decoded words.
165
    code_buffer: CodeBuffer,
166
    #[allow(dead_code)]
167
    constants: core::marker::PhantomData<Constants>,
168
}
169
170
// We have a buffer of 64 bits. So at max size at most 5 units can be read at once without
171
// refilling the buffer. At smaller code sizes there are more. We tune for 6 here, by slight
172
// experimentation. This may be an architecture dependent constant.
173
const BURST: usize = 6;
174
175
struct Buffer {
176
    bytes: Box<[u8]>,
177
    read_mark: usize,
178
    write_mark: usize,
179
}
180
181
struct Table {
182
    inner: Vec<Link>,
183
    depths: Vec<u16>,
184
}
185
186
/// Describes the static parameters for creating a decoder.
187
#[derive(Clone, Debug)]
188
pub struct Configuration {
189
    order: BitOrder,
190
    size: u8,
191
    tiff: bool,
192
    yield_on_full: bool,
193
}
194
195
impl Configuration {
196
    /// Create a configuration to decode with the specified bit order and symbol size.
197
2.42k
    pub fn new(order: BitOrder, size: u8) -> Self {
198
2.42k
        super::assert_decode_size(size);
199
2.42k
        Configuration {
200
2.42k
            order,
201
2.42k
            size,
202
2.42k
            tiff: false,
203
2.42k
            yield_on_full: false,
204
2.42k
        }
205
2.42k
    }
206
207
    /// Create a configuration for a TIFF compatible decoder.
208
2.33k
    pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
209
2.33k
        super::assert_decode_size(size);
210
2.33k
        Configuration {
211
2.33k
            order,
212
2.33k
            size,
213
2.33k
            tiff: true,
214
2.33k
            yield_on_full: false,
215
2.33k
        }
216
2.33k
    }
217
218
    /// Immediately yield to the caller when the decoder buffer is full.
219
    ///
220
    /// This can be used for `libtiff` compatibility. It will use a "relaxed" stream interpretation
221
    /// that need not contain an explicit EOF. Instead, the decoder is expected to stop fetching
222
    /// symbols when some out-of-band specified length of the decoded text has been reached. The
223
    /// caller indicates this maximum length through the available output buffer space.
224
    ///
225
    /// Symbols afterwards must not be expected to be valid. On filling the output buffer space
226
    /// completely, the decoder will return immediately to the caller instead of potentially
227
    /// interpreting the following bit-stream (and returning an error on doing so).
228
    ///
229
    /// Default: `false`.
230
2.33k
    pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self {
231
2.33k
        Configuration {
232
2.33k
            yield_on_full: do_yield,
233
2.33k
            ..self
234
2.33k
        }
235
2.33k
    }
236
237
    /// Create a new decoder with the define configuration.
238
4.75k
    pub fn build(self) -> Decoder {
239
4.75k
        Decoder {
240
4.75k
            state: Decoder::from_configuration(&self),
241
4.75k
        }
242
4.75k
    }
243
}
244
245
impl Decoder {
246
    /// Create a new decoder with the specified bit order and symbol size.
247
    ///
248
    /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
249
    /// original specification. In particular you will need to specify an `Lsb` bit oder to decode
250
    /// the data portion of a compressed `gif` image.
251
    ///
252
    /// # Panics
253
    ///
254
    /// The `size` needs to be in the interval `0..=12`.
255
2.42k
    pub fn new(order: BitOrder, size: u8) -> Self {
256
2.42k
        Configuration::new(order, size).build()
257
2.42k
    }
258
259
    /// Create a TIFF compatible decoder with the specified bit order and symbol size.
260
    ///
261
    /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
262
    /// TIFF specification, which is a misinterpretation of the original algorithm for increasing
263
    /// the code size. It switches one symbol sooner.
264
    ///
265
    /// # Panics
266
    ///
267
    /// The `size` needs to be in the interval `0..=12`.
268
0
    pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
269
0
        Configuration::with_tiff_size_switch(order, size).build()
270
0
    }
271
272
4.75k
    fn from_configuration(configuration: &Configuration) -> Box<dyn Stateful + Send + 'static> {
273
        struct NoYield;
274
        struct YieldOnFull;
275
276
        impl CodegenConstants for NoYield {
277
            const YIELD_ON_FULL: bool = false;
278
        }
279
280
        impl CodegenConstants for YieldOnFull {
281
            const YIELD_ON_FULL: bool = true;
282
        }
283
284
        type Boxed = Box<dyn Stateful + Send + 'static>;
285
4.75k
        match (configuration.order, configuration.yield_on_full) {
286
            (BitOrder::Lsb, false) => {
287
2.42k
                let mut state =
288
2.42k
                    Box::new(DecodeState::<LsbBuffer, NoYield>::new(configuration.size));
289
2.42k
                state.is_tiff = configuration.tiff;
290
2.42k
                state as Boxed
291
            }
292
            (BitOrder::Lsb, true) => {
293
0
                let mut state = Box::new(DecodeState::<LsbBuffer, YieldOnFull>::new(
294
0
                    configuration.size,
295
                ));
296
0
                state.is_tiff = configuration.tiff;
297
0
                state as Boxed
298
            }
299
            (BitOrder::Msb, false) => {
300
0
                let mut state =
301
0
                    Box::new(DecodeState::<MsbBuffer, NoYield>::new(configuration.size));
302
0
                state.is_tiff = configuration.tiff;
303
0
                state as Boxed
304
            }
305
            (BitOrder::Msb, true) => {
306
2.33k
                let mut state = Box::new(DecodeState::<MsbBuffer, YieldOnFull>::new(
307
2.33k
                    configuration.size,
308
                ));
309
2.33k
                state.is_tiff = configuration.tiff;
310
2.33k
                state as Boxed
311
            }
312
        }
313
4.75k
    }
314
315
    /// Decode some bytes from `inp` and write result to `out`.
316
    ///
317
    /// This will consume a prefix of the input buffer and write decoded output into a prefix of
318
    /// the output buffer. See the respective fields of the return value for the count of consumed
319
    /// and written bytes. For the next call You should have adjusted the inputs accordingly.
320
    ///
321
    /// The call will try to decode and write as many bytes of output as available. It will be
322
    /// much more optimized (and avoid intermediate buffering) if it is allowed to write a large
323
    /// contiguous chunk at once.
324
    ///
325
    /// See [`into_stream`] for high-level functions (that are only available with the `std`
326
    /// feature).
327
    ///
328
    /// [`into_stream`]: #method.into_stream
329
722k
    pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
330
722k
        self.state.advance(inp, out)
331
722k
    }
332
333
    /// Decode a single chunk of lzw encoded data.
334
    ///
335
    /// This method requires the data to contain an end marker, and returns an error otherwise.
336
    ///
337
    /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize
338
    /// buffer size, to supply an existing vector, to control whether an end marker is required, or
339
    /// to preserve partial data in the case of a decoding error.
340
    ///
341
    /// [`into_vec`]: #into_vec
342
    ///
343
    /// # Example
344
    ///
345
    /// ```
346
    /// use weezl::{BitOrder, decode::Decoder};
347
    ///
348
    /// // Encoded that was created with an encoder.
349
    /// let data = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10";
350
    /// let decoded = Decoder::new(BitOrder::Msb, 9)
351
    ///     .decode(data)
352
    ///     .unwrap();
353
    /// assert_eq!(decoded, b"Hello, world");
354
    /// ```
355
0
    pub fn decode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> {
356
0
        let mut output = vec![];
357
0
        self.into_vec(&mut output).decode_all(data).status?;
358
0
        Ok(output)
359
0
    }
360
361
    /// Construct a decoder into a writer.
362
    #[cfg(feature = "std")]
363
0
    pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
364
0
        IntoStream {
365
0
            decoder: self,
366
0
            writer,
367
0
            buffer: None,
368
0
            default_size: STREAM_BUF_SIZE,
369
0
        }
370
0
    }
371
372
    /// Construct a decoder into an async writer.
373
    #[cfg(feature = "async")]
374
    pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
375
        IntoAsync {
376
            decoder: self,
377
            writer,
378
            buffer: None,
379
            default_size: STREAM_BUF_SIZE,
380
        }
381
    }
382
383
    /// Construct a decoder into a vector.
384
    ///
385
    /// All decoded data is appended and the vector is __not__ cleared.
386
    ///
387
    /// Compared to `into_stream` this interface allows a high-level access to decoding without
388
    /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the
389
    /// special target exposes.
390
0
    pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> {
391
0
        IntoVec {
392
0
            decoder: self,
393
0
            vector: vec,
394
0
        }
395
0
    }
396
397
    /// Check if the decoding has finished.
398
    ///
399
    /// No more output is produced beyond the end code that marked the finish of the stream. The
400
    /// decoder may have read additional bytes, including padding bits beyond the last code word
401
    /// but also excess bytes provided.
402
559k
    pub fn has_ended(&self) -> bool {
403
559k
        self.state.has_ended()
404
559k
    }
405
406
    /// Ignore an end code and continue.
407
    ///
408
    /// This will _not_ reset any of the inner code tables and not have the effect of a clear code.
409
    /// It will instead continue as if the end code had not been present. If no end code has
410
    /// occurred then this is a no-op.
411
    ///
412
    /// You can test if an end code has occurred with [`has_ended`](#method.has_ended).
413
    /// FIXME: clarify how this interacts with padding introduced after end code.
414
    #[allow(dead_code)]
415
0
    pub(crate) fn restart(&mut self) {
416
0
        self.state.restart();
417
0
    }
418
419
    /// Reset all internal state.
420
    ///
421
    /// This produce a decoder as if just constructed with `new` but taking slightly less work. In
422
    /// particular it will not deallocate any internal allocations. It will also avoid some
423
    /// duplicate setup work.
424
0
    pub fn reset(&mut self) {
425
0
        self.state.reset();
426
0
    }
427
}
428
429
#[cfg(feature = "std")]
430
impl<'d, W: Write> IntoStream<'d, W> {
431
    /// Decode data from a reader.
432
    ///
433
    /// This will read data until the stream is empty or an end marker is reached.
434
0
    pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
435
0
        self.decode_part(read, false)
436
0
    }
437
438
    /// Decode data from a reader, requiring an end marker.
439
0
    pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
440
0
        self.decode_part(read, true)
441
0
    }
442
443
    /// Set the size of the intermediate decode buffer.
444
    ///
445
    /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is
446
    /// available and any decoding method is called. No buffer is allocated if `set_buffer` has
447
    /// been called. The buffer is reused.
448
    ///
449
    /// # Panics
450
    /// This method panics if `size` is `0`.
451
0
    pub fn set_buffer_size(&mut self, size: usize) {
452
0
        assert_ne!(size, 0, "Attempted to set empty buffer");
453
0
        self.default_size = size;
454
0
    }
455
456
    /// Use a particular buffer as an intermediate decode buffer.
457
    ///
458
    /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
459
    /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical
460
    /// for efficient decoding. Some optimization techniques require the buffer to hold one or more
461
    /// previous decoded words. There is also additional overhead from `write` calls each time the
462
    /// buffer has been filled.
463
    ///
464
    /// # Panics
465
    /// This method panics if the `buffer` is empty.
466
0
    pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
467
0
        assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
468
0
        self.buffer = Some(StreamBuf::Borrowed(buffer));
469
0
    }
470
471
0
    fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
472
        let IntoStream {
473
0
            decoder,
474
0
            writer,
475
0
            buffer,
476
0
            default_size,
477
0
        } = self;
478
479
        enum Progress {
480
            Ok,
481
            Done,
482
        }
483
484
0
        let mut bytes_read = 0;
485
0
        let mut bytes_written = 0;
486
487
        // Converting to mutable refs to move into the `once` closure.
488
0
        let read_bytes = &mut bytes_read;
489
0
        let write_bytes = &mut bytes_written;
490
491
0
        let outbuf: &mut [u8] =
492
0
            match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
493
0
                StreamBuf::Borrowed(slice) => &mut *slice,
494
0
                StreamBuf::Owned(vec) => &mut *vec,
495
            };
496
0
        assert!(!outbuf.is_empty());
497
498
0
        let once = move || {
499
            // Try to grab one buffer of input data.
500
0
            let data = read.fill_buf()?;
501
502
            // Decode as much of the buffer as fits.
503
0
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
504
            // Do the bookkeeping and consume the buffer.
505
0
            *read_bytes += result.consumed_in;
506
0
            *write_bytes += result.consumed_out;
507
0
            read.consume(result.consumed_in);
508
509
            // Handle the status in the result.
510
0
            let done = result.status.map_err(|err| {
511
0
                io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
512
0
            })?;
513
514
            // Check if we had any new data at all.
515
0
            if let LzwStatus::NoProgress = done {
516
0
                debug_assert_eq!(
517
                    result.consumed_out, 0,
518
0
                    "No progress means we have not decoded any data"
519
                );
520
                // In particular we did not finish decoding.
521
0
                if must_finish {
522
0
                    return Err(io::Error::new(
523
0
                        io::ErrorKind::UnexpectedEof,
524
0
                        "No more data but no end marker detected",
525
0
                    ));
526
                } else {
527
0
                    return Ok(Progress::Done);
528
                }
529
0
            }
530
531
            // And finish by writing our result.
532
            // TODO: we may lose data on error (also on status error above) which we might want to
533
            // deterministically handle so that we don't need to restart everything from scratch as
534
            // the only recovery strategy. Any changes welcome.
535
0
            writer.write_all(&outbuf[..result.consumed_out])?;
536
537
0
            Ok(if let LzwStatus::Done = done {
538
0
                Progress::Done
539
            } else {
540
0
                Progress::Ok
541
            })
542
0
        };
543
544
        // Decode chunks of input data until we're done.
545
0
        let status = core::iter::repeat_with(once)
546
            // scan+fuse can be replaced with map_while
547
0
            .scan((), |(), result| match result {
548
0
                Ok(Progress::Ok) => Some(Ok(())),
549
0
                Err(err) => Some(Err(err)),
550
0
                Ok(Progress::Done) => None,
551
0
            })
552
0
            .fuse()
553
0
            .collect();
554
555
0
        StreamResult {
556
0
            bytes_read,
557
0
            bytes_written,
558
0
            status,
559
0
        }
560
0
    }
561
}
562
563
impl IntoVec<'_> {
564
    /// Decode data from a slice.
565
    ///
566
    /// This will read data until the slice is empty or an end marker is reached.
567
0
    pub fn decode(&mut self, read: &[u8]) -> VectorResult {
568
0
        self.decode_part(read, false)
569
0
    }
570
571
    /// Decode data from a slice, requiring an end marker.
572
0
    pub fn decode_all(mut self, read: &[u8]) -> VectorResult {
573
0
        self.decode_part(read, true)
574
0
    }
575
576
0
    fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) {
577
        const CHUNK_SIZE: usize = 1 << 12;
578
0
        let decoder = &mut self.decoder;
579
0
        let length = self.vector.len();
580
581
        // Use the vector to do overflow checks and w/e.
582
0
        self.vector.reserve(CHUNK_SIZE);
583
        // FIXME: decoding into uninit buffer?
584
0
        self.vector.resize(length + CHUNK_SIZE, 0u8);
585
586
0
        (&mut self.vector[length..], decoder)
587
0
    }
588
589
0
    fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult {
590
0
        let mut result = VectorResult {
591
0
            consumed_in: 0,
592
0
            consumed_out: 0,
593
0
            status: Ok(LzwStatus::Ok),
594
0
        };
595
596
        enum Progress {
597
            Ok,
598
            Done,
599
        }
600
601
        // Converting to mutable refs to move into the `once` closure.
602
0
        let read_bytes = &mut result.consumed_in;
603
0
        let write_bytes = &mut result.consumed_out;
604
0
        let mut data = part;
605
606
        // A 64 MB buffer is quite large but should get alloc_zeroed.
607
        // Note that the decoded size can be up to quadratic in code block.
608
0
        let once = move || {
609
            // Grab a new output buffer.
610
0
            let (outbuf, decoder) = self.grab_buffer();
611
612
            // Decode as much of the buffer as fits.
613
0
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
614
            // Do the bookkeeping and consume the buffer.
615
0
            *read_bytes += result.consumed_in;
616
0
            *write_bytes += result.consumed_out;
617
0
            data = &data[result.consumed_in..];
618
619
0
            let unfilled = outbuf.len() - result.consumed_out;
620
0
            let filled = self.vector.len() - unfilled;
621
0
            self.vector.truncate(filled);
622
623
            // Handle the status in the result.
624
0
            match result.status {
625
0
                Err(err) => Err(err),
626
0
                Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode),
627
0
                Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done),
628
0
                Ok(LzwStatus::Ok) => Ok(Progress::Ok),
629
            }
630
0
        };
631
632
        // Decode chunks of input data until we're done.
633
0
        let status: Result<(), _> = core::iter::repeat_with(once)
634
            // scan+fuse can be replaced with map_while
635
0
            .scan((), |(), result| match result {
636
0
                Ok(Progress::Ok) => Some(Ok(())),
637
0
                Err(err) => Some(Err(err)),
638
0
                Ok(Progress::Done) => None,
639
0
            })
640
0
            .fuse()
641
0
            .collect();
642
643
0
        if let Err(err) = status {
644
0
            result.status = Err(err);
645
0
        }
646
647
0
        result
648
0
    }
649
}
650
651
// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
652
// trip over the usage of await, which is a reserved keyword in that edition/version. It only
653
// contains an impl block.
654
#[cfg(feature = "async")]
655
#[path = "decode_into_async.rs"]
656
mod impl_decode_into_async;
657
658
impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> {
659
4.75k
    fn new(min_size: u8) -> Self {
660
4.75k
        DecodeState {
661
4.75k
            min_size,
662
4.75k
            table: Table::new(),
663
4.75k
            buffer: Buffer::new(),
664
4.75k
            last: None,
665
4.75k
            clear_code: 1 << min_size,
666
4.75k
            end_code: (1 << min_size) + 1,
667
4.75k
            next_code: (1 << min_size) + 2,
668
4.75k
            has_ended: false,
669
4.75k
            is_tiff: false,
670
4.75k
            implicit_reset: true,
671
4.75k
            code_buffer: CodeBuffer::new(min_size),
672
4.75k
            constants: core::marker::PhantomData,
673
4.75k
        }
674
4.75k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new
Line
Count
Source
659
2.42k
    fn new(min_size: u8) -> Self {
660
2.42k
        DecodeState {
661
2.42k
            min_size,
662
2.42k
            table: Table::new(),
663
2.42k
            buffer: Buffer::new(),
664
2.42k
            last: None,
665
2.42k
            clear_code: 1 << min_size,
666
2.42k
            end_code: (1 << min_size) + 1,
667
2.42k
            next_code: (1 << min_size) + 2,
668
2.42k
            has_ended: false,
669
2.42k
            is_tiff: false,
670
2.42k
            implicit_reset: true,
671
2.42k
            code_buffer: CodeBuffer::new(min_size),
672
2.42k
            constants: core::marker::PhantomData,
673
2.42k
        }
674
2.42k
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new
Line
Count
Source
659
2.33k
    fn new(min_size: u8) -> Self {
660
2.33k
        DecodeState {
661
2.33k
            min_size,
662
2.33k
            table: Table::new(),
663
2.33k
            buffer: Buffer::new(),
664
2.33k
            last: None,
665
2.33k
            clear_code: 1 << min_size,
666
2.33k
            end_code: (1 << min_size) + 1,
667
2.33k
            next_code: (1 << min_size) + 2,
668
2.33k
            has_ended: false,
669
2.33k
            is_tiff: false,
670
2.33k
            implicit_reset: true,
671
2.33k
            code_buffer: CodeBuffer::new(min_size),
672
2.33k
            constants: core::marker::PhantomData,
673
2.33k
        }
674
2.33k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new
675
676
14.7k
    fn init_tables(&mut self) {
677
14.7k
        self.code_buffer.reset(self.min_size);
678
14.7k
        self.next_code = (1 << self.min_size) + 2;
679
14.7k
        self.table.init(self.min_size);
680
14.7k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables
Line
Count
Source
676
11.9k
    fn init_tables(&mut self) {
677
11.9k
        self.code_buffer.reset(self.min_size);
678
11.9k
        self.next_code = (1 << self.min_size) + 2;
679
11.9k
        self.table.init(self.min_size);
680
11.9k
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables
Line
Count
Source
676
2.74k
    fn init_tables(&mut self) {
677
2.74k
        self.code_buffer.reset(self.min_size);
678
2.74k
        self.next_code = (1 << self.min_size) + 2;
679
2.74k
        self.table.init(self.min_size);
680
2.74k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables
681
682
68.1k
    fn reset_tables(&mut self) {
683
68.1k
        self.code_buffer.reset(self.min_size);
684
68.1k
        self.next_code = (1 << self.min_size) + 2;
685
68.1k
        self.table.clear(self.min_size);
686
68.1k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables
Line
Count
Source
682
41.0k
    fn reset_tables(&mut self) {
683
41.0k
        self.code_buffer.reset(self.min_size);
684
41.0k
        self.next_code = (1 << self.min_size) + 2;
685
41.0k
        self.table.clear(self.min_size);
686
41.0k
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables
Line
Count
Source
682
27.1k
    fn reset_tables(&mut self) {
683
27.1k
        self.code_buffer.reset(self.min_size);
684
27.1k
        self.next_code = (1 << self.min_size) + 2;
685
27.1k
        self.table.clear(self.min_size);
686
27.1k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables
687
}
688
689
impl<C: CodeBuffer, CgC: CodegenConstants> Stateful for DecodeState<C, CgC> {
690
559k
    fn has_ended(&self) -> bool {
691
559k
        self.has_ended
692
559k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::has_ended
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::has_ended
Line
Count
Source
690
559k
    fn has_ended(&self) -> bool {
691
559k
        self.has_ended
692
559k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::has_ended
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::has_ended
693
694
0
    fn restart(&mut self) {
695
0
        self.has_ended = false;
696
0
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart
697
698
0
    fn reset(&mut self) {
699
0
        self.table.init(self.min_size);
700
0
        self.next_code = (1 << self.min_size) + 2;
701
0
        self.buffer.read_mark = 0;
702
0
        self.buffer.write_mark = 0;
703
0
        self.last = None;
704
0
        self.restart();
705
0
        self.code_buffer = CodeBuffer::new(self.min_size);
706
0
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset
707
708
722k
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
709
        // Skip everything if there is nothing to do.
710
722k
        if self.has_ended {
711
79
            return BufferResult {
712
79
                consumed_in: 0,
713
79
                consumed_out: 0,
714
79
                status: Ok(LzwStatus::Done),
715
79
            };
716
722k
        }
717
718
        // Rough description:
719
        // We will fill the output slice as much as possible until either there is no more symbols
720
        // to decode or an end code has been reached. This requires an internal buffer to hold a
721
        // potential tail of the word corresponding to the last symbol. This tail will then be
722
        // decoded first before continuing with the regular decoding. The same buffer is required
723
        // to persist some symbol state across calls.
724
        //
725
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
726
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
727
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
728
        // output slice. In the special case (new_code == next_code) we use an existing decoded
729
        // version that is present in either the out bytes of this call or in buffer to copy the
730
        // repeated prefix slice.
731
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
732
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
733
        // a serious improvement. It's just unlikely to both have a long symbol and have that
734
        // repeated twice in the same output buffer.
735
        //
736
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
737
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
738
        // execution as much as possible and for this reason have the least possible stress on
739
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
740
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
741
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
742
        // of code words which are all independent of each other, have known lengths _and_ are
743
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
744
        // decoded in an extremely tight loop.
745
        //
746
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
747
        // that intermediate buffer at the expense of not always filling the output buffer
748
        // completely. Alternatively we might follow its chain of precursor states twice. This may
749
        // be even cheaper if we store more than one byte per link so it really should be
750
        // evaluated.
751
        // TODO: if the caller was required to provide the previous last word we could also avoid
752
        // the buffer for cases where we need it to restore the next code! This could be built
753
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.
754
755
        // Record initial lengths for the result that is returned.
756
722k
        let o_in = inp.len();
757
722k
        let o_out = out.len();
758
759
        // The code_link is the previously decoded symbol.
760
        // It's used to link the new code back to its predecessor.
761
722k
        let mut code_link = None;
762
        // The status, which is written to on an invalid code.
763
722k
        let mut status = Ok(LzwStatus::Ok);
764
765
722k
        match self.last.take() {
766
            // No last state? This is the first code after a reset?
767
            None => {
768
85.0k
                match self.next_symbol(&mut inp) {
769
                    // Plainly invalid code.
770
84.5k
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
771
                    // next_code would require an actual predecessor.
772
84.5k
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
773
                    // No more symbols available and nothing decoded yet.
774
                    // Assume that we didn't make progress, this may get reset to Done if we read
775
                    // some bytes from the input.
776
407
                    None => status = Ok(LzwStatus::NoProgress),
777
                    // Handle a valid code.
778
84.5k
                    Some(init_code) => {
779
84.5k
                        if init_code == self.clear_code {
780
12.2k
                            self.init_tables();
781
72.2k
                        } else if init_code == self.end_code {
782
34
                            self.has_ended = true;
783
34
                            status = Ok(LzwStatus::Done);
784
72.2k
                        } else if self.table.is_empty() {
785
2.41k
                            if self.implicit_reset {
786
2.41k
                                self.init_tables();
787
2.41k
788
2.41k
                                self.buffer.fill_reconstruct(&self.table, init_code);
789
2.41k
                                let link = self.table.at(init_code).clone();
790
2.41k
                                code_link = Some(DerivationBase {
791
2.41k
                                    code: init_code,
792
2.41k
                                    first: link.first,
793
2.41k
                                });
794
2.41k
                            } else {
795
0
                                // We require an explicit reset.
796
0
                                status = Err(LzwError::InvalidCode);
797
0
                            }
798
69.8k
                        } else {
799
69.8k
                            // Reconstruct the first code in the buffer.
800
69.8k
                            self.buffer.fill_reconstruct(&self.table, init_code);
801
69.8k
                            let link = self.table.at(init_code).clone();
802
69.8k
                            code_link = Some(DerivationBase {
803
69.8k
                                code: init_code,
804
69.8k
                                first: link.first,
805
69.8k
                            });
806
69.8k
                        }
807
                    }
808
                }
809
            }
810
            // Move the tracking state to the stack.
811
637k
            Some(tup) => code_link = Some(tup),
812
        };
813
814
        // Track an empty `burst` (see below) means we made no progress.
815
722k
        let mut have_yet_to_decode_data = false;
816
817
        // Restore the previous state, if any.
818
722k
        if code_link.is_some() {
819
709k
            let remain = self.buffer.buffer();
820
            // Check if we can fully finish the buffer.
821
709k
            if remain.len() > out.len() {
822
287k
                if out.is_empty() {
823
0
                    // This also implies the buffer is _not_ empty and we will not enter any
824
0
                    // decoding loop.
825
0
                    status = Ok(LzwStatus::NoProgress);
826
287k
                } else {
827
287k
                    out.copy_from_slice(&remain[..out.len()]);
828
287k
                    self.buffer.consume(out.len());
829
287k
                    out = &mut [];
830
287k
                }
831
422k
            } else if remain.is_empty() {
832
259k
                status = Ok(LzwStatus::NoProgress);
833
259k
                have_yet_to_decode_data = true;
834
259k
            } else {
835
163k
                let consumed = remain.len();
836
163k
                out[..consumed].copy_from_slice(remain);
837
163k
                self.buffer.consume(consumed);
838
163k
                out = &mut out[consumed..];
839
163k
                have_yet_to_decode_data = false;
840
163k
            }
841
12.7k
        }
842
843
        // A special reference to out slice which holds the last decoded symbol.
844
722k
        let mut last_decoded: Option<&[u8]> = None;
845
846
722k
        if self.buffer.buffer().is_empty() {
847
            // Hot loop that writes data to the output as long as we can do so directly from the
848
            // input stream. As an invariant of this block we did not need to use the buffer to
849
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
850
            // loop body since every code path where the buffer is filled already breaks.
851
            //
852
            // In a previous iteration of the code we trusted compiler optimization to work this
853
            // out but it seems that it does not. Another edit hidden behind some performance work
854
            // then edited out the check, inadvertently changing the behavior for callers that
855
            // relied on being able to provide an empty output buffer and still receiving a useful
856
            // signal about the state of the stream.
857
858
            // A burst is a sequence of code words that are independently decoded, i.e. they do not
859
            // change the state of the decoder in ways that would influence the interpretation of
860
            // each other. That is: they are not special symbols, they do not make us increase the
861
            // code size, they are each codes already in the tree before the burst.
862
            //
863
            // The tracking state for a burst. These are actually initialized later but compiler
864
            // wasn't smart enough to fully optimize out the init code so that appears outside the
865
            // loop.
866
435k
            let mut burst = [0; BURST];
867
435k
            let mut burst_byte_len = [0u16; BURST];
868
435k
            let mut burst_byte = [0u8; BURST];
869
435k
            let mut target: [&mut [u8]; BURST] = Default::default();
870
871
            loop {
872
                // In particular, we *also* break if the output buffer is still empty. Especially
873
                // when the output parameter was an empty slice, we must try to fetch at least one
874
                // code but with YIELD_ON_FULL we do not.
875
4.46M
                if CgC::YIELD_ON_FULL && out.is_empty() {
876
1.05k
                    break;
877
4.46M
                }
878
879
4.46M
                let mut deriv = match code_link.take() {
880
4.44M
                    Some(link) => link,
881
                    None => {
882
                        // TODO: we do not need to break here. This does not indicate that the buffer
883
                        // has been filled, rather it indicates we have reset the state. The next code
884
                        // should be part of the initial alphabet. However the first code is special in
885
                        // the sense of not creating a new code itself. This is handled correctly in
886
                        // the initialization prior to the loop; and in particular that handling as
887
                        // written currently relies on putting it into the buffer; so handling it we
888
                        // would need to ensure that either the buffer is fully cleared after its use,
889
                        // or use another implementation of handling that first code.
890
12.7k
                        break;
891
                    }
892
                };
893
894
                // Ensure the code buffer is full, we're about to request some codes.
895
                // Note that this also ensures at least one code is in the buffer if any input is left.
896
4.44M
                self.refill_bits(&mut inp);
897
4.44M
                let cnt = self.code_buffer.peek_bits(&mut burst);
898
899
                // No code left in the buffer, and no more bytes to refill the buffer.
900
4.44M
                if cnt == 0 {
901
237k
                    if have_yet_to_decode_data {
902
64.1k
                        status = Ok(LzwStatus::NoProgress);
903
172k
                    }
904
905
237k
                    code_link = Some(deriv);
906
237k
                    break;
907
4.21M
                }
908
909
4.21M
                debug_assert!(
910
                    // When the table is full, we have a max code above the size switch.
911
0
                    self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff)
912
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
913
                    // computed next_code is 4 which already exceeds the bit width from the start.
914
                    // Then we will immediately switch code size after this code.
915
                    //
916
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
917
                    // the code below. Maybe it makes sense to revisit turning this into a compile
918
                    // time choice?
919
0
                    || (self.code_buffer.code_size() == 1 && self.next_code < 4)
920
0
                    || (self.code_buffer.code_size() == 2 && self.next_code == 4)
921
0
                    || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
922
0
                    "Table: {}, code_size: {}, next_code: {}, table_condition: {}",
923
0
                    self.table.is_full(),
924
0
                    self.code_buffer.code_size(),
925
                    self.next_code,
926
0
                    self.code_buffer.max_code() - Code::from(self.is_tiff),
927
                );
928
929
4.21M
                let mut burst_size = 0;
930
4.21M
                let left_before_size_switch = (self.code_buffer.max_code()
931
4.21M
                    - Code::from(self.is_tiff))
932
4.21M
                .saturating_sub(self.next_code);
933
934
                // A burst is a sequence of decodes that are completely independent of each other. This
935
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
936
                // all of them in the decoding table and thus known their depths, and additionally if
937
                // we can decode them directly into the output buffer.
938
8.26M
                for b in &burst[..cnt] {
939
                    // We can commit the previous burst code, and will take a slice from the output
940
                    // buffer. This also avoids the bounds check in the tight loop later.
941
8.26M
                    if burst_size > 0 {
942
4.05M
                        let len = burst_byte_len[burst_size - 1];
943
4.05M
                        let (into, tail) = out.split_at_mut(usize::from(len));
944
4.05M
                        target[burst_size - 1] = into;
945
4.05M
                        out = tail;
946
4.21M
                    }
947
948
                    // Check that we don't overflow the code size with all codes we burst decode.
949
8.26M
                    burst_size += 1;
950
951
8.26M
                    if burst_size > usize::from(left_before_size_switch) {
952
2.85M
                        break;
953
5.40M
                    }
954
955
5.40M
                    let read_code = *b;
956
957
                    // A burst code can't be special.
958
5.40M
                    if read_code == self.clear_code
959
5.34M
                        || read_code == self.end_code
960
5.34M
                        || read_code >= self.next_code
961
                    {
962
279k
                        break;
963
5.12M
                    }
964
965
                    // Read the code length and check that we can decode directly into the out slice.
966
5.12M
                    let len = self.table.depths[usize::from(read_code)];
967
968
5.12M
                    if out.len() < usize::from(len) {
969
35.9k
                        break;
970
5.08M
                    }
971
972
                    // We do exactly one more code (the one being inspected in the current iteration)
973
                    // after the 'burst'. When we want to break decoding precisely on the supplied
974
                    // buffer, we check if this is the last code to be decoded into it.
975
5.08M
                    if CgC::YIELD_ON_FULL {
976
2.72M
                        if out.len() == usize::from(len) {
977
17.0k
                            break;
978
2.70M
                        }
979
2.36M
                    }
980
981
5.07M
                    burst_byte_len[burst_size - 1] = len;
982
                }
983
984
4.21M
                self.code_buffer.consume_bits(burst_size as u8);
985
4.21M
                have_yet_to_decode_data = false;
986
987
                // Note that the very last code in the burst buffer doesn't actually belong to the
988
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
989
                // breaks and a loop end condition above. That may be a speed advantage?
990
4.21M
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
991
992
                // The very tight loop for restoring the actual burst. These can be reconstructed in
993
                // parallel since none of them depend on a prior constructed. Only the derivation of
994
                // new codes is not parallel. There are no size changes here either.
995
4.21M
                let burst_targets = &mut target[..burst_size - 1];
996
997
4.21M
                if !self.table.is_full() {
998
1.41M
                    self.next_code += burst_targets.len() as u16;
999
2.79M
                }
1000
1001
4.05M
                for ((&burst, target), byte) in
1002
4.21M
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
1003
4.05M
                {
1004
4.05M
                    *byte = self.table.reconstruct(burst, target);
1005
4.05M
                }
1006
1007
4.21M
                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
1008
1009
                // Now handle the special codes.
1010
4.21M
                if new_code == self.clear_code {
1011
68.1k
                    self.reset_tables();
1012
68.1k
                    last_decoded = None;
1013
                    // Restarts in the next call to the entry point.
1014
68.1k
                    break;
1015
4.14M
                }
1016
1017
4.14M
                if new_code == self.end_code {
1018
144
                    self.has_ended = true;
1019
144
                    status = Ok(LzwStatus::Done);
1020
144
                    last_decoded = None;
1021
144
                    break;
1022
4.14M
                }
1023
1024
4.14M
                if new_code > self.next_code {
1025
921
                    status = Err(LzwError::InvalidCode);
1026
921
                    last_decoded = None;
1027
921
                    break;
1028
4.14M
                }
1029
1030
4.14M
                let required_len = if new_code == self.next_code {
1031
185k
                    self.table.depths[usize::from(deriv.code)] + 1
1032
                } else {
1033
3.95M
                    self.table.depths[usize::from(new_code)]
1034
                };
1035
1036
                // We need the decoded data of the new code if it is the `next_code`. This is the
1037
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
1038
                // all other cases we only need the first character of the decoded data.
1039
4.14M
                let have_next_code = new_code == self.next_code;
1040
1041
                // Update the slice holding the last decoded word.
1042
4.14M
                if have_next_code {
1043
                    // If we did not have any burst code, we still hold that slice in the buffer.
1044
185k
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
1045
40.5k
                        let slice = core::mem::replace(new_last, &mut []);
1046
40.5k
                        last_decoded = Some(&*slice);
1047
144k
                    }
1048
3.95M
                }
1049
1050
                let cha;
1051
4.14M
                let is_in_buffer = usize::from(required_len) > out.len();
1052
                // Check if we will need to store our current state into the buffer.
1053
4.14M
                if is_in_buffer {
1054
91.6k
                    if have_next_code {
1055
                        // last_decoded will be Some if we have restored any code into the out slice.
1056
                        // Otherwise it will still be present in the buffer.
1057
49.1k
                        if let Some(last) = last_decoded.take() {
1058
15.7k
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
1059
15.7k
                            self.buffer.write_mark = last.len();
1060
15.7k
                            self.buffer.read_mark = last.len();
1061
33.4k
                        }
1062
1063
49.1k
                        cha = self.buffer.fill_cscsc();
1064
42.5k
                    } else {
1065
42.5k
                        // Restore the decoded word into the buffer.
1066
42.5k
                        last_decoded = None;
1067
42.5k
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
1068
42.5k
                    }
1069
                } else {
1070
4.05M
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
1071
4.05M
                    out = tail;
1072
1073
4.05M
                    if have_next_code {
1074
                        // Reconstruct high.
1075
136k
                        let source = match last_decoded.take() {
1076
117k
                            Some(last) => last,
1077
18.5k
                            None => &self.buffer.bytes[..self.buffer.write_mark],
1078
                        };
1079
1080
                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
1081
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
1082
136k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
1083
136k
                        target[..source.len()].copy_from_slice(source);
1084
136k
                        target[source.len()..][0] = cha;
1085
3.91M
                    } else {
1086
3.91M
                        cha = self.table.reconstruct(new_code, target);
1087
3.91M
                    }
1088
1089
                    // A new decoded word.
1090
4.05M
                    last_decoded = Some(target);
1091
                }
1092
1093
                // Each newly read code creates one new code/link based on the preceding code if we
1094
                // have enough space to put it there.
1095
4.14M
                if !self.table.is_full() {
1096
1.34M
                    self.table.derive(&deriv, cha);
1097
1098
1.34M
                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
1099
54.2k
                        && self.code_buffer.code_size() < MAX_CODESIZE
1100
53.7k
                    {
1101
53.7k
                        self.bump_code_size();
1102
1.28M
                    }
1103
1104
1.34M
                    self.next_code += 1;
1105
2.79M
                }
1106
1107
                // store the information on the decoded word.
1108
4.14M
                code_link = Some(DerivationBase {
1109
4.14M
                    code: new_code,
1110
4.14M
                    first: cha,
1111
4.14M
                });
1112
1113
                // Can't make any more progress with decoding.
1114
                //
1115
                // We have more data buffered but not enough space to put it? We want fetch a next
1116
                // symbol if possible as in the case of it being a new symbol we can refer to the
1117
                // buffered output as the source for that symbol's meaning and do a memcpy.
1118
                //
1119
                // Since this test is after decoding at least one code, we can now check for an
1120
                // empty buffer and still guarantee progress when one was passed as a parameter.
1121
4.14M
                if is_in_buffer || out.is_empty() {
1122
115k
                    break;
1123
4.02M
                }
1124
            }
1125
287k
        }
1126
1127
        // We need to store the last word into the buffer in case the first code in the next
1128
        // iteration is the next_code.
1129
722k
        if let Some(tail) = last_decoded {
1130
196k
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
1131
196k
            self.buffer.write_mark = tail.len();
1132
196k
            // Mark the full buffer as having been consumed.
1133
196k
            self.buffer.read_mark = tail.len();
1134
526k
        }
1135
1136
        // Ensure we don't indicate that no progress was made if we read some bytes from the input
1137
        // (which is progress).
1138
722k
        if o_in > inp.len() {
1139
415k
            if let Ok(LzwStatus::NoProgress) = status {
1140
258k
                status = Ok(LzwStatus::Ok);
1141
258k
            }
1142
305k
        }
1143
1144
        // Store the code/link state.
1145
722k
        self.last = code_link;
1146
1147
722k
        BufferResult {
1148
722k
            consumed_in: o_in.wrapping_sub(inp.len()),
1149
722k
            consumed_out: o_out.wrapping_sub(out.len()),
1150
722k
            status,
1151
722k
        }
1152
722k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance
Line
Count
Source
708
558k
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
709
        // Skip everything if there is nothing to do.
710
558k
        if self.has_ended {
711
7
            return BufferResult {
712
7
                consumed_in: 0,
713
7
                consumed_out: 0,
714
7
                status: Ok(LzwStatus::Done),
715
7
            };
716
558k
        }
717
718
        // Rough description:
719
        // We will fill the output slice as much as possible until either there is no more symbols
720
        // to decode or an end code has been reached. This requires an internal buffer to hold a
721
        // potential tail of the word corresponding to the last symbol. This tail will then be
722
        // decoded first before continuing with the regular decoding. The same buffer is required
723
        // to persist some symbol state across calls.
724
        //
725
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
726
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
727
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
728
        // output slice. In the special case (new_code == next_code) we use an existing decoded
729
        // version that is present in either the out bytes of this call or in buffer to copy the
730
        // repeated prefix slice.
731
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
732
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
733
        // a serious improvement. It's just unlikely to both have a long symbol and have that
734
        // repeated twice in the same output buffer.
735
        //
736
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
737
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
738
        // execution as much as possible and for this reason have the least possible stress on
739
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
740
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
741
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
742
        // of code words which are all independent of each other, have known lengths _and_ are
743
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
744
        // decoded in an extremely tight loop.
745
        //
746
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
747
        // that intermediate buffer at the expense of not always filling the output buffer
748
        // completely. Alternatively we might follow its chain of precursor states twice. This may
749
        // be even cheaper if we store more than one byte per link so it really should be
750
        // evaluated.
751
        // TODO: if the caller was required to provide the previous last word we could also avoid
752
        // the buffer for cases where we need it to restore the next code! This could be built
753
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.
754
755
        // Record initial lengths for the result that is returned.
756
558k
        let o_in = inp.len();
757
558k
        let o_out = out.len();
758
759
        // The code_link is the previously decoded symbol.
760
        // It's used to link the new code back to its predecessor.
761
558k
        let mut code_link = None;
762
        // The status, which is written to on an invalid code.
763
558k
        let mut status = Ok(LzwStatus::Ok);
764
765
558k
        match self.last.take() {
766
            // No last state? This is the first code after a reset?
767
            None => {
768
53.6k
                match self.next_symbol(&mut inp) {
769
                    // Plainly invalid code.
770
53.2k
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
771
                    // next_code would require an actual predecessor.
772
53.2k
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
773
                    // No more symbols available and nothing decoded yet.
774
                    // Assume that we didn't make progress, this may get reset to Done if we read
775
                    // some bytes from the input.
776
334
                    None => status = Ok(LzwStatus::NoProgress),
777
                    // Handle a valid code.
778
53.2k
                    Some(init_code) => {
779
53.2k
                        if init_code == self.clear_code {
780
10.3k
                            self.init_tables();
781
42.9k
                        } else if init_code == self.end_code {
782
29
                            self.has_ended = true;
783
29
                            status = Ok(LzwStatus::Done);
784
42.8k
                        } else if self.table.is_empty() {
785
1.61k
                            if self.implicit_reset {
786
1.61k
                                self.init_tables();
787
1.61k
788
1.61k
                                self.buffer.fill_reconstruct(&self.table, init_code);
789
1.61k
                                let link = self.table.at(init_code).clone();
790
1.61k
                                code_link = Some(DerivationBase {
791
1.61k
                                    code: init_code,
792
1.61k
                                    first: link.first,
793
1.61k
                                });
794
1.61k
                            } else {
795
0
                                // We require an explicit reset.
796
0
                                status = Err(LzwError::InvalidCode);
797
0
                            }
798
41.2k
                        } else {
799
41.2k
                            // Reconstruct the first code in the buffer.
800
41.2k
                            self.buffer.fill_reconstruct(&self.table, init_code);
801
41.2k
                            let link = self.table.at(init_code).clone();
802
41.2k
                            code_link = Some(DerivationBase {
803
41.2k
                                code: init_code,
804
41.2k
                                first: link.first,
805
41.2k
                            });
806
41.2k
                        }
807
                    }
808
                }
809
            }
810
            // Move the tracking state to the stack.
811
505k
            Some(tup) => code_link = Some(tup),
812
        };
813
814
        // Track an empty `burst` (see below) means we made no progress.
815
558k
        let mut have_yet_to_decode_data = false;
816
817
        // Restore the previous state, if any.
818
558k
        if code_link.is_some() {
819
548k
            let remain = self.buffer.buffer();
820
            // Check if we can fully finish the buffer.
821
548k
            if remain.len() > out.len() {
822
224k
                if out.is_empty() {
823
0
                    // This also implies the buffer is _not_ empty and we will not enter any
824
0
                    // decoding loop.
825
0
                    status = Ok(LzwStatus::NoProgress);
826
224k
                } else {
827
224k
                    out.copy_from_slice(&remain[..out.len()]);
828
224k
                    self.buffer.consume(out.len());
829
224k
                    out = &mut [];
830
224k
                }
831
323k
            } else if remain.is_empty() {
832
241k
                status = Ok(LzwStatus::NoProgress);
833
241k
                have_yet_to_decode_data = true;
834
241k
            } else {
835
82.1k
                let consumed = remain.len();
836
82.1k
                out[..consumed].copy_from_slice(remain);
837
82.1k
                self.buffer.consume(consumed);
838
82.1k
                out = &mut out[consumed..];
839
82.1k
                have_yet_to_decode_data = false;
840
82.1k
            }
841
10.7k
        }
842
843
        // A special reference to out slice which holds the last decoded symbol.
844
558k
        let mut last_decoded: Option<&[u8]> = None;
845
846
558k
        if self.buffer.buffer().is_empty() {
847
            // Hot loop that writes data to the output as long as we can do so directly from the
848
            // input stream. As an invariant of this block we did not need to use the buffer to
849
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
850
            // loop body since every code path where the buffer is filled already breaks.
851
            //
852
            // In a previous iteration of the code we trusted compiler optimization to work this
853
            // out but it seems that it does not. Another edit hidden behind some performance work
854
            // then edited out the check, inadvertently changing the behavior for callers that
855
            // relied on being able to provide an empty output buffer and still receiving a useful
856
            // signal about the state of the stream.
857
858
            // A burst is a sequence of code words that are independently decoded, i.e. they do not
859
            // change the state of the decoder in ways that would influence the interpretation of
860
            // each other. That is: they are not special symbols, they do not make us increase the
861
            // code size, they are each codes already in the tree before the burst.
862
            //
863
            // The tracking state for a burst. These are actually initialized later but compiler
864
            // wasn't smart enough to fully optimize out the init code so that appears outside the
865
            // loop.
866
333k
            let mut burst = [0; BURST];
867
333k
            let mut burst_byte_len = [0u16; BURST];
868
333k
            let mut burst_byte = [0u8; BURST];
869
333k
            let mut target: [&mut [u8]; BURST] = Default::default();
870
871
            loop {
872
                // In particular, we *also* break if the output buffer is still empty. Especially
873
                // when the output parameter was an empty slice, we must try to fetch at least one
874
                // code but with YIELD_ON_FULL we do not.
875
1.37M
                if CgC::YIELD_ON_FULL && out.is_empty() {
876
0
                    break;
877
1.37M
                }
878
879
1.37M
                let mut deriv = match code_link.take() {
880
1.35M
                    Some(link) => link,
881
                    None => {
882
                        // TODO: we do not need to break here. This does not indicate that the buffer
883
                        // has been filled, rather it indicates we have reset the state. The next code
884
                        // should be part of the initial alphabet. However the first code is special in
885
                        // the sense of not creating a new code itself. This is handled correctly in
886
                        // the initialization prior to the loop; and in particular that handling as
887
                        // written currently relies on putting it into the buffer; so handling it we
888
                        // would need to ensure that either the buffer is fully cleared after its use,
889
                        // or use another implementation of handling that first code.
890
10.7k
                        break;
891
                    }
892
                };
893
894
                // Ensure the code buffer is full, we're about to request some codes.
895
                // Note that this also ensures at least one code is in the buffer if any input is left.
896
1.35M
                self.refill_bits(&mut inp);
897
1.35M
                let cnt = self.code_buffer.peek_bits(&mut burst);
898
899
                // No code left in the buffer, and no more bytes to refill the buffer.
900
1.35M
                if cnt == 0 {
901
236k
                    if have_yet_to_decode_data {
902
63.8k
                        status = Ok(LzwStatus::NoProgress);
903
172k
                    }
904
905
236k
                    code_link = Some(deriv);
906
236k
                    break;
907
1.12M
                }
908
909
1.12M
                debug_assert!(
910
                    // When the table is full, we have a max code above the size switch.
911
0
                    self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff)
912
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
913
                    // computed next_code is 4 which already exceeds the bit width from the start.
914
                    // Then we will immediately switch code size after this code.
915
                    //
916
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
917
                    // the code below. Maybe it makes sense to revisit turning this into a compile
918
                    // time choice?
919
0
                    || (self.code_buffer.code_size() == 1 && self.next_code < 4)
920
0
                    || (self.code_buffer.code_size() == 2 && self.next_code == 4)
921
0
                    || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
922
0
                    "Table: {}, code_size: {}, next_code: {}, table_condition: {}",
923
0
                    self.table.is_full(),
924
0
                    self.code_buffer.code_size(),
925
                    self.next_code,
926
0
                    self.code_buffer.max_code() - Code::from(self.is_tiff),
927
                );
928
929
1.12M
                let mut burst_size = 0;
930
1.12M
                let left_before_size_switch = (self.code_buffer.max_code()
931
1.12M
                    - Code::from(self.is_tiff))
932
1.12M
                .saturating_sub(self.next_code);
933
934
                // A burst is a sequence of decodes that are completely independent of each other. This
935
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
936
                // all of them in the decoding table and thus known their depths, and additionally if
937
                // we can decode them directly into the output buffer.
938
2.93M
                for b in &burst[..cnt] {
939
                    // We can commit the previous burst code, and will take a slice from the output
940
                    // buffer. This also avoids the bounds check in the tight loop later.
941
2.93M
                    if burst_size > 0 {
942
1.81M
                        let len = burst_byte_len[burst_size - 1];
943
1.81M
                        let (into, tail) = out.split_at_mut(usize::from(len));
944
1.81M
                        target[burst_size - 1] = into;
945
1.81M
                        out = tail;
946
1.81M
                    }
947
948
                    // Check that we don't overflow the code size with all codes we burst decode.
949
2.93M
                    burst_size += 1;
950
951
2.93M
                    if burst_size > usize::from(left_before_size_switch) {
952
417k
                        break;
953
2.51M
                    }
954
955
2.51M
                    let read_code = *b;
956
957
                    // A burst code can't be special.
958
2.51M
                    if read_code == self.clear_code
959
2.48M
                        || read_code == self.end_code
960
2.48M
                        || read_code >= self.next_code
961
                    {
962
127k
                        break;
963
2.38M
                    }
964
965
                    // Read the code length and check that we can decode directly into the out slice.
966
2.38M
                    let len = self.table.depths[usize::from(read_code)];
967
968
2.38M
                    if out.len() < usize::from(len) {
969
25.3k
                        break;
970
2.36M
                    }
971
972
                    // We do exactly one more code (the one being inspected in the current iteration)
973
                    // after the 'burst'. When we want to break decoding precisely on the supplied
974
                    // buffer, we check if this is the last code to be decoded into it.
975
2.36M
                    if CgC::YIELD_ON_FULL {
976
0
                        if out.len() == usize::from(len) {
977
0
                            break;
978
0
                        }
979
2.36M
                    }
980
981
2.36M
                    burst_byte_len[burst_size - 1] = len;
982
                }
983
984
1.12M
                self.code_buffer.consume_bits(burst_size as u8);
985
1.12M
                have_yet_to_decode_data = false;
986
987
                // Note that the very last code in the burst buffer doesn't actually belong to the
988
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
989
                // breaks and a loop end condition above. That may be a speed advantage?
990
1.12M
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
991
992
                // The very tight loop for restoring the actual burst. These can be reconstructed in
993
                // parallel since none of them depend on a prior constructed. Only the derivation of
994
                // new codes is not parallel. There are no size changes here either.
995
1.12M
                let burst_targets = &mut target[..burst_size - 1];
996
997
1.12M
                if !self.table.is_full() {
998
759k
                    self.next_code += burst_targets.len() as u16;
999
759k
                }
1000
1001
1.81M
                for ((&burst, target), byte) in
1002
1.12M
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
1003
1.81M
                {
1004
1.81M
                    *byte = self.table.reconstruct(burst, target);
1005
1.81M
                }
1006
1007
1.12M
                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
1008
1009
                // Now handle the special codes.
1010
1.12M
                if new_code == self.clear_code {
1011
41.0k
                    self.reset_tables();
1012
41.0k
                    last_decoded = None;
1013
                    // Restarts in the next call to the entry point.
1014
41.0k
                    break;
1015
1.08M
                }
1016
1017
1.08M
                if new_code == self.end_code {
1018
78
                    self.has_ended = true;
1019
78
                    status = Ok(LzwStatus::Done);
1020
78
                    last_decoded = None;
1021
78
                    break;
1022
1.08M
                }
1023
1024
1.08M
                if new_code > self.next_code {
1025
351
                    status = Err(LzwError::InvalidCode);
1026
351
                    last_decoded = None;
1027
351
                    break;
1028
1.08M
                }
1029
1030
1.08M
                let required_len = if new_code == self.next_code {
1031
78.3k
                    self.table.depths[usize::from(deriv.code)] + 1
1032
                } else {
1033
1.00M
                    self.table.depths[usize::from(new_code)]
1034
                };
1035
1036
                // We need the decoded data of the new code if it is the `next_code`. This is the
1037
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
1038
                // all other cases we only need the first character of the decoded data.
1039
1.08M
                let have_next_code = new_code == self.next_code;
1040
1041
                // Update the slice holding the last decoded word.
1042
1.08M
                if have_next_code {
1043
                    // If we did not have any burst code, we still hold that slice in the buffer.
1044
78.3k
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
1045
20.0k
                        let slice = core::mem::replace(new_last, &mut []);
1046
20.0k
                        last_decoded = Some(&*slice);
1047
58.3k
                    }
1048
1.00M
                }
1049
1050
                let cha;
1051
1.08M
                let is_in_buffer = usize::from(required_len) > out.len();
1052
                // Check if we will need to store our current state into the buffer.
1053
1.08M
                if is_in_buffer {
1054
39.5k
                    if have_next_code {
1055
                        // last_decoded will be Some if we have restored any code into the out slice.
1056
                        // Otherwise it will still be present in the buffer.
1057
9.55k
                        if let Some(last) = last_decoded.take() {
1058
4.72k
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
1059
4.72k
                            self.buffer.write_mark = last.len();
1060
4.72k
                            self.buffer.read_mark = last.len();
1061
4.82k
                        }
1062
1063
9.55k
                        cha = self.buffer.fill_cscsc();
1064
29.9k
                    } else {
1065
29.9k
                        // Restore the decoded word into the buffer.
1066
29.9k
                        last_decoded = None;
1067
29.9k
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
1068
29.9k
                    }
1069
                } else {
1070
1.04M
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
1071
1.04M
                    out = tail;
1072
1073
1.04M
                    if have_next_code {
1074
                        // Reconstruct high.
1075
68.8k
                        let source = match last_decoded.take() {
1076
62.4k
                            Some(last) => last,
1077
6.35k
                            None => &self.buffer.bytes[..self.buffer.write_mark],
1078
                        };
1079
1080
                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
1081
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
1082
68.8k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
1083
68.8k
                        target[..source.len()].copy_from_slice(source);
1084
68.8k
                        target[source.len()..][0] = cha;
1085
973k
                    } else {
1086
973k
                        cha = self.table.reconstruct(new_code, target);
1087
973k
                    }
1088
1089
                    // A new decoded word.
1090
1.04M
                    last_decoded = Some(target);
1091
                }
1092
1093
                // Each newly read code creates one new code/link based on the preceding code if we
1094
                // have enough space to put it there.
1095
1.08M
                if !self.table.is_full() {
1096
717k
                    self.table.derive(&deriv, cha);
1097
1098
717k
                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
1099
49.9k
                        && self.code_buffer.code_size() < MAX_CODESIZE
1100
49.7k
                    {
1101
49.7k
                        self.bump_code_size();
1102
668k
                    }
1103
1104
717k
                    self.next_code += 1;
1105
363k
                }
1106
1107
                // store the information on the decoded word.
1108
1.08M
                code_link = Some(DerivationBase {
1109
1.08M
                    code: new_code,
1110
1.08M
                    first: cha,
1111
1.08M
                });
1112
1113
                // Can't make any more progress with decoding.
1114
                //
1115
                // We have more data buffered but not enough space to put it? We want fetch a next
1116
                // symbol if possible as in the case of it being a new symbol we can refer to the
1117
                // buffered output as the source for that symbol's meaning and do a memcpy.
1118
                //
1119
                // Since this test is after decoding at least one code, we can now check for an
1120
                // empty buffer and still guarantee progress when one was passed as a parameter.
1121
1.08M
                if is_in_buffer || out.is_empty() {
1122
45.5k
                    break;
1123
1.03M
                }
1124
            }
1125
224k
        }
1126
1127
        // We need to store the last word into the buffer in case the first code in the next
1128
        // iteration is the next_code.
1129
558k
        if let Some(tail) = last_decoded {
1130
178k
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
1131
178k
            self.buffer.write_mark = tail.len();
1132
178k
            // Mark the full buffer as having been consumed.
1133
178k
            self.buffer.read_mark = tail.len();
1134
380k
        }
1135
1136
        // Ensure we don't indicate that no progress was made if we read some bytes from the input
1137
        // (which is progress).
1138
558k
        if o_in > inp.len() {
1139
318k
            if let Ok(LzwStatus::NoProgress) = status {
1140
240k
                status = Ok(LzwStatus::Ok);
1141
240k
            }
1142
239k
        }
1143
1144
        // Store the code/link state.
1145
558k
        self.last = code_link;
1146
1147
558k
        BufferResult {
1148
558k
            consumed_in: o_in.wrapping_sub(inp.len()),
1149
558k
            consumed_out: o_out.wrapping_sub(out.len()),
1150
558k
            status,
1151
558k
        }
1152
558k
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance
Line
Count
Source
708
164k
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
709
        // Skip everything if there is nothing to do.
710
164k
        if self.has_ended {
711
72
            return BufferResult {
712
72
                consumed_in: 0,
713
72
                consumed_out: 0,
714
72
                status: Ok(LzwStatus::Done),
715
72
            };
716
163k
        }
717
718
        // Rough description:
719
        // We will fill the output slice as much as possible until either there is no more symbols
720
        // to decode or an end code has been reached. This requires an internal buffer to hold a
721
        // potential tail of the word corresponding to the last symbol. This tail will then be
722
        // decoded first before continuing with the regular decoding. The same buffer is required
723
        // to persist some symbol state across calls.
724
        //
725
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
726
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
727
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
728
        // output slice. In the special case (new_code == next_code) we use an existing decoded
729
        // version that is present in either the out bytes of this call or in buffer to copy the
730
        // repeated prefix slice.
731
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
732
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
733
        // a serious improvement. It's just unlikely to both have a long symbol and have that
734
        // repeated twice in the same output buffer.
735
        //
736
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
737
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
738
        // execution as much as possible and for this reason have the least possible stress on
739
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
740
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
741
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
742
        // of code words which are all independent of each other, have known lengths _and_ are
743
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
744
        // decoded in an extremely tight loop.
745
        //
746
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
747
        // that intermediate buffer at the expense of not always filling the output buffer
748
        // completely. Alternatively we might follow its chain of precursor states twice. This may
749
        // be even cheaper if we store more than one byte per link so it really should be
750
        // evaluated.
751
        // TODO: if the caller was required to provide the previous last word we could also avoid
752
        // the buffer for cases where we need it to restore the next code! This could be built
753
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.
754
755
        // Record initial lengths for the result that is returned.
756
163k
        let o_in = inp.len();
757
163k
        let o_out = out.len();
758
759
        // The code_link is the previously decoded symbol.
760
        // It's used to link the new code back to its predecessor.
761
163k
        let mut code_link = None;
762
        // The status, which is written to on an invalid code.
763
163k
        let mut status = Ok(LzwStatus::Ok);
764
765
163k
        match self.last.take() {
766
            // No last state? This is the first code after a reset?
767
            None => {
768
31.3k
                match self.next_symbol(&mut inp) {
769
                    // Plainly invalid code.
770
31.3k
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
771
                    // next_code would require an actual predecessor.
772
31.2k
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
773
                    // No more symbols available and nothing decoded yet.
774
                    // Assume that we didn't make progress, this may get reset to Done if we read
775
                    // some bytes from the input.
776
73
                    None => status = Ok(LzwStatus::NoProgress),
777
                    // Handle a valid code.
778
31.2k
                    Some(init_code) => {
779
31.2k
                        if init_code == self.clear_code {
780
1.93k
                            self.init_tables();
781
29.3k
                        } else if init_code == self.end_code {
782
5
                            self.has_ended = true;
783
5
                            status = Ok(LzwStatus::Done);
784
29.3k
                        } else if self.table.is_empty() {
785
805
                            if self.implicit_reset {
786
805
                                self.init_tables();
787
805
788
805
                                self.buffer.fill_reconstruct(&self.table, init_code);
789
805
                                let link = self.table.at(init_code).clone();
790
805
                                code_link = Some(DerivationBase {
791
805
                                    code: init_code,
792
805
                                    first: link.first,
793
805
                                });
794
805
                            } else {
795
0
                                // We require an explicit reset.
796
0
                                status = Err(LzwError::InvalidCode);
797
0
                            }
798
28.5k
                        } else {
799
28.5k
                            // Reconstruct the first code in the buffer.
800
28.5k
                            self.buffer.fill_reconstruct(&self.table, init_code);
801
28.5k
                            let link = self.table.at(init_code).clone();
802
28.5k
                            code_link = Some(DerivationBase {
803
28.5k
                                code: init_code,
804
28.5k
                                first: link.first,
805
28.5k
                            });
806
28.5k
                        }
807
                    }
808
                }
809
            }
810
            // Move the tracking state to the stack.
811
132k
            Some(tup) => code_link = Some(tup),
812
        };
813
814
        // Track an empty `burst` (see below) means we made no progress.
815
163k
        let mut have_yet_to_decode_data = false;
816
817
        // Restore the previous state, if any.
818
163k
        if code_link.is_some() {
819
161k
            let remain = self.buffer.buffer();
820
            // Check if we can fully finish the buffer.
821
161k
            if remain.len() > out.len() {
822
62.2k
                if out.is_empty() {
823
0
                    // This also implies the buffer is _not_ empty and we will not enter any
824
0
                    // decoding loop.
825
0
                    status = Ok(LzwStatus::NoProgress);
826
62.2k
                } else {
827
62.2k
                    out.copy_from_slice(&remain[..out.len()]);
828
62.2k
                    self.buffer.consume(out.len());
829
62.2k
                    out = &mut [];
830
62.2k
                }
831
99.6k
            } else if remain.is_empty() {
832
18.3k
                status = Ok(LzwStatus::NoProgress);
833
18.3k
                have_yet_to_decode_data = true;
834
81.3k
            } else {
835
81.3k
                let consumed = remain.len();
836
81.3k
                out[..consumed].copy_from_slice(remain);
837
81.3k
                self.buffer.consume(consumed);
838
81.3k
                out = &mut out[consumed..];
839
81.3k
                have_yet_to_decode_data = false;
840
81.3k
            }
841
2.04k
        }
842
843
        // A special reference to out slice which holds the last decoded symbol.
844
163k
        let mut last_decoded: Option<&[u8]> = None;
845
846
163k
        if self.buffer.buffer().is_empty() {
847
            // Hot loop that writes data to the output as long as we can do so directly from the
848
            // input stream. As an invariant of this block we did not need to use the buffer to
849
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
850
            // loop body since every code path where the buffer is filled already breaks.
851
            //
852
            // In a previous iteration of the code we trusted compiler optimization to work this
853
            // out but it seems that it does not. Another edit hidden behind some performance work
854
            // then edited out the check, inadvertently changing the behavior for callers that
855
            // relied on being able to provide an empty output buffer and still receiving a useful
856
            // signal about the state of the stream.
857
858
            // A burst is a sequence of code words that are independently decoded, i.e. they do not
859
            // change the state of the decoder in ways that would influence the interpretation of
860
            // each other. That is: they are not special symbols, they do not make us increase the
861
            // code size, they are each codes already in the tree before the burst.
862
            //
863
            // The tracking state for a burst. These are actually initialized later but compiler
864
            // wasn't smart enough to fully optimize out the init code so that appears outside the
865
            // loop.
866
101k
            let mut burst = [0; BURST];
867
101k
            let mut burst_byte_len = [0u16; BURST];
868
101k
            let mut burst_byte = [0u8; BURST];
869
101k
            let mut target: [&mut [u8]; BURST] = Default::default();
870
871
            loop {
872
                // In particular, we *also* break if the output buffer is still empty. Especially
873
                // when the output parameter was an empty slice, we must try to fetch at least one
874
                // code but with YIELD_ON_FULL we do not.
875
3.09M
                if CgC::YIELD_ON_FULL && out.is_empty() {
876
1.05k
                    break;
877
3.09M
                }
878
879
3.09M
                let mut deriv = match code_link.take() {
880
3.08M
                    Some(link) => link,
881
                    None => {
882
                        // TODO: we do not need to break here. This does not indicate that the buffer
883
                        // has been filled, rather it indicates we have reset the state. The next code
884
                        // should be part of the initial alphabet. However the first code is special in
885
                        // the sense of not creating a new code itself. This is handled correctly in
886
                        // the initialization prior to the loop; and in particular that handling as
887
                        // written currently relies on putting it into the buffer; so handling it we
888
                        // would need to ensure that either the buffer is fully cleared after its use,
889
                        // or use another implementation of handling that first code.
890
2.04k
                        break;
891
                    }
892
                };
893
894
                // Ensure the code buffer is full, we're about to request some codes.
895
                // Note that this also ensures at least one code is in the buffer if any input is left.
896
3.08M
                self.refill_bits(&mut inp);
897
3.08M
                let cnt = self.code_buffer.peek_bits(&mut burst);
898
899
                // No code left in the buffer, and no more bytes to refill the buffer.
900
3.08M
                if cnt == 0 {
901
889
                    if have_yet_to_decode_data {
902
365
                        status = Ok(LzwStatus::NoProgress);
903
524
                    }
904
905
889
                    code_link = Some(deriv);
906
889
                    break;
907
3.08M
                }
908
909
3.08M
                debug_assert!(
910
                    // When the table is full, we have a max code above the size switch.
911
0
                    self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff)
912
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
913
                    // computed next_code is 4 which already exceeds the bit width from the start.
914
                    // Then we will immediately switch code size after this code.
915
                    //
916
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
917
                    // the code below. Maybe it makes sense to revisit turning this into a compile
918
                    // time choice?
919
0
                    || (self.code_buffer.code_size() == 1 && self.next_code < 4)
920
0
                    || (self.code_buffer.code_size() == 2 && self.next_code == 4)
921
0
                    || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
922
0
                    "Table: {}, code_size: {}, next_code: {}, table_condition: {}",
923
0
                    self.table.is_full(),
924
0
                    self.code_buffer.code_size(),
925
                    self.next_code,
926
0
                    self.code_buffer.max_code() - Code::from(self.is_tiff),
927
                );
928
929
3.08M
                let mut burst_size = 0;
930
3.08M
                let left_before_size_switch = (self.code_buffer.max_code()
931
3.08M
                    - Code::from(self.is_tiff))
932
3.08M
                .saturating_sub(self.next_code);
933
934
                // A burst is a sequence of decodes that are completely independent of each other. This
935
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
936
                // all of them in the decoding table and thus known their depths, and additionally if
937
                // we can decode them directly into the output buffer.
938
5.32M
                for b in &burst[..cnt] {
939
                    // We can commit the previous burst code, and will take a slice from the output
940
                    // buffer. This also avoids the bounds check in the tight loop later.
941
5.32M
                    if burst_size > 0 {
942
2.23M
                        let len = burst_byte_len[burst_size - 1];
943
2.23M
                        let (into, tail) = out.split_at_mut(usize::from(len));
944
2.23M
                        target[burst_size - 1] = into;
945
2.23M
                        out = tail;
946
3.08M
                    }
947
948
                    // Check that we don't overflow the code size with all codes we burst decode.
949
5.32M
                    burst_size += 1;
950
951
5.32M
                    if burst_size > usize::from(left_before_size_switch) {
952
2.44M
                        break;
953
2.88M
                    }
954
955
2.88M
                    let read_code = *b;
956
957
                    // A burst code can't be special.
958
2.88M
                    if read_code == self.clear_code
959
2.86M
                        || read_code == self.end_code
960
2.86M
                        || read_code >= self.next_code
961
                    {
962
151k
                        break;
963
2.73M
                    }
964
965
                    // Read the code length and check that we can decode directly into the out slice.
966
2.73M
                    let len = self.table.depths[usize::from(read_code)];
967
968
2.73M
                    if out.len() < usize::from(len) {
969
10.5k
                        break;
970
2.72M
                    }
971
972
                    // We do exactly one more code (the one being inspected in the current iteration)
973
                    // after the 'burst'. When we want to break decoding precisely on the supplied
974
                    // buffer, we check if this is the last code to be decoded into it.
975
2.72M
                    if CgC::YIELD_ON_FULL {
976
2.72M
                        if out.len() == usize::from(len) {
977
17.0k
                            break;
978
2.70M
                        }
979
0
                    }
980
981
2.70M
                    burst_byte_len[burst_size - 1] = len;
982
                }
983
984
3.08M
                self.code_buffer.consume_bits(burst_size as u8);
985
3.08M
                have_yet_to_decode_data = false;
986
987
                // Note that the very last code in the burst buffer doesn't actually belong to the
988
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
989
                // breaks and a loop end condition above. That may be a speed advantage?
990
3.08M
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
991
992
                // The very tight loop for restoring the actual burst. These can be reconstructed in
993
                // parallel since none of them depend on a prior constructed. Only the derivation of
994
                // new codes is not parallel. There are no size changes here either.
995
3.08M
                let burst_targets = &mut target[..burst_size - 1];
996
997
3.08M
                if !self.table.is_full() {
998
652k
                    self.next_code += burst_targets.len() as u16;
999
2.43M
                }
1000
1001
2.23M
                for ((&burst, target), byte) in
1002
3.08M
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
1003
2.23M
                {
1004
2.23M
                    *byte = self.table.reconstruct(burst, target);
1005
2.23M
                }
1006
1007
3.08M
                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
1008
1009
                // Now handle the special codes.
1010
3.08M
                if new_code == self.clear_code {
1011
27.1k
                    self.reset_tables();
1012
27.1k
                    last_decoded = None;
1013
                    // Restarts in the next call to the entry point.
1014
27.1k
                    break;
1015
3.06M
                }
1016
1017
3.06M
                if new_code == self.end_code {
1018
66
                    self.has_ended = true;
1019
66
                    status = Ok(LzwStatus::Done);
1020
66
                    last_decoded = None;
1021
66
                    break;
1022
3.06M
                }
1023
1024
3.06M
                if new_code > self.next_code {
1025
570
                    status = Err(LzwError::InvalidCode);
1026
570
                    last_decoded = None;
1027
570
                    break;
1028
3.06M
                }
1029
1030
3.06M
                let required_len = if new_code == self.next_code {
1031
107k
                    self.table.depths[usize::from(deriv.code)] + 1
1032
                } else {
1033
2.95M
                    self.table.depths[usize::from(new_code)]
1034
                };
1035
1036
                // We need the decoded data of the new code if it is the `next_code`. This is the
1037
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
1038
                // all other cases we only need the first character of the decoded data.
1039
3.06M
                let have_next_code = new_code == self.next_code;
1040
1041
                // Update the slice holding the last decoded word.
1042
3.06M
                if have_next_code {
1043
                    // If we did not have any burst code, we still hold that slice in the buffer.
1044
107k
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
1045
20.5k
                        let slice = core::mem::replace(new_last, &mut []);
1046
20.5k
                        last_decoded = Some(&*slice);
1047
86.5k
                    }
1048
2.95M
                }
1049
1050
                let cha;
1051
3.06M
                let is_in_buffer = usize::from(required_len) > out.len();
1052
                // Check if we will need to store our current state into the buffer.
1053
3.06M
                if is_in_buffer {
1054
52.1k
                    if have_next_code {
1055
                        // last_decoded will be Some if we have restored any code into the out slice.
1056
                        // Otherwise it will still be present in the buffer.
1057
39.6k
                        if let Some(last) = last_decoded.take() {
1058
10.9k
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
1059
10.9k
                            self.buffer.write_mark = last.len();
1060
10.9k
                            self.buffer.read_mark = last.len();
1061
28.6k
                        }
1062
1063
39.6k
                        cha = self.buffer.fill_cscsc();
1064
12.5k
                    } else {
1065
12.5k
                        // Restore the decoded word into the buffer.
1066
12.5k
                        last_decoded = None;
1067
12.5k
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
1068
12.5k
                    }
1069
                } else {
1070
3.00M
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
1071
3.00M
                    out = tail;
1072
1073
3.00M
                    if have_next_code {
1074
                        // Reconstruct high.
1075
67.4k
                        let source = match last_decoded.take() {
1076
55.2k
                            Some(last) => last,
1077
12.1k
                            None => &self.buffer.bytes[..self.buffer.write_mark],
1078
                        };
1079
1080
                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
1081
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
1082
67.4k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
1083
67.4k
                        target[..source.len()].copy_from_slice(source);
1084
67.4k
                        target[source.len()..][0] = cha;
1085
2.94M
                    } else {
1086
2.94M
                        cha = self.table.reconstruct(new_code, target);
1087
2.94M
                    }
1088
1089
                    // A new decoded word.
1090
3.00M
                    last_decoded = Some(target);
1091
                }
1092
1093
                // Each newly read code creates one new code/link based on the preceding code if we
1094
                // have enough space to put it there.
1095
3.06M
                if !self.table.is_full() {
1096
625k
                    self.table.derive(&deriv, cha);
1097
1098
625k
                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
1099
4.28k
                        && self.code_buffer.code_size() < MAX_CODESIZE
1100
4.09k
                    {
1101
4.09k
                        self.bump_code_size();
1102
621k
                    }
1103
1104
625k
                    self.next_code += 1;
1105
2.43M
                }
1106
1107
                // store the information on the decoded word.
1108
3.06M
                code_link = Some(DerivationBase {
1109
3.06M
                    code: new_code,
1110
3.06M
                    first: cha,
1111
3.06M
                });
1112
1113
                // Can't make any more progress with decoding.
1114
                //
1115
                // We have more data buffered but not enough space to put it? We want fetch a next
1116
                // symbol if possible as in the case of it being a new symbol we can refer to the
1117
                // buffered output as the source for that symbol's meaning and do a memcpy.
1118
                //
1119
                // Since this test is after decoding at least one code, we can now check for an
1120
                // empty buffer and still guarantee progress when one was passed as a parameter.
1121
3.06M
                if is_in_buffer || out.is_empty() {
1122
70.0k
                    break;
1123
2.99M
                }
1124
            }
1125
62.2k
        }
1126
1127
        // We need to store the last word into the buffer in case the first code in the next
1128
        // iteration is the next_code.
1129
163k
        if let Some(tail) = last_decoded {
1130
18.3k
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
1131
18.3k
            self.buffer.write_mark = tail.len();
1132
18.3k
            // Mark the full buffer as having been consumed.
1133
18.3k
            self.buffer.read_mark = tail.len();
1134
145k
        }
1135
1136
        // Ensure we don't indicate that no progress was made if we read some bytes from the input
1137
        // (which is progress).
1138
163k
        if o_in > inp.len() {
1139
97.4k
            if let Ok(LzwStatus::NoProgress) = status {
1140
17.7k
                status = Ok(LzwStatus::Ok);
1141
80.2k
            }
1142
65.9k
        }
1143
1144
        // Store the code/link state.
1145
163k
        self.last = code_link;
1146
1147
163k
        BufferResult {
1148
163k
            consumed_in: o_in.wrapping_sub(inp.len()),
1149
163k
            consumed_out: o_out.wrapping_sub(out.len()),
1150
163k
            status,
1151
163k
        }
1152
164k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance
1153
}
1154
1155
impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> {
1156
85.0k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1157
85.0k
        self.code_buffer.next_symbol(inp)
1158
85.0k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol
Line
Count
Source
1156
53.6k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1157
53.6k
        self.code_buffer.next_symbol(inp)
1158
53.6k
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol
Line
Count
Source
1156
31.3k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1157
31.3k
        self.code_buffer.next_symbol(inp)
1158
31.3k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol
1159
1160
53.7k
    fn bump_code_size(&mut self) {
1161
53.7k
        self.code_buffer.bump_code_size()
1162
53.7k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size
Line
Count
Source
1160
49.7k
    fn bump_code_size(&mut self) {
1161
49.7k
        self.code_buffer.bump_code_size()
1162
49.7k
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size
Line
Count
Source
1160
4.09k
    fn bump_code_size(&mut self) {
1161
4.09k
        self.code_buffer.bump_code_size()
1162
4.09k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size
1163
1164
4.44M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1165
4.44M
        self.code_buffer.refill_bits(inp)
1166
4.44M
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits
Line
Count
Source
1164
1.35M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1165
1.35M
        self.code_buffer.refill_bits(inp)
1166
1.35M
    }
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits
Line
Count
Source
1164
3.08M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1165
3.08M
        self.code_buffer.refill_bits(inp)
1166
3.08M
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits
1167
}
1168
1169
impl CodeBuffer for MsbBuffer {
1170
2.33k
    fn new(min_size: u8) -> Self {
1171
2.33k
        MsbBuffer {
1172
2.33k
            code_size: min_size + 1,
1173
2.33k
            code_mask: (1u16 << (min_size + 1)) - 1,
1174
2.33k
            bit_buffer: 0,
1175
2.33k
            bits: 0,
1176
2.33k
        }
1177
2.33k
    }
1178
1179
29.8k
    fn reset(&mut self, min_size: u8) {
1180
29.8k
        self.code_size = min_size + 1;
1181
29.8k
        self.code_mask = (1 << self.code_size) - 1;
1182
29.8k
    }
1183
1184
31.3k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1185
31.3k
        if self.bits < self.code_size {
1186
4.23k
            self.refill_bits(inp);
1187
27.1k
        }
1188
1189
31.3k
        if self.bits < self.code_size {
1190
73
            return None;
1191
31.3k
        }
1192
1193
31.3k
        let mask = u64::from(self.code_mask);
1194
31.3k
        let rotbuf = self.bit_buffer.rotate_left(self.code_size.into());
1195
31.3k
        self.bit_buffer = rotbuf & !mask;
1196
31.3k
        self.bits -= self.code_size;
1197
31.3k
        Some((rotbuf & mask) as u16)
1198
31.3k
    }
1199
1200
4.09k
    fn bump_code_size(&mut self) {
1201
4.09k
        self.code_size += 1;
1202
4.09k
        self.code_mask = (self.code_mask << 1) | 1;
1203
4.09k
    }
1204
1205
3.09M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1206
3.09M
        let wish_count = (64 - self.bits) / 8;
1207
3.09M
        let mut buffer = [0u8; 8];
1208
3.09M
        let new_bits = match inp.get(..usize::from(wish_count)) {
1209
3.08M
            Some(bytes) => {
1210
3.08M
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
1211
3.08M
                *inp = &inp[usize::from(wish_count)..];
1212
3.08M
                wish_count * 8
1213
            }
1214
            None => {
1215
4.42k
                let new_bits = inp.len() * 8;
1216
4.42k
                buffer[..inp.len()].copy_from_slice(inp);
1217
4.42k
                *inp = &[];
1218
4.42k
                new_bits as u8
1219
            }
1220
        };
1221
3.09M
        self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits;
1222
3.09M
        self.bits += new_bits;
1223
3.09M
    }
1224
1225
3.08M
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
1226
3.08M
        let mut bit_buffer = self.bit_buffer;
1227
3.08M
        let mask = u64::from(self.code_mask);
1228
3.08M
        let mut consumed = 0;
1229
3.08M
        let mut cnt = 0;
1230
1231
17.8M
        for b in code {
1232
17.4M
            let consumed_after = consumed + self.code_size;
1233
17.4M
            if consumed_after > self.bits {
1234
2.68M
                break;
1235
14.7M
            }
1236
1237
14.7M
            cnt += 1;
1238
14.7M
            consumed = consumed_after;
1239
1240
14.7M
            let rotbuf = bit_buffer.rotate_left(self.code_size.into());
1241
14.7M
            *b = (rotbuf & mask) as u16;
1242
            // The read bits are 'appended' but we never interpret those appended bits.
1243
14.7M
            bit_buffer = rotbuf;
1244
        }
1245
1246
3.08M
        cnt
1247
3.08M
    }
1248
1249
3.08M
    fn consume_bits(&mut self, code_cnt: u8) {
1250
3.08M
        let bits = self.code_size * code_cnt;
1251
3.08M
        debug_assert!(bits <= self.bits);
1252
1253
3.08M
        if bits >= self.bits {
1254
18.0k
            self.bit_buffer = 0;
1255
3.07M
        } else {
1256
3.07M
            // bits < self.bits so this must be smaller than the number size.
1257
3.07M
            self.bit_buffer = self.bit_buffer << bits;
1258
3.07M
        }
1259
1260
3.08M
        self.bits = self.bits.wrapping_sub(bits);
1261
3.08M
    }
1262
1263
3.71M
    fn max_code(&self) -> Code {
1264
3.71M
        self.code_mask
1265
3.71M
    }
1266
1267
4.28k
    fn code_size(&self) -> u8 {
1268
4.28k
        self.code_size
1269
4.28k
    }
1270
}
1271
1272
impl CodeBuffer for LsbBuffer {
1273
2.42k
    fn new(min_size: u8) -> Self {
1274
2.42k
        LsbBuffer {
1275
2.42k
            code_size: min_size + 1,
1276
2.42k
            code_mask: (1u16 << (min_size + 1)) - 1,
1277
2.42k
            bit_buffer: 0,
1278
2.42k
            bits: 0,
1279
2.42k
        }
1280
2.42k
    }
1281
1282
53.0k
    fn reset(&mut self, min_size: u8) {
1283
53.0k
        self.code_size = min_size + 1;
1284
53.0k
        self.code_mask = (1 << self.code_size) - 1;
1285
53.0k
    }
1286
1287
53.6k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1288
53.6k
        if self.bits < self.code_size {
1289
3.21k
            self.refill_bits(inp);
1290
50.4k
        }
1291
1292
53.6k
        if self.bits < self.code_size {
1293
334
            return None;
1294
53.2k
        }
1295
1296
53.2k
        let mask = u64::from(self.code_mask);
1297
53.2k
        let code = self.bit_buffer & mask;
1298
53.2k
        self.bit_buffer >>= self.code_size;
1299
53.2k
        self.bits -= self.code_size;
1300
53.2k
        Some(code as u16)
1301
53.6k
    }
1302
1303
49.7k
    fn bump_code_size(&mut self) {
1304
49.7k
        self.code_size += 1;
1305
49.7k
        self.code_mask = (self.code_mask << 1) | 1;
1306
49.7k
    }
1307
1308
1.36M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1309
1.36M
        let wish_count = (64 - self.bits) / 8;
1310
1.36M
        let mut buffer = [0u8; 8];
1311
1.36M
        let new_bits = match inp.get(..usize::from(wish_count)) {
1312
900k
            Some(bytes) => {
1313
900k
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
1314
900k
                *inp = &inp[usize::from(wish_count)..];
1315
900k
                wish_count * 8
1316
            }
1317
            None => {
1318
461k
                let new_bits = inp.len() * 8;
1319
461k
                buffer[..inp.len()].copy_from_slice(inp);
1320
461k
                *inp = &[];
1321
461k
                new_bits as u8
1322
            }
1323
        };
1324
1.36M
        self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits;
1325
1.36M
        self.bits += new_bits;
1326
1.36M
    }
1327
1328
1.35M
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
1329
1.35M
        let mut bit_buffer = self.bit_buffer;
1330
1.35M
        let mask = u64::from(self.code_mask);
1331
1.35M
        let mut consumed = 0;
1332
1.35M
        let mut cnt = 0;
1333
1334
6.21M
        for b in code {
1335
6.00M
            let consumed_after = consumed + self.code_size;
1336
6.00M
            if consumed_after > self.bits {
1337
1.15M
                break;
1338
4.85M
            }
1339
1340
4.85M
            cnt += 1;
1341
4.85M
            consumed = consumed_after;
1342
1343
4.85M
            *b = (bit_buffer & mask) as u16;
1344
4.85M
            bit_buffer = bit_buffer >> self.code_size;
1345
        }
1346
1347
1.35M
        cnt
1348
1.35M
    }
1349
1350
1.12M
    fn consume_bits(&mut self, code_cnt: u8) {
1351
1.12M
        let bits = self.code_size * code_cnt;
1352
1.12M
        debug_assert!(bits <= self.bits);
1353
1354
1.12M
        if bits >= self.bits {
1355
134k
            self.bit_buffer = 0;
1356
988k
        } else {
1357
988k
            // bits < self.bits so this must be smaller than the number size.
1358
988k
            self.bit_buffer = self.bit_buffer >> bits;
1359
988k
        }
1360
1361
1.12M
        self.bits = self.bits.wrapping_sub(bits);
1362
1.12M
    }
1363
1364
1.84M
    fn max_code(&self) -> Code {
1365
1.84M
        self.code_mask
1366
1.84M
    }
1367
1368
49.9k
    fn code_size(&self) -> u8 {
1369
49.9k
        self.code_size
1370
49.9k
    }
1371
}
1372
1373
impl Buffer {
1374
4.75k
    fn new() -> Self {
1375
4.75k
        Buffer {
1376
4.75k
            bytes: vec![0; MAX_ENTRIES].into_boxed_slice(),
1377
4.75k
            read_mark: 0,
1378
4.75k
            write_mark: 0,
1379
4.75k
        }
1380
4.75k
    }
1381
1382
    /// When encoding a sequence `cScSc` where `c` is any character and `S` is any string
1383
    /// this results in two codes `AB`, `A` encoding `cS` and `B` encoding `cSc`. Supposing
1384
    /// the buffer is already filled with the reconstruction of `A`, we can easily fill it
1385
    /// with the reconstruction of `B`.
1386
49.1k
    fn fill_cscsc(&mut self) -> u8 {
1387
49.1k
        self.bytes[self.write_mark] = self.bytes[0];
1388
49.1k
        self.write_mark += 1;
1389
49.1k
        self.read_mark = 0;
1390
49.1k
        self.bytes[0]
1391
49.1k
    }
1392
1393
    // Fill the buffer by decoding from the table
1394
114k
    fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 {
1395
114k
        self.write_mark = 0;
1396
114k
        self.read_mark = 0;
1397
114k
        let depth = table.depths[usize::from(code)];
1398
114k
        let mut memory = core::mem::replace(&mut self.bytes, Box::default());
1399
1400
114k
        let out = &mut memory[..usize::from(depth)];
1401
114k
        let last = table.reconstruct(code, out);
1402
1403
114k
        self.bytes = memory;
1404
114k
        self.write_mark = usize::from(depth);
1405
114k
        last
1406
114k
    }
1407
1408
1.43M
    fn buffer(&self) -> &[u8] {
1409
1.43M
        &self.bytes[self.read_mark..self.write_mark]
1410
1.43M
    }
1411
1412
450k
    fn consume(&mut self, amt: usize) {
1413
450k
        self.read_mark += amt;
1414
450k
    }
1415
}
1416
1417
impl Table {
1418
4.75k
    fn new() -> Self {
1419
4.75k
        Table {
1420
4.75k
            inner: Vec::with_capacity(MAX_ENTRIES),
1421
4.75k
            depths: Vec::with_capacity(MAX_ENTRIES),
1422
4.75k
        }
1423
4.75k
    }
1424
1425
68.1k
    fn clear(&mut self, min_size: u8) {
1426
68.1k
        let static_count = usize::from(1u16 << u16::from(min_size)) + 2;
1427
68.1k
        self.inner.truncate(static_count);
1428
68.1k
        self.depths.truncate(static_count);
1429
68.1k
    }
1430
1431
14.7k
    fn init(&mut self, min_size: u8) {
1432
14.7k
        self.inner.clear();
1433
14.7k
        self.depths.clear();
1434
1.16M
        for i in 0..(1u16 << u16::from(min_size)) {
1435
1.16M
            self.inner.push(Link::base(i as u8));
1436
1.16M
            self.depths.push(1);
1437
1.16M
        }
1438
        // Clear code.
1439
14.7k
        self.inner.push(Link::base(0));
1440
14.7k
        self.depths.push(0);
1441
        // End code.
1442
14.7k
        self.inner.push(Link::base(0));
1443
14.7k
        self.depths.push(0);
1444
14.7k
    }
1445
1446
72.2k
    fn at(&self, code: Code) -> &Link {
1447
72.2k
        &self.inner[usize::from(code)]
1448
72.2k
    }
1449
1450
72.2k
    fn is_empty(&self) -> bool {
1451
72.2k
        self.inner.is_empty()
1452
72.2k
    }
1453
1454
8.35M
    fn is_full(&self) -> bool {
1455
8.35M
        self.inner.len() >= MAX_ENTRIES
1456
8.35M
    }
1457
1458
1.34M
    fn derive(&mut self, from: &DerivationBase, byte: u8) {
1459
1.34M
        let link = from.derive(byte);
1460
1.34M
        let depth = self.depths[usize::from(from.code)] + 1;
1461
1.34M
        self.inner.push(link);
1462
1.34M
        self.depths.push(depth);
1463
1.34M
    }
1464
1465
    // Derive multiple codes in a row, where each base is guaranteed to already exist.
1466
4.21M
    fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) {
1467
4.21M
        let mut depth_of = from.code;
1468
        // Note that false data dependency we want to get rid of!
1469
        // TODO: this pushes into a Vec, maybe we can make this cleaner.
1470
8.26M
        for &code in burst {
1471
4.05M
            let depth = self.depths[usize::from(depth_of)] + 1;
1472
4.05M
            self.depths.push(depth);
1473
4.05M
            depth_of = code;
1474
4.05M
        }
1475
1476
        // Llvm tends to be flaky with code layout for the case of requiring an allocation. It's
1477
        // not clear if that can occur in practice but it relies on iterator size hint..
1478
4.21M
        let extensions = burst.iter().zip(first);
1479
4.21M
        self.inner.extend(extensions.map(|(&code, &first)| {
1480
4.05M
            let link = from.derive(first);
1481
4.05M
            from.code = code;
1482
4.05M
            from.first = first;
1483
4.05M
            link
1484
4.05M
        }));
1485
4.21M
    }
1486
1487
8.08M
    fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 {
1488
8.08M
        let mut code_iter = code;
1489
8.08M
        let table = &self.inner[..=usize::from(code)];
1490
8.08M
        let first = table[usize::from(code)].first;
1491
1492
8.08M
        let len = code_iter;
1493
36.9M
        for ch in out.iter_mut().rev() {
1494
36.9M
            //(code, cha) = self.table[k as usize];
1495
36.9M
            // Note: This could possibly be replaced with an unchecked array access if
1496
36.9M
            //  - value is asserted to be < self.next_code() in push
1497
36.9M
            //  - min_size is asserted to be < MAX_CODESIZE
1498
36.9M
            let entry = &table[usize::from(code_iter)];
1499
36.9M
            code_iter = core::cmp::min(len, entry.prev);
1500
36.9M
            *ch = entry.byte;
1501
36.9M
        }
1502
1503
8.08M
        first
1504
8.08M
    }
1505
}
1506
1507
impl Link {
1508
1.19M
    fn base(byte: u8) -> Self {
1509
1.19M
        Link {
1510
1.19M
            prev: 0,
1511
1.19M
            byte,
1512
1.19M
            first: byte,
1513
1.19M
        }
1514
1.19M
    }
1515
}
1516
1517
impl DerivationBase {
1518
    // TODO: this has self type to make it clear we might depend on the old in a future
1519
    // optimization. However, that has no practical purpose right now.
1520
5.39M
    fn derive(&self, byte: u8) -> Link {
1521
5.39M
        Link {
1522
5.39M
            prev: self.code,
1523
5.39M
            byte,
1524
5.39M
            first: self.first,
1525
5.39M
        }
1526
5.39M
    }
1527
}
1528
1529
#[cfg(test)]
1530
mod tests {
1531
    use crate::alloc::vec::Vec;
1532
    #[cfg(feature = "std")]
1533
    use crate::StreamBuf;
1534
    use crate::{decode::Decoder, BitOrder};
1535
1536
    #[test]
1537
    fn invalid_code_size_low() {
1538
        let _ = Decoder::new(BitOrder::Msb, 0);
1539
        let _ = Decoder::new(BitOrder::Msb, 1);
1540
    }
1541
1542
    #[test]
1543
    #[should_panic]
1544
    fn invalid_code_size_high() {
1545
        let _ = Decoder::new(BitOrder::Msb, 14);
1546
    }
1547
1548
    fn make_encoded() -> Vec<u8> {
1549
        const FILE: &'static [u8] = include_bytes!(concat!(
1550
            env!("CARGO_MANIFEST_DIR"),
1551
            "/benches/binary-8-msb.lzw"
1552
        ));
1553
        return Vec::from(FILE);
1554
    }
1555
1556
    #[test]
1557
    #[cfg(feature = "std")]
1558
    fn into_stream_buffer_no_alloc() {
1559
        let encoded = make_encoded();
1560
        let mut decoder = Decoder::new(BitOrder::Msb, 8);
1561
1562
        let mut output = vec![];
1563
        let mut buffer = [0; 512];
1564
        let mut istream = decoder.into_stream(&mut output);
1565
        istream.set_buffer(&mut buffer[..]);
1566
        istream.decode(&encoded[..]).status.unwrap();
1567
1568
        match istream.buffer {
1569
            Some(StreamBuf::Borrowed(_)) => {}
1570
            None => panic!("Decoded without buffer??"),
1571
            Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
1572
        }
1573
    }
1574
1575
    #[test]
1576
    #[cfg(feature = "std")]
1577
    fn into_stream_buffer_small_alloc() {
1578
        struct WriteTap<W: std::io::Write>(W);
1579
        const BUF_SIZE: usize = 512;
1580
1581
        impl<W: std::io::Write> std::io::Write for WriteTap<W> {
1582
            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1583
                assert!(buf.len() <= BUF_SIZE);
1584
                self.0.write(buf)
1585
            }
1586
            fn flush(&mut self) -> std::io::Result<()> {
1587
                self.0.flush()
1588
            }
1589
        }
1590
1591
        let encoded = make_encoded();
1592
        let mut decoder = Decoder::new(BitOrder::Msb, 8);
1593
1594
        let mut output = vec![];
1595
        let mut istream = decoder.into_stream(WriteTap(&mut output));
1596
        istream.set_buffer_size(512);
1597
        istream.decode(&encoded[..]).status.unwrap();
1598
1599
        match istream.buffer {
1600
            Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
1601
            Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
1602
            None => panic!("Decoded without buffer??"),
1603
        }
1604
    }
1605
1606
    #[test]
1607
    #[cfg(feature = "std")]
1608
    fn reset() {
1609
        let encoded = make_encoded();
1610
        let mut decoder = Decoder::new(BitOrder::Msb, 8);
1611
        let mut reference = None;
1612
1613
        for _ in 0..2 {
1614
            let mut output = vec![];
1615
            let mut buffer = [0; 512];
1616
            let mut istream = decoder.into_stream(&mut output);
1617
            istream.set_buffer(&mut buffer[..]);
1618
            istream.decode_all(&encoded[..]).status.unwrap();
1619
1620
            decoder.reset();
1621
            if let Some(reference) = &reference {
1622
                assert_eq!(output, *reference);
1623
            } else {
1624
                reference = Some(output);
1625
            }
1626
        }
1627
    }
1628
}