Coverage Report

Created: 2025-07-11 07:25

/rust/registry/src/index.crates.io-6f17d22bba15001f/weezl-0.1.10/src/decode.rs
Line
Count
Source (jump to first uncovered line)
1
//! A module for all decoding needs.
2
#[cfg(feature = "std")]
3
use crate::error::StreamResult;
4
use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult};
5
use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
6
7
use crate::alloc::{boxed::Box, vec, vec::Vec};
8
#[cfg(feature = "std")]
9
use std::io::{self, BufRead, Write};
10
11
/// The state for decoding data with an LZW algorithm.
12
///
13
/// The same structure can be utilized with streams as well as your own buffers and driver logic.
14
/// It may even be possible to mix them if you are sufficiently careful not to lose or skip any
15
/// already decode data in the process.
16
///
17
/// This is a sans-IO implementation, meaning that it only contains the state of the decoder and
18
/// the caller will provide buffers for input and output data when calling the basic
19
/// [`decode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*`
20
/// methods for decoding with a particular style of common IO.
21
///
22
/// * [`decode`] for decoding once without any IO-loop.
23
/// * [`into_async`] for decoding with the `futures` traits for asynchronous IO.
24
/// * [`into_stream`] for decoding with the standard `io` traits.
25
/// * [`into_vec`] for in-memory decoding.
26
///
27
/// [`decode_bytes`]: #method.decode_bytes
28
/// [`decode`]: #method.decode
29
/// [`into_async`]: #method.into_async
30
/// [`into_stream`]: #method.into_stream
31
/// [`into_vec`]: #method.into_vec
32
pub struct Decoder {
33
    state: Box<dyn Stateful + Send + 'static>,
34
}
35
36
/// A decoding stream sink.
37
///
38
/// See [`Decoder::into_stream`] on how to create this type.
39
///
40
/// [`Decoder::into_stream`]: struct.Decoder.html#method.into_stream
41
#[cfg_attr(
42
    not(feature = "std"),
43
    deprecated = "This type is only useful with the `std` feature."
44
)]
45
#[cfg_attr(not(feature = "std"), allow(dead_code))]
46
pub struct IntoStream<'d, W> {
47
    decoder: &'d mut Decoder,
48
    writer: W,
49
    buffer: Option<StreamBuf<'d>>,
50
    default_size: usize,
51
}
52
53
/// An async decoding sink.
54
///
55
/// See [`Decoder::into_async`] on how to create this type.
56
///
57
/// [`Decoder::into_async`]: struct.Decoder.html#method.into_async
58
#[cfg(feature = "async")]
59
pub struct IntoAsync<'d, W> {
60
    decoder: &'d mut Decoder,
61
    writer: W,
62
    buffer: Option<StreamBuf<'d>>,
63
    default_size: usize,
64
}
65
66
/// A decoding sink into a vector.
67
///
68
/// See [`Decoder::into_vec`] on how to create this type.
69
///
70
/// [`Decoder::into_vec`]: struct.Decoder.html#method.into_vec
71
pub struct IntoVec<'d> {
72
    decoder: &'d mut Decoder,
73
    vector: &'d mut Vec<u8>,
74
}
75
76
trait Stateful {
77
    fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
78
    fn has_ended(&self) -> bool;
79
    /// Ignore an end code and continue decoding (no implied reset).
80
    fn restart(&mut self);
81
    /// Reset the decoder to the beginning, dropping all buffers etc.
82
    fn reset(&mut self);
83
}
84
85
#[derive(Clone)]
86
struct Link {
87
    prev: Code,
88
    byte: u8,
89
    first: u8,
90
}
91
92
#[derive(Clone)]
93
struct DerivationBase {
94
    code: Code,
95
    first: u8,
96
}
97
98
#[derive(Default)]
99
struct MsbBuffer {
100
    /// A buffer of individual bits. The oldest code is kept in the high-order bits.
101
    bit_buffer: u64,
102
    /// A precomputed mask for this code.
103
    code_mask: u16,
104
    /// The current code size.
105
    code_size: u8,
106
    /// The number of bits in the buffer.
107
    bits: u8,
108
}
109
110
#[derive(Default)]
111
struct LsbBuffer {
112
    /// A buffer of individual bits. The oldest code is kept in the high-order bits.
113
    bit_buffer: u64,
114
    /// A precomputed mask for this code.
115
    code_mask: u16,
116
    /// The current code size.
117
    code_size: u8,
118
    /// The number of bits in the buffer.
119
    bits: u8,
120
}
121
122
trait CodeBuffer {
123
    fn new(min_size: u8) -> Self;
124
    fn reset(&mut self, min_size: u8);
125
    fn bump_code_size(&mut self);
126
127
    /// Retrieve the next symbol, refilling if necessary.
128
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code>;
129
    /// Refill the internal buffer.
130
    fn refill_bits(&mut self, inp: &mut &[u8]);
131
132
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize;
133
    fn consume_bits(&mut self, code_cnt: u8);
134
135
    fn max_code(&self) -> Code;
136
    fn code_size(&self) -> u8;
137
}
138
139
trait CodegenConstants {
140
    const YIELD_ON_FULL: bool;
141
}
142
143
struct DecodeState<CodeBuffer, Constants: CodegenConstants> {
144
    /// The original minimum code size.
145
    min_size: u8,
146
    /// The table of decoded codes.
147
    table: Table,
148
    /// The buffer of decoded data.
149
    buffer: Buffer,
150
    /// The link which we are still decoding and its original code.
151
    last: Option<DerivationBase>,
152
    /// The next code entry.
153
    next_code: Code,
154
    /// Code to reset all tables.
155
    clear_code: Code,
156
    /// Code to signal the end of the stream.
157
    end_code: Code,
158
    /// A stored flag if the end code has already appeared.
159
    has_ended: bool,
160
    /// If tiff then bumps are a single code sooner.
161
    is_tiff: bool,
162
    /// Do we allow stream to start without an explicit reset code?
163
    implicit_reset: bool,
164
    /// The buffer for decoded words.
165
    code_buffer: CodeBuffer,
166
    #[allow(dead_code)]
167
    constants: core::marker::PhantomData<Constants>,
168
}
169
170
// We have a buffer of 64 bits. So at max size at most 5 units can be read at once without
171
// refilling the buffer. At smaller code sizes there are more. We tune for 6 here, by slight
172
// experimentation. This may be an architecture dependent constant.
173
const BURST: usize = 6;
174
175
struct Buffer {
176
    bytes: Box<[u8]>,
177
    read_mark: usize,
178
    write_mark: usize,
179
}
180
181
struct Table {
182
    inner: Vec<Link>,
183
    depths: Vec<u16>,
184
}
185
186
/// Describes the static parameters for creating a decoder.
187
#[derive(Clone, Debug)]
188
pub struct Configuration {
189
    order: BitOrder,
190
    size: u8,
191
    tiff: bool,
192
    yield_on_full: bool,
193
}
194
195
impl Configuration {
196
    /// Create a configuration to decode with the specified bit order and symbol size.
197
1.38k
    pub fn new(order: BitOrder, size: u8) -> Self {
198
1.38k
        super::assert_decode_size(size);
199
1.38k
        Configuration {
200
1.38k
            order,
201
1.38k
            size,
202
1.38k
            tiff: false,
203
1.38k
            yield_on_full: false,
204
1.38k
        }
205
1.38k
    }
206
207
    /// Create a configuration for a TIFF compatible decoder.
208
549
    pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
209
549
        super::assert_decode_size(size);
210
549
        Configuration {
211
549
            order,
212
549
            size,
213
549
            tiff: true,
214
549
            yield_on_full: false,
215
549
        }
216
549
    }
217
218
    /// Immediately yield to the caller when the decoder buffer is full.
219
    ///
220
    /// This can be used for `libtiff` compatibility. It will use a "relaxed" stream interpretation
221
    /// that need not contain an explicit EOF. Instead, the decoder is expected to stop fetching
222
    /// symbols when some out-of-band specified length of the decoded text has been reached. The
223
    /// caller indicates this maximum length through the available output buffer space.
224
    ///
225
    /// Symbols afterwards must not be expected to be valid. On filling the output buffer space
226
    /// completely, the decoder will return immediately to the caller instead of potentially
227
    /// interpreting the following bit-stream (and returning an error on doing so).
228
    ///
229
    /// Default: `false`.
230
0
    pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self {
231
0
        Configuration {
232
0
            yield_on_full: do_yield,
233
0
            ..self
234
0
        }
235
0
    }
236
237
    /// Create a new decoder with the define configuration.
238
1.93k
    pub fn build(self) -> Decoder {
239
1.93k
        Decoder {
240
1.93k
            state: Decoder::from_configuration(&self),
241
1.93k
        }
242
1.93k
    }
243
}
244
245
impl Decoder {
246
    /// Create a new decoder with the specified bit order and symbol size.
247
    ///
248
    /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
249
    /// original specification. In particular you will need to specify an `Lsb` bit oder to decode
250
    /// the data portion of a compressed `gif` image.
251
    ///
252
    /// # Panics
253
    ///
254
    /// The `size` needs to be in the interval `0..=12`.
255
1.38k
    pub fn new(order: BitOrder, size: u8) -> Self {
256
1.38k
        Configuration::new(order, size).build()
257
1.38k
    }
258
259
    /// Create a TIFF compatible decoder with the specified bit order and symbol size.
260
    ///
261
    /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
262
    /// TIFF specification, which is a misinterpretation of the original algorithm for increasing
263
    /// the code size. It switches one symbol sooner.
264
    ///
265
    /// # Panics
266
    ///
267
    /// The `size` needs to be in the interval `0..=12`.
268
549
    pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
269
549
        Configuration::with_tiff_size_switch(order, size).build()
270
549
    }
271
272
1.93k
    fn from_configuration(configuration: &Configuration) -> Box<dyn Stateful + Send + 'static> {
273
        struct NoYield;
274
        struct YieldOnFull;
275
276
        impl CodegenConstants for NoYield {
277
            const YIELD_ON_FULL: bool = false;
278
        }
279
280
        impl CodegenConstants for YieldOnFull {
281
            const YIELD_ON_FULL: bool = true;
282
        }
283
284
        type Boxed = Box<dyn Stateful + Send + 'static>;
285
1.93k
        match (configuration.order, configuration.yield_on_full) {
286
            (BitOrder::Lsb, false) => {
287
1.38k
                let mut state =
288
1.38k
                    Box::new(DecodeState::<LsbBuffer, NoYield>::new(configuration.size));
289
1.38k
                state.is_tiff = configuration.tiff;
290
1.38k
                state as Boxed
291
            }
292
            (BitOrder::Lsb, true) => {
293
0
                let mut state = Box::new(DecodeState::<LsbBuffer, YieldOnFull>::new(
294
0
                    configuration.size,
295
0
                ));
296
0
                state.is_tiff = configuration.tiff;
297
0
                state as Boxed
298
            }
299
            (BitOrder::Msb, false) => {
300
549
                let mut state =
301
549
                    Box::new(DecodeState::<MsbBuffer, NoYield>::new(configuration.size));
302
549
                state.is_tiff = configuration.tiff;
303
549
                state as Boxed
304
            }
305
            (BitOrder::Msb, true) => {
306
0
                let mut state = Box::new(DecodeState::<MsbBuffer, YieldOnFull>::new(
307
0
                    configuration.size,
308
0
                ));
309
0
                state.is_tiff = configuration.tiff;
310
0
                state as Boxed
311
            }
312
        }
313
1.93k
    }
314
315
    /// Decode some bytes from `inp` and write result to `out`.
316
    ///
317
    /// This will consume a prefix of the input buffer and write decoded output into a prefix of
318
    /// the output buffer. See the respective fields of the return value for the count of consumed
319
    /// and written bytes. For the next call You should have adjusted the inputs accordingly.
320
    ///
321
    /// The call will try to decode and write as many bytes of output as available. It will be
322
    /// much more optimized (and avoid intermediate buffering) if it is allowed to write a large
323
    /// contiguous chunk at once.
324
    ///
325
    /// See [`into_stream`] for high-level functions (that are only available with the `std`
326
    /// feature).
327
    ///
328
    /// [`into_stream`]: #method.into_stream
329
467k
    pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
330
467k
        self.state.advance(inp, out)
331
467k
    }
332
333
    /// Decode a single chunk of lzw encoded data.
334
    ///
335
    /// This method requires the data to contain an end marker, and returns an error otherwise.
336
    ///
337
    /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize
338
    /// buffer size, to supply an existing vector, to control whether an end marker is required, or
339
    /// to preserve partial data in the case of a decoding error.
340
    ///
341
    /// [`into_vec`]: #into_vec
342
    ///
343
    /// # Example
344
    ///
345
    /// ```
346
    /// use weezl::{BitOrder, decode::Decoder};
347
    ///
348
    /// // Encoded that was created with an encoder.
349
    /// let data = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10";
350
    /// let decoded = Decoder::new(BitOrder::Msb, 9)
351
    ///     .decode(data)
352
    ///     .unwrap();
353
    /// assert_eq!(decoded, b"Hello, world");
354
    /// ```
355
0
    pub fn decode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> {
356
0
        let mut output = vec![];
357
0
        self.into_vec(&mut output).decode_all(data).status?;
358
0
        Ok(output)
359
0
    }
360
361
    /// Construct a decoder into a writer.
362
    #[cfg(feature = "std")]
363
0
    pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
364
0
        IntoStream {
365
0
            decoder: self,
366
0
            writer,
367
0
            buffer: None,
368
0
            default_size: STREAM_BUF_SIZE,
369
0
        }
370
0
    }
371
372
    /// Construct a decoder into an async writer.
373
    #[cfg(feature = "async")]
374
    pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
375
        IntoAsync {
376
            decoder: self,
377
            writer,
378
            buffer: None,
379
            default_size: STREAM_BUF_SIZE,
380
        }
381
    }
382
383
    /// Construct a decoder into a vector.
384
    ///
385
    /// All decoded data is appended and the vector is __not__ cleared.
386
    ///
387
    /// Compared to `into_stream` this interface allows a high-level access to decoding without
388
    /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the
389
    /// special target exposes.
390
0
    pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> {
391
0
        IntoVec {
392
0
            decoder: self,
393
0
            vector: vec,
394
0
        }
395
0
    }
396
397
    /// Check if the decoding has finished.
398
    ///
399
    /// No more output is produced beyond the end code that marked the finish of the stream. The
400
    /// decoder may have read additional bytes, including padding bits beyond the last code word
401
    /// but also excess bytes provided.
402
400k
    pub fn has_ended(&self) -> bool {
403
400k
        self.state.has_ended()
404
400k
    }
405
406
    /// Ignore an end code and continue.
407
    ///
408
    /// This will _not_ reset any of the inner code tables and not have the effect of a clear code.
409
    /// It will instead continue as if the end code had not been present. If no end code has
410
    /// occurred then this is a no-op.
411
    ///
412
    /// You can test if an end code has occurred with [`has_ended`](#method.has_ended).
413
    /// FIXME: clarify how this interacts with padding introduced after end code.
414
    #[allow(dead_code)]
415
0
    pub(crate) fn restart(&mut self) {
416
0
        self.state.restart();
417
0
    }
418
419
    /// Reset all internal state.
420
    ///
421
    /// This produce a decoder as if just constructed with `new` but taking slightly less work. In
422
    /// particular it will not deallocate any internal allocations. It will also avoid some
423
    /// duplicate setup work.
424
0
    pub fn reset(&mut self) {
425
0
        self.state.reset();
426
0
    }
427
}
428
429
#[cfg(feature = "std")]
430
impl<'d, W: Write> IntoStream<'d, W> {
431
    /// Decode data from a reader.
432
    ///
433
    /// This will read data until the stream is empty or an end marker is reached.
434
0
    pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
435
0
        self.decode_part(read, false)
436
0
    }
437
438
    /// Decode data from a reader, requiring an end marker.
439
0
    pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
440
0
        self.decode_part(read, true)
441
0
    }
442
443
    /// Set the size of the intermediate decode buffer.
444
    ///
445
    /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is
446
    /// available and any decoding method is called. No buffer is allocated if `set_buffer` has
447
    /// been called. The buffer is reused.
448
    ///
449
    /// # Panics
450
    /// This method panics if `size` is `0`.
451
0
    pub fn set_buffer_size(&mut self, size: usize) {
452
0
        assert_ne!(size, 0, "Attempted to set empty buffer");
453
0
        self.default_size = size;
454
0
    }
455
456
    /// Use a particular buffer as an intermediate decode buffer.
457
    ///
458
    /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
459
    /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical
460
    /// for efficient decoding. Some optimization techniques require the buffer to hold one or more
461
    /// previous decoded words. There is also additional overhead from `write` calls each time the
462
    /// buffer has been filled.
463
    ///
464
    /// # Panics
465
    /// This method panics if the `buffer` is empty.
466
0
    pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
467
0
        assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
468
0
        self.buffer = Some(StreamBuf::Borrowed(buffer));
469
0
    }
470
471
0
    fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
472
0
        let IntoStream {
473
0
            decoder,
474
0
            writer,
475
0
            buffer,
476
0
            default_size,
477
0
        } = self;
478
479
        enum Progress {
480
            Ok,
481
            Done,
482
        }
483
484
0
        let mut bytes_read = 0;
485
0
        let mut bytes_written = 0;
486
0
487
0
        // Converting to mutable refs to move into the `once` closure.
488
0
        let read_bytes = &mut bytes_read;
489
0
        let write_bytes = &mut bytes_written;
490
491
0
        let outbuf: &mut [u8] =
492
0
            match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
493
0
                StreamBuf::Borrowed(slice) => &mut *slice,
494
0
                StreamBuf::Owned(vec) => &mut *vec,
495
            };
496
0
        assert!(!outbuf.is_empty());
497
498
0
        let once = move || {
499
            // Try to grab one buffer of input data.
500
0
            let data = read.fill_buf()?;
501
502
            // Decode as much of the buffer as fits.
503
0
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
504
0
            // Do the bookkeeping and consume the buffer.
505
0
            *read_bytes += result.consumed_in;
506
0
            *write_bytes += result.consumed_out;
507
0
            read.consume(result.consumed_in);
508
509
            // Handle the status in the result.
510
0
            let done = result.status.map_err(|err| {
511
0
                io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
512
0
            })?;
513
514
            // Check if we had any new data at all.
515
0
            if let LzwStatus::NoProgress = done {
516
0
                debug_assert_eq!(
517
                    result.consumed_out, 0,
518
0
                    "No progress means we have not decoded any data"
519
                );
520
                // In particular we did not finish decoding.
521
0
                if must_finish {
522
0
                    return Err(io::Error::new(
523
0
                        io::ErrorKind::UnexpectedEof,
524
0
                        "No more data but no end marker detected",
525
0
                    ));
526
                } else {
527
0
                    return Ok(Progress::Done);
528
                }
529
0
            }
530
0
531
0
            // And finish by writing our result.
532
0
            // TODO: we may lose data on error (also on status error above) which we might want to
533
0
            // deterministically handle so that we don't need to restart everything from scratch as
534
0
            // the only recovery strategy. Any changes welcome.
535
0
            writer.write_all(&outbuf[..result.consumed_out])?;
536
537
0
            Ok(if let LzwStatus::Done = done {
538
0
                Progress::Done
539
            } else {
540
0
                Progress::Ok
541
            })
542
0
        };
543
544
        // Decode chunks of input data until we're done.
545
0
        let status = core::iter::repeat_with(once)
546
0
            // scan+fuse can be replaced with map_while
547
0
            .scan((), |(), result| match result {
548
0
                Ok(Progress::Ok) => Some(Ok(())),
549
0
                Err(err) => Some(Err(err)),
550
0
                Ok(Progress::Done) => None,
551
0
            })
552
0
            .fuse()
553
0
            .collect();
554
0
555
0
        StreamResult {
556
0
            bytes_read,
557
0
            bytes_written,
558
0
            status,
559
0
        }
560
0
    }
561
}
562
563
impl IntoVec<'_> {
564
    /// Decode data from a slice.
565
    ///
566
    /// This will read data until the slice is empty or an end marker is reached.
567
0
    pub fn decode(&mut self, read: &[u8]) -> VectorResult {
568
0
        self.decode_part(read, false)
569
0
    }
570
571
    /// Decode data from a slice, requiring an end marker.
572
0
    pub fn decode_all(mut self, read: &[u8]) -> VectorResult {
573
0
        self.decode_part(read, true)
574
0
    }
575
576
0
    fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) {
577
        const CHUNK_SIZE: usize = 1 << 12;
578
0
        let decoder = &mut self.decoder;
579
0
        let length = self.vector.len();
580
0
581
0
        // Use the vector to do overflow checks and w/e.
582
0
        self.vector.reserve(CHUNK_SIZE);
583
0
        // FIXME: decoding into uninit buffer?
584
0
        self.vector.resize(length + CHUNK_SIZE, 0u8);
585
0
586
0
        (&mut self.vector[length..], decoder)
587
0
    }
588
589
0
    fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult {
590
0
        let mut result = VectorResult {
591
0
            consumed_in: 0,
592
0
            consumed_out: 0,
593
0
            status: Ok(LzwStatus::Ok),
594
0
        };
595
596
        enum Progress {
597
            Ok,
598
            Done,
599
        }
600
601
        // Converting to mutable refs to move into the `once` closure.
602
0
        let read_bytes = &mut result.consumed_in;
603
0
        let write_bytes = &mut result.consumed_out;
604
0
        let mut data = part;
605
0
606
0
        // A 64 MB buffer is quite large but should get alloc_zeroed.
607
0
        // Note that the decoded size can be up to quadratic in code block.
608
0
        let once = move || {
609
0
            // Grab a new output buffer.
610
0
            let (outbuf, decoder) = self.grab_buffer();
611
0
612
0
            // Decode as much of the buffer as fits.
613
0
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
614
0
            // Do the bookkeeping and consume the buffer.
615
0
            *read_bytes += result.consumed_in;
616
0
            *write_bytes += result.consumed_out;
617
0
            data = &data[result.consumed_in..];
618
0
619
0
            let unfilled = outbuf.len() - result.consumed_out;
620
0
            let filled = self.vector.len() - unfilled;
621
0
            self.vector.truncate(filled);
622
623
            // Handle the status in the result.
624
0
            match result.status {
625
0
                Err(err) => Err(err),
626
0
                Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode),
627
0
                Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done),
628
0
                Ok(LzwStatus::Ok) => Ok(Progress::Ok),
629
            }
630
0
        };
631
632
        // Decode chunks of input data until we're done.
633
0
        let status: Result<(), _> = core::iter::repeat_with(once)
634
0
            // scan+fuse can be replaced with map_while
635
0
            .scan((), |(), result| match result {
636
0
                Ok(Progress::Ok) => Some(Ok(())),
637
0
                Err(err) => Some(Err(err)),
638
0
                Ok(Progress::Done) => None,
639
0
            })
640
0
            .fuse()
641
0
            .collect();
642
643
0
        if let Err(err) = status {
644
0
            result.status = Err(err);
645
0
        }
646
647
0
        result
648
0
    }
649
}
650
651
// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
652
// trip over the usage of await, which is a reserved keyword in that edition/version. It only
653
// contains an impl block.
654
#[cfg(feature = "async")]
655
#[path = "decode_into_async.rs"]
656
mod impl_decode_into_async;
657
658
impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> {
659
1.93k
    fn new(min_size: u8) -> Self {
660
1.93k
        DecodeState {
661
1.93k
            min_size,
662
1.93k
            table: Table::new(),
663
1.93k
            buffer: Buffer::new(),
664
1.93k
            last: None,
665
1.93k
            clear_code: 1 << min_size,
666
1.93k
            end_code: (1 << min_size) + 1,
667
1.93k
            next_code: (1 << min_size) + 2,
668
1.93k
            has_ended: false,
669
1.93k
            is_tiff: false,
670
1.93k
            implicit_reset: true,
671
1.93k
            code_buffer: CodeBuffer::new(min_size),
672
1.93k
            constants: core::marker::PhantomData,
673
1.93k
        }
674
1.93k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new
Line
Count
Source
659
1.38k
    fn new(min_size: u8) -> Self {
660
1.38k
        DecodeState {
661
1.38k
            min_size,
662
1.38k
            table: Table::new(),
663
1.38k
            buffer: Buffer::new(),
664
1.38k
            last: None,
665
1.38k
            clear_code: 1 << min_size,
666
1.38k
            end_code: (1 << min_size) + 1,
667
1.38k
            next_code: (1 << min_size) + 2,
668
1.38k
            has_ended: false,
669
1.38k
            is_tiff: false,
670
1.38k
            implicit_reset: true,
671
1.38k
            code_buffer: CodeBuffer::new(min_size),
672
1.38k
            constants: core::marker::PhantomData,
673
1.38k
        }
674
1.38k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::new
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::new
Line
Count
Source
659
549
    fn new(min_size: u8) -> Self {
660
549
        DecodeState {
661
549
            min_size,
662
549
            table: Table::new(),
663
549
            buffer: Buffer::new(),
664
549
            last: None,
665
549
            clear_code: 1 << min_size,
666
549
            end_code: (1 << min_size) + 1,
667
549
            next_code: (1 << min_size) + 2,
668
549
            has_ended: false,
669
549
            is_tiff: false,
670
549
            implicit_reset: true,
671
549
            code_buffer: CodeBuffer::new(min_size),
672
549
            constants: core::marker::PhantomData,
673
549
        }
674
549
    }
675
676
7.95k
    fn init_tables(&mut self) {
677
7.95k
        self.code_buffer.reset(self.min_size);
678
7.95k
        self.next_code = (1 << self.min_size) + 2;
679
7.95k
        self.table.init(self.min_size);
680
7.95k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables
Line
Count
Source
676
7.33k
    fn init_tables(&mut self) {
677
7.33k
        self.code_buffer.reset(self.min_size);
678
7.33k
        self.next_code = (1 << self.min_size) + 2;
679
7.33k
        self.table.init(self.min_size);
680
7.33k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::init_tables
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::init_tables
Line
Count
Source
676
616
    fn init_tables(&mut self) {
677
616
        self.code_buffer.reset(self.min_size);
678
616
        self.next_code = (1 << self.min_size) + 2;
679
616
        self.table.init(self.min_size);
680
616
    }
681
682
25.1k
    fn reset_tables(&mut self) {
683
25.1k
        self.code_buffer.reset(self.min_size);
684
25.1k
        self.next_code = (1 << self.min_size) + 2;
685
25.1k
        self.table.clear(self.min_size);
686
25.1k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables
Line
Count
Source
682
15.3k
    fn reset_tables(&mut self) {
683
15.3k
        self.code_buffer.reset(self.min_size);
684
15.3k
        self.next_code = (1 << self.min_size) + 2;
685
15.3k
        self.table.clear(self.min_size);
686
15.3k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::reset_tables
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::reset_tables
Line
Count
Source
682
9.80k
    fn reset_tables(&mut self) {
683
9.80k
        self.code_buffer.reset(self.min_size);
684
9.80k
        self.next_code = (1 << self.min_size) + 2;
685
9.80k
        self.table.clear(self.min_size);
686
9.80k
    }
687
}
688
689
impl<C: CodeBuffer, CgC: CodegenConstants> Stateful for DecodeState<C, CgC> {
690
400k
    fn has_ended(&self) -> bool {
691
400k
        self.has_ended
692
400k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::has_ended
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::has_ended
Line
Count
Source
690
400k
    fn has_ended(&self) -> bool {
691
400k
        self.has_ended
692
400k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::has_ended
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::has_ended
693
694
0
    fn restart(&mut self) {
695
0
        self.has_ended = false;
696
0
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::restart
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::restart
697
698
0
    fn reset(&mut self) {
699
0
        self.table.init(self.min_size);
700
0
        self.next_code = (1 << self.min_size) + 2;
701
0
        self.buffer.read_mark = 0;
702
0
        self.buffer.write_mark = 0;
703
0
        self.last = None;
704
0
        self.restart();
705
0
        self.code_buffer = CodeBuffer::new(self.min_size);
706
0
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::reset
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::reset
707
708
467k
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
709
467k
        // Skip everything if there is nothing to do.
710
467k
        if self.has_ended {
711
28
            return BufferResult {
712
28
                consumed_in: 0,
713
28
                consumed_out: 0,
714
28
                status: Ok(LzwStatus::Done),
715
28
            };
716
467k
        }
717
467k
718
467k
        // Rough description:
719
467k
        // We will fill the output slice as much as possible until either there is no more symbols
720
467k
        // to decode or an end code has been reached. This requires an internal buffer to hold a
721
467k
        // potential tail of the word corresponding to the last symbol. This tail will then be
722
467k
        // decoded first before continuing with the regular decoding. The same buffer is required
723
467k
        // to persist some symbol state across calls.
724
467k
        //
725
467k
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
726
467k
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
727
467k
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
728
467k
        // output slice. In the special case (new_code == next_code) we use an existing decoded
729
467k
        // version that is present in either the out bytes of this call or in buffer to copy the
730
467k
        // repeated prefix slice.
731
467k
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
732
467k
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
733
467k
        // a serious improvement. It's just unlikely to both have a long symbol and have that
734
467k
        // repeated twice in the same output buffer.
735
467k
        //
736
467k
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
737
467k
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
738
467k
        // execution as much as possible and for this reason have the least possible stress on
739
467k
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
740
467k
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
741
467k
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
742
467k
        // of code words which are all independent of each other, have known lengths _and_ are
743
467k
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
744
467k
        // decoded in an extremely tight loop.
745
467k
        //
746
467k
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
747
467k
        // that intermediate buffer at the expense of not always filling the output buffer
748
467k
        // completely. Alternatively we might follow its chain of precursor states twice. This may
749
467k
        // be even cheaper if we store more than one byte per link so it really should be
750
467k
        // evaluated.
751
467k
        // TODO: if the caller was required to provide the previous last word we could also avoid
752
467k
        // the buffer for cases where we need it to restore the next code! This could be built
753
467k
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.
754
467k
755
467k
        // Record initial lengths for the result that is returned.
756
467k
        let o_in = inp.len();
757
467k
        let o_out = out.len();
758
467k
759
467k
        // The code_link is the previously decoded symbol.
760
467k
        // It's used to link the new code back to its predecessor.
761
467k
        let mut code_link = None;
762
467k
        // The status, which is written to on an invalid code.
763
467k
        let mut status = Ok(LzwStatus::Ok);
764
467k
765
467k
        match self.last.take() {
766
            // No last state? This is the first code after a reset?
767
            None => {
768
34.1k
                match self.next_symbol(&mut inp) {
769
                    // Plainly invalid code.
770
33.7k
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
771
                    // next_code would require an actual predecessor.
772
33.7k
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
773
                    // No more symbols available and nothing decoded yet.
774
                    // Assume that we didn't make progress, this may get reset to Done if we read
775
                    // some bytes from the input.
776
332
                    None => status = Ok(LzwStatus::NoProgress),
777
                    // Handle a valid code.
778
33.7k
                    Some(init_code) => {
779
33.7k
                        if init_code == self.clear_code {
780
6.99k
                            self.init_tables();
781
26.7k
                        } else if init_code == self.end_code {
782
13
                            self.has_ended = true;
783
13
                            status = Ok(LzwStatus::Done);
784
26.7k
                        } else if self.table.is_empty() {
785
957
                            if self.implicit_reset {
786
957
                                self.init_tables();
787
957
788
957
                                self.buffer.fill_reconstruct(&self.table, init_code);
789
957
                                let link = self.table.at(init_code).clone();
790
957
                                code_link = Some(DerivationBase {
791
957
                                    code: init_code,
792
957
                                    first: link.first,
793
957
                                });
794
957
                            } else {
795
0
                                // We require an explicit reset.
796
0
                                status = Err(LzwError::InvalidCode);
797
0
                            }
798
25.7k
                        } else {
799
25.7k
                            // Reconstruct the first code in the buffer.
800
25.7k
                            self.buffer.fill_reconstruct(&self.table, init_code);
801
25.7k
                            let link = self.table.at(init_code).clone();
802
25.7k
                            code_link = Some(DerivationBase {
803
25.7k
                                code: init_code,
804
25.7k
                                first: link.first,
805
25.7k
                            });
806
25.7k
                        }
807
                    }
808
                }
809
            }
810
            // Move the tracking state to the stack.
811
433k
            Some(tup) => code_link = Some(tup),
812
        };
813
814
        // Track an empty `burst` (see below) means we made no progress.
815
467k
        let mut have_yet_to_decode_data = false;
816
467k
817
467k
        // Restore the previous state, if any.
818
467k
        if code_link.is_some() {
819
460k
            let remain = self.buffer.buffer();
820
460k
            // Check if we can fully finish the buffer.
821
460k
            if remain.len() > out.len() {
822
161k
                if out.is_empty() {
823
0
                    // This also implies the buffer is _not_ empty and we will not enter any
824
0
                    // decoding loop.
825
0
                    status = Ok(LzwStatus::NoProgress);
826
161k
                } else {
827
161k
                    out.copy_from_slice(&remain[..out.len()]);
828
161k
                    self.buffer.consume(out.len());
829
161k
                    out = &mut [];
830
161k
                }
831
298k
            } else if remain.is_empty() {
832
220k
                status = Ok(LzwStatus::NoProgress);
833
220k
                have_yet_to_decode_data = true;
834
220k
            } else {
835
77.9k
                let consumed = remain.len();
836
77.9k
                out[..consumed].copy_from_slice(remain);
837
77.9k
                self.buffer.consume(consumed);
838
77.9k
                out = &mut out[consumed..];
839
77.9k
                have_yet_to_decode_data = false;
840
77.9k
            }
841
7.36k
        }
842
843
        // A special reference to out slice which holds the last decoded symbol.
844
467k
        let mut last_decoded: Option<&[u8]> = None;
845
467k
846
467k
        if self.buffer.buffer().is_empty() {
847
            // Hot loop that writes data to the output as long as we can do so directly from the
848
            // input stream. As an invariant of this block we did not need to use the buffer to
849
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
850
            // loop body since every code path where the buffer is filled already breaks.
851
            //
852
            // In a previous iteration of the code we trusted compiler optimization to work this
853
            // out but it seems that it does not. Another edit hidden behind some performance work
854
            // then edited out the check, inadvertently changing the behavior for callers that
855
            // relied on being able to provide an empty output buffer and still receiving a useful
856
            // signal about the state of the stream.
857
858
            // A burst is a sequence of code words that are independently decoded, i.e. they do not
859
            // change the state of the decoder in ways that would influence the interpretation of
860
            // each other. That is: they are not special symbols, they do not make us increase the
861
            // code size, they are each codes already in the tree before the burst.
862
            //
863
            // The tracking state for a burst. These are actually initialized later but compiler
864
            // wasn't smart enough to fully optimize out the init code so that appears outside the
865
            // loop.
866
306k
            let mut burst = [0; BURST];
867
306k
            let mut burst_byte_len = [0u16; BURST];
868
306k
            let mut burst_byte = [0u8; BURST];
869
306k
            let mut target: [&mut [u8]; BURST] = Default::default();
870
871
            loop {
872
                // In particular, we *also* break if the output buffer is still empty. Especially
873
                // when the output parameter was an empty slice, we must try to fetch at least one
874
                // code but with YIELD_ON_FULL we do not.
875
3.50M
                if CgC::YIELD_ON_FULL && out.is_empty() {
876
0
                    break;
877
3.50M
                }
878
879
3.50M
                let mut deriv = match code_link.take() {
880
3.49M
                    Some(link) => link,
881
                    None => {
882
                        // TODO: we do not need to break here. This does not indicate that the buffer
883
                        // has been filled, rather it indicates we have reset the state. The next code
884
                        // should be part of the initial alphabet. However the first code is special in
885
                        // the sense of not creating a new code itself. This is handled correctly in
886
                        // the initialization prior to the loop; and in particular that handling as
887
                        // written currently relies on putting it into the buffer; so handling it we
888
                        // would need to ensure that either the buffer is fully cleared after its use,
889
                        // or use another implementation of handling that first code.
890
7.36k
                        break;
891
                    }
892
                };
893
894
                // Ensure the code buffer is full, we're about to request some codes.
895
                // Note that this also ensures at least one code is in the buffer if any input is left.
896
3.49M
                self.refill_bits(&mut inp);
897
3.49M
                let cnt = self.code_buffer.peek_bits(&mut burst);
898
3.49M
899
3.49M
                // No code left in the buffer, and no more bytes to refill the buffer.
900
3.49M
                if cnt == 0 {
901
218k
                    if have_yet_to_decode_data {
902
65.0k
                        status = Ok(LzwStatus::NoProgress);
903
153k
                    }
904
905
218k
                    code_link = Some(deriv);
906
218k
                    break;
907
3.27M
                }
908
3.27M
909
3.27M
                debug_assert!(
910
                    // When the table is full, we have a max code one above the mask.
911
0
                    self.table.is_full()
912
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
913
                    // computed next_code is 4 which already exceeds the bit width from the start.
914
                    // Then we will immediately switch code size after this code.
915
                    //
916
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
917
                    // the code below. Maybe it makes sense to revisit turning this into a compile
918
                    // time choice?
919
0
                        || (self.code_buffer.code_size() == 2 && self.next_code == 4)
920
0
                        || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
921
                );
922
923
3.27M
                let mut burst_size = 0;
924
3.27M
                let left_before_size_switch = (self.code_buffer.max_code()
925
3.27M
                    - Code::from(self.is_tiff))
926
3.27M
                .saturating_sub(self.next_code);
927
928
                // A burst is a sequence of decodes that are completely independent of each other. This
929
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
930
                // all of them in the decoding table and thus known their depths, and additionally if
931
                // we can decode them directly into the output buffer.
932
4.56M
                for b in &burst[..cnt] {
933
                    // We can commit the previous burst code, and will take a slice from the output
934
                    // buffer. This also avoids the bounds check in the tight loop later.
935
4.56M
                    if burst_size > 0 {
936
1.28M
                        let len = burst_byte_len[burst_size - 1];
937
1.28M
                        let (into, tail) = out.split_at_mut(usize::from(len));
938
1.28M
                        target[burst_size - 1] = into;
939
1.28M
                        out = tail;
940
3.27M
                    }
941
942
                    // Check that we don't overflow the code size with all codes we burst decode.
943
4.56M
                    burst_size += 1;
944
4.56M
945
4.56M
                    if burst_size > usize::from(left_before_size_switch) {
946
2.73M
                        break;
947
1.82M
                    }
948
1.82M
949
1.82M
                    let read_code = *b;
950
1.82M
951
1.82M
                    // A burst code can't be special.
952
1.82M
                    if read_code == self.clear_code
953
1.80M
                        || read_code == self.end_code
954
1.79M
                        || read_code >= self.next_code
955
                    {
956
131k
                        break;
957
1.69M
                    }
958
1.69M
959
1.69M
                    // Read the code length and check that we can decode directly into the out slice.
960
1.69M
                    let len = self.table.depths[usize::from(read_code)];
961
1.69M
962
1.69M
                    if out.len() < usize::from(len) {
963
21.9k
                        break;
964
1.67M
                    }
965
1.67M
966
1.67M
                    // We do exactly one more code (the one being inspected in the current iteration)
967
1.67M
                    // after the 'burst'. When we want to break decoding precisely on the supplied
968
1.67M
                    // buffer, we check if this is the last code to be decoded into it.
969
1.67M
                    if CgC::YIELD_ON_FULL {
970
0
                        if out.len() == usize::from(len) {
971
0
                            break;
972
0
                        }
973
1.67M
                    }
974
975
1.67M
                    burst_byte_len[burst_size - 1] = len;
976
                }
977
978
3.27M
                self.code_buffer.consume_bits(burst_size as u8);
979
3.27M
                have_yet_to_decode_data = false;
980
3.27M
981
3.27M
                // Note that the very last code in the burst buffer doesn't actually belong to the
982
3.27M
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
983
3.27M
                // breaks and a loop end condition above. That may be a speed advantage?
984
3.27M
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
985
3.27M
986
3.27M
                // The very tight loop for restoring the actual burst. These can be reconstructed in
987
3.27M
                // parallel since none of them depend on a prior constructed. Only the derivation of
988
3.27M
                // new codes is not parallel. There are no size changes here either.
989
3.27M
                let burst_targets = &mut target[..burst_size - 1];
990
3.27M
991
3.27M
                if !self.table.is_full() {
992
562k
                    self.next_code += burst_targets.len() as u16;
993
2.71M
                }
994
995
1.28M
                for ((&burst, target), byte) in
996
3.27M
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
997
1.28M
                {
998
1.28M
                    *byte = self.table.reconstruct(burst, target);
999
1.28M
                }
1000
1001
3.27M
                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
1002
3.27M
1003
3.27M
                // Now handle the special codes.
1004
3.27M
                if new_code == self.clear_code {
1005
25.1k
                    self.reset_tables();
1006
25.1k
                    last_decoded = None;
1007
25.1k
                    // Restarts in the next call to the entry point.
1008
25.1k
                    break;
1009
3.24M
                }
1010
3.24M
1011
3.24M
                if new_code == self.end_code {
1012
303
                    self.has_ended = true;
1013
303
                    status = Ok(LzwStatus::Done);
1014
303
                    last_decoded = None;
1015
303
                    break;
1016
3.24M
                }
1017
3.24M
1018
3.24M
                if new_code > self.next_code {
1019
307
                    status = Err(LzwError::InvalidCode);
1020
307
                    last_decoded = None;
1021
307
                    break;
1022
3.24M
                }
1023
1024
3.24M
                let required_len = if new_code == self.next_code {
1025
99.4k
                    self.table.depths[usize::from(deriv.code)] + 1
1026
                } else {
1027
3.14M
                    self.table.depths[usize::from(new_code)]
1028
                };
1029
1030
                // We need the decoded data of the new code if it is the `next_code`. This is the
1031
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
1032
                // all other cases we only need the first character of the decoded data.
1033
3.24M
                let have_next_code = new_code == self.next_code;
1034
3.24M
1035
3.24M
                // Update the slice holding the last decoded word.
1036
3.24M
                if have_next_code {
1037
                    // If we did not have any burst code, we still hold that slice in the buffer.
1038
99.4k
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
1039
11.6k
                        let slice = core::mem::replace(new_last, &mut []);
1040
11.6k
                        last_decoded = Some(&*slice);
1041
87.7k
                    }
1042
3.14M
                }
1043
1044
                let cha;
1045
3.24M
                let is_in_buffer = usize::from(required_len) > out.len();
1046
3.24M
                // Check if we will need to store our current state into the buffer.
1047
3.24M
                if is_in_buffer {
1048
51.4k
                    if have_next_code {
1049
                        // last_decoded will be Some if we have restored any code into the out slice.
1050
                        // Otherwise it will still be present in the buffer.
1051
26.7k
                        if let Some(last) = last_decoded.take() {
1052
9.28k
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
1053
9.28k
                            self.buffer.write_mark = last.len();
1054
9.28k
                            self.buffer.read_mark = last.len();
1055
17.4k
                        }
1056
1057
26.7k
                        cha = self.buffer.fill_cscsc();
1058
24.6k
                    } else {
1059
24.6k
                        // Restore the decoded word into the buffer.
1060
24.6k
                        last_decoded = None;
1061
24.6k
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
1062
24.6k
                    }
1063
                } else {
1064
3.19M
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
1065
3.19M
                    out = tail;
1066
3.19M
1067
3.19M
                    if have_next_code {
1068
                        // Reconstruct high.
1069
72.6k
                        let source = match last_decoded.take() {
1070
61.6k
                            Some(last) => last,
1071
10.9k
                            None => &self.buffer.bytes[..self.buffer.write_mark],
1072
                        };
1073
1074
                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
1075
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
1076
72.6k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance::{closure#0}
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance::{closure#0}
Line
Count
Source
1076
53.0k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance::{closure#0}
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance::{closure#0}
Line
Count
Source
1076
19.5k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
1077
72.6k
                        target[..source.len()].copy_from_slice(source);
1078
72.6k
                        target[source.len()..][0] = cha;
1079
3.12M
                    } else {
1080
3.12M
                        cha = self.table.reconstruct(new_code, target);
1081
3.12M
                    }
1082
1083
                    // A new decoded word.
1084
3.19M
                    last_decoded = Some(target);
1085
                }
1086
1087
                // Each newly read code creates one new code/link based on the preceding code if we
1088
                // have enough space to put it there.
1089
3.24M
                if !self.table.is_full() {
1090
536k
                    self.table.derive(&deriv, cha);
1091
536k
1092
536k
                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
1093
23.2k
                        && self.code_buffer.code_size() < MAX_CODESIZE
1094
22.8k
                    {
1095
22.8k
                        self.bump_code_size();
1096
514k
                    }
1097
1098
536k
                    self.next_code += 1;
1099
2.71M
                }
1100
1101
                // store the information on the decoded word.
1102
3.24M
                code_link = Some(DerivationBase {
1103
3.24M
                    code: new_code,
1104
3.24M
                    first: cha,
1105
3.24M
                });
1106
3.24M
1107
3.24M
                // Can't make any more progress with decoding.
1108
3.24M
                //
1109
3.24M
                // We have more data buffered but not enough space to put it? We want fetch a next
1110
3.24M
                // symbol if possible as in the case of it being a new symbol we can refer to the
1111
3.24M
                // buffered output as the source for that symbol's meaning and do a memcpy.
1112
3.24M
                //
1113
3.24M
                // Since this test is after decoding at least one code, we can now check for an
1114
3.24M
                // empty buffer and still guarantee progress when one was passed as a parameter.
1115
3.24M
                if is_in_buffer || out.is_empty() {
1116
54.2k
                    break;
1117
3.19M
                }
1118
            }
1119
161k
        }
1120
1121
        // We need to store the last word into the buffer in case the first code in the next
1122
        // iteration is the next_code.
1123
467k
        if let Some(tail) = last_decoded {
1124
156k
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
1125
156k
            self.buffer.write_mark = tail.len();
1126
156k
            // Mark the full buffer as having been consumed.
1127
156k
            self.buffer.read_mark = tail.len();
1128
311k
        }
1129
1130
        // Ensure we don't indicate that no progress was made if we read some bytes from the input
1131
        // (which is progress).
1132
467k
        if o_in > inp.len() {
1133
295k
            if let Ok(LzwStatus::NoProgress) = status {
1134
220k
                status = Ok(LzwStatus::Ok);
1135
220k
            }
1136
171k
        }
1137
1138
        // Store the code/link state.
1139
467k
        self.last = code_link;
1140
467k
1141
467k
        BufferResult {
1142
467k
            consumed_in: o_in.wrapping_sub(inp.len()),
1143
467k
            consumed_out: o_out.wrapping_sub(out.len()),
1144
467k
            status,
1145
467k
        }
1146
467k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance
Line
Count
Source
708
399k
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
709
399k
        // Skip everything if there is nothing to do.
710
399k
        if self.has_ended {
711
4
            return BufferResult {
712
4
                consumed_in: 0,
713
4
                consumed_out: 0,
714
4
                status: Ok(LzwStatus::Done),
715
4
            };
716
399k
        }
717
399k
718
399k
        // Rough description:
719
399k
        // We will fill the output slice as much as possible until either there is no more symbols
720
399k
        // to decode or an end code has been reached. This requires an internal buffer to hold a
721
399k
        // potential tail of the word corresponding to the last symbol. This tail will then be
722
399k
        // decoded first before continuing with the regular decoding. The same buffer is required
723
399k
        // to persist some symbol state across calls.
724
399k
        //
725
399k
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
726
399k
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
727
399k
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
728
399k
        // output slice. In the special case (new_code == next_code) we use an existing decoded
729
399k
        // version that is present in either the out bytes of this call or in buffer to copy the
730
399k
        // repeated prefix slice.
731
399k
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
732
399k
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
733
399k
        // a serious improvement. It's just unlikely to both have a long symbol and have that
734
399k
        // repeated twice in the same output buffer.
735
399k
        //
736
399k
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
737
399k
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
738
399k
        // execution as much as possible and for this reason have the least possible stress on
739
399k
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
740
399k
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
741
399k
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
742
399k
        // of code words which are all independent of each other, have known lengths _and_ are
743
399k
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
744
399k
        // decoded in an extremely tight loop.
745
399k
        //
746
399k
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
747
399k
        // that intermediate buffer at the expense of not always filling the output buffer
748
399k
        // completely. Alternatively we might follow its chain of precursor states twice. This may
749
399k
        // be even cheaper if we store more than one byte per link so it really should be
750
399k
        // evaluated.
751
399k
        // TODO: if the caller was required to provide the previous last word we could also avoid
752
399k
        // the buffer for cases where we need it to restore the next code! This could be built
753
399k
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.
754
399k
755
399k
        // Record initial lengths for the result that is returned.
756
399k
        let o_in = inp.len();
757
399k
        let o_out = out.len();
758
399k
759
399k
        // The code_link is the previously decoded symbol.
760
399k
        // It's used to link the new code back to its predecessor.
761
399k
        let mut code_link = None;
762
399k
        // The status, which is written to on an invalid code.
763
399k
        let mut status = Ok(LzwStatus::Ok);
764
399k
765
399k
        match self.last.take() {
766
            // No last state? This is the first code after a reset?
767
            None => {
768
23.2k
                match self.next_symbol(&mut inp) {
769
                    // Plainly invalid code.
770
22.9k
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
771
                    // next_code would require an actual predecessor.
772
22.9k
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
773
                    // No more symbols available and nothing decoded yet.
774
                    // Assume that we didn't make progress, this may get reset to Done if we read
775
                    // some bytes from the input.
776
326
                    None => status = Ok(LzwStatus::NoProgress),
777
                    // Handle a valid code.
778
22.9k
                    Some(init_code) => {
779
22.9k
                        if init_code == self.clear_code {
780
6.52k
                            self.init_tables();
781
16.4k
                        } else if init_code == self.end_code {
782
13
                            self.has_ended = true;
783
13
                            status = Ok(LzwStatus::Done);
784
16.4k
                        } else if self.table.is_empty() {
785
815
                            if self.implicit_reset {
786
815
                                self.init_tables();
787
815
788
815
                                self.buffer.fill_reconstruct(&self.table, init_code);
789
815
                                let link = self.table.at(init_code).clone();
790
815
                                code_link = Some(DerivationBase {
791
815
                                    code: init_code,
792
815
                                    first: link.first,
793
815
                                });
794
815
                            } else {
795
0
                                // We require an explicit reset.
796
0
                                status = Err(LzwError::InvalidCode);
797
0
                            }
798
15.5k
                        } else {
799
15.5k
                            // Reconstruct the first code in the buffer.
800
15.5k
                            self.buffer.fill_reconstruct(&self.table, init_code);
801
15.5k
                            let link = self.table.at(init_code).clone();
802
15.5k
                            code_link = Some(DerivationBase {
803
15.5k
                                code: init_code,
804
15.5k
                                first: link.first,
805
15.5k
                            });
806
15.5k
                        }
807
                    }
808
                }
809
            }
810
            // Move the tracking state to the stack.
811
376k
            Some(tup) => code_link = Some(tup),
812
        };
813
814
        // Track an empty `burst` (see below) means we made no progress.
815
399k
        let mut have_yet_to_decode_data = false;
816
399k
817
399k
        // Restore the previous state, if any.
818
399k
        if code_link.is_some() {
819
392k
            let remain = self.buffer.buffer();
820
392k
            // Check if we can fully finish the buffer.
821
392k
            if remain.len() > out.len() {
822
134k
                if out.is_empty() {
823
0
                    // This also implies the buffer is _not_ empty and we will not enter any
824
0
                    // decoding loop.
825
0
                    status = Ok(LzwStatus::NoProgress);
826
134k
                } else {
827
134k
                    out.copy_from_slice(&remain[..out.len()]);
828
134k
                    self.buffer.consume(out.len());
829
134k
                    out = &mut [];
830
134k
                }
831
257k
            } else if remain.is_empty() {
832
219k
                status = Ok(LzwStatus::NoProgress);
833
219k
                have_yet_to_decode_data = true;
834
219k
            } else {
835
38.3k
                let consumed = remain.len();
836
38.3k
                out[..consumed].copy_from_slice(remain);
837
38.3k
                self.buffer.consume(consumed);
838
38.3k
                out = &mut out[consumed..];
839
38.3k
                have_yet_to_decode_data = false;
840
38.3k
            }
841
6.88k
        }
842
843
        // A special reference to out slice which holds the last decoded symbol.
844
399k
        let mut last_decoded: Option<&[u8]> = None;
845
399k
846
399k
        if self.buffer.buffer().is_empty() {
847
            // Hot loop that writes data to the output as long as we can do so directly from the
848
            // input stream. As an invariant of this block we did not need to use the buffer to
849
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
850
            // loop body since every code path where the buffer is filled already breaks.
851
            //
852
            // In a previous iteration of the code we trusted compiler optimization to work this
853
            // out but it seems that it does not. Another edit hidden behind some performance work
854
            // then edited out the check, inadvertently changing the behavior for callers that
855
            // relied on being able to provide an empty output buffer and still receiving a useful
856
            // signal about the state of the stream.
857
858
            // A burst is a sequence of code words that are independently decoded, i.e. they do not
859
            // change the state of the decoder in ways that would influence the interpretation of
860
            // each other. That is: they are not special symbols, they do not make us increase the
861
            // code size, they are each codes already in the tree before the burst.
862
            //
863
            // The tracking state for a burst. These are actually initialized later but compiler
864
            // wasn't smart enough to fully optimize out the init code so that appears outside the
865
            // loop.
866
264k
            let mut burst = [0; BURST];
867
264k
            let mut burst_byte_len = [0u16; BURST];
868
264k
            let mut burst_byte = [0u8; BURST];
869
264k
            let mut target: [&mut [u8]; BURST] = Default::default();
870
871
            loop {
872
                // In particular, we *also* break if the output buffer is still empty. Especially
873
                // when the output parameter was an empty slice, we must try to fetch at least one
874
                // code but with YIELD_ON_FULL we do not.
875
791k
                if CgC::YIELD_ON_FULL && out.is_empty() {
876
0
                    break;
877
791k
                }
878
879
791k
                let mut deriv = match code_link.take() {
880
784k
                    Some(link) => link,
881
                    None => {
882
                        // TODO: we do not need to break here. This does not indicate that the buffer
883
                        // has been filled, rather it indicates we have reset the state. The next code
884
                        // should be part of the initial alphabet. However the first code is special in
885
                        // the sense of not creating a new code itself. This is handled correctly in
886
                        // the initialization prior to the loop; and in particular that handling as
887
                        // written currently relies on putting it into the buffer; so handling it we
888
                        // would need to ensure that either the buffer is fully cleared after its use,
889
                        // or use another implementation of handling that first code.
890
6.88k
                        break;
891
                    }
892
                };
893
894
                // Ensure the code buffer is full, we're about to request some codes.
895
                // Note that this also ensures at least one code is in the buffer if any input is left.
896
784k
                self.refill_bits(&mut inp);
897
784k
                let cnt = self.code_buffer.peek_bits(&mut burst);
898
784k
899
784k
                // No code left in the buffer, and no more bytes to refill the buffer.
900
784k
                if cnt == 0 {
901
218k
                    if have_yet_to_decode_data {
902
64.9k
                        status = Ok(LzwStatus::NoProgress);
903
153k
                    }
904
905
218k
                    code_link = Some(deriv);
906
218k
                    break;
907
565k
                }
908
565k
909
565k
                debug_assert!(
910
                    // When the table is full, we have a max code one above the mask.
911
0
                    self.table.is_full()
912
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
913
                    // computed next_code is 4 which already exceeds the bit width from the start.
914
                    // Then we will immediately switch code size after this code.
915
                    //
916
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
917
                    // the code below. Maybe it makes sense to revisit turning this into a compile
918
                    // time choice?
919
0
                        || (self.code_buffer.code_size() == 2 && self.next_code == 4)
920
0
                        || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
921
                );
922
923
565k
                let mut burst_size = 0;
924
565k
                let left_before_size_switch = (self.code_buffer.max_code()
925
565k
                    - Code::from(self.is_tiff))
926
565k
                .saturating_sub(self.next_code);
927
928
                // A burst is a sequence of decodes that are completely independent of each other. This
929
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
930
                // all of them in the decoding table and thus known their depths, and additionally if
931
                // we can decode them directly into the output buffer.
932
969k
                for b in &burst[..cnt] {
933
                    // We can commit the previous burst code, and will take a slice from the output
934
                    // buffer. This also avoids the bounds check in the tight loop later.
935
969k
                    if burst_size > 0 {
936
403k
                        let len = burst_byte_len[burst_size - 1];
937
403k
                        let (into, tail) = out.split_at_mut(usize::from(len));
938
403k
                        target[burst_size - 1] = into;
939
403k
                        out = tail;
940
565k
                    }
941
942
                    // Check that we don't overflow the code size with all codes we burst decode.
943
969k
                    burst_size += 1;
944
969k
945
969k
                    if burst_size > usize::from(left_before_size_switch) {
946
274k
                        break;
947
694k
                    }
948
694k
949
694k
                    let read_code = *b;
950
694k
951
694k
                    // A burst code can't be special.
952
694k
                    if read_code == self.clear_code
953
680k
                        || read_code == self.end_code
954
680k
                        || read_code >= self.next_code
955
                    {
956
77.8k
                        break;
957
616k
                    }
958
616k
959
616k
                    // Read the code length and check that we can decode directly into the out slice.
960
616k
                    let len = self.table.depths[usize::from(read_code)];
961
616k
962
616k
                    if out.len() < usize::from(len) {
963
13.5k
                        break;
964
603k
                    }
965
603k
966
603k
                    // We do exactly one more code (the one being inspected in the current iteration)
967
603k
                    // after the 'burst'. When we want to break decoding precisely on the supplied
968
603k
                    // buffer, we check if this is the last code to be decoded into it.
969
603k
                    if CgC::YIELD_ON_FULL {
970
0
                        if out.len() == usize::from(len) {
971
0
                            break;
972
0
                        }
973
603k
                    }
974
975
603k
                    burst_byte_len[burst_size - 1] = len;
976
                }
977
978
565k
                self.code_buffer.consume_bits(burst_size as u8);
979
565k
                have_yet_to_decode_data = false;
980
565k
981
565k
                // Note that the very last code in the burst buffer doesn't actually belong to the
982
565k
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
983
565k
                // breaks and a loop end condition above. That may be a speed advantage?
984
565k
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
985
565k
986
565k
                // The very tight loop for restoring the actual burst. These can be reconstructed in
987
565k
                // parallel since none of them depend on a prior constructed. Only the derivation of
988
565k
                // new codes is not parallel. There are no size changes here either.
989
565k
                let burst_targets = &mut target[..burst_size - 1];
990
565k
991
565k
                if !self.table.is_full() {
992
312k
                    self.next_code += burst_targets.len() as u16;
993
312k
                }
994
995
403k
                for ((&burst, target), byte) in
996
565k
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
997
403k
                {
998
403k
                    *byte = self.table.reconstruct(burst, target);
999
403k
                }
1000
1001
565k
                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
1002
565k
1003
565k
                // Now handle the special codes.
1004
565k
                if new_code == self.clear_code {
1005
15.3k
                    self.reset_tables();
1006
15.3k
                    last_decoded = None;
1007
15.3k
                    // Restarts in the next call to the entry point.
1008
15.3k
                    break;
1009
550k
                }
1010
550k
1011
550k
                if new_code == self.end_code {
1012
43
                    self.has_ended = true;
1013
43
                    status = Ok(LzwStatus::Done);
1014
43
                    last_decoded = None;
1015
43
                    break;
1016
550k
                }
1017
550k
1018
550k
                if new_code > self.next_code {
1019
179
                    status = Err(LzwError::InvalidCode);
1020
179
                    last_decoded = None;
1021
179
                    break;
1022
550k
                }
1023
1024
550k
                let required_len = if new_code == self.next_code {
1025
59.8k
                    self.table.depths[usize::from(deriv.code)] + 1
1026
                } else {
1027
490k
                    self.table.depths[usize::from(new_code)]
1028
                };
1029
1030
                // We need the decoded data of the new code if it is the `next_code`. This is the
1031
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
1032
                // all other cases we only need the first character of the decoded data.
1033
550k
                let have_next_code = new_code == self.next_code;
1034
550k
1035
550k
                // Update the slice holding the last decoded word.
1036
550k
                if have_next_code {
1037
                    // If we did not have any burst code, we still hold that slice in the buffer.
1038
59.8k
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
1039
8.62k
                        let slice = core::mem::replace(new_last, &mut []);
1040
8.62k
                        last_decoded = Some(&*slice);
1041
51.2k
                    }
1042
490k
                }
1043
1044
                let cha;
1045
550k
                let is_in_buffer = usize::from(required_len) > out.len();
1046
550k
                // Check if we will need to store our current state into the buffer.
1047
550k
                if is_in_buffer {
1048
22.1k
                    if have_next_code {
1049
                        // last_decoded will be Some if we have restored any code into the out slice.
1050
                        // Otherwise it will still be present in the buffer.
1051
6.82k
                        if let Some(last) = last_decoded.take() {
1052
3.57k
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
1053
3.57k
                            self.buffer.write_mark = last.len();
1054
3.57k
                            self.buffer.read_mark = last.len();
1055
3.57k
                        }
1056
1057
6.82k
                        cha = self.buffer.fill_cscsc();
1058
15.2k
                    } else {
1059
15.2k
                        // Restore the decoded word into the buffer.
1060
15.2k
                        last_decoded = None;
1061
15.2k
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
1062
15.2k
                    }
1063
                } else {
1064
527k
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
1065
527k
                    out = tail;
1066
527k
1067
527k
                    if have_next_code {
1068
                        // Reconstruct high.
1069
53.0k
                        let source = match last_decoded.take() {
1070
48.7k
                            Some(last) => last,
1071
4.29k
                            None => &self.buffer.bytes[..self.buffer.write_mark],
1072
                        };
1073
1074
                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
1075
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
1076
53.0k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
1077
53.0k
                        target[..source.len()].copy_from_slice(source);
1078
53.0k
                        target[source.len()..][0] = cha;
1079
474k
                    } else {
1080
474k
                        cha = self.table.reconstruct(new_code, target);
1081
474k
                    }
1082
1083
                    // A new decoded word.
1084
527k
                    last_decoded = Some(target);
1085
                }
1086
1087
                // Each newly read code creates one new code/link based on the preceding code if we
1088
                // have enough space to put it there.
1089
550k
                if !self.table.is_full() {
1090
297k
                    self.table.derive(&deriv, cha);
1091
297k
1092
297k
                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
1093
21.2k
                        && self.code_buffer.code_size() < MAX_CODESIZE
1094
21.0k
                    {
1095
21.0k
                        self.bump_code_size();
1096
276k
                    }
1097
1098
297k
                    self.next_code += 1;
1099
252k
                }
1100
1101
                // store the information on the decoded word.
1102
550k
                code_link = Some(DerivationBase {
1103
550k
                    code: new_code,
1104
550k
                    first: cha,
1105
550k
                });
1106
550k
1107
550k
                // Can't make any more progress with decoding.
1108
550k
                //
1109
550k
                // We have more data buffered but not enough space to put it? We want fetch a next
1110
550k
                // symbol if possible as in the case of it being a new symbol we can refer to the
1111
550k
                // buffered output as the source for that symbol's meaning and do a memcpy.
1112
550k
                //
1113
550k
                // Since this test is after decoding at least one code, we can now check for an
1114
550k
                // empty buffer and still guarantee progress when one was passed as a parameter.
1115
550k
                if is_in_buffer || out.is_empty() {
1116
23.9k
                    break;
1117
526k
                }
1118
            }
1119
134k
        }
1120
1121
        // We need to store the last word into the buffer in case the first code in the next
1122
        // iteration is the next_code.
1123
399k
        if let Some(tail) = last_decoded {
1124
155k
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
1125
155k
            self.buffer.write_mark = tail.len();
1126
155k
            // Mark the full buffer as having been consumed.
1127
155k
            self.buffer.read_mark = tail.len();
1128
244k
        }
1129
1130
        // Ensure we don't indicate that no progress was made if we read some bytes from the input
1131
        // (which is progress).
1132
399k
        if o_in > inp.len() {
1133
255k
            if let Ok(LzwStatus::NoProgress) = status {
1134
219k
                status = Ok(LzwStatus::Ok);
1135
219k
            }
1136
143k
        }
1137
1138
        // Store the code/link state.
1139
399k
        self.last = code_link;
1140
399k
1141
399k
        BufferResult {
1142
399k
            consumed_in: o_in.wrapping_sub(inp.len()),
1143
399k
            consumed_out: o_out.wrapping_sub(out.len()),
1144
399k
            status,
1145
399k
        }
1146
399k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull> as weezl::decode::Stateful>::advance
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield> as weezl::decode::Stateful>::advance
Line
Count
Source
708
68.2k
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
709
68.2k
        // Skip everything if there is nothing to do.
710
68.2k
        if self.has_ended {
711
24
            return BufferResult {
712
24
                consumed_in: 0,
713
24
                consumed_out: 0,
714
24
                status: Ok(LzwStatus::Done),
715
24
            };
716
68.2k
        }
717
68.2k
718
68.2k
        // Rough description:
719
68.2k
        // We will fill the output slice as much as possible until either there is no more symbols
720
68.2k
        // to decode or an end code has been reached. This requires an internal buffer to hold a
721
68.2k
        // potential tail of the word corresponding to the last symbol. This tail will then be
722
68.2k
        // decoded first before continuing with the regular decoding. The same buffer is required
723
68.2k
        // to persist some symbol state across calls.
724
68.2k
        //
725
68.2k
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
726
68.2k
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
727
68.2k
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
728
68.2k
        // output slice. In the special case (new_code == next_code) we use an existing decoded
729
68.2k
        // version that is present in either the out bytes of this call or in buffer to copy the
730
68.2k
        // repeated prefix slice.
731
68.2k
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
732
68.2k
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
733
68.2k
        // a serious improvement. It's just unlikely to both have a long symbol and have that
734
68.2k
        // repeated twice in the same output buffer.
735
68.2k
        //
736
68.2k
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
737
68.2k
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
738
68.2k
        // execution as much as possible and for this reason have the least possible stress on
739
68.2k
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
740
68.2k
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
741
68.2k
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
742
68.2k
        // of code words which are all independent of each other, have known lengths _and_ are
743
68.2k
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
744
68.2k
        // decoded in an extremely tight loop.
745
68.2k
        //
746
68.2k
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
747
68.2k
        // that intermediate buffer at the expense of not always filling the output buffer
748
68.2k
        // completely. Alternatively we might follow its chain of precursor states twice. This may
749
68.2k
        // be even cheaper if we store more than one byte per link so it really should be
750
68.2k
        // evaluated.
751
68.2k
        // TODO: if the caller was required to provide the previous last word we could also avoid
752
68.2k
        // the buffer for cases where we need it to restore the next code! This could be built
753
68.2k
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.
754
68.2k
755
68.2k
        // Record initial lengths for the result that is returned.
756
68.2k
        let o_in = inp.len();
757
68.2k
        let o_out = out.len();
758
68.2k
759
68.2k
        // The code_link is the previously decoded symbol.
760
68.2k
        // It's used to link the new code back to its predecessor.
761
68.2k
        let mut code_link = None;
762
68.2k
        // The status, which is written to on an invalid code.
763
68.2k
        let mut status = Ok(LzwStatus::Ok);
764
68.2k
765
68.2k
        match self.last.take() {
766
            // No last state? This is the first code after a reset?
767
            None => {
768
10.8k
                match self.next_symbol(&mut inp) {
769
                    // Plainly invalid code.
770
10.8k
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
771
                    // next_code would require an actual predecessor.
772
10.8k
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
773
                    // No more symbols available and nothing decoded yet.
774
                    // Assume that we didn't make progress, this may get reset to Done if we read
775
                    // some bytes from the input.
776
6
                    None => status = Ok(LzwStatus::NoProgress),
777
                    // Handle a valid code.
778
10.8k
                    Some(init_code) => {
779
10.8k
                        if init_code == self.clear_code {
780
474
                            self.init_tables();
781
10.3k
                        } else if init_code == self.end_code {
782
0
                            self.has_ended = true;
783
0
                            status = Ok(LzwStatus::Done);
784
10.3k
                        } else if self.table.is_empty() {
785
142
                            if self.implicit_reset {
786
142
                                self.init_tables();
787
142
788
142
                                self.buffer.fill_reconstruct(&self.table, init_code);
789
142
                                let link = self.table.at(init_code).clone();
790
142
                                code_link = Some(DerivationBase {
791
142
                                    code: init_code,
792
142
                                    first: link.first,
793
142
                                });
794
142
                            } else {
795
0
                                // We require an explicit reset.
796
0
                                status = Err(LzwError::InvalidCode);
797
0
                            }
798
10.2k
                        } else {
799
10.2k
                            // Reconstruct the first code in the buffer.
800
10.2k
                            self.buffer.fill_reconstruct(&self.table, init_code);
801
10.2k
                            let link = self.table.at(init_code).clone();
802
10.2k
                            code_link = Some(DerivationBase {
803
10.2k
                                code: init_code,
804
10.2k
                                first: link.first,
805
10.2k
                            });
806
10.2k
                        }
807
                    }
808
                }
809
            }
810
            // Move the tracking state to the stack.
811
57.3k
            Some(tup) => code_link = Some(tup),
812
        };
813
814
        // Track an empty `burst` (see below) means we made no progress.
815
68.2k
        let mut have_yet_to_decode_data = false;
816
68.2k
817
68.2k
        // Restore the previous state, if any.
818
68.2k
        if code_link.is_some() {
819
67.7k
            let remain = self.buffer.buffer();
820
67.7k
            // Check if we can fully finish the buffer.
821
67.7k
            if remain.len() > out.len() {
822
26.9k
                if out.is_empty() {
823
0
                    // This also implies the buffer is _not_ empty and we will not enter any
824
0
                    // decoding loop.
825
0
                    status = Ok(LzwStatus::NoProgress);
826
26.9k
                } else {
827
26.9k
                    out.copy_from_slice(&remain[..out.len()]);
828
26.9k
                    self.buffer.consume(out.len());
829
26.9k
                    out = &mut [];
830
26.9k
                }
831
40.8k
            } else if remain.is_empty() {
832
1.23k
                status = Ok(LzwStatus::NoProgress);
833
1.23k
                have_yet_to_decode_data = true;
834
39.5k
            } else {
835
39.5k
                let consumed = remain.len();
836
39.5k
                out[..consumed].copy_from_slice(remain);
837
39.5k
                self.buffer.consume(consumed);
838
39.5k
                out = &mut out[consumed..];
839
39.5k
                have_yet_to_decode_data = false;
840
39.5k
            }
841
483
        }
842
843
        // A special reference to out slice which holds the last decoded symbol.
844
68.2k
        let mut last_decoded: Option<&[u8]> = None;
845
68.2k
846
68.2k
        if self.buffer.buffer().is_empty() {
847
            // Hot loop that writes data to the output as long as we can do so directly from the
848
            // input stream. As an invariant of this block we did not need to use the buffer to
849
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
850
            // loop body since every code path where the buffer is filled already breaks.
851
            //
852
            // In a previous iteration of the code we trusted compiler optimization to work this
853
            // out but it seems that it does not. Another edit hidden behind some performance work
854
            // then edited out the check, inadvertently changing the behavior for callers that
855
            // relied on being able to provide an empty output buffer and still receiving a useful
856
            // signal about the state of the stream.
857
858
            // A burst is a sequence of code words that are independently decoded, i.e. they do not
859
            // change the state of the decoder in ways that would influence the interpretation of
860
            // each other. That is: they are not special symbols, they do not make us increase the
861
            // code size, they are each codes already in the tree before the burst.
862
            //
863
            // The tracking state for a burst. These are actually initialized later but compiler
864
            // wasn't smart enough to fully optimize out the init code so that appears outside the
865
            // loop.
866
41.3k
            let mut burst = [0; BURST];
867
41.3k
            let mut burst_byte_len = [0u16; BURST];
868
41.3k
            let mut burst_byte = [0u8; BURST];
869
41.3k
            let mut target: [&mut [u8]; BURST] = Default::default();
870
871
            loop {
872
                // In particular, we *also* break if the output buffer is still empty. Especially
873
                // when the output parameter was an empty slice, we must try to fetch at least one
874
                // code but with YIELD_ON_FULL we do not.
875
2.70M
                if CgC::YIELD_ON_FULL && out.is_empty() {
876
0
                    break;
877
2.70M
                }
878
879
2.70M
                let mut deriv = match code_link.take() {
880
2.70M
                    Some(link) => link,
881
                    None => {
882
                        // TODO: we do not need to break here. This does not indicate that the buffer
883
                        // has been filled, rather it indicates we have reset the state. The next code
884
                        // should be part of the initial alphabet. However the first code is special in
885
                        // the sense of not creating a new code itself. This is handled correctly in
886
                        // the initialization prior to the loop; and in particular that handling as
887
                        // written currently relies on putting it into the buffer; so handling it we
888
                        // would need to ensure that either the buffer is fully cleared after its use,
889
                        // or use another implementation of handling that first code.
890
483
                        break;
891
                    }
892
                };
893
894
                // Ensure the code buffer is full, we're about to request some codes.
895
                // Note that this also ensures at least one code is in the buffer if any input is left.
896
2.70M
                self.refill_bits(&mut inp);
897
2.70M
                let cnt = self.code_buffer.peek_bits(&mut burst);
898
2.70M
899
2.70M
                // No code left in the buffer, and no more bytes to refill the buffer.
900
2.70M
                if cnt == 0 {
901
312
                    if have_yet_to_decode_data {
902
95
                        status = Ok(LzwStatus::NoProgress);
903
217
                    }
904
905
312
                    code_link = Some(deriv);
906
312
                    break;
907
2.70M
                }
908
2.70M
909
2.70M
                debug_assert!(
910
                    // When the table is full, we have a max code one above the mask.
911
0
                    self.table.is_full()
912
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
913
                    // computed next_code is 4 which already exceeds the bit width from the start.
914
                    // Then we will immediately switch code size after this code.
915
                    //
916
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
917
                    // the code below. Maybe it makes sense to revisit turning this into a compile
918
                    // time choice?
919
0
                        || (self.code_buffer.code_size() == 2 && self.next_code == 4)
920
0
                        || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
921
                );
922
923
2.70M
                let mut burst_size = 0;
924
2.70M
                let left_before_size_switch = (self.code_buffer.max_code()
925
2.70M
                    - Code::from(self.is_tiff))
926
2.70M
                .saturating_sub(self.next_code);
927
928
                // A burst is a sequence of decodes that are completely independent of each other. This
929
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
930
                // all of them in the decoding table and thus known their depths, and additionally if
931
                // we can decode them directly into the output buffer.
932
3.59M
                for b in &burst[..cnt] {
933
                    // We can commit the previous burst code, and will take a slice from the output
934
                    // buffer. This also avoids the bounds check in the tight loop later.
935
3.59M
                    if burst_size > 0 {
936
882k
                        let len = burst_byte_len[burst_size - 1];
937
882k
                        let (into, tail) = out.split_at_mut(usize::from(len));
938
882k
                        target[burst_size - 1] = into;
939
882k
                        out = tail;
940
2.70M
                    }
941
942
                    // Check that we don't overflow the code size with all codes we burst decode.
943
3.59M
                    burst_size += 1;
944
3.59M
945
3.59M
                    if burst_size > usize::from(left_before_size_switch) {
946
2.46M
                        break;
947
1.12M
                    }
948
1.12M
949
1.12M
                    let read_code = *b;
950
1.12M
951
1.12M
                    // A burst code can't be special.
952
1.12M
                    if read_code == self.clear_code
953
1.12M
                        || read_code == self.end_code
954
1.11M
                        || read_code >= self.next_code
955
                    {
956
53.4k
                        break;
957
1.07M
                    }
958
1.07M
959
1.07M
                    // Read the code length and check that we can decode directly into the out slice.
960
1.07M
                    let len = self.table.depths[usize::from(read_code)];
961
1.07M
962
1.07M
                    if out.len() < usize::from(len) {
963
8.47k
                        break;
964
1.06M
                    }
965
1.06M
966
1.06M
                    // We do exactly one more code (the one being inspected in the current iteration)
967
1.06M
                    // after the 'burst'. When we want to break decoding precisely on the supplied
968
1.06M
                    // buffer, we check if this is the last code to be decoded into it.
969
1.06M
                    if CgC::YIELD_ON_FULL {
970
0
                        if out.len() == usize::from(len) {
971
0
                            break;
972
0
                        }
973
1.06M
                    }
974
975
1.06M
                    burst_byte_len[burst_size - 1] = len;
976
                }
977
978
2.70M
                self.code_buffer.consume_bits(burst_size as u8);
979
2.70M
                have_yet_to_decode_data = false;
980
2.70M
981
2.70M
                // Note that the very last code in the burst buffer doesn't actually belong to the
982
2.70M
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
983
2.70M
                // breaks and a loop end condition above. That may be a speed advantage?
984
2.70M
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
985
2.70M
986
2.70M
                // The very tight loop for restoring the actual burst. These can be reconstructed in
987
2.70M
                // parallel since none of them depend on a prior constructed. Only the derivation of
988
2.70M
                // new codes is not parallel. There are no size changes here either.
989
2.70M
                let burst_targets = &mut target[..burst_size - 1];
990
2.70M
991
2.70M
                if !self.table.is_full() {
992
249k
                    self.next_code += burst_targets.len() as u16;
993
2.45M
                }
994
995
882k
                for ((&burst, target), byte) in
996
2.70M
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
997
882k
                {
998
882k
                    *byte = self.table.reconstruct(burst, target);
999
882k
                }
1000
1001
2.70M
                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
1002
2.70M
1003
2.70M
                // Now handle the special codes.
1004
2.70M
                if new_code == self.clear_code {
1005
9.80k
                    self.reset_tables();
1006
9.80k
                    last_decoded = None;
1007
9.80k
                    // Restarts in the next call to the entry point.
1008
9.80k
                    break;
1009
2.69M
                }
1010
2.69M
1011
2.69M
                if new_code == self.end_code {
1012
260
                    self.has_ended = true;
1013
260
                    status = Ok(LzwStatus::Done);
1014
260
                    last_decoded = None;
1015
260
                    break;
1016
2.69M
                }
1017
2.69M
1018
2.69M
                if new_code > self.next_code {
1019
128
                    status = Err(LzwError::InvalidCode);
1020
128
                    last_decoded = None;
1021
128
                    break;
1022
2.69M
                }
1023
1024
2.69M
                let required_len = if new_code == self.next_code {
1025
39.5k
                    self.table.depths[usize::from(deriv.code)] + 1
1026
                } else {
1027
2.65M
                    self.table.depths[usize::from(new_code)]
1028
                };
1029
1030
                // We need the decoded data of the new code if it is the `next_code`. This is the
1031
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
1032
                // all other cases we only need the first character of the decoded data.
1033
2.69M
                let have_next_code = new_code == self.next_code;
1034
2.69M
1035
2.69M
                // Update the slice holding the last decoded word.
1036
2.69M
                if have_next_code {
1037
                    // If we did not have any burst code, we still hold that slice in the buffer.
1038
39.5k
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
1039
3.03k
                        let slice = core::mem::replace(new_last, &mut []);
1040
3.03k
                        last_decoded = Some(&*slice);
1041
36.4k
                    }
1042
2.65M
                }
1043
1044
                let cha;
1045
2.69M
                let is_in_buffer = usize::from(required_len) > out.len();
1046
2.69M
                // Check if we will need to store our current state into the buffer.
1047
2.69M
                if is_in_buffer {
1048
29.2k
                    if have_next_code {
1049
                        // last_decoded will be Some if we have restored any code into the out slice.
1050
                        // Otherwise it will still be present in the buffer.
1051
19.9k
                        if let Some(last) = last_decoded.take() {
1052
5.70k
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
1053
5.70k
                            self.buffer.write_mark = last.len();
1054
5.70k
                            self.buffer.read_mark = last.len();
1055
14.2k
                        }
1056
1057
19.9k
                        cha = self.buffer.fill_cscsc();
1058
9.33k
                    } else {
1059
9.33k
                        // Restore the decoded word into the buffer.
1060
9.33k
                        last_decoded = None;
1061
9.33k
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
1062
9.33k
                    }
1063
                } else {
1064
2.66M
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
1065
2.66M
                    out = tail;
1066
2.66M
1067
2.66M
                    if have_next_code {
1068
                        // Reconstruct high.
1069
19.5k
                        let source = match last_decoded.take() {
1070
12.8k
                            Some(last) => last,
1071
6.67k
                            None => &self.buffer.bytes[..self.buffer.write_mark],
1072
                        };
1073
1074
                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
1075
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
1076
19.5k
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
1077
19.5k
                        target[..source.len()].copy_from_slice(source);
1078
19.5k
                        target[source.len()..][0] = cha;
1079
2.64M
                    } else {
1080
2.64M
                        cha = self.table.reconstruct(new_code, target);
1081
2.64M
                    }
1082
1083
                    // A new decoded word.
1084
2.66M
                    last_decoded = Some(target);
1085
                }
1086
1087
                // Each newly read code creates one new code/link based on the preceding code if we
1088
                // have enough space to put it there.
1089
2.69M
                if !self.table.is_full() {
1090
239k
                    self.table.derive(&deriv, cha);
1091
239k
1092
239k
                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
1093
1.92k
                        && self.code_buffer.code_size() < MAX_CODESIZE
1094
1.78k
                    {
1095
1.78k
                        self.bump_code_size();
1096
237k
                    }
1097
1098
239k
                    self.next_code += 1;
1099
2.45M
                }
1100
1101
                // store the information on the decoded word.
1102
2.69M
                code_link = Some(DerivationBase {
1103
2.69M
                    code: new_code,
1104
2.69M
                    first: cha,
1105
2.69M
                });
1106
2.69M
1107
2.69M
                // Can't make any more progress with decoding.
1108
2.69M
                //
1109
2.69M
                // We have more data buffered but not enough space to put it? We want fetch a next
1110
2.69M
                // symbol if possible as in the case of it being a new symbol we can refer to the
1111
2.69M
                // buffered output as the source for that symbol's meaning and do a memcpy.
1112
2.69M
                //
1113
2.69M
                // Since this test is after decoding at least one code, we can now check for an
1114
2.69M
                // empty buffer and still guarantee progress when one was passed as a parameter.
1115
2.69M
                if is_in_buffer || out.is_empty() {
1116
30.3k
                    break;
1117
2.66M
                }
1118
            }
1119
26.9k
        }
1120
1121
        // We need to store the last word into the buffer in case the first code in the next
1122
        // iteration is the next_code.
1123
68.2k
        if let Some(tail) = last_decoded {
1124
1.25k
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
1125
1.25k
            self.buffer.write_mark = tail.len();
1126
1.25k
            // Mark the full buffer as having been consumed.
1127
1.25k
            self.buffer.read_mark = tail.len();
1128
66.9k
        }
1129
1130
        // Ensure we don't indicate that no progress was made if we read some bytes from the input
1131
        // (which is progress).
1132
68.2k
        if o_in > inp.len() {
1133
40.1k
            if let Ok(LzwStatus::NoProgress) = status {
1134
1.13k
                status = Ok(LzwStatus::Ok);
1135
39.1k
            }
1136
27.9k
        }
1137
1138
        // Store the code/link state.
1139
68.2k
        self.last = code_link;
1140
68.2k
1141
68.2k
        BufferResult {
1142
68.2k
            consumed_in: o_in.wrapping_sub(inp.len()),
1143
68.2k
            consumed_out: o_out.wrapping_sub(out.len()),
1144
68.2k
            status,
1145
68.2k
        }
1146
68.2k
    }
1147
}
1148
1149
impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> {
1150
34.1k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1151
34.1k
        self.code_buffer.next_symbol(inp)
1152
34.1k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol
Line
Count
Source
1150
23.2k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1151
23.2k
        self.code_buffer.next_symbol(inp)
1152
23.2k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::next_symbol
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::next_symbol
Line
Count
Source
1150
10.8k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1151
10.8k
        self.code_buffer.next_symbol(inp)
1152
10.8k
    }
1153
1154
22.8k
    fn bump_code_size(&mut self) {
1155
22.8k
        self.code_buffer.bump_code_size()
1156
22.8k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size
Line
Count
Source
1154
21.0k
    fn bump_code_size(&mut self) {
1155
21.0k
        self.code_buffer.bump_code_size()
1156
21.0k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::bump_code_size
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::bump_code_size
Line
Count
Source
1154
1.78k
    fn bump_code_size(&mut self) {
1155
1.78k
        self.code_buffer.bump_code_size()
1156
1.78k
    }
1157
1158
3.49M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1159
3.49M
        self.code_buffer.refill_bits(inp)
1160
3.49M
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits
<weezl::decode::DecodeState<weezl::decode::LsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits
Line
Count
Source
1158
784k
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1159
784k
        self.code_buffer.refill_bits(inp)
1160
784k
    }
Unexecuted instantiation: <weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::YieldOnFull>>::refill_bits
<weezl::decode::DecodeState<weezl::decode::MsbBuffer, <weezl::decode::Decoder>::from_configuration::NoYield>>::refill_bits
Line
Count
Source
1158
2.70M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1159
2.70M
        self.code_buffer.refill_bits(inp)
1160
2.70M
    }
1161
}
1162
1163
impl CodeBuffer for MsbBuffer {
1164
549
    fn new(min_size: u8) -> Self {
1165
549
        MsbBuffer {
1166
549
            code_size: min_size + 1,
1167
549
            code_mask: (1u16 << (min_size + 1)) - 1,
1168
549
            bit_buffer: 0,
1169
549
            bits: 0,
1170
549
        }
1171
549
    }
1172
1173
10.4k
    fn reset(&mut self, min_size: u8) {
1174
10.4k
        self.code_size = min_size + 1;
1175
10.4k
        self.code_mask = (1 << self.code_size) - 1;
1176
10.4k
    }
1177
1178
10.8k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1179
10.8k
        if self.bits < self.code_size {
1180
1.43k
            self.refill_bits(inp);
1181
9.39k
        }
1182
1183
10.8k
        if self.bits < self.code_size {
1184
6
            return None;
1185
10.8k
        }
1186
10.8k
1187
10.8k
        let mask = u64::from(self.code_mask);
1188
10.8k
        let rotbuf = self.bit_buffer.rotate_left(self.code_size.into());
1189
10.8k
        self.bit_buffer = rotbuf & !mask;
1190
10.8k
        self.bits -= self.code_size;
1191
10.8k
        Some((rotbuf & mask) as u16)
1192
10.8k
    }
1193
1194
1.78k
    fn bump_code_size(&mut self) {
1195
1.78k
        self.code_size += 1;
1196
1.78k
        self.code_mask = (self.code_mask << 1) | 1;
1197
1.78k
    }
1198
1199
2.71M
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1200
2.71M
        let wish_count = (64 - self.bits) / 8;
1201
2.71M
        let mut buffer = [0u8; 8];
1202
2.71M
        let new_bits = match inp.get(..usize::from(wish_count)) {
1203
2.70M
            Some(bytes) => {
1204
2.70M
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
1205
2.70M
                *inp = &inp[usize::from(wish_count)..];
1206
2.70M
                wish_count * 8
1207
            }
1208
            None => {
1209
1.92k
                let new_bits = inp.len() * 8;
1210
1.92k
                buffer[..inp.len()].copy_from_slice(inp);
1211
1.92k
                *inp = &[];
1212
1.92k
                new_bits as u8
1213
            }
1214
        };
1215
2.71M
        self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits;
1216
2.71M
        self.bits += new_bits;
1217
2.71M
    }
1218
1219
2.70M
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
1220
2.70M
        let mut bit_buffer = self.bit_buffer;
1221
2.70M
        let mask = u64::from(self.code_mask);
1222
2.70M
        let mut consumed = 0;
1223
2.70M
        let mut cnt = 0;
1224
1225
15.5M
        for b in code {
1226
15.3M
            let consumed_after = consumed + self.code_size;
1227
15.3M
            if consumed_after > self.bits {
1228
2.54M
                break;
1229
12.8M
            }
1230
12.8M
1231
12.8M
            cnt += 1;
1232
12.8M
            consumed = consumed_after;
1233
12.8M
1234
12.8M
            let rotbuf = bit_buffer.rotate_left(self.code_size.into());
1235
12.8M
            *b = (rotbuf & mask) as u16;
1236
12.8M
            // The read bits are 'appended' but we never interpret those appended bits.
1237
12.8M
            bit_buffer = rotbuf;
1238
        }
1239
1240
2.70M
        cnt
1241
2.70M
    }
1242
1243
2.70M
    fn consume_bits(&mut self, code_cnt: u8) {
1244
2.70M
        let bits = self.code_size * code_cnt;
1245
2.70M
        debug_assert!(bits <= self.bits);
1246
1247
2.70M
        if bits >= self.bits {
1248
11.6k
            self.bit_buffer = 0;
1249
2.69M
        } else {
1250
2.69M
            // bits < self.bits so this must be smaller than the number size.
1251
2.69M
            self.bit_buffer = self.bit_buffer << bits;
1252
2.69M
        }
1253
1254
2.70M
        self.bits = self.bits.wrapping_sub(bits);
1255
2.70M
    }
1256
1257
2.94M
    fn max_code(&self) -> Code {
1258
2.94M
        self.code_mask
1259
2.94M
    }
1260
1261
1.92k
    fn code_size(&self) -> u8 {
1262
1.92k
        self.code_size
1263
1.92k
    }
1264
}
1265
1266
impl CodeBuffer for LsbBuffer {
1267
1.38k
    fn new(min_size: u8) -> Self {
1268
1.38k
        LsbBuffer {
1269
1.38k
            code_size: min_size + 1,
1270
1.38k
            code_mask: (1u16 << (min_size + 1)) - 1,
1271
1.38k
            bit_buffer: 0,
1272
1.38k
            bits: 0,
1273
1.38k
        }
1274
1.38k
    }
1275
1276
22.6k
    fn reset(&mut self, min_size: u8) {
1277
22.6k
        self.code_size = min_size + 1;
1278
22.6k
        self.code_mask = (1 << self.code_size) - 1;
1279
22.6k
    }
1280
1281
23.2k
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
1282
23.2k
        if self.bits < self.code_size {
1283
2.06k
            self.refill_bits(inp);
1284
21.2k
        }
1285
1286
23.2k
        if self.bits < self.code_size {
1287
326
            return None;
1288
22.9k
        }
1289
22.9k
1290
22.9k
        let mask = u64::from(self.code_mask);
1291
22.9k
        let code = self.bit_buffer & mask;
1292
22.9k
        self.bit_buffer >>= self.code_size;
1293
22.9k
        self.bits -= self.code_size;
1294
22.9k
        Some(code as u16)
1295
23.2k
    }
1296
1297
21.0k
    fn bump_code_size(&mut self) {
1298
21.0k
        self.code_size += 1;
1299
21.0k
        self.code_mask = (self.code_mask << 1) | 1;
1300
21.0k
    }
1301
1302
786k
    fn refill_bits(&mut self, inp: &mut &[u8]) {
1303
786k
        let wish_count = (64 - self.bits) / 8;
1304
786k
        let mut buffer = [0u8; 8];
1305
786k
        let new_bits = match inp.get(..usize::from(wish_count)) {
1306
382k
            Some(bytes) => {
1307
382k
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
1308
382k
                *inp = &inp[usize::from(wish_count)..];
1309
382k
                wish_count * 8
1310
            }
1311
            None => {
1312
403k
                let new_bits = inp.len() * 8;
1313
403k
                buffer[..inp.len()].copy_from_slice(inp);
1314
403k
                *inp = &[];
1315
403k
                new_bits as u8
1316
            }
1317
        };
1318
786k
        self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits;
1319
786k
        self.bits += new_bits;
1320
786k
    }
1321
1322
784k
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
1323
784k
        let mut bit_buffer = self.bit_buffer;
1324
784k
        let mask = u64::from(self.code_mask);
1325
784k
        let mut consumed = 0;
1326
784k
        let mut cnt = 0;
1327
1328
2.99M
        for b in code {
1329
2.90M
            let consumed_after = consumed + self.code_size;
1330
2.90M
            if consumed_after > self.bits {
1331
696k
                break;
1332
2.20M
            }
1333
2.20M
1334
2.20M
            cnt += 1;
1335
2.20M
            consumed = consumed_after;
1336
2.20M
1337
2.20M
            *b = (bit_buffer & mask) as u16;
1338
2.20M
            bit_buffer = bit_buffer >> self.code_size;
1339
        }
1340
1341
784k
        cnt
1342
784k
    }
1343
1344
565k
    fn consume_bits(&mut self, code_cnt: u8) {
1345
565k
        let bits = self.code_size * code_cnt;
1346
565k
        debug_assert!(bits <= self.bits);
1347
1348
565k
        if bits >= self.bits {
1349
101k
            self.bit_buffer = 0;
1350
464k
        } else {
1351
464k
            // bits < self.bits so this must be smaller than the number size.
1352
464k
            self.bit_buffer = self.bit_buffer >> bits;
1353
464k
        }
1354
1355
565k
        self.bits = self.bits.wrapping_sub(bits);
1356
565k
    }
1357
1358
863k
    fn max_code(&self) -> Code {
1359
863k
        self.code_mask
1360
863k
    }
1361
1362
21.2k
    fn code_size(&self) -> u8 {
1363
21.2k
        self.code_size
1364
21.2k
    }
1365
}
1366
1367
impl Buffer {
1368
1.93k
    fn new() -> Self {
1369
1.93k
        Buffer {
1370
1.93k
            bytes: vec![0; MAX_ENTRIES].into_boxed_slice(),
1371
1.93k
            read_mark: 0,
1372
1.93k
            write_mark: 0,
1373
1.93k
        }
1374
1.93k
    }
1375
1376
    /// When encoding a sequence `cScSc` where `c` is any character and `S` is any string
1377
    /// this results in two codes `AB`, `A` encoding `cS` and `B` encoding `cSc`. Supposing
1378
    /// the buffer is already filled with the reconstruction of `A`, we can easily fill it
1379
    /// with the reconstruction of `B`.
1380
26.7k
    fn fill_cscsc(&mut self) -> u8 {
1381
26.7k
        self.bytes[self.write_mark] = self.bytes[0];
1382
26.7k
        self.write_mark += 1;
1383
26.7k
        self.read_mark = 0;
1384
26.7k
        self.bytes[0]
1385
26.7k
    }
1386
1387
    // Fill the buffer by decoding from the table
1388
51.3k
    fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 {
1389
51.3k
        self.write_mark = 0;
1390
51.3k
        self.read_mark = 0;
1391
51.3k
        let depth = table.depths[usize::from(code)];
1392
51.3k
        let mut memory = core::mem::replace(&mut self.bytes, Box::default());
1393
51.3k
1394
51.3k
        let out = &mut memory[..usize::from(depth)];
1395
51.3k
        let last = table.reconstruct(code, out);
1396
51.3k
1397
51.3k
        self.bytes = memory;
1398
51.3k
        self.write_mark = usize::from(depth);
1399
51.3k
        last
1400
51.3k
    }
1401
1402
927k
    fn buffer(&self) -> &[u8] {
1403
927k
        &self.bytes[self.read_mark..self.write_mark]
1404
927k
    }
1405
1406
239k
    fn consume(&mut self, amt: usize) {
1407
239k
        self.read_mark += amt;
1408
239k
    }
1409
}
1410
1411
impl Table {
1412
1.93k
    fn new() -> Self {
1413
1.93k
        Table {
1414
1.93k
            inner: Vec::with_capacity(MAX_ENTRIES),
1415
1.93k
            depths: Vec::with_capacity(MAX_ENTRIES),
1416
1.93k
        }
1417
1.93k
    }
1418
1419
25.1k
    fn clear(&mut self, min_size: u8) {
1420
25.1k
        let static_count = usize::from(1u16 << u16::from(min_size)) + 2;
1421
25.1k
        self.inner.truncate(static_count);
1422
25.1k
        self.depths.truncate(static_count);
1423
25.1k
    }
1424
1425
7.95k
    fn init(&mut self, min_size: u8) {
1426
7.95k
        self.inner.clear();
1427
7.95k
        self.depths.clear();
1428
436k
        for i in 0..(1u16 << u16::from(min_size)) {
1429
436k
            self.inner.push(Link::base(i as u8));
1430
436k
            self.depths.push(1);
1431
436k
        }
1432
        // Clear code.
1433
7.95k
        self.inner.push(Link::base(0));
1434
7.95k
        self.depths.push(0);
1435
7.95k
        // End code.
1436
7.95k
        self.inner.push(Link::base(0));
1437
7.95k
        self.depths.push(0);
1438
7.95k
    }
1439
1440
26.7k
    fn at(&self, code: Code) -> &Link {
1441
26.7k
        &self.inner[usize::from(code)]
1442
26.7k
    }
1443
1444
26.7k
    fn is_empty(&self) -> bool {
1445
26.7k
        self.inner.is_empty()
1446
26.7k
    }
1447
1448
6.52M
    fn is_full(&self) -> bool {
1449
6.52M
        self.inner.len() >= MAX_ENTRIES
1450
6.52M
    }
1451
1452
536k
    fn derive(&mut self, from: &DerivationBase, byte: u8) {
1453
536k
        let link = from.derive(byte);
1454
536k
        let depth = self.depths[usize::from(from.code)] + 1;
1455
536k
        self.inner.push(link);
1456
536k
        self.depths.push(depth);
1457
536k
    }
1458
1459
    // Derive multiple codes in a row, where each base is guaranteed to already exist.
1460
3.27M
    fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) {
1461
3.27M
        let mut depth_of = from.code;
1462
        // Note that false data dependency we want to get rid of!
1463
        // TODO: this pushes into a Vec, maybe we can make this cleaner.
1464
4.56M
        for &code in burst {
1465
1.28M
            let depth = self.depths[usize::from(depth_of)] + 1;
1466
1.28M
            self.depths.push(depth);
1467
1.28M
            depth_of = code;
1468
1.28M
        }
1469
1470
        // Llvm tends to be flaky with code layout for the case of requiring an allocation. It's
1471
        // not clear if that can occur in practice but it relies on iterator size hint..
1472
3.27M
        let extensions = burst.iter().zip(first);
1473
3.27M
        self.inner.extend(extensions.map(|(&code, &first)| {
1474
1.28M
            let link = from.derive(first);
1475
1.28M
            from.code = code;
1476
1.28M
            from.first = first;
1477
1.28M
            link
1478
3.27M
        }));
1479
3.27M
    }
1480
1481
4.46M
    fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 {
1482
4.46M
        let mut code_iter = code;
1483
4.46M
        let table = &self.inner[..=usize::from(code)];
1484
4.46M
        let first = table[usize::from(code)].first;
1485
4.46M
1486
4.46M
        let len = code_iter;
1487
20.4M
        for ch in out.iter_mut().rev() {
1488
20.4M
            //(code, cha) = self.table[k as usize];
1489
20.4M
            // Note: This could possibly be replaced with an unchecked array access if
1490
20.4M
            //  - value is asserted to be < self.next_code() in push
1491
20.4M
            //  - min_size is asserted to be < MAX_CODESIZE
1492
20.4M
            let entry = &table[usize::from(code_iter)];
1493
20.4M
            code_iter = core::cmp::min(len, entry.prev);
1494
20.4M
            *ch = entry.byte;
1495
20.4M
        }
1496
1497
4.46M
        first
1498
4.46M
    }
1499
}
1500
1501
impl Link {
1502
452k
    fn base(byte: u8) -> Self {
1503
452k
        Link {
1504
452k
            prev: 0,
1505
452k
            byte,
1506
452k
            first: byte,
1507
452k
        }
1508
452k
    }
1509
}
1510
1511
impl DerivationBase {
1512
    // TODO: this has self type to make it clear we might depend on the old in a future
1513
    // optimization. However, that has no practical purpose right now.
1514
1.82M
    fn derive(&self, byte: u8) -> Link {
1515
1.82M
        Link {
1516
1.82M
            prev: self.code,
1517
1.82M
            byte,
1518
1.82M
            first: self.first,
1519
1.82M
        }
1520
1.82M
    }
1521
}
1522
1523
#[cfg(test)]
1524
mod tests {
1525
    use crate::alloc::vec::Vec;
1526
    #[cfg(feature = "std")]
1527
    use crate::StreamBuf;
1528
    use crate::{decode::Decoder, BitOrder};
1529
1530
    #[test]
1531
    fn invalid_code_size_low() {
1532
        let _ = Decoder::new(BitOrder::Msb, 0);
1533
        let _ = Decoder::new(BitOrder::Msb, 1);
1534
    }
1535
1536
    #[test]
1537
    #[should_panic]
1538
    fn invalid_code_size_high() {
1539
        let _ = Decoder::new(BitOrder::Msb, 14);
1540
    }
1541
1542
    fn make_encoded() -> Vec<u8> {
1543
        const FILE: &'static [u8] = include_bytes!(concat!(
1544
            env!("CARGO_MANIFEST_DIR"),
1545
            "/benches/binary-8-msb.lzw"
1546
        ));
1547
        return Vec::from(FILE);
1548
    }
1549
1550
    #[test]
1551
    #[cfg(feature = "std")]
1552
    fn into_stream_buffer_no_alloc() {
1553
        let encoded = make_encoded();
1554
        let mut decoder = Decoder::new(BitOrder::Msb, 8);
1555
1556
        let mut output = vec![];
1557
        let mut buffer = [0; 512];
1558
        let mut istream = decoder.into_stream(&mut output);
1559
        istream.set_buffer(&mut buffer[..]);
1560
        istream.decode(&encoded[..]).status.unwrap();
1561
1562
        match istream.buffer {
1563
            Some(StreamBuf::Borrowed(_)) => {}
1564
            None => panic!("Decoded without buffer??"),
1565
            Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
1566
        }
1567
    }
1568
1569
    #[test]
1570
    #[cfg(feature = "std")]
1571
    fn into_stream_buffer_small_alloc() {
1572
        struct WriteTap<W: std::io::Write>(W);
1573
        const BUF_SIZE: usize = 512;
1574
1575
        impl<W: std::io::Write> std::io::Write for WriteTap<W> {
1576
            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1577
                assert!(buf.len() <= BUF_SIZE);
1578
                self.0.write(buf)
1579
            }
1580
            fn flush(&mut self) -> std::io::Result<()> {
1581
                self.0.flush()
1582
            }
1583
        }
1584
1585
        let encoded = make_encoded();
1586
        let mut decoder = Decoder::new(BitOrder::Msb, 8);
1587
1588
        let mut output = vec![];
1589
        let mut istream = decoder.into_stream(WriteTap(&mut output));
1590
        istream.set_buffer_size(512);
1591
        istream.decode(&encoded[..]).status.unwrap();
1592
1593
        match istream.buffer {
1594
            Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
1595
            Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
1596
            None => panic!("Decoded without buffer??"),
1597
        }
1598
    }
1599
1600
    #[test]
1601
    #[cfg(feature = "std")]
1602
    fn reset() {
1603
        let encoded = make_encoded();
1604
        let mut decoder = Decoder::new(BitOrder::Msb, 8);
1605
        let mut reference = None;
1606
1607
        for _ in 0..2 {
1608
            let mut output = vec![];
1609
            let mut buffer = [0; 512];
1610
            let mut istream = decoder.into_stream(&mut output);
1611
            istream.set_buffer(&mut buffer[..]);
1612
            istream.decode_all(&encoded[..]).status.unwrap();
1613
1614
            decoder.reset();
1615
            if let Some(reference) = &reference {
1616
                assert_eq!(output, *reference);
1617
            } else {
1618
                reference = Some(output);
1619
            }
1620
        }
1621
    }
1622
}