Coverage Report

Created: 2026-01-19 07:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/weezl-0.1.12/src/encode.rs
Line
Count
Source
1
//! A module for all encoding needs.
2
use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult};
3
use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
4
5
use crate::alloc::{boxed::Box, vec::Vec};
6
#[cfg(feature = "std")]
7
use crate::error::StreamResult;
8
#[cfg(feature = "std")]
9
use std::io::{self, BufRead, Write};
10
11
/// The state for encoding data with an LZW algorithm.
12
///
13
/// The same structure can be utilized with streams as well as your own buffers and driver logic.
14
/// It may even be possible to mix them if you are sufficiently careful not to lose any written
15
/// data in the process.
16
///
17
/// This is a sans-IO implementation, meaning that it only contains the state of the encoder and
18
/// the caller will provide buffers for input and output data when calling the basic
19
/// [`encode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*`
20
/// methods for encoding with a particular style of common IO.
21
///
22
/// * [`encode`] for encoding once without any IO-loop.
23
/// * [`into_async`] for encoding with the `futures` traits for asynchronous IO.
24
/// * [`into_stream`] for encoding with the standard `io` traits.
25
/// * [`into_vec`] for in-memory encoding.
26
///
27
/// [`encode_bytes`]: #method.encode_bytes
28
/// [`encode`]: #method.encode
29
/// [`into_async`]: #method.into_async
30
/// [`into_stream`]: #method.into_stream
31
/// [`into_vec`]: #method.into_vec
32
pub struct Encoder {
33
    /// Internally dispatch via a dynamic trait object. This did not have any significant
34
    /// performance impact as we batch data internally and this pointer does not change after
35
    /// creation!
36
    state: Box<dyn Stateful + Send + 'static>,
37
}
38
39
/// An encoding stream sink.
40
///
41
/// See [`Encoder::into_stream`] on how to create this type.
42
///
43
/// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream
44
#[cfg_attr(
45
    not(feature = "std"),
46
    deprecated = "This type is only useful with the `std` feature."
47
)]
48
#[cfg_attr(not(feature = "std"), allow(dead_code))]
49
pub struct IntoStream<'d, W> {
50
    encoder: &'d mut Encoder,
51
    writer: W,
52
    buffer: Option<StreamBuf<'d>>,
53
    default_size: usize,
54
}
55
56
/// An async encoding sink.
57
///
58
/// See [`Encoder::into_async`] on how to create this type.
59
///
60
/// [`Encoder::into_async`]: struct.Encoder.html#method.into_async
61
#[cfg(feature = "async")]
62
pub struct IntoAsync<'d, W> {
63
    encoder: &'d mut Encoder,
64
    writer: W,
65
    buffer: Option<StreamBuf<'d>>,
66
    default_size: usize,
67
}
68
69
/// An encoding sink into a vector.
70
///
71
/// See [`Encoder::into_vec`] on how to create this type.
72
///
73
/// [`Encoder::into_vec`]: struct.Encoder.html#method.into_vec
74
pub struct IntoVec<'d> {
75
    encoder: &'d mut Encoder,
76
    vector: &'d mut Vec<u8>,
77
}
78
79
trait Stateful {
80
    fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
81
    fn mark_ended(&mut self) -> bool;
82
    /// Reset the state tracking if end code has been written.
83
    fn restart(&mut self);
84
    /// Reset the encoder to the beginning, dropping all buffers etc.
85
    fn reset(&mut self);
86
}
87
88
struct EncodeState<B: Buffer> {
89
    /// The configured minimal code size.
90
    min_size: u8,
91
    /// The current encoding symbol tree.
92
    tree: Tree,
93
    /// If we have pushed the end code.
94
    has_ended: bool,
95
    /// If tiff then bumps are a single code sooner.
96
    is_tiff: bool,
97
    /// The code corresponding to the currently read characters.
98
    current_code: Code,
99
    /// The clear code for resetting the dictionary.
100
    clear_code: Code,
101
    /// The bit buffer for encoding.
102
    buffer: B,
103
}
104
105
struct MsbBuffer {
106
    /// The current code length.
107
    code_size: u8,
108
    /// The buffer bits.
109
    buffer: u64,
110
    /// The number of valid buffer bits.
111
    bits_in_buffer: u8,
112
}
113
114
struct LsbBuffer {
115
    /// The current code length.
116
    code_size: u8,
117
    /// The buffer bits.
118
    buffer: u64,
119
    /// The number of valid buffer bits.
120
    bits_in_buffer: u8,
121
}
122
123
trait Buffer {
124
    fn new(size: u8) -> Self;
125
    /// Reset the code size in the buffer.
126
    fn reset(&mut self, min_size: u8);
127
    /// Apply effects of a Clear Code.
128
    fn clear(&mut self, min_size: u8);
129
    /// Insert a code into the buffer.
130
    fn buffer_code(&mut self, code: Code);
131
    /// Push bytes if the buffer space is getting small.
132
    fn push_out(&mut self, out: &mut &mut [u8]) -> bool;
133
    /// Flush all full bytes, returning if at least one more byte remains.
134
    fn flush_out(&mut self, out: &mut &mut [u8]) -> bool;
135
    /// Pad the buffer to a full byte.
136
    fn buffer_pad(&mut self);
137
    /// Increase the maximum code size.
138
    fn bump_code_size(&mut self);
139
    /// Return the maximum code with the current code size.
140
    fn max_code(&self) -> Code;
141
    /// Return the current code size in bits.
142
    fn code_size(&self) -> u8;
143
}
144
145
/// One tree node for at most each code.
146
/// To avoid using too much memory we keep nodes with few successors in optimized form. This form
147
/// doesn't offer lookup by indexing but instead does a linear search.
148
#[derive(Default)]
149
struct Tree {
150
    simples: Vec<Simple>,
151
    complex: Vec<Full>,
152
    keys: Vec<CompressedKey>,
153
}
154
155
#[derive(Clone, Copy)]
156
enum FullKey {
157
    NoSuccessor,
158
    Simple(u16),
159
    Full(u16),
160
}
161
162
#[derive(Clone, Copy)]
163
struct CompressedKey(u16);
164
165
const SHORT: usize = 16;
166
167
#[derive(Clone, Copy)]
168
struct Simple {
169
    codes: [Code; SHORT],
170
    chars: [u8; SHORT],
171
    count: u8,
172
}
173
174
#[derive(Clone, Copy)]
175
struct Full {
176
    char_continuation: [Code; 256],
177
}
178
179
/// Describes the static parameters for creating an encoder.
180
#[derive(Clone, Debug)]
181
pub struct Configuration {
182
    order: BitOrder,
183
    size: u8,
184
    tiff: bool,
185
}
186
187
impl Configuration {
188
    /// Create a configuration to encode with the specified bit order and symbol size.
189
    ///
190
    /// # Panics
191
    ///
192
    /// The `size` needs to be in the interval `2..=12`.
193
0
    pub fn new(order: BitOrder, size: u8) -> Self {
194
0
        super::assert_encode_size(size);
195
0
        Configuration {
196
0
            order,
197
0
            size,
198
0
            tiff: false,
199
0
        }
200
0
    }
201
202
    /// Create a configuration for a TIFF compatible encoder.
203
    ///
204
    /// # Panics
205
    ///
206
    /// The `size` needs to be in the interval `2..=12`.
207
0
    pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
208
0
        super::assert_encode_size(size);
209
0
        Configuration {
210
0
            order,
211
0
            size,
212
0
            tiff: true,
213
0
        }
214
0
    }
215
216
    /// Create a new encoder with the defined configuration.
217
0
    pub fn build(self) -> Encoder {
218
0
        Encoder {
219
0
            state: Encoder::from_configuration(&self),
220
0
        }
221
0
    }
222
}
223
224
impl Encoder {
225
    /// Create a new encoder with the specified bit order and symbol size.
226
    ///
227
    /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
228
    /// original specification. In particular you will need to specify an `Lsb` bit order to encode
229
    /// the data portion of a compressed `gif` image.
230
    ///
231
    /// # Panics
232
    ///
233
    /// The `size` needs to be in the interval `2..=12`.
234
0
    pub fn new(order: BitOrder, size: u8) -> Self {
235
0
        Configuration::new(order, size).build()
236
0
    }
237
238
    /// Create a TIFF compatible encoder with the specified bit order and symbol size.
239
    ///
240
    /// The algorithm for dynamically increasing the code symbol bit width is compatible with the
241
    /// TIFF specification, which is a misinterpretation of the original algorithm for increasing
242
    /// the code size. It switches one symbol sooner.
243
    ///
244
    /// # Panics
245
    ///
246
    /// The `size` needs to be in the interval `2..=12`.
247
0
    pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
248
0
        Configuration::with_tiff_size_switch(order, size).build()
249
0
    }
250
251
0
    fn from_configuration(cfg: &Configuration) -> Box<dyn Stateful + Send + 'static> {
252
0
        match cfg.order {
253
            BitOrder::Lsb => {
254
0
                let mut state = EncodeState::<LsbBuffer>::new(cfg.size);
255
0
                state.is_tiff = cfg.tiff;
256
0
                Box::new(state)
257
            }
258
            BitOrder::Msb => {
259
0
                let mut state = EncodeState::<MsbBuffer>::new(cfg.size);
260
0
                state.is_tiff = cfg.tiff;
261
0
                Box::new(state)
262
            }
263
        }
264
0
    }
265
266
    /// Encode some bytes from `inp` into `out`.
267
    ///
268
    /// See [`into_stream`] for high-level functions (this interface is only available with the
269
    /// `std` feature) and [`finish`] for marking the input data as complete.
270
    ///
271
    /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and
272
    /// all following ones will _not_ be consumed and the `status` of the result will signal an
273
    /// error. The result will also indicate that all bytes up to but not including the offending
274
    /// byte have been consumed. You may try again with a fixed byte.
275
    ///
276
    /// [`into_stream`]: #method.into_stream
277
    /// [`finish`]: #method.finish
278
0
    pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
279
0
        self.state.advance(inp, out)
280
0
    }
281
282
    /// Encode a single chunk of data.
283
    ///
284
    /// This method will add an end marker to the encoded chunk.
285
    ///
286
    /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize
287
    /// buffer size, to supply an existing vector, to control whether an end marker is required, or
288
    /// to preserve partial data in the case of an encoding error.
289
    ///
290
    /// [`into_vec`]: #method.into_vec
291
    ///
292
    /// # Example
293
    ///
294
    /// ```
295
    /// use weezl::{BitOrder, encode::Encoder};
296
    ///
297
    /// let data = b"Hello, world";
298
    /// let encoded = Encoder::new(BitOrder::Msb, 9)
299
    ///     .encode(data)
300
    ///     .expect("All bytes valid for code size");
301
    /// ```
302
0
    pub fn encode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> {
303
0
        let mut output = Vec::new();
304
0
        self.into_vec(&mut output).encode_all(data).status?;
305
0
        Ok(output)
306
0
    }
307
308
    /// Construct an encoder into a writer.
309
    #[cfg(feature = "std")]
310
0
    pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
311
0
        IntoStream {
312
0
            encoder: self,
313
0
            writer,
314
0
            buffer: None,
315
0
            default_size: STREAM_BUF_SIZE,
316
0
        }
317
0
    }
Unexecuted instantiation: <weezl::encode::Encoder>::into_stream::<_>
Unexecuted instantiation: <weezl::encode::Encoder>::into_stream::<&mut &mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <weezl::encode::Encoder>::into_stream::<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>
318
319
    /// Construct an encoder into an async writer.
320
    #[cfg(feature = "async")]
321
    pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
322
        IntoAsync {
323
            encoder: self,
324
            writer,
325
            buffer: None,
326
            default_size: STREAM_BUF_SIZE,
327
        }
328
    }
329
330
    /// Construct an encoder into a vector.
331
    ///
332
    /// All encoded data is appended and the vector is __not__ cleared.
333
    ///
334
    /// Compared to `into_stream` this interface allows a high-level access to encoding without
335
    /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the
336
    /// special target exposes.
337
0
    pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> {
338
0
        IntoVec {
339
0
            encoder: self,
340
0
            vector: vec,
341
0
        }
342
0
    }
343
344
    /// Mark the encoding as in the process of finishing.
345
    ///
346
    /// The next following call to `encode_bytes` which is able to consume the complete input will
347
    /// also try to emit an end code. It's not recommended, but also not unsound, to use different
348
    /// byte slices in different calls from this point forward and thus to 'delay' the actual end
349
    /// of the data stream. The behaviour after the end marker has been written is unspecified but
350
    /// sound.
351
0
    pub fn finish(&mut self) {
352
0
        self.state.mark_ended();
353
0
    }
354
355
    /// Undo marking this data stream as ending.
356
    /// FIXME: clarify how this interacts with padding introduced after end code.
357
    #[allow(dead_code)]
358
0
    pub(crate) fn restart(&mut self) {
359
0
        self.state.restart()
360
0
    }
361
362
    /// Reset all internal state.
363
    ///
364
    /// This produces an encoder as if just constructed with `new` but taking slightly less work. In
365
    /// particular it will not deallocate any internal allocations. It will also avoid some
366
    /// duplicate setup work.
367
0
    pub fn reset(&mut self) {
368
0
        self.state.reset()
369
0
    }
370
}
371
372
#[cfg(feature = "std")]
373
impl<'d, W: Write> IntoStream<'d, W> {
374
    /// Encode data from a reader.
375
    ///
376
    /// This will drain the supplied reader. It will not encode an end marker after all data has
377
    /// been processed.
378
0
    pub fn encode(&mut self, read: impl BufRead) -> StreamResult {
379
0
        self.encode_part(read, false)
380
0
    }
381
382
    /// Encode data from a reader and an end marker.
383
0
    pub fn encode_all(mut self, read: impl BufRead) -> StreamResult {
384
0
        self.encode_part(read, true)
385
0
    }
Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_all::<_>
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_all::<&[u8]>
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_all::<&[u8]>
386
387
    /// Set the size of the intermediate encode buffer.
388
    ///
389
    /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is
390
    /// available and any encoding method is called. No buffer is allocated if `set_buffer` has
391
    /// been called. The buffer is reused.
392
    ///
393
    /// # Panics
394
    /// This method panics if `size` is `0`.
395
0
    pub fn set_buffer_size(&mut self, size: usize) {
396
0
        assert_ne!(size, 0, "Attempted to set empty buffer");
397
0
        self.default_size = size;
398
0
    }
399
400
    /// Use a particular buffer as an intermediate encode buffer.
401
    ///
402
    /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
403
    /// instead of dynamically allocating a buffer. Note that the size of the buffer is relevant
404
    /// for efficient encoding as there is additional overhead from `write` calls each time the
405
    /// buffer has been filled.
406
    ///
407
    /// # Panics
408
    /// This method panics if the `buffer` is empty.
409
0
    pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
410
0
        assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
411
0
        self.buffer = Some(StreamBuf::Borrowed(buffer));
412
0
    }
413
414
0
    fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult {
415
        let IntoStream {
416
0
            encoder,
417
0
            writer,
418
0
            buffer,
419
0
            default_size,
420
0
        } = self;
421
        enum Progress {
422
            Ok,
423
            Done,
424
        }
425
426
0
        let mut bytes_read = 0;
427
0
        let mut bytes_written = 0;
428
429
0
        let read_bytes = &mut bytes_read;
430
0
        let write_bytes = &mut bytes_written;
431
432
0
        let outbuf: &mut [u8] =
433
0
            match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#0}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#0}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#0}
434
0
                StreamBuf::Borrowed(slice) => &mut *slice,
435
0
                StreamBuf::Owned(vec) => &mut *vec,
436
            };
437
0
        assert!(!outbuf.is_empty());
438
439
0
        let once = move || {
440
0
            let data = read.fill_buf()?;
441
442
0
            if data.is_empty() {
443
0
                if finish {
444
0
                    encoder.finish();
445
0
                } else {
446
0
                    return Ok(Progress::Done);
447
                }
448
0
            }
449
450
0
            let result = encoder.encode_bytes(data, &mut outbuf[..]);
451
0
            *read_bytes += result.consumed_in;
452
0
            *write_bytes += result.consumed_out;
453
0
            read.consume(result.consumed_in);
454
455
0
            let done = result.status.map_err(|err| {
456
0
                io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
457
0
            })?;
Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#1}::{closure#0}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#1}::{closure#0}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#1}::{closure#0}
458
459
0
            if let LzwStatus::Done = done {
460
0
                writer.write_all(&outbuf[..result.consumed_out])?;
461
0
                return Ok(Progress::Done);
462
0
            }
463
464
0
            if let LzwStatus::NoProgress = done {
465
0
                return Err(io::Error::new(
466
0
                    io::ErrorKind::UnexpectedEof,
467
0
                    "No more data but no end marker detected",
468
0
                ));
469
0
            }
470
471
0
            writer.write_all(&outbuf[..result.consumed_out])?;
472
0
            Ok(Progress::Ok)
473
0
        };
Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#1}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#1}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#1}
474
475
0
        let status = core::iter::repeat_with(once)
476
            // scan+fuse can be replaced with map_while
477
0
            .scan((), |(), result| match result {
478
0
                Ok(Progress::Ok) => Some(Ok(())),
479
0
                Err(err) => Some(Err(err)),
480
0
                Ok(Progress::Done) => None,
481
0
            })
Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>::{closure#2}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>::{closure#2}
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>::{closure#2}
482
0
            .fuse()
483
0
            .collect();
484
485
0
        StreamResult {
486
0
            bytes_read,
487
0
            bytes_written,
488
0
            status,
489
0
        }
490
0
    }
Unexecuted instantiation: <weezl::encode::IntoStream<_>>::encode_part::<_>
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut alloc::vec::Vec<u8>>>::encode_part::<&[u8]>
Unexecuted instantiation: <weezl::encode::IntoStream<&mut &mut std::io::cursor::Cursor<alloc::vec::Vec<u8>>>>::encode_part::<&[u8]>
491
}
492
493
impl IntoVec<'_> {
494
    /// Encode data from a slice.
495
0
    pub fn encode(&mut self, read: &[u8]) -> VectorResult {
496
0
        self.encode_part(read, false)
497
0
    }
498
499
    /// Encode data from a slice, adding an end marker.
500
0
    pub fn encode_all(mut self, read: &[u8]) -> VectorResult {
501
0
        self.encode_part(read, true)
502
0
    }
503
504
0
    fn grab_buffer(&mut self) -> (&mut [u8], &mut Encoder) {
505
        const CHUNK_SIZE: usize = 1 << 12;
506
0
        let decoder = &mut self.encoder;
507
0
        let length = self.vector.len();
508
509
        // Use the vector to do overflow checks and w/e.
510
0
        self.vector.reserve(CHUNK_SIZE);
511
        // FIXME: encoding into uninit buffer?
512
0
        self.vector.resize(length + CHUNK_SIZE, 0u8);
513
514
0
        (&mut self.vector[length..], decoder)
515
0
    }
516
517
0
    fn encode_part(&mut self, part: &[u8], finish: bool) -> VectorResult {
518
0
        let mut result = VectorResult {
519
0
            consumed_in: 0,
520
0
            consumed_out: 0,
521
0
            status: Ok(LzwStatus::Ok),
522
0
        };
523
524
        enum Progress {
525
            Ok,
526
            Done,
527
        }
528
529
        // Converting to mutable refs to move into the `once` closure.
530
0
        let read_bytes = &mut result.consumed_in;
531
0
        let write_bytes = &mut result.consumed_out;
532
0
        let mut data = part;
533
534
        // A 64 MB buffer is quite large but should get alloc_zeroed.
535
        // Note that the decoded size can be up to quadratic in code block.
536
0
        let once = move || {
537
            // Grab a new output buffer.
538
0
            let (outbuf, encoder) = self.grab_buffer();
539
540
0
            if finish {
541
0
                encoder.finish();
542
0
            }
543
544
            // Encode as much of the input as fits into the buffer.
545
0
            let result = encoder.encode_bytes(data, &mut outbuf[..]);
546
            // Do the bookkeeping and consume the buffer.
547
0
            *read_bytes += result.consumed_in;
548
0
            *write_bytes += result.consumed_out;
549
0
            data = &data[result.consumed_in..];
550
551
0
            let unfilled = outbuf.len() - result.consumed_out;
552
0
            let filled = self.vector.len() - unfilled;
553
0
            self.vector.truncate(filled);
554
555
            // Handle the status in the result.
556
0
            let done = result.status?;
557
0
            if let LzwStatus::Done = done {
558
0
                Ok(Progress::Done)
559
            } else {
560
0
                Ok(Progress::Ok)
561
            }
562
0
        };
563
564
        // Encode chunks of input data until we're done.
565
0
        let status: Result<(), _> = core::iter::repeat_with(once)
566
            // scan+fuse can be replaced with map_while
567
0
            .scan((), |(), result| match result {
568
0
                Ok(Progress::Ok) => Some(Ok(())),
569
0
                Err(err) => Some(Err(err)),
570
0
                Ok(Progress::Done) => None,
571
0
            })
572
0
            .fuse()
573
0
            .collect();
574
575
0
        if let Err(err) = status {
576
0
            result.status = Err(err);
577
0
        }
578
579
0
        result
580
0
    }
581
}
582
583
// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
584
// trip over the usage of await, which is a reserved keyword in that edition/version. It only
585
// contains an impl block.
586
#[cfg(feature = "async")]
587
#[path = "encode_into_async.rs"]
588
mod impl_encode_into_async;
589
590
impl<B: Buffer> EncodeState<B> {
591
0
    fn new(min_size: u8) -> Self {
592
0
        let clear_code = 1 << min_size;
593
0
        let mut tree = Tree::default();
594
0
        tree.init(min_size);
595
0
        let mut state = EncodeState {
596
0
            min_size,
597
0
            tree,
598
0
            has_ended: false,
599
0
            is_tiff: false,
600
0
            current_code: clear_code,
601
0
            clear_code,
602
0
            buffer: B::new(min_size),
603
0
        };
604
0
        state.buffer_code(clear_code);
605
0
        state
606
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::new
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::new
607
}
608
609
impl<B: Buffer> Stateful for EncodeState<B> {
610
0
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
611
0
        let c_in = inp.len();
612
0
        let c_out = out.len();
613
0
        let mut status = Ok(LzwStatus::Ok);
614
615
        'encoding: loop {
616
0
            if self.push_out(&mut out) {
617
0
                break;
618
0
            }
619
620
0
            if inp.is_empty() && self.has_ended {
621
0
                let end = self.end_code();
622
0
                if self.current_code != end {
623
0
                    if self.current_code != self.clear_code {
624
0
                        self.buffer_code(self.current_code);
625
626
                        // When reading this code, the decoder will add an extra entry to its table
627
                        // before reading the end code. Thusly, it may increase its code size based
628
                        // on this additional entry.
629
0
                        if self.tree.keys.len() + usize::from(self.is_tiff)
630
0
                            > usize::from(self.buffer.max_code())
631
0
                            && self.buffer.code_size() < MAX_CODESIZE
632
0
                        {
633
0
                            self.buffer.bump_code_size();
634
0
                        }
635
0
                    }
636
0
                    self.buffer_code(end);
637
0
                    self.current_code = end;
638
0
                    self.buffer_pad();
639
0
                }
640
641
0
                break;
642
0
            }
643
644
0
            let mut next_code = None;
645
0
            let mut bytes = inp.iter();
646
0
            while let Some(&byte) = bytes.next() {
647
0
                if self.min_size < 8 && byte >= 1 << self.min_size {
648
0
                    status = Err(LzwError::InvalidCode);
649
0
                    break 'encoding;
650
0
                }
651
652
0
                inp = bytes.as_slice();
653
0
                match self.tree.iterate(self.current_code, byte) {
654
0
                    Ok(code) => self.current_code = code,
655
                    Err(_) => {
656
0
                        next_code = Some(self.current_code);
657
658
0
                        self.current_code = u16::from(byte);
659
0
                        break;
660
                    }
661
                }
662
            }
663
664
0
            match next_code {
665
                // No more bytes, no code produced.
666
0
                None => break,
667
0
                Some(code) => {
668
0
                    self.buffer_code(code);
669
670
0
                    if self.tree.keys.len() + usize::from(self.is_tiff)
671
0
                        > usize::from(self.buffer.max_code()) + 1
672
0
                        && self.buffer.code_size() < MAX_CODESIZE
673
0
                    {
674
0
                        self.buffer.bump_code_size();
675
0
                    }
676
677
0
                    if self.tree.keys.len() > MAX_ENTRIES {
678
0
                        self.buffer_code(self.clear_code);
679
0
                        self.tree.reset(self.min_size);
680
0
                        self.buffer.clear(self.min_size);
681
0
                    }
682
                }
683
            }
684
        }
685
686
0
        if inp.is_empty() && self.current_code == self.end_code() {
687
0
            if !self.flush_out(&mut out) {
688
0
                status = Ok(LzwStatus::Done);
689
0
            }
690
0
        }
691
692
0
        BufferResult {
693
0
            consumed_in: c_in - inp.len(),
694
0
            consumed_out: c_out - out.len(),
695
0
            status,
696
0
        }
697
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::advance
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::advance
698
699
0
    fn mark_ended(&mut self) -> bool {
700
0
        core::mem::replace(&mut self.has_ended, true)
701
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::mark_ended
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::mark_ended
702
703
0
    fn restart(&mut self) {
704
0
        self.has_ended = false;
705
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::restart
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::restart
706
707
0
    fn reset(&mut self) {
708
0
        self.restart();
709
0
        self.current_code = self.clear_code;
710
0
        self.tree.reset(self.min_size);
711
0
        self.buffer.reset(self.min_size);
712
0
        self.buffer_code(self.clear_code);
713
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer> as weezl::encode::Stateful>::reset
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer> as weezl::encode::Stateful>::reset
714
}
715
716
impl<B: Buffer> EncodeState<B> {
717
0
    fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
718
0
        self.buffer.push_out(out)
719
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::push_out
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::push_out
720
721
0
    fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
722
0
        self.buffer.flush_out(out)
723
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::flush_out
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::flush_out
724
725
0
    fn end_code(&self) -> Code {
726
0
        self.clear_code + 1
727
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::end_code
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::end_code
728
729
0
    fn buffer_pad(&mut self) {
730
0
        self.buffer.buffer_pad();
731
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::buffer_pad
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::buffer_pad
732
733
0
    fn buffer_code(&mut self, code: Code) {
734
0
        self.buffer.buffer_code(code);
735
0
    }
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::LsbBuffer>>::buffer_code
Unexecuted instantiation: <weezl::encode::EncodeState<weezl::encode::MsbBuffer>>::buffer_code
736
}
737
738
impl Buffer for MsbBuffer {
739
0
    fn new(min_size: u8) -> Self {
740
0
        MsbBuffer {
741
0
            code_size: min_size + 1,
742
0
            buffer: 0,
743
0
            bits_in_buffer: 0,
744
0
        }
745
0
    }
746
747
0
    fn reset(&mut self, min_size: u8) {
748
0
        self.code_size = min_size + 1;
749
0
        self.buffer = 0;
750
0
        self.bits_in_buffer = 0;
751
0
    }
752
753
0
    fn clear(&mut self, min_size: u8) {
754
0
        self.code_size = min_size + 1;
755
0
    }
756
757
0
    fn buffer_code(&mut self, code: Code) {
758
0
        let shift = 64 - self.bits_in_buffer - self.code_size;
759
0
        self.buffer |= u64::from(code) << shift;
760
0
        self.bits_in_buffer += self.code_size;
761
0
    }
762
763
0
    fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
764
0
        if self.bits_in_buffer + 2 * self.code_size < 64 {
765
0
            return false;
766
0
        }
767
768
0
        self.flush_out(out)
769
0
    }
770
771
0
    fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
772
0
        let want = usize::from(self.bits_in_buffer / 8);
773
0
        let count = want.min((*out).len());
774
0
        let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
775
0
        *out = tail;
776
777
0
        for b in bytes {
778
0
            *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8;
779
0
            self.buffer <<= 8;
780
0
            self.bits_in_buffer -= 8;
781
0
        }
782
783
0
        count < want
784
0
    }
785
786
0
    fn buffer_pad(&mut self) {
787
0
        let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7;
788
0
        self.bits_in_buffer += to_byte;
789
0
    }
790
791
0
    fn bump_code_size(&mut self) {
792
0
        self.code_size += 1;
793
0
    }
794
795
0
    fn max_code(&self) -> Code {
796
0
        (1 << self.code_size) - 1
797
0
    }
798
799
0
    fn code_size(&self) -> u8 {
800
0
        self.code_size
801
0
    }
802
}
803
804
impl Buffer for LsbBuffer {
805
0
    fn new(min_size: u8) -> Self {
806
0
        LsbBuffer {
807
0
            code_size: min_size + 1,
808
0
            buffer: 0,
809
0
            bits_in_buffer: 0,
810
0
        }
811
0
    }
812
813
0
    fn reset(&mut self, min_size: u8) {
814
0
        self.code_size = min_size + 1;
815
0
        self.buffer = 0;
816
0
        self.bits_in_buffer = 0;
817
0
    }
818
819
0
    fn clear(&mut self, min_size: u8) {
820
0
        self.code_size = min_size + 1;
821
0
    }
822
823
0
    fn buffer_code(&mut self, code: Code) {
824
0
        self.buffer |= u64::from(code) << self.bits_in_buffer;
825
0
        self.bits_in_buffer += self.code_size;
826
0
    }
827
828
0
    fn push_out(&mut self, out: &mut &mut [u8]) -> bool {
829
0
        if self.bits_in_buffer + 2 * self.code_size < 64 {
830
0
            return false;
831
0
        }
832
833
0
        self.flush_out(out)
834
0
    }
835
836
0
    fn flush_out(&mut self, out: &mut &mut [u8]) -> bool {
837
0
        let want = usize::from(self.bits_in_buffer / 8);
838
0
        let count = want.min((*out).len());
839
0
        let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count);
840
0
        *out = tail;
841
842
0
        for b in bytes {
843
0
            *b = (self.buffer & 0x0000_0000_0000_00ff) as u8;
844
0
            self.buffer >>= 8;
845
0
            self.bits_in_buffer -= 8;
846
0
        }
847
848
0
        count < want
849
0
    }
850
851
0
    fn buffer_pad(&mut self) {
852
0
        let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7;
853
0
        self.bits_in_buffer += to_byte;
854
0
    }
855
856
0
    fn bump_code_size(&mut self) {
857
0
        self.code_size += 1;
858
0
    }
859
860
0
    fn max_code(&self) -> Code {
861
0
        (1 << self.code_size) - 1
862
0
    }
863
864
0
    fn code_size(&self) -> u8 {
865
0
        self.code_size
866
0
    }
867
}
868
869
impl Tree {
870
0
    fn init(&mut self, min_size: u8) {
871
        // We need a way to represent the state of a currently empty buffer. We use the clear code
872
        // for this, thus create one complex mapping that leads to the one-char base codes.
873
0
        self.keys
874
0
            .resize((1 << min_size) + 2, FullKey::NoSuccessor.into());
875
0
        self.complex.push(Full {
876
0
            char_continuation: [0; 256],
877
0
        });
878
0
        let map_of_begin = self.complex.last_mut().unwrap();
879
0
        for ch in 0u16..256 {
880
0
            map_of_begin.char_continuation[usize::from(ch)] = ch;
881
0
        }
882
0
        self.keys[1 << min_size] = FullKey::Full(0).into();
883
0
    }
884
885
0
    fn reset(&mut self, min_size: u8) {
886
0
        self.simples.clear();
887
0
        self.keys.truncate((1 << min_size) + 2);
888
        // Keep entry for clear code.
889
0
        self.complex.truncate(1);
890
        // The first complex is not changed..
891
0
        for k in self.keys[..(1 << min_size) + 2].iter_mut() {
892
0
            *k = FullKey::NoSuccessor.into();
893
0
        }
894
0
        self.keys[1 << min_size] = FullKey::Full(0).into();
895
0
    }
896
897
0
    fn at_key(&self, code: Code, ch: u8) -> Option<Code> {
898
0
        let key = self.keys[usize::from(code)];
899
0
        match FullKey::from(key) {
900
0
            FullKey::NoSuccessor => None,
901
0
            FullKey::Simple(idx) => {
902
0
                let nexts = &self.simples[usize::from(idx)];
903
0
                let successors = nexts
904
0
                    .codes
905
0
                    .iter()
906
0
                    .zip(nexts.chars.iter())
907
0
                    .take(usize::from(nexts.count));
908
0
                for (&scode, &sch) in successors {
909
0
                    if sch == ch {
910
0
                        return Some(scode);
911
0
                    }
912
                }
913
914
0
                None
915
            }
916
0
            FullKey::Full(idx) => {
917
0
                let full = &self.complex[usize::from(idx)];
918
0
                let precode = full.char_continuation[usize::from(ch)];
919
0
                if usize::from(precode) < MAX_ENTRIES {
920
0
                    Some(precode)
921
                } else {
922
0
                    None
923
                }
924
            }
925
        }
926
0
    }
927
928
    /// Iterate to the next char.
929
    /// Return Ok when it was already in the tree or creates a new entry for it and returns Err.
930
0
    fn iterate(&mut self, code: Code, ch: u8) -> Result<Code, Code> {
931
0
        if let Some(next) = self.at_key(code, ch) {
932
0
            Ok(next)
933
        } else {
934
0
            Err(self.append(code, ch))
935
        }
936
0
    }
937
938
0
    fn append(&mut self, code: Code, ch: u8) -> Code {
939
0
        let next: Code = self.keys.len() as u16;
940
0
        let key = self.keys[usize::from(code)];
941
        // TODO: with debug assertions, check for non-existence
942
0
        match FullKey::from(key) {
943
0
            FullKey::NoSuccessor => {
944
0
                let new_key = FullKey::Simple(self.simples.len() as u16);
945
0
                self.simples.push(Simple::default());
946
0
                let simples = self.simples.last_mut().unwrap();
947
0
                simples.codes[0] = next;
948
0
                simples.chars[0] = ch;
949
0
                simples.count = 1;
950
0
                self.keys[usize::from(code)] = new_key.into();
951
0
            }
952
0
            FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => {
953
0
                let nexts = &mut self.simples[usize::from(idx)];
954
0
                let nidx = usize::from(nexts.count);
955
0
                nexts.chars[nidx] = ch;
956
0
                nexts.codes[nidx] = next;
957
0
                nexts.count += 1;
958
0
            }
959
0
            FullKey::Simple(idx) => {
960
0
                let new_key = FullKey::Full(self.complex.len() as u16);
961
0
                let simples = &self.simples[usize::from(idx)];
962
0
                self.complex.push(Full {
963
0
                    char_continuation: [Code::max_value(); 256],
964
0
                });
965
0
                let full = self.complex.last_mut().unwrap();
966
0
                for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) {
967
0
                    full.char_continuation[usize::from(pch)] = pcont;
968
0
                }
969
0
                self.keys[usize::from(code)] = new_key.into();
970
            }
971
0
            FullKey::Full(idx) => {
972
0
                let full = &mut self.complex[usize::from(idx)];
973
0
                full.char_continuation[usize::from(ch)] = next;
974
0
            }
975
        }
976
0
        self.keys.push(FullKey::NoSuccessor.into());
977
0
        next
978
0
    }
979
}
980
981
impl Default for FullKey {
982
0
    fn default() -> Self {
983
0
        FullKey::NoSuccessor
984
0
    }
985
}
986
987
impl Default for Simple {
988
0
    fn default() -> Self {
989
0
        Simple {
990
0
            codes: [0; SHORT],
991
0
            chars: [0; SHORT],
992
0
            count: 0,
993
0
        }
994
0
    }
995
}
996
997
impl From<CompressedKey> for FullKey {
998
0
    fn from(CompressedKey(key): CompressedKey) -> Self {
999
0
        match (key >> MAX_CODESIZE) & 0xf {
1000
0
            0 => FullKey::Full(key & 0xfff),
1001
0
            1 => FullKey::Simple(key & 0xfff),
1002
0
            _ => FullKey::NoSuccessor,
1003
        }
1004
0
    }
1005
}
1006
1007
impl From<FullKey> for CompressedKey {
1008
0
    fn from(full: FullKey) -> Self {
1009
0
        CompressedKey(match full {
1010
0
            FullKey::NoSuccessor => 0x2000,
1011
0
            FullKey::Simple(code) => 0x1000 | code,
1012
0
            FullKey::Full(code) => code,
1013
        })
1014
0
    }
1015
}
1016
1017
#[cfg(test)]
mod tests {
    use super::{BitOrder, Encoder, LzwError, LzwStatus};
    use crate::alloc::vec::Vec;
    use crate::decode::Decoder;
    #[cfg(feature = "std")]
    use crate::StreamBuf;

    /// An out-of-range symbol is reported as an error after the valid prefix was consumed, and
    /// the encoder can continue with corrected data so that the stream decodes to `[0, 1, 0]`.
    #[test]
    fn invalid_input_rejected() {
        const BIT_LEN: u8 = 2;
        let input = &[0, 1 << BIT_LEN /* invalid */, 0];
        let target = &mut [0u8; 128];
        let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN);

        encoder.finish();
        // We require simulation of normality, that is byte-for-byte compression.
        let result = encoder.encode_bytes(input, target);
        let rejected = match result.status {
            Err(LzwError::InvalidCode) => true,
            _ => false,
        };
        assert!(rejected);
        assert_eq!(result.consumed_in, 1);

        let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]);
        let done = match fixed.status {
            Ok(LzwStatus::Done) => true,
            _ => false,
        };
        assert!(done);
        assert_eq!(fixed.consumed_in, 2);

        // Okay, now test we actually fixed it.
        let compare = &mut [0u8; 4];
        let mut todo = &target[..result.consumed_out + fixed.consumed_out];
        let mut free = &mut compare[..];
        let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN);

        // Decode with up to 16 rounds, far too much but inconsequential.
        let mut rounds = 0;
        while rounds < 16 && !decoder.has_ended() {
            let result = decoder.decode_bytes(todo, free);
            assert!(result.status.is_ok());
            todo = &todo[result.consumed_in..];
            free = &mut free[result.consumed_out..];
            rounds += 1;
        }

        let remaining = { free }.len();
        let len = compare.len() - remaining;
        assert_eq!(todo, &[]);
        assert_eq!(compare[..len], [0, 1, 0]);
    }

    /// Constructing an encoder with a code size of 1 must panic.
    #[test]
    #[should_panic]
    fn invalid_code_size_low() {
        drop(Encoder::new(BitOrder::Msb, 1));
    }

    /// Constructing an encoder with a code size of 14 must panic.
    #[test]
    #[should_panic]
    fn invalid_code_size_high() {
        drop(Encoder::new(BitOrder::Msb, 14));
    }

    /// Sample payload for the stream tests: the crate's own `Cargo.lock`.
    fn make_decoded() -> Vec<u8> {
        const FILE: &[u8] =
            include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock"));
        Vec::from(FILE)
    }

    /// A caller-provided buffer must be used as is, without allocating an owned one.
    #[test]
    #[cfg(feature = "std")]
    fn into_stream_buffer_no_alloc() {
        let plain = make_decoded();
        let mut encoder = Encoder::new(BitOrder::Msb, 8);

        let mut output = Vec::new();
        let mut buffer = [0; 512];
        let mut istream = encoder.into_stream(&mut output);
        istream.set_buffer(&mut buffer[..]);
        istream.encode(plain.as_slice()).status.unwrap();

        match istream.buffer {
            Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
            Some(StreamBuf::Borrowed(_)) => {}
            None => panic!("Decoded without buffer??"),
        }
    }

    /// A requested buffer size bounds both the owned buffer and every write to the sink.
    #[test]
    #[cfg(feature = "std")]
    fn into_stream_buffer_small_alloc() {
        const BUF_SIZE: usize = 512;

        /// Writer wrapper asserting that no single write exceeds `BUF_SIZE`.
        struct WriteTap<W: std::io::Write>(W);

        impl<W: std::io::Write> std::io::Write for WriteTap<W> {
            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
                assert!(buf.len() <= BUF_SIZE);
                self.0.write(buf)
            }
            fn flush(&mut self) -> std::io::Result<()> {
                self.0.flush()
            }
        }

        let plain = make_decoded();
        let mut encoder = Encoder::new(BitOrder::Msb, 8);

        let mut output = Vec::new();
        let mut istream = encoder.into_stream(WriteTap(&mut output));
        istream.set_buffer_size(BUF_SIZE);
        istream.encode(plain.as_slice()).status.unwrap();

        match istream.buffer {
            None => panic!("Decoded without buffer??"),
            Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
            Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
        }
    }

    /// Encoding the same data again after `reset` must reproduce the first output exactly.
    #[test]
    #[cfg(feature = "std")]
    fn reset() {
        let plain = make_decoded();
        let mut encoder = Encoder::new(BitOrder::Msb, 8);
        let mut reference = None;

        for _ in 0..2 {
            let mut output = Vec::new();
            let mut buffer = [0; 512];
            let mut istream = encoder.into_stream(&mut output);
            istream.set_buffer(&mut buffer[..]);
            istream.encode_all(plain.as_slice()).status.unwrap();

            encoder.reset();
            match reference {
                Some(ref expected) => assert_eq!(output, *expected),
                None => reference = Some(output),
            }
        }
    }
}