Coverage Report

Created: 2024-06-18 07:39

/rust/registry/src/index.crates.io-6f17d22bba15001f/flate2-1.0.30/src/gz/bufread.rs
Line
Count
Source (jump to first uncovered line)
1
use std::cmp;
2
use std::io;
3
use std::io::prelude::*;
4
use std::mem;
5
6
use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
7
use crate::crc::CrcReader;
8
use crate::deflate;
9
use crate::Compression;
10
11
0
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is a cursor into `from`. It is advanced by the number of bytes
/// copied, and that same number is returned. A cursor at or beyond the end
/// of `from` copies nothing and yields 0 rather than panicking.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `get` returns `None` for an out-of-range cursor, which sidesteps the
    // `from.len() - *pos` underflow.
    let remaining = from.get(*pos..).unwrap_or(&[]);
    let count = cmp::min(into.len(), remaining.len());
    into[..count].copy_from_slice(&remaining[..count]);
    *pos += count;
    count
}
19
20
/// A gzip streaming encoder
21
///
22
/// This structure implements a [`Read`] interface. When read from, it reads
23
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
24
///
25
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
26
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
27
///
28
/// # Examples
29
///
30
/// ```
31
/// use std::io::prelude::*;
32
/// use std::io;
33
/// use flate2::Compression;
34
/// use flate2::bufread::GzEncoder;
35
/// use std::fs::File;
36
/// use std::io::BufReader;
37
///
38
/// // Opens sample file, compresses the contents and returns a Vector or error
39
/// // File wrapped in a BufReader implements BufRead
40
///
41
/// fn open_hello_world() -> io::Result<Vec<u8>> {
42
///     let f = File::open("examples/hello_world.txt")?;
43
///     let b = BufReader::new(f);
44
///     let mut gz = GzEncoder::new(b, Compression::fast());
45
///     let mut buffer = Vec::new();
46
///     gz.read_to_end(&mut buffer)?;
47
///     Ok(buffer)
48
/// }
49
/// ```
50
0
#[derive(Debug)]
51
pub struct GzEncoder<R> {
52
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
53
    header: Vec<u8>,
54
    pos: usize,
55
    eof: bool,
56
}
57
58
0
pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
59
0
    let crc = CrcReader::new(r);
60
0
    GzEncoder {
61
0
        inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
62
0
        header,
63
0
        pos: 0,
64
0
        eof: false,
65
0
    }
66
0
}
67
68
impl<R: BufRead> GzEncoder<R> {
69
    /// Creates a new encoder which will use the given compression level.
70
    ///
71
    /// The encoder is not configured specially for the emitted header. For
72
    /// header configuration, see the `GzBuilder` type.
73
    ///
74
    /// The data read from the stream `r` will be compressed and available
75
    /// through the returned reader.
76
0
    pub fn new(r: R, level: Compression) -> GzEncoder<R> {
77
0
        GzBuilder::new().buf_read(r, level)
78
0
    }
79
80
0
    fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
81
0
        if self.pos == 8 {
82
0
            return Ok(0);
83
0
        }
84
0
        let crc = self.inner.get_ref().crc();
85
0
        let ref arr = [
86
0
            (crc.sum() >> 0) as u8,
87
0
            (crc.sum() >> 8) as u8,
88
0
            (crc.sum() >> 16) as u8,
89
0
            (crc.sum() >> 24) as u8,
90
0
            (crc.amount() >> 0) as u8,
91
0
            (crc.amount() >> 8) as u8,
92
0
            (crc.amount() >> 16) as u8,
93
0
            (crc.amount() >> 24) as u8,
94
0
        ];
95
0
        Ok(copy(into, arr, &mut self.pos))
96
0
    }
97
}
98
99
impl<R> GzEncoder<R> {
100
    /// Acquires a reference to the underlying reader.
101
0
    pub fn get_ref(&self) -> &R {
102
0
        self.inner.get_ref().get_ref()
103
0
    }
104
105
    /// Acquires a mutable reference to the underlying reader.
106
    ///
107
    /// Note that mutation of the reader may result in surprising results if
108
    /// this encoder is continued to be used.
109
0
    pub fn get_mut(&mut self) -> &mut R {
110
0
        self.inner.get_mut().get_mut()
111
0
    }
112
113
    /// Returns the underlying stream, consuming this encoder
114
0
    pub fn into_inner(self) -> R {
115
0
        self.inner.into_inner().into_inner()
116
0
    }
117
}
118
119
/// Splits an 8-byte gzip trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`: bytes 0..4 decoded as the checksum and bytes 4..8
/// decoded as the byte count.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let [c0, c1, c2, c3, a0, a1, a2, a3] = *buf;
    let crc = u32::from_le_bytes([c0, c1, c2, c3]);
    let amt = u32::from_le_bytes([a0, a1, a2, a3]);
    (crc, amt)
}
131
132
impl<R: BufRead> Read for GzEncoder<R> {
133
0
    fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
134
0
        let mut amt = 0;
135
0
        if self.eof {
136
0
            return self.read_footer(into);
137
0
        } else if self.pos < self.header.len() {
138
0
            amt += copy(into, &self.header, &mut self.pos);
139
0
            if amt == into.len() {
140
0
                return Ok(amt);
141
0
            }
142
0
            let tmp = into;
143
0
            into = &mut tmp[amt..];
144
0
        }
145
0
        match self.inner.read(into)? {
146
            0 => {
147
0
                self.eof = true;
148
0
                self.pos = 0;
149
0
                self.read_footer(into)
150
            }
151
0
            n => Ok(amt + n),
152
        }
153
0
    }
154
}
155
156
impl<R: BufRead + Write> Write for GzEncoder<R> {
157
0
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
158
0
        self.get_mut().write(buf)
159
0
    }
160
161
0
    fn flush(&mut self) -> io::Result<()> {
162
0
        self.get_mut().flush()
163
0
    }
164
}
165
166
/// A decoder for a single member of a [gzip file].
167
///
168
/// This structure implements a [`Read`] interface. When read from, it reads
169
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
170
///
171
/// After reading a single member of the gzip data this reader will return
172
/// Ok(0) even if there are more bytes available in the underlying reader.
173
/// If you need the following bytes, call `into_inner()` after Ok(0) to
174
/// recover the underlying reader.
175
///
176
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
177
/// or read more
178
/// [in the introduction](../index.html#about-multi-member-gzip-files).
179
///
180
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
181
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
182
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
183
///
184
/// # Examples
185
///
186
/// ```
187
/// use std::io::prelude::*;
188
/// use std::io;
189
/// # use flate2::Compression;
190
/// # use flate2::write::GzEncoder;
191
/// use flate2::bufread::GzDecoder;
192
///
193
/// # fn main() {
194
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
195
/// #   e.write_all(b"Hello World").unwrap();
196
/// #   let bytes = e.finish().unwrap();
197
/// #   println!("{}", decode_reader(bytes).unwrap());
198
/// # }
199
/// #
200
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
201
/// // Here &[u8] implements BufRead
202
///
203
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
204
///    let mut gz = GzDecoder::new(&bytes[..]);
205
///    let mut s = String::new();
206
///    gz.read_to_string(&mut s)?;
207
///    Ok(s)
208
/// }
209
/// ```
210
0
#[derive(Debug)]
211
pub struct GzDecoder<R> {
212
    state: GzState,
213
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
214
    multi: bool,
215
}
216
217
0
#[derive(Debug)]
218
enum GzState {
219
    Header(GzHeaderParser),
220
    Body(GzHeader),
221
    Finished(GzHeader, usize, [u8; 8]),
222
    Err(io::Error),
223
    End(Option<GzHeader>),
224
}
225
226
impl<R: BufRead> GzDecoder<R> {
227
    /// Creates a new decoder from the given reader, immediately parsing the
228
    /// gzip header.
229
0
    pub fn new(mut r: R) -> GzDecoder<R> {
230
0
        let mut header_parser = GzHeaderParser::new();
231
232
0
        let state = match header_parser.parse(&mut r) {
233
0
            Ok(_) => GzState::Body(GzHeader::from(header_parser)),
234
0
            Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
235
0
                GzState::Header(header_parser)
236
            }
237
0
            Err(err) => GzState::Err(err),
238
        };
239
240
0
        GzDecoder {
241
0
            state,
242
0
            reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
243
0
            multi: false,
244
0
        }
245
0
    }
246
247
0
    fn multi(mut self, flag: bool) -> GzDecoder<R> {
248
0
        self.multi = flag;
249
0
        self
250
0
    }
251
}
252
253
impl<R> GzDecoder<R> {
254
    /// Returns the header associated with this stream, if it was valid
255
0
    pub fn header(&self) -> Option<&GzHeader> {
256
0
        match &self.state {
257
0
            GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
258
0
            GzState::End(header) => header.as_ref(),
259
0
            _ => None,
260
        }
261
0
    }
262
263
    /// Acquires a reference to the underlying reader.
264
0
    pub fn get_ref(&self) -> &R {
265
0
        self.reader.get_ref().get_ref()
266
0
    }
267
268
    /// Acquires a mutable reference to the underlying stream.
269
    ///
270
    /// Note that mutation of the stream may result in surprising results if
271
    /// this decoder is continued to be used.
272
0
    pub fn get_mut(&mut self) -> &mut R {
273
0
        self.reader.get_mut().get_mut()
274
0
    }
275
276
    /// Consumes this decoder, returning the underlying reader.
277
0
    pub fn into_inner(self) -> R {
278
0
        self.reader.into_inner().into_inner()
279
0
    }
280
}
281
282
impl<R: BufRead> Read for GzDecoder<R> {
283
0
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
284
0
        loop {
285
0
            match &mut self.state {
286
0
                GzState::Header(parser) => {
287
0
                    parser.parse(self.reader.get_mut().get_mut())?;
288
0
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
289
                }
290
0
                GzState::Body(header) => {
291
0
                    if into.is_empty() {
292
0
                        return Ok(0);
293
0
                    }
294
0
                    match self.reader.read(into)? {
295
0
                        0 => {
296
0
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
297
0
                        }
298
0
                        n => {
299
0
                            return Ok(n);
300
                        }
301
                    }
302
                }
303
0
                GzState::Finished(header, pos, buf) => {
304
0
                    if *pos < buf.len() {
305
0
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
306
                    } else {
307
0
                        let (crc, amt) = finish(&buf);
308
0
309
0
                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
310
0
                            self.state = GzState::End(Some(mem::take(header)));
311
0
                            return Err(corrupt());
312
0
                        } else if self.multi {
313
0
                            let is_eof = self
314
0
                                .reader
315
0
                                .get_mut()
316
0
                                .get_mut()
317
0
                                .fill_buf()
318
0
                                .map(|buf| buf.is_empty())?;
319
320
0
                            if is_eof {
321
0
                                self.state = GzState::End(Some(mem::take(header)));
322
0
                            } else {
323
0
                                self.reader.reset();
324
0
                                self.reader.get_mut().reset_data();
325
0
                                self.state = GzState::Header(GzHeaderParser::new())
326
                            }
327
0
                        } else {
328
0
                            self.state = GzState::End(Some(mem::take(header)));
329
0
                        }
330
                    }
331
                }
332
0
                GzState::Err(err) => {
333
0
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
334
0
                    self.state = GzState::End(None);
335
0
                    return result;
336
                }
337
0
                GzState::End(_) => return Ok(0),
338
            }
339
        }
340
0
    }
341
}
342
343
impl<R: BufRead + Write> Write for GzDecoder<R> {
344
0
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
345
0
        self.get_mut().write(buf)
346
0
    }
347
348
0
    fn flush(&mut self) -> io::Result<()> {
349
0
        self.get_mut().flush()
350
0
    }
351
}
352
353
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
354
///
355
/// This structure implements a [`Read`] interface. When read from, it reads
356
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
357
///
358
/// A gzip file consists of a series of *members* concatenated one after another.
359
/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
360
/// underlying reader does. For a file, this reads to the end of the file.
361
///
362
/// To handle members separately, see [GzDecoder] or read more
363
/// [in the introduction](../index.html#about-multi-member-gzip-files).
364
///
365
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
366
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
367
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
368
///
369
/// # Examples
370
///
371
/// ```
372
/// use std::io::prelude::*;
373
/// use std::io;
374
/// # use flate2::Compression;
375
/// # use flate2::write::GzEncoder;
376
/// use flate2::bufread::MultiGzDecoder;
377
///
378
/// # fn main() {
379
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
380
/// #   e.write_all(b"Hello World").unwrap();
381
/// #   let bytes = e.finish().unwrap();
382
/// #   println!("{}", decode_reader(bytes).unwrap());
383
/// # }
384
/// #
385
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
386
/// // Here &[u8] implements BufRead
387
///
388
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
389
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
390
///    let mut s = String::new();
391
///    gz.read_to_string(&mut s)?;
392
///    Ok(s)
393
/// }
394
/// ```
395
0
#[derive(Debug)]
396
pub struct MultiGzDecoder<R>(GzDecoder<R>);
397
398
impl<R: BufRead> MultiGzDecoder<R> {
399
    /// Creates a new decoder from the given reader, immediately parsing the
400
    /// (first) gzip header. If the gzip stream contains multiple members all will
401
    /// be decoded.
402
0
    pub fn new(r: R) -> MultiGzDecoder<R> {
403
0
        MultiGzDecoder(GzDecoder::new(r).multi(true))
404
0
    }
405
}
406
407
impl<R> MultiGzDecoder<R> {
408
    /// Returns the current header associated with this stream, if it's valid
409
0
    pub fn header(&self) -> Option<&GzHeader> {
410
0
        self.0.header()
411
0
    }
412
413
    /// Acquires a reference to the underlying reader.
414
0
    pub fn get_ref(&self) -> &R {
415
0
        self.0.get_ref()
416
0
    }
417
418
    /// Acquires a mutable reference to the underlying stream.
419
    ///
420
    /// Note that mutation of the stream may result in surprising results if
421
    /// this decoder is continued to be used.
422
0
    pub fn get_mut(&mut self) -> &mut R {
423
0
        self.0.get_mut()
424
0
    }
425
426
    /// Consumes this decoder, returning the underlying reader.
427
0
    pub fn into_inner(self) -> R {
428
0
        self.0.into_inner()
429
0
    }
430
}
431
432
impl<R: BufRead> Read for MultiGzDecoder<R> {
433
0
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
434
0
        self.0.read(into)
435
0
    }
436
}
437
438
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` guarantees the whole payload is submitted; a bare
            // `write` is allowed to accept only part of it.
            e.write_all(expected.as_bytes()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Read into a buffer with real capacity: a zero-length destination
        // would make this check pass no matter what the decoder does.
        let mut scratch = [0u8; 16];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}