Coverage Report

Created: 2025-07-01 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/flate2-1.1.2/src/gz/bufread.rs
Line
Count
Source (jump to first uncovered line)
1
use std::cmp;
2
use std::io;
3
use std::io::prelude::*;
4
use std::mem;
5
6
use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
7
use crate::crc::CrcReader;
8
use crate::deflate;
9
use crate::Compression;
10
11
0
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
12
0
    let min = cmp::min(into.len(), from.len() - *pos);
13
0
    into[..min].copy_from_slice(&from[*pos..*pos + min]);
14
0
    *pos += min;
15
0
    min
16
0
}
17
18
/// A gzip streaming encoder
19
///
20
/// This structure implements a [`Read`] interface. When read from, it reads
21
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
22
///
23
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
24
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
25
///
26
/// # Examples
27
///
28
/// ```
29
/// use std::io::prelude::*;
30
/// use std::io;
31
/// use flate2::Compression;
32
/// use flate2::bufread::GzEncoder;
33
/// use std::fs::File;
34
/// use std::io::BufReader;
35
///
36
/// // Opens sample file, compresses the contents and returns a Vector or error
37
/// // File wrapped in a BufReader implements BufRead
38
///
39
/// fn open_hello_world() -> io::Result<Vec<u8>> {
40
///     let f = File::open("examples/hello_world.txt")?;
41
///     let b = BufReader::new(f);
42
///     let mut gz = GzEncoder::new(b, Compression::fast());
43
///     let mut buffer = Vec::new();
44
///     gz.read_to_end(&mut buffer)?;
45
///     Ok(buffer)
46
/// }
47
/// ```
48
#[derive(Debug)]
49
pub struct GzEncoder<R> {
50
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
51
    header: Vec<u8>,
52
    pos: usize,
53
    eof: bool,
54
}
55
56
0
pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
57
0
    let crc = CrcReader::new(r);
58
0
    GzEncoder {
59
0
        inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
60
0
        header,
61
0
        pos: 0,
62
0
        eof: false,
63
0
    }
64
0
}
65
66
impl<R: BufRead> GzEncoder<R> {
67
    /// Creates a new encoder which will use the given compression level.
68
    ///
69
    /// The encoder is not configured specially for the emitted header. For
70
    /// header configuration, see the `GzBuilder` type.
71
    ///
72
    /// The data read from the stream `r` will be compressed and available
73
    /// through the returned reader.
74
0
    pub fn new(r: R, level: Compression) -> GzEncoder<R> {
75
0
        GzBuilder::new().buf_read(r, level)
76
0
    }
77
78
0
    fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
79
0
        if self.pos == 8 {
80
0
            return Ok(0);
81
0
        }
82
0
        let crc = self.inner.get_ref().crc();
83
0
        let calced_crc_bytes = crc.sum().to_le_bytes();
84
0
        let arr = [
85
0
            calced_crc_bytes[0],
86
0
            calced_crc_bytes[1],
87
0
            calced_crc_bytes[2],
88
0
            calced_crc_bytes[3],
89
0
            crc.amount() as u8,
90
0
            (crc.amount() >> 8) as u8,
91
0
            (crc.amount() >> 16) as u8,
92
0
            (crc.amount() >> 24) as u8,
93
0
        ];
94
0
        Ok(copy(into, &arr, &mut self.pos))
95
0
    }
96
}
97
98
impl<R> GzEncoder<R> {
99
    /// Acquires a reference to the underlying reader.
100
0
    pub fn get_ref(&self) -> &R {
101
0
        self.inner.get_ref().get_ref()
102
0
    }
103
104
    /// Acquires a mutable reference to the underlying reader.
105
    ///
106
    /// Note that mutation of the reader may result in surprising results if
107
    /// this encoder is continued to be used.
108
0
    pub fn get_mut(&mut self) -> &mut R {
109
0
        self.inner.get_mut().get_mut()
110
0
    }
111
112
    /// Returns the underlying stream, consuming this encoder
113
0
    pub fn into_inner(self) -> R {
114
0
        self.inner.into_inner().into_inner()
115
0
    }
116
}
117
118
#[inline]
119
0
fn finish(buf: &[u8; 8]) -> (u32, u32) {
120
0
    let crc = (buf[0] as u32)
121
0
        | ((buf[1] as u32) << 8)
122
0
        | ((buf[2] as u32) << 16)
123
0
        | ((buf[3] as u32) << 24);
124
0
    let amt = (buf[4] as u32)
125
0
        | ((buf[5] as u32) << 8)
126
0
        | ((buf[6] as u32) << 16)
127
0
        | ((buf[7] as u32) << 24);
128
0
    (crc, amt)
129
0
}
130
131
impl<R: BufRead> Read for GzEncoder<R> {
132
0
    fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
133
0
        let mut amt = 0;
134
0
        if self.eof {
135
0
            return self.read_footer(into);
136
0
        } else if self.pos < self.header.len() {
137
0
            amt += copy(into, &self.header, &mut self.pos);
138
0
            if amt == into.len() {
139
0
                return Ok(amt);
140
0
            }
141
0
            let tmp = into;
142
0
            into = &mut tmp[amt..];
143
0
        }
144
0
        match self.inner.read(into)? {
145
            0 => {
146
0
                self.eof = true;
147
0
                self.pos = 0;
148
0
                self.read_footer(into)
149
            }
150
0
            n => Ok(amt + n),
151
        }
152
0
    }
153
}
154
155
impl<R: BufRead + Write> Write for GzEncoder<R> {
156
0
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
157
0
        self.get_mut().write(buf)
158
0
    }
159
160
0
    fn flush(&mut self) -> io::Result<()> {
161
0
        self.get_mut().flush()
162
0
    }
163
}
164
165
/// A decoder for a single member of a [gzip file].
166
///
167
/// This structure implements a [`Read`] interface. When read from, it reads
168
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
169
///
170
/// After reading a single member of the gzip data this reader will return
171
/// Ok(0) even if there are more bytes available in the underlying reader.
172
/// If you need the following bytes, call `into_inner()` after Ok(0) to
173
/// recover the underlying reader.
174
///
175
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
176
/// or read more
177
/// [in the introduction](../index.html#about-multi-member-gzip-files).
178
///
179
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
180
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
181
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
182
///
183
/// # Examples
184
///
185
/// ```
186
/// use std::io::prelude::*;
187
/// use std::io;
188
/// # use flate2::Compression;
189
/// # use flate2::write::GzEncoder;
190
/// use flate2::bufread::GzDecoder;
191
///
192
/// # fn main() {
193
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
194
/// #   e.write_all(b"Hello World").unwrap();
195
/// #   let bytes = e.finish().unwrap();
196
/// #   println!("{}", decode_reader(bytes).unwrap());
197
/// # }
198
/// #
199
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
200
/// // Here &[u8] implements BufRead
201
///
202
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
203
///    let mut gz = GzDecoder::new(&bytes[..]);
204
///    let mut s = String::new();
205
///    gz.read_to_string(&mut s)?;
206
///    Ok(s)
207
/// }
208
/// ```
209
#[derive(Debug)]
210
pub struct GzDecoder<R> {
211
    state: GzState,
212
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
213
    multi: bool,
214
}
215
216
#[derive(Debug)]
217
enum GzState {
218
    Header(GzHeaderParser),
219
    Body(GzHeader),
220
    Finished(GzHeader, usize, [u8; 8]),
221
    Err(io::Error),
222
    End(Option<GzHeader>),
223
}
224
225
impl<R: BufRead> GzDecoder<R> {
226
    /// Creates a new decoder from the given reader, immediately parsing the
227
    /// gzip header.
228
0
    pub fn new(mut r: R) -> GzDecoder<R> {
229
0
        let mut header_parser = GzHeaderParser::new();
230
231
0
        let state = match header_parser.parse(&mut r) {
232
0
            Ok(_) => GzState::Body(GzHeader::from(header_parser)),
233
0
            Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
234
0
                GzState::Header(header_parser)
235
            }
236
0
            Err(err) => GzState::Err(err),
237
        };
238
239
0
        GzDecoder {
240
0
            state,
241
0
            reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
242
0
            multi: false,
243
0
        }
244
0
    }
245
246
0
    fn multi(mut self, flag: bool) -> GzDecoder<R> {
247
0
        self.multi = flag;
248
0
        self
249
0
    }
250
}
251
252
impl<R> GzDecoder<R> {
253
    /// Returns the header associated with this stream, if it was valid
254
0
    pub fn header(&self) -> Option<&GzHeader> {
255
0
        match &self.state {
256
0
            GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
257
0
            GzState::End(header) => header.as_ref(),
258
0
            _ => None,
259
        }
260
0
    }
261
262
    /// Acquires a reference to the underlying reader.
263
0
    pub fn get_ref(&self) -> &R {
264
0
        self.reader.get_ref().get_ref()
265
0
    }
266
267
    /// Acquires a mutable reference to the underlying stream.
268
    ///
269
    /// Note that mutation of the stream may result in surprising results if
270
    /// this decoder is continued to be used.
271
0
    pub fn get_mut(&mut self) -> &mut R {
272
0
        self.reader.get_mut().get_mut()
273
0
    }
274
275
    /// Consumes this decoder, returning the underlying reader.
276
0
    pub fn into_inner(self) -> R {
277
0
        self.reader.into_inner().into_inner()
278
0
    }
279
}
280
281
impl<R: BufRead> Read for GzDecoder<R> {
282
0
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
283
        loop {
284
0
            match &mut self.state {
285
0
                GzState::Header(parser) => {
286
0
                    parser.parse(self.reader.get_mut().get_mut())?;
287
0
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
288
                }
289
0
                GzState::Body(header) => {
290
0
                    if into.is_empty() {
291
0
                        return Ok(0);
292
0
                    }
293
0
                    match self.reader.read(into)? {
294
0
                        0 => {
295
0
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
296
0
                        }
297
0
                        n => {
298
0
                            return Ok(n);
299
                        }
300
                    }
301
                }
302
0
                GzState::Finished(header, pos, buf) => {
303
0
                    if *pos < buf.len() {
304
0
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
305
                    } else {
306
0
                        let (crc, amt) = finish(buf);
307
0
308
0
                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
309
0
                            self.state = GzState::End(Some(mem::take(header)));
310
0
                            return Err(corrupt());
311
0
                        } else if self.multi {
312
0
                            let is_eof = self
313
0
                                .reader
314
0
                                .get_mut()
315
0
                                .get_mut()
316
0
                                .fill_buf()
317
0
                                .map(|buf| buf.is_empty())?;
318
319
0
                            if is_eof {
320
0
                                self.state = GzState::End(Some(mem::take(header)));
321
0
                            } else {
322
0
                                self.reader.reset();
323
0
                                self.reader.get_mut().reset_data();
324
0
                                self.state = GzState::Header(GzHeaderParser::new())
325
                            }
326
0
                        } else {
327
0
                            self.state = GzState::End(Some(mem::take(header)));
328
0
                        }
329
                    }
330
                }
331
0
                GzState::Err(err) => {
332
0
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
333
0
                    self.state = GzState::End(None);
334
0
                    return result;
335
                }
336
0
                GzState::End(_) => return Ok(0),
337
            }
338
        }
339
0
    }
340
}
341
342
impl<R: BufRead + Write> Write for GzDecoder<R> {
343
0
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
344
0
        self.get_mut().write(buf)
345
0
    }
346
347
0
    fn flush(&mut self) -> io::Result<()> {
348
0
        self.get_mut().flush()
349
0
    }
350
}
351
352
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
353
///
354
/// This structure implements a [`Read`] interface. When read from, it reads
355
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
356
///
357
/// A gzip file consists of a series of *members* concatenated one after another.
358
/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
359
/// underlying reader does. For a file, this reads to the end of the file.
360
///
361
/// To handle members separately, see [GzDecoder] or read more
362
/// [in the introduction](../index.html#about-multi-member-gzip-files).
363
///
364
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
365
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
366
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
367
///
368
/// # Examples
369
///
370
/// ```
371
/// use std::io::prelude::*;
372
/// use std::io;
373
/// # use flate2::Compression;
374
/// # use flate2::write::GzEncoder;
375
/// use flate2::bufread::MultiGzDecoder;
376
///
377
/// # fn main() {
378
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
379
/// #   e.write_all(b"Hello World").unwrap();
380
/// #   let bytes = e.finish().unwrap();
381
/// #   println!("{}", decode_reader(bytes).unwrap());
382
/// # }
383
/// #
384
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
385
/// // Here &[u8] implements BufRead
386
///
387
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
388
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
389
///    let mut s = String::new();
390
///    gz.read_to_string(&mut s)?;
391
///    Ok(s)
392
/// }
393
/// ```
394
#[derive(Debug)]
395
pub struct MultiGzDecoder<R>(GzDecoder<R>);
396
397
impl<R: BufRead> MultiGzDecoder<R> {
398
    /// Creates a new decoder from the given reader, immediately parsing the
399
    /// (first) gzip header. If the gzip stream contains multiple members all will
400
    /// be decoded.
401
0
    pub fn new(r: R) -> MultiGzDecoder<R> {
402
0
        MultiGzDecoder(GzDecoder::new(r).multi(true))
403
0
    }
404
}
405
406
impl<R> MultiGzDecoder<R> {
407
    /// Returns the current header associated with this stream, if it's valid
408
0
    pub fn header(&self) -> Option<&GzHeader> {
409
0
        self.0.header()
410
0
    }
411
412
    /// Acquires a reference to the underlying reader.
413
0
    pub fn get_ref(&self) -> &R {
414
0
        self.0.get_ref()
415
0
    }
416
417
    /// Acquires a mutable reference to the underlying stream.
418
    ///
419
    /// Note that mutation of the stream may result in surprising results if
420
    /// this decoder is continued to be used.
421
0
    pub fn get_mut(&mut self) -> &mut R {
422
0
        self.0.get_mut()
423
0
    }
424
425
    /// Consumes this decoder, returning the underlying reader.
426
0
    pub fn into_inner(self) -> R {
427
0
        self.0.into_inner()
428
0
    }
429
}
430
431
impl<R: BufRead> Read for MultiGzDecoder<R> {
432
0
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
433
0
        self.0.read(into)
434
0
    }
435
}
436
437
#[cfg(test)]
438
mod test {
439
    use crate::bufread::GzDecoder;
440
    use crate::gz::write;
441
    use crate::Compression;
442
    use std::io::{Read, Write};
443
444
    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
445
    // additional data to be consumed by the caller.
446
    #[test]
447
    fn decode_extra_data() {
448
        let expected = "Hello World";
449
450
        let compressed = {
451
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
452
            e.write(expected.as_ref()).unwrap();
453
            let mut b = e.finish().unwrap();
454
            b.push(b'x');
455
            b
456
        };
457
458
        let mut output = Vec::new();
459
        let mut decoder = GzDecoder::new(compressed.as_slice());
460
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
461
        assert_eq!(decoded_bytes, output.len());
462
        let actual = std::str::from_utf8(&output).expect("String parsing error");
463
        assert_eq!(
464
            actual, expected,
465
            "after decompression we obtain the original input"
466
        );
467
468
        output.clear();
469
        assert_eq!(
470
            decoder.read(&mut output).unwrap(),
471
            0,
472
            "subsequent read of decoder returns 0, but inner reader can return additional data"
473
        );
474
        let mut reader = decoder.into_inner();
475
        assert_eq!(
476
            reader.read_to_end(&mut output).unwrap(),
477
            1,
478
            "extra data is accessible in underlying buf-read"
479
        );
480
        assert_eq!(output, b"x");
481
    }
482
}