/rust/registry/src/index.crates.io-1949cf8c6b5b557f/flate2-1.1.2/src/gz/mod.rs

Source
use std::ffi::CString;
use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
use std::time;

use crate::bufreader::BufReader;
use crate::{Compression, Crc};

/// Flag bit in the header's flags byte (byte 3): a 2-byte header CRC follows
/// the optional fields and is checked by the parser.
pub static FHCRC: u8 = 1 << 1;
/// Flag bit in the header's flags byte: a 2-byte little-endian length and an
/// `extra` field of that many bytes are present.
pub static FEXTRA: u8 = 1 << 2;
/// Flag bit in the header's flags byte: a nul-terminated filename is present.
pub static FNAME: u8 = 1 << 3;
/// Flag bit in the header's flags byte: a nul-terminated comment is present.
pub static FCOMMENT: u8 = 1 << 4;
/// Mask of the reserved flag bits (5, 6 and 7). The parser rejects a header
/// in which any of them is set.
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;

pub mod bufread;
pub mod read;
pub mod write;

// The maximum length, in bytes and not counting the terminating nul, of the
// header filename and comment fields. More than enough for these fields in
// reasonable use, but prevents possible attacks: `read_to_nul` fails with
// "gzip header field too long" instead of growing a field past this size.
const MAX_HEADER_BUF: usize = 65535;

/// Metadata read from (or destined for) the header of a gzip stream.
///
/// The optional fields are `None` when the stream's header did not carry
/// them.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    extra: Option<Vec<u8>>,
    filename: Option<Vec<u8>>,
    comment: Option<Vec<u8>>,
    operating_system: u8,
    mtime: u32,
}

impl GzHeader {
    /// The raw bytes of the header's `filename` field, or `None` if the
    /// header has no filename.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// The raw bytes of the header's `extra` field, or `None` if the header
    /// has no extra field.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// The raw bytes of the header's `comment` field, or `None` if the header
    /// has no comment.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// The header's `operating_system` byte.
    ///
    /// There are predefined values for various operating systems; 255 means
    /// that the value is unknown.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// The most recent modification time of the original file being
    /// compressed.
    ///
    /// The time is in Unix format, i.e., seconds since 00:00:00 GMT,
    /// Jan. 1, 1970. (Note that this may cause problems for MS-DOS and other
    /// systems that use local rather than Universal time.) If the compressed
    /// data did not come from a file, `mtime` is set to the time at which
    /// compression started. A value of 0 means no time stamp is available.
    ///
    /// The usage of `mtime` is discouraged because of the Year 2038 problem.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// The modification time as a [`time::SystemTime`].
    ///
    /// Yields `None` when the stored counter is 0, which indicates that no
    /// time stamp is available. Otherwise the counter is interpreted as
    /// seconds since 00:00:00 GMT, Jan. 1 1970; see [`mtime`](Self::mtime)
    /// for more detail.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => Some(time::UNIX_EPOCH + time::Duration::from_secs(u64::from(secs))),
        }
    }
}

/// The stage that `GzHeaderParser::parse` has reached.
///
/// Each variant stores the partial progress for its stage so that parsing can
/// pick up again after a read returned early. The `Option<Box<Crc>>` carried
/// by most variants is the running checksum over the header bytes consumed so
/// far; it is `Some` only when the header's `FHCRC` flag is set.
#[derive(Debug, Default)]
pub enum GzHeaderState {
    /// Reading the 10 fixed header bytes: number of bytes read so far and the
    /// buffer they are read into.
    Start(u8, [u8; 10]),
    /// Reading the 2-byte little-endian length of the `extra` field (skipped
    /// when `FEXTRA` is not set): running CRC, bytes read so far, buffer.
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
    /// Reading the `extra` field itself: running CRC and the number of extra
    /// bytes read so far.
    Extra(Option<Box<Crc>>, u16),
    /// Reading the nul-terminated filename (skipped when `FNAME` is not set).
    Filename(Option<Box<Crc>>),
    /// Reading the nul-terminated comment (skipped when `FCOMMENT` is not
    /// set).
    Comment(Option<Box<Crc>>),
    /// Reading and verifying the 2-byte stored header CRC (only when a running
    /// CRC is present): running CRC, bytes read so far, buffer.
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
    /// The whole header has been parsed. This is also the `Default` state.
    #[default]
    Complete,
}

/// Incremental parser for a gzip header.
///
/// Note that the derived `Default` value starts in `GzHeaderState::Complete`
/// with an empty header; `GzHeaderParser::new` is what creates a parser that
/// is ready to read a header.
#[derive(Debug, Default)]
pub struct GzHeaderParser {
    // Current parsing stage together with its partial progress.
    state: GzHeaderState,
    // The header's flags byte (byte 3), recorded once the fixed part is read.
    flags: u8,
    // The header fields collected so far.
    header: GzHeader,
}

impl GzHeaderParser {
    /// Creates a parser positioned at the very start of a gzip header, with
    /// no bytes read yet and an empty `GzHeader`.
    fn new() -> Self {
        GzHeaderParser {
            state: GzHeaderState::Start(0, [0; 10]),
            flags: 0,
            header: GzHeader::default(),
        }
    }

    /// Reads the gzip header from `r`, advancing through the states of
    /// `GzHeaderState` until `Complete` is reached.
    ///
    /// All progress (byte counts, partially filled buffers, the running CRC)
    /// lives in `self`, so when a read fails and this returns early, the
    /// bytes consumed so far stay recorded and a later call continues from
    /// the same state rather than starting over.
    ///
    /// # Errors
    ///
    /// * `bad_header()` if the identification bytes are not `0x1f 0x8b`, the
    ///   compression method is not 8, or a reserved flag bit is set.
    /// * `corrupt()` if a header CRC is present and does not match.
    /// * Any error returned by `read_into` or `read_to_nul`.
    fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
        loop {
            match &mut self.state {
                GzHeaderState::Start(count, buffer) => {
                    // `count` is updated after every successful read so that a
                    // later call resumes filling the buffer where this one
                    // stopped.
                    while (*count as usize) < buffer.len() {
                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                    }
                    // Gzip identification bytes
                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
                        return Err(bad_header());
                    }
                    // Gzip compression method (8 = deflate)
                    if buffer[2] != 8 {
                        return Err(bad_header());
                    }
                    self.flags = buffer[3];
                    // RFC1952: "must give an error indication if any reserved bit is non-zero"
                    if self.flags & FRESERVED != 0 {
                        return Err(bad_header());
                    }
                    // Bytes 4..8 hold the modification time, least significant
                    // byte first.
                    self.header.mtime = (buffer[4] as u32)
                        | ((buffer[5] as u32) << 8)
                        | ((buffer[6] as u32) << 16)
                        | ((buffer[7] as u32) << 24);
                    // Byte 8 is read but not used.
                    let _xfl = buffer[8];
                    self.header.operating_system = buffer[9];
                    // A running CRC is only kept when the header says a header
                    // CRC follows; it starts with the 10 fixed bytes.
                    let crc = if self.flags & FHCRC != 0 {
                        let mut crc = Box::new(Crc::new());
                        crc.update(buffer);
                        Some(crc)
                    } else {
                        None
                    };
                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
                }
                GzHeaderState::Xlen(crc, count, buffer) => {
                    if self.flags & FEXTRA != 0 {
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        if let Some(crc) = crc {
                            crc.update(buffer);
                        }
                        let xlen = parse_le_u16(buffer);
                        // Pre-size the extra field; the `Extra` state fills it
                        // in place.
                        self.header.extra = Some(vec![0; xlen as usize]);
                        self.state = GzHeaderState::Extra(crc.take(), 0);
                    } else {
                        // No extra field: go straight to the filename stage.
                        self.state = GzHeaderState::Filename(crc.take());
                    }
                }
                GzHeaderState::Extra(crc, count) => {
                    debug_assert!(self.header.extra.is_some());
                    let extra = self.header.extra.as_mut().unwrap();
                    while (*count as usize) < extra.len() {
                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
                    }
                    if let Some(crc) = crc {
                        crc.update(extra);
                    }
                    self.state = GzHeaderState::Filename(crc.take());
                }
                GzHeaderState::Filename(crc) => {
                    if self.flags & FNAME != 0 {
                        // Bytes are appended directly to the stored filename,
                        // so anything read before an early return is kept.
                        let filename = self.header.filename.get_or_insert_with(Vec::new);
                        read_to_nul(r, filename)?;
                        if let Some(crc) = crc {
                            crc.update(filename);
                            // `read_to_nul` does not store the terminator, but
                            // it is fed to the checksum as well.
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Comment(crc.take());
                }
                GzHeaderState::Comment(crc) => {
                    if self.flags & FCOMMENT != 0 {
                        // Same handling as the filename field above.
                        let comment = self.header.comment.get_or_insert_with(Vec::new);
                        read_to_nul(r, comment)?;
                        if let Some(crc) = crc {
                            crc.update(comment);
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
                }
                GzHeaderState::Crc(crc, count, buffer) => {
                    // The running CRC exists only if FHCRC was set, so its
                    // presence decides whether a stored CRC is read at all.
                    if let Some(crc) = crc {
                        debug_assert!(self.flags & FHCRC != 0);
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        let stored_crc = parse_le_u16(buffer);
                        // The stored value is 16 bits wide, so the computed
                        // sum is cut down to 16 bits before comparing.
                        let calced_crc = crc.sum() as u16;
                        if stored_crc != calced_crc {
                            return Err(corrupt());
                        }
                    }
                    self.state = GzHeaderState::Complete;
                }
                GzHeaderState::Complete => {
                    return Ok(());
                }
            }
        }
    }

    /// Returns the header once the parser is in the `Complete` state, and
    /// `None` while parsing is still in progress.
    fn header(&self) -> Option<&GzHeader> {
        match self.state {
            GzHeaderState::Complete => Some(&self.header),
            _ => None,
        }
    }
}

impl From<GzHeaderParser> for GzHeader {
    fn from(parser: GzHeaderParser) -> Self {
        debug_assert!(matches!(parser.state, GzHeaderState::Complete));
        parser.header
    }
}

// Perform a single read from `r` into `buffer` and report how many bytes
// arrived. Hitting EOF (a zero-length read) is an `UnexpectedEof` error.
// Unlike `read`, a result of Ok(0) does not mean EOF: it means the read was
// interrupted and more data may still be available.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    match r.read(buffer) {
        Ok(filled) if filled > 0 => Ok(filled),
        Ok(_) => Err(ErrorKind::UnexpectedEof.into()),
        Err(err) => {
            if err.kind() == ErrorKind::Interrupted {
                Ok(0)
            } else {
                Err(err)
            }
        }
    }
}

// Consume bytes from `r` up to and including the first nul byte, appending
// every byte before the nul to `buffer`. Fails with `InvalidInput` if a byte
// would have to be appended while `buffer` already holds `MAX_HEADER_BUF`
// bytes, and with `UnexpectedEof` if `r` ends before a nul is seen.
fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
    for next in r.bytes() {
        match next? {
            0 => return Ok(()),
            _ if buffer.len() == MAX_HEADER_BUF => {
                return Err(Error::new(
                    ErrorKind::InvalidInput,
                    "gzip header field too long",
                ));
            }
            byte => buffer.push(byte),
        }
    }
    // The byte stream ran dry without ever producing the terminator.
    Err(ErrorKind::UnexpectedEof.into())
}

// Interpret two bytes as a little-endian `u16`: the first byte is the low
// half, the second byte the high half.
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    let [low, high] = *buffer;
    u16::from(low) | (u16::from(high) << 8)
}

// Build the `InvalidInput` error reported for a malformed gzip header.
fn bad_header() -> Error {
    let message = "invalid gzip header";
    Error::new(ErrorKind::InvalidInput, message)
}

// Build the `InvalidInput` error reported when a stored checksum does not
// match the computed one.
fn corrupt() -> Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, message)
}

/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
///                 .filename("hello_world.txt")
///                 .comment("test file, please delete")
///                 .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Default)]
pub struct GzBuilder {
    // Bytes for the header's `extra` field; `None` leaves the field out.
    extra: Option<Vec<u8>>,
    // Filename for the header; stored as a `CString` so it cannot contain an
    // interior nul and is written with its terminating nul.
    filename: Option<CString>,
    // Comment for the header; same representation as `filename`.
    comment: Option<CString>,
    // Operating-system byte; when `None` the header is written with 255.
    operating_system: Option<u8>,
    // Modification time written to the header (0 unless configured).
    mtime: u32,
}

impl GzBuilder {
    /// Create a new blank builder with no optional header fields set.
    pub fn new() -> GzBuilder {
        Self::default()
    }

    /// Configure the `mtime` field in the gzip header.
    pub fn mtime(mut self, mtime: u32) -> GzBuilder {
        self.mtime = mtime;
        self
    }

    /// Configure the `operating_system` field in the gzip header.
    ///
    /// When this is never called the header is written with the value 255.
    pub fn operating_system(mut self, os: u8) -> GzBuilder {
        self.operating_system = Some(os);
        self
    }

    /// Configure the `extra` field in the gzip header.
    ///
    /// The gzip header stores the length of this field in 16 bits, so at most
    /// 65535 bytes can be written. Any bytes beyond that limit are left out
    /// when the header is built, so that the stored length always matches the
    /// bytes that follow it.
    pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
        self.extra = Some(extra.into());
        self
    }

    /// Configure the `filename` field in the gzip header.
    ///
    /// # Panics
    ///
    /// Panics if the `filename` slice contains a zero.
    pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
        self.filename = Some(CString::new(filename.into()).unwrap());
        self
    }

    /// Configure the `comment` field in the gzip header.
    ///
    /// # Panics
    ///
    /// Panics if the `comment` slice contains a zero.
    pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
        self.comment = Some(CString::new(comment.into()).unwrap());
        self
    }

    /// Consume this builder, creating a writer encoder in the process.
    ///
    /// The data written to the returned encoder will be compressed and then
    /// written out to the supplied parameter `w`.
    pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
        write::gz_encoder(self.into_header(lvl), w, lvl)
    }

    /// Consume this builder, creating a reader encoder in the process.
    ///
    /// Data read from the returned encoder will be the compressed version of
    /// the data read from the given reader. The reader is wrapped in a
    /// `BufReader` and handed to [`buf_read`](Self::buf_read).
    pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
        read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
    }

    /// Consume this builder, creating a reader encoder in the process.
    ///
    /// Data read from the returned encoder will be the compressed version of
    /// the data read from the given reader.
    pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
    where
        R: BufRead,
    {
        bufread::gz_encoder(self.into_header(lvl), r, lvl)
    }

    /// Serialize the configured fields into the bytes of a gzip header.
    ///
    /// The result is the 10-byte fixed part followed, in this order, by the
    /// optional `extra` field (2-byte little-endian length plus payload), the
    /// nul-terminated filename and the nul-terminated comment. No header CRC
    /// is emitted.
    fn into_header(self, lvl: Compression) -> Vec<u8> {
        let GzBuilder {
            extra,
            filename,
            comment,
            operating_system,
            mtime,
        } = self;
        let mut flg = 0;
        // Reserve the fixed part up front; its bytes are filled in below once
        // the flag bits are known.
        let mut header = vec![0u8; 10];
        if let Some(v) = extra {
            flg |= FEXTRA;
            // The length prefix is only 16 bits wide. Writing more payload
            // than the prefix can describe would make everything after the
            // header unreadable, so the payload is capped to what the prefix
            // can express.
            let xlen = v.len().min(usize::from(u16::MAX));
            header.extend_from_slice(&(xlen as u16).to_le_bytes());
            header.extend_from_slice(&v[..xlen]);
        }
        if let Some(filename) = filename {
            flg |= FNAME;
            header.extend_from_slice(filename.as_bytes_with_nul());
        }
        if let Some(comment) = comment {
            flg |= FCOMMENT;
            header.extend_from_slice(comment.as_bytes_with_nul());
        }
        // Identification bytes, compression method (8 = deflate), and flags.
        header[0] = 0x1f;
        header[1] = 0x8b;
        header[2] = 8;
        header[3] = flg;
        // Modification time, least significant byte first.
        header[4..8].copy_from_slice(&mtime.to_le_bytes());
        // Extra-flags byte: 2 for levels at or above `Compression::best()`,
        // 4 for levels at or below `Compression::fast()`, otherwise 0.
        header[8] = if lvl.0 >= Compression::best().0 {
            2
        } else if lvl.0 <= Compression::fast().0 {
            4
        } else {
            0
        };

        // Typically this byte indicates what OS the gz stream was created on,
        // but in an effort to have cross-platform reproducible streams just
        // default this value to 255. I'm not sure that if we "correctly" set
        // this it'd do anything anyway...
        header[9] = operating_system.unwrap_or(255);
        header
    }
}

#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{rng, Rng};

    /// Compressing a short string through the write encoder and reading it
    /// back through the read decoder yields the original text.
    #[test]
    fn roundtrip() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    /// An encoder that never received any data still produces a stream that
    /// decodes to the empty string.
    #[test]
    fn roundtrip_zero() {
        let compressed = write::GzEncoder::new(Vec::new(), Compression::default())
            .finish()
            .unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "");
    }

    /// Writes 200 randomly sized prefixes of a 1024-byte random pool and
    /// checks that decoding reproduces their concatenation.
    #[test]
    fn roundtrip_big() {
        let mut expected = Vec::new();
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let pool = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let len = rng().random_range(0..pool.len());
            let chunk = &pool[..len];
            expected.extend_from_slice(chunk);
            encoder.write_all(chunk).unwrap();
        }
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, expected);
    }

    /// Chains the read encoder straight into the read decoder over 1 MiB of
    /// random bytes and expects the input back unchanged.
    #[test]
    fn roundtrip_big2() {
        let input = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let encoder = read::GzEncoder::new(&input[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        let mut output = Vec::new();
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, input);
    }

    // A table-driven CRC written to stay close to the C reference code in
    // RFC1952. Only use this to create CRCs for tests.
    struct Rfc1952Crc {
        /* CRC of every possible single-byte message, indexed by that byte. */
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        fn new() -> Self {
            let mut crc_table = [0u32; 256];
            /* Each entry is its own index pushed through eight shift/xor rounds. */
            for (byte, entry) in crc_table.iter_mut().enumerate() {
                *entry = (0..8).fold(byte as u32, |c, _| {
                    let shifted = c >> 1;
                    if c & 1 == 0 {
                        shifted
                    } else {
                        0xedb88320 ^ shifted
                    }
                });
            }
            Rfc1952Crc { crc_table }
        }

        /*
         Fold the bytes of buf into a running crc and return the new value.
         Start a fresh computation with a crc of zero. The one's complement
         on the way in and on the way out happens here, so callers must not
         apply it themselves.
        */
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let folded = buf.iter().fold(!crc, |c, &byte| {
                self.crc_table[usize::from(c as u8 ^ byte)] ^ (c >> 8)
            });
            !folded
        }

        /* Return the CRC of the bytes buf. */
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    /// Builds a header with `GzBuilder`, appends a header CRC by hand, and
    /// checks that `GzHeaderParser` reads back the same fields.
    #[test]
    fn roundtrip_header() {
        let mut bytes = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // Flip the header-CRC flag, then append the low 16 bits of the CRC of
        // everything written so far.
        bytes[3] ^= super::FHCRC;
        let crc16 = Rfc1952Crc::new().crc(&bytes) as u16;
        bytes.extend_from_slice(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut bytes.as_slice()).unwrap();

        let expected = GzHeader {
            extra: None,
            filename: Some(b"filename".to_vec()),
            comment: Some(b"comment".to_vec()),
            operating_system: 57,
            mtime: 1234,
        };
        assert_eq!(parser.header().unwrap(), &expected);
    }

    /// The filename, comment and extra field set on the builder are visible
    /// through the decoder's header, and the payload still decodes.
    #[test]
    fn fields() {
        let payload = vec![0, 2, 4, 6];
        let encoder = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&payload[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);

        let header = decoder.header().unwrap();
        assert_eq!(header.filename(), Some(&b"foo.rs"[..]));
        assert_eq!(header.comment(), Some(&b"bar"[..]));
        assert_eq!(header.extra(), Some(&b"\x00\x01\x02\x03"[..]));

        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, vec![0, 2, 4, 6]);
    }

    /// Reading again after the decoder has reached the end succeeds and adds
    /// nothing to the output.
    #[test]
    fn keep_reading_after_end() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        // First pass drains the stream; the second pass must be a no-op.
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    /// Property test: for arbitrary byte vectors, encoding through the read
    /// encoder and decoding through the read decoder is the identity.
    #[test]
    fn qc_reader() {
        fn encode_then_decode_is_identity(v: Vec<u8>) -> bool {
            let encoder = read::GzEncoder::new(&v[..], Compression::default());
            let mut decoder = read::GzDecoder::new(encoder);
            let mut decoded = Vec::new();
            decoder.read_to_end(&mut decoded).unwrap();
            v == decoded
        }

        ::quickcheck::quickcheck(encode_then_decode_is_identity as fn(_) -> _);
    }

    /// Flushing an encoder right after a formatted write must succeed.
    #[test]
    fn flush_after_write() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(encoder, "Hello world").unwrap();
        encoder.flush().unwrap();
    }
}

Coverage Report

Created: 2025-09-27 07:34

Line	Count	Source
1		use std::ffi::CString;
2		use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3		use std::time;
4
5		use crate::bufreader::BufReader;
6		use crate::{Compression, Crc};
7
8		pub static FHCRC: u8 = 1 << 1;
9		pub static FEXTRA: u8 = 1 << 2;
10		pub static FNAME: u8 = 1 << 3;
11		pub static FCOMMENT: u8 = 1 << 4;
12		pub static FRESERVED: u8 = 1 << 5 \| 1 << 6 \| 1 << 7;
13
14		pub mod bufread;
15		pub mod read;
16		pub mod write;
17
18		// The maximum length of the header filename and comment fields. More than
19		// enough for these fields in reasonable use, but prevents possible attacks.
20		const MAX_HEADER_BUF: usize = 65535;
21
22		/// A structure representing the header of a gzip stream.
23		///
24		/// The header can contain metadata about the file that was compressed, if
25		/// present.
26		#[derive(PartialEq, Clone, Debug, Default)]
27		pub struct GzHeader {
28		extra: Option<Vec<u8>>,
29		filename: Option<Vec<u8>>,
30		comment: Option<Vec<u8>>,
31		operating_system: u8,
32		mtime: u32,
33		}
34
35		impl GzHeader {
36		/// Returns the `filename` field of this gzip stream's header, if present.
37	0	pub fn filename(&self) -> Option<&[u8]> {
38	0	self.filename.as_ref().map(\|s\| &s[..])
39	0	}
40
41		/// Returns the `extra` field of this gzip stream's header, if present.
42	0	pub fn extra(&self) -> Option<&[u8]> {
43	0	self.extra.as_ref().map(\|s\| &s[..])
44	0	}
45
46		/// Returns the `comment` field of this gzip stream's header, if present.
47	0	pub fn comment(&self) -> Option<&[u8]> {
48	0	self.comment.as_ref().map(\|s\| &s[..])
49	0	}
50
51		/// Returns the `operating_system` field of this gzip stream's header.
52		///
53		/// There are predefined values for various operating systems.
54		/// 255 means that the value is unknown.
55	0	pub fn operating_system(&self) -> u8 {
56	0	self.operating_system
57	0	}
58
59		/// This gives the most recent modification time of the original file being compressed.
60		///
61		/// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970.
62		/// (Note that this may cause problems for MS-DOS and other systems that use local
63		/// rather than Universal time.) If the compressed data did not come from a file,
64		/// `mtime` is set to the time at which compression started.
65		/// `mtime` = 0 means no time stamp is available.
66		///
67		/// The usage of `mtime` is discouraged because of Year 2038 problem.
68	0	pub fn mtime(&self) -> u32 {
69	0	self.mtime
70	0	}
71
72		/// Returns the most recent modification time represented by a date-time type.
73		/// Returns `None` if the value of the underlying counter is 0,
74		/// indicating no time stamp is available.
75		///
76		///
77		/// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970.
78		/// See [`mtime`](#method.mtime) for more detail.
79	0	pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
80	0	if self.mtime == 0 {
81	0	None
82		} else {
83	0	let duration = time::Duration::new(u64::from(self.mtime), 0);
84	0	let datetime = time::UNIX_EPOCH + duration;
85	0	Some(datetime)
86		}
87	0	}
88		}
89
90		#[derive(Debug, Default)]
91		pub enum GzHeaderState {
92		Start(u8, [u8; 10]),
93		Xlen(Option<Box<Crc>>, u8, [u8; 2]),
94		Extra(Option<Box<Crc>>, u16),
95		Filename(Option<Box<Crc>>),
96		Comment(Option<Box<Crc>>),
97		Crc(Option<Box<Crc>>, u8, [u8; 2]),
98		#[default]
99		Complete,
100		}
101
102		#[derive(Debug, Default)]
103		pub struct GzHeaderParser {
104		state: GzHeaderState,
105		flags: u8,
106		header: GzHeader,
107		}
108
109		impl GzHeaderParser {
110	0	fn new() -> Self {
111	0	GzHeaderParser {
112	0	state: GzHeaderState::Start(0, [0; 10]),
113	0	flags: 0,
114	0	header: GzHeader::default(),
115	0	}
116	0	}
117
118	0	fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
119		loop {
120	0	match &mut self.state {
121	0	GzHeaderState::Start(count, buffer) => {
122	0	while (*count as usize) < buffer.len() {
123	0	*count += read_into(r, &mut buffer[*count as usize..])? as u8;
124		}
125		// Gzip identification bytes
126	0	if buffer[0] != 0x1f \|\| buffer[1] != 0x8b {
127	0	return Err(bad_header());
128	0	}
129		// Gzip compression method (8 = deflate)
130	0	if buffer[2] != 8 {
131	0	return Err(bad_header());
132	0	}
133	0	self.flags = buffer[3];
134		// RFC1952: "must give an error indication if any reserved bit is non-zero"
135	0	if self.flags & FRESERVED != 0 {
136	0	return Err(bad_header());
137	0	}
138	0	self.header.mtime = (buffer[4] as u32)
139	0	\| ((buffer[5] as u32) << 8)
140	0	\| ((buffer[6] as u32) << 16)
141	0	\| ((buffer[7] as u32) << 24);
142	0	let _xfl = buffer[8];
143	0	self.header.operating_system = buffer[9];
144	0	let crc = if self.flags & FHCRC != 0 {
145	0	let mut crc = Box::new(Crc::new());
146	0	crc.update(buffer);
147	0	Some(crc)
148		} else {
149	0	None
150		};
151	0	self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
152		}
153	0	GzHeaderState::Xlen(crc, count, buffer) => {
154	0	if self.flags & FEXTRA != 0 {
155	0	while (*count as usize) < buffer.len() {
156	0	*count += read_into(r, &mut buffer[*count as usize..])? as u8;
157		}
158	0	if let Some(crc) = crc {
159	0	crc.update(buffer);
160	0	}
161	0	let xlen = parse_le_u16(buffer);
162	0	self.header.extra = Some(vec![0; xlen as usize]);
163	0	self.state = GzHeaderState::Extra(crc.take(), 0);
164	0	} else {
165	0	self.state = GzHeaderState::Filename(crc.take());
166	0	}
167		}
168	0	GzHeaderState::Extra(crc, count) => {
169	0	debug_assert!(self.header.extra.is_some());
170	0	let extra = self.header.extra.as_mut().unwrap();
171	0	while (*count as usize) < extra.len() {
172	0	*count += read_into(r, &mut extra[*count as usize..])? as u16;
173		}
174	0	if let Some(crc) = crc {
175	0	crc.update(extra);
176	0	}
177	0	self.state = GzHeaderState::Filename(crc.take());
178		}
179	0	GzHeaderState::Filename(crc) => {
180	0	if self.flags & FNAME != 0 {
181	0	let filename = self.header.filename.get_or_insert_with(Vec::new);
182	0	read_to_nul(r, filename)?;
183	0	if let Some(crc) = crc {
184	0	crc.update(filename);
185	0	crc.update(b"\0");
186	0	}
187	0	}
188	0	self.state = GzHeaderState::Comment(crc.take());
189		}
190	0	GzHeaderState::Comment(crc) => {
191	0	if self.flags & FCOMMENT != 0 {
192	0	let comment = self.header.comment.get_or_insert_with(Vec::new);
193	0	read_to_nul(r, comment)?;
194	0	if let Some(crc) = crc {
195	0	crc.update(comment);
196	0	crc.update(b"\0");
197	0	}
198	0	}
199	0	self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
200		}
201	0	GzHeaderState::Crc(crc, count, buffer) => {
202	0	if let Some(crc) = crc {
203	0	debug_assert!(self.flags & FHCRC != 0);
204	0	while (*count as usize) < buffer.len() {
205	0	*count += read_into(r, &mut buffer[*count as usize..])? as u8;
206		}
207	0	let stored_crc = parse_le_u16(buffer);
208	0	let calced_crc = crc.sum() as u16;
209	0	if stored_crc != calced_crc {
210	0	return Err(corrupt());
211	0	}
212	0	}
213	0	self.state = GzHeaderState::Complete;
214		}
215		GzHeaderState::Complete => {
216	0	return Ok(());
217		}
218		}
219		}
220	0	}
221
222	0	fn header(&self) -> Option<&GzHeader> {
223	0	match self.state {
224	0	GzHeaderState::Complete => Some(&self.header),
225	0	_ => None,
226		}
227	0	}
228		}
229
230		impl From<GzHeaderParser> for GzHeader {
231	0	fn from(parser: GzHeaderParser) -> Self {
232	0	debug_assert!(matches!(parser.state, GzHeaderState::Complete));
233	0	parser.header
234	0	}
235		}
236
237		// Attempt to fill the `buffer` from `r`. Return the number of bytes read.
238		// Return an error if EOF is read before the buffer is full. This differs
239		// from `read` in that Ok(0) means that more data may be available.
240	0	fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
241	0	debug_assert!(!buffer.is_empty());
242	0	match r.read(buffer) {
243	0	Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
244	0	Ok(n) => Ok(n),
245	0	Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0),
246	0	Err(e) => Err(e),
247		}
248	0	}
249
250		// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
251	0	fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
252	0	let mut bytes = r.bytes();
253		loop {
254	0	match bytes.next().transpose()? {
255	0	Some(0) => return Ok(()),
256	0	Some(_) if buffer.len() == MAX_HEADER_BUF => {
257	0	return Err(Error::new(
258	0	ErrorKind::InvalidInput,
259	0	"gzip header field too long",
260	0	));
261		}
262	0	Some(byte) => {
263	0	buffer.push(byte);
264	0	}
265		None => {
266	0	return Err(ErrorKind::UnexpectedEof.into());
267		}
268		}
269		}
270	0	}
271
272	0	fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
273	0	u16::from_le_bytes(*buffer)
274	0	}
275
276	0	fn bad_header() -> Error {
277	0	Error::new(ErrorKind::InvalidInput, "invalid gzip header")
278	0	}
279
280	0	fn corrupt() -> Error {
281	0	Error::new(
282	0	ErrorKind::InvalidInput,
283		"corrupt gzip stream does not have a matching checksum",
284		)
285	0	}
286
287		/// A builder structure to create a new gzip Encoder.
288		///
289		/// This structure controls header configuration options such as the filename.
290		///
291		/// # Examples
292		///
293		/// ```
294		/// use std::io::prelude::*;
295		/// # use std::io;
296		/// use std::fs::File;
297		/// use flate2::GzBuilder;
298		/// use flate2::Compression;
299		///
300		/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
301		///
302		/// # fn sample_builder() -> Result<(), io::Error> {
303		/// let f = File::create("examples/hello_world.gz")?;
304		/// let mut gz = GzBuilder::new()
305		/// .filename("hello_world.txt")
306		/// .comment("test file, please delete")
307		/// .write(f, Compression::default());
308		/// gz.write_all(b"hello world")?;
309		/// gz.finish()?;
310		/// # Ok(())
311		/// # }
312		/// ```
313		#[derive(Debug, Default)]
314		pub struct GzBuilder {
315		extra: Option<Vec<u8>>,
316		filename: Option<CString>,
317		comment: Option<CString>,
318		operating_system: Option<u8>,
319		mtime: u32,
320		}
321
322		impl GzBuilder {
323		/// Create a new blank builder with no header by default.
324	0	pub fn new() -> GzBuilder {
325	0	Self::default()
326	0	}
327
328		/// Configure the `mtime` field in the gzip header.
329	0	pub fn mtime(mut self, mtime: u32) -> GzBuilder {
330	0	self.mtime = mtime;
331	0	self
332	0	}
333
334		/// Configure the `operating_system` field in the gzip header.
335	0	pub fn operating_system(mut self, os: u8) -> GzBuilder {
336	0	self.operating_system = Some(os);
337	0	self
338	0	}
339
340		/// Configure the `extra` field in the gzip header.
341	0	pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
342	0	self.extra = Some(extra.into());
343	0	self
344	0	}
345
346		/// Configure the `filename` field in the gzip header.
347		///
348		/// # Panics
349		///
350		/// Panics if the `filename` slice contains a zero.
351	0	pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
352	0	self.filename = Some(CString::new(filename.into()).unwrap());
353	0	self
354	0	}
355
356		/// Configure the `comment` field in the gzip header.
357		///
358		/// # Panics
359		///
360		/// Panics if the `comment` slice contains a zero.
361	0	pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
362	0	self.comment = Some(CString::new(comment.into()).unwrap());
363	0	self
364	0	}
365
366		/// Consume this builder, creating a writer encoder in the process.
367		///
368		/// The data written to the returned encoder will be compressed and then
369		/// written out to the supplied parameter `w`.
370	0	pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
371	0	write::gz_encoder(self.into_header(lvl), w, lvl)
372	0	}
373
374		/// Consume this builder, creating a reader encoder in the process.
375		///
376		/// Data read from the returned encoder will be the compressed version of
377		/// the data read from the given reader.
378	0	pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
379	0	read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
380	0	}
381
382		/// Consume this builder, creating a reader encoder in the process.
383		///
384		/// Data read from the returned encoder will be the compressed version of
385		/// the data read from the given reader.
386	0	pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
387	0	where
388	0	R: BufRead,
389		{
390	0	bufread::gz_encoder(self.into_header(lvl), r, lvl)
391	0	}
392
393	0	fn into_header(self, lvl: Compression) -> Vec<u8> {
394		let GzBuilder {
395	0	extra,
396	0	filename,
397	0	comment,
398	0	operating_system,
399	0	mtime,
400	0	} = self;
401	0	let mut flg = 0;
402	0	let mut header = vec![0u8; 10];
403	0	if let Some(v) = extra {
404	0	flg \|= FEXTRA;
405	0	header.extend((v.len() as u16).to_le_bytes());
406	0	header.extend(v);
407	0	}
408	0	if let Some(filename) = filename {
409	0	flg \|= FNAME;
410	0	header.extend(filename.as_bytes_with_nul().iter().copied());
411	0	}
412	0	if let Some(comment) = comment {
413	0	flg \|= FCOMMENT;
414	0	header.extend(comment.as_bytes_with_nul().iter().copied());
415	0	}
416	0	header[0] = 0x1f;
417	0	header[1] = 0x8b;
418	0	header[2] = 8;
419	0	header[3] = flg;
420	0	header[4] = mtime as u8;
421	0	header[5] = (mtime >> 8) as u8;
422	0	header[6] = (mtime >> 16) as u8;
423	0	header[7] = (mtime >> 24) as u8;
424	0	header[8] = if lvl.0 >= Compression::best().0 {
425	0	2
426	0	} else if lvl.0 <= Compression::fast().0 {
427	0	4
428		} else {
429	0	0
430		};
431
432		// Typically this byte indicates what OS the gz stream was created on,
433		// but in an effort to have cross-platform reproducible streams just
434		// default this value to 255. I'm not sure that if we "correctly" set
435		// this it'd do anything anyway...
436	0	header[9] = operating_system.unwrap_or(255);
437	0	header
438	0	}
439		}
440
441		#[cfg(test)]
442		mod tests {
443		use std::io::prelude::*;
444
445		use super::{read, write, GzBuilder, GzHeaderParser};
446		use crate::{Compression, GzHeader};
447		use rand::{rng, Rng};
448
449		#[test]
450		fn roundtrip() {
451		let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
452		e.write_all(b"foo bar baz").unwrap();
453		let inner = e.finish().unwrap();
454		let mut d = read::GzDecoder::new(&inner[..]);
455		let mut s = String::new();
456		d.read_to_string(&mut s).unwrap();
457		assert_eq!(s, "foo bar baz");
458		}
459
460		#[test]
461		fn roundtrip_zero() {
462		let e = write::GzEncoder::new(Vec::new(), Compression::default());
463		let inner = e.finish().unwrap();
464		let mut d = read::GzDecoder::new(&inner[..]);
465		let mut s = String::new();
466		d.read_to_string(&mut s).unwrap();
467		assert_eq!(s, "");
468		}
469
470		#[test]
471		fn roundtrip_big() {
472		let mut real = Vec::new();
473		let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
474		let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
475		for _ in 0..200 {
476		let to_write = &v[..rng().random_range(0..v.len())];
477		real.extend(to_write.iter().copied());
478		w.write_all(to_write).unwrap();
479		}
480		let result = w.finish().unwrap();
481		let mut r = read::GzDecoder::new(&result[..]);
482		let mut v = Vec::new();
483		r.read_to_end(&mut v).unwrap();
484		assert_eq!(v, real);
485		}
486
487		#[test]
488		fn roundtrip_big2() {
489		let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
490		let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
491		let mut res = Vec::new();
492		r.read_to_end(&mut res).unwrap();
493		assert_eq!(res, v);
494		}
495
496		// A Rust implementation of CRC that closely matches the C code in RFC1952.
497		// Only use this to create CRCs for tests.
498		struct Rfc1952Crc {
499		/* Table of CRCs of all 8-bit messages. */
500		crc_table: [u32; 256],
501		}
502
503		impl Rfc1952Crc {
504		fn new() -> Self {
505		let mut crc = Rfc1952Crc {
506		crc_table: [0; 256],
507		};
508		/* Make the table for a fast CRC. */
509		for n in 0usize..256 {
510		let mut c = n as u32;
511		for _k in 0..8 {
512		if c & 1 != 0 {
513		c = 0xedb88320 ^ (c >> 1);
514		} else {
515		c = c >> 1;
516		}
517		}
518		crc.crc_table[n] = c;
519		}
520		crc
521		}
522
523		/*
524		Update a running crc with the bytes buf and return
525		the updated crc. The crc should be initialized to zero. Pre- and
526		post-conditioning (one's complement) is performed within this
527		function so it shouldn't be done by the caller.
528		*/
529		fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
530		let mut c = crc ^ 0xffffffff;
531
532		for b in buf {
533		c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
534		}
535		c ^ 0xffffffff
536		}
537
538		/* Return the CRC of the bytes buf. */
539		fn crc(&self, buf: &[u8]) -> u32 {
540		self.update_crc(0, buf)
541		}
542		}
543
544		#[test]
545		fn roundtrip_header() {
546		let mut header = GzBuilder::new()
547		.mtime(1234)
548		.operating_system(57)
549		.filename("filename")
550		.comment("comment")
551		.into_header(Compression::fast());
552
553		// Add a CRC to the header
554		header[3] = header[3] ^ super::FHCRC;
555		let rfc1952_crc = Rfc1952Crc::new();
556		let crc32 = rfc1952_crc.crc(&header);
557		let crc16 = crc32 as u16;
558		header.extend(&crc16.to_le_bytes());
559
560		let mut parser = GzHeaderParser::new();
561		parser.parse(&mut header.as_slice()).unwrap();
562		let actual = parser.header().unwrap();
563		assert_eq!(
564		actual,
565		&GzHeader {
566		extra: None,
567		filename: Some("filename".as_bytes().to_vec()),
568		comment: Some("comment".as_bytes().to_vec()),
569		operating_system: 57,
570		mtime: 1234
571		}
572		)
573		}
574
575		#[test]
576		fn fields() {
577		let r = vec![0, 2, 4, 6];
578		let e = GzBuilder::new()
579		.filename("foo.rs")
580		.comment("bar")
581		.extra(vec![0, 1, 2, 3])
582		.read(&r[..], Compression::default());
583		let mut d = read::GzDecoder::new(e);
584		assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
585		assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
586		assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
587		let mut res = Vec::new();
588		d.read_to_end(&mut res).unwrap();
589		assert_eq!(res, vec![0, 2, 4, 6]);
590		}
591
592		#[test]
593		fn keep_reading_after_end() {
594		let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
595		e.write_all(b"foo bar baz").unwrap();
596		let inner = e.finish().unwrap();
597		let mut d = read::GzDecoder::new(&inner[..]);
598		let mut s = String::new();
599		d.read_to_string(&mut s).unwrap();
600		assert_eq!(s, "foo bar baz");
601		d.read_to_string(&mut s).unwrap();
602		assert_eq!(s, "foo bar baz");
603		}
604
605		#[test]
606		fn qc_reader() {
607		::quickcheck::quickcheck(test as fn(_) -> _);
608
609		fn test(v: Vec<u8>) -> bool {
610		let r = read::GzEncoder::new(&v[..], Compression::default());
611		let mut r = read::GzDecoder::new(r);
612		let mut v2 = Vec::new();
613		r.read_to_end(&mut v2).unwrap();
614		v == v2
615		}
616		}
617
618		#[test]
619		fn flush_after_write() {
620		let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
621		write!(f, "Hello world").unwrap();
622		f.flush().unwrap();
623		}
624		}