Coverage Report

Created: 2025-09-27 07:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/flate2-1.1.2/src/gz/mod.rs
Line
Count
Source
1
use std::ffi::CString;
2
use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3
use std::time;
4
5
use crate::bufreader::BufReader;
6
use crate::{Compression, Crc};
7
8
/// FLG bit 1: the header ends with a 16-bit CRC of the preceding header bytes.
pub static FHCRC: u8 = 0b0000_0010;
/// FLG bit 2: an "extra" field, prefixed by a 2-byte length, is present.
pub static FEXTRA: u8 = 0b0000_0100;
/// FLG bit 3: a nul-terminated filename is present.
pub static FNAME: u8 = 0b0000_1000;
/// FLG bit 4: a nul-terminated comment is present.
pub static FCOMMENT: u8 = 0b0001_0000;
/// FLG bits 5-7: reserved; the header parser rejects headers that set any of them.
pub static FRESERVED: u8 = 0b1110_0000;
13
14
pub mod bufread;
15
pub mod read;
16
pub mod write;
17
18
// Upper bound, in bytes, on the filename and comment header fields. Far more
// than any reasonable use of these fields needs, but it stops a hostile
// stream from making the parser buffer an unbounded field.
const MAX_HEADER_BUF: usize = 0xFFFF;
21
22
/// The header of a gzip stream.
///
/// A gzip header may carry metadata about the file that was compressed.
/// Fields the header did not contain are `None`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct GzHeader {
    // Raw bytes of the optional "extra" field.
    extra: Option<Vec<u8>>,
    // Original file name, without the terminating nul byte.
    filename: Option<Vec<u8>>,
    // Free-form comment, without the terminating nul byte.
    comment: Option<Vec<u8>>,
    // OS byte of the header (255 means unknown).
    operating_system: u8,
    // Modification time in seconds since the Unix epoch; 0 means "not available".
    mtime: u32,
}
34
35
impl GzHeader {
36
    /// Returns the `filename` field of this gzip stream's header, if present.
37
0
    pub fn filename(&self) -> Option<&[u8]> {
38
0
        self.filename.as_ref().map(|s| &s[..])
39
0
    }
40
41
    /// Returns the `extra` field of this gzip stream's header, if present.
42
0
    pub fn extra(&self) -> Option<&[u8]> {
43
0
        self.extra.as_ref().map(|s| &s[..])
44
0
    }
45
46
    /// Returns the `comment` field of this gzip stream's header, if present.
47
0
    pub fn comment(&self) -> Option<&[u8]> {
48
0
        self.comment.as_ref().map(|s| &s[..])
49
0
    }
50
51
    /// Returns the `operating_system` field of this gzip stream's header.
52
    ///
53
    /// There are predefined values for various operating systems.
54
    /// 255 means that the value is unknown.
55
0
    pub fn operating_system(&self) -> u8 {
56
0
        self.operating_system
57
0
    }
58
59
    /// This gives the most recent modification time of the original file being compressed.
60
    ///
61
    /// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970.
62
    /// (Note that this may cause problems for MS-DOS and other systems that use local
63
    /// rather than Universal time.) If the compressed data did not come from a file,
64
    /// `mtime` is set to the time at which compression started.
65
    /// `mtime` = 0 means no time stamp is available.
66
    ///
67
    /// The usage of `mtime` is discouraged because of Year 2038 problem.
68
0
    pub fn mtime(&self) -> u32 {
69
0
        self.mtime
70
0
    }
71
72
    /// Returns the most recent modification time represented by a date-time type.
73
    /// Returns `None` if the value of the underlying counter is 0,
74
    /// indicating no time stamp is available.
75
    ///
76
    ///
77
    /// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970.
78
    /// See [`mtime`](#method.mtime) for more detail.
79
0
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
80
0
        if self.mtime == 0 {
81
0
            None
82
        } else {
83
0
            let duration = time::Duration::new(u64::from(self.mtime), 0);
84
0
            let datetime = time::UNIX_EPOCH + duration;
85
0
            Some(datetime)
86
        }
87
0
    }
88
}
89
90
#[derive(Debug, Default)]
91
pub enum GzHeaderState {
92
    Start(u8, [u8; 10]),
93
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
94
    Extra(Option<Box<Crc>>, u16),
95
    Filename(Option<Box<Crc>>),
96
    Comment(Option<Box<Crc>>),
97
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
98
    #[default]
99
    Complete,
100
}
101
102
#[derive(Debug, Default)]
103
pub struct GzHeaderParser {
104
    state: GzHeaderState,
105
    flags: u8,
106
    header: GzHeader,
107
}
108
109
impl GzHeaderParser {
110
0
    fn new() -> Self {
111
0
        GzHeaderParser {
112
0
            state: GzHeaderState::Start(0, [0; 10]),
113
0
            flags: 0,
114
0
            header: GzHeader::default(),
115
0
        }
116
0
    }
117
118
0
    fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
119
        loop {
120
0
            match &mut self.state {
121
0
                GzHeaderState::Start(count, buffer) => {
122
0
                    while (*count as usize) < buffer.len() {
123
0
                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
124
                    }
125
                    // Gzip identification bytes
126
0
                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
127
0
                        return Err(bad_header());
128
0
                    }
129
                    // Gzip compression method (8 = deflate)
130
0
                    if buffer[2] != 8 {
131
0
                        return Err(bad_header());
132
0
                    }
133
0
                    self.flags = buffer[3];
134
                    // RFC1952: "must give an error indication if any reserved bit is non-zero"
135
0
                    if self.flags & FRESERVED != 0 {
136
0
                        return Err(bad_header());
137
0
                    }
138
0
                    self.header.mtime = (buffer[4] as u32)
139
0
                        | ((buffer[5] as u32) << 8)
140
0
                        | ((buffer[6] as u32) << 16)
141
0
                        | ((buffer[7] as u32) << 24);
142
0
                    let _xfl = buffer[8];
143
0
                    self.header.operating_system = buffer[9];
144
0
                    let crc = if self.flags & FHCRC != 0 {
145
0
                        let mut crc = Box::new(Crc::new());
146
0
                        crc.update(buffer);
147
0
                        Some(crc)
148
                    } else {
149
0
                        None
150
                    };
151
0
                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
152
                }
153
0
                GzHeaderState::Xlen(crc, count, buffer) => {
154
0
                    if self.flags & FEXTRA != 0 {
155
0
                        while (*count as usize) < buffer.len() {
156
0
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
157
                        }
158
0
                        if let Some(crc) = crc {
159
0
                            crc.update(buffer);
160
0
                        }
161
0
                        let xlen = parse_le_u16(buffer);
162
0
                        self.header.extra = Some(vec![0; xlen as usize]);
163
0
                        self.state = GzHeaderState::Extra(crc.take(), 0);
164
0
                    } else {
165
0
                        self.state = GzHeaderState::Filename(crc.take());
166
0
                    }
167
                }
168
0
                GzHeaderState::Extra(crc, count) => {
169
0
                    debug_assert!(self.header.extra.is_some());
170
0
                    let extra = self.header.extra.as_mut().unwrap();
171
0
                    while (*count as usize) < extra.len() {
172
0
                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
173
                    }
174
0
                    if let Some(crc) = crc {
175
0
                        crc.update(extra);
176
0
                    }
177
0
                    self.state = GzHeaderState::Filename(crc.take());
178
                }
179
0
                GzHeaderState::Filename(crc) => {
180
0
                    if self.flags & FNAME != 0 {
181
0
                        let filename = self.header.filename.get_or_insert_with(Vec::new);
182
0
                        read_to_nul(r, filename)?;
183
0
                        if let Some(crc) = crc {
184
0
                            crc.update(filename);
185
0
                            crc.update(b"\0");
186
0
                        }
187
0
                    }
188
0
                    self.state = GzHeaderState::Comment(crc.take());
189
                }
190
0
                GzHeaderState::Comment(crc) => {
191
0
                    if self.flags & FCOMMENT != 0 {
192
0
                        let comment = self.header.comment.get_or_insert_with(Vec::new);
193
0
                        read_to_nul(r, comment)?;
194
0
                        if let Some(crc) = crc {
195
0
                            crc.update(comment);
196
0
                            crc.update(b"\0");
197
0
                        }
198
0
                    }
199
0
                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
200
                }
201
0
                GzHeaderState::Crc(crc, count, buffer) => {
202
0
                    if let Some(crc) = crc {
203
0
                        debug_assert!(self.flags & FHCRC != 0);
204
0
                        while (*count as usize) < buffer.len() {
205
0
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
206
                        }
207
0
                        let stored_crc = parse_le_u16(buffer);
208
0
                        let calced_crc = crc.sum() as u16;
209
0
                        if stored_crc != calced_crc {
210
0
                            return Err(corrupt());
211
0
                        }
212
0
                    }
213
0
                    self.state = GzHeaderState::Complete;
214
                }
215
                GzHeaderState::Complete => {
216
0
                    return Ok(());
217
                }
218
            }
219
        }
220
0
    }
221
222
0
    fn header(&self) -> Option<&GzHeader> {
223
0
        match self.state {
224
0
            GzHeaderState::Complete => Some(&self.header),
225
0
            _ => None,
226
        }
227
0
    }
228
}
229
230
impl From<GzHeaderParser> for GzHeader {
231
0
    fn from(parser: GzHeaderParser) -> Self {
232
0
        debug_assert!(matches!(parser.state, GzHeaderState::Complete));
233
0
        parser.header
234
0
    }
235
}
236
237
// Performs a single read from `r` into `buffer` and returns how many bytes
// arrived. Unlike `Read::read`, `Ok(0)` does not mean end of input: it is
// returned when the read was interrupted, so the caller should simply try
// again. Reaching end of input is reported as an `UnexpectedEof` error.
// `buffer` must not be empty.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    match r.read(buffer) {
        Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
        Err(e) if e.kind() == ErrorKind::Interrupted => Ok(0),
        outcome => outcome,
    }
}
249
250
// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
251
0
fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
252
0
    let mut bytes = r.bytes();
253
    loop {
254
0
        match bytes.next().transpose()? {
255
0
            Some(0) => return Ok(()),
256
0
            Some(_) if buffer.len() == MAX_HEADER_BUF => {
257
0
                return Err(Error::new(
258
0
                    ErrorKind::InvalidInput,
259
0
                    "gzip header field too long",
260
0
                ));
261
            }
262
0
            Some(byte) => {
263
0
                buffer.push(byte);
264
0
            }
265
            None => {
266
0
                return Err(ErrorKind::UnexpectedEof.into());
267
            }
268
        }
269
    }
270
0
}
271
272
0
// Decodes two bytes as a little-endian `u16` (first byte is the low byte).
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    let [low, high] = *buffer;
    u16::from(low) | (u16::from(high) << 8)
}
275
276
0
// Error returned when the fixed part of a gzip header fails validation.
fn bad_header() -> Error {
    let kind = ErrorKind::InvalidInput;
    Error::new(kind, "invalid gzip header")
}
279
280
0
// Error returned when a stored checksum does not match the computed one.
fn corrupt() -> Error {
    const MESSAGE: &str = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, MESSAGE)
}
286
287
/// A builder for gzip encoders.
///
/// Use it to configure what goes into the gzip header (for example the
/// filename) before creating the encoder itself.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // Create a file and write a sample string to it through a configured encoder
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
///                 .filename("hello_world.txt")
///                 .comment("test file, please delete")
///                 .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Default, Debug)]
pub struct GzBuilder {
    // Bytes for the optional "extra" header field.
    extra: Option<Vec<u8>>,
    // Optional filename; stored as a `CString`, so it cannot contain a nul byte.
    filename: Option<CString>,
    // Optional comment; stored as a `CString`, so it cannot contain a nul byte.
    comment: Option<CString>,
    // OS byte for the header; 255 is written when unset.
    operating_system: Option<u8>,
    // Value of the header's `mtime` field.
    mtime: u32,
}
321
322
impl GzBuilder {
323
    /// Create a new blank builder with no header by default.
324
0
    pub fn new() -> GzBuilder {
325
0
        Self::default()
326
0
    }
327
328
    /// Configure the `mtime` field in the gzip header.
329
0
    pub fn mtime(mut self, mtime: u32) -> GzBuilder {
330
0
        self.mtime = mtime;
331
0
        self
332
0
    }
333
334
    /// Configure the `operating_system` field in the gzip header.
335
0
    pub fn operating_system(mut self, os: u8) -> GzBuilder {
336
0
        self.operating_system = Some(os);
337
0
        self
338
0
    }
339
340
    /// Configure the `extra` field in the gzip header.
341
0
    pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
342
0
        self.extra = Some(extra.into());
343
0
        self
344
0
    }
345
346
    /// Configure the `filename` field in the gzip header.
347
    ///
348
    /// # Panics
349
    ///
350
    /// Panics if the `filename` slice contains a zero.
351
0
    pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
352
0
        self.filename = Some(CString::new(filename.into()).unwrap());
353
0
        self
354
0
    }
355
356
    /// Configure the `comment` field in the gzip header.
357
    ///
358
    /// # Panics
359
    ///
360
    /// Panics if the `comment` slice contains a zero.
361
0
    pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
362
0
        self.comment = Some(CString::new(comment.into()).unwrap());
363
0
        self
364
0
    }
365
366
    /// Consume this builder, creating a writer encoder in the process.
367
    ///
368
    /// The data written to the returned encoder will be compressed and then
369
    /// written out to the supplied parameter `w`.
370
0
    pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
371
0
        write::gz_encoder(self.into_header(lvl), w, lvl)
372
0
    }
373
374
    /// Consume this builder, creating a reader encoder in the process.
375
    ///
376
    /// Data read from the returned encoder will be the compressed version of
377
    /// the data read from the given reader.
378
0
    pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
379
0
        read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
380
0
    }
381
382
    /// Consume this builder, creating a reader encoder in the process.
383
    ///
384
    /// Data read from the returned encoder will be the compressed version of
385
    /// the data read from the given reader.
386
0
    pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
387
0
    where
388
0
        R: BufRead,
389
    {
390
0
        bufread::gz_encoder(self.into_header(lvl), r, lvl)
391
0
    }
392
393
0
    fn into_header(self, lvl: Compression) -> Vec<u8> {
394
        let GzBuilder {
395
0
            extra,
396
0
            filename,
397
0
            comment,
398
0
            operating_system,
399
0
            mtime,
400
0
        } = self;
401
0
        let mut flg = 0;
402
0
        let mut header = vec![0u8; 10];
403
0
        if let Some(v) = extra {
404
0
            flg |= FEXTRA;
405
0
            header.extend((v.len() as u16).to_le_bytes());
406
0
            header.extend(v);
407
0
        }
408
0
        if let Some(filename) = filename {
409
0
            flg |= FNAME;
410
0
            header.extend(filename.as_bytes_with_nul().iter().copied());
411
0
        }
412
0
        if let Some(comment) = comment {
413
0
            flg |= FCOMMENT;
414
0
            header.extend(comment.as_bytes_with_nul().iter().copied());
415
0
        }
416
0
        header[0] = 0x1f;
417
0
        header[1] = 0x8b;
418
0
        header[2] = 8;
419
0
        header[3] = flg;
420
0
        header[4] = mtime as u8;
421
0
        header[5] = (mtime >> 8) as u8;
422
0
        header[6] = (mtime >> 16) as u8;
423
0
        header[7] = (mtime >> 24) as u8;
424
0
        header[8] = if lvl.0 >= Compression::best().0 {
425
0
            2
426
0
        } else if lvl.0 <= Compression::fast().0 {
427
0
            4
428
        } else {
429
0
            0
430
        };
431
432
        // Typically this byte indicates what OS the gz stream was created on,
433
        // but in an effort to have cross-platform reproducible streams just
434
        // default this value to 255. I'm not sure that if we "correctly" set
435
        // this it'd do anything anyway...
436
0
        header[9] = operating_system.unwrap_or(255);
437
0
        header
438
0
    }
439
}
440
441
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{rng, Rng};

    // A short string survives compression followed by decompression.
    #[test]
    fn roundtrip() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    // An encoder that never received any data still yields a valid stream.
    #[test]
    fn roundtrip_zero() {
        let encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "");
    }

    // Many writes of random length decompress to their concatenation.
    #[test]
    fn roundtrip_big() {
        let mut expected = Vec::new();
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let pool = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let chunk = &pool[..rng().random_range(0..pool.len())];
            expected.extend_from_slice(chunk);
            encoder.write_all(chunk).unwrap();
        }
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut actual = Vec::new();
        decoder.read_to_end(&mut actual).unwrap();
        assert_eq!(actual, expected);
    }

    // The read-side encoder feeds straight into the read-side decoder.
    #[test]
    fn roundtrip_big2() {
        let input = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let encoder = read::GzEncoder::new(&input[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        let mut output = Vec::new();
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, input);
    }

    // A Rust implementation of CRC that closely matches the C code in RFC1952.
    // Only use this to create CRCs for tests.
    struct Rfc1952Crc {
        /* Table of CRCs of all 8-bit messages. */
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        fn new() -> Self {
            /* Make the table for a fast CRC. */
            let mut crc_table = [0u32; 256];
            for (n, slot) in crc_table.iter_mut().enumerate() {
                let mut c = n as u32;
                for _ in 0..8 {
                    c = if c & 1 != 0 {
                        0xedb88320 ^ (c >> 1)
                    } else {
                        c >> 1
                    };
                }
                *slot = c;
            }
            Rfc1952Crc { crc_table }
        }

        /*
         Update a running crc with the bytes buf and return
         the updated crc. The crc should be initialized to zero. Pre- and
         post-conditioning (one's complement) is performed within this
         function so it shouldn't be done by the caller.
        */
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let folded = buf.iter().fold(crc ^ 0xffffffff, |c, &byte| {
                self.crc_table[usize::from(c as u8 ^ byte)] ^ (c >> 8)
            });
            folded ^ 0xffffffff
        }

        /* Return the CRC of the bytes buf. */
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    // A header produced by the builder, with a CRC16 appended by hand, is
    // parsed back into the same field values.
    #[test]
    fn roundtrip_header() {
        let mut header = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // Add a CRC to the header
        header[3] ^= super::FHCRC;
        let crc16 = Rfc1952Crc::new().crc(&header) as u16;
        header.extend_from_slice(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut header.as_slice()).unwrap();

        let expected = GzHeader {
            extra: None,
            filename: Some(b"filename".to_vec()),
            comment: Some(b"comment".to_vec()),
            operating_system: 57,
            mtime: 1234,
        };
        assert_eq!(parser.header().unwrap(), &expected);
    }

    // Header fields set on the builder are visible on the decoder.
    #[test]
    fn fields() {
        let payload = vec![0, 2, 4, 6];
        let encoder = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&payload[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        assert_eq!(decoder.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(decoder.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(
            decoder.header().unwrap().extra(),
            Some(&b"\x00\x01\x02\x03"[..])
        );
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, vec![0, 2, 4, 6]);
    }

    // Reading again after the end of the stream succeeds and yields nothing more.
    #[test]
    fn keep_reading_after_end() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    // Property test: arbitrary byte vectors survive an encode/decode round trip.
    #[test]
    fn qc_reader() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(input: Vec<u8>) -> bool {
            let encoder = read::GzEncoder::new(&input[..], Compression::default());
            let mut decoder = read::GzDecoder::new(encoder);
            let mut output = Vec::new();
            decoder.read_to_end(&mut output).unwrap();
            input == output
        }
    }

    // Flushing an encoder right after a write succeeds.
    #[test]
    fn flush_after_write() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(encoder, "Hello world").unwrap();
        encoder.flush().unwrap();
    }
}