Coverage Report

Created: 2025-06-04 06:23

/src/zip/src/read/stream.rs
Line
Count
Source (jump to first uncovered line)
1
use std::fs;
2
use std::io::{self, Read};
3
use std::path::Path;
4
5
use super::{
6
    central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
7
    ZipFileData, ZipResult,
8
};
9
10
use byteorder::{LittleEndian, ReadBytesExt};
11
12
/// Stream decoder for zip.
///
/// Wraps a reader `R` and decodes the archive sequentially from it. The
/// decoding entry points live on the `R: Read` implementation of this type.
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);

impl<R> ZipStreamReader<R> {
    /// Create a new ZipStreamReader
    ///
    /// Takes ownership of `reader` and stores it as-is; nothing is read from
    /// it at construction time.
    pub fn new(reader: R) -> Self {
        Self(reader)
    }
}
22
23
impl<R: Read> ZipStreamReader<R> {
24
0
    fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
25
0
        // Give archive_offset and central_header_start dummy value 0, since
26
0
        // they are not used in the output.
27
0
        let archive_offset = 0;
28
0
        let central_header_start = 0;
29
30
        // Parse central header
31
0
        let signature = self.0.read_u32::<LittleEndian>()?;
32
0
        if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
33
0
            Ok(None)
34
        } else {
35
0
            central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
36
0
                .map(ZipStreamFileMetadata)
37
0
                .map(Some)
38
        }
39
0
    }
40
41
    /// Iteraate over the stream and extract all file and their
42
    /// metadata.
43
0
    pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
44
0
        while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
45
0
            visitor.visit_file(&mut file)?;
46
        }
47
48
0
        while let Some(metadata) = self.parse_central_directory()? {
49
0
            visitor.visit_additional_metadata(&metadata)?;
50
        }
51
52
0
        Ok(())
53
0
    }
54
55
    /// Extract a Zip archive into a directory, overwriting files if they
56
    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
57
    ///
58
    /// Extraction is not atomic; If an error is encountered, some of the files
59
    /// may be left on disk.
60
0
    pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
61
        struct Extractor<'a>(&'a Path);
62
        impl ZipStreamVisitor for Extractor<'_> {
63
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
64
                let filepath = file
65
                    .enclosed_name()
66
                    .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
67
68
                let outpath = self.0.join(filepath);
69
70
                if file.name().ends_with('/') {
71
                    fs::create_dir_all(&outpath)?;
72
                } else {
73
                    if let Some(p) = outpath.parent() {
74
                        fs::create_dir_all(p)?;
75
                    }
76
                    let mut outfile = fs::File::create(&outpath)?;
77
                    io::copy(file, &mut outfile)?;
78
                }
79
80
                Ok(())
81
            }
82
83
            #[allow(unused)]
84
            fn visit_additional_metadata(
85
                &mut self,
86
                metadata: &ZipStreamFileMetadata,
87
            ) -> ZipResult<()> {
88
                #[cfg(unix)]
89
                {
90
                    let filepath = metadata
91
                        .enclosed_name()
92
                        .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
93
94
                    let outpath = self.0.join(filepath);
95
96
                    use std::os::unix::fs::PermissionsExt;
97
                    if let Some(mode) = metadata.unix_mode() {
98
                        fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
99
                    }
100
                }
101
102
                Ok(())
103
            }
104
        }
105
106
0
        self.visit(&mut Extractor(directory.as_ref()))
107
0
    }
108
}
109
110
/// Visitor for ZipStreamReader
111
pub trait ZipStreamVisitor {
112
    ///  * `file` - contains the content of the file and most of the metadata,
113
    ///    except:
114
    ///     - `comment`: set to an empty string
115
    ///     - `data_start`: set to 0
116
    ///     - `external_attributes`: `unix_mode()`: will return None
117
    fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;
118
119
    /// This function is guranteed to be called after all `visit_file`s.
120
    ///
121
    ///  * `metadata` - Provides missing metadata in `visit_file`.
122
    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
123
}
124
125
/// Additional metadata for the file.
126
#[derive(Debug)]
127
pub struct ZipStreamFileMetadata(ZipFileData);
128
129
impl ZipStreamFileMetadata {
130
    /// Get the name of the file
131
    ///
132
    /// # Warnings
133
    ///
134
    /// It is dangerous to use this name directly when extracting an archive.
135
    /// It may contain an absolute path (`/etc/shadow`), or break out of the
136
    /// current directory (`../runtime`). Carelessly writing to these paths
137
    /// allows an attacker to craft a ZIP archive that will overwrite critical
138
    /// files.
139
    ///
140
    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
141
    /// as a safe path.
142
    pub fn name(&self) -> &str {
143
        &self.0.file_name
144
    }
145
146
    /// Get the name of the file, in the raw (internal) byte representation.
147
    ///
148
    /// The encoding of this data is currently undefined.
149
    pub fn name_raw(&self) -> &[u8] {
150
        &self.0.file_name_raw
151
    }
152
153
    /// Rewrite the path, ignoring any path components with special meaning.
154
    ///
155
    /// - Absolute paths are made relative
156
    /// - [`ParentDir`]s are ignored
157
    /// - Truncates the filename at a NULL byte
158
    ///
159
    /// This is appropriate if you need to be able to extract *something* from
160
    /// any archive, but will easily misrepresent trivial paths like
161
    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
162
    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
163
    ///
164
    /// [`ParentDir`]: `Component::ParentDir`
165
    pub fn mangled_name(&self) -> ::std::path::PathBuf {
166
        self.0.file_name_sanitized()
167
    }
168
169
    /// Ensure the file path is safe to use as a [`Path`].
170
    ///
171
    /// - It can't contain NULL bytes
172
    /// - It can't resolve to a path outside the current directory
173
    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
174
    /// - It can't be an absolute path
175
    ///
176
    /// This will read well-formed ZIP files correctly, and is resistant
177
    /// to path-based exploits. It is recommended over
178
    /// [`ZipFile::mangled_name`].
179
    pub fn enclosed_name(&self) -> Option<&Path> {
180
        self.0.enclosed_name()
181
    }
182
183
    /// Returns whether the file is actually a directory
184
    pub fn is_dir(&self) -> bool {
185
        self.name()
186
            .chars()
187
            .rev()
188
            .next()
189
0
            .map_or(false, |c| c == '/' || c == '\\')
190
    }
191
192
    /// Returns whether the file is a regular file
193
    pub fn is_file(&self) -> bool {
194
        !self.is_dir()
195
    }
196
197
    /// Get the comment of the file
198
    pub fn comment(&self) -> &str {
199
        &self.0.file_comment
200
    }
201
202
    /// Get the starting offset of the data of the compressed file
203
    pub fn data_start(&self) -> u64 {
204
        self.0.data_start.load()
205
    }
206
207
    /// Get unix mode for the file
208
    pub fn unix_mode(&self) -> Option<u32> {
209
        self.0.unix_mode()
210
    }
211
}
212
213
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Visitor that accepts every entry and does nothing with it.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts callbacks: field 0 counts `visit_file` calls,
    /// field 1 counts `visit_additional_metadata` calls.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    /// Visiting the `invalid_offset.zip` fixture must fail with an error.
    #[test]
    fn invalid_offset() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_offset.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// Visiting the `invalid_offset2.zip` fixture must fail with an error.
    #[test]
    fn invalid_offset2() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_offset2.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// Every regular file announced by the central directory must have been
    /// seen as a file entry first.
    #[test]
    fn zip_read_streaming() {
        let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/mimetype.zip"
        )));

        /// Records the names of all regular files delivered by `visit_file`.
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if metadata.is_file() {
                    assert!(
                        self.filenames.contains(metadata.name()),
                        "{} is missing its file content",
                        metadata.name()
                    );
                }

                Ok(())
            }
        }

        reader.visit(&mut V::default()).unwrap();
    }

    /// `is_dir`/`is_file` must agree with the fixture's naming scheme:
    /// entries named `dir*` are directories, entries named `file*` are files.
    #[test]
    fn file_and_dir_predicates() {
        let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/files_and_dirs.zip"
        )));

        /// Checks the predicates per entry and records regular file names.
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let full_name = file.enclosed_name().unwrap();
                let file_name = full_name.file_name().unwrap().to_str().unwrap();
                assert!(
                    (file_name.starts_with("dir") && file.is_dir())
                        || (file_name.starts_with("file") && file.is_file())
                );

                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if metadata.is_file() {
                    assert!(
                        self.filenames.contains(metadata.name()),
                        "{} is missing its file content",
                        metadata.name()
                    );
                }

                Ok(())
            }
        }

        reader.visit(&mut V::default()).unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }
}