/src/zip/src/read/stream.rs

Source (jump to first uncovered line)
use std::fs;
use std::io::{self, Read};
use std::path::Path;

use super::{
    central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
    ZipFileData, ZipResult,
};

use byteorder::{LittleEndian, ReadBytesExt};

/// Stream decoder for zip.
///
/// Thin wrapper that owns the underlying reader `R`. The decoding methods
/// are only available when `R` implements [`Read`].
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);

impl<R> ZipStreamReader<R> {
    /// Create a new `ZipStreamReader` that takes ownership of `reader`.
    pub fn new(reader: R) -> Self {
        ZipStreamReader(reader)
    }
}

impl<R: Read> ZipStreamReader<R> {
    fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
        // Give archive_offset and central_header_start dummy value 0, since
        // they are not used in the output.
        let archive_offset = 0;
        let central_header_start = 0;

        // Parse central header
        let signature = self.0.read_u32::<LittleEndian>()?;
        if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
            Ok(None)
        } else {
            central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
                .map(ZipStreamFileMetadata)
                .map(Some)
        }
    }

    /// Iteraate over the stream and extract all file and their
    /// metadata.
    pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
        while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
            visitor.visit_file(&mut file)?;
        }

        while let Some(metadata) = self.parse_central_directory()? {
            visitor.visit_additional_metadata(&metadata)?;
        }

        Ok(())
    }

    /// Extract a Zip archive into a directory, overwriting files if they
    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
    ///
    /// Entries for which [`ZipFile::is_dir`] is true are created as
    /// directories. Every other entry is written as a file, after creating
    /// its parent directories. On Unix, the mode reported by the additional
    /// metadata (when there is one) is applied to the extracted path
    /// afterwards.
    ///
    /// Extraction is not atomic; if an error is encountered, some of the files
    /// may be left on disk.
    ///
    /// # Errors
    ///
    /// Returns `ZipError::InvalidArchive("Invalid file path")` when an entry
    /// has no enclosed name, and propagates any error from reading the stream
    /// or from the filesystem.
    pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
        /// Visitor that writes each entry below the wrapped target directory.
        struct Extractor<'a>(&'a Path);

        impl ZipStreamVisitor for Extractor<'_> {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let filepath = file
                    .enclosed_name()
                    .ok_or(ZipError::InvalidArchive("Invalid file path"))?;

                let outpath = self.0.join(filepath);

                // Use the same predicate as the rest of the API so that
                // entries ending in `\` are treated as directories too,
                // instead of being created as regular files.
                if file.is_dir() {
                    fs::create_dir_all(&outpath)?;
                } else {
                    if let Some(parent) = outpath.parent() {
                        fs::create_dir_all(parent)?;
                    }
                    let mut outfile = fs::File::create(&outpath)?;
                    io::copy(file, &mut outfile)?;
                }

                Ok(())
            }

            // `metadata` is only read inside the `cfg(unix)` block below, so
            // it is unused on every other target.
            #[allow(unused)]
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                #[cfg(unix)]
                {
                    use std::os::unix::fs::PermissionsExt;

                    let filepath = metadata
                        .enclosed_name()
                        .ok_or(ZipError::InvalidArchive("Invalid file path"))?;

                    let outpath = self.0.join(filepath);

                    if let Some(mode) = metadata.unix_mode() {
                        fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
                    }
                }

                Ok(())
            }
        }

        self.visit(&mut Extractor(directory.as_ref()))
    }
}

/// Visitor for ZipStreamReader
///
/// [`ZipStreamReader::visit`] drives an implementation of this trait: it
/// calls `visit_file` for the file entries it reads and
/// `visit_additional_metadata` for the metadata records it reads afterwards.
/// An error returned from either method stops the visit and is returned to
/// the caller.
pub trait ZipStreamVisitor {
    /// Called for each file entry read from the stream.
    ///
    ///  * `file` - contains the content of the file and most of the metadata,
    ///    except:
    ///     - `comment`: set to an empty string
    ///     - `data_start`: set to 0
    ///     - `external_attributes`: `unix_mode()`: will return None
    fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;

    /// This function is guaranteed to be called after all `visit_file`s.
    ///
    ///  * `metadata` - Provides missing metadata in `visit_file`.
    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
}

/// Additional metadata for the file.
///
/// Wraps the `ZipFileData` parsed from a central directory header and is
/// handed to [`ZipStreamVisitor::visit_additional_metadata`].
#[derive(Debug)]
pub struct ZipStreamFileMetadata(ZipFileData);

impl ZipStreamFileMetadata {
    /// Get the name of the file
    ///
    /// # Warnings
    ///
    /// It is dangerous to use this name directly when extracting an archive.
    /// It may contain an absolute path (`/etc/shadow`), or break out of the
    /// current directory (`../runtime`). Carelessly writing to these paths
    /// allows an attacker to craft a ZIP archive that will overwrite critical
    /// files.
    ///
    /// You can use the [`Self::enclosed_name`] method to validate the name
    /// as a safe path.
    pub fn name(&self) -> &str {
        &self.0.file_name
    }

    /// Get the name of the file, in the raw (internal) byte representation.
    ///
    /// The encoding of this data is currently undefined.
    pub fn name_raw(&self) -> &[u8] {
        &self.0.file_name_raw
    }

    /// Rewrite the path, ignoring any path components with special meaning.
    ///
    /// - Absolute paths are made relative
    /// - [`ParentDir`]s are ignored
    /// - Truncates the filename at a NULL byte
    ///
    /// This is appropriate if you need to be able to extract *something* from
    /// any archive, but will easily misrepresent trivial paths like
    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
    /// [`Self::enclosed_name`] is the better option in most scenarios.
    ///
    /// [`ParentDir`]: std::path::Component::ParentDir
    pub fn mangled_name(&self) -> ::std::path::PathBuf {
        self.0.file_name_sanitized()
    }

    /// Ensure the file path is safe to use as a [`Path`].
    ///
    /// - It can't contain NULL bytes
    /// - It can't resolve to a path outside the current directory
    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
    /// - It can't be an absolute path
    ///
    /// This will read well-formed ZIP files correctly, and is resistant
    /// to path-based exploits. It is recommended over
    /// [`Self::mangled_name`].
    pub fn enclosed_name(&self) -> Option<&Path> {
        self.0.enclosed_name()
    }

    /// Returns whether the file is actually a directory
    ///
    /// An entry counts as a directory when its name ends with `/` or `\`.
    /// An empty name is not a directory.
    pub fn is_dir(&self) -> bool {
        matches!(self.name().chars().next_back(), Some('/') | Some('\\'))
    }

    /// Returns whether the file is a regular file
    ///
    /// This is exactly the negation of [`Self::is_dir`].
    pub fn is_file(&self) -> bool {
        !self.is_dir()
    }

    /// Get the comment of the file
    pub fn comment(&self) -> &str {
        &self.0.file_comment
    }

    /// Get the starting offset of the data of the compressed file
    pub fn data_start(&self) -> u64 {
        self.0.data_start.load()
    }

    /// Get unix mode for the file
    pub fn unix_mode(&self) -> Option<u32> {
        self.0.unix_mode()
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Visitor that accepts every callback and records nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts `visit_file` calls in field 0 and
    /// `visit_additional_metadata` calls in field 1.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../../tests/data/invalid_offset.zip");
        ZipStreamReader::new(io::Cursor::new(bytes))
            .visit(&mut DummyVisitor)
            .unwrap_err();
    }

    #[test]
    fn invalid_offset2() {
        let bytes = include_bytes!("../../tests/data/invalid_offset2.zip");
        ZipStreamReader::new(io::Cursor::new(bytes))
            .visit(&mut DummyVisitor)
            .unwrap_err();
    }

    #[test]
    fn zip_read_streaming() {
        /// Remembers the names of visited files and checks that every file
        /// reported by the additional metadata was visited before.
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if !metadata.is_file() {
                    return Ok(());
                }

                assert!(
                    self.filenames.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );

                Ok(())
            }
        }

        let bytes = include_bytes!("../../tests/data/mimetype.zip");
        let reader = ZipStreamReader::new(io::Cursor::new(bytes));

        reader.visit(&mut V::default()).unwrap();
    }

    #[test]
    fn file_and_dir_predicates() {
        /// Checks `is_dir`/`is_file` against the entry's name prefix, and
        /// that every file reported by the additional metadata was visited.
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let full_name = file.enclosed_name().unwrap();
                let file_name = full_name.file_name().unwrap().to_str().unwrap();

                let is_expected_dir = file_name.starts_with("dir") && file.is_dir();
                let is_expected_file = file_name.starts_with("file") && file.is_file();
                assert!(is_expected_dir || is_expected_file);

                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if !metadata.is_file() {
                    return Ok(());
                }

                assert!(
                    self.filenames.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );

                Ok(())
            }
        }

        let bytes = include_bytes!("../../tests/data/files_and_dirs.zip");
        let reader = ZipStreamReader::new(io::Cursor::new(bytes));

        reader.visit(&mut V::default()).unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        let bytes = include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        );
        ZipStreamReader::new(io::Cursor::new(bytes))
            .visit(&mut DummyVisitor)
            .unwrap_err();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        let bytes = include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        );
        ZipStreamReader::new(io::Cursor::new(bytes))
            .visit(&mut DummyVisitor)
            .unwrap_err();
    }
}

Coverage Report

Created: 2025-06-04 06:23

Line	Count	Source (jump to first uncovered line)
1		use std::fs;
2		use std::io::{self, Read};
3		use std::path::Path;
4
5		use super::{
6		central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
7		ZipFileData, ZipResult,
8		};
9
10		use byteorder::{LittleEndian, ReadBytesExt};
11
12		/// Stream decoder for zip.
13		#[derive(Debug)]
14		pub struct ZipStreamReader<R>(R);
15
16		impl<R> ZipStreamReader<R> {
17		/// Create a new ZipStreamReader
18	0	pub fn new(reader: R) -> Self {
19	0	Self(reader)
20	0	}
21		}
22
23		impl<R: Read> ZipStreamReader<R> {
24	0	fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
25	0	// Give archive_offset and central_header_start dummy value 0, since
26	0	// they are not used in the output.
27	0	let archive_offset = 0;
28	0	let central_header_start = 0;
29
30		// Parse central header
31	0	let signature = self.0.read_u32::<LittleEndian>()?;
32	0	if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
33	0	Ok(None)
34		} else {
35	0	central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
36	0	.map(ZipStreamFileMetadata)
37	0	.map(Some)
38		}
39	0	}
40
41		/// Iteraate over the stream and extract all file and their
42		/// metadata.
43	0	pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
44	0	while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
45	0	visitor.visit_file(&mut file)?;
46		}
47
48	0	while let Some(metadata) = self.parse_central_directory()? {
49	0	visitor.visit_additional_metadata(&metadata)?;
50		}
51
52	0	Ok(())
53	0	}
54
55		/// Extract a Zip archive into a directory, overwriting files if they
56		/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
57		///
58		/// Extraction is not atomic; If an error is encountered, some of the files
59		/// may be left on disk.
60	0	pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
61		struct Extractor<'a>(&'a Path);
62		impl ZipStreamVisitor for Extractor<'_> {
63		fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
64		let filepath = file
65		.enclosed_name()
66		.ok_or(ZipError::InvalidArchive("Invalid file path"))?;
67
68		let outpath = self.0.join(filepath);
69
70		if file.name().ends_with('/') {
71		fs::create_dir_all(&outpath)?;
72		} else {
73		if let Some(p) = outpath.parent() {
74		fs::create_dir_all(p)?;
75		}
76		let mut outfile = fs::File::create(&outpath)?;
77		io::copy(file, &mut outfile)?;
78		}
79
80		Ok(())
81		}
82
83		#[allow(unused)]
84		fn visit_additional_metadata(
85		&mut self,
86		metadata: &ZipStreamFileMetadata,
87		) -> ZipResult<()> {
88		#[cfg(unix)]
89		{
90		let filepath = metadata
91		.enclosed_name()
92		.ok_or(ZipError::InvalidArchive("Invalid file path"))?;
93
94		let outpath = self.0.join(filepath);
95
96		use std::os::unix::fs::PermissionsExt;
97		if let Some(mode) = metadata.unix_mode() {
98		fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
99		}
100		}
101
102		Ok(())
103		}
104		}
105
106	0	self.visit(&mut Extractor(directory.as_ref()))
107	0	}
108		}
109
110		/// Visitor for ZipStreamReader
111		pub trait ZipStreamVisitor {
112		/// * `file` - contains the content of the file and most of the metadata,
113		/// except:
114		/// - `comment`: set to an empty string
115		/// - `data_start`: set to 0
116		/// - `external_attributes`: `unix_mode()`: will return None
117		fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;
118
119		/// This function is guranteed to be called after all `visit_file`s.
120		///
121		/// * `metadata` - Provides missing metadata in `visit_file`.
122		fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
123		}
124
125		/// Additional metadata for the file.
126		#[derive(Debug)]
127		pub struct ZipStreamFileMetadata(ZipFileData);
128
129		impl ZipStreamFileMetadata {
130		/// Get the name of the file
131		///
132		/// # Warnings
133		///
134		/// It is dangerous to use this name directly when extracting an archive.
135		/// It may contain an absolute path (`/etc/shadow`), or break out of the
136		/// current directory (`../runtime`). Carelessly writing to these paths
137		/// allows an attacker to craft a ZIP archive that will overwrite critical
138		/// files.
139		///
140		/// You can use the [`ZipFile::enclosed_name`] method to validate the name
141		/// as a safe path.
142		pub fn name(&self) -> &str {
143		&self.0.file_name
144		}
145
146		/// Get the name of the file, in the raw (internal) byte representation.
147		///
148		/// The encoding of this data is currently undefined.
149		pub fn name_raw(&self) -> &[u8] {
150		&self.0.file_name_raw
151		}
152
153		/// Rewrite the path, ignoring any path components with special meaning.
154		///
155		/// - Absolute paths are made relative
156		/// - [`ParentDir`]s are ignored
157		/// - Truncates the filename at a NULL byte
158		///
159		/// This is appropriate if you need to be able to extract something from
160		/// any archive, but will easily misrepresent trivial paths like
161		/// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
162		/// [`ZipFile::enclosed_name`] is the better option in most scenarios.
163		///
164		/// [`ParentDir`]: `Component::ParentDir`
165		pub fn mangled_name(&self) -> ::std::path::PathBuf {
166		self.0.file_name_sanitized()
167		}
168
169		/// Ensure the file path is safe to use as a [`Path`].
170		///
171		/// - It can't contain NULL bytes
172		/// - It can't resolve to a path outside the current directory
173		/// > `foo/../bar` is fine, `foo/../../bar` is not.
174		/// - It can't be an absolute path
175		///
176		/// This will read well-formed ZIP files correctly, and is resistant
177		/// to path-based exploits. It is recommended over
178		/// [`ZipFile::mangled_name`].
179		pub fn enclosed_name(&self) -> Option<&Path> {
180		self.0.enclosed_name()
181		}
182
183		/// Returns whether the file is actually a directory
184		pub fn is_dir(&self) -> bool {
185		self.name()
186		.chars()
187		.rev()
188		.next()
189	0	.map_or(false, \|c\| c == '/' \|\| c == '\\')
190		}
191
192		/// Returns whether the file is a regular file
193		pub fn is_file(&self) -> bool {
194		!self.is_dir()
195		}
196
197		/// Get the comment of the file
198		pub fn comment(&self) -> &str {
199		&self.0.file_comment
200		}
201
202		/// Get the starting offset of the data of the compressed file
203		pub fn data_start(&self) -> u64 {
204		self.0.data_start.load()
205		}
206
207		/// Get unix mode for the file
208		pub fn unix_mode(&self) -> Option<u32> {
209		self.0.unix_mode()
210		}
211		}
212
213		#[cfg(test)]
214		mod test {
215		use super::*;
216		use std::collections::BTreeSet;
217		use std::io;
218
219		struct DummyVisitor;
220		impl ZipStreamVisitor for DummyVisitor {
221		fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
222		Ok(())
223		}
224
225		fn visit_additional_metadata(
226		&mut self,
227		_metadata: &ZipStreamFileMetadata,
228		) -> ZipResult<()> {
229		Ok(())
230		}
231		}
232
233		#[derive(Default, Debug, Eq, PartialEq)]
234		struct CounterVisitor(u64, u64);
235		impl ZipStreamVisitor for CounterVisitor {
236		fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
237		self.0 += 1;
238		Ok(())
239		}
240
241		fn visit_additional_metadata(
242		&mut self,
243		_metadata: &ZipStreamFileMetadata,
244		) -> ZipResult<()> {
245		self.1 += 1;
246		Ok(())
247		}
248		}
249
250		#[test]
251		fn invalid_offset() {
252		ZipStreamReader::new(io::Cursor::new(include_bytes!(
253		"../../tests/data/invalid_offset.zip"
254		)))
255		.visit(&mut DummyVisitor)
256		.unwrap_err();
257		}
258
259		#[test]
260		fn invalid_offset2() {
261		ZipStreamReader::new(io::Cursor::new(include_bytes!(
262		"../../tests/data/invalid_offset2.zip"
263		)))
264		.visit(&mut DummyVisitor)
265		.unwrap_err();
266		}
267
268		#[test]
269		fn zip_read_streaming() {
270		let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
271		"../../tests/data/mimetype.zip"
272		)));
273
274		#[derive(Default)]
275		struct V {
276		filenames: BTreeSet<Box<str>>,
277		}
278		impl ZipStreamVisitor for V {
279		fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
280		if file.is_file() {
281		self.filenames.insert(file.name().into());
282		}
283
284		Ok(())
285		}
286		fn visit_additional_metadata(
287		&mut self,
288		metadata: &ZipStreamFileMetadata,
289		) -> ZipResult<()> {
290		if metadata.is_file() {
291		assert!(
292		self.filenames.contains(metadata.name()),
293		"{} is missing its file content",
294		metadata.name()
295		);
296		}
297
298		Ok(())
299		}
300		}
301
302		reader.visit(&mut V::default()).unwrap();
303		}
304
305		#[test]
306		fn file_and_dir_predicates() {
307		let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
308		"../../tests/data/files_and_dirs.zip"
309		)));
310
311		#[derive(Default)]
312		struct V {
313		filenames: BTreeSet<Box<str>>,
314		}
315		impl ZipStreamVisitor for V {
316		fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
317		let full_name = file.enclosed_name().unwrap();
318		let file_name = full_name.file_name().unwrap().to_str().unwrap();
319		assert!(
320		(file_name.starts_with("dir") && file.is_dir())
321		\|\| (file_name.starts_with("file") && file.is_file())
322		);
323
324		if file.is_file() {
325		self.filenames.insert(file.name().into());
326		}
327
328		Ok(())
329		}
330		fn visit_additional_metadata(
331		&mut self,
332		metadata: &ZipStreamFileMetadata,
333		) -> ZipResult<()> {
334		if metadata.is_file() {
335		assert!(
336		self.filenames.contains(metadata.name()),
337		"{} is missing its file content",
338		metadata.name()
339		);
340		}
341
342		Ok(())
343		}
344		}
345
346		reader.visit(&mut V::default()).unwrap();
347		}
348
349		/// test case to ensure we don't preemptively over allocate based on the
350		/// declared number of files in the CDE of an invalid zip when the number of
351		/// files declared is more than the alleged offset in the CDE
352		#[test]
353		fn invalid_cde_number_of_files_allocation_smaller_offset() {
354		ZipStreamReader::new(io::Cursor::new(include_bytes!(
355		"../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
356		)))
357		.visit(&mut DummyVisitor)
358		.unwrap_err();
359		}
360
361		/// test case to ensure we don't preemptively over allocate based on the
362		/// declared number of files in the CDE of an invalid zip when the number of
363		/// files declared is less than the alleged offset in the CDE
364		#[test]
365		fn invalid_cde_number_of_files_allocation_greater_offset() {
366		ZipStreamReader::new(io::Cursor::new(include_bytes!(
367		"../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
368		)))
369		.visit(&mut DummyVisitor)
370		.unwrap_err();
371		}
372		}