/src/zip/src/read/stream.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::fs; |
2 | | use std::io::{self, Read}; |
3 | | use std::path::Path; |
4 | | |
5 | | use super::{ |
6 | | central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile, |
7 | | ZipFileData, ZipResult, |
8 | | }; |
9 | | |
10 | | use byteorder::{LittleEndian, ReadBytesExt}; |
11 | | |
/// Streaming decoder for ZIP archives.
///
/// Wraps a reader `R`. The decoding methods only require `R: Read`, so the
/// archive is processed front to back without seeking.
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);
15 | | |
16 | | impl<R> ZipStreamReader<R> { |
17 | | /// Create a new ZipStreamReader |
18 | 0 | pub fn new(reader: R) -> Self { |
19 | 0 | Self(reader) |
20 | 0 | } |
21 | | } |
22 | | |
23 | | impl<R: Read> ZipStreamReader<R> { |
24 | 0 | fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> { |
25 | 0 | // Give archive_offset and central_header_start dummy value 0, since |
26 | 0 | // they are not used in the output. |
27 | 0 | let archive_offset = 0; |
28 | 0 | let central_header_start = 0; |
29 | | |
30 | | // Parse central header |
31 | 0 | let signature = self.0.read_u32::<LittleEndian>()?; |
32 | 0 | if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { |
33 | 0 | Ok(None) |
34 | | } else { |
35 | 0 | central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start) |
36 | 0 | .map(ZipStreamFileMetadata) |
37 | 0 | .map(Some) |
38 | | } |
39 | 0 | } |
40 | | |
41 | | /// Iteraate over the stream and extract all file and their |
42 | | /// metadata. |
43 | 0 | pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> { |
44 | 0 | while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { |
45 | 0 | visitor.visit_file(&mut file)?; |
46 | | } |
47 | | |
48 | 0 | while let Some(metadata) = self.parse_central_directory()? { |
49 | 0 | visitor.visit_additional_metadata(&metadata)?; |
50 | | } |
51 | | |
52 | 0 | Ok(()) |
53 | 0 | } |
54 | | |
55 | | /// Extract a Zip archive into a directory, overwriting files if they |
56 | | /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. |
57 | | /// |
58 | | /// Extraction is not atomic; If an error is encountered, some of the files |
59 | | /// may be left on disk. |
60 | 0 | pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> { |
61 | | struct Extractor<'a>(&'a Path); |
62 | | impl ZipStreamVisitor for Extractor<'_> { |
63 | | fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { |
64 | | let filepath = file |
65 | | .enclosed_name() |
66 | | .ok_or(ZipError::InvalidArchive("Invalid file path"))?; |
67 | | |
68 | | let outpath = self.0.join(filepath); |
69 | | |
70 | | if file.name().ends_with('/') { |
71 | | fs::create_dir_all(&outpath)?; |
72 | | } else { |
73 | | if let Some(p) = outpath.parent() { |
74 | | fs::create_dir_all(p)?; |
75 | | } |
76 | | let mut outfile = fs::File::create(&outpath)?; |
77 | | io::copy(file, &mut outfile)?; |
78 | | } |
79 | | |
80 | | Ok(()) |
81 | | } |
82 | | |
83 | | #[allow(unused)] |
84 | | fn visit_additional_metadata( |
85 | | &mut self, |
86 | | metadata: &ZipStreamFileMetadata, |
87 | | ) -> ZipResult<()> { |
88 | | #[cfg(unix)] |
89 | | { |
90 | | let filepath = metadata |
91 | | .enclosed_name() |
92 | | .ok_or(ZipError::InvalidArchive("Invalid file path"))?; |
93 | | |
94 | | let outpath = self.0.join(filepath); |
95 | | |
96 | | use std::os::unix::fs::PermissionsExt; |
97 | | if let Some(mode) = metadata.unix_mode() { |
98 | | fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?; |
99 | | } |
100 | | } |
101 | | |
102 | | Ok(()) |
103 | | } |
104 | | } |
105 | | |
106 | 0 | self.visit(&mut Extractor(directory.as_ref())) |
107 | 0 | } |
108 | | } |
109 | | |
110 | | /// Visitor for ZipStreamReader |
111 | | pub trait ZipStreamVisitor { |
112 | | /// * `file` - contains the content of the file and most of the metadata, |
113 | | /// except: |
114 | | /// - `comment`: set to an empty string |
115 | | /// - `data_start`: set to 0 |
116 | | /// - `external_attributes`: `unix_mode()`: will return None |
117 | | fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; |
118 | | |
119 | | /// This function is guranteed to be called after all `visit_file`s. |
120 | | /// |
121 | | /// * `metadata` - Provides missing metadata in `visit_file`. |
122 | | fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; |
123 | | } |
124 | | |
125 | | /// Additional metadata for the file. |
126 | | #[derive(Debug)] |
127 | | pub struct ZipStreamFileMetadata(ZipFileData); |
128 | | |
129 | | impl ZipStreamFileMetadata { |
130 | | /// Get the name of the file |
131 | | /// |
132 | | /// # Warnings |
133 | | /// |
134 | | /// It is dangerous to use this name directly when extracting an archive. |
135 | | /// It may contain an absolute path (`/etc/shadow`), or break out of the |
136 | | /// current directory (`../runtime`). Carelessly writing to these paths |
137 | | /// allows an attacker to craft a ZIP archive that will overwrite critical |
138 | | /// files. |
139 | | /// |
140 | | /// You can use the [`ZipFile::enclosed_name`] method to validate the name |
141 | | /// as a safe path. |
142 | | pub fn name(&self) -> &str { |
143 | | &self.0.file_name |
144 | | } |
145 | | |
146 | | /// Get the name of the file, in the raw (internal) byte representation. |
147 | | /// |
148 | | /// The encoding of this data is currently undefined. |
149 | | pub fn name_raw(&self) -> &[u8] { |
150 | | &self.0.file_name_raw |
151 | | } |
152 | | |
153 | | /// Rewrite the path, ignoring any path components with special meaning. |
154 | | /// |
155 | | /// - Absolute paths are made relative |
156 | | /// - [`ParentDir`]s are ignored |
157 | | /// - Truncates the filename at a NULL byte |
158 | | /// |
159 | | /// This is appropriate if you need to be able to extract *something* from |
160 | | /// any archive, but will easily misrepresent trivial paths like |
161 | | /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, |
162 | | /// [`ZipFile::enclosed_name`] is the better option in most scenarios. |
163 | | /// |
164 | | /// [`ParentDir`]: `Component::ParentDir` |
165 | | pub fn mangled_name(&self) -> ::std::path::PathBuf { |
166 | | self.0.file_name_sanitized() |
167 | | } |
168 | | |
169 | | /// Ensure the file path is safe to use as a [`Path`]. |
170 | | /// |
171 | | /// - It can't contain NULL bytes |
172 | | /// - It can't resolve to a path outside the current directory |
173 | | /// > `foo/../bar` is fine, `foo/../../bar` is not. |
174 | | /// - It can't be an absolute path |
175 | | /// |
176 | | /// This will read well-formed ZIP files correctly, and is resistant |
177 | | /// to path-based exploits. It is recommended over |
178 | | /// [`ZipFile::mangled_name`]. |
179 | | pub fn enclosed_name(&self) -> Option<&Path> { |
180 | | self.0.enclosed_name() |
181 | | } |
182 | | |
183 | | /// Returns whether the file is actually a directory |
184 | | pub fn is_dir(&self) -> bool { |
185 | | self.name() |
186 | | .chars() |
187 | | .rev() |
188 | | .next() |
189 | 0 | .map_or(false, |c| c == '/' || c == '\\') |
190 | | } |
191 | | |
192 | | /// Returns whether the file is a regular file |
193 | | pub fn is_file(&self) -> bool { |
194 | | !self.is_dir() |
195 | | } |
196 | | |
197 | | /// Get the comment of the file |
198 | | pub fn comment(&self) -> &str { |
199 | | &self.0.file_comment |
200 | | } |
201 | | |
202 | | /// Get the starting offset of the data of the compressed file |
203 | | pub fn data_start(&self) -> u64 { |
204 | | self.0.data_start.load() |
205 | | } |
206 | | |
207 | | /// Get unix mode for the file |
208 | | pub fn unix_mode(&self) -> Option<u32> { |
209 | | self.0.unix_mode() |
210 | | } |
211 | | } |
212 | | |
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Builds a stream reader over the bytes of an in-memory archive.
    fn stream_over(bytes: &'static [u8]) -> ZipStreamReader<io::Cursor<&'static [u8]>> {
        ZipStreamReader::new(io::Cursor::new(bytes))
    }

    /// Visitor that accepts everything and records nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts `visit_file` calls in `.0` and
    /// `visit_additional_metadata` calls in `.1`.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    #[test]
    fn invalid_offset() {
        stream_over(include_bytes!("../../tests/data/invalid_offset.zip"))
            .visit(&mut DummyVisitor)
            .unwrap_err();
    }

    #[test]
    fn invalid_offset2() {
        stream_over(include_bytes!("../../tests/data/invalid_offset2.zip"))
            .visit(&mut DummyVisitor)
            .unwrap_err();
    }

    /// Every regular file reported by the metadata pass must already have
    /// been seen, with its content, during the file pass.
    #[test]
    fn zip_read_streaming() {
        #[derive(Default)]
        struct SeenFiles {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for SeenFiles {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }

            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if metadata.is_file() {
                    assert!(
                        self.filenames.contains(metadata.name()),
                        "{} is missing its file content",
                        metadata.name()
                    );
                }

                Ok(())
            }
        }

        let mut visitor = SeenFiles::default();
        stream_over(include_bytes!("../../tests/data/mimetype.zip"))
            .visit(&mut visitor)
            .unwrap();
    }

    /// Entries named `dir*` must report `is_dir`, entries named `file*` must
    /// report `is_file`, and the metadata pass must agree with the file pass.
    #[test]
    fn file_and_dir_predicates() {
        #[derive(Default)]
        struct PredicateChecker {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for PredicateChecker {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let full_name = file.enclosed_name().unwrap();
                let file_name = full_name.file_name().unwrap().to_str().unwrap();
                assert!(
                    (file_name.starts_with("dir") && file.is_dir())
                        || (file_name.starts_with("file") && file.is_file())
                );

                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }

            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if metadata.is_file() {
                    assert!(
                        self.filenames.contains(metadata.name()),
                        "{} is missing its file content",
                        metadata.name()
                    );
                }

                Ok(())
            }
        }

        let mut visitor = PredicateChecker::default();
        stream_over(include_bytes!("../../tests/data/files_and_dirs.zip"))
            .visit(&mut visitor)
            .unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        stream_over(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        ))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        stream_over(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        ))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }
}