/rust/registry/src/index.crates.io-6f17d22bba15001f/flate2-1.0.30/src/gz/bufread.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::cmp; |
2 | | use std::io; |
3 | | use std::io::prelude::*; |
4 | | use std::mem; |
5 | | |
6 | | use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; |
7 | | use crate::crc::CrcReader; |
8 | | use crate::deflate; |
9 | | use crate::Compression; |
10 | | |
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `*pos` is advanced by the number of bytes copied, and that number is
/// returned. Nothing is copied (and 0 is returned) when `into` is empty or
/// when `*pos` already equals `from.len()`.
///
/// # Panics
///
/// Panics if `*pos` is greater than `from.len()`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let remaining = &from[*pos..];
    let n = cmp::min(into.len(), remaining.len());
    // Both sides are sliced to exactly `n` bytes, which is what
    // `copy_from_slice` requires.
    into[..n].copy_from_slice(&remaining[..n]);
    *pos += n;
    n
}
19 | | |
/// A gzip streaming encoder
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::fast());
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate encoder reading from a CRC-tracking wrapper around the
    // caller's input; the wrapper's `crc()` supplies the footer values.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Already-serialized gzip header bytes, emitted before any compressed data.
    header: Vec<u8>,
    // Cursor into `header` while the header is being emitted. Once `eof` is
    // set it is reset to 0 and reused as the cursor into the 8-byte footer.
    pos: usize,
    // Set when `inner` has reported end of stream; later reads serve the footer.
    eof: bool,
}
57 | | |
58 | 0 | pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
59 | 0 | let crc = CrcReader::new(r); |
60 | 0 | GzEncoder { |
61 | 0 | inner: deflate::bufread::DeflateEncoder::new(crc, lvl), |
62 | 0 | header, |
63 | 0 | pos: 0, |
64 | 0 | eof: false, |
65 | 0 | } |
66 | 0 | } |
67 | | |
68 | | impl<R: BufRead> GzEncoder<R> { |
69 | | /// Creates a new encoder which will use the given compression level. |
70 | | /// |
71 | | /// The encoder is not configured specially for the emitted header. For |
72 | | /// header configuration, see the `GzBuilder` type. |
73 | | /// |
74 | | /// The data read from the stream `r` will be compressed and available |
75 | | /// through the returned reader. |
76 | 0 | pub fn new(r: R, level: Compression) -> GzEncoder<R> { |
77 | 0 | GzBuilder::new().buf_read(r, level) |
78 | 0 | } |
79 | | |
80 | 0 | fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { |
81 | 0 | if self.pos == 8 { |
82 | 0 | return Ok(0); |
83 | 0 | } |
84 | 0 | let crc = self.inner.get_ref().crc(); |
85 | 0 | let ref arr = [ |
86 | 0 | (crc.sum() >> 0) as u8, |
87 | 0 | (crc.sum() >> 8) as u8, |
88 | 0 | (crc.sum() >> 16) as u8, |
89 | 0 | (crc.sum() >> 24) as u8, |
90 | 0 | (crc.amount() >> 0) as u8, |
91 | 0 | (crc.amount() >> 8) as u8, |
92 | 0 | (crc.amount() >> 16) as u8, |
93 | 0 | (crc.amount() >> 24) as u8, |
94 | 0 | ]; |
95 | 0 | Ok(copy(into, arr, &mut self.pos)) |
96 | 0 | } |
97 | | } |
98 | | |
99 | | impl<R> GzEncoder<R> { |
100 | | /// Acquires a reference to the underlying reader. |
101 | 0 | pub fn get_ref(&self) -> &R { |
102 | 0 | self.inner.get_ref().get_ref() |
103 | 0 | } |
104 | | |
105 | | /// Acquires a mutable reference to the underlying reader. |
106 | | /// |
107 | | /// Note that mutation of the reader may result in surprising results if |
108 | | /// this encoder is continued to be used. |
109 | 0 | pub fn get_mut(&mut self) -> &mut R { |
110 | 0 | self.inner.get_mut().get_mut() |
111 | 0 | } |
112 | | |
113 | | /// Returns the underlying stream, consuming this encoder |
114 | 0 | pub fn into_inner(self) -> R { |
115 | 0 | self.inner.into_inner().into_inner() |
116 | 0 | } |
117 | | } |
118 | | |
/// Splits an 8-byte gzip trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`: `crc` is decoded from bytes 0..4 and `amt` from
/// bytes 4..8.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
131 | | |
132 | | impl<R: BufRead> Read for GzEncoder<R> { |
133 | 0 | fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { |
134 | 0 | let mut amt = 0; |
135 | 0 | if self.eof { |
136 | 0 | return self.read_footer(into); |
137 | 0 | } else if self.pos < self.header.len() { |
138 | 0 | amt += copy(into, &self.header, &mut self.pos); |
139 | 0 | if amt == into.len() { |
140 | 0 | return Ok(amt); |
141 | 0 | } |
142 | 0 | let tmp = into; |
143 | 0 | into = &mut tmp[amt..]; |
144 | 0 | } |
145 | 0 | match self.inner.read(into)? { |
146 | | 0 => { |
147 | 0 | self.eof = true; |
148 | 0 | self.pos = 0; |
149 | 0 | self.read_footer(into) |
150 | | } |
151 | 0 | n => Ok(amt + n), |
152 | | } |
153 | 0 | } |
154 | | } |
155 | | |
156 | | impl<R: BufRead + Write> Write for GzEncoder<R> { |
157 | 0 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
158 | 0 | self.get_mut().write(buf) |
159 | 0 | } |
160 | | |
161 | 0 | fn flush(&mut self) -> io::Result<()> { |
162 | 0 | self.get_mut().flush() |
163 | 0 | } |
164 | | } |
165 | | |
/// A decoder for a single member of a [gzip file].
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// After reading a single member of the gzip data this reader will return
/// Ok(0) even if there are more bytes available in the underlying reader.
/// If you need the following bytes, call `into_inner()` after Ok(0) to
/// recover the underlying reader.
///
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
/// or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #    let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #    e.write_all(b"Hello World").unwrap();
/// #    let bytes = e.finish().unwrap();
/// #    println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Where the decoder currently is: header, body, trailer, error or end.
    state: GzState,
    // CRC-tracking wrapper around the deflate decoder; its `crc()` values are
    // compared against the trailer once the body is exhausted.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // When true, a verified trailer followed by more input starts parsing
    // another header instead of ending the stream.
    multi: bool,
}
216 | | |
/// Internal progress marker for [`GzDecoder`].
#[derive(Debug)]
enum GzState {
    /// The header is still being parsed; the parser carries the progress so far.
    Header(GzHeaderParser),
    /// The header is complete and reads go through the decompressing reader.
    Body(GzHeader),
    /// The body returned 0 bytes; the 8-byte trailer is being collected.
    /// The `usize` is how many trailer bytes are in the buffer so far.
    Finished(GzHeader, usize, [u8; 8]),
    /// Header parsing failed in the constructor; the error is handed out by
    /// the next `read` call.
    Err(io::Error),
    /// Terminal state: every further `read` returns `Ok(0)`. The header is
    /// `None` when this state was reached from `Err`.
    End(Option<GzHeader>),
}
225 | | |
226 | | impl<R: BufRead> GzDecoder<R> { |
227 | | /// Creates a new decoder from the given reader, immediately parsing the |
228 | | /// gzip header. |
229 | 0 | pub fn new(mut r: R) -> GzDecoder<R> { |
230 | 0 | let mut header_parser = GzHeaderParser::new(); |
231 | | |
232 | 0 | let state = match header_parser.parse(&mut r) { |
233 | 0 | Ok(_) => GzState::Body(GzHeader::from(header_parser)), |
234 | 0 | Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { |
235 | 0 | GzState::Header(header_parser) |
236 | | } |
237 | 0 | Err(err) => GzState::Err(err), |
238 | | }; |
239 | | |
240 | 0 | GzDecoder { |
241 | 0 | state, |
242 | 0 | reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), |
243 | 0 | multi: false, |
244 | 0 | } |
245 | 0 | } |
246 | | |
247 | 0 | fn multi(mut self, flag: bool) -> GzDecoder<R> { |
248 | 0 | self.multi = flag; |
249 | 0 | self |
250 | 0 | } |
251 | | } |
252 | | |
253 | | impl<R> GzDecoder<R> { |
254 | | /// Returns the header associated with this stream, if it was valid |
255 | 0 | pub fn header(&self) -> Option<&GzHeader> { |
256 | 0 | match &self.state { |
257 | 0 | GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), |
258 | 0 | GzState::End(header) => header.as_ref(), |
259 | 0 | _ => None, |
260 | | } |
261 | 0 | } |
262 | | |
263 | | /// Acquires a reference to the underlying reader. |
264 | 0 | pub fn get_ref(&self) -> &R { |
265 | 0 | self.reader.get_ref().get_ref() |
266 | 0 | } |
267 | | |
268 | | /// Acquires a mutable reference to the underlying stream. |
269 | | /// |
270 | | /// Note that mutation of the stream may result in surprising results if |
271 | | /// this decoder is continued to be used. |
272 | 0 | pub fn get_mut(&mut self) -> &mut R { |
273 | 0 | self.reader.get_mut().get_mut() |
274 | 0 | } |
275 | | |
276 | | /// Consumes this decoder, returning the underlying reader. |
277 | 0 | pub fn into_inner(self) -> R { |
278 | 0 | self.reader.into_inner().into_inner() |
279 | 0 | } |
280 | | } |
281 | | |
impl<R: BufRead> Read for GzDecoder<R> {
    /// Advances the decoder's state machine until it can return decompressed
    /// bytes, an error, or `Ok(0)`.
    ///
    /// Each loop iteration handles one state; transitions that produce no
    /// output fall through to the next iteration.
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        loop {
            match &mut self.state {
                GzState::Header(parser) => {
                    // Parse from the raw underlying reader, bypassing the CRC
                    // and deflate layers. On error `?` returns before the
                    // state changes, so a later call parses again.
                    parser.parse(self.reader.get_mut().get_mut())?;
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
                }
                GzState::Body(header) => {
                    // Return before the read below, whose 0 result is taken
                    // to mean the body has ended.
                    if into.is_empty() {
                        return Ok(0);
                    }
                    match self.reader.read(into)? {
                        0 => {
                            // Body exhausted: start collecting the trailer.
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
                        }
                        n => {
                            return Ok(n);
                        }
                    }
                }
                GzState::Finished(header, pos, buf) => {
                    if *pos < buf.len() {
                        // Trailer bytes come from the raw underlying reader.
                        // NOTE(review): `read_into` presumably errors on
                        // premature EOF rather than returning 0; confirm.
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
                    } else {
                        let (crc, amt) = finish(&buf);

                        // Compare the trailer with what the CRC reader tracked.
                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
                            self.state = GzState::End(Some(mem::take(header)));
                            return Err(corrupt());
                        } else if self.multi {
                            // Any remaining input is treated as another member.
                            let is_eof = self
                                .reader
                                .get_mut()
                                .get_mut()
                                .fill_buf()
                                .map(|buf| buf.is_empty())?;

                            if is_eof {
                                self.state = GzState::End(Some(mem::take(header)));
                            } else {
                                // Reset the CRC reader and the deflate decoder
                                // before parsing the next header.
                                self.reader.reset();
                                self.reader.get_mut().reset_data();
                                self.state = GzState::Header(GzHeaderParser::new())
                            }
                        } else {
                            self.state = GzState::End(Some(mem::take(header)));
                        }
                    }
                }
                GzState::Err(err) => {
                    // Move the stored error out, leaving a placeholder that
                    // is dropped when the state is overwritten just below.
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
                    self.state = GzState::End(None);
                    return result;
                }
                GzState::End(_) => return Ok(0),
            }
        }
    }
}
342 | | |
343 | | impl<R: BufRead + Write> Write for GzDecoder<R> { |
344 | 0 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
345 | 0 | self.get_mut().write(buf) |
346 | 0 | } |
347 | | |
348 | 0 | fn flush(&mut self) -> io::Result<()> { |
349 | 0 | self.get_mut().flush() |
350 | 0 | } |
351 | | } |
352 | | |
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// A gzip file consists of a series of *members* concatenated one after another.
/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
/// underlying reader does. For a file, this reads to the end of the file.
///
/// To handle members separately, see [GzDecoder] or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// This type is a wrapper around [`GzDecoder`] with multi-member handling
/// switched on.
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #    let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #    e.write_all(b"Hello World").unwrap();
/// #    let bytes = e.finish().unwrap();
/// #    println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
397 | | |
398 | | impl<R: BufRead> MultiGzDecoder<R> { |
399 | | /// Creates a new decoder from the given reader, immediately parsing the |
400 | | /// (first) gzip header. If the gzip stream contains multiple members all will |
401 | | /// be decoded. |
402 | 0 | pub fn new(r: R) -> MultiGzDecoder<R> { |
403 | 0 | MultiGzDecoder(GzDecoder::new(r).multi(true)) |
404 | 0 | } |
405 | | } |
406 | | |
407 | | impl<R> MultiGzDecoder<R> { |
408 | | /// Returns the current header associated with this stream, if it's valid |
409 | 0 | pub fn header(&self) -> Option<&GzHeader> { |
410 | 0 | self.0.header() |
411 | 0 | } |
412 | | |
413 | | /// Acquires a reference to the underlying reader. |
414 | 0 | pub fn get_ref(&self) -> &R { |
415 | 0 | self.0.get_ref() |
416 | 0 | } |
417 | | |
418 | | /// Acquires a mutable reference to the underlying stream. |
419 | | /// |
420 | | /// Note that mutation of the stream may result in surprising results if |
421 | | /// this decoder is continued to be used. |
422 | 0 | pub fn get_mut(&mut self) -> &mut R { |
423 | 0 | self.0.get_mut() |
424 | 0 | } |
425 | | |
426 | | /// Consumes this decoder, returning the underlying reader. |
427 | 0 | pub fn into_inner(self) -> R { |
428 | 0 | self.0.into_inner() |
429 | 0 | } |
430 | | } |
431 | | |
432 | | impl<R: BufRead> Read for MultiGzDecoder<R> { |
433 | 0 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
434 | 0 | self.0.read(into) |
435 | 0 | } |
436 | | } |
437 | | |
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` rather than `write`: a bare `write` may accept only
            // part of the input and its returned count was being ignored.
            e.write_all(expected.as_bytes()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Probe with a non-empty buffer: a zero-length destination makes any
        // reader return 0, so it would not show that the decoder has stopped.
        let mut probe = [0u8; 8];
        assert_eq!(
            decoder.read(&mut probe).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
483 | | } |