/rust/registry/src/index.crates.io-6f17d22bba15001f/png-0.17.16/src/decoder/zlib.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use super::{stream::FormatErrorInner, DecodingError, CHUNK_BUFFER_SIZE}; |
2 | | |
3 | | use fdeflate::Decompressor; |
4 | | |
5 | | /// Ergonomics wrapper around `fdeflate::Decompressor` for zlib compressed data. |
6 | | pub(super) struct ZlibStream { |
7 | | /// Current decoding state. |
8 | | state: Box<fdeflate::Decompressor>, |
9 | | /// If there has been a call to decompress already. |
10 | | started: bool, |
11 | | /// Remaining buffered decoded bytes. |
12 | | /// The decoder sometimes wants to inspect some already finished bytes for further decoding. So we |
13 | | /// keep a total of 32KB of decoded data available as long as more data may be appended. |
14 | | out_buffer: Vec<u8>, |
15 | | /// The first index of `out_buffer` where new data can be written. |
16 | | out_pos: usize, |
17 | | /// The first index of `out_buffer` that hasn't yet been passed to our client |
18 | | /// (i.e. not yet appended to the `image_data` parameter of `fn decompress` or `fn |
19 | | /// finish_compressed_chunks`). |
20 | | read_pos: usize, |
21 | | /// Limit on how many bytes can be decompressed in total. This field is mostly used for |
22 | | /// performance optimizations (e.g. to avoid allocating and zeroing out large buffers when only |
23 | | /// a small image is being decoded). |
24 | | max_total_output: usize, |
25 | | /// Ignore and do not calculate the Adler-32 checksum. Defaults to `true`. |
26 | | /// |
27 | | /// This flag overrides `TINFL_FLAG_COMPUTE_ADLER32`. |
28 | | /// |
29 | | /// This flag should not be modified after decompression has started. |
30 | | ignore_adler32: bool, |
31 | | } |
32 | | |
33 | | impl ZlibStream { |
34 | 4.72k | pub(crate) fn new() -> Self { |
35 | 4.72k | ZlibStream { |
36 | 4.72k | state: Box::new(Decompressor::new()), |
37 | 4.72k | started: false, |
38 | 4.72k | out_buffer: Vec::new(), |
39 | 4.72k | out_pos: 0, |
40 | 4.72k | read_pos: 0, |
41 | 4.72k | max_total_output: usize::MAX, |
42 | 4.72k | ignore_adler32: true, |
43 | 4.72k | } |
44 | 4.72k | } |
45 | | |
46 | 278 | pub(crate) fn reset(&mut self) { |
47 | 278 | self.started = false; |
48 | 278 | self.out_buffer.clear(); |
49 | 278 | self.out_pos = 0; |
50 | 278 | self.read_pos = 0; |
51 | 278 | self.max_total_output = usize::MAX; |
52 | 278 | *self.state = Decompressor::new(); |
53 | 278 | } |
54 | | |
55 | 4.43k | pub(crate) fn set_max_total_output(&mut self, n: usize) { |
56 | 4.43k | self.max_total_output = n; |
57 | 4.43k | } |
58 | | |
59 | | /// Set the `ignore_adler32` flag and return `true` if the flag was |
60 | | /// successfully set. |
61 | | /// |
62 | | /// The default is `true`. |
63 | | /// |
64 | | /// This flag cannot be modified after decompression has started until the |
65 | | /// [ZlibStream] is reset. |
66 | 4.72k | pub(crate) fn set_ignore_adler32(&mut self, flag: bool) -> bool { |
67 | 4.72k | if !self.started { |
68 | 4.72k | self.ignore_adler32 = flag; |
69 | 4.72k | true |
70 | | } else { |
71 | 0 | false |
72 | | } |
73 | 4.72k | } |
74 | | |
75 | | /// Return the `ignore_adler32` flag. |
76 | 0 | pub(crate) fn ignore_adler32(&self) -> bool { |
77 | 0 | self.ignore_adler32 |
78 | 0 | } |
79 | | |
80 | | /// Fill the decoded buffer as far as possible from `data`. |
81 | | /// On success returns the number of consumed input bytes. |
82 | 9.52k | pub(crate) fn decompress( |
83 | 9.52k | &mut self, |
84 | 9.52k | data: &[u8], |
85 | 9.52k | image_data: &mut Vec<u8>, |
86 | 9.52k | ) -> Result<usize, DecodingError> { |
87 | 9.52k | // There may be more data past the adler32 checksum at the end of the deflate stream. We |
88 | 9.52k | // match libpng's default behavior and ignore any trailing data. In the future we may want |
89 | 9.52k | // to add a flag to control this behavior. |
90 | 9.52k | if self.state.is_done() { |
91 | 553 | return Ok(data.len()); |
92 | 8.96k | } |
93 | 8.96k | |
94 | 8.96k | self.prepare_vec_for_appending(); |
95 | 8.96k | |
96 | 8.96k | if !self.started && self.ignore_adler32 { |
97 | 2.29k | self.state.ignore_adler32(); |
98 | 6.67k | } |
99 | | |
100 | 8.96k | let (in_consumed, out_consumed) = self |
101 | 8.96k | .state |
102 | 8.96k | .read(data, self.out_buffer.as_mut_slice(), self.out_pos, false) |
103 | 8.96k | .map_err(|err| { |
104 | 474 | DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into()) |
105 | 8.96k | })?; |
106 | | |
107 | 8.49k | self.started = true; |
108 | 8.49k | self.out_pos += out_consumed; |
109 | 8.49k | self.transfer_finished_data(image_data); |
110 | 8.49k | self.compact_out_buffer_if_needed(); |
111 | 8.49k | |
112 | 8.49k | Ok(in_consumed) |
113 | 9.52k | } |
114 | | |
115 | | /// Called after all consecutive IDAT chunks were handled. |
116 | | /// |
117 | | /// The compressed stream can be split on arbitrary byte boundaries. This enables some cleanup |
118 | | /// within the decompressor and flushing additional data which may have been kept back in case |
119 | | /// more data were passed to it. |
120 | 236 | pub(crate) fn finish_compressed_chunks( |
121 | 236 | &mut self, |
122 | 236 | image_data: &mut Vec<u8>, |
123 | 236 | ) -> Result<(), DecodingError> { |
124 | 236 | if !self.started { |
125 | 0 | return Ok(()); |
126 | 236 | } |
127 | | |
128 | 244 | while !self.state.is_done() { |
129 | 194 | self.prepare_vec_for_appending(); |
130 | 194 | let (_in_consumed, out_consumed) = self |
131 | 194 | .state |
132 | 194 | .read(&[], self.out_buffer.as_mut_slice(), self.out_pos, true) |
133 | 194 | .map_err(|err| { |
134 | 186 | DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into()) |
135 | 194 | })?; |
136 | | |
137 | 8 | self.out_pos += out_consumed; |
138 | 8 | |
139 | 8 | if !self.state.is_done() { |
140 | 0 | let transferred = self.transfer_finished_data(image_data); |
141 | 0 | assert!( |
142 | 0 | transferred > 0 || out_consumed > 0, |
143 | | "No more forward progress made in stream decoding." |
144 | | ); |
145 | 0 | self.compact_out_buffer_if_needed(); |
146 | 8 | } |
147 | | } |
148 | | |
149 | 50 | self.transfer_finished_data(image_data); |
150 | 50 | self.out_buffer.clear(); |
151 | 50 | Ok(()) |
152 | 236 | } |
153 | | |
154 | | /// Resize the vector to allow allocation of more data. |
155 | 9.16k | fn prepare_vec_for_appending(&mut self) { |
156 | 9.16k | // The `debug_assert` below explains why we can use `>=` instead of `>` in the condition |
157 | 9.16k | // that compares `self.out_pos >= self.max_total_output` in the next `if` statement. |
158 | 9.16k | debug_assert!(!self.state.is_done()); |
159 | 9.16k | if self.out_pos >= self.max_total_output { |
160 | 340 | // This can happen when the `max_total_output` was miscalculated (e.g. |
161 | 340 | // because the `IHDR` chunk was malformed and didn't match the `IDAT` chunk). In |
162 | 340 | // this case, let's reset `self.max_total_output` before further calculations. |
163 | 340 | self.max_total_output = usize::MAX; |
164 | 8.82k | } |
165 | | |
166 | 9.16k | let current_len = self.out_buffer.len(); |
167 | 9.16k | let desired_len = self |
168 | 9.16k | .out_pos |
169 | 9.16k | .saturating_add(CHUNK_BUFFER_SIZE) |
170 | 9.16k | .min(self.max_total_output); |
171 | 9.16k | if current_len >= desired_len { |
172 | 5.65k | return; |
173 | 3.50k | } |
174 | 3.50k | |
175 | 3.50k | let buffered_len = self.decoding_size(self.out_buffer.len()); |
176 | 3.50k | debug_assert!(self.out_buffer.len() <= buffered_len); |
177 | 3.50k | self.out_buffer.resize(buffered_len, 0u8); |
178 | 9.16k | } |
179 | | |
180 | 3.50k | fn decoding_size(&self, len: usize) -> usize { |
181 | 3.50k | // Allocate one more chunk size than currently or double the length while ensuring that the |
182 | 3.50k | // allocation is valid and that any cursor within it will be valid. |
183 | 3.50k | len |
184 | 3.50k | // This keeps the buffer size a power-of-two, required by miniz_oxide. |
185 | 3.50k | .saturating_add(CHUNK_BUFFER_SIZE.max(len)) |
186 | 3.50k | // Ensure all buffer indices are valid cursor positions. |
187 | 3.50k | // Note: both cut off and zero extension give correct results. |
188 | 3.50k | .min(u64::MAX as usize) |
189 | 3.50k | // Ensure the allocation request is valid. |
190 | 3.50k | // TODO: maximum allocation limits? |
191 | 3.50k | .min(isize::MAX as usize) |
192 | 3.50k | // Don't unnecessarily allocate more than `max_total_output`. |
193 | 3.50k | .min(self.max_total_output) |
194 | 3.50k | } |
195 | | |
196 | 8.54k | fn transfer_finished_data(&mut self, image_data: &mut Vec<u8>) -> usize { |
197 | 8.54k | let transferred = &self.out_buffer[self.read_pos..self.out_pos]; |
198 | 8.54k | image_data.extend_from_slice(transferred); |
199 | 8.54k | self.read_pos = self.out_pos; |
200 | 8.54k | transferred.len() |
201 | 8.54k | } |
202 | | |
203 | 8.49k | fn compact_out_buffer_if_needed(&mut self) { |
204 | | // [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that |
205 | | // "deflate/inflate compression with a sliding window (which is an upper bound on the |
206 | | // distances appearing in the deflate stream) of at most 32768 bytes". |
207 | | // |
208 | | // `fdeflate` requires that we keep this many most recently decompressed bytes in the |
209 | | // `out_buffer` - this allows referring back to them when handling "length and distance |
210 | | // codes" in the deflate stream). |
211 | | const LOOKBACK_SIZE: usize = 32768; |
212 | | |
213 | | // Compact `self.out_buffer` when "needed". Doing this conditionally helps to put an upper |
214 | | // bound on the amortized cost of copying the data within `self.out_buffer`. |
215 | | // |
216 | | // TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different |
217 | | // factor. (Early experiments seem to indicate that factor of 4 is faster than a factor of |
218 | | // 2 and 4 * `LOOKBACK_SIZE` seems like an acceptable memory trade-off. Higher factors |
219 | | // result in higher memory usage, but the compaction cost is lower - factor of 4 means |
220 | | // that 1 byte gets copied during compaction for 3 decompressed bytes.) |
221 | 8.49k | if self.out_pos > LOOKBACK_SIZE * 4 { |
222 | 2.96k | // Only preserve the `lookback_buffer` and "throw away" the earlier prefix. |
223 | 2.96k | let lookback_buffer = self.out_pos.saturating_sub(LOOKBACK_SIZE)..self.out_pos; |
224 | 2.96k | let preserved_len = lookback_buffer.len(); |
225 | 2.96k | self.out_buffer.copy_within(lookback_buffer, 0); |
226 | 2.96k | self.read_pos = preserved_len; |
227 | 2.96k | self.out_pos = preserved_len; |
228 | 5.53k | } |
229 | 8.49k | } |
230 | | } |