/rust/registry/src/index.crates.io-6f17d22bba15001f/base64-0.21.0/src/read/decoder.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use crate::{engine::Engine, DecodeError}; |
2 | | use std::{cmp, fmt, io}; |
3 | | |
// Size of the internal buffer of encoded (base64) bytes read from the delegate reader.
// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
10 | | |
/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
///
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    // The engine used to decode the buffered base64 bytes.
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    // Holds b64 data read from the delegate reader.
    // Invariant: the pending data lives in b64_buffer[b64_offset..b64_offset + b64_len].
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk in to, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to
    // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    // into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; 3],
    // index of start of decoded data
    decoded_offset: usize,
    // length of decoded data
    decoded_len: usize,
    // used to provide accurate offsets in errors (total base64 bytes decoded so far)
    total_b64_decoded: usize,
}
57 | | |
58 | | impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { |
59 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
60 | 0 | f.debug_struct("DecoderReader") |
61 | 0 | .field("b64_offset", &self.b64_offset) |
62 | 0 | .field("b64_len", &self.b64_len) |
63 | 0 | .field("decoded_buffer", &self.decoded_buffer) |
64 | 0 | .field("decoded_offset", &self.decoded_offset) |
65 | 0 | .field("decoded_len", &self.decoded_len) |
66 | 0 | .field("total_b64_decoded", &self.total_b64_decoded) |
67 | 0 | .finish() |
68 | 0 | } |
69 | | } |
70 | | |
71 | | impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { |
72 | | /// Create a new decoder that will read from the provided reader `r`. |
73 | 0 | pub fn new(reader: R, engine: &'e E) -> Self { |
74 | 0 | DecoderReader { |
75 | 0 | engine, |
76 | 0 | inner: reader, |
77 | 0 | b64_buffer: [0; BUF_SIZE], |
78 | 0 | b64_offset: 0, |
79 | 0 | b64_len: 0, |
80 | 0 | decoded_buffer: [0; DECODED_CHUNK_SIZE], |
81 | 0 | decoded_offset: 0, |
82 | 0 | decoded_len: 0, |
83 | 0 | total_b64_decoded: 0, |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | | /// Write as much as possible of the decoded buffer into the target buffer. |
88 | | /// Must only be called when there is something to write and space to write into. |
89 | | /// Returns a Result with the number of (decoded) bytes copied. |
90 | 0 | fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
91 | 0 | debug_assert!(self.decoded_len > 0); |
92 | 0 | debug_assert!(!buf.is_empty()); |
93 | | |
94 | 0 | let copy_len = cmp::min(self.decoded_len, buf.len()); |
95 | 0 | debug_assert!(copy_len > 0); |
96 | 0 | debug_assert!(copy_len <= self.decoded_len); |
97 | | |
98 | 0 | buf[..copy_len].copy_from_slice( |
99 | 0 | &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len], |
100 | 0 | ); |
101 | 0 |
|
102 | 0 | self.decoded_offset += copy_len; |
103 | 0 | self.decoded_len -= copy_len; |
104 | | |
105 | 0 | debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); |
106 | | |
107 | 0 | Ok(copy_len) |
108 | 0 | } |
109 | | |
110 | | /// Read into the remaining space in the buffer after the current contents. |
111 | | /// Must only be called when there is space to read into in the buffer. |
112 | | /// Returns the number of bytes read. |
113 | 0 | fn read_from_delegate(&mut self) -> io::Result<usize> { |
114 | 0 | debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); |
115 | | |
116 | 0 | let read = self |
117 | 0 | .inner |
118 | 0 | .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; |
119 | 0 | self.b64_len += read; |
120 | | |
121 | 0 | debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); |
122 | | |
123 | 0 | Ok(read) |
124 | 0 | } |
125 | | |
126 | | /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the |
127 | | /// caller's responsibility to choose the number of b64 bytes to decode correctly. |
128 | | /// |
129 | | /// Returns a Result with the number of decoded bytes written to `buf`. |
130 | 0 | fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> { |
131 | 0 | debug_assert!(self.b64_len >= num_bytes); |
132 | 0 | debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); |
133 | 0 | debug_assert!(!buf.is_empty()); |
134 | | |
135 | 0 | let decoded = self |
136 | 0 | .engine |
137 | 0 | .internal_decode( |
138 | 0 | &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes], |
139 | 0 | buf, |
140 | 0 | self.engine.internal_decoded_len_estimate(num_bytes), |
141 | 0 | ) |
142 | 0 | .map_err(|e| match e { |
143 | 0 | DecodeError::InvalidByte(offset, byte) => { |
144 | 0 | DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) |
145 | | } |
146 | 0 | DecodeError::InvalidLength => DecodeError::InvalidLength, |
147 | 0 | DecodeError::InvalidLastSymbol(offset, byte) => { |
148 | 0 | DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) |
149 | | } |
150 | 0 | DecodeError::InvalidPadding => DecodeError::InvalidPadding, |
151 | 0 | }) |
152 | 0 | .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; |
153 | | |
154 | 0 | self.total_b64_decoded += num_bytes; |
155 | 0 | self.b64_offset += num_bytes; |
156 | 0 | self.b64_len -= num_bytes; |
157 | | |
158 | 0 | debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); |
159 | | |
160 | 0 | Ok(decoded) |
161 | 0 | } |
162 | | |
163 | | /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded |
164 | | /// input from. |
165 | | /// |
166 | | /// Because `DecoderReader` performs internal buffering, the state of the inner reader is |
167 | | /// unspecified. This function is mainly provided because the inner reader type may provide |
168 | | /// additional functionality beyond the `Read` implementation which may still be useful. |
169 | 0 | pub fn into_inner(self) -> R { |
170 | 0 | self.inner |
171 | 0 | } |
172 | | } |
173 | | |
174 | | impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { |
175 | | /// Decode input from the wrapped reader. |
176 | | /// |
177 | | /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes |
178 | | /// written in `buf`. |
179 | | /// |
180 | | /// Where possible, this function buffers base64 to minimize the number of read() calls to the |
181 | | /// delegate reader. |
182 | | /// |
183 | | /// # Errors |
184 | | /// |
185 | | /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid |
186 | | /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. |
187 | 0 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
188 | 0 | if buf.is_empty() { |
189 | 0 | return Ok(0); |
190 | 0 | } |
191 | | |
192 | | // offset == BUF_SIZE when we copied it all last time |
193 | 0 | debug_assert!(self.b64_offset <= BUF_SIZE); |
194 | 0 | debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); |
195 | 0 | debug_assert!(if self.b64_offset == BUF_SIZE { |
196 | 0 | self.b64_len == 0 |
197 | | } else { |
198 | 0 | self.b64_len <= BUF_SIZE |
199 | | }); |
200 | | |
201 | 0 | debug_assert!(if self.decoded_len == 0 { |
202 | | // can be = when we were able to copy the complete chunk |
203 | 0 | self.decoded_offset <= DECODED_CHUNK_SIZE |
204 | | } else { |
205 | 0 | self.decoded_offset < DECODED_CHUNK_SIZE |
206 | | }); |
207 | | |
208 | | // We shouldn't ever decode into here when we can't immediately write at least one byte into |
209 | | // the provided buf, so the effective length should only be 3 momentarily between when we |
210 | | // decode and when we copy into the target buffer. |
211 | 0 | debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); |
212 | 0 | debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); |
213 | | |
214 | 0 | if self.decoded_len > 0 { |
215 | | // we have a few leftover decoded bytes; flush that rather than pull in more b64 |
216 | 0 | self.flush_decoded_buf(buf) |
217 | | } else { |
218 | 0 | let mut at_eof = false; |
219 | 0 | while self.b64_len < BASE64_CHUNK_SIZE { |
220 | | // Work around lack of copy_within, which is only present in 1.37 |
221 | | // Copy any bytes we have to the start of the buffer. |
222 | | // We know we have < 1 chunk, so we can use a tiny tmp buffer. |
223 | 0 | let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE]; |
224 | 0 | memmove_buf[..self.b64_len].copy_from_slice( |
225 | 0 | &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len], |
226 | 0 | ); |
227 | 0 | self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]); |
228 | 0 | self.b64_offset = 0; |
229 | | |
230 | | // then fill in more data |
231 | 0 | let read = self.read_from_delegate()?; |
232 | 0 | if read == 0 { |
233 | | // we never pass in an empty buf, so 0 => we've hit EOF |
234 | 0 | at_eof = true; |
235 | 0 | break; |
236 | 0 | } |
237 | | } |
238 | | |
239 | 0 | if self.b64_len == 0 { |
240 | 0 | debug_assert!(at_eof); |
241 | | // we must be at EOF, and we have no data left to decode |
242 | 0 | return Ok(0); |
243 | 0 | }; |
244 | | |
245 | 0 | debug_assert!(if at_eof { |
246 | | // if we are at eof, we may not have a complete chunk |
247 | 0 | self.b64_len > 0 |
248 | | } else { |
249 | | // otherwise, we must have at least one chunk |
250 | 0 | self.b64_len >= BASE64_CHUNK_SIZE |
251 | | }); |
252 | | |
253 | 0 | debug_assert_eq!(0, self.decoded_len); |
254 | | |
255 | 0 | if buf.len() < DECODED_CHUNK_SIZE { |
256 | | // caller requested an annoyingly short read |
257 | | // have to write to a tmp buf first to avoid double mutable borrow |
258 | 0 | let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE]; |
259 | 0 | // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have |
260 | 0 | // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64 |
261 | 0 | // tokens, not 1, since 1 token can't decode to 1 byte). |
262 | 0 | let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE); |
263 | | |
264 | 0 | let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?; |
265 | 0 | self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]); |
266 | 0 |
|
267 | 0 | self.decoded_offset = 0; |
268 | 0 | self.decoded_len = decoded; |
269 | | |
270 | | // can be less than 3 on last block due to padding |
271 | 0 | debug_assert!(decoded <= 3); |
272 | | |
273 | 0 | self.flush_decoded_buf(buf) |
274 | | } else { |
275 | 0 | let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE) |
276 | 0 | .checked_mul(BASE64_CHUNK_SIZE) |
277 | 0 | .expect("too many chunks"); |
278 | 0 | debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE); |
279 | | |
280 | 0 | let b64_bytes_available_to_decode = if at_eof { |
281 | 0 | self.b64_len |
282 | | } else { |
283 | | // only use complete chunks |
284 | 0 | self.b64_len - self.b64_len % 4 |
285 | | }; |
286 | | |
287 | 0 | let actual_decode_len = cmp::min( |
288 | 0 | b64_bytes_that_can_decode_into_buf, |
289 | 0 | b64_bytes_available_to_decode, |
290 | 0 | ); |
291 | 0 | self.decode_to_buf(actual_decode_len, buf) |
292 | | } |
293 | | } |
294 | 0 | } |
295 | | } |