Coverage Report

Created: 2023-04-25 07:07

/rust/registry/src/index.crates.io-6f17d22bba15001f/base64-0.21.0/src/read/decoder.rs
Source listing follows. Every executable line in this file has an execution count of 0; the file is entirely uncovered.
use crate::{engine::Engine, DecodeError};
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
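
The 4:3 ratio between these two constants drives the offset arithmetic below, and BUF_SIZE (1024) is a whole number of base64 chunks. A minimal sketch of that arithmetic (illustrative only, not part of decoder.rs):

// --- illustrative sketch (not part of decoder.rs) ---
// Every complete 4-byte base64 chunk decodes to at most 3 raw bytes, so a
// full 1024-byte b64 buffer (256 chunks) can never yield more than 768
// decoded bytes per refill.
fn max_decoded_for(b64_len: usize) -> usize {
    (b64_len / 4) * 3
}

fn chunk_ratio_sketch() {
    assert_eq!(3, max_decoded_for(4));
    assert_eq!(768, max_decoded_for(1024));
    assert_eq!(0, 1024 % 4); // BUF_SIZE is a whole number of chunks
}
// --- end sketch ---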

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    // Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk into, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    // into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; 3],
    // index of start of decoded data
    decoded_offset: usize,
    // length of decoded data
    decoded_len: usize,
    // used to provide accurate offsets in errors
    total_b64_decoded: usize,
}

impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_buffer", &self.decoded_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("total_b64_decoded", &self.total_b64_decoded)
            .finish()
    }
}

impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read from the provided `reader`.
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            total_b64_decoded: 0,
        }
    }

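The doc example on the struct uses a Cursor as the simplest possible `Read`; in real code the delegate is often a file. A hedged sketch of that (the helper name and path are hypothetical):

// --- illustrative sketch (not part of decoder.rs) ---
use std::fs::File;
use std::io::Read;

use base64::{engine::general_purpose, read::DecoderReader};

fn decode_file(path: &str) -> std::io::Result<Vec<u8>> {
    // An extra BufReader is usually unnecessary: DecoderReader already buffers
    // up to BUF_SIZE (1024) bytes of base64 from the delegate per refill.
    let file = File::open(path)?;
    let mut decoder = DecoderReader::new(file, &general_purpose::STANDARD);
    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    Ok(decoded)
}
// --- end sketch ---
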
    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= num_bytes);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let decoded = self
            .engine
            .internal_decode(
                &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
                buf,
                self.engine.internal_decoded_len_estimate(num_bytes),
            )
            .map_err(|e| match e {
                DecodeError::InvalidByte(offset, byte) => {
                    DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidLength => DecodeError::InvalidLength,
                DecodeError::InvalidLastSymbol(offset, byte) => {
                    DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidPadding => DecodeError::InvalidPadding,
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.total_b64_decoded += num_bytes;
        self.b64_offset += num_bytes;
        self.b64_len -= num_bytes;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decoded)
    }

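Because decode_to_buf adds total_b64_decoded to the offsets carried by InvalidByte and InvalidLastSymbol before wrapping the error as io::ErrorKind::InvalidData, invalid input surfaces through the `Read` API as an InvalidData error whose offsets are absolute across the whole stream. A small sketch of observing that from the outside (illustrative only):

// --- illustrative sketch (not part of decoder.rs) ---
use std::io::{self, Cursor, Read};

use base64::{engine::general_purpose, read::DecoderReader};

fn invalid_base64_becomes_io_error() {
    // '!' is not in the standard alphabet, so decoding must fail.
    let mut wrapped = Cursor::new(b"QUJD!A==");
    let mut decoder = DecoderReader::new(&mut wrapped, &general_purpose::STANDARD);

    let mut out = Vec::new();
    let err = decoder.read_to_end(&mut out).unwrap_err();
    assert_eq!(io::ErrorKind::InvalidData, err.kind());
}
// --- end sketch ---
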
    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
    pub fn into_inner(self) -> R {
        self.inner
    }
}
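
A short usage sketch for into_inner, recovering the wrapped reader after decoding (illustrative only; as the doc comment above notes, the inner reader's state afterwards is unspecified):

// --- illustrative sketch (not part of decoder.rs) ---
use std::io::{Cursor, Read};

use base64::{engine::general_purpose, read::DecoderReader};

fn recover_inner_reader() -> std::io::Result<()> {
    let wrapped = Cursor::new(b"YXNkZg==".to_vec());
    let mut decoder = DecoderReader::new(wrapped, &general_purpose::STANDARD);

    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    assert_eq!(b"asdf", &decoded[..]);

    // Take the Cursor back out, e.g. to inspect or reuse it; its position is
    // unspecified because DecoderReader buffers reads internally.
    let cursor = decoder.into_inner();
    let _ = cursor.position();
    Ok(())
}
// --- end sketch ---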

impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written to `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of read() calls to the
    /// delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into here when we can't immediately write at least one byte into
        // the provided buf, so the effective length should only be 3 momentarily between when we
        // decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush those rather than pull in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Work around the lack of copy_within, which is only available from Rust 1.37.
                // Copy any bytes we have to the start of the buffer.
                // We know we have < 1 chunk, so we can use a tiny tmp buffer.
                let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
                memmove_buf[..self.b64_len].copy_from_slice(
                    &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
                );
                self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never pass in an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            };

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid a double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, we could have less than BASE64_CHUNK_SIZE, in which case we have
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
                // tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on the last block due to padding
                debug_assert!(decoded <= 3);

                self.flush_decoded_buf(buf)
            } else {
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % 4
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}
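
The `buf.len() < DECODED_CHUNK_SIZE` branch above is the reason `decoded_buffer` exists: decoded bytes that don't fit into a tiny caller buffer are parked there and drained by flush_decoded_buf on later calls. A sketch that exercises that path by reading one byte at a time (illustrative only):

// --- illustrative sketch (not part of decoder.rs) ---
use std::io::{Cursor, Read};

use base64::{engine::general_purpose, read::DecoderReader};

fn one_byte_at_a_time() -> std::io::Result<()> {
    let mut wrapped = Cursor::new(b"aGVsbG8=");
    let mut decoder = DecoderReader::new(&mut wrapped, &general_purpose::STANDARD);

    // A 1-byte destination is smaller than DECODED_CHUNK_SIZE, so each decoded
    // chunk is staged in decoded_buffer and handed out byte by byte.
    let mut out = Vec::new();
    let mut byte = [0u8; 1];
    loop {
        match decoder.read(&mut byte)? {
            0 => break,
            n => out.extend_from_slice(&byte[..n]),
        }
    }
    assert_eq!(b"hello", &out[..]);
    Ok(())
}
// --- end sketch ---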