Coverage Report

Created: 2023-04-25 07:07

/rust/registry/src/index.crates.io-6f17d22bba15001f/base64-0.21.0/src/read/decoder.rs
Source listing follows. Every executable line in this file has an execution count of 0; the file is entirely uncovered.
use crate::{engine::Engine, DecodeError};
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
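
The 4:3 ratio between these two constants drives the offset arithmetic below, and BUF_SIZE (1024) is a whole number of base64 chunks. A minimal sketch of that arithmetic (illustrative only, not part of decoder.rs):

// --- illustrative sketch (not part of decoder.rs) ---
// Every complete 4-byte base64 chunk decodes to at most 3 raw bytes, so a
// full 1024-byte b64 buffer (256 chunks) can never yield more than 768
// decoded bytes per refill.
fn max_decoded_for(b64_len: usize) -> usize {
    (b64_len / 4) * 3
}

fn chunk_ratio_sketch() {
    assert_eq!(3, max_decoded_for(4));
    assert_eq!(768, max_decoded_for(1024));
    assert_eq!(0, 1024 % 4); // BUF_SIZE is a whole number of chunks
}
// --- end sketch ---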

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    // Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk into, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    // into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; 3],
    // index of start of decoded data
    decoded_offset: usize,
    // length of decoded data
    decoded_len: usize,
    // used to provide accurate offsets in errors
    total_b64_decoded: usize,
}

impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_buffer", &self.decoded_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("total_b64_decoded", &self.total_b64_decoded)
            .finish()
    }
}

impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read from the provided `reader`.
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            total_b64_decoded: 0,
        }
    }

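The doc example on the struct uses a Cursor as the simplest possible `Read`; in real code the delegate is often a file. A hedged sketch of that (the helper name and path are hypothetical):

// --- illustrative sketch (not part of decoder.rs) ---
use std::fs::File;
use std::io::Read;

use base64::{engine::general_purpose, read::DecoderReader};

fn decode_file(path: &str) -> std::io::Result<Vec<u8>> {
    // An extra BufReader is usually unnecessary: DecoderReader already buffers
    // up to BUF_SIZE (1024) bytes of base64 from the delegate per refill.
    let file = File::open(path)?;
    let mut decoder = DecoderReader::new(file, &general_purpose::STANDARD);
    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    Ok(decoded)
}
// --- end sketch ---
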
    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= num_bytes);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let decoded = self
            .engine
            .internal_decode(
                &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
                buf,
                self.engine.internal_decoded_len_estimate(num_bytes),
            )
            .map_err(|e| match e {
                DecodeError::InvalidByte(offset, byte) => {
                    DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidLength => DecodeError::InvalidLength,
                DecodeError::InvalidLastSymbol(offset, byte) => {
                    DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidPadding => DecodeError::InvalidPadding,
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.total_b64_decoded += num_bytes;
        self.b64_offset += num_bytes;
        self.b64_len -= num_bytes;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decoded)
    }

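Because decode_to_buf adds total_b64_decoded to the offsets carried by InvalidByte and InvalidLastSymbol before wrapping the error as io::ErrorKind::InvalidData, invalid input surfaces through the `Read` API as an InvalidData error whose offsets are absolute across the whole stream. A small sketch of observing that from the outside (illustrative only):

// --- illustrative sketch (not part of decoder.rs) ---
use std::io::{self, Cursor, Read};

use base64::{engine::general_purpose, read::DecoderReader};

fn invalid_base64_becomes_io_error() {
    // '!' is not in the standard alphabet, so decoding must fail.
    let mut wrapped = Cursor::new(b"QUJD!A==");
    let mut decoder = DecoderReader::new(&mut wrapped, &general_purpose::STANDARD);

    let mut out = Vec::new();
    let err = decoder.read_to_end(&mut out).unwrap_err();
    assert_eq!(io::ErrorKind::InvalidData, err.kind());
}
// --- end sketch ---
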
    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
    pub fn into_inner(self) -> R {
        self.inner
    }
}
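
A short usage sketch for into_inner, recovering the wrapped reader after decoding (illustrative only; as the doc comment above notes, the inner reader's state afterwards is unspecified):

// --- illustrative sketch (not part of decoder.rs) ---
use std::io::{Cursor, Read};

use base64::{engine::general_purpose, read::DecoderReader};

fn recover_inner_reader() -> std::io::Result<()> {
    let wrapped = Cursor::new(b"YXNkZg==".to_vec());
    let mut decoder = DecoderReader::new(wrapped, &general_purpose::STANDARD);

    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    assert_eq!(b"asdf", &decoded[..]);

    // Take the Cursor back out, e.g. to inspect or reuse it; its position is
    // unspecified because DecoderReader buffers reads internally.
    let cursor = decoder.into_inner();
    let _ = cursor.position();
    Ok(())
}
// --- end sketch ---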

impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written to `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of read() calls to the
    /// delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into here when we can't immediately write at least one byte into
        // the provided buf, so the effective length should only be 3 momentarily between when we
        // decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush those rather than pull in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Work around the lack of copy_within, which is only available from Rust 1.37.
                // Copy any bytes we have to the start of the buffer.
                // We know we have < 1 chunk, so we can use a tiny tmp buffer.
                let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
                memmove_buf[..self.b64_len].copy_from_slice(
                    &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
                );
                self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never pass in an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            };

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid a double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, we could have less than BASE64_CHUNK_SIZE, in which case we have
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
                // tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on the last block due to padding
                debug_assert!(decoded <= 3);

                self.flush_decoded_buf(buf)
            } else {
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % 4
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}
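
The `buf.len() < DECODED_CHUNK_SIZE` branch above is the reason `decoded_buffer` exists: decoded bytes that don't fit into a tiny caller buffer are parked there and drained by flush_decoded_buf on later calls. A sketch that exercises that path by reading one byte at a time (illustrative only):

// --- illustrative sketch (not part of decoder.rs) ---
use std::io::{Cursor, Read};

use base64::{engine::general_purpose, read::DecoderReader};

fn one_byte_at_a_time() -> std::io::Result<()> {
    let mut wrapped = Cursor::new(b"aGVsbG8=");
    let mut decoder = DecoderReader::new(&mut wrapped, &general_purpose::STANDARD);

    // A 1-byte destination is smaller than DECODED_CHUNK_SIZE, so each decoded
    // chunk is staged in decoded_buffer and handed out byte by byte.
    let mut out = Vec::new();
    let mut byte = [0u8; 1];
    loop {
        match decoder.read(&mut byte)? {
            0 => break,
            n => out.extend_from_slice(&byte[..n]),
        }
    }
    assert_eq!(b"hello", &out[..]);
    Ok(())
}
// --- end sketch ---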