Coverage Report

Created: 2025-10-31 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/base64-0.22.1/src/read/decoder.rs
Line
Count
Source
1
use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE};
2
use std::{cmp, fmt, io};
3
4
// Capacity, in bytes, of the `b64_buffer` array that every `DecoderReader` holds inline.
// This should be large, but it has to fit on the stack.
5
pub(crate) const BUF_SIZE: usize = 1024;
6
7
// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
8
// Length, in bytes, of one complete base64-encoded chunk.
const BASE64_CHUNK_SIZE: usize = 4;
9
// Length, in bytes, of the raw data that one complete base64 chunk decodes to.
const DECODED_CHUNK_SIZE: usize = 3;
10
11
/// A `Read` implementation that decodes base64 data read from an underlying reader.
12
///
13
/// # Examples
14
///
15
/// ```
16
/// use std::io::Read;
17
/// use std::io::Cursor;
18
/// use base64::engine::general_purpose;
19
///
20
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
21
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
22
/// let mut decoder = base64::read::DecoderReader::new(
23
///     &mut wrapped_reader,
24
///     &general_purpose::STANDARD);
25
///
26
/// // handle errors as you normally would
27
/// let mut result = Vec::new();
28
/// decoder.read_to_end(&mut result).unwrap();
29
///
30
/// assert_eq!(b"asdf", &result[..]);
31
///
32
/// ```
33
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
34
    engine: &'e E,
35
    /// Where b64 data is read from
36
    inner: R,
37
38
    /// Holds b64 data read from the delegate reader.
39
    b64_buffer: [u8; BUF_SIZE],
40
    /// The start of the pending buffered data in `b64_buffer`.
41
    b64_offset: usize,
42
    /// The amount of buffered b64 data after `b64_offset` in `b64_len`.
43
    b64_len: usize,
44
    /// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
45
    /// decoded chunk in to, we have to be able to hang on to a few decoded bytes.
46
    /// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
47
    /// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
48
    /// into here, which seems like a lot of complexity for 1 extra byte of storage.
49
    decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE],
50
    /// Index of start of decoded data in `decoded_chunk_buffer`
51
    decoded_offset: usize,
52
    /// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer`
53
    decoded_len: usize,
54
    /// Input length consumed so far.
55
    /// Used to provide accurate offsets in errors
56
    input_consumed_len: usize,
57
    /// offset of previously seen padding, if any
58
    padding_offset: Option<usize>,
59
}
60
61
// exclude b64_buffer as it's uselessly large
62
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
63
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64
0
        f.debug_struct("DecoderReader")
65
0
            .field("b64_offset", &self.b64_offset)
66
0
            .field("b64_len", &self.b64_len)
67
0
            .field("decoded_chunk_buffer", &self.decoded_chunk_buffer)
68
0
            .field("decoded_offset", &self.decoded_offset)
69
0
            .field("decoded_len", &self.decoded_len)
70
0
            .field("input_consumed_len", &self.input_consumed_len)
71
0
            .field("padding_offset", &self.padding_offset)
72
0
            .finish()
73
0
    }
74
}
75
76
impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
77
    /// Create a new decoder that will read from the provided reader `r`.
78
0
    pub fn new(reader: R, engine: &'e E) -> Self {
79
0
        DecoderReader {
80
0
            engine,
81
0
            inner: reader,
82
0
            b64_buffer: [0; BUF_SIZE],
83
0
            b64_offset: 0,
84
0
            b64_len: 0,
85
0
            decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE],
86
0
            decoded_offset: 0,
87
0
            decoded_len: 0,
88
0
            input_consumed_len: 0,
89
0
            padding_offset: None,
90
0
        }
91
0
    }
92
93
    /// Write as much as possible of the decoded buffer into the target buffer.
94
    /// Must only be called when there is something to write and space to write into.
95
    /// Returns a Result with the number of (decoded) bytes copied.
96
0
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
97
0
        debug_assert!(self.decoded_len > 0);
98
0
        debug_assert!(!buf.is_empty());
99
100
0
        let copy_len = cmp::min(self.decoded_len, buf.len());
101
0
        debug_assert!(copy_len > 0);
102
0
        debug_assert!(copy_len <= self.decoded_len);
103
104
0
        buf[..copy_len].copy_from_slice(
105
0
            &self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len],
106
        );
107
108
0
        self.decoded_offset += copy_len;
109
0
        self.decoded_len -= copy_len;
110
111
0
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
112
113
0
        Ok(copy_len)
114
0
    }
115
116
    /// Read into the remaining space in the buffer after the current contents.
117
    /// Must only be called when there is space to read into in the buffer.
118
    /// Returns the number of bytes read.
119
0
    fn read_from_delegate(&mut self) -> io::Result<usize> {
120
0
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);
121
122
0
        let read = self
123
0
            .inner
124
0
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
125
0
        self.b64_len += read;
126
127
0
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
128
129
0
        Ok(read)
130
0
    }
131
132
    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
133
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
134
    ///
135
    /// Returns a Result with the number of decoded bytes written to `buf`.
136
    ///
137
    /// # Panics
138
    ///
139
    /// panics if `buf` is too small
140
0
    fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
141
0
        debug_assert!(self.b64_len >= b64_len_to_decode);
142
0
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
143
0
        debug_assert!(!buf.is_empty());
144
145
0
        let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
146
0
        let decode_metadata = self
147
0
            .engine
148
0
            .internal_decode(
149
0
                b64_to_decode,
150
0
                buf,
151
0
                self.engine.internal_decoded_len_estimate(b64_len_to_decode),
152
            )
153
0
            .map_err(|dse| match dse {
154
0
                DecodeSliceError::DecodeError(de) => {
155
0
                    match de {
156
0
                        DecodeError::InvalidByte(offset, byte) => {
157
0
                            match (byte, self.padding_offset) {
158
                                // if there was padding in a previous block of decoding that happened to
159
                                // be correct, and we now find more padding that happens to be incorrect,
160
                                // to be consistent with non-reader decodes, record the error at the first
161
                                // padding
162
0
                                (PAD_BYTE, Some(first_pad_offset)) => {
163
0
                                    DecodeError::InvalidByte(first_pad_offset, PAD_BYTE)
164
                                }
165
                                _ => {
166
0
                                    DecodeError::InvalidByte(self.input_consumed_len + offset, byte)
167
                                }
168
                            }
169
                        }
170
0
                        DecodeError::InvalidLength(len) => {
171
0
                            DecodeError::InvalidLength(self.input_consumed_len + len)
172
                        }
173
0
                        DecodeError::InvalidLastSymbol(offset, byte) => {
174
0
                            DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte)
175
                        }
176
0
                        DecodeError::InvalidPadding => DecodeError::InvalidPadding,
177
                    }
178
                }
179
                DecodeSliceError::OutputSliceTooSmall => {
180
0
                    unreachable!("buf is sized correctly in calling code")
181
                }
182
0
            })
183
0
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
184
185
0
        if let Some(offset) = self.padding_offset {
186
            // we've already seen padding
187
0
            if decode_metadata.decoded_len > 0 {
188
                // we read more after already finding padding; report error at first padding byte
189
0
                return Err(io::Error::new(
190
0
                    io::ErrorKind::InvalidData,
191
0
                    DecodeError::InvalidByte(offset, PAD_BYTE),
192
0
                ));
193
0
            }
194
0
        }
195
196
0
        self.padding_offset = self.padding_offset.or(decode_metadata
197
0
            .padding_offset
198
0
            .map(|offset| self.input_consumed_len + offset));
199
0
        self.input_consumed_len += b64_len_to_decode;
200
0
        self.b64_offset += b64_len_to_decode;
201
0
        self.b64_len -= b64_len_to_decode;
202
203
0
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
204
205
0
        Ok(decode_metadata.decoded_len)
206
0
    }
207
208
    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
209
    /// input from.
210
    ///
211
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
212
    /// unspecified. This function is mainly provided because the inner reader type may provide
213
    /// additional functionality beyond the `Read` implementation which may still be useful.
214
0
    pub fn into_inner(self) -> R {
215
0
        self.inner
216
0
    }
217
}
218
219
impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
220
    /// Decode input from the wrapped reader.
221
    ///
222
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
223
    /// written in `buf`.
224
    ///
225
    /// Where possible, this function buffers base64 to minimize the number of read() calls to the
226
    /// delegate reader.
227
    ///
228
    /// # Errors
229
    ///
230
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
231
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
232
0
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
233
0
        if buf.is_empty() {
234
0
            return Ok(0);
235
0
        }
236
237
        // offset == BUF_SIZE when we copied it all last time
238
0
        debug_assert!(self.b64_offset <= BUF_SIZE);
239
0
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
240
0
        debug_assert!(if self.b64_offset == BUF_SIZE {
241
0
            self.b64_len == 0
242
        } else {
243
0
            self.b64_len <= BUF_SIZE
244
        });
245
246
0
        debug_assert!(if self.decoded_len == 0 {
247
            // can be = when we were able to copy the complete chunk
248
0
            self.decoded_offset <= DECODED_CHUNK_SIZE
249
        } else {
250
0
            self.decoded_offset < DECODED_CHUNK_SIZE
251
        });
252
253
        // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
254
        // byte into the provided buf, so the effective length should only be 3 momentarily between
255
        // when we decode and when we copy into the target buffer.
256
0
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
257
0
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
258
259
0
        if self.decoded_len > 0 {
260
            // we have a few leftover decoded bytes; flush that rather than pull in more b64
261
0
            self.flush_decoded_buf(buf)
262
        } else {
263
0
            let mut at_eof = false;
264
0
            while self.b64_len < BASE64_CHUNK_SIZE {
265
                // Copy any bytes we have to the start of the buffer.
266
0
                self.b64_buffer
267
0
                    .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
268
0
                self.b64_offset = 0;
269
270
                // then fill in more data
271
0
                let read = self.read_from_delegate()?;
272
0
                if read == 0 {
273
                    // we never read into an empty buf, so 0 => we've hit EOF
274
0
                    at_eof = true;
275
0
                    break;
276
0
                }
277
            }
278
279
0
            if self.b64_len == 0 {
280
0
                debug_assert!(at_eof);
281
                // we must be at EOF, and we have no data left to decode
282
0
                return Ok(0);
283
0
            };
284
285
0
            debug_assert!(if at_eof {
286
                // if we are at eof, we may not have a complete chunk
287
0
                self.b64_len > 0
288
            } else {
289
                // otherwise, we must have at least one chunk
290
0
                self.b64_len >= BASE64_CHUNK_SIZE
291
            });
292
293
0
            debug_assert_eq!(0, self.decoded_len);
294
295
0
            if buf.len() < DECODED_CHUNK_SIZE {
296
                // caller requested an annoyingly short read
297
                // have to write to a tmp buf first to avoid double mutable borrow
298
0
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
299
                // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
300
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
301
                // tokens, not 1, since 1 token can't decode to 1 byte).
302
0
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);
303
304
0
                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
305
0
                self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);
306
307
0
                self.decoded_offset = 0;
308
0
                self.decoded_len = decoded;
309
310
                // can be less than 3 on last block due to padding
311
0
                debug_assert!(decoded <= 3);
312
313
0
                self.flush_decoded_buf(buf)
314
            } else {
315
0
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
316
0
                    .checked_mul(BASE64_CHUNK_SIZE)
317
0
                    .expect("too many chunks");
318
0
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
319
320
0
                let b64_bytes_available_to_decode = if at_eof {
321
0
                    self.b64_len
322
                } else {
323
                    // only use complete chunks
324
0
                    self.b64_len - self.b64_len % 4
325
                };
326
327
0
                let actual_decode_len = cmp::min(
328
0
                    b64_bytes_that_can_decode_into_buf,
329
0
                    b64_bytes_available_to_decode,
330
                );
331
0
                self.decode_to_buf(actual_decode_len, buf)
332
            }
333
        }
334
0
    }
335
}