Coverage Report

Created: 2025-02-21 07:11

/rust/registry/src/index.crates.io-6f17d22bba15001f/base64-0.22.1/src/alphabet.rs
Line
Count
Source (jump to first uncovered line)
1
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
3
use crate::PAD_BYTE;
4
use core::{convert, fmt};
5
#[cfg(any(feature = "std", test))]
6
use std::error;
7
8
/// Number of symbols in an alphabet; also the length of the `symbols` array in [`Alphabet`].
const ALPHABET_SIZE: usize = 64;
9
10
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11
///
12
/// Common alphabets are provided as constants, and custom alphabets
13
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
14
///
15
/// # Examples
16
///
17
/// Building and using a custom Alphabet:
18
///
19
/// ```
20
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
21
///
22
/// let engine = base64::engine::GeneralPurpose::new(
23
///     &custom,
24
///     base64::engine::general_purpose::PAD);
25
/// ```
26
///
27
/// Building a const:
28
///
29
/// ```
30
/// use base64::alphabet::Alphabet;
31
///
32
/// static CUSTOM: Alphabet = {
33
///     // Result::unwrap() isn't const yet, but panic!() is OK
34
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
35
///         Ok(x) => x,
36
///         Err(_) => panic!("creation of alphabet failed"),
37
///     }
38
/// };
39
/// ```
40
///
41
/// Building lazily:
42
///
43
/// ```
44
/// use base64::{
45
///     alphabet::Alphabet,
46
///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
47
/// };
48
/// use once_cell::sync::Lazy;
49
///
50
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
51
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
52
/// );
53
/// ```
54
#[derive(Clone, Debug, Eq, PartialEq)]
55
pub struct Alphabet {
56
    /// The alphabet's symbols, stored as bytes in the same order as the source string.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
57
}
58
59
impl Alphabet {
60
    /// Performs no checks so that it can be const.
61
    /// Used only for known-valid strings.
62
0
    const fn from_str_unchecked(alphabet: &str) -> Self {
63
0
        let mut symbols = [0_u8; ALPHABET_SIZE];
64
0
        let source_bytes = alphabet.as_bytes();
65
0
66
0
        // a way to copy that's allowed in const fn
67
0
        let mut index = 0;
68
0
        while index < ALPHABET_SIZE {
69
0
            symbols[index] = source_bytes[index];
70
0
            index += 1;
71
0
        }
72
73
0
        Self { symbols }
74
0
    }
75
76
    /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
77
    ///
78
    /// The `=` byte is not allowed as it is used for padding.
79
0
    pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
80
0
        let bytes = alphabet.as_bytes();
81
0
        if bytes.len() != ALPHABET_SIZE {
82
0
            return Err(ParseAlphabetError::InvalidLength);
83
0
        }
84
0
85
0
        {
86
0
            let mut index = 0;
87
0
            while index < ALPHABET_SIZE {
88
0
                let byte = bytes[index];
89
0
90
0
                // must be ascii printable. 127 (DEL) is commonly considered printable
91
0
                // for some reason but clearly unsuitable for base64.
92
0
                if !(byte >= 32_u8 && byte <= 126_u8) {
93
0
                    return Err(ParseAlphabetError::UnprintableByte(byte));
94
0
                }
95
0
                // = is assumed to be padding, so cannot be used as a symbol
96
0
                if byte == PAD_BYTE {
97
0
                    return Err(ParseAlphabetError::ReservedByte(byte));
98
0
                }
99
0
100
0
                // Check for duplicates while staying within what const allows.
101
0
                // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
102
0
                // microsecond range.
103
0
104
0
                let mut probe_index = 0;
105
0
                while probe_index < ALPHABET_SIZE {
106
0
                    if probe_index == index {
107
0
                        probe_index += 1;
108
0
                        continue;
109
0
                    }
110
0
111
0
                    let probe_byte = bytes[probe_index];
112
0
113
0
                    if byte == probe_byte {
114
0
                        return Err(ParseAlphabetError::DuplicatedByte(byte));
115
0
                    }
116
0
117
0
                    probe_index += 1;
118
                }
119
120
0
                index += 1;
121
            }
122
        }
123
124
0
        Ok(Self::from_str_unchecked(alphabet))
125
0
    }
126
127
    /// Create a `&str` from the symbols in the `Alphabet`
128
0
    pub fn as_str(&self) -> &str {
129
0
        core::str::from_utf8(&self.symbols).unwrap()
130
0
    }
131
}
132
133
impl convert::TryFrom<&str> for Alphabet {
134
    type Error = ParseAlphabetError;
135
136
0
    fn try_from(value: &str) -> Result<Self, Self::Error> {
137
0
        Self::new(value)
138
0
    }
139
}
140
141
/// Possible errors when constructing an [Alphabet] from a `str`.
///
/// Variants with a `u8` payload carry the byte that caused the error.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`).
    UnprintableByte(u8),
    /// `=` cannot be used
    ReservedByte(u8),
}
153
154
impl fmt::Display for ParseAlphabetError {
155
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156
0
        match self {
157
0
            Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
158
0
            Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
159
0
            Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
160
0
            Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
161
        }
162
0
    }
163
}
164
165
// Marker impl with no overridden methods. It is gated on the same cfg as the
// `use std::error;` import, so it is only compiled with the `std` feature or in tests.
#[cfg(any(feature = "std", test))]
166
impl error::Error for ParseAlphabetError {}
167
168
/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
169
///
170
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
171
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
172
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
173
);
174
175
/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
176
///
177
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
178
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
179
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
180
);
181
182
/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
183
///
184
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
/// After `.` and `/` the symbols are the digits, then uppercase, then lowercase letters.
185
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
186
    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
187
);
188
189
/// The bcrypt alphabet (with `.` and `/` as the first two characters).
///
/// Unlike [`CRYPT`], the digits come last: after `.` and `/` the symbols are the
/// uppercase letters, then lowercase letters, then digits.
190
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
191
    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
192
);
193
194
/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
195
///
196
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
197
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
198
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
199
);
200
201
/// The alphabet used in BinHex 4.0 files.
202
///
203
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
///
/// This alphabet mixes punctuation, digits and letters and is not a contiguous ASCII
/// range: for example `7`, `O`, `W`, `g`, `n` and `o` are not symbols.
204
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
205
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
206
);
207
208
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    /// Asserts that `Alphabet::new(input)` fails with exactly `expected`.
    fn assert_rejected(expected: ParseAlphabetError, input: &str) {
        assert_eq!(Err(expected), Alphabet::new(input));
    }

    #[test]
    fn detects_duplicate_start() {
        assert_rejected(
            ParseAlphabetError::DuplicatedByte(b'A'),
            "AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_rejected(
            ParseAlphabetError::DuplicatedByte(b'/'),
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//",
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_rejected(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            "ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
        );
    }

    #[test]
    fn detects_length() {
        // nine extra leading bytes make the input longer than 64
        assert_rejected(
            ParseAlphabetError::InvalidLength,
            "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
        );
    }

    #[test]
    fn detects_padding() {
        assert_rejected(
            ParseAlphabetError::ReservedByte(b'='),
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=",
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_rejected(
            ParseAlphabetError::UnprintableByte(0xc),
            "\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        );
    }

    #[test]
    fn same_as_unchecked() {
        let checked =
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap();
        assert_eq!(STANDARD, checked);
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let parsed = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, parsed.as_str());
    }
}