/rust/registry/src/index.crates.io-6f17d22bba15001f/base64-0.22.1/src/alphabet.rs

Source (jump to first uncovered line)
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.

use crate::PAD_BYTE;
use core::{convert, fmt};
#[cfg(any(feature = "std", test))]
use std::error;

/// Number of symbols in an alphabet; also the length of the backing symbol array.
const ALPHABET_SIZE: usize = 64;

/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::{
///     alphabet::Alphabet,
///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
/// };
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The alphabet's symbol bytes, kept in the order they appeared in the source string.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}

impl Alphabet {
    /// Builds an `Alphabet` from the first 64 bytes of `alphabet` without validating them.
    ///
    /// Skipping validation keeps this trivially `const`. It is meant only for strings that
    /// are already known to be valid alphabets; input shorter than 64 bytes fails on the
    /// out-of-bounds index.
    const fn from_str_unchecked(alphabet: &str) -> Self {
        let input = alphabet.as_bytes();
        let mut symbols = [0_u8; ALPHABET_SIZE];

        // plain indexed loop: a way of copying that const fn permits
        let mut i = 0;
        while i < ALPHABET_SIZE {
            symbols[i] = input[i];
            i += 1;
        }

        Self { symbols }
    }

    /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
    ///
    /// The `=` byte is not allowed as it is used for padding.
    ///
    /// # Errors
    ///
    /// - [`ParseAlphabetError::InvalidLength`] if `alphabet` is not exactly 64 bytes long.
    /// - [`ParseAlphabetError::UnprintableByte`] if a byte is outside the range `[32, 126]`.
    /// - [`ParseAlphabetError::ReservedByte`] if a byte is the padding byte.
    /// - [`ParseAlphabetError::DuplicatedByte`] if a byte occurs more than once.
    ///
    /// Bytes are examined in order, and the first problem found is the one reported.
    pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
        let candidates = alphabet.as_bytes();
        if candidates.len() != ALPHABET_SIZE {
            return Err(ParseAlphabetError::InvalidLength);
        }

        let mut pos = 0;
        while pos < ALPHABET_SIZE {
            let symbol = candidates[pos];

            // Only printable ASCII is accepted. DEL (127) is rejected too: it is sometimes
            // counted as printable, but it is clearly unsuitable for base64.
            if symbol < 32_u8 || symbol > 126_u8 {
                return Err(ParseAlphabetError::UnprintableByte(symbol));
            }
            // `=` is assumed to be padding, so it can never be a symbol
            if symbol == PAD_BYTE {
                return Err(ParseAlphabetError::ReservedByte(symbol));
            }

            // Duplicate detection restricted to what const fn allows: compare this symbol
            // with every other position. Quadratic, but only over 64 bytes and only once
            // per alphabet.
            let mut other = 0;
            while other < ALPHABET_SIZE {
                if other != pos && candidates[other] == symbol {
                    return Err(ParseAlphabetError::DuplicatedByte(symbol));
                }
                other += 1;
            }

            pos += 1;
        }

        Ok(Self::from_str_unchecked(alphabet))
    }

    /// Create a `&str` from the symbols in the `Alphabet`
    ///
    /// Panics if the stored symbols are not valid UTF-8; alphabets accepted by
    /// [`Alphabet::new`] contain only printable ASCII, so they always convert.
    pub fn as_str(&self) -> &str {
        let symbols: &[u8] = &self.symbols;
        core::str::from_utf8(symbols).unwrap()
    }
}

impl convert::TryFrom<&str> for Alphabet {
    type Error = ParseAlphabetError;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        Self::new(value)
    }
}

/// Reasons a `str` can be rejected when constructing an [Alphabet] from it.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// The input was not exactly 64 bytes long
    InvalidLength,
    /// The contained byte occurs more than once; all bytes must be unique
    DuplicatedByte(u8),
    /// The contained byte is not printable (outside the range `[32, 126]`)
    UnprintableByte(u8),
    /// The contained byte is `=`, which cannot be used
    ReservedByte(u8),
}

impl fmt::Display for ParseAlphabetError {
    /// Writes a short description of the error; variants carrying a byte render it as
    /// zero-padded hex with a `0x` prefix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::InvalidLength => f.write_str("Invalid length - must be 64 bytes"),
            Self::DuplicatedByte(byte) => {
                f.write_fmt(format_args!("Duplicated byte: {:#04x}", byte))
            }
            Self::UnprintableByte(byte) => {
                f.write_fmt(format_args!("Unprintable byte: {:#04x}", byte))
            }
            Self::ReservedByte(byte) => {
                f.write_fmt(format_args!("Reserved byte: {:#04x}", byte))
            }
        }
    }
}

#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}

/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
///
/// Symbols are ordered `A`-`Z`, `a`-`z`, `0`-`9`, then `+` and `/`.
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
);

/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
///
/// Identical to [`STANDARD`] except that the last two symbols are `-` and `_`.
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
);

/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
///
/// After `.` and `/`, symbols are ordered `0`-`9`, `A`-`Z`, `a`-`z`.
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
);

/// The bcrypt alphabet.
///
/// Starts with `.` and `/` like [`CRYPT`], but then orders symbols `A`-`Z`, `a`-`z`, `0`-`9`.
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
);

/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
///
/// Identical to [`STANDARD`] except that the last symbol is `,` instead of `/`.
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
);

/// The alphabet used in BinHex 4.0 files.
///
/// Unlike the other alphabets here, this one begins with punctuation and skips some
/// digits and letters (for example `7`, `O`, `W`, `g`, `n` and `o` are absent).
///
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
);

#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    // Each rejection test feeds `Alphabet::new` an input with exactly one kind of problem
    // at a known position and checks the precise error reported.

    #[test]
    fn detects_duplicate_start() {
        // 'A' repeated at positions 0 and 1
        let parsed =
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(Err(ParseAlphabetError::DuplicatedByte(b'A')), parsed);
    }

    #[test]
    fn detects_duplicate_end() {
        // '/' repeated in the final two positions
        let parsed =
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//");
        assert_eq!(Err(ParseAlphabetError::DuplicatedByte(b'/')), parsed);
    }

    #[test]
    fn detects_duplicate_middle() {
        // 'Z' takes the place of 'a'
        let parsed =
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(Err(ParseAlphabetError::DuplicatedByte(b'Z')), parsed);
    }

    #[test]
    fn detects_length() {
        // longer than 64 bytes; the length is checked before anything else
        let parsed = Alphabet::new(
            "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
        );
        assert_eq!(Err(ParseAlphabetError::InvalidLength), parsed);
    }

    #[test]
    fn detects_padding() {
        // '=' as the last symbol
        let parsed =
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=");
        assert_eq!(Err(ParseAlphabetError::ReservedByte(b'=')), parsed);
    }

    #[test]
    fn detects_unprintable() {
        // form feed as the first symbol
        let parsed =
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(Err(ParseAlphabetError::UnprintableByte(0xc)), parsed);
    }

    #[test]
    fn same_as_unchecked() {
        // the validating path must produce the same value as the predefined constant
        let parsed =
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(Ok(STANDARD), parsed);
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        match Alphabet::try_from(alphabet) {
            Ok(a) => assert_eq!(alphabet, a.as_str()),
            Err(e) => panic!("valid alphabet was rejected: {:?}", e),
        }
    }
}

Coverage Report

Created: 2025-02-21 07:11

Line	Count	Source (jump to first uncovered line)
1		//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
3		use crate::PAD_BYTE;
4		use core::{convert, fmt};
5		#[cfg(any(feature = "std", test))]
6		use std::error;
7
8		const ALPHABET_SIZE: usize = 64;
9
10		/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11		///
12		/// Common alphabets are provided as constants, and custom alphabets
13		/// can be made via `from_str` or the `TryFrom<str>` implementation.
14		///
15		/// # Examples
16		///
17		/// Building and using a custom Alphabet:
18		///
19		/// ```
20		/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
21		///
22		/// let engine = base64::engine::GeneralPurpose::new(
23		/// &custom,
24		/// base64::engine::general_purpose::PAD);
25		/// ```
26		///
27		/// Building a const:
28		///
29		/// ```
30		/// use base64::alphabet::Alphabet;
31		///
32		/// static CUSTOM: Alphabet = {
33		/// // Result::unwrap() isn't const yet, but panic!() is OK
34		/// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
35		/// Ok(x) => x,
36		/// Err(_) => panic!("creation of alphabet failed"),
37		/// }
38		/// };
39		/// ```
40		///
41		/// Building lazily:
42		///
43		/// ```
44		/// use base64::{
45		/// alphabet::Alphabet,
46		/// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
47		/// };
48		/// use once_cell::sync::Lazy;
49		///
50		/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
51		/// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
52		/// );
53		/// ```
54		#[derive(Clone, Debug, Eq, PartialEq)]
55		pub struct Alphabet {
56		pub(crate) symbols: [u8; ALPHABET_SIZE],
57		}
58
59		impl Alphabet {
60		/// Performs no checks so that it can be const.
61		/// Used only for known-valid strings.
62	0	const fn from_str_unchecked(alphabet: &str) -> Self {
63	0	let mut symbols = [0_u8; ALPHABET_SIZE];
64	0	let source_bytes = alphabet.as_bytes();
65	0
66	0	// a way to copy that's allowed in const fn
67	0	let mut index = 0;
68	0	while index < ALPHABET_SIZE {
69	0	symbols[index] = source_bytes[index];
70	0	index += 1;
71	0	}
72
73	0	Self { symbols }
74	0	}
75
76		/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
77		///
78		/// The `=` byte is not allowed as it is used for padding.
79	0	pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
80	0	let bytes = alphabet.as_bytes();
81	0	if bytes.len() != ALPHABET_SIZE {
82	0	return Err(ParseAlphabetError::InvalidLength);
83	0	}
84	0
85	0	{
86	0	let mut index = 0;
87	0	while index < ALPHABET_SIZE {
88	0	let byte = bytes[index];
89	0
90	0	// must be ascii printable. 127 (DEL) is commonly considered printable
91	0	// for some reason but clearly unsuitable for base64.
92	0	if !(byte >= 32_u8 && byte <= 126_u8) {
93	0	return Err(ParseAlphabetError::UnprintableByte(byte));
94	0	}
95	0	// = is assumed to be padding, so cannot be used as a symbol
96	0	if byte == PAD_BYTE {
97	0	return Err(ParseAlphabetError::ReservedByte(byte));
98	0	}
99	0
100	0	// Check for duplicates while staying within what const allows.
101	0	// It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
102	0	// microsecond range.
103	0
104	0	let mut probe_index = 0;
105	0	while probe_index < ALPHABET_SIZE {
106	0	if probe_index == index {
107	0	probe_index += 1;
108	0	continue;
109	0	}
110	0
111	0	let probe_byte = bytes[probe_index];
112	0
113	0	if byte == probe_byte {
114	0	return Err(ParseAlphabetError::DuplicatedByte(byte));
115	0	}
116	0
117	0	probe_index += 1;
118		}
119
120	0	index += 1;
121		}
122		}
123
124	0	Ok(Self::from_str_unchecked(alphabet))
125	0	}
126
127		/// Create a `&str` from the symbols in the `Alphabet`
128	0	pub fn as_str(&self) -> &str {
129	0	core::str::from_utf8(&self.symbols).unwrap()
130	0	}
131		}
132
133		impl convert::TryFrom<&str> for Alphabet {
134		type Error = ParseAlphabetError;
135
136	0	fn try_from(value: &str) -> Result<Self, Self::Error> {
137	0	Self::new(value)
138	0	}
139		}
140
141		/// Possible errors when constructing an [Alphabet] from a `str`.
142		#[derive(Debug, Eq, PartialEq)]
143		pub enum ParseAlphabetError {
144		/// Alphabets must be 64 ASCII bytes
145		InvalidLength,
146		/// All bytes must be unique
147		DuplicatedByte(u8),
148		/// All bytes must be printable (in the range `[32, 126]`).
149		UnprintableByte(u8),
150		/// `=` cannot be used
151		ReservedByte(u8),
152		}
153
154		impl fmt::Display for ParseAlphabetError {
155	0	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156	0	match self {
157	0	Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
158	0	Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
159	0	Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
160	0	Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
161		}
162	0	}
163		}
164
165		#[cfg(any(feature = "std", test))]
166		impl error::Error for ParseAlphabetError {}
167
168		/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
169		///
170		/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
171		pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
172		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
173		);
174
175		/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
176		///
177		/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
178		pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
179		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
180		);
181
182		/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
183		///
184		/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
185		pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
186		"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
187		);
188
189		/// The bcrypt alphabet.
190		pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
191		"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
192		);
193
194		/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
195		///
196		/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
197		pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
198		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
199		);
200
201		/// The alphabet used in BinHex 4.0 files.
202		///
203		/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
204		pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
205		"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
206		);
207
208		#[cfg(test)]
209		mod tests {
210		use crate::alphabet::*;
211		use core::convert::TryFrom as _;
212
213		#[test]
214		fn detects_duplicate_start() {
215		assert_eq!(
216		ParseAlphabetError::DuplicatedByte(b'A'),
217		Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
218		.unwrap_err()
219		);
220		}
221
222		#[test]
223		fn detects_duplicate_end() {
224		assert_eq!(
225		ParseAlphabetError::DuplicatedByte(b'/'),
226		Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
227		.unwrap_err()
228		);
229		}
230
231		#[test]
232		fn detects_duplicate_middle() {
233		assert_eq!(
234		ParseAlphabetError::DuplicatedByte(b'Z'),
235		Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
236		.unwrap_err()
237		);
238		}
239
240		#[test]
241		fn detects_length() {
242		assert_eq!(
243		ParseAlphabetError::InvalidLength,
244		Alphabet::new(
245		"xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
246		)
247		.unwrap_err()
248		);
249		}
250
251		#[test]
252		fn detects_padding() {
253		assert_eq!(
254		ParseAlphabetError::ReservedByte(b'='),
255		Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
256		.unwrap_err()
257		);
258		}
259
260		#[test]
261		fn detects_unprintable() {
262		// form feed
263		assert_eq!(
264		ParseAlphabetError::UnprintableByte(0xc),
265		Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
266		.unwrap_err()
267		);
268		}
269
270		#[test]
271		fn same_as_unchecked() {
272		assert_eq!(
273		STANDARD,
274		Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
275		.unwrap()
276		);
277		}
278
279		#[test]
280		fn str_same_as_input() {
281		let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
282		let a = Alphabet::try_from(alphabet).unwrap();
283		assert_eq!(alphabet, a.as_str())
284		}
285		}