/rust/registry/src/index.crates.io-6f17d22bba15001f/base64-0.22.1/src/alphabet.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Provides [Alphabet] and constants for alphabets commonly used in the wild. |
2 | | |
3 | | use crate::PAD_BYTE; |
4 | | use core::{convert, fmt}; |
5 | | #[cfg(any(feature = "std", test))] |
6 | | use std::error; |
7 | | |
/// Number of symbols in a base64 alphabet: one per possible 6-bit value.
const ALPHABET_SIZE: usize = 64;
9 | | |
10 | | /// An alphabet defines the 64 ASCII characters (symbols) used for base64. |
11 | | /// |
12 | | /// Common alphabets are provided as constants, and custom alphabets |
13 | | /// can be made via `from_str` or the `TryFrom<str>` implementation. |
14 | | /// |
15 | | /// # Examples |
16 | | /// |
17 | | /// Building and using a custom Alphabet: |
18 | | /// |
19 | | /// ``` |
20 | | /// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); |
21 | | /// |
22 | | /// let engine = base64::engine::GeneralPurpose::new( |
23 | | /// &custom, |
24 | | /// base64::engine::general_purpose::PAD); |
25 | | /// ``` |
26 | | /// |
27 | | /// Building a const: |
28 | | /// |
29 | | /// ``` |
30 | | /// use base64::alphabet::Alphabet; |
31 | | /// |
32 | | /// static CUSTOM: Alphabet = { |
33 | | /// // Result::unwrap() isn't const yet, but panic!() is OK |
34 | | /// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") { |
35 | | /// Ok(x) => x, |
36 | | /// Err(_) => panic!("creation of alphabet failed"), |
37 | | /// } |
38 | | /// }; |
39 | | /// ``` |
40 | | /// |
41 | | /// Building lazily: |
42 | | /// |
43 | | /// ``` |
44 | | /// use base64::{ |
45 | | /// alphabet::Alphabet, |
46 | | /// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig}, |
47 | | /// }; |
48 | | /// use once_cell::sync::Lazy; |
49 | | /// |
50 | | /// static CUSTOM: Lazy<Alphabet> = Lazy::new(|| |
51 | | /// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap() |
52 | | /// ); |
53 | | /// ``` |
54 | | #[derive(Clone, Debug, Eq, PartialEq)] |
55 | | pub struct Alphabet { |
56 | | pub(crate) symbols: [u8; ALPHABET_SIZE], |
57 | | } |
58 | | |
59 | | impl Alphabet { |
60 | | /// Performs no checks so that it can be const. |
61 | | /// Used only for known-valid strings. |
62 | 0 | const fn from_str_unchecked(alphabet: &str) -> Self { |
63 | 0 | let mut symbols = [0_u8; ALPHABET_SIZE]; |
64 | 0 | let source_bytes = alphabet.as_bytes(); |
65 | 0 |
|
66 | 0 | // a way to copy that's allowed in const fn |
67 | 0 | let mut index = 0; |
68 | 0 | while index < ALPHABET_SIZE { |
69 | 0 | symbols[index] = source_bytes[index]; |
70 | 0 | index += 1; |
71 | 0 | } |
72 | | |
73 | 0 | Self { symbols } |
74 | 0 | } |
75 | | |
76 | | /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. |
77 | | /// |
78 | | /// The `=` byte is not allowed as it is used for padding. |
79 | 0 | pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { |
80 | 0 | let bytes = alphabet.as_bytes(); |
81 | 0 | if bytes.len() != ALPHABET_SIZE { |
82 | 0 | return Err(ParseAlphabetError::InvalidLength); |
83 | 0 | } |
84 | 0 |
|
85 | 0 | { |
86 | 0 | let mut index = 0; |
87 | 0 | while index < ALPHABET_SIZE { |
88 | 0 | let byte = bytes[index]; |
89 | 0 |
|
90 | 0 | // must be ascii printable. 127 (DEL) is commonly considered printable |
91 | 0 | // for some reason but clearly unsuitable for base64. |
92 | 0 | if !(byte >= 32_u8 && byte <= 126_u8) { |
93 | 0 | return Err(ParseAlphabetError::UnprintableByte(byte)); |
94 | 0 | } |
95 | 0 | // = is assumed to be padding, so cannot be used as a symbol |
96 | 0 | if byte == PAD_BYTE { |
97 | 0 | return Err(ParseAlphabetError::ReservedByte(byte)); |
98 | 0 | } |
99 | 0 |
|
100 | 0 | // Check for duplicates while staying within what const allows. |
101 | 0 | // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit |
102 | 0 | // microsecond range. |
103 | 0 |
|
104 | 0 | let mut probe_index = 0; |
105 | 0 | while probe_index < ALPHABET_SIZE { |
106 | 0 | if probe_index == index { |
107 | 0 | probe_index += 1; |
108 | 0 | continue; |
109 | 0 | } |
110 | 0 |
|
111 | 0 | let probe_byte = bytes[probe_index]; |
112 | 0 |
|
113 | 0 | if byte == probe_byte { |
114 | 0 | return Err(ParseAlphabetError::DuplicatedByte(byte)); |
115 | 0 | } |
116 | 0 |
|
117 | 0 | probe_index += 1; |
118 | | } |
119 | | |
120 | 0 | index += 1; |
121 | | } |
122 | | } |
123 | | |
124 | 0 | Ok(Self::from_str_unchecked(alphabet)) |
125 | 0 | } |
126 | | |
127 | | /// Create a `&str` from the symbols in the `Alphabet` |
128 | 0 | pub fn as_str(&self) -> &str { |
129 | 0 | core::str::from_utf8(&self.symbols).unwrap() |
130 | 0 | } |
131 | | } |
132 | | |
133 | | impl convert::TryFrom<&str> for Alphabet { |
134 | | type Error = ParseAlphabetError; |
135 | | |
136 | 0 | fn try_from(value: &str) -> Result<Self, Self::Error> { |
137 | 0 | Self::new(value) |
138 | 0 | } |
139 | | } |
140 | | |
/// Possible errors when constructing an [Alphabet] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be exactly 64 ASCII bytes long.
    InvalidLength,
    /// All bytes must be unique; carries the byte that occurred more than once.
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`); carries the offending byte.
    UnprintableByte(u8),
    /// `=` cannot be used because it is the padding byte; carries the offending byte.
    ReservedByte(u8),
}
153 | | |
154 | | impl fmt::Display for ParseAlphabetError { |
155 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
156 | 0 | match self { |
157 | 0 | Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), |
158 | 0 | Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b), |
159 | 0 | Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b), |
160 | 0 | Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b), |
161 | | } |
162 | 0 | } |
163 | | } |
164 | | |
// `std::error::Error` is only implemented when `std` is available (the `std` feature, or
// test builds); the default trait methods are sufficient as there is no underlying source.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
167 | | |
168 | | /// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. |
169 | | /// |
170 | | /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 |
171 | | pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( |
172 | | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", |
173 | | ); |
174 | | |
175 | | /// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. |
176 | | /// |
177 | | /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 |
178 | | pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( |
179 | | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", |
180 | | ); |
181 | | |
182 | | /// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). |
183 | | /// |
184 | | /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. |
185 | | pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( |
186 | | "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", |
187 | | ); |
188 | | |
189 | | /// The bcrypt alphabet. |
190 | | pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( |
191 | | "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", |
192 | | ); |
193 | | |
194 | | /// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). |
195 | | /// |
196 | | /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) |
197 | | pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( |
198 | | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", |
199 | | ); |
200 | | |
201 | | /// The alphabet used in BinHex 4.0 files. |
202 | | /// |
203 | | /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) |
204 | | pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( |
205 | | "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr", |
206 | | ); |
207 | | |
/// Unit tests for alphabet validation and the predefined constants.
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_short_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("").unwrap_err()
        );
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("BCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_del() {
        // DEL (127) sits just above the accepted printable range
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_non_ascii() {
        // 'é' is two UTF-8 bytes, so this is 64 bytes long but not ASCII
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc3),
            Alphabet::new("éCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    #[test]
    fn predefined_alphabets_pass_validation() {
        // The constants are built without checks, so confirm each one would also be
        // accepted by the checked constructor.
        for alphabet in &[STANDARD, URL_SAFE, CRYPT, BCRYPT, IMAP_MUTF7, BIN_HEX] {
            assert_eq!(Ok(alphabet.clone()), Alphabet::new(alphabet.as_str()));
        }
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str());
    }
}