/src/simdutf/src/scalar/base64.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef SIMDUTF_BASE64_H |
2 | | #define SIMDUTF_BASE64_H |
3 | | |
4 | | #include <algorithm> |
5 | | #include <cstddef> |
6 | | #include <cstdint> |
7 | | #include <cstring> |
8 | | #include <iostream> |
9 | | |
10 | | namespace simdutf { |
11 | | namespace scalar { |
12 | | namespace { |
13 | | namespace base64 { |
14 | | |
// Tells whether c is one of the five ASCII white-space characters: space,
// horizontal tab, line feed, carriage return or form feed.
// This helper is not meant to be fast: keep it out of long loops. In most
// situations is_ignorable is the function you want.
template <class char_type> bool is_ascii_white_space(char_type c) {
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
  case '\f':
    return true;
  default:
    return false;
  }
}
20 | | |
// Tells whether the value of c survives a round trip through uint8_t, i.e.
// whether it can be used as an index into a 256-entry table without losing
// information. Character types that are one byte wide always qualify.
template <class char_type> bool is_eight_byte(char_type c) {
  return sizeof(char_type) == 1 || uint8_t(c) == c;
}
27 | | |
28 | | template <class char_type> |
29 | 0 | bool is_ignorable(char_type c, simdutf::base64_options options) { |
30 | 0 | const uint8_t *to_base64 = |
31 | 0 | (options & base64_default_or_url) |
32 | 0 | ? tables::base64::to_base64_default_or_url_value |
33 | 0 | : ((options & base64_url) ? tables::base64::to_base64_url_value |
34 | 0 | : tables::base64::to_base64_value); |
35 | 0 | const bool ignore_garbage = |
36 | 0 | (options == base64_options::base64_url_accept_garbage) || |
37 | 0 | (options == base64_options::base64_default_accept_garbage) || |
38 | 0 | (options == base64_options::base64_default_or_url_accept_garbage); |
39 | 0 | uint8_t code = to_base64[uint8_t(c)]; |
40 | 0 | if (is_eight_byte(c) && code <= 63) { |
41 | 0 | return false; |
42 | 0 | } |
43 | 0 | if (is_eight_byte(c) && code == 64) { |
44 | 0 | return true; |
45 | 0 | } |
46 | 0 | return ignore_garbage; |
47 | 0 | } Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_ignorable<char>(char, simdutf::base64_options) Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_ignorable<char16_t>(char16_t, simdutf::base64_options) |
48 | | template <class char_type> |
49 | 0 | bool is_base64(char_type c, simdutf::base64_options options) { |
50 | 0 | const uint8_t *to_base64 = |
51 | 0 | (options & base64_default_or_url) |
52 | 0 | ? tables::base64::to_base64_default_or_url_value |
53 | 0 | : ((options & base64_url) ? tables::base64::to_base64_url_value |
54 | 0 | : tables::base64::to_base64_value); |
55 | 0 | uint8_t code = to_base64[uint8_t(c)]; |
56 | 0 | if (is_eight_byte(c) && code <= 63) { |
57 | 0 | return true; |
58 | 0 | } |
59 | 0 | return false; |
60 | 0 | } Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64<char>(char, simdutf::base64_options) Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64<char16_t>(char16_t, simdutf::base64_options) |
61 | | |
62 | | template <class char_type> |
63 | 0 | bool is_base64_or_padding(char_type c, simdutf::base64_options options) { |
64 | 0 | const uint8_t *to_base64 = |
65 | 0 | (options & base64_default_or_url) |
66 | 0 | ? tables::base64::to_base64_default_or_url_value |
67 | 0 | : ((options & base64_url) ? tables::base64::to_base64_url_value |
68 | 0 | : tables::base64::to_base64_value); |
69 | 0 | if (c == '=') { |
70 | 0 | return true; |
71 | 0 | } |
72 | 0 | uint8_t code = to_base64[uint8_t(c)]; |
73 | 0 | if (is_eight_byte(c) && code <= 63) { |
74 | 0 | return true; |
75 | 0 | } |
76 | 0 | return false; |
77 | 0 | } Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64_or_padding<char>(char, simdutf::base64_options) Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64_or_padding<char16_t>(char16_t, simdutf::base64_options) |
78 | | |
79 | | template <class char_type> |
80 | | bool is_ignorable_or_padding(char_type c, simdutf::base64_options options) { |
81 | | return is_ignorable(c, options) || c == '='; |
82 | | } |
83 | | |
// Describes where the payload of a base64 input ends; this is the value
// returned by find_end.
struct reduced_input {
  // Number of padding characters '=' that were recognized (typically 0, 1
  // or 2).
  size_t equalsigns;
  // Index of the first padding character if any; otherwise the reduced
  // length.
  size_t equallocation;
  // Length of the input buffer before the padding.
  size_t srclen;
  // Length of the input considered as a whole. find_end sets it to the
  // original length (trailing ignorable characters included) or, when garbage
  // is accepted and a '=' is found, to one past that '='.
  size_t full_input_length;
};
91 | | |
92 | | // find the end of the base64 input buffer |
93 | | // It returns the number of padding characters, the location of the first |
94 | | // padding character if any, the length of the input buffer before padding |
95 | | // and the length of the input buffer with padding. The input buffer is not |
96 | | // modified. The function assumes that there are at most two padding characters. |
97 | | template <class char_type> |
98 | | reduced_input find_end(const char_type *src, size_t srclen, |
99 | 0 | simdutf::base64_options options) { |
100 | 0 | const uint8_t *to_base64 = |
101 | 0 | (options & base64_default_or_url) |
102 | 0 | ? tables::base64::to_base64_default_or_url_value |
103 | 0 | : ((options & base64_url) ? tables::base64::to_base64_url_value |
104 | 0 | : tables::base64::to_base64_value); |
105 | 0 | const bool ignore_garbage = |
106 | 0 | (options == base64_options::base64_url_accept_garbage) || |
107 | 0 | (options == base64_options::base64_default_accept_garbage) || |
108 | 0 | (options == base64_options::base64_default_or_url_accept_garbage); |
109 | |
|
110 | 0 | size_t equalsigns = 0; |
111 | | // We intentionally include trailing spaces in the full input length. |
112 | | // See https://github.com/simdutf/simdutf/issues/824 |
113 | 0 | size_t full_input_length = srclen; |
114 | | // skip trailing spaces |
115 | 0 | while (!ignore_garbage && srclen > 0 && |
116 | 0 | scalar::base64::is_eight_byte(src[srclen - 1]) && |
117 | 0 | to_base64[uint8_t(src[srclen - 1])] == 64) { |
118 | 0 | srclen--; |
119 | 0 | } |
120 | 0 | size_t equallocation = |
121 | 0 | srclen; // location of the first padding character if any |
122 | 0 | if (ignore_garbage) { |
123 | | // Technically, we don't need to find the first padding character, we can |
124 | | // just change our algorithms, but it adds substantial complexity. |
125 | 0 | auto it = simdutf::find(src, src + srclen, '='); |
126 | 0 | if (it != src + srclen) { |
127 | 0 | equallocation = it - src; |
128 | 0 | equalsigns = 1; |
129 | 0 | srclen = equallocation; |
130 | 0 | full_input_length = equallocation + 1; |
131 | 0 | } |
132 | 0 | return {equalsigns, equallocation, srclen, full_input_length}; |
133 | 0 | } |
134 | 0 | if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { |
135 | | // This is the last '=' sign. |
136 | 0 | equallocation = srclen - 1; |
137 | 0 | srclen--; |
138 | 0 | equalsigns = 1; |
139 | | // skip trailing spaces |
140 | 0 | while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && |
141 | 0 | to_base64[uint8_t(src[srclen - 1])] == 64) { |
142 | 0 | srclen--; |
143 | 0 | } |
144 | 0 | if (srclen > 0 && src[srclen - 1] == '=') { |
145 | | // This is the second '=' sign. |
146 | 0 | equallocation = srclen - 1; |
147 | 0 | srclen--; |
148 | 0 | equalsigns = 2; |
149 | 0 | } |
150 | 0 | } |
151 | 0 | return {equalsigns, equallocation, srclen, full_input_length}; |
152 | 0 | } Unexecuted instantiation: simdutf.cpp:simdutf::scalar::(anonymous namespace)::base64::reduced_input simdutf::scalar::(anonymous namespace)::base64::find_end<char>(char const*, unsigned long, simdutf::base64_options) Unexecuted instantiation: simdutf.cpp:simdutf::scalar::(anonymous namespace)::base64::reduced_input simdutf::scalar::(anonymous namespace)::base64::find_end<char16_t>(char16_t const*, unsigned long, simdutf::base64_options) |
153 | | |
154 | | // Returns true upon success. The destination buffer must be large enough. |
155 | | // This functions assumes that the padding (=) has been removed. |
156 | | // if check_capacity is true, it will check that the destination buffer is |
157 | | // large enough. If it is not, it will return OUTPUT_BUFFER_TOO_SMALL. |
158 | | template <bool check_capacity, class char_type> |
159 | | full_result base64_tail_decode_impl( |
160 | | char *dst, size_t outlen, const char_type *src, size_t length, |
161 | | size_t padding_characters, // number of padding characters |
162 | | // '=', typically 0, 1, 2. |
163 | 0 | base64_options options, last_chunk_handling_options last_chunk_options) { |
164 | 0 | char *dstend = dst + outlen; |
165 | 0 | (void)dstend; |
166 | | // This looks like 10 branches, but we expect the compiler to resolve this to |
167 | | // two branches (easily predicted): |
168 | 0 | const uint8_t *to_base64 = |
169 | 0 | (options & base64_default_or_url) |
170 | 0 | ? tables::base64::to_base64_default_or_url_value |
171 | 0 | : ((options & base64_url) ? tables::base64::to_base64_url_value |
172 | 0 | : tables::base64::to_base64_value); |
173 | 0 | const uint32_t *d0 = |
174 | 0 | (options & base64_default_or_url) |
175 | 0 | ? tables::base64::base64_default_or_url::d0 |
176 | 0 | : ((options & base64_url) ? tables::base64::base64_url::d0 |
177 | 0 | : tables::base64::base64_default::d0); |
178 | 0 | const uint32_t *d1 = |
179 | 0 | (options & base64_default_or_url) |
180 | 0 | ? tables::base64::base64_default_or_url::d1 |
181 | 0 | : ((options & base64_url) ? tables::base64::base64_url::d1 |
182 | 0 | : tables::base64::base64_default::d1); |
183 | 0 | const uint32_t *d2 = |
184 | 0 | (options & base64_default_or_url) |
185 | 0 | ? tables::base64::base64_default_or_url::d2 |
186 | 0 | : ((options & base64_url) ? tables::base64::base64_url::d2 |
187 | 0 | : tables::base64::base64_default::d2); |
188 | 0 | const uint32_t *d3 = |
189 | 0 | (options & base64_default_or_url) |
190 | 0 | ? tables::base64::base64_default_or_url::d3 |
191 | 0 | : ((options & base64_url) ? tables::base64::base64_url::d3 |
192 | 0 | : tables::base64::base64_default::d3); |
193 | 0 | const bool ignore_garbage = |
194 | 0 | (options == base64_options::base64_url_accept_garbage) || |
195 | 0 | (options == base64_options::base64_default_accept_garbage) || |
196 | 0 | (options == base64_options::base64_default_or_url_accept_garbage); |
197 | |
|
198 | 0 | const char_type *srcend = src + length; |
199 | 0 | const char_type *srcinit = src; |
200 | 0 | const char *dstinit = dst; |
201 | |
|
202 | 0 | uint32_t x; |
203 | 0 | size_t idx; |
204 | 0 | uint8_t buffer[4]; |
205 | 0 | while (true) { |
206 | 0 | while (src + 4 <= srcend && is_eight_byte(src[0]) && |
207 | 0 | is_eight_byte(src[1]) && is_eight_byte(src[2]) && |
208 | 0 | is_eight_byte(src[3]) && |
209 | 0 | (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | |
210 | 0 | d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { |
211 | 0 | if (match_system(endianness::BIG)) { |
212 | 0 | x = scalar::u32_swap_bytes(x); |
213 | 0 | } |
214 | 0 | if (check_capacity && dstend - dst < 3) { |
215 | 0 | return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit), |
216 | 0 | size_t(dst - dstinit)}; |
217 | 0 | } |
218 | 0 | std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes |
219 | 0 | dst += 3; |
220 | 0 | src += 4; |
221 | 0 | } |
222 | 0 | const char_type *srccur = src; |
223 | 0 | idx = 0; |
224 | | // we need at least four characters. |
225 | 0 | #ifdef __clang__ |
226 | | // If possible, we read four characters at a time. (It is an optimization.) |
227 | 0 | if (ignore_garbage && src + 4 <= srcend) { |
228 | 0 | char_type c0 = src[0]; |
229 | 0 | char_type c1 = src[1]; |
230 | 0 | char_type c2 = src[2]; |
231 | 0 | char_type c3 = src[3]; |
232 | |
|
233 | 0 | uint8_t code0 = to_base64[uint8_t(c0)]; |
234 | 0 | uint8_t code1 = to_base64[uint8_t(c1)]; |
235 | 0 | uint8_t code2 = to_base64[uint8_t(c2)]; |
236 | 0 | uint8_t code3 = to_base64[uint8_t(c3)]; |
237 | |
|
238 | 0 | buffer[idx] = code0; |
239 | 0 | idx += (is_eight_byte(c0) && code0 <= 63); |
240 | 0 | buffer[idx] = code1; |
241 | 0 | idx += (is_eight_byte(c1) && code1 <= 63); |
242 | 0 | buffer[idx] = code2; |
243 | 0 | idx += (is_eight_byte(c2) && code2 <= 63); |
244 | 0 | buffer[idx] = code3; |
245 | 0 | idx += (is_eight_byte(c3) && code3 <= 63); |
246 | 0 | src += 4; |
247 | 0 | } |
248 | 0 | #endif |
249 | 0 | while ((idx < 4) && (src < srcend)) { |
250 | 0 | char_type c = *src; |
251 | |
|
252 | 0 | uint8_t code = to_base64[uint8_t(c)]; |
253 | 0 | buffer[idx] = uint8_t(code); |
254 | 0 | if (is_eight_byte(c) && code <= 63) { |
255 | 0 | idx++; |
256 | 0 | } else if (!ignore_garbage && |
257 | 0 | (code > 64 || !scalar::base64::is_eight_byte(c))) { |
258 | 0 | return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
259 | 0 | size_t(dst - dstinit)}; |
260 | 0 | } else { |
261 | | // We have a space or a newline or garbage. We ignore it. |
262 | 0 | } |
263 | 0 | src++; |
264 | 0 | } |
265 | 0 | if (idx != 4) { |
266 | 0 | simdutf_log_assert(idx < 4, "idx should be less than 4"); |
267 | | // We never should have that the number of base64 characters + the |
268 | | // number of padding characters is more than 4. |
269 | 0 | if (!ignore_garbage && (idx + padding_characters > 4)) { |
270 | 0 | return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
271 | 0 | size_t(dst - dstinit), true}; |
272 | 0 | } |
273 | | |
274 | | // The idea here is that in loose mode, |
275 | | // if there is padding at all, it must be used |
276 | | // to form 4-wise chunk. However, in loose mode, |
277 | | // we do accept no padding at all. |
278 | 0 | if (!ignore_garbage && |
279 | 0 | last_chunk_options == last_chunk_handling_options::loose && |
280 | 0 | (idx >= 2) && padding_characters > 0 && |
281 | 0 | ((idx + padding_characters) & 3) != 0) { |
282 | 0 | return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
283 | 0 | size_t(dst - dstinit), true}; |
284 | 0 | } else |
285 | | |
286 | | // The idea here is that in strict mode, we do not want to accept |
287 | | // incomplete base64 chunks. So if the chunk was otherwise valid, we |
288 | | // return BASE64_INPUT_REMAINDER. |
289 | 0 | if (!ignore_garbage && |
290 | 0 | last_chunk_options == last_chunk_handling_options::strict && |
291 | 0 | (idx >= 2) && ((idx + padding_characters) & 3) != 0) { |
292 | | // The partial chunk was at src - idx |
293 | 0 | return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), |
294 | 0 | size_t(dst - dstinit), true}; |
295 | 0 | } else |
296 | | // If there is a partial chunk with insufficient padding, with |
297 | | // stop_before_partial, we need to just ignore it. In "only full" |
298 | | // mode, skip the minute there are padding characters. |
299 | 0 | if ((last_chunk_options == |
300 | 0 | last_chunk_handling_options::stop_before_partial && |
301 | 0 | (padding_characters + idx < 4) && (idx != 0) && |
302 | 0 | (idx >= 2 || padding_characters == 0)) || |
303 | 0 | (last_chunk_options == |
304 | 0 | last_chunk_handling_options::only_full_chunks && |
305 | 0 | (idx >= 2 || padding_characters == 0))) { |
306 | | // partial means that we are *not* going to consume the read |
307 | | // characters. We need to rewind the src pointer. |
308 | 0 | src = srccur; |
309 | 0 | return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; |
310 | 0 | } else { |
311 | 0 | if (idx == 2) { |
312 | 0 | uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + |
313 | 0 | (uint32_t(buffer[1]) << 2 * 6); |
314 | 0 | if (!ignore_garbage && |
315 | 0 | (last_chunk_options == last_chunk_handling_options::strict) && |
316 | 0 | (triple & 0xffff)) { |
317 | 0 | return {BASE64_EXTRA_BITS, size_t(src - srcinit), |
318 | 0 | size_t(dst - dstinit)}; |
319 | 0 | } |
320 | 0 | if (check_capacity && dstend - dst < 1) { |
321 | 0 | return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), |
322 | 0 | size_t(dst - dstinit)}; |
323 | 0 | } |
324 | 0 | if (match_system(endianness::BIG)) { |
325 | 0 | triple <<= 8; |
326 | 0 | std::memcpy(dst, &triple, 1); |
327 | 0 | } else { |
328 | 0 | triple = scalar::u32_swap_bytes(triple); |
329 | 0 | triple >>= 8; |
330 | 0 | std::memcpy(dst, &triple, 1); |
331 | 0 | } |
332 | 0 | dst += 1; |
333 | 0 | } else if (idx == 3) { |
334 | 0 | uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + |
335 | 0 | (uint32_t(buffer[1]) << 2 * 6) + |
336 | 0 | (uint32_t(buffer[2]) << 1 * 6); |
337 | 0 | if (!ignore_garbage && |
338 | 0 | (last_chunk_options == last_chunk_handling_options::strict) && |
339 | 0 | (triple & 0xff)) { |
340 | 0 | return {BASE64_EXTRA_BITS, size_t(src - srcinit), |
341 | 0 | size_t(dst - dstinit)}; |
342 | 0 | } |
343 | 0 | if (check_capacity && dstend - dst < 2) { |
344 | 0 | return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), |
345 | 0 | size_t(dst - dstinit)}; |
346 | 0 | } |
347 | 0 | if (match_system(endianness::BIG)) { |
348 | 0 | triple <<= 8; |
349 | 0 | std::memcpy(dst, &triple, 2); |
350 | 0 | } else { |
351 | 0 | triple = scalar::u32_swap_bytes(triple); |
352 | 0 | triple >>= 8; |
353 | 0 | std::memcpy(dst, &triple, 2); |
354 | 0 | } |
355 | 0 | dst += 2; |
356 | 0 | } else if (!ignore_garbage && idx == 1 && |
357 | 0 | (!is_partial(last_chunk_options) || |
358 | 0 | (is_partial(last_chunk_options) && |
359 | 0 | padding_characters > 0))) { |
360 | 0 | return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), |
361 | 0 | size_t(dst - dstinit)}; |
362 | 0 | } else if (!ignore_garbage && idx == 0 && padding_characters > 0) { |
363 | 0 | return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
364 | 0 | size_t(dst - dstinit), true}; |
365 | 0 | } |
366 | 0 | return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; |
367 | 0 | } |
368 | 0 | } |
369 | 0 | if (check_capacity && dstend - dst < 3) { |
370 | 0 | return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), |
371 | 0 | size_t(dst - dstinit)}; |
372 | 0 | } |
373 | 0 | uint32_t triple = |
374 | 0 | (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + |
375 | 0 | (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); |
376 | 0 | if (match_system(endianness::BIG)) { |
377 | 0 | triple <<= 8; |
378 | 0 | std::memcpy(dst, &triple, 3); |
379 | 0 | } else { |
380 | 0 | triple = scalar::u32_swap_bytes(triple); |
381 | 0 | triple >>= 8; |
382 | 0 | std::memcpy(dst, &triple, 3); |
383 | 0 | } |
384 | 0 | dst += 3; |
385 | 0 | } |
386 | 0 | } Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<true, char>(char*, unsigned long, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<true, char16_t>(char*, unsigned long, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<false, char>(char*, unsigned long, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<false, char16_t>(char*, unsigned long, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) |
387 | | |
388 | | template <class char_type> |
389 | | full_result |
390 | | base64_tail_decode(char *dst, const char_type *src, size_t length, |
391 | | size_t padding_characters, // number of padding characters |
392 | | // '=', typically 0, 1, 2. |
393 | | base64_options options, |
394 | 0 | last_chunk_handling_options last_chunk_options) { |
395 | 0 | return base64_tail_decode_impl<false>(dst, 0, src, length, padding_characters, |
396 | 0 | options, last_chunk_options); |
397 | 0 | } Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode<char>(char*, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode<char16_t>(char*, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) |
398 | | |
399 | | // like base64_tail_decode, but it will not write past the end of the output |
400 | | // buffer. The outlen parameter is modified to reflect the number of bytes |
401 | | // written. This functions assumes that the padding (=) has been removed. |
402 | | // |
403 | | template <class char_type> |
404 | | full_result base64_tail_decode_safe( |
405 | | char *dst, size_t outlen, const char_type *src, size_t length, |
406 | | size_t padding_characters, // number of padding characters |
407 | | // '=', typically 0, 1, 2. |
408 | 0 | base64_options options, last_chunk_handling_options last_chunk_options) { |
409 | 0 | return base64_tail_decode_impl<true>(dst, outlen, src, length, |
410 | 0 | padding_characters, options, |
411 | 0 | last_chunk_options); |
412 | 0 | } Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_safe<char>(char*, unsigned long, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_safe<char16_t>(char*, unsigned long, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) |
413 | | |
414 | | inline full_result |
415 | | patch_tail_result(full_result r, size_t previous_input, size_t previous_output, |
416 | | size_t equallocation, size_t full_input_length, |
417 | 0 | last_chunk_handling_options last_chunk_options) { |
418 | 0 | r.input_count += previous_input; |
419 | 0 | r.output_count += previous_output; |
420 | 0 | if (r.padding_error) { |
421 | 0 | r.input_count = equallocation; |
422 | 0 | } |
423 | |
|
424 | 0 | if (r.error == error_code::SUCCESS) { |
425 | 0 | if (!is_partial(last_chunk_options)) { |
426 | | // A success when we are not in stop_before_partial mode. |
427 | | // means that we have consumed the whole input buffer. |
428 | 0 | r.input_count = full_input_length; |
429 | 0 | } else if (r.output_count % 3 != 0) { |
430 | 0 | r.input_count = full_input_length; |
431 | 0 | } |
432 | 0 | } |
433 | 0 | return r; |
434 | 0 | } |
435 | | |
436 | | // Returns the number of bytes written. The destination buffer must be large |
437 | | // enough. It will add padding (=) if needed. |
438 | | size_t tail_encode_base64(char *dst, const char *src, size_t srclen, |
439 | 0 | base64_options options) { |
440 | | // By default, we use padding if we are not using the URL variant. |
441 | | // This is check with ((options & base64_url) == 0) which returns true if we |
442 | | // are not using the URL variant. However, we also allow 'inversion' of the |
443 | | // convention with the base64_reverse_padding option. If the |
444 | | // base64_reverse_padding option is set, we use padding if we are using the |
445 | | // URL variant, and we omit it if we are not using the URL variant. This is |
446 | | // checked with |
447 | | // ((options & base64_reverse_padding) == base64_reverse_padding). |
448 | 0 | bool use_padding = |
449 | 0 | ((options & base64_url) == 0) ^ |
450 | 0 | ((options & base64_reverse_padding) == base64_reverse_padding); |
451 | | // This looks like 3 branches, but we expect the compiler to resolve this to |
452 | | // a single branch: |
453 | 0 | const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 |
454 | 0 | : tables::base64::base64_default::e0; |
455 | 0 | const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 |
456 | 0 | : tables::base64::base64_default::e1; |
457 | 0 | const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 |
458 | 0 | : tables::base64::base64_default::e2; |
459 | 0 | char *out = dst; |
460 | 0 | size_t i = 0; |
461 | 0 | uint8_t t1, t2, t3; |
462 | 0 | for (; i + 2 < srclen; i += 3) { |
463 | 0 | t1 = uint8_t(src[i]); |
464 | 0 | t2 = uint8_t(src[i + 1]); |
465 | 0 | t3 = uint8_t(src[i + 2]); |
466 | 0 | *out++ = e0[t1]; |
467 | 0 | *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; |
468 | 0 | *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; |
469 | 0 | *out++ = e2[t3]; |
470 | 0 | } |
471 | 0 | switch (srclen - i) { |
472 | 0 | case 0: |
473 | 0 | break; |
474 | 0 | case 1: |
475 | 0 | t1 = uint8_t(src[i]); |
476 | 0 | *out++ = e0[t1]; |
477 | 0 | *out++ = e1[(t1 & 0x03) << 4]; |
478 | 0 | if (use_padding) { |
479 | 0 | *out++ = '='; |
480 | 0 | *out++ = '='; |
481 | 0 | } |
482 | 0 | break; |
483 | 0 | default: /* case 2 */ |
484 | 0 | t1 = uint8_t(src[i]); |
485 | 0 | t2 = uint8_t(src[i + 1]); |
486 | 0 | *out++ = e0[t1]; |
487 | 0 | *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; |
488 | 0 | *out++ = e2[(t2 & 0x0F) << 2]; |
489 | 0 | if (use_padding) { |
490 | 0 | *out++ = '='; |
491 | 0 | } |
492 | 0 | } |
493 | 0 | return (size_t)(out - dst); |
494 | 0 | } |
495 | | |
496 | | template <class char_type> |
497 | | simdutf_warn_unused size_t maximal_binary_length_from_base64( |
498 | 0 | const char_type *input, size_t length) noexcept { |
499 | | // We follow https://infra.spec.whatwg.org/#forgiving-base64-decode |
500 | 0 | size_t padding = 0; |
501 | 0 | if (length > 0) { |
502 | 0 | if (input[length - 1] == '=') { |
503 | 0 | padding++; |
504 | 0 | if (length > 1 && input[length - 2] == '=') { |
505 | 0 | padding++; |
506 | 0 | } |
507 | 0 | } |
508 | 0 | } |
509 | 0 | size_t actual_length = length - padding; |
510 | 0 | if (actual_length % 4 <= 1) { |
511 | 0 | return actual_length / 4 * 3; |
512 | 0 | } |
513 | | // if we have a valid input, then the remainder must be 2 or 3 adding one or |
514 | | // two extra bytes. |
515 | 0 | return actual_length / 4 * 3 + (actual_length % 4) - 1; |
516 | 0 | } Unexecuted instantiation: simdutf.cpp:unsigned long simdutf::scalar::(anonymous namespace)::base64::maximal_binary_length_from_base64<char>(char const*, unsigned long) Unexecuted instantiation: simdutf.cpp:unsigned long simdutf::scalar::(anonymous namespace)::base64::maximal_binary_length_from_base64<char16_t>(char16_t const*, unsigned long) |
517 | | |
518 | | template <typename char_type> |
519 | | simdutf_warn_unused full_result base64_to_binary_details_impl( |
520 | | const char_type *input, size_t length, char *output, base64_options options, |
521 | 0 | last_chunk_handling_options last_chunk_options) noexcept { |
522 | 0 | const bool ignore_garbage = |
523 | 0 | (options == base64_options::base64_url_accept_garbage) || |
524 | 0 | (options == base64_options::base64_default_accept_garbage) || |
525 | 0 | (options == base64_options::base64_default_or_url_accept_garbage); |
526 | 0 | auto ri = simdutf::scalar::base64::find_end(input, length, options); |
527 | 0 | size_t equallocation = ri.equallocation; |
528 | 0 | size_t equalsigns = ri.equalsigns; |
529 | 0 | length = ri.srclen; |
530 | 0 | size_t full_input_length = ri.full_input_length; |
531 | 0 | if (length == 0) { |
532 | 0 | if (!ignore_garbage && equalsigns > 0) { |
533 | 0 | return {INVALID_BASE64_CHARACTER, equallocation, 0}; |
534 | 0 | } |
535 | 0 | return {SUCCESS, full_input_length, 0}; |
536 | 0 | } |
537 | 0 | full_result r = scalar::base64::base64_tail_decode( |
538 | 0 | output, input, length, equalsigns, options, last_chunk_options); |
539 | 0 | r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, |
540 | 0 | full_input_length, last_chunk_options); |
541 | 0 | if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && |
542 | 0 | equalsigns > 0 && !ignore_garbage) { |
543 | | // additional checks |
544 | 0 | if ((r.output_count % 3 == 0) || |
545 | 0 | ((r.output_count % 3) + 1 + equalsigns != 4)) { |
546 | 0 | return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; |
547 | 0 | } |
548 | 0 | } |
549 | | // When is_partial(last_chunk_options) is true, we must either end with |
550 | | // the end of the stream (beyond whitespace) or right after a non-ignorable |
551 | | // character or at the very beginning of the stream. |
552 | | // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 |
553 | 0 | if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && |
554 | 0 | r.input_count < full_input_length) { |
555 | | // First check if we can extend the input to the end of the stream |
556 | 0 | while (r.input_count < full_input_length && |
557 | 0 | base64_ignorable(*(input + r.input_count), options)) { |
558 | 0 | r.input_count++; |
559 | 0 | } |
560 | | // If we are still not at the end of the stream, then we must backtrack |
561 | | // to the last non-ignorable character. |
562 | 0 | if (r.input_count < full_input_length) { |
563 | 0 | while (r.input_count > 0 && |
564 | 0 | base64_ignorable(*(input + r.input_count - 1), options)) { |
565 | 0 | r.input_count--; |
566 | 0 | } |
567 | 0 | } |
568 | 0 | } |
569 | 0 | return r; |
570 | 0 | } Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_impl<char>(char const*, unsigned long, char*, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_impl<char16_t>(char16_t const*, unsigned long, char*, simdutf::base64_options, simdutf::last_chunk_handling_options) |
571 | | |
572 | | template <typename char_type> |
573 | | simdutf_warn_unused full_result base64_to_binary_details_safe_impl( |
574 | | const char_type *input, size_t length, char *output, size_t outlen, |
575 | | base64_options options, |
576 | 0 | last_chunk_handling_options last_chunk_options) noexcept { |
577 | 0 | const bool ignore_garbage = |
578 | 0 | (options == base64_options::base64_url_accept_garbage) || |
579 | 0 | (options == base64_options::base64_default_accept_garbage) || |
580 | 0 | (options == base64_options::base64_default_or_url_accept_garbage); |
581 | 0 | auto ri = simdutf::scalar::base64::find_end(input, length, options); |
582 | 0 | size_t equallocation = ri.equallocation; |
583 | 0 | size_t equalsigns = ri.equalsigns; |
584 | 0 | length = ri.srclen; |
585 | 0 | size_t full_input_length = ri.full_input_length; |
586 | 0 | if (length == 0) { |
587 | 0 | if (!ignore_garbage && equalsigns > 0) { |
588 | 0 | return {INVALID_BASE64_CHARACTER, equallocation, 0}; |
589 | 0 | } |
590 | 0 | return {SUCCESS, full_input_length, 0}; |
591 | 0 | } |
592 | 0 | full_result r = scalar::base64::base64_tail_decode_safe( |
593 | 0 | output, outlen, input, length, equalsigns, options, last_chunk_options); |
594 | 0 | r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, |
595 | 0 | full_input_length, last_chunk_options); |
596 | 0 | if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && |
597 | 0 | equalsigns > 0 && !ignore_garbage) { |
598 | | // additional checks |
599 | 0 | if ((r.output_count % 3 == 0) || |
600 | 0 | ((r.output_count % 3) + 1 + equalsigns != 4)) { |
601 | 0 | return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; |
602 | 0 | } |
603 | 0 | } |
604 | | |
605 | | // When is_partial(last_chunk_options) is true, we must either end with |
606 | | // the end of the stream (beyond whitespace) or right after a non-ignorable |
607 | | // character or at the very beginning of the stream. |
608 | | // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 |
609 | 0 | if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && |
610 | 0 | r.input_count < full_input_length) { |
611 | | // First check if we can extend the input to the end of the stream |
612 | 0 | while (r.input_count < full_input_length && |
613 | 0 | base64_ignorable(*(input + r.input_count), options)) { |
614 | 0 | r.input_count++; |
615 | 0 | } |
616 | | // If we are still not at the end of the stream, then we must backtrack |
617 | | // to the last non-ignorable character. |
618 | 0 | if (r.input_count < full_input_length) { |
619 | 0 | while (r.input_count > 0 && |
620 | 0 | base64_ignorable(*(input + r.input_count - 1), options)) { |
621 | 0 | r.input_count--; |
622 | 0 | } |
623 | 0 | } |
624 | 0 | } |
625 | 0 | return r; |
626 | 0 | } Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_safe_impl<char>(char const*, unsigned long, char*, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_safe_impl<char16_t>(char16_t const*, unsigned long, char*, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options) |
627 | | |
628 | | simdutf_warn_unused size_t |
629 | 0 | base64_length_from_binary(size_t length, base64_options options) noexcept { |
630 | | // By default, we use padding if we are not using the URL variant. |
631 | | // This is check with ((options & base64_url) == 0) which returns true if we |
632 | | // are not using the URL variant. However, we also allow 'inversion' of the |
633 | | // convention with the base64_reverse_padding option. If the |
634 | | // base64_reverse_padding option is set, we use padding if we are using the |
635 | | // URL variant, and we omit it if we are not using the URL variant. This is |
636 | | // checked with |
637 | | // ((options & base64_reverse_padding) == base64_reverse_padding). |
638 | 0 | bool use_padding = |
639 | 0 | ((options & base64_url) == 0) ^ |
640 | 0 | ((options & base64_reverse_padding) == base64_reverse_padding); |
641 | 0 | if (!use_padding) { |
642 | 0 | return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); |
643 | 0 | } |
644 | 0 | return (length + 2) / 3 * |
645 | 0 | 4; // We use padding to make the length a multiple of 4. |
646 | 0 | } |
647 | | |
648 | | // Return the length of the prefix that contains count base64 characters. |
649 | | // Thus, if count is 3, the function returns the length of the prefix |
650 | | // that contains 3 base64 characters. |
651 | | // The function returns (size_t)-1 if there is not enough base64 characters in |
652 | | // the input. |
653 | | template <typename char_type> |
654 | | simdutf_warn_unused size_t prefix_length(size_t count, |
655 | | simdutf::base64_options options, |
656 | | const char_type *input, |
657 | | size_t length) noexcept { |
658 | | size_t i = 0; |
659 | | while (i < length && is_ignorable(input[i], options)) { |
660 | | i++; |
661 | | } |
662 | | if (count == 0) { |
663 | | return i; // duh! |
664 | | } |
665 | | for (; i < length; i++) { |
666 | | if (is_ignorable(input[i], options)) { |
667 | | continue; |
668 | | } |
669 | | // We have a base64 character or a padding character. |
670 | | count--; |
671 | | if (count == 0) { |
672 | | return i + 1; |
673 | | } |
674 | | } |
675 | | simdutf_log_assert(false, "You never get here"); |
676 | | |
677 | | return -1; // should never happen |
678 | | } |
679 | | |
680 | | } // namespace base64 |
681 | | } // unnamed namespace |
682 | | } // namespace scalar |
683 | | } // namespace simdutf |
684 | | |
685 | | #endif |