/src/simdutf/include/simdutf/base64_implementation.h
Line | Count | Source |
1 | | #ifndef SIMDUTF_BASE64_IMPLEMENTATION_H |
2 | | #define SIMDUTF_BASE64_IMPLEMENTATION_H |
3 | | |
4 | | // this is not part of the public api |
5 | | |
6 | | namespace simdutf { |
7 | | |
8 | | template <typename chartype> |
9 | | simdutf_warn_unused simdutf_constexpr23 result slow_base64_to_binary_safe_impl( |
10 | | const chartype *input, size_t length, char *output, size_t &outlen, |
11 | | base64_options options, |
12 | 0 | last_chunk_handling_options last_chunk_options) noexcept { |
13 | 0 | const bool ignore_garbage = (options & base64_default_accept_garbage) != 0; |
14 | 0 | auto ri = simdutf::scalar::base64::find_end(input, length, options); |
15 | 0 | size_t equallocation = ri.equallocation; |
16 | 0 | size_t equalsigns = ri.equalsigns; |
17 | 0 | length = ri.srclen; |
18 | 0 | size_t full_input_length = ri.full_input_length; |
19 | 0 | (void)full_input_length; |
20 | 0 | if (length == 0) { |
21 | 0 | outlen = 0; |
22 | 0 | if (!ignore_garbage && equalsigns > 0) { |
23 | 0 | return {INVALID_BASE64_CHARACTER, equallocation}; |
24 | 0 | } |
25 | 0 | return {SUCCESS, 0}; |
26 | 0 | } |
27 | | |
28 | | // The parameters of base64_tail_decode_safe are: |
29 | | // - dst: the output buffer |
30 | | // - outlen: the size of the output buffer |
31 | | // - srcr: the input buffer |
32 | | // - length: the size of the input buffer |
33 | | // - padded_characters: the number of padding characters |
34 | | // - options: the options for the base64 decoder |
35 | | // - last_chunk_options: the options for the last chunk |
36 | | // The function will return the number of bytes written to the output buffer |
37 | | // and the number of bytes read from the input buffer. |
38 | | // The function will also return an error code if the input buffer is not |
39 | | // valid base64. |
40 | 0 | full_result r = scalar::base64::base64_tail_decode_safe( |
41 | 0 | output, outlen, input, length, equalsigns, options, last_chunk_options); |
42 | 0 | r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, |
43 | 0 | full_input_length, last_chunk_options); |
44 | 0 | outlen = r.output_count; |
45 | 0 | if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && |
46 | 0 | equalsigns > 0) { |
47 | | // additional checks |
48 | 0 | if ((outlen % 3 == 0) || ((outlen % 3) + 1 + equalsigns != 4)) { |
49 | 0 | r.error = error_code::INVALID_BASE64_CHARACTER; |
50 | 0 | } |
51 | 0 | } |
52 | 0 | return {r.error, r.input_count}; // we cannot return r itself because it gets |
53 | | // converted to error/output_count |
54 | 0 | } Unexecuted instantiation: simdutf::result simdutf::slow_base64_to_binary_safe_impl<char>(char const*, unsigned long, char*, unsigned long&, simdutf::base64_options, simdutf::last_chunk_handling_options) Unexecuted instantiation: simdutf::result simdutf::slow_base64_to_binary_safe_impl<char16_t>(char16_t const*, unsigned long, char*, unsigned long&, simdutf::base64_options, simdutf::last_chunk_handling_options) |
55 | | |
56 | | template <typename chartype> |
57 | | simdutf_warn_unused simdutf_constexpr23 result base64_to_binary_safe_impl( |
58 | | const chartype *input, size_t length, char *output, size_t &outlen, |
59 | | base64_options options, |
60 | | last_chunk_handling_options last_chunk_handling_options, |
61 | 47.0k | bool decode_up_to_bad_char) noexcept { |
62 | 47.0k | static_assert(std::is_same<chartype, char>::value || |
63 | 47.0k | std::is_same<chartype, char16_t>::value, |
64 | 47.0k | "Only char and char16_t are supported."); |
65 | 47.0k | size_t remaining_input_length = length; |
66 | 47.0k | size_t remaining_output_length = outlen; |
67 | 47.0k | size_t input_position = 0; |
68 | 47.0k | size_t output_position = 0; |
69 | | |
70 | | // We also do a first pass using the fast path to decode as much as possible |
71 | 47.0k | size_t safe_input = (std::min)( |
72 | 47.0k | remaining_input_length, |
73 | 47.0k | base64_length_from_binary(remaining_output_length / 3 * 3, options)); |
74 | 47.0k | bool done_with_partial = (safe_input == remaining_input_length); |
75 | 47.0k | simdutf::full_result r; |
76 | | |
77 | | #if SIMDUTF_CPLUSPLUS23 |
78 | | if consteval { |
79 | | r = scalar::base64::base64_to_binary_details_impl( |
80 | | input + input_position, safe_input, output + output_position, options, |
81 | | done_with_partial |
82 | | ? last_chunk_handling_options |
83 | | : simdutf::last_chunk_handling_options::only_full_chunks); |
84 | | } else |
85 | | #endif |
86 | 47.0k | { |
87 | 47.0k | r = get_active_implementation()->base64_to_binary_details( |
88 | 47.0k | input + input_position, safe_input, output + output_position, options, |
89 | 47.0k | done_with_partial |
90 | 47.0k | ? last_chunk_handling_options |
91 | 47.0k | : simdutf::last_chunk_handling_options::only_full_chunks); |
92 | 47.0k | } |
93 | 47.0k | simdutf_log_assert(r.input_count <= safe_input, |
94 | 47.0k | "You should not read more than safe_input"); |
95 | 47.0k | simdutf_log_assert(r.output_count <= remaining_output_length, |
96 | 47.0k | "You should not write more than remaining_output_length"); |
97 | | // Technically redundant, but we want to be explicit about it. |
98 | 47.0k | input_position += r.input_count; |
99 | 47.0k | output_position += r.output_count; |
100 | 47.0k | remaining_input_length -= r.input_count; |
101 | 47.0k | remaining_output_length -= r.output_count; |
102 | 47.0k | if (r.error != simdutf::error_code::SUCCESS) { |
103 | | // There is an error. We return. |
104 | 1.06k | if (decode_up_to_bad_char && |
105 | 0 | r.error == error_code::INVALID_BASE64_CHARACTER) { |
106 | 0 | return slow_base64_to_binary_safe_impl( |
107 | 0 | input, length, output, outlen, options, last_chunk_handling_options); |
108 | 0 | } |
109 | 1.06k | outlen = output_position; |
110 | 1.06k | return {r.error, input_position}; |
111 | 1.06k | } |
112 | | |
113 | 46.0k | if (done_with_partial) { |
114 | | // We are done. We have decoded everything. |
115 | 43.7k | outlen = output_position; |
116 | 43.7k | return {simdutf::error_code::SUCCESS, input_position}; |
117 | 43.7k | } |
118 | | // We have decoded some data, but we still have some data to decode. |
119 | | // We need to decode the rest of the input buffer. |
120 | 2.28k | r = simdutf::scalar::base64::base64_to_binary_details_safe_impl( |
121 | 2.28k | input + input_position, remaining_input_length, output + output_position, |
122 | 2.28k | remaining_output_length, options, last_chunk_handling_options); |
123 | 2.28k | input_position += r.input_count; |
124 | 2.28k | output_position += r.output_count; |
125 | 2.28k | remaining_input_length -= r.input_count; |
126 | 2.28k | remaining_output_length -= r.output_count; |
127 | | |
128 | 2.28k | if (r.error != simdutf::error_code::SUCCESS) { |
129 | | // There is an error. We return. |
130 | 270 | if (decode_up_to_bad_char && |
131 | 0 | r.error == error_code::INVALID_BASE64_CHARACTER) { |
132 | 0 | return slow_base64_to_binary_safe_impl( |
133 | 0 | input, length, output, outlen, options, last_chunk_handling_options); |
134 | 0 | } |
135 | 270 | outlen = output_position; |
136 | 270 | return {r.error, input_position}; |
137 | 270 | } |
138 | 2.01k | if (input_position < length) { |
139 | | // We cannot process the entire input in one go, so we need to |
140 | | // process it in two steps: first the fast path, then the slow path. |
141 | | // In some cases, the processing might 'eat up' trailing ignorable |
142 | | // characters in the fast path, but that can be a problem. |
143 | | // suppose we have just white space followed by a single base64 character. |
144 | | // If we first process the white space with the fast path, it will |
145 | | // eat all of it. But, by the JavaScript standard, we should consume |
146 | | // no character. See |
147 | | // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 |
148 | 0 | while (input_position > 0 && |
149 | 0 | base64_ignorable(input[input_position - 1], options)) { |
150 | 0 | input_position--; |
151 | 0 | } |
152 | 0 | } |
153 | 2.01k | outlen = output_position; |
154 | 2.01k | return {simdutf::error_code::SUCCESS, input_position}; |
155 | 2.28k | } simdutf::result simdutf::base64_to_binary_safe_impl<char>(char const*, unsigned long, char*, unsigned long&, simdutf::base64_options, simdutf::last_chunk_handling_options, bool) Line | Count | Source | 61 | 47.0k | bool decode_up_to_bad_char) noexcept { | 62 | 47.0k | static_assert(std::is_same<chartype, char>::value || | 63 | 47.0k | std::is_same<chartype, char16_t>::value, | 64 | 47.0k | "Only char and char16_t are supported."); | 65 | 47.0k | size_t remaining_input_length = length; | 66 | 47.0k | size_t remaining_output_length = outlen; | 67 | 47.0k | size_t input_position = 0; | 68 | 47.0k | size_t output_position = 0; | 69 | | | 70 | | // We also do a first pass using the fast path to decode as much as possible | 71 | 47.0k | size_t safe_input = (std::min)( | 72 | 47.0k | remaining_input_length, | 73 | 47.0k | base64_length_from_binary(remaining_output_length / 3 * 3, options)); | 74 | 47.0k | bool done_with_partial = (safe_input == remaining_input_length); | 75 | 47.0k | simdutf::full_result r; | 76 | | | 77 | | #if SIMDUTF_CPLUSPLUS23 | 78 | | if consteval { | 79 | | r = scalar::base64::base64_to_binary_details_impl( | 80 | | input + input_position, safe_input, output + output_position, options, | 81 | | done_with_partial | 82 | | ? last_chunk_handling_options | 83 | | : simdutf::last_chunk_handling_options::only_full_chunks); | 84 | | } else | 85 | | #endif | 86 | 47.0k | { | 87 | 47.0k | r = get_active_implementation()->base64_to_binary_details( | 88 | 47.0k | input + input_position, safe_input, output + output_position, options, | 89 | 47.0k | done_with_partial | 90 | 47.0k | ? last_chunk_handling_options | 91 | 47.0k | : simdutf::last_chunk_handling_options::only_full_chunks); | 92 | 47.0k | } | 93 | 47.0k | simdutf_log_assert(r.input_count <= safe_input, | 94 | 47.0k | "You should not read more than safe_input"); | 95 | 47.0k | simdutf_log_assert(r.output_count <= remaining_output_length, | 96 | 47.0k | "You should not write more than remaining_output_length"); | 97 | | // Technically redundant, but we want to be explicit about it. | 98 | 47.0k | input_position += r.input_count; | 99 | 47.0k | output_position += r.output_count; | 100 | 47.0k | remaining_input_length -= r.input_count; | 101 | 47.0k | remaining_output_length -= r.output_count; | 102 | 47.0k | if (r.error != simdutf::error_code::SUCCESS) { | 103 | | // There is an error. We return. | 104 | 1.06k | if (decode_up_to_bad_char && | 105 | 0 | r.error == error_code::INVALID_BASE64_CHARACTER) { | 106 | 0 | return slow_base64_to_binary_safe_impl( | 107 | 0 | input, length, output, outlen, options, last_chunk_handling_options); | 108 | 0 | } | 109 | 1.06k | outlen = output_position; | 110 | 1.06k | return {r.error, input_position}; | 111 | 1.06k | } | 112 | | | 113 | 46.0k | if (done_with_partial) { | 114 | | // We are done. We have decoded everything. | 115 | 43.7k | outlen = output_position; | 116 | 43.7k | return {simdutf::error_code::SUCCESS, input_position}; | 117 | 43.7k | } | 118 | | // We have decoded some data, but we still have some data to decode. | 119 | | // We need to decode the rest of the input buffer. | 120 | 2.28k | r = simdutf::scalar::base64::base64_to_binary_details_safe_impl( | 121 | 2.28k | input + input_position, remaining_input_length, output + output_position, | 122 | 2.28k | remaining_output_length, options, last_chunk_handling_options); | 123 | 2.28k | input_position += r.input_count; | 124 | 2.28k | output_position += r.output_count; | 125 | 2.28k | remaining_input_length -= r.input_count; | 126 | 2.28k | remaining_output_length -= r.output_count; | 127 | | | 128 | 2.28k | if (r.error != simdutf::error_code::SUCCESS) { | 129 | | // There is an error. We return. | 130 | 270 | if (decode_up_to_bad_char && | 131 | 0 | r.error == error_code::INVALID_BASE64_CHARACTER) { | 132 | 0 | return slow_base64_to_binary_safe_impl( | 133 | 0 | input, length, output, outlen, options, last_chunk_handling_options); | 134 | 0 | } | 135 | 270 | outlen = output_position; | 136 | 270 | return {r.error, input_position}; | 137 | 270 | } | 138 | 2.01k | if (input_position < length) { | 139 | | // We cannot process the entire input in one go, so we need to | 140 | | // process it in two steps: first the fast path, then the slow path. | 141 | | // In some cases, the processing might 'eat up' trailing ignorable | 142 | | // characters in the fast path, but that can be a problem. | 143 | | // suppose we have just white space followed by a single base64 character. | 144 | | // If we first process the white space with the fast path, it will | 145 | | // eat all of it. But, by the JavaScript standard, we should consume | 146 | | // no character. See | 147 | | // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 | 148 | 0 | while (input_position > 0 && | 149 | 0 | base64_ignorable(input[input_position - 1], options)) { | 150 | 0 | input_position--; | 151 | 0 | } | 152 | 0 | } | 153 | 2.01k | outlen = output_position; | 154 | 2.01k | return {simdutf::error_code::SUCCESS, input_position}; | 155 | 2.28k | } |
Unexecuted instantiation: simdutf::result simdutf::base64_to_binary_safe_impl<char16_t>(char16_t const*, unsigned long, char*, unsigned long&, simdutf::base64_options, simdutf::last_chunk_handling_options, bool) |
156 | | |
157 | | } // namespace simdutf |
158 | | #endif // SIMDUTF_BASE64_IMPLEMENTATION_H |