/src/botan/src/lib/codec/base64/base64.cpp
Line | Count | Source |
1 | | /* |
2 | | * Base64 Encoding and Decoding |
3 | | * (C) 2010,2015,2020 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/base64.h> |
9 | | |
10 | | #include <botan/exceptn.h> |
11 | | #include <botan/internal/charset.h> |
12 | | #include <botan/internal/codec_base.h> |
13 | | #include <botan/internal/fmt.h> |
14 | | #include <botan/internal/int_utils.h> |
15 | | #include <botan/internal/loadstor.h> |
16 | | #include <botan/internal/rounding.h> |
17 | | |
18 | | namespace Botan { |
19 | | |
20 | | namespace { |
21 | | |
22 | | class Base64 final { |
23 | | public: |
24 | 0 | static std::string name() noexcept { return "base64"; } |
25 | | |
26 | 0 | static constexpr size_t encoding_bytes_in() noexcept { return m_encoding_bytes_in; } |
27 | | |
28 | 0 | static constexpr size_t encoding_bytes_out() noexcept { return m_encoding_bytes_out; } |
29 | | |
30 | 0 | static constexpr size_t decoding_bytes_in() noexcept { return m_encoding_bytes_out; } |
31 | | |
32 | 0 | static constexpr size_t decoding_bytes_out() noexcept { return m_encoding_bytes_in; } |
33 | | |
34 | 0 | static constexpr size_t bits_consumed() noexcept { return m_encoding_bits; } |
35 | | |
36 | 0 | static constexpr size_t remaining_bits_before_padding() noexcept { return m_remaining_bits_before_padding; } |
37 | | |
38 | 0 | static constexpr size_t encode_max_output(size_t input_length) { |
39 | 0 | return (round_up(input_length, m_encoding_bytes_in) / m_encoding_bytes_in) * m_encoding_bytes_out; |
40 | 0 | } |
41 | | |
42 | 0 | static constexpr size_t decode_max_output(size_t input_length) { |
43 | 0 | return (round_up(input_length, m_encoding_bytes_out) * m_encoding_bytes_in) / m_encoding_bytes_out; |
44 | 0 | } |
45 | | |
46 | | static void encode(char out[4], const uint8_t in[3]) noexcept; |
47 | | |
48 | | static uint8_t lookup_binary_value(char input) noexcept; |
49 | | |
50 | | static bool check_bad_char(uint8_t bin, char input, bool ignore_ws); |
51 | | |
52 | 0 | static void decode(uint8_t* out_ptr, const uint8_t decode_buf[4]) { |
53 | 0 | out_ptr[0] = (decode_buf[0] << 2) | (decode_buf[1] >> 4); |
54 | 0 | out_ptr[1] = (decode_buf[1] << 4) | (decode_buf[2] >> 2); |
55 | 0 | out_ptr[2] = (decode_buf[2] << 6) | decode_buf[3]; |
56 | 0 | } |
57 | | |
58 | 0 | static size_t bytes_to_remove(size_t final_truncate) { return final_truncate; } |
59 | | |
60 | | private: |
61 | | static constexpr size_t m_encoding_bits = 6; |
62 | | static constexpr size_t m_remaining_bits_before_padding = 8; |
63 | | |
64 | | static constexpr size_t m_encoding_bytes_in = 3; |
65 | | static constexpr size_t m_encoding_bytes_out = 4; |
66 | | }; |
67 | | |
68 | 0 | uint32_t lookup_base64_chars(uint32_t x32) { |
69 | | /* |
70 | | * The basic insight of this approach is that our goal is computing |
71 | | * f(x) = y where x is in [0,63) and y is the correct base64 encoding. |
72 | | * |
73 | | * Instead of doing this directly, we compute |
74 | | * offset(x) such that f(x) = x + offset(x) |
75 | | * |
76 | | * This is described in |
77 | | * http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html#improved-version |
78 | | * |
79 | | * Here we do a SWAR (simd within a register) implementation of Wojciech's lookup_version2_swar |
80 | | */ |
81 | |
|
82 | 0 | uint32_t r = x32 + 0x41414141; |
83 | |
|
84 | 0 | r += (~swar_lt<uint32_t>(x32, 0x1A1A1A1A)) & 0x06060606; |
85 | 0 | r -= (~swar_lt<uint32_t>(x32, 0x34343434)) & 0x4B4B4B4B; |
86 | 0 | r -= (~swar_lt<uint32_t>(x32, 0x3E3E3E3E)) & 0x0F0F0F0F; |
87 | 0 | r += (~swar_lt<uint32_t>(x32, 0x3F3F3F3F)) & 0x03030303; |
88 | |
|
89 | 0 | return r; |
90 | 0 | } |
91 | | |
92 | | //static |
93 | 0 | void Base64::encode(char out[4], const uint8_t in[3]) noexcept { |
94 | 0 | const uint32_t b0 = (in[0] & 0xFC) >> 2; |
95 | 0 | const uint32_t b1 = ((in[0] & 0x03) << 4) | (in[1] >> 4); |
96 | 0 | const uint32_t b2 = ((in[1] & 0x0F) << 2) | (in[2] >> 6); |
97 | 0 | const uint32_t b3 = in[2] & 0x3F; |
98 | |
|
99 | 0 | const uint32_t z = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; |
100 | |
|
101 | 0 | const uint32_t b64 = lookup_base64_chars(z); |
102 | |
|
103 | 0 | out[0] = static_cast<char>(get_byte<0>(b64)); |
104 | 0 | out[1] = static_cast<char>(get_byte<1>(b64)); |
105 | 0 | out[2] = static_cast<char>(get_byte<2>(b64)); |
106 | 0 | out[3] = static_cast<char>(get_byte<3>(b64)); |
107 | 0 | } |
108 | | |
109 | | //static |
110 | 0 | uint8_t Base64::lookup_binary_value(char input) noexcept { |
111 | 0 | auto has_zero_byte = [](uint64_t v) { return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080); }; |
112 | | |
113 | | // Assumes each byte is either 0x00 or 0x80 |
114 | 0 | auto index_of_first_set_byte = [](uint64_t v) { |
115 | 0 | return ((((v - 1) & 0x0101010101010101) * 0x0101010101010101) >> 56) - 1; |
116 | 0 | }; |
117 | |
|
118 | 0 | constexpr uint64_t lo = 0x0101010101010101; |
119 | |
|
120 | 0 | const uint8_t x = static_cast<uint8_t>(input); |
121 | |
|
122 | 0 | const uint64_t x8 = x * lo; |
123 | | |
124 | | // Defines the valid ASCII ranges of base64, except the special chars (below) |
125 | 0 | constexpr uint64_t val_l = make_uint64(0, 0, 0, 0, 0, 'A', 'a', '0'); |
126 | 0 | constexpr uint64_t val_u = make_uint64(0, 0, 0, 0, 0, 26, 26, 10); |
127 | | |
128 | | // If x is in one of the ranges return a mask. Otherwise we xor in at the |
129 | | // high word which will be our invalid marker |
130 | 0 | auto v_mask = swar_in_range<uint64_t>(x8, val_l, val_u) ^ 0x80000000; |
131 | | |
132 | | // This is the offset added to x to get the value |
133 | 0 | const uint64_t val_v = 0xbfb904 ^ (0xFF000000 - (x << 24)); |
134 | |
|
135 | 0 | uint8_t z = x + static_cast<uint8_t>(val_v >> (8 * index_of_first_set_byte(v_mask))); |
136 | | |
137 | | // Valid base64 special characters, and some whitespace chars |
138 | 0 | constexpr uint64_t specials_i = make_uint64(0, '+', '/', '=', ' ', '\n', '\t', '\r'); |
139 | |
|
140 | 0 | const uint64_t specials_v = 0x3e3f8180808080 ^ (static_cast<uint64_t>(z) << 56); |
141 | |
|
142 | 0 | const uint64_t smask = has_zero_byte(x8 ^ specials_i) ^ 0x8000000000000000; |
143 | |
|
144 | 0 | return static_cast<uint8_t>(specials_v >> (8 * index_of_first_set_byte(smask))); |
145 | 0 | } |
146 | | |
147 | | //static |
148 | 0 | bool Base64::check_bad_char(uint8_t bin, char input, bool ignore_ws) { |
149 | 0 | if(bin <= 0x3F) { |
150 | 0 | return true; |
151 | 0 | } else if(!(bin == 0x81 || (bin == 0x80 && ignore_ws))) { |
152 | 0 | throw Invalid_Argument(fmt("base64_decode: invalid character '{}'", format_char_for_display(input))); |
153 | 0 | } |
154 | 0 | return false; |
155 | 0 | } |
156 | | |
157 | | } // namespace |
158 | | |
159 | 0 | size_t base64_encode(char out[], const uint8_t in[], size_t input_length, size_t& input_consumed, bool final_inputs) { |
160 | 0 | return base_encode(Base64(), out, in, input_length, input_consumed, final_inputs); |
161 | 0 | } |
162 | | |
163 | 0 | std::string base64_encode(const uint8_t input[], size_t input_length) { |
164 | 0 | return base_encode_to_string(Base64(), input, input_length); |
165 | 0 | } |
166 | | |
167 | | size_t base64_decode( |
168 | 0 | uint8_t out[], const char in[], size_t input_length, size_t& input_consumed, bool final_inputs, bool ignore_ws) { |
169 | 0 | return base_decode(Base64(), out, in, input_length, input_consumed, final_inputs, ignore_ws); |
170 | 0 | } |
171 | | |
172 | 0 | size_t base64_decode(uint8_t output[], const char input[], size_t input_length, bool ignore_ws) { |
173 | 0 | return base_decode_full(Base64(), output, input, input_length, ignore_ws); |
174 | 0 | } |
175 | | |
176 | 0 | size_t base64_decode(uint8_t output[], std::string_view input, bool ignore_ws) { |
177 | 0 | return base64_decode(output, input.data(), input.length(), ignore_ws); |
178 | 0 | } |
179 | | |
180 | 0 | size_t base64_decode(std::span<uint8_t> output, std::string_view input, bool ignore_ws) { |
181 | 0 | if(output.size() < base64_decode_max_output(input.size())) { |
182 | 0 | throw Invalid_Argument("base64_decode: output buffer is too short"); |
183 | 0 | } |
184 | 0 | return base64_decode(output.data(), input.data(), input.length(), ignore_ws); |
185 | 0 | } |
186 | | |
187 | 0 | secure_vector<uint8_t> base64_decode(const char input[], size_t input_length, bool ignore_ws) { |
188 | 0 | return base_decode_to_vec<secure_vector<uint8_t>>(Base64(), input, input_length, ignore_ws); |
189 | 0 | } |
190 | | |
191 | 0 | secure_vector<uint8_t> base64_decode(std::string_view input, bool ignore_ws) { |
192 | 0 | return base64_decode(input.data(), input.size(), ignore_ws); |
193 | 0 | } |
194 | | |
195 | 0 | size_t base64_encode_max_output(size_t input_length) { |
196 | 0 | return Base64::encode_max_output(input_length); |
197 | 0 | } |
198 | | |
199 | 0 | size_t base64_decode_max_output(size_t input_length) { |
200 | 0 | return Base64::decode_max_output(input_length); |
201 | 0 | } |
202 | | |
203 | | } // namespace Botan |