/src/botan/src/lib/codec/base64/base64.cpp

Source (jump to first uncovered line)
/*
* Base64 Encoding and Decoding
* (C) 2010,2015,2020 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/base64.h>

#include <botan/exceptn.h>
#include <botan/internal/charset.h>
#include <botan/internal/codec_base.h>
#include <botan/internal/ct_utils.h>
#include <botan/internal/fmt.h>
#include <botan/internal/int_utils.h>
#include <botan/internal/loadstor.h>
#include <botan/internal/rounding.h>

namespace Botan {

namespace {

/**
* Codec description for Base64.
*
* An instance of this class is passed as `Base64()` to the generic
* base_encode / base_decode helpers used further down in this file; every
* member is static, so the object itself carries no state.
*/
class Base64 final {
   public:
      /// Codec name ("base64")
      static std::string name() noexcept { return "base64"; }

      /// Number of raw bytes in one encoding group (3)
      static size_t encoding_bytes_in() noexcept { return m_encoding_bytes_in; }

      /// Number of characters produced for one encoding group (4)
      static size_t encoding_bytes_out() noexcept { return m_encoding_bytes_out; }

      /// Number of characters in one decoding group (4)
      static size_t decoding_bytes_in() noexcept { return m_encoding_bytes_out; }

      /// Number of raw bytes produced from one decoding group (3)
      static size_t decoding_bytes_out() noexcept { return m_encoding_bytes_in; }

      /// Returns m_encoding_bits (6)
      static size_t bits_consumed() noexcept { return m_encoding_bits; }

      /// Returns m_remaining_bits_before_padding (8); how this is used is
      /// decided by the generic codec helpers, which are not visible here
      static size_t remaining_bits_before_padding() noexcept { return m_remaining_bits_before_padding; }

      /**
      * Upper bound on the number of characters produced when encoding
      * @param input_length number of raw bytes to encode
      * @return 4 characters for every (possibly partial) group of 3 bytes
      */
      static size_t encode_max_output(size_t input_length) {
         return (round_up(input_length, m_encoding_bytes_in) / m_encoding_bytes_in) * m_encoding_bytes_out;
      }

      /**
      * Upper bound on the number of bytes produced when decoding
      * @param input_length number of encoded characters
      * @return 3 bytes for every (possibly partial) group of 4 characters
      */
      static size_t decode_max_output(size_t input_length) {
         // Divide before multiplying. The rounded length is expected to be a
         // multiple of m_encoding_bytes_out (encode_max_output relies on the
         // same property of round_up), so the division is exact and the
         // result is unchanged, but the intermediate value can no longer wrap
         // around size_t for very large inputs and yield an undersized bound.
         return (round_up(input_length, m_encoding_bytes_out) / m_encoding_bytes_out) * m_encoding_bytes_in;
      }

      /// Encode exactly 3 bytes from `in` into 4 characters in `out`
      static void encode(char out[4], const uint8_t in[3]) noexcept;

      /// Map one input character to its decoded value or a marker code
      static uint8_t lookup_binary_value(char input) noexcept;

      /// Classify a value returned by lookup_binary_value(); may throw
      static bool check_bad_char(uint8_t bin, char input, bool ignore_ws);

      /**
      * Pack four decoded 6-bit values into three output bytes.
      *
      * Assumes every entry of decode_buf is at most 0x3F. Bits shifted past
      * bit 7 are dropped by the narrowing store into uint8_t.
      */
      static void decode(uint8_t* out_ptr, const uint8_t decode_buf[4]) {
         out_ptr[0] = (decode_buf[0] << 2) | (decode_buf[1] >> 4);
         out_ptr[1] = (decode_buf[1] << 4) | (decode_buf[2] >> 2);
         out_ptr[2] = (decode_buf[2] << 6) | decode_buf[3];
      }

      /// Returns final_truncate unchanged
      static size_t bytes_to_remove(size_t final_truncate) { return final_truncate; }

   private:
      static const size_t m_encoding_bits = 6;
      static const size_t m_remaining_bits_before_padding = 8;

      static const size_t m_encoding_bytes_in = 3;
      static const size_t m_encoding_bytes_out = 4;
};

uint32_t lookup_base64_chars(uint32_t x32) {
   /*
   * Translate four packed 6-bit values (one per byte of x32, each in the
   * range [0,64)) into their base64 alphabet characters, without a table.
   *
   * Rather than computing f(x) directly we compute offset(x) such that
   * f(x) = x + offset(x). The offset only changes at the alphabet
   * boundaries 26, 52, 62 and 63:
   *
   *    x in [ 0,26)  ->  x + 65   ('A'..'Z')
   *    x in [26,52)  ->  x + 71   ('a'..'z')
   *    x in [52,62)  ->  x -  4   ('0'..'9')
   *    x == 62       ->  x - 19   ('+')
   *    x == 63       ->  x - 16   ('/')
   *
   * This is described in
   * http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html#improved-version
   *
   * Here we do a SWAR (simd within a register) implementation of Wojciech's
   * lookup_version2_swar
   */

   // Each mask is presumably all-ones in the bytes whose value has reached the
   // given boundary (complement of the per-byte "less than" mask from swar_lt)
   const uint32_t at_least_26 = ~swar_lt<uint32_t>(x32, 0x1A1A1A1A);
   const uint32_t at_least_52 = ~swar_lt<uint32_t>(x32, 0x34343434);
   const uint32_t at_least_62 = ~swar_lt<uint32_t>(x32, 0x3E3E3E3E);
   const uint32_t at_least_63 = ~swar_lt<uint32_t>(x32, 0x3F3F3F3F);

   // Start from the uppercase offset and apply the per-boundary corrections
   uint32_t encoded = x32 + 0x41414141;

   encoded += at_least_26 & 0x06060606;
   encoded -= at_least_52 & 0x4B4B4B4B;
   encoded -= at_least_62 & 0x0F0F0F0F;
   encoded += at_least_63 & 0x03030303;

   return encoded;
}

//static
/*
* Encode exactly three input bytes as four base64 characters.
*/
void Base64::encode(char out[4], const uint8_t in[3]) noexcept {
   // Collect the 24 input bits into a single value, in[0] in the top bits
   const uint32_t group =
      (static_cast<uint32_t>(in[0]) << 16) | (static_cast<uint32_t>(in[1]) << 8) | static_cast<uint32_t>(in[2]);

   // Cut the group into four 6-bit values
   const uint32_t s0 = (group >> 18) & 0x3F;
   const uint32_t s1 = (group >> 12) & 0x3F;
   const uint32_t s2 = (group >> 6) & 0x3F;
   const uint32_t s3 = group & 0x3F;

   // One 6-bit value per byte so all four can be translated at once
   const uint32_t packed = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;

   const uint32_t chars = lookup_base64_chars(packed);

   out[0] = static_cast<char>(get_byte<0>(chars));
   out[1] = static_cast<char>(get_byte<1>(chars));
   out[2] = static_cast<char>(get_byte<2>(chars));
   out[3] = static_cast<char>(get_byte<3>(chars));
}

//static
/*
* Map a single input character to its base64 value or to a marker code,
* using only arithmetic on 64-bit words (no table and no branch on the input).
*
* Result, as far as it can be derived from the constants below (assuming
* make_uint64 takes its most significant byte first and swar_in_range flags
* the bytes with lower <= x < lower + length -- TODO confirm against the
* helper headers):
*
*    0x00..0x3F  value of a base64 alphabet character
*    0x80        one of ' ', '\n', '\t', '\r'
*    0x81        '='
*    0xFF        anything else
*
* These are the codes that check_bad_char() tests for.
*/
uint8_t Base64::lookup_binary_value(char input) noexcept {
   // Classic SWAR zero-byte test: the result is non-zero iff some byte of v is
   // 0x00, and its lowest set bit is the high bit of the lowest such byte
   auto has_zero_byte = [](uint64_t v) { return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080); };

   // Assumes each byte is either 0x00 or 0x80
   //
   // v - 1 turns every byte at or below the lowest set byte into a value with
   // bit 0 set; masking with 0x01.. and multiplying by 0x01.. sums those ones
   // into the top byte, which gives (index + 1)
   auto index_of_first_set_byte = [](uint64_t v) {
      return ((((v - 1) & 0x0101010101010101) * 0x0101010101010101) >> 56) - 1;
   };

   constexpr uint64_t lo = 0x0101010101010101;

   const uint8_t x = static_cast<uint8_t>(input);

   // x replicated into all eight bytes
   const uint64_t x8 = x * lo;

   // Defines the valid ASCII ranges of base64, except the special chars (below)
   // val_l holds the first character of each range, val_u the range length
   constexpr uint64_t val_l = make_uint64(0, 0, 0, 0, 0, 'A', 'a', '0');
   constexpr uint64_t val_u = make_uint64(0, 0, 0, 0, 0, 26, 26, 10);

   // If x is in one of the ranges return a mask. Otherwise the flag xor-ed into
   // byte 3 (one byte above the three range bytes) acts as the "no range
   // matched" marker
   auto v_mask = swar_in_range<uint64_t>(x8, val_l, val_u) ^ 0x80000000;

   // This is the offset added to x to get the value, one candidate per byte:
   //   byte 0: 0x04      ('0' + 4 == 52)
   //   byte 1: 0xb9      ('a' + 0xb9 == 26 mod 256)
   //   byte 2: 0xbf      ('A' + 0xbf == 0 mod 256)
   //   byte 3: 0xFF - x  (x + (0xFF - x) == 0xFF, the invalid marker)
   const uint64_t val_v = 0xbfb904 ^ (0xFF000000 - (x << 24));

   // Pick the offset belonging to the lowest flagged byte and apply it
   uint8_t z = x + static_cast<uint8_t>(val_v >> (8 * index_of_first_set_byte(v_mask)));

   // Valid base64 special characters, and some whitespace chars
   constexpr uint64_t specials_i = make_uint64(0, '+', '/', '=', ' ', '\n', '\t', '\r');

   // Result for each special character, in the same byte position as in
   // specials_i: 0x3e for '+', 0x3f for '/', 0x81 for '=', 0x80 for each
   // whitespace character. The top byte carries z, the result computed above.
   const uint64_t specials_v = 0x3e3f8180808080 ^ (static_cast<uint64_t>(z) << 56);

   // A byte of x8 ^ specials_i is zero exactly where x equals that special
   // character; the flag in the top byte is flipped so that z is selected
   // when no special character matched
   const uint64_t smask = has_zero_byte(x8 ^ specials_i) ^ 0x8000000000000000;

   return static_cast<uint8_t>(specials_v >> (8 * index_of_first_set_byte(smask)));
}

//static
/*
* Decide what to do with a value produced by lookup_binary_value().
*
* Returns true when bin is a decoded value (at most 0x3F) and false when the
* character should be skipped: bin is 0x81, or bin is 0x80 and ignore_ws is
* set. Any other value throws Invalid_Argument naming the offending input
* character.
*/
bool Base64::check_bad_char(uint8_t bin, char input, bool ignore_ws) {
   if(bin <= 0x3F) {
      return true;
   }

   const bool is_marker_0x81 = (bin == 0x81);
   const bool is_ignorable_0x80 = (bin == 0x80) && ignore_ws;

   if(is_marker_0x81 || is_ignorable_0x80) {
      return false;
   }

   throw Invalid_Argument(fmt("base64_decode: invalid character '{}'", format_char_for_display(input)));
}

}  // namespace

/*
* Incremental base64 encoding: forwards all arguments to base_encode() with
* the Base64 codec.
*
* The return value and the value stored in input_consumed are whatever
* base_encode() produces (presumably characters written and input bytes
* consumed -- see botan/base64.h for the public contract).
*/
size_t base64_encode(char out[], const uint8_t in[], size_t input_length, size_t& input_consumed, bool final_inputs) {
   return base_encode(Base64(), out, in, input_length, input_consumed, final_inputs);
}

/*
* Encode input_length bytes starting at input and return the result as a
* string; implemented by base_encode_to_string() with the Base64 codec.
*/
std::string base64_encode(const uint8_t input[], size_t input_length) {
   return base_encode_to_string(Base64(), input, input_length);
}

/*
* Incremental base64 decoding: forwards all arguments to base_decode() with
* the Base64 codec.
*
* The return value and the value stored in input_consumed are whatever
* base_decode() produces (presumably bytes written and characters consumed --
* see botan/base64.h for the public contract).
*/
size_t base64_decode(
   uint8_t out[], const char in[], size_t input_length, size_t& input_consumed, bool final_inputs, bool ignore_ws) {
   return base_decode(Base64(), out, in, input_length, input_consumed, final_inputs, ignore_ws);
}

/*
* One-shot base64 decoding of input_length characters into a caller supplied
* buffer; implemented by base_decode_full() with the Base64 codec.
*
* No buffer size is passed, so the caller is responsible for output being
* large enough (base64_decode_max_output() gives an upper bound).
*/
size_t base64_decode(uint8_t output[], const char input[], size_t input_length, bool ignore_ws) {
   return base_decode_full(Base64(), output, input, input_length, ignore_ws);
}

/*
* Convenience overload: decodes the characters of a string_view by forwarding
* its data pointer and length to the (output, input, input_length, ignore_ws)
* overload. As there, the size of output is not checked.
*/
size_t base64_decode(uint8_t output[], std::string_view input, bool ignore_ws) {
   return base64_decode(output, input.data(), input.length(), ignore_ws);
}

/*
* Size-checked decoding into a span.
*
* Throws Invalid_Argument if output is smaller than
* base64_decode_max_output(input.size()); otherwise decodes through the
* pointer based overload and returns its result.
*/
size_t base64_decode(std::span<uint8_t> output, std::string_view input, bool ignore_ws) {
   const size_t required_size = base64_decode_max_output(input.size());

   if(required_size > output.size()) {
      throw Invalid_Argument("base64_decode: output buffer is too short");
   }

   uint8_t* const out_ptr = output.data();
   return base64_decode(out_ptr, input.data(), input.length(), ignore_ws);
}

/*
* Decode input_length characters and return the result in a freshly created
* secure_vector; implemented by base_decode_to_vec() with the Base64 codec.
*/
secure_vector<uint8_t> base64_decode(const char input[], size_t input_length, bool ignore_ws) {
   return base_decode_to_vec<secure_vector<uint8_t>>(Base64(), input, input_length, ignore_ws);
}

/*
* Convenience overload: decodes the characters of a string_view by forwarding
* its data pointer and size to the (input, input_length, ignore_ws) overload.
*/
secure_vector<uint8_t> base64_decode(std::string_view input, bool ignore_ws) {
   return base64_decode(input.data(), input.size(), ignore_ws);
}

/*
* Upper bound on the number of characters needed to encode input_length
* bytes; exposes Base64::encode_max_output() outside this file.
*/
size_t base64_encode_max_output(size_t input_length) {
   return Base64::encode_max_output(input_length);
}

/*
* Upper bound on the number of bytes produced by decoding input_length
* characters; exposes Base64::decode_max_output() outside this file.
*/
size_t base64_decode_max_output(size_t input_length) {
   return Base64::decode_max_output(input_length);
}

}  // namespace Botan

Coverage Report

Created: 2024-11-29 06:10

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Base64 Encoding and Decoding
3		* (C) 2010,2015,2020 Jack Lloyd
4		*
5		* Botan is released under the Simplified BSD License (see license.txt)
6		*/
7
8		#include <botan/base64.h>
9
10		#include <botan/exceptn.h>
11		#include <botan/internal/charset.h>
12		#include <botan/internal/codec_base.h>
13		#include <botan/internal/ct_utils.h>
14		#include <botan/internal/fmt.h>
15		#include <botan/internal/int_utils.h>
16		#include <botan/internal/loadstor.h>
17		#include <botan/internal/rounding.h>
18
19		namespace Botan {
20
21		namespace {
22
23		class Base64 final {
24		public:
25	106	static std::string name() noexcept { return "base64"; }
26
27	0	static size_t encoding_bytes_in() noexcept { return m_encoding_bytes_in; }
28
29	0	static size_t encoding_bytes_out() noexcept { return m_encoding_bytes_out; }
30
31	11.6k	static size_t decoding_bytes_in() noexcept { return m_encoding_bytes_out; }
32
33	11.6k	static size_t decoding_bytes_out() noexcept { return m_encoding_bytes_in; }
34
35	0	static size_t bits_consumed() noexcept { return m_encoding_bits; }
36
37	0	static size_t remaining_bits_before_padding() noexcept { return m_remaining_bits_before_padding; }
38
39	0	static size_t encode_max_output(size_t input_length) {
40	0	return (round_up(input_length, m_encoding_bytes_in) / m_encoding_bytes_in) * m_encoding_bytes_out;
41	0	}
42
43	23.2k	static size_t decode_max_output(size_t input_length) {
44	23.2k	return (round_up(input_length, m_encoding_bytes_out) * m_encoding_bytes_in) / m_encoding_bytes_out;
45	23.2k	}
46
47		static void encode(char out[4], const uint8_t in[3]) noexcept;
48
49		static uint8_t lookup_binary_value(char input) noexcept;
50
51		static bool check_bad_char(uint8_t bin, char input, bool ignore_ws);
52
53	3.71M	static void decode(uint8_t* out_ptr, const uint8_t decode_buf[4]) {
54	3.71M	out_ptr[0] = (decode_buf[0] << 2) | (decode_buf[1] >> 4);
55	3.71M	out_ptr[1] = (decode_buf[1] << 4) | (decode_buf[2] >> 2);
56	3.71M	out_ptr[2] = (decode_buf[2] << 6) | decode_buf[3];
57	3.71M	}
58
59	11.4k	static size_t bytes_to_remove(size_t final_truncate) { return final_truncate; }
60
61		private:
62		static const size_t m_encoding_bits = 6;
63		static const size_t m_remaining_bits_before_padding = 8;
64
65		static const size_t m_encoding_bytes_in = 3;
66		static const size_t m_encoding_bytes_out = 4;
67		};
68
69	0	uint32_t lookup_base64_chars(uint32_t x32) {
70		/*
71		* The basic insight of this approach is that our goal is computing
72		* f(x) = y where x is in [0,63) and y is the correct base64 encoding.
73		*
74		* Instead of doing this directly, we compute
75		* offset(x) such that f(x) = x + offset(x)
76		*
77		* This is described in
78		* http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html#improved-version
79		*
80		* Here we do a SWAR (simd within a register) implementation of Wojciech's lookup_version2_swar
81		*/
82
83	0	uint32_t r = x32 + 0x41414141;
84
85	0	r += (~swar_lt<uint32_t>(x32, 0x1A1A1A1A)) & 0x06060606;
86	0	r -= (~swar_lt<uint32_t>(x32, 0x34343434)) & 0x4B4B4B4B;
87	0	r -= (~swar_lt<uint32_t>(x32, 0x3E3E3E3E)) & 0x0F0F0F0F;
88	0	r += (~swar_lt<uint32_t>(x32, 0x3F3F3F3F)) & 0x03030303;
89
90	0	return r;
91	0	}
92
93		//static
94	0	void Base64::encode(char out[4], const uint8_t in[3]) noexcept {
95	0	const uint32_t b0 = (in[0] & 0xFC) >> 2;
96	0	const uint32_t b1 = ((in[0] & 0x03) << 4) | (in[1] >> 4);
97	0	const uint32_t b2 = ((in[1] & 0x0F) << 2) | (in[2] >> 6);
98	0	const uint32_t b3 = in[2] & 0x3F;
99
100	0	const uint32_t z = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
101
102	0	const uint32_t b64 = lookup_base64_chars(z);
103
104	0	out[0] = static_cast<char>(get_byte<0>(b64));
105	0	out[1] = static_cast<char>(get_byte<1>(b64));
106	0	out[2] = static_cast<char>(get_byte<2>(b64));
107	0	out[3] = static_cast<char>(get_byte<3>(b64));
108	0	}
109
110		//static
111	15.2M	uint8_t Base64::lookup_binary_value(char input) noexcept {
112	15.2M	auto has_zero_byte = [](uint64_t v) { return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080); };
113
114		// Assumes each byte is either 0x00 or 0x80
115	30.5M	auto index_of_first_set_byte = [](uint64_t v) {
116	30.5M	return ((((v - 1) & 0x0101010101010101) * 0x0101010101010101) >> 56) - 1;
117	30.5M	};
118
119	15.2M	constexpr uint64_t lo = 0x0101010101010101;
120
121	15.2M	const uint8_t x = static_cast<uint8_t>(input);
122
123	15.2M	const uint64_t x8 = x * lo;
124
125		// Defines the valid ASCII ranges of base64, except the special chars (below)
126	15.2M	constexpr uint64_t val_l = make_uint64(0, 0, 0, 0, 0, 'A', 'a', '0');
127	15.2M	constexpr uint64_t val_u = make_uint64(0, 0, 0, 0, 0, 26, 26, 10);
128
129		// If x is in one of the ranges return a mask. Otherwise we xor in at the
130		// high word which will be our invalid marker
131	15.2M	auto v_mask = swar_in_range<uint64_t>(x8, val_l, val_u) ^ 0x80000000;
132
133		// This is the offset added to x to get the value
134	15.2M	const uint64_t val_v = 0xbfb904 ^ (0xFF000000 - (x << 24));
135
136	15.2M	uint8_t z = x + static_cast<uint8_t>(val_v >> (8 * index_of_first_set_byte(v_mask)));
137
138		// Valid base64 special characters, and some whitespace chars
139	15.2M	constexpr uint64_t specials_i = make_uint64(0, '+', '/', '=', ' ', '\n', '\t', '\r');
140
141	15.2M	const uint64_t specials_v = 0x3e3f8180808080 ^ (static_cast<uint64_t>(z) << 56);
142
143	15.2M	const uint64_t smask = has_zero_byte(x8 ^ specials_i) ^ 0x8000000000000000;
144
145	15.2M	return static_cast<uint8_t>(specials_v >> (8 * index_of_first_set_byte(smask)));
146	15.2M	}
147
148		//static
149	15.2M	bool Base64::check_bad_char(uint8_t bin, char input, bool ignore_ws) {
150	15.2M	if(bin <= 0x3F) {
151	14.8M	return true;
152	14.8M	} else if(!(bin == 0x81 || (bin == 0x80 && ignore_ws))) {
153	211	throw Invalid_Argument(fmt("base64_decode: invalid character '{}'", format_char_for_display(input)));
154	211	}
155	386k	return false;
156	15.2M	}
157
158		} // namespace
159
160	0	size_t base64_encode(char out[], const uint8_t in[], size_t input_length, size_t& input_consumed, bool final_inputs) {
161	0	return base_encode(Base64(), out, in, input_length, input_consumed, final_inputs);
162	0	}
163
164	0	std::string base64_encode(const uint8_t input[], size_t input_length) {
165	0	return base_encode_to_string(Base64(), input, input_length);
166	0	}
167
168		size_t base64_decode(
169	0	uint8_t out[], const char in[], size_t input_length, size_t& input_consumed, bool final_inputs, bool ignore_ws) {
170	0	return base_decode(Base64(), out, in, input_length, input_consumed, final_inputs, ignore_ws);
171	0	}
172
173	0	size_t base64_decode(uint8_t output[], const char input[], size_t input_length, bool ignore_ws) {
174	0	return base_decode_full(Base64(), output, input, input_length, ignore_ws);
175	0	}
176
177	0	size_t base64_decode(uint8_t output[], std::string_view input, bool ignore_ws) {
178	0	return base64_decode(output, input.data(), input.length(), ignore_ws);
179	0	}
180
181	0	size_t base64_decode(std::span<uint8_t> output, std::string_view input, bool ignore_ws) {
182	0	if(output.size() < base64_decode_max_output(input.size())) {
183	0	throw Invalid_Argument("base64_decode: output buffer is too short");
184	0	}
185	0	return base64_decode(output.data(), input.data(), input.length(), ignore_ws);
186	0	}
187
188	11.6k	secure_vector<uint8_t> base64_decode(const char input[], size_t input_length, bool ignore_ws) {
189	11.6k	return base_decode_to_vec<secure_vector<uint8_t>>(Base64(), input, input_length, ignore_ws);
190	11.6k	}
191
192	0	secure_vector<uint8_t> base64_decode(std::string_view input, bool ignore_ws) {
193	0	return base64_decode(input.data(), input.size(), ignore_ws);
194	0	}
195
196	0	size_t base64_encode_max_output(size_t input_length) {
197	0	return Base64::encode_max_output(input_length);
198	0	}
199
200	0	size_t base64_decode_max_output(size_t input_length) {
201	0	return Base64::decode_max_output(input_length);
202	0	}
203
204		} // namespace Botan