/src/botan/src/lib/stream/chacha/chacha.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * ChaCha |
3 | | * (C) 2014,2018,2023 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/chacha.h> |
9 | | |
10 | | #include <botan/exceptn.h> |
11 | | #include <botan/internal/cpuid.h> |
12 | | #include <botan/internal/fmt.h> |
13 | | #include <botan/internal/loadstor.h> |
14 | | #include <botan/internal/rotate.h> |
15 | | |
16 | | namespace Botan { |
17 | | |
18 | | namespace { |
19 | | |
20 | 19.9k | inline void chacha_quarter_round(uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d) { |
21 | 19.9k | a += b; |
22 | 19.9k | d ^= a; |
23 | 19.9k | d = rotl<16>(d); |
24 | 19.9k | c += d; |
25 | 19.9k | b ^= c; |
26 | 19.9k | b = rotl<12>(b); |
27 | 19.9k | a += b; |
28 | 19.9k | d ^= a; |
29 | 19.9k | d = rotl<8>(d); |
30 | 19.9k | c += d; |
31 | 19.9k | b ^= c; |
32 | 19.9k | b = rotl<7>(b); |
33 | 19.9k | } |
34 | | |
35 | | /* |
36 | | * Generate HChaCha cipher stream (for XChaCha IV setup) |
37 | | */ |
38 | 249 | void hchacha(uint32_t output[8], const uint32_t input[16], size_t rounds) { |
39 | 249 | BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds"); |
40 | | |
41 | 249 | uint32_t x00 = input[0], x01 = input[1], x02 = input[2], x03 = input[3], x04 = input[4], x05 = input[5], |
42 | 249 | x06 = input[6], x07 = input[7], x08 = input[8], x09 = input[9], x10 = input[10], x11 = input[11], |
43 | 249 | x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15]; |
44 | | |
45 | 2.73k | for(size_t i = 0; i != rounds / 2; ++i) { |
46 | 2.49k | chacha_quarter_round(x00, x04, x08, x12); |
47 | 2.49k | chacha_quarter_round(x01, x05, x09, x13); |
48 | 2.49k | chacha_quarter_round(x02, x06, x10, x14); |
49 | 2.49k | chacha_quarter_round(x03, x07, x11, x15); |
50 | | |
51 | 2.49k | chacha_quarter_round(x00, x05, x10, x15); |
52 | 2.49k | chacha_quarter_round(x01, x06, x11, x12); |
53 | 2.49k | chacha_quarter_round(x02, x07, x08, x13); |
54 | 2.49k | chacha_quarter_round(x03, x04, x09, x14); |
55 | 2.49k | } |
56 | | |
57 | 249 | output[0] = x00; |
58 | 249 | output[1] = x01; |
59 | 249 | output[2] = x02; |
60 | 249 | output[3] = x03; |
61 | 249 | output[4] = x12; |
62 | 249 | output[5] = x13; |
63 | 249 | output[6] = x14; |
64 | 249 | output[7] = x15; |
65 | 249 | } |
66 | | |
67 | | } // namespace |
68 | | |
69 | 442 | ChaCha::ChaCha(size_t rounds) : m_rounds(rounds) { |
70 | 442 | BOTAN_ARG_CHECK(m_rounds == 8 || m_rounds == 12 || m_rounds == 20, "ChaCha only supports 8, 12 or 20 rounds"); |
71 | 442 | } |
72 | | |
73 | 423 | size_t ChaCha::parallelism() { |
74 | 423 | #if defined(BOTAN_HAS_CHACHA_AVX512) |
75 | 423 | if(CPUID::has_avx512()) { |
76 | 0 | return 16; |
77 | 0 | } |
78 | 423 | #endif |
79 | | |
80 | 423 | #if defined(BOTAN_HAS_CHACHA_AVX2) |
81 | 423 | if(CPUID::has_avx2()) { |
82 | 423 | return 8; |
83 | 423 | } |
84 | 0 | #endif |
85 | | |
86 | 0 | return 4; |
87 | 423 | } |
88 | | |
89 | 0 | std::string ChaCha::provider() const { |
90 | 0 | #if defined(BOTAN_HAS_CHACHA_AVX512) |
91 | 0 | if(CPUID::has_avx512()) { |
92 | 0 | return "avx512"; |
93 | 0 | } |
94 | 0 | #endif |
95 | | |
96 | 0 | #if defined(BOTAN_HAS_CHACHA_AVX2) |
97 | 0 | if(CPUID::has_avx2()) { |
98 | 0 | return "avx2"; |
99 | 0 | } |
100 | 0 | #endif |
101 | | |
102 | 0 | #if defined(BOTAN_HAS_CHACHA_SIMD32) |
103 | 0 | if(CPUID::has_simd_32()) { |
104 | 0 | return "simd32"; |
105 | 0 | } |
106 | 0 | #endif |
107 | | |
108 | 0 | return "base"; |
109 | 0 | } |
110 | | |
111 | 1.03k | void ChaCha::chacha(uint8_t output[], size_t output_blocks, uint32_t state[16], size_t rounds) { |
112 | 1.03k | BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds"); |
113 | | |
114 | 1.03k | #if defined(BOTAN_HAS_CHACHA_AVX512) |
115 | 1.03k | if(CPUID::has_avx512()) { |
116 | 0 | while(output_blocks >= 16) { |
117 | 0 | ChaCha::chacha_avx512_x16(output, state, rounds); |
118 | 0 | output += 16 * 64; |
119 | 0 | output_blocks -= 16; |
120 | 0 | } |
121 | 0 | } |
122 | 1.03k | #endif |
123 | | |
124 | 1.03k | #if defined(BOTAN_HAS_CHACHA_AVX2) |
125 | 1.03k | if(CPUID::has_avx2()) { |
126 | 2.07k | while(output_blocks >= 8) { |
127 | 1.03k | ChaCha::chacha_avx2_x8(output, state, rounds); |
128 | 1.03k | output += 8 * 64; |
129 | 1.03k | output_blocks -= 8; |
130 | 1.03k | } |
131 | 1.03k | } |
132 | 1.03k | #endif |
133 | | |
134 | 1.03k | #if defined(BOTAN_HAS_CHACHA_SIMD32) |
135 | 1.03k | if(CPUID::has_simd_32()) { |
136 | 1.03k | while(output_blocks >= 4) { |
137 | 0 | ChaCha::chacha_simd32_x4(output, state, rounds); |
138 | 0 | output += 4 * 64; |
139 | 0 | output_blocks -= 4; |
140 | 0 | } |
141 | 1.03k | } |
142 | 1.03k | #endif |
143 | | |
144 | | // TODO interleave rounds |
145 | 1.03k | for(size_t i = 0; i != output_blocks; ++i) { |
146 | 0 | uint32_t x00 = state[0], x01 = state[1], x02 = state[2], x03 = state[3], x04 = state[4], x05 = state[5], |
147 | 0 | x06 = state[6], x07 = state[7], x08 = state[8], x09 = state[9], x10 = state[10], x11 = state[11], |
148 | 0 | x12 = state[12], x13 = state[13], x14 = state[14], x15 = state[15]; |
149 | |
|
150 | 0 | for(size_t r = 0; r != rounds / 2; ++r) { |
151 | 0 | chacha_quarter_round(x00, x04, x08, x12); |
152 | 0 | chacha_quarter_round(x01, x05, x09, x13); |
153 | 0 | chacha_quarter_round(x02, x06, x10, x14); |
154 | 0 | chacha_quarter_round(x03, x07, x11, x15); |
155 | |
|
156 | 0 | chacha_quarter_round(x00, x05, x10, x15); |
157 | 0 | chacha_quarter_round(x01, x06, x11, x12); |
158 | 0 | chacha_quarter_round(x02, x07, x08, x13); |
159 | 0 | chacha_quarter_round(x03, x04, x09, x14); |
160 | 0 | } |
161 | |
|
162 | 0 | x00 += state[0]; |
163 | 0 | x01 += state[1]; |
164 | 0 | x02 += state[2]; |
165 | 0 | x03 += state[3]; |
166 | 0 | x04 += state[4]; |
167 | 0 | x05 += state[5]; |
168 | 0 | x06 += state[6]; |
169 | 0 | x07 += state[7]; |
170 | 0 | x08 += state[8]; |
171 | 0 | x09 += state[9]; |
172 | 0 | x10 += state[10]; |
173 | 0 | x11 += state[11]; |
174 | 0 | x12 += state[12]; |
175 | 0 | x13 += state[13]; |
176 | 0 | x14 += state[14]; |
177 | 0 | x15 += state[15]; |
178 | |
|
179 | 0 | store_le(x00, output + 64 * i + 4 * 0); |
180 | 0 | store_le(x01, output + 64 * i + 4 * 1); |
181 | 0 | store_le(x02, output + 64 * i + 4 * 2); |
182 | 0 | store_le(x03, output + 64 * i + 4 * 3); |
183 | 0 | store_le(x04, output + 64 * i + 4 * 4); |
184 | 0 | store_le(x05, output + 64 * i + 4 * 5); |
185 | 0 | store_le(x06, output + 64 * i + 4 * 6); |
186 | 0 | store_le(x07, output + 64 * i + 4 * 7); |
187 | 0 | store_le(x08, output + 64 * i + 4 * 8); |
188 | 0 | store_le(x09, output + 64 * i + 4 * 9); |
189 | 0 | store_le(x10, output + 64 * i + 4 * 10); |
190 | 0 | store_le(x11, output + 64 * i + 4 * 11); |
191 | 0 | store_le(x12, output + 64 * i + 4 * 12); |
192 | 0 | store_le(x13, output + 64 * i + 4 * 13); |
193 | 0 | store_le(x14, output + 64 * i + 4 * 14); |
194 | 0 | store_le(x15, output + 64 * i + 4 * 15); |
195 | |
|
196 | 0 | state[12]++; |
197 | 0 | state[13] += (state[12] == 0); |
198 | 0 | } |
199 | 1.03k | } |
200 | | |
201 | | /* |
202 | | * Combine cipher stream with message |
203 | | */ |
204 | 1.40k | void ChaCha::cipher_bytes(const uint8_t in[], uint8_t out[], size_t length) { |
205 | 1.40k | assert_key_material_set(); |
206 | | |
207 | 1.66k | while(length >= m_buffer.size() - m_position) { |
208 | 260 | const size_t available = m_buffer.size() - m_position; |
209 | | |
210 | 260 | xor_buf(out, in, &m_buffer[m_position], available); |
211 | 260 | chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds); |
212 | | |
213 | 260 | length -= available; |
214 | 260 | in += available; |
215 | 260 | out += available; |
216 | 260 | m_position = 0; |
217 | 260 | } |
218 | | |
219 | 1.40k | xor_buf(out, in, &m_buffer[m_position], length); |
220 | | |
221 | 1.40k | m_position += length; |
222 | 1.40k | } |
223 | | |
224 | 0 | void ChaCha::generate_keystream(uint8_t out[], size_t length) { |
225 | 0 | assert_key_material_set(); |
226 | |
|
227 | 0 | while(length >= m_buffer.size() - m_position) { |
228 | 0 | const size_t available = m_buffer.size() - m_position; |
229 | | |
230 | | // TODO: this could write directly to the output buffer |
231 | | // instead of bouncing it through m_buffer first |
232 | 0 | copy_mem(out, &m_buffer[m_position], available); |
233 | 0 | chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds); |
234 | |
|
235 | 0 | length -= available; |
236 | 0 | out += available; |
237 | 0 | m_position = 0; |
238 | 0 | } |
239 | |
|
240 | 0 | copy_mem(out, &m_buffer[m_position], length); |
241 | |
|
242 | 0 | m_position += length; |
243 | 0 | } |
244 | | |
245 | 778 | void ChaCha::initialize_state() { |
246 | 778 | static const uint32_t TAU[] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574}; |
247 | | |
248 | 778 | static const uint32_t SIGMA[] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}; |
249 | | |
250 | 778 | m_state[4] = m_key[0]; |
251 | 778 | m_state[5] = m_key[1]; |
252 | 778 | m_state[6] = m_key[2]; |
253 | 778 | m_state[7] = m_key[3]; |
254 | | |
255 | 778 | if(m_key.size() == 4) { |
256 | 513 | m_state[0] = TAU[0]; |
257 | 513 | m_state[1] = TAU[1]; |
258 | 513 | m_state[2] = TAU[2]; |
259 | 513 | m_state[3] = TAU[3]; |
260 | | |
261 | 513 | m_state[8] = m_key[0]; |
262 | 513 | m_state[9] = m_key[1]; |
263 | 513 | m_state[10] = m_key[2]; |
264 | 513 | m_state[11] = m_key[3]; |
265 | 513 | } else { |
266 | 265 | m_state[0] = SIGMA[0]; |
267 | 265 | m_state[1] = SIGMA[1]; |
268 | 265 | m_state[2] = SIGMA[2]; |
269 | 265 | m_state[3] = SIGMA[3]; |
270 | | |
271 | 265 | m_state[8] = m_key[4]; |
272 | 265 | m_state[9] = m_key[5]; |
273 | 265 | m_state[10] = m_key[6]; |
274 | 265 | m_state[11] = m_key[7]; |
275 | 265 | } |
276 | | |
277 | 778 | m_state[12] = 0; |
278 | 778 | m_state[13] = 0; |
279 | 778 | m_state[14] = 0; |
280 | 778 | m_state[15] = 0; |
281 | | |
282 | 778 | m_position = 0; |
283 | 778 | } |
284 | | |
285 | 2.19k | bool ChaCha::has_keying_material() const { |
286 | 2.19k | return !m_state.empty(); |
287 | 2.19k | } |
288 | | |
289 | 0 | size_t ChaCha::buffer_size() const { |
290 | 0 | return 64; |
291 | 0 | } |
292 | | |
293 | | /* |
294 | | * ChaCha Key Schedule |
295 | | */ |
296 | 423 | void ChaCha::key_schedule(std::span<const uint8_t> key) { |
297 | 423 | m_key.resize(key.size() / 4); |
298 | 423 | load_le<uint32_t>(m_key.data(), key.data(), m_key.size()); |
299 | | |
300 | 423 | m_state.resize(16); |
301 | | |
302 | 423 | const size_t chacha_block = 64; |
303 | 423 | m_buffer.resize(parallelism() * chacha_block); |
304 | | |
305 | 423 | set_iv(nullptr, 0); |
306 | 423 | } |
307 | | |
308 | 0 | size_t ChaCha::default_iv_length() const { |
309 | 0 | return 24; |
310 | 0 | } |
311 | | |
312 | 865 | Key_Length_Specification ChaCha::key_spec() const { |
313 | 865 | return Key_Length_Specification(16, 32, 16); |
314 | 865 | } |
315 | | |
316 | 0 | std::unique_ptr<StreamCipher> ChaCha::new_object() const { |
317 | 0 | return std::make_unique<ChaCha>(m_rounds); |
318 | 0 | } |
319 | | |
320 | 791 | bool ChaCha::valid_iv_length(size_t iv_len) const { |
321 | 791 | return (iv_len == 0 || iv_len == 8 || iv_len == 12 || iv_len == 24); |
322 | 791 | } |
323 | | |
324 | 791 | void ChaCha::set_iv_bytes(const uint8_t iv[], size_t length) { |
325 | 791 | assert_key_material_set(); |
326 | | |
327 | 791 | if(!valid_iv_length(length)) { |
328 | 13 | throw Invalid_IV_Length(name(), length); |
329 | 13 | } |
330 | | |
331 | 778 | initialize_state(); |
332 | | |
333 | 778 | if(length == 0) { |
334 | | // Treat zero length IV same as an all-zero IV |
335 | 423 | m_state[14] = 0; |
336 | 423 | m_state[15] = 0; |
337 | 423 | } else if(length == 8) { |
338 | 78 | m_state[14] = load_le<uint32_t>(iv, 0); |
339 | 78 | m_state[15] = load_le<uint32_t>(iv, 1); |
340 | 277 | } else if(length == 12) { |
341 | 28 | m_state[13] = load_le<uint32_t>(iv, 0); |
342 | 28 | m_state[14] = load_le<uint32_t>(iv, 1); |
343 | 28 | m_state[15] = load_le<uint32_t>(iv, 2); |
344 | 249 | } else if(length == 24) { |
345 | 249 | m_state[12] = load_le<uint32_t>(iv, 0); |
346 | 249 | m_state[13] = load_le<uint32_t>(iv, 1); |
347 | 249 | m_state[14] = load_le<uint32_t>(iv, 2); |
348 | 249 | m_state[15] = load_le<uint32_t>(iv, 3); |
349 | | |
350 | 249 | secure_vector<uint32_t> hc(8); |
351 | 249 | hchacha(hc.data(), m_state.data(), m_rounds); |
352 | | |
353 | 249 | m_state[4] = hc[0]; |
354 | 249 | m_state[5] = hc[1]; |
355 | 249 | m_state[6] = hc[2]; |
356 | 249 | m_state[7] = hc[3]; |
357 | 249 | m_state[8] = hc[4]; |
358 | 249 | m_state[9] = hc[5]; |
359 | 249 | m_state[10] = hc[6]; |
360 | 249 | m_state[11] = hc[7]; |
361 | 249 | m_state[12] = 0; |
362 | 249 | m_state[13] = 0; |
363 | 249 | m_state[14] = load_le<uint32_t>(iv, 4); |
364 | 249 | m_state[15] = load_le<uint32_t>(iv, 5); |
365 | 249 | } |
366 | | |
367 | 778 | chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds); |
368 | 778 | m_position = 0; |
369 | 778 | } |
370 | | |
371 | 0 | void ChaCha::clear() { |
372 | 0 | zap(m_key); |
373 | 0 | zap(m_state); |
374 | 0 | zap(m_buffer); |
375 | 0 | m_position = 0; |
376 | 0 | } |
377 | | |
378 | 32 | std::string ChaCha::name() const { |
379 | 32 | return fmt("ChaCha({})", m_rounds); |
380 | 32 | } |
381 | | |
382 | 0 | void ChaCha::seek(uint64_t offset) { |
383 | 0 | assert_key_material_set(); |
384 | | |
385 | | // Find the block offset |
386 | 0 | const uint64_t counter = offset / 64; |
387 | |
|
388 | 0 | uint8_t out[8]; |
389 | |
|
390 | 0 | store_le(counter, out); |
391 | |
|
392 | 0 | m_state[12] = load_le<uint32_t>(out, 0); |
393 | 0 | m_state[13] += load_le<uint32_t>(out, 1); |
394 | |
|
395 | 0 | chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds); |
396 | 0 | m_position = offset % 64; |
397 | 0 | } |
398 | | } // namespace Botan |