/src/wolfssl/wolfcrypt/src/chacha.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* chacha.c |
2 | | * |
3 | | * Copyright (C) 2006-2023 wolfSSL Inc. |
4 | | * |
5 | | * This file is part of wolfSSL. |
6 | | * |
7 | | * wolfSSL is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * wolfSSL is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA |
20 | | */ |
21 | | /* |
22 | | |
23 | | DESCRIPTION |
24 | | This library contains an implementation of the ChaCha20 stream cipher. |
25 | | |
26 | | Based on chacha-ref.c version 20080118 |
27 | | D. J. Bernstein |
28 | | Public domain. |
29 | | |
30 | | */ |
31 | | |
32 | | #ifdef HAVE_CONFIG_H |
33 | | #include <config.h> |
34 | | #endif |
35 | | |
36 | | #include <wolfssl/wolfcrypt/settings.h> |
37 | | |
38 | | #if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON) |
39 | | /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */ |
40 | | |
41 | | #else |
42 | | #if defined(HAVE_CHACHA) |
43 | | |
44 | | #include <wolfssl/wolfcrypt/chacha.h> |
45 | | #include <wolfssl/wolfcrypt/error-crypt.h> |
46 | | #include <wolfssl/wolfcrypt/logging.h> |
47 | | #include <wolfssl/wolfcrypt/cpuid.h> |
48 | | #ifdef NO_INLINE |
49 | | #include <wolfssl/wolfcrypt/misc.h> |
50 | | #else |
51 | | #define WOLFSSL_MISC_INCLUDED |
52 | | #include <wolfcrypt/src/misc.c> |
53 | | #endif |
54 | | |
55 | | #ifdef CHACHA_AEAD_TEST |
56 | | #include <stdio.h> |
57 | | #endif |
58 | | |
59 | | #ifdef USE_INTEL_CHACHA_SPEEDUP |
60 | | #include <emmintrin.h> |
61 | | #include <immintrin.h> |
62 | | |
63 | | #if defined(__GNUC__) && ((__GNUC__ < 4) || \ |
64 | | (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) |
65 | | #undef NO_AVX2_SUPPORT |
66 | | #define NO_AVX2_SUPPORT |
67 | | #endif |
68 | | #if defined(__clang__) && ((__clang_major__ < 3) || \ |
69 | | (__clang_major__ == 3 && __clang_minor__ <= 5)) |
70 | | #undef NO_AVX2_SUPPORT |
71 | | #define NO_AVX2_SUPPORT |
72 | | #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) |
73 | | #undef NO_AVX2_SUPPORT |
74 | | #endif |
75 | | |
76 | | #ifndef NO_AVX2_SUPPORT |
77 | | #define HAVE_INTEL_AVX2 |
78 | | #endif |
79 | | |
80 | | static int cpuidFlagsSet = 0; |
81 | | static word32 cpuidFlags = 0; |
82 | | #endif |
83 | | |
84 | | #ifdef BIG_ENDIAN_ORDER |
85 | | #define LITTLE32(x) ByteReverseWord32(x) |
86 | | #else |
87 | 0 | #define LITTLE32(x) (x) |
88 | | #endif |
89 | | |
90 | | /* Number of rounds */ |
91 | 0 | #define ROUNDS 20 |
92 | | |
93 | 0 | #define U32C(v) (v##U) |
94 | 0 | #define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF)) |
95 | 0 | #define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0]) |
96 | | |
97 | 0 | #define ROTATE(v,c) rotlFixed(v, c) |
98 | | #define XOR(v,w) ((v) ^ (w)) |
99 | 0 | #define PLUS(v,w) (U32V((v) + (w))) |
100 | 0 | #define PLUSONE(v) (PLUS((v),1)) |
101 | | |
102 | | #define QUARTERROUND(a,b,c,d) \ |
103 | 0 | x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ |
104 | 0 | x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ |
105 | 0 | x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ |
106 | 0 | x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); |
107 | | |
108 | | |
109 | | /** |
110 | | * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version |
111 | | * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB. |
112 | | */ |
113 | | int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter) |
114 | 0 | { |
115 | 0 | word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */ |
116 | | |
117 | |
|
118 | 0 | if (ctx == NULL || inIv == NULL) |
119 | 0 | return BAD_FUNC_ARG; |
120 | | |
121 | 0 | XMEMCPY(temp, inIv, CHACHA_IV_BYTES); |
122 | |
|
123 | 0 | ctx->left = 0; /* resets state */ |
124 | 0 | ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter; /* block counter */ |
125 | 0 | ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */ |
126 | 0 | ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */ |
127 | 0 | ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */ |
128 | |
|
129 | 0 | return 0; |
130 | 0 | } |
131 | | |
132 | | /* "expand 32-byte k" as four little-endian 32-bit words */ |
133 | | static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}; |
134 | | /* "expand 16-byte k" as four little-endian 32-bit words */ |
135 | | static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574}; |
136 | | |
137 | | /** |
138 | | * Key setup. 8 word iv (nonce) |
139 | | */ |
140 | | int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz) |
141 | 0 | { |
142 | 0 | const word32* constants; |
143 | 0 | const byte* k; |
144 | |
|
145 | | #ifdef XSTREAM_ALIGN |
146 | | word32 alignKey[8]; |
147 | | #endif |
148 | |
|
149 | 0 | if (ctx == NULL || key == NULL) |
150 | 0 | return BAD_FUNC_ARG; |
151 | | |
152 | 0 | if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ) |
153 | 0 | return BAD_FUNC_ARG; |
154 | | |
155 | | #ifdef XSTREAM_ALIGN |
156 | | if ((wc_ptr_t)key % 4) { |
157 | | WOLFSSL_MSG("wc_ChachaSetKey unaligned key"); |
158 | | XMEMCPY(alignKey, key, keySz); |
159 | | k = (byte*)alignKey; |
160 | | } |
161 | | else { |
162 | | k = key; |
163 | | } |
164 | | #else |
165 | 0 | k = key; |
166 | 0 | #endif /* XSTREAM_ALIGN */ |
167 | |
|
168 | | #ifdef CHACHA_AEAD_TEST |
169 | | word32 i; |
170 | | printf("ChaCha key used :\n"); |
171 | | for (i = 0; i < keySz; i++) { |
172 | | printf("%02x", key[i]); |
173 | | if ((i + 1) % 8 == 0) |
174 | | printf("\n"); |
175 | | } |
176 | | printf("\n\n"); |
177 | | #endif |
178 | |
|
179 | 0 | ctx->X[4] = U8TO32_LITTLE(k + 0); |
180 | 0 | ctx->X[5] = U8TO32_LITTLE(k + 4); |
181 | 0 | ctx->X[6] = U8TO32_LITTLE(k + 8); |
182 | 0 | ctx->X[7] = U8TO32_LITTLE(k + 12); |
183 | 0 | if (keySz == CHACHA_MAX_KEY_SZ) { |
184 | 0 | k += 16; |
185 | 0 | constants = sigma; |
186 | 0 | } |
187 | 0 | else { |
188 | 0 | constants = tau; |
189 | 0 | } |
190 | 0 | ctx->X[ 8] = U8TO32_LITTLE(k + 0); |
191 | 0 | ctx->X[ 9] = U8TO32_LITTLE(k + 4); |
192 | 0 | ctx->X[10] = U8TO32_LITTLE(k + 8); |
193 | 0 | ctx->X[11] = U8TO32_LITTLE(k + 12); |
194 | 0 | ctx->X[ 0] = constants[0]; |
195 | 0 | ctx->X[ 1] = constants[1]; |
196 | 0 | ctx->X[ 2] = constants[2]; |
197 | 0 | ctx->X[ 3] = constants[3]; |
198 | 0 | ctx->left = 0; /* resets state */ |
199 | |
|
200 | 0 | return 0; |
201 | 0 | } |
202 | | |
203 | | /** |
204 | | * Converts word into bytes with rotations having been done. |
205 | | */ |
206 | | static WC_INLINE void wc_Chacha_wordtobyte(word32 x[CHACHA_CHUNK_WORDS], |
207 | | word32 state[CHACHA_CHUNK_WORDS]) |
208 | 0 | { |
209 | 0 | word32 i; |
210 | |
|
211 | 0 | XMEMCPY(x, state, CHACHA_CHUNK_BYTES); |
212 | |
|
213 | 0 | for (i = (ROUNDS); i > 0; i -= 2) { |
214 | 0 | QUARTERROUND(0, 4, 8, 12) |
215 | 0 | QUARTERROUND(1, 5, 9, 13) |
216 | 0 | QUARTERROUND(2, 6, 10, 14) |
217 | 0 | QUARTERROUND(3, 7, 11, 15) |
218 | 0 | QUARTERROUND(0, 5, 10, 15) |
219 | 0 | QUARTERROUND(1, 6, 11, 12) |
220 | 0 | QUARTERROUND(2, 7, 8, 13) |
221 | 0 | QUARTERROUND(3, 4, 9, 14) |
222 | 0 | } |
223 | |
|
224 | 0 | for (i = 0; i < CHACHA_CHUNK_WORDS; i++) { |
225 | 0 | x[i] = PLUS(x[i], state[i]); |
226 | | #ifdef BIG_ENDIAN_ORDER |
227 | | x[i] = LITTLE32(x[i]); |
228 | | #endif |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | |
233 | | #ifdef HAVE_XCHACHA |
234 | | |
235 | | /* |
236 | | * wc_HChacha_block - half a ChaCha block, for XChaCha |
237 | | * |
238 | | * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03 |
239 | | */ |
240 | | static WC_INLINE void wc_HChacha_block(ChaCha* ctx, word32 stream[CHACHA_CHUNK_WORDS/2], word32 nrounds) |
241 | 0 | { |
242 | 0 | word32 x[CHACHA_CHUNK_WORDS]; |
243 | 0 | word32 i; |
244 | |
|
245 | 0 | for (i = 0; i < CHACHA_CHUNK_WORDS; i++) { |
246 | 0 | x[i] = ctx->X[i]; |
247 | 0 | } |
248 | |
|
249 | 0 | for (i = nrounds; i > 0; i -= 2) { |
250 | 0 | QUARTERROUND(0, 4, 8, 12) |
251 | 0 | QUARTERROUND(1, 5, 9, 13) |
252 | 0 | QUARTERROUND(2, 6, 10, 14) |
253 | 0 | QUARTERROUND(3, 7, 11, 15) |
254 | 0 | QUARTERROUND(0, 5, 10, 15) |
255 | 0 | QUARTERROUND(1, 6, 11, 12) |
256 | 0 | QUARTERROUND(2, 7, 8, 13) |
257 | 0 | QUARTERROUND(3, 4, 9, 14) |
258 | 0 | } |
259 | |
|
260 | 0 | for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i) |
261 | 0 | stream[i] = x[i]; |
262 | 0 | for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i) |
263 | 0 | stream[i] = x[i + CHACHA_CHUNK_WORDS/2]; |
264 | 0 | } |
265 | | |
266 | | /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */ |
267 | | int wc_XChacha_SetKey(ChaCha *ctx, |
268 | | const byte *key, word32 keySz, |
269 | | const byte *nonce, word32 nonceSz, |
270 | 0 | word32 counter) { |
271 | 0 | word32 k[CHACHA_MAX_KEY_SZ]; |
272 | 0 | byte iv[CHACHA_IV_BYTES]; |
273 | 0 | int ret; |
274 | |
|
275 | 0 | if (nonceSz != XCHACHA_NONCE_BYTES) |
276 | 0 | return BAD_FUNC_ARG; |
277 | | |
278 | 0 | if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0) |
279 | 0 | return ret; |
280 | | |
281 | | /* form a first chacha IV from the first 16 bytes of the nonce. |
282 | | * the first word is supplied in the "counter" arg, and |
283 | | * the result is a full 128 bit nonceful IV for the one-time block |
284 | | * crypto op that follows. |
285 | | */ |
286 | 0 | if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0) |
287 | 0 | return ret; |
288 | | |
289 | 0 | wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */ |
290 | | |
291 | | /* the HChacha output is used as a 256 bit key for the main cipher. */ |
292 | 0 | XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32)); |
293 | | |
294 | | /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes, |
295 | | * to form the IV for the main cipher. |
296 | | */ |
297 | 0 | XMEMSET(iv, 0, 4); |
298 | 0 | XMEMCPY(iv + 4, nonce + 16, 8); |
299 | |
|
300 | 0 | if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0) |
301 | 0 | return ret; |
302 | | |
303 | 0 | ForceZero(k, sizeof k); |
304 | 0 | ForceZero(iv, sizeof iv); |
305 | |
|
306 | 0 | return 0; |
307 | 0 | } |
308 | | |
309 | | #endif /* HAVE_XCHACHA */ |
310 | | |
311 | | |
312 | | #ifdef __cplusplus |
313 | | extern "C" { |
314 | | #endif |
315 | | |
316 | | extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c, |
317 | | word32 bytes); |
318 | | extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c, |
319 | | word32 bytes); |
320 | | extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c, |
321 | | word32 bytes); |
322 | | |
323 | | #ifdef __cplusplus |
324 | | } /* extern "C" */ |
325 | | #endif |
326 | | |
327 | | |
328 | | /** |
329 | | * Encrypt a stream of bytes |
330 | | */ |
331 | | static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, |
332 | | word32 bytes) |
333 | 0 | { |
334 | 0 | union { |
335 | 0 | byte state[CHACHA_CHUNK_BYTES]; |
336 | 0 | word32 state32[CHACHA_CHUNK_WORDS]; |
337 | 0 | wolfssl_word align_word; /* align for xorbufout */ |
338 | 0 | } tmp; |
339 | | |
340 | | /* handle left overs */ |
341 | 0 | if (bytes > 0 && ctx->left > 0) { |
342 | 0 | word32 processed = min(bytes, ctx->left); |
343 | 0 | wc_Chacha_wordtobyte(tmp.state32, ctx->X); /* recreate the stream */ |
344 | 0 | xorbufout(c, m, tmp.state + CHACHA_CHUNK_BYTES - ctx->left, processed); |
345 | 0 | ctx->left -= processed; |
346 | | |
347 | | /* Used up all of the stream that was left, increment the counter */ |
348 | 0 | if (ctx->left == 0) { |
349 | 0 | ctx->X[CHACHA_MATRIX_CNT_IV] = |
350 | 0 | PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]); |
351 | 0 | } |
352 | 0 | bytes -= processed; |
353 | 0 | c += processed; |
354 | 0 | m += processed; |
355 | 0 | } |
356 | |
|
357 | 0 | while (bytes >= CHACHA_CHUNK_BYTES) { |
358 | 0 | wc_Chacha_wordtobyte(tmp.state32, ctx->X); |
359 | 0 | ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]); |
360 | 0 | xorbufout(c, m, tmp.state, CHACHA_CHUNK_BYTES); |
361 | 0 | bytes -= CHACHA_CHUNK_BYTES; |
362 | 0 | c += CHACHA_CHUNK_BYTES; |
363 | 0 | m += CHACHA_CHUNK_BYTES; |
364 | 0 | } |
365 | |
|
366 | 0 | if (bytes) { |
367 | | /* in this case there will always be some left over since bytes is less |
368 | | * than CHACHA_CHUNK_BYTES, so do not increment counter after getting |
369 | | * stream in order for the stream to be recreated on next call */ |
370 | 0 | wc_Chacha_wordtobyte(tmp.state32, ctx->X); |
371 | 0 | xorbufout(c, m, tmp.state, bytes); |
372 | 0 | ctx->left = CHACHA_CHUNK_BYTES - bytes; |
373 | 0 | } |
374 | 0 | } |
375 | | |
376 | | /** |
377 | | * API to encrypt/decrypt a message of any size. |
378 | | */ |
379 | | int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, |
380 | | word32 msglen) |
381 | 0 | { |
382 | 0 | if (ctx == NULL || input == NULL || output == NULL) |
383 | 0 | return BAD_FUNC_ARG; |
384 | | |
385 | | #ifdef USE_INTEL_CHACHA_SPEEDUP |
386 | | /* handle left overs */ |
387 | | if (msglen > 0 && ctx->left > 0) { |
388 | | byte* out; |
389 | | word32 processed = min(msglen, ctx->left); |
390 | | |
391 | | out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left; |
392 | | xorbufout(output, input, out, processed); |
393 | | ctx->left -= processed; |
394 | | msglen -= processed; |
395 | | output += processed; |
396 | | input += processed; |
397 | | } |
398 | | |
399 | | if (msglen == 0) { |
400 | | return 0; |
401 | | } |
402 | | |
403 | | if (!cpuidFlagsSet) { |
404 | | cpuidFlags = cpuid_get_flags(); |
405 | | cpuidFlagsSet = 1; |
406 | | } |
407 | | |
408 | | #ifdef HAVE_INTEL_AVX2 |
409 | | if (IS_INTEL_AVX2(cpuidFlags)) { |
410 | | SAVE_VECTOR_REGISTERS(return _svr_ret;); |
411 | | chacha_encrypt_avx2(ctx, input, output, msglen); |
412 | | RESTORE_VECTOR_REGISTERS(); |
413 | | return 0; |
414 | | } |
415 | | #endif |
416 | | if (IS_INTEL_AVX1(cpuidFlags)) { |
417 | | SAVE_VECTOR_REGISTERS(return _svr_ret;); |
418 | | chacha_encrypt_avx1(ctx, input, output, msglen); |
419 | | RESTORE_VECTOR_REGISTERS(); |
420 | | return 0; |
421 | | } |
422 | | else { |
423 | | chacha_encrypt_x64(ctx, input, output, msglen); |
424 | | return 0; |
425 | | } |
426 | | #endif |
427 | 0 | wc_Chacha_encrypt_bytes(ctx, input, output, msglen); |
428 | |
|
429 | 0 | return 0; |
430 | 0 | } |
431 | | |
432 | 0 | void wc_Chacha_purge_current_block(ChaCha* ctx) { |
433 | 0 | if (ctx->left > 0) { |
434 | 0 | byte scratch[CHACHA_CHUNK_BYTES]; |
435 | 0 | XMEMSET(scratch, 0, sizeof(scratch)); |
436 | 0 | (void)wc_Chacha_Process(ctx, scratch, scratch, CHACHA_CHUNK_BYTES - ctx->left); |
437 | 0 | } |
438 | 0 | } |
439 | | |
440 | | #endif /* HAVE_CHACHA */ |
441 | | |
442 | | #endif /* WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_NEON */ |