/src/libgcrypt/cipher/chacha20.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* chacha20.c - Bernstein's ChaCha20 cipher |
2 | | * Copyright (C) 2014,2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
3 | | * |
4 | | * This file is part of Libgcrypt. |
5 | | * |
6 | | * Libgcrypt is free software; you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser General Public License as |
8 | | * published by the Free Software Foundation; either version 2.1 of |
9 | | * the License, or (at your option) any later version. |
10 | | * |
11 | | * Libgcrypt is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this program; if not, see <http://www.gnu.org/licenses/>. |
18 | | * |
19 | | * For a description of the algorithm, see: |
20 | | * http://cr.yp.to/chacha.html |
21 | | */ |
22 | | |
23 | | /* |
24 | | * Based on D. J. Bernstein reference implementation at |
25 | | * http://cr.yp.to/chacha.html: |
26 | | * |
27 | | * chacha-regs.c version 20080118 |
28 | | * D. J. Bernstein |
29 | | * Public domain. |
30 | | */ |
31 | | |
32 | | #include <config.h> |
33 | | #include <stdio.h> |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | #include "types.h" |
37 | | #include "g10lib.h" |
38 | | #include "cipher.h" |
39 | | #include "cipher-internal.h" |
40 | | #include "bufhelp.h" |
41 | | |
42 | | |
43 | 0 | #define CHACHA20_MIN_KEY_SIZE 16 /* Bytes. */ |
44 | 0 | #define CHACHA20_MAX_KEY_SIZE 32 /* Bytes. */ |
45 | 0 | #define CHACHA20_BLOCK_SIZE 64 /* Bytes. */ |
46 | 0 | #define CHACHA20_MIN_IV_SIZE 8 /* Bytes. */ |
47 | 0 | #define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */ |
48 | 0 | #define CHACHA20_CTR_SIZE 16 /* Bytes. */ |
49 | | |
50 | | |
51 | | /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ |
52 | | #undef USE_SSSE3 |
53 | | #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ |
54 | | (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
55 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
56 | | # define USE_SSSE3 1 |
57 | | #endif |
58 | | |
59 | | /* USE_AVX2 indicates whether to compile with Intel AVX2 code. */ |
60 | | #undef USE_AVX2 |
61 | | #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ |
62 | | (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
63 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
64 | | # define USE_AVX2 1 |
65 | | #endif |
66 | | |
67 | | /* USE_AVX512 indicates whether to compile with Intel AVX512 code. */ |
68 | | #undef USE_AVX512 |
69 | | #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX512) && \ |
70 | | (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
71 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
72 | | # define USE_AVX512 1 |
73 | | #endif |
74 | | |
75 | | /* USE_ARMV7_NEON indicates whether to enable ARMv7 NEON assembly code. */ |
76 | | #undef USE_ARMV7_NEON |
77 | | #ifdef ENABLE_NEON_SUPPORT |
78 | | # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ |
79 | | && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ |
80 | | && defined(HAVE_GCC_INLINE_ASM_NEON) |
81 | | # define USE_ARMV7_NEON 1 |
82 | | # endif |
83 | | #endif |
84 | | |
85 | | /* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly |
86 | | * code. */ |
87 | | #undef USE_AARCH64_SIMD |
88 | | #ifdef ENABLE_NEON_SUPPORT |
89 | | # if defined(__AARCH64EL__) \ |
90 | | && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ |
91 | | && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) |
92 | | # define USE_AARCH64_SIMD 1 |
93 | | # endif |
94 | | #endif |
95 | | |
96 | | /* USE_PPC_VEC indicates whether to enable PowerPC vector |
97 | | * accelerated code. */ |
98 | | #undef USE_PPC_VEC |
99 | | #ifdef ENABLE_PPC_CRYPTO_SUPPORT |
100 | | # if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ |
101 | | defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) |
102 | | # if __GNUC__ >= 4 |
103 | | # define USE_PPC_VEC 1 |
104 | | # endif |
105 | | # endif |
106 | | #endif |
107 | | |
108 | | /* USE_S390X_VX indicates whether to enable zSeries code. */ |
109 | | #undef USE_S390X_VX |
110 | | #if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 |
111 | | # if defined(HAVE_GCC_INLINE_ASM_S390X_VX) |
112 | | # define USE_S390X_VX 1 |
113 | | # endif /* HAVE_GCC_INLINE_ASM_S390X_VX */ |
114 | | #endif |
115 | | |
116 | | /* Assembly implementations use SystemV ABI, ABI conversion and additional |
117 | | * stack to store XMM6-XMM15 needed on Win64. */ |
118 | | #undef ASM_FUNC_ABI |
119 | | #undef ASM_EXTRA_STACK |
120 | | #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) |
121 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
122 | | #else |
123 | | # define ASM_FUNC_ABI |
124 | | #endif |
125 | | |
126 | | |
127 | | typedef struct CHACHA20_context_s |
128 | | { |
129 | | u32 input[16]; /* Cipher state words: 0-3 constants, 4-11 key (filled by chacha20_keysetup), 12-15 counter/nonce (filled by chacha20_ivsetup). */ |
130 | | unsigned char pad[CHACHA20_BLOCK_SIZE]; /* Keystream block generated for a trailing partial block. */ |
131 | | unsigned int unused; /* Number of keystream bytes at the end of pad that have not been consumed yet. */ |
132 | | unsigned int use_ssse3:1; /* The use_* flags are set in chacha20_do_setkey from the _gcry_get_hw_features() bits. */ |
133 | | unsigned int use_avx2:1; |
134 | | unsigned int use_avx512:1; |
135 | | unsigned int use_neon:1; /* Shared by the ARMv7 NEON and AArch64 SIMD code paths. */ |
136 | | unsigned int use_ppc:1; |
137 | | unsigned int use_p10:1; /* Only assigned on little-endian PowerPC builds. */ |
138 | | unsigned int use_s390x:1; |
139 | | } CHACHA20_context_t; |
140 | | |
141 | | |
142 | | #ifdef USE_SSSE3 |
143 | | |
144 | | unsigned int _gcry_chacha20_amd64_ssse3_blocks4(u32 *state, byte *dst, |
145 | | const byte *src, |
146 | | size_t nblks) ASM_FUNC_ABI; |
147 | | |
148 | | unsigned int _gcry_chacha20_amd64_ssse3_blocks1(u32 *state, byte *dst, |
149 | | const byte *src, |
150 | | size_t nblks) ASM_FUNC_ABI; |
151 | | |
152 | | unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks4( |
153 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
154 | | void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; |
155 | | |
156 | | unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks1( |
157 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
158 | | void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; |
159 | | |
160 | | #endif /* USE_SSSE3 */ |
161 | | |
162 | | #ifdef USE_AVX2 |
163 | | |
164 | | unsigned int _gcry_chacha20_amd64_avx2_blocks8(u32 *state, byte *dst, |
165 | | const byte *src, |
166 | | size_t nblks) ASM_FUNC_ABI; |
167 | | |
168 | | unsigned int _gcry_chacha20_poly1305_amd64_avx2_blocks8( |
169 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
170 | | void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; |
171 | | |
172 | | #endif /* USE_AVX2 */ |
173 | | |
174 | | #ifdef USE_AVX512 |
175 | | |
176 | | unsigned int _gcry_chacha20_amd64_avx512_blocks16(u32 *state, byte *dst, |
177 | | const byte *src, |
178 | | size_t nblks) ASM_FUNC_ABI; |
179 | | |
180 | | #endif /* USE_AVX512 */ |
181 | | |
182 | | #ifdef USE_PPC_VEC |
183 | | |
184 | | #ifndef WORDS_BIGENDIAN |
185 | | unsigned int _gcry_chacha20_p10le_8x(u32 *state, byte *dst, |
186 | | const byte *src, |
187 | | size_t len); |
188 | | #endif |
189 | | |
190 | | unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, |
191 | | const byte *src, |
192 | | size_t nblks); |
193 | | |
194 | | unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, |
195 | | const byte *src, |
196 | | size_t nblks); |
197 | | |
198 | | #undef USE_PPC_VEC_POLY1305 |
199 | | #if SIZEOF_UNSIGNED_LONG == 8 |
200 | | #define USE_PPC_VEC_POLY1305 1 |
201 | | unsigned int _gcry_chacha20_poly1305_ppc8_blocks4( |
202 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
203 | | POLY1305_STATE *st, const byte *poly1305_src); |
204 | | #endif /* SIZEOF_UNSIGNED_LONG == 8 */ |
205 | | |
206 | | #endif /* USE_PPC_VEC */ |
207 | | |
208 | | #ifdef USE_S390X_VX |
209 | | |
210 | | unsigned int _gcry_chacha20_s390x_vx_blocks8(u32 *state, byte *dst, |
211 | | const byte *src, size_t nblks); |
212 | | |
213 | | unsigned int _gcry_chacha20_s390x_vx_blocks4_2_1(u32 *state, byte *dst, |
214 | | const byte *src, size_t nblks); |
215 | | |
216 | | #undef USE_S390X_VX_POLY1305 |
217 | | #if SIZEOF_UNSIGNED_LONG == 8 |
218 | | #define USE_S390X_VX_POLY1305 1 |
219 | | unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks8( |
220 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
221 | | POLY1305_STATE *st, const byte *poly1305_src); |
222 | | |
223 | | unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( |
224 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
225 | | POLY1305_STATE *st, const byte *poly1305_src); |
226 | | #endif /* SIZEOF_UNSIGNED_LONG == 8 */ |
227 | | |
228 | | #endif /* USE_S390X_VX */ |
229 | | |
230 | | #ifdef USE_ARMV7_NEON |
231 | | |
232 | | unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst, |
233 | | const byte *src, |
234 | | size_t nblks); |
235 | | |
236 | | #endif /* USE_ARMV7_NEON */ |
237 | | |
238 | | #ifdef USE_AARCH64_SIMD |
239 | | |
240 | | unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst, |
241 | | const byte *src, size_t nblks); |
242 | | |
243 | | unsigned int _gcry_chacha20_poly1305_aarch64_blocks4( |
244 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
245 | | void *poly1305_state, const byte *poly1305_src); |
246 | | |
247 | | #endif /* USE_AARCH64_SIMD */ |
248 | | |
249 | | |
250 | | static const char *selftest (void); |
251 | | |
252 | | |
253 | 0 | #define ROTATE(v,c) (rol(v,c)) /* Delegates to rol(); presumably a 32-bit left rotate -- confirm against its definition. */ |
254 | | #define XOR(v,w) ((v) ^ (w)) |
255 | 0 | #define PLUS(v,w) ((u32)((v) + (w))) /* Sum converted back to u32. */ |
256 | 0 | #define PLUSONE(v) (PLUS((v),1)) /* v + 1 as u32. */ |
257 | | |
258 | | #define QUARTERROUND(a,b,c,d) /* Updates a, b, c, d in place; the expansion already ends with ';'. */ \ |
259 | 0 | a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ |
260 | 0 | c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ |
261 | 0 | a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ |
262 | 0 | c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); |
263 | | |
264 | | #define BUF_XOR_LE32(dst, src, offset, x) /* Writes (word read from src+offset) ^ x to dst+offset via buf_get_le32/buf_put_le32. */ \ |
265 | 0 | buf_put_le32((dst) + (offset), buf_get_le32((src) + (offset)) ^ (x)) |
266 | | |
267 | | static unsigned int |
268 | | do_chacha20_blocks (u32 *input, byte *dst, const byte *src, size_t nblks) |
269 | 0 | { |
270 | 0 | u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
271 | 0 | unsigned int i; |
272 | |
|
273 | 0 | while (nblks) |
274 | 0 | { |
275 | 0 | x0 = input[0]; |
276 | 0 | x1 = input[1]; |
277 | 0 | x2 = input[2]; |
278 | 0 | x3 = input[3]; |
279 | 0 | x4 = input[4]; |
280 | 0 | x5 = input[5]; |
281 | 0 | x6 = input[6]; |
282 | 0 | x7 = input[7]; |
283 | 0 | x8 = input[8]; |
284 | 0 | x9 = input[9]; |
285 | 0 | x10 = input[10]; |
286 | 0 | x11 = input[11]; |
287 | 0 | x12 = input[12]; |
288 | 0 | x13 = input[13]; |
289 | 0 | x14 = input[14]; |
290 | 0 | x15 = input[15]; |
291 | |
|
292 | 0 | for (i = 20; i > 0; i -= 2) |
293 | 0 | { |
294 | 0 | QUARTERROUND(x0, x4, x8, x12) |
295 | 0 | QUARTERROUND(x1, x5, x9, x13) |
296 | 0 | QUARTERROUND(x2, x6, x10, x14) |
297 | 0 | QUARTERROUND(x3, x7, x11, x15) |
298 | 0 | QUARTERROUND(x0, x5, x10, x15) |
299 | 0 | QUARTERROUND(x1, x6, x11, x12) |
300 | 0 | QUARTERROUND(x2, x7, x8, x13) |
301 | 0 | QUARTERROUND(x3, x4, x9, x14) |
302 | 0 | } |
303 | |
|
304 | 0 | x0 = PLUS(x0, input[0]); |
305 | 0 | x1 = PLUS(x1, input[1]); |
306 | 0 | x2 = PLUS(x2, input[2]); |
307 | 0 | x3 = PLUS(x3, input[3]); |
308 | 0 | x4 = PLUS(x4, input[4]); |
309 | 0 | x5 = PLUS(x5, input[5]); |
310 | 0 | x6 = PLUS(x6, input[6]); |
311 | 0 | x7 = PLUS(x7, input[7]); |
312 | 0 | x8 = PLUS(x8, input[8]); |
313 | 0 | x9 = PLUS(x9, input[9]); |
314 | 0 | x10 = PLUS(x10, input[10]); |
315 | 0 | x11 = PLUS(x11, input[11]); |
316 | 0 | x12 = PLUS(x12, input[12]); |
317 | 0 | x13 = PLUS(x13, input[13]); |
318 | 0 | x14 = PLUS(x14, input[14]); |
319 | 0 | x15 = PLUS(x15, input[15]); |
320 | |
|
321 | 0 | input[12] = PLUSONE(input[12]); |
322 | 0 | input[13] = PLUS(input[13], !input[12]); |
323 | |
|
324 | 0 | BUF_XOR_LE32(dst, src, 0, x0); |
325 | 0 | BUF_XOR_LE32(dst, src, 4, x1); |
326 | 0 | BUF_XOR_LE32(dst, src, 8, x2); |
327 | 0 | BUF_XOR_LE32(dst, src, 12, x3); |
328 | 0 | BUF_XOR_LE32(dst, src, 16, x4); |
329 | 0 | BUF_XOR_LE32(dst, src, 20, x5); |
330 | 0 | BUF_XOR_LE32(dst, src, 24, x6); |
331 | 0 | BUF_XOR_LE32(dst, src, 28, x7); |
332 | 0 | BUF_XOR_LE32(dst, src, 32, x8); |
333 | 0 | BUF_XOR_LE32(dst, src, 36, x9); |
334 | 0 | BUF_XOR_LE32(dst, src, 40, x10); |
335 | 0 | BUF_XOR_LE32(dst, src, 44, x11); |
336 | 0 | BUF_XOR_LE32(dst, src, 48, x12); |
337 | 0 | BUF_XOR_LE32(dst, src, 52, x13); |
338 | 0 | BUF_XOR_LE32(dst, src, 56, x14); |
339 | 0 | BUF_XOR_LE32(dst, src, 60, x15); |
340 | |
|
341 | 0 | src += CHACHA20_BLOCK_SIZE; |
342 | 0 | dst += CHACHA20_BLOCK_SIZE; |
343 | 0 | nblks--; |
344 | 0 | } |
345 | | |
346 | | /* burn_stack */ |
347 | 0 | return (17 * sizeof(u32) + 6 * sizeof(void *)); |
348 | 0 | } |
349 | | |
350 | | |
351 | | static unsigned int |
352 | | chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src, |
353 | | size_t nblks) |
354 | 0 | { |
355 | 0 | #ifdef USE_SSSE3 |
356 | 0 | if (ctx->use_ssse3) |
357 | 0 | { |
358 | 0 | return _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, dst, src, nblks); |
359 | 0 | } |
360 | 0 | #endif |
361 | | |
362 | | #ifdef USE_PPC_VEC |
363 | | if (ctx->use_ppc) |
364 | | { |
365 | | return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks); |
366 | | } |
367 | | #endif |
368 | | |
369 | | #ifdef USE_S390X_VX |
370 | | if (ctx->use_s390x) |
371 | | { |
372 | | return _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, dst, src, nblks); |
373 | | } |
374 | | #endif |
375 | | |
376 | 0 | return do_chacha20_blocks (ctx->input, dst, src, nblks); |
377 | 0 | } |
378 | | |
379 | | |
380 | | static void |
381 | | chacha20_keysetup (CHACHA20_context_t *ctx, const byte *key, |
382 | | unsigned int keylen) |
383 | 0 | { |
384 | 0 | static const char sigma[16] = "expand 32-byte k"; |
385 | 0 | static const char tau[16] = "expand 16-byte k"; |
386 | 0 | const char *constants; |
387 | |
|
388 | 0 | ctx->input[4] = buf_get_le32(key + 0); |
389 | 0 | ctx->input[5] = buf_get_le32(key + 4); |
390 | 0 | ctx->input[6] = buf_get_le32(key + 8); |
391 | 0 | ctx->input[7] = buf_get_le32(key + 12); |
392 | 0 | if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */ |
393 | 0 | { |
394 | 0 | key += 16; |
395 | 0 | constants = sigma; |
396 | 0 | } |
397 | 0 | else /* 128 bits */ |
398 | 0 | { |
399 | 0 | constants = tau; |
400 | 0 | } |
401 | 0 | ctx->input[8] = buf_get_le32(key + 0); |
402 | 0 | ctx->input[9] = buf_get_le32(key + 4); |
403 | 0 | ctx->input[10] = buf_get_le32(key + 8); |
404 | 0 | ctx->input[11] = buf_get_le32(key + 12); |
405 | 0 | ctx->input[0] = buf_get_le32(constants + 0); |
406 | 0 | ctx->input[1] = buf_get_le32(constants + 4); |
407 | 0 | ctx->input[2] = buf_get_le32(constants + 8); |
408 | 0 | ctx->input[3] = buf_get_le32(constants + 12); |
409 | 0 | } |
410 | | |
411 | | |
412 | | static void |
413 | | chacha20_ivsetup (CHACHA20_context_t * ctx, const byte *iv, size_t ivlen) |
414 | 0 | { |
415 | 0 | if (ivlen == CHACHA20_CTR_SIZE) |
416 | 0 | { |
417 | 0 | ctx->input[12] = buf_get_le32 (iv + 0); |
418 | 0 | ctx->input[13] = buf_get_le32 (iv + 4); |
419 | 0 | ctx->input[14] = buf_get_le32 (iv + 8); |
420 | 0 | ctx->input[15] = buf_get_le32 (iv + 12); |
421 | 0 | } |
422 | 0 | else if (ivlen == CHACHA20_MAX_IV_SIZE) |
423 | 0 | { |
424 | 0 | ctx->input[12] = 0; |
425 | 0 | ctx->input[13] = buf_get_le32 (iv + 0); |
426 | 0 | ctx->input[14] = buf_get_le32 (iv + 4); |
427 | 0 | ctx->input[15] = buf_get_le32 (iv + 8); |
428 | 0 | } |
429 | 0 | else if (ivlen == CHACHA20_MIN_IV_SIZE) |
430 | 0 | { |
431 | 0 | ctx->input[12] = 0; |
432 | 0 | ctx->input[13] = 0; |
433 | 0 | ctx->input[14] = buf_get_le32 (iv + 0); |
434 | 0 | ctx->input[15] = buf_get_le32 (iv + 4); |
435 | 0 | } |
436 | 0 | else |
437 | 0 | { |
438 | 0 | ctx->input[12] = 0; |
439 | 0 | ctx->input[13] = 0; |
440 | 0 | ctx->input[14] = 0; |
441 | 0 | ctx->input[15] = 0; |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | | |
446 | | static void |
447 | | chacha20_setiv (void *context, const byte *iv, size_t ivlen) |
448 | 0 | { |
449 | 0 | CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; |
450 | | |
451 | | /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonce. */ |
452 | 0 | if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE |
453 | 0 | && ivlen != CHACHA20_CTR_SIZE) |
454 | 0 | log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen); |
455 | |
|
456 | 0 | if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE |
457 | 0 | || ivlen == CHACHA20_CTR_SIZE)) |
458 | 0 | chacha20_ivsetup (ctx, iv, ivlen); |
459 | 0 | else |
460 | 0 | chacha20_ivsetup (ctx, NULL, 0); |
461 | | |
462 | | /* Reset the unused pad bytes counter. */ |
463 | 0 | ctx->unused = 0; |
464 | 0 | } |
465 | | |
466 | | |
467 | | static gcry_err_code_t |
468 | | chacha20_do_setkey (CHACHA20_context_t *ctx, |
469 | | const byte *key, unsigned int keylen) |
470 | 0 | { |
471 | 0 | static int initialized; |
472 | 0 | static const char *selftest_failed; |
473 | 0 | unsigned int features = _gcry_get_hw_features (); |
474 | |
|
475 | 0 | if (!initialized) |
476 | 0 | { |
477 | 0 | initialized = 1; |
478 | 0 | selftest_failed = selftest (); |
479 | 0 | if (selftest_failed) |
480 | 0 | log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed); |
481 | 0 | } |
482 | 0 | if (selftest_failed) |
483 | 0 | return GPG_ERR_SELFTEST_FAILED; |
484 | | |
485 | 0 | if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE) |
486 | 0 | return GPG_ERR_INV_KEYLEN; |
487 | | |
488 | 0 | #ifdef USE_SSSE3 |
489 | 0 | ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; |
490 | 0 | #endif |
491 | 0 | #ifdef USE_AVX512 |
492 | 0 | ctx->use_avx512 = (features & HWF_INTEL_AVX512) != 0; |
493 | 0 | #endif |
494 | 0 | #ifdef USE_AVX2 |
495 | 0 | ctx->use_avx2 = (features & HWF_INTEL_AVX2) != 0; |
496 | 0 | #endif |
497 | | #ifdef USE_ARMV7_NEON |
498 | | ctx->use_neon = (features & HWF_ARM_NEON) != 0; |
499 | | #endif |
500 | | #ifdef USE_AARCH64_SIMD |
501 | | ctx->use_neon = (features & HWF_ARM_NEON) != 0; |
502 | | #endif |
503 | | #ifdef USE_PPC_VEC |
504 | | ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0; |
505 | | # ifndef WORDS_BIGENDIAN |
506 | | ctx->use_p10 = (features & HWF_PPC_ARCH_3_10) != 0; |
507 | | # ifdef ENABLE_FORCE_SOFT_HWFEATURES |
508 | | /* HWF_PPC_ARCH_3_10 above is used as soft HW-feature indicator for P10. |
509 | | * Actual implementation works with HWF_PPC_ARCH_3_00 also. */ |
510 | | ctx->use_p10 |= (features & HWF_PPC_ARCH_3_00) != 0; |
511 | | # endif |
512 | | # endif |
513 | | #endif |
514 | | #ifdef USE_S390X_VX |
515 | | ctx->use_s390x = (features & HWF_S390X_VX) != 0; |
516 | | #endif |
517 | |
|
518 | 0 | (void)features; |
519 | |
|
520 | 0 | chacha20_keysetup (ctx, key, keylen); |
521 | | |
522 | | /* We default to a zero nonce. */ |
523 | 0 | chacha20_setiv (ctx, NULL, 0); |
524 | |
|
525 | 0 | return 0; |
526 | 0 | } |
527 | | |
528 | | |
529 | | static gcry_err_code_t |
530 | | chacha20_setkey (void *context, const byte *key, unsigned int keylen, |
531 | | cipher_bulk_ops_t *bulk_ops) |
532 | 0 | { |
533 | 0 | CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; |
534 | 0 | gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen); |
535 | 0 | (void)bulk_ops; |
536 | 0 | _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); |
537 | 0 | return rc; |
538 | 0 | } |
539 | | |
540 | | |
541 | | static unsigned int |
542 | | do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf, |
543 | | const byte *inbuf, size_t length) |
544 | 0 | { |
545 | 0 | static const unsigned char zero_pad[CHACHA20_BLOCK_SIZE] = { 0, }; |
546 | 0 | unsigned int nburn, burn = 0; |
547 | |
|
548 | 0 | #ifdef USE_AVX512 |
549 | 0 | if (ctx->use_avx512 && length >= CHACHA20_BLOCK_SIZE * 16) |
550 | 0 | { |
551 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
552 | 0 | nblocks -= nblocks % 16; |
553 | 0 | nburn = _gcry_chacha20_amd64_avx512_blocks16(ctx->input, outbuf, inbuf, |
554 | 0 | nblocks); |
555 | 0 | burn = nburn > burn ? nburn : burn; |
556 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
557 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
558 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
559 | 0 | } |
560 | 0 | #endif |
561 | |
|
562 | 0 | #ifdef USE_AVX2 |
563 | 0 | if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) |
564 | 0 | { |
565 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
566 | 0 | nblocks -= nblocks % 8; |
567 | 0 | nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, |
568 | 0 | nblocks); |
569 | 0 | burn = nburn > burn ? nburn : burn; |
570 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
571 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
572 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
573 | 0 | } |
574 | 0 | #endif |
575 | |
|
576 | 0 | #ifdef USE_SSSE3 |
577 | 0 | if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) |
578 | 0 | { |
579 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
580 | 0 | nblocks -= nblocks % 4; |
581 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, |
582 | 0 | nblocks); |
583 | 0 | burn = nburn > burn ? nburn : burn; |
584 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
585 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
586 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
587 | 0 | } |
588 | 0 | #endif |
589 | |
|
590 | | #ifdef USE_ARMV7_NEON |
591 | | if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) |
592 | | { |
593 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
594 | | nblocks -= nblocks % 4; |
595 | | nburn = _gcry_chacha20_armv7_neon_blocks4(ctx->input, outbuf, inbuf, |
596 | | nblocks); |
597 | | burn = nburn > burn ? nburn : burn; |
598 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
599 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
600 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
601 | | } |
602 | | #endif |
603 | |
|
604 | | #ifdef USE_AARCH64_SIMD |
605 | | if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) |
606 | | { |
607 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
608 | | nblocks -= nblocks % 4; |
609 | | nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, |
610 | | nblocks); |
611 | | burn = nburn > burn ? nburn : burn; |
612 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
613 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
614 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
615 | | } |
616 | | #endif |
617 | |
|
618 | | #ifdef USE_PPC_VEC |
619 | | if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) |
620 | | { |
621 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
622 | | nblocks -= nblocks % 4; |
623 | | #ifndef WORDS_BIGENDIAN |
624 | | /* |
625 | | * A workaround to skip counter overflow. This is rare. |
626 | | */ |
627 | | if (ctx->use_p10 && nblocks >= 8 |
628 | | && ((u64)ctx->input[12] + nblocks) <= 0xffffffffU) |
629 | | { |
630 | | size_t len = nblocks * CHACHA20_BLOCK_SIZE; |
631 | | nburn = _gcry_chacha20_p10le_8x(ctx->input, outbuf, inbuf, len); |
632 | | } |
633 | | else |
634 | | #endif |
635 | | { |
636 | | nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, |
637 | | nblocks); |
638 | | } |
639 | | burn = nburn > burn ? nburn : burn; |
640 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
641 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
642 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
643 | | } |
644 | | #endif |
645 | |
|
646 | | #ifdef USE_S390X_VX |
647 | | if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 8) |
648 | | { |
649 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
650 | | nblocks -= nblocks % 8; |
651 | | nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, |
652 | | nblocks); |
653 | | burn = nburn > burn ? nburn : burn; |
654 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
655 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
656 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
657 | | } |
658 | | #endif |
659 | |
|
660 | 0 | if (length >= CHACHA20_BLOCK_SIZE) |
661 | 0 | { |
662 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
663 | 0 | nburn = chacha20_blocks(ctx, outbuf, inbuf, nblocks); |
664 | 0 | burn = nburn > burn ? nburn : burn; |
665 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
666 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
667 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
668 | 0 | } |
669 | |
|
670 | 0 | if (length > 0) |
671 | 0 | { |
672 | 0 | nburn = chacha20_blocks(ctx, ctx->pad, zero_pad, 1); |
673 | 0 | burn = nburn > burn ? nburn : burn; |
674 | |
|
675 | 0 | buf_xor (outbuf, inbuf, ctx->pad, length); |
676 | 0 | ctx->unused = CHACHA20_BLOCK_SIZE - length; |
677 | 0 | } |
678 | |
|
679 | 0 | if (burn) |
680 | 0 | burn += 5 * sizeof(void *); |
681 | |
|
682 | 0 | return burn; |
683 | 0 | } |
684 | | |
685 | | |
686 | | static void |
687 | | chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf, |
688 | | size_t length) |
689 | 0 | { |
690 | 0 | CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; |
691 | 0 | unsigned int nburn, burn = 0; |
692 | |
|
693 | 0 | if (!length) |
694 | 0 | return; |
695 | | |
696 | 0 | if (ctx->unused) |
697 | 0 | { |
698 | 0 | unsigned char *p = ctx->pad; |
699 | 0 | size_t n; |
700 | |
|
701 | 0 | gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); |
702 | | |
703 | 0 | n = ctx->unused; |
704 | 0 | if (n > length) |
705 | 0 | n = length; |
706 | |
|
707 | 0 | buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); |
708 | 0 | length -= n; |
709 | 0 | outbuf += n; |
710 | 0 | inbuf += n; |
711 | 0 | ctx->unused -= n; |
712 | |
|
713 | 0 | if (!length) |
714 | 0 | return; |
715 | 0 | gcry_assert (!ctx->unused); |
716 | 0 | } |
717 | | |
718 | 0 | nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, length); |
719 | 0 | burn = nburn > burn ? nburn : burn; |
720 | |
|
721 | 0 | if (burn) |
722 | 0 | _gcry_burn_stack (burn); |
723 | 0 | } |
724 | | |
725 | | |
726 | | gcry_err_code_t |
727 | | _gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, |
728 | | const byte *inbuf, size_t length) |
729 | 0 | { |
730 | 0 | CHACHA20_context_t *ctx = (void *) &c->context.c; |
731 | 0 | unsigned int nburn, burn = 0; |
732 | 0 | byte *authptr = NULL; |
733 | |
|
734 | 0 | if (!length) |
735 | 0 | return 0; |
736 | | |
737 | 0 | if (ctx->unused) |
738 | 0 | { |
739 | 0 | unsigned char *p = ctx->pad; |
740 | 0 | size_t n; |
741 | |
|
742 | 0 | gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); |
743 | | |
744 | 0 | n = ctx->unused; |
745 | 0 | if (n > length) |
746 | 0 | n = length; |
747 | |
|
748 | 0 | buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); |
749 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, n); |
750 | 0 | burn = nburn > burn ? nburn : burn; |
751 | 0 | length -= n; |
752 | 0 | outbuf += n; |
753 | 0 | inbuf += n; |
754 | 0 | ctx->unused -= n; |
755 | |
|
756 | 0 | if (!length) |
757 | 0 | { |
758 | 0 | if (burn) |
759 | 0 | _gcry_burn_stack (burn); |
760 | |
|
761 | 0 | return 0; |
762 | 0 | } |
763 | 0 | gcry_assert (!ctx->unused); |
764 | 0 | } |
765 | | |
766 | 0 | gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); |
767 | | |
768 | 0 | if (0) |
769 | 0 | { } |
770 | 0 | #ifdef USE_AVX512 |
771 | 0 | else if (ctx->use_avx512) |
772 | 0 | { |
773 | | /* Skip stitched chacha20-poly1305 for AVX512. */ |
774 | 0 | authptr = NULL; |
775 | 0 | } |
776 | 0 | #endif |
777 | 0 | #ifdef USE_AVX2 |
778 | 0 | else if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) |
779 | 0 | { |
780 | 0 | nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, 8); |
781 | 0 | burn = nburn > burn ? nburn : burn; |
782 | |
|
783 | 0 | authptr = outbuf; |
784 | 0 | length -= 8 * CHACHA20_BLOCK_SIZE; |
785 | 0 | outbuf += 8 * CHACHA20_BLOCK_SIZE; |
786 | 0 | inbuf += 8 * CHACHA20_BLOCK_SIZE; |
787 | 0 | } |
788 | 0 | #endif |
789 | 0 | #ifdef USE_SSSE3 |
790 | 0 | else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) |
791 | 0 | { |
792 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, 4); |
793 | 0 | burn = nburn > burn ? nburn : burn; |
794 | |
|
795 | 0 | authptr = outbuf; |
796 | 0 | length -= 4 * CHACHA20_BLOCK_SIZE; |
797 | 0 | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
798 | 0 | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
799 | 0 | } |
800 | 0 | else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 2) |
801 | 0 | { |
802 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 2); |
803 | 0 | burn = nburn > burn ? nburn : burn; |
804 | |
|
805 | 0 | authptr = outbuf; |
806 | 0 | length -= 2 * CHACHA20_BLOCK_SIZE; |
807 | 0 | outbuf += 2 * CHACHA20_BLOCK_SIZE; |
808 | 0 | inbuf += 2 * CHACHA20_BLOCK_SIZE; |
809 | 0 | } |
810 | 0 | else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE) |
811 | 0 | { |
812 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 1); |
813 | 0 | burn = nburn > burn ? nburn : burn; |
814 | |
|
815 | 0 | authptr = outbuf; |
816 | 0 | length -= 1 * CHACHA20_BLOCK_SIZE; |
817 | 0 | outbuf += 1 * CHACHA20_BLOCK_SIZE; |
818 | 0 | inbuf += 1 * CHACHA20_BLOCK_SIZE; |
819 | 0 | } |
820 | 0 | #endif |
821 | | #ifdef USE_AARCH64_SIMD |
822 | | else if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) |
823 | | { |
824 | | nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, 4); |
825 | | burn = nburn > burn ? nburn : burn; |
826 | | |
827 | | authptr = outbuf; |
828 | | length -= 4 * CHACHA20_BLOCK_SIZE; |
829 | | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
830 | | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
831 | | } |
832 | | #endif |
833 | | #ifdef USE_PPC_VEC_POLY1305 |
834 | | else if (ctx->use_ppc && ctx->use_p10) |
835 | | { |
836 | | /* Skip stitched chacha20-poly1305 for P10. */ |
837 | | authptr = NULL; |
838 | | } |
839 | | else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) |
840 | | { |
841 | | nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); |
842 | | burn = nburn > burn ? nburn : burn; |
843 | | |
844 | | authptr = outbuf; |
845 | | length -= 4 * CHACHA20_BLOCK_SIZE; |
846 | | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
847 | | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
848 | | } |
849 | | #endif |
850 | | #ifdef USE_S390X_VX_POLY1305 |
851 | | else if (ctx->use_s390x && length >= 2 * CHACHA20_BLOCK_SIZE * 8) |
852 | | { |
853 | | nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, 8); |
854 | | burn = nburn > burn ? nburn : burn; |
855 | | |
856 | | authptr = outbuf; |
857 | | length -= 8 * CHACHA20_BLOCK_SIZE; |
858 | | outbuf += 8 * CHACHA20_BLOCK_SIZE; |
859 | | inbuf += 8 * CHACHA20_BLOCK_SIZE; |
860 | | } |
861 | | else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 4) |
862 | | { |
863 | | nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 4); |
864 | | burn = nburn > burn ? nburn : burn; |
865 | | |
866 | | authptr = outbuf; |
867 | | length -= 4 * CHACHA20_BLOCK_SIZE; |
868 | | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
869 | | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
870 | | } |
871 | | else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 2) |
872 | | { |
873 | | nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 2); |
874 | | burn = nburn > burn ? nburn : burn; |
875 | | |
876 | | authptr = outbuf; |
877 | | length -= 2 * CHACHA20_BLOCK_SIZE; |
878 | | outbuf += 2 * CHACHA20_BLOCK_SIZE; |
879 | | inbuf += 2 * CHACHA20_BLOCK_SIZE; |
880 | | } |
881 | | else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE) |
882 | | { |
883 | | nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 1); |
884 | | burn = nburn > burn ? nburn : burn; |
885 | | |
886 | | authptr = outbuf; |
887 | | length -= 1 * CHACHA20_BLOCK_SIZE; |
888 | | outbuf += 1 * CHACHA20_BLOCK_SIZE; |
889 | | inbuf += 1 * CHACHA20_BLOCK_SIZE; |
890 | | } |
891 | | #endif |
892 | |
|
893 | 0 | if (authptr) |
894 | 0 | { |
895 | 0 | size_t authoffset = outbuf - authptr; |
896 | |
|
897 | 0 | #ifdef USE_AVX2 |
898 | 0 | if (ctx->use_avx2 && |
899 | 0 | length >= 8 * CHACHA20_BLOCK_SIZE && |
900 | 0 | authoffset >= 8 * CHACHA20_BLOCK_SIZE) |
901 | 0 | { |
902 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
903 | 0 | nblocks -= nblocks % 8; |
904 | |
|
905 | 0 | nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( |
906 | 0 | ctx->input, outbuf, inbuf, nblocks, |
907 | 0 | &c->u_mode.poly1305.ctx.state, authptr); |
908 | 0 | burn = nburn > burn ? nburn : burn; |
909 | |
|
910 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
911 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
912 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
913 | 0 | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
914 | 0 | } |
915 | 0 | #endif |
916 | |
|
917 | 0 | #ifdef USE_SSSE3 |
918 | 0 | if (ctx->use_ssse3) |
919 | 0 | { |
920 | 0 | if (length >= 4 * CHACHA20_BLOCK_SIZE && |
921 | 0 | authoffset >= 4 * CHACHA20_BLOCK_SIZE) |
922 | 0 | { |
923 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
924 | 0 | nblocks -= nblocks % 4; |
925 | |
|
926 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( |
927 | 0 | ctx->input, outbuf, inbuf, nblocks, |
928 | 0 | &c->u_mode.poly1305.ctx.state, authptr); |
929 | 0 | burn = nburn > burn ? nburn : burn; |
930 | |
|
931 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
932 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
933 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
934 | 0 | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
935 | 0 | } |
936 | |
|
937 | 0 | if (length >= CHACHA20_BLOCK_SIZE && |
938 | 0 | authoffset >= CHACHA20_BLOCK_SIZE) |
939 | 0 | { |
940 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
941 | |
|
942 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( |
943 | 0 | ctx->input, outbuf, inbuf, nblocks, |
944 | 0 | &c->u_mode.poly1305.ctx.state, authptr); |
945 | 0 | burn = nburn > burn ? nburn : burn; |
946 | |
|
947 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
948 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
949 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
950 | 0 | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
951 | 0 | } |
952 | 0 | } |
953 | 0 | #endif |
954 | |
|
955 | | #ifdef USE_AARCH64_SIMD |
956 | | if (ctx->use_neon && |
957 | | length >= 4 * CHACHA20_BLOCK_SIZE && |
958 | | authoffset >= 4 * CHACHA20_BLOCK_SIZE) |
959 | | { |
960 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
961 | | nblocks -= nblocks % 4; |
962 | | |
963 | | nburn = _gcry_chacha20_poly1305_aarch64_blocks4( |
964 | | ctx->input, outbuf, inbuf, nblocks, |
965 | | &c->u_mode.poly1305.ctx.state, authptr); |
966 | | burn = nburn > burn ? nburn : burn; |
967 | | |
968 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
969 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
970 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
971 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
972 | | } |
973 | | #endif |
974 | |
|
975 | | #ifdef USE_PPC_VEC_POLY1305 |
976 | | if (ctx->use_ppc && |
977 | | length >= 4 * CHACHA20_BLOCK_SIZE && |
978 | | authoffset >= 4 * CHACHA20_BLOCK_SIZE) |
979 | | { |
980 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
981 | | nblocks -= nblocks % 4; |
982 | | |
983 | | nburn = _gcry_chacha20_poly1305_ppc8_blocks4( |
984 | | ctx->input, outbuf, inbuf, nblocks, |
985 | | &c->u_mode.poly1305.ctx.state, authptr); |
986 | | burn = nburn > burn ? nburn : burn; |
987 | | |
988 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
989 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
990 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
991 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
992 | | } |
993 | | #endif |
994 | |
|
995 | | #ifdef USE_S390X_VX_POLY1305 |
996 | | if (ctx->use_s390x) |
997 | | { |
998 | | if (length >= 8 * CHACHA20_BLOCK_SIZE && |
999 | | authoffset >= 8 * CHACHA20_BLOCK_SIZE) |
1000 | | { |
1001 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1002 | | nblocks -= nblocks % 8; |
1003 | | |
1004 | | burn = _gcry_chacha20_poly1305_s390x_vx_blocks8( |
1005 | | ctx->input, outbuf, inbuf, nblocks, |
1006 | | &c->u_mode.poly1305.ctx.state, authptr); |
1007 | | burn = nburn > burn ? nburn : burn; |
1008 | | |
1009 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1010 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1011 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1012 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
1013 | | } |
1014 | | |
1015 | | if (length >= CHACHA20_BLOCK_SIZE && |
1016 | | authoffset >= CHACHA20_BLOCK_SIZE) |
1017 | | { |
1018 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1019 | | |
1020 | | burn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( |
1021 | | ctx->input, outbuf, inbuf, nblocks, |
1022 | | &c->u_mode.poly1305.ctx.state, authptr); |
1023 | | burn = nburn > burn ? nburn : burn; |
1024 | | |
1025 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1026 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1027 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1028 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
1029 | | } |
1030 | | } |
1031 | | #endif |
1032 | |
|
1033 | 0 | if (authoffset > 0) |
1034 | 0 | { |
1035 | 0 | _gcry_poly1305_update (&c->u_mode.poly1305.ctx, authptr, authoffset); |
1036 | 0 | authptr += authoffset; |
1037 | 0 | authoffset = 0; |
1038 | 0 | } |
1039 | |
|
1040 | 0 | gcry_assert(authptr == outbuf); |
1041 | 0 | } |
1042 | | |
1043 | 0 | while (length) |
1044 | 0 | { |
1045 | 0 | size_t currlen = length; |
1046 | | |
1047 | | /* Since checksumming is done after encryption, process input in 24KiB |
1048 | | * chunks to keep data loaded in L1 cache for checksumming. However |
1049 | | * only do splitting if input is large enough so that last chunks does |
1050 | | * not end up being short. */ |
1051 | 0 | if (currlen > 32 * 1024) |
1052 | 0 | currlen = 24 * 1024; |
1053 | |
|
1054 | 0 | nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); |
1055 | 0 | burn = nburn > burn ? nburn : burn; |
1056 | |
|
1057 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, |
1058 | 0 | currlen); |
1059 | 0 | burn = nburn > burn ? nburn : burn; |
1060 | |
|
1061 | 0 | outbuf += currlen; |
1062 | 0 | inbuf += currlen; |
1063 | 0 | length -= currlen; |
1064 | 0 | } |
1065 | |
|
1066 | 0 | if (burn) |
1067 | 0 | _gcry_burn_stack (burn); |
1068 | |
|
1069 | 0 | return 0; |
1070 | 0 | } |
1071 | | |
1072 | | |
1073 | | gcry_err_code_t |
1074 | | _gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, |
1075 | | const byte *inbuf, size_t length) |
1076 | 0 | { |
1077 | 0 | CHACHA20_context_t *ctx = (void *) &c->context.c; |
1078 | 0 | unsigned int nburn, burn = 0; |
1079 | 0 | #if defined(USE_AVX512) || defined(USE_PPC_VEC_POLY1305) \ |
1080 | 0 | || defined(USE_AVX2) || defined(USE_SSSE3) || defined(USE_AARCH64_SIMD) \ |
1081 | 0 | || defined(USE_S390X_VX_POLY1305) |
1082 | 0 | int skip_stitched = 0; |
1083 | 0 | #endif |
1084 | |
|
1085 | 0 | if (!length) |
1086 | 0 | return 0; |
1087 | | |
1088 | 0 | if (ctx->unused) |
1089 | 0 | { |
1090 | 0 | unsigned char *p = ctx->pad; |
1091 | 0 | size_t n; |
1092 | |
|
1093 | 0 | gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); |
1094 | | |
1095 | 0 | n = ctx->unused; |
1096 | 0 | if (n > length) |
1097 | 0 | n = length; |
1098 | |
|
1099 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, n); |
1100 | 0 | burn = nburn > burn ? nburn : burn; |
1101 | 0 | buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); |
1102 | 0 | length -= n; |
1103 | 0 | outbuf += n; |
1104 | 0 | inbuf += n; |
1105 | 0 | ctx->unused -= n; |
1106 | |
|
1107 | 0 | if (!length) |
1108 | 0 | { |
1109 | 0 | if (burn) |
1110 | 0 | _gcry_burn_stack (burn); |
1111 | |
|
1112 | 0 | return 0; |
1113 | 0 | } |
1114 | 0 | gcry_assert (!ctx->unused); |
1115 | 0 | } |
1116 | | |
1117 | 0 | gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); |
1118 | | |
1119 | 0 | #ifdef USE_AVX512 |
1120 | 0 | if (ctx->use_avx512) |
1121 | 0 | { |
1122 | | /* Skip stitched chacha20-poly1305 for AVX512. */ |
1123 | 0 | skip_stitched = 1; |
1124 | 0 | } |
1125 | 0 | #endif |
1126 | | #ifdef USE_PPC_VEC_POLY1305 |
1127 | | if (ctx->use_ppc && ctx->use_p10) |
1128 | | { |
1129 | | /* Skip stitched chacha20-poly1305 for P10. */ |
1130 | | skip_stitched = 1; |
1131 | | } |
1132 | | #endif |
1133 | |
|
1134 | 0 | #ifdef USE_AVX2 |
1135 | 0 | if (!skip_stitched && ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE) |
1136 | 0 | { |
1137 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1138 | 0 | nblocks -= nblocks % 8; |
1139 | |
|
1140 | 0 | nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( |
1141 | 0 | ctx->input, outbuf, inbuf, nblocks, |
1142 | 0 | &c->u_mode.poly1305.ctx.state, inbuf); |
1143 | 0 | burn = nburn > burn ? nburn : burn; |
1144 | |
|
1145 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1146 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1147 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1148 | 0 | } |
1149 | 0 | #endif |
1150 | |
|
1151 | 0 | #ifdef USE_SSSE3 |
1152 | 0 | if (!skip_stitched && ctx->use_ssse3) |
1153 | 0 | { |
1154 | 0 | if (length >= 4 * CHACHA20_BLOCK_SIZE) |
1155 | 0 | { |
1156 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1157 | 0 | nblocks -= nblocks % 4; |
1158 | |
|
1159 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( |
1160 | 0 | ctx->input, outbuf, inbuf, nblocks, |
1161 | 0 | &c->u_mode.poly1305.ctx.state, inbuf); |
1162 | 0 | burn = nburn > burn ? nburn : burn; |
1163 | |
|
1164 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1165 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1166 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1167 | 0 | } |
1168 | |
|
1169 | 0 | if (length >= CHACHA20_BLOCK_SIZE) |
1170 | 0 | { |
1171 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1172 | |
|
1173 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( |
1174 | 0 | ctx->input, outbuf, inbuf, nblocks, |
1175 | 0 | &c->u_mode.poly1305.ctx.state, inbuf); |
1176 | 0 | burn = nburn > burn ? nburn : burn; |
1177 | |
|
1178 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1179 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1180 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1181 | 0 | } |
1182 | 0 | } |
1183 | 0 | #endif |
1184 | |
|
1185 | | #ifdef USE_AARCH64_SIMD |
1186 | | if (!skip_stitched && ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE) |
1187 | | { |
1188 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1189 | | nblocks -= nblocks % 4; |
1190 | | |
1191 | | nburn = _gcry_chacha20_poly1305_aarch64_blocks4( |
1192 | | ctx->input, outbuf, inbuf, nblocks, |
1193 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1194 | | burn = nburn > burn ? nburn : burn; |
1195 | | |
1196 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1197 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1198 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1199 | | } |
1200 | | #endif |
1201 | |
|
1202 | | #ifdef USE_PPC_VEC_POLY1305 |
1203 | | /* skip stitch for p10 */ |
1204 | | if (!skip_stitched && ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE) |
1205 | | { |
1206 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1207 | | nblocks -= nblocks % 4; |
1208 | | |
1209 | | nburn = _gcry_chacha20_poly1305_ppc8_blocks4( |
1210 | | ctx->input, outbuf, inbuf, nblocks, |
1211 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1212 | | burn = nburn > burn ? nburn : burn; |
1213 | | |
1214 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1215 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1216 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1217 | | } |
1218 | | #endif |
1219 | |
|
1220 | | #ifdef USE_S390X_VX_POLY1305 |
1221 | | if (!skip_stitched && ctx->use_s390x) |
1222 | | { |
1223 | | if (length >= 8 * CHACHA20_BLOCK_SIZE) |
1224 | | { |
1225 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1226 | | nblocks -= nblocks % 8; |
1227 | | |
1228 | | nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8( |
1229 | | ctx->input, outbuf, inbuf, nblocks, |
1230 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1231 | | burn = nburn > burn ? nburn : burn; |
1232 | | |
1233 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1234 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1235 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1236 | | } |
1237 | | |
1238 | | if (length >= CHACHA20_BLOCK_SIZE) |
1239 | | { |
1240 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1241 | | |
1242 | | nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( |
1243 | | ctx->input, outbuf, inbuf, nblocks, |
1244 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1245 | | burn = nburn > burn ? nburn : burn; |
1246 | | |
1247 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1248 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1249 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1250 | | } |
1251 | | } |
1252 | | #endif |
1253 | |
|
1254 | 0 | while (length) |
1255 | 0 | { |
1256 | 0 | size_t currlen = length; |
1257 | | |
1258 | | /* Since checksumming is done before decryption, process input in 24KiB |
1259 | | * chunks to keep data loaded in L1 cache for decryption. However only |
1260 | | * do splitting if input is large enough so that last chunks does not |
1261 | | * end up being short. */ |
1262 | 0 | if (currlen > 32 * 1024) |
1263 | 0 | currlen = 24 * 1024; |
1264 | |
|
1265 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, |
1266 | 0 | currlen); |
1267 | 0 | burn = nburn > burn ? nburn : burn; |
1268 | |
|
1269 | 0 | nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); |
1270 | 0 | burn = nburn > burn ? nburn : burn; |
1271 | |
|
1272 | 0 | outbuf += currlen; |
1273 | 0 | inbuf += currlen; |
1274 | 0 | length -= currlen; |
1275 | 0 | } |
1276 | |
|
1277 | 0 | if (burn) |
1278 | 0 | _gcry_burn_stack (burn); |
1279 | |
|
1280 | 0 | return 0; |
1281 | 0 | } |
1282 | | |
1283 | | |
1284 | | static const char * |
1285 | | selftest (void) |
1286 | 0 | { |
1287 | 0 | byte ctxbuf[sizeof(CHACHA20_context_t) + 15]; |
1288 | 0 | CHACHA20_context_t *ctx; |
1289 | 0 | byte scratch[127 + 1]; |
1290 | 0 | byte buf[512 + 64 + 4]; |
1291 | 0 | int i; |
1292 | | |
1293 | | /* From draft-strombergson-chacha-test-vectors */ |
1294 | 0 | static byte key_1[] = { |
1295 | 0 | 0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78, |
1296 | 0 | 0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35, |
1297 | 0 | 0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb, |
1298 | 0 | 0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d |
1299 | 0 | }; |
1300 | 0 | static const byte nonce_1[] = |
1301 | 0 | { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 }; |
1302 | 0 | static const byte plaintext_1[127] = { |
1303 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1304 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1305 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1306 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1307 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1308 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1309 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1310 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1311 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1312 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1313 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1314 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1315 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1316 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1317 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1318 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1319 | 0 | }; |
1320 | 0 | static const byte ciphertext_1[127] = { |
1321 | 0 | 0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9, |
1322 | 0 | 0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06, |
1323 | 0 | 0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00, |
1324 | 0 | 0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf, |
1325 | 0 | 0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd, |
1326 | 0 | 0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f, |
1327 | 0 | 0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f, |
1328 | 0 | 0x11, 0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92, |
1329 | 0 | 0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9, |
1330 | 0 | 0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36, |
1331 | 0 | 0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1, |
1332 | 0 | 0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38, |
1333 | 0 | 0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea, |
1334 | 0 | 0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0, |
1335 | 0 | 0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27, |
1336 | 0 | 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33 |
1337 | 0 | }; |
1338 | | |
1339 | | /* 16-byte alignment required for amd64 implementation. */ |
1340 | 0 | ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); |
1341 | |
|
1342 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1343 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1344 | 0 | scratch[sizeof (scratch) - 1] = 0; |
1345 | 0 | chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); |
1346 | 0 | if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) |
1347 | 0 | return "ChaCha20 encryption test 1 failed."; |
1348 | 0 | if (scratch[sizeof (scratch) - 1]) |
1349 | 0 | return "ChaCha20 wrote too much."; |
1350 | 0 | chacha20_setkey (ctx, key_1, sizeof (key_1), NULL); |
1351 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1352 | 0 | chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); |
1353 | 0 | if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) |
1354 | 0 | return "ChaCha20 decryption test 1 failed."; |
1355 | | |
1356 | 0 | for (i = 0; i < sizeof buf; i++) |
1357 | 0 | buf[i] = i; |
1358 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1359 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1360 | | /*encrypt */ |
1361 | 0 | chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); |
1362 | | /*decrypt */ |
1363 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1364 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1365 | 0 | chacha20_encrypt_stream (ctx, buf, buf, 1); |
1366 | 0 | chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); |
1367 | 0 | chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1, |
1368 | 0 | buf + (sizeof buf) - 1, 1); |
1369 | 0 | for (i = 0; i < sizeof buf; i++) |
1370 | 0 | if (buf[i] != (byte) i) |
1371 | 0 | return "ChaCha20 encryption test 2 failed."; |
1372 | | |
1373 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1374 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1375 | | /* encrypt */ |
1376 | 0 | for (i = 0; i < sizeof buf; i++) |
1377 | 0 | chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); |
1378 | | /* decrypt */ |
1379 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1380 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1381 | 0 | chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); |
1382 | 0 | for (i = 0; i < sizeof buf; i++) |
1383 | 0 | if (buf[i] != (byte) i) |
1384 | 0 | return "ChaCha20 encryption test 3 failed."; |
1385 | | |
1386 | 0 | return NULL; |
1387 | 0 | } |
1388 | | |
1389 | | |
1390 | | gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = { |
1391 | | GCRY_CIPHER_CHACHA20, |
1392 | | {0, 0}, /* flags */ |
1393 | | "CHACHA20", /* name */ |
1394 | | NULL, /* aliases */ |
1395 | | NULL, /* oids */ |
1396 | | 1, /* blocksize in bytes. */ |
1397 | | CHACHA20_MAX_KEY_SIZE * 8, /* standard key length in bits. */ |
1398 | | sizeof (CHACHA20_context_t), |
1399 | | chacha20_setkey, |
1400 | | NULL, |
1401 | | NULL, |
1402 | | chacha20_encrypt_stream, |
1403 | | chacha20_encrypt_stream, |
1404 | | NULL, |
1405 | | NULL, |
1406 | | chacha20_setiv |
1407 | | }; |