/src/libgcrypt/cipher/chacha20.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* chacha20.c - Bernstein's ChaCha20 cipher |
2 | | * Copyright (C) 2014,2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
3 | | * |
4 | | * This file is part of Libgcrypt. |
5 | | * |
6 | | * Libgcrypt is free software; you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser General Public License as |
8 | | * published by the Free Software Foundation; either version 2.1 of |
9 | | * the License, or (at your option) any later version. |
10 | | * |
11 | | * Libgcrypt is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this program; if not, see <http://www.gnu.org/licenses/>. |
18 | | * |
19 | | * For a description of the algorithm, see: |
20 | | * http://cr.yp.to/chacha.html |
21 | | */ |
22 | | |
23 | | /* |
24 | | * Based on D. J. Bernstein reference implementation at |
25 | | * http://cr.yp.to/chacha.html: |
26 | | * |
27 | | * chacha-regs.c version 20080118 |
28 | | * D. J. Bernstein |
29 | | * Public domain. |
30 | | */ |
31 | | |
32 | | #include <config.h> |
33 | | #include <stdio.h> |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | #include "types.h" |
37 | | #include "g10lib.h" |
38 | | #include "cipher.h" |
39 | | #include "cipher-internal.h" |
40 | | #include "bufhelp.h" |
41 | | |
42 | | |
43 | 0 | #define CHACHA20_MIN_KEY_SIZE 16 /* Bytes. */ |
44 | 0 | #define CHACHA20_MAX_KEY_SIZE 32 /* Bytes. */ |
45 | 0 | #define CHACHA20_BLOCK_SIZE 64 /* Bytes. */ |
46 | 0 | #define CHACHA20_MIN_IV_SIZE 8 /* Bytes. */ |
47 | 0 | #define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */ |
48 | 0 | #define CHACHA20_CTR_SIZE 16 /* Bytes. */ |
49 | | |
50 | | |
51 | | /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ |
52 | | #undef USE_SSSE3 |
53 | | #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ |
54 | | (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
55 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
56 | | # define USE_SSSE3 1 |
57 | | #endif |
58 | | |
59 | | /* USE_AVX2 indicates whether to compile with Intel AVX2 code. */ |
60 | | #undef USE_AVX2 |
61 | | #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ |
62 | | (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
63 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
64 | | # define USE_AVX2 1 |
65 | | #endif |
66 | | |
67 | | /* USE_AVX512 indicates whether to compile with Intel AVX512 code. */ |
68 | | #undef USE_AVX512 |
69 | | #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX512) && \ |
70 | | (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
71 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
72 | | # define USE_AVX512 1 |
73 | | #endif |
74 | | |
75 | | /* USE_ARMV7_NEON indicates whether to enable ARMv7 NEON assembly code. */ |
76 | | #undef USE_ARMV7_NEON |
77 | | #ifdef ENABLE_NEON_SUPPORT |
78 | | # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ |
79 | | && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ |
80 | | && defined(HAVE_GCC_INLINE_ASM_NEON) |
81 | | # define USE_ARMV7_NEON 1 |
82 | | # endif |
83 | | #endif |
84 | | |
85 | | /* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly |
86 | | * code. */ |
87 | | #undef USE_AARCH64_SIMD |
88 | | #ifdef ENABLE_NEON_SUPPORT |
89 | | # if defined(__AARCH64EL__) \ |
90 | | && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ |
91 | | && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) |
92 | | # define USE_AARCH64_SIMD 1 |
93 | | # endif |
94 | | #endif |
95 | | |
96 | | /* USE_PPC_VEC indicates whether to enable PowerPC vector |
97 | | * accelerated code. */ |
98 | | #undef USE_PPC_VEC |
99 | | #ifdef ENABLE_PPC_CRYPTO_SUPPORT |
100 | | # if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ |
101 | | defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) |
102 | | # if __GNUC__ >= 4 |
103 | | # define USE_PPC_VEC 1 |
104 | | # endif |
105 | | # endif |
106 | | #endif |
107 | | |
108 | | /* USE_S390X_VX indicates whether to enable zSeries code. */ |
109 | | #undef USE_S390X_VX |
110 | | #if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 |
111 | | # if defined(HAVE_GCC_INLINE_ASM_S390X_VX) |
112 | | # define USE_S390X_VX 1 |
113 | | # endif /* HAVE_GCC_INLINE_ASM_S390X_VX */ |
114 | | #endif |
115 | | |
116 | | /* Assembly implementations use SystemV ABI, ABI conversion and additional |
117 | | * stack to store XMM6-XMM15 needed on Win64. */ |
118 | | #undef ASM_FUNC_ABI |
119 | | #undef ASM_EXTRA_STACK |
120 | | #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) |
121 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
122 | | #else |
123 | | # define ASM_FUNC_ABI |
124 | | #endif |
125 | | |
126 | | |
/* Per-key ChaCha20 cipher context. */
typedef struct CHACHA20_context_s
{
  /* 16-word cipher state: words 0-3 hold the constant, words 4-11 the key
   * (chacha20_keysetup), words 12-15 the counter/nonce (chacha20_ivsetup). */
  u32 input[16];
  /* One block of keystream kept for a trailing partial block. */
  unsigned char pad[CHACHA20_BLOCK_SIZE];
  unsigned int unused; /* Keystream bytes in the pad not yet consumed. */
  /* Implementation-selection flags; each is assigned in chacha20_do_setkey
   * from the detected hardware features, and only when the matching USE_*
   * macro is defined. */
  unsigned int use_ssse3:1;
  unsigned int use_avx2:1;
  unsigned int use_avx512:1;
  unsigned int use_neon:1;  /* Shared by the ARMv7 NEON and AArch64 paths. */
  unsigned int use_ppc:1;
  unsigned int use_p9:1;
  unsigned int use_p10:1;   /* Only assigned on little-endian builds. */
  unsigned int use_s390x:1;
} CHACHA20_context_t;
141 | | |
142 | | |
143 | | #ifdef USE_SSSE3 |
144 | | |
145 | | unsigned int _gcry_chacha20_amd64_ssse3_blocks4(u32 *state, byte *dst, |
146 | | const byte *src, |
147 | | size_t nblks) ASM_FUNC_ABI; |
148 | | |
149 | | unsigned int _gcry_chacha20_amd64_ssse3_blocks1(u32 *state, byte *dst, |
150 | | const byte *src, |
151 | | size_t nblks) ASM_FUNC_ABI; |
152 | | |
153 | | unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks4( |
154 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
155 | | void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; |
156 | | |
157 | | unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks1( |
158 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
159 | | void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; |
160 | | |
161 | | #endif /* USE_SSSE3 */ |
162 | | |
163 | | #ifdef USE_AVX2 |
164 | | |
165 | | unsigned int _gcry_chacha20_amd64_avx2_blocks8(u32 *state, byte *dst, |
166 | | const byte *src, |
167 | | size_t nblks) ASM_FUNC_ABI; |
168 | | |
169 | | unsigned int _gcry_chacha20_poly1305_amd64_avx2_blocks8( |
170 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
171 | | void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; |
172 | | |
173 | | #endif /* USE_AVX2 */ |
174 | | |
175 | | #ifdef USE_AVX512 |
176 | | |
177 | | unsigned int _gcry_chacha20_amd64_avx512_blocks(u32 *state, byte *dst, |
178 | | const byte *src, |
179 | | size_t nblks) ASM_FUNC_ABI; |
180 | | |
181 | | #endif /* USE_AVX512 */ |
182 | | |
183 | | #ifdef USE_PPC_VEC |
184 | | |
185 | | #ifndef WORDS_BIGENDIAN |
186 | | unsigned int _gcry_chacha20_p10le_8x(u32 *state, byte *dst, |
187 | | const byte *src, |
188 | | size_t len); |
189 | | #endif |
190 | | |
191 | | unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, |
192 | | const byte *src, |
193 | | size_t nblks); |
194 | | |
195 | | unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, |
196 | | const byte *src, |
197 | | size_t nblks); |
198 | | |
199 | | unsigned int _gcry_chacha20_ppc9_blocks4(u32 *state, byte *dst, |
200 | | const byte *src, |
201 | | size_t nblks); |
202 | | |
203 | | unsigned int _gcry_chacha20_ppc9_blocks1(u32 *state, byte *dst, |
204 | | const byte *src, |
205 | | size_t nblks); |
206 | | |
207 | | #undef USE_PPC_VEC_POLY1305 |
208 | | #if SIZEOF_UNSIGNED_LONG == 8 |
209 | | #define USE_PPC_VEC_POLY1305 1 |
210 | | unsigned int _gcry_chacha20_poly1305_ppc8_blocks4( |
211 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
212 | | POLY1305_STATE *st, const byte *poly1305_src); |
213 | | |
214 | | unsigned int _gcry_chacha20_poly1305_ppc9_blocks4( |
215 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
216 | | POLY1305_STATE *st, const byte *poly1305_src); |
217 | | #endif /* SIZEOF_UNSIGNED_LONG == 8 */ |
218 | | |
219 | | #endif /* USE_PPC_VEC */ |
220 | | |
221 | | #ifdef USE_S390X_VX |
222 | | |
223 | | unsigned int _gcry_chacha20_s390x_vx_blocks8(u32 *state, byte *dst, |
224 | | const byte *src, size_t nblks); |
225 | | |
226 | | unsigned int _gcry_chacha20_s390x_vx_blocks4_2_1(u32 *state, byte *dst, |
227 | | const byte *src, size_t nblks); |
228 | | |
229 | | #undef USE_S390X_VX_POLY1305 |
230 | | #if SIZEOF_UNSIGNED_LONG == 8 |
231 | | #define USE_S390X_VX_POLY1305 1 |
232 | | unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks8( |
233 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
234 | | POLY1305_STATE *st, const byte *poly1305_src); |
235 | | |
236 | | unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( |
237 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
238 | | POLY1305_STATE *st, const byte *poly1305_src); |
239 | | #endif /* SIZEOF_UNSIGNED_LONG == 8 */ |
240 | | |
241 | | #endif /* USE_S390X_VX */ |
242 | | |
243 | | #ifdef USE_ARMV7_NEON |
244 | | |
245 | | unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst, |
246 | | const byte *src, |
247 | | size_t nblks); |
248 | | |
249 | | #endif /* USE_ARMV7_NEON */ |
250 | | |
251 | | #ifdef USE_AARCH64_SIMD |
252 | | |
253 | | unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst, |
254 | | const byte *src, size_t nblks); |
255 | | |
256 | | unsigned int _gcry_chacha20_poly1305_aarch64_blocks4( |
257 | | u32 *state, byte *dst, const byte *src, size_t nblks, |
258 | | void *poly1305_state, const byte *poly1305_src); |
259 | | |
260 | | #endif /* USE_AARCH64_SIMD */ |
261 | | |
262 | | |
263 | | static const char *selftest (void); |
264 | | |
265 | | |
/* 32-bit building blocks for the generic C implementation.  ROTATE defers
 * to rol(), which is not defined in this file (presumably a rotate-left
 * helper from an included header -- confirm).  PLUS truncates the sum to
 * u32. */
#define ROTATE(v,c) (rol(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) ((u32)((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/* One quarter round on the four state words A, B, C, D.  The arguments
 * must be lvalues and are evaluated several times.  The expansion is a
 * sequence of statements that already ends in ';', so invocations are
 * written without a trailing semicolon. */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Read a little-endian 32-bit word at SRC+OFFSET, XOR it with X and store
 * the result little-endian at DST+OFFSET. */
#define BUF_XOR_LE32(dst, src, offset, x) \
  buf_put_le32((dst) + (offset), buf_get_le32((src) + (offset)) ^ (x))
279 | | |
280 | | static unsigned int |
281 | | do_chacha20_blocks (u32 *input, byte *dst, const byte *src, size_t nblks) |
282 | 0 | { |
283 | 0 | u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
284 | 0 | unsigned int i; |
285 | |
|
286 | 0 | while (nblks) |
287 | 0 | { |
288 | 0 | x0 = input[0]; |
289 | 0 | x1 = input[1]; |
290 | 0 | x2 = input[2]; |
291 | 0 | x3 = input[3]; |
292 | 0 | x4 = input[4]; |
293 | 0 | x5 = input[5]; |
294 | 0 | x6 = input[6]; |
295 | 0 | x7 = input[7]; |
296 | 0 | x8 = input[8]; |
297 | 0 | x9 = input[9]; |
298 | 0 | x10 = input[10]; |
299 | 0 | x11 = input[11]; |
300 | 0 | x12 = input[12]; |
301 | 0 | x13 = input[13]; |
302 | 0 | x14 = input[14]; |
303 | 0 | x15 = input[15]; |
304 | |
|
305 | 0 | for (i = 20; i > 0; i -= 2) |
306 | 0 | { |
307 | 0 | QUARTERROUND(x0, x4, x8, x12) |
308 | 0 | QUARTERROUND(x1, x5, x9, x13) |
309 | 0 | QUARTERROUND(x2, x6, x10, x14) |
310 | 0 | QUARTERROUND(x3, x7, x11, x15) |
311 | 0 | QUARTERROUND(x0, x5, x10, x15) |
312 | 0 | QUARTERROUND(x1, x6, x11, x12) |
313 | 0 | QUARTERROUND(x2, x7, x8, x13) |
314 | 0 | QUARTERROUND(x3, x4, x9, x14) |
315 | 0 | } |
316 | |
|
317 | 0 | x0 = PLUS(x0, input[0]); |
318 | 0 | x1 = PLUS(x1, input[1]); |
319 | 0 | x2 = PLUS(x2, input[2]); |
320 | 0 | x3 = PLUS(x3, input[3]); |
321 | 0 | x4 = PLUS(x4, input[4]); |
322 | 0 | x5 = PLUS(x5, input[5]); |
323 | 0 | x6 = PLUS(x6, input[6]); |
324 | 0 | x7 = PLUS(x7, input[7]); |
325 | 0 | x8 = PLUS(x8, input[8]); |
326 | 0 | x9 = PLUS(x9, input[9]); |
327 | 0 | x10 = PLUS(x10, input[10]); |
328 | 0 | x11 = PLUS(x11, input[11]); |
329 | 0 | x12 = PLUS(x12, input[12]); |
330 | 0 | x13 = PLUS(x13, input[13]); |
331 | 0 | x14 = PLUS(x14, input[14]); |
332 | 0 | x15 = PLUS(x15, input[15]); |
333 | |
|
334 | 0 | input[12] = PLUSONE(input[12]); |
335 | 0 | input[13] = PLUS(input[13], !input[12]); |
336 | |
|
337 | 0 | BUF_XOR_LE32(dst, src, 0, x0); |
338 | 0 | BUF_XOR_LE32(dst, src, 4, x1); |
339 | 0 | BUF_XOR_LE32(dst, src, 8, x2); |
340 | 0 | BUF_XOR_LE32(dst, src, 12, x3); |
341 | 0 | BUF_XOR_LE32(dst, src, 16, x4); |
342 | 0 | BUF_XOR_LE32(dst, src, 20, x5); |
343 | 0 | BUF_XOR_LE32(dst, src, 24, x6); |
344 | 0 | BUF_XOR_LE32(dst, src, 28, x7); |
345 | 0 | BUF_XOR_LE32(dst, src, 32, x8); |
346 | 0 | BUF_XOR_LE32(dst, src, 36, x9); |
347 | 0 | BUF_XOR_LE32(dst, src, 40, x10); |
348 | 0 | BUF_XOR_LE32(dst, src, 44, x11); |
349 | 0 | BUF_XOR_LE32(dst, src, 48, x12); |
350 | 0 | BUF_XOR_LE32(dst, src, 52, x13); |
351 | 0 | BUF_XOR_LE32(dst, src, 56, x14); |
352 | 0 | BUF_XOR_LE32(dst, src, 60, x15); |
353 | |
|
354 | 0 | src += CHACHA20_BLOCK_SIZE; |
355 | 0 | dst += CHACHA20_BLOCK_SIZE; |
356 | 0 | nblks--; |
357 | 0 | } |
358 | | |
359 | | /* burn_stack */ |
360 | 0 | return (17 * sizeof(u32) + 6 * sizeof(void *)); |
361 | 0 | } |
362 | | |
363 | | |
364 | | static unsigned int |
365 | | chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src, |
366 | | size_t nblks) |
367 | 0 | { |
368 | 0 | #ifdef USE_AVX512 |
369 | 0 | if (ctx->use_avx512) |
370 | 0 | { |
371 | 0 | return _gcry_chacha20_amd64_avx512_blocks(ctx->input, dst, src, nblks); |
372 | 0 | } |
373 | 0 | #endif |
374 | | |
375 | 0 | #ifdef USE_SSSE3 |
376 | 0 | if (ctx->use_ssse3) |
377 | 0 | { |
378 | 0 | return _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, dst, src, nblks); |
379 | 0 | } |
380 | 0 | #endif |
381 | | |
382 | | #ifdef USE_PPC_VEC |
383 | | if (ctx->use_ppc) |
384 | | { |
385 | | if (ctx->use_p9) |
386 | | return _gcry_chacha20_ppc9_blocks1(ctx->input, dst, src, nblks); |
387 | | else |
388 | | return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks); |
389 | | } |
390 | | #endif |
391 | | |
392 | | #ifdef USE_S390X_VX |
393 | | if (ctx->use_s390x) |
394 | | { |
395 | | return _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, dst, src, nblks); |
396 | | } |
397 | | #endif |
398 | | |
399 | 0 | return do_chacha20_blocks (ctx->input, dst, src, nblks); |
400 | 0 | } |
401 | | |
402 | | |
403 | | static void |
404 | | chacha20_keysetup (CHACHA20_context_t *ctx, const byte *key, |
405 | | unsigned int keylen) |
406 | 0 | { |
407 | 0 | static const char sigma[16] = "expand 32-byte k"; |
408 | 0 | static const char tau[16] = "expand 16-byte k"; |
409 | 0 | const char *constants; |
410 | |
|
411 | 0 | ctx->input[4] = buf_get_le32(key + 0); |
412 | 0 | ctx->input[5] = buf_get_le32(key + 4); |
413 | 0 | ctx->input[6] = buf_get_le32(key + 8); |
414 | 0 | ctx->input[7] = buf_get_le32(key + 12); |
415 | 0 | if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */ |
416 | 0 | { |
417 | 0 | key += 16; |
418 | 0 | constants = sigma; |
419 | 0 | } |
420 | 0 | else /* 128 bits */ |
421 | 0 | { |
422 | 0 | constants = tau; |
423 | 0 | } |
424 | 0 | ctx->input[8] = buf_get_le32(key + 0); |
425 | 0 | ctx->input[9] = buf_get_le32(key + 4); |
426 | 0 | ctx->input[10] = buf_get_le32(key + 8); |
427 | 0 | ctx->input[11] = buf_get_le32(key + 12); |
428 | 0 | ctx->input[0] = buf_get_le32(constants + 0); |
429 | 0 | ctx->input[1] = buf_get_le32(constants + 4); |
430 | 0 | ctx->input[2] = buf_get_le32(constants + 8); |
431 | 0 | ctx->input[3] = buf_get_le32(constants + 12); |
432 | 0 | } |
433 | | |
434 | | |
435 | | static void |
436 | | chacha20_ivsetup (CHACHA20_context_t * ctx, const byte *iv, size_t ivlen) |
437 | 0 | { |
438 | 0 | if (ivlen == CHACHA20_CTR_SIZE) |
439 | 0 | { |
440 | 0 | ctx->input[12] = buf_get_le32 (iv + 0); |
441 | 0 | ctx->input[13] = buf_get_le32 (iv + 4); |
442 | 0 | ctx->input[14] = buf_get_le32 (iv + 8); |
443 | 0 | ctx->input[15] = buf_get_le32 (iv + 12); |
444 | 0 | } |
445 | 0 | else if (ivlen == CHACHA20_MAX_IV_SIZE) |
446 | 0 | { |
447 | 0 | ctx->input[12] = 0; |
448 | 0 | ctx->input[13] = buf_get_le32 (iv + 0); |
449 | 0 | ctx->input[14] = buf_get_le32 (iv + 4); |
450 | 0 | ctx->input[15] = buf_get_le32 (iv + 8); |
451 | 0 | } |
452 | 0 | else if (ivlen == CHACHA20_MIN_IV_SIZE) |
453 | 0 | { |
454 | 0 | ctx->input[12] = 0; |
455 | 0 | ctx->input[13] = 0; |
456 | 0 | ctx->input[14] = buf_get_le32 (iv + 0); |
457 | 0 | ctx->input[15] = buf_get_le32 (iv + 4); |
458 | 0 | } |
459 | 0 | else |
460 | 0 | { |
461 | 0 | ctx->input[12] = 0; |
462 | 0 | ctx->input[13] = 0; |
463 | 0 | ctx->input[14] = 0; |
464 | 0 | ctx->input[15] = 0; |
465 | 0 | } |
466 | 0 | } |
467 | | |
468 | | |
469 | | static void |
470 | | chacha20_setiv (void *context, const byte *iv, size_t ivlen) |
471 | 0 | { |
472 | 0 | CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; |
473 | | |
474 | | /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonce. */ |
475 | 0 | if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE |
476 | 0 | && ivlen != CHACHA20_CTR_SIZE) |
477 | 0 | log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen); |
478 | |
|
479 | 0 | if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE |
480 | 0 | || ivlen == CHACHA20_CTR_SIZE)) |
481 | 0 | chacha20_ivsetup (ctx, iv, ivlen); |
482 | 0 | else |
483 | 0 | chacha20_ivsetup (ctx, NULL, 0); |
484 | | |
485 | | /* Reset the unused pad bytes counter. */ |
486 | 0 | ctx->unused = 0; |
487 | 0 | } |
488 | | |
489 | | |
490 | | static gcry_err_code_t |
491 | | chacha20_do_setkey (CHACHA20_context_t *ctx, |
492 | | const byte *key, unsigned int keylen) |
493 | 0 | { |
494 | 0 | static int initialized; |
495 | 0 | static const char *selftest_failed; |
496 | 0 | unsigned int features = _gcry_get_hw_features (); |
497 | |
|
498 | 0 | if (!initialized) |
499 | 0 | { |
500 | 0 | initialized = 1; |
501 | 0 | selftest_failed = selftest (); |
502 | 0 | if (selftest_failed) |
503 | 0 | log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed); |
504 | 0 | } |
505 | 0 | if (selftest_failed) |
506 | 0 | return GPG_ERR_SELFTEST_FAILED; |
507 | | |
508 | 0 | if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE) |
509 | 0 | return GPG_ERR_INV_KEYLEN; |
510 | | |
511 | 0 | #ifdef USE_SSSE3 |
512 | 0 | ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; |
513 | 0 | #endif |
514 | 0 | #ifdef USE_AVX512 |
515 | 0 | ctx->use_avx512 = (features & HWF_INTEL_AVX512) != 0; |
516 | 0 | #endif |
517 | 0 | #ifdef USE_AVX2 |
518 | 0 | ctx->use_avx2 = (features & HWF_INTEL_AVX2) != 0; |
519 | 0 | #endif |
520 | | #ifdef USE_ARMV7_NEON |
521 | | ctx->use_neon = (features & HWF_ARM_NEON) != 0; |
522 | | #endif |
523 | | #ifdef USE_AARCH64_SIMD |
524 | | ctx->use_neon = (features & HWF_ARM_NEON) != 0; |
525 | | #endif |
526 | | #ifdef USE_PPC_VEC |
527 | | ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0; |
528 | | ctx->use_p9 = (features & HWF_PPC_ARCH_3_00) != 0; |
529 | | # ifndef WORDS_BIGENDIAN |
530 | | ctx->use_p10 = (features & HWF_PPC_ARCH_3_10) != 0; |
531 | | # ifdef ENABLE_FORCE_SOFT_HWFEATURES |
532 | | /* HWF_PPC_ARCH_3_10 above is used as soft HW-feature indicator for P10. |
533 | | * Actual implementation works with HWF_PPC_ARCH_3_00 also. */ |
534 | | ctx->use_p10 |= (features & HWF_PPC_ARCH_3_00) != 0; |
535 | | # endif |
536 | | # endif |
537 | | #endif |
538 | | #ifdef USE_S390X_VX |
539 | | ctx->use_s390x = (features & HWF_S390X_VX) != 0; |
540 | | #endif |
541 | |
|
542 | 0 | (void)features; |
543 | |
|
544 | 0 | chacha20_keysetup (ctx, key, keylen); |
545 | | |
546 | | /* We default to a zero nonce. */ |
547 | 0 | chacha20_setiv (ctx, NULL, 0); |
548 | |
|
549 | 0 | return 0; |
550 | 0 | } |
551 | | |
552 | | |
553 | | static gcry_err_code_t |
554 | | chacha20_setkey (void *context, const byte *key, unsigned int keylen, |
555 | | cipher_bulk_ops_t *bulk_ops) |
556 | 0 | { |
557 | 0 | CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; |
558 | 0 | gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen); |
559 | 0 | (void)bulk_ops; |
560 | 0 | _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); |
561 | 0 | return rc; |
562 | 0 | } |
563 | | |
564 | | |
565 | | static unsigned int |
566 | | do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf, |
567 | | const byte *inbuf, size_t length) |
568 | 0 | { |
569 | 0 | static const unsigned char zero_pad[CHACHA20_BLOCK_SIZE] = { 0, }; |
570 | 0 | unsigned int nburn, burn = 0; |
571 | |
|
572 | 0 | #ifdef USE_AVX512 |
573 | 0 | if (ctx->use_avx512 && length >= CHACHA20_BLOCK_SIZE) |
574 | 0 | { |
575 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
576 | 0 | nburn = _gcry_chacha20_amd64_avx512_blocks(ctx->input, outbuf, inbuf, |
577 | 0 | nblocks); |
578 | 0 | burn = nburn > burn ? nburn : burn; |
579 | 0 | length %= CHACHA20_BLOCK_SIZE; |
580 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
581 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
582 | 0 | } |
583 | 0 | #endif |
584 | |
|
585 | 0 | #ifdef USE_AVX2 |
586 | 0 | if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) |
587 | 0 | { |
588 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
589 | 0 | nblocks -= nblocks % 8; |
590 | 0 | nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, |
591 | 0 | nblocks); |
592 | 0 | burn = nburn > burn ? nburn : burn; |
593 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
594 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
595 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
596 | 0 | } |
597 | 0 | #endif |
598 | |
|
599 | 0 | #ifdef USE_SSSE3 |
600 | 0 | if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) |
601 | 0 | { |
602 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
603 | 0 | nblocks -= nblocks % 4; |
604 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, |
605 | 0 | nblocks); |
606 | 0 | burn = nburn > burn ? nburn : burn; |
607 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
608 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
609 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
610 | 0 | } |
611 | 0 | #endif |
612 | |
|
613 | | #ifdef USE_ARMV7_NEON |
614 | | if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) |
615 | | { |
616 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
617 | | nblocks -= nblocks % 4; |
618 | | nburn = _gcry_chacha20_armv7_neon_blocks4(ctx->input, outbuf, inbuf, |
619 | | nblocks); |
620 | | burn = nburn > burn ? nburn : burn; |
621 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
622 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
623 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
624 | | } |
625 | | #endif |
626 | |
|
627 | | #ifdef USE_AARCH64_SIMD |
628 | | if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) |
629 | | { |
630 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
631 | | nblocks -= nblocks % 4; |
632 | | nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, |
633 | | nblocks); |
634 | | burn = nburn > burn ? nburn : burn; |
635 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
636 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
637 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
638 | | } |
639 | | #endif |
640 | |
|
641 | | #ifdef USE_PPC_VEC |
642 | | if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) |
643 | | { |
644 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
645 | | nblocks -= nblocks % 4; |
646 | | if (0) |
647 | | {} |
648 | | #ifndef WORDS_BIGENDIAN |
649 | | /* |
650 | | * A workaround to skip counter overflow. This is rare. |
651 | | */ |
652 | | else if (ctx->use_p10 && nblocks >= 8 |
653 | | && ((u64)ctx->input[12] + nblocks) <= 0xffffffffU) |
654 | | { |
655 | | size_t len = nblocks * CHACHA20_BLOCK_SIZE; |
656 | | nburn = _gcry_chacha20_p10le_8x(ctx->input, outbuf, inbuf, len); |
657 | | } |
658 | | #endif |
659 | | else if (ctx->use_p9) |
660 | | { |
661 | | nburn = _gcry_chacha20_ppc9_blocks4(ctx->input, outbuf, inbuf, |
662 | | nblocks); |
663 | | } |
664 | | else |
665 | | { |
666 | | nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, |
667 | | nblocks); |
668 | | } |
669 | | burn = nburn > burn ? nburn : burn; |
670 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
671 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
672 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
673 | | } |
674 | | #endif |
675 | |
|
676 | | #ifdef USE_S390X_VX |
677 | | if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 8) |
678 | | { |
679 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
680 | | nblocks -= nblocks % 8; |
681 | | nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, |
682 | | nblocks); |
683 | | burn = nburn > burn ? nburn : burn; |
684 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
685 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
686 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
687 | | } |
688 | | #endif |
689 | |
|
690 | 0 | if (length >= CHACHA20_BLOCK_SIZE) |
691 | 0 | { |
692 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
693 | 0 | nburn = chacha20_blocks(ctx, outbuf, inbuf, nblocks); |
694 | 0 | burn = nburn > burn ? nburn : burn; |
695 | 0 | length %= CHACHA20_BLOCK_SIZE; |
696 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
697 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
698 | 0 | } |
699 | |
|
700 | 0 | if (length > 0) |
701 | 0 | { |
702 | 0 | nburn = chacha20_blocks(ctx, ctx->pad, zero_pad, 1); |
703 | 0 | burn = nburn > burn ? nburn : burn; |
704 | |
|
705 | 0 | buf_xor (outbuf, inbuf, ctx->pad, length); |
706 | 0 | ctx->unused = CHACHA20_BLOCK_SIZE - length; |
707 | 0 | } |
708 | |
|
709 | 0 | if (burn) |
710 | 0 | burn += 5 * sizeof(void *); |
711 | |
|
712 | 0 | return burn; |
713 | 0 | } |
714 | | |
715 | | |
716 | | static void |
717 | | chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf, |
718 | | size_t length) |
719 | 0 | { |
720 | 0 | CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; |
721 | 0 | unsigned int nburn, burn = 0; |
722 | |
|
723 | 0 | if (!length) |
724 | 0 | return; |
725 | | |
726 | 0 | if (ctx->unused) |
727 | 0 | { |
728 | 0 | unsigned char *p = ctx->pad; |
729 | 0 | size_t n; |
730 | |
|
731 | 0 | gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); |
732 | | |
733 | 0 | n = ctx->unused; |
734 | 0 | if (n > length) |
735 | 0 | n = length; |
736 | |
|
737 | 0 | buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); |
738 | 0 | length -= n; |
739 | 0 | outbuf += n; |
740 | 0 | inbuf += n; |
741 | 0 | ctx->unused -= n; |
742 | |
|
743 | 0 | if (!length) |
744 | 0 | return; |
745 | 0 | gcry_assert (!ctx->unused); |
746 | 0 | } |
747 | | |
748 | 0 | nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, length); |
749 | 0 | burn = nburn > burn ? nburn : burn; |
750 | |
|
751 | 0 | if (burn) |
752 | 0 | _gcry_burn_stack (burn); |
753 | 0 | } |
754 | | |
755 | | |
756 | | gcry_err_code_t |
757 | | _gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, |
758 | | const byte *inbuf, size_t length) |
759 | 0 | { |
760 | 0 | CHACHA20_context_t *ctx = (void *) &c->context.c; |
761 | 0 | unsigned int nburn, burn = 0; |
762 | 0 | byte *authptr = NULL; |
763 | |
|
764 | 0 | if (!length) |
765 | 0 | return 0; |
766 | | |
767 | 0 | if (ctx->unused) |
768 | 0 | { |
769 | 0 | unsigned char *p = ctx->pad; |
770 | 0 | size_t n; |
771 | |
|
772 | 0 | gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); |
773 | | |
774 | 0 | n = ctx->unused; |
775 | 0 | if (n > length) |
776 | 0 | n = length; |
777 | |
|
778 | 0 | buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); |
779 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, n); |
780 | 0 | burn = nburn > burn ? nburn : burn; |
781 | 0 | length -= n; |
782 | 0 | outbuf += n; |
783 | 0 | inbuf += n; |
784 | 0 | ctx->unused -= n; |
785 | |
|
786 | 0 | if (!length) |
787 | 0 | { |
788 | 0 | if (burn) |
789 | 0 | _gcry_burn_stack (burn); |
790 | |
|
791 | 0 | return 0; |
792 | 0 | } |
793 | 0 | gcry_assert (!ctx->unused); |
794 | 0 | } |
795 | | |
796 | 0 | gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); |
797 | | |
798 | 0 | if (0) |
799 | 0 | { } |
800 | 0 | #ifdef USE_AVX512 |
801 | 0 | else if (ctx->use_avx512) |
802 | 0 | { |
803 | | /* Skip stitched chacha20-poly1305 for AVX512. */ |
804 | 0 | authptr = NULL; |
805 | 0 | } |
806 | 0 | #endif |
807 | 0 | #ifdef USE_AVX2 |
808 | 0 | else if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) |
809 | 0 | { |
810 | 0 | nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, 8); |
811 | 0 | burn = nburn > burn ? nburn : burn; |
812 | |
|
813 | 0 | authptr = outbuf; |
814 | 0 | length -= 8 * CHACHA20_BLOCK_SIZE; |
815 | 0 | outbuf += 8 * CHACHA20_BLOCK_SIZE; |
816 | 0 | inbuf += 8 * CHACHA20_BLOCK_SIZE; |
817 | 0 | } |
818 | 0 | #endif |
819 | 0 | #ifdef USE_SSSE3 |
820 | 0 | else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) |
821 | 0 | { |
822 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, 4); |
823 | 0 | burn = nburn > burn ? nburn : burn; |
824 | |
|
825 | 0 | authptr = outbuf; |
826 | 0 | length -= 4 * CHACHA20_BLOCK_SIZE; |
827 | 0 | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
828 | 0 | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
829 | 0 | } |
830 | 0 | else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 2) |
831 | 0 | { |
832 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 2); |
833 | 0 | burn = nburn > burn ? nburn : burn; |
834 | |
|
835 | 0 | authptr = outbuf; |
836 | 0 | length -= 2 * CHACHA20_BLOCK_SIZE; |
837 | 0 | outbuf += 2 * CHACHA20_BLOCK_SIZE; |
838 | 0 | inbuf += 2 * CHACHA20_BLOCK_SIZE; |
839 | 0 | } |
840 | 0 | else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE) |
841 | 0 | { |
842 | 0 | nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 1); |
843 | 0 | burn = nburn > burn ? nburn : burn; |
844 | |
|
845 | 0 | authptr = outbuf; |
846 | 0 | length -= 1 * CHACHA20_BLOCK_SIZE; |
847 | 0 | outbuf += 1 * CHACHA20_BLOCK_SIZE; |
848 | 0 | inbuf += 1 * CHACHA20_BLOCK_SIZE; |
849 | 0 | } |
850 | 0 | #endif |
851 | | #ifdef USE_AARCH64_SIMD |
852 | | else if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) |
853 | | { |
854 | | nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, 4); |
855 | | burn = nburn > burn ? nburn : burn; |
856 | | |
857 | | authptr = outbuf; |
858 | | length -= 4 * CHACHA20_BLOCK_SIZE; |
859 | | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
860 | | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
861 | | } |
862 | | #endif |
863 | | #ifdef USE_PPC_VEC_POLY1305 |
864 | | else if (ctx->use_ppc && ctx->use_p10) |
865 | | { |
866 | | /* Skip stitched chacha20-poly1305 for P10. */ |
867 | | authptr = NULL; |
868 | | } |
869 | | else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) |
870 | | { |
871 | | if (ctx->use_p9) |
872 | | nburn = _gcry_chacha20_ppc9_blocks4(ctx->input, outbuf, inbuf, 4); |
873 | | else |
874 | | nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); |
875 | | burn = nburn > burn ? nburn : burn; |
876 | | |
877 | | authptr = outbuf; |
878 | | length -= 4 * CHACHA20_BLOCK_SIZE; |
879 | | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
880 | | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
881 | | } |
882 | | #endif |
883 | | #ifdef USE_S390X_VX_POLY1305 |
884 | | else if (ctx->use_s390x && length >= 2 * CHACHA20_BLOCK_SIZE * 8) |
885 | | { |
886 | | nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, 8); |
887 | | burn = nburn > burn ? nburn : burn; |
888 | | |
889 | | authptr = outbuf; |
890 | | length -= 8 * CHACHA20_BLOCK_SIZE; |
891 | | outbuf += 8 * CHACHA20_BLOCK_SIZE; |
892 | | inbuf += 8 * CHACHA20_BLOCK_SIZE; |
893 | | } |
894 | | else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 4) |
895 | | { |
896 | | nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 4); |
897 | | burn = nburn > burn ? nburn : burn; |
898 | | |
899 | | authptr = outbuf; |
900 | | length -= 4 * CHACHA20_BLOCK_SIZE; |
901 | | outbuf += 4 * CHACHA20_BLOCK_SIZE; |
902 | | inbuf += 4 * CHACHA20_BLOCK_SIZE; |
903 | | } |
904 | | else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 2) |
905 | | { |
906 | | nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 2); |
907 | | burn = nburn > burn ? nburn : burn; |
908 | | |
909 | | authptr = outbuf; |
910 | | length -= 2 * CHACHA20_BLOCK_SIZE; |
911 | | outbuf += 2 * CHACHA20_BLOCK_SIZE; |
912 | | inbuf += 2 * CHACHA20_BLOCK_SIZE; |
913 | | } |
914 | | else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE) |
915 | | { |
916 | | nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 1); |
917 | | burn = nburn > burn ? nburn : burn; |
918 | | |
919 | | authptr = outbuf; |
920 | | length -= 1 * CHACHA20_BLOCK_SIZE; |
921 | | outbuf += 1 * CHACHA20_BLOCK_SIZE; |
922 | | inbuf += 1 * CHACHA20_BLOCK_SIZE; |
923 | | } |
924 | | #endif |
925 | |
|
926 | 0 | if (authptr) |
927 | 0 | { |
928 | 0 | size_t authoffset = outbuf - authptr; |
929 | |
|
930 | 0 | #ifdef USE_AVX2 |
931 | 0 | if (ctx->use_avx2 && |
932 | 0 | length >= 8 * CHACHA20_BLOCK_SIZE && |
933 | 0 | authoffset >= 8 * CHACHA20_BLOCK_SIZE) |
934 | 0 | { |
935 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
936 | 0 | nblocks -= nblocks % 8; |
937 | |
|
938 | 0 | nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( |
939 | 0 | ctx->input, outbuf, inbuf, nblocks, |
940 | 0 | &c->u_mode.poly1305.ctx.state, authptr); |
941 | 0 | burn = nburn > burn ? nburn : burn; |
942 | |
|
943 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
944 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
945 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
946 | 0 | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
947 | 0 | } |
948 | 0 | #endif |
949 | |
|
950 | 0 | #ifdef USE_SSSE3 |
951 | 0 | if (ctx->use_ssse3) |
952 | 0 | { |
953 | 0 | if (length >= 4 * CHACHA20_BLOCK_SIZE && |
954 | 0 | authoffset >= 4 * CHACHA20_BLOCK_SIZE) |
955 | 0 | { |
956 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
957 | 0 | nblocks -= nblocks % 4; |
958 | |
|
959 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( |
960 | 0 | ctx->input, outbuf, inbuf, nblocks, |
961 | 0 | &c->u_mode.poly1305.ctx.state, authptr); |
962 | 0 | burn = nburn > burn ? nburn : burn; |
963 | |
|
964 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
965 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
966 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
967 | 0 | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
968 | 0 | } |
969 | |
|
970 | 0 | if (length >= CHACHA20_BLOCK_SIZE && |
971 | 0 | authoffset >= CHACHA20_BLOCK_SIZE) |
972 | 0 | { |
973 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
974 | |
|
975 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( |
976 | 0 | ctx->input, outbuf, inbuf, nblocks, |
977 | 0 | &c->u_mode.poly1305.ctx.state, authptr); |
978 | 0 | burn = nburn > burn ? nburn : burn; |
979 | |
|
980 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
981 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
982 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
983 | 0 | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
984 | 0 | } |
985 | 0 | } |
986 | 0 | #endif |
987 | |
|
988 | | #ifdef USE_AARCH64_SIMD |
989 | | if (ctx->use_neon && |
990 | | length >= 4 * CHACHA20_BLOCK_SIZE && |
991 | | authoffset >= 4 * CHACHA20_BLOCK_SIZE) |
992 | | { |
993 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
994 | | nblocks -= nblocks % 4; |
995 | | |
996 | | nburn = _gcry_chacha20_poly1305_aarch64_blocks4( |
997 | | ctx->input, outbuf, inbuf, nblocks, |
998 | | &c->u_mode.poly1305.ctx.state, authptr); |
999 | | burn = nburn > burn ? nburn : burn; |
1000 | | |
1001 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1002 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1003 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1004 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
1005 | | } |
1006 | | #endif |
1007 | |
|
1008 | | #ifdef USE_PPC_VEC_POLY1305 |
1009 | | if (ctx->use_ppc && |
1010 | | length >= 4 * CHACHA20_BLOCK_SIZE && |
1011 | | authoffset >= 4 * CHACHA20_BLOCK_SIZE) |
1012 | | { |
1013 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1014 | | nblocks -= nblocks % 4; |
1015 | | |
1016 | | if (ctx->use_p9) |
1017 | | nburn = _gcry_chacha20_poly1305_ppc9_blocks4( |
1018 | | ctx->input, outbuf, inbuf, nblocks, |
1019 | | &c->u_mode.poly1305.ctx.state, authptr); |
1020 | | else |
1021 | | nburn = _gcry_chacha20_poly1305_ppc8_blocks4( |
1022 | | ctx->input, outbuf, inbuf, nblocks, |
1023 | | &c->u_mode.poly1305.ctx.state, authptr); |
1024 | | burn = nburn > burn ? nburn : burn; |
1025 | | |
1026 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1027 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1028 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1029 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
1030 | | } |
1031 | | #endif |
1032 | |
|
1033 | | #ifdef USE_S390X_VX_POLY1305 |
1034 | | if (ctx->use_s390x) |
1035 | | { |
1036 | | if (length >= 8 * CHACHA20_BLOCK_SIZE && |
1037 | | authoffset >= 8 * CHACHA20_BLOCK_SIZE) |
1038 | | { |
1039 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1040 | | nblocks -= nblocks % 8; |
1041 | | |
1042 | | burn = _gcry_chacha20_poly1305_s390x_vx_blocks8( |
1043 | | ctx->input, outbuf, inbuf, nblocks, |
1044 | | &c->u_mode.poly1305.ctx.state, authptr); |
1045 | | burn = nburn > burn ? nburn : burn; |
1046 | | |
1047 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1048 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1049 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1050 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
1051 | | } |
1052 | | |
1053 | | if (length >= CHACHA20_BLOCK_SIZE && |
1054 | | authoffset >= CHACHA20_BLOCK_SIZE) |
1055 | | { |
1056 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1057 | | |
1058 | | burn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( |
1059 | | ctx->input, outbuf, inbuf, nblocks, |
1060 | | &c->u_mode.poly1305.ctx.state, authptr); |
1061 | | burn = nburn > burn ? nburn : burn; |
1062 | | |
1063 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1064 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1065 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1066 | | authptr += nblocks * CHACHA20_BLOCK_SIZE; |
1067 | | } |
1068 | | } |
1069 | | #endif |
1070 | |
|
1071 | 0 | if (authoffset > 0) |
1072 | 0 | { |
1073 | 0 | _gcry_poly1305_update (&c->u_mode.poly1305.ctx, authptr, authoffset); |
1074 | 0 | authptr += authoffset; |
1075 | 0 | authoffset = 0; |
1076 | 0 | } |
1077 | |
|
1078 | 0 | gcry_assert(authptr == outbuf); |
1079 | 0 | } |
1080 | | |
1081 | 0 | while (length) |
1082 | 0 | { |
1083 | 0 | size_t currlen = length; |
1084 | | |
1085 | | /* Since checksumming is done after encryption, process input in 24KiB |
1086 | | * chunks to keep data loaded in L1 cache for checksumming. However |
1087 | | * only do splitting if input is large enough so that last chunks does |
1088 | | * not end up being short. */ |
1089 | 0 | if (currlen > 32 * 1024) |
1090 | 0 | currlen = 24 * 1024; |
1091 | |
|
1092 | 0 | nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); |
1093 | 0 | burn = nburn > burn ? nburn : burn; |
1094 | |
|
1095 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, |
1096 | 0 | currlen); |
1097 | 0 | burn = nburn > burn ? nburn : burn; |
1098 | |
|
1099 | 0 | outbuf += currlen; |
1100 | 0 | inbuf += currlen; |
1101 | 0 | length -= currlen; |
1102 | 0 | } |
1103 | |
|
1104 | 0 | if (burn) |
1105 | 0 | _gcry_burn_stack (burn); |
1106 | |
|
1107 | 0 | return 0; |
1108 | 0 | } |
1109 | | |
1110 | | |
1111 | | gcry_err_code_t |
1112 | | _gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, |
1113 | | const byte *inbuf, size_t length) |
1114 | 0 | { |
1115 | 0 | CHACHA20_context_t *ctx = (void *) &c->context.c; |
1116 | 0 | unsigned int nburn, burn = 0; |
1117 | 0 | #if defined(USE_AVX512) || defined(USE_PPC_VEC_POLY1305) \ |
1118 | 0 | || defined(USE_AVX2) || defined(USE_SSSE3) || defined(USE_AARCH64_SIMD) \ |
1119 | 0 | || defined(USE_S390X_VX_POLY1305) |
1120 | 0 | int skip_stitched = 0; |
1121 | 0 | #endif |
1122 | |
|
1123 | 0 | if (!length) |
1124 | 0 | return 0; |
1125 | | |
1126 | 0 | if (ctx->unused) |
1127 | 0 | { |
1128 | 0 | unsigned char *p = ctx->pad; |
1129 | 0 | size_t n; |
1130 | |
|
1131 | 0 | gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); |
1132 | | |
1133 | 0 | n = ctx->unused; |
1134 | 0 | if (n > length) |
1135 | 0 | n = length; |
1136 | |
|
1137 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, n); |
1138 | 0 | burn = nburn > burn ? nburn : burn; |
1139 | 0 | buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); |
1140 | 0 | length -= n; |
1141 | 0 | outbuf += n; |
1142 | 0 | inbuf += n; |
1143 | 0 | ctx->unused -= n; |
1144 | |
|
1145 | 0 | if (!length) |
1146 | 0 | { |
1147 | 0 | if (burn) |
1148 | 0 | _gcry_burn_stack (burn); |
1149 | |
|
1150 | 0 | return 0; |
1151 | 0 | } |
1152 | 0 | gcry_assert (!ctx->unused); |
1153 | 0 | } |
1154 | | |
1155 | 0 | gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); |
1156 | | |
1157 | 0 | #ifdef USE_AVX512 |
1158 | 0 | if (ctx->use_avx512) |
1159 | 0 | { |
1160 | | /* Skip stitched chacha20-poly1305 for AVX512. */ |
1161 | 0 | skip_stitched = 1; |
1162 | 0 | } |
1163 | 0 | #endif |
1164 | | #ifdef USE_PPC_VEC_POLY1305 |
1165 | | if (ctx->use_ppc && ctx->use_p10) |
1166 | | { |
1167 | | /* Skip stitched chacha20-poly1305 for P10. */ |
1168 | | skip_stitched = 1; |
1169 | | } |
1170 | | #endif |
1171 | |
|
1172 | 0 | #ifdef USE_AVX2 |
1173 | 0 | if (!skip_stitched && ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE) |
1174 | 0 | { |
1175 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1176 | 0 | nblocks -= nblocks % 8; |
1177 | |
|
1178 | 0 | nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( |
1179 | 0 | ctx->input, outbuf, inbuf, nblocks, |
1180 | 0 | &c->u_mode.poly1305.ctx.state, inbuf); |
1181 | 0 | burn = nburn > burn ? nburn : burn; |
1182 | |
|
1183 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1184 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1185 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1186 | 0 | } |
1187 | 0 | #endif |
1188 | |
|
1189 | 0 | #ifdef USE_SSSE3 |
1190 | 0 | if (!skip_stitched && ctx->use_ssse3) |
1191 | 0 | { |
1192 | 0 | if (length >= 4 * CHACHA20_BLOCK_SIZE) |
1193 | 0 | { |
1194 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1195 | 0 | nblocks -= nblocks % 4; |
1196 | |
|
1197 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( |
1198 | 0 | ctx->input, outbuf, inbuf, nblocks, |
1199 | 0 | &c->u_mode.poly1305.ctx.state, inbuf); |
1200 | 0 | burn = nburn > burn ? nburn : burn; |
1201 | |
|
1202 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1203 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1204 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1205 | 0 | } |
1206 | |
|
1207 | 0 | if (length >= CHACHA20_BLOCK_SIZE) |
1208 | 0 | { |
1209 | 0 | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1210 | |
|
1211 | 0 | nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( |
1212 | 0 | ctx->input, outbuf, inbuf, nblocks, |
1213 | 0 | &c->u_mode.poly1305.ctx.state, inbuf); |
1214 | 0 | burn = nburn > burn ? nburn : burn; |
1215 | |
|
1216 | 0 | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1217 | 0 | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1218 | 0 | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1219 | 0 | } |
1220 | 0 | } |
1221 | 0 | #endif |
1222 | |
|
1223 | | #ifdef USE_AARCH64_SIMD |
1224 | | if (!skip_stitched && ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE) |
1225 | | { |
1226 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1227 | | nblocks -= nblocks % 4; |
1228 | | |
1229 | | nburn = _gcry_chacha20_poly1305_aarch64_blocks4( |
1230 | | ctx->input, outbuf, inbuf, nblocks, |
1231 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1232 | | burn = nburn > burn ? nburn : burn; |
1233 | | |
1234 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1235 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1236 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1237 | | } |
1238 | | #endif |
1239 | |
|
1240 | | #ifdef USE_PPC_VEC_POLY1305 |
1241 | | /* skip stitch for p10 */ |
1242 | | if (!skip_stitched && ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE) |
1243 | | { |
1244 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1245 | | nblocks -= nblocks % 4; |
1246 | | |
1247 | | if (ctx->use_p9) |
1248 | | nburn = _gcry_chacha20_poly1305_ppc9_blocks4( |
1249 | | ctx->input, outbuf, inbuf, nblocks, |
1250 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1251 | | else |
1252 | | nburn = _gcry_chacha20_poly1305_ppc8_blocks4( |
1253 | | ctx->input, outbuf, inbuf, nblocks, |
1254 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1255 | | burn = nburn > burn ? nburn : burn; |
1256 | | |
1257 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1258 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1259 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1260 | | } |
1261 | | #endif |
1262 | |
|
1263 | | #ifdef USE_S390X_VX_POLY1305 |
1264 | | if (!skip_stitched && ctx->use_s390x) |
1265 | | { |
1266 | | if (length >= 8 * CHACHA20_BLOCK_SIZE) |
1267 | | { |
1268 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1269 | | nblocks -= nblocks % 8; |
1270 | | |
1271 | | nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8( |
1272 | | ctx->input, outbuf, inbuf, nblocks, |
1273 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1274 | | burn = nburn > burn ? nburn : burn; |
1275 | | |
1276 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1277 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1278 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1279 | | } |
1280 | | |
1281 | | if (length >= CHACHA20_BLOCK_SIZE) |
1282 | | { |
1283 | | size_t nblocks = length / CHACHA20_BLOCK_SIZE; |
1284 | | |
1285 | | nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( |
1286 | | ctx->input, outbuf, inbuf, nblocks, |
1287 | | &c->u_mode.poly1305.ctx.state, inbuf); |
1288 | | burn = nburn > burn ? nburn : burn; |
1289 | | |
1290 | | length -= nblocks * CHACHA20_BLOCK_SIZE; |
1291 | | outbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1292 | | inbuf += nblocks * CHACHA20_BLOCK_SIZE; |
1293 | | } |
1294 | | } |
1295 | | #endif |
1296 | |
|
1297 | 0 | while (length) |
1298 | 0 | { |
1299 | 0 | size_t currlen = length; |
1300 | | |
1301 | | /* Since checksumming is done before decryption, process input in 24KiB |
1302 | | * chunks to keep data loaded in L1 cache for decryption. However only |
1303 | | * do splitting if input is large enough so that last chunks does not |
1304 | | * end up being short. */ |
1305 | 0 | if (currlen > 32 * 1024) |
1306 | 0 | currlen = 24 * 1024; |
1307 | |
|
1308 | 0 | nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, |
1309 | 0 | currlen); |
1310 | 0 | burn = nburn > burn ? nburn : burn; |
1311 | |
|
1312 | 0 | nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); |
1313 | 0 | burn = nburn > burn ? nburn : burn; |
1314 | |
|
1315 | 0 | outbuf += currlen; |
1316 | 0 | inbuf += currlen; |
1317 | 0 | length -= currlen; |
1318 | 0 | } |
1319 | |
|
1320 | 0 | if (burn) |
1321 | 0 | _gcry_burn_stack (burn); |
1322 | |
|
1323 | 0 | return 0; |
1324 | 0 | } |
1325 | | |
1326 | | |
1327 | | static const char * |
1328 | | selftest (void) |
1329 | 0 | { |
1330 | 0 | byte ctxbuf[sizeof(CHACHA20_context_t) + 15]; |
1331 | 0 | CHACHA20_context_t *ctx; |
1332 | 0 | byte scratch[127 + 1]; |
1333 | 0 | byte buf[512 + 64 + 4]; |
1334 | 0 | int i; |
1335 | | |
1336 | | /* From draft-strombergson-chacha-test-vectors */ |
1337 | 0 | static byte key_1[] = { |
1338 | 0 | 0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78, |
1339 | 0 | 0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35, |
1340 | 0 | 0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb, |
1341 | 0 | 0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d |
1342 | 0 | }; |
1343 | 0 | static const byte nonce_1[] = |
1344 | 0 | { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 }; |
1345 | 0 | static const byte plaintext_1[127] = { |
1346 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1347 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1348 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1349 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1350 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1351 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1352 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1353 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1354 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1355 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1356 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1357 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1358 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1359 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1360 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1361 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
1362 | 0 | }; |
1363 | 0 | static const byte ciphertext_1[127] = { |
1364 | 0 | 0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9, |
1365 | 0 | 0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06, |
1366 | 0 | 0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00, |
1367 | 0 | 0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf, |
1368 | 0 | 0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd, |
1369 | 0 | 0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f, |
1370 | 0 | 0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f, |
1371 | 0 | 0x11, 0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92, |
1372 | 0 | 0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9, |
1373 | 0 | 0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36, |
1374 | 0 | 0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1, |
1375 | 0 | 0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38, |
1376 | 0 | 0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea, |
1377 | 0 | 0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0, |
1378 | 0 | 0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27, |
1379 | 0 | 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33 |
1380 | 0 | }; |
1381 | | |
1382 | | /* 16-byte alignment required for amd64 implementation. */ |
1383 | 0 | ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); |
1384 | |
|
1385 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1386 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1387 | 0 | scratch[sizeof (scratch) - 1] = 0; |
1388 | 0 | chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); |
1389 | 0 | if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) |
1390 | 0 | return "ChaCha20 encryption test 1 failed."; |
1391 | 0 | if (scratch[sizeof (scratch) - 1]) |
1392 | 0 | return "ChaCha20 wrote too much."; |
1393 | 0 | chacha20_setkey (ctx, key_1, sizeof (key_1), NULL); |
1394 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1395 | 0 | chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); |
1396 | 0 | if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) |
1397 | 0 | return "ChaCha20 decryption test 1 failed."; |
1398 | | |
1399 | 0 | for (i = 0; i < sizeof buf; i++) |
1400 | 0 | buf[i] = i; |
1401 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1402 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1403 | | /*encrypt */ |
1404 | 0 | chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); |
1405 | | /*decrypt */ |
1406 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1407 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1408 | 0 | chacha20_encrypt_stream (ctx, buf, buf, 1); |
1409 | 0 | chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); |
1410 | 0 | chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1, |
1411 | 0 | buf + (sizeof buf) - 1, 1); |
1412 | 0 | for (i = 0; i < sizeof buf; i++) |
1413 | 0 | if (buf[i] != (byte) i) |
1414 | 0 | return "ChaCha20 encryption test 2 failed."; |
1415 | | |
1416 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1417 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1418 | | /* encrypt */ |
1419 | 0 | for (i = 0; i < sizeof buf; i++) |
1420 | 0 | chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); |
1421 | | /* decrypt */ |
1422 | 0 | chacha20_setkey (ctx, key_1, sizeof key_1, NULL); |
1423 | 0 | chacha20_setiv (ctx, nonce_1, sizeof nonce_1); |
1424 | 0 | chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); |
1425 | 0 | for (i = 0; i < sizeof buf; i++) |
1426 | 0 | if (buf[i] != (byte) i) |
1427 | 0 | return "ChaCha20 encryption test 3 failed."; |
1428 | | |
1429 | 0 | return NULL; |
1430 | 0 | } |
1431 | | |
1432 | | |
/* Cipher specification record for ChaCha20.
 * The initializer is positional.  Slots that carry no label in the
 * original text are annotated below as review notes only, because the
 * declaration of gcry_cipher_spec_t is not visible here -- confirm each
 * against that declaration before relying on it.  */
1433 | | gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = { |
1434 | | GCRY_CIPHER_CHACHA20, /* algorithm identifier constant */ |
1435 | | {0, 0}, /* flags */ |
1436 | | "CHACHA20", /* name */ |
1437 | | NULL, /* aliases */ |
1438 | | NULL, /* oids */ |
1439 | | 1, /* blocksize in bytes. */ |
1440 | | CHACHA20_MAX_KEY_SIZE * 8, /* standard key length in bits. */ |
1441 | | sizeof (CHACHA20_context_t), /* size of the ChaCha20 context structure */ |
1442 | | chacha20_setkey, /* key setup routine */ |
1443 | | NULL, /* NOTE(review): unused slot, presumably a block-cipher handler -- confirm */ |
1444 | | NULL, /* NOTE(review): unused slot, presumably a block-cipher handler -- confirm */ |
1445 | | chacha20_encrypt_stream, /* presumably the stream-encrypt handler -- confirm */ |
1446 | | chacha20_encrypt_stream, /* same routine again; presumably the stream-decrypt handler -- confirm */ |
1447 | | NULL, /* NOTE(review): unused slot, meaning not visible here -- confirm */ |
1448 | | NULL, /* NOTE(review): unused slot, meaning not visible here -- confirm */ |
1449 | | chacha20_setiv /* IV (nonce) setup routine */ |
1450 | | }; |