/src/libgcrypt/cipher/camellia-glue.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* camellia-glue.c - Glue for the Camellia cipher |
2 | | * Copyright (C) 2007 Free Software Foundation, Inc. |
3 | | * |
4 | | * This file is part of Libgcrypt. |
5 | | * |
6 | | * Libgcrypt is free software; you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser General Public License as |
8 | | * published by the Free Software Foundation; either version 2.1 of |
9 | | * the License, or (at your option) any later version. |
10 | | * |
11 | | * Libgcrypt is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this program; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
19 | | * 02110-1301, USA. |
20 | | */ |
21 | | |
22 | | /* I put all the libgcrypt-specific stuff in this file to keep the |
23 | | camellia.c/camellia.h files exactly as provided by NTT. If they |
24 | | update their code, this should make it easier to bring the changes |
25 | | in. - dshaw |
26 | | |
27 | | There is one small change which needs to be done: Include the |
28 | | following code at the top of camellia.h: */ |
29 | | #if 0 |
30 | | |
31 | | /* To use Camellia with libraries it is often useful to keep the name |
32 | | * space of the library clean. The following macro is thus useful: |
33 | | * |
34 | | * #define CAMELLIA_EXT_SYM_PREFIX foo_ |
35 | | * |
36 | | * This prefixes all external symbols with "foo_". |
37 | | */ |
38 | | #ifdef HAVE_CONFIG_H |
39 | | #include <config.h> |
40 | | #endif |
41 | | #ifdef CAMELLIA_EXT_SYM_PREFIX |
42 | | #define CAMELLIA_PREFIX1(x,y) x ## y |
43 | | #define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y) |
44 | | #define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x) |
45 | | #define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen) |
46 | | #define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock) |
47 | | #define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock) |
48 | | #define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128) |
49 | | #define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256) |
50 | | #define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128) |
51 | | #define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256) |
52 | | #define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128) |
53 | | #define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192) |
54 | | #define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256) |
55 | | #endif /*CAMELLIA_EXT_SYM_PREFIX*/ |
56 | | |
57 | | #endif /* Code sample. */ |
58 | | |
59 | | |
60 | | #include <config.h> |
61 | | #include "types.h" |
62 | | #include "g10lib.h" |
63 | | #include "cipher.h" |
64 | | #include "camellia.h" |
65 | | #include "bufhelp.h" |
66 | | #include "cipher-internal.h" |
67 | | #include "bulkhelp.h" |
68 | | |
69 | | /* Helper macro to force alignment to 16 bytes. */ |
70 | | #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED |
71 | | # define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) |
72 | | #else |
73 | | # define ATTR_ALIGNED_16 |
74 | | #endif |
75 | | |
/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
77 | | #undef USE_AESNI_AVX |
78 | | #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) |
79 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
80 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
81 | | # define USE_AESNI_AVX 1 |
82 | | # endif |
83 | | #endif |
84 | | |
/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
86 | | #undef USE_AESNI_AVX2 |
87 | | #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) |
88 | | # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
89 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
90 | | # define USE_AESNI_AVX2 1 |
91 | | # endif |
92 | | #endif |
93 | | |
/* USE_VAES_AVX2 indicates whether to compile with Intel VAES/AVX2 code. */
95 | | #undef USE_VAES_AVX2 |
96 | | #if defined(USE_AESNI_AVX2) && defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL) |
97 | | # define USE_VAES_AVX2 1 |
98 | | #endif |
99 | | |
/* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */
101 | | #undef USE_GFNI_AVX2 |
102 | | #if defined(USE_AESNI_AVX2) && defined(ENABLE_GFNI_SUPPORT) |
103 | | # define USE_GFNI_AVX2 1 |
104 | | #endif |
105 | | |
/* USE_GFNI_AVX512 indicates whether to compile with Intel GFNI/AVX512 code. */
107 | | #undef USE_GFNI_AVX512 |
108 | | #if defined(USE_GFNI_AVX2) && defined(ENABLE_AVX512_SUPPORT) |
109 | | # define USE_GFNI_AVX512 1 |
110 | | #endif |
111 | | |
/* Per-handle cipher context: the expanded Camellia key schedule plus
 * flags that select which accelerated implementation to use.  The
 * flags are filled in by camellia_setkey() from the runtime HW
 * feature detection. */
typedef struct
{
  KEY_TABLE_TYPE keytable;      /* Expanded key schedule (Camellia_Ekeygen). */
  int keybitlength;             /* Key length in bits: 128, 192 or 256. */
#ifdef USE_AESNI_AVX
  unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used. */
#endif /*USE_AESNI_AVX*/
#ifdef USE_AESNI_AVX2
  unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used. */
  unsigned int use_vaes_avx2:1; /* VAES/AVX2 implementation shall be used. */
  unsigned int use_gfni_avx2:1; /* GFNI/AVX2 implementation shall be used. */
  unsigned int use_gfni_avx512:1; /* GFNI/AVX512 implementation shall be used. */
#endif /*USE_AESNI_AVX2*/
} CAMELLIA_context;
126 | | |
127 | | /* Assembly implementations use SystemV ABI, ABI conversion and additional |
128 | | * stack to store XMM6-XMM15 needed on Win64. */ |
129 | | #undef ASM_FUNC_ABI |
130 | | #undef ASM_EXTRA_STACK |
131 | | #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) |
132 | | # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS |
133 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
134 | | # define ASM_EXTRA_STACK (10 * 16) |
135 | | # else |
136 | | # define ASM_FUNC_ABI |
137 | | # define ASM_EXTRA_STACK 0 |
138 | | # endif |
139 | | #endif |
140 | | |
141 | | #ifdef USE_AESNI_AVX |
/* Assembler implementations of Camellia using AES-NI and AVX.  Processes
   data in chunks of 16 blocks at a time.
 */
145 | | extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx, |
146 | | unsigned char *out, |
147 | | const unsigned char *in, |
148 | | unsigned char *ctr) ASM_FUNC_ABI; |
149 | | |
150 | | extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx, |
151 | | unsigned char *out, |
152 | | const unsigned char *in, |
153 | | unsigned char *iv) ASM_FUNC_ABI; |
154 | | |
155 | | extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx, |
156 | | unsigned char *out, |
157 | | const unsigned char *in, |
158 | | unsigned char *iv) ASM_FUNC_ABI; |
159 | | |
160 | | extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx, |
161 | | unsigned char *out, |
162 | | const unsigned char *in, |
163 | | unsigned char *offset, |
164 | | unsigned char *checksum, |
165 | | const u64 Ls[16]) ASM_FUNC_ABI; |
166 | | |
167 | | extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx, |
168 | | unsigned char *out, |
169 | | const unsigned char *in, |
170 | | unsigned char *offset, |
171 | | unsigned char *checksum, |
172 | | const u64 Ls[16]) ASM_FUNC_ABI; |
173 | | |
174 | | extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx, |
175 | | const unsigned char *abuf, |
176 | | unsigned char *offset, |
177 | | unsigned char *checksum, |
178 | | const u64 Ls[16]) ASM_FUNC_ABI; |
179 | | |
180 | | extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx, |
181 | | const unsigned char *key, |
182 | | unsigned int keylen) ASM_FUNC_ABI; |
183 | | |
184 | | static const int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 + |
185 | | 2 * sizeof(void *) + ASM_EXTRA_STACK; |
186 | | |
187 | | #endif |
188 | | |
189 | | #ifdef USE_AESNI_AVX2 |
/* Assembler implementations of Camellia using AES-NI and AVX2.  Processes
   data in chunks of 32 blocks at a time.
 */
193 | | extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx, |
194 | | unsigned char *out, |
195 | | const unsigned char *in, |
196 | | unsigned char *ctr) ASM_FUNC_ABI; |
197 | | |
198 | | extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx, |
199 | | unsigned char *out, |
200 | | const unsigned char *in, |
201 | | unsigned char *iv) ASM_FUNC_ABI; |
202 | | |
203 | | extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx, |
204 | | unsigned char *out, |
205 | | const unsigned char *in, |
206 | | unsigned char *iv) ASM_FUNC_ABI; |
207 | | |
208 | | extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx, |
209 | | unsigned char *out, |
210 | | const unsigned char *in, |
211 | | unsigned char *offset, |
212 | | unsigned char *checksum, |
213 | | const u64 Ls[32]) ASM_FUNC_ABI; |
214 | | |
215 | | extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx, |
216 | | unsigned char *out, |
217 | | const unsigned char *in, |
218 | | unsigned char *offset, |
219 | | unsigned char *checksum, |
220 | | const u64 Ls[32]) ASM_FUNC_ABI; |
221 | | |
222 | | extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx, |
223 | | const unsigned char *abuf, |
224 | | unsigned char *offset, |
225 | | unsigned char *checksum, |
226 | | const u64 Ls[32]) ASM_FUNC_ABI; |
227 | | |
228 | | extern void _gcry_camellia_aesni_avx2_enc_blk1_32(const CAMELLIA_context *ctx, |
229 | | unsigned char *out, |
230 | | const unsigned char *in, |
231 | | unsigned int nblocks) |
232 | | ASM_FUNC_ABI; |
233 | | |
234 | | extern void _gcry_camellia_aesni_avx2_dec_blk1_32(const CAMELLIA_context *ctx, |
235 | | unsigned char *out, |
236 | | const unsigned char *in, |
237 | | unsigned int nblocks) |
238 | | ASM_FUNC_ABI; |
239 | | |
240 | | static const int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 + |
241 | | 2 * sizeof(void *) + ASM_EXTRA_STACK; |
242 | | |
243 | | #endif |
244 | | |
245 | | #ifdef USE_VAES_AVX2 |
/* Assembler implementations of Camellia using VAES and AVX2.  Processes
   data in chunks of 32 blocks at a time.
 */
249 | | extern void _gcry_camellia_vaes_avx2_ctr_enc(CAMELLIA_context *ctx, |
250 | | unsigned char *out, |
251 | | const unsigned char *in, |
252 | | unsigned char *ctr) ASM_FUNC_ABI; |
253 | | |
254 | | extern void _gcry_camellia_vaes_avx2_cbc_dec(CAMELLIA_context *ctx, |
255 | | unsigned char *out, |
256 | | const unsigned char *in, |
257 | | unsigned char *iv) ASM_FUNC_ABI; |
258 | | |
259 | | extern void _gcry_camellia_vaes_avx2_cfb_dec(CAMELLIA_context *ctx, |
260 | | unsigned char *out, |
261 | | const unsigned char *in, |
262 | | unsigned char *iv) ASM_FUNC_ABI; |
263 | | |
264 | | extern void _gcry_camellia_vaes_avx2_ocb_enc(CAMELLIA_context *ctx, |
265 | | unsigned char *out, |
266 | | const unsigned char *in, |
267 | | unsigned char *offset, |
268 | | unsigned char *checksum, |
269 | | const u64 Ls[32]) ASM_FUNC_ABI; |
270 | | |
271 | | extern void _gcry_camellia_vaes_avx2_ocb_dec(CAMELLIA_context *ctx, |
272 | | unsigned char *out, |
273 | | const unsigned char *in, |
274 | | unsigned char *offset, |
275 | | unsigned char *checksum, |
276 | | const u64 Ls[32]) ASM_FUNC_ABI; |
277 | | |
278 | | extern void _gcry_camellia_vaes_avx2_ocb_auth(CAMELLIA_context *ctx, |
279 | | const unsigned char *abuf, |
280 | | unsigned char *offset, |
281 | | unsigned char *checksum, |
282 | | const u64 Ls[32]) ASM_FUNC_ABI; |
283 | | |
284 | | extern void _gcry_camellia_vaes_avx2_enc_blk1_32(const CAMELLIA_context *ctx, |
285 | | unsigned char *out, |
286 | | const unsigned char *in, |
287 | | unsigned int nblocks) |
288 | | ASM_FUNC_ABI; |
289 | | |
290 | | extern void _gcry_camellia_vaes_avx2_dec_blk1_32(const CAMELLIA_context *ctx, |
291 | | unsigned char *out, |
292 | | const unsigned char *in, |
293 | | unsigned int nblocks) |
294 | | ASM_FUNC_ABI; |
295 | | #endif |
296 | | |
297 | | #ifdef USE_GFNI_AVX2 |
/* Assembler implementations of Camellia using GFNI and AVX2.  Processes
   data in chunks of 32 blocks at a time.
 */
301 | | extern void _gcry_camellia_gfni_avx2_ctr_enc(CAMELLIA_context *ctx, |
302 | | unsigned char *out, |
303 | | const unsigned char *in, |
304 | | unsigned char *ctr) ASM_FUNC_ABI; |
305 | | |
306 | | extern void _gcry_camellia_gfni_avx2_cbc_dec(CAMELLIA_context *ctx, |
307 | | unsigned char *out, |
308 | | const unsigned char *in, |
309 | | unsigned char *iv) ASM_FUNC_ABI; |
310 | | |
311 | | extern void _gcry_camellia_gfni_avx2_cfb_dec(CAMELLIA_context *ctx, |
312 | | unsigned char *out, |
313 | | const unsigned char *in, |
314 | | unsigned char *iv) ASM_FUNC_ABI; |
315 | | |
316 | | extern void _gcry_camellia_gfni_avx2_ocb_enc(CAMELLIA_context *ctx, |
317 | | unsigned char *out, |
318 | | const unsigned char *in, |
319 | | unsigned char *offset, |
320 | | unsigned char *checksum, |
321 | | const u64 Ls[32]) ASM_FUNC_ABI; |
322 | | |
323 | | extern void _gcry_camellia_gfni_avx2_ocb_dec(CAMELLIA_context *ctx, |
324 | | unsigned char *out, |
325 | | const unsigned char *in, |
326 | | unsigned char *offset, |
327 | | unsigned char *checksum, |
328 | | const u64 Ls[32]) ASM_FUNC_ABI; |
329 | | |
330 | | extern void _gcry_camellia_gfni_avx2_ocb_auth(CAMELLIA_context *ctx, |
331 | | const unsigned char *abuf, |
332 | | unsigned char *offset, |
333 | | unsigned char *checksum, |
334 | | const u64 Ls[32]) ASM_FUNC_ABI; |
335 | | |
336 | | extern void _gcry_camellia_gfni_avx2_enc_blk1_32(const CAMELLIA_context *ctx, |
337 | | unsigned char *out, |
338 | | const unsigned char *in, |
339 | | unsigned int nblocks) |
340 | | ASM_FUNC_ABI; |
341 | | |
342 | | extern void _gcry_camellia_gfni_avx2_dec_blk1_32(const CAMELLIA_context *ctx, |
343 | | unsigned char *out, |
344 | | const unsigned char *in, |
345 | | unsigned int nblocks) |
346 | | ASM_FUNC_ABI; |
347 | | #endif |
348 | | |
349 | | #ifdef USE_GFNI_AVX512 |
/* Assembler implementations of Camellia using GFNI and AVX512.  Processes
   data in chunks of 64 blocks at a time.
 */
353 | | extern void _gcry_camellia_gfni_avx512_ctr_enc(CAMELLIA_context *ctx, |
354 | | unsigned char *out, |
355 | | const unsigned char *in, |
356 | | unsigned char *ctr) ASM_FUNC_ABI; |
357 | | |
358 | | extern void _gcry_camellia_gfni_avx512_cbc_dec(CAMELLIA_context *ctx, |
359 | | unsigned char *out, |
360 | | const unsigned char *in, |
361 | | unsigned char *iv) ASM_FUNC_ABI; |
362 | | |
363 | | extern void _gcry_camellia_gfni_avx512_cfb_dec(CAMELLIA_context *ctx, |
364 | | unsigned char *out, |
365 | | const unsigned char *in, |
366 | | unsigned char *iv) ASM_FUNC_ABI; |
367 | | |
368 | | extern void _gcry_camellia_gfni_avx512_ocb_enc(CAMELLIA_context *ctx, |
369 | | unsigned char *out, |
370 | | const unsigned char *in, |
371 | | unsigned char *offset, |
372 | | unsigned char *checksum, |
373 | | const u64 Ls[32]) ASM_FUNC_ABI; |
374 | | |
375 | | extern void _gcry_camellia_gfni_avx512_ocb_dec(CAMELLIA_context *ctx, |
376 | | unsigned char *out, |
377 | | const unsigned char *in, |
378 | | unsigned char *offset, |
379 | | unsigned char *checksum, |
380 | | const u64 Ls[32]) ASM_FUNC_ABI; |
381 | | |
382 | | extern void _gcry_camellia_gfni_avx512_enc_blk64(const CAMELLIA_context *ctx, |
383 | | unsigned char *out, |
384 | | const unsigned char *in) |
385 | | ASM_FUNC_ABI; |
386 | | |
387 | | extern void _gcry_camellia_gfni_avx512_dec_blk64(const CAMELLIA_context *ctx, |
388 | | unsigned char *out, |
389 | | const unsigned char *in) |
390 | | ASM_FUNC_ABI; |
391 | | |
392 | | /* Stack not used by AVX512 implementation. */ |
393 | | static const int avx512_burn_stack_depth = 0; |
394 | | #endif |
395 | | |
396 | | static const char *selftest(void); |
397 | | |
398 | | static void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr, |
399 | | void *outbuf_arg, const void *inbuf_arg, |
400 | | size_t nblocks); |
401 | | static void _gcry_camellia_cbc_dec (void *context, unsigned char *iv, |
402 | | void *outbuf_arg, const void *inbuf_arg, |
403 | | size_t nblocks); |
404 | | static void _gcry_camellia_cfb_dec (void *context, unsigned char *iv, |
405 | | void *outbuf_arg, const void *inbuf_arg, |
406 | | size_t nblocks); |
407 | | static void _gcry_camellia_xts_crypt (void *context, unsigned char *tweak, |
408 | | void *outbuf_arg, const void *inbuf_arg, |
409 | | size_t nblocks, int encrypt); |
410 | | static void _gcry_camellia_ecb_crypt (void *context, void *outbuf_arg, |
411 | | const void *inbuf_arg, size_t nblocks, |
412 | | int encrypt); |
413 | | static void _gcry_camellia_ctr32le_enc (void *context, unsigned char *ctr, |
414 | | void *outbuf_arg, const void *inbuf_arg, |
415 | | size_t nblocks); |
416 | | static size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
417 | | const void *inbuf_arg, size_t nblocks, |
418 | | int encrypt); |
419 | | static size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, |
420 | | size_t nblocks); |
421 | | |
/* Set the Camellia key for context C.  KEY/KEYLEN is the raw key;
 * KEYLEN must be 16, 24 or 32 bytes.  Detects available CPU features,
 * expands the key schedule into the context and registers the bulk
 * cipher-mode helpers in BULK_OPS.  Returns 0 on success,
 * GPG_ERR_INV_KEYLEN for a bad key length, or
 * GPG_ERR_SELFTEST_FAILED if the one-time selftest failed. */
static gcry_err_code_t
camellia_setkey(void *c, const byte *key, unsigned keylen,
                cipher_bulk_ops_t *bulk_ops)
{
  CAMELLIA_context *ctx=c;
  static int initialized=0;               /* One-time selftest guard. */
  static const char *selftest_failed=NULL;
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) \
    || defined(USE_VAES_AVX2) || defined(USE_GFNI_AVX2)
  unsigned int hwf = _gcry_get_hw_features ();
#endif

  if(keylen!=16 && keylen!=24 && keylen!=32)
    return GPG_ERR_INV_KEYLEN;

  /* Run the selftest on first use; a failure permanently disables the
   * cipher for this process. */
  if(!initialized)
    {
      initialized=1;
      selftest_failed=selftest();
      if(selftest_failed)
        log_error("%s\n",selftest_failed);
    }

  if(selftest_failed)
    return GPG_ERR_SELFTEST_FAILED;

  /* Enable the SIMD implementations supported by the detected HW
   * features.  The VAES/GFNI flags refine the base AVX2 choice below. */
#ifdef USE_AESNI_AVX
  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
#endif
#ifdef USE_AESNI_AVX2
  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
  ctx->use_vaes_avx2 = 0;
  ctx->use_gfni_avx2 = 0;
  ctx->use_gfni_avx512 = 0;
#endif
#ifdef USE_VAES_AVX2
  ctx->use_vaes_avx2 = (hwf & HWF_INTEL_VAES_VPCLMUL) && (hwf & HWF_INTEL_AVX2);
#endif
#ifdef USE_GFNI_AVX2
  ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
#endif
#ifdef USE_GFNI_AVX512
  ctx->use_gfni_avx512 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX512);
#endif

  ctx->keybitlength=keylen*8;

  /* Setup bulk encryption routines.  */
  memset (bulk_ops, 0, sizeof(*bulk_ops));
  bulk_ops->cbc_dec = _gcry_camellia_cbc_dec;
  bulk_ops->cfb_dec = _gcry_camellia_cfb_dec;
  bulk_ops->ctr_enc = _gcry_camellia_ctr_enc;
  bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt;
  bulk_ops->ocb_auth = _gcry_camellia_ocb_auth;
#ifdef USE_AESNI_AVX2
  /* These bulk modes are only wired up when an AVX2 class
   * implementation is actually usable on this CPU. */
  if (ctx->use_aesni_avx2 || ctx->use_vaes_avx2 || ctx->use_gfni_avx2)
    {
      bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
      bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
      bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
    }
#else
  /* Silence unused-function warnings when AVX2 support is compiled out. */
  (void)_gcry_camellia_xts_crypt;
  (void)_gcry_camellia_ecb_crypt;
  (void)_gcry_camellia_ctr32le_enc;
#endif

  /* Expand the key schedule, preferring the AVX assembly key generation
   * when available; otherwise use the generic routine and wipe its
   * stack usage. */
  if (0)
    { }
#ifdef USE_AESNI_AVX
  else if (ctx->use_aesni_avx)
    _gcry_camellia_aesni_avx_keygen(ctx, key, keylen);
  else
#endif
    {
      Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
      _gcry_burn_stack
        ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
         +(4+32)*sizeof(u32)+2*sizeof(void*) /* camellia_setup192 */
         +0+sizeof(int)+2*sizeof(void*) /* Camellia_Ekeygen */
         +3*2*sizeof(void*) /* Function calls. */
        );
    }

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2)
    {
      /* Disable AESNI & VAES implementations when GFNI implementation is
       * enabled. */
#ifdef USE_AESNI_AVX
      ctx->use_aesni_avx = 0;
#endif
#ifdef USE_AESNI_AVX2
      ctx->use_aesni_avx2 = 0;
#endif
#ifdef USE_VAES_AVX2
      ctx->use_vaes_avx2 = 0;
#endif
    }
#endif

  return 0;
}
525 | | |
526 | | #ifdef USE_ARM_ASM |
527 | | |
528 | | /* Assembly implementations of Camellia. */ |
529 | | extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable, |
530 | | byte *outbuf, const byte *inbuf, |
531 | | const int keybits); |
532 | | |
533 | | extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable, |
534 | | byte *outbuf, const byte *inbuf, |
535 | | const int keybits); |
536 | | |
/* Map the NTT reference API onto the ARM assembly block-encrypt
 * routine (argument order differs between the two). */
static void Camellia_EncryptBlock(const int keyBitLength,
				  const unsigned char *plaintext,
				  const KEY_TABLE_TYPE keyTable,
				  unsigned char *cipherText)
{
  _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext,
				   keyBitLength);
}
545 | | |
/* Map the NTT reference API onto the ARM assembly block-decrypt
 * routine (argument order differs between the two). */
static void Camellia_DecryptBlock(const int keyBitLength,
				  const unsigned char *cipherText,
				  const KEY_TABLE_TYPE keyTable,
				  unsigned char *plaintext)
{
  _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText,
				   keyBitLength);
}
554 | | |
555 | | #ifdef __aarch64__ |
556 | | # define CAMELLIA_encrypt_stack_burn_size (0) |
557 | | # define CAMELLIA_decrypt_stack_burn_size (0) |
558 | | #else |
559 | | # define CAMELLIA_encrypt_stack_burn_size (15*4) |
560 | | # define CAMELLIA_decrypt_stack_burn_size (15*4) |
561 | | #endif |
562 | | |
/* Encrypt one block from INBUF to OUTBUF using the ARM assembly core.
 * Returns the stack burn depth for the caller. */
static unsigned int
camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
{
  CAMELLIA_context *ctx = c;
  Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
  return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
}
570 | | |
/* Decrypt one block from INBUF to OUTBUF using the ARM assembly core.
 * Returns the stack burn depth for the caller. */
static unsigned int
camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
{
  CAMELLIA_context *ctx=c;
  Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
  return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
}
578 | | |
579 | | #else /*USE_ARM_ASM*/ |
580 | | |
/* Encrypt one block from INBUF to OUTBUF with the generic C
 * implementation.  Returns the stack burn depth for the caller. */
static unsigned int
camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
{
  CAMELLIA_context *ctx=c;

  Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);

/* Estimated stack usage of Camellia_EncryptBlock and its callees,
 * returned below as the burn-stack amount. */
#define CAMELLIA_encrypt_stack_burn_size \
  (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
     +4*sizeof(u32)+4*sizeof(u32) \
     +2*sizeof(u32*)+4*sizeof(u32) \
     +2*2*sizeof(void*) /* Function calls. */ \
    )

  return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
}
597 | | |
/* Decrypt one block from INBUF to OUTBUF with the generic C
 * implementation.  Returns the stack burn depth for the caller. */
static unsigned int
camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
{
  CAMELLIA_context *ctx=c;

  Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);

/* Estimated stack usage of Camellia_DecryptBlock and its callees,
 * returned below as the burn-stack amount. */
#define CAMELLIA_decrypt_stack_burn_size \
  (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
     +4*sizeof(u32)+4*sizeof(u32) \
     +2*sizeof(u32*)+4*sizeof(u32) \
     +2*2*sizeof(void*) /* Function calls. */ \
    )

  return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
}
614 | | |
615 | | #endif /*!USE_ARM_ASM*/ |
616 | | |
617 | | |
/* Encrypt NUM_BLKS (1..32) blocks from INBUF to OUTBUF.  Dispatches to
 * the fastest usable SIMD implementation (GFNI preferred over VAES over
 * AES-NI) and falls back to the generic single-block code.  Returns the
 * stack burn depth for the caller. */
static unsigned int
camellia_encrypt_blk1_32 (const void *priv, byte *outbuf, const byte *inbuf,
			  unsigned int num_blks)
{
  const CAMELLIA_context *ctx = priv;
  unsigned int stack_burn_size = 0;

  gcry_assert (num_blks <= 32);

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2 && num_blks >= 3)
    {
      /* 3 or more parallel block GFNI processing is faster than
       * generic C implementation.  */
      _gcry_camellia_gfni_avx2_enc_blk1_32 (ctx, outbuf, inbuf, num_blks);
      return avx2_burn_stack_depth;
    }
#endif
#ifdef USE_VAES_AVX2
  if (ctx->use_vaes_avx2 && num_blks >= 6)
    {
      /* 6 or more parallel block VAES processing is faster than
       * generic C implementation.  */
      _gcry_camellia_vaes_avx2_enc_blk1_32 (ctx, outbuf, inbuf, num_blks);
      return avx2_burn_stack_depth;
    }
#endif
#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2 && num_blks >= 6)
    {
      /* 6 or more parallel block AESNI processing is faster than
       * generic C implementation.  */
      _gcry_camellia_aesni_avx2_enc_blk1_32 (ctx, outbuf, inbuf, num_blks);
      return avx2_burn_stack_depth;
    }
#endif

  /* Fallback: one block at a time with the portable implementation. */
  while (num_blks)
    {
      stack_burn_size = camellia_encrypt((void *)ctx, outbuf, inbuf);
      outbuf += CAMELLIA_BLOCK_SIZE;
      inbuf += CAMELLIA_BLOCK_SIZE;
      num_blks--;
    }

  return stack_burn_size;
}
665 | | |
666 | | static unsigned int |
667 | | camellia_encrypt_blk1_64 (const void *priv, byte *outbuf, const byte *inbuf, |
668 | | unsigned int num_blks) |
669 | 0 | { |
670 | 0 | const CAMELLIA_context *ctx = priv; |
671 | 0 | unsigned int stack_burn_size = 0; |
672 | 0 | unsigned int nburn; |
673 | |
|
674 | 0 | gcry_assert (num_blks <= 64); |
675 | | |
676 | 0 | #ifdef USE_GFNI_AVX512 |
677 | 0 | if (num_blks == 64 && ctx->use_gfni_avx512) |
678 | 0 | { |
679 | 0 | _gcry_camellia_gfni_avx512_enc_blk64 (ctx, outbuf, inbuf); |
680 | 0 | return avx512_burn_stack_depth; |
681 | 0 | } |
682 | 0 | #endif |
683 | | |
684 | 0 | do |
685 | 0 | { |
686 | 0 | unsigned int curr_blks = num_blks > 32 ? 32 : num_blks; |
687 | 0 | nburn = camellia_encrypt_blk1_32 (ctx, outbuf, inbuf, curr_blks); |
688 | 0 | stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size; |
689 | 0 | outbuf += curr_blks * 16; |
690 | 0 | inbuf += curr_blks * 16; |
691 | 0 | num_blks -= curr_blks; |
692 | 0 | } |
693 | 0 | while (num_blks > 0); |
694 | |
|
695 | 0 | return stack_burn_size; |
696 | 0 | } |
697 | | |
/* Decrypt NUM_BLKS (1..32) blocks from INBUF to OUTBUF.  Dispatches to
 * the fastest usable SIMD implementation (GFNI preferred over VAES over
 * AES-NI) and falls back to the generic single-block code.  Returns the
 * stack burn depth for the caller. */
static unsigned int
camellia_decrypt_blk1_32 (const void *priv, byte *outbuf, const byte *inbuf,
			  unsigned int num_blks)
{
  const CAMELLIA_context *ctx = priv;
  unsigned int stack_burn_size = 0;

  gcry_assert (num_blks <= 32);

#ifdef USE_GFNI_AVX2
  if (ctx->use_gfni_avx2 && num_blks >= 3)
    {
      /* 3 or more parallel block GFNI processing is faster than
       * generic C implementation.  */
      _gcry_camellia_gfni_avx2_dec_blk1_32 (ctx, outbuf, inbuf, num_blks);
      return avx2_burn_stack_depth;
    }
#endif
#ifdef USE_VAES_AVX2
  if (ctx->use_vaes_avx2 && num_blks >= 6)
    {
      /* 6 or more parallel block VAES processing is faster than
       * generic C implementation.  */
      _gcry_camellia_vaes_avx2_dec_blk1_32 (ctx, outbuf, inbuf, num_blks);
      return avx2_burn_stack_depth;
    }
#endif
#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2 && num_blks >= 6)
    {
      /* 6 or more parallel block AESNI processing is faster than
       * generic C implementation.  */
      _gcry_camellia_aesni_avx2_dec_blk1_32 (ctx, outbuf, inbuf, num_blks);
      return avx2_burn_stack_depth;
    }
#endif

  /* Fallback: one block at a time with the portable implementation. */
  while (num_blks)
    {
      stack_burn_size = camellia_decrypt((void *)ctx, outbuf, inbuf);
      outbuf += CAMELLIA_BLOCK_SIZE;
      inbuf += CAMELLIA_BLOCK_SIZE;
      num_blks--;
    }

  return stack_burn_size;
}
745 | | |
746 | | static unsigned int |
747 | | camellia_decrypt_blk1_64 (const void *priv, byte *outbuf, const byte *inbuf, |
748 | | unsigned int num_blks) |
749 | 0 | { |
750 | 0 | const CAMELLIA_context *ctx = priv; |
751 | 0 | unsigned int stack_burn_size = 0; |
752 | 0 | unsigned int nburn; |
753 | |
|
754 | 0 | gcry_assert (num_blks <= 64); |
755 | | |
756 | 0 | #ifdef USE_GFNI_AVX512 |
757 | 0 | if (num_blks == 64 && ctx->use_gfni_avx512) |
758 | 0 | { |
759 | 0 | _gcry_camellia_gfni_avx512_dec_blk64 (ctx, outbuf, inbuf); |
760 | 0 | return avx512_burn_stack_depth; |
761 | 0 | } |
762 | 0 | #endif |
763 | | |
764 | 0 | do |
765 | 0 | { |
766 | 0 | unsigned int curr_blks = num_blks > 32 ? 32 : num_blks; |
767 | 0 | nburn = camellia_decrypt_blk1_32 (ctx, outbuf, inbuf, curr_blks); |
768 | 0 | stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size; |
769 | 0 | outbuf += curr_blks * 16; |
770 | 0 | inbuf += curr_blks * 16; |
771 | 0 | num_blks -= curr_blks; |
772 | 0 | } |
773 | 0 | while (num_blks > 0); |
774 | |
|
775 | 0 | return stack_burn_size; |
776 | 0 | } |
777 | | |
778 | | |
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size CAMELLIA_BLOCK_SIZE.  Works through the available SIMD
   implementations from widest (AVX512, 64 blocks) to narrowest (AVX,
   16 blocks), then lets the generic bulk helper finish the tail.  */
static void
_gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
{
  CAMELLIA_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Largest stack-burn depth required by any implementation actually
     used; the burn is performed once at the end.  */
  int burn_stack_depth = 0;

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      int did_use_gfni_avx512 = 0;

      /* Process data in 64 block chunks. */
      while (nblocks >= 64)
        {
          _gcry_camellia_gfni_avx512_ctr_enc (ctx, outbuf, inbuf, ctr);
          nblocks -= 64;
          outbuf += 64 * CAMELLIA_BLOCK_SIZE;
          inbuf += 64 * CAMELLIA_BLOCK_SIZE;
          did_use_gfni_avx512 = 1;
        }

      if (did_use_gfni_avx512)
        {
          if (burn_stack_depth < avx512_burn_stack_depth)
            burn_stack_depth = avx512_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      int did_use_aesni_avx2 = 0;
      /* Prefer the VAES or GFNI variant of the 32-block routine when the
         context says the CPU supports it.  */
      typeof (&_gcry_camellia_aesni_avx2_ctr_enc) bulk_ctr_fn =
          _gcry_camellia_aesni_avx2_ctr_enc;

#ifdef USE_VAES_AVX2
      if (ctx->use_vaes_avx2)
        bulk_ctr_fn =_gcry_camellia_vaes_avx2_ctr_enc;
#endif
#ifdef USE_GFNI_AVX2
      if (ctx->use_gfni_avx2)
        bulk_ctr_fn =_gcry_camellia_gfni_avx2_ctr_enc;
#endif

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          bulk_ctr_fn (ctx, outbuf, inbuf, ctr);
          nblocks -= 32;
          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
          inbuf += 32 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx2 = 1;
        }

      if (did_use_aesni_avx2)
        {
          if (burn_stack_depth < avx2_burn_stack_depth)
            burn_stack_depth = avx2_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      int did_use_aesni_avx = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
          inbuf += 16 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx = 1;
        }

      if (did_use_aesni_avx)
        {
          if (burn_stack_depth < avx_burn_stack_depth)
            burn_stack_depth = avx_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32];
      unsigned int tmp_used = CAMELLIA_BLOCK_SIZE;
      size_t nburn;

      nburn = bulk_ctr_enc_128(ctx, camellia_encrypt_blk1_32, outbuf, inbuf,
                               nblocks, ctr, tmpbuf,
                               sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      /* tmpbuf held key-stream material; wipe the part that was used.  */
      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
897 | | |
/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  IV is updated in
   place.  Works through the available SIMD implementations from widest
   (AVX512, 64 blocks) to narrowest (AVX, 16 blocks), then lets the generic
   bulk helper finish the tail.  */
static void
_gcry_camellia_cbc_dec(void *context, unsigned char *iv,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
{
  CAMELLIA_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Largest stack-burn depth required by any implementation actually
     used; the burn is performed once at the end.  */
  int burn_stack_depth = 0;

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      int did_use_gfni_avx512 = 0;

      /* Process data in 64 block chunks. */
      while (nblocks >= 64)
        {
          _gcry_camellia_gfni_avx512_cbc_dec (ctx, outbuf, inbuf, iv);
          nblocks -= 64;
          outbuf += 64 * CAMELLIA_BLOCK_SIZE;
          inbuf += 64 * CAMELLIA_BLOCK_SIZE;
          did_use_gfni_avx512 = 1;
        }

      if (did_use_gfni_avx512)
        {
          if (burn_stack_depth < avx512_burn_stack_depth)
            burn_stack_depth = avx512_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      int did_use_aesni_avx2 = 0;
      /* Prefer the VAES or GFNI variant of the 32-block routine when the
         context says the CPU supports it.  */
      typeof (&_gcry_camellia_aesni_avx2_cbc_dec) bulk_cbc_fn =
          _gcry_camellia_aesni_avx2_cbc_dec;

#ifdef USE_VAES_AVX2
      if (ctx->use_vaes_avx2)
        bulk_cbc_fn =_gcry_camellia_vaes_avx2_cbc_dec;
#endif
#ifdef USE_GFNI_AVX2
      if (ctx->use_gfni_avx2)
        bulk_cbc_fn =_gcry_camellia_gfni_avx2_cbc_dec;
#endif

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          bulk_cbc_fn (ctx, outbuf, inbuf, iv);
          nblocks -= 32;
          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
          inbuf += 32 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx2 = 1;
        }

      if (did_use_aesni_avx2)
        {
          if (burn_stack_depth < avx2_burn_stack_depth)
            burn_stack_depth = avx2_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      int did_use_aesni_avx = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
          inbuf += 16 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx = 1;
        }

      if (did_use_aesni_avx)
        {
          if (burn_stack_depth < avx_burn_stack_depth)
            burn_stack_depth = avx_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32];
      unsigned int tmp_used = CAMELLIA_BLOCK_SIZE;
      size_t nburn;

      nburn = bulk_cbc_dec_128(ctx, camellia_decrypt_blk1_32, outbuf, inbuf,
                               nblocks, iv, tmpbuf,
                               sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      /* tmpbuf held intermediate plaintext; wipe the part that was used.  */
      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1015 | | |
/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  IV is updated in
   place.  Note that CFB decryption uses the block-cipher *encryption*
   direction.  Works through the available SIMD implementations from widest
   (AVX512, 64 blocks) to narrowest (AVX, 16 blocks), then lets the generic
   bulk helper finish the tail.  */
static void
_gcry_camellia_cfb_dec(void *context, unsigned char *iv,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
{
  CAMELLIA_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Largest stack-burn depth required by any implementation actually
     used; the burn is performed once at the end.  */
  int burn_stack_depth = 0;

#ifdef USE_GFNI_AVX512
  if (ctx->use_gfni_avx512)
    {
      int did_use_gfni_avx512 = 0;

      /* Process data in 64 block chunks. */
      while (nblocks >= 64)
        {
          _gcry_camellia_gfni_avx512_cfb_dec (ctx, outbuf, inbuf, iv);
          nblocks -= 64;
          outbuf += 64 * CAMELLIA_BLOCK_SIZE;
          inbuf += 64 * CAMELLIA_BLOCK_SIZE;
          did_use_gfni_avx512 = 1;
        }

      if (did_use_gfni_avx512)
        {
          if (burn_stack_depth < avx512_burn_stack_depth)
            burn_stack_depth = avx512_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      int did_use_aesni_avx2 = 0;
      /* Prefer the VAES or GFNI variant of the 32-block routine when the
         context says the CPU supports it.  */
      typeof (&_gcry_camellia_aesni_avx2_cfb_dec) bulk_cfb_fn =
          _gcry_camellia_aesni_avx2_cfb_dec;

#ifdef USE_VAES_AVX2
      if (ctx->use_vaes_avx2)
        bulk_cfb_fn =_gcry_camellia_vaes_avx2_cfb_dec;
#endif
#ifdef USE_GFNI_AVX2
      if (ctx->use_gfni_avx2)
        bulk_cfb_fn =_gcry_camellia_gfni_avx2_cfb_dec;
#endif

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          bulk_cfb_fn (ctx, outbuf, inbuf, iv);
          nblocks -= 32;
          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
          inbuf += 32 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx2 = 1;
        }

      if (did_use_aesni_avx2)
        {
          if (burn_stack_depth < avx2_burn_stack_depth)
            burn_stack_depth = avx2_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      int did_use_aesni_avx = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
          inbuf += 16 * CAMELLIA_BLOCK_SIZE;
          did_use_aesni_avx = 1;
        }

      if (did_use_aesni_avx)
        {
          if (burn_stack_depth < avx_burn_stack_depth)
            burn_stack_depth = avx_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32];
      unsigned int tmp_used = CAMELLIA_BLOCK_SIZE;
      size_t nburn;

      /* CFB decryption encrypts the IV chain, hence the encrypt helper.  */
      nburn = bulk_cfb_dec_128(ctx, camellia_encrypt_blk1_32, outbuf, inbuf,
                               nblocks, iv, tmpbuf,
                               sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      /* tmpbuf held key-stream material; wipe the part that was used.  */
      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1133 | | |
1134 | | /* Bulk encryption/decryption in ECB mode. */ |
1135 | | static void |
1136 | | _gcry_camellia_ecb_crypt (void *context, void *outbuf_arg, |
1137 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1138 | 0 | { |
1139 | 0 | CAMELLIA_context *ctx = context; |
1140 | 0 | unsigned char *outbuf = outbuf_arg; |
1141 | 0 | const unsigned char *inbuf = inbuf_arg; |
1142 | 0 | int burn_stack_depth = 0; |
1143 | | |
1144 | | /* Process remaining blocks. */ |
1145 | 0 | if (nblocks) |
1146 | 0 | { |
1147 | 0 | size_t nburn; |
1148 | |
|
1149 | 0 | nburn = bulk_ecb_crypt_128(ctx, encrypt ? camellia_encrypt_blk1_64 |
1150 | 0 | : camellia_decrypt_blk1_64, |
1151 | 0 | outbuf, inbuf, nblocks, 64); |
1152 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1153 | 0 | } |
1154 | |
|
1155 | 0 | if (burn_stack_depth) |
1156 | 0 | _gcry_burn_stack(burn_stack_depth); |
1157 | 0 | } |
1158 | | |
1159 | | /* Bulk encryption/decryption of complete blocks in XTS mode. */ |
1160 | | static void |
1161 | | _gcry_camellia_xts_crypt (void *context, unsigned char *tweak, |
1162 | | void *outbuf_arg, const void *inbuf_arg, |
1163 | | size_t nblocks, int encrypt) |
1164 | 0 | { |
1165 | 0 | CAMELLIA_context *ctx = context; |
1166 | 0 | unsigned char *outbuf = outbuf_arg; |
1167 | 0 | const unsigned char *inbuf = inbuf_arg; |
1168 | 0 | int burn_stack_depth = 0; |
1169 | | |
1170 | | /* Process remaining blocks. */ |
1171 | 0 | if (nblocks) |
1172 | 0 | { |
1173 | 0 | byte tmpbuf[CAMELLIA_BLOCK_SIZE * 64]; |
1174 | 0 | unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; |
1175 | 0 | size_t nburn; |
1176 | |
|
1177 | 0 | nburn = bulk_xts_crypt_128(ctx, encrypt ? camellia_encrypt_blk1_64 |
1178 | 0 | : camellia_decrypt_blk1_64, |
1179 | 0 | outbuf, inbuf, nblocks, tweak, tmpbuf, |
1180 | 0 | sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, |
1181 | 0 | &tmp_used); |
1182 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1183 | |
|
1184 | 0 | wipememory(tmpbuf, tmp_used); |
1185 | 0 | } |
1186 | |
|
1187 | 0 | if (burn_stack_depth) |
1188 | 0 | _gcry_burn_stack(burn_stack_depth); |
1189 | 0 | } |
1190 | | |
1191 | | /* Bulk encryption of complete blocks in CTR32LE mode (for GCM-SIV). */ |
1192 | | static void |
1193 | | _gcry_camellia_ctr32le_enc(void *context, unsigned char *ctr, |
1194 | | void *outbuf_arg, const void *inbuf_arg, |
1195 | | size_t nblocks) |
1196 | 0 | { |
1197 | 0 | CAMELLIA_context *ctx = context; |
1198 | 0 | byte *outbuf = outbuf_arg; |
1199 | 0 | const byte *inbuf = inbuf_arg; |
1200 | 0 | int burn_stack_depth = 0; |
1201 | | |
1202 | | /* Process remaining blocks. */ |
1203 | 0 | if (nblocks) |
1204 | 0 | { |
1205 | 0 | byte tmpbuf[64 * CAMELLIA_BLOCK_SIZE]; |
1206 | 0 | unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; |
1207 | 0 | size_t nburn; |
1208 | |
|
1209 | 0 | nburn = bulk_ctr32le_enc_128 (ctx, camellia_encrypt_blk1_64, outbuf, |
1210 | 0 | inbuf, nblocks, ctr, tmpbuf, |
1211 | 0 | sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, |
1212 | 0 | &tmp_used); |
1213 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1214 | |
|
1215 | 0 | wipememory (tmpbuf, tmp_used); |
1216 | 0 | } |
1217 | |
|
1218 | 0 | if (burn_stack_depth) |
1219 | 0 | _gcry_burn_stack (burn_stack_depth); |
1220 | 0 | } |
1221 | | |
1222 | | /* Bulk encryption/decryption of complete blocks in OCB mode. */ |
1223 | | static size_t |
1224 | | _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
1225 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1226 | 0 | { |
1227 | 0 | #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) |
1228 | 0 | CAMELLIA_context *ctx = (void *)&c->context.c; |
1229 | 0 | unsigned char *outbuf = outbuf_arg; |
1230 | 0 | const unsigned char *inbuf = inbuf_arg; |
1231 | 0 | int burn_stack_depth = 0; |
1232 | 0 | u64 blkn = c->u_mode.ocb.data_nblocks; |
1233 | |
|
1234 | | #else |
1235 | | (void)c; |
1236 | | (void)outbuf_arg; |
1237 | | (void)inbuf_arg; |
1238 | | (void)encrypt; |
1239 | | #endif |
1240 | |
|
1241 | 0 | #ifdef USE_GFNI_AVX512 |
1242 | 0 | if (ctx->use_gfni_avx512) |
1243 | 0 | { |
1244 | 0 | int did_use_gfni_avx512 = 0; |
1245 | 0 | u64 Ls[64]; |
1246 | 0 | u64 *l; |
1247 | |
|
1248 | 0 | if (nblocks >= 64) |
1249 | 0 | { |
1250 | 0 | typeof (&_gcry_camellia_gfni_avx512_ocb_dec) bulk_ocb_fn = |
1251 | 0 | encrypt ? _gcry_camellia_gfni_avx512_ocb_enc |
1252 | 0 | : _gcry_camellia_gfni_avx512_ocb_dec; |
1253 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk64 (c, Ls, blkn); |
1254 | | |
1255 | | /* Process data in 64 block chunks. */ |
1256 | 0 | while (nblocks >= 64) |
1257 | 0 | { |
1258 | 0 | blkn += 64; |
1259 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 64); |
1260 | |
|
1261 | 0 | bulk_ocb_fn (ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); |
1262 | |
|
1263 | 0 | nblocks -= 64; |
1264 | 0 | outbuf += 64 * CAMELLIA_BLOCK_SIZE; |
1265 | 0 | inbuf += 64 * CAMELLIA_BLOCK_SIZE; |
1266 | 0 | did_use_gfni_avx512 = 1; |
1267 | 0 | } |
1268 | 0 | } |
1269 | |
|
1270 | 0 | if (did_use_gfni_avx512) |
1271 | 0 | { |
1272 | 0 | if (burn_stack_depth < avx2_burn_stack_depth) |
1273 | 0 | burn_stack_depth = avx2_burn_stack_depth; |
1274 | 0 | } |
1275 | | |
1276 | | /* Use generic code to handle smaller chunks... */ |
1277 | 0 | } |
1278 | 0 | #endif |
1279 | |
|
1280 | 0 | #ifdef USE_AESNI_AVX2 |
1281 | 0 | if (ctx->use_aesni_avx2) |
1282 | 0 | { |
1283 | 0 | int did_use_aesni_avx2 = 0; |
1284 | 0 | u64 Ls[32]; |
1285 | 0 | u64 *l; |
1286 | |
|
1287 | 0 | if (nblocks >= 32) |
1288 | 0 | { |
1289 | 0 | typeof (&_gcry_camellia_aesni_avx2_ocb_dec) bulk_ocb_fn = |
1290 | 0 | encrypt ? _gcry_camellia_aesni_avx2_ocb_enc |
1291 | 0 | : _gcry_camellia_aesni_avx2_ocb_dec; |
1292 | |
|
1293 | 0 | #ifdef USE_VAES_AVX2 |
1294 | 0 | if (ctx->use_vaes_avx2) |
1295 | 0 | bulk_ocb_fn = encrypt ? _gcry_camellia_vaes_avx2_ocb_enc |
1296 | 0 | : _gcry_camellia_vaes_avx2_ocb_dec; |
1297 | 0 | #endif |
1298 | 0 | #ifdef USE_GFNI_AVX2 |
1299 | 0 | if (ctx->use_gfni_avx2) |
1300 | 0 | bulk_ocb_fn = encrypt ? _gcry_camellia_gfni_avx2_ocb_enc |
1301 | 0 | : _gcry_camellia_gfni_avx2_ocb_dec; |
1302 | 0 | #endif |
1303 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn); |
1304 | | |
1305 | | /* Process data in 32 block chunks. */ |
1306 | 0 | while (nblocks >= 32) |
1307 | 0 | { |
1308 | 0 | blkn += 32; |
1309 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); |
1310 | |
|
1311 | 0 | bulk_ocb_fn (ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); |
1312 | |
|
1313 | 0 | nblocks -= 32; |
1314 | 0 | outbuf += 32 * CAMELLIA_BLOCK_SIZE; |
1315 | 0 | inbuf += 32 * CAMELLIA_BLOCK_SIZE; |
1316 | 0 | did_use_aesni_avx2 = 1; |
1317 | 0 | } |
1318 | 0 | } |
1319 | |
|
1320 | 0 | if (did_use_aesni_avx2) |
1321 | 0 | { |
1322 | 0 | if (burn_stack_depth < avx2_burn_stack_depth) |
1323 | 0 | burn_stack_depth = avx2_burn_stack_depth; |
1324 | 0 | } |
1325 | | |
1326 | | /* Use generic code to handle smaller chunks... */ |
1327 | 0 | } |
1328 | 0 | #endif |
1329 | |
|
1330 | 0 | #ifdef USE_AESNI_AVX |
1331 | 0 | if (ctx->use_aesni_avx) |
1332 | 0 | { |
1333 | 0 | int did_use_aesni_avx = 0; |
1334 | 0 | u64 Ls[16]; |
1335 | 0 | u64 *l; |
1336 | |
|
1337 | 0 | if (nblocks >= 16) |
1338 | 0 | { |
1339 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1340 | | |
1341 | | /* Process data in 16 block chunks. */ |
1342 | 0 | while (nblocks >= 16) |
1343 | 0 | { |
1344 | 0 | blkn += 16; |
1345 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1346 | |
|
1347 | 0 | if (encrypt) |
1348 | 0 | _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, |
1349 | 0 | c->u_ctr.ctr, Ls); |
1350 | 0 | else |
1351 | 0 | _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, |
1352 | 0 | c->u_ctr.ctr, Ls); |
1353 | |
|
1354 | 0 | nblocks -= 16; |
1355 | 0 | outbuf += 16 * CAMELLIA_BLOCK_SIZE; |
1356 | 0 | inbuf += 16 * CAMELLIA_BLOCK_SIZE; |
1357 | 0 | did_use_aesni_avx = 1; |
1358 | 0 | } |
1359 | 0 | } |
1360 | |
|
1361 | 0 | if (did_use_aesni_avx) |
1362 | 0 | { |
1363 | 0 | if (burn_stack_depth < avx_burn_stack_depth) |
1364 | 0 | burn_stack_depth = avx_burn_stack_depth; |
1365 | 0 | } |
1366 | | |
1367 | | /* Use generic code to handle smaller chunks... */ |
1368 | 0 | } |
1369 | 0 | #endif |
1370 | |
|
1371 | 0 | #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) |
1372 | | /* Process remaining blocks. */ |
1373 | 0 | if (nblocks) |
1374 | 0 | { |
1375 | 0 | byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; |
1376 | 0 | unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; |
1377 | 0 | size_t nburn; |
1378 | |
|
1379 | 0 | nburn = bulk_ocb_crypt_128 (c, ctx, encrypt ? camellia_encrypt_blk1_32 |
1380 | 0 | : camellia_decrypt_blk1_32, |
1381 | 0 | outbuf, inbuf, nblocks, &blkn, encrypt, |
1382 | 0 | tmpbuf, sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, |
1383 | 0 | &tmp_used); |
1384 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1385 | |
|
1386 | 0 | wipememory(tmpbuf, tmp_used); |
1387 | 0 | nblocks = 0; |
1388 | 0 | } |
1389 | |
|
1390 | 0 | c->u_mode.ocb.data_nblocks = blkn; |
1391 | |
|
1392 | 0 | if (burn_stack_depth) |
1393 | 0 | _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); |
1394 | 0 | #endif |
1395 | |
|
1396 | 0 | return nblocks; |
1397 | 0 | } |
1398 | | |
/* Bulk authentication of complete blocks in OCB mode.  Folds AAD blocks
   into c->u_mode.ocb.aad_sum, preferring the widest available SIMD
   implementation (AVX2, 32 blocks; then AVX, 16 blocks) and finishing
   the tail with the generic bulk helper.  Returns the number of
   unprocessed blocks (always 0 when any SIMD/bulk path is compiled in).  */
static size_t
_gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                         size_t nblocks)
{
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
  CAMELLIA_context *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  /* Largest stack-burn depth required by any implementation actually
     used; the burn is performed once at the end.  */
  int burn_stack_depth = 0;
  u64 blkn = c->u_mode.ocb.aad_nblocks;
#else
  (void)c;
  (void)abuf_arg;
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      int did_use_aesni_avx2 = 0;
      u64 Ls[32];
      u64 *l;

      if (nblocks >= 32)
        {
          /* Prefer the VAES or GFNI variant of the 32-block routine when
             the context says the CPU supports it.  */
          typeof (&_gcry_camellia_aesni_avx2_ocb_auth) bulk_auth_fn =
              _gcry_camellia_aesni_avx2_ocb_auth;

#ifdef USE_VAES_AVX2
          if (ctx->use_vaes_avx2)
            bulk_auth_fn = _gcry_camellia_vaes_avx2_ocb_auth;
#endif
#ifdef USE_GFNI_AVX2
          if (ctx->use_gfni_avx2)
            bulk_auth_fn = _gcry_camellia_gfni_avx2_ocb_auth;
#endif

          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);

          /* Process data in 32 block chunks. */
          while (nblocks >= 32)
            {
              blkn += 32;
              /* Refresh the one L pointer that depends on the running
                 block number.  */
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);

              bulk_auth_fn (ctx, abuf, c->u_mode.ocb.aad_offset,
                            c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 32;
              abuf += 32 * CAMELLIA_BLOCK_SIZE;
              did_use_aesni_avx2 = 1;
            }
        }

      if (did_use_aesni_avx2)
        {
          if (burn_stack_depth < avx2_burn_stack_depth)
            burn_stack_depth = avx2_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      int did_use_aesni_avx = 0;
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
                                                c->u_mode.ocb.aad_offset,
                                                c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 16;
              abuf += 16 * CAMELLIA_BLOCK_SIZE;
              did_use_aesni_avx = 1;
            }
        }

      if (did_use_aesni_avx)
        {
          if (burn_stack_depth < avx_burn_stack_depth)
            burn_stack_depth = avx_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
  /* Process remaining blocks. */
  if (nblocks)
    {
      byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32];
      unsigned int tmp_used = CAMELLIA_BLOCK_SIZE;
      size_t nburn;

      nburn = bulk_ocb_auth_128 (c, ctx, camellia_encrypt_blk1_32,
                                 abuf, nblocks, &blkn, tmpbuf,
                                 sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE,
                                 &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      /* Scratch space held sensitive data; erase what was used.  */
      wipememory(tmpbuf, tmp_used);
      nblocks = 0;
    }

  /* Persist the running AAD block counter for the next call.  */
  c->u_mode.ocb.aad_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  return nblocks;
}
1525 | | |
1526 | | |
1527 | | static const char * |
1528 | | selftest(void) |
1529 | 0 | { |
1530 | 0 | CAMELLIA_context ctx; |
1531 | 0 | byte scratch[16]; |
1532 | 0 | cipher_bulk_ops_t bulk_ops; |
1533 | | |
1534 | | /* These test vectors are from RFC-3713 */ |
1535 | 0 | static const byte plaintext[]= |
1536 | 0 | { |
1537 | 0 | 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef, |
1538 | 0 | 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 |
1539 | 0 | }; |
1540 | 0 | static const byte key_128[]= |
1541 | 0 | { |
1542 | 0 | 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef, |
1543 | 0 | 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 |
1544 | 0 | }; |
1545 | 0 | static const byte ciphertext_128[]= |
1546 | 0 | { |
1547 | 0 | 0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73, |
1548 | 0 | 0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43 |
1549 | 0 | }; |
1550 | 0 | static const byte key_192[]= |
1551 | 0 | { |
1552 | 0 | 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98, |
1553 | 0 | 0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77 |
1554 | 0 | }; |
1555 | 0 | static const byte ciphertext_192[]= |
1556 | 0 | { |
1557 | 0 | 0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8, |
1558 | 0 | 0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9 |
1559 | 0 | }; |
1560 | 0 | static const byte key_256[]= |
1561 | 0 | { |
1562 | 0 | 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba, |
1563 | 0 | 0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55, |
1564 | 0 | 0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff |
1565 | 0 | }; |
1566 | 0 | static const byte ciphertext_256[]= |
1567 | 0 | { |
1568 | 0 | 0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c, |
1569 | 0 | 0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09 |
1570 | 0 | }; |
1571 | |
|
1572 | 0 | camellia_setkey(&ctx,key_128,sizeof(key_128),&bulk_ops); |
1573 | 0 | camellia_encrypt(&ctx,scratch,plaintext); |
1574 | 0 | if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0) |
1575 | 0 | return "CAMELLIA-128 test encryption failed."; |
1576 | 0 | camellia_decrypt(&ctx,scratch,scratch); |
1577 | 0 | if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) |
1578 | 0 | return "CAMELLIA-128 test decryption failed."; |
1579 | | |
1580 | 0 | camellia_setkey(&ctx,key_192,sizeof(key_192),&bulk_ops); |
1581 | 0 | camellia_encrypt(&ctx,scratch,plaintext); |
1582 | 0 | if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0) |
1583 | 0 | return "CAMELLIA-192 test encryption failed."; |
1584 | 0 | camellia_decrypt(&ctx,scratch,scratch); |
1585 | 0 | if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) |
1586 | 0 | return "CAMELLIA-192 test decryption failed."; |
1587 | | |
1588 | 0 | camellia_setkey(&ctx,key_256,sizeof(key_256),&bulk_ops); |
1589 | 0 | camellia_encrypt(&ctx,scratch,plaintext); |
1590 | 0 | if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0) |
1591 | 0 | return "CAMELLIA-256 test encryption failed."; |
1592 | 0 | camellia_decrypt(&ctx,scratch,scratch); |
1593 | 0 | if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) |
1594 | 0 | return "CAMELLIA-256 test decryption failed."; |
1595 | | |
1596 | 0 | return NULL; |
1597 | 0 | } |
1598 | | |
/* These oids are from
   <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
   retrieved May 1, 2007. */

/* OIDs for Camellia with a 128-bit key; one entry per cipher mode,
   terminated by a NULL entry.  */
static const gcry_cipher_oid_spec_t camellia128_oids[] =
  {
    {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
    {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
    {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB},
    {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB},
    { NULL }
  };

/* OIDs for Camellia with a 192-bit key; one entry per cipher mode,
   terminated by a NULL entry.  */
static const gcry_cipher_oid_spec_t camellia192_oids[] =
  {
    {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
    {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
    {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB},
    {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB},
    { NULL }
  };

/* OIDs for Camellia with a 256-bit key; one entry per cipher mode,
   terminated by a NULL entry.  */
static const gcry_cipher_oid_spec_t camellia256_oids[] =
  {
    {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
    {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
    {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB},
    {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB},
    { NULL }
  };
1629 | | |
/* Cipher specification records exported to cipher.c: algorithm id,
   flags, name, alias list, OID list, block size (bytes), key length
   (bits), context size, and the setkey/encrypt/decrypt entry points.  */
gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
  {
    GCRY_CIPHER_CAMELLIA128, {0, 0},
    "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
  };

gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
  {
    GCRY_CIPHER_CAMELLIA192, {0, 0},
    "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
  };

gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
  {
    GCRY_CIPHER_CAMELLIA256, {0, 0},
    "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
  };