/src/libgcrypt/cipher/serpent.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* serpent.c - Implementation of the Serpent encryption algorithm. |
2 | | * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. |
3 | | * |
4 | | * This file is part of Libgcrypt. |
5 | | * |
6 | | * Libgcrypt is free software; you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser general Public License as |
8 | | * published by the Free Software Foundation; either version 2.1 of |
9 | | * the License, or (at your option) any later version. |
10 | | * |
11 | | * Libgcrypt is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this program; if not, write to the Free Software |
18 | | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
19 | | * 02111-1307, USA. |
20 | | */ |
21 | | |
22 | | #include <config.h> |
23 | | |
24 | | #include <string.h> |
25 | | #include <stdio.h> |
26 | | |
27 | | #include "types.h" |
28 | | #include "g10lib.h" |
29 | | #include "cipher.h" |
30 | | #include "bithelp.h" |
31 | | #include "bufhelp.h" |
32 | | #include "cipher-internal.h" |
33 | | #include "bulkhelp.h" |
34 | | |
35 | | |
36 | | /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ |
37 | | #undef USE_SSE2 |
38 | | #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
39 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
40 | | # define USE_SSE2 1 |
41 | | #endif |
42 | | |
43 | | /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ |
44 | | #undef USE_AVX2 |
45 | | #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
46 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
47 | | # if defined(ENABLE_AVX2_SUPPORT) |
48 | | # define USE_AVX2 1 |
49 | | # endif |
50 | | #endif |
51 | | |
52 | | /* USE_NEON indicates whether to enable ARM NEON assembly code. */ |
53 | | #undef USE_NEON |
54 | | #ifdef ENABLE_NEON_SUPPORT |
55 | | # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ |
56 | | && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ |
57 | | && defined(HAVE_GCC_INLINE_ASM_NEON) |
58 | | # define USE_NEON 1 |
59 | | # endif |
60 | | #endif /*ENABLE_NEON_SUPPORT*/ |
61 | | |
62 | | /* Number of rounds per Serpent encrypt/decrypt operation. */ |
63 | 0 | #define ROUNDS 32 |
64 | | |
65 | | /* Magic number, used during generating of the subkeys. */ |
66 | 0 | #define PHI 0x9E3779B9 |
67 | | |
68 | | /* Serpent works on 128 bit blocks. */ |
69 | | typedef u32 serpent_block_t[4]; |
70 | | |
71 | | /* Serpent key, provided by the user. If the original key is shorter |
72 | | than 256 bits, it is padded. */ |
73 | | typedef u32 serpent_key_t[8]; |
74 | | |
75 | | /* The key schedule consists of 33 128 bit subkeys. */ |
76 | | typedef u32 serpent_subkeys_t[ROUNDS + 1][4]; |
77 | | |
/* A Serpent context. */
typedef struct serpent_context
{
  serpent_subkeys_t keys;	/* Generated subkeys. */

#ifdef USE_AVX2
  int use_avx2;			/* Non-zero: the AVX2 implementation may be
				   used (decided at key-setup time from the
				   detected HW features). */
#endif
#ifdef USE_NEON
  int use_neon;			/* Non-zero: the NEON implementation may be
				   used (decided at key-setup time from the
				   detected HW features). */
#endif
} serpent_context_t;
90 | | |
91 | | |
92 | | /* Assembly implementations use SystemV ABI, ABI conversion and additional |
93 | | * stack to store XMM6-XMM15 needed on Win64. */ |
94 | | #undef ASM_FUNC_ABI |
95 | | #if defined(USE_SSE2) || defined(USE_AVX2) |
96 | | # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS |
97 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
98 | | # else |
99 | | # define ASM_FUNC_ABI |
100 | | # endif |
101 | | #endif |
102 | | |
103 | | |
104 | | #ifdef USE_SSE2 |
105 | | /* Assembler implementations of Serpent using SSE2. Process 8 block in |
106 | | parallel. |
107 | | */ |
108 | | extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx, |
109 | | unsigned char *out, |
110 | | const unsigned char *in, |
111 | | unsigned char *ctr) ASM_FUNC_ABI; |
112 | | |
113 | | extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx, |
114 | | unsigned char *out, |
115 | | const unsigned char *in, |
116 | | unsigned char *iv) ASM_FUNC_ABI; |
117 | | |
118 | | extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx, |
119 | | unsigned char *out, |
120 | | const unsigned char *in, |
121 | | unsigned char *iv) ASM_FUNC_ABI; |
122 | | |
123 | | extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx, |
124 | | unsigned char *out, |
125 | | const unsigned char *in, |
126 | | unsigned char *offset, |
127 | | unsigned char *checksum, |
128 | | const u64 Ls[8]) ASM_FUNC_ABI; |
129 | | |
130 | | extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx, |
131 | | unsigned char *out, |
132 | | const unsigned char *in, |
133 | | unsigned char *offset, |
134 | | unsigned char *checksum, |
135 | | const u64 Ls[8]) ASM_FUNC_ABI; |
136 | | |
137 | | extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx, |
138 | | const unsigned char *abuf, |
139 | | unsigned char *offset, |
140 | | unsigned char *checksum, |
141 | | const u64 Ls[8]) ASM_FUNC_ABI; |
142 | | |
143 | | extern void _gcry_serpent_sse2_blk8(const serpent_context_t *c, byte *out, |
144 | | const byte *in, int encrypt) ASM_FUNC_ABI; |
145 | | #endif |
146 | | |
147 | | #ifdef USE_AVX2 |
148 | | /* Assembler implementations of Serpent using AVX2. Process 16 block in |
149 | | parallel. |
150 | | */ |
151 | | extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx, |
152 | | unsigned char *out, |
153 | | const unsigned char *in, |
154 | | unsigned char *ctr) ASM_FUNC_ABI; |
155 | | |
156 | | extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx, |
157 | | unsigned char *out, |
158 | | const unsigned char *in, |
159 | | unsigned char *iv) ASM_FUNC_ABI; |
160 | | |
161 | | extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx, |
162 | | unsigned char *out, |
163 | | const unsigned char *in, |
164 | | unsigned char *iv) ASM_FUNC_ABI; |
165 | | |
166 | | extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx, |
167 | | unsigned char *out, |
168 | | const unsigned char *in, |
169 | | unsigned char *offset, |
170 | | unsigned char *checksum, |
171 | | const u64 Ls[16]) ASM_FUNC_ABI; |
172 | | |
173 | | extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx, |
174 | | unsigned char *out, |
175 | | const unsigned char *in, |
176 | | unsigned char *offset, |
177 | | unsigned char *checksum, |
178 | | const u64 Ls[16]) ASM_FUNC_ABI; |
179 | | |
180 | | extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx, |
181 | | const unsigned char *abuf, |
182 | | unsigned char *offset, |
183 | | unsigned char *checksum, |
184 | | const u64 Ls[16]) ASM_FUNC_ABI; |
185 | | |
186 | | extern void _gcry_serpent_avx2_blk16(const serpent_context_t *c, byte *out, |
187 | | const byte *in, int encrypt) ASM_FUNC_ABI; |
188 | | #endif |
189 | | |
190 | | #ifdef USE_NEON |
191 | | /* Assembler implementations of Serpent using ARM NEON. Process 8 block in |
192 | | parallel. |
193 | | */ |
194 | | extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx, |
195 | | unsigned char *out, |
196 | | const unsigned char *in, |
197 | | unsigned char *ctr); |
198 | | |
199 | | extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx, |
200 | | unsigned char *out, |
201 | | const unsigned char *in, |
202 | | unsigned char *iv); |
203 | | |
204 | | extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx, |
205 | | unsigned char *out, |
206 | | const unsigned char *in, |
207 | | unsigned char *iv); |
208 | | |
209 | | extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx, |
210 | | unsigned char *out, |
211 | | const unsigned char *in, |
212 | | unsigned char *offset, |
213 | | unsigned char *checksum, |
214 | | const void *Ls[8]); |
215 | | |
216 | | extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx, |
217 | | unsigned char *out, |
218 | | const unsigned char *in, |
219 | | unsigned char *offset, |
220 | | unsigned char *checksum, |
221 | | const void *Ls[8]); |
222 | | |
223 | | extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx, |
224 | | const unsigned char *abuf, |
225 | | unsigned char *offset, |
226 | | unsigned char *checksum, |
227 | | const void *Ls[8]); |
228 | | |
229 | | extern void _gcry_serpent_neon_blk8(const serpent_context_t *c, byte *out, |
230 | | const byte *in, int encrypt); |
231 | | #endif |
232 | | |
233 | | |
234 | | /* Prototypes. */ |
235 | | static const char *serpent_test (void); |
236 | | |
237 | | static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr, |
238 | | void *outbuf_arg, const void *inbuf_arg, |
239 | | size_t nblocks); |
240 | | static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv, |
241 | | void *outbuf_arg, const void *inbuf_arg, |
242 | | size_t nblocks); |
243 | | static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv, |
244 | | void *outbuf_arg, const void *inbuf_arg, |
245 | | size_t nblocks); |
246 | | static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
247 | | const void *inbuf_arg, size_t nblocks, |
248 | | int encrypt); |
249 | | static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, |
250 | | size_t nblocks); |
251 | | static void _gcry_serpent_xts_crypt (void *context, unsigned char *tweak, |
252 | | void *outbuf_arg, const void *inbuf_arg, |
253 | | size_t nblocks, int encrypt); |
254 | | static void _gcry_serpent_ecb_crypt (void *context, void *outbuf_arg, |
255 | | const void *inbuf_arg, size_t nblocks, |
256 | | int encrypt); |
257 | | |
258 | | |
259 | | /* |
260 | | * These are the S-Boxes of Serpent from following research paper. |
261 | | * |
262 | | * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, |
263 | | * (New York, New York, USA), p. 317–329, National Institute of Standards and |
264 | | * Technology, 2000. |
265 | | * |
266 | | * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf |
267 | | * |
268 | | */ |
269 | | |
/* Forward S-box 0, bitsliced.  Inputs r0..r3, outputs w..z; r4 is
   scratch.  The instruction sequences below come from the Osvik paper
   cited above and are order-critical. */
#define SBOX0(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r3 ^= r0; r4 = r1; \
    r1 &= r3; r4 ^= r2; \
    r1 ^= r0; r0 |= r3; \
    r0 ^= r4; r4 ^= r3; \
    r3 ^= r2; r2 |= r1; \
    r2 ^= r4; r4 = ~r4; \
    r4 |= r1; r1 ^= r3; \
    r1 ^= r4; r3 |= r0; \
    r1 ^= r3; r4 ^= r3; \
    \
    w = r1; x = r4; y = r2; z = r0; \
  }

/* Inverse S-box 0. */
#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r2 = ~r2; r4 = r1; \
    r1 |= r0; r4 = ~r4; \
    r1 ^= r2; r2 |= r4; \
    r1 ^= r3; r0 ^= r4; \
    r2 ^= r0; r0 &= r3; \
    r4 ^= r0; r0 |= r1; \
    r0 ^= r2; r3 ^= r4; \
    r2 ^= r1; r3 ^= r0; \
    r3 ^= r1; \
    r2 &= r3; \
    r4 ^= r2; \
    \
    w = r0; x = r4; y = r1; z = r3; \
  }

/* Forward S-box 1. */
#define SBOX1(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r0 = ~r0; r2 = ~r2; \
    r4 = r0; r0 &= r1; \
    r2 ^= r0; r0 |= r3; \
    r3 ^= r2; r1 ^= r0; \
    r0 ^= r4; r4 |= r1; \
    r1 ^= r3; r2 |= r0; \
    r2 &= r4; r0 ^= r1; \
    r1 &= r2; \
    r1 ^= r0; r0 &= r2; \
    r0 ^= r4; \
    \
    w = r2; x = r0; y = r3; z = r1; \
  }

/* Inverse S-box 1. */
#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r1; r1 ^= r3; \
    r3 &= r1; r4 ^= r2; \
    r3 ^= r0; r0 |= r1; \
    r2 ^= r3; r0 ^= r4; \
    r0 |= r2; r1 ^= r3; \
    r0 ^= r1; r1 |= r3; \
    r1 ^= r0; r4 = ~r4; \
    r4 ^= r1; r1 |= r0; \
    r1 ^= r0; \
    r1 |= r4; \
    r3 ^= r1; \
    \
    w = r4; x = r0; y = r3; z = r2; \
  }

/* Forward S-box 2. */
#define SBOX2(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r0; r0 &= r2; \
    r0 ^= r3; r2 ^= r1; \
    r2 ^= r0; r3 |= r4; \
    r3 ^= r1; r4 ^= r2; \
    r1 = r3; r3 |= r4; \
    r3 ^= r0; r0 &= r1; \
    r4 ^= r0; r1 ^= r3; \
    r1 ^= r4; r4 = ~r4; \
    \
    w = r2; x = r3; y = r1; z = r4; \
  }

/* Inverse S-box 2. */
#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r2 ^= r3; r3 ^= r0; \
    r4 = r3; r3 &= r2; \
    r3 ^= r1; r1 |= r2; \
    r1 ^= r4; r4 &= r3; \
    r2 ^= r3; r4 &= r0; \
    r4 ^= r2; r2 &= r1; \
    r2 |= r0; r3 = ~r3; \
    r2 ^= r3; r0 ^= r3; \
    r0 &= r1; r3 ^= r4; \
    r3 ^= r0; \
    \
    w = r1; x = r4; y = r2; z = r3; \
  }

/* Forward S-box 3. */
#define SBOX3(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r0; r0 |= r3; \
    r3 ^= r1; r1 &= r4; \
    r4 ^= r2; r2 ^= r3; \
    r3 &= r0; r4 |= r1; \
    r3 ^= r4; r0 ^= r1; \
    r4 &= r0; r1 ^= r3; \
    r4 ^= r2; r1 |= r0; \
    r1 ^= r2; r0 ^= r3; \
    r2 = r1; r1 |= r3; \
    r1 ^= r0; \
    \
    w = r1; x = r2; y = r3; z = r4; \
  }

/* Inverse S-box 3. */
#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r2; r2 ^= r1; \
    r0 ^= r2; r4 &= r2; \
    r4 ^= r0; r0 &= r1; \
    r1 ^= r3; r3 |= r4; \
    r2 ^= r3; r0 ^= r3; \
    r1 ^= r4; r3 &= r2; \
    r3 ^= r1; r1 ^= r0; \
    r1 |= r2; r0 ^= r3; \
    r1 ^= r4; \
    r0 ^= r1; \
    \
    w = r2; x = r1; y = r3; z = r0; \
  }

/* Forward S-box 4. */
#define SBOX4(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r1 ^= r3; r3 = ~r3; \
    r2 ^= r3; r3 ^= r0; \
    r4 = r1; r1 &= r3; \
    r1 ^= r2; r4 ^= r3; \
    r0 ^= r4; r2 &= r4; \
    r2 ^= r0; r0 &= r1; \
    r3 ^= r0; r4 |= r1; \
    r4 ^= r0; r0 |= r3; \
    r0 ^= r2; r2 &= r3; \
    r0 = ~r0; r4 ^= r2; \
    \
    w = r1; x = r4; y = r0; z = r3; \
  }

/* Inverse S-box 4. */
#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r2; r2 &= r3; \
    r2 ^= r1; r1 |= r3; \
    r1 &= r0; r4 ^= r2; \
    r4 ^= r1; r1 &= r2; \
    r0 = ~r0; r3 ^= r4; \
    r1 ^= r3; r3 &= r0; \
    r3 ^= r2; r0 ^= r1; \
    r2 &= r0; r3 ^= r0; \
    r2 ^= r4; \
    r2 |= r3; r3 ^= r0; \
    r2 ^= r1; \
    \
    w = r0; x = r3; y = r2; z = r4; \
  }

/* Forward S-box 5. */
#define SBOX5(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r0 ^= r1; r1 ^= r3; \
    r3 = ~r3; r4 = r1; \
    r1 &= r0; r2 ^= r3; \
    r1 ^= r2; r2 |= r4; \
    r4 ^= r3; r3 &= r1; \
    r3 ^= r0; r4 ^= r1; \
    r4 ^= r2; r2 ^= r0; \
    r0 &= r3; r2 = ~r2; \
    r0 ^= r4; r4 |= r3; \
    r2 ^= r4; \
    \
    w = r1; x = r3; y = r0; z = r2; \
  }

/* Inverse S-box 5. */
#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r1 = ~r1; r4 = r3; \
    r2 ^= r1; r3 |= r0; \
    r3 ^= r2; r2 |= r1; \
    r2 &= r0; r4 ^= r3; \
    r2 ^= r4; r4 |= r0; \
    r4 ^= r1; r1 &= r2; \
    r1 ^= r3; r4 ^= r2; \
    r3 &= r4; r4 ^= r1; \
    r3 ^= r4; r4 = ~r4; \
    r3 ^= r0; \
    \
    w = r1; x = r4; y = r3; z = r2; \
  }

/* Forward S-box 6. */
#define SBOX6(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r2 = ~r2; r4 = r3; \
    r3 &= r0; r0 ^= r4; \
    r3 ^= r2; r2 |= r4; \
    r1 ^= r3; r2 ^= r0; \
    r0 |= r1; r2 ^= r1; \
    r4 ^= r0; r0 |= r3; \
    r0 ^= r2; r4 ^= r3; \
    r4 ^= r0; r3 = ~r3; \
    r2 &= r4; \
    r2 ^= r3; \
    \
    w = r0; x = r1; y = r4; z = r2; \
  }

/* Inverse S-box 6. */
#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r0 ^= r2; r4 = r2; \
    r2 &= r0; r4 ^= r3; \
    r2 = ~r2; r3 ^= r1; \
    r2 ^= r3; r4 |= r0; \
    r0 ^= r2; r3 ^= r4; \
    r4 ^= r1; r1 &= r3; \
    r1 ^= r0; r0 ^= r3; \
    r0 |= r2; r3 ^= r1; \
    r4 ^= r0; \
    \
    w = r1; x = r2; y = r4; z = r3; \
  }

/* Forward S-box 7. */
#define SBOX7(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r1; r1 |= r2; \
    r1 ^= r3; r4 ^= r2; \
    r2 ^= r1; r3 |= r4; \
    r3 &= r0; r4 ^= r2; \
    r3 ^= r1; r1 |= r4; \
    r1 ^= r0; r0 |= r4; \
    r0 ^= r2; r1 ^= r4; \
    r2 ^= r1; r1 &= r0; \
    r1 ^= r4; r2 = ~r2; \
    r2 |= r0; \
    r4 ^= r2; \
    \
    w = r4; x = r3; y = r1; z = r0; \
  }

/* Inverse S-box 7. */
#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 = r2; r2 ^= r0; \
    r0 &= r3; r4 |= r3; \
    r2 = ~r2; r3 ^= r1; \
    r1 |= r0; r0 ^= r2; \
    r2 &= r4; r3 &= r4; \
    r1 ^= r2; r2 ^= r0; \
    r0 |= r2; r4 ^= r1; \
    r0 ^= r3; r3 ^= r4; \
    r4 |= r0; r3 ^= r2; \
    r4 ^= r2; \
    \
    w = r3; x = r0; y = r1; z = r4; \
  }
557 | | |
/* XOR BLOCK1 into BLOCK0. */
#define BLOCK_XOR(block0, block1) \
  { \
    block0[0] ^= block1[0]; \
    block0[1] ^= block1[1]; \
    block0[2] ^= block1[2]; \
    block0[3] ^= block1[3]; \
  }

/* Copy BLOCK_SRC to BLOCK_DST. */
#define BLOCK_COPY(block_dst, block_src) \
  { \
    block_dst[0] = block_src[0]; \
    block_dst[1] = block_src[1]; \
    block_dst[2] = block_src[2]; \
    block_dst[3] = block_src[3]; \
  }

/* Apply SBOX number WHICH to the block found in ARRAY0, writing
   the output to the block found in ARRAY1. */
#define SBOX(which, array0, array1) \
  SBOX##which (array0[0], array0[1], array0[2], array0[3], \
               array1[0], array1[1], array1[2], array1[3]);

/* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
   the output to the block found in ARRAY1. */
#define SBOX_INVERSE(which, array0, array1) \
  SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3], \
                         array1[0], array1[1], array1[2], array1[3]);

/* Apply the linear transformation to BLOCK. */
#define LINEAR_TRANSFORMATION(block) \
  { \
    block[0] = rol (block[0], 13); \
    block[2] = rol (block[2], 3); \
    block[1] = block[1] ^ block[0] ^ block[2]; \
    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
    block[1] = rol (block[1], 1); \
    block[3] = rol (block[3], 7); \
    block[0] = block[0] ^ block[1] ^ block[3]; \
    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
    block[0] = rol (block[0], 5); \
    block[2] = rol (block[2], 22); \
  }

/* Apply the inverse linear transformation to BLOCK (the exact reverse
   of the statement sequence in LINEAR_TRANSFORMATION). */
#define LINEAR_TRANSFORMATION_INVERSE(block) \
  { \
    block[2] = ror (block[2], 22); \
    block[0] = ror (block[0] , 5); \
    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
    block[0] = block[0] ^ block[1] ^ block[3]; \
    block[3] = ror (block[3], 7); \
    block[1] = ror (block[1], 1); \
    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
    block[1] = block[1] ^ block[0] ^ block[2]; \
    block[2] = ror (block[2], 3); \
    block[0] = ror (block[0], 13); \
  }

/* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
   subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary storage.
   This macro increments `round'. */
#define ROUND(which, subkeys, block, block_tmp) \
  { \
    BLOCK_XOR (block, subkeys[round]); \
    round++; \
    SBOX (which, block, block_tmp); \
    LINEAR_TRANSFORMATION (block_tmp); \
    BLOCK_COPY (block, block_tmp); \
  }

/* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
   and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
   storage.  The result will be stored in BLOCK_TMP.  This macro
   increments `round' twice (it consumes two subkeys). */
#define ROUND_LAST(which, subkeys, block, block_tmp) \
  { \
    BLOCK_XOR (block, subkeys[round]); \
    round++; \
    SBOX (which, block, block_tmp); \
    BLOCK_XOR (block_tmp, subkeys[round]); \
    round++; \
  }

/* Apply an inverse Serpent round to BLOCK, using the SBOX number
   WHICH and the subkeys contained in SUBKEY.  Use BLOCK_TMP as
   temporary storage.  This macro decrements `round'. */
#define ROUND_INVERSE(which, subkey, block, block_tmp) \
  { \
    LINEAR_TRANSFORMATION_INVERSE (block); \
    SBOX_INVERSE (which, block, block_tmp); \
    BLOCK_XOR (block_tmp, subkey[round]); \
    round--; \
    BLOCK_COPY (block, block_tmp); \
  }

/* Apply the first inverse Serpent round to BLOCK (undoing the last
   forward round), using the SBOX number WHICH and the subkeys
   contained in SUBKEYS.  Use BLOCK_TMP as temporary storage.  The
   result will be stored in BLOCK_TMP.  This macro decrements `round'
   twice (it consumes two subkeys). */
#define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
  { \
    BLOCK_XOR (block, subkeys[round]); \
    round--; \
    SBOX_INVERSE (which, block, block_tmp); \
    BLOCK_XOR (block_tmp, subkeys[round]); \
    round--; \
  }
667 | | |
668 | | /* Convert the user provided key KEY of KEY_LENGTH bytes into the |
669 | | internally used format. */ |
670 | | static void |
671 | | serpent_key_prepare (const byte *key, unsigned int key_length, |
672 | | serpent_key_t key_prepared) |
673 | 0 | { |
674 | 0 | int i; |
675 | | |
676 | | /* Copy key. */ |
677 | 0 | key_length /= 4; |
678 | 0 | for (i = 0; i < key_length; i++) |
679 | 0 | key_prepared[i] = buf_get_le32 (key + i * 4); |
680 | |
|
681 | 0 | if (i < 8) |
682 | 0 | { |
683 | | /* Key must be padded according to the Serpent |
684 | | specification. */ |
685 | 0 | key_prepared[i] = 0x00000001; |
686 | |
|
687 | 0 | for (i++; i < 8; i++) |
688 | 0 | key_prepared[i] = 0; |
689 | 0 | } |
690 | 0 | } |
691 | | |
/* Derive the 33 subkeys from KEY and store them in SUBKEYS.  The key
   is expanded with the affine recurrence into 132 intermediate words
   (kept in the 8-word sliding window W) and then passed through the
   S-boxes in bitslice mode, eight words (= two subkeys) at a time. */
static void
serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
{
  u32 w[8];		/* The `prekey' (sliding window of the recurrence). */
  u32 ws[4];		/* Current four expanded words (even subkey). */
  u32 wt[4];		/* Next four expanded words (odd subkey). */

  /* Initialize with key values. */
  w[0] = key[0];
  w[1] = key[1];
  w[2] = key[2];
  w[3] = key[3];
  w[4] = key[4];
  w[5] = key[5];
  w[6] = key[6];
  w[7] = key[7];

  /* Expand to intermediate key using the affine recurrence.  R is the
     index of the first of the four words produced; the window W is
     updated in place, indexed modulo 8. */
#define EXPAND_KEY4(wo, r) \
  wo[0] = w[(r+0)%8] = \
    rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
  wo[1] = w[(r+1)%8] = \
    rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
  wo[2] = w[(r+2)%8] = \
    rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
  wo[3] = w[(r+3)%8] = \
    rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);

#define EXPAND_KEY(r) \
  EXPAND_KEY4(ws, (r)); \
  EXPAND_KEY4(wt, (r + 4));

  /* Calculate subkeys via S-Boxes, in bitslice mode.  The S-box index
     cycles 3, 2, 1, 0, 7, 6, 5, 4 as prescribed by the key schedule. */
  EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
  EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
  EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
  EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
  EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
  EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
  EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
  EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
  EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
  EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
  EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
  EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
  EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
  EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
  EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
  EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
  EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);

  /* Erase key-dependent material from the stack. */
  wipememory (ws, sizeof (ws));
  wipememory (wt, sizeof (wt));
  wipememory (w, sizeof (w));
}
748 | | |
749 | | /* Initialize CONTEXT with the key KEY of KEY_LENGTH bits. */ |
750 | | static gcry_err_code_t |
751 | | serpent_setkey_internal (serpent_context_t *context, |
752 | | const byte *key, unsigned int key_length) |
753 | 0 | { |
754 | 0 | serpent_key_t key_prepared; |
755 | |
|
756 | 0 | if (key_length > 32) |
757 | 0 | return GPG_ERR_INV_KEYLEN; |
758 | | |
759 | 0 | serpent_key_prepare (key, key_length, key_prepared); |
760 | 0 | serpent_subkeys_generate (key_prepared, context->keys); |
761 | |
|
762 | 0 | #ifdef USE_AVX2 |
763 | 0 | context->use_avx2 = 0; |
764 | 0 | if ((_gcry_get_hw_features () & HWF_INTEL_AVX2)) |
765 | 0 | { |
766 | 0 | context->use_avx2 = 1; |
767 | 0 | } |
768 | 0 | #endif |
769 | |
|
770 | | #ifdef USE_NEON |
771 | | context->use_neon = 0; |
772 | | if ((_gcry_get_hw_features () & HWF_ARM_NEON)) |
773 | | { |
774 | | context->use_neon = 1; |
775 | | } |
776 | | #endif |
777 | |
|
778 | 0 | wipememory (key_prepared, sizeof(key_prepared)); |
779 | 0 | return 0; |
780 | 0 | } |
781 | | |
782 | | /* Initialize CTX with the key KEY of KEY_LENGTH bytes. */ |
783 | | static gcry_err_code_t |
784 | | serpent_setkey (void *ctx, |
785 | | const byte *key, unsigned int key_length, |
786 | | cipher_bulk_ops_t *bulk_ops) |
787 | 0 | { |
788 | 0 | serpent_context_t *context = ctx; |
789 | 0 | static const char *serpent_test_ret; |
790 | 0 | static int serpent_init_done; |
791 | 0 | gcry_err_code_t ret = GPG_ERR_NO_ERROR; |
792 | |
|
793 | 0 | if (! serpent_init_done) |
794 | 0 | { |
795 | | /* Execute a self-test the first time, Serpent is used. */ |
796 | 0 | serpent_init_done = 1; |
797 | 0 | serpent_test_ret = serpent_test (); |
798 | 0 | if (serpent_test_ret) |
799 | 0 | log_error ("Serpent test failure: %s\n", serpent_test_ret); |
800 | 0 | } |
801 | | |
802 | | /* Setup bulk encryption routines. */ |
803 | 0 | memset (bulk_ops, 0, sizeof(*bulk_ops)); |
804 | 0 | bulk_ops->cbc_dec = _gcry_serpent_cbc_dec; |
805 | 0 | bulk_ops->cfb_dec = _gcry_serpent_cfb_dec; |
806 | 0 | bulk_ops->ctr_enc = _gcry_serpent_ctr_enc; |
807 | 0 | bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt; |
808 | 0 | bulk_ops->ocb_auth = _gcry_serpent_ocb_auth; |
809 | 0 | bulk_ops->xts_crypt = _gcry_serpent_xts_crypt; |
810 | 0 | bulk_ops->ecb_crypt = _gcry_serpent_ecb_crypt; |
811 | |
|
812 | 0 | if (serpent_test_ret) |
813 | 0 | ret = GPG_ERR_SELFTEST_FAILED; |
814 | 0 | else |
815 | 0 | ret = serpent_setkey_internal (context, key, key_length); |
816 | |
|
817 | 0 | return ret; |
818 | 0 | } |
819 | | |
/* Encrypt the 128-bit block at INPUT with the subkeys in CONTEXT and
   write the result to OUTPUT. */
static void
serpent_encrypt_internal (serpent_context_t *context,
			  const byte *input, byte *output)
{
  serpent_block_t b, b_next;
  int round = 0;	/* Subkey index; advanced by the ROUND macros. */

  /* Load the input block as four little-endian 32-bit words. */
  b[0] = buf_get_le32 (input + 0);
  b[1] = buf_get_le32 (input + 4);
  b[2] = buf_get_le32 (input + 8);
  b[3] = buf_get_le32 (input + 12);

  /* Rounds 0..30: subkey XOR, S-box, linear transformation.  The
     eight S-boxes are reused cyclically. */
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);

  /* Round 31: subkey XOR and S-box, then a final subkey XOR instead
     of the linear transformation; result is left in b_next. */
  ROUND_LAST (7, context->keys, b, b_next);

  /* Store the result as little-endian words. */
  buf_put_le32 (output + 0, b_next[0]);
  buf_put_le32 (output + 4, b_next[1]);
  buf_put_le32 (output + 8, b_next[2]);
  buf_put_le32 (output + 12, b_next[3]);
}
871 | | |
/* Decrypt one 128-bit block.  Reads the ciphertext from INPUT and
   writes the plaintext to OUTPUT (both 16-byte buffers; may be the
   same buffer).  Applies the inverse round function 32 times, walking
   the inverse S-boxes 7..0 four times over.  */
static void
serpent_decrypt_internal (serpent_context_t *context,
                          const byte *input, byte *output)
{
  serpent_block_t b, b_next;
  /* ROUND counter; the ROUND_*INVERSE macros read and update this to
     pick the matching subkey for each round.  */
  int round = ROUNDS;

  /* Load the ciphertext as four little-endian 32-bit words.  */
  b_next[0] = buf_get_le32 (input + 0);
  b_next[1] = buf_get_le32 (input + 4);
  b_next[2] = buf_get_le32 (input + 8);
  b_next[3] = buf_get_le32 (input + 12);

  /* The last encryption round is special, so the first decryption
     round undoes it separately.  */
  ROUND_FIRST_INVERSE (7, context->keys, b_next, b);

  /* 31 regular inverse rounds; the S-box index cycles 6..0, then
     7..0 three more times.  The macros ping-pong between B and
     B_NEXT internally.  */
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);

  /* Store the plaintext as four little-endian 32-bit words.  */
  buf_put_le32 (output + 0, b_next[0]);
  buf_put_le32 (output + 4, b_next[1]);
  buf_put_le32 (output + 8, b_next[2]);
  buf_put_le32 (output + 12, b_next[3]);
}
923 | | |
924 | | static unsigned int |
925 | | serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in) |
926 | 0 | { |
927 | 0 | serpent_context_t *context = ctx; |
928 | |
|
929 | 0 | serpent_encrypt_internal (context, buffer_in, buffer_out); |
930 | 0 | return /*burn_stack*/ (2 * sizeof (serpent_block_t)); |
931 | 0 | } |
932 | | |
933 | | static unsigned int |
934 | | serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in) |
935 | 0 | { |
936 | 0 | serpent_context_t *context = ctx; |
937 | |
|
938 | 0 | serpent_decrypt_internal (context, buffer_in, buffer_out); |
939 | 0 | return /*burn_stack*/ (2 * sizeof (serpent_block_t)); |
940 | 0 | } |
941 | | |
942 | | |
943 | | |
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size sizeof(serpent_block_t).  Tries the widest available SIMD
   implementation first (AVX2: 16 blocks, SSE2: 8 blocks, NEON: 8
   blocks), then falls back to the generic single-block path.  */
static void
_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Keystream scratch for the generic fallback path.  */
  unsigned char tmpbuf[sizeof(serpent_block_t)];
  /* Worst-case stack usage; the SIMD paths may lower this to zero.  */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
    /* TODO: use caching instead? */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

  /* Generic per-block fallback for whatever the SIMD paths left.  */
  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      serpent_encrypt_internal(ctx, ctr, tmpbuf);
      /* XOR the input with the encrypted counter and store in output. */
      cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf += sizeof(serpent_block_t);
      /* Increment the counter. */
      cipher_block_add(ctr, 1, sizeof(serpent_block_t));
    }

  /* Scrub the keystream scratch and any stack residue.  */
  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}
1056 | | |
/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  IV is updated
   to the last ciphertext block so that chaining continues correctly.
   Wide SIMD paths are tried first, then a generic per-block loop.  */
static void
_gcry_serpent_cbc_dec(void *context, unsigned char *iv,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Holds the raw block-decryption result in the generic path, since
     INBUF may alias OUTBUF.  */
  unsigned char savebuf[sizeof(serpent_block_t)];
  /* Worst-case stack usage; the SIMD paths may lower this to zero.  */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Generic per-block fallback.  */
  for ( ;nblocks; nblocks-- )
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF. */
      serpent_decrypt_internal (ctx, inbuf, savebuf);

      /* OUTBUF = SAVEBUF xor IV; then IV = this ciphertext block.  */
      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
                                sizeof(serpent_block_t));
      inbuf += sizeof(serpent_block_t);
      outbuf += sizeof(serpent_block_t);
    }

  /* Scrub intermediate plaintext and any stack residue.  */
  wipememory(savebuf, sizeof(savebuf));
  _gcry_burn_stack(burn_stack_depth);
}
1165 | | |
/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  Note that CFB
   decryption uses the block cipher in ENcryption direction on the IV.
   Wide SIMD paths are tried first, then a generic per-block loop.  */
static void
_gcry_serpent_cfb_dec(void *context, unsigned char *iv,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Worst-case stack usage; the SIMD paths may lower this to zero.  */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Generic per-block fallback: encrypt IV to get the keystream,
     XOR with ciphertext, and make this ciphertext the next IV.  */
  for ( ;nblocks; nblocks-- )
    {
      serpent_encrypt_internal(ctx, iv, iv);
      cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf += sizeof(serpent_block_t);
    }

  _gcry_burn_stack(burn_stack_depth);
}
1268 | | |
/* Bulk encryption/decryption of complete blocks in OCB mode.  Handles
   as many blocks as possible with the available SIMD implementation
   and returns the number of blocks NOT processed (the generic code in
   cipher.c finishes those).  Without any SIMD support this is a
   no-op that returns NBLOCKS unchanged.  */
static size_t
_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                         const void *inbuf_arg, size_t nblocks, int encrypt)
{
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  serpent_context_t *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 2 * sizeof (serpent_block_t);
  /* Running data-block index, needed to select OCB offsets (L values).  */
  u64 blkn = c->u_mode.ocb.data_nblocks;
#else
  (void)c;
  (void)outbuf_arg;
  (void)inbuf_arg;
  (void)encrypt;
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          /* Fill the per-chunk table of L-value pointers; L points at
             the slot that must be refreshed each 16-block chunk.  */
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              if (encrypt)
                _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, Ls);
              else
                _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, Ls);

              nblocks -= 16;
              outbuf += 16 * sizeof(serpent_block_t);
              inbuf += 16 * sizeof(serpent_block_t);
              did_use_avx2 = 1;
            }
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;
    u64 Ls[8];
    u64 *l;

    if (nblocks >= 8)
      {
        /* Same scheme as above with 8-block chunks.  */
        l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

        /* Process data in 8 block chunks. */
        while (nblocks >= 8)
          {
            blkn += 8;
            *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

            if (encrypt)
              _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                         c->u_ctr.ctr, Ls);
            else
              _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                         c->u_ctr.ctr, Ls);

            nblocks -= 8;
            outbuf += 8 * sizeof(serpent_block_t);
            inbuf += 8 * sizeof(serpent_block_t);
            did_use_sse2 = 1;
          }
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;
      uintptr_t Ls[8];
      uintptr_t *l;

      if (nblocks >= 8)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

          /* Process data in 8 block chunks. */
          while (nblocks >= 8)
            {
              blkn += 8;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

              if (encrypt)
                _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, (const void **)Ls);
              else
                _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, (const void **)Ls);

              nblocks -= 8;
              outbuf += 8 * sizeof(serpent_block_t);
              inbuf += 8 * sizeof(serpent_block_t);
              did_use_neon = 1;
            }
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  /* Persist the advanced block counter for subsequent calls.  */
  c->u_mode.ocb.data_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  /* Remaining blocks are left for the caller's generic path.  */
  return nblocks;
}
1421 | | |
/* Bulk authentication of complete blocks in OCB mode.  Absorbs AAD
   blocks into c->u_mode.ocb.aad_sum using the available SIMD
   implementation and returns the number of blocks NOT processed.  */
static size_t
_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                        size_t nblocks)
{
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  serpent_context_t *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  int burn_stack_depth = 2 * sizeof(serpent_block_t);
  /* Running AAD-block index, needed to select OCB offsets (L values).  */
  u64 blkn = c->u_mode.ocb.aad_nblocks;
#else
  (void)c;
  (void)abuf_arg;
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          /* Fill the per-chunk table of L-value pointers; L points at
             the slot that must be refreshed each 16-block chunk.  */
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
                                          c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 16;
              abuf += 16 * sizeof(serpent_block_t);
              did_use_avx2 = 1;
            }
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;
    u64 Ls[8];
    u64 *l;

    if (nblocks >= 8)
      {
        l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

        /* Process data in 8 block chunks. */
        while (nblocks >= 8)
          {
            blkn += 8;
            *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

            _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
                                        c->u_mode.ocb.aad_sum, Ls);

            nblocks -= 8;
            abuf += 8 * sizeof(serpent_block_t);
            did_use_sse2 = 1;
          }
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;
      uintptr_t Ls[8];
      uintptr_t *l;

      if (nblocks >= 8)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

          /* Process data in 8 block chunks. */
          while (nblocks >= 8)
            {
              blkn += 8;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

              _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
                                          c->u_mode.ocb.aad_sum,
                                          (const void **)Ls);

              nblocks -= 8;
              abuf += 8 * sizeof(serpent_block_t);
              did_use_neon = 1;
            }
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  /* Persist the advanced AAD block counter for subsequent calls.  */
  c->u_mode.ocb.aad_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  /* Remaining blocks are left for the caller's generic path.  */
  return nblocks;
}
1557 | | |
1558 | | |
/* Encrypt (ENCRYPT != 0) or decrypt NUM_BLKS (1..16) complete blocks
   from IN to OUT, using the widest available SIMD implementation for
   as much of the input as possible.  Returns the stack burn depth
   needed by the generic path (0 if only assembly paths ran).  */
static unsigned int
serpent_crypt_blk1_16(const void *context, byte *out, const byte *in,
                      unsigned int num_blks, int encrypt)
{
  const serpent_context_t *ctx = context;
  unsigned int burn, burn_stack_depth = 0;

#ifdef USE_AVX2
  /* Full 16-block batch handled entirely by AVX2 assembly.  */
  if (num_blks == 16 && ctx->use_avx2)
    {
      _gcry_serpent_avx2_blk16 (ctx, out, in, encrypt);
      return 0;
    }
#endif

#ifdef USE_SSE2
  /* Handle groups of 8 blocks with SSE2 assembly.  */
  while (num_blks >= 8)
    {
      _gcry_serpent_sse2_blk8 (ctx, out, in, encrypt);
      out += 8 * sizeof(serpent_block_t);
      in += 8 * sizeof(serpent_block_t);
      num_blks -= 8;
    }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      /* Handle groups of 8 blocks with NEON assembly.  */
      while (num_blks >= 8)
        {
          _gcry_serpent_neon_blk8 (ctx, out, in, encrypt);
          out += 8 * sizeof(serpent_block_t);
          in += 8 * sizeof(serpent_block_t);
          num_blks -= 8;
        }
    }
#endif

  /* Generic one-block-at-a-time fallback for the remainder.  */
  while (num_blks >= 1)
    {
      if (encrypt)
        serpent_encrypt_internal((void *)ctx, in, out);
      else
        serpent_decrypt_internal((void *)ctx, in, out);

      /* The internal routines keep two block-sized locals on stack.  */
      burn = 2 * sizeof(serpent_block_t);
      burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
      out += sizeof(serpent_block_t);
      in += sizeof(serpent_block_t);
      num_blks--;
    }

  return burn_stack_depth;
}
1613 | | |
1614 | | static unsigned int |
1615 | | serpent_encrypt_blk1_16(const void *ctx, byte *out, const byte *in, |
1616 | | unsigned int num_blks) |
1617 | 0 | { |
1618 | 0 | return serpent_crypt_blk1_16 (ctx, out, in, num_blks, 1); |
1619 | 0 | } |
1620 | | |
1621 | | static unsigned int |
1622 | | serpent_decrypt_blk1_16(const void *ctx, byte *out, const byte *in, |
1623 | | unsigned int num_blks) |
1624 | 0 | { |
1625 | 0 | return serpent_crypt_blk1_16 (ctx, out, in, num_blks, 0); |
1626 | 0 | } |
1627 | | |
1628 | | |
/* Bulk encryption/decryption of complete blocks in XTS mode.  TWEAK is
   a 16-byte buffer updated across calls; dispatch to the blk1_16
   worker is handled by the shared bulk_xts_crypt_128 helper.  */
static void
_gcry_serpent_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
                         const void *inbuf_arg, size_t nblocks, int encrypt)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Process remaining blocks. */
  if (nblocks)
    {
      /* Scratch for up to 16 blocks of 16 bytes each.  */
      unsigned char tmpbuf[16 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      nburn = bulk_xts_crypt_128(ctx, encrypt ? serpent_encrypt_blk1_16
                                              : serpent_decrypt_blk1_16,
                                 outbuf, inbuf, nblocks,
                                 tweak, tmpbuf, sizeof(tmpbuf) / 16,
                                 &tmp_used);
      /* NOTE(review): size_t vs int comparison here; fine while both
         are small and non-negative, but worth confirming upstream.  */
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      /* Only wipe as much scratch as the helper actually used.  */
      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1659 | | |
1660 | | |
/* Bulk encryption/decryption in ECB mode.  Dispatches up to 16 blocks
   at a time to the blk1_16 worker via the shared bulk_ecb_crypt_128
   helper.  */
static void
_gcry_serpent_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg,
                         size_t nblocks, int encrypt)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Process remaining blocks. */
  if (nblocks)
    {
      size_t nburn;

      nburn = bulk_ecb_crypt_128(ctx, encrypt ? serpent_encrypt_blk1_16
                                              : serpent_decrypt_blk1_16,
                                 outbuf, inbuf, nblocks, 16);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1685 | | |
1686 | | |
1687 | | |
1688 | | /* Serpent test. */ |
1689 | | |
/* Power-on self test: run known-answer encrypt/decrypt checks for all
   three key sizes.  Returns NULL on success or a static English error
   string naming the failing variant.  */
static const char *
serpent_test (void)
{
  serpent_context_t context;
  unsigned char scratch[16];
  unsigned int i;

  /* Known-answer test vectors; a zero KEY_LENGTH terminates the list.  */
  static struct test
  {
    int key_length;
    unsigned char key[32];
    unsigned char text_plain[16];
    unsigned char text_cipher[16];
  } test_data[] =
    {
      {
        16,
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
        "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
      },
      {
        24,
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        "\x00\x00\x00\x00\x00\x00\x00\x00",
        "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
        "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
      },
      {
        32,
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
        "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
      },
      {
        32,
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
        "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
      },
      {
        0
      },
    };

  for (i = 0; test_data[i].key_length; i++)
    {
      /* Forward (encryption) known-answer check.  */
      serpent_setkey_internal (&context, test_data[i].key,
                               test_data[i].key_length);
      serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);

      if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
        switch (test_data[i].key_length)
          {
          case 16:
            return "Serpent-128 test encryption failed.";
          case 24:
            return "Serpent-192 test encryption failed.";
          case 32:
            return "Serpent-256 test encryption failed.";
          }

      /* Inverse (decryption) known-answer check.  */
      serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
      if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
        switch (test_data[i].key_length)
          {
          case 16:
            return "Serpent-128 test decryption failed.";
          case 24:
            return "Serpent-192 test decryption failed.";
          case 32:
            return "Serpent-256 test decryption failed.";
          }
    }

  /* All vectors passed.  */
  return NULL;
}
1769 | | |
1770 | | |
/* ASN.1 OIDs under the GnuPG arc (1.3.6.1.4.1.11591) that map to
   Serpent-128 in the listed cipher modes; NULL entry terminates.  */
static const gcry_cipher_oid_spec_t serpent128_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };

/* OIDs for Serpent-192; NULL entry terminates.  */
static const gcry_cipher_oid_spec_t serpent192_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };

/* OIDs for Serpent-256; NULL entry terminates.  */
static const gcry_cipher_oid_spec_t serpent256_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };
1797 | | |
/* Alternative algorithm names accepted by name lookup; NULL-terminated.
   Plain "SERPENT" resolves to the 128-bit variant.  */
static const char *serpent128_aliases[] =
  {
    "SERPENT",
    "SERPENT-128",
    NULL
  };
/* Aliases for the 192-bit variant; NULL-terminated.  */
static const char *serpent192_aliases[] =
  {
    "SERPENT-192",
    NULL
  };
/* Aliases for the 256-bit variant; NULL-terminated.  */
static const char *serpent256_aliases[] =
  {
    "SERPENT-256",
    NULL
  };
1814 | | |
/* Cipher specification structures registering the three Serpent key
   sizes with the cipher subsystem.  Fields: algo id, flags, name,
   aliases, OIDs, block size (16), key length in bits, context size,
   and the setkey/encrypt/decrypt entry points.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
  {
    GCRY_CIPHER_SERPENT128, {0, 0},
    "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };

/* Serpent with a 192-bit key.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
  {
    GCRY_CIPHER_SERPENT192, {0, 0},
    "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };

/* Serpent with a 256-bit key.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
  {
    GCRY_CIPHER_SERPENT256, {0, 0},
    "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };