Coverage Report

Created: 2024-11-21 07:03

/src/libgcrypt/cipher/serpent.c
Line
Count
Source (jump to first uncovered line)
1
/* serpent.c - Implementation of the Serpent encryption algorithm.
2
 *  Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
 *
4
 * This file is part of Libgcrypt.
5
 *
6
 * Libgcrypt is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2.1 of
9
 * the License, or (at your option) any later version.
10
 *
11
 * Libgcrypt is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this program; if not, see <https://www.gnu.org/licenses/>.
18
 * SPDX-License-Identifier: LGPL-2.1-or-later
19
 */
20
21
#include <config.h>
22
23
#include <string.h>
24
#include <stdio.h>
25
26
#include "types.h"
27
#include "g10lib.h"
28
#include "cipher.h"
29
#include "bithelp.h"
30
#include "bufhelp.h"
31
#include "cipher-internal.h"
32
#include "bulkhelp.h"
33
34
35
/* USE_SSE2 indicates whether to compile with x86-64 SSE2 code. */
36
#undef USE_SSE2
37
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
38
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
39
# define USE_SSE2 1
40
#endif
41
42
/* USE_AVX2 indicates whether to compile with x86-64 AVX2 code. */
43
#undef USE_AVX2
44
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
45
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
46
# if defined(ENABLE_AVX2_SUPPORT)
47
#  define USE_AVX2 1
48
# endif
49
#endif
50
51
/* USE_AVX512 indicates whether to compile with x86 AVX512 code. */
52
#undef USE_AVX512
53
#if (defined(__x86_64) || defined(__i386)) && \
54
    defined(HAVE_COMPATIBLE_CC_X86_AVX512_INTRINSICS)
55
# if defined(ENABLE_AVX512_SUPPORT)
56
#  define USE_AVX512 1
57
# endif
58
#endif
59
60
/* USE_NEON indicates whether to enable ARM NEON assembly code. */
61
#undef USE_NEON
62
#ifdef ENABLE_NEON_SUPPORT
63
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
64
     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
65
     && defined(HAVE_GCC_INLINE_ASM_NEON)
66
#  define USE_NEON 1
67
# endif
68
#endif /*ENABLE_NEON_SUPPORT*/
69
70
/* Number of rounds per Serpent encrypt/decrypt operation.  */
71
20
#define ROUNDS 32
72
73
/* Magic number, used during generating of the subkeys.  */
74
5.54k
#define PHI 0x9E3779B9
75
76
/* Serpent works on 128 bit blocks.  */
77
typedef u32 serpent_block_t[4];
78
79
/* Serpent key, provided by the user.  If the original key is shorter
80
   than 256 bits, it is padded.  */
81
typedef u32 serpent_key_t[8];
82
83
/* The key schedule consists of 33 128 bit subkeys.  */
84
typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
85
86
/* A Serpent context.  */
87
typedef struct serpent_context
88
{
89
  serpent_subkeys_t keys; /* Generated subkeys.  */
90
91
#ifdef USE_AVX2
92
  int use_avx2;
93
#endif
94
#ifdef USE_AVX512
95
  int use_avx512;
96
#endif
97
#ifdef USE_NEON
98
  int use_neon;
99
#endif
100
} serpent_context_t;
101
102
103
/* Assembly implementations use SystemV ABI, ABI conversion and additional
104
 * stack to store XMM6-XMM15 needed on Win64. */
105
#undef ASM_FUNC_ABI
106
#if defined(USE_SSE2) || defined(USE_AVX2)
107
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
108
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
109
# else
110
#  define ASM_FUNC_ABI
111
# endif
112
#endif
113
114
115
#ifdef USE_SSE2
116
/* Assembler implementations of Serpent using SSE2.  Process 8 blocks in
117
   parallel.
118
 */
119
extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
120
               unsigned char *out,
121
               const unsigned char *in,
122
               unsigned char *ctr) ASM_FUNC_ABI;
123
124
extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
125
               unsigned char *out,
126
               const unsigned char *in,
127
               unsigned char *iv) ASM_FUNC_ABI;
128
129
extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
130
               unsigned char *out,
131
               const unsigned char *in,
132
               unsigned char *iv) ASM_FUNC_ABI;
133
134
extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
135
               unsigned char *out,
136
               const unsigned char *in,
137
               unsigned char *offset,
138
               unsigned char *checksum,
139
               const u64 Ls[8]) ASM_FUNC_ABI;
140
141
extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
142
               unsigned char *out,
143
               const unsigned char *in,
144
               unsigned char *offset,
145
               unsigned char *checksum,
146
               const u64 Ls[8]) ASM_FUNC_ABI;
147
148
extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
149
          const unsigned char *abuf,
150
          unsigned char *offset,
151
          unsigned char *checksum,
152
          const u64 Ls[8]) ASM_FUNC_ABI;
153
154
extern void _gcry_serpent_sse2_blk8(const serpent_context_t *c, byte *out,
155
            const byte *in, int encrypt) ASM_FUNC_ABI;
156
#endif
157
158
#ifdef USE_AVX2
159
/* Assembler implementations of Serpent using AVX2.  Process 16 blocks in
160
   parallel.
161
 */
162
extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
163
               unsigned char *out,
164
               const unsigned char *in,
165
               unsigned char *ctr) ASM_FUNC_ABI;
166
167
extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
168
               unsigned char *out,
169
               const unsigned char *in,
170
               unsigned char *iv) ASM_FUNC_ABI;
171
172
extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
173
               unsigned char *out,
174
               const unsigned char *in,
175
               unsigned char *iv) ASM_FUNC_ABI;
176
177
extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
178
               unsigned char *out,
179
               const unsigned char *in,
180
               unsigned char *offset,
181
               unsigned char *checksum,
182
               const u64 Ls[16]) ASM_FUNC_ABI;
183
184
extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
185
               unsigned char *out,
186
               const unsigned char *in,
187
               unsigned char *offset,
188
               unsigned char *checksum,
189
               const u64 Ls[16]) ASM_FUNC_ABI;
190
191
extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
192
          const unsigned char *abuf,
193
          unsigned char *offset,
194
          unsigned char *checksum,
195
          const u64 Ls[16]) ASM_FUNC_ABI;
196
197
extern void _gcry_serpent_avx2_blk16(const serpent_context_t *c, byte *out,
198
             const byte *in, int encrypt) ASM_FUNC_ABI;
199
#endif
200
201
#ifdef USE_AVX512
202
/* Assembler implementations of Serpent using AVX512.  Processing 32 blocks in
203
   parallel.
204
 */
205
extern void _gcry_serpent_avx512_cbc_dec(const void *ctx,
206
           unsigned char *out,
207
           const unsigned char *in,
208
           unsigned char *iv);
209
210
extern void _gcry_serpent_avx512_cfb_dec(const void *ctx,
211
           unsigned char *out,
212
           const unsigned char *in,
213
           unsigned char *iv);
214
215
extern void _gcry_serpent_avx512_ctr_enc(const void *ctx,
216
           unsigned char *out,
217
           const unsigned char *in,
218
           unsigned char *ctr);
219
220
extern void _gcry_serpent_avx512_ocb_crypt(const void *ctx,
221
             unsigned char *out,
222
             const unsigned char *in,
223
             unsigned char *offset,
224
             unsigned char *checksum,
225
             const ocb_L_uintptr_t Ls[32],
226
             int encrypt);
227
228
extern void _gcry_serpent_avx512_blk32(const void *c, byte *out,
229
               const byte *in,
230
               int encrypt);
231
#endif
232
233
#ifdef USE_NEON
234
/* Assembler implementations of Serpent using ARM NEON.  Process 8 blocks in
235
   parallel.
236
 */
237
extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
238
               unsigned char *out,
239
               const unsigned char *in,
240
               unsigned char *ctr);
241
242
extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
243
               unsigned char *out,
244
               const unsigned char *in,
245
               unsigned char *iv);
246
247
extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
248
               unsigned char *out,
249
               const unsigned char *in,
250
               unsigned char *iv);
251
252
extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
253
               unsigned char *out,
254
               const unsigned char *in,
255
               unsigned char *offset,
256
               unsigned char *checksum,
257
               const void *Ls[8]);
258
259
extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
260
               unsigned char *out,
261
               const unsigned char *in,
262
               unsigned char *offset,
263
               unsigned char *checksum,
264
               const void *Ls[8]);
265
266
extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
267
          const unsigned char *abuf,
268
          unsigned char *offset,
269
          unsigned char *checksum,
270
          const void *Ls[8]);
271
272
extern void _gcry_serpent_neon_blk8(const serpent_context_t *c, byte *out,
273
            const byte *in, int encrypt);
274
#endif
275
276
277
/* Prototypes.  */
278
static const char *serpent_test (void);
279
280
static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr,
281
           void *outbuf_arg, const void *inbuf_arg,
282
           size_t nblocks);
283
static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv,
284
           void *outbuf_arg, const void *inbuf_arg,
285
           size_t nblocks);
286
static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv,
287
           void *outbuf_arg, const void *inbuf_arg,
288
           size_t nblocks);
289
static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
290
               const void *inbuf_arg, size_t nblocks,
291
               int encrypt);
292
static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
293
              size_t nblocks);
294
static void _gcry_serpent_xts_crypt (void *context, unsigned char *tweak,
295
             void *outbuf_arg, const void *inbuf_arg,
296
             size_t nblocks, int encrypt);
297
static void _gcry_serpent_ecb_crypt (void *context, void *outbuf_arg,
298
             const void *inbuf_arg, size_t nblocks,
299
             int encrypt);
300
301
302
/*
303
 * These are the S-Boxes of Serpent from following research paper.
304
 *
305
 *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
306
 *   (New York, New York, USA), p. 317–329, National Institute of Standards and
307
 *   Technology, 2000.
308
 *
309
 * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
310
 *
311
 */
312
313
#define SBOX0(r0, r1, r2, r3, w, x, y, z) \
314
28.3k
  { \
315
28.3k
    u32 r4; \
316
28.3k
    \
317
28.3k
    r3 ^= r0; r4 =  r1; \
318
28.3k
    r1 &= r3; r4 ^= r2; \
319
28.3k
    r1 ^= r0; r0 |= r3; \
320
28.3k
    r0 ^= r4; r4 ^= r3; \
321
28.3k
    r3 ^= r2; r2 |= r1; \
322
28.3k
    r2 ^= r4; r4 = ~r4; \
323
28.3k
    r4 |= r1; r1 ^= r3; \
324
28.3k
    r1 ^= r4; r3 |= r0; \
325
28.3k
    r1 ^= r3; r4 ^= r3; \
326
28.3k
    \
327
28.3k
    w = r1; x = r4; y = r2; z = r0; \
328
28.3k
  }
329
330
#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
331
80
  { \
332
80
    u32 r4; \
333
80
    \
334
80
    r2 = ~r2; r4 =  r1; \
335
80
    r1 |= r0; r4 = ~r4; \
336
80
    r1 ^= r2; r2 |= r4; \
337
80
    r1 ^= r3; r0 ^= r4; \
338
80
    r2 ^= r0; r0 &= r3; \
339
80
    r4 ^= r0; r0 |= r1; \
340
80
    r0 ^= r2; r3 ^= r4; \
341
80
    r2 ^= r1; r3 ^= r0; \
342
80
    r3 ^= r1; \
343
80
    r2 &= r3; \
344
80
    r4 ^= r2; \
345
80
    \
346
80
    w = r0; x = r4; y = r1; z = r3; \
347
80
  }
348
349
#define SBOX1(r0, r1, r2, r3, w, x, y, z) \
350
28.3k
  { \
351
28.3k
    u32 r4; \
352
28.3k
    \
353
28.3k
    r0 = ~r0; r2 = ~r2; \
354
28.3k
    r4 =  r0; r0 &= r1; \
355
28.3k
    r2 ^= r0; r0 |= r3; \
356
28.3k
    r3 ^= r2; r1 ^= r0; \
357
28.3k
    r0 ^= r4; r4 |= r1; \
358
28.3k
    r1 ^= r3; r2 |= r0; \
359
28.3k
    r2 &= r4; r0 ^= r1; \
360
28.3k
    r1 &= r2; \
361
28.3k
    r1 ^= r0; r0 &= r2; \
362
28.3k
    r0 ^= r4; \
363
28.3k
    \
364
28.3k
    w = r2; x = r0; y = r3; z = r1; \
365
28.3k
  }
366
367
#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
368
80
  { \
369
80
    u32 r4; \
370
80
    \
371
80
    r4 =  r1; r1 ^= r3; \
372
80
    r3 &= r1; r4 ^= r2; \
373
80
    r3 ^= r0; r0 |= r1; \
374
80
    r2 ^= r3; r0 ^= r4; \
375
80
    r0 |= r2; r1 ^= r3; \
376
80
    r0 ^= r1; r1 |= r3; \
377
80
    r1 ^= r0; r4 = ~r4; \
378
80
    r4 ^= r1; r1 |= r0; \
379
80
    r1 ^= r0; \
380
80
    r1 |= r4; \
381
80
    r3 ^= r1; \
382
80
    \
383
80
    w = r4; x = r0; y = r3; z = r2; \
384
80
  }
385
386
#define SBOX2(r0, r1, r2, r3, w, x, y, z) \
387
28.3k
  { \
388
28.3k
    u32 r4; \
389
28.3k
    \
390
28.3k
    r4 =  r0; r0 &= r2; \
391
28.3k
    r0 ^= r3; r2 ^= r1; \
392
28.3k
    r2 ^= r0; r3 |= r4; \
393
28.3k
    r3 ^= r1; r4 ^= r2; \
394
28.3k
    r1 =  r3; r3 |= r4; \
395
28.3k
    r3 ^= r0; r0 &= r1; \
396
28.3k
    r4 ^= r0; r1 ^= r3; \
397
28.3k
    r1 ^= r4; r4 = ~r4; \
398
28.3k
    \
399
28.3k
    w = r2; x = r3; y = r1; z = r4; \
400
28.3k
  }
401
402
#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
403
80
  { \
404
80
    u32 r4; \
405
80
    \
406
80
    r2 ^= r3; r3 ^= r0; \
407
80
    r4 =  r3; r3 &= r2; \
408
80
    r3 ^= r1; r1 |= r2; \
409
80
    r1 ^= r4; r4 &= r3; \
410
80
    r2 ^= r3; r4 &= r0; \
411
80
    r4 ^= r2; r2 &= r1; \
412
80
    r2 |= r0; r3 = ~r3; \
413
80
    r2 ^= r3; r0 ^= r3; \
414
80
    r0 &= r1; r3 ^= r4; \
415
80
    r3 ^= r0; \
416
80
    \
417
80
    w = r1; x = r4; y = r2; z = r3; \
418
80
  }
419
420
#define SBOX3(r0, r1, r2, r3, w, x, y, z) \
421
28.4k
  { \
422
28.4k
    u32 r4; \
423
28.4k
    \
424
28.4k
    r4 =  r0; r0 |= r3; \
425
28.4k
    r3 ^= r1; r1 &= r4; \
426
28.4k
    r4 ^= r2; r2 ^= r3; \
427
28.4k
    r3 &= r0; r4 |= r1; \
428
28.4k
    r3 ^= r4; r0 ^= r1; \
429
28.4k
    r4 &= r0; r1 ^= r3; \
430
28.4k
    r4 ^= r2; r1 |= r0; \
431
28.4k
    r1 ^= r2; r0 ^= r3; \
432
28.4k
    r2 =  r1; r1 |= r3; \
433
28.4k
    r1 ^= r0; \
434
28.4k
    \
435
28.4k
    w = r1; x = r2; y = r3; z = r4; \
436
28.4k
  }
437
438
#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
439
80
  { \
440
80
    u32 r4; \
441
80
    \
442
80
    r4 =  r2; r2 ^= r1; \
443
80
    r0 ^= r2; r4 &= r2; \
444
80
    r4 ^= r0; r0 &= r1; \
445
80
    r1 ^= r3; r3 |= r4; \
446
80
    r2 ^= r3; r0 ^= r3; \
447
80
    r1 ^= r4; r3 &= r2; \
448
80
    r3 ^= r1; r1 ^= r0; \
449
80
    r1 |= r2; r0 ^= r3; \
450
80
    r1 ^= r4; \
451
80
    r0 ^= r1; \
452
80
    \
453
80
    w = r2; x = r1; y = r3; z = r0; \
454
80
  }
455
456
#define SBOX4(r0, r1, r2, r3, w, x, y, z) \
457
28.3k
  { \
458
28.3k
    u32 r4; \
459
28.3k
    \
460
28.3k
    r1 ^= r3; r3 = ~r3; \
461
28.3k
    r2 ^= r3; r3 ^= r0; \
462
28.3k
    r4 =  r1; r1 &= r3; \
463
28.3k
    r1 ^= r2; r4 ^= r3; \
464
28.3k
    r0 ^= r4; r2 &= r4; \
465
28.3k
    r2 ^= r0; r0 &= r1; \
466
28.3k
    r3 ^= r0; r4 |= r1; \
467
28.3k
    r4 ^= r0; r0 |= r3; \
468
28.3k
    r0 ^= r2; r2 &= r3; \
469
28.3k
    r0 = ~r0; r4 ^= r2; \
470
28.3k
    \
471
28.3k
    w = r1; x = r4; y = r0; z = r3; \
472
28.3k
  }
473
474
#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
475
80
  { \
476
80
    u32 r4; \
477
80
    \
478
80
    r4 =  r2; r2 &= r3; \
479
80
    r2 ^= r1; r1 |= r3; \
480
80
    r1 &= r0; r4 ^= r2; \
481
80
    r4 ^= r1; r1 &= r2; \
482
80
    r0 = ~r0; r3 ^= r4; \
483
80
    r1 ^= r3; r3 &= r0; \
484
80
    r3 ^= r2; r0 ^= r1; \
485
80
    r2 &= r0; r3 ^= r0; \
486
80
    r2 ^= r4; \
487
80
    r2 |= r3; r3 ^= r0; \
488
80
    r2 ^= r1; \
489
80
    \
490
80
    w = r0; x = r3; y = r2; z = r4; \
491
80
  }
492
493
#define SBOX5(r0, r1, r2, r3, w, x, y, z) \
494
28.3k
  { \
495
28.3k
    u32 r4; \
496
28.3k
    \
497
28.3k
    r0 ^= r1; r1 ^= r3; \
498
28.3k
    r3 = ~r3; r4 =  r1; \
499
28.3k
    r1 &= r0; r2 ^= r3; \
500
28.3k
    r1 ^= r2; r2 |= r4; \
501
28.3k
    r4 ^= r3; r3 &= r1; \
502
28.3k
    r3 ^= r0; r4 ^= r1; \
503
28.3k
    r4 ^= r2; r2 ^= r0; \
504
28.3k
    r0 &= r3; r2 = ~r2; \
505
28.3k
    r0 ^= r4; r4 |= r3; \
506
28.3k
    r2 ^= r4; \
507
28.3k
    \
508
28.3k
    w = r1; x = r3; y = r0; z = r2; \
509
28.3k
  }
510
511
#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
512
80
  { \
513
80
    u32 r4; \
514
80
    \
515
80
    r1 = ~r1; r4 =  r3; \
516
80
    r2 ^= r1; r3 |= r0; \
517
80
    r3 ^= r2; r2 |= r1; \
518
80
    r2 &= r0; r4 ^= r3; \
519
80
    r2 ^= r4; r4 |= r0; \
520
80
    r4 ^= r1; r1 &= r2; \
521
80
    r1 ^= r3; r4 ^= r2; \
522
80
    r3 &= r4; r4 ^= r1; \
523
80
    r3 ^= r4; r4 = ~r4; \
524
80
    r3 ^= r0; \
525
80
    \
526
80
    w = r1; x = r4; y = r3; z = r2; \
527
80
  }
528
529
#define SBOX6(r0, r1, r2, r3, w, x, y, z) \
530
28.3k
  { \
531
28.3k
    u32 r4; \
532
28.3k
    \
533
28.3k
    r2 = ~r2; r4 =  r3; \
534
28.3k
    r3 &= r0; r0 ^= r4; \
535
28.3k
    r3 ^= r2; r2 |= r4; \
536
28.3k
    r1 ^= r3; r2 ^= r0; \
537
28.3k
    r0 |= r1; r2 ^= r1; \
538
28.3k
    r4 ^= r0; r0 |= r3; \
539
28.3k
    r0 ^= r2; r4 ^= r3; \
540
28.3k
    r4 ^= r0; r3 = ~r3; \
541
28.3k
    r2 &= r4; \
542
28.3k
    r2 ^= r3; \
543
28.3k
    \
544
28.3k
    w = r0; x = r1; y = r4; z = r2; \
545
28.3k
  }
546
547
#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
548
80
  { \
549
80
    u32 r4; \
550
80
    \
551
80
    r0 ^= r2; r4 =  r2; \
552
80
    r2 &= r0; r4 ^= r3; \
553
80
    r2 = ~r2; r3 ^= r1; \
554
80
    r2 ^= r3; r4 |= r0; \
555
80
    r0 ^= r2; r3 ^= r4; \
556
80
    r4 ^= r1; r1 &= r3; \
557
80
    r1 ^= r0; r0 ^= r3; \
558
80
    r0 |= r2; r3 ^= r1; \
559
80
    r4 ^= r0; \
560
80
    \
561
80
    w = r1; x = r2; y = r4; z = r3; \
562
80
  }
563
564
#define SBOX7(r0, r1, r2, r3, w, x, y, z) \
565
28.3k
  { \
566
28.3k
    u32 r4; \
567
28.3k
    \
568
28.3k
    r4 =  r1; r1 |= r2; \
569
28.3k
    r1 ^= r3; r4 ^= r2; \
570
28.3k
    r2 ^= r1; r3 |= r4; \
571
28.3k
    r3 &= r0; r4 ^= r2; \
572
28.3k
    r3 ^= r1; r1 |= r4; \
573
28.3k
    r1 ^= r0; r0 |= r4; \
574
28.3k
    r0 ^= r2; r1 ^= r4; \
575
28.3k
    r2 ^= r1; r1 &= r0; \
576
28.3k
    r1 ^= r4; r2 = ~r2; \
577
28.3k
    r2 |= r0; \
578
28.3k
    r4 ^= r2; \
579
28.3k
    \
580
28.3k
    w = r4; x = r3; y = r1; z = r0; \
581
28.3k
  }
582
583
#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
584
80
  { \
585
80
    u32 r4; \
586
80
    \
587
80
    r4 =  r2; r2 ^= r0; \
588
80
    r0 &= r3; r4 |= r3; \
589
80
    r2 = ~r2; r3 ^= r1; \
590
80
    r1 |= r0; r0 ^= r2; \
591
80
    r2 &= r4; r3 &= r4; \
592
80
    r1 ^= r2; r2 ^= r0; \
593
80
    r0 |= r2; r4 ^= r1; \
594
80
    r0 ^= r3; r3 ^= r4; \
595
80
    r4 |= r0; r3 ^= r2; \
596
80
    r4 ^= r2; \
597
80
    \
598
80
    w = r3; x = r0; y = r1; z = r4; \
599
80
  }
600
601
/* XOR BLOCK1 into BLOCK0.  */
602
#define BLOCK_XOR(block0, block1) \
603
233k
  {                               \
604
233k
    block0[0] ^= block1[0];       \
605
233k
    block0[1] ^= block1[1];       \
606
233k
    block0[2] ^= block1[2];       \
607
233k
    block0[3] ^= block1[3];       \
608
233k
  }
609
610
/* Copy BLOCK_SRC to BLOCK_DST.  */
611
#define BLOCK_COPY(block_dst, block_src) \
612
219k
  {                                      \
613
219k
    block_dst[0] = block_src[0];         \
614
219k
    block_dst[1] = block_src[1];         \
615
219k
    block_dst[2] = block_src[2];         \
616
219k
    block_dst[3] = block_src[3];         \
617
219k
  }
618
619
/* Apply SBOX number WHICH to the block found in ARRAY0, writing
620
   the output to the block found in ARRAY1.  */
621
#define SBOX(which, array0, array1)                         \
622
227k
  SBOX##which (array0[0], array0[1], array0[2], array0[3],  \
623
227k
               array1[0], array1[1], array1[2], array1[3]);
624
625
/* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
626
   the output to the block found in ARRAY1.  */
627
#define SBOX_INVERSE(which, array0, array1)                           \
628
640
  SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3],  \
629
640
                         array1[0], array1[1], array1[2], array1[3]);
630
631
/* Apply the linear transformation to BLOCK.  */
632
#define LINEAR_TRANSFORMATION(block)                  \
633
218k
  {                                                   \
634
218k
    block[0] = rol (block[0], 13);                    \
635
218k
    block[2] = rol (block[2], 3);                     \
636
218k
    block[1] = block[1] ^ block[0] ^ block[2];        \
637
218k
    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
638
218k
    block[1] = rol (block[1], 1);                     \
639
218k
    block[3] = rol (block[3], 7);                     \
640
218k
    block[0] = block[0] ^ block[1] ^ block[3];        \
641
218k
    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
642
218k
    block[0] = rol (block[0], 5);                     \
643
218k
    block[2] = rol (block[2], 22);                    \
644
218k
  }
645
646
/* Apply the inverse linear transformation to BLOCK.  */
647
#define LINEAR_TRANSFORMATION_INVERSE(block)          \
648
620
  {                                                   \
649
620
    block[2] = ror (block[2], 22);                    \
650
620
    block[0] = ror (block[0] , 5);                    \
651
620
    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
652
620
    block[0] = block[0] ^ block[1] ^ block[3];        \
653
620
    block[3] = ror (block[3], 7);                     \
654
620
    block[1] = ror (block[1], 1);                     \
655
620
    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
656
620
    block[1] = block[1] ^ block[0] ^ block[2];        \
657
620
    block[2] = ror (block[2], 3);                     \
658
620
    block[0] = ror (block[0], 13);                    \
659
620
  }
660
661
/* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
662
   subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary storage.
663
   This macro increments `round'.  */
664
#define ROUND(which, subkeys, block, block_tmp) \
665
218k
  {                                             \
666
218k
    BLOCK_XOR (block, subkeys[round]);          \
667
218k
    round++;                                    \
668
218k
    SBOX (which, block, block_tmp);             \
669
218k
    LINEAR_TRANSFORMATION (block_tmp);          \
670
218k
    BLOCK_COPY (block, block_tmp);              \
671
218k
  }
672
673
/* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
674
   and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
675
   storage.  The result will be stored in BLOCK_TMP.  This macro
676
   increments `round'.  */
677
#define ROUND_LAST(which, subkeys, block, block_tmp) \
678
7.05k
  {                                                  \
679
7.05k
    BLOCK_XOR (block, subkeys[round]);               \
680
7.05k
    round++;                                         \
681
7.05k
    SBOX (which, block, block_tmp);                  \
682
7.05k
    BLOCK_XOR (block_tmp, subkeys[round]);           \
683
7.05k
    round++;                                         \
684
7.05k
  }
685
686
/* Apply an inverse Serpent round to BLOCK, using the SBOX number
687
   WHICH and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as
688
   temporary storage.  This macro decrements `round'.  */
689
#define ROUND_INVERSE(which, subkey, block, block_tmp) \
690
620
  {                                                    \
691
620
    LINEAR_TRANSFORMATION_INVERSE (block);             \
692
620
    SBOX_INVERSE (which, block, block_tmp);            \
693
620
    BLOCK_XOR (block_tmp, subkey[round]);              \
694
620
    round--;                                           \
695
620
    BLOCK_COPY (block, block_tmp);                     \
696
620
  }
697
698
/* Apply the first inverse Serpent round to BLOCK, using the SBOX number WHICH
699
   and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
700
   storage.  The result will be stored in BLOCK_TMP.  This macro
701
   decrements `round'.  */
702
#define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
703
20
  {                                                           \
704
20
    BLOCK_XOR (block, subkeys[round]);                        \
705
20
    round--;                                                  \
706
20
    SBOX_INVERSE (which, block, block_tmp);                   \
707
20
    BLOCK_XOR (block_tmp, subkeys[round]);                    \
708
20
    round--;                                                  \
709
20
  }
710
711
/* Convert the user provided key KEY of KEY_LENGTH bytes into the
712
   internally used format.  */
713
static void
714
serpent_key_prepare (const byte *key, unsigned int key_length,
715
         serpent_key_t key_prepared)
716
42
{
717
42
  int i;
718
719
  /* Copy key.  */
720
42
  key_length /= 4;
721
239
  for (i = 0; i < key_length; i++)
722
197
    key_prepared[i] = buf_get_le32 (key + i * 4);
723
724
42
  if (i < 8)
725
30
    {
726
      /* Key must be padded according to the Serpent
727
   specification.  */
728
30
      key_prepared[i] = 0x00000001;
729
730
139
      for (i++; i < 8; i++)
731
109
  key_prepared[i] = 0;
732
30
    }
733
42
}
734
735
/* Derive the 33 subkeys from KEY and store them in SUBKEYS.  */
736
static void
737
serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
738
42
{
739
42
  u32 w[8];   /* The `prekey'.  */
740
42
  u32 ws[4];
741
42
  u32 wt[4];
742
743
  /* Initialize with key values.  */
744
42
  w[0] = key[0];
745
42
  w[1] = key[1];
746
42
  w[2] = key[2];
747
42
  w[3] = key[3];
748
42
  w[4] = key[4];
749
42
  w[5] = key[5];
750
42
  w[6] = key[6];
751
42
  w[7] = key[7];
752
753
  /* Expand to intermediate key using the affine recurrence.  */
754
42
#define EXPAND_KEY4(wo, r)                                                     \
755
1.38k
  wo[0] = w[(r+0)%8] =                                                         \
756
1.38k
    rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
757
1.38k
  wo[1] = w[(r+1)%8] =                                                         \
758
1.38k
    rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
759
1.38k
  wo[2] = w[(r+2)%8] =                                                         \
760
1.38k
    rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
761
1.38k
  wo[3] = w[(r+3)%8] =                                                         \
762
1.38k
    rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);
763
764
42
#define EXPAND_KEY(r)       \
765
672
  EXPAND_KEY4(ws, (r));     \
766
672
  EXPAND_KEY4(wt, (r + 4));
767
768
  /* Calculate subkeys via S-Boxes, in bitslice mode.  */
769
42
  EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
770
42
  EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
771
42
  EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
772
42
  EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
773
42
  EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
774
42
  EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
775
42
  EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
776
42
  EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
777
42
  EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
778
42
  EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
779
42
  EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
780
42
  EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
781
42
  EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
782
42
  EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
783
42
  EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
784
42
  EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
785
42
  EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
786
787
42
  wipememory (ws, sizeof (ws));
788
42
  wipememory (wt, sizeof (wt));
789
42
  wipememory (w, sizeof (w));
790
42
}
791
792
/* Initialize CONTEXT with the key KEY of KEY_LENGTH bits.  */
793
static gcry_err_code_t
794
serpent_setkey_internal (serpent_context_t *context,
795
       const byte *key, unsigned int key_length)
796
46
{
797
46
  serpent_key_t key_prepared;
798
799
46
  if (key_length > 32)
800
4
    return GPG_ERR_INV_KEYLEN;
801
802
42
  serpent_key_prepare (key, key_length, key_prepared);
803
42
  serpent_subkeys_generate (key_prepared, context->keys);
804
805
42
#ifdef USE_AVX512
806
42
  context->use_avx512 = 0;
807
42
  if ((_gcry_get_hw_features () & HWF_INTEL_AVX512))
808
0
    {
809
0
      context->use_avx512 = 1;
810
0
    }
811
42
#endif
812
813
42
#ifdef USE_AVX2
814
42
  context->use_avx2 = 0;
815
42
  if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
816
42
    {
817
42
      context->use_avx2 = 1;
818
42
    }
819
42
#endif
820
821
#ifdef USE_NEON
822
  context->use_neon = 0;
823
  if ((_gcry_get_hw_features () & HWF_ARM_NEON))
824
    {
825
      context->use_neon = 1;
826
    }
827
#endif
828
829
42
  wipememory (key_prepared, sizeof(key_prepared));
830
42
  return 0;
831
46
}
832
833
/* Initialize CTX with the key KEY of KEY_LENGTH bytes.  */
834
static gcry_err_code_t
835
serpent_setkey (void *ctx,
836
    const byte *key, unsigned int key_length,
837
                cipher_bulk_ops_t *bulk_ops)
838
26
{
839
26
  serpent_context_t *context = ctx;
840
26
  static const char *serpent_test_ret;
841
26
  static int serpent_init_done;
842
26
  gcry_err_code_t ret = GPG_ERR_NO_ERROR;
843
844
26
  if (! serpent_init_done)
845
5
    {
846
      /* Execute a self-test the first time, Serpent is used.  */
847
5
      serpent_init_done = 1;
848
5
      serpent_test_ret = serpent_test ();
849
5
      if (serpent_test_ret)
850
0
  log_error ("Serpent test failure: %s\n", serpent_test_ret);
851
5
    }
852
853
  /* Setup bulk encryption routines.  */
854
26
  memset (bulk_ops, 0, sizeof(*bulk_ops));
855
26
  bulk_ops->cbc_dec = _gcry_serpent_cbc_dec;
856
26
  bulk_ops->cfb_dec = _gcry_serpent_cfb_dec;
857
26
  bulk_ops->ctr_enc = _gcry_serpent_ctr_enc;
858
26
  bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt;
859
26
  bulk_ops->ocb_auth = _gcry_serpent_ocb_auth;
860
26
  bulk_ops->xts_crypt = _gcry_serpent_xts_crypt;
861
26
  bulk_ops->ecb_crypt = _gcry_serpent_ecb_crypt;
862
863
26
  if (serpent_test_ret)
864
0
    ret = GPG_ERR_SELFTEST_FAILED;
865
26
  else
866
26
    ret = serpent_setkey_internal (context, key, key_length);
867
868
26
  return ret;
869
26
}
870
871
/* Encrypt the 16-byte block INPUT with the prepared round keys in CONTEXT
   and store the result in OUTPUT.  The block is handled as four 32-bit
   little-endian words; buf_get_le32/buf_put_le32 tolerate unaligned
   buffers.  */
static void
serpent_encrypt_internal (serpent_context_t *context,
        const byte *input, byte *output)
{
  serpent_block_t b, b_next;
  /* Round counter; presumably advanced by the ROUND/ROUND_LAST macros
     defined earlier in this file -- TODO confirm against the macro
     definitions.  */
  int round = 0;

  /* Load the input block as four little-endian 32-bit words.  */
  b[0] = buf_get_le32 (input + 0);
  b[1] = buf_get_le32 (input + 4);
  b[2] = buf_get_le32 (input + 8);
  b[3] = buf_get_le32 (input + 12);

  /* 32 rounds in total.  The first macro argument cycles 0..7 four times;
     presumably it selects one of Serpent's eight S-boxes.  The last round
     uses ROUND_LAST instead of ROUND.  */
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);

  ROUND_LAST (7, context->keys, b, b_next);

  /* Store the ciphertext words back in little-endian order.  */
  buf_put_le32 (output + 0, b_next[0]);
  buf_put_le32 (output + 4, b_next[1]);
  buf_put_le32 (output + 8, b_next[2]);
  buf_put_le32 (output + 12, b_next[3]);
}
922
923
/* Decrypt the 16-byte block INPUT with the prepared round keys in CONTEXT
   and store the result in OUTPUT.  Mirrors serpent_encrypt_internal:
   the 32 rounds are applied with inverse operations and the S-box index
   sequence reversed (7 down to 0, repeated four times).  */
static void
serpent_decrypt_internal (serpent_context_t *context,
        const byte *input, byte *output)
{
  serpent_block_t b, b_next;
  /* Round counter starts at ROUNDS and is presumably decremented by the
     ROUND_*_INVERSE macros defined earlier in this file -- TODO confirm.  */
  int round = ROUNDS;

  /* Load the ciphertext block as four little-endian 32-bit words.  */
  b_next[0] = buf_get_le32 (input + 0);
  b_next[1] = buf_get_le32 (input + 4);
  b_next[2] = buf_get_le32 (input + 8);
  b_next[3] = buf_get_le32 (input + 12);

  /* The first inverse round undoes the special final encryption round.  */
  ROUND_FIRST_INVERSE (7, context->keys, b_next, b);

  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);

  /* Store the plaintext words back in little-endian order.  */
  buf_put_le32 (output + 0, b_next[0]);
  buf_put_le32 (output + 4, b_next[1]);
  buf_put_le32 (output + 8, b_next[2]);
  buf_put_le32 (output + 12, b_next[3]);
}
974
975
static unsigned int
976
serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
977
7.03k
{
978
7.03k
  serpent_context_t *context = ctx;
979
980
7.03k
  serpent_encrypt_internal (context, buffer_in, buffer_out);
981
7.03k
  return /*burn_stack*/ (2 * sizeof (serpent_block_t));
982
7.03k
}
983
984
static unsigned int
985
serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
986
0
{
987
0
  serpent_context_t *context = ctx;
988
989
0
  serpent_decrypt_internal (context, buffer_in, buffer_out);
990
0
  return /*burn_stack*/ (2 * sizeof (serpent_block_t));
991
0
}
992
993

994
995
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size sizeof(serpent_block_t).

   Strategy: consume the input with the widest available SIMD implementation
   first (AVX-512: 32 blocks, AVX2: 16, SSE2: 8, NEON: 8), then fall back to
   the scalar loop for whatever remains.  BURN_STACK_DEPTH is cleared when a
   stack-free assembly path handled everything.  */
static void
_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char tmpbuf[sizeof(serpent_block_t)];  /* keystream scratch */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX512
  if (ctx->use_avx512)
    {
      int did_use_avx512 = 0;

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_serpent_avx512_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 32;
          outbuf += 32 * sizeof(serpent_block_t);
          inbuf  += 32 * sizeof(serpent_block_t);
          did_use_avx512 = 1;
        }

      if (did_use_avx512)
        {
          /* serpent-avx512 code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/avx2/sse2 code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

#ifdef USE_SSE2
  /* SSE2 is baseline on x86-64, hence no runtime feature check here.  */
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
    /* TODO: use caching instead? */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

  /* Scalar fallback for the remaining (< chunk-size) blocks.  */
  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      serpent_encrypt_internal(ctx, ctr, tmpbuf);
      /* XOR the input with the encrypted counter and store in output.  */
      cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf  += sizeof(serpent_block_t);
      /* Increment the counter.  */
      cipher_block_add(ctr, 1, sizeof(serpent_block_t));
    }

  /* Erase keystream material before returning.  */
  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}
1135
1136
/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c.

   Same SIMD-first dispatch as _gcry_serpent_ctr_enc: AVX-512 (32 blocks),
   AVX2 (16), SSE2 (8), NEON (8), then a scalar loop for the remainder.  */
static void
_gcry_serpent_cbc_dec(void *context, unsigned char *iv,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char savebuf[sizeof(serpent_block_t)];  /* scratch; allows in-place decrypt */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX512
  if (ctx->use_avx512)
    {
      int did_use_avx512 = 0;

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_serpent_avx512_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 32;
          outbuf += 32 * sizeof(serpent_block_t);
          inbuf  += 32 * sizeof(serpent_block_t);
          did_use_avx512 = 1;
        }

      if (did_use_avx512)
        {
          /* serpent-avx512 code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/avx2/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  /* SSE2 is baseline on x86-64, hence no runtime feature check here.  */
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Scalar fallback for the remaining (< chunk-size) blocks.  */
  for ( ;nblocks; nblocks-- )
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF.  */
      serpent_decrypt_internal (ctx, inbuf, savebuf);

      /* outbuf = savebuf XOR iv; iv = old inbuf (the next chaining block).  */
      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
                                sizeof(serpent_block_t));
      inbuf += sizeof(serpent_block_t);
      outbuf += sizeof(serpent_block_t);
    }

  /* Erase intermediate plaintext before returning.  */
  wipememory(savebuf, sizeof(savebuf));
  _gcry_burn_stack(burn_stack_depth);
}
1271
1272
/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c.

   Same SIMD-first dispatch as the other bulk helpers: AVX-512 (32 blocks),
   AVX2 (16), SSE2 (8), NEON (8), then a scalar loop.  CFB decryption only
   ever uses the block cipher in encryption direction.  */
static void
_gcry_serpent_cfb_dec(void *context, unsigned char *iv,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX512
  if (ctx->use_avx512)
    {
      int did_use_avx512 = 0;

      /* Process data in 32 block chunks. */
      while (nblocks >= 32)
        {
          _gcry_serpent_avx512_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 32;
          outbuf += 32 * sizeof(serpent_block_t);
          inbuf  += 32 * sizeof(serpent_block_t);
          did_use_avx512 = 1;
        }

      if (did_use_avx512)
        {
          /* serpent-avx512 code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/avx2/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  /* SSE2 is baseline on x86-64, hence no runtime feature check here.  */
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Scalar fallback: encrypt IV, XOR with ciphertext, and make the old
     ciphertext the next IV (done in one pass by cipher_block_xor_n_copy).  */
  for ( ;nblocks; nblocks-- )
    {
      serpent_encrypt_internal(ctx, iv, iv);
      cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf  += sizeof(serpent_block_t);
    }

  _gcry_burn_stack(burn_stack_depth);
}
1401
1402
/* Bulk encryption/decryption of complete blocks in OCB mode.

   Returns the number of blocks NOT processed; cipher.c handles the
   leftover with its generic per-block OCB code.  When no SIMD path is
   compiled in, all NBLOCKS are returned unprocessed.  BLKN tracks the
   running OCB data-block counter used to derive the per-block offsets
   (via the precomputed L-pointer arrays from bulkhelp).  */
static size_t
_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
      const void *inbuf_arg, size_t nblocks, int encrypt)
{
#if defined(USE_AVX512) || defined(USE_AVX2) || defined(USE_SSE2) \
    || defined(USE_NEON)
  serpent_context_t *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 2 * sizeof (serpent_block_t);
  u64 blkn = c->u_mode.ocb.data_nblocks;
#else
  (void)c;
  (void)outbuf_arg;
  (void)inbuf_arg;
  (void)encrypt;
#endif

#ifdef USE_AVX512
  if (ctx->use_avx512)
    {
      int did_use_avx512 = 0;
      ocb_L_uintptr_t Ls[32];
      ocb_L_uintptr_t *l;

      if (nblocks >= 32)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);

          /* Process data in 32 block chunks. */
          while (nblocks >= 32)
            {
              blkn += 32;
              /* Refresh the L entry that depends on the chunk boundary.  */
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);

              _gcry_serpent_avx512_ocb_crypt(ctx, outbuf, inbuf, c->u_iv.iv,
                                             c->u_ctr.ctr, Ls, encrypt);

              nblocks -= 32;
              outbuf += 32 * sizeof(serpent_block_t);
              inbuf  += 32 * sizeof(serpent_block_t);
              did_use_avx512 = 1;
            }
        }

      if (did_use_avx512)
        {
          /* serpent-avx512 code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              if (encrypt)
                _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, Ls);
              else
                _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, Ls);

              nblocks -= 16;
              outbuf += 16 * sizeof(serpent_block_t);
              inbuf  += 16 * sizeof(serpent_block_t);
              did_use_avx2 = 1;
            }
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  /* SSE2 is baseline on x86-64, hence no runtime feature check here.  */
  {
    int did_use_sse2 = 0;
    u64 Ls[8];
    u64 *l;

    if (nblocks >= 8)
      {
        l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

        /* Process data in 8 block chunks. */
        while (nblocks >= 8)
          {
            blkn += 8;
            *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

            if (encrypt)
              _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                         c->u_ctr.ctr, Ls);
            else
              _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                         c->u_ctr.ctr, Ls);

            nblocks -= 8;
            outbuf += 8 * sizeof(serpent_block_t);
            inbuf  += 8 * sizeof(serpent_block_t);
            did_use_sse2 = 1;
          }
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;
      uintptr_t Ls[8];
      uintptr_t *l;

      if (nblocks >= 8)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

          /* Process data in 8 block chunks. */
          while (nblocks >= 8)
            {
              blkn += 8;
              *l = (uintptr_t)(void *)ocb_get_l(c,  blkn - blkn % 8);

              if (encrypt)
                _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, (const void **)Ls);
              else
                _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                           c->u_ctr.ctr, (const void **)Ls);

              nblocks -= 8;
              outbuf += 8 * sizeof(serpent_block_t);
              inbuf  += 8 * sizeof(serpent_block_t);
              did_use_neon = 1;
            }
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AVX512) || defined(USE_AVX2) || defined(USE_SSE2) \
    || defined(USE_NEON)
  /* Persist the advanced block counter for the next bulk call.  */
  c->u_mode.ocb.data_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  return nblocks;
}
1594
1595
/* Bulk authentication of complete blocks in OCB mode.

   Mirrors _gcry_serpent_ocb_crypt for the AAD path: AVX2 (16 blocks),
   SSE2 (8) and NEON (8) fast paths, returning the count of blocks left
   for the generic per-block code.  Note there is no AVX-512 AAD path.  */
static size_t
_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
      size_t nblocks)
{
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  serpent_context_t *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  int burn_stack_depth = 2 * sizeof(serpent_block_t);
  u64 blkn = c->u_mode.ocb.aad_nblocks;
#else
  (void)c;
  (void)abuf_arg;
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;
      u64 Ls[16];
      u64 *l;

      if (nblocks >= 16)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              blkn += 16;
              /* Refresh the L entry that depends on the chunk boundary.  */
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

              _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
                                          c->u_mode.ocb.aad_sum, Ls);

              nblocks -= 16;
              abuf += 16 * sizeof(serpent_block_t);
              did_use_avx2 = 1;
            }
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  /* SSE2 is baseline on x86-64, hence no runtime feature check here.  */
  {
    int did_use_sse2 = 0;
    u64 Ls[8];
    u64 *l;

    if (nblocks >= 8)
      {
        l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

        /* Process data in 8 block chunks. */
        while (nblocks >= 8)
          {
            blkn += 8;
            *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

            _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
                                        c->u_mode.ocb.aad_sum, Ls);

            nblocks -= 8;
            abuf += 8 * sizeof(serpent_block_t);
            did_use_sse2 = 1;
          }
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;
      uintptr_t Ls[8];
      uintptr_t *l;

      if (nblocks >= 8)
        {
          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

          /* Process data in 8 block chunks. */
          while (nblocks >= 8)
            {
              blkn += 8;
              *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

              _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
                                          c->u_mode.ocb.aad_sum,
                                          (const void **)Ls);

              nblocks -= 8;
              abuf += 8 * sizeof(serpent_block_t);
              did_use_neon = 1;
            }
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  /* Persist the advanced AAD block counter for the next bulk call.  */
  c->u_mode.ocb.aad_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  return nblocks;
}
1730
1731
1732
/* Encrypt (ENCRYPT != 0) or decrypt NUM_BLKS (1..32) independent blocks
   from IN to OUT, choosing the widest SIMD implementation available.
   Used as the worker for the ECB/XTS bulk helpers below.  Returns the
   stack-burn depth required (0 if only stack-free assembly ran).  */
static unsigned int
serpent_crypt_blk1_32(void *context, byte *out, const byte *in,
          size_t num_blks, int encrypt)
{
  serpent_context_t *ctx = context;
  unsigned int burn, burn_stack_depth = 0;

#ifdef USE_AVX512
  if (num_blks == 32 && ctx->use_avx512)
    {
      _gcry_serpent_avx512_blk32 (ctx, out, in, encrypt);
      /* AVX-512 path handles everything; no stack burn needed.  */
      return 0;
    }
#endif

#ifdef USE_AVX2
  /* Loop body zeroes num_blks, so this runs at most once (i.e. acts
     like an 'if').  */
  while (num_blks == 16 && ctx->use_avx2)
    {
      _gcry_serpent_avx2_blk16 (ctx, out, in, encrypt);
      out += 16 * sizeof(serpent_block_t);
      in += 16 * sizeof(serpent_block_t);
      num_blks -= 16;
    }
#endif

#ifdef USE_SSE2
  /* SSE2 is baseline on x86-64; process 8-block groups.  */
  while (num_blks >= 8)
    {
      _gcry_serpent_sse2_blk8 (ctx, out, in, encrypt);
      out += 8 * sizeof(serpent_block_t);
      in += 8 * sizeof(serpent_block_t);
      num_blks -= 8;
    }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      while (num_blks >= 8)
        {
          _gcry_serpent_neon_blk8 (ctx, out, in, encrypt);
          out += 8 * sizeof(serpent_block_t);
          in += 8 * sizeof(serpent_block_t);
          num_blks -= 8;
        }
    }
#endif

  /* Scalar fallback, one block at a time.  */
  while (num_blks >= 1)
    {
      if (encrypt)
        serpent_encrypt_internal((void *)ctx, in, out);
      else
        serpent_decrypt_internal((void *)ctx, in, out);

      burn = 2 * sizeof(serpent_block_t);
      burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
      out += sizeof(serpent_block_t);
      in += sizeof(serpent_block_t);
      num_blks--;
    }

  return burn_stack_depth;
}
1796
1797
static unsigned int
1798
serpent_encrypt_blk1_32(void *ctx, byte *out, const byte *in,
1799
      size_t num_blks)
1800
0
{
1801
0
  return serpent_crypt_blk1_32 (ctx, out, in, num_blks, 1);
1802
0
}
1803
1804
static unsigned int
1805
serpent_decrypt_blk1_32(void *ctx, byte *out, const byte *in,
1806
      size_t num_blks)
1807
0
{
1808
0
  return serpent_crypt_blk1_32 (ctx, out, in, num_blks, 0);
1809
0
}
1810
1811
1812
/* Bulk encryption/decryption of complete blocks in XTS mode.

   Delegates all work to the generic bulk_xts_crypt_128 helper, feeding it
   the blk1-32 callbacks above.  TWEAK is updated in place by the helper.  */
static void
_gcry_serpent_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
       const void *inbuf_arg, size_t nblocks, int encrypt)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Process remaining blocks. */
  if (nblocks)
    {
      /* Scratch for up to 32 blocks of 16 bytes, matching the blk1-32
         callbacks' maximum chunk size.  */
      unsigned char tmpbuf[32 * 16];
      unsigned int tmp_used = 16;
      size_t nburn;

      nburn = bulk_xts_crypt_128(ctx, encrypt ? serpent_encrypt_blk1_32
                                              : serpent_decrypt_blk1_32,
                                 outbuf, inbuf, nblocks,
                                 tweak, tmpbuf, sizeof(tmpbuf) / 16,
                                 &tmp_used);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;

      /* Only the portion of the scratch actually touched needs wiping.  */
      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1842
1843
1844
/* Bulk encryption/decryption in ECB mode.

   Delegates to the generic bulk_ecb_crypt_128 helper with a maximum chunk
   size of 32 blocks, matching the blk1-32 callbacks above.  */
static void
_gcry_serpent_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg,
       size_t nblocks, int encrypt)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

  /* Process remaining blocks. */
  if (nblocks)
    {
      size_t nburn;

      nburn = bulk_ecb_crypt_128(ctx, encrypt ? serpent_encrypt_blk1_32
                                              : serpent_decrypt_blk1_32,
                                 outbuf, inbuf, nblocks, 32);
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}
1868
1869

1870
1871
/* Serpent test.  */
1872
1873
static const char *
1874
serpent_test (void)
1875
5
{
1876
5
  serpent_context_t context;
1877
5
  unsigned char scratch[16];
1878
5
  unsigned int i;
1879
1880
5
  static struct test
1881
5
  {
1882
5
    int key_length;
1883
5
    unsigned char key[32];
1884
5
    unsigned char text_plain[16];
1885
5
    unsigned char text_cipher[16];
1886
5
  } test_data[] =
1887
5
    {
1888
5
      {
1889
5
  16,
1890
5
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
1891
5
  "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
1892
5
  "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
1893
5
      },
1894
5
      {
1895
5
  24,
1896
5
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
1897
5
  "\x00\x00\x00\x00\x00\x00\x00\x00",
1898
5
  "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
1899
5
  "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
1900
5
      },
1901
5
      {
1902
5
  32,
1903
5
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
1904
5
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
1905
5
  "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
1906
5
  "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
1907
5
      },
1908
5
      {
1909
5
  32,
1910
5
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
1911
5
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
1912
5
  "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
1913
5
  "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
1914
5
      },
1915
5
      {
1916
5
  0
1917
5
      },
1918
5
    };
1919
1920
25
  for (i = 0; test_data[i].key_length; i++)
1921
20
    {
1922
20
      serpent_setkey_internal (&context, test_data[i].key,
1923
20
                               test_data[i].key_length);
1924
20
      serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);
1925
1926
20
      if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
1927
0
  switch (test_data[i].key_length)
1928
0
    {
1929
0
    case 16:
1930
0
      return "Serpent-128 test encryption failed.";
1931
0
    case  24:
1932
0
      return "Serpent-192 test encryption failed.";
1933
0
    case 32:
1934
0
      return "Serpent-256 test encryption failed.";
1935
0
    }
1936
1937
20
    serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
1938
20
    if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
1939
0
      switch (test_data[i].key_length)
1940
0
  {
1941
0
  case 16:
1942
0
    return "Serpent-128 test decryption failed.";
1943
0
  case  24:
1944
0
    return "Serpent-192 test decryption failed.";
1945
0
  case 32:
1946
0
    return "Serpent-256 test decryption failed.";
1947
0
  }
1948
20
    }
1949
1950
5
  return NULL;
1951
5
}
1952
1953

1954
/* Object identifiers registered for Serpent-128, one per cipher mode;
   NULL entry terminates the table.  */
static const gcry_cipher_oid_spec_t serpent128_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };
1962
1963
/* Object identifiers registered for Serpent-192, one per cipher mode;
   NULL entry terminates the table.  */
static const gcry_cipher_oid_spec_t serpent192_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };
1971
1972
/* Object identifiers registered for Serpent-256, one per cipher mode;
   NULL entry terminates the table.  */
static const gcry_cipher_oid_spec_t serpent256_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };
1980
1981
/* Alternative algorithm names accepted for Serpent-128 ("SERPENT" with
   no size suffix resolves to the 128-bit variant); NULL-terminated.  */
static const char *serpent128_aliases[] =
  {
    "SERPENT",
    "SERPENT-128",
    NULL
  };
1987
/* Alternative algorithm names accepted for Serpent-192; NULL-terminated.  */
static const char *serpent192_aliases[] =
  {
    "SERPENT-192",
    NULL
  };
1992
/* Alternative algorithm names accepted for Serpent-256; NULL-terminated.  */
static const char *serpent256_aliases[] =
  {
    "SERPENT-256",
    NULL
  };
1997
1998
/* Public cipher descriptor for Serpent-128, wiring the generic setkey/
   encrypt/decrypt entry points to this implementation.  The 16/128
   values are presumably block size in bytes and key length in bits per
   the gcry_cipher_spec_t layout — confirm against cipher.h.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
  {
    GCRY_CIPHER_SERPENT128, {0, 0},
    "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };
2005
2006
/* Public cipher descriptor for Serpent-192; same entry points as the
   128-bit variant, differing only in the declared key length.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
  {
    GCRY_CIPHER_SERPENT192, {0, 0},
    "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };
2013
2014
/* Public cipher descriptor for Serpent-256; same entry points as the
   128-bit variant, differing only in the declared key length.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
  {
    GCRY_CIPHER_SERPENT256, {0, 0},
    "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };