Coverage Report

Created: 2024-11-21 07:03

/src/libgcrypt/cipher/twofish.c
Line
Count
Source (jump to first uncovered line)
1
/* Twofish for GPG
2
 * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc.
3
 * Written by Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
4
 * 256-bit key length added March 20, 1999
5
 * Some modifications to reduce the text size by Werner Koch, April, 1998
6
 *
7
 * This file is part of Libgcrypt.
8
 *
9
 * Libgcrypt is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU Lesser General Public License as
11
 * published by the Free Software Foundation; either version 2.1 of
12
 * the License, or (at your option) any later version.
13
 *
14
 * Libgcrypt is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with this program; if not, see <https://www.gnu.org/licenses/>.
21
 * SPDX-License-Identifier: LGPL-2.1-or-later
22
 ********************************************************************
23
 *
24
 * This code is a "clean room" implementation, written from the paper
25
 * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
26
 * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
27
 * through http://www.counterpane.com/twofish.html
28
 *
29
 * For background information on multiplication in finite fields, used for
30
 * the matrix operations in the key schedule, see the book _Contemporary
31
 * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
32
 * Third Edition.
33
 *
34
 * Only the 128- and 256-bit key sizes are supported.  This code is intended
35
 * for GNU C on a 32-bit system, but it should work almost anywhere.  Loops
36
 * are unrolled, precomputation tables are used, etc., for maximum speed at
37
 * some cost in memory consumption. */
38
39
#include <config.h>
40
#include <stdio.h>
41
#include <stdlib.h>
42
#include <string.h> /* for memcmp() */
43
44
#include "types.h"  /* for byte and u32 typedefs */
45
#include "g10lib.h"
46
#include "cipher.h"
47
#include "bufhelp.h"
48
#include "cipher-internal.h"
49
#include "bulkhelp.h"
50
51
52
0
/* Size of one cipher block in bytes (16 bytes = 128 bits). */
#define TWOFISH_BLOCKSIZE 16
53
54
55
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code.  It ends up
 * defined (as 1) only for x86-64 builds in which at least one of
 * HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS or
 * HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS is defined.  It is #undef'd first so
 * that no earlier definition can leak in. */
56
#undef USE_AMD64_ASM
57
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
58
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
59
# define USE_AMD64_ASM 1
60
#endif
61
62
/* USE_ARM_ASM indicates whether to use ARM assembly code.  Two independent
 * checks can set it: one for __ARMEL__ (gated on
 * HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) and one for __AARCH64EL__ (gated on
 * HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS).  Despite its extra indentation,
 * the __AARCH64EL__ check is a top-level conditional: the #endif just before
 * it closes the __ARMEL__ block, so the two are not nested. */
63
#undef USE_ARM_ASM
64
#if defined(__ARMEL__)
65
# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
66
#  define USE_ARM_ASM 1
67
# endif
68
#endif
69
# if defined(__AARCH64EL__)
70
#  ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
71
#   define USE_ARM_ASM 1
72
#  endif
73
# endif
74
75
/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code.  It is defined
 * (as 1) only for x86-64 builds in which one of the
 * HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS or
 * HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS macros is defined and, in addition,
 * ENABLE_AVX2_SUPPORT is defined. */
76
#undef USE_AVX2
77
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
78
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
79
# if defined(ENABLE_AVX2_SUPPORT)
80
#  define USE_AVX2 1
81
# endif
82
#endif
83
84
85
/* Prototype for the self-test function (file-local forward declaration).
 * NOTE(review): the returned string is presumably an error description,
 * with NULL meaning success -- confirm against the definition. */
86
static const char *selftest(void);
87
88
89
/* Prototypes for the bulk functions.  These are file-local forward
 * declarations.  The ctr_enc, cbc_dec, cfb_dec, xts_crypt and ecb_crypt
 * helpers take an opaque context pointer and return nothing; the two OCB
 * helpers take a gcry_cipher_hd_t handle and return a size_t.
 * NOTE(review): nblocks is presumably a count of TWOFISH_BLOCKSIZE-byte
 * blocks, and the context pointer presumably refers to a TWOFISH_context --
 * confirm against the definitions. */
90
static void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr,
91
           void *outbuf_arg, const void *inbuf_arg,
92
           size_t nblocks);
93
static void _gcry_twofish_cbc_dec (void *context, unsigned char *iv,
94
           void *outbuf_arg, const void *inbuf_arg,
95
           size_t nblocks);
96
static void _gcry_twofish_cfb_dec (void *context, unsigned char *iv,
97
           void *outbuf_arg, const void *inbuf_arg,
98
           size_t nblocks);
99
static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
100
               const void *inbuf_arg, size_t nblocks,
101
               int encrypt);
102
static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
103
              size_t nblocks);
104
static void _gcry_twofish_xts_crypt (void *context, unsigned char *tweak,
105
             void *outbuf_arg, const void *inbuf_arg,
106
             size_t nblocks, int encrypt);
107
static void _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg,
108
             const void *inbuf_arg, size_t nblocks,
109
             int encrypt);
110
111
/* Structure for an expanded Twofish key.  s contains the key-dependent
112
 * S-boxes composed with the MDS matrix; w contains the eight "whitening"
113
 * subkeys, K[0] through K[7].  k holds the remaining, "round" subkeys.  Note
114
 * that k[i] corresponds to what the Twofish paper calls K[i+8].
 *
 * The use_avx2 member exists only in builds where USE_AVX2 is defined.
 * NOTE(review): it is presumably a flag recording whether the AVX2 code
 * paths may be used for this context -- confirm against the key setup
 * code. */
115
typedef struct {
116
   u32 s[4][256], w[8], k[32];
117
118
#ifdef USE_AVX2
119
  int use_avx2;
120
#endif
121
} TWOFISH_context;
122

123
124
/* Assembly implementations use the SystemV ABI.  On Win64 this requires ABI
125
 * conversion and additional stack space to store XMM6-XMM15.
 *
 * ASM_FUNC_ABI is defined only when USE_AVX2 is defined: it expands to
 * __attribute__((sysv_abi)) when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS is
 * defined and to nothing otherwise.  Without USE_AVX2 it stays undefined
 * after the #undef below. */
126
#undef ASM_FUNC_ABI
127
#if defined(USE_AVX2)
128
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
129
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
130
# else
131
#  define ASM_FUNC_ABI
132
# endif
133
#endif
134
135
136
/* These two tables are the q0 and q1 permutations, exactly as described in
137
 * the Twofish paper. */
138
139
static const byte q0[256] = {
140
   0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
141
   0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
142
   0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
143
   0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
144
   0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
145
   0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
146
   0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
147
   0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
148
   0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
149
   0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
150
   0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
151
   0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
152
   0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
153
   0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
154
   0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
155
   0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
156
   0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
157
   0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
158
   0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
159
   0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
160
   0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
161
   0x4A, 0x5E, 0xC1, 0xE0
162
};
163
164
static const byte q1[256] = {
165
   0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
166
   0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
167
   0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
168
   0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
169
   0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
170
   0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
171
   0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
172
   0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
173
   0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
174
   0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
175
   0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
176
   0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
177
   0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
178
   0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
179
   0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
180
   0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
181
   0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
182
   0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
183
   0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
184
   0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
185
   0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
186
   0x55, 0x09, 0xBE, 0x91
187
};
188

189
/* These MDS tables are actually tables of MDS composed with q0 and q1,
190
 * because they are only ever used that way and we can save some time by
191
 * precomputing.  Of course the main saving comes from precomputing the
192
 * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
193
 * things up in these tables we reduce the matrix multiply to four lookups
194
 * and three XORs.  Semi-formally, the definition of these tables is:
195
 * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
196
 * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
197
 * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
198
 * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
199
 * by Schneier et al, and I'm casually glossing over the byte/word
200
 * conversion issues. */
201
202
static const u32 mds[4][256] = {
203
   {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
204
    0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
205
    0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
206
    0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
207
    0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
208
    0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
209
    0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
210
    0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
211
    0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
212
    0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
213
    0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
214
    0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
215
    0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
216
    0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
217
    0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
218
    0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
219
    0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
220
    0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
221
    0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
222
    0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
223
    0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
224
    0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
225
    0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
226
    0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
227
    0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
228
    0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
229
    0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
230
    0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
231
    0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
232
    0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
233
    0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
234
    0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
235
    0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
236
    0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
237
    0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
238
    0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
239
    0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
240
    0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
241
    0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
242
    0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
243
    0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
244
    0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
245
    0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
246
247
   {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
248
    0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
249
    0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
250
    0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
251
    0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
252
    0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
253
    0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
254
    0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
255
    0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
256
    0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
257
    0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
258
    0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
259
    0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
260
    0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
261
    0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
262
    0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
263
    0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
264
    0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
265
    0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
266
    0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
267
    0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
268
    0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
269
    0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
270
    0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
271
    0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
272
    0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
273
    0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
274
    0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
275
    0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
276
    0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
277
    0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
278
    0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
279
    0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
280
    0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
281
    0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
282
    0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
283
    0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
284
    0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
285
    0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
286
    0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
287
    0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
288
    0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
289
    0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
290
291
   {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
292
    0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
293
    0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
294
    0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
295
    0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
296
    0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
297
    0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
298
    0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
299
    0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
300
    0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
301
    0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
302
    0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
303
    0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
304
    0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
305
    0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
306
    0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
307
    0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
308
    0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
309
    0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
310
    0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
311
    0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
312
    0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
313
    0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
314
    0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
315
    0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
316
    0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
317
    0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
318
    0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
319
    0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
320
    0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
321
    0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
322
    0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
323
    0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
324
    0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
325
    0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
326
    0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
327
    0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
328
    0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
329
    0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
330
    0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
331
    0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
332
    0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
333
    0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
334
335
   {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
336
    0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
337
    0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
338
    0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
339
    0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
340
    0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
341
    0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
342
    0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
343
    0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
344
    0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
345
    0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
346
    0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
347
    0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
348
    0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
349
    0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
350
    0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
351
    0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
352
    0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
353
    0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
354
    0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
355
    0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
356
    0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
357
    0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
358
    0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
359
    0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
360
    0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
361
    0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
362
    0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
363
    0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
364
    0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
365
    0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
366
    0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
367
    0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
368
    0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
369
    0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
370
    0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
371
    0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
372
    0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
373
    0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
374
    0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
375
    0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
376
    0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
377
    0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
378
};
379

380
/* The exp_to_poly and poly_to_exp tables are used to perform efficient
381
 * operations in GF(2^8) represented as GF(2)[x]/w(x) where
382
 * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
383
 * definition of the RS matrix in the key schedule.  Elements of that field
384
 * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
385
 * which can be represented naturally by bytes (just substitute x=2).  In that
386
 * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
387
 * multiplication is inefficient without hardware support.  To multiply
388
 * faster, I make use of the fact x is a generator for the nonzero elements,
389
 * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
390
 * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
391
 * *not* polynomial notation.  So if I want to compute pq where p and q are
392
 * in GF(2^8), I can just say:
393
 *    1. if p=0 or q=0 then pq=0
394
 *    2. otherwise, find m and n such that p=x^m and q=x^n
395
 *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
396
 * The translations in steps 2 and 3 are looked up in the tables
397
 * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
398
 * in action, look at the CALC_S macro.  As additional wrinkles, note that
399
 * one of my operands is always a constant, so the poly_to_exp lookup on it
400
 * is done in advance; I included the original values in the comments so
401
 * readers can have some chance of recognizing that this *is* the RS matrix
402
 * from the Twofish paper.  I've only included the table entries I actually
403
 * need; I never do a lookup on a variable input of zero and the biggest
404
 * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
405
 * never sum to more than 491.  I'm repeating part of the exp_to_poly table
406
 * so that I don't have to do mod-255 reduction in the exponent arithmetic.
407
 * Since I know my constant operands are never zero, I only have to worry
408
 * about zero values in the variable operand, and I do it with a simple
409
 * conditional branch.  I know conditionals are expensive, but I couldn't
410
 * see a non-horrible way of avoiding them, and I did manage to group the
411
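 * statements so that each if covers four group multiplications.
 *
 * NOTE(review): the tables below no longer match parts of this description.
 * poly_to_exp has all 256 entries, with poly_to_exp[0] = 492 rather than a
 * real exponent, and exp_to_poly is declared with 492 + 256 entries of which
 * only the first 492 have initializers, so the remaining 256 are zero.  A
 * zero variable operand therefore yields an index of at least 492 and reads
 * back 0, which presumably replaces the conditional branch described above
 * -- confirm against the CALC_S macro. */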
412
413
static const u16 poly_to_exp[256] = {
414
   492,
415
   0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
416
   0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
417
   0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
418
   0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
419
   0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
420
   0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
421
   0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
422
   0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
423
   0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
424
   0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
425
   0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
426
   0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
427
   0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
428
   0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
429
   0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
430
   0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
431
   0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
432
   0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
433
   0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
434
   0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
435
   0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
436
   0x85, 0xC8, 0xA1
437
};
438
439
static const byte exp_to_poly[492 + 256] = {
440
   0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
441
   0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
442
   0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
443
   0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
444
   0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
445
   0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
446
   0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
447
   0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
448
   0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
449
   0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
450
   0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
451
   0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
452
   0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
453
   0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
454
   0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
455
   0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
456
   0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
457
   0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
458
   0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
459
   0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
460
   0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
461
   0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
462
   0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
463
   0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
464
   0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
465
   0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
466
   0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
467
   0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
468
   0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
469
   0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
470
   0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
471
   0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
472
   0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
473
   0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
474
   0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
475
   0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
476
   0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
477
   0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
478
   0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
479
   0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
480
   0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB,
481
};
482

483
484
/* The table constants are indices of
485
 * S-box entries, preprocessed through q0 and q1. */
486
static byte calc_sb_tbl[512] = {
487
    0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
488
    0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
489
    0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
490
    0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
491
    0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
492
    0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
493
    0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
494
    0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
495
    0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
496
    0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
497
    0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
498
    0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
499
    0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
500
    0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
501
    0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
502
    0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
503
    0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
504
    0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
505
    0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
506
    0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
507
    0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
508
    0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
509
    0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
510
    0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
511
    0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
512
    0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
513
    0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
514
    0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
515
    0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
516
    0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
517
    0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
518
    0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
519
    0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
520
    0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
521
    0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
522
    0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
523
    0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
524
    0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
525
    0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
526
    0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
527
    0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
528
    0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
529
    0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
530
    0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
531
    0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
532
    0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
533
    0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
534
    0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
535
    0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
536
    0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
537
    0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
538
    0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
539
    0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
540
    0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
541
    0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
542
    0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
543
    0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
544
    0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
545
    0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
546
    0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
547
    0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
548
    0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
549
    0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
550
    0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
551
};
552
553
/* Macro to perform one column of the RS matrix multiplication.  The
554
 * parameters a, b, c, and d are the four bytes of output; i is the index
555
 * of the key bytes, and w, x, y, and z, are the column of constants from
556
 * the RS matrix, preprocessed through the poly_to_exp table. */
557
558
/* CALC_S is not self-contained: it reads the `key' byte array and the
 * poly_to_exp / exp_to_poly tables, and it overwrites the temporary `tmp'.
 * All of these must be visible where the macro is expanded.  exp_to_poly
 * is indexed with tmp + constant without any reduction.
 * NOTE(review): presumably exp_to_poly is long enough for the largest
 * possible sum -- confirm against the table definitions. */
#define CALC_S(a, b, c, d, i, w, x, y, z) \
559
368
   { \
560
368
      tmp = poly_to_exp[key[i]]; \
561
368
      (a) ^= exp_to_poly[tmp + (w)]; \
562
368
      (b) ^= exp_to_poly[tmp + (x)]; \
563
368
      (c) ^= exp_to_poly[tmp + (y)]; \
564
368
      (d) ^= exp_to_poly[tmp + (z)]; \
565
368
   }
566
567
/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
568
 * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
569
 * four S-boxes, where i is the index of the entry to compute, and a and b
570
 * are the index numbers preprocessed through the q0 and q1 tables
571
 * respectively.  CALC_SB is simply a convenience to make the code shorter;
572
 * it calls CALC_SB_2 four times with consecutive indices from i to i+3,
573
 * using the remaining parameters two by two. */
574
575
/* CALC_SB_2 expands to four assignment statements (the last one without a
 * trailing semicolon), so it must be used inside a braced block.  It takes
 * ctx, mds, q0, q1 and the S-vector bytes sa..sh from the scope in which it
 * is expanded. */
#define CALC_SB_2(i, a, b) \
576
2.30k
   ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
577
2.30k
   ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
578
2.30k
   ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
579
2.30k
   ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
580
581
/* Fills S-box entries i, i+1, i+2 and i+3 from the argument pairs (a,b),
 * (c,d), (e,f) and (g,h).  Expands to several statements; use it only
 * inside a braced block.  The key setup in do_twofish_setkey calls
 * CALC_SB_2 directly from a loop instead of using this macro. */
#define CALC_SB(i, a, b, c, d, e, f, g, h) \
582
   CALC_SB_2 (i, a, b); CALC_SB_2 ((i)+1, c, d); \
583
   CALC_SB_2 ((i)+2, e, f); CALC_SB_2 ((i)+3, g, h)
584
585
/* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys. */
586
587
/* 256-bit variant of CALC_SB_2: each S-box entry goes through two extra
 * q-table lookups keyed by the S-vector bytes si..sp.  Takes ctx, mds, q0,
 * q1 and sa..sp from the expanding scope.  Unlike CALC_SB_2, the expansion
 * already ends with a semicolon, so `CALC_SB256_2 (...);' leaves a harmless
 * empty statement behind; use it only inside a braced block. */
#define CALC_SB256_2(i, a, b) \
588
1.79k
   ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
589
1.79k
   ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
590
1.79k
   ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
591
1.79k
   ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
592
593
/* Fills S-box entries i, i+1, i+2 and i+3 for a 256-bit key from the
 * argument pairs (a,b), (c,d), (e,f) and (g,h).  Expands to several
 * statements; use it only inside a braced block.  The key setup in
 * do_twofish_setkey calls CALC_SB256_2 directly from a loop instead. */
#define CALC_SB256(i, a, b, c, d, e, f, g, h) \
594
   CALC_SB256_2 (i, a, b); CALC_SB256_2 ((i)+1, c, d); \
595
   CALC_SB256_2 ((i)+2, e, f); CALC_SB256_2 ((i)+3, g, h)
596
597
/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
598
 * last two stages of the h() function for a given index (either 2i or 2i+1).
599
 * a, b, c, and d are the four bytes going into the last two stages.  For
600
 * 128-bit keys, this is the entire h() function and a and c are the index
601
 * preprocessed through q0 and q1 respectively; for longer keys they are the
602
 * output of previous stages.  j is the index of the first key byte to use.
603
 * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
604
 * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
605
 * rotations.  Its parameters are: a, the array to write the results into,
606
 * j, the index of the first output entry, k and l, the preprocessed indices
607
 * for index 2i, and m and n, the preprocessed indices for index 2i+1.
608
 * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two
609
 * additional lookup-and-XOR stages.  The parameters a and b are the index
610
 * preprocessed through q0 and q1 respectively; j is the index of the first
611
 * key byte to use.  CALC_K256 is identical to CALC_K but for using the
612
 * CALC_K256_2 macro instead of CALC_K_2. */
613
614
/* CALC_K_2 is an expression, not a statement: it yields the XOR of four
 * mds[] lookups.  It reads key[j .. j+3] and key[j+8 .. j+11] and takes
 * mds, q0, q1 and key from the expanding scope.  The arguments a, b, c and
 * d are not parenthesised and the expansion has no outer parentheses, so
 * pass only simple operands (as CALC_K and CALC_K256_2 do) and use the
 * result only as the complete right-hand side of an assignment. */
#define CALC_K_2(a, b, c, d, j) \
615
640
     mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
616
640
   ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
617
640
   ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
618
640
   ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
619
620
/* CALC_K writes ctx->a[j] and ctx->a[j+1], where `a' is a member name of
 * the context (the key setup passes w or k), not an expression.  It
 * clobbers the temporaries x and y, which must be 32-bit unsigned for the
 * shift pairs below to act as rotations: (y << 8) + (y >> 24) rotates left
 * by 8 and (y << 9) + (y >> 23) rotates left by 9.  `x += y; y += x' is
 * the Pseudo-Hadamard Transform.  Expands to several statements; use it
 * only inside a braced block. */
#define CALC_K(a, j, k, l, m, n) \
621
180
   x = CALC_K_2 (k, l, k, l, 0); \
622
180
   y = CALC_K_2 (m, n, m, n, 4); \
623
180
   y = (y << 8) + (y >> 24); \
624
180
   x += y; y += x; ctx->a[j] = x; \
625
180
   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
626
627
/* CALC_K256_2 is an expression built on CALC_K_2.  On top of the key bytes
 * read by CALC_K_2 it reads key[j+16 .. j+19] and key[j+24 .. j+27].  The
 * arguments a and b are not parenthesised, so pass only simple operands.
 * The same restrictions as for CALC_K_2 apply to the result. */
#define CALC_K256_2(a, b, j) \
628
280
   CALC_K_2 (q0[q1[b ^ key[(j) + 24]] ^ key[(j) + 16]], \
629
280
       q1[q1[a ^ key[(j) + 25]] ^ key[(j) + 17]], \
630
280
       q0[q0[a ^ key[(j) + 26]] ^ key[(j) + 18]], \
631
280
       q1[q0[b ^ key[(j) + 27]] ^ key[(j) + 19]], j)
632
633
/* 256-bit counterpart of CALC_K: writes ctx->a[j] and ctx->a[j+1], where
 * `a' is a member name of the context (w or k in the key setup).  Clobbers
 * the 32-bit temporaries x and y; the shift pairs are rotate-left by 8 and
 * by 9, and `x += y; y += x' is the Pseudo-Hadamard Transform.  Expands to
 * several statements; use it only inside a braced block. */
#define CALC_K256(a, j, k, l, m, n) \
634
140
   x = CALC_K256_2 (k, l, 0); \
635
140
   y = CALC_K256_2 (m, n, 4); \
636
140
   y = (y << 8) + (y >> 24); \
637
140
   x += y; y += x; ctx->a[j] = x; \
638
140
   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
639

640
641
642
/* Perform the key setup.  Note that this works only with 128- and 256-bit
643
 * keys, despite the API that looks like it might support other sizes.
 *
 * Fills ctx->s (the four key-dependent S-boxes), ctx->w[0..7] (whitening
 * subkeys) and ctx->k[0..31] (round subkeys) from KEY.
 *
 *   ctx    - context to initialise.
 *   key    - key bytes; indices 0 .. KEYLEN-1 are read.
 *   keylen - key length in bytes; only 16 and 32 are accepted.
 *
 * Returns 0 on success, GPG_ERR_INV_KEYLEN for any other KEYLEN, and
 * GPG_ERR_SELFTEST_FAILED when the one-time self-test has failed. */
644
645
static gcry_err_code_t
646
do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen)
647
23
{
648
23
  /* Loop counters.  The local k is unrelated to the `k' handed to
   * CALC_K / CALC_K256 as first argument further down: that argument is
   * substituted into `ctx->a[j]' and therefore names the member ctx->k. */
  int i, j, k;
649
650
  /* Temporaries for CALC_K. */
651
23
  u32 x, y;
652
653
  /* The S vector used to key the S-boxes, split up into individual bytes.
654
   * 128-bit keys use only sa through sh; 256-bit use all of them. */
655
23
  byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
656
23
  byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
657
658
  /* Temporary for CALC_S. */
659
23
  unsigned int tmp;
660
661
  /* Flags for self-test.  Both are function-local statics, so the result
   * of the first self-test is remembered for all later calls.
   * NOTE(review): they are accessed without any locking here -- confirm
   * that callers serialise the first key setup. */
662
23
  static int initialized = 0;
663
23
  static const char *selftest_failed=0;
664
665
  /* Check key length.  keylen is unsigned, so keylen - 16 wraps to a huge
   * value for keylen < 16.  OR-ing with 16 gives exactly 16 only when
   * keylen - 16 is 0 or 16, i.e. when keylen is 16 or 32. */
666
23
  if( ( ( keylen - 16 ) | 16 ) != 16 )
667
7
    return GPG_ERR_INV_KEYLEN;
668
669
  /* Do self-test if necessary.  `initialized' is set before selftest() is
   * called (presumably because selftest() itself ends up in this function
   * -- confirm). */
670
16
  if (!initialized)
671
3
    {
672
3
      initialized = 1;
673
3
      selftest_failed = selftest ();
674
3
      if( selftest_failed )
675
0
        log_error("%s\n", selftest_failed );
676
3
    }
677
16
  if( selftest_failed )
678
0
    return GPG_ERR_SELFTEST_FAILED;
679
680
  /* Compute the first two words of the S vector.  The magic numbers are
681
   * the entries of the RS matrix, preprocessed through poly_to_exp.  The
682
   * numbers in the comments are the original (polynomial form) matrix
683
   * entries.  Key bytes 0..7 accumulate into sa..sd and key bytes 8..15
   * into se..sh. */
684
16
  CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
685
16
  CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
686
16
  CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
687
16
  CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
688
16
  CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
689
16
  CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
690
16
  CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
691
16
  CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
692
16
  CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
693
16
  CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
694
16
  CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
695
16
  CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
696
16
  CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
697
16
  CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
698
16
  CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
699
16
  CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
700
701
16
  if (keylen == 32)  /* 256-bit key */
702
7
    {
703
      /* Calculate the remaining two words of the S vector */
704
7
      CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
705
7
      CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
706
7
      CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
707
7
      CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
708
7
      CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
709
7
      CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
710
7
      CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
711
7
      CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
712
7
      CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
713
7
      CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
714
7
      CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
715
7
      CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
716
7
      CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
717
7
      CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
718
7
      CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
719
7
      CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
720
721
      /* Compute the S-boxes.  i walks the 256 S-box entries while j = 2*i
       * and k = 2*i + 1 select the matching pair from calc_sb_tbl. */
722
1.79k
      for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
723
1.79k
        {
724
1.79k
          CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
725
1.79k
  }
726
727
      /* Calculate whitening and round subkeys.  The first loop uses
       * i = 0, 2, 4, 6 and fills ctx->w[0..7]. */
728
35
      for (i = 0; i < 8; i += 2)
729
28
  {
730
28
    CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
731
28
  }
      /* i is deliberately not reset: it carries on from 8 while
       * j = 0, 2, .., 30 fills ctx->k[0..31], so q0/q1 are read at
       * indices 8..39. */
732
119
      for (j = 0; j < 32; j += 2, i += 2)
733
112
  {
734
112
    CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
735
112
  }
736
7
    }
737
9
  else
738
9
    {
739
      /* Compute the S-boxes.  i walks the 256 S-box entries while j = 2*i
       * and k = 2*i + 1 select the matching pair from calc_sb_tbl. */
740
2.31k
      for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
741
2.30k
        {
742
2.30k
          CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
743
2.30k
        }
744
745
      /* Calculate whitening and round subkeys.  The first loop uses
       * i = 0, 2, 4, 6 and fills ctx->w[0..7]. */
746
45
      for (i = 0; i < 8; i += 2)
747
36
  {
748
36
    CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
749
36
  }
      /* i is deliberately not reset: it carries on from 8 while
       * j = 0, 2, .., 30 fills ctx->k[0..31], so q0/q1 are read at
       * indices 8..39. */
750
153
      for (j = 0; j < 32; j += 2, i += 2)
751
144
  {
752
144
    CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
753
144
  }
754
9
    }
755
756
16
  return 0;
757
16
}
758
759
/* Key-setup entry point.  Runs do_twofish_setkey() on CONTEXT, records
 * AVX2 availability in the context when built with USE_AVX2, and fills
 * *BULK_OPS with the Twofish bulk-mode handlers.
 *
 * Returns the result of do_twofish_setkey().  Note that *BULK_OPS is
 * cleared and populated even when that result is an error. */
static gcry_err_code_t
760
twofish_setkey (void *context, const byte *key, unsigned int keylen,
761
                cipher_bulk_ops_t *bulk_ops)
762
23
{
763
23
  TWOFISH_context *ctx = context;
764
23
  unsigned int hwfeatures = _gcry_get_hw_features ();
765
23
  int rc;
766
767
23
  rc = do_twofish_setkey (ctx, key, keylen);
768
769
23
#ifdef USE_AVX2
770
23
  ctx->use_avx2 = (hwfeatures & HWF_INTEL_AVX2) != 0;
771
23
#endif
772
773
  /* Setup bulk encryption routines.  */
774
23
  memset (bulk_ops, 0, sizeof(*bulk_ops));
775
23
  bulk_ops->cbc_dec = _gcry_twofish_cbc_dec;
776
23
  bulk_ops->cfb_dec = _gcry_twofish_cfb_dec;
777
23
  bulk_ops->ctr_enc = _gcry_twofish_ctr_enc;
778
23
  bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt;
779
23
  bulk_ops->ocb_auth = _gcry_twofish_ocb_auth;
780
23
  bulk_ops->xts_crypt = _gcry_twofish_xts_crypt;
781
23
  bulk_ops->ecb_crypt = _gcry_twofish_ecb_crypt;
782
783
23
  /* hwfeatures is only read inside the USE_AVX2 block above; the cast
   * keeps it "used" in builds without USE_AVX2. */
  (void)hwfeatures;
784
785
23
  _gcry_burn_stack (23+6*sizeof(void*));
786
23
  return rc;
787
23
}
788
789
790
#ifdef USE_AVX2
791
/* Assembler implementations of Twofish using AVX2.  Process 16 blocks in
792
   parallel.
793
 */
794
extern void _gcry_twofish_avx2_blk16 (const TWOFISH_context *c, byte *out,
795
              const byte *in, int encrypt) ASM_FUNC_ABI;
796
797
extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
798
               unsigned char *out,
799
               const unsigned char *in,
800
               unsigned char *ctr) ASM_FUNC_ABI;
801
802
extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx,
803
               unsigned char *out,
804
               const unsigned char *in,
805
               unsigned char *iv) ASM_FUNC_ABI;
806
807
extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx,
808
               unsigned char *out,
809
               const unsigned char *in,
810
               unsigned char *iv) ASM_FUNC_ABI;
811
812
extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx,
813
               unsigned char *out,
814
               const unsigned char *in,
815
               unsigned char *offset,
816
               unsigned char *checksum,
817
               const u64 Ls[16]) ASM_FUNC_ABI;
818
819
extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx,
820
               unsigned char *out,
821
               const unsigned char *in,
822
               unsigned char *offset,
823
               unsigned char *checksum,
824
               const u64 Ls[16]) ASM_FUNC_ABI;
825
826
extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
827
          const unsigned char *abuf,
828
          unsigned char *offset,
829
          unsigned char *checksum,
830
          const u64 Ls[16]) ASM_FUNC_ABI;
831
#endif
832
833

834
#ifdef USE_AMD64_ASM
835
836
/* Assembly implementations of Twofish. */
837
extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c,
838
                byte *out, const byte *in);
839
840
extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
841
                byte *out, const byte *in);
842
843
/* These assembly implementations process three blocks in parallel. */
844
extern void _gcry_twofish_amd64_blk3(const TWOFISH_context *c, byte *out,
845
             const byte *in, int encrypt);
846
847
extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
848
          const byte *in, byte *ctr);
849
850
extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
851
          const byte *in, byte *iv);
852
853
extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
854
          const byte *in, byte *iv);
855
856
extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
857
          const byte *in, byte *offset,
858
          byte *checksum, const u64 Ls[3]);
859
860
extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
861
          const byte *in, byte *offset,
862
          byte *checksum, const u64 Ls[3]);
863
864
extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
865
           const byte *abuf, byte *offset,
866
           byte *checksum, const u64 Ls[3]);
867
868
/* Thin forwarding wrapper around the AMD64 assembly single-block
 * encryption routine; it adds no logic of its own. */
static inline void
869
twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
870
3.70k
{
871
3.70k
  _gcry_twofish_amd64_encrypt_block(c, out, in);
872
3.70k
}
873
874
/* Thin forwarding wrapper around the AMD64 assembly single-block
 * decryption routine; it adds no logic of its own. */
static inline void
875
twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
876
6
{
877
6
  _gcry_twofish_amd64_decrypt_block(c, out, in);
878
6
}
879
880
/* Thin forwarding wrapper around the AMD64 assembly CTR routine.  The CTR
 * bulk function calls it once per 3-block chunk and advances its buffers
 * by three blocks after each call. */
static inline void
881
twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
882
                      byte *ctr)
883
0
{
884
0
  _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
885
0
}
886
887
/* Thin forwarding wrapper around the AMD64 assembly CBC decryption
 * routine.  The CBC bulk function calls it once per 3-block chunk and
 * advances its buffers by three blocks after each call. */
static inline void
888
twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
889
                      byte *iv)
890
0
{
891
0
  _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
892
0
}
893
894
/* Thin forwarding wrapper around the AMD64 assembly CFB decryption
 * routine.  The CFB bulk function calls it once per 3-block chunk and
 * advances its buffers by three blocks after each call. */
static inline void
895
twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
896
                      byte *iv)
897
0
{
898
0
  _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
899
0
}
900
901
/* Thin forwarding wrapper around the AMD64 assembly OCB encryption
 * routine; all arguments, including the three-entry Ls array, are passed
 * through unchanged. */
static inline void
902
twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
903
          byte *offset, byte *checksum, const u64 Ls[3])
904
0
{
905
0
  _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
906
0
}
907
908
/* Thin forwarding wrapper around the AMD64 assembly OCB decryption
 * routine; all arguments, including the three-entry Ls array, are passed
 * through unchanged. */
static inline void
909
twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
910
          byte *offset, byte *checksum, const u64 Ls[3])
911
0
{
912
0
  _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
913
0
}
914
915
/* Thin forwarding wrapper around the AMD64 assembly OCB authentication
 * routine; all arguments, including the three-entry Ls array, are passed
 * through unchanged. */
static inline void
916
twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
917
           byte *offset, byte *checksum, const u64 Ls[3])
918
0
{
919
0
  _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
920
0
}
921
922
#elif defined(USE_ARM_ASM)
923
924
/* Assembly implementations of Twofish. */
925
extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c,
926
                byte *out, const byte *in);
927
928
extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c,
929
                byte *out, const byte *in);
930
931
#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
932
933
/* Macros to compute the g() function in the encryption and decryption
934
 * rounds.  G1 is the straight g() function; G2 includes the 8-bit
935
 * rotation for the high 32-bit word. */
936
937
/* G1 expands to an XOR chain without outer parentheses; use it only as the
 * complete right-hand side of an assignment, as ENCROUND and DECROUND do.
 * It reads ctx from the expanding scope.  The top byte is selected with a
 * plain `>> 24' and no mask, so the argument is expected to be a 32-bit
 * unsigned value. */
#define G1(a) \
938
     (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
939
   ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24])
940
941
/* G2 is G1 with the S-box indices shifted by one (s[1], s[2], s[3], s[0]).
 * Like G1 it has no outer parentheses, reads ctx from the expanding scope,
 * and selects the top byte with an unmasked `>> 24', so the argument is
 * expected to be a 32-bit unsigned value. */
#define G2(b) \
942
     (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
943
   ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24])
944
945
/* Encryption and decryption Feistel rounds.  Each one calls the two g()
946
 * macros, does the PHT, and performs the XOR and the appropriate bit
947
 * rotations.  The parameters are the round number (used to select subkeys),
948
 * and the four 32-bit chunks of the text. */
949
950
/* ENCROUND clobbers the temporaries x and y and reads ctx from the
 * expanding scope; c and d are modified in place, a and b are only read.
 * c is rotated right by one bit after the XOR and d is rotated left by
 * one bit before it.  Expands to several statements. */
#define ENCROUND(n, a, b, c, d) \
951
   x = G1 (a); y = G2 (b); \
952
   x += y; y += x + ctx->k[2 * (n) + 1]; \
953
   (c) ^= x + ctx->k[2 * (n)]; \
954
   (c) = ((c) >> 1) + ((c) << 31); \
955
   (d) = (((d) << 1)+((d) >> 31)) ^ y
956
957
/* DECROUND undoes ENCROUND for the same round number n: d is XORed and
 * then rotated right by one bit, c is rotated left by one bit and then
 * XORed.  It clobbers the temporaries x and y and reads ctx from the
 * expanding scope.  Expands to several statements. */
#define DECROUND(n, a, b, c, d) \
958
   x = G1 (a); y = G2 (b); \
959
   x += y; y += x; \
960
   (d) ^= y + ctx->k[2 * (n) + 1]; \
961
   (d) = ((d) >> 1) + ((d) << 31); \
962
   (c) = (((c) << 1)+((c) >> 31)); \
963
   (c) ^= (x + ctx->k[2 * (n)])
964
965
/* Encryption and decryption cycles; each one is simply two Feistel rounds
966
 * with the 32-bit chunks re-ordered to simulate the "swap" */
967
968
/* Cycle n runs rounds 2n and 2n+1.  The macro operates on the variables
 * a, b, c and d of the expanding scope by name. */
#define ENCCYCLE(n) \
969
   ENCROUND (2 * (n), a, b, c, d); \
970
   ENCROUND (2 * (n) + 1, c, d, a, b)
971
972
/* Cycle n undoes rounds 2n+1 and 2n, in that order.  The macro operates on
 * the variables a, b, c and d of the expanding scope by name. */
#define DECCYCLE(n) \
973
   DECROUND (2 * (n) + 1, c, d, a, b); \
974
   DECROUND (2 * (n), a, b, c, d)
975
976
/* Macros to convert the input and output bytes into 32-bit words,
977
 * and simultaneously perform the whitening step.  INPACK packs word
978
 * number n into the variable named by x, using whitening subkey number m.
979
 * OUTUNPACK unpacks word number n from the variable named by x, using
980
 * whitening subkey number m. */
981
982
/* INPACK reads four bytes from `in' (taken from the expanding scope) at
 * byte offset 4*n via buf_get_le32 and XORs them with ctx->w[m]. */
#define INPACK(n, x, m) \
983
   x = buf_get_le32(in + (n) * 4); \
984
   x ^= ctx->w[m]
985
986
/* OUTUNPACK XORs x with ctx->w[m] (modifying x) and stores the result via
 * buf_put_le32 at byte offset 4*n of `out', which is taken from the
 * expanding scope. */
#define OUTUNPACK(n, x, m) \
987
   x ^= ctx->w[m]; \
988
   buf_put_le32(out + (n) * 4, x)
989
990
#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
991
992

993
/* Encrypt one block.  in and out may be the same. */
994
995
#ifdef USE_AMD64_ASM
996
997
/* AMD64 assembly variant: encrypts one block from IN to OUT with the
 * context passed as an untyped pointer.  The return value is the number
 * of stack bytes the caller should burn afterwards. */
static unsigned int
998
twofish_encrypt (void *context, byte *out, const byte *in)
999
3.70k
{
1000
3.70k
  TWOFISH_context *ctx = context;
1001
3.70k
  twofish_amd64_encrypt_block(ctx, out, in);
1002
3.70k
  return /*burn_stack*/ (4*sizeof (void*));
1003
3.70k
}
1004
1005
#elif defined(USE_ARM_ASM)
1006
1007
/* ARM assembly variant: encrypts one block from IN to OUT with the
 * context passed as an untyped pointer.  The return value is the number
 * of stack bytes the caller should burn afterwards. */
static unsigned int
1008
twofish_encrypt (void *context, byte *out, const byte *in)
1009
{
1010
  TWOFISH_context *ctx = context;
1011
  _gcry_twofish_arm_encrypt_block(ctx, out, in);
1012
  return /*burn_stack*/ (4*sizeof (void*));
1013
}
1014
1015
#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
1016
1017
/* Portable C encryption of one block: reads 16 bytes from IN as four
 * little-endian 32-bit words, applies input whitening with ctx->w[0..3],
 * runs eight encryption cycles, and writes 16 bytes to OUT after output
 * whitening with ctx->w[4..7].  All input is loaded before anything is
 * stored, so IN and OUT may be the same buffer. */
static void
1018
do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
1019
{
1020
  /* The four 32-bit chunks of the text. */
1021
  u32 a, b, c, d;
1022
1023
  /* Temporaries used by the round function. */
1024
  u32 x, y;
1025
1026
  /* Input whitening and packing. */
1027
  INPACK (0, a, 0);
1028
  INPACK (1, b, 1);
1029
  INPACK (2, c, 2);
1030
  INPACK (3, d, 3);
1031
1032
  /* Encryption Feistel cycles. */
1033
  ENCCYCLE (0);
1034
  ENCCYCLE (1);
1035
  ENCCYCLE (2);
1036
  ENCCYCLE (3);
1037
  ENCCYCLE (4);
1038
  ENCCYCLE (5);
1039
  ENCCYCLE (6);
1040
  ENCCYCLE (7);
1041
1042
  /* Output whitening and unpacking.  The words are emitted in the order
   * c, d, a, b. */
1043
  OUTUNPACK (0, c, 4);
1044
  OUTUNPACK (1, d, 5);
1045
  OUTUNPACK (2, a, 6);
1046
  OUTUNPACK (3, b, 7);
1047
}
1048
1049
/* Portable C variant: encrypts one block from IN to OUT through
 * do_twofish_encrypt().  The return value is the number of stack bytes
 * the caller should burn afterwards. */
static unsigned int
1050
twofish_encrypt (void *context, byte *out, const byte *in)
1051
{
1052
  TWOFISH_context *ctx = context;
1053
  do_twofish_encrypt (ctx, out, in);
1054
  return /*burn_stack*/ (24+3*sizeof (void*));
1055
}
1056
1057
#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
1058
1059

1060
/* Decrypt one block.  in and out may be the same. */
1061
1062
#ifdef USE_AMD64_ASM
1063
1064
/* AMD64 assembly variant: decrypts one block from IN to OUT with the
 * context passed as an untyped pointer.  The return value is the number
 * of stack bytes the caller should burn afterwards. */
static unsigned int
1065
twofish_decrypt (void *context, byte *out, const byte *in)
1066
6
{
1067
6
  TWOFISH_context *ctx = context;
1068
6
  twofish_amd64_decrypt_block(ctx, out, in);
1069
6
  return /*burn_stack*/ (4*sizeof (void*));
1070
6
}
1071
1072
#elif defined(USE_ARM_ASM)
1073
1074
/* ARM assembly variant: decrypts one block from IN to OUT with the
 * context passed as an untyped pointer.  The return value is the number
 * of stack bytes the caller should burn afterwards. */
static unsigned int
1075
twofish_decrypt (void *context, byte *out, const byte *in)
1076
{
1077
  TWOFISH_context *ctx = context;
1078
  _gcry_twofish_arm_decrypt_block(ctx, out, in);
1079
  return /*burn_stack*/ (4*sizeof (void*));
1080
}
1081
1082
#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
1083
1084
/* Portable C decryption of one block; the mirror image of
 * do_twofish_encrypt().  The input words are loaded into c, d, a, b with
 * whitening subkeys ctx->w[4..7], the eight cycles are undone from 7 down
 * to 0, and the result is written as a, b, c, d after whitening with
 * ctx->w[0..3].  All input is loaded before anything is stored, so IN and
 * OUT may be the same buffer. */
static void
1085
do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
1086
{
1087
  /* The four 32-bit chunks of the text. */
1088
  u32 a, b, c, d;
1089
1090
  /* Temporaries used by the round function. */
1091
  u32 x, y;
1092
1093
  /* Input whitening and packing. */
1094
  INPACK (0, c, 4);
1095
  INPACK (1, d, 5);
1096
  INPACK (2, a, 6);
1097
  INPACK (3, b, 7);
1098
1099
  /* Decryption Feistel cycles, in the reverse of the encryption order. */
1100
  DECCYCLE (7);
1101
  DECCYCLE (6);
1102
  DECCYCLE (5);
1103
  DECCYCLE (4);
1104
  DECCYCLE (3);
1105
  DECCYCLE (2);
1106
  DECCYCLE (1);
1107
  DECCYCLE (0);
1108
1109
  /* Output whitening and unpacking. */
1110
  OUTUNPACK (0, a, 0);
1111
  OUTUNPACK (1, b, 1);
1112
  OUTUNPACK (2, c, 2);
1113
  OUTUNPACK (3, d, 3);
1114
}
1115
1116
/* Portable C variant: decrypts one block from IN to OUT through
 * do_twofish_decrypt().  The return value is the number of stack bytes
 * the caller should burn afterwards. */
static unsigned int
1117
twofish_decrypt (void *context, byte *out, const byte *in)
1118
{
1119
  TWOFISH_context *ctx = context;
1120
1121
  do_twofish_decrypt (ctx, out, in);
1122
  return /*burn_stack*/ (24+3*sizeof (void*));
1123
}
1124
1125
#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
1126
1127

1128
1129
/* Bulk encryption of complete blocks in CTR mode.  This function is only
1130
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
1131
   of size TWOFISH_BLOCKSIZE.

   CONTEXT is the TWOFISH_context, OUTBUF_ARG and INBUF_ARG are the
   destination and source buffers, and NBLOCKS is the number of
   TWOFISH_BLOCKSIZE-byte blocks to process.  Blocks are consumed in
   chunks of 16 (AVX2, when enabled in the context), then in chunks of 3
   (AMD64 assembly), and the remainder one block at a time.  The generic
   loop increments CTR by one per block; the assembly helpers receive CTR
   as a writable pointer. */
1132
static void
1133
_gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
1134
          const void *inbuf_arg, size_t nblocks)
1135
0
{
1136
0
  TWOFISH_context *ctx = context;
1137
0
  unsigned char *outbuf = outbuf_arg;
1138
0
  const unsigned char *inbuf = inbuf_arg;
1139
0
  unsigned char tmpbuf[TWOFISH_BLOCKSIZE];
1140
0
  /* burn_stack_depth tracks the largest stack-burn estimate of any path
     taken below. */
  unsigned int burn, burn_stack_depth = 0;
1141
1142
0
#ifdef USE_AVX2
1143
0
  if (ctx->use_avx2)
1144
0
    {
1145
0
      int did_use_avx2 = 0;
1146
1147
      /* Process data in 16 block chunks. */
1148
0
      while (nblocks >= 16)
1149
0
        {
1150
0
          _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
1151
1152
0
          nblocks -= 16;
1153
0
          outbuf += 16 * TWOFISH_BLOCKSIZE;
1154
0
          inbuf  += 16 * TWOFISH_BLOCKSIZE;
1155
0
          did_use_avx2 = 1;
1156
0
        }
1157
1158
0
      if (did_use_avx2)
1159
0
        {
1160
          /* twofish-avx2 assembly code does not use stack.  Note that
             burn_stack_depth is still 0 at this point, so the assignment
             below does not change its value. */
1161
0
          if (nblocks == 0)
1162
0
            burn_stack_depth = 0;
1163
0
        }
1164
0
    }
1165
0
#endif
1166
1167
0
#ifdef USE_AMD64_ASM
1168
0
  {
1169
    /* Process data in 3 block chunks. */
1170
0
    while (nblocks >= 3)
1171
0
      {
1172
0
        twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
1173
1174
0
        nblocks -= 3;
1175
0
        outbuf += 3 * TWOFISH_BLOCKSIZE;
1176
0
        inbuf += 3 * TWOFISH_BLOCKSIZE;
1177
1178
0
        /* Stack-burn estimate for the 3-block assembly routine. */
        burn = 8 * sizeof(void*);
1179
0
        if (burn > burn_stack_depth)
1180
0
          burn_stack_depth = burn;
1181
0
      }
1182
1183
    /* Use generic code to handle smaller chunks... */
1184
    /* TODO: use caching instead? */
1185
0
  }
1186
0
#endif
1187
1188
0
  for ( ;nblocks; nblocks-- )
1189
0
    {
1190
      /* Encrypt the counter. */
1191
0
      burn = twofish_encrypt(ctx, tmpbuf, ctr);
1192
0
      if (burn > burn_stack_depth)
1193
0
        burn_stack_depth = burn;
1194
1195
      /* XOR the input with the encrypted counter and store in output.  */
1196
0
      cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
1197
0
      outbuf += TWOFISH_BLOCKSIZE;
1198
0
      inbuf  += TWOFISH_BLOCKSIZE;
1199
      /* Increment the counter.  */
1200
0
      cipher_block_add(ctr, 1, TWOFISH_BLOCKSIZE);
1201
0
    }
1202
1203
0
  /* tmpbuf held encrypted counter blocks; wipe it before returning. */
  wipememory(tmpbuf, sizeof(tmpbuf));
1204
0
  _gcry_burn_stack(burn_stack_depth);
1205
0
}
1206
1207
1208
/* Bulk decryption of complete blocks in CBC mode.  This function is only
1209
   intended for the bulk encryption feature of cipher.c.

   CONTEXT is the TWOFISH_context, IV the chaining block, OUTBUF_ARG and
   INBUF_ARG are the destination and source buffers, and NBLOCKS is the
   number of TWOFISH_BLOCKSIZE-byte blocks to process.  Blocks are
   consumed in chunks of 16 (AVX2, when enabled in the context), then in
   chunks of 3 (AMD64 assembly), and the remainder one block at a time. */
1210
static void
1211
_gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
1212
          const void *inbuf_arg, size_t nblocks)
1213
0
{
1214
0
  TWOFISH_context *ctx = context;
1215
0
  unsigned char *outbuf = outbuf_arg;
1216
0
  const unsigned char *inbuf = inbuf_arg;
1217
0
  unsigned char savebuf[TWOFISH_BLOCKSIZE];
1218
0
  /* burn_stack_depth tracks the largest stack-burn estimate of any path
     taken below. */
  unsigned int burn, burn_stack_depth = 0;
1219
1220
0
#ifdef USE_AVX2
1221
0
  if (ctx->use_avx2)
1222
0
    {
1223
0
      int did_use_avx2 = 0;
1224
1225
      /* Process data in 16 block chunks. */
1226
0
      while (nblocks >= 16)
1227
0
        {
1228
0
          _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
1229
1230
0
          nblocks -= 16;
1231
0
          outbuf += 16 * TWOFISH_BLOCKSIZE;
1232
0
          inbuf  += 16 * TWOFISH_BLOCKSIZE;
1233
0
          did_use_avx2 = 1;
1234
0
        }
1235
1236
0
      if (did_use_avx2)
1237
0
        {
1238
          /* twofish-avx2 assembly code does not use stack.  Note that
             burn_stack_depth is still 0 at this point, so the assignment
             below does not change its value. */
1239
0
          if (nblocks == 0)
1240
0
            burn_stack_depth = 0;
1241
0
        }
1242
0
    }
1243
0
#endif
1244
1245
0
#ifdef USE_AMD64_ASM
1246
0
  {
1247
    /* Process data in 3 block chunks. */
1248
0
    while (nblocks >= 3)
1249
0
      {
1250
0
        twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
1251
1252
0
        nblocks -= 3;
1253
0
        outbuf += 3 * TWOFISH_BLOCKSIZE;
1254
0
        inbuf += 3 * TWOFISH_BLOCKSIZE;
1255
1256
0
        /* Stack-burn estimate for the 3-block assembly routine. */
        burn = 9 * sizeof(void*);
1257
0
        if (burn > burn_stack_depth)
1258
0
          burn_stack_depth = burn;
1259
0
      }
1260
1261
    /* Use generic code to handle smaller chunks... */
1262
0
  }
1263
0
#endif
1264
1265
0
  for ( ;nblocks; nblocks-- )
1266
0
    {
1267
      /* INBUF is needed later and it may be identical to OUTBUF, so store
1268
         the intermediate result to SAVEBUF.  */
1269
0
      burn = twofish_decrypt (ctx, savebuf, inbuf);
1270
0
      if (burn > burn_stack_depth)
1271
0
        burn_stack_depth = burn;
1272
1273
0
      /* NOTE(review): presumably this writes savebuf XOR iv to outbuf and
         then copies the ciphertext block from inbuf into iv -- confirm
         against the helper's definition. */
      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
1274
0
      inbuf += TWOFISH_BLOCKSIZE;
1275
0
      outbuf += TWOFISH_BLOCKSIZE;
1276
0
    }
1277
1278
0
  /* savebuf held raw block-decryption output; wipe it before returning. */
  wipememory(savebuf, sizeof(savebuf));
1279
0
  _gcry_burn_stack(burn_stack_depth);
1280
0
}
1281
1282
1283
/* Bulk decryption of complete blocks in CFB mode.  This function is only
1284
   intended for the bulk encryption feature of cipher.c.

   CONTEXT is the TWOFISH_context, IV the feedback block, OUTBUF_ARG and
   INBUF_ARG are the destination and source buffers, and NBLOCKS is the
   number of TWOFISH_BLOCKSIZE-byte blocks to process.  Blocks are
   consumed in chunks of 16 (AVX2, when enabled in the context), then in
   chunks of 3 (AMD64 assembly), and the remainder one block at a time.
   Note that the generic path uses the block *encryption* function, as CFB
   decryption requires. */
1285
static void
1286
_gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
1287
        const void *inbuf_arg, size_t nblocks)
1288
0
{
1289
0
  TWOFISH_context *ctx = context;
1290
0
  unsigned char *outbuf = outbuf_arg;
1291
0
  const unsigned char *inbuf = inbuf_arg;
1292
0
  /* burn_stack_depth tracks the largest stack-burn estimate of any path
     taken below. */
  unsigned int burn, burn_stack_depth = 0;
1293
1294
0
#ifdef USE_AVX2
1295
0
  if (ctx->use_avx2)
1296
0
    {
1297
0
      int did_use_avx2 = 0;
1298
1299
      /* Process data in 16 block chunks. */
1300
0
      while (nblocks >= 16)
1301
0
        {
1302
0
          _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
1303
1304
0
          nblocks -= 16;
1305
0
          outbuf += 16 * TWOFISH_BLOCKSIZE;
1306
0
          inbuf  += 16 * TWOFISH_BLOCKSIZE;
1307
0
          did_use_avx2 = 1;
1308
0
        }
1309
1310
0
      if (did_use_avx2)
1311
0
        {
1312
          /* twofish-avx2 assembly code does not use stack.  Note that
             burn_stack_depth is still 0 at this point, so the assignment
             below does not change its value. */
1313
0
          if (nblocks == 0)
1314
0
            burn_stack_depth = 0;
1315
0
        }
1316
0
    }
1317
0
#endif
1318
1319
0
#ifdef USE_AMD64_ASM
1320
0
  {
1321
    /* Process data in 3 block chunks. */
1322
0
    while (nblocks >= 3)
1323
0
      {
1324
0
        twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
1325
1326
0
        nblocks -= 3;
1327
0
        outbuf += 3 * TWOFISH_BLOCKSIZE;
1328
0
        inbuf += 3 * TWOFISH_BLOCKSIZE;
1329
1330
0
        /* Stack-burn estimate for the 3-block assembly routine. */
        burn = 8 * sizeof(void*);
1331
0
        if (burn > burn_stack_depth)
1332
0
          burn_stack_depth = burn;
1333
0
      }
1334
1335
    /* Use generic code to handle smaller chunks... */
1336
0
  }
1337
0
#endif
1338
1339
0
  for ( ;nblocks; nblocks-- )
1340
0
    {
1341
0
      /* Encrypt the feedback block in place. */
      burn = twofish_encrypt(ctx, iv, iv);
1342
0
      if (burn > burn_stack_depth)
1343
0
        burn_stack_depth = burn;
1344
1345
0
      /* NOTE(review): presumably this writes iv XOR inbuf to outbuf and
         then copies the ciphertext block from inbuf into iv -- confirm
         against the helper's definition. */
      cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
1346
0
      outbuf += TWOFISH_BLOCKSIZE;
1347
0
      inbuf += TWOFISH_BLOCKSIZE;
1348
0
    }
1349
1350
0
  _gcry_burn_stack(burn_stack_depth);
1351
0
}
1352
1353
/* Bulk encryption/decryption of complete blocks in OCB mode. */
1354
static size_t
1355
_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
1356
      const void *inbuf_arg, size_t nblocks, int encrypt)
1357
0
{
1358
0
#ifdef USE_AMD64_ASM
1359
0
  TWOFISH_context *ctx = (void *)&c->context.c;
1360
0
  unsigned char *outbuf = outbuf_arg;
1361
0
  const unsigned char *inbuf = inbuf_arg;
1362
0
  unsigned int burn, burn_stack_depth = 0;
1363
0
  u64 blkn = c->u_mode.ocb.data_nblocks;
1364
1365
0
#ifdef USE_AVX2
1366
0
  if (ctx->use_avx2)
1367
0
    {
1368
0
      int did_use_avx2 = 0;
1369
0
      u64 Ls[16];
1370
0
      u64 *l;
1371
1372
0
      if (nblocks >= 16)
1373
0
  {
1374
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1375
1376
    /* Process data in 16 block chunks. */
1377
0
    while (nblocks >= 16)
1378
0
      {
1379
0
        blkn += 16;
1380
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1381
1382
0
        if (encrypt)
1383
0
    _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
1384
0
            c->u_ctr.ctr, Ls);
1385
0
        else
1386
0
    _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
1387
0
            c->u_ctr.ctr, Ls);
1388
1389
0
        nblocks -= 16;
1390
0
        outbuf += 16 * TWOFISH_BLOCKSIZE;
1391
0
        inbuf  += 16 * TWOFISH_BLOCKSIZE;
1392
0
        did_use_avx2 = 1;
1393
0
      }
1394
0
  }
1395
1396
0
      if (did_use_avx2)
1397
0
  {
1398
    /* twofish-avx2 assembly code does not use stack */
1399
0
    if (nblocks == 0)
1400
0
      burn_stack_depth = 0;
1401
0
  }
1402
0
    }
1403
0
#endif
1404
1405
0
  {
1406
    /* Use u64 to store pointers for x32 support (assembly function
1407
      * assumes 64-bit pointers). */
1408
0
    u64 Ls[3];
1409
1410
    /* Process data in 3 block chunks. */
1411
0
    while (nblocks >= 3)
1412
0
      {
1413
0
  Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
1414
0
  Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
1415
0
  Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
1416
0
  blkn += 3;
1417
1418
0
  if (encrypt)
1419
0
    twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
1420
0
        Ls);
1421
0
  else
1422
0
    twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
1423
0
        Ls);
1424
1425
0
  nblocks -= 3;
1426
0
  outbuf += 3 * TWOFISH_BLOCKSIZE;
1427
0
  inbuf  += 3 * TWOFISH_BLOCKSIZE;
1428
1429
0
  burn = 8 * sizeof(void*);
1430
0
  if (burn > burn_stack_depth)
1431
0
    burn_stack_depth = burn;
1432
0
      }
1433
1434
    /* Use generic code to handle smaller chunks... */
1435
0
  }
1436
1437
0
  c->u_mode.ocb.data_nblocks = blkn;
1438
1439
0
  if (burn_stack_depth)
1440
0
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
1441
#else
1442
  (void)c;
1443
  (void)outbuf_arg;
1444
  (void)inbuf_arg;
1445
  (void)encrypt;
1446
#endif
1447
1448
0
  return nblocks;
1449
0
}
1450
1451
/* Bulk authentication of complete blocks in OCB mode. */
1452
static size_t
1453
_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
1454
      size_t nblocks)
1455
0
{
1456
0
#ifdef USE_AMD64_ASM
1457
0
  TWOFISH_context *ctx = (void *)&c->context.c;
1458
0
  const unsigned char *abuf = abuf_arg;
1459
0
  unsigned int burn, burn_stack_depth = 0;
1460
0
  u64 blkn = c->u_mode.ocb.aad_nblocks;
1461
1462
0
#ifdef USE_AVX2
1463
0
  if (ctx->use_avx2)
1464
0
    {
1465
0
      int did_use_avx2 = 0;
1466
0
      u64 Ls[16];
1467
0
      u64 *l;
1468
1469
0
      if (nblocks >= 16)
1470
0
  {
1471
0
          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
1472
1473
    /* Process data in 16 block chunks. */
1474
0
    while (nblocks >= 16)
1475
0
      {
1476
0
        blkn += 16;
1477
0
        *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
1478
1479
0
        _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
1480
0
            c->u_mode.ocb.aad_sum, Ls);
1481
1482
0
        nblocks -= 16;
1483
0
        abuf += 16 * TWOFISH_BLOCKSIZE;
1484
0
        did_use_avx2 = 1;
1485
0
      }
1486
0
  }
1487
1488
0
      if (did_use_avx2)
1489
0
  {
1490
    /* twofish-avx2 assembly code does not use stack */
1491
0
    if (nblocks == 0)
1492
0
      burn_stack_depth = 0;
1493
0
  }
1494
1495
      /* Use generic code to handle smaller chunks... */
1496
0
    }
1497
0
#endif
1498
1499
0
  {
1500
    /* Use u64 to store pointers for x32 support (assembly function
1501
      * assumes 64-bit pointers). */
1502
0
    u64 Ls[3];
1503
1504
    /* Process data in 3 block chunks. */
1505
0
    while (nblocks >= 3)
1506
0
      {
1507
0
  Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
1508
0
  Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
1509
0
  Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
1510
0
  blkn += 3;
1511
1512
0
  twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
1513
0
             c->u_mode.ocb.aad_sum, Ls);
1514
1515
0
  nblocks -= 3;
1516
0
  abuf += 3 * TWOFISH_BLOCKSIZE;
1517
1518
0
  burn = 8 * sizeof(void*);
1519
0
  if (burn > burn_stack_depth)
1520
0
    burn_stack_depth = burn;
1521
0
      }
1522
1523
    /* Use generic code to handle smaller chunks... */
1524
0
  }
1525
1526
0
  c->u_mode.ocb.aad_nblocks = blkn;
1527
1528
0
  if (burn_stack_depth)
1529
0
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
1530
#else
1531
  (void)c;
1532
  (void)abuf_arg;
1533
#endif
1534
1535
0
  return nblocks;
1536
0
}
1537
1538
1539
static unsigned int
1540
twofish_crypt_blk1_16(void *context, byte *out, const byte *in,
1541
          size_t num_blks, int encrypt)
1542
0
{
1543
0
  TWOFISH_context *ctx = context;
1544
0
  unsigned int burn, burn_stack_depth = 0;
1545
1546
0
#ifdef USE_AVX2
1547
0
  if (num_blks == 16 && ctx->use_avx2)
1548
0
    {
1549
0
      _gcry_twofish_avx2_blk16 (ctx, out, in, encrypt);
1550
0
      return 0;
1551
0
    }
1552
0
#endif
1553
1554
0
#ifdef USE_AMD64_ASM
1555
0
  while (num_blks >= 3)
1556
0
    {
1557
0
      _gcry_twofish_amd64_blk3 (ctx, out, in, encrypt);
1558
0
      burn = 8 * sizeof(void *);
1559
0
      burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
1560
0
      out += 3 * TWOFISH_BLOCKSIZE;
1561
0
      in += 3 * TWOFISH_BLOCKSIZE;
1562
0
      num_blks -= 3;
1563
0
    }
1564
0
#endif
1565
1566
0
  while (num_blks >= 1)
1567
0
    {
1568
0
      if (encrypt)
1569
0
  burn = twofish_encrypt((void *)ctx, out, in);
1570
0
      else
1571
0
  burn = twofish_decrypt((void *)ctx, out, in);
1572
1573
0
      burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
1574
0
      out += TWOFISH_BLOCKSIZE;
1575
0
      in += TWOFISH_BLOCKSIZE;
1576
0
      num_blks--;
1577
0
    }
1578
1579
0
  return burn_stack_depth;
1580
0
}
1581
1582
static unsigned int
1583
twofish_encrypt_blk1_16(void *ctx, byte *out, const byte *in,
1584
      size_t num_blks)
1585
0
{
1586
0
  return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 1);
1587
0
}
1588
1589
static unsigned int
1590
twofish_decrypt_blk1_16(void *ctx, byte *out, const byte *in,
1591
      size_t num_blks)
1592
0
{
1593
0
  return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 0);
1594
0
}
1595
1596
1597
/* Bulk encryption/decryption of complete blocks in XTS mode. */
1598
static void
1599
_gcry_twofish_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
1600
       const void *inbuf_arg, size_t nblocks, int encrypt)
1601
0
{
1602
0
  TWOFISH_context *ctx = context;
1603
0
  unsigned char *outbuf = outbuf_arg;
1604
0
  const unsigned char *inbuf = inbuf_arg;
1605
0
  int burn_stack_depth = 0;
1606
1607
  /* Process remaining blocks. */
1608
0
  if (nblocks)
1609
0
    {
1610
0
      unsigned char tmpbuf[16 * 16];
1611
0
      unsigned int tmp_used = 16;
1612
0
      size_t tmpbufsize = 15 * 16;
1613
0
      size_t nburn;
1614
1615
0
#ifdef USE_AVX2
1616
0
      if (ctx->use_avx2)
1617
0
  tmpbufsize = 16 * 16;
1618
0
#endif
1619
1620
0
      nburn = bulk_xts_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16
1621
0
                                              : twofish_decrypt_blk1_16,
1622
0
                                 outbuf, inbuf, nblocks,
1623
0
                                 tweak, tmpbuf, tmpbufsize / 16,
1624
0
                                 &tmp_used);
1625
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1626
1627
0
      wipememory(tmpbuf, tmp_used);
1628
0
    }
1629
1630
0
  if (burn_stack_depth)
1631
0
    _gcry_burn_stack(burn_stack_depth);
1632
0
}
1633
1634
1635
/* Bulk encryption/decryption in ECB mode. */
1636
static void
1637
_gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg,
1638
       size_t nblocks, int encrypt)
1639
0
{
1640
0
  TWOFISH_context *ctx = context;
1641
0
  unsigned char *outbuf = outbuf_arg;
1642
0
  const unsigned char *inbuf = inbuf_arg;
1643
0
  int burn_stack_depth = 0;
1644
1645
  /* Process remaining blocks. */
1646
0
  if (nblocks)
1647
0
    {
1648
0
      size_t fn_maxblocks = 15;
1649
0
      size_t nburn;
1650
1651
0
#ifdef USE_AVX2
1652
0
      if (ctx->use_avx2)
1653
0
  fn_maxblocks = 16;
1654
0
#endif
1655
1656
0
      nburn = bulk_ecb_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16
1657
0
                                              : twofish_decrypt_blk1_16,
1658
0
                                 outbuf, inbuf, nblocks, fn_maxblocks);
1659
0
      burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
1660
0
    }
1661
1662
0
  if (burn_stack_depth)
1663
0
    _gcry_burn_stack(burn_stack_depth);
1664
0
}
1665
1666
1667

1668
/* Test a single encryption and decryption with each key size. */
1669
1670
static const char*
1671
selftest (void)
1672
3
{
1673
3
  TWOFISH_context ctx; /* Expanded key. */
1674
3
  byte scratch[16];    /* Encryption/decryption result buffer. */
1675
3
  cipher_bulk_ops_t bulk_ops;
1676
1677
  /* Test vectors for single encryption/decryption.  Note that I am using
1678
   * the vectors from the Twofish paper's "known answer test", I=3 for
1679
   * 128-bit and I=4 for 256-bit, instead of the all-0 vectors from the
1680
   * "intermediate value test", because an all-0 key would trigger all the
1681
   * special cases in the RS matrix multiply, leaving the math untested. */
1682
3
  static  byte plaintext[16] = {
1683
3
    0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E,
1684
3
    0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19
1685
3
  };
1686
3
  static byte key[16] = {
1687
3
    0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32,
1688
3
    0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A
1689
3
  };
1690
3
  static const byte ciphertext[16] = {
1691
3
    0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85,
1692
3
    0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3
1693
3
  };
1694
3
  static byte plaintext_256[16] = {
1695
3
    0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F,
1696
3
    0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6
1697
3
  };
1698
3
  static byte key_256[32] = {
1699
3
    0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46,
1700
3
    0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D,
1701
3
    0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B,
1702
3
    0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F
1703
3
  };
1704
3
  static const byte ciphertext_256[16] = {
1705
3
    0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97,
1706
3
    0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
1707
3
  };
1708
1709
3
  twofish_setkey (&ctx, key, sizeof(key), &bulk_ops);
1710
3
  twofish_encrypt (&ctx, scratch, plaintext);
1711
3
  if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
1712
0
    return "Twofish-128 test encryption failed.";
1713
3
  twofish_decrypt (&ctx, scratch, scratch);
1714
3
  if (memcmp (scratch, plaintext, sizeof (plaintext)))
1715
0
    return "Twofish-128 test decryption failed.";
1716
1717
3
  twofish_setkey (&ctx, key_256, sizeof(key_256), &bulk_ops);
1718
3
  twofish_encrypt (&ctx, scratch, plaintext_256);
1719
3
  if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
1720
0
    return "Twofish-256 test encryption failed.";
1721
3
  twofish_decrypt (&ctx, scratch, scratch);
1722
3
  if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
1723
0
    return "Twofish-256 test decryption failed.";
1724
1725
3
  return NULL;
1726
3
}
1727

1728
/* More complete test program.  This does 1000 encryptions and decryptions
1729
 * with each of 250 128-bit keys and 2000 encryptions and decryptions with
1730
 * each of 125 256-bit keys, using a feedback scheme similar to a Feistel
1731
 * cipher, so as to be sure of testing all the table entries pretty
1732
 * thoroughly.  We keep changing the keys so as to get a more meaningful
1733
 * performance number, since the key setup is non-trivial for Twofish. */
1734
1735
#ifdef TEST
1736
1737
#include <stdio.h>
1738
#include <string.h>
1739
#include <time.h>
1740
1741
int
1742
main()
1743
{
1744
  TWOFISH_context ctx;     /* Expanded key. */
1745
  int i, j;                /* Loop counters. */
1746
  cipher_bulk_ops_t bulk_ops;
1747
1748
  const char *encrypt_msg; /* Message to print regarding encryption test;
1749
                            * the printf is done outside the loop to avoid
1750
                            * stuffing up the timing. */
1751
  clock_t timer; /* For computing elapsed time. */
1752
1753
  /* Test buffer. */
1754
  byte buffer[4][16] = {
1755
    {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
1756
     0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
1757
    {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
1758
     0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
1759
    {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
1760
     0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
1761
    {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
1762
     0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
1763
  };
1764
1765
  /* Expected outputs for the million-operation test */
1766
  static const byte test_encrypt[4][16] = {
1767
    {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13,
1768
     0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B},
1769
    {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E,
1770
     0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27},
1771
    {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64,
1772
     0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73},
1773
    {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8,
1774
     0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57}
1775
  };
1776
  static const byte test_decrypt[4][16] = {
1777
    {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
1778
     0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
1779
    {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
1780
     0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
1781
    {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
1782
     0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
1783
    {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
1784
     0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
1785
  };
1786
1787
  /* Start the timer ticking. */
1788
  timer = clock ();
1789
1790
  /* Encryption test. */
1791
  for (i = 0; i < 125; i++)
1792
    {
1793
      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
1794
      for (j = 0; j < 1000; j++)
1795
        twofish_encrypt (&ctx, buffer[2], buffer[2]);
1796
      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
1797
      for (j = 0; j < 1000; j++)
1798
        twofish_encrypt (&ctx, buffer[3], buffer[3]);
1799
      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
1800
      for (j = 0; j < 1000; j++) {
1801
        twofish_encrypt (&ctx, buffer[0], buffer[0]);
1802
        twofish_encrypt (&ctx, buffer[1], buffer[1]);
1803
      }
1804
    }
1805
  encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ?
1806
    "encryption failure!\n" : "encryption OK!\n";
1807
1808
  /* Decryption test. */
1809
  for (i = 0; i < 125; i++)
1810
    {
1811
      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
1812
      for (j = 0; j < 1000; j++) {
1813
        twofish_decrypt (&ctx, buffer[0], buffer[0]);
1814
        twofish_decrypt (&ctx, buffer[1], buffer[1]);
1815
      }
1816
      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
1817
      for (j = 0; j < 1000; j++)
1818
        twofish_decrypt (&ctx, buffer[3], buffer[3]);
1819
      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
1820
      for (j = 0; j < 1000; j++)
1821
        twofish_decrypt (&ctx, buffer[2], buffer[2]);
1822
    }
1823
1824
  /* Stop the timer, and print results. */
1825
  timer = clock () - timer;
1826
  printf (encrypt_msg);
1827
  printf (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ?
1828
          "decryption failure!\n" : "decryption OK!\n");
1829
  printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC);
1830
1831
  return 0;
1832
}
1833
1834
#endif /* TEST */
1835
1836

1837
1838
gcry_cipher_spec_t _gcry_cipher_spec_twofish =
1839
  {
1840
    GCRY_CIPHER_TWOFISH, {0, 0},
1841
    "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context),
1842
    twofish_setkey, twofish_encrypt, twofish_decrypt
1843
  };
1844
1845
gcry_cipher_spec_t _gcry_cipher_spec_twofish128 =
1846
  {
1847
    GCRY_CIPHER_TWOFISH128, {0, 0},
1848
    "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context),
1849
    twofish_setkey, twofish_encrypt, twofish_decrypt
1850
  };