/src/libgcrypt/cipher/twofish.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Twofish for GPG |
2 | | * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc. |
3 | | * Written by Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 |
4 | | * 256-bit key length added March 20, 1999 |
5 | | * Some modifications to reduce the text size by Werner Koch, April, 1998 |
6 | | * |
7 | | * This file is part of Libgcrypt. |
8 | | * |
9 | | * Libgcrypt is free software; you can redistribute it and/or modify |
10 | | * it under the terms of the GNU Lesser General Public License as |
11 | | * published by the Free Software Foundation; either version 2.1 of |
12 | | * the License, or (at your option) any later version. |
13 | | * |
14 | | * Libgcrypt is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | * GNU Lesser General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU Lesser General Public |
20 | | * License along with this program; if not, write to the Free Software |
21 | | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
22 | | ******************************************************************** |
23 | | * |
24 | | * This code is a "clean room" implementation, written from the paper |
25 | | * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, |
26 | | * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available |
27 | | * through http://www.counterpane.com/twofish.html |
28 | | * |
29 | | * For background information on multiplication in finite fields, used for |
30 | | * the matrix operations in the key schedule, see the book _Contemporary |
31 | | * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the |
32 | | * Third Edition. |
33 | | * |
34 | | * Only the 128- and 256-bit key sizes are supported. This code is intended |
35 | | * for GNU C on a 32-bit system, but it should work almost anywhere. Loops |
36 | | * are unrolled, precomputation tables are used, etc., for maximum speed at |
37 | | * some cost in memory consumption. */ |
38 | | |
39 | | #include <config.h> |
40 | | #include <stdio.h> |
41 | | #include <stdlib.h> |
42 | | #include <string.h> /* for memcmp() */ |
43 | | |
44 | | #include "types.h" /* for byte and u32 typedefs */ |
45 | | #include "g10lib.h" |
46 | | #include "cipher.h" |
47 | | #include "bufhelp.h" |
48 | | #include "cipher-internal.h" |
49 | | #include "bulkhelp.h" |
50 | | |
51 | | |
52 | 0 | #define TWOFISH_BLOCKSIZE 16 /* block size in bytes (Twofish is a 128-bit block cipher) */ |
53 | | |
54 | | |
55 | | /* USE_AMD64_ASM indicates whether to use AMD64 assembly code (x86-64 with a compatible assembler). */ |
56 | | #undef USE_AMD64_ASM |
57 | | #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
58 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
59 | | # define USE_AMD64_ASM 1 |
60 | | #endif |
61 | | |
62 | | /* USE_ARM_ASM indicates whether to use ARM assembly code; set for __ARMEL__ or __AARCH64EL__ targets. */ |
63 | | #undef USE_ARM_ASM |
64 | | #if defined(__ARMEL__) |
65 | | # if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) |
66 | | # define USE_ARM_ASM 1 |
67 | | # endif |
68 | | #endif |
69 | | # if defined(__AARCH64EL__) |
70 | | # ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS |
71 | | # define USE_ARM_ASM 1 |
72 | | # endif |
73 | | # endif |
74 | | |
75 | | /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code (also needs ENABLE_AVX2_SUPPORT). */ |
76 | | #undef USE_AVX2 |
77 | | #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ |
78 | | defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) |
79 | | # if defined(ENABLE_AVX2_SUPPORT) |
80 | | # define USE_AVX2 1 |
81 | | # endif |
82 | | #endif |
83 | | |
84 | | |
85 | | /* Prototype for the file-local self-test function; it returns a string pointer. */ |
86 | | static const char *selftest(void); |
87 | | |
88 | | |
89 | | /* Prototypes for the file-local bulk functions.  The OCB pair takes a cipher handle and returns size_t; the others take an opaque context pointer and return void. */ |
90 | | static void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr, |
91 | | void *outbuf_arg, const void *inbuf_arg, |
92 | | size_t nblocks); |
93 | | static void _gcry_twofish_cbc_dec (void *context, unsigned char *iv, |
94 | | void *outbuf_arg, const void *inbuf_arg, |
95 | | size_t nblocks); |
96 | | static void _gcry_twofish_cfb_dec (void *context, unsigned char *iv, |
97 | | void *outbuf_arg, const void *inbuf_arg, |
98 | | size_t nblocks); |
99 | | static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
100 | | const void *inbuf_arg, size_t nblocks, |
101 | | int encrypt); |
102 | | static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, |
103 | | size_t nblocks); |
104 | | static void _gcry_twofish_xts_crypt (void *context, unsigned char *tweak, |
105 | | void *outbuf_arg, const void *inbuf_arg, |
106 | | size_t nblocks, int encrypt); |
107 | | static void _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, |
108 | | const void *inbuf_arg, size_t nblocks, |
109 | | int encrypt); |
110 | | |
111 | | /* Structure for an expanded Twofish key. s contains the key-dependent |
112 | | * S-boxes composed with the MDS matrix; w contains the eight "whitening" |
113 | | * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys. Note |
114 | | * that k[i] corresponds to what the Twofish paper calls K[i+8]. */ |
115 | | typedef struct { /* expanded Twofish key */ |
116 | | u32 s[4][256], w[8], k[32]; /* s: key-dependent S-boxes composed with MDS; w: whitening subkeys; k: round subkeys */ |
117 | | |
118 | | #ifdef USE_AVX2 |
119 | | int use_avx2; /* only present in USE_AVX2 builds; presumably a runtime enable flag for the AVX2 code -- TODO confirm */ |
120 | | #endif |
121 | | } TWOFISH_context; |
122 | | |
123 | | |
124 | | /* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and additional |
125 | | * stack to store XMM6-XMM15 are needed.  ASM_FUNC_ABI is defined only when USE_AVX2 is, and is empty outside Win64. */ |
126 | | #undef ASM_FUNC_ABI |
127 | | #if defined(USE_AVX2) |
128 | | # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS |
129 | | # define ASM_FUNC_ABI __attribute__((sysv_abi)) |
130 | | # else |
131 | | # define ASM_FUNC_ABI |
132 | | # endif |
133 | | #endif |
134 | | |
135 | | |
136 | | /* These two tables are the q0 and q1 permutations, exactly as described in |
137 | | * the Twofish paper. */ |
138 | | |
139 | | static const byte q0[256] = { /* the q0 permutation: q0[x] for each byte value x, in index order */ |
140 | | 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, |
141 | | 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, |
142 | | 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, |
143 | | 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, |
144 | | 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, |
145 | | 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, |
146 | | 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, |
147 | | 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, |
148 | | 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, |
149 | | 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, |
150 | | 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, |
151 | | 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, |
152 | | 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, |
153 | | 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, |
154 | | 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, |
155 | | 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, |
156 | | 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, |
157 | | 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, |
158 | | 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, |
159 | | 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, |
160 | | 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, |
161 | | 0x4A, 0x5E, 0xC1, 0xE0 |
162 | | }; |
163 | | |
164 | | static const byte q1[256] = { /* the q1 permutation: q1[x] for each byte value x, in index order */ |
165 | | 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, |
166 | | 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, |
167 | | 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, |
168 | | 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, |
169 | | 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, |
170 | | 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, |
171 | | 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, |
172 | | 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, |
173 | | 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, |
174 | | 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, |
175 | | 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, |
176 | | 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, |
177 | | 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, |
178 | | 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, |
179 | | 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, |
180 | | 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, |
181 | | 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, |
182 | | 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, |
183 | | 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, |
184 | | 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, |
185 | | 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, |
186 | | 0x55, 0x09, 0xBE, 0x91 |
187 | | }; |
188 | | |
189 | | /* These MDS tables are actually tables of MDS composed with q0 and q1, |
190 | | * because it is only ever used that way and we can save some time by |
191 | | * precomputing. Of course the main saving comes from precomputing the |
192 | | * GF(2^8) multiplication involved in the MDS matrix multiply; by looking |
193 | | * things up in these tables we reduce the matrix multiply to four lookups |
194 | | * and three XORs. Semi-formally, the definition of these tables is: |
195 | | * mds[0][i] = MDS (q1[i] 0 0 0)^T mds[1][i] = MDS (0 q0[i] 0 0)^T |
196 | | * mds[2][i] = MDS (0 0 q1[i] 0)^T mds[3][i] = MDS (0 0 0 q0[i])^T |
197 | | * where ^T means "transpose", the matrix multiply is performed in GF(2^8) |
198 | | * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described |
199 | | * by Schneier et al, and I'm casually glossing over the byte/word |
200 | | * conversion issues. */ |
201 | | |
202 | | static const u32 mds[4][256] = { /* mds[column][byte]: MDS column multiply precomposed with q0/q1 */ |
203 | | {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, /* mds[0][i] = MDS (q1[i] 0 0 0)^T */ |
204 | | 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, |
205 | | 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, |
206 | | 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, |
207 | | 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, |
208 | | 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, |
209 | | 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, |
210 | | 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, |
211 | | 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, |
212 | | 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, |
213 | | 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, |
214 | | 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, |
215 | | 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, |
216 | | 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, |
217 | | 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, |
218 | | 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, |
219 | | 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, |
220 | | 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, |
221 | | 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, |
222 | | 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, |
223 | | 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, |
224 | | 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, |
225 | | 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, |
226 | | 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, |
227 | | 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, |
228 | | 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, |
229 | | 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, |
230 | | 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, |
231 | | 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, |
232 | | 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, |
233 | | 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, |
234 | | 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, |
235 | | 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, |
236 | | 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, |
237 | | 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, |
238 | | 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, |
239 | | 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, |
240 | | 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, |
241 | | 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, |
242 | | 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, |
243 | | 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, |
244 | | 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, |
245 | | 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91}, |
246 | | |
247 | | {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, /* mds[1][i] = MDS (0 q0[i] 0 0)^T */ |
248 | | 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, |
249 | | 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, |
250 | | 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, |
251 | | 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, |
252 | | 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, |
253 | | 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, |
254 | | 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, |
255 | | 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, |
256 | | 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, |
257 | | 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, |
258 | | 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, |
259 | | 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, |
260 | | 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, |
261 | | 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, |
262 | | 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, |
263 | | 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, |
264 | | 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, |
265 | | 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, |
266 | | 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, |
267 | | 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, |
268 | | 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, |
269 | | 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, |
270 | | 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, |
271 | | 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, |
272 | | 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, |
273 | | 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, |
274 | | 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, |
275 | | 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, |
276 | | 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, |
277 | | 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, |
278 | | 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, |
279 | | 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, |
280 | | 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, |
281 | | 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, |
282 | | 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, |
283 | | 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, |
284 | | 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, |
285 | | 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, |
286 | | 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, |
287 | | 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, |
288 | | 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, |
289 | | 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8}, |
290 | | |
291 | | {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, /* mds[2][i] = MDS (0 0 q1[i] 0)^T */ |
292 | | 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, |
293 | | 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, |
294 | | 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, |
295 | | 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, |
296 | | 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, |
297 | | 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, |
298 | | 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, |
299 | | 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, |
300 | | 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, |
301 | | 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, |
302 | | 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, |
303 | | 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, |
304 | | 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, |
305 | | 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, |
306 | | 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, |
307 | | 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, |
308 | | 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, |
309 | | 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, |
310 | | 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, |
311 | | 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, |
312 | | 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, |
313 | | 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, |
314 | | 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, |
315 | | 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, |
316 | | 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773, |
317 | | 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, |
318 | | 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, |
319 | | 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, |
320 | | 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, |
321 | | 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, |
322 | | 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, |
323 | | 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, |
324 | | 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, |
325 | | 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, |
326 | | 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, |
327 | | 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, |
328 | | 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, |
329 | | 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, |
330 | | 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, |
331 | | 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, |
332 | | 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, |
333 | | 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF}, |
334 | | |
335 | | {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, /* mds[3][i] = MDS (0 0 0 q0[i])^T */ |
336 | | 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, |
337 | | 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, |
338 | | 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, |
339 | | 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, |
340 | | 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, |
341 | | 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, |
342 | | 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, |
343 | | 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, |
344 | | 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, |
345 | | 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, |
346 | | 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, |
347 | | 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, |
348 | | 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, |
349 | | 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, |
350 | | 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, |
351 | | 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, |
352 | | 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, |
353 | | 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, |
354 | | 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, |
355 | | 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, |
356 | | 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, |
357 | | 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, |
358 | | 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, |
359 | | 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, |
360 | | 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, |
361 | | 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, |
362 | | 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, |
363 | | 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, |
364 | | 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, |
365 | | 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, |
366 | | 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, |
367 | | 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, |
368 | | 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, |
369 | | 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, |
370 | | 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, |
371 | | 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, |
372 | | 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, |
373 | | 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, |
374 | | 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, |
375 | | 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, |
376 | | 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, |
377 | | 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8} |
378 | | }; |
379 | | |
380 | | /* The exp_to_poly and poly_to_exp tables are used to perform efficient |
381 | | * operations in GF(2^8) represented as GF(2)[x]/w(x) where |
382 | | * w(x)=x^8+x^6+x^3+x^2+1. We care about doing that because it's part of the |
383 | | * definition of the RS matrix in the key schedule. Elements of that field |
384 | | * are polynomials of degree not greater than 7 and all coefficients 0 or 1, |
385 | | * which can be represented naturally by bytes (just substitute x=2). In that |
386 | | * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8) |
387 | | * multiplication is inefficient without hardware support. To multiply |
388 | | * faster, I make use of the fact x is a generator for the nonzero elements, |
389 | | * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for |
390 | | * some n in 0..254. Note that that caret is exponentiation in GF(2^8), |
391 | | * *not* polynomial notation. So if I want to compute pq where p and q are |
392 | | * in GF(2^8), I can just say: |
393 | | * 1. if p=0 or q=0 then pq=0 |
394 | | * 2. otherwise, find m and n such that p=x^m and q=x^n |
395 | | * 3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq |
396 | | * The translations in steps 2 and 3 are looked up in the tables |
397 | | * poly_to_exp (for step 2) and exp_to_poly (for step 3). To see this |
398 | | * in action, look at the CALC_S macro. As additional wrinkles, note that |
399 | | * one of my operands is always a constant, so the poly_to_exp lookup on it |
400 | | * is done in advance; I included the original values in the comments so |
401 | | * readers can have some chance of recognizing that this *is* the RS matrix |
402 | | * from the Twofish paper. I've only included the table entries I actually |
403 | | * need; I never do a lookup on a variable input of zero and the biggest |
404 | | * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll |
405 | | * never sum to more than 491. I'm repeating part of the exp_to_poly table |
406 | | * so that I don't have to do mod-255 reduction in the exponent arithmetic. |
407 | | * Since I know my constant operands are never zero, I only have to worry |
408 | | * about zero values in the variable operand, and I do it with a simple |
409 | | * conditional branch. I know conditionals are expensive, but I couldn't |
410 | | * see a non-horrible way of avoiding them, and I did manage to group the |
411 | | * statements so that each if covers four group multiplications. */ |
412 | | |
413 | | static const u16 poly_to_exp[256] = { /* discrete log base x in GF(2)[x]/w(x), indexed by the polynomial's byte value */ |
414 | | 492, /* index 0 (zero has no logarithm): 492 is the first exp_to_poly index past its explicit initializers, i.e. a zero entry.  NOTE(review): this appears to supersede the "never do a lookup on ... zero" / "conditional branch" remarks above -- confirm against CALC_S */ |
415 | | 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, |
416 | | 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, |
417 | | 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, |
418 | | 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, |
419 | | 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, |
420 | | 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, |
421 | | 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, |
422 | | 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, |
423 | | 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, |
424 | | 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, |
425 | | 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, |
426 | | 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, |
427 | | 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, |
428 | | 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, |
429 | | 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, |
430 | | 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, |
431 | | 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, |
432 | | 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, |
433 | | 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, |
434 | | 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, |
435 | | 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, |
436 | | 0x85, 0xC8, 0xA1 |
437 | | }; |
438 | | |
439 | | static const byte exp_to_poly[492 + 256] = { /* exp_to_poly[n] = x^n for n = 0..491 (values repeat with period 255); the last 256 entries have no initializer and are therefore zero */ |
440 | | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, |
441 | | 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, |
442 | | 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, |
443 | | 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, |
444 | | 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, |
445 | | 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, |
446 | | 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, |
447 | | 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, |
448 | | 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, |
449 | | 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, |
450 | | 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, |
451 | | 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, |
452 | | 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, |
453 | | 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, |
454 | | 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, |
455 | | 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, |
456 | | 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, |
457 | | 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, |
458 | | 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, |
459 | | 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, |
460 | | 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, |
461 | | 0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, |
462 | | 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, |
463 | | 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, |
464 | | 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, |
465 | | 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, |
466 | | 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, |
467 | | 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, |
468 | | 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, |
469 | | 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, |
470 | | 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, |
471 | | 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, |
472 | | 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, |
473 | | 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, |
474 | | 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, |
475 | | 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, |
476 | | 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, |
477 | | 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, |
478 | | 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, |
479 | | 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, |
480 | | 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, |
481 | | }; |
482 | | |
483 | | |
484 | | /* The table constants are indices of |
485 | | * S-box entries, preprocessed through q0 and q1. */ |
486 | | static byte calc_sb_tbl[512] = { |
487 | | 0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4, |
488 | | 0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8, |
489 | | 0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B, |
490 | | 0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B, |
491 | | 0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD, |
492 | | 0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1, |
493 | | 0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B, |
494 | | 0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F, |
495 | | 0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B, |
496 | | 0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D, |
497 | | 0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E, |
498 | | 0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5, |
499 | | 0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14, |
500 | | 0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3, |
501 | | 0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54, |
502 | | 0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51, |
503 | | 0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A, |
504 | | 0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96, |
505 | | 0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10, |
506 | | 0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C, |
507 | | 0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7, |
508 | | 0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70, |
509 | | 0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB, |
510 | | 0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8, |
511 | | 0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF, |
512 | | 0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC, |
513 | | 0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF, |
514 | | 0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2, |
515 | | 0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82, |
516 | | 0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9, |
517 | | 0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97, |
518 | | 0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17, |
519 | | 0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D, |
520 | | 0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3, |
521 | | 0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C, |
522 | | 0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E, |
523 | | 0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F, |
524 | | 0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49, |
525 | | 0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21, |
526 | | 0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9, |
527 | | 0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD, |
528 | | 0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01, |
529 | | 0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F, |
530 | | 0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48, |
531 | | 0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E, |
532 | | 0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19, |
533 | | 0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57, |
534 | | 0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64, |
535 | | 0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE, |
536 | | 0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5, |
537 | | 0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44, |
538 | | 0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69, |
539 | | 0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15, |
540 | | 0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E, |
541 | | 0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34, |
542 | | 0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC, |
543 | | 0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B, |
544 | | 0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB, |
545 | | 0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52, |
546 | | 0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9, |
547 | | 0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4, |
548 | | 0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2, |
549 | | 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56, |
550 | | 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91 |
551 | | }; |
552 | | |
/* Macro to perform one column of the RS matrix multiplication.  The
 * parameters a, b, c, and d are the four bytes of output; i is the index
 * of the key byte, and w, x, y, and z are the column of constants from
 * the RS matrix, preprocessed through the poly_to_exp table.
 *
 * The expansion reads `key', `poly_to_exp' and `exp_to_poly' from the
 * enclosing scope and uses the variable `tmp' there as scratch.  It is
 * wrapped in do { } while (0) so that "CALC_S (...);" is exactly one
 * statement and stays valid as the body of an unbraced if/else. */

#define CALC_S(a, b, c, d, i, w, x, y, z) \
  do \
    { \
      tmp = poly_to_exp[key[i]]; \
      (a) ^= exp_to_poly[tmp + (w)]; \
      (b) ^= exp_to_poly[tmp + (x)]; \
      (c) ^= exp_to_poly[tmp + (y)]; \
      (d) ^= exp_to_poly[tmp + (z)]; \
    } \
  while (0)
566 | | |
/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
 * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
 * four S-boxes, where i is the index of the entry to compute, and a and b
 * are the index numbers preprocessed through the q0 and q1 tables
 * respectively.  CALC_SB is simply a convenience to make the code shorter;
 * it calls CALC_SB_2 four times with consecutive indices from i to i+3,
 * using the remaining parameters two by two.
 *
 * Both expand to a single do { } while (0) statement, so they can be used
 * as the body of an unbraced if or loop.  They read ctx, mds, q0, q1 and
 * the S-vector bytes sa..sh from the enclosing scope. */

#define CALC_SB_2(i, a, b) \
  do \
    { \
      ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
      ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
      ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
      ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]; \
    } \
  while (0)

#define CALC_SB(i, a, b, c, d, e, f, g, h) \
  do \
    { \
      CALC_SB_2 (i, a, b); \
      CALC_SB_2 ((i) + 1, c, d); \
      CALC_SB_2 ((i) + 2, e, f); \
      CALC_SB_2 ((i) + 3, g, h); \
    } \
  while (0)
584 | | |
/* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys: each
 * S-box entry goes through two additional q-table lookups keyed by the
 * S-vector bytes si..sp.  As with the 128-bit versions, each macro expands
 * to one do { } while (0) statement and carries no trailing semicolon of
 * its own. */

#define CALC_SB256_2(i, a, b) \
  do \
    { \
      ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
      ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
      ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
      ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; \
    } \
  while (0)

#define CALC_SB256(i, a, b, c, d, e, f, g, h) \
  do \
    { \
      CALC_SB256_2 (i, a, b); \
      CALC_SB256_2 ((i) + 1, c, d); \
      CALC_SB256_2 ((i) + 2, e, f); \
      CALC_SB256_2 ((i) + 3, g, h); \
    } \
  while (0)
596 | | |
/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
 * last two stages of the h() function for a given index (either 2i or 2i+1).
 * a, b, c, and d are the four bytes going into the last two stages.  For
 * 128-bit keys, this is the entire h() function and a and c are the index
 * preprocessed through q0 and q1 respectively; for longer keys they are the
 * output of previous stages.  j is the index of the first key byte to use.
 * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
 * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
 * rotations.  Its parameters are: a, the array to write the results into,
 * j, the index of the first output entry, k and l, the preprocessed indices
 * for index 2i, and m and n, the preprocessed indices for index 2i+1.
 * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two
 * additional lookup-and-XOR stages.  The parameters a and b are the index
 * preprocessed through q0 and q1 respectively; j is the index of the first
 * key byte to use.  CALC_K256 is identical to CALC_K but for using the
 * CALC_K256_2 macro instead of CALC_K_2.
 *
 * CALC_K_2 and CALC_K256_2 are expressions; their arguments and the whole
 * XOR chain are parenthesized so they combine safely with surrounding
 * operators.  CALC_K and CALC_K256 are statements wrapped in
 * do { } while (0); they use the variables x and y of the enclosing scope
 * as scratch and store into ctx->a[j] and ctx->a[j + 1]. */

#define CALC_K_2(a, b, c, d, j) \
  (mds[0][q0[(a) ^ key[(j) + 8]] ^ key[j]] \
   ^ mds[1][q0[(b) ^ key[(j) + 9]] ^ key[(j) + 1]] \
   ^ mds[2][q1[(c) ^ key[(j) + 10]] ^ key[(j) + 2]] \
   ^ mds[3][q1[(d) ^ key[(j) + 11]] ^ key[(j) + 3]])

#define CALC_K(a, j, k, l, m, n) \
  do \
    { \
      x = CALC_K_2 (k, l, k, l, 0); \
      y = CALC_K_2 (m, n, m, n, 4); \
      y = (y << 8) + (y >> 24); \
      x += y; y += x; ctx->a[j] = x; \
      ctx->a[(j) + 1] = (y << 9) + (y >> 23); \
    } \
  while (0)

#define CALC_K256_2(a, b, j) \
  CALC_K_2 (q0[q1[(b) ^ key[(j) + 24]] ^ key[(j) + 16]], \
            q1[q1[(a) ^ key[(j) + 25]] ^ key[(j) + 17]], \
            q0[q0[(a) ^ key[(j) + 26]] ^ key[(j) + 18]], \
            q1[q0[(b) ^ key[(j) + 27]] ^ key[(j) + 19]], j)

#define CALC_K256(a, j, k, l, m, n) \
  do \
    { \
      x = CALC_K256_2 (k, l, 0); \
      y = CALC_K256_2 (m, n, 4); \
      y = (y << 8) + (y >> 24); \
      x += y; y += x; ctx->a[j] = x; \
      ctx->a[(j) + 1] = (y << 9) + (y >> 23); \
    } \
  while (0)
639 | | |
640 | | |
641 | | |
642 | | /* Perform the key setup. Note that this works only with 128- and 256-bit |
643 | | * keys, despite the API that looks like it might support other sizes. */ |
644 | | |
645 | | static gcry_err_code_t |
646 | | do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) |
647 | 0 | { |
648 | 0 | int i, j, k; |
649 | | |
650 | | /* Temporaries for CALC_K. */ |
651 | 0 | u32 x, y; |
652 | | |
653 | | /* The S vector used to key the S-boxes, split up into individual bytes. |
654 | | * 128-bit keys use only sa through sh; 256-bit use all of them. */ |
655 | 0 | byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0; |
656 | 0 | byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0; |
657 | | |
658 | | /* Temporary for CALC_S. */ |
659 | 0 | unsigned int tmp; |
660 | | |
661 | | /* Flags for self-test. */ |
662 | 0 | static int initialized = 0; |
663 | 0 | static const char *selftest_failed=0; |
664 | | |
665 | | /* Check key length. */ |
666 | 0 | if( ( ( keylen - 16 ) | 16 ) != 16 ) |
667 | 0 | return GPG_ERR_INV_KEYLEN; |
668 | | |
669 | | /* Do self-test if necessary. */ |
670 | 0 | if (!initialized) |
671 | 0 | { |
672 | 0 | initialized = 1; |
673 | 0 | selftest_failed = selftest (); |
674 | 0 | if( selftest_failed ) |
675 | 0 | log_error("%s\n", selftest_failed ); |
676 | 0 | } |
677 | 0 | if( selftest_failed ) |
678 | 0 | return GPG_ERR_SELFTEST_FAILED; |
679 | | |
680 | | /* Compute the first two words of the S vector. The magic numbers are |
681 | | * the entries of the RS matrix, preprocessed through poly_to_exp. The |
682 | | * numbers in the comments are the original (polynomial form) matrix |
683 | | * entries. */ |
684 | 0 | CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ |
685 | 0 | CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ |
686 | 0 | CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ |
687 | 0 | CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ |
688 | 0 | CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ |
689 | 0 | CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ |
690 | 0 | CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ |
691 | 0 | CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ |
692 | 0 | CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ |
693 | 0 | CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ |
694 | 0 | CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ |
695 | 0 | CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ |
696 | 0 | CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ |
697 | 0 | CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ |
698 | 0 | CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ |
699 | 0 | CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ |
700 | |
|
701 | 0 | if (keylen == 32) /* 256-bit key */ |
702 | 0 | { |
703 | | /* Calculate the remaining two words of the S vector */ |
704 | 0 | CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ |
705 | 0 | CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ |
706 | 0 | CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ |
707 | 0 | CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ |
708 | 0 | CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ |
709 | 0 | CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ |
710 | 0 | CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ |
711 | 0 | CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ |
712 | 0 | CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ |
713 | 0 | CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ |
714 | 0 | CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ |
715 | 0 | CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ |
716 | 0 | CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ |
717 | 0 | CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ |
718 | 0 | CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ |
719 | 0 | CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ |
720 | | |
721 | | /* Compute the S-boxes. */ |
722 | 0 | for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 ) |
723 | 0 | { |
724 | 0 | CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); |
725 | 0 | } |
726 | | |
727 | | /* Calculate whitening and round subkeys. */ |
728 | 0 | for (i = 0; i < 8; i += 2) |
729 | 0 | { |
730 | 0 | CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); |
731 | 0 | } |
732 | 0 | for (j = 0; j < 32; j += 2, i += 2) |
733 | 0 | { |
734 | 0 | CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); |
735 | 0 | } |
736 | 0 | } |
737 | 0 | else |
738 | 0 | { |
739 | | /* Compute the S-boxes. */ |
740 | 0 | for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 ) |
741 | 0 | { |
742 | 0 | CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); |
743 | 0 | } |
744 | | |
745 | | /* Calculate whitening and round subkeys. */ |
746 | 0 | for (i = 0; i < 8; i += 2) |
747 | 0 | { |
748 | 0 | CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); |
749 | 0 | } |
750 | 0 | for (j = 0; j < 32; j += 2, i += 2) |
751 | 0 | { |
752 | 0 | CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); |
753 | 0 | } |
754 | 0 | } |
755 | |
|
756 | 0 | return 0; |
757 | 0 | } |
758 | | |
759 | | static gcry_err_code_t |
760 | | twofish_setkey (void *context, const byte *key, unsigned int keylen, |
761 | | cipher_bulk_ops_t *bulk_ops) |
762 | 0 | { |
763 | 0 | TWOFISH_context *ctx = context; |
764 | 0 | unsigned int hwfeatures = _gcry_get_hw_features (); |
765 | 0 | int rc; |
766 | |
|
767 | 0 | rc = do_twofish_setkey (ctx, key, keylen); |
768 | |
|
769 | 0 | #ifdef USE_AVX2 |
770 | 0 | ctx->use_avx2 = 0; |
771 | 0 | if ((hwfeatures & HWF_INTEL_AVX2) && (hwfeatures & HWF_INTEL_FAST_VPGATHER)) |
772 | 0 | { |
773 | 0 | ctx->use_avx2 = 1; |
774 | 0 | } |
775 | 0 | #endif |
776 | | |
777 | | /* Setup bulk encryption routines. */ |
778 | 0 | memset (bulk_ops, 0, sizeof(*bulk_ops)); |
779 | 0 | bulk_ops->cbc_dec = _gcry_twofish_cbc_dec; |
780 | 0 | bulk_ops->cfb_dec = _gcry_twofish_cfb_dec; |
781 | 0 | bulk_ops->ctr_enc = _gcry_twofish_ctr_enc; |
782 | 0 | bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt; |
783 | 0 | bulk_ops->ocb_auth = _gcry_twofish_ocb_auth; |
784 | 0 | bulk_ops->xts_crypt = _gcry_twofish_xts_crypt; |
785 | 0 | bulk_ops->ecb_crypt = _gcry_twofish_ecb_crypt; |
786 | |
|
787 | 0 | (void)hwfeatures; |
788 | |
|
789 | 0 | _gcry_burn_stack (23+6*sizeof(void*)); |
790 | 0 | return rc; |
791 | 0 | } |
792 | | |
793 | | |
#ifdef USE_AVX2
/* Assembler implementations of Twofish using AVX2.  Each call processes
   16 blocks in parallel.  */

extern void
_gcry_twofish_avx2_blk16 (const TWOFISH_context *c, byte *out,
                          const byte *in, int encrypt) ASM_FUNC_ABI;

extern void
_gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx, unsigned char *out,
                           const unsigned char *in,
                           unsigned char *ctr) ASM_FUNC_ABI;

extern void
_gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx, unsigned char *out,
                           const unsigned char *in,
                           unsigned char *iv) ASM_FUNC_ABI;

extern void
_gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx, unsigned char *out,
                           const unsigned char *in,
                           unsigned char *iv) ASM_FUNC_ABI;

extern void
_gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx, unsigned char *out,
                           const unsigned char *in, unsigned char *offset,
                           unsigned char *checksum,
                           const u64 Ls[16]) ASM_FUNC_ABI;

extern void
_gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx, unsigned char *out,
                           const unsigned char *in, unsigned char *offset,
                           unsigned char *checksum,
                           const u64 Ls[16]) ASM_FUNC_ABI;

extern void
_gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
                            const unsigned char *abuf, unsigned char *offset,
                            unsigned char *checksum,
                            const u64 Ls[16]) ASM_FUNC_ABI;
#endif
836 | | |
837 | | |
838 | | #ifdef USE_AMD64_ASM |
839 | | |
840 | | /* Assembly implementations of Twofish. */ |
841 | | extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c, |
842 | | byte *out, const byte *in); |
843 | | |
844 | | extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c, |
845 | | byte *out, const byte *in); |
846 | | |
847 | | /* These assembly implementations process three blocks in parallel. */ |
848 | | extern void _gcry_twofish_amd64_blk3(const TWOFISH_context *c, byte *out, |
849 | | const byte *in, int encrypt); |
850 | | |
851 | | extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, |
852 | | const byte *in, byte *ctr); |
853 | | |
854 | | extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, |
855 | | const byte *in, byte *iv); |
856 | | |
857 | | extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, |
858 | | const byte *in, byte *iv); |
859 | | |
860 | | extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, |
861 | | const byte *in, byte *offset, |
862 | | byte *checksum, const u64 Ls[3]); |
863 | | |
864 | | extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, |
865 | | const byte *in, byte *offset, |
866 | | byte *checksum, const u64 Ls[3]); |
867 | | |
868 | | extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx, |
869 | | const byte *abuf, byte *offset, |
870 | | byte *checksum, const u64 Ls[3]); |
871 | | |
872 | | static inline void |
873 | | twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in) |
874 | 0 | { |
875 | 0 | _gcry_twofish_amd64_encrypt_block(c, out, in); |
876 | 0 | } |
877 | | |
878 | | static inline void |
879 | | twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in) |
880 | 0 | { |
881 | 0 | _gcry_twofish_amd64_decrypt_block(c, out, in); |
882 | 0 | } |
883 | | |
884 | | static inline void |
885 | | twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, |
886 | | byte *ctr) |
887 | 0 | { |
888 | 0 | _gcry_twofish_amd64_ctr_enc(c, out, in, ctr); |
889 | 0 | } |
890 | | |
891 | | static inline void |
892 | | twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in, |
893 | | byte *iv) |
894 | 0 | { |
895 | 0 | _gcry_twofish_amd64_cbc_dec(c, out, in, iv); |
896 | 0 | } |
897 | | |
898 | | static inline void |
899 | | twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, |
900 | | byte *iv) |
901 | 0 | { |
902 | 0 | _gcry_twofish_amd64_cfb_dec(c, out, in, iv); |
903 | 0 | } |
904 | | |
905 | | static inline void |
906 | | twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in, |
907 | | byte *offset, byte *checksum, const u64 Ls[3]) |
908 | 0 | { |
909 | 0 | _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls); |
910 | 0 | } |
911 | | |
912 | | static inline void |
913 | | twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in, |
914 | | byte *offset, byte *checksum, const u64 Ls[3]) |
915 | 0 | { |
916 | 0 | _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls); |
917 | 0 | } |
918 | | |
919 | | static inline void |
920 | | twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf, |
921 | | byte *offset, byte *checksum, const u64 Ls[3]) |
922 | 0 | { |
923 | 0 | _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls); |
924 | 0 | } |
925 | | |
926 | | #elif defined(USE_ARM_ASM) |
927 | | |
928 | | /* Assembly implementations of Twofish. */ |
929 | | extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c, |
930 | | byte *out, const byte *in); |
931 | | |
932 | | extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c, |
933 | | byte *out, const byte *in); |
934 | | |
935 | | #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ |
936 | | |
/* Macros to compute the g() function in the encryption and decryption
 * rounds.  G1 is the straight g() function; G2 includes the 8-bit
 * rotation for the high 32-bit word.  Both read the key-dependent
 * S-boxes through `ctx' from the enclosing scope.  The whole XOR chain is
 * parenthesized so that each macro behaves as a single value when it is
 * combined with other operators. */

#define G1(a) \
     ((ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
      ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24]))

#define G2(b) \
     ((ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
      ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24]))
948 | | |
/* Encryption and decryption Feistel rounds.  Each one calls the two g()
 * macros, does the PHT, and performs the XOR and the appropriate bit
 * rotations.  The parameters are the round number (used to select subkeys),
 * and the four 32-bit chunks of the text.
 *
 * Both macros use the variables x and y of the enclosing scope as scratch
 * and read the round subkeys from ctx->k.  Each expands to a single
 * do { } while (0) statement so it can be used as the body of an unbraced
 * if or loop. */

#define ENCROUND(n, a, b, c, d) \
   do \
     { \
       x = G1 (a); y = G2 (b); \
       x += y; y += x + ctx->k[2 * (n) + 1]; \
       (c) ^= x + ctx->k[2 * (n)]; \
       (c) = ((c) >> 1) + ((c) << 31); \
       (d) = (((d) << 1)+((d) >> 31)) ^ y; \
     } \
   while (0)

#define DECROUND(n, a, b, c, d) \
   do \
     { \
       x = G1 (a); y = G2 (b); \
       x += y; y += x; \
       (d) ^= y + ctx->k[2 * (n) + 1]; \
       (d) = ((d) >> 1) + ((d) << 31); \
       (c) = (((c) << 1)+((c) >> 31)); \
       (c) ^= (x + ctx->k[2 * (n)]); \
     } \
   while (0)
968 | | |
/* Encryption and decryption cycles; each one is simply two Feistel rounds
 * with the 32-bit chunks re-ordered to simulate the "swap".  They operate
 * on the variables a, b, c and d of the enclosing scope.  Each expands to
 * a single do { } while (0) statement. */

#define ENCCYCLE(n) \
   do \
     { \
       ENCROUND (2 * (n), a, b, c, d); \
       ENCROUND (2 * (n) + 1, c, d, a, b); \
     } \
   while (0)

#define DECCYCLE(n) \
   do \
     { \
       DECROUND (2 * (n) + 1, c, d, a, b); \
       DECROUND (2 * (n), a, b, c, d); \
     } \
   while (0)
979 | | |
/* Macros to convert the input and output bytes into 32-bit words,
 * and simultaneously perform the whitening step.  INPACK packs word
 * number n into the variable named by x, using whitening subkey number m.
 * OUTUNPACK unpacks word number n from the variable named by x, using
 * whitening subkey number m.
 *
 * They read from `in', write to `out' and take the subkeys from ctx->w,
 * all in the enclosing scope.  Each expands to a single
 * do { } while (0) statement. */

#define INPACK(n, x, m) \
   do \
     { \
       (x) = buf_get_le32(in + (n) * 4); \
       (x) ^= ctx->w[m]; \
     } \
   while (0)

#define OUTUNPACK(n, x, m) \
   do \
     { \
       (x) ^= ctx->w[m]; \
       buf_put_le32(out + (n) * 4, x); \
     } \
   while (0)
993 | | |
994 | | #endif /*!USE_AMD64_ASM*/ |
995 | | |
996 | | |
997 | | /* Encrypt one block. in and out may be the same. */ |
998 | | |
999 | | #ifdef USE_AMD64_ASM |
1000 | | |
1001 | | static unsigned int |
1002 | | twofish_encrypt (void *context, byte *out, const byte *in) |
1003 | 0 | { |
1004 | 0 | TWOFISH_context *ctx = context; |
1005 | 0 | twofish_amd64_encrypt_block(ctx, out, in); |
1006 | 0 | return /*burn_stack*/ (4*sizeof (void*)); |
1007 | 0 | } |
1008 | | |
1009 | | #elif defined(USE_ARM_ASM) |
1010 | | |
1011 | | static unsigned int |
1012 | | twofish_encrypt (void *context, byte *out, const byte *in) |
1013 | | { |
1014 | | TWOFISH_context *ctx = context; |
1015 | | _gcry_twofish_arm_encrypt_block(ctx, out, in); |
1016 | | return /*burn_stack*/ (4*sizeof (void*)); |
1017 | | } |
1018 | | |
1019 | | #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ |
1020 | | |
1021 | | static void |
1022 | | do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in) |
1023 | | { |
1024 | | /* The four 32-bit chunks of the text. */ |
1025 | | u32 a, b, c, d; |
1026 | | |
1027 | | /* Temporaries used by the round function. */ |
1028 | | u32 x, y; |
1029 | | |
1030 | | /* Input whitening and packing. */ |
1031 | | INPACK (0, a, 0); |
1032 | | INPACK (1, b, 1); |
1033 | | INPACK (2, c, 2); |
1034 | | INPACK (3, d, 3); |
1035 | | |
1036 | | /* Encryption Feistel cycles. */ |
1037 | | ENCCYCLE (0); |
1038 | | ENCCYCLE (1); |
1039 | | ENCCYCLE (2); |
1040 | | ENCCYCLE (3); |
1041 | | ENCCYCLE (4); |
1042 | | ENCCYCLE (5); |
1043 | | ENCCYCLE (6); |
1044 | | ENCCYCLE (7); |
1045 | | |
1046 | | /* Output whitening and unpacking. */ |
1047 | | OUTUNPACK (0, c, 4); |
1048 | | OUTUNPACK (1, d, 5); |
1049 | | OUTUNPACK (2, a, 6); |
1050 | | OUTUNPACK (3, b, 7); |
1051 | | } |
1052 | | |
1053 | | static unsigned int |
1054 | | twofish_encrypt (void *context, byte *out, const byte *in) |
1055 | | { |
1056 | | TWOFISH_context *ctx = context; |
1057 | | do_twofish_encrypt (ctx, out, in); |
1058 | | return /*burn_stack*/ (24+3*sizeof (void*)); |
1059 | | } |
1060 | | |
1061 | | #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ |
1062 | | |
1063 | | |
1064 | | /* Decrypt one block. in and out may be the same. */ |
1065 | | |
1066 | | #ifdef USE_AMD64_ASM |
1067 | | |
1068 | | static unsigned int |
1069 | | twofish_decrypt (void *context, byte *out, const byte *in) |
1070 | 0 | { |
1071 | 0 | TWOFISH_context *ctx = context; |
1072 | 0 | twofish_amd64_decrypt_block(ctx, out, in); |
1073 | 0 | return /*burn_stack*/ (4*sizeof (void*)); |
1074 | 0 | } |
1075 | | |
1076 | | #elif defined(USE_ARM_ASM) |
1077 | | |
1078 | | static unsigned int |
1079 | | twofish_decrypt (void *context, byte *out, const byte *in) |
1080 | | { |
1081 | | TWOFISH_context *ctx = context; |
1082 | | _gcry_twofish_arm_decrypt_block(ctx, out, in); |
1083 | | return /*burn_stack*/ (4*sizeof (void*)); |
1084 | | } |
1085 | | |
1086 | | #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ |
1087 | | |
1088 | | static void |
1089 | | do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in) |
1090 | | { |
1091 | | /* The four 32-bit chunks of the text. */ |
1092 | | u32 a, b, c, d; |
1093 | | |
1094 | | /* Temporaries used by the round function. */ |
1095 | | u32 x, y; |
1096 | | |
1097 | | /* Input whitening and packing. */ |
1098 | | INPACK (0, c, 4); |
1099 | | INPACK (1, d, 5); |
1100 | | INPACK (2, a, 6); |
1101 | | INPACK (3, b, 7); |
1102 | | |
1103 | | /* Encryption Feistel cycles. */ |
1104 | | DECCYCLE (7); |
1105 | | DECCYCLE (6); |
1106 | | DECCYCLE (5); |
1107 | | DECCYCLE (4); |
1108 | | DECCYCLE (3); |
1109 | | DECCYCLE (2); |
1110 | | DECCYCLE (1); |
1111 | | DECCYCLE (0); |
1112 | | |
1113 | | /* Output whitening and unpacking. */ |
1114 | | OUTUNPACK (0, a, 0); |
1115 | | OUTUNPACK (1, b, 1); |
1116 | | OUTUNPACK (2, c, 2); |
1117 | | OUTUNPACK (3, d, 3); |
1118 | | } |
1119 | | |
1120 | | static unsigned int |
1121 | | twofish_decrypt (void *context, byte *out, const byte *in) |
1122 | | { |
1123 | | TWOFISH_context *ctx = context; |
1124 | | |
1125 | | do_twofish_decrypt (ctx, out, in); |
1126 | | return /*burn_stack*/ (24+3*sizeof (void*)); |
1127 | | } |
1128 | | |
1129 | | #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ |
1130 | | |
1131 | | |
1132 | | |
1133 | | /* Bulk encryption of complete blocks in CTR mode. This function is only |
1134 | | intended for the bulk encryption feature of cipher.c. CTR is expected to be |
1135 | | of size TWOFISH_BLOCKSIZE. */ |
1136 | | static void |
1137 | | _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, |
1138 | | const void *inbuf_arg, size_t nblocks) |
1139 | 0 | { |
1140 | 0 | TWOFISH_context *ctx = context; |
1141 | 0 | unsigned char *outbuf = outbuf_arg; |
1142 | 0 | const unsigned char *inbuf = inbuf_arg; |
1143 | 0 | unsigned char tmpbuf[TWOFISH_BLOCKSIZE]; |
1144 | 0 | unsigned int burn, burn_stack_depth = 0; |
1145 | |
|
1146 | 0 | #ifdef USE_AVX2 |
1147 | 0 | if (ctx->use_avx2) |
1148 | 0 | { |
1149 | 0 | int did_use_avx2 = 0; |
1150 | | |
1151 | | /* Process data in 16 block chunks. */ |
1152 | 0 | while (nblocks >= 16) |
1153 | 0 | { |
1154 | 0 | _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr); |
1155 | |
|
1156 | 0 | nblocks -= 16; |
1157 | 0 | outbuf += 16 * TWOFISH_BLOCKSIZE; |
1158 | 0 | inbuf += 16 * TWOFISH_BLOCKSIZE; |
1159 | 0 | did_use_avx2 = 1; |
1160 | 0 | } |
1161 | |
|
1162 | 0 | if (did_use_avx2) |
1163 | 0 | { |
1164 | | /* twofish-avx2 assembly code does not use stack */ |
1165 | 0 | if (nblocks == 0) |
1166 | 0 | burn_stack_depth = 0; |
1167 | 0 | } |
1168 | 0 | } |
1169 | 0 | #endif |
1170 | |
|
1171 | 0 | #ifdef USE_AMD64_ASM |
1172 | 0 | { |
1173 | | /* Process data in 3 block chunks. */ |
1174 | 0 | while (nblocks >= 3) |
1175 | 0 | { |
1176 | 0 | twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); |
1177 | |
|
1178 | 0 | nblocks -= 3; |
1179 | 0 | outbuf += 3 * TWOFISH_BLOCKSIZE; |
1180 | 0 | inbuf += 3 * TWOFISH_BLOCKSIZE; |
1181 | |
|
1182 | 0 | burn = 8 * sizeof(void*); |
1183 | 0 | if (burn > burn_stack_depth) |
1184 | 0 | burn_stack_depth = burn; |
1185 | 0 | } |
1186 | | |
1187 | | /* Use generic code to handle smaller chunks... */ |
1188 | | /* TODO: use caching instead? */ |
1189 | 0 | } |
1190 | 0 | #endif |
1191 | |
|
1192 | 0 | for ( ;nblocks; nblocks-- ) |
1193 | 0 | { |
1194 | | /* Encrypt the counter. */ |
1195 | 0 | burn = twofish_encrypt(ctx, tmpbuf, ctr); |
1196 | 0 | if (burn > burn_stack_depth) |
1197 | 0 | burn_stack_depth = burn; |
1198 | | |
1199 | | /* XOR the input with the encrypted counter and store in output. */ |
1200 | 0 | cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE); |
1201 | 0 | outbuf += TWOFISH_BLOCKSIZE; |
1202 | 0 | inbuf += TWOFISH_BLOCKSIZE; |
1203 | | /* Increment the counter. */ |
1204 | 0 | cipher_block_add(ctr, 1, TWOFISH_BLOCKSIZE); |
1205 | 0 | } |
1206 | |
|
1207 | 0 | wipememory(tmpbuf, sizeof(tmpbuf)); |
1208 | 0 | _gcry_burn_stack(burn_stack_depth); |
1209 | 0 | } |
1210 | | |
1211 | | |
1212 | | /* Bulk decryption of complete blocks in CBC mode. This function is only |
1213 | | intended for the bulk encryption feature of cipher.c. */ |
1214 | | static void |
1215 | | _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, |
1216 | | const void *inbuf_arg, size_t nblocks) |
1217 | 0 | { |
1218 | 0 | TWOFISH_context *ctx = context; |
1219 | 0 | unsigned char *outbuf = outbuf_arg; |
1220 | 0 | const unsigned char *inbuf = inbuf_arg; |
1221 | 0 | unsigned char savebuf[TWOFISH_BLOCKSIZE]; |
1222 | 0 | unsigned int burn, burn_stack_depth = 0; |
1223 | |
|
1224 | 0 | #ifdef USE_AVX2 |
1225 | 0 | if (ctx->use_avx2) |
1226 | 0 | { |
1227 | 0 | int did_use_avx2 = 0; |
1228 | | |
1229 | | /* Process data in 16 block chunks. */ |
1230 | 0 | while (nblocks >= 16) |
1231 | 0 | { |
1232 | 0 | _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv); |
1233 | |
|
1234 | 0 | nblocks -= 16; |
1235 | 0 | outbuf += 16 * TWOFISH_BLOCKSIZE; |
1236 | 0 | inbuf += 16 * TWOFISH_BLOCKSIZE; |
1237 | 0 | did_use_avx2 = 1; |
1238 | 0 | } |
1239 | |
|
1240 | 0 | if (did_use_avx2) |
1241 | 0 | { |
1242 | | /* twofish-avx2 assembly code does not use stack */ |
1243 | 0 | if (nblocks == 0) |
1244 | 0 | burn_stack_depth = 0; |
1245 | 0 | } |
1246 | 0 | } |
1247 | 0 | #endif |
1248 | |
|
1249 | 0 | #ifdef USE_AMD64_ASM |
1250 | 0 | { |
1251 | | /* Process data in 3 block chunks. */ |
1252 | 0 | while (nblocks >= 3) |
1253 | 0 | { |
1254 | 0 | twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); |
1255 | |
|
1256 | 0 | nblocks -= 3; |
1257 | 0 | outbuf += 3 * TWOFISH_BLOCKSIZE; |
1258 | 0 | inbuf += 3 * TWOFISH_BLOCKSIZE; |
1259 | |
|
1260 | 0 | burn = 9 * sizeof(void*); |
1261 | 0 | if (burn > burn_stack_depth) |
1262 | 0 | burn_stack_depth = burn; |
1263 | 0 | } |
1264 | | |
1265 | | /* Use generic code to handle smaller chunks... */ |
1266 | 0 | } |
1267 | 0 | #endif |
1268 | |
|
1269 | 0 | for ( ;nblocks; nblocks-- ) |
1270 | 0 | { |
1271 | | /* INBUF is needed later and it may be identical to OUTBUF, so store |
1272 | | the intermediate result to SAVEBUF. */ |
1273 | 0 | burn = twofish_decrypt (ctx, savebuf, inbuf); |
1274 | 0 | if (burn > burn_stack_depth) |
1275 | 0 | burn_stack_depth = burn; |
1276 | |
|
1277 | 0 | cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE); |
1278 | 0 | inbuf += TWOFISH_BLOCKSIZE; |
1279 | 0 | outbuf += TWOFISH_BLOCKSIZE; |
1280 | 0 | } |
1281 | |
|
1282 | 0 | wipememory(savebuf, sizeof(savebuf)); |
1283 | 0 | _gcry_burn_stack(burn_stack_depth); |
1284 | 0 | } |
1285 | | |
1286 | | |
1287 | | /* Bulk decryption of complete blocks in CFB mode. This function is only |
1288 | | intended for the bulk encryption feature of cipher.c. */ |
1289 | | static void |
1290 | | _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, |
1291 | | const void *inbuf_arg, size_t nblocks) |
1292 | 0 | { |
1293 | 0 | TWOFISH_context *ctx = context; |
1294 | 0 | unsigned char *outbuf = outbuf_arg; |
1295 | 0 | const unsigned char *inbuf = inbuf_arg; |
1296 | 0 | unsigned int burn, burn_stack_depth = 0; |
1297 | |
|
1298 | 0 | #ifdef USE_AVX2 |
1299 | 0 | if (ctx->use_avx2) |
1300 | 0 | { |
1301 | 0 | int did_use_avx2 = 0; |
1302 | | |
1303 | | /* Process data in 16 block chunks. */ |
1304 | 0 | while (nblocks >= 16) |
1305 | 0 | { |
1306 | 0 | _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv); |
1307 | |
|
1308 | 0 | nblocks -= 16; |
1309 | 0 | outbuf += 16 * TWOFISH_BLOCKSIZE; |
1310 | 0 | inbuf += 16 * TWOFISH_BLOCKSIZE; |
1311 | 0 | did_use_avx2 = 1; |
1312 | 0 | } |
1313 | |
|
1314 | 0 | if (did_use_avx2) |
1315 | 0 | { |
1316 | | /* twofish-avx2 assembly code does not use stack */ |
1317 | 0 | if (nblocks == 0) |
1318 | 0 | burn_stack_depth = 0; |
1319 | 0 | } |
1320 | 0 | } |
1321 | 0 | #endif |
1322 | |
|
1323 | 0 | #ifdef USE_AMD64_ASM |
1324 | 0 | { |
1325 | | /* Process data in 3 block chunks. */ |
1326 | 0 | while (nblocks >= 3) |
1327 | 0 | { |
1328 | 0 | twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); |
1329 | |
|
1330 | 0 | nblocks -= 3; |
1331 | 0 | outbuf += 3 * TWOFISH_BLOCKSIZE; |
1332 | 0 | inbuf += 3 * TWOFISH_BLOCKSIZE; |
1333 | |
|
1334 | 0 | burn = 8 * sizeof(void*); |
1335 | 0 | if (burn > burn_stack_depth) |
1336 | 0 | burn_stack_depth = burn; |
1337 | 0 | } |
1338 | | |
1339 | | /* Use generic code to handle smaller chunks... */ |
1340 | 0 | } |
1341 | 0 | #endif |
1342 | |
|
1343 | 0 | for ( ;nblocks; nblocks-- ) |
1344 | 0 | { |
1345 | 0 | burn = twofish_encrypt(ctx, iv, iv); |
1346 | 0 | if (burn > burn_stack_depth) |
1347 | 0 | burn_stack_depth = burn; |
1348 | |
|
1349 | 0 | cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE); |
1350 | 0 | outbuf += TWOFISH_BLOCKSIZE; |
1351 | 0 | inbuf += TWOFISH_BLOCKSIZE; |
1352 | 0 | } |
1353 | |
|
1354 | 0 | _gcry_burn_stack(burn_stack_depth); |
1355 | 0 | } |
1356 | | |
1357 | | /* Bulk encryption/decryption of complete blocks in OCB mode. */ |
1358 | | static size_t |
1359 | | _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, |
1360 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1361 | 0 | { |
1362 | 0 | #ifdef USE_AMD64_ASM |
1363 | 0 | TWOFISH_context *ctx = (void *)&c->context.c; |
1364 | 0 | unsigned char *outbuf = outbuf_arg; |
1365 | 0 | const unsigned char *inbuf = inbuf_arg; |
1366 | 0 | unsigned int burn, burn_stack_depth = 0; |
1367 | 0 | u64 blkn = c->u_mode.ocb.data_nblocks; |
1368 | |
|
1369 | 0 | #ifdef USE_AVX2 |
1370 | 0 | if (ctx->use_avx2) |
1371 | 0 | { |
1372 | 0 | int did_use_avx2 = 0; |
1373 | 0 | u64 Ls[16]; |
1374 | 0 | u64 *l; |
1375 | |
|
1376 | 0 | if (nblocks >= 16) |
1377 | 0 | { |
1378 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1379 | | |
1380 | | /* Process data in 16 block chunks. */ |
1381 | 0 | while (nblocks >= 16) |
1382 | 0 | { |
1383 | 0 | blkn += 16; |
1384 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1385 | |
|
1386 | 0 | if (encrypt) |
1387 | 0 | _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, |
1388 | 0 | c->u_ctr.ctr, Ls); |
1389 | 0 | else |
1390 | 0 | _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, |
1391 | 0 | c->u_ctr.ctr, Ls); |
1392 | |
|
1393 | 0 | nblocks -= 16; |
1394 | 0 | outbuf += 16 * TWOFISH_BLOCKSIZE; |
1395 | 0 | inbuf += 16 * TWOFISH_BLOCKSIZE; |
1396 | 0 | did_use_avx2 = 1; |
1397 | 0 | } |
1398 | 0 | } |
1399 | |
|
1400 | 0 | if (did_use_avx2) |
1401 | 0 | { |
1402 | | /* twofish-avx2 assembly code does not use stack */ |
1403 | 0 | if (nblocks == 0) |
1404 | 0 | burn_stack_depth = 0; |
1405 | 0 | } |
1406 | 0 | } |
1407 | 0 | #endif |
1408 | |
|
1409 | 0 | { |
1410 | | /* Use u64 to store pointers for x32 support (assembly function |
1411 | | * assumes 64-bit pointers). */ |
1412 | 0 | u64 Ls[3]; |
1413 | | |
1414 | | /* Process data in 3 block chunks. */ |
1415 | 0 | while (nblocks >= 3) |
1416 | 0 | { |
1417 | 0 | Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1); |
1418 | 0 | Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2); |
1419 | 0 | Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3); |
1420 | 0 | blkn += 3; |
1421 | |
|
1422 | 0 | if (encrypt) |
1423 | 0 | twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, |
1424 | 0 | Ls); |
1425 | 0 | else |
1426 | 0 | twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, |
1427 | 0 | Ls); |
1428 | |
|
1429 | 0 | nblocks -= 3; |
1430 | 0 | outbuf += 3 * TWOFISH_BLOCKSIZE; |
1431 | 0 | inbuf += 3 * TWOFISH_BLOCKSIZE; |
1432 | |
|
1433 | 0 | burn = 8 * sizeof(void*); |
1434 | 0 | if (burn > burn_stack_depth) |
1435 | 0 | burn_stack_depth = burn; |
1436 | 0 | } |
1437 | | |
1438 | | /* Use generic code to handle smaller chunks... */ |
1439 | 0 | } |
1440 | |
|
1441 | 0 | c->u_mode.ocb.data_nblocks = blkn; |
1442 | |
|
1443 | 0 | if (burn_stack_depth) |
1444 | 0 | _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); |
1445 | | #else |
1446 | | (void)c; |
1447 | | (void)outbuf_arg; |
1448 | | (void)inbuf_arg; |
1449 | | (void)encrypt; |
1450 | | #endif |
1451 | |
|
1452 | 0 | return nblocks; |
1453 | 0 | } |
1454 | | |
1455 | | /* Bulk authentication of complete blocks in OCB mode. */ |
1456 | | static size_t |
1457 | | _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, |
1458 | | size_t nblocks) |
1459 | 0 | { |
1460 | 0 | #ifdef USE_AMD64_ASM |
1461 | 0 | TWOFISH_context *ctx = (void *)&c->context.c; |
1462 | 0 | const unsigned char *abuf = abuf_arg; |
1463 | 0 | unsigned int burn, burn_stack_depth = 0; |
1464 | 0 | u64 blkn = c->u_mode.ocb.aad_nblocks; |
1465 | |
|
1466 | 0 | #ifdef USE_AVX2 |
1467 | 0 | if (ctx->use_avx2) |
1468 | 0 | { |
1469 | 0 | int did_use_avx2 = 0; |
1470 | 0 | u64 Ls[16]; |
1471 | 0 | u64 *l; |
1472 | |
|
1473 | 0 | if (nblocks >= 16) |
1474 | 0 | { |
1475 | 0 | l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); |
1476 | | |
1477 | | /* Process data in 16 block chunks. */ |
1478 | 0 | while (nblocks >= 16) |
1479 | 0 | { |
1480 | 0 | blkn += 16; |
1481 | 0 | *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); |
1482 | |
|
1483 | 0 | _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, |
1484 | 0 | c->u_mode.ocb.aad_sum, Ls); |
1485 | |
|
1486 | 0 | nblocks -= 16; |
1487 | 0 | abuf += 16 * TWOFISH_BLOCKSIZE; |
1488 | 0 | did_use_avx2 = 1; |
1489 | 0 | } |
1490 | 0 | } |
1491 | |
|
1492 | 0 | if (did_use_avx2) |
1493 | 0 | { |
1494 | | /* twofish-avx2 assembly code does not use stack */ |
1495 | 0 | if (nblocks == 0) |
1496 | 0 | burn_stack_depth = 0; |
1497 | 0 | } |
1498 | | |
1499 | | /* Use generic code to handle smaller chunks... */ |
1500 | 0 | } |
1501 | 0 | #endif |
1502 | |
|
1503 | 0 | { |
1504 | | /* Use u64 to store pointers for x32 support (assembly function |
1505 | | * assumes 64-bit pointers). */ |
1506 | 0 | u64 Ls[3]; |
1507 | | |
1508 | | /* Process data in 3 block chunks. */ |
1509 | 0 | while (nblocks >= 3) |
1510 | 0 | { |
1511 | 0 | Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1); |
1512 | 0 | Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2); |
1513 | 0 | Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3); |
1514 | 0 | blkn += 3; |
1515 | |
|
1516 | 0 | twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, |
1517 | 0 | c->u_mode.ocb.aad_sum, Ls); |
1518 | |
|
1519 | 0 | nblocks -= 3; |
1520 | 0 | abuf += 3 * TWOFISH_BLOCKSIZE; |
1521 | |
|
1522 | 0 | burn = 8 * sizeof(void*); |
1523 | 0 | if (burn > burn_stack_depth) |
1524 | 0 | burn_stack_depth = burn; |
1525 | 0 | } |
1526 | | |
1527 | | /* Use generic code to handle smaller chunks... */ |
1528 | 0 | } |
1529 | |
|
1530 | 0 | c->u_mode.ocb.aad_nblocks = blkn; |
1531 | |
|
1532 | 0 | if (burn_stack_depth) |
1533 | 0 | _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); |
1534 | | #else |
1535 | | (void)c; |
1536 | | (void)abuf_arg; |
1537 | | #endif |
1538 | |
|
1539 | 0 | return nblocks; |
1540 | 0 | } |
1541 | | |
1542 | | |
1543 | | static unsigned int |
1544 | | twofish_crypt_blk1_16(const void *context, byte *out, const byte *in, |
1545 | | unsigned int num_blks, int encrypt) |
1546 | 0 | { |
1547 | 0 | const TWOFISH_context *ctx = context; |
1548 | 0 | unsigned int burn, burn_stack_depth = 0; |
1549 | |
|
1550 | 0 | #ifdef USE_AVX2 |
1551 | 0 | if (num_blks == 16 && ctx->use_avx2) |
1552 | 0 | { |
1553 | 0 | _gcry_twofish_avx2_blk16 (ctx, out, in, encrypt); |
1554 | 0 | return 0; |
1555 | 0 | } |
1556 | 0 | #endif |
1557 | | |
1558 | 0 | #ifdef USE_AMD64_ASM |
1559 | 0 | while (num_blks >= 3) |
1560 | 0 | { |
1561 | 0 | _gcry_twofish_amd64_blk3 (ctx, out, in, encrypt); |
1562 | 0 | burn = 8 * sizeof(void *); |
1563 | 0 | burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth; |
1564 | 0 | out += 3 * TWOFISH_BLOCKSIZE; |
1565 | 0 | in += 3 * TWOFISH_BLOCKSIZE; |
1566 | 0 | num_blks -= 3; |
1567 | 0 | } |
1568 | 0 | #endif |
1569 | |
|
1570 | 0 | while (num_blks >= 1) |
1571 | 0 | { |
1572 | 0 | if (encrypt) |
1573 | 0 | burn = twofish_encrypt((void *)ctx, out, in); |
1574 | 0 | else |
1575 | 0 | burn = twofish_decrypt((void *)ctx, out, in); |
1576 | |
|
1577 | 0 | burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth; |
1578 | 0 | out += TWOFISH_BLOCKSIZE; |
1579 | 0 | in += TWOFISH_BLOCKSIZE; |
1580 | 0 | num_blks--; |
1581 | 0 | } |
1582 | |
|
1583 | 0 | return burn_stack_depth; |
1584 | 0 | } |
1585 | | |
1586 | | static unsigned int |
1587 | | twofish_encrypt_blk1_16(const void *ctx, byte *out, const byte *in, |
1588 | | unsigned int num_blks) |
1589 | 0 | { |
1590 | 0 | return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 1); |
1591 | 0 | } |
1592 | | |
1593 | | static unsigned int |
1594 | | twofish_decrypt_blk1_16(const void *ctx, byte *out, const byte *in, |
1595 | | unsigned int num_blks) |
1596 | 0 | { |
1597 | 0 | return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 0); |
1598 | 0 | } |
1599 | | |
1600 | | |
1601 | | /* Bulk encryption/decryption of complete blocks in XTS mode. */ |
1602 | | static void |
1603 | | _gcry_twofish_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, |
1604 | | const void *inbuf_arg, size_t nblocks, int encrypt) |
1605 | 0 | { |
1606 | 0 | TWOFISH_context *ctx = context; |
1607 | 0 | unsigned char *outbuf = outbuf_arg; |
1608 | 0 | const unsigned char *inbuf = inbuf_arg; |
1609 | 0 | int burn_stack_depth = 0; |
1610 | | |
1611 | | /* Process remaining blocks. */ |
1612 | 0 | if (nblocks) |
1613 | 0 | { |
1614 | 0 | unsigned char tmpbuf[16 * 16]; |
1615 | 0 | unsigned int tmp_used = 16; |
1616 | 0 | size_t tmpbufsize = 15 * 16; |
1617 | 0 | size_t nburn; |
1618 | |
|
1619 | 0 | #ifdef USE_AVX2 |
1620 | 0 | if (ctx->use_avx2) |
1621 | 0 | tmpbufsize = 16 * 16; |
1622 | 0 | #endif |
1623 | |
|
1624 | 0 | nburn = bulk_xts_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16 |
1625 | 0 | : twofish_decrypt_blk1_16, |
1626 | 0 | outbuf, inbuf, nblocks, |
1627 | 0 | tweak, tmpbuf, tmpbufsize / 16, |
1628 | 0 | &tmp_used); |
1629 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1630 | |
|
1631 | 0 | wipememory(tmpbuf, tmp_used); |
1632 | 0 | } |
1633 | |
|
1634 | 0 | if (burn_stack_depth) |
1635 | 0 | _gcry_burn_stack(burn_stack_depth); |
1636 | 0 | } |
1637 | | |
1638 | | |
1639 | | /* Bulk encryption/decryption in ECB mode. */ |
1640 | | static void |
1641 | | _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, |
1642 | | size_t nblocks, int encrypt) |
1643 | 0 | { |
1644 | 0 | TWOFISH_context *ctx = context; |
1645 | 0 | unsigned char *outbuf = outbuf_arg; |
1646 | 0 | const unsigned char *inbuf = inbuf_arg; |
1647 | 0 | int burn_stack_depth = 0; |
1648 | | |
1649 | | /* Process remaining blocks. */ |
1650 | 0 | if (nblocks) |
1651 | 0 | { |
1652 | 0 | size_t fn_maxblocks = 15; |
1653 | 0 | size_t nburn; |
1654 | |
|
1655 | 0 | #ifdef USE_AVX2 |
1656 | 0 | if (ctx->use_avx2) |
1657 | 0 | fn_maxblocks = 16; |
1658 | 0 | #endif |
1659 | |
|
1660 | 0 | nburn = bulk_ecb_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16 |
1661 | 0 | : twofish_decrypt_blk1_16, |
1662 | 0 | outbuf, inbuf, nblocks, fn_maxblocks); |
1663 | 0 | burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; |
1664 | 0 | } |
1665 | |
|
1666 | 0 | if (burn_stack_depth) |
1667 | 0 | _gcry_burn_stack(burn_stack_depth); |
1668 | 0 | } |
1669 | | |
1670 | | |
1671 | | |
1672 | | /* Test a single encryption and decryption with each key size. */ |
1673 | | |
1674 | | static const char* |
1675 | | selftest (void) |
1676 | 0 | { |
1677 | 0 | TWOFISH_context ctx; /* Expanded key. */ |
1678 | 0 | byte scratch[16]; /* Encryption/decryption result buffer. */ |
1679 | 0 | cipher_bulk_ops_t bulk_ops; |
1680 | | |
1681 | | /* Test vectors for single encryption/decryption. Note that I am using |
1682 | | * the vectors from the Twofish paper's "known answer test", I=3 for |
1683 | | * 128-bit and I=4 for 256-bit, instead of the all-0 vectors from the |
1684 | | * "intermediate value test", because an all-0 key would trigger all the |
1685 | | * special cases in the RS matrix multiply, leaving the math untested. */ |
1686 | 0 | static byte plaintext[16] = { |
1687 | 0 | 0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E, |
1688 | 0 | 0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19 |
1689 | 0 | }; |
1690 | 0 | static byte key[16] = { |
1691 | 0 | 0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32, |
1692 | 0 | 0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A |
1693 | 0 | }; |
1694 | 0 | static const byte ciphertext[16] = { |
1695 | 0 | 0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85, |
1696 | 0 | 0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3 |
1697 | 0 | }; |
1698 | 0 | static byte plaintext_256[16] = { |
1699 | 0 | 0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F, |
1700 | 0 | 0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6 |
1701 | 0 | }; |
1702 | 0 | static byte key_256[32] = { |
1703 | 0 | 0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46, |
1704 | 0 | 0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D, |
1705 | 0 | 0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B, |
1706 | 0 | 0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F |
1707 | 0 | }; |
1708 | 0 | static const byte ciphertext_256[16] = { |
1709 | 0 | 0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97, |
1710 | 0 | 0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA |
1711 | 0 | }; |
1712 | |
|
1713 | 0 | twofish_setkey (&ctx, key, sizeof(key), &bulk_ops); |
1714 | 0 | twofish_encrypt (&ctx, scratch, plaintext); |
1715 | 0 | if (memcmp (scratch, ciphertext, sizeof (ciphertext))) |
1716 | 0 | return "Twofish-128 test encryption failed."; |
1717 | 0 | twofish_decrypt (&ctx, scratch, scratch); |
1718 | 0 | if (memcmp (scratch, plaintext, sizeof (plaintext))) |
1719 | 0 | return "Twofish-128 test decryption failed."; |
1720 | | |
1721 | 0 | twofish_setkey (&ctx, key_256, sizeof(key_256), &bulk_ops); |
1722 | 0 | twofish_encrypt (&ctx, scratch, plaintext_256); |
1723 | 0 | if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) |
1724 | 0 | return "Twofish-256 test encryption failed."; |
1725 | 0 | twofish_decrypt (&ctx, scratch, scratch); |
1726 | 0 | if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) |
1727 | 0 | return "Twofish-256 test decryption failed."; |
1728 | | |
1729 | 0 | return NULL; |
1730 | 0 | } |
1731 | | |
/* More complete test program.  This does 1000 encryptions and decryptions
 * with each of 250 128-bit keys and 2000 encryptions and decryptions with
 * each of 125 256-bit keys, using a feedback scheme similar to a Feistel
 * cipher, so as to be sure of testing all the table entries pretty
 * thoroughly.  We keep changing the keys so as to get a more meaningful
 * performance number, since the key setup is non-trivial for Twofish.
 *
 * Only built when TEST is defined. */

#ifdef TEST

#include <stdio.h>
#include <string.h>
#include <time.h>

int
main (void)
{
  TWOFISH_context ctx;     /* Expanded key. */
  int i, j;                /* Loop counters. */
  cipher_bulk_ops_t bulk_ops;

  const char *encrypt_msg; /* Message to print regarding encryption test;
                            * the output is done outside the loop to avoid
                            * stuffing up the timing. */
  clock_t timer;           /* For computing elapsed time. */

  /* Test buffer.  Rows 2 and 3 together also serve as a 256-bit key. */
  byte buffer[4][16] = {
    {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
     0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
    {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
     0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
    {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
     0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
    {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
     0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
  };

  /* Expected outputs for the million-operation test */
  static const byte test_encrypt[4][16] = {
    {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13,
     0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B},
    {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E,
     0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27},
    {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64,
     0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73},
    {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8,
     0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57}
  };
  static const byte test_decrypt[4][16] = {
    {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
     0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
    {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
     0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
    {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
     0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
    {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
     0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
  };

  /* Start the timer ticking. */
  timer = clock ();

  /* Encryption test. */
  for (i = 0; i < 125; i++)
    {
      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
      for (j = 0; j < 1000; j++)
        twofish_encrypt (&ctx, buffer[2], buffer[2]);
      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
      for (j = 0; j < 1000; j++)
        twofish_encrypt (&ctx, buffer[3], buffer[3]);
      /* 256-bit key taken from rows 2 and 3 of the buffer. */
      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
      for (j = 0; j < 1000; j++) {
        twofish_encrypt (&ctx, buffer[0], buffer[0]);
        twofish_encrypt (&ctx, buffer[1], buffer[1]);
      }
    }
  encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ?
    "encryption failure!\n" : "encryption OK!\n";

  /* Decryption test: undo the steps above in reverse order. */
  for (i = 0; i < 125; i++)
    {
      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
      for (j = 0; j < 1000; j++) {
        twofish_decrypt (&ctx, buffer[0], buffer[0]);
        twofish_decrypt (&ctx, buffer[1], buffer[1]);
      }
      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
      for (j = 0; j < 1000; j++)
        twofish_decrypt (&ctx, buffer[3], buffer[3]);
      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
      for (j = 0; j < 1000; j++)
        twofish_decrypt (&ctx, buffer[2], buffer[2]);
    }

  /* Stop the timer, and print results.  The messages are written with
   * fputs so that no non-literal string is ever used as a printf format. */
  timer = clock () - timer;
  fputs (encrypt_msg, stdout);
  fputs (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ?
         "decryption failure!\n" : "decryption OK!\n", stdout);
  printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC);

  return 0;
}

#endif /* TEST */
1839 | | |
1840 | | |
1841 | | |
1842 | | gcry_cipher_spec_t _gcry_cipher_spec_twofish = |
1843 | | { |
1844 | | GCRY_CIPHER_TWOFISH, {0, 0}, |
1845 | | "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context), |
1846 | | twofish_setkey, twofish_encrypt, twofish_decrypt |
1847 | | }; |
1848 | | |
1849 | | gcry_cipher_spec_t _gcry_cipher_spec_twofish128 = |
1850 | | { |
1851 | | GCRY_CIPHER_TWOFISH128, {0, 0}, |
1852 | | "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context), |
1853 | | twofish_setkey, twofish_encrypt, twofish_decrypt |
1854 | | }; |