/src/botan/src/lib/block/twofish/twofish.cpp
Line | Count | Source |
1 | | /* |
2 | | * Twofish |
3 | | * (C) 1999-2007,2017 Jack Lloyd |
4 | | * |
5 | | * The key schedule implemenation is based on a public domain |
6 | | * implementation by Matthew Skala |
7 | | * |
8 | | * Botan is released under the Simplified BSD License (see license.txt) |
9 | | */ |
10 | | |
11 | | #include <botan/internal/twofish.h> |
12 | | |
13 | | #include <botan/internal/loadstor.h> |
14 | | #include <botan/internal/rotate.h> |
15 | | |
16 | | namespace Botan { |
17 | | |
18 | | namespace { |
19 | | |
20 | | inline void TF_E( |
21 | 96.5k | uint32_t A, uint32_t B, uint32_t& C, uint32_t& D, uint32_t RK1, uint32_t RK2, const secure_vector<uint32_t>& SB) { |
22 | 96.5k | uint32_t X = SB[get_byte<3>(A)] ^ SB[256 + get_byte<2>(A)] ^ SB[512 + get_byte<1>(A)] ^ SB[768 + get_byte<0>(A)]; |
23 | 96.5k | uint32_t Y = SB[get_byte<0>(B)] ^ SB[256 + get_byte<3>(B)] ^ SB[512 + get_byte<2>(B)] ^ SB[768 + get_byte<1>(B)]; |
24 | | |
25 | 96.5k | X += Y; |
26 | 96.5k | Y += X; |
27 | | |
28 | 96.5k | X += RK1; |
29 | 96.5k | Y += RK2; |
30 | | |
31 | 96.5k | C = rotr<1>(C ^ X); |
32 | 96.5k | D = rotl<1>(D) ^ Y; |
33 | 96.5k | } |
34 | | |
35 | | inline void TF_D( |
36 | 69.1k | uint32_t A, uint32_t B, uint32_t& C, uint32_t& D, uint32_t RK1, uint32_t RK2, const secure_vector<uint32_t>& SB) { |
37 | 69.1k | uint32_t X = SB[get_byte<3>(A)] ^ SB[256 + get_byte<2>(A)] ^ SB[512 + get_byte<1>(A)] ^ SB[768 + get_byte<0>(A)]; |
38 | 69.1k | uint32_t Y = SB[get_byte<0>(B)] ^ SB[256 + get_byte<3>(B)] ^ SB[512 + get_byte<2>(B)] ^ SB[768 + get_byte<1>(B)]; |
39 | | |
40 | 69.1k | X += Y; |
41 | 69.1k | Y += X; |
42 | | |
43 | 69.1k | X += RK1; |
44 | 69.1k | Y += RK2; |
45 | | |
46 | 69.1k | C = rotl<1>(C) ^ X; |
47 | 69.1k | D = rotr<1>(D ^ Y); |
48 | 69.1k | } |
49 | | |
50 | | } // namespace |
51 | | |
52 | | /* |
53 | | * Twofish Encryption |
54 | | */ |
55 | 3.14k | void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
56 | 3.14k | assert_key_material_set(); |
57 | | |
58 | 5.10k | while(blocks >= 2) { |
59 | 1.95k | uint32_t A0, B0, C0, D0; |
60 | 1.95k | uint32_t A1, B1, C1, D1; |
61 | 1.95k | load_le(in, A0, B0, C0, D0, A1, B1, C1, D1); |
62 | | |
63 | 1.95k | A0 ^= m_RK[0]; |
64 | 1.95k | A1 ^= m_RK[0]; |
65 | 1.95k | B0 ^= m_RK[1]; |
66 | 1.95k | B1 ^= m_RK[1]; |
67 | 1.95k | C0 ^= m_RK[2]; |
68 | 1.95k | C1 ^= m_RK[2]; |
69 | 1.95k | D0 ^= m_RK[3]; |
70 | 1.95k | D1 ^= m_RK[3]; |
71 | | |
72 | 17.6k | for(size_t k = 8; k != 40; k += 4) { |
73 | 15.6k | TF_E(A0, B0, C0, D0, m_RK[k + 0], m_RK[k + 1], m_SB); |
74 | 15.6k | TF_E(A1, B1, C1, D1, m_RK[k + 0], m_RK[k + 1], m_SB); |
75 | | |
76 | 15.6k | TF_E(C0, D0, A0, B0, m_RK[k + 2], m_RK[k + 3], m_SB); |
77 | 15.6k | TF_E(C1, D1, A1, B1, m_RK[k + 2], m_RK[k + 3], m_SB); |
78 | 15.6k | } |
79 | | |
80 | 1.95k | C0 ^= m_RK[4]; |
81 | 1.95k | C1 ^= m_RK[4]; |
82 | 1.95k | D0 ^= m_RK[5]; |
83 | 1.95k | D1 ^= m_RK[5]; |
84 | 1.95k | A0 ^= m_RK[6]; |
85 | 1.95k | A1 ^= m_RK[6]; |
86 | 1.95k | B0 ^= m_RK[7]; |
87 | 1.95k | B1 ^= m_RK[7]; |
88 | | |
89 | 1.95k | store_le(out, C0, D0, A0, B0, C1, D1, A1, B1); |
90 | | |
91 | 1.95k | blocks -= 2; |
92 | 1.95k | out += 2 * BLOCK_SIZE; |
93 | 1.95k | in += 2 * BLOCK_SIZE; |
94 | 1.95k | } |
95 | | |
96 | 3.14k | if(blocks) { |
97 | 2.11k | uint32_t A, B, C, D; |
98 | 2.11k | load_le(in, A, B, C, D); |
99 | | |
100 | 2.11k | A ^= m_RK[0]; |
101 | 2.11k | B ^= m_RK[1]; |
102 | 2.11k | C ^= m_RK[2]; |
103 | 2.11k | D ^= m_RK[3]; |
104 | | |
105 | 19.0k | for(size_t k = 8; k != 40; k += 4) { |
106 | 16.9k | TF_E(A, B, C, D, m_RK[k], m_RK[k + 1], m_SB); |
107 | 16.9k | TF_E(C, D, A, B, m_RK[k + 2], m_RK[k + 3], m_SB); |
108 | 16.9k | } |
109 | | |
110 | 2.11k | C ^= m_RK[4]; |
111 | 2.11k | D ^= m_RK[5]; |
112 | 2.11k | A ^= m_RK[6]; |
113 | 2.11k | B ^= m_RK[7]; |
114 | | |
115 | 2.11k | store_le(out, C, D, A, B); |
116 | 2.11k | } |
117 | 3.14k | } |
118 | | |
119 | | /* |
120 | | * Twofish Decryption |
121 | | */ |
122 | 1.39k | void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
123 | 1.39k | assert_key_material_set(); |
124 | | |
125 | 3.37k | while(blocks >= 2) { |
126 | 1.97k | uint32_t A0, B0, C0, D0; |
127 | 1.97k | uint32_t A1, B1, C1, D1; |
128 | 1.97k | load_le(in, A0, B0, C0, D0, A1, B1, C1, D1); |
129 | | |
130 | 1.97k | A0 ^= m_RK[4]; |
131 | 1.97k | A1 ^= m_RK[4]; |
132 | 1.97k | B0 ^= m_RK[5]; |
133 | 1.97k | B1 ^= m_RK[5]; |
134 | 1.97k | C0 ^= m_RK[6]; |
135 | 1.97k | C1 ^= m_RK[6]; |
136 | 1.97k | D0 ^= m_RK[7]; |
137 | 1.97k | D1 ^= m_RK[7]; |
138 | | |
139 | 17.8k | for(size_t k = 40; k != 8; k -= 4) { |
140 | 15.8k | TF_D(A0, B0, C0, D0, m_RK[k - 2], m_RK[k - 1], m_SB); |
141 | 15.8k | TF_D(A1, B1, C1, D1, m_RK[k - 2], m_RK[k - 1], m_SB); |
142 | | |
143 | 15.8k | TF_D(C0, D0, A0, B0, m_RK[k - 4], m_RK[k - 3], m_SB); |
144 | 15.8k | TF_D(C1, D1, A1, B1, m_RK[k - 4], m_RK[k - 3], m_SB); |
145 | 15.8k | } |
146 | | |
147 | 1.97k | C0 ^= m_RK[0]; |
148 | 1.97k | C1 ^= m_RK[0]; |
149 | 1.97k | D0 ^= m_RK[1]; |
150 | 1.97k | D1 ^= m_RK[1]; |
151 | 1.97k | A0 ^= m_RK[2]; |
152 | 1.97k | A1 ^= m_RK[2]; |
153 | 1.97k | B0 ^= m_RK[3]; |
154 | 1.97k | B1 ^= m_RK[3]; |
155 | | |
156 | 1.97k | store_le(out, C0, D0, A0, B0, C1, D1, A1, B1); |
157 | | |
158 | 1.97k | blocks -= 2; |
159 | 1.97k | out += 2 * BLOCK_SIZE; |
160 | 1.97k | in += 2 * BLOCK_SIZE; |
161 | 1.97k | } |
162 | | |
163 | 1.39k | if(blocks) { |
164 | 361 | uint32_t A, B, C, D; |
165 | 361 | load_le(in, A, B, C, D); |
166 | | |
167 | 361 | A ^= m_RK[4]; |
168 | 361 | B ^= m_RK[5]; |
169 | 361 | C ^= m_RK[6]; |
170 | 361 | D ^= m_RK[7]; |
171 | | |
172 | 3.24k | for(size_t k = 40; k != 8; k -= 4) { |
173 | 2.88k | TF_D(A, B, C, D, m_RK[k - 2], m_RK[k - 1], m_SB); |
174 | 2.88k | TF_D(C, D, A, B, m_RK[k - 4], m_RK[k - 3], m_SB); |
175 | 2.88k | } |
176 | | |
177 | 361 | C ^= m_RK[0]; |
178 | 361 | D ^= m_RK[1]; |
179 | 361 | A ^= m_RK[2]; |
180 | 361 | B ^= m_RK[3]; |
181 | | |
182 | 361 | store_le(out, C, D, A, B); |
183 | 361 | } |
184 | 1.39k | } |
185 | | |
186 | 4.76k | bool Twofish::has_keying_material() const { |
187 | 4.76k | return !m_SB.empty(); |
188 | 4.76k | } |
189 | | |
190 | | /* |
191 | | * Twofish Key Schedule |
192 | | */ |
193 | 932 | void Twofish::key_schedule(std::span<const uint8_t> key) { |
194 | 932 | m_SB.resize(1024); |
195 | 932 | m_RK.resize(40); |
196 | | |
197 | 932 | secure_vector<uint8_t> S(16); |
198 | | |
199 | 26.3k | for(size_t i = 0; i != key.size(); ++i) { |
200 | | /* |
201 | | * Do one column of the RS matrix multiplcation |
202 | | */ |
203 | 25.4k | if(key[i]) { |
204 | 20.4k | uint8_t X = POLY_TO_EXP[key[i] - 1]; |
205 | | |
206 | 20.4k | uint8_t RS1 = RS[(4 * i) % 32]; |
207 | 20.4k | uint8_t RS2 = RS[(4 * i + 1) % 32]; |
208 | 20.4k | uint8_t RS3 = RS[(4 * i + 2) % 32]; |
209 | 20.4k | uint8_t RS4 = RS[(4 * i + 3) % 32]; |
210 | | |
211 | 20.4k | S[4 * (i / 8)] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS1 - 1]) % 255]; |
212 | 20.4k | S[4 * (i / 8) + 1] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS2 - 1]) % 255]; |
213 | 20.4k | S[4 * (i / 8) + 2] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS3 - 1]) % 255]; |
214 | 20.4k | S[4 * (i / 8) + 3] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS4 - 1]) % 255]; |
215 | 20.4k | } |
216 | 25.4k | } |
217 | | |
218 | 932 | if(key.size() == 16) { |
219 | 67.5k | for(size_t i = 0; i != 256; ++i) { |
220 | 67.3k | m_SB[i] = MDS0[Q0[Q0[i] ^ S[0]] ^ S[4]]; |
221 | 67.3k | m_SB[256 + i] = MDS1[Q0[Q1[i] ^ S[1]] ^ S[5]]; |
222 | 67.3k | m_SB[512 + i] = MDS2[Q1[Q0[i] ^ S[2]] ^ S[6]]; |
223 | 67.3k | m_SB[768 + i] = MDS3[Q1[Q1[i] ^ S[3]] ^ S[7]]; |
224 | 67.3k | } |
225 | | |
226 | 5.52k | for(size_t i = 0; i < 40; i += 2) { |
227 | 5.26k | uint32_t X = MDS0[Q0[Q0[i] ^ key[8]] ^ key[0]] ^ MDS1[Q0[Q1[i] ^ key[9]] ^ key[1]] ^ |
228 | 5.26k | MDS2[Q1[Q0[i] ^ key[10]] ^ key[2]] ^ MDS3[Q1[Q1[i] ^ key[11]] ^ key[3]]; |
229 | 5.26k | uint32_t Y = MDS0[Q0[Q0[i + 1] ^ key[12]] ^ key[4]] ^ MDS1[Q0[Q1[i + 1] ^ key[13]] ^ key[5]] ^ |
230 | 5.26k | MDS2[Q1[Q0[i + 1] ^ key[14]] ^ key[6]] ^ MDS3[Q1[Q1[i + 1] ^ key[15]] ^ key[7]]; |
231 | 5.26k | Y = rotl<8>(Y); |
232 | 5.26k | X += Y; |
233 | 5.26k | Y += X; |
234 | | |
235 | 5.26k | m_RK[i] = X; |
236 | 5.26k | m_RK[i + 1] = rotl<9>(Y); |
237 | 5.26k | } |
238 | 669 | } else if(key.size() == 24) { |
239 | 5.39k | for(size_t i = 0; i != 256; ++i) { |
240 | 5.37k | m_SB[i] = MDS0[Q0[Q0[Q1[i] ^ S[0]] ^ S[4]] ^ S[8]]; |
241 | 5.37k | m_SB[256 + i] = MDS1[Q0[Q1[Q1[i] ^ S[1]] ^ S[5]] ^ S[9]]; |
242 | 5.37k | m_SB[512 + i] = MDS2[Q1[Q0[Q0[i] ^ S[2]] ^ S[6]] ^ S[10]]; |
243 | 5.37k | m_SB[768 + i] = MDS3[Q1[Q1[Q0[i] ^ S[3]] ^ S[7]] ^ S[11]]; |
244 | 5.37k | } |
245 | | |
246 | 441 | for(size_t i = 0; i < 40; i += 2) { |
247 | 420 | uint32_t X = |
248 | 420 | MDS0[Q0[Q0[Q1[i] ^ key[16]] ^ key[8]] ^ key[0]] ^ MDS1[Q0[Q1[Q1[i] ^ key[17]] ^ key[9]] ^ key[1]] ^ |
249 | 420 | MDS2[Q1[Q0[Q0[i] ^ key[18]] ^ key[10]] ^ key[2]] ^ MDS3[Q1[Q1[Q0[i] ^ key[19]] ^ key[11]] ^ key[3]]; |
250 | 420 | uint32_t Y = MDS0[Q0[Q0[Q1[i + 1] ^ key[20]] ^ key[12]] ^ key[4]] ^ |
251 | 420 | MDS1[Q0[Q1[Q1[i + 1] ^ key[21]] ^ key[13]] ^ key[5]] ^ |
252 | 420 | MDS2[Q1[Q0[Q0[i + 1] ^ key[22]] ^ key[14]] ^ key[6]] ^ |
253 | 420 | MDS3[Q1[Q1[Q0[i + 1] ^ key[23]] ^ key[15]] ^ key[7]]; |
254 | 420 | Y = rotl<8>(Y); |
255 | 420 | X += Y; |
256 | 420 | Y += X; |
257 | | |
258 | 420 | m_RK[i] = X; |
259 | 420 | m_RK[i + 1] = rotl<9>(Y); |
260 | 420 | } |
261 | 648 | } else if(key.size() == 32) { |
262 | 166k | for(size_t i = 0; i != 256; ++i) { |
263 | 165k | m_SB[i] = MDS0[Q0[Q0[Q1[Q1[i] ^ S[0]] ^ S[4]] ^ S[8]] ^ S[12]]; |
264 | 165k | m_SB[256 + i] = MDS1[Q0[Q1[Q1[Q0[i] ^ S[1]] ^ S[5]] ^ S[9]] ^ S[13]]; |
265 | 165k | m_SB[512 + i] = MDS2[Q1[Q0[Q0[Q0[i] ^ S[2]] ^ S[6]] ^ S[10]] ^ S[14]]; |
266 | 165k | m_SB[768 + i] = MDS3[Q1[Q1[Q0[Q1[i] ^ S[3]] ^ S[7]] ^ S[11]] ^ S[15]]; |
267 | 165k | } |
268 | | |
269 | 13.6k | for(size_t i = 0; i < 40; i += 2) { |
270 | 12.9k | uint32_t X = MDS0[Q0[Q0[Q1[Q1[i] ^ key[24]] ^ key[16]] ^ key[8]] ^ key[0]] ^ |
271 | 12.9k | MDS1[Q0[Q1[Q1[Q0[i] ^ key[25]] ^ key[17]] ^ key[9]] ^ key[1]] ^ |
272 | 12.9k | MDS2[Q1[Q0[Q0[Q0[i] ^ key[26]] ^ key[18]] ^ key[10]] ^ key[2]] ^ |
273 | 12.9k | MDS3[Q1[Q1[Q0[Q1[i] ^ key[27]] ^ key[19]] ^ key[11]] ^ key[3]]; |
274 | 12.9k | uint32_t Y = MDS0[Q0[Q0[Q1[Q1[i + 1] ^ key[28]] ^ key[20]] ^ key[12]] ^ key[4]] ^ |
275 | 12.9k | MDS1[Q0[Q1[Q1[Q0[i + 1] ^ key[29]] ^ key[21]] ^ key[13]] ^ key[5]] ^ |
276 | 12.9k | MDS2[Q1[Q0[Q0[Q0[i + 1] ^ key[30]] ^ key[22]] ^ key[14]] ^ key[6]] ^ |
277 | 12.9k | MDS3[Q1[Q1[Q0[Q1[i + 1] ^ key[31]] ^ key[23]] ^ key[15]] ^ key[7]]; |
278 | 12.9k | Y = rotl<8>(Y); |
279 | 12.9k | X += Y; |
280 | 12.9k | Y += X; |
281 | | |
282 | 12.9k | m_RK[i] = X; |
283 | 12.9k | m_RK[i + 1] = rotl<9>(Y); |
284 | 12.9k | } |
285 | 648 | } |
286 | 932 | } |
287 | | |
288 | | /* |
289 | | * Clear memory of sensitive data |
290 | | */ |
291 | 144 | void Twofish::clear() { |
292 | 144 | zap(m_SB); |
293 | 144 | zap(m_RK); |
294 | 144 | } |
295 | | |
296 | | } // namespace Botan |