/src/botan/src/lib/block/des/des_bmi2/des_bmi2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2020 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/des.h> |
8 | | #include <botan/internal/rotate.h> |
9 | | #include <botan/internal/loadstor.h> |
10 | | #include <immintrin.h> |
11 | | |
12 | | namespace Botan { |
13 | | |
14 | | namespace { |
15 | | |
16 | | namespace DES_BMI2_fn { |
17 | | |
/*
* Lookup table 0 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(0, T0) % 64 and scatters the selected
* entry into the bit positions of mask 0x01010404 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
18 | | alignas(64) const uint8_t SPBOX_CAT_0[64] = { |
19 | | 0xE, 0x0, 0x4, 0xF, 0xD, 0x7, 0x1, 0x4, 0x2, 0xE, 0xF, 0x2, 0xB, 0xD, 0x8, 0x1, |
20 | | 0x3, 0xA, 0xA, 0x6, 0x6, 0xC, 0xC, 0xB, 0x5, 0x9, 0x9, 0x5, 0x0, 0x3, 0x7, 0x8, |
21 | | 0x4, 0xF, 0x1, 0xC, 0xE, 0x8, 0x8, 0x2, 0xD, 0x4, 0x6, 0x9, 0x2, 0x1, 0xB, 0x7, |
22 | | 0xF, 0x5, 0xC, 0xB, 0x9, 0x3, 0x7, 0xE, 0x3, 0xA, 0xA, 0x0, 0x5, 0x6, 0x0, 0xD, |
23 | | }; |
24 | | |
/*
* Lookup table 1 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(0, T1) % 64 and scatters the selected
* entry into the bit positions of mask 0x80108020 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
25 | | alignas(64) const uint8_t SPBOX_CAT_1[64] = { |
26 | | 0xF, 0xA, 0x2, 0x7, 0x4, 0x1, 0xD, 0xB, 0x9, 0xF, 0xE, 0x8, 0xA, 0x4, 0x1, 0xD, |
27 | | 0x6, 0x5, 0xB, 0x0, 0x8, 0x2, 0x7, 0xC, 0x5, 0x9, 0x0, 0x6, 0x3, 0xE, 0xC, 0x3, |
28 | | 0x0, 0x7, 0xD, 0x4, 0xB, 0xC, 0xE, 0x2, 0xC, 0xA, 0x1, 0xF, 0x7, 0x1, 0x2, 0x8, |
29 | | 0x3, 0xE, 0x4, 0x9, 0x5, 0xB, 0x9, 0x5, 0x6, 0x0, 0xA, 0x3, 0x8, 0xD, 0xF, 0x6, |
30 | | }; |
31 | | |
/*
* Lookup table 2 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(1, T0) % 64 and scatters the selected
* entry into the bit positions of mask 0x08020208 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
32 | | alignas(64) const uint8_t SPBOX_CAT_2[64] = { |
33 | | 0x3, 0xE, 0x0, 0xD, 0xA, 0x0, 0x7, 0xA, 0x5, 0x9, 0x9, 0x4, 0xF, 0x5, 0xC, 0x3, |
34 | | 0x8, 0x1, 0xE, 0x2, 0x6, 0xC, 0xD, 0x7, 0xB, 0x6, 0x4, 0xB, 0x1, 0xF, 0x2, 0x8, |
35 | | 0xE, 0x8, 0x5, 0x3, 0x4, 0xE, 0xA, 0x0, 0x2, 0x5, 0xF, 0xA, 0x9, 0x2, 0x0, 0xD, |
36 | | 0xB, 0x4, 0x8, 0xF, 0x1, 0x7, 0x6, 0x9, 0xC, 0xB, 0x3, 0xC, 0x7, 0x1, 0xD, 0x6, |
37 | | }; |
38 | | |
/*
* Lookup table 3 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(1, T1) % 64 and scatters the selected
* entry into the bit positions of mask 0x00802081 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
39 | | alignas(64) const uint8_t SPBOX_CAT_3[64] = { |
40 | | 0xD, 0x7, 0x7, 0x2, 0xE, 0xB, 0x9, 0x5, 0x0, 0xC, 0xC, 0xF, 0x3, 0x0, 0xA, 0x9, |
41 | | 0x1, 0x4, 0x8, 0xD, 0x2, 0x8, 0x5, 0x6, 0xB, 0x1, 0x6, 0xA, 0x4, 0xE, 0xF, 0x3, |
42 | | 0xA, 0x9, 0xC, 0xF, 0x3, 0x0, 0x0, 0xC, 0x6, 0xA, 0xB, 0x1, 0xD, 0x7, 0x7, 0x2, |
43 | | 0xF, 0x3, 0x1, 0x4, 0x9, 0x5, 0xE, 0xB, 0x5, 0x6, 0x8, 0xD, 0x2, 0x8, 0x4, 0xE, |
44 | | }; |
45 | | |
/*
* Lookup table 4 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(2, T0) % 64 and scatters the selected
* entry into the bit positions of mask 0x42080100 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
46 | | alignas(64) const uint8_t SPBOX_CAT_4[64] = { |
47 | | 0x1, 0x7, 0x6, 0xD, 0x2, 0x1, 0x8, 0x6, 0xB, 0x2, 0x5, 0xB, 0xD, 0xE, 0x3, 0x8, |
48 | | 0x4, 0xA, 0xA, 0x0, 0x9, 0xF, 0xF, 0x5, 0xE, 0x9, 0x0, 0xC, 0x7, 0x4, 0xC, 0x3, |
49 | | 0x2, 0xD, 0x1, 0x4, 0x8, 0x6, 0xD, 0xB, 0x5, 0x8, 0xE, 0x7, 0xB, 0x1, 0x4, 0xE, |
50 | | 0xF, 0x3, 0xC, 0xF, 0x6, 0x0, 0xA, 0xC, 0x3, 0x5, 0x9, 0x2, 0x0, 0xA, 0x7, 0x9, |
51 | | }; |
52 | | |
/*
* Lookup table 5 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(2, T1) % 64 and scatters the selected
* entry into the bit positions of mask 0x20404010 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
53 | | alignas(64) const uint8_t SPBOX_CAT_5[64] = { |
54 | | 0x9, 0xC, 0x2, 0xF, 0xC, 0x1, 0xF, 0x4, 0xA, 0x7, 0x4, 0x9, 0x5, 0xA, 0x8, 0x3, |
55 | | 0x0, 0x5, 0xB, 0x2, 0x6, 0xB, 0x1, 0xD, 0xD, 0x0, 0x7, 0xE, 0x3, 0x6, 0xE, 0x8, |
56 | | 0xA, 0x1, 0xD, 0x6, 0xF, 0x4, 0x3, 0x9, 0x4, 0xA, 0x8, 0x3, 0x9, 0xF, 0x6, 0xC, |
57 | | 0x7, 0xE, 0x0, 0xD, 0x1, 0x2, 0xC, 0x7, 0x2, 0x5, 0xB, 0x0, 0xE, 0x8, 0x5, 0xB, |
58 | | }; |
59 | | |
/*
* Lookup table 6 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(3, T0) % 64 and scatters the selected
* entry into the bit positions of mask 0x04200802 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
60 | | alignas(64) const uint8_t SPBOX_CAT_6[64] = { |
61 | | 0x4, 0xD, 0xB, 0x0, 0x2, 0xB, 0x7, 0xE, 0xF, 0x4, 0x0, 0x9, 0x1, 0x8, 0xD, 0x3, |
62 | | 0xA, 0x7, 0x5, 0xA, 0x9, 0xC, 0xE, 0x5, 0xC, 0x2, 0x3, 0xF, 0x6, 0x1, 0x8, 0x6, |
63 | | 0x8, 0x6, 0x4, 0xB, 0xB, 0xD, 0xD, 0x1, 0x5, 0x8, 0xA, 0x4, 0xE, 0x3, 0x7, 0xE, |
64 | | 0x3, 0x9, 0xF, 0xC, 0x6, 0x0, 0x1, 0xF, 0x0, 0x7, 0xC, 0x2, 0x9, 0xA, 0x2, 0x5, |
65 | | }; |
66 | | |
/*
* Lookup table 7 of 8: 64 entries, every one a 4-bit value (0x0-0xF).
* spbox() indexes it with get_byte(3, T1) % 64 and scatters the selected
* entry into the bit positions of mask 0x10041040 via _pdep_u32.
* alignas(64) puts the 64-byte table on a 64-byte boundary
* (presumably so it occupies a single cache line - confirm for the target CPU).
*/
67 | | alignas(64) const uint8_t SPBOX_CAT_7[64] = { |
68 | | 0xB, 0x2, 0x4, 0xF, 0x8, 0xB, 0x1, 0x8, 0x5, 0xC, 0xF, 0x6, 0xE, 0x7, 0x2, 0x1, |
69 | | 0xC, 0x9, 0xA, 0x3, 0x6, 0x5, 0xD, 0xE, 0x3, 0x0, 0x0, 0xD, 0x9, 0xA, 0x7, 0x4, |
70 | | 0x7, 0x4, 0xE, 0x2, 0x1, 0xD, 0x2, 0x7, 0xA, 0x1, 0x9, 0xC, 0xD, 0x8, 0x4, 0xB, |
71 | | 0x0, 0xF, 0x5, 0x9, 0xC, 0xA, 0xB, 0x0, 0xF, 0x6, 0x6, 0x3, 0x3, 0x5, 0x8, 0xE, |
72 | | }; |
73 | | |
/*
* Combine eight table lookups into one 32-bit word.
*
* Bytes 0..3 of T0 index the even-numbered tables (SPBOX_CAT_0/2/4/6) and
* bytes 0..3 of T1 index the odd-numbered ones (SPBOX_CAT_1/3/5/7). Each
* byte is reduced modulo 64, so the index always lies inside the 64-entry
* table. The 4-bit entry is then deposited by _pdep_u32 (BMI2 PDEP) into
* the four bit positions set in the accompanying mask, and the eight
* partial words are XORed together.
*
* The eight masks each have exactly four bits set, are pairwise disjoint
* and together cover all 32 bits, so every output bit is produced by
* exactly one table.
*
* NOTE(review): get_byte is declared outside this file; which end of the
* word "byte 0" refers to should be confirmed against its definition.
*
* @param T0 word whose bytes select entries of tables 0, 2, 4, 6
* @param T1 word whose bytes select entries of tables 1, 3, 5, 7
* @return XOR of the eight scattered table entries
*/
74 | | inline uint32_t spbox(uint32_t T0, uint32_t T1) |
75 | 961k | { |
76 | 961k | return |
77 | 961k | _pdep_u32(SPBOX_CAT_0[get_byte(0, T0) % 64], 0x01010404) ^ |
78 | 961k | _pdep_u32(SPBOX_CAT_1[get_byte(0, T1) % 64], 0x80108020) ^ |
79 | 961k | _pdep_u32(SPBOX_CAT_2[get_byte(1, T0) % 64], 0x08020208) ^ |
80 | 961k | _pdep_u32(SPBOX_CAT_3[get_byte(1, T1) % 64], 0x00802081) ^ |
81 | 961k | _pdep_u32(SPBOX_CAT_4[get_byte(2, T0) % 64], 0x42080100) ^ |
82 | 961k | _pdep_u32(SPBOX_CAT_5[get_byte(2, T1) % 64], 0x20404010) ^ |
83 | 961k | _pdep_u32(SPBOX_CAT_6[get_byte(3, T0) % 64], 0x04200802) ^ |
84 | 961k | _pdep_u32(SPBOX_CAT_7[get_byte(3, T1) % 64], 0x10041040); |
85 | 961k | } |
86 | | |
/*
* Run the 16 rounds over one (L, R) pair using the round keys in
* ascending order.
*
* Each loop iteration performs two rounds: L is updated from R, then R is
* updated from the new L. Round r reads round_key[2*r] (XORed with the
* source half rotated right by 4 bits) and round_key[2*r + 1] (XORed with
* the unrotated source half); both words are fed to spbox().
*
* @param Lr in/out: first half; read on entry, overwritten on exit
* @param Rr in/out: second half; read on entry, overwritten on exit
* @param round_key 32 words, all of which are read (indices 0..31)
*/
87 | | inline void des_encrypt(uint32_t& Lr, uint32_t& Rr, |
88 | | const uint32_t round_key[32]) |
89 | 3.73k | { |
      // Work on local copies; the references are only written once at the end.
90 | 3.73k | uint32_t L = Lr; |
91 | 3.73k | uint32_t R = Rr; |
      // i advances by 2, so 2*i walks the key array four words per iteration.
92 | 33.6k | for(size_t i = 0; i != 16; i += 2) |
93 | 29.9k | { |
94 | 29.9k | L ^= spbox(rotr<4>(R) ^ round_key[2*i ], R ^ round_key[2*i+1]); |
95 | 29.9k | R ^= spbox(rotr<4>(L) ^ round_key[2*i+2], L ^ round_key[2*i+3]); |
96 | 29.9k | } |
97 | | |
98 | 3.73k | Lr = L; |
99 | 3.73k | Rr = R; |
100 | 3.73k | } |
101 | | |
/*
* Two-block variant of des_encrypt(): applies the same 16 rounds, with the
* same round keys in ascending order, to two independent (L, R) pairs.
*
* The statements for block 0 and block 1 are interleaved round by round
* (presumably so the two independent dependency chains can overlap in the
* CPU pipeline - not measurable from this file).
*
* @param L0r, R0r in/out halves of the first block
* @param L1r, R1r in/out halves of the second block
* @param round_key 32 words shared by both blocks (indices 0..31 are read)
*/
102 | | inline void des_encrypt_x2(uint32_t& L0r, uint32_t& R0r, |
103 | | uint32_t& L1r, uint32_t& R1r, |
104 | | const uint32_t round_key[32]) |
105 | 9.06k | { |
106 | 9.06k | uint32_t L0 = L0r; |
107 | 9.06k | uint32_t R0 = R0r; |
108 | 9.06k | uint32_t L1 = L1r; |
109 | 9.06k | uint32_t R1 = R1r; |
110 | | |
111 | 81.5k | for(size_t i = 0; i != 16; i += 2) |
112 | 72.4k | { |
      // Round i for both blocks: update the L halves from the R halves.
113 | 72.4k | L0 ^= spbox(rotr<4>(R0) ^ round_key[2*i ], R0 ^ round_key[2*i+1]); |
114 | 72.4k | L1 ^= spbox(rotr<4>(R1) ^ round_key[2*i ], R1 ^ round_key[2*i+1]); |
115 | | |
      // Round i+1 for both blocks: update the R halves from the new L halves.
116 | 72.4k | R0 ^= spbox(rotr<4>(L0) ^ round_key[2*i+2], L0 ^ round_key[2*i+3]); |
117 | 72.4k | R1 ^= spbox(rotr<4>(L1) ^ round_key[2*i+2], L1 ^ round_key[2*i+3]); |
118 | 72.4k | } |
119 | | |
120 | 9.06k | L0r = L0; |
121 | 9.06k | R0r = R0; |
122 | 9.06k | L1r = L1; |
123 | 9.06k | R1r = R1; |
124 | 9.06k | } |
125 | | |
/*
* Run the 16 rounds over one (L, R) pair using the round keys in
* descending order (round 15 first, round 0 last).
*
* The round structure is the same as in des_encrypt(); only the order in
* which the key words are consumed is reversed. With i counting down from
* 16 to 2 in steps of 2, the first statement uses round_key[2*i-2] and
* round_key[2*i-1], the second uses round_key[2*i-4] and round_key[2*i-3],
* so the indices run from 31 down to 0 and never go negative.
*
* @param Lr in/out: first half; read on entry, overwritten on exit
* @param Rr in/out: second half; read on entry, overwritten on exit
* @param round_key 32 words, all of which are read (indices 31..0)
*/
126 | | inline void des_decrypt(uint32_t& Lr, uint32_t& Rr, |
127 | | const uint32_t round_key[32]) |
128 | 1.98k | { |
129 | 1.98k | uint32_t L = Lr; |
130 | 1.98k | uint32_t R = Rr; |
131 | 17.8k | for(size_t i = 16; i != 0; i -= 2) |
132 | 15.8k | { |
133 | 15.8k | L ^= spbox(rotr<4>(R) ^ round_key[2*i - 2], R ^ round_key[2*i - 1]); |
134 | 15.8k | R ^= spbox(rotr<4>(L) ^ round_key[2*i - 4], L ^ round_key[2*i - 3]); |
135 | 15.8k | } |
136 | 1.98k | Lr = L; |
137 | 1.98k | Rr = R; |
138 | 1.98k | } |
139 | | |
/*
* Two-block variant of des_decrypt(): applies the 16 rounds with the round
* keys in descending order to two independent (L, R) pairs, interleaving
* the statements for block 0 and block 1 round by round.
*
* @param L0r, R0r in/out halves of the first block
* @param L1r, R1r in/out halves of the second block
* @param round_key 32 words shared by both blocks (indices 31..0 are read)
*/
140 | | inline void des_decrypt_x2(uint32_t& L0r, uint32_t& R0r, |
141 | | uint32_t& L1r, uint32_t& R1r, |
142 | | const uint32_t round_key[32]) |
143 | 18.1k | { |
144 | 18.1k | uint32_t L0 = L0r; |
145 | 18.1k | uint32_t R0 = R0r; |
146 | 18.1k | uint32_t L1 = L1r; |
147 | 18.1k | uint32_t R1 = R1r; |
148 | | |
      // i counts 16, 14, ..., 2, so the smallest index used below is 2*2 - 4 = 0.
149 | 163k | for(size_t i = 16; i != 0; i -= 2) |
150 | 144k | { |
151 | 144k | L0 ^= spbox(rotr<4>(R0) ^ round_key[2*i - 2], R0 ^ round_key[2*i - 1]); |
152 | 144k | L1 ^= spbox(rotr<4>(R1) ^ round_key[2*i - 2], R1 ^ round_key[2*i - 1]); |
153 | | |
154 | 144k | R0 ^= spbox(rotr<4>(L0) ^ round_key[2*i - 4], L0 ^ round_key[2*i - 3]); |
155 | 144k | R1 ^= spbox(rotr<4>(L1) ^ round_key[2*i - 4], L1 ^ round_key[2*i - 3]); |
156 | 144k | } |
157 | | |
158 | 18.1k | L0r = L0; |
159 | 18.1k | R0r = R0; |
160 | 18.1k | L1r = L1; |
161 | 18.1k | R1r = R1; |
162 | 18.1k | } |
163 | | |
/*
* Load one input block into two 32-bit halves and apply the initial
* bit permutation.
*
* L and R are filled from words 0 and 1 of block via load_be<uint32_t>,
* then mixed by five masked-exchange steps. Each step computes
* T = (L ^ R) & mask and XORs T into both halves, which swaps exactly the
* bits selected by mask between L and R; rotations between the steps move
* the next group of bits into place.
*
* NOTE(review): load_be is declared elsewhere; from its use here block is
* assumed to provide at least two 32-bit words (8 bytes) - confirm.
*
* @param L out: first permuted half
* @param R out: second permuted half
* @param block input bytes; words 0 and 1 are read
*/
164 | | inline void des_IP(uint32_t& L, uint32_t& R, const uint8_t block[]) |
165 | 20.0k | { |
166 | | // IP sequence by Wei Dai, taken from public domain Crypto++ |
167 | 20.0k | L = load_be<uint32_t>(block, 0); |
168 | 20.0k | R = load_be<uint32_t>(block, 1); |
169 | | |
170 | 20.0k | uint32_t T; |
171 | 20.0k | R = rotl<4>(R); |
172 | 20.0k | T = (L ^ R) & 0xF0F0F0F0; |
173 | 20.0k | L ^= T; |
174 | 20.0k | R = rotr<20>(R ^ T); |
175 | 20.0k | T = (L ^ R) & 0xFFFF0000; |
176 | 20.0k | L ^= T; |
177 | 20.0k | R = rotr<18>(R ^ T); |
178 | 20.0k | T = (L ^ R) & 0x33333333; |
179 | 20.0k | L ^= T; |
180 | 20.0k | R = rotr<6>(R ^ T); |
181 | 20.0k | T = (L ^ R) & 0x00FF00FF; |
182 | 20.0k | L ^= T; |
183 | 20.0k | R = rotl<9>(R ^ T); |
184 | 20.0k | T = (L ^ R) & 0xAAAAAAAA; |
      // Final step rotates L instead of R; the ordering of these statements matters.
185 | 20.0k | L = rotl<1>(L ^ T); |
186 | 20.0k | R ^= T; |
187 | 20.0k | } |
188 | | |
/*
* Apply the final bit permutation to two 32-bit halves and store the
* result to out.
*
* The steps mirror des_IP(): the same five masks are used in the opposite
* order (0xAAAAAAAA first, 0xF0F0F0F0 last), every rotation runs in the
* opposite direction, and the roles of L and R are exchanged. This is
* presumably the exact inverse of des_IP up to that exchange - verify
* against the test vectors before relying on it.
*
* L and R are taken by value, so the caller's variables are not modified.
* The halves are written with store_be in the order (R, L).
*
* NOTE(review): store_be is declared elsewhere; out is assumed to have
* room for two 32-bit words (8 bytes) - confirm.
*
* @param L first half of the cipher state
* @param R second half of the cipher state
* @param out destination buffer for the permuted block
*/
189 | | inline void des_FP(uint32_t L, uint32_t R, uint8_t out[]) |
190 | 20.0k | { |
191 | | // FP sequence by Wei Dai, taken from public domain Crypto++ |
192 | 20.0k | uint32_t T; |
193 | | |
194 | 20.0k | R = rotr<1>(R); |
195 | 20.0k | T = (L ^ R) & 0xAAAAAAAA; |
196 | 20.0k | R ^= T; |
197 | 20.0k | L = rotr<9>(L ^ T); |
198 | 20.0k | T = (L ^ R) & 0x00FF00FF; |
199 | 20.0k | R ^= T; |
200 | 20.0k | L = rotl<6>(L ^ T); |
201 | 20.0k | T = (L ^ R) & 0x33333333; |
202 | 20.0k | R ^= T; |
203 | 20.0k | L = rotl<18>(L ^ T); |
204 | 20.0k | T = (L ^ R) & 0xFFFF0000; |
205 | 20.0k | R ^= T; |
206 | 20.0k | L = rotl<20>(L ^ T); |
207 | 20.0k | T = (L ^ R) & 0xF0F0F0F0; |
208 | 20.0k | R ^= T; |
209 | 20.0k | L = rotr<4>(L ^ T); |
      // Note the (R, L) order: the halves are emitted swapped.
210 | 20.0k | store_be(out, R, L); |
211 | 20.0k | } |
212 | | |
213 | | } |
214 | | |
215 | | } |
216 | | |
217 | | //static |
/*
* Encrypt `blocks` consecutive blocks from in to out with the BMI2 code path.
*
* Per block: des_IP, then three 16-round passes - encrypt with
* key[0..31], decrypt with key[32..63], encrypt with key[64..95] - then
* des_FP. The middle pass receives the halves in swapped order (R, L);
* presumably this stands in for the half swap between the chained DES
* operations - confirm against the non-BMI2 implementation.
*
* Blocks are processed two at a time while at least two remain; the
* trailing loop then handles the 0 or 1 block left over.
*
* NOTE(review): BLOCK_SIZE comes from the enclosing class and is not
* visible here; des_IP reads two 32-bit words per block, so it is assumed
* to be 8.
*
* @param in input bytes, blocks * BLOCK_SIZE bytes are read
* @param out output bytes, blocks * BLOCK_SIZE bytes are written
* @param blocks number of blocks to process (may be 0)
* @param key round keys; indices 0..95 are read
*/
218 | | void TripleDES::bmi2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const uint32_t key[]) |
219 | 1.83k | { |
220 | 1.83k | using namespace DES_BMI2_fn; |
221 | 1.83k | while(blocks >= 2) |
222 | 0 | { |
223 | 0 | uint32_t L0, R0; |
224 | 0 | uint32_t L1, R1; |
225 |

226 | 0 | des_IP(L0, R0, in); |
227 | 0 | des_IP(L1, R1, in + BLOCK_SIZE); |
228 |

229 | 0 | des_encrypt_x2(L0, R0, L1, R1, &key[0]); |
230 | 0 | des_decrypt_x2(R0, L0, R1, L1, &key[32]); |
231 | 0 | des_encrypt_x2(L0, R0, L1, R1, &key[64]); |
232 |

233 | 0 | des_FP(L0, R0, out); |
234 | 0 | des_FP(L1, R1, out + BLOCK_SIZE); |
235 |

236 | 0 | in += 2*BLOCK_SIZE; |
237 | 0 | out += 2*BLOCK_SIZE; |
238 | 0 | blocks -= 2; |
239 | 0 | } |
240 | | |
      // At most one block remains here; in/out already point past the pairs.
241 | 3.66k | for(size_t i = 0; i != blocks; ++i) |
242 | 1.83k | { |
243 | 1.83k | uint32_t L, R; |
244 | 1.83k | des_IP(L, R, in + BLOCK_SIZE*i); |
245 | | |
246 | 1.83k | des_encrypt(L, R, &key[0]); |
247 | 1.83k | des_decrypt(R, L, &key[32]); |
248 | 1.83k | des_encrypt(L, R, &key[64]); |
249 | | |
250 | 1.83k | des_FP(L, R, out + BLOCK_SIZE*i); |
251 | 1.83k | } |
252 | 1.83k | } |
253 | | |
254 | | //static |
/*
* Decrypt `blocks` consecutive blocks from in to out with the BMI2 code path.
*
* Per block: des_IP, then three 16-round passes in the reverse order of
* bmi2_encrypt_n - decrypt with key[64..95], encrypt with key[32..63],
* decrypt with key[0..31] - then des_FP. As in the encrypt direction, the
* middle pass receives the halves in swapped order (R, L).
*
* Blocks are processed two at a time while at least two remain; the
* trailing loop then handles the 0 or 1 block left over.
*
* NOTE(review): BLOCK_SIZE comes from the enclosing class and is not
* visible here; des_IP reads two 32-bit words per block, so it is assumed
* to be 8.
*
* @param in input bytes, blocks * BLOCK_SIZE bytes are read
* @param out output bytes, blocks * BLOCK_SIZE bytes are written
* @param blocks number of blocks to process (may be 0)
* @param key round keys; indices 0..95 are read
*/
255 | | void TripleDES::bmi2_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const uint32_t key[]) |
256 | 4.59k | { |
257 | 4.59k | using namespace DES_BMI2_fn; |
258 | | |
259 | 13.6k | while(blocks >= 2) |
260 | 9.06k | { |
261 | 9.06k | uint32_t L0, R0; |
262 | 9.06k | uint32_t L1, R1; |
263 | | |
264 | 9.06k | des_IP(L0, R0, in); |
265 | 9.06k | des_IP(L1, R1, in + BLOCK_SIZE); |
266 | | |
267 | 9.06k | des_decrypt_x2(L0, R0, L1, R1, &key[64]); |
268 | 9.06k | des_encrypt_x2(R0, L0, R1, L1, &key[32]); |
269 | 9.06k | des_decrypt_x2(L0, R0, L1, R1, &key[0]); |
270 | | |
271 | 9.06k | des_FP(L0, R0, out); |
272 | 9.06k | des_FP(L1, R1, out + BLOCK_SIZE); |
273 | | |
274 | 9.06k | in += 2*BLOCK_SIZE; |
275 | 9.06k | out += 2*BLOCK_SIZE; |
276 | 9.06k | blocks -= 2; |
277 | 9.06k | } |
278 | | |
      // At most one block remains here; in/out already point past the pairs.
279 | 4.67k | for(size_t i = 0; i != blocks; ++i) |
280 | 76 | { |
281 | 76 | uint32_t L, R; |
282 | 76 | des_IP(L, R, in + BLOCK_SIZE*i); |
283 | | |
284 | 76 | des_decrypt(L, R, &key[64]); |
285 | 76 | des_encrypt(R, L, &key[32]); |
286 | 76 | des_decrypt(L, R, &key[0]); |
287 | | |
288 | 76 | des_FP(L, R, out + BLOCK_SIZE*i); |
289 | 76 | } |
290 | 4.59k | } |
291 | | |
292 | | } |