Coverage Report

Created: 2021-02-21 07:20

/src/botan/src/lib/block/des/des_bmi2/des_bmi2.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2020 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/des.h>
8
#include <botan/internal/rotate.h>
9
#include <botan/internal/loadstor.h>
10
#include <immintrin.h>
11
12
namespace Botan {
13
14
namespace {
15
16
namespace DES_BMI2_fn {
17
18
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(0, T0) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x01010404 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box - confirm against
* the DES specification before editing any value.
*/
alignas(64) const uint8_t SPBOX_CAT_0[64] = {
19
   0xE, 0x0, 0x4, 0xF, 0xD, 0x7, 0x1, 0x4, 0x2, 0xE, 0xF, 0x2, 0xB, 0xD, 0x8, 0x1,
20
   0x3, 0xA, 0xA, 0x6, 0x6, 0xC, 0xC, 0xB, 0x5, 0x9, 0x9, 0x5, 0x0, 0x3, 0x7, 0x8,
21
   0x4, 0xF, 0x1, 0xC, 0xE, 0x8, 0x8, 0x2, 0xD, 0x4, 0x6, 0x9, 0x2, 0x1, 0xB, 0x7,
22
   0xF, 0x5, 0xC, 0xB, 0x9, 0x3, 0x7, 0xE, 0x3, 0xA, 0xA, 0x0, 0x5, 0x6, 0x0, 0xD,
23
};
24
25
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(0, T1) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x80108020 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_1[64] = {
26
   0xF, 0xA, 0x2, 0x7, 0x4, 0x1, 0xD, 0xB, 0x9, 0xF, 0xE, 0x8, 0xA, 0x4, 0x1, 0xD,
27
   0x6, 0x5, 0xB, 0x0, 0x8, 0x2, 0x7, 0xC, 0x5, 0x9, 0x0, 0x6, 0x3, 0xE, 0xC, 0x3,
28
   0x0, 0x7, 0xD, 0x4, 0xB, 0xC, 0xE, 0x2, 0xC, 0xA, 0x1, 0xF, 0x7, 0x1, 0x2, 0x8,
29
   0x3, 0xE, 0x4, 0x9, 0x5, 0xB, 0x9, 0x5, 0x6, 0x0, 0xA, 0x3, 0x8, 0xD, 0xF, 0x6,
30
};
31
32
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(1, T0) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x08020208 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_2[64] = {
33
   0x3, 0xE, 0x0, 0xD, 0xA, 0x0, 0x7, 0xA, 0x5, 0x9, 0x9, 0x4, 0xF, 0x5, 0xC, 0x3,
34
   0x8, 0x1, 0xE, 0x2, 0x6, 0xC, 0xD, 0x7, 0xB, 0x6, 0x4, 0xB, 0x1, 0xF, 0x2, 0x8,
35
   0xE, 0x8, 0x5, 0x3, 0x4, 0xE, 0xA, 0x0, 0x2, 0x5, 0xF, 0xA, 0x9, 0x2, 0x0, 0xD,
36
   0xB, 0x4, 0x8, 0xF, 0x1, 0x7, 0x6, 0x9, 0xC, 0xB, 0x3, 0xC, 0x7, 0x1, 0xD, 0x6,
37
};
38
39
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(1, T1) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x00802081 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_3[64] = {
40
   0xD, 0x7, 0x7, 0x2, 0xE, 0xB, 0x9, 0x5, 0x0, 0xC, 0xC, 0xF, 0x3, 0x0, 0xA, 0x9,
41
   0x1, 0x4, 0x8, 0xD, 0x2, 0x8, 0x5, 0x6, 0xB, 0x1, 0x6, 0xA, 0x4, 0xE, 0xF, 0x3,
42
   0xA, 0x9, 0xC, 0xF, 0x3, 0x0, 0x0, 0xC, 0x6, 0xA, 0xB, 0x1, 0xD, 0x7, 0x7, 0x2,
43
   0xF, 0x3, 0x1, 0x4, 0x9, 0x5, 0xE, 0xB, 0x5, 0x6, 0x8, 0xD, 0x2, 0x8, 0x4, 0xE,
44
};
45
46
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(2, T0) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x42080100 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_4[64] = {
47
   0x1, 0x7, 0x6, 0xD, 0x2, 0x1, 0x8, 0x6, 0xB, 0x2, 0x5, 0xB, 0xD, 0xE, 0x3, 0x8,
48
   0x4, 0xA, 0xA, 0x0, 0x9, 0xF, 0xF, 0x5, 0xE, 0x9, 0x0, 0xC, 0x7, 0x4, 0xC, 0x3,
49
   0x2, 0xD, 0x1, 0x4, 0x8, 0x6, 0xD, 0xB, 0x5, 0x8, 0xE, 0x7, 0xB, 0x1, 0x4, 0xE,
50
   0xF, 0x3, 0xC, 0xF, 0x6, 0x0, 0xA, 0xC, 0x3, 0x5, 0x9, 0x2, 0x0, 0xA, 0x7, 0x9,
51
};
52
53
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(2, T1) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x20404010 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_5[64] = {
54
   0x9, 0xC, 0x2, 0xF, 0xC, 0x1, 0xF, 0x4, 0xA, 0x7, 0x4, 0x9, 0x5, 0xA, 0x8, 0x3,
55
   0x0, 0x5, 0xB, 0x2, 0x6, 0xB, 0x1, 0xD, 0xD, 0x0, 0x7, 0xE, 0x3, 0x6, 0xE, 0x8,
56
   0xA, 0x1, 0xD, 0x6, 0xF, 0x4, 0x3, 0x9, 0x4, 0xA, 0x8, 0x3, 0x9, 0xF, 0x6, 0xC,
57
   0x7, 0xE, 0x0, 0xD, 0x1, 0x2, 0xC, 0x7, 0x2, 0x5, 0xB, 0x0, 0xE, 0x8, 0x5, 0xB,
58
};
59
60
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(3, T0) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x04200802 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_6[64] = {
61
   0x4, 0xD, 0xB, 0x0, 0x2, 0xB, 0x7, 0xE, 0xF, 0x4, 0x0, 0x9, 0x1, 0x8, 0xD, 0x3,
62
   0xA, 0x7, 0x5, 0xA, 0x9, 0xC, 0xE, 0x5, 0xC, 0x2, 0x3, 0xF, 0x6, 0x1, 0x8, 0x6,
63
   0x8, 0x6, 0x4, 0xB, 0xB, 0xD, 0xD, 0x1, 0x5, 0x8, 0xA, 0x4, 0xE, 0x3, 0x7, 0xE,
64
   0x3, 0x9, 0xF, 0xC, 0x6, 0x0, 0x1, 0xF, 0x0, 0x7, 0xC, 0x2, 0x9, 0xA, 0x2, 0x5,
65
};
66
67
/*
* 64-entry lookup table of 4-bit values (every entry is in 0x0..0xF),
* aligned to 64 bytes, which is also the size of the table.
* spbox() indexes it with get_byte(3, T1) % 64 and scatters the 4-bit
* result into the bits selected by mask 0x10041040 using _pdep_u32.
* NOTE(review): presumably derived from a DES S-box, possibly with the
* output bits reordered to suit the mask - confirm before editing.
*/
alignas(64) const uint8_t SPBOX_CAT_7[64] = {
68
   0xB, 0x2, 0x4, 0xF, 0x8, 0xB, 0x1, 0x8, 0x5, 0xC, 0xF, 0x6, 0xE, 0x7, 0x2, 0x1,
69
   0xC, 0x9, 0xA, 0x3, 0x6, 0x5, 0xD, 0xE, 0x3, 0x0, 0x0, 0xD, 0x9, 0xA, 0x7, 0x4,
70
   0x7, 0x4, 0xE, 0x2, 0x1, 0xD, 0x2, 0x7, 0xA, 0x1, 0x9, 0xC, 0xD, 0x8, 0x4, 0xB,
71
   0x0, 0xF, 0x5, 0x9, 0xC, 0xA, 0xB, 0x0, 0xF, 0x6, 0x6, 0x3, 0x3, 0x5, 0x8, 0xE,
72
};
73
74
inline uint32_t spbox(uint32_t T0, uint32_t T1)
75
961k
   {
76
961k
   return
77
961k
      _pdep_u32(SPBOX_CAT_0[get_byte(0, T0) % 64], 0x01010404) ^
78
961k
      _pdep_u32(SPBOX_CAT_1[get_byte(0, T1) % 64], 0x80108020) ^
79
961k
      _pdep_u32(SPBOX_CAT_2[get_byte(1, T0) % 64], 0x08020208) ^
80
961k
      _pdep_u32(SPBOX_CAT_3[get_byte(1, T1) % 64], 0x00802081) ^
81
961k
      _pdep_u32(SPBOX_CAT_4[get_byte(2, T0) % 64], 0x42080100) ^
82
961k
      _pdep_u32(SPBOX_CAT_5[get_byte(2, T1) % 64], 0x20404010) ^
83
961k
      _pdep_u32(SPBOX_CAT_6[get_byte(3, T0) % 64], 0x04200802) ^
84
961k
      _pdep_u32(SPBOX_CAT_7[get_byte(3, T1) % 64], 0x10041040);
85
961k
   }
86
87
inline void des_encrypt(uint32_t& Lr, uint32_t& Rr,
88
                        const uint32_t round_key[32])
89
3.73k
   {
90
3.73k
   uint32_t L = Lr;
91
3.73k
   uint32_t R = Rr;
92
33.6k
   for(size_t i = 0; i != 16; i += 2)
93
29.9k
      {
94
29.9k
      L ^= spbox(rotr<4>(R) ^ round_key[2*i  ], R ^ round_key[2*i+1]);
95
29.9k
      R ^= spbox(rotr<4>(L) ^ round_key[2*i+2], L ^ round_key[2*i+3]);
96
29.9k
      }
97
98
3.73k
   Lr = L;
99
3.73k
   Rr = R;
100
3.73k
   }
101
102
inline void des_encrypt_x2(uint32_t& L0r, uint32_t& R0r,
103
                           uint32_t& L1r, uint32_t& R1r,
104
                           const uint32_t round_key[32])
105
9.06k
   {
106
9.06k
   uint32_t L0 = L0r;
107
9.06k
   uint32_t R0 = R0r;
108
9.06k
   uint32_t L1 = L1r;
109
9.06k
   uint32_t R1 = R1r;
110
111
81.5k
   for(size_t i = 0; i != 16; i += 2)
112
72.4k
      {
113
72.4k
      L0 ^= spbox(rotr<4>(R0) ^ round_key[2*i  ], R0 ^ round_key[2*i+1]);
114
72.4k
      L1 ^= spbox(rotr<4>(R1) ^ round_key[2*i  ], R1 ^ round_key[2*i+1]);
115
116
72.4k
      R0 ^= spbox(rotr<4>(L0) ^ round_key[2*i+2], L0 ^ round_key[2*i+3]);
117
72.4k
      R1 ^= spbox(rotr<4>(L1) ^ round_key[2*i+2], L1 ^ round_key[2*i+3]);
118
72.4k
      }
119
120
9.06k
   L0r = L0;
121
9.06k
   R0r = R0;
122
9.06k
   L1r = L1;
123
9.06k
   R1r = R1;
124
9.06k
   }
125
126
inline void des_decrypt(uint32_t& Lr, uint32_t& Rr,
127
                        const uint32_t round_key[32])
128
1.98k
   {
129
1.98k
   uint32_t L = Lr;
130
1.98k
   uint32_t R = Rr;
131
17.8k
   for(size_t i = 16; i != 0; i -= 2)
132
15.8k
      {
133
15.8k
      L ^= spbox(rotr<4>(R) ^ round_key[2*i - 2], R  ^ round_key[2*i - 1]);
134
15.8k
      R ^= spbox(rotr<4>(L) ^ round_key[2*i - 4], L  ^ round_key[2*i - 3]);
135
15.8k
      }
136
1.98k
   Lr = L;
137
1.98k
   Rr = R;
138
1.98k
   }
139
140
inline void des_decrypt_x2(uint32_t& L0r, uint32_t& R0r,
141
                           uint32_t& L1r, uint32_t& R1r,
142
                           const uint32_t round_key[32])
143
18.1k
   {
144
18.1k
   uint32_t L0 = L0r;
145
18.1k
   uint32_t R0 = R0r;
146
18.1k
   uint32_t L1 = L1r;
147
18.1k
   uint32_t R1 = R1r;
148
149
163k
   for(size_t i = 16; i != 0; i -= 2)
150
144k
      {
151
144k
      L0 ^= spbox(rotr<4>(R0) ^ round_key[2*i - 2], R0  ^ round_key[2*i - 1]);
152
144k
      L1 ^= spbox(rotr<4>(R1) ^ round_key[2*i - 2], R1  ^ round_key[2*i - 1]);
153
154
144k
      R0 ^= spbox(rotr<4>(L0) ^ round_key[2*i - 4], L0  ^ round_key[2*i - 3]);
155
144k
      R1 ^= spbox(rotr<4>(L1) ^ round_key[2*i - 4], L1  ^ round_key[2*i - 3]);
156
144k
      }
157
158
18.1k
   L0r = L0;
159
18.1k
   R0r = R0;
160
18.1k
   L1r = L1;
161
18.1k
   R1r = R1;
162
18.1k
   }
163
164
inline void des_IP(uint32_t& L, uint32_t& R, const uint8_t block[])
165
20.0k
   {
166
   // IP sequence by Wei Dai, taken from public domain Crypto++
167
20.0k
   L = load_be<uint32_t>(block, 0);
168
20.0k
   R = load_be<uint32_t>(block, 1);
169
170
20.0k
   uint32_t T;
171
20.0k
   R = rotl<4>(R);
172
20.0k
   T = (L ^ R) & 0xF0F0F0F0;
173
20.0k
   L ^= T;
174
20.0k
   R = rotr<20>(R ^ T);
175
20.0k
   T = (L ^ R) & 0xFFFF0000;
176
20.0k
   L ^= T;
177
20.0k
   R = rotr<18>(R ^ T);
178
20.0k
   T = (L ^ R) & 0x33333333;
179
20.0k
   L ^= T;
180
20.0k
   R = rotr<6>(R ^ T);
181
20.0k
   T = (L ^ R) & 0x00FF00FF;
182
20.0k
   L ^= T;
183
20.0k
   R = rotl<9>(R ^ T);
184
20.0k
   T = (L ^ R) & 0xAAAAAAAA;
185
20.0k
   L = rotl<1>(L ^ T);
186
20.0k
   R ^= T;
187
20.0k
   }
188
189
inline void des_FP(uint32_t L, uint32_t R, uint8_t out[])
190
20.0k
   {
191
   // FP sequence by Wei Dai, taken from public domain Crypto++
192
20.0k
   uint32_t T;
193
194
20.0k
   R = rotr<1>(R);
195
20.0k
   T = (L ^ R) & 0xAAAAAAAA;
196
20.0k
   R ^= T;
197
20.0k
   L = rotr<9>(L ^ T);
198
20.0k
   T = (L ^ R) & 0x00FF00FF;
199
20.0k
   R ^= T;
200
20.0k
   L = rotl<6>(L ^ T);
201
20.0k
   T = (L ^ R) & 0x33333333;
202
20.0k
   R ^= T;
203
20.0k
   L = rotl<18>(L ^ T);
204
20.0k
   T = (L ^ R) & 0xFFFF0000;
205
20.0k
   R ^= T;
206
20.0k
   L = rotl<20>(L ^ T);
207
20.0k
   T = (L ^ R) & 0xF0F0F0F0;
208
20.0k
   R ^= T;
209
20.0k
   L = rotr<4>(L ^ T);
210
20.0k
   store_be(out, R, L);
211
20.0k
   }
212
213
}
214
215
}
216
217
//static
218
void TripleDES::bmi2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const uint32_t key[])
219
1.83k
   {
220
1.83k
   using namespace DES_BMI2_fn;
221
1.83k
   while(blocks >= 2)
222
0
      {
223
0
      uint32_t L0, R0;
224
0
      uint32_t L1, R1;
225
226
0
      des_IP(L0, R0, in);
227
0
      des_IP(L1, R1, in + BLOCK_SIZE);
228
229
0
      des_encrypt_x2(L0, R0, L1, R1, &key[0]);
230
0
      des_decrypt_x2(R0, L0, R1, L1, &key[32]);
231
0
      des_encrypt_x2(L0, R0, L1, R1, &key[64]);
232
233
0
      des_FP(L0, R0, out);
234
0
      des_FP(L1, R1, out + BLOCK_SIZE);
235
236
0
      in += 2*BLOCK_SIZE;
237
0
      out += 2*BLOCK_SIZE;
238
0
      blocks -= 2;
239
0
      }
240
241
3.66k
   for(size_t i = 0; i != blocks; ++i)
242
1.83k
      {
243
1.83k
      uint32_t L, R;
244
1.83k
      des_IP(L, R, in + BLOCK_SIZE*i);
245
246
1.83k
      des_encrypt(L, R, &key[0]);
247
1.83k
      des_decrypt(R, L, &key[32]);
248
1.83k
      des_encrypt(L, R, &key[64]);
249
250
1.83k
      des_FP(L, R, out + BLOCK_SIZE*i);
251
1.83k
      }
252
1.83k
   }
253
254
//static
255
void TripleDES::bmi2_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const uint32_t key[])
256
4.59k
   {
257
4.59k
   using namespace DES_BMI2_fn;
258
259
13.6k
   while(blocks >= 2)
260
9.06k
      {
261
9.06k
      uint32_t L0, R0;
262
9.06k
      uint32_t L1, R1;
263
264
9.06k
      des_IP(L0, R0, in);
265
9.06k
      des_IP(L1, R1, in + BLOCK_SIZE);
266
267
9.06k
      des_decrypt_x2(L0, R0, L1, R1, &key[64]);
268
9.06k
      des_encrypt_x2(R0, L0, R1, L1, &key[32]);
269
9.06k
      des_decrypt_x2(L0, R0, L1, R1, &key[0]);
270
271
9.06k
      des_FP(L0, R0, out);
272
9.06k
      des_FP(L1, R1, out + BLOCK_SIZE);
273
274
9.06k
      in += 2*BLOCK_SIZE;
275
9.06k
      out += 2*BLOCK_SIZE;
276
9.06k
      blocks -= 2;
277
9.06k
      }
278
279
4.67k
   for(size_t i = 0; i != blocks; ++i)
280
76
      {
281
76
      uint32_t L, R;
282
76
      des_IP(L, R, in + BLOCK_SIZE*i);
283
284
76
      des_decrypt(L, R, &key[64]);
285
76
      des_encrypt(R, L, &key[32]);
286
76
      des_decrypt(L, R, &key[0]);
287
288
76
      des_FP(L, R, out + BLOCK_SIZE*i);
289
76
      }
290
4.59k
   }
291
292
}