Coverage Report

Created: 2023-06-07 07:00

/src/botan/src/lib/block/serpent/serpent_avx512/serpent_avx512.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2023 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/serpent.h>
8
#include <botan/internal/serpent_sbox.h>
9
#include <botan/internal/simd_avx512.h>
10
11
namespace Botan {
12
13
/*
* Serpent encryption S-box 0 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 0, 8, 16 and 24.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE0(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xb9>(b, d, c);
   const Vec x1 = Vec::ternary_fn<0xe2>(a, b, d);
   const Vec x2 = Vec::ternary_fn<0x36>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0x26>(x0, d, b);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x3);
   const Vec y1 = Vec::ternary_fn<0x96>(x1, c, y0);
   const Vec y2 = Vec::ternary_fn<0xa9>(y0, y1, x2);
   const Vec y3 = Vec::ternary_fn<0x3c>(x2, c, x0);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
28
29
/*
* Serpent encryption S-box 1 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 1, 9, 17 and 25.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE1(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xe5>(d, b, c);
   const Vec x1 = Vec::ternary_fn<0x26>(c, d, b);
   const Vec x2 = Vec::ternary_fn<0xa6>(a, b, c);
   const Vec x3 = Vec::ternary_fn<0x2b>(a, b, d);

   // Output 0 depends on outputs 1 and 3, so it is computed last
   const Vec y1 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y2 = Vec::ternary_fn<0x83>(x2, d, x0);
   const Vec y3 = Vec::ternary_fn<0x69>(x3, c, y1);
   const Vec y0 = Vec::ternary_fn<0x65>(y3, y1, x2);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
44
45
/*
* Serpent encryption S-box 2 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 2, 10, 18 and 26.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE2(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x96>(c, b, d);
   const Vec x1 = Vec::ternary_fn<0xda>(a, b, c);
   const Vec x2 = Vec::ternary_fn<0x66>(d, x0, c);
   const Vec x3 = Vec::ternary_fn<0x94>(a, b, x0);
   const Vec x4 = Vec::ternary_fn<0xa1>(a, d, x0);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x2);
   const Vec y1 = Vec::ternary_fn<0xd2>(x3, d, y0);
   const Vec y2 = Vec::ternary_fn<0x2d>(x4, b, c);
   const Vec y3 = Vec::ternary_fn<0x2d>(x1, d, x2);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
61
62
/*
* Serpent encryption S-box 3 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 3, 11, 19 and 27.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE3(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x92>(d, c, b);
   const Vec x1 = Vec::ternary_fn<0x3b>(d, b, c);
   const Vec x2 = Vec::ternary_fn<0xbc>(a, c, x0);

   // Output 2 is produced early because later temporaries feed on it
   const Vec y2 = Vec::ternary_fn<0x68>(x2, d, x1);
   const Vec x3 = Vec::ternary_fn<0x6e>(a, c, y2);
   const Vec x4 = Vec::ternary_fn<0xb9>(a, d, x3);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0x6d>(x4, b, x2);
   const Vec y3 = Vec::ternary_fn<0x38>(x3, b, x0);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
78
79
/*
* Serpent encryption S-box 4 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 4, 12, 20 and 28.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE4(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xc2>(c, b, d);
   const Vec x1 = Vec::ternary_fn<0x79>(b, c, d);
   const Vec x2 = Vec::ternary_fn<0x71>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0x6b>(a, b, d);
   const Vec x4 = Vec::ternary_fn<0xc2>(a, x0, x3);

   const Vec y1 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y0 = Vec::ternary_fn<0x3c>(x2, c, x0);
   const Vec y3 = Vec::ternary_fn<0x49>(x3, c, x0);
   const Vec y2 = Vec::ternary_fn<0xd6>(x4, b, x1);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
95
96
/*
* Serpent encryption S-box 5 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 5, 13, 21 and 29.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE5(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xa9>(b, d, c);
   const Vec x1 = Vec::ternary_fn<0x93>(b, c, d);
   const Vec x2 = Vec::ternary_fn<0xc3>(a, b, c);
   const Vec x3 = Vec::ternary_fn<0x27>(a, b, d);
   const Vec x4 = Vec::ternary_fn<0x85>(a, c, x1);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0x2d>(x2, d, y0);
   const Vec y2 = Vec::ternary_fn<0x7a>(x4, b, x0);
   const Vec y3 = Vec::ternary_fn<0x87>(x3, x0, y0);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
112
113
/*
* Serpent encryption S-box 6 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 6, 14, 22 and 30.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE6(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x61>(d, c, b);
   const Vec x1 = Vec::ternary_fn<0x9c>(b, d, x0);
   const Vec x2 = Vec::ternary_fn<0x93>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0xb5>(a, b, c);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0x3c>(x2, c, x0);

   // This temporary needs output 1, hence its late position
   const Vec x4 = Vec::ternary_fn<0x7c>(a, b, y1);
   const Vec y2 = Vec::ternary_fn<0x1e>(x4, d, x0);
   const Vec y3 = Vec::ternary_fn<0x29>(x3, x0, x1);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
129
130
/*
* Serpent encryption S-box 7 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_encrypt_16 after key_xor for
* rounds 7, 15, 23 and 31.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxE7(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x9b>(b, c, d);
   const Vec x1 = Vec::ternary_fn<0x61>(c, b, d);
   const Vec x2 = Vec::ternary_fn<0xe3>(a, d, x1);
   const Vec x3 = Vec::ternary_fn<0x83>(b, c, d);
   const Vec x4 = Vec::ternary_fn<0x49>(a, b, c);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0xe1>(x2, b, c);
   const Vec y2 = Vec::ternary_fn<0xd1>(x3, a, x1);
   const Vec y3 = Vec::ternary_fn<0x87>(x4, d, x2);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
146
147
/*
* Serpent decryption S-box 0 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 24, 16, 8 and 0.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD0(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x67>(c, d, b);
   const Vec x1 = Vec::ternary_fn<0x49>(b, d, c);
   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);

   const Vec x2 = Vec::ternary_fn<0xa9>(a, b, c);
   const Vec y2 = Vec::ternary_fn<0x3c>(x2, d, x0);

   const Vec x3 = Vec::ternary_fn<0x4d>(a, b, d);
   const Vec y3 = Vec::ternary_fn<0x69>(x3, c, y0);

   // Output 1 depends on outputs 0 and 3, so it is computed last
   const Vec y1 = Vec::ternary_fn<0x56>(y3, y0, x2);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
162
163
/*
* Serpent decryption S-box 1 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 25, 17, 9 and 1.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD1(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x73>(d, b, c);
   const Vec x1 = Vec::ternary_fn<0x68>(c, d, b);
   const Vec x2 = Vec::ternary_fn<0xc5>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0x2d>(a, b, d);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0x96>(x2, c, y0);
   const Vec y2 = Vec::ternary_fn<0xd2>(x3, y0, y1);
   const Vec y3 = Vec::ternary_fn<0x99>(y0, x3, c);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
178
179
/*
* Serpent decryption S-box 2 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 26, 18, 10 and 2.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD2(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xc6>(d, b, c);
   const Vec x1 = Vec::ternary_fn<0x9c>(d, c, b);
   const Vec x2 = Vec::ternary_fn<0xe1>(a, b, c);

   const Vec y0 = Vec::ternary_fn<0x87>(x2, d, x0);
   const Vec y1 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y2 = Vec::ternary_fn<0xd1>(x0, a, x1);

   // The last temporary needs output 2, hence its late position
   const Vec x3 = Vec::ternary_fn<0x9b>(a, c, y2);
   const Vec y3 = Vec::ternary_fn<0x5b>(x3, b, d);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
194
195
/*
* Serpent decryption S-box 3 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 27, 19, 11 and 3.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD3(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x94>(c, d, b);
   const Vec x1 = Vec::ternary_fn<0x99>(b, d, x0);
   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);

   const Vec x2 = Vec::ternary_fn<0x97>(a, b, d);
   const Vec y2 = Vec::ternary_fn<0x4b>(x2, c, y0);

   const Vec x3 = Vec::ternary_fn<0x94>(c, d, x2);
   const Vec y1 = Vec::ternary_fn<0x0e>(x3, b, x0);

   const Vec x4 = Vec::ternary_fn<0x1c>(a, b, x0);
   const Vec y3 = Vec::ternary_fn<0xb4>(x4, c, d);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
211
212
/*
* Serpent decryption S-box 4 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 28, 20, 12 and 4.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD4(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xa9>(d, c, b);
   const Vec x1 = Vec::ternary_fn<0xa6>(d, b, c);
   const Vec x2 = Vec::ternary_fn<0xb5>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0x5e>(a, b, d);
   const Vec x4 = Vec::ternary_fn<0x50>(a, b, x0);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0x96>(x4, c, d);
   const Vec y2 = Vec::ternary_fn<0x4b>(x3, c, x4);
   const Vec y3 = Vec::ternary_fn<0x5a>(x2, c, x0);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
228
229
/*
* Serpent decryption S-box 5 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 29, 21, 13 and 5.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD5(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0xc9>(a, b, c);
   const Vec x1 = Vec::ternary_fn<0x65>(a, b, c);
   const Vec x2 = Vec::ternary_fn<0x25>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0x63>(c, d, x0);
   const Vec x4 = Vec::ternary_fn<0x86>(a, b, x3);

   const Vec y0 = Vec::ternary_fn<0x87>(x2, c, x0);
   const Vec y1 = Vec::ternary_fn<0xc3>(x4, c, d);
   const Vec y2 = Vec::ternary_fn<0x47>(x1, d, x0);
   const Vec y3 = Vec::ternary_fn<0xac>(a, x0, x3);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
245
246
/*
* Serpent decryption S-box 6 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 30, 22, 14 and 6.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD6(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x07>(d, b, c);
   const Vec x1 = Vec::ternary_fn<0x9e>(c, d, b);
   const Vec x2 = Vec::ternary_fn<0xc6>(a, b, c);
   const Vec x3 = Vec::ternary_fn<0x34>(a, b, d);
   const Vec x4 = Vec::ternary_fn<0x2b>(a, c, d);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x1);
   const Vec y1 = Vec::ternary_fn<0xcb>(x2, d, x0);
   const Vec y2 = Vec::ternary_fn<0x4b>(x3, c, x0);
   const Vec y3 = Vec::ternary_fn<0x69>(x4, b, y0);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
262
263
/*
* Serpent decryption S-box 7 on SIMD_16x32 vectors, applied in place to
* (a, b, c, d). Called by Serpent::avx512_decrypt_16 right before key_xor
* with round indices 31, 23, 15 and 7.
*
* Each step is a SIMD_16x32::ternary_fn of three inputs; the template
* constant selects the function (presumably a 3-input truth table as used
* by AVX-512 vpternlog - confirm in simd_avx512.h).
*/
BOTAN_FORCE_INLINE
void SBoxD7(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
   using Vec = SIMD_16x32;

   const Vec x0 = Vec::ternary_fn<0x67>(b, d, c);
   const Vec x1 = Vec::ternary_fn<0x3e>(a, c, d);
   const Vec x2 = Vec::ternary_fn<0x1c>(a, b, d);
   const Vec x3 = Vec::ternary_fn<0x87>(x0, d, b);
   const Vec x4 = Vec::ternary_fn<0x7d>(a, b, x1);

   const Vec y0 = Vec::ternary_fn<0xac>(a, x0, x3);
   const Vec y1 = Vec::ternary_fn<0x96>(x1, b, x0);
   const Vec y2 = Vec::ternary_fn<0xd2>(x2, c, x1);
   const Vec y3 = Vec::ternary_fn<0x6d>(x4, c, d);

   // Inputs are overwritten only once all reads of them are complete
   a = y0;
   b = y1;
   c = y2;
   d = y3;
}
279
280
/*
* Encrypt the 16 * 16 bytes at `in` and write 16 * 16 bytes to `out`.
*
* Four SIMD_16x32 vectors are loaded at 64-byte strides and transposed.
* Rounds 0..30 each do key_xor, the S-box SBoxE(round mod 8) and transform.
* Round 31 does key_xor(31), SBoxE7 and key_xor(32) with no transform.
* The vectors are then transposed back and stored.
*/
BOTAN_AVX512_FN
void Serpent::avx512_encrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const {
   using namespace Botan::Serpent_F;

   SIMD_16x32 w0 = SIMD_16x32::load_le(in);
   SIMD_16x32 w1 = SIMD_16x32::load_le(in + 64);
   SIMD_16x32 w2 = SIMD_16x32::load_le(in + 128);
   SIMD_16x32 w3 = SIMD_16x32::load_le(in + 192);

   SIMD_16x32::transpose(w0, w1, w2, w3);

   const Key_Inserter add_key(m_round_key.data());

   // Rounds 0-7
   add_key(0, w0, w1, w2, w3);
   SBoxE0(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(1, w0, w1, w2, w3);
   SBoxE1(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(2, w0, w1, w2, w3);
   SBoxE2(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(3, w0, w1, w2, w3);
   SBoxE3(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(4, w0, w1, w2, w3);
   SBoxE4(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(5, w0, w1, w2, w3);
   SBoxE5(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(6, w0, w1, w2, w3);
   SBoxE6(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(7, w0, w1, w2, w3);
   SBoxE7(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   // Rounds 8-15
   add_key(8, w0, w1, w2, w3);
   SBoxE0(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(9, w0, w1, w2, w3);
   SBoxE1(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(10, w0, w1, w2, w3);
   SBoxE2(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(11, w0, w1, w2, w3);
   SBoxE3(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(12, w0, w1, w2, w3);
   SBoxE4(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(13, w0, w1, w2, w3);
   SBoxE5(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(14, w0, w1, w2, w3);
   SBoxE6(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(15, w0, w1, w2, w3);
   SBoxE7(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   // Rounds 16-23
   add_key(16, w0, w1, w2, w3);
   SBoxE0(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(17, w0, w1, w2, w3);
   SBoxE1(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(18, w0, w1, w2, w3);
   SBoxE2(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(19, w0, w1, w2, w3);
   SBoxE3(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(20, w0, w1, w2, w3);
   SBoxE4(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(21, w0, w1, w2, w3);
   SBoxE5(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(22, w0, w1, w2, w3);
   SBoxE6(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(23, w0, w1, w2, w3);
   SBoxE7(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   // Rounds 24-31
   add_key(24, w0, w1, w2, w3);
   SBoxE0(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(25, w0, w1, w2, w3);
   SBoxE1(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(26, w0, w1, w2, w3);
   SBoxE2(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(27, w0, w1, w2, w3);
   SBoxE3(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(28, w0, w1, w2, w3);
   SBoxE4(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(29, w0, w1, w2, w3);
   SBoxE5(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   add_key(30, w0, w1, w2, w3);
   SBoxE6(w0, w1, w2, w3);
   transform(w0, w1, w2, w3);

   // Final round: a second key insertion (index 32) replaces the transform
   add_key(31, w0, w1, w2, w3);
   SBoxE7(w0, w1, w2, w3);
   add_key(32, w0, w1, w2, w3);

   SIMD_16x32::transpose(w0, w1, w2, w3);

   w0.store_le(out);
   w1.store_le(out + 64);
   w2.store_le(out + 128);
   w3.store_le(out + 192);

   // NOTE(review): presumably clears vector registers so no key-dependent
   // state is left behind - confirm in simd_avx512.h
   SIMD_16x32::zero_registers();
}
401
402
/*
* Decrypt the 16 * 16 bytes at `in` and write 16 * 16 bytes to `out`.
*
* Four SIMD_16x32 vectors are loaded at 64-byte strides and transposed.
* The first step is key_xor(32), SBoxD7 and key_xor(31). Each following
* step, for r = 30 down to 0, is i_transform, SBoxD(r mod 8) and key_xor(r).
* The vectors are then transposed back and stored.
*/
BOTAN_AVX512_FN
void Serpent::avx512_decrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const {
   using namespace Botan::Serpent_F;

   SIMD_16x32 w0 = SIMD_16x32::load_le(in);
   SIMD_16x32 w1 = SIMD_16x32::load_le(in + 64);
   SIMD_16x32 w2 = SIMD_16x32::load_le(in + 128);
   SIMD_16x32 w3 = SIMD_16x32::load_le(in + 192);

   SIMD_16x32::transpose(w0, w1, w2, w3);

   const Key_Inserter add_key(m_round_key.data());

   // Round keys 32 and 31: no i_transform in this first step
   add_key(32, w0, w1, w2, w3);
   SBoxD7(w0, w1, w2, w3);
   add_key(31, w0, w1, w2, w3);

   // Round keys 30-24
   i_transform(w0, w1, w2, w3);
   SBoxD6(w0, w1, w2, w3);
   add_key(30, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD5(w0, w1, w2, w3);
   add_key(29, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD4(w0, w1, w2, w3);
   add_key(28, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD3(w0, w1, w2, w3);
   add_key(27, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD2(w0, w1, w2, w3);
   add_key(26, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD1(w0, w1, w2, w3);
   add_key(25, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD0(w0, w1, w2, w3);
   add_key(24, w0, w1, w2, w3);

   // Round keys 23-16
   i_transform(w0, w1, w2, w3);
   SBoxD7(w0, w1, w2, w3);
   add_key(23, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD6(w0, w1, w2, w3);
   add_key(22, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD5(w0, w1, w2, w3);
   add_key(21, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD4(w0, w1, w2, w3);
   add_key(20, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD3(w0, w1, w2, w3);
   add_key(19, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD2(w0, w1, w2, w3);
   add_key(18, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD1(w0, w1, w2, w3);
   add_key(17, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD0(w0, w1, w2, w3);
   add_key(16, w0, w1, w2, w3);

   // Round keys 15-8
   i_transform(w0, w1, w2, w3);
   SBoxD7(w0, w1, w2, w3);
   add_key(15, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD6(w0, w1, w2, w3);
   add_key(14, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD5(w0, w1, w2, w3);
   add_key(13, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD4(w0, w1, w2, w3);
   add_key(12, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD3(w0, w1, w2, w3);
   add_key(11, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD2(w0, w1, w2, w3);
   add_key(10, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD1(w0, w1, w2, w3);
   add_key(9, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD0(w0, w1, w2, w3);
   add_key(8, w0, w1, w2, w3);

   // Round keys 7-0
   i_transform(w0, w1, w2, w3);
   SBoxD7(w0, w1, w2, w3);
   add_key(7, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD6(w0, w1, w2, w3);
   add_key(6, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD5(w0, w1, w2, w3);
   add_key(5, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD4(w0, w1, w2, w3);
   add_key(4, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD3(w0, w1, w2, w3);
   add_key(3, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD2(w0, w1, w2, w3);
   add_key(2, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD1(w0, w1, w2, w3);
   add_key(1, w0, w1, w2, w3);

   i_transform(w0, w1, w2, w3);
   SBoxD0(w0, w1, w2, w3);
   add_key(0, w0, w1, w2, w3);

   SIMD_16x32::transpose(w0, w1, w2, w3);

   w0.store_le(out);
   w1.store_le(out + 64);
   w2.store_le(out + 128);
   w3.store_le(out + 192);

   // NOTE(review): presumably clears vector registers so no key-dependent
   // state is left behind - confirm in simd_avx512.h
   SIMD_16x32::zero_registers();
}
524
525
}  // namespace Botan