/src/botan/src/lib/block/serpent/serpent_avx512/serpent_avx512.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2023 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/serpent.h> |
8 | | #include <botan/internal/serpent_sbox.h> |
9 | | #include <botan/internal/simd_avx512.h> |
10 | | |
11 | | namespace Botan { |
12 | | |
13 | | BOTAN_FORCE_INLINE |
14 | 0 | void SBoxE0(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
15 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xb9>(b, d, c); |
16 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xe2>(a, b, d); |
17 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x36>(a, b, d); |
18 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x26>(t0, d, b); |
19 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3); |
20 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, c, o0); |
21 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xa9>(o0, o1, t2); |
22 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0); |
23 | 0 | a = o0; |
24 | 0 | b = o1; |
25 | 0 | c = o2; |
26 | 0 | d = o3; |
27 | 0 | } |
28 | | |
29 | | BOTAN_FORCE_INLINE |
30 | 0 | void SBoxE1(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
31 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xe5>(d, b, c); |
32 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x26>(c, d, b); |
33 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa6>(a, b, c); |
34 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2b>(a, b, d); |
35 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
36 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x83>(t2, d, t0); |
37 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o1); |
38 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x65>(o3, o1, t2); |
39 | 0 | a = o0; |
40 | 0 | b = o1; |
41 | 0 | c = o2; |
42 | 0 | d = o3; |
43 | 0 | } |
44 | | |
45 | | BOTAN_FORCE_INLINE |
46 | 0 | void SBoxE2(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
47 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x96>(c, b, d); |
48 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xda>(a, b, c); |
49 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x66>(d, t0, c); |
50 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(a, b, t0); |
51 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xa1>(a, d, t0); |
52 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t2); |
53 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xd2>(t3, d, o0); |
54 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x2d>(t4, b, c); |
55 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x2d>(t1, d, t2); |
56 | 0 | a = o0; |
57 | 0 | b = o1; |
58 | 0 | c = o2; |
59 | 0 | d = o3; |
60 | 0 | } |
61 | | |
62 | | BOTAN_FORCE_INLINE |
63 | 0 | void SBoxE3(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
64 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x92>(d, c, b); |
65 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3b>(d, b, c); |
66 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xbc>(a, c, t0); |
67 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x68>(t2, d, t1); |
68 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6e>(a, c, o2); |
69 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xb9>(a, d, t3); |
70 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
71 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x6d>(t4, b, t2); |
72 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x38>(t3, b, t0); |
73 | 0 | a = o0; |
74 | 0 | b = o1; |
75 | 0 | c = o2; |
76 | 0 | d = o3; |
77 | 0 | } |
78 | | |
79 | | BOTAN_FORCE_INLINE |
80 | 0 | void SBoxE4(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
81 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc2>(c, b, d); |
82 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x79>(b, c, d); |
83 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x71>(a, b, d); |
84 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6b>(a, b, d); |
85 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xc2>(a, t0, t3); |
86 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
87 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0); |
88 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x49>(t3, c, t0); |
89 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd6>(t4, b, t1); |
90 | 0 | a = o0; |
91 | 0 | b = o1; |
92 | 0 | c = o2; |
93 | 0 | d = o3; |
94 | 0 | } |
95 | | |
96 | | BOTAN_FORCE_INLINE |
97 | 0 | void SBoxE5(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
98 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(b, d, c); |
99 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x93>(b, c, d); |
100 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc3>(a, b, c); |
101 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x27>(a, b, d); |
102 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x85>(a, c, t1); |
103 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
104 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x2d>(t2, d, o0); |
105 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x7a>(t4, b, t0); |
106 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t3, t0, o0); |
107 | 0 | a = o0; |
108 | 0 | b = o1; |
109 | 0 | c = o2; |
110 | 0 | d = o3; |
111 | 0 | } |
112 | | |
113 | | BOTAN_FORCE_INLINE |
114 | 0 | void SBoxE6(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
115 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x61>(d, c, b); |
116 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(b, d, t0); |
117 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x93>(a, b, d); |
118 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0xb5>(a, b, c); |
119 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
120 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0); |
121 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7c>(a, b, o1); |
122 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x1e>(t4, d, t0); |
123 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x29>(t3, t0, t1); |
124 | 0 | a = o0; |
125 | 0 | b = o1; |
126 | 0 | c = o2; |
127 | 0 | d = o3; |
128 | 0 | } |
129 | | |
130 | | BOTAN_FORCE_INLINE |
131 | 0 | void SBoxE7(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
132 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x9b>(b, c, d); |
133 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x61>(c, b, d); |
134 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe3>(a, d, t1); |
135 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x83>(b, c, d); |
136 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x49>(a, b, c); |
137 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
138 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xe1>(t2, b, c); |
139 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t3, a, t1); |
140 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t4, d, t2); |
141 | 0 | a = o0; |
142 | 0 | b = o1; |
143 | 0 | c = o2; |
144 | 0 | d = o3; |
145 | 0 | } |
146 | | |
147 | | BOTAN_FORCE_INLINE |
148 | 0 | void SBoxD0(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
149 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(c, d, b); |
150 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x49>(b, d, c); |
151 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
152 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa9>(a, b, c); |
153 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x3c>(t2, d, t0); |
154 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x4d>(a, b, d); |
155 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o0); |
156 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x56>(o3, o0, t2); |
157 | 0 | a = o0; |
158 | 0 | b = o1; |
159 | 0 | c = o2; |
160 | 0 | d = o3; |
161 | 0 | } |
162 | | |
163 | | BOTAN_FORCE_INLINE |
164 | 0 | void SBoxD1(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
165 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x73>(d, b, c); |
166 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x68>(c, d, b); |
167 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc5>(a, b, d); |
168 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2d>(a, b, d); |
169 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
170 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t2, c, o0); |
171 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t3, o0, o1); |
172 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x99>(o0, t3, c); |
173 | 0 | a = o0; |
174 | 0 | b = o1; |
175 | 0 | c = o2; |
176 | 0 | d = o3; |
177 | 0 | } |
178 | | |
179 | | BOTAN_FORCE_INLINE |
180 | 0 | void SBoxD2(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
181 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc6>(d, b, c); |
182 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(d, c, b); |
183 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe1>(a, b, c); |
184 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, d, t0); |
185 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
186 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t0, a, t1); |
187 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x9b>(a, c, o2); |
188 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5b>(t3, b, d); |
189 | 0 | a = o0; |
190 | 0 | b = o1; |
191 | 0 | c = o2; |
192 | 0 | d = o3; |
193 | 0 | } |
194 | | |
195 | | BOTAN_FORCE_INLINE |
196 | 0 | void SBoxD3(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
197 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x94>(c, d, b); |
198 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x99>(b, d, t0); |
199 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
200 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x97>(a, b, d); |
201 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t2, c, o0); |
202 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(c, d, t2); |
203 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x0e>(t3, b, t0); |
204 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x1c>(a, b, t0); |
205 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xb4>(t4, c, d); |
206 | 0 | a = o0; |
207 | 0 | b = o1; |
208 | 0 | c = o2; |
209 | 0 | d = o3; |
210 | 0 | } |
211 | | |
212 | | BOTAN_FORCE_INLINE |
213 | 0 | void SBoxD4(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
214 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(d, c, b); |
215 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xa6>(d, b, c); |
216 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xb5>(a, b, d); |
217 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x5e>(a, b, d); |
218 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x50>(a, b, t0); |
219 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
220 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t4, c, d); |
221 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t4); |
222 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5a>(t2, c, t0); |
223 | 0 | a = o0; |
224 | 0 | b = o1; |
225 | 0 | c = o2; |
226 | 0 | d = o3; |
227 | 0 | } |
228 | | |
229 | | BOTAN_FORCE_INLINE |
230 | 0 | void SBoxD5(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
231 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc9>(a, b, c); |
232 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x65>(a, b, c); |
233 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x25>(a, b, d); |
234 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x63>(c, d, t0); |
235 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x86>(a, b, t3); |
236 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, c, t0); |
237 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xc3>(t4, c, d); |
238 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x47>(t1, d, t0); |
239 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3); |
240 | 0 | a = o0; |
241 | 0 | b = o1; |
242 | 0 | c = o2; |
243 | 0 | d = o3; |
244 | 0 | } |
245 | | |
246 | | BOTAN_FORCE_INLINE |
247 | 0 | void SBoxD6(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
248 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x07>(d, b, c); |
249 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9e>(c, d, b); |
250 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc6>(a, b, c); |
251 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x34>(a, b, d); |
252 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x2b>(a, c, d); |
253 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1); |
254 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xcb>(t2, d, t0); |
255 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t0); |
256 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t4, b, o0); |
257 | 0 | a = o0; |
258 | 0 | b = o1; |
259 | 0 | c = o2; |
260 | 0 | d = o3; |
261 | 0 | } |
262 | | |
263 | | BOTAN_FORCE_INLINE |
264 | 0 | void SBoxD7(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) { |
265 | 0 | const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(b, d, c); |
266 | 0 | const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3e>(a, c, d); |
267 | 0 | const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x1c>(a, b, d); |
268 | 0 | const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x87>(t0, d, b); |
269 | 0 | const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7d>(a, b, t1); |
270 | 0 | const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3); |
271 | 0 | const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, b, t0); |
272 | 0 | const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t2, c, t1); |
273 | 0 | const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x6d>(t4, c, d); |
274 | 0 | a = o0; |
275 | 0 | b = o1; |
276 | 0 | c = o2; |
277 | 0 | d = o3; |
278 | 0 | } |
279 | | |
280 | | BOTAN_AVX512_FN |
281 | 0 | void Serpent::avx512_encrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const { |
282 | 0 | using namespace Botan::Serpent_F; |
283 | |
|
284 | 0 | SIMD_16x32 B0 = SIMD_16x32::load_le(in); |
285 | 0 | SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64); |
286 | 0 | SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128); |
287 | 0 | SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192); |
288 | |
|
289 | 0 | SIMD_16x32::transpose(B0, B1, B2, B3); |
290 | |
|
291 | 0 | const Key_Inserter key_xor(m_round_key.data()); |
292 | |
|
293 | 0 | key_xor(0, B0, B1, B2, B3); |
294 | 0 | SBoxE0(B0, B1, B2, B3); |
295 | 0 | transform(B0, B1, B2, B3); |
296 | 0 | key_xor(1, B0, B1, B2, B3); |
297 | 0 | SBoxE1(B0, B1, B2, B3); |
298 | 0 | transform(B0, B1, B2, B3); |
299 | 0 | key_xor(2, B0, B1, B2, B3); |
300 | 0 | SBoxE2(B0, B1, B2, B3); |
301 | 0 | transform(B0, B1, B2, B3); |
302 | 0 | key_xor(3, B0, B1, B2, B3); |
303 | 0 | SBoxE3(B0, B1, B2, B3); |
304 | 0 | transform(B0, B1, B2, B3); |
305 | 0 | key_xor(4, B0, B1, B2, B3); |
306 | 0 | SBoxE4(B0, B1, B2, B3); |
307 | 0 | transform(B0, B1, B2, B3); |
308 | 0 | key_xor(5, B0, B1, B2, B3); |
309 | 0 | SBoxE5(B0, B1, B2, B3); |
310 | 0 | transform(B0, B1, B2, B3); |
311 | 0 | key_xor(6, B0, B1, B2, B3); |
312 | 0 | SBoxE6(B0, B1, B2, B3); |
313 | 0 | transform(B0, B1, B2, B3); |
314 | 0 | key_xor(7, B0, B1, B2, B3); |
315 | 0 | SBoxE7(B0, B1, B2, B3); |
316 | 0 | transform(B0, B1, B2, B3); |
317 | |
|
318 | 0 | key_xor(8, B0, B1, B2, B3); |
319 | 0 | SBoxE0(B0, B1, B2, B3); |
320 | 0 | transform(B0, B1, B2, B3); |
321 | 0 | key_xor(9, B0, B1, B2, B3); |
322 | 0 | SBoxE1(B0, B1, B2, B3); |
323 | 0 | transform(B0, B1, B2, B3); |
324 | 0 | key_xor(10, B0, B1, B2, B3); |
325 | 0 | SBoxE2(B0, B1, B2, B3); |
326 | 0 | transform(B0, B1, B2, B3); |
327 | 0 | key_xor(11, B0, B1, B2, B3); |
328 | 0 | SBoxE3(B0, B1, B2, B3); |
329 | 0 | transform(B0, B1, B2, B3); |
330 | 0 | key_xor(12, B0, B1, B2, B3); |
331 | 0 | SBoxE4(B0, B1, B2, B3); |
332 | 0 | transform(B0, B1, B2, B3); |
333 | 0 | key_xor(13, B0, B1, B2, B3); |
334 | 0 | SBoxE5(B0, B1, B2, B3); |
335 | 0 | transform(B0, B1, B2, B3); |
336 | 0 | key_xor(14, B0, B1, B2, B3); |
337 | 0 | SBoxE6(B0, B1, B2, B3); |
338 | 0 | transform(B0, B1, B2, B3); |
339 | 0 | key_xor(15, B0, B1, B2, B3); |
340 | 0 | SBoxE7(B0, B1, B2, B3); |
341 | 0 | transform(B0, B1, B2, B3); |
342 | |
|
343 | 0 | key_xor(16, B0, B1, B2, B3); |
344 | 0 | SBoxE0(B0, B1, B2, B3); |
345 | 0 | transform(B0, B1, B2, B3); |
346 | 0 | key_xor(17, B0, B1, B2, B3); |
347 | 0 | SBoxE1(B0, B1, B2, B3); |
348 | 0 | transform(B0, B1, B2, B3); |
349 | 0 | key_xor(18, B0, B1, B2, B3); |
350 | 0 | SBoxE2(B0, B1, B2, B3); |
351 | 0 | transform(B0, B1, B2, B3); |
352 | 0 | key_xor(19, B0, B1, B2, B3); |
353 | 0 | SBoxE3(B0, B1, B2, B3); |
354 | 0 | transform(B0, B1, B2, B3); |
355 | 0 | key_xor(20, B0, B1, B2, B3); |
356 | 0 | SBoxE4(B0, B1, B2, B3); |
357 | 0 | transform(B0, B1, B2, B3); |
358 | 0 | key_xor(21, B0, B1, B2, B3); |
359 | 0 | SBoxE5(B0, B1, B2, B3); |
360 | 0 | transform(B0, B1, B2, B3); |
361 | 0 | key_xor(22, B0, B1, B2, B3); |
362 | 0 | SBoxE6(B0, B1, B2, B3); |
363 | 0 | transform(B0, B1, B2, B3); |
364 | 0 | key_xor(23, B0, B1, B2, B3); |
365 | 0 | SBoxE7(B0, B1, B2, B3); |
366 | 0 | transform(B0, B1, B2, B3); |
367 | |
|
368 | 0 | key_xor(24, B0, B1, B2, B3); |
369 | 0 | SBoxE0(B0, B1, B2, B3); |
370 | 0 | transform(B0, B1, B2, B3); |
371 | 0 | key_xor(25, B0, B1, B2, B3); |
372 | 0 | SBoxE1(B0, B1, B2, B3); |
373 | 0 | transform(B0, B1, B2, B3); |
374 | 0 | key_xor(26, B0, B1, B2, B3); |
375 | 0 | SBoxE2(B0, B1, B2, B3); |
376 | 0 | transform(B0, B1, B2, B3); |
377 | 0 | key_xor(27, B0, B1, B2, B3); |
378 | 0 | SBoxE3(B0, B1, B2, B3); |
379 | 0 | transform(B0, B1, B2, B3); |
380 | 0 | key_xor(28, B0, B1, B2, B3); |
381 | 0 | SBoxE4(B0, B1, B2, B3); |
382 | 0 | transform(B0, B1, B2, B3); |
383 | 0 | key_xor(29, B0, B1, B2, B3); |
384 | 0 | SBoxE5(B0, B1, B2, B3); |
385 | 0 | transform(B0, B1, B2, B3); |
386 | 0 | key_xor(30, B0, B1, B2, B3); |
387 | 0 | SBoxE6(B0, B1, B2, B3); |
388 | 0 | transform(B0, B1, B2, B3); |
389 | 0 | key_xor(31, B0, B1, B2, B3); |
390 | 0 | SBoxE7(B0, B1, B2, B3); |
391 | 0 | key_xor(32, B0, B1, B2, B3); |
392 | |
|
393 | 0 | SIMD_16x32::transpose(B0, B1, B2, B3); |
394 | 0 | B0.store_le(out); |
395 | 0 | B1.store_le(out + 64); |
396 | 0 | B2.store_le(out + 128); |
397 | 0 | B3.store_le(out + 192); |
398 | |
|
399 | 0 | SIMD_16x32::zero_registers(); |
400 | 0 | } |
401 | | |
402 | | BOTAN_AVX512_FN |
403 | 0 | void Serpent::avx512_decrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const { |
404 | 0 | using namespace Botan::Serpent_F; |
405 | |
|
406 | 0 | SIMD_16x32 B0 = SIMD_16x32::load_le(in); |
407 | 0 | SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64); |
408 | 0 | SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128); |
409 | 0 | SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192); |
410 | |
|
411 | 0 | SIMD_16x32::transpose(B0, B1, B2, B3); |
412 | |
|
413 | 0 | const Key_Inserter key_xor(m_round_key.data()); |
414 | |
|
415 | 0 | key_xor(32, B0, B1, B2, B3); |
416 | 0 | SBoxD7(B0, B1, B2, B3); |
417 | 0 | key_xor(31, B0, B1, B2, B3); |
418 | 0 | i_transform(B0, B1, B2, B3); |
419 | 0 | SBoxD6(B0, B1, B2, B3); |
420 | 0 | key_xor(30, B0, B1, B2, B3); |
421 | 0 | i_transform(B0, B1, B2, B3); |
422 | 0 | SBoxD5(B0, B1, B2, B3); |
423 | 0 | key_xor(29, B0, B1, B2, B3); |
424 | 0 | i_transform(B0, B1, B2, B3); |
425 | 0 | SBoxD4(B0, B1, B2, B3); |
426 | 0 | key_xor(28, B0, B1, B2, B3); |
427 | 0 | i_transform(B0, B1, B2, B3); |
428 | 0 | SBoxD3(B0, B1, B2, B3); |
429 | 0 | key_xor(27, B0, B1, B2, B3); |
430 | 0 | i_transform(B0, B1, B2, B3); |
431 | 0 | SBoxD2(B0, B1, B2, B3); |
432 | 0 | key_xor(26, B0, B1, B2, B3); |
433 | 0 | i_transform(B0, B1, B2, B3); |
434 | 0 | SBoxD1(B0, B1, B2, B3); |
435 | 0 | key_xor(25, B0, B1, B2, B3); |
436 | 0 | i_transform(B0, B1, B2, B3); |
437 | 0 | SBoxD0(B0, B1, B2, B3); |
438 | 0 | key_xor(24, B0, B1, B2, B3); |
439 | |
|
440 | 0 | i_transform(B0, B1, B2, B3); |
441 | 0 | SBoxD7(B0, B1, B2, B3); |
442 | 0 | key_xor(23, B0, B1, B2, B3); |
443 | 0 | i_transform(B0, B1, B2, B3); |
444 | 0 | SBoxD6(B0, B1, B2, B3); |
445 | 0 | key_xor(22, B0, B1, B2, B3); |
446 | 0 | i_transform(B0, B1, B2, B3); |
447 | 0 | SBoxD5(B0, B1, B2, B3); |
448 | 0 | key_xor(21, B0, B1, B2, B3); |
449 | 0 | i_transform(B0, B1, B2, B3); |
450 | 0 | SBoxD4(B0, B1, B2, B3); |
451 | 0 | key_xor(20, B0, B1, B2, B3); |
452 | 0 | i_transform(B0, B1, B2, B3); |
453 | 0 | SBoxD3(B0, B1, B2, B3); |
454 | 0 | key_xor(19, B0, B1, B2, B3); |
455 | 0 | i_transform(B0, B1, B2, B3); |
456 | 0 | SBoxD2(B0, B1, B2, B3); |
457 | 0 | key_xor(18, B0, B1, B2, B3); |
458 | 0 | i_transform(B0, B1, B2, B3); |
459 | 0 | SBoxD1(B0, B1, B2, B3); |
460 | 0 | key_xor(17, B0, B1, B2, B3); |
461 | 0 | i_transform(B0, B1, B2, B3); |
462 | 0 | SBoxD0(B0, B1, B2, B3); |
463 | 0 | key_xor(16, B0, B1, B2, B3); |
464 | |
|
465 | 0 | i_transform(B0, B1, B2, B3); |
466 | 0 | SBoxD7(B0, B1, B2, B3); |
467 | 0 | key_xor(15, B0, B1, B2, B3); |
468 | 0 | i_transform(B0, B1, B2, B3); |
469 | 0 | SBoxD6(B0, B1, B2, B3); |
470 | 0 | key_xor(14, B0, B1, B2, B3); |
471 | 0 | i_transform(B0, B1, B2, B3); |
472 | 0 | SBoxD5(B0, B1, B2, B3); |
473 | 0 | key_xor(13, B0, B1, B2, B3); |
474 | 0 | i_transform(B0, B1, B2, B3); |
475 | 0 | SBoxD4(B0, B1, B2, B3); |
476 | 0 | key_xor(12, B0, B1, B2, B3); |
477 | 0 | i_transform(B0, B1, B2, B3); |
478 | 0 | SBoxD3(B0, B1, B2, B3); |
479 | 0 | key_xor(11, B0, B1, B2, B3); |
480 | 0 | i_transform(B0, B1, B2, B3); |
481 | 0 | SBoxD2(B0, B1, B2, B3); |
482 | 0 | key_xor(10, B0, B1, B2, B3); |
483 | 0 | i_transform(B0, B1, B2, B3); |
484 | 0 | SBoxD1(B0, B1, B2, B3); |
485 | 0 | key_xor(9, B0, B1, B2, B3); |
486 | 0 | i_transform(B0, B1, B2, B3); |
487 | 0 | SBoxD0(B0, B1, B2, B3); |
488 | 0 | key_xor(8, B0, B1, B2, B3); |
489 | |
|
490 | 0 | i_transform(B0, B1, B2, B3); |
491 | 0 | SBoxD7(B0, B1, B2, B3); |
492 | 0 | key_xor(7, B0, B1, B2, B3); |
493 | 0 | i_transform(B0, B1, B2, B3); |
494 | 0 | SBoxD6(B0, B1, B2, B3); |
495 | 0 | key_xor(6, B0, B1, B2, B3); |
496 | 0 | i_transform(B0, B1, B2, B3); |
497 | 0 | SBoxD5(B0, B1, B2, B3); |
498 | 0 | key_xor(5, B0, B1, B2, B3); |
499 | 0 | i_transform(B0, B1, B2, B3); |
500 | 0 | SBoxD4(B0, B1, B2, B3); |
501 | 0 | key_xor(4, B0, B1, B2, B3); |
502 | 0 | i_transform(B0, B1, B2, B3); |
503 | 0 | SBoxD3(B0, B1, B2, B3); |
504 | 0 | key_xor(3, B0, B1, B2, B3); |
505 | 0 | i_transform(B0, B1, B2, B3); |
506 | 0 | SBoxD2(B0, B1, B2, B3); |
507 | 0 | key_xor(2, B0, B1, B2, B3); |
508 | 0 | i_transform(B0, B1, B2, B3); |
509 | 0 | SBoxD1(B0, B1, B2, B3); |
510 | 0 | key_xor(1, B0, B1, B2, B3); |
511 | 0 | i_transform(B0, B1, B2, B3); |
512 | 0 | SBoxD0(B0, B1, B2, B3); |
513 | 0 | key_xor(0, B0, B1, B2, B3); |
514 | |
|
515 | 0 | SIMD_16x32::transpose(B0, B1, B2, B3); |
516 | |
|
517 | 0 | B0.store_le(out); |
518 | 0 | B1.store_le(out + 64); |
519 | 0 | B2.store_le(out + 128); |
520 | 0 | B3.store_le(out + 192); |
521 | |
|
522 | 0 | SIMD_16x32::zero_registers(); |
523 | 0 | } |
524 | | |
525 | | } // namespace Botan |