/src/botan/src/lib/block/aes/aes_vaes/aes_vaes.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2024 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/aes.h> |
8 | | |
9 | | #include <botan/internal/isa_extn.h> |
10 | | #include <botan/internal/loadstor.h> |
11 | | #include <botan/internal/simd_avx2.h> |
12 | | #include <wmmintrin.h> |
13 | | |
14 | | namespace Botan { |
15 | | |
16 | | namespace { |
17 | | |
18 | 0 | BOTAN_FORCE_INLINE void keyxor(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) { |
19 | 0 | B0 ^= K; |
20 | 0 | B1 ^= K; |
21 | 0 | B2 ^= K; |
22 | 0 | B3 ^= K; |
23 | 0 | } |
24 | | |
25 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(SIMD_8x32 K, SIMD_8x32& B) { |
26 | 0 | B = SIMD_8x32(_mm256_aesenc_epi128(B.raw(), K.raw())); |
27 | 0 | } |
28 | | |
29 | | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc( |
30 | 0 | SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) { |
31 | 0 | B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(), K.raw())); |
32 | 0 | B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(), K.raw())); |
33 | 0 | B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(), K.raw())); |
34 | 0 | B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(), K.raw())); |
35 | 0 | } |
36 | | |
37 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(SIMD_8x32 K, SIMD_8x32& B) { |
38 | 0 | B = SIMD_8x32(_mm256_aesenclast_epi128(B.raw(), K.raw())); |
39 | 0 | } |
40 | | |
41 | | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast( |
42 | 0 | SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) { |
43 | 0 | B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(), K.raw())); |
44 | 0 | B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(), K.raw())); |
45 | 0 | B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(), K.raw())); |
46 | 0 | B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(), K.raw())); |
47 | 0 | } |
48 | | |
49 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(SIMD_8x32 K, SIMD_8x32& B) { |
50 | 0 | B = SIMD_8x32(_mm256_aesdec_epi128(B.raw(), K.raw())); |
51 | 0 | } |
52 | | |
53 | | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec( |
54 | 0 | SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) { |
55 | 0 | B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(), K.raw())); |
56 | 0 | B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(), K.raw())); |
57 | 0 | B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(), K.raw())); |
58 | 0 | B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(), K.raw())); |
59 | 0 | } |
60 | | |
61 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(SIMD_8x32 K, SIMD_8x32& B) { |
62 | 0 | B = SIMD_8x32(_mm256_aesdeclast_epi128(B.raw(), K.raw())); |
63 | 0 | } |
64 | | |
65 | | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast( |
66 | 0 | SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) { |
67 | 0 | B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(), K.raw())); |
68 | 0 | B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(), K.raw())); |
69 | 0 | B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(), K.raw())); |
70 | 0 | B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(), K.raw())); |
71 | 0 | } |
72 | | |
73 | | } // namespace |
74 | | |
75 | | /* |
76 | | * AES-128 Encryption |
77 | | */ |
78 | 0 | BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
79 | 0 | const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]); |
80 | 0 | const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]); |
81 | 0 | const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]); |
82 | 0 | const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]); |
83 | 0 | const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]); |
84 | 0 | const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]); |
85 | 0 | const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]); |
86 | 0 | const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]); |
87 | 0 | const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]); |
88 | 0 | const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]); |
89 | 0 | const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]); |
90 | |
|
91 | 0 | while(blocks >= 8) { |
92 | 0 | SIMD_8x32 B0 = SIMD_8x32::load_le(in); |
93 | 0 | SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2); |
94 | 0 | SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4); |
95 | 0 | SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6); |
96 | |
|
97 | 0 | keyxor(K0, B0, B1, B2, B3); |
98 | 0 | aesenc(K1, B0, B1, B2, B3); |
99 | 0 | aesenc(K2, B0, B1, B2, B3); |
100 | 0 | aesenc(K3, B0, B1, B2, B3); |
101 | 0 | aesenc(K4, B0, B1, B2, B3); |
102 | 0 | aesenc(K5, B0, B1, B2, B3); |
103 | 0 | aesenc(K6, B0, B1, B2, B3); |
104 | 0 | aesenc(K7, B0, B1, B2, B3); |
105 | 0 | aesenc(K8, B0, B1, B2, B3); |
106 | 0 | aesenc(K9, B0, B1, B2, B3); |
107 | 0 | aesenclast(K10, B0, B1, B2, B3); |
108 | |
|
109 | 0 | B0.store_le(out); |
110 | 0 | B1.store_le(out + 16 * 2); |
111 | 0 | B2.store_le(out + 16 * 4); |
112 | 0 | B3.store_le(out + 16 * 6); |
113 | |
|
114 | 0 | blocks -= 8; |
115 | 0 | in += 8 * 16; |
116 | 0 | out += 8 * 16; |
117 | 0 | } |
118 | |
|
119 | 0 | while(blocks >= 2) { |
120 | 0 | SIMD_8x32 B = SIMD_8x32::load_le(in); |
121 | |
|
122 | 0 | B ^= K0; |
123 | 0 | aesenc(K1, B); |
124 | 0 | aesenc(K2, B); |
125 | 0 | aesenc(K3, B); |
126 | 0 | aesenc(K4, B); |
127 | 0 | aesenc(K5, B); |
128 | 0 | aesenc(K6, B); |
129 | 0 | aesenc(K7, B); |
130 | 0 | aesenc(K8, B); |
131 | 0 | aesenc(K9, B); |
132 | 0 | aesenclast(K10, B); |
133 | |
|
134 | 0 | B.store_le(out); |
135 | |
|
136 | 0 | in += 2 * 16; |
137 | 0 | out += 2 * 16; |
138 | 0 | blocks -= 2; |
139 | 0 | } |
140 | |
|
141 | 0 | if(blocks > 0) { |
142 | 0 | SIMD_8x32 B = SIMD_8x32::load_le128(in); |
143 | |
|
144 | 0 | B ^= K0; |
145 | 0 | aesenc(K1, B); |
146 | 0 | aesenc(K2, B); |
147 | 0 | aesenc(K3, B); |
148 | 0 | aesenc(K4, B); |
149 | 0 | aesenc(K5, B); |
150 | 0 | aesenc(K6, B); |
151 | 0 | aesenc(K7, B); |
152 | 0 | aesenc(K8, B); |
153 | 0 | aesenc(K9, B); |
154 | 0 | aesenclast(K10, B); |
155 | |
|
156 | 0 | B.store_le128(out); |
157 | 0 | } |
158 | 0 | } |
159 | | |
160 | | /* |
161 | | * AES-128 Decryption |
162 | | */ |
BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   // Load the 11 AES-128 decryption round keys from m_DK (presumably already
   // prepared for the AES-NI equivalent inverse cipher — confirm in the key
   // schedule code). load_le128 reads one 128-bit key, replicated so each
   // VAES instruction decrypts two blocks in parallel.
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);

   // Main loop: 8 blocks per iteration across four 2-block registers.
   while(blocks >= 8) {
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);

      keyxor(K0, B0, B1, B2, B3);   // initial AddRoundKey
      aesdec(K1, B0, B1, B2, B3);
      aesdec(K2, B0, B1, B2, B3);
      aesdec(K3, B0, B1, B2, B3);
      aesdec(K4, B0, B1, B2, B3);
      aesdec(K5, B0, B1, B2, B3);
      aesdec(K6, B0, B1, B2, B3);
      aesdec(K7, B0, B1, B2, B3);
      aesdec(K8, B0, B1, B2, B3);
      aesdec(K9, B0, B1, B2, B3);
      aesdeclast(K10, B0, B1, B2, B3);   // final round has no InvMixColumns

      B0.store_le(out + 16 * 0);
      B1.store_le(out + 16 * 2);
      B2.store_le(out + 16 * 4);
      B3.store_le(out + 16 * 6);

      blocks -= 8;
      in += 8 * 16;
      out += 8 * 16;
   }

   // Handle remaining pairs of blocks with a single 2-block register.
   while(blocks >= 2) {
      SIMD_8x32 B = SIMD_8x32::load_le(in);

      B ^= K0;
      aesdec(K1, B);
      aesdec(K2, B);
      aesdec(K3, B);
      aesdec(K4, B);
      aesdec(K5, B);
      aesdec(K6, B);
      aesdec(K7, B);
      aesdec(K8, B);
      aesdec(K9, B);
      aesdeclast(K10, B);

      B.store_le(out);

      in += 2 * 16;
      out += 2 * 16;
      blocks -= 2;
   }

   // At most one block remains: load/store only 128 bits; the computation in
   // the unused upper lane is discarded.
   if(blocks > 0) {
      SIMD_8x32 B = SIMD_8x32::load_le128(in);

      B ^= K0;
      aesdec(K1, B);
      aesdec(K2, B);
      aesdec(K3, B);
      aesdec(K4, B);
      aesdec(K5, B);
      aesdec(K6, B);
      aesdec(K7, B);
      aesdec(K8, B);
      aesdec(K9, B);
      aesdeclast(K10, B);

      B.store_le128(out);
   }
}
244 | | |
245 | | /* |
246 | | * AES-192 Encryption |
247 | | */ |
BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   // Load the 13 AES-192 round keys (12 rounds plus the initial AddRoundKey).
   // load_le128 reads one 128-bit key, replicated so each VAES instruction
   // encrypts two blocks in parallel.
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);

   // Main loop: 8 blocks per iteration across four 2-block registers.
   while(blocks >= 8) {
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);

      keyxor(K0, B0, B1, B2, B3);   // initial AddRoundKey
      aesenc(K1, B0, B1, B2, B3);
      aesenc(K2, B0, B1, B2, B3);
      aesenc(K3, B0, B1, B2, B3);
      aesenc(K4, B0, B1, B2, B3);
      aesenc(K5, B0, B1, B2, B3);
      aesenc(K6, B0, B1, B2, B3);
      aesenc(K7, B0, B1, B2, B3);
      aesenc(K8, B0, B1, B2, B3);
      aesenc(K9, B0, B1, B2, B3);
      aesenc(K10, B0, B1, B2, B3);
      aesenc(K11, B0, B1, B2, B3);
      aesenclast(K12, B0, B1, B2, B3);   // final round has no MixColumns

      B0.store_le(out + 16 * 0);
      B1.store_le(out + 16 * 2);
      B2.store_le(out + 16 * 4);
      B3.store_le(out + 16 * 6);

      blocks -= 8;
      in += 8 * 16;
      out += 8 * 16;
   }

   // Handle remaining pairs of blocks with a single 2-block register.
   while(blocks >= 2) {
      SIMD_8x32 B = SIMD_8x32::load_le(in);

      B ^= K0;
      aesenc(K1, B);
      aesenc(K2, B);
      aesenc(K3, B);
      aesenc(K4, B);
      aesenc(K5, B);
      aesenc(K6, B);
      aesenc(K7, B);
      aesenc(K8, B);
      aesenc(K9, B);
      aesenc(K10, B);
      aesenc(K11, B);
      aesenclast(K12, B);

      B.store_le(out);

      in += 2 * 16;
      out += 2 * 16;
      blocks -= 2;
   }

   // At most one block remains: load/store only 128 bits; the computation in
   // the unused upper lane is discarded.
   if(blocks > 0) {
      SIMD_8x32 B = SIMD_8x32::load_le128(in);

      B ^= K0;
      aesenc(K1, B);
      aesenc(K2, B);
      aesenc(K3, B);
      aesenc(K4, B);
      aesenc(K5, B);
      aesenc(K6, B);
      aesenc(K7, B);
      aesenc(K8, B);
      aesenc(K9, B);
      aesenc(K10, B);
      aesenc(K11, B);
      aesenclast(K12, B);

      B.store_le128(out);
   }
}
337 | | |
338 | | /* |
339 | | * AES-192 Decryption |
340 | | */ |
BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   // Load the 13 AES-192 decryption round keys from m_DK (presumably prepared
   // for the AES-NI equivalent inverse cipher — confirm in the key schedule
   // code). Each key is replicated so one VAES instruction decrypts two blocks.
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);

   // Main loop: 8 blocks per iteration across four 2-block registers.
   while(blocks >= 8) {
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);

      keyxor(K0, B0, B1, B2, B3);   // initial AddRoundKey
      aesdec(K1, B0, B1, B2, B3);
      aesdec(K2, B0, B1, B2, B3);
      aesdec(K3, B0, B1, B2, B3);
      aesdec(K4, B0, B1, B2, B3);
      aesdec(K5, B0, B1, B2, B3);
      aesdec(K6, B0, B1, B2, B3);
      aesdec(K7, B0, B1, B2, B3);
      aesdec(K8, B0, B1, B2, B3);
      aesdec(K9, B0, B1, B2, B3);
      aesdec(K10, B0, B1, B2, B3);
      aesdec(K11, B0, B1, B2, B3);
      aesdeclast(K12, B0, B1, B2, B3);   // final round has no InvMixColumns

      B0.store_le(out + 16 * 0);
      B1.store_le(out + 16 * 2);
      B2.store_le(out + 16 * 4);
      B3.store_le(out + 16 * 6);

      blocks -= 8;
      in += 8 * 16;
      out += 8 * 16;
   }

   // Handle remaining pairs of blocks with a single 2-block register.
   while(blocks >= 2) {
      SIMD_8x32 B = SIMD_8x32::load_le(in);

      B ^= K0;
      aesdec(K1, B);
      aesdec(K2, B);
      aesdec(K3, B);
      aesdec(K4, B);
      aesdec(K5, B);
      aesdec(K6, B);
      aesdec(K7, B);
      aesdec(K8, B);
      aesdec(K9, B);
      aesdec(K10, B);
      aesdec(K11, B);
      aesdeclast(K12, B);

      B.store_le(out);

      in += 2 * 16;
      out += 2 * 16;
      blocks -= 2;
   }

   // At most one block remains: load/store only 128 bits; the computation in
   // the unused upper lane is discarded.
   if(blocks > 0) {
      SIMD_8x32 B = SIMD_8x32::load_le128(in);

      B ^= K0;
      aesdec(K1, B);
      aesdec(K2, B);
      aesdec(K3, B);
      aesdec(K4, B);
      aesdec(K5, B);
      aesdec(K6, B);
      aesdec(K7, B);
      aesdec(K8, B);
      aesdec(K9, B);
      aesdec(K10, B);
      aesdec(K11, B);
      aesdeclast(K12, B);

      B.store_le128(out);
   }
}
430 | | |
/*
* AES-256 Encryption
*/
BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   // Load the 15 AES-256 round keys (14 rounds plus the initial AddRoundKey).
   // load_le128 reads one 128-bit key, replicated so each VAES instruction
   // encrypts two blocks in parallel.
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
   const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
   const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);

   // Main loop: 8 blocks per iteration across four 2-block registers.
   while(blocks >= 8) {
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);

      keyxor(K0, B0, B1, B2, B3);   // initial AddRoundKey
      aesenc(K1, B0, B1, B2, B3);
      aesenc(K2, B0, B1, B2, B3);
      aesenc(K3, B0, B1, B2, B3);
      aesenc(K4, B0, B1, B2, B3);
      aesenc(K5, B0, B1, B2, B3);
      aesenc(K6, B0, B1, B2, B3);
      aesenc(K7, B0, B1, B2, B3);
      aesenc(K8, B0, B1, B2, B3);
      aesenc(K9, B0, B1, B2, B3);
      aesenc(K10, B0, B1, B2, B3);
      aesenc(K11, B0, B1, B2, B3);
      aesenc(K12, B0, B1, B2, B3);
      aesenc(K13, B0, B1, B2, B3);
      aesenclast(K14, B0, B1, B2, B3);   // final round has no MixColumns

      B0.store_le(out + 16 * 0);
      B1.store_le(out + 16 * 2);
      B2.store_le(out + 16 * 4);
      B3.store_le(out + 16 * 6);

      blocks -= 8;
      in += 8 * 16;
      out += 8 * 16;
   }

   // Handle remaining pairs of blocks with a single 2-block register.
   while(blocks >= 2) {
      SIMD_8x32 B = SIMD_8x32::load_le(in);

      B ^= K0;
      aesenc(K1, B);
      aesenc(K2, B);
      aesenc(K3, B);
      aesenc(K4, B);
      aesenc(K5, B);
      aesenc(K6, B);
      aesenc(K7, B);
      aesenc(K8, B);
      aesenc(K9, B);
      aesenc(K10, B);
      aesenc(K11, B);
      aesenc(K12, B);
      aesenc(K13, B);
      aesenclast(K14, B);

      B.store_le(out);

      in += 2 * 16;
      out += 2 * 16;
      blocks -= 2;
   }

   // At most one block remains: load/store only 128 bits; the computation in
   // the unused upper lane is discarded.
   if(blocks > 0) {
      SIMD_8x32 B = SIMD_8x32::load_le128(in);

      B ^= K0;
      aesenc(K1, B);
      aesenc(K2, B);
      aesenc(K3, B);
      aesenc(K4, B);
      aesenc(K5, B);
      aesenc(K6, B);
      aesenc(K7, B);
      aesenc(K8, B);
      aesenc(K9, B);
      aesenc(K10, B);
      aesenc(K11, B);
      aesenc(K12, B);
      aesenc(K13, B);
      aesenclast(K14, B);

      B.store_le128(out);
   }
}
528 | | |
529 | | /* |
530 | | * AES-256 Decryption |
531 | | */ |
BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   // Load the 15 AES-256 decryption round keys from m_DK (presumably prepared
   // for the AES-NI equivalent inverse cipher — confirm in the key schedule
   // code). Each key is replicated so one VAES instruction decrypts two blocks.
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
   const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
   const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);

   // Main loop: 8 blocks per iteration across four 2-block registers.
   while(blocks >= 8) {
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);

      keyxor(K0, B0, B1, B2, B3);   // initial AddRoundKey
      aesdec(K1, B0, B1, B2, B3);
      aesdec(K2, B0, B1, B2, B3);
      aesdec(K3, B0, B1, B2, B3);
      aesdec(K4, B0, B1, B2, B3);
      aesdec(K5, B0, B1, B2, B3);
      aesdec(K6, B0, B1, B2, B3);
      aesdec(K7, B0, B1, B2, B3);
      aesdec(K8, B0, B1, B2, B3);
      aesdec(K9, B0, B1, B2, B3);
      aesdec(K10, B0, B1, B2, B3);
      aesdec(K11, B0, B1, B2, B3);
      aesdec(K12, B0, B1, B2, B3);
      aesdec(K13, B0, B1, B2, B3);
      aesdeclast(K14, B0, B1, B2, B3);   // final round has no InvMixColumns

      B0.store_le(out + 16 * 0);
      B1.store_le(out + 16 * 2);
      B2.store_le(out + 16 * 4);
      B3.store_le(out + 16 * 6);

      blocks -= 8;
      in += 8 * 16;
      out += 8 * 16;
   }

   // Handle remaining pairs of blocks with a single 2-block register.
   while(blocks >= 2) {
      SIMD_8x32 B = SIMD_8x32::load_le(in);

      B ^= K0;
      aesdec(K1, B);
      aesdec(K2, B);
      aesdec(K3, B);
      aesdec(K4, B);
      aesdec(K5, B);
      aesdec(K6, B);
      aesdec(K7, B);
      aesdec(K8, B);
      aesdec(K9, B);
      aesdec(K10, B);
      aesdec(K11, B);
      aesdec(K12, B);
      aesdec(K13, B);
      aesdeclast(K14, B);

      B.store_le(out);

      in += 2 * 16;
      out += 2 * 16;
      blocks -= 2;
   }

   // At most one block remains: load/store only 128 bits; the computation in
   // the unused upper lane is discarded.
   if(blocks > 0) {
      SIMD_8x32 B = SIMD_8x32::load_le128(in);

      B ^= K0;
      aesdec(K1, B);
      aesdec(K2, B);
      aesdec(K3, B);
      aesdec(K4, B);
      aesdec(K5, B);
      aesdec(K6, B);
      aesdec(K7, B);
      aesdec(K8, B);
      aesdec(K9, B);
      aesdec(K10, B);
      aesdec(K11, B);
      aesdec(K12, B);
      aesdec(K13, B);
      aesdeclast(K14, B);

      B.store_le128(out);
   }
}
629 | | |
630 | | } // namespace Botan |