Coverage Report

Created: 2025-04-11 06:34

/src/botan/src/lib/block/aes/aes_vaes/aes_vaes.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2024 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/aes.h>
8
9
#include <botan/internal/isa_extn.h>
10
#include <botan/internal/loadstor.h>
11
#include <botan/internal/simd_avx2.h>
12
#include <wmmintrin.h>
13
14
namespace Botan {
15
16
namespace {
17
18
0
/*
* XOR the key register K into each of the four block registers B0..B3,
* updating them in place.
*/
BOTAN_FORCE_INLINE void keyxor(
   SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
   // The four registers are independent of one another
   B0 ^= K;
   B1 ^= K;
   B2 ^= K;
   B3 ^= K;
}
24
25
0
/*
* Replace B with the result of _mm256_aesenc_epi128 applied to B and the
* round key K.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(SIMD_8x32 K, SIMD_8x32& B) {
   const auto round_out = _mm256_aesenc_epi128(B.raw(), K.raw());
   B = SIMD_8x32(round_out);
}
28
29
/*
* Four-register variant: apply _mm256_aesenc_epi128 with the same round key K
* to each of B0..B3, updating them in place.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(
   SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
   // Pull the raw key value out once; all four registers share it
   const auto rk = K.raw();

   B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(), rk));
   B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(), rk));
   B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(), rk));
   B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(), rk));
}
36
37
0
/*
* Replace B with the result of _mm256_aesenclast_epi128 applied to B and the
* round key K.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(SIMD_8x32 K, SIMD_8x32& B) {
   const auto round_out = _mm256_aesenclast_epi128(B.raw(), K.raw());
   B = SIMD_8x32(round_out);
}
40
41
/*
* Four-register variant: apply _mm256_aesenclast_epi128 with the same round
* key K to each of B0..B3, updating them in place.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(
   SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
   // Pull the raw key value out once; all four registers share it
   const auto rk = K.raw();

   B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(), rk));
   B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(), rk));
   B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(), rk));
   B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(), rk));
}
48
49
0
/*
* Replace B with the result of _mm256_aesdec_epi128 applied to B and the
* round key K.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(SIMD_8x32 K, SIMD_8x32& B) {
   const auto round_out = _mm256_aesdec_epi128(B.raw(), K.raw());
   B = SIMD_8x32(round_out);
}
52
53
/*
* Four-register variant: apply _mm256_aesdec_epi128 with the same round key K
* to each of B0..B3, updating them in place.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(
   SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
   // Pull the raw key value out once; all four registers share it
   const auto rk = K.raw();

   B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(), rk));
   B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(), rk));
   B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(), rk));
   B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(), rk));
}
60
61
0
/*
* Replace B with the result of _mm256_aesdeclast_epi128 applied to B and the
* round key K.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(SIMD_8x32 K, SIMD_8x32& B) {
   const auto round_out = _mm256_aesdeclast_epi128(B.raw(), K.raw());
   B = SIMD_8x32(round_out);
}
64
65
/*
* Four-register variant: apply _mm256_aesdeclast_epi128 with the same round
* key K to each of B0..B3, updating them in place.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(
   SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
   // Pull the raw key value out once; all four registers share it
   const auto rk = K.raw();

   B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(), rk));
   B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(), rk));
   B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(), rk));
   B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(), rk));
}
72
73
}  // namespace
74
75
/*
76
* AES-128 Encryption
77
*/
78
0
BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
79
0
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
80
0
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
81
0
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
82
0
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
83
0
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
84
0
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
85
0
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
86
0
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
87
0
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
88
0
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
89
0
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
90
91
0
   while(blocks >= 8) {
92
0
      SIMD_8x32 B0 = SIMD_8x32::load_le(in);
93
0
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
94
0
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
95
0
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
96
97
0
      keyxor(K0, B0, B1, B2, B3);
98
0
      aesenc(K1, B0, B1, B2, B3);
99
0
      aesenc(K2, B0, B1, B2, B3);
100
0
      aesenc(K3, B0, B1, B2, B3);
101
0
      aesenc(K4, B0, B1, B2, B3);
102
0
      aesenc(K5, B0, B1, B2, B3);
103
0
      aesenc(K6, B0, B1, B2, B3);
104
0
      aesenc(K7, B0, B1, B2, B3);
105
0
      aesenc(K8, B0, B1, B2, B3);
106
0
      aesenc(K9, B0, B1, B2, B3);
107
0
      aesenclast(K10, B0, B1, B2, B3);
108
109
0
      B0.store_le(out);
110
0
      B1.store_le(out + 16 * 2);
111
0
      B2.store_le(out + 16 * 4);
112
0
      B3.store_le(out + 16 * 6);
113
114
0
      blocks -= 8;
115
0
      in += 8 * 16;
116
0
      out += 8 * 16;
117
0
   }
118
119
0
   while(blocks >= 2) {
120
0
      SIMD_8x32 B = SIMD_8x32::load_le(in);
121
122
0
      B ^= K0;
123
0
      aesenc(K1, B);
124
0
      aesenc(K2, B);
125
0
      aesenc(K3, B);
126
0
      aesenc(K4, B);
127
0
      aesenc(K5, B);
128
0
      aesenc(K6, B);
129
0
      aesenc(K7, B);
130
0
      aesenc(K8, B);
131
0
      aesenc(K9, B);
132
0
      aesenclast(K10, B);
133
134
0
      B.store_le(out);
135
136
0
      in += 2 * 16;
137
0
      out += 2 * 16;
138
0
      blocks -= 2;
139
0
   }
140
141
0
   if(blocks > 0) {
142
0
      SIMD_8x32 B = SIMD_8x32::load_le128(in);
143
144
0
      B ^= K0;
145
0
      aesenc(K1, B);
146
0
      aesenc(K2, B);
147
0
      aesenc(K3, B);
148
0
      aesenc(K4, B);
149
0
      aesenc(K5, B);
150
0
      aesenc(K6, B);
151
0
      aesenc(K7, B);
152
0
      aesenc(K8, B);
153
0
      aesenc(K9, B);
154
0
      aesenclast(K10, B);
155
156
0
      B.store_le128(out);
157
0
   }
158
0
}
159
160
/*
161
* AES-128 Decryption
162
*/
163
0
BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
164
0
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
165
0
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
166
0
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
167
0
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
168
0
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
169
0
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
170
0
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
171
0
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
172
0
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
173
0
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
174
0
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
175
176
0
   while(blocks >= 8) {
177
0
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
178
0
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
179
0
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
180
0
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
181
182
0
      keyxor(K0, B0, B1, B2, B3);
183
0
      aesdec(K1, B0, B1, B2, B3);
184
0
      aesdec(K2, B0, B1, B2, B3);
185
0
      aesdec(K3, B0, B1, B2, B3);
186
0
      aesdec(K4, B0, B1, B2, B3);
187
0
      aesdec(K5, B0, B1, B2, B3);
188
0
      aesdec(K6, B0, B1, B2, B3);
189
0
      aesdec(K7, B0, B1, B2, B3);
190
0
      aesdec(K8, B0, B1, B2, B3);
191
0
      aesdec(K9, B0, B1, B2, B3);
192
0
      aesdeclast(K10, B0, B1, B2, B3);
193
194
0
      B0.store_le(out + 16 * 0);
195
0
      B1.store_le(out + 16 * 2);
196
0
      B2.store_le(out + 16 * 4);
197
0
      B3.store_le(out + 16 * 6);
198
199
0
      blocks -= 8;
200
0
      in += 8 * 16;
201
0
      out += 8 * 16;
202
0
   }
203
204
0
   while(blocks >= 2) {
205
0
      SIMD_8x32 B = SIMD_8x32::load_le(in);
206
207
0
      B ^= K0;
208
0
      aesdec(K1, B);
209
0
      aesdec(K2, B);
210
0
      aesdec(K3, B);
211
0
      aesdec(K4, B);
212
0
      aesdec(K5, B);
213
0
      aesdec(K6, B);
214
0
      aesdec(K7, B);
215
0
      aesdec(K8, B);
216
0
      aesdec(K9, B);
217
0
      aesdeclast(K10, B);
218
219
0
      B.store_le(out);
220
221
0
      in += 2 * 16;
222
0
      out += 2 * 16;
223
0
      blocks -= 2;
224
0
   }
225
226
0
   if(blocks > 0) {
227
0
      SIMD_8x32 B = SIMD_8x32::load_le128(in);
228
229
0
      B ^= K0;
230
0
      aesdec(K1, B);
231
0
      aesdec(K2, B);
232
0
      aesdec(K3, B);
233
0
      aesdec(K4, B);
234
0
      aesdec(K5, B);
235
0
      aesdec(K6, B);
236
0
      aesdec(K7, B);
237
0
      aesdec(K8, B);
238
0
      aesdec(K9, B);
239
0
      aesdeclast(K10, B);
240
241
0
      B.store_le128(out);
242
0
   }
243
0
}
244
245
/*
246
* AES-192 Encryption
247
*/
248
0
BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
249
0
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
250
0
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
251
0
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
252
0
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
253
0
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
254
0
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
255
0
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
256
0
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
257
0
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
258
0
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
259
0
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
260
0
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
261
0
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
262
263
0
   while(blocks >= 8) {
264
0
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
265
0
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
266
0
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
267
0
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
268
269
0
      keyxor(K0, B0, B1, B2, B3);
270
0
      aesenc(K1, B0, B1, B2, B3);
271
0
      aesenc(K2, B0, B1, B2, B3);
272
0
      aesenc(K3, B0, B1, B2, B3);
273
0
      aesenc(K4, B0, B1, B2, B3);
274
0
      aesenc(K5, B0, B1, B2, B3);
275
0
      aesenc(K6, B0, B1, B2, B3);
276
0
      aesenc(K7, B0, B1, B2, B3);
277
0
      aesenc(K8, B0, B1, B2, B3);
278
0
      aesenc(K9, B0, B1, B2, B3);
279
0
      aesenc(K10, B0, B1, B2, B3);
280
0
      aesenc(K11, B0, B1, B2, B3);
281
0
      aesenclast(K12, B0, B1, B2, B3);
282
283
0
      B0.store_le(out + 16 * 0);
284
0
      B1.store_le(out + 16 * 2);
285
0
      B2.store_le(out + 16 * 4);
286
0
      B3.store_le(out + 16 * 6);
287
288
0
      blocks -= 8;
289
0
      in += 8 * 16;
290
0
      out += 8 * 16;
291
0
   }
292
293
0
   while(blocks >= 2) {
294
0
      SIMD_8x32 B = SIMD_8x32::load_le(in);
295
296
0
      B ^= K0;
297
0
      aesenc(K1, B);
298
0
      aesenc(K2, B);
299
0
      aesenc(K3, B);
300
0
      aesenc(K4, B);
301
0
      aesenc(K5, B);
302
0
      aesenc(K6, B);
303
0
      aesenc(K7, B);
304
0
      aesenc(K8, B);
305
0
      aesenc(K9, B);
306
0
      aesenc(K10, B);
307
0
      aesenc(K11, B);
308
0
      aesenclast(K12, B);
309
310
0
      B.store_le(out);
311
312
0
      in += 2 * 16;
313
0
      out += 2 * 16;
314
0
      blocks -= 2;
315
0
   }
316
317
0
   if(blocks > 0) {
318
0
      SIMD_8x32 B = SIMD_8x32::load_le128(in);
319
320
0
      B ^= K0;
321
0
      aesenc(K1, B);
322
0
      aesenc(K2, B);
323
0
      aesenc(K3, B);
324
0
      aesenc(K4, B);
325
0
      aesenc(K5, B);
326
0
      aesenc(K6, B);
327
0
      aesenc(K7, B);
328
0
      aesenc(K8, B);
329
0
      aesenc(K9, B);
330
0
      aesenc(K10, B);
331
0
      aesenc(K11, B);
332
0
      aesenclast(K12, B);
333
334
0
      B.store_le128(out);
335
0
   }
336
0
}
337
338
/*
339
* AES-192 Decryption
340
*/
341
0
BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
342
0
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
343
0
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
344
0
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
345
0
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
346
0
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
347
0
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
348
0
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
349
0
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
350
0
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
351
0
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
352
0
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
353
0
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
354
0
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
355
356
0
   while(blocks >= 8) {
357
0
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
358
0
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
359
0
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
360
0
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
361
362
0
      keyxor(K0, B0, B1, B2, B3);
363
0
      aesdec(K1, B0, B1, B2, B3);
364
0
      aesdec(K2, B0, B1, B2, B3);
365
0
      aesdec(K3, B0, B1, B2, B3);
366
0
      aesdec(K4, B0, B1, B2, B3);
367
0
      aesdec(K5, B0, B1, B2, B3);
368
0
      aesdec(K6, B0, B1, B2, B3);
369
0
      aesdec(K7, B0, B1, B2, B3);
370
0
      aesdec(K8, B0, B1, B2, B3);
371
0
      aesdec(K9, B0, B1, B2, B3);
372
0
      aesdec(K10, B0, B1, B2, B3);
373
0
      aesdec(K11, B0, B1, B2, B3);
374
0
      aesdeclast(K12, B0, B1, B2, B3);
375
376
0
      B0.store_le(out + 16 * 0);
377
0
      B1.store_le(out + 16 * 2);
378
0
      B2.store_le(out + 16 * 4);
379
0
      B3.store_le(out + 16 * 6);
380
381
0
      blocks -= 8;
382
0
      in += 8 * 16;
383
0
      out += 8 * 16;
384
0
   }
385
386
0
   while(blocks >= 2) {
387
0
      SIMD_8x32 B = SIMD_8x32::load_le(in);
388
389
0
      B ^= K0;
390
0
      aesdec(K1, B);
391
0
      aesdec(K2, B);
392
0
      aesdec(K3, B);
393
0
      aesdec(K4, B);
394
0
      aesdec(K5, B);
395
0
      aesdec(K6, B);
396
0
      aesdec(K7, B);
397
0
      aesdec(K8, B);
398
0
      aesdec(K9, B);
399
0
      aesdec(K10, B);
400
0
      aesdec(K11, B);
401
0
      aesdeclast(K12, B);
402
403
0
      B.store_le(out);
404
405
0
      in += 2 * 16;
406
0
      out += 2 * 16;
407
0
      blocks -= 2;
408
0
   }
409
410
0
   if(blocks > 0) {
411
0
      SIMD_8x32 B = SIMD_8x32::load_le128(in);
412
413
0
      B ^= K0;
414
0
      aesdec(K1, B);
415
0
      aesdec(K2, B);
416
0
      aesdec(K3, B);
417
0
      aesdec(K4, B);
418
0
      aesdec(K5, B);
419
0
      aesdec(K6, B);
420
0
      aesdec(K7, B);
421
0
      aesdec(K8, B);
422
0
      aesdec(K9, B);
423
0
      aesdec(K10, B);
424
0
      aesdec(K11, B);
425
0
      aesdeclast(K12, B);
426
427
0
      B.store_le128(out);
428
0
   }
429
0
}
430
431
0
/*
* AES-256 Encryption
*
* Encrypts `blocks` 16-byte blocks from `in` into `out`. Input is consumed
* eight blocks at a time while possible, then two at a time, and finally a
* single trailing block if one is left.
*/
BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   // Round keys 0..14, each taken from four consecutive m_EK elements
   const SIMD_8x32 rk0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
   const SIMD_8x32 rk1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
   const SIMD_8x32 rk2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
   const SIMD_8x32 rk3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
   const SIMD_8x32 rk4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
   const SIMD_8x32 rk5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
   const SIMD_8x32 rk6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
   const SIMD_8x32 rk7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
   const SIMD_8x32 rk8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
   const SIMD_8x32 rk9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
   const SIMD_8x32 rk10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
   const SIMD_8x32 rk11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
   const SIMD_8x32 rk12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
   const SIMD_8x32 rk13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
   const SIMD_8x32 rk14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);

   // Wide path: four registers (32 bytes each, so 128 bytes) per pass
   for(; blocks >= 8; blocks -= 8, in += 128, out += 128) {
      SIMD_8x32 X0 = SIMD_8x32::load_le(in);
      SIMD_8x32 X1 = SIMD_8x32::load_le(in + 32);
      SIMD_8x32 X2 = SIMD_8x32::load_le(in + 64);
      SIMD_8x32 X3 = SIMD_8x32::load_le(in + 96);

      keyxor(rk0, X0, X1, X2, X3);
      aesenc(rk1, X0, X1, X2, X3);
      aesenc(rk2, X0, X1, X2, X3);
      aesenc(rk3, X0, X1, X2, X3);
      aesenc(rk4, X0, X1, X2, X3);
      aesenc(rk5, X0, X1, X2, X3);
      aesenc(rk6, X0, X1, X2, X3);
      aesenc(rk7, X0, X1, X2, X3);
      aesenc(rk8, X0, X1, X2, X3);
      aesenc(rk9, X0, X1, X2, X3);
      aesenc(rk10, X0, X1, X2, X3);
      aesenc(rk11, X0, X1, X2, X3);
      aesenc(rk12, X0, X1, X2, X3);
      aesenc(rk13, X0, X1, X2, X3);
      aesenclast(rk14, X0, X1, X2, X3);

      X0.store_le(out);
      X1.store_le(out + 32);
      X2.store_le(out + 64);
      X3.store_le(out + 96);
   }

   // Narrow path: a single register (two blocks, 32 bytes) per pass
   for(; blocks >= 2; blocks -= 2, in += 32, out += 32) {
      SIMD_8x32 X = SIMD_8x32::load_le(in);

      X ^= rk0;
      aesenc(rk1, X);
      aesenc(rk2, X);
      aesenc(rk3, X);
      aesenc(rk4, X);
      aesenc(rk5, X);
      aesenc(rk6, X);
      aesenc(rk7, X);
      aesenc(rk8, X);
      aesenc(rk9, X);
      aesenc(rk10, X);
      aesenc(rk11, X);
      aesenc(rk12, X);
      aesenc(rk13, X);
      aesenclast(rk14, X);

      X.store_le(out);
   }

   // Nothing left over
   if(blocks == 0) {
      return;
   }

   // One trailing block: use the 128-bit load/store so only 16 bytes are touched
   SIMD_8x32 X = SIMD_8x32::load_le128(in);

   X ^= rk0;
   aesenc(rk1, X);
   aesenc(rk2, X);
   aesenc(rk3, X);
   aesenc(rk4, X);
   aesenc(rk5, X);
   aesenc(rk6, X);
   aesenc(rk7, X);
   aesenc(rk8, X);
   aesenc(rk9, X);
   aesenc(rk10, X);
   aesenc(rk11, X);
   aesenc(rk12, X);
   aesenc(rk13, X);
   aesenclast(rk14, X);

   X.store_le128(out);
}
528
529
/*
530
* AES-256 Decryption
531
*/
532
0
BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
533
0
   const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
534
0
   const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
535
0
   const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
536
0
   const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
537
0
   const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
538
0
   const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
539
0
   const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
540
0
   const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
541
0
   const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
542
0
   const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
543
0
   const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
544
0
   const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
545
0
   const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
546
0
   const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
547
0
   const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);
548
549
0
   while(blocks >= 8) {
550
0
      SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
551
0
      SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
552
0
      SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
553
0
      SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
554
555
0
      keyxor(K0, B0, B1, B2, B3);
556
0
      aesdec(K1, B0, B1, B2, B3);
557
0
      aesdec(K2, B0, B1, B2, B3);
558
0
      aesdec(K3, B0, B1, B2, B3);
559
0
      aesdec(K4, B0, B1, B2, B3);
560
0
      aesdec(K5, B0, B1, B2, B3);
561
0
      aesdec(K6, B0, B1, B2, B3);
562
0
      aesdec(K7, B0, B1, B2, B3);
563
0
      aesdec(K8, B0, B1, B2, B3);
564
0
      aesdec(K9, B0, B1, B2, B3);
565
0
      aesdec(K10, B0, B1, B2, B3);
566
0
      aesdec(K11, B0, B1, B2, B3);
567
0
      aesdec(K12, B0, B1, B2, B3);
568
0
      aesdec(K13, B0, B1, B2, B3);
569
0
      aesdeclast(K14, B0, B1, B2, B3);
570
571
0
      B0.store_le(out + 16 * 0);
572
0
      B1.store_le(out + 16 * 2);
573
0
      B2.store_le(out + 16 * 4);
574
0
      B3.store_le(out + 16 * 6);
575
576
0
      blocks -= 8;
577
0
      in += 8 * 16;
578
0
      out += 8 * 16;
579
0
   }
580
581
0
   while(blocks >= 2) {
582
0
      SIMD_8x32 B = SIMD_8x32::load_le(in);
583
584
0
      B ^= K0;
585
0
      aesdec(K1, B);
586
0
      aesdec(K2, B);
587
0
      aesdec(K3, B);
588
0
      aesdec(K4, B);
589
0
      aesdec(K5, B);
590
0
      aesdec(K6, B);
591
0
      aesdec(K7, B);
592
0
      aesdec(K8, B);
593
0
      aesdec(K9, B);
594
0
      aesdec(K10, B);
595
0
      aesdec(K11, B);
596
0
      aesdec(K12, B);
597
0
      aesdec(K13, B);
598
0
      aesdeclast(K14, B);
599
600
0
      B.store_le(out);
601
602
0
      in += 2 * 16;
603
0
      out += 2 * 16;
604
0
      blocks -= 2;
605
0
   }
606
607
0
   if(blocks > 0) {
608
0
      SIMD_8x32 B = SIMD_8x32::load_le128(in);
609
610
0
      B ^= K0;
611
0
      aesdec(K1, B);
612
0
      aesdec(K2, B);
613
0
      aesdec(K3, B);
614
0
      aesdec(K4, B);
615
0
      aesdec(K5, B);
616
0
      aesdec(K6, B);
617
0
      aesdec(K7, B);
618
0
      aesdec(K8, B);
619
0
      aesdec(K9, B);
620
0
      aesdec(K10, B);
621
0
      aesdec(K11, B);
622
0
      aesdec(K12, B);
623
0
      aesdec(K13, B);
624
0
      aesdeclast(K14, B);
625
626
0
      B.store_le128(out);
627
0
   }
628
0
}
629
630
}  // namespace Botan