/src/botan/src/lib/block/aes/aes.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 1999-2010,2015,2017,2018,2020 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/aes.h> |
8 | | #include <botan/loadstor.h> |
9 | | #include <botan/cpuid.h> |
10 | | #include <botan/rotate.h> |
11 | | #include <botan/internal/bit_ops.h> |
12 | | #include <botan/internal/ct_utils.h> |
13 | | |
14 | | namespace Botan { |
15 | | |
16 | | #if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI) |
17 | | #define BOTAN_HAS_HW_AES_SUPPORT |
18 | | #endif |
19 | | |
20 | | /* |
21 | | * One of three AES implementation strategies are used to get a constant time |
22 | | * implementation which is immune to common cache/timing based side channels: |
23 | | * |
24 | | * - If AES hardware support is available (AES-NI, POWER8, Aarch64) use that |
25 | | * |
26 | | * - If 128-bit SIMD with byte shuffles are available (SSSE3, NEON, or Altivec), |
27 | | * use the vperm technique published by Mike Hamburg at CHES 2009. |
28 | | * |
29 | | * - If no hardware or SIMD support, fall back to a constant time bitsliced |
30 | | * implementation. This uses 32-bit words resulting in 2 blocks being processed |
31 | | * in parallel. Moving to 4 blocks (with 64-bit words) would approximately |
32 | | * double performance on 64-bit CPUs. Likewise moving to 128 bit SIMD would |
33 | | * again approximately double performance vs 64-bit. However the assumption is |
34 | | * that most 64-bit CPUs either have hardware AES or SIMD shuffle support and |
35 | | * that the majority of users falling back to this code will be 32-bit cores. |
36 | | * If this assumption proves to be unsound, the bitsliced code can easily be |
37 | | * extended to operate on either 32 or 64 bit words depending on the native |
38 | | * wordsize of the target processor. |
39 | | * |
40 | | * Useful references |
41 | | * |
42 | | * - "Accelerating AES with Vector Permute Instructions" Mike Hamburg |
43 | | * https://www.shiftleft.org/papers/vector_aes/vector_aes.pdf |
44 | | * |
45 | | * - "Faster and Timing-Attack Resistant AES-GCM" Käsper and Schwabe |
46 | | * https://eprint.iacr.org/2009/129.pdf |
47 | | * |
48 | | * - "A new combinational logic minimization technique with applications to cryptology." |
49 | | * Boyar and Peralta https://eprint.iacr.org/2009/191.pdf |
50 | | * |
51 | | * - "A depth-16 circuit for the AES S-box" Boyar and Peralta |
52 | | * https://eprint.iacr.org/2011/332.pdf |
53 | | * |
54 | | * - "A Very Compact S-box for AES" Canright |
55 | | * https://www.iacr.org/archive/ches2005/032.pdf |
56 | | * https://core.ac.uk/download/pdf/36694529.pdf (extended) |
57 | | */ |
58 | | |
59 | | namespace { |
60 | | |
61 | | /* |
62 | | This is an AES sbox circuit which can execute in bitsliced mode up to 32x in |
63 | | parallel. |
64 | | |
65 | | The circuit is from the "Circuit Minimization Team" group |
66 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html |
67 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt |
68 | | |
69 | | This circuit has size 113 and depth 27. In software it is much faster than |
70 | | circuits which are considered faster for hardware purposes (where circuit depth |
71 | | is the critical constraint), because unlike in hardware, on common CPUs we can |
72 | | only execute - at best - 3 or 4 logic operations per cycle. So a smaller circuit |
73 | | is superior. On an x86-64 machine this circuit is about 15% faster than the |
74 | | circuit of size 128 and depth 16 given in "A depth-16 circuit for the AES S-box". |
75 | | |
76 | | Another circuit for AES Sbox of size 102 and depth 24 is describted in "New |
77 | | Circuit Minimization Techniques for Smaller and Faster AES SBoxes" |
78 | | [https://eprint.iacr.org/2019/802] however it relies on "non-standard" gates |
79 | | like MUX, NOR, NAND, etc and so in practice in bitsliced software, its size is |
80 | | actually a bit larger than this circuit, as few CPUs have such instructions and |
81 | | otherwise they must be emulated using a sequence of available bit operations. |
82 | | */ |
/*
* Apply the AES S-box to each of the 32 bitsliced nibble positions in V.
* V[0] holds the most significant bit plane of each byte, V[7] the least.
* The gate sequence below must be kept exactly as published; it is the
* size-113 Boyar-Peralta circuit referenced in the comment above.
*/
void AES_SBOX(uint32_t V[8])
   {
   const uint32_t U0 = V[0];
   const uint32_t U1 = V[1];
   const uint32_t U2 = V[2];
   const uint32_t U3 = V[3];
   const uint32_t U4 = V[4];
   const uint32_t U5 = V[5];
   const uint32_t U6 = V[6];
   const uint32_t U7 = V[7];

   // Top linear transform: shared subexpressions of the input bits
   const uint32_t y14 = U3 ^ U5;
   const uint32_t y13 = U0 ^ U6;
   const uint32_t y9 = U0 ^ U3;
   const uint32_t y8 = U0 ^ U5;
   const uint32_t t0 = U1 ^ U2;
   const uint32_t y1 = t0 ^ U7;
   const uint32_t y4 = y1 ^ U3;
   const uint32_t y12 = y13 ^ y14;
   const uint32_t y2 = y1 ^ U0;
   const uint32_t y5 = y1 ^ U6;
   const uint32_t y3 = y5 ^ y8;
   const uint32_t t1 = U4 ^ y12;
   const uint32_t y15 = t1 ^ U5;
   const uint32_t y20 = t1 ^ U1;
   const uint32_t y6 = y15 ^ U7;
   const uint32_t y10 = y15 ^ t0;
   const uint32_t y11 = y20 ^ y9;
   const uint32_t y7 = U7 ^ y11;
   const uint32_t y17 = y10 ^ y11;
   const uint32_t y19 = y10 ^ y8;
   const uint32_t y16 = t0 ^ y11;
   const uint32_t y21 = y13 ^ y16;
   const uint32_t y18 = U0 ^ y16;

   // Nonlinear middle layer: the AND gates of the shared inversion
   const uint32_t t2 = y12 & y15;
   const uint32_t t3 = y3 & y6;
   const uint32_t t4 = t3 ^ t2;
   const uint32_t t5 = y4 & U7;
   const uint32_t t6 = t5 ^ t2;
   const uint32_t t7 = y13 & y16;
   const uint32_t t8 = y5 & y1;
   const uint32_t t9 = t8 ^ t7;
   const uint32_t t10 = y2 & y7;
   const uint32_t t11 = t10 ^ t7;
   const uint32_t t12 = y9 & y11;
   const uint32_t t13 = y14 & y17;
   const uint32_t t14 = t13 ^ t12;
   const uint32_t t15 = y8 & y10;
   const uint32_t t16 = t15 ^ t12;
   const uint32_t t17 = t4 ^ y20;
   const uint32_t t18 = t6 ^ t16;
   const uint32_t t19 = t9 ^ t14;
   const uint32_t t20 = t11 ^ t16;
   const uint32_t t21 = t17 ^ t14;
   const uint32_t t22 = t18 ^ y19;
   const uint32_t t23 = t19 ^ y21;
   const uint32_t t24 = t20 ^ y18;
   const uint32_t t25 = t21 ^ t22;
   const uint32_t t26 = t21 & t23;
   const uint32_t t27 = t24 ^ t26;
   const uint32_t t28 = t25 & t27;
   const uint32_t t29 = t28 ^ t22;
   const uint32_t t30 = t23 ^ t24;
   const uint32_t t31 = t22 ^ t26;
   const uint32_t t32 = t31 & t30;
   const uint32_t t33 = t32 ^ t24;
   const uint32_t t34 = t23 ^ t33;
   const uint32_t t35 = t27 ^ t33;
   const uint32_t t36 = t24 & t35;
   const uint32_t t37 = t36 ^ t34;
   const uint32_t t38 = t27 ^ t36;
   const uint32_t t39 = t29 & t38;
   const uint32_t t40 = t25 ^ t39;
   const uint32_t t41 = t40 ^ t37;
   const uint32_t t42 = t29 ^ t33;
   const uint32_t t43 = t29 ^ t40;
   const uint32_t t44 = t33 ^ t37;
   const uint32_t t45 = t42 ^ t41;
   const uint32_t z0 = t44 & y15;
   const uint32_t z1 = t37 & y6;
   const uint32_t z2 = t33 & U7;
   const uint32_t z3 = t43 & y16;
   const uint32_t z4 = t40 & y1;
   const uint32_t z5 = t29 & y7;
   const uint32_t z6 = t42 & y11;
   const uint32_t z7 = t45 & y17;
   const uint32_t z8 = t41 & y10;
   const uint32_t z9 = t44 & y12;
   const uint32_t z10 = t37 & y3;
   const uint32_t z11 = t33 & y4;
   const uint32_t z12 = t43 & y13;
   const uint32_t z13 = t40 & y5;
   const uint32_t z14 = t29 & y2;
   const uint32_t z15 = t42 & y9;
   const uint32_t z16 = t45 & y14;
   const uint32_t z17 = t41 & y8;

   // Bottom linear transform producing the output bit planes; the NOT
   // gates fold in the constant part of the S-box's affine step
   const uint32_t tc1 = z15 ^ z16;
   const uint32_t tc2 = z10 ^ tc1;
   const uint32_t tc3 = z9 ^ tc2;
   const uint32_t tc4 = z0 ^ z2;
   const uint32_t tc5 = z1 ^ z0;
   const uint32_t tc6 = z3 ^ z4;
   const uint32_t tc7 = z12 ^ tc4;
   const uint32_t tc8 = z7 ^ tc6;
   const uint32_t tc9 = z8 ^ tc7;
   const uint32_t tc10 = tc8 ^ tc9;
   const uint32_t tc11 = tc6 ^ tc5;
   const uint32_t tc12 = z3 ^ z5;
   const uint32_t tc13 = z13 ^ tc1;
   const uint32_t tc14 = tc4 ^ tc12;
   const uint32_t S3 = tc3 ^ tc11;
   const uint32_t tc16 = z6 ^ tc8;
   const uint32_t tc17 = z14 ^ tc10;
   const uint32_t tc18 = ~tc13 ^ tc14;
   const uint32_t S7 = z12 ^ tc18;
   const uint32_t tc20 = z15 ^ tc16;
   const uint32_t tc21 = tc2 ^ z11;
   const uint32_t S0 = tc3 ^ tc16;
   const uint32_t S6 = tc10 ^ tc18;
   const uint32_t S4 = tc14 ^ S3;
   const uint32_t S1 = ~(S3 ^ tc16);
   const uint32_t tc26 = tc17 ^ tc20;
   const uint32_t S2 = ~(tc26 ^ z17);
   const uint32_t S5 = tc21 ^ tc17;

   V[0] = S0;
   V[1] = S1;
   V[2] = S2;
   V[3] = S3;
   V[4] = S4;
   V[5] = S5;
   V[6] = S6;
   V[7] = S7;
   }
217 | | |
218 | | /* |
219 | | A circuit for inverse AES Sbox of size 121 and depth 21 from |
220 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html |
221 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/Sinv.txt |
222 | | */ |
/*
* Apply the inverse AES S-box to each bitsliced byte position in V,
* using the size-121/depth-21 circuit cited in the comment above.
* As with AES_SBOX, the gate order is taken verbatim from the published
* circuit and must not be reorganized.
*/
void AES_INV_SBOX(uint32_t V[8])
   {
   const uint32_t U0 = V[0];
   const uint32_t U1 = V[1];
   const uint32_t U2 = V[2];
   const uint32_t U3 = V[3];
   const uint32_t U4 = V[4];
   const uint32_t U5 = V[5];
   const uint32_t U6 = V[6];
   const uint32_t U7 = V[7];

   // Input linear layer; the NOT gates undo the affine constant of the
   // forward S-box before the inversion
   const uint32_t Y0 = U0 ^ U3;
   const uint32_t Y2 = ~(U1 ^ U3);
   const uint32_t Y4 = U0 ^ Y2;
   const uint32_t RTL0 = U6 ^ U7;
   const uint32_t Y1 = Y2 ^ RTL0;
   const uint32_t Y7 = ~(U2 ^ Y1);
   const uint32_t RTL1 = U3 ^ U4;
   const uint32_t Y6 = ~(U7 ^ RTL1);
   const uint32_t Y3 = Y1 ^ RTL1;
   const uint32_t RTL2 = ~(U0 ^ U2);
   const uint32_t Y5 = U5 ^ RTL2;
   const uint32_t sa1 = Y0 ^ Y2;
   const uint32_t sa0 = Y1 ^ Y3;
   const uint32_t sb1 = Y4 ^ Y6;
   const uint32_t sb0 = Y5 ^ Y7;
   const uint32_t ah = Y0 ^ Y1;
   const uint32_t al = Y2 ^ Y3;
   const uint32_t aa = sa0 ^ sa1;
   const uint32_t bh = Y4 ^ Y5;
   const uint32_t bl = Y6 ^ Y7;
   const uint32_t bb = sb0 ^ sb1;
   const uint32_t ab20 = sa0 ^ sb0;
   const uint32_t ab22 = al ^ bl;
   const uint32_t ab23 = Y3 ^ Y7;
   const uint32_t ab21 = sa1 ^ sb1;

   // Nonlinear middle layer (shared field inversion)
   const uint32_t abcd1 = ah & bh;
   const uint32_t rr1 = Y0 & Y4;
   const uint32_t ph11 = ab20 ^ abcd1;
   const uint32_t t01 = Y1 & Y5;
   const uint32_t ph01 = t01 ^ abcd1;
   const uint32_t abcd2 = al & bl;
   const uint32_t r1 = Y2 & Y6;
   const uint32_t pl11 = ab22 ^ abcd2;
   const uint32_t r2 = Y3 & Y7;
   const uint32_t pl01 = r2 ^ abcd2;
   const uint32_t r3 = sa0 & sb0;
   const uint32_t vr1 = aa & bb;
   const uint32_t pr1 = vr1 ^ r3;
   const uint32_t wr1 = sa1 & sb1;
   const uint32_t qr1 = wr1 ^ r3;
   const uint32_t ab0 = ph11 ^ rr1;
   const uint32_t ab1 = ph01 ^ ab21;
   const uint32_t ab2 = pl11 ^ r1;
   const uint32_t ab3 = pl01 ^ qr1;
   const uint32_t cp1 = ab0 ^ pr1;
   const uint32_t cp2 = ab1 ^ qr1;
   const uint32_t cp3 = ab2 ^ pr1;
   const uint32_t cp4 = ab3 ^ ab23;
   const uint32_t tinv1 = cp3 ^ cp4;
   const uint32_t tinv2 = cp3 & cp1;
   const uint32_t tinv3 = cp2 ^ tinv2;
   const uint32_t tinv4 = cp1 ^ cp2;
   const uint32_t tinv5 = cp4 ^ tinv2;
   const uint32_t tinv6 = tinv5 & tinv4;
   const uint32_t tinv7 = tinv3 & tinv1;
   const uint32_t d2 = cp4 ^ tinv7;
   const uint32_t d0 = cp2 ^ tinv6;
   const uint32_t tinv8 = cp1 & cp4;
   const uint32_t tinv9 = tinv4 & tinv8;
   const uint32_t tinv10 = tinv4 ^ tinv2;
   const uint32_t d1 = tinv9 ^ tinv10;
   const uint32_t tinv11 = cp2 & cp3;
   const uint32_t tinv12 = tinv1 & tinv11;
   const uint32_t tinv13 = tinv1 ^ tinv2;
   const uint32_t d3 = tinv12 ^ tinv13;
   const uint32_t sd1 = d1 ^ d3;
   const uint32_t sd0 = d0 ^ d2;
   const uint32_t dl = d0 ^ d1;
   const uint32_t dh = d2 ^ d3;
   const uint32_t dd = sd0 ^ sd1;
   const uint32_t abcd3 = dh & bh;
   const uint32_t rr2 = d3 & Y4;
   const uint32_t t02 = d2 & Y5;
   const uint32_t abcd4 = dl & bl;
   const uint32_t r4 = d1 & Y6;
   const uint32_t r5 = d0 & Y7;
   const uint32_t r6 = sd0 & sb0;
   const uint32_t vr2 = dd & bb;
   const uint32_t wr2 = sd1 & sb1;
   const uint32_t abcd5 = dh & ah;
   const uint32_t r7 = d3 & Y0;
   const uint32_t r8 = d2 & Y1;
   const uint32_t abcd6 = dl & al;
   const uint32_t r9 = d1 & Y2;
   const uint32_t r10 = d0 & Y3;
   const uint32_t r11 = sd0 & sa0;
   const uint32_t vr3 = dd & aa;
   const uint32_t wr3 = sd1 & sa1;

   // Output linear layer combining the partial products into bit planes
   const uint32_t ph12 = rr2 ^ abcd3;
   const uint32_t ph02 = t02 ^ abcd3;
   const uint32_t pl12 = r4 ^ abcd4;
   const uint32_t pl02 = r5 ^ abcd4;
   const uint32_t pr2 = vr2 ^ r6;
   const uint32_t qr2 = wr2 ^ r6;
   const uint32_t p0 = ph12 ^ pr2;
   const uint32_t p1 = ph02 ^ qr2;
   const uint32_t p2 = pl12 ^ pr2;
   const uint32_t p3 = pl02 ^ qr2;
   const uint32_t ph13 = r7 ^ abcd5;
   const uint32_t ph03 = r8 ^ abcd5;
   const uint32_t pl13 = r9 ^ abcd6;
   const uint32_t pl03 = r10 ^ abcd6;
   const uint32_t pr3 = vr3 ^ r11;
   const uint32_t qr3 = wr3 ^ r11;
   const uint32_t p4 = ph13 ^ pr3;
   const uint32_t S7 = ph03 ^ qr3;
   const uint32_t p6 = pl13 ^ pr3;
   const uint32_t p7 = pl03 ^ qr3;
   const uint32_t S3 = p1 ^ p6;
   const uint32_t S6 = p2 ^ p6;
   const uint32_t S0 = p3 ^ p6;
   const uint32_t X11 = p0 ^ p2;
   const uint32_t S5 = S0 ^ X11;
   const uint32_t X13 = p4 ^ p7;
   const uint32_t X14 = X11 ^ X13;
   const uint32_t S1 = S3 ^ X14;
   const uint32_t X16 = p1 ^ S7;
   const uint32_t S2 = X14 ^ X16;
   const uint32_t X18 = p0 ^ p4;
   const uint32_t X19 = S5 ^ X16;
   const uint32_t S4 = X18 ^ X19;

   V[0] = S0;
   V[1] = S1;
   V[2] = S2;
   V[3] = S3;
   V[4] = S4;
   V[5] = S5;
   V[6] = S6;
   V[7] = S7;
   }
365 | | |
366 | | inline void bit_transpose(uint32_t B[8]) |
367 | 0 | { |
368 | 0 | swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1); |
369 | 0 | swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1); |
370 | 0 | swap_bits<uint32_t>(B[5], B[4], 0x55555555, 1); |
371 | 0 | swap_bits<uint32_t>(B[7], B[6], 0x55555555, 1); |
372 | 0 |
|
373 | 0 | swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2); |
374 | 0 | swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2); |
375 | 0 | swap_bits<uint32_t>(B[6], B[4], 0x33333333, 2); |
376 | 0 | swap_bits<uint32_t>(B[7], B[5], 0x33333333, 2); |
377 | 0 |
|
378 | 0 | swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4); |
379 | 0 | swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4); |
380 | 0 | swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4); |
381 | 0 | swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4); |
382 | 0 | } |
383 | | |
/*
* Expand four round key words K[r..r+4] into the bitsliced form used by
* the round loop, writing the 8-word result to B.
*/
inline void ks_expand(uint32_t B[8], const uint32_t K[], size_t r)
   {
   /*
   This is bit_transpose of K[r..r+4] || K[r..r+4], we can save some computation
   due to knowing the first and second halves are the same data.
   */
   for(size_t i = 0; i != 4; ++i)
      B[i] = K[r + i];

   // First two delta-swap stages only touch words within each half, so
   // they need to be done just once on the four distinct words
   swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
   swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);

   swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
   swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);

   // Duplicate into the upper half, then run the final stage that mixes
   // the two halves together
   B[4] = B[0];
   B[5] = B[1];
   B[6] = B[2];
   B[7] = B[3];

   swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
   swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
   swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
   swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
   }
409 | | |
/*
* Bitsliced ShiftRows: in this representation the byte rotations of the
* AES rows become a fixed permutation of the bits within every slice
* word, applied identically to all 8 words.
*/
inline void shift_rows(uint32_t B[8])
   {
   // 3 0 1 2 7 4 5 6 10 11 8 9 14 15 12 13 17 18 19 16 21 22 23 20 24 25 26 27 28 29 30 31
#if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
   // Permute two 32-bit slices at a time using 64-bit bit_permute_step
   for(size_t i = 0; i != 8; i += 2)
      {
      uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i+1];
      x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
      x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
      B[i] = static_cast<uint32_t>(x >> 32);
      B[i+1] = static_cast<uint32_t>(x);
      }
#else
   for(size_t i = 0; i != 8; ++i)
      {
      uint32_t x = B[i];
      x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
      x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
      B[i] = x;
      }
#endif
   }
432 | | |
/*
* Bitsliced InvShiftRows: exact inverse of shift_rows, obtained by
* applying the same two bit_permute_step operations in reverse order.
*/
inline void inv_shift_rows(uint32_t B[8])
   {
   // Inverse of shift_rows, just inverting the steps

#if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
   // Permute two 32-bit slices at a time using 64-bit bit_permute_step
   for(size_t i = 0; i != 8; i += 2)
      {
      uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i+1];
      x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
      x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
      B[i] = static_cast<uint32_t>(x >> 32);
      B[i+1] = static_cast<uint32_t>(x);
      }
#else
   for(size_t i = 0; i != 8; ++i)
      {
      uint32_t x = B[i];
      x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
      x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
      B[i] = x;
      }
#endif
   }
456 | | |
457 | | inline void mix_columns(uint32_t B[8]) |
458 | 0 | { |
459 | 0 | // carry high bits in B[0] to positions in 0x1b == 0b11011 |
460 | 0 | const uint32_t X2[8] = { |
461 | 0 | B[1], |
462 | 0 | B[2], |
463 | 0 | B[3], |
464 | 0 | B[4] ^ B[0], |
465 | 0 | B[5] ^ B[0], |
466 | 0 | B[6], |
467 | 0 | B[7] ^ B[0], |
468 | 0 | B[0], |
469 | 0 | }; |
470 | 0 |
|
471 | 0 | for(size_t i = 0; i != 8; i++) |
472 | 0 | { |
473 | 0 | const uint32_t X3 = B[i] ^ X2[i]; |
474 | 0 | B[i] = X2[i] ^ rotr<8>(B[i]) ^ rotr<16>(B[i]) ^ rotr<24>(X3); |
475 | 0 | } |
476 | 0 | } |
477 | | |
/*
* Bitsliced InvMixColumns, expressed as a multiply by (05,00,04,00)
* followed by the forward MixColumns.
*/
void inv_mix_columns(uint32_t B[8])
   {
   /*
   OpenSSL's bsaes implementation credits Jussi Kivilinna with the lovely
   matrix decomposition

   | 0e 0b 0d 09 |   | 02 03 01 01 |   | 05 00 04 00 |
   | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
   | 0d 09 0e 0b |   | 01 01 02 03 |   | 04 00 05 00 |
   | 0b 0d 09 0e |   | 03 01 01 02 |   | 00 04 00 05 |

   Notice the first component is simply the MixColumns matrix. So we can
   multiply first by (05,00,04,00) then perform MixColumns to get the equivalent
   of InvMixColumn.
   */

   // X4 = 4*B in bitsliced form: shift the planes down twice, folding the
   // two carried-out high planes (B[0], B[1]) back in per the modulus
   const uint32_t X4[8] = {
      B[2],
      B[3],
      B[4] ^ B[0],
      B[5] ^ B[0] ^ B[1],
      B[6] ^ B[1],
      B[7] ^ B[0],
      B[0] ^ B[1],
      B[1],
   };

   // Multiply by (05,00,04,00): 5*c ^ rot2(4*c), since 5 = 4 ^ 1
   for(size_t i = 0; i != 8; i++)
      {
      const uint32_t X5 = X4[i] ^ B[i];
      B[i] = X5 ^ rotr<16>(X4[i]);
      }

   // Then the forward MixColumns completes the inverse transform
   mix_columns(B);
   }
512 | | |
513 | | /* |
514 | | * AES Encryption |
515 | | */ |
/*
* Bitsliced constant-time AES encryption of `blocks` 16-byte blocks.
* EK is the expanded encryption key (44/52/60 words for AES-128/192/256).
*/
void aes_encrypt_n(const uint8_t in[], uint8_t out[],
                   size_t blocks,
                   const secure_vector<uint32_t>& EK)
   {
   BOTAN_ASSERT(EK.size() == 44 || EK.size() == 52 || EK.size() == 60, "Key was set");

   // 10, 12 or 14 rounds depending on key length
   const size_t rounds = (EK.size() - 4) / 4;

   // Pre-bitslice the middle round keys; the first and last round keys
   // are applied below in the untransposed domain
   uint32_t KS[13*8] = { 0 }; // actual maximum is (rounds - 1) * 8
   for(size_t i = 0; i < rounds - 1; i += 1)
      {
      ks_expand(&KS[8*i], EK.data(), 4*i + 4);
      }

   const size_t BLOCK_SIZE = 16;
   const size_t BITSLICED_BLOCKS = 8*sizeof(uint32_t) / BLOCK_SIZE;

   while(blocks > 0)
      {
      // The 8x32-bit state holds up to 2 blocks processed in parallel
      const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);

      uint32_t B[8] = { 0 };

      load_be(B, in, this_loop*4);

      // Initial AddRoundKey, applied before transposing to bit planes
      for(size_t i = 0; i != 8; ++i)
         B[i] ^= EK[i % 4];

      bit_transpose(B);

      for(size_t r = 0; r != rounds - 1; ++r)
         {
         AES_SBOX(B);
         shift_rows(B);
         mix_columns(B);

         for(size_t i = 0; i != 8; ++i)
            B[i] ^= KS[8*r + i];
         }

      // Final round: omits MixColumns, and the last round key is added
      // after transposing back to the normal representation
      AES_SBOX(B);
      shift_rows(B);
      bit_transpose(B);

      for(size_t i = 0; i != 8; ++i)
         B[i] ^= EK[4*rounds + i % 4];

      copy_out_be(out, this_loop*4*sizeof(uint32_t), B);

      in += this_loop * BLOCK_SIZE;
      out += this_loop * BLOCK_SIZE;
      blocks -= this_loop;
      }
   }
571 | | |
572 | | /* |
573 | | * AES Decryption |
574 | | */ |
/*
* Bitsliced constant-time AES decryption of `blocks` 16-byte blocks.
* DK is the expanded decryption key schedule (equivalent inverse cipher
* form, built by aes_key_schedule below), mirroring aes_encrypt_n.
*/
void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks,
                   const secure_vector<uint32_t>& DK)
   {
   BOTAN_ASSERT(DK.size() == 44 || DK.size() == 52 || DK.size() == 60, "Key was set");

   // 10, 12 or 14 rounds depending on key length
   const size_t rounds = (DK.size() - 4) / 4;

   // Pre-bitslice the middle round keys
   uint32_t KS[13*8] = { 0 }; // actual maximum is (rounds - 1) * 8
   for(size_t i = 0; i < rounds - 1; i += 1)
      {
      ks_expand(&KS[8*i], DK.data(), 4*i + 4);
      }

   const size_t BLOCK_SIZE = 16;
   const size_t BITSLICED_BLOCKS = 8*sizeof(uint32_t) / BLOCK_SIZE;

   while(blocks > 0)
      {
      // The 8x32-bit state holds up to 2 blocks processed in parallel
      const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);

      uint32_t B[8] = { 0 };

      load_be(B, in, this_loop*4);

      // Initial AddRoundKey, applied before transposing to bit planes
      for(size_t i = 0; i != 8; ++i)
         B[i] ^= DK[i % 4];

      bit_transpose(B);

      for(size_t r = 0; r != rounds - 1; ++r)
         {
         AES_INV_SBOX(B);
         inv_shift_rows(B);
         inv_mix_columns(B);

         for(size_t i = 0; i != 8; ++i)
            B[i] ^= KS[8*r + i];
         }

      // Final round: omits InvMixColumns, and the last round key is
      // added after transposing back to the normal representation
      AES_INV_SBOX(B);
      inv_shift_rows(B);
      bit_transpose(B);

      for(size_t i = 0; i != 8; ++i)
         B[i] ^= DK[4*rounds + i % 4];

      copy_out_be(out, this_loop*4*sizeof(uint32_t), B);

      in += this_loop * BLOCK_SIZE;
      out += this_loop * BLOCK_SIZE;
      blocks -= this_loop;
      }
   }
629 | | |
/*
* Multiply s by x (i.e. by 0x02) in GF(2^8) with the AES reduction
* polynomial. The constant 0x1B is mixed in branch-free ((s >> 7) is
* either 0 or 1) so the operation stays constant time.
*/
inline constexpr uint8_t xtime(uint8_t s) { return static_cast<uint8_t>((s << 1) ^ (0x1B * (s >> 7))); }
631 | | |
632 | | inline uint32_t InvMixColumn(uint8_t s1) |
633 | 0 | { |
634 | 0 | const uint8_t s2 = xtime(s1); |
635 | 0 | const uint8_t s4 = xtime(s2); |
636 | 0 | const uint8_t s8 = xtime(s4); |
637 | 0 | const uint8_t s9 = s8 ^ s1; |
638 | 0 | const uint8_t s11 = s9 ^ s2; |
639 | 0 | const uint8_t s13 = s9 ^ s4; |
640 | 0 | const uint8_t s14 = s8 ^ s4 ^ s2; |
641 | 0 | return make_uint32(s14, s9, s13, s11); |
642 | 0 | } |
643 | | |
644 | | uint32_t SE_word(uint32_t x) |
645 | 0 | { |
646 | 0 | uint32_t I[8] = { 0 }; |
647 | 0 |
|
648 | 0 | for(size_t i = 0; i != 8; ++i) |
649 | 0 | I[i] = (x >> (7-i)) & 0x01010101; |
650 | 0 |
|
651 | 0 | AES_SBOX(I); |
652 | 0 |
|
653 | 0 | x = 0; |
654 | 0 |
|
655 | 0 | for(size_t i = 0; i != 8; ++i) |
656 | 0 | x |= ((I[i] & 0x01010101) << (7-i)); |
657 | 0 |
|
658 | 0 | return x; |
659 | 0 | } |
660 | | |
/*
* Expand an AES key of `length` bytes (16, 24 or 32) into the encryption
* schedule EK and the decryption schedule DK (equivalent inverse cipher
* form). If bswap_keys is set, both schedules are byte-reversed at the
* end for hardware AES implementations on little-endian CPUs.
*/
void aes_key_schedule(const uint8_t key[], size_t length,
                      secure_vector<uint32_t>& EK,
                      secure_vector<uint32_t>& DK,
                      bool bswap_keys = false)
   {
   // Round constants: successive doublings in GF(2^8), in the top byte
   static const uint32_t RC[10] = {
      0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
      0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000 };

   // X is the number of 32-bit words in the key (Nk): 4, 6 or 8
   const size_t X = length / 4;

   // Can't happen, but make static analyzers happy
   BOTAN_ASSERT_NOMSG(X == 4 || X == 6 || X == 8);

   const size_t rounds = (length / 4) + 6;

   // Mark the key bytes secret for constant-time analysis tooling
   CT::poison(key, length);

   // 4*(rounds+1) words == length + 28 for all three key sizes
   EK.resize(length + 28);
   DK.resize(length + 28);

   for(size_t i = 0; i != X; ++i)
      EK[i] = load_be<uint32_t>(key, i);

   // Standard AES key expansion, one group of X words per iteration;
   // SE_word applies the S-box via the bitsliced circuit so the key is
   // never used as a table index
   for(size_t i = X; i < 4*(rounds+1); i += X)
      {
      EK[i] = EK[i-X] ^ RC[(i-X)/X] ^ rotl<8>(SE_word(EK[i-1]));

      for(size_t j = 1; j != X && (i+j) < EK.size(); ++j)
         {
         EK[i+j] = EK[i+j-X];

         // AES-256 applies an extra S-box substitution mid-group
         if(X == 8 && j == 4)
            EK[i+j] ^= SE_word(EK[i+j-1]);
         else
            EK[i+j] ^= EK[i+j-1];
         }
      }

   // Build DK for the equivalent inverse cipher: round keys in reverse
   // order, with InvMixColumn applied to all but the first and last
   DK[0] = EK[4*rounds  ];
   DK[1] = EK[4*rounds+1];
   DK[2] = EK[4*rounds+2];
   DK[3] = EK[4*rounds+3];

   for(size_t i = 4; i != 4*rounds; ++i)
      {
      const uint32_t K = EK[4*rounds - 4*(i/4) + (i%4)];
      const uint8_t s0 = get_byte(0, K);
      const uint8_t s1 = get_byte(1, K);
      const uint8_t s2 = get_byte(2, K);
      const uint8_t s3 = get_byte(3, K);

      DK[i] = InvMixColumn(s0) ^
         rotr<8>(InvMixColumn(s1)) ^
         rotr<16>(InvMixColumn(s2)) ^
         rotr<24>(InvMixColumn(s3));
      }

   DK[4*rounds  ] = EK[0];
   DK[4*rounds+1] = EK[1];
   DK[4*rounds+2] = EK[2];
   DK[4*rounds+3] = EK[3];

   if(bswap_keys)
      {
      // HW AES on little endian needs the subkeys to be byte reversed
      for(size_t i = 0; i != EK.size(); ++i)
         EK[i] = reverse_bytes(EK[i]);
      for(size_t i = 0; i != DK.size(); ++i)
         DK[i] = reverse_bytes(DK[i]);
      }

   CT::unpoison(EK.data(), EK.size());
   CT::unpoison(DK.data(), DK.size());
   CT::unpoison(key, length);
   }
737 | | |
/*
* Report how many blocks the selected AES implementation prefers to
* process at once; the dispatch order matches encrypt_n/decrypt_n.
*/
size_t aes_parallelism()
   {
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes())
      {
      return 4; // pipelined
      }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm())
      {
      return 2; // pipelined
      }
#endif

   // bitsliced: 2 blocks fit in the 8x32-bit state
   return 2;
   }
757 | | |
/*
* Name the implementation strategy that will be used at runtime, in the
* same priority order as the dispatch in encrypt_n/decrypt_n:
* hardware AES ("cpu"), vector permute ("vperm"), or bitsliced ("base").
*/
const char* aes_provider()
   {
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes())
      {
      return "cpu";
      }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm())
      {
      return "vperm";
      }
#endif

   return "base";
   }
776 | | |
777 | | } |
778 | | |
779 | 0 | std::string AES_128::provider() const { return aes_provider(); } |
780 | 0 | std::string AES_192::provider() const { return aes_provider(); } |
781 | 0 | std::string AES_256::provider() const { return aes_provider(); } |
782 | | |
783 | 878 | size_t AES_128::parallelism() const { return aes_parallelism(); } |
784 | 0 | size_t AES_192::parallelism() const { return aes_parallelism(); } |
785 | 1.71k | size_t AES_256::parallelism() const { return aes_parallelism(); } |
786 | | |
/*
* Encrypt `blocks` 16-byte blocks, dispatching to the fastest available
* constant-time backend: hardware AES, vector permute, then bitsliced.
*/
void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   verify_key_set(m_EK.empty() == false);

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes())
      {
      return hw_aes_encrypt_n(in, out, blocks);
      }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm())
      {
      return vperm_encrypt_n(in, out, blocks);
      }
#endif

   // Portable bitsliced fallback
   aes_encrypt_n(in, out, blocks, m_EK);
   }
807 | | |
/*
* Decrypt `blocks` 16-byte blocks, with the same backend dispatch order
* as encrypt_n.
*/
void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   verify_key_set(m_DK.empty() == false);

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes())
      {
      return hw_aes_decrypt_n(in, out, blocks);
      }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm())
      {
      return vperm_decrypt_n(in, out, blocks);
      }
#endif

   // Portable bitsliced fallback
   aes_decrypt_n(in, out, blocks, m_DK);
   }
828 | | |
/*
* Set up the key schedule, using the same backend selection as the
* encryption path so the subkey layout matches what encrypt_n expects.
*/
void AES_128::key_schedule(const uint8_t key[], size_t length)
   {
#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni())
      {
      return aesni_key_schedule(key, length);
      }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes())
      {
      // Other HW AES backends want byte-reversed subkeys on little endian
      return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian());
      }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm())
      {
      return vperm_key_schedule(key, length);
      }
#endif

   aes_key_schedule(key, length, m_EK, m_DK);
   }
854 | | |
855 | | void AES_128::clear() |
856 | 0 | { |
857 | 0 | zap(m_EK); |
858 | 0 | zap(m_DK); |
859 | 0 | } |
860 | | |
861 | | void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
862 | 0 | { |
863 | 0 | verify_key_set(m_EK.empty() == false); |
864 | 0 |
|
865 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
866 | 0 | if(CPUID::has_hw_aes()) |
867 | 0 | { |
868 | 0 | return hw_aes_encrypt_n(in, out, blocks); |
869 | 0 | } |
870 | 0 | #endif |
871 | 0 | |
872 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
873 | 0 | if(CPUID::has_vperm()) |
874 | 0 | { |
875 | 0 | return vperm_encrypt_n(in, out, blocks); |
876 | 0 | } |
877 | 0 | #endif |
878 | 0 | |
879 | 0 | aes_encrypt_n(in, out, blocks, m_EK); |
880 | 0 | } |
881 | | |
882 | | void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
883 | 0 | { |
884 | 0 | verify_key_set(m_DK.empty() == false); |
885 | 0 |
|
886 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
887 | 0 | if(CPUID::has_hw_aes()) |
888 | 0 | { |
889 | 0 | return hw_aes_decrypt_n(in, out, blocks); |
890 | 0 | } |
891 | 0 | #endif |
892 | 0 | |
893 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
894 | 0 | if(CPUID::has_vperm()) |
895 | 0 | { |
896 | 0 | return vperm_decrypt_n(in, out, blocks); |
897 | 0 | } |
898 | 0 | #endif |
899 | 0 | |
900 | 0 | aes_decrypt_n(in, out, blocks, m_DK); |
901 | 0 | } |
902 | | |
903 | | void AES_192::key_schedule(const uint8_t key[], size_t length) |
904 | 0 | { |
905 | 0 | #if defined(BOTAN_HAS_AES_NI) |
906 | 0 | if(CPUID::has_aes_ni()) |
907 | 0 | { |
908 | 0 | return aesni_key_schedule(key, length); |
909 | 0 | } |
910 | 0 | #endif |
911 | 0 | |
912 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
913 | 0 | if(CPUID::has_hw_aes()) |
914 | 0 | { |
915 | 0 | return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); |
916 | 0 | } |
917 | 0 | #endif |
918 | 0 | |
919 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
920 | 0 | if(CPUID::has_vperm()) |
921 | 0 | { |
922 | 0 | return vperm_key_schedule(key, length); |
923 | 0 | } |
924 | 0 | #endif |
925 | 0 | |
926 | 0 | aes_key_schedule(key, length, m_EK, m_DK); |
927 | 0 | } |
928 | | |
929 | | void AES_192::clear() |
930 | 0 | { |
931 | 0 | zap(m_EK); |
932 | 0 | zap(m_DK); |
933 | 0 | } |
934 | | |
935 | | void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
936 | 8.58k | { |
937 | 8.58k | verify_key_set(m_EK.empty() == false); |
938 | 8.58k | |
939 | 8.58k | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
940 | 8.58k | if(CPUID::has_hw_aes()) |
941 | 8.58k | { |
942 | 8.58k | return hw_aes_encrypt_n(in, out, blocks); |
943 | 8.58k | } |
944 | 0 | #endif |
945 | 0 | |
946 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
947 | 0 | if(CPUID::has_vperm()) |
948 | 0 | { |
949 | 0 | return vperm_encrypt_n(in, out, blocks); |
950 | 0 | } |
951 | 0 | #endif |
952 | 0 | |
953 | 0 | aes_encrypt_n(in, out, blocks, m_EK); |
954 | 0 | } |
955 | | |
956 | | void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
957 | 737 | { |
958 | 737 | verify_key_set(m_DK.empty() == false); |
959 | 737 | |
960 | 737 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
961 | 737 | if(CPUID::has_hw_aes()) |
962 | 737 | { |
963 | 737 | return hw_aes_decrypt_n(in, out, blocks); |
964 | 737 | } |
965 | 0 | #endif |
966 | 0 | |
967 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
968 | 0 | if(CPUID::has_vperm()) |
969 | 0 | { |
970 | 0 | return vperm_decrypt_n(in, out, blocks); |
971 | 0 | } |
972 | 0 | #endif |
973 | 0 | |
974 | 0 | aes_decrypt_n(in, out, blocks, m_DK); |
975 | 0 | } |
976 | | |
977 | | void AES_256::key_schedule(const uint8_t key[], size_t length) |
978 | 966 | { |
979 | 966 | #if defined(BOTAN_HAS_AES_NI) |
980 | 966 | if(CPUID::has_aes_ni()) |
981 | 966 | { |
982 | 966 | return aesni_key_schedule(key, length); |
983 | 966 | } |
984 | 0 | #endif |
985 | 0 | |
986 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
987 | 0 | if(CPUID::has_hw_aes()) |
988 | 0 | { |
989 | 0 | return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); |
990 | 0 | } |
991 | 0 | #endif |
992 | 0 | |
993 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
994 | 0 | if(CPUID::has_vperm()) |
995 | 0 | { |
996 | 0 | return vperm_key_schedule(key, length); |
997 | 0 | } |
998 | 0 | #endif |
999 | 0 | |
1000 | 0 | aes_key_schedule(key, length, m_EK, m_DK); |
1001 | 0 | } |
1002 | | |
1003 | | void AES_256::clear() |
1004 | 0 | { |
1005 | 0 | zap(m_EK); |
1006 | 0 | zap(m_DK); |
1007 | 0 | } |
1008 | | |
1009 | | } |