/src/botan/src/lib/block/aes/aes.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 1999-2010,2015,2017,2018,2020 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/aes.h> |
8 | | |
9 | | #include <botan/internal/bit_ops.h> |
10 | | #include <botan/internal/cpuid.h> |
11 | | #include <botan/internal/ct_utils.h> |
12 | | #include <botan/internal/loadstor.h> |
13 | | #include <botan/internal/rotate.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | #if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI) |
18 | | #define BOTAN_HAS_HW_AES_SUPPORT |
19 | | #endif |
20 | | |
21 | | /* |
22 | | * One of three AES implementation strategies is used to get a constant time |
23 | | * implementation which is immune to common cache/timing based side channels: |
24 | | * |
25 | | * - If AES hardware support is available (AES-NI, POWER8, Aarch64) use that |
26 | | * |
27 | | * - If 128-bit SIMD with byte shuffles are available (SSSE3, NEON, or Altivec), |
28 | | * use the vperm technique published by Mike Hamburg at CHES 2009. |
29 | | * |
30 | | * - If no hardware or SIMD support, fall back to a constant time bitsliced |
31 | | * implementation. This uses 32-bit words resulting in 2 blocks being processed |
32 | | * in parallel. Moving to 4 blocks (with 64-bit words) would approximately |
33 | | * double performance on 64-bit CPUs. Likewise moving to 128 bit SIMD would |
34 | | * again approximately double performance vs 64-bit. However the assumption is |
35 | | * that most 64-bit CPUs either have hardware AES or SIMD shuffle support and |
36 | | * that the majority of users falling back to this code will be 32-bit cores. |
37 | | * If this assumption proves to be unsound, the bitsliced code can easily be |
38 | | * extended to operate on either 32 or 64 bit words depending on the native |
39 | | * wordsize of the target processor. |
40 | | * |
41 | | * Useful references |
42 | | * |
43 | | * - "Accelerating AES with Vector Permute Instructions" Mike Hamburg |
44 | | * https://www.shiftleft.org/papers/vector_aes/vector_aes.pdf |
45 | | * |
46 | | * - "Faster and Timing-Attack Resistant AES-GCM" Käsper and Schwabe |
47 | | * https://eprint.iacr.org/2009/129.pdf |
48 | | * |
49 | | * - "A new combinational logic minimization technique with applications to cryptology." |
50 | | * Boyar and Peralta https://eprint.iacr.org/2009/191.pdf |
51 | | * |
52 | | * - "A depth-16 circuit for the AES S-box" Boyar and Peralta |
53 | | * https://eprint.iacr.org/2011/332.pdf |
54 | | * |
55 | | * - "A Very Compact S-box for AES" Canright |
56 | | * https://www.iacr.org/archive/ches2005/032.pdf |
57 | | * https://core.ac.uk/download/pdf/36694529.pdf (extended) |
58 | | */ |
59 | | |
60 | | namespace { |
61 | | |
62 | | /* |
63 | | This is an AES sbox circuit which can execute in bitsliced mode up to 32x in |
64 | | parallel. |
65 | | |
66 | | The circuit is from the "Circuit Minimization Team" group |
67 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html |
68 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt |
69 | | |
70 | | This circuit has size 113 and depth 27. In software it is much faster than |
71 | | circuits which are considered faster for hardware purposes (where circuit depth |
72 | | is the critical constraint), because unlike in hardware, on common CPUs we can |
73 | | only execute - at best - 3 or 4 logic operations per cycle. So a smaller circuit |
74 | | is superior. On an x86-64 machine this circuit is about 15% faster than the |
75 | | circuit of size 128 and depth 16 given in "A depth-16 circuit for the AES S-box". |
76 | | |
77 | | Another circuit for AES Sbox of size 102 and depth 24 is described in "New |
78 | | Circuit Minimization Techniques for Smaller and Faster AES SBoxes" |
79 | | [https://eprint.iacr.org/2019/802] however it relies on "non-standard" gates |
80 | | like MUX, NOR, NAND, etc and so in practice in bitsliced software, its size is |
81 | | actually a bit larger than this circuit, as few CPUs have such instructions and |
82 | | otherwise they must be emulated using a sequence of available bit operations. |
83 | | */ |
/*
* Bitsliced AES S-box, evaluated in place.
*
* V[0..7] are the eight bit-planes of the input bytes; the caller in this
* file (SE_word) places the most significant bit of each byte in V[0].
* Every bit position of the eight words is an independent S-box lane.
* The gate sequence below is the published circuit and must not be reordered.
*/
void AES_SBOX(uint32_t V[8]) {
   using word = uint32_t;

   // Input bit-planes
   const word U0 = V[0];
   const word U1 = V[1];
   const word U2 = V[2];
   const word U3 = V[3];
   const word U4 = V[4];
   const word U5 = V[5];
   const word U6 = V[6];
   const word U7 = V[7];

   // Input layer: XOR gates only
   const word y14 = U3 ^ U5;
   const word y13 = U0 ^ U6;
   const word y9 = U0 ^ U3;
   const word y8 = U0 ^ U5;
   const word t0 = U1 ^ U2;
   const word y1 = t0 ^ U7;
   const word y4 = y1 ^ U3;
   const word y12 = y13 ^ y14;
   const word y2 = y1 ^ U0;
   const word y5 = y1 ^ U6;
   const word y3 = y5 ^ y8;
   const word t1 = U4 ^ y12;
   const word y15 = t1 ^ U5;
   const word y20 = t1 ^ U1;
   const word y6 = y15 ^ U7;
   const word y10 = y15 ^ t0;
   const word y11 = y20 ^ y9;
   const word y7 = U7 ^ y11;
   const word y17 = y10 ^ y11;
   const word y19 = y10 ^ y8;
   const word y16 = t0 ^ y11;
   const word y21 = y13 ^ y16;
   const word y18 = U0 ^ y16;

   // Middle section: mixed AND / XOR gates
   const word t2 = y12 & y15;
   const word t3 = y3 & y6;
   const word t4 = t3 ^ t2;
   const word t5 = y4 & U7;
   const word t6 = t5 ^ t2;
   const word t7 = y13 & y16;
   const word t8 = y5 & y1;
   const word t9 = t8 ^ t7;
   const word t10 = y2 & y7;
   const word t11 = t10 ^ t7;
   const word t12 = y9 & y11;
   const word t13 = y14 & y17;
   const word t14 = t13 ^ t12;
   const word t15 = y8 & y10;
   const word t16 = t15 ^ t12;
   const word t17 = t4 ^ y20;
   const word t18 = t6 ^ t16;
   const word t19 = t9 ^ t14;
   const word t20 = t11 ^ t16;
   const word t21 = t17 ^ t14;
   const word t22 = t18 ^ y19;
   const word t23 = t19 ^ y21;
   const word t24 = t20 ^ y18;
   const word t25 = t21 ^ t22;
   const word t26 = t21 & t23;
   const word t27 = t24 ^ t26;
   const word t28 = t25 & t27;
   const word t29 = t28 ^ t22;
   const word t30 = t23 ^ t24;
   const word t31 = t22 ^ t26;
   const word t32 = t31 & t30;
   const word t33 = t32 ^ t24;
   const word t34 = t23 ^ t33;
   const word t35 = t27 ^ t33;
   const word t36 = t24 & t35;
   const word t37 = t36 ^ t34;
   const word t38 = t27 ^ t36;
   const word t39 = t29 & t38;
   const word t40 = t25 ^ t39;
   const word t41 = t40 ^ t37;
   const word t42 = t29 ^ t33;
   const word t43 = t29 ^ t40;
   const word t44 = t33 ^ t37;
   const word t45 = t42 ^ t41;

   // Product terms: AND of middle-section values with input-layer values
   const word z0 = t44 & y15;
   const word z1 = t37 & y6;
   const word z2 = t33 & U7;
   const word z3 = t43 & y16;
   const word z4 = t40 & y1;
   const word z5 = t29 & y7;
   const word z6 = t42 & y11;
   const word z7 = t45 & y17;
   const word z8 = t41 & y10;
   const word z9 = t44 & y12;
   const word z10 = t37 & y3;
   const word z11 = t33 & y4;
   const word z12 = t43 & y13;
   const word z13 = t40 & y5;
   const word z14 = t29 & y2;
   const word z15 = t42 & y9;
   const word z16 = t45 & y14;
   const word z17 = t41 & y8;

   // Output layer: XOR / NOT gates only
   const word tc1 = z15 ^ z16;
   const word tc2 = z10 ^ tc1;
   const word tc3 = z9 ^ tc2;
   const word tc4 = z0 ^ z2;
   const word tc5 = z1 ^ z0;
   const word tc6 = z3 ^ z4;
   const word tc7 = z12 ^ tc4;
   const word tc8 = z7 ^ tc6;
   const word tc9 = z8 ^ tc7;
   const word tc10 = tc8 ^ tc9;
   const word tc11 = tc6 ^ tc5;
   const word tc12 = z3 ^ z5;
   const word tc13 = z13 ^ tc1;
   const word tc14 = tc4 ^ tc12;
   const word S3 = tc3 ^ tc11;
   const word tc16 = z6 ^ tc8;
   const word tc17 = z14 ^ tc10;
   const word tc18 = ~tc13 ^ tc14;
   const word S7 = z12 ^ tc18;
   const word tc20 = z15 ^ tc16;
   const word tc21 = tc2 ^ z11;
   const word S0 = tc3 ^ tc16;
   const word S6 = tc10 ^ tc18;
   const word S4 = tc14 ^ S3;
   const word S1 = ~(S3 ^ tc16);
   const word tc26 = tc17 ^ tc20;
   const word S2 = ~(tc26 ^ z17);
   const word S5 = tc21 ^ tc17;

   // Write the output bit-planes back over the input
   const word result[8] = {S0, S1, S2, S3, S4, S5, S6, S7};
   for(int i = 0; i != 8; ++i) {
      V[i] = result[i];
   }
}
217 | | |
218 | | /* |
219 | | A circuit for inverse AES Sbox of size 121 and depth 21 from |
220 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html |
221 | | http://www.cs.yale.edu/homes/peralta/CircuitStuff/Sinv.txt |
222 | | */ |
/*
* Bitsliced inverse AES S-box, evaluated in place.
*
* V[0..7] are the eight bit-planes of the input bytes, in the same layout
* that AES_SBOX uses. The gate sequence below is the published circuit and
* must not be reordered.
*/
void AES_INV_SBOX(uint32_t V[8]) {
   using word = uint32_t;

   // Input bit-planes
   const word U0 = V[0];
   const word U1 = V[1];
   const word U2 = V[2];
   const word U3 = V[3];
   const word U4 = V[4];
   const word U5 = V[5];
   const word U6 = V[6];
   const word U7 = V[7];

   // Input layer: XOR / NOT gates only
   const word Y0 = U0 ^ U3;
   const word Y2 = ~(U1 ^ U3);
   const word Y4 = U0 ^ Y2;
   const word RTL0 = U6 ^ U7;
   const word Y1 = Y2 ^ RTL0;
   const word Y7 = ~(U2 ^ Y1);
   const word RTL1 = U3 ^ U4;
   const word Y6 = ~(U7 ^ RTL1);
   const word Y3 = Y1 ^ RTL1;
   const word RTL2 = ~(U0 ^ U2);
   const word Y5 = U5 ^ RTL2;
   const word sa1 = Y0 ^ Y2;
   const word sa0 = Y1 ^ Y3;
   const word sb1 = Y4 ^ Y6;
   const word sb0 = Y5 ^ Y7;
   const word ah = Y0 ^ Y1;
   const word al = Y2 ^ Y3;
   const word aa = sa0 ^ sa1;
   const word bh = Y4 ^ Y5;
   const word bl = Y6 ^ Y7;
   const word bb = sb0 ^ sb1;
   const word ab20 = sa0 ^ sb0;
   const word ab22 = al ^ bl;
   const word ab23 = Y3 ^ Y7;
   const word ab21 = sa1 ^ sb1;

   // First group of AND gates and their XOR combinations
   const word abcd1 = ah & bh;
   const word rr1 = Y0 & Y4;
   const word ph11 = ab20 ^ abcd1;
   const word t01 = Y1 & Y5;
   const word ph01 = t01 ^ abcd1;
   const word abcd2 = al & bl;
   const word r1 = Y2 & Y6;
   const word pl11 = ab22 ^ abcd2;
   const word r2 = Y3 & Y7;
   const word pl01 = r2 ^ abcd2;
   const word r3 = sa0 & sb0;
   const word vr1 = aa & bb;
   const word pr1 = vr1 ^ r3;
   const word wr1 = sa1 & sb1;
   const word qr1 = wr1 ^ r3;
   const word ab0 = ph11 ^ rr1;
   const word ab1 = ph01 ^ ab21;
   const word ab2 = pl11 ^ r1;
   const word ab3 = pl01 ^ qr1;
   const word cp1 = ab0 ^ pr1;
   const word cp2 = ab1 ^ qr1;
   const word cp3 = ab2 ^ pr1;
   const word cp4 = ab3 ^ ab23;

   // Derive d0..d3 from cp1..cp4
   const word tinv1 = cp3 ^ cp4;
   const word tinv2 = cp3 & cp1;
   const word tinv3 = cp2 ^ tinv2;
   const word tinv4 = cp1 ^ cp2;
   const word tinv5 = cp4 ^ tinv2;
   const word tinv6 = tinv5 & tinv4;
   const word tinv7 = tinv3 & tinv1;
   const word d2 = cp4 ^ tinv7;
   const word d0 = cp2 ^ tinv6;
   const word tinv8 = cp1 & cp4;
   const word tinv9 = tinv4 & tinv8;
   const word tinv10 = tinv4 ^ tinv2;
   const word d1 = tinv9 ^ tinv10;
   const word tinv11 = cp2 & cp3;
   const word tinv12 = tinv1 & tinv11;
   const word tinv13 = tinv1 ^ tinv2;
   const word d3 = tinv12 ^ tinv13;
   const word sd1 = d1 ^ d3;
   const word sd0 = d0 ^ d2;
   const word dl = d0 ^ d1;
   const word dh = d2 ^ d3;
   const word dd = sd0 ^ sd1;

   // Second group of AND gates: d-values against the input-layer values
   const word abcd3 = dh & bh;
   const word rr2 = d3 & Y4;
   const word t02 = d2 & Y5;
   const word abcd4 = dl & bl;
   const word r4 = d1 & Y6;
   const word r5 = d0 & Y7;
   const word r6 = sd0 & sb0;
   const word vr2 = dd & bb;
   const word wr2 = sd1 & sb1;
   const word abcd5 = dh & ah;
   const word r7 = d3 & Y0;
   const word r8 = d2 & Y1;
   const word abcd6 = dl & al;
   const word r9 = d1 & Y2;
   const word r10 = d0 & Y3;
   const word r11 = sd0 & sa0;
   const word vr3 = dd & aa;
   const word wr3 = sd1 & sa1;

   // Output layer: XOR gates only
   const word ph12 = rr2 ^ abcd3;
   const word ph02 = t02 ^ abcd3;
   const word pl12 = r4 ^ abcd4;
   const word pl02 = r5 ^ abcd4;
   const word pr2 = vr2 ^ r6;
   const word qr2 = wr2 ^ r6;
   const word p0 = ph12 ^ pr2;
   const word p1 = ph02 ^ qr2;
   const word p2 = pl12 ^ pr2;
   const word p3 = pl02 ^ qr2;
   const word ph13 = r7 ^ abcd5;
   const word ph03 = r8 ^ abcd5;
   const word pl13 = r9 ^ abcd6;
   const word pl03 = r10 ^ abcd6;
   const word pr3 = vr3 ^ r11;
   const word qr3 = wr3 ^ r11;
   const word p4 = ph13 ^ pr3;
   const word S7 = ph03 ^ qr3;
   const word p6 = pl13 ^ pr3;
   const word p7 = pl03 ^ qr3;
   const word S3 = p1 ^ p6;
   const word S6 = p2 ^ p6;
   const word S0 = p3 ^ p6;
   const word X11 = p0 ^ p2;
   const word S5 = S0 ^ X11;
   const word X13 = p4 ^ p7;
   const word X14 = X11 ^ X13;
   const word S1 = S3 ^ X14;
   const word X16 = p1 ^ S7;
   const word S2 = X14 ^ X16;
   const word X18 = p0 ^ p4;
   const word X19 = S5 ^ X16;
   const word S4 = X18 ^ X19;

   // Write the output bit-planes back over the input
   const word result[8] = {S0, S1, S2, S3, S4, S5, S6, S7};
   for(int i = 0; i != 8; ++i) {
      V[i] = result[i];
   }
}
364 | | |
365 | 0 | inline void bit_transpose(uint32_t B[8]) { |
366 | 0 | swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1); |
367 | 0 | swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1); |
368 | 0 | swap_bits<uint32_t>(B[5], B[4], 0x55555555, 1); |
369 | 0 | swap_bits<uint32_t>(B[7], B[6], 0x55555555, 1); |
370 | |
|
371 | 0 | swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2); |
372 | 0 | swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2); |
373 | 0 | swap_bits<uint32_t>(B[6], B[4], 0x33333333, 2); |
374 | 0 | swap_bits<uint32_t>(B[7], B[5], 0x33333333, 2); |
375 | |
|
376 | 0 | swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4); |
377 | 0 | swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4); |
378 | 0 | swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4); |
379 | 0 | swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4); |
380 | 0 | } |
381 | | |
382 | 0 | inline void ks_expand(uint32_t B[8], const uint32_t K[], size_t r) { |
383 | | /* |
384 | | This is bit_transpose of K[r..r+4] || K[r..r+4], we can save some computation |
385 | | due to knowing the first and second halves are the same data. |
386 | | */ |
387 | 0 | for(size_t i = 0; i != 4; ++i) { |
388 | 0 | B[i] = K[r + i]; |
389 | 0 | } |
390 | |
|
391 | 0 | swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1); |
392 | 0 | swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1); |
393 | |
|
394 | 0 | swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2); |
395 | 0 | swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2); |
396 | |
|
397 | 0 | B[4] = B[0]; |
398 | 0 | B[5] = B[1]; |
399 | 0 | B[6] = B[2]; |
400 | 0 | B[7] = B[3]; |
401 | |
|
402 | 0 | swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4); |
403 | 0 | swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4); |
404 | 0 | swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4); |
405 | 0 | swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4); |
406 | 0 | } |
407 | | |
408 | 0 | inline void shift_rows(uint32_t B[8]) { |
409 | | // 3 0 1 2 7 4 5 6 10 11 8 9 14 15 12 13 17 18 19 16 21 22 23 20 24 25 26 27 28 29 30 31 |
410 | 0 | #if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) |
411 | 0 | for(size_t i = 0; i != 8; i += 2) { |
412 | 0 | uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i + 1]; |
413 | 0 | x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2); |
414 | 0 | x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1); |
415 | 0 | B[i] = static_cast<uint32_t>(x >> 32); |
416 | 0 | B[i + 1] = static_cast<uint32_t>(x); |
417 | 0 | } |
418 | | #else |
419 | | for(size_t i = 0; i != 8; ++i) { |
420 | | uint32_t x = B[i]; |
421 | | x = bit_permute_step<uint32_t>(x, 0x00223311, 2); |
422 | | x = bit_permute_step<uint32_t>(x, 0x00550055, 1); |
423 | | B[i] = x; |
424 | | } |
425 | | #endif |
426 | 0 | } |
427 | | |
428 | 0 | inline void inv_shift_rows(uint32_t B[8]) { |
429 | | // Inverse of shift_rows, just inverting the steps |
430 | |
|
431 | 0 | #if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) |
432 | 0 | for(size_t i = 0; i != 8; i += 2) { |
433 | 0 | uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i + 1]; |
434 | 0 | x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1); |
435 | 0 | x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2); |
436 | 0 | B[i] = static_cast<uint32_t>(x >> 32); |
437 | 0 | B[i + 1] = static_cast<uint32_t>(x); |
438 | 0 | } |
439 | | #else |
440 | | for(size_t i = 0; i != 8; ++i) { |
441 | | uint32_t x = B[i]; |
442 | | x = bit_permute_step<uint32_t>(x, 0x00550055, 1); |
443 | | x = bit_permute_step<uint32_t>(x, 0x00223311, 2); |
444 | | B[i] = x; |
445 | | } |
446 | | #endif |
447 | 0 | } |
448 | | |
449 | 0 | inline void mix_columns(uint32_t B[8]) { |
450 | | // carry high bits in B[0] to positions in 0x1b == 0b11011 |
451 | 0 | const uint32_t X2[8] = { |
452 | 0 | B[1], |
453 | 0 | B[2], |
454 | 0 | B[3], |
455 | 0 | B[4] ^ B[0], |
456 | 0 | B[5] ^ B[0], |
457 | 0 | B[6], |
458 | 0 | B[7] ^ B[0], |
459 | 0 | B[0], |
460 | 0 | }; |
461 | |
|
462 | 0 | for(size_t i = 0; i != 8; i++) { |
463 | 0 | const uint32_t X3 = B[i] ^ X2[i]; |
464 | 0 | B[i] = X2[i] ^ rotr<8>(B[i]) ^ rotr<16>(B[i]) ^ rotr<24>(X3); |
465 | 0 | } |
466 | 0 | } |
467 | | |
468 | 0 | void inv_mix_columns(uint32_t B[8]) { |
469 | | /* |
470 | | OpenSSL's bsaes implementation credits Jussi Kivilinna with the lovely |
471 | | matrix decomposition |
472 | | |
473 | | | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | |
474 | | | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | |
475 | | | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | |
476 | | | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | |
477 | | |
478 | | Notice the first component is simply the MixColumns matrix. So we can |
479 | | multiply first by (05,00,04,00) then perform MixColumns to get the equivalent |
480 | | of InvMixColumn. |
481 | | */ |
482 | 0 | const uint32_t X4[8] = { |
483 | 0 | B[2], |
484 | 0 | B[3], |
485 | 0 | B[4] ^ B[0], |
486 | 0 | B[5] ^ B[0] ^ B[1], |
487 | 0 | B[6] ^ B[1], |
488 | 0 | B[7] ^ B[0], |
489 | 0 | B[0] ^ B[1], |
490 | 0 | B[1], |
491 | 0 | }; |
492 | |
|
493 | 0 | for(size_t i = 0; i != 8; i++) { |
494 | 0 | const uint32_t X5 = X4[i] ^ B[i]; |
495 | 0 | B[i] = X5 ^ rotr<16>(X4[i]); |
496 | 0 | } |
497 | |
|
498 | 0 | mix_columns(B); |
499 | 0 | } |
500 | | |
501 | | /* |
502 | | * AES Encryption |
503 | | */ |
504 | 0 | void aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const secure_vector<uint32_t>& EK) { |
505 | 0 | BOTAN_ASSERT(EK.size() == 44 || EK.size() == 52 || EK.size() == 60, "Key was set"); |
506 | |
|
507 | 0 | const size_t rounds = (EK.size() - 4) / 4; |
508 | |
|
509 | 0 | uint32_t KS[13 * 8] = {0}; // actual maximum is (rounds - 1) * 8 |
510 | 0 | for(size_t i = 0; i < rounds - 1; i += 1) { |
511 | 0 | ks_expand(&KS[8 * i], EK.data(), 4 * i + 4); |
512 | 0 | } |
513 | |
|
514 | 0 | const size_t BLOCK_SIZE = 16; |
515 | 0 | const size_t BITSLICED_BLOCKS = 8 * sizeof(uint32_t) / BLOCK_SIZE; |
516 | |
|
517 | 0 | while(blocks > 0) { |
518 | 0 | const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS); |
519 | |
|
520 | 0 | uint32_t B[8] = {0}; |
521 | |
|
522 | 0 | load_be(B, in, this_loop * 4); |
523 | |
|
524 | 0 | CT::poison(B, 8); |
525 | |
|
526 | 0 | for(size_t i = 0; i != 8; ++i) { |
527 | 0 | B[i] ^= EK[i % 4]; |
528 | 0 | } |
529 | |
|
530 | 0 | bit_transpose(B); |
531 | |
|
532 | 0 | for(size_t r = 0; r != rounds - 1; ++r) { |
533 | 0 | AES_SBOX(B); |
534 | 0 | shift_rows(B); |
535 | 0 | mix_columns(B); |
536 | |
|
537 | 0 | for(size_t i = 0; i != 8; ++i) { |
538 | 0 | B[i] ^= KS[8 * r + i]; |
539 | 0 | } |
540 | 0 | } |
541 | | |
542 | | // Final round: |
543 | 0 | AES_SBOX(B); |
544 | 0 | shift_rows(B); |
545 | 0 | bit_transpose(B); |
546 | |
|
547 | 0 | for(size_t i = 0; i != 8; ++i) { |
548 | 0 | B[i] ^= EK[4 * rounds + i % 4]; |
549 | 0 | } |
550 | |
|
551 | 0 | CT::unpoison(B, 8); |
552 | |
|
553 | 0 | copy_out_be(out, this_loop * 4 * sizeof(uint32_t), B); |
554 | |
|
555 | 0 | in += this_loop * BLOCK_SIZE; |
556 | 0 | out += this_loop * BLOCK_SIZE; |
557 | 0 | blocks -= this_loop; |
558 | 0 | } |
559 | 0 | } |
560 | | |
561 | | /* |
562 | | * AES Decryption |
563 | | */ |
564 | 0 | void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const secure_vector<uint32_t>& DK) { |
565 | 0 | BOTAN_ASSERT(DK.size() == 44 || DK.size() == 52 || DK.size() == 60, "Key was set"); |
566 | |
|
567 | 0 | const size_t rounds = (DK.size() - 4) / 4; |
568 | |
|
569 | 0 | uint32_t KS[13 * 8] = {0}; // actual maximum is (rounds - 1) * 8 |
570 | 0 | for(size_t i = 0; i < rounds - 1; i += 1) { |
571 | 0 | ks_expand(&KS[8 * i], DK.data(), 4 * i + 4); |
572 | 0 | } |
573 | |
|
574 | 0 | const size_t BLOCK_SIZE = 16; |
575 | 0 | const size_t BITSLICED_BLOCKS = 8 * sizeof(uint32_t) / BLOCK_SIZE; |
576 | |
|
577 | 0 | while(blocks > 0) { |
578 | 0 | const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS); |
579 | |
|
580 | 0 | uint32_t B[8] = {0}; |
581 | |
|
582 | 0 | CT::poison(B, 8); |
583 | |
|
584 | 0 | load_be(B, in, this_loop * 4); |
585 | |
|
586 | 0 | for(size_t i = 0; i != 8; ++i) { |
587 | 0 | B[i] ^= DK[i % 4]; |
588 | 0 | } |
589 | |
|
590 | 0 | bit_transpose(B); |
591 | |
|
592 | 0 | for(size_t r = 0; r != rounds - 1; ++r) { |
593 | 0 | AES_INV_SBOX(B); |
594 | 0 | inv_shift_rows(B); |
595 | 0 | inv_mix_columns(B); |
596 | |
|
597 | 0 | for(size_t i = 0; i != 8; ++i) { |
598 | 0 | B[i] ^= KS[8 * r + i]; |
599 | 0 | } |
600 | 0 | } |
601 | | |
602 | | // Final round: |
603 | 0 | AES_INV_SBOX(B); |
604 | 0 | inv_shift_rows(B); |
605 | 0 | bit_transpose(B); |
606 | |
|
607 | 0 | for(size_t i = 0; i != 8; ++i) { |
608 | 0 | B[i] ^= DK[4 * rounds + i % 4]; |
609 | 0 | } |
610 | |
|
611 | 0 | CT::unpoison(B, 8); |
612 | |
|
613 | 0 | copy_out_be(out, this_loop * 4 * sizeof(uint32_t), B); |
614 | |
|
615 | 0 | in += this_loop * BLOCK_SIZE; |
616 | 0 | out += this_loop * BLOCK_SIZE; |
617 | 0 | blocks -= this_loop; |
618 | 0 | } |
619 | 0 | } |
620 | | |
/*
* Multiply each of the four bytes packed in s by 2 in GF(2^8), reducing
* with 0x1B whenever a byte's top bit was set. The bytes are handled
* independently; nothing carries across byte boundaries.
*/
inline uint32_t xtime32(uint32_t s) {
   constexpr uint32_t LOW_SEVEN_BITS = 0x7F7F7F7F;
   constexpr uint32_t BYTE_LSBS = 0x01010101;
   constexpr uint32_t REDUCTION = 0x1B;

   // Shift each byte left by one, discarding its top bit
   const uint32_t shifted = (s & LOW_SEVEN_BITS) << 1;

   // One flag per byte whose top bit was set, expanded to the reduction constant
   const uint32_t overflowed = (s >> 7) & BYTE_LSBS;

   return shifted ^ (overflowed * REDUCTION);
}
628 | | |
629 | 0 | inline uint32_t InvMixColumn(uint32_t s1) { |
630 | 0 | const uint32_t s2 = xtime32(s1); |
631 | 0 | const uint32_t s4 = xtime32(s2); |
632 | 0 | const uint32_t s8 = xtime32(s4); |
633 | 0 | const uint32_t s9 = s8 ^ s1; |
634 | 0 | const uint32_t s11 = s9 ^ s2; |
635 | 0 | const uint32_t s13 = s9 ^ s4; |
636 | 0 | const uint32_t s14 = s8 ^ s4 ^ s2; |
637 | |
|
638 | 0 | return s14 ^ rotr<8>(s9) ^ rotr<16>(s13) ^ rotr<24>(s11); |
639 | 0 | } |
640 | | |
641 | 0 | void InvMixColumn_x4(uint32_t x[4]) { |
642 | 0 | x[0] = InvMixColumn(x[0]); |
643 | 0 | x[1] = InvMixColumn(x[1]); |
644 | 0 | x[2] = InvMixColumn(x[2]); |
645 | 0 | x[3] = InvMixColumn(x[3]); |
646 | 0 | } |
647 | | |
648 | 0 | uint32_t SE_word(uint32_t x) { |
649 | 0 | uint32_t I[8] = {0}; |
650 | |
|
651 | 0 | for(size_t i = 0; i != 8; ++i) { |
652 | 0 | I[i] = (x >> (7 - i)) & 0x01010101; |
653 | 0 | } |
654 | |
|
655 | 0 | AES_SBOX(I); |
656 | |
|
657 | 0 | x = 0; |
658 | |
|
659 | 0 | for(size_t i = 0; i != 8; ++i) { |
660 | 0 | x |= ((I[i] & 0x01010101) << (7 - i)); |
661 | 0 | } |
662 | |
|
663 | 0 | return x; |
664 | 0 | } |
665 | | |
666 | | void aes_key_schedule(const uint8_t key[], |
667 | | size_t length, |
668 | | secure_vector<uint32_t>& EK, |
669 | | secure_vector<uint32_t>& DK, |
670 | 0 | bool bswap_keys = false) { |
671 | 0 | static const uint32_t RC[10] = {0x01000000, |
672 | 0 | 0x02000000, |
673 | 0 | 0x04000000, |
674 | 0 | 0x08000000, |
675 | 0 | 0x10000000, |
676 | 0 | 0x20000000, |
677 | 0 | 0x40000000, |
678 | 0 | 0x80000000, |
679 | 0 | 0x1B000000, |
680 | 0 | 0x36000000}; |
681 | |
|
682 | 0 | const size_t X = length / 4; |
683 | | |
684 | | // Can't happen, but make static analyzers happy |
685 | 0 | BOTAN_ASSERT_NOMSG(X == 4 || X == 6 || X == 8); |
686 | |
|
687 | 0 | const size_t rounds = (length / 4) + 6; |
688 | | |
689 | | // Help the optimizer |
690 | 0 | BOTAN_ASSERT_NOMSG(rounds == 10 || rounds == 12 || rounds == 14); |
691 | |
|
692 | 0 | CT::poison(key, length); |
693 | |
|
694 | 0 | EK.resize(length + 28); |
695 | 0 | DK.resize(length + 28); |
696 | |
|
697 | 0 | for(size_t i = 0; i != X; ++i) { |
698 | 0 | EK[i] = load_be<uint32_t>(key, i); |
699 | 0 | } |
700 | |
|
701 | 0 | for(size_t i = X; i < 4 * (rounds + 1); i += X) { |
702 | 0 | EK[i] = EK[i - X] ^ RC[(i - X) / X] ^ rotl<8>(SE_word(EK[i - 1])); |
703 | |
|
704 | 0 | for(size_t j = 1; j != X && (i + j) < EK.size(); ++j) { |
705 | 0 | EK[i + j] = EK[i + j - X]; |
706 | |
|
707 | 0 | if(X == 8 && j == 4) { |
708 | 0 | EK[i + j] ^= SE_word(EK[i + j - 1]); |
709 | 0 | } else { |
710 | 0 | EK[i + j] ^= EK[i + j - 1]; |
711 | 0 | } |
712 | 0 | } |
713 | 0 | } |
714 | |
|
715 | 0 | for(size_t i = 0; i != 4 * (rounds + 1); i += 4) { |
716 | 0 | DK[i] = EK[4 * rounds - i]; |
717 | 0 | DK[i + 1] = EK[4 * rounds - i + 1]; |
718 | 0 | DK[i + 2] = EK[4 * rounds - i + 2]; |
719 | 0 | DK[i + 3] = EK[4 * rounds - i + 3]; |
720 | 0 | } |
721 | |
|
722 | 0 | for(size_t i = 4; i != 4 * rounds; i += 4) { |
723 | 0 | InvMixColumn_x4(&DK[i]); |
724 | 0 | } |
725 | |
|
726 | 0 | if(bswap_keys) { |
727 | | // HW AES on little endian needs the subkeys to be byte reversed |
728 | 0 | for(size_t i = 0; i != EK.size(); ++i) { |
729 | 0 | EK[i] = reverse_bytes(EK[i]); |
730 | 0 | } |
731 | 0 | for(size_t i = 0; i != DK.size(); ++i) { |
732 | 0 | DK[i] = reverse_bytes(DK[i]); |
733 | 0 | } |
734 | 0 | } |
735 | |
|
736 | 0 | CT::unpoison(EK.data(), EK.size()); |
737 | 0 | CT::unpoison(DK.data(), DK.size()); |
738 | 0 | CT::unpoison(key, length); |
739 | 0 | } |
740 | | |
/*
* Number of blocks reported as the preferred parallelism for the
* implementation that will be selected at runtime.
*/
size_t aes_parallelism() {
   constexpr size_t HW_PARALLELISM = 4;         // pipelined
   constexpr size_t VPERM_PARALLELISM = 2;      // pipelined
   constexpr size_t BITSLICED_PARALLELISM = 2;  // two blocks per bitsliced batch

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      return HW_PARALLELISM;
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      return VPERM_PARALLELISM;
   }
#endif

   // Silence unused-variable warnings when a backend is compiled out
   static_cast<void>(HW_PARALLELISM);
   static_cast<void>(VPERM_PARALLELISM);

   return BITSLICED_PARALLELISM;
}
757 | | |
/*
* Name of the implementation that will be selected at runtime:
* "cpu" for hardware AES, "vperm" for the vector-permute code, and
* "base" for the bitsliced fallback.
*/
const char* aes_provider() {
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   const bool use_hw = CPUID::has_hw_aes();
   if(use_hw) {
      return "cpu";
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   const bool use_vperm = CPUID::has_vperm();
   if(use_vperm) {
      return "vperm";
   }
#endif

   return "base";
}
773 | | |
774 | | } // namespace |
775 | | |
776 | 0 | std::string AES_128::provider() const { return aes_provider(); } |
777 | | |
778 | 0 | std::string AES_192::provider() const { return aes_provider(); } |
779 | | |
780 | 0 | std::string AES_256::provider() const { return aes_provider(); } |
781 | | |
782 | 0 | size_t AES_128::parallelism() const { return aes_parallelism(); } |
783 | | |
784 | 0 | size_t AES_192::parallelism() const { return aes_parallelism(); } |
785 | | |
786 | 0 | size_t AES_256::parallelism() const { return aes_parallelism(); } |
787 | | |
788 | 0 | bool AES_128::has_keying_material() const { return !m_EK.empty(); } |
789 | | |
790 | 0 | bool AES_192::has_keying_material() const { return !m_EK.empty(); } |
791 | | |
792 | 0 | bool AES_256::has_keying_material() const { return !m_EK.empty(); } |
793 | | |
794 | 0 | void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
795 | 0 | assert_key_material_set(); |
796 | |
|
797 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
798 | 0 | if(CPUID::has_hw_aes()) { |
799 | 0 | return hw_aes_encrypt_n(in, out, blocks); |
800 | 0 | } |
801 | 0 | #endif |
802 | | |
803 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
804 | 0 | if(CPUID::has_vperm()) { |
805 | 0 | return vperm_encrypt_n(in, out, blocks); |
806 | 0 | } |
807 | 0 | #endif |
808 | | |
809 | 0 | aes_encrypt_n(in, out, blocks, m_EK); |
810 | 0 | } |
811 | | |
812 | 0 | void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
813 | 0 | assert_key_material_set(); |
814 | |
|
815 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
816 | 0 | if(CPUID::has_hw_aes()) { |
817 | 0 | return hw_aes_decrypt_n(in, out, blocks); |
818 | 0 | } |
819 | 0 | #endif |
820 | | |
821 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
822 | 0 | if(CPUID::has_vperm()) { |
823 | 0 | return vperm_decrypt_n(in, out, blocks); |
824 | 0 | } |
825 | 0 | #endif |
826 | | |
827 | 0 | aes_decrypt_n(in, out, blocks, m_DK); |
828 | 0 | } |
829 | | |
830 | 0 | void AES_128::key_schedule(const uint8_t key[], size_t length) { |
831 | 0 | #if defined(BOTAN_HAS_AES_NI) |
832 | 0 | if(CPUID::has_aes_ni()) { |
833 | 0 | return aesni_key_schedule(key, length); |
834 | 0 | } |
835 | 0 | #endif |
836 | | |
837 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
838 | 0 | if(CPUID::has_hw_aes()) { |
839 | 0 | return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); |
840 | 0 | } |
841 | 0 | #endif |
842 | | |
843 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
844 | 0 | if(CPUID::has_vperm()) { |
845 | 0 | return vperm_key_schedule(key, length); |
846 | 0 | } |
847 | 0 | #endif |
848 | | |
849 | 0 | aes_key_schedule(key, length, m_EK, m_DK); |
850 | 0 | } |
851 | | |
852 | 0 | void AES_128::clear() { |
853 | 0 | zap(m_EK); |
854 | 0 | zap(m_DK); |
855 | 0 | } |
856 | | |
857 | 0 | void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
858 | 0 | assert_key_material_set(); |
859 | |
|
860 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
861 | 0 | if(CPUID::has_hw_aes()) { |
862 | 0 | return hw_aes_encrypt_n(in, out, blocks); |
863 | 0 | } |
864 | 0 | #endif |
865 | | |
866 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
867 | 0 | if(CPUID::has_vperm()) { |
868 | 0 | return vperm_encrypt_n(in, out, blocks); |
869 | 0 | } |
870 | 0 | #endif |
871 | | |
872 | 0 | aes_encrypt_n(in, out, blocks, m_EK); |
873 | 0 | } |
874 | | |
875 | 0 | void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
876 | 0 | assert_key_material_set(); |
877 | |
|
878 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
879 | 0 | if(CPUID::has_hw_aes()) { |
880 | 0 | return hw_aes_decrypt_n(in, out, blocks); |
881 | 0 | } |
882 | 0 | #endif |
883 | | |
884 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
885 | 0 | if(CPUID::has_vperm()) { |
886 | 0 | return vperm_decrypt_n(in, out, blocks); |
887 | 0 | } |
888 | 0 | #endif |
889 | | |
890 | 0 | aes_decrypt_n(in, out, blocks, m_DK); |
891 | 0 | } |
892 | | |
893 | 0 | void AES_192::key_schedule(const uint8_t key[], size_t length) { |
894 | 0 | #if defined(BOTAN_HAS_AES_NI) |
895 | 0 | if(CPUID::has_aes_ni()) { |
896 | 0 | return aesni_key_schedule(key, length); |
897 | 0 | } |
898 | 0 | #endif |
899 | | |
900 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
901 | 0 | if(CPUID::has_hw_aes()) { |
902 | 0 | return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); |
903 | 0 | } |
904 | 0 | #endif |
905 | | |
906 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
907 | 0 | if(CPUID::has_vperm()) { |
908 | 0 | return vperm_key_schedule(key, length); |
909 | 0 | } |
910 | 0 | #endif |
911 | | |
912 | 0 | aes_key_schedule(key, length, m_EK, m_DK); |
913 | 0 | } |
914 | | |
915 | 0 | void AES_192::clear() { |
916 | 0 | zap(m_EK); |
917 | 0 | zap(m_DK); |
918 | 0 | } |
919 | | |
920 | 0 | void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
921 | 0 | assert_key_material_set(); |
922 | |
|
923 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
924 | 0 | if(CPUID::has_hw_aes()) { |
925 | 0 | return hw_aes_encrypt_n(in, out, blocks); |
926 | 0 | } |
927 | 0 | #endif |
928 | | |
929 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
930 | 0 | if(CPUID::has_vperm()) { |
931 | 0 | return vperm_encrypt_n(in, out, blocks); |
932 | 0 | } |
933 | 0 | #endif |
934 | | |
935 | 0 | aes_encrypt_n(in, out, blocks, m_EK); |
936 | 0 | } |
937 | | |
938 | 0 | void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { |
939 | 0 | assert_key_material_set(); |
940 | |
|
941 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
942 | 0 | if(CPUID::has_hw_aes()) { |
943 | 0 | return hw_aes_decrypt_n(in, out, blocks); |
944 | 0 | } |
945 | 0 | #endif |
946 | | |
947 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
948 | 0 | if(CPUID::has_vperm()) { |
949 | 0 | return vperm_decrypt_n(in, out, blocks); |
950 | 0 | } |
951 | 0 | #endif |
952 | | |
953 | 0 | aes_decrypt_n(in, out, blocks, m_DK); |
954 | 0 | } |
955 | | |
956 | 0 | void AES_256::key_schedule(const uint8_t key[], size_t length) { |
957 | 0 | #if defined(BOTAN_HAS_AES_NI) |
958 | 0 | if(CPUID::has_aes_ni()) { |
959 | 0 | return aesni_key_schedule(key, length); |
960 | 0 | } |
961 | 0 | #endif |
962 | | |
963 | 0 | #if defined(BOTAN_HAS_HW_AES_SUPPORT) |
964 | 0 | if(CPUID::has_hw_aes()) { |
965 | 0 | return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); |
966 | 0 | } |
967 | 0 | #endif |
968 | | |
969 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
970 | 0 | if(CPUID::has_vperm()) { |
971 | 0 | return vperm_key_schedule(key, length); |
972 | 0 | } |
973 | 0 | #endif |
974 | | |
975 | 0 | aes_key_schedule(key, length, m_EK, m_DK); |
976 | 0 | } |
977 | | |
978 | 0 | void AES_256::clear() { |
979 | 0 | zap(m_EK); |
980 | 0 | zap(m_DK); |
981 | 0 | } |
982 | | |
983 | | } // namespace Botan |