/src/botan/src/lib/block/aes/aes.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * AES |
3 | | * (C) 1999-2010,2015,2017,2018 Jack Lloyd |
4 | | * |
5 | | * Based on the public domain reference implementation by Paulo Barreto |
6 | | * |
7 | | * Botan is released under the Simplified BSD License (see license.txt) |
8 | | */ |
9 | | |
10 | | #include <botan/aes.h> |
11 | | #include <botan/loadstor.h> |
12 | | #include <botan/cpuid.h> |
13 | | #include <botan/rotate.h> |
14 | | #include <type_traits> |
15 | | |
16 | | /* |
17 | | * This implementation is based on table lookups which are known to be |
18 | | * vulnerable to timing and cache based side channel attacks. Some |
19 | | * countermeasures are used which may be helpful in some situations: |
20 | | * |
21 | | * - Only a single 256-word T-table is used, with rotations applied. |
22 | | * Most implementations use 4 (or sometimes 5) T-tables, which leaks |
23 | | * much more information via cache usage. |
24 | | * |
25 | | * - The TE and TD tables are computed at runtime to avoid flush+reload |
26 | | * attacks using clflush. As different processes will not share the |
27 | | * same underlying table data, an attacker can't manipulate another |
28 | | * process's cache lines via their shared reference to the library |
29 | | * read only segment. (However, prime+probe attacks are still possible.) |
30 | | * |
31 | | * - Each cache line of the lookup tables is accessed at the beginning |
32 | | * of each call to encrypt or decrypt. (See the Z variable below) |
33 | | * |
34 | | * If available SSSE3 or AES-NI are used instead of this version, as both |
35 | | * are faster and immune to side channel attacks. |
36 | | * |
37 | | * Some AES cache timing papers for reference: |
38 | | * |
39 | | * "Software mitigations to hedge AES against cache-based software side |
40 | | * channel vulnerabilities" https://eprint.iacr.org/2006/052.pdf |
41 | | * |
42 | | * "Cache Games - Bringing Access-Based Cache Attacks on AES to Practice" |
43 | | * http://www.ieee-security.org/TC/SP2011/PAPERS/2011/paper031.pdf |
44 | | * |
45 | | * "Cache-Collision Timing Attacks Against AES" Bonneau, Mironov |
46 | | * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.88.4753 |
47 | | */ |
48 | | |
49 | | namespace Botan { |
50 | | |
51 | | namespace { |
52 | | |
/*
* AES forward S-box, laid out 16 entries per row so that row r, column c
* holds the substitution for input byte 0x(r)(c). Aligned to 64 bytes.
*/
alignas(64)
const uint8_t SE[256] = {
   0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
   0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
   0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
   0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
   0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
   0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
   0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
   0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
   0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
   0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
   0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
   0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
   0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
   0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
   0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
   0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 };
77 | | |
/*
* AES inverse S-box, laid out 16 entries per row so that row r, column c
* holds the inverse substitution for input byte 0x(r)(c). Aligned to 64 bytes.
*/
alignas(64)
const uint8_t SD[256] = {
   0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
   0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
   0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
   0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
   0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
   0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
   0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
   0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
   0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
   0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
   0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
   0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
   0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
   0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
   0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
   0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D };
102 | | |
/*
* Multiplication of a byte by small constants in the AES field, built from
* xtime (multiply by 2, reducing with 0x1B when the top bit is shifted out).
* The reduction is applied with a mask rather than a branch.
*/
inline constexpr uint8_t xtime(uint8_t s)
   {
   return static_cast<uint8_t>((s << 1) ^ (0x1B & (0 - (s >> 7))));
   }

// Multiply by 4 and by 8: repeated doubling
inline constexpr uint8_t xtime4(uint8_t s)
   {
   return xtime(xtime(s));
   }

inline constexpr uint8_t xtime8(uint8_t s)
   {
   return xtime(xtime4(s));
   }

// Multiply by 3 = 2 + 1 (used for the encryption table)
inline constexpr uint8_t xtime3(uint8_t s)
   {
   return static_cast<uint8_t>(xtime(s) ^ s);
   }

// Multiply by 9, 11, 13 and 14 (used for the decryption table)
inline constexpr uint8_t xtime9(uint8_t s)
   {
   return static_cast<uint8_t>(xtime8(s) ^ s);
   }

inline constexpr uint8_t xtime11(uint8_t s)
   {
   return static_cast<uint8_t>(xtime8(s) ^ xtime(s) ^ s);
   }

inline constexpr uint8_t xtime13(uint8_t s)
   {
   return static_cast<uint8_t>(xtime8(s) ^ xtime4(s) ^ s);
   }

inline constexpr uint8_t xtime14(uint8_t s)
   {
   return static_cast<uint8_t>(xtime8(s) ^ xtime4(s) ^ xtime(s));
   }
111 | 0 | inline constexpr uint8_t xtime14(uint8_t s) { return xtime8(s) ^ xtime4(s) ^ xtime(s); } |
112 | | |
113 | | inline uint32_t SE_word(uint32_t x) |
114 | 0 | { |
115 | 0 | return make_uint32(SE[get_byte(0, x)], |
116 | 0 | SE[get_byte(1, x)], |
117 | 0 | SE[get_byte(2, x)], |
118 | 0 | SE[get_byte(3, x)]); |
119 | 0 | } |
120 | | |
121 | | const uint32_t* AES_TE() |
122 | 0 | { |
123 | 0 | class TE_Table final |
124 | 0 | { |
125 | 0 | public: |
126 | 0 | TE_Table() |
127 | 0 | { |
128 | 0 | uint32_t* p = reinterpret_cast<uint32_t*>(&data); |
129 | 0 | for(size_t i = 0; i != 256; ++i) |
130 | 0 | { |
131 | 0 | const uint8_t s = SE[i]; |
132 | 0 | p[i] = make_uint32(xtime(s), s, s, xtime3(s)); |
133 | 0 | } |
134 | 0 | } |
135 | 0 |
|
136 | 0 | const uint32_t* ptr() const |
137 | 0 | { |
138 | 0 | return reinterpret_cast<const uint32_t*>(&data); |
139 | 0 | } |
140 | 0 | private: |
141 | 0 | std::aligned_storage<256*sizeof(uint32_t), 64>::type data; |
142 | 0 | }; |
143 | 0 |
|
144 | 0 | static TE_Table table; |
145 | 0 | return table.ptr(); |
146 | 0 | } |
147 | | |
148 | | const uint32_t* AES_TD() |
149 | 0 | { |
150 | 0 | class TD_Table final |
151 | 0 | { |
152 | 0 | public: |
153 | 0 | TD_Table() |
154 | 0 | { |
155 | 0 | uint32_t* p = reinterpret_cast<uint32_t*>(&data); |
156 | 0 | for(size_t i = 0; i != 256; ++i) |
157 | 0 | { |
158 | 0 | const uint8_t s = SD[i]; |
159 | 0 | p[i] = make_uint32(xtime14(s), xtime9(s), xtime13(s), xtime11(s)); |
160 | 0 | } |
161 | 0 | } |
162 | 0 |
|
163 | 0 | const uint32_t* ptr() const |
164 | 0 | { |
165 | 0 | return reinterpret_cast<const uint32_t*>(&data); |
166 | 0 | } |
167 | 0 | private: |
168 | 0 | std::aligned_storage<256*sizeof(uint32_t), 64>::type data; |
169 | 0 | }; |
170 | 0 |
|
171 | 0 | static TD_Table table; |
172 | 0 | return table.ptr(); |
173 | 0 | } |
174 | | |
/*
* One output word of a table-based AES round: the round-key word K XORed
* with four lookups into the single table T, indexed by byte 0 of V0,
* byte 1 of V1, byte 2 of V2 and byte 3 of V3; the last three lookups are
* rotated right by 8, 16 and 24 bits so one table stands in for four.
*
* K and T are parenthesised so that compound expressions may be passed
* safely. V0..V3 are evaluated exactly once each.
*/
#define AES_T(T, K, V0, V1, V2, V3)                                     \
   ((K) ^ (T)[get_byte(0, V0)] ^                                        \
    rotr< 8>((T)[get_byte(1, V1)]) ^                                    \
    rotr<16>((T)[get_byte(2, V2)]) ^                                    \
    rotr<24>((T)[get_byte(3, V3)]))
180 | | |
181 | | /* |
182 | | * AES Encryption |
183 | | */ |
184 | | void aes_encrypt_n(const uint8_t in[], uint8_t out[], |
185 | | size_t blocks, |
186 | | const secure_vector<uint32_t>& EK, |
187 | | const secure_vector<uint8_t>& ME) |
188 | 0 | { |
189 | 0 | BOTAN_ASSERT(EK.size() && ME.size() == 16, "Key was set"); |
190 | 0 |
|
191 | 0 | const size_t cache_line_size = CPUID::cache_line_size(); |
192 | 0 | const uint32_t* TE = AES_TE(); |
193 | 0 |
|
194 | 0 | // Hit every cache line of TE |
195 | 0 | volatile uint32_t Z = 0; |
196 | 0 | for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t)) |
197 | 0 | { |
198 | 0 | Z |= TE[i]; |
199 | 0 | } |
200 | 0 | Z &= TE[82]; // this is zero, which hopefully the compiler cannot deduce |
201 | 0 |
|
202 | 0 | for(size_t i = 0; i < blocks; ++i) |
203 | 0 | { |
204 | 0 | uint32_t T0, T1, T2, T3; |
205 | 0 | load_be(in + 16*i, T0, T1, T2, T3); |
206 | 0 |
|
207 | 0 | T0 ^= EK[0]; |
208 | 0 | T1 ^= EK[1]; |
209 | 0 | T2 ^= EK[2]; |
210 | 0 | T3 ^= EK[3]; |
211 | 0 |
|
212 | 0 | T0 ^= Z; |
213 | 0 |
|
214 | 0 | uint32_t B0 = AES_T(TE, EK[4], T0, T1, T2, T3); |
215 | 0 | uint32_t B1 = AES_T(TE, EK[5], T1, T2, T3, T0); |
216 | 0 | uint32_t B2 = AES_T(TE, EK[6], T2, T3, T0, T1); |
217 | 0 | uint32_t B3 = AES_T(TE, EK[7], T3, T0, T1, T2); |
218 | 0 |
|
219 | 0 | for(size_t r = 2*4; r < EK.size(); r += 2*4) |
220 | 0 | { |
221 | 0 | T0 = AES_T(TE, EK[r ], B0, B1, B2, B3); |
222 | 0 | T1 = AES_T(TE, EK[r+1], B1, B2, B3, B0); |
223 | 0 | T2 = AES_T(TE, EK[r+2], B2, B3, B0, B1); |
224 | 0 | T3 = AES_T(TE, EK[r+3], B3, B0, B1, B2); |
225 | 0 |
|
226 | 0 | B0 = AES_T(TE, EK[r+4], T0, T1, T2, T3); |
227 | 0 | B1 = AES_T(TE, EK[r+5], T1, T2, T3, T0); |
228 | 0 | B2 = AES_T(TE, EK[r+6], T2, T3, T0, T1); |
229 | 0 | B3 = AES_T(TE, EK[r+7], T3, T0, T1, T2); |
230 | 0 | } |
231 | 0 |
|
232 | 0 | /* |
233 | 0 | * Use TE[x] >> 8 instead of SE[] so encryption only references a single |
234 | 0 | * lookup table. |
235 | 0 | */ |
236 | 0 | out[16*i+ 0] = static_cast<uint8_t>(TE[get_byte(0, B0)] >> 8) ^ ME[0]; |
237 | 0 | out[16*i+ 1] = static_cast<uint8_t>(TE[get_byte(1, B1)] >> 8) ^ ME[1]; |
238 | 0 | out[16*i+ 2] = static_cast<uint8_t>(TE[get_byte(2, B2)] >> 8) ^ ME[2]; |
239 | 0 | out[16*i+ 3] = static_cast<uint8_t>(TE[get_byte(3, B3)] >> 8) ^ ME[3]; |
240 | 0 | out[16*i+ 4] = static_cast<uint8_t>(TE[get_byte(0, B1)] >> 8) ^ ME[4]; |
241 | 0 | out[16*i+ 5] = static_cast<uint8_t>(TE[get_byte(1, B2)] >> 8) ^ ME[5]; |
242 | 0 | out[16*i+ 6] = static_cast<uint8_t>(TE[get_byte(2, B3)] >> 8) ^ ME[6]; |
243 | 0 | out[16*i+ 7] = static_cast<uint8_t>(TE[get_byte(3, B0)] >> 8) ^ ME[7]; |
244 | 0 | out[16*i+ 8] = static_cast<uint8_t>(TE[get_byte(0, B2)] >> 8) ^ ME[8]; |
245 | 0 | out[16*i+ 9] = static_cast<uint8_t>(TE[get_byte(1, B3)] >> 8) ^ ME[9]; |
246 | 0 | out[16*i+10] = static_cast<uint8_t>(TE[get_byte(2, B0)] >> 8) ^ ME[10]; |
247 | 0 | out[16*i+11] = static_cast<uint8_t>(TE[get_byte(3, B1)] >> 8) ^ ME[11]; |
248 | 0 | out[16*i+12] = static_cast<uint8_t>(TE[get_byte(0, B3)] >> 8) ^ ME[12]; |
249 | 0 | out[16*i+13] = static_cast<uint8_t>(TE[get_byte(1, B0)] >> 8) ^ ME[13]; |
250 | 0 | out[16*i+14] = static_cast<uint8_t>(TE[get_byte(2, B1)] >> 8) ^ ME[14]; |
251 | 0 | out[16*i+15] = static_cast<uint8_t>(TE[get_byte(3, B2)] >> 8) ^ ME[15]; |
252 | 0 | } |
253 | 0 | } |
254 | | |
255 | | /* |
256 | | * AES Decryption |
257 | | */ |
258 | | void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, |
259 | | const secure_vector<uint32_t>& DK, |
260 | | const secure_vector<uint8_t>& MD) |
261 | 0 | { |
262 | 0 | BOTAN_ASSERT(DK.size() && MD.size() == 16, "Key was set"); |
263 | 0 |
|
264 | 0 | const size_t cache_line_size = CPUID::cache_line_size(); |
265 | 0 | const uint32_t* TD = AES_TD(); |
266 | 0 |
|
267 | 0 | volatile uint32_t Z = 0; |
268 | 0 | for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t)) |
269 | 0 | { |
270 | 0 | Z |= TD[i]; |
271 | 0 | } |
272 | 0 | for(size_t i = 0; i < 256; i += cache_line_size) |
273 | 0 | { |
274 | 0 | Z |= SD[i]; |
275 | 0 | } |
276 | 0 | Z &= TD[99]; // this is zero, which hopefully the compiler cannot deduce |
277 | 0 |
|
278 | 0 | for(size_t i = 0; i != blocks; ++i) |
279 | 0 | { |
280 | 0 | uint32_t T0 = load_be<uint32_t>(in, 0) ^ DK[0]; |
281 | 0 | uint32_t T1 = load_be<uint32_t>(in, 1) ^ DK[1]; |
282 | 0 | uint32_t T2 = load_be<uint32_t>(in, 2) ^ DK[2]; |
283 | 0 | uint32_t T3 = load_be<uint32_t>(in, 3) ^ DK[3]; |
284 | 0 |
|
285 | 0 | T0 ^= Z; |
286 | 0 |
|
287 | 0 | uint32_t B0 = AES_T(TD, DK[4], T0, T3, T2, T1); |
288 | 0 | uint32_t B1 = AES_T(TD, DK[5], T1, T0, T3, T2); |
289 | 0 | uint32_t B2 = AES_T(TD, DK[6], T2, T1, T0, T3); |
290 | 0 | uint32_t B3 = AES_T(TD, DK[7], T3, T2, T1, T0); |
291 | 0 |
|
292 | 0 | for(size_t r = 2*4; r < DK.size(); r += 2*4) |
293 | 0 | { |
294 | 0 | T0 = AES_T(TD, DK[r ], B0, B3, B2, B1); |
295 | 0 | T1 = AES_T(TD, DK[r+1], B1, B0, B3, B2); |
296 | 0 | T2 = AES_T(TD, DK[r+2], B2, B1, B0, B3); |
297 | 0 | T3 = AES_T(TD, DK[r+3], B3, B2, B1, B0); |
298 | 0 |
|
299 | 0 | B0 = AES_T(TD, DK[r+4], T0, T3, T2, T1); |
300 | 0 | B1 = AES_T(TD, DK[r+5], T1, T0, T3, T2); |
301 | 0 | B2 = AES_T(TD, DK[r+6], T2, T1, T0, T3); |
302 | 0 | B3 = AES_T(TD, DK[r+7], T3, T2, T1, T0); |
303 | 0 | } |
304 | 0 |
|
305 | 0 | out[ 0] = SD[get_byte(0, B0)] ^ MD[0]; |
306 | 0 | out[ 1] = SD[get_byte(1, B3)] ^ MD[1]; |
307 | 0 | out[ 2] = SD[get_byte(2, B2)] ^ MD[2]; |
308 | 0 | out[ 3] = SD[get_byte(3, B1)] ^ MD[3]; |
309 | 0 | out[ 4] = SD[get_byte(0, B1)] ^ MD[4]; |
310 | 0 | out[ 5] = SD[get_byte(1, B0)] ^ MD[5]; |
311 | 0 | out[ 6] = SD[get_byte(2, B3)] ^ MD[6]; |
312 | 0 | out[ 7] = SD[get_byte(3, B2)] ^ MD[7]; |
313 | 0 | out[ 8] = SD[get_byte(0, B2)] ^ MD[8]; |
314 | 0 | out[ 9] = SD[get_byte(1, B1)] ^ MD[9]; |
315 | 0 | out[10] = SD[get_byte(2, B0)] ^ MD[10]; |
316 | 0 | out[11] = SD[get_byte(3, B3)] ^ MD[11]; |
317 | 0 | out[12] = SD[get_byte(0, B3)] ^ MD[12]; |
318 | 0 | out[13] = SD[get_byte(1, B2)] ^ MD[13]; |
319 | 0 | out[14] = SD[get_byte(2, B1)] ^ MD[14]; |
320 | 0 | out[15] = SD[get_byte(3, B0)] ^ MD[15]; |
321 | 0 |
|
322 | 0 | in += 16; |
323 | 0 | out += 16; |
324 | 0 | } |
325 | 0 | } |
326 | | |
327 | | void aes_key_schedule(const uint8_t key[], size_t length, |
328 | | secure_vector<uint32_t>& EK, |
329 | | secure_vector<uint32_t>& DK, |
330 | | secure_vector<uint8_t>& ME, |
331 | | secure_vector<uint8_t>& MD) |
332 | 0 | { |
333 | 0 | static const uint32_t RC[10] = { |
334 | 0 | 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, |
335 | 0 | 0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000 }; |
336 | 0 |
|
337 | 0 | const size_t rounds = (length / 4) + 6; |
338 | 0 |
|
339 | 0 | secure_vector<uint32_t> XEK(length + 32), XDK(length + 32); |
340 | 0 |
|
341 | 0 | const size_t X = length / 4; |
342 | 0 |
|
343 | 0 | // Can't happen, but make static analyzers happy |
344 | 0 | BOTAN_ARG_CHECK(X == 4 || X == 6 || X == 8, "Invalid AES key size"); |
345 | 0 |
|
346 | 0 | const uint32_t* TD = AES_TD(); |
347 | 0 |
|
348 | 0 | // Prefetch TD and SE which are used later on in this function |
349 | 0 | volatile uint32_t Z = 0; |
350 | 0 | const size_t cache_line_size = CPUID::cache_line_size(); |
351 | 0 |
|
352 | 0 | for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t)) |
353 | 0 | { |
354 | 0 | Z |= TD[i]; |
355 | 0 | } |
356 | 0 | for(size_t i = 0; i < 256; i += cache_line_size) |
357 | 0 | { |
358 | 0 | Z |= SE[i]; |
359 | 0 | } |
360 | 0 | Z &= TD[99]; // this is zero, which hopefully the compiler cannot deduce |
361 | 0 |
|
362 | 0 | for(size_t i = 0; i != X; ++i) |
363 | 0 | XEK[i] = Z ^ load_be<uint32_t>(key, i); |
364 | 0 |
|
365 | 0 | for(size_t i = X; i < 4*(rounds+1); i += X) |
366 | 0 | { |
367 | 0 | XEK[i] = XEK[i-X] ^ RC[(i-X)/X] ^ SE_word(rotl<8>(XEK[i-1])); |
368 | 0 |
|
369 | 0 | for(size_t j = 1; j != X; ++j) |
370 | 0 | { |
371 | 0 | XEK[i+j] = XEK[i+j-X]; |
372 | 0 |
|
373 | 0 | if(X == 8 && j == 4) |
374 | 0 | XEK[i+j] ^= SE_word(XEK[i+j-1]); |
375 | 0 | else |
376 | 0 | XEK[i+j] ^= XEK[i+j-1]; |
377 | 0 | } |
378 | 0 | } |
379 | 0 |
|
380 | 0 | for(size_t i = 0; i != 4*(rounds+1); i += 4) |
381 | 0 | { |
382 | 0 | XDK[i ] = XEK[4*rounds-i ]; |
383 | 0 | XDK[i+1] = XEK[4*rounds-i+1]; |
384 | 0 | XDK[i+2] = XEK[4*rounds-i+2]; |
385 | 0 | XDK[i+3] = XEK[4*rounds-i+3]; |
386 | 0 | } |
387 | 0 |
|
388 | 0 | for(size_t i = 4; i != length + 24; ++i) |
389 | 0 | { |
390 | 0 | XDK[i] = Z ^ SE_word(XDK[i]); |
391 | 0 | XDK[i] = AES_T(TD, 0, XDK[i], XDK[i], XDK[i], XDK[i]); |
392 | 0 | } |
393 | 0 |
|
394 | 0 | ME.resize(16); |
395 | 0 | MD.resize(16); |
396 | 0 |
|
397 | 0 | for(size_t i = 0; i != 4; ++i) |
398 | 0 | { |
399 | 0 | store_be(XEK[i+4*rounds], &ME[4*i]); |
400 | 0 | store_be(XEK[i], &MD[4*i]); |
401 | 0 | } |
402 | 0 |
|
403 | 0 | EK.resize(length + 24); |
404 | 0 | DK.resize(length + 24); |
405 | 0 | copy_mem(EK.data(), XEK.data(), EK.size()); |
406 | 0 | copy_mem(DK.data(), XDK.data(), DK.size()); |
407 | 0 |
|
408 | | #if defined(BOTAN_HAS_AES_ARMV8) |
409 | | if(CPUID::has_arm_aes()) |
410 | | { |
411 | | // ARM needs the subkeys to be byte reversed |
412 | | |
413 | | for(size_t i = 0; i != EK.size(); ++i) |
414 | | EK[i] = reverse_bytes(EK[i]); |
415 | | for(size_t i = 0; i != DK.size(); ++i) |
416 | | DK[i] = reverse_bytes(DK[i]); |
417 | | } |
418 | | #endif |
419 | |
|
420 | 0 | } |
421 | | |
422 | | #undef AES_T |
423 | | |
/*
* Reported parallelism of the AES implementation in use: 4 when one of the
* hardware backends (AES-NI, POWER8 crypto, ARMv8 AES) is compiled in and
* available at runtime, 1 otherwise.
*/
size_t aes_parallelism()
   {
   bool hw_backend = false;

#if defined(BOTAN_HAS_AES_NI)
   hw_backend = hw_backend || CPUID::has_aes_ni();
#endif

#if defined(BOTAN_HAS_AES_POWER8)
   hw_backend = hw_backend || CPUID::has_power_crypto();
#endif

#if defined(BOTAN_HAS_AES_ARMV8)
   hw_backend = hw_backend || CPUID::has_arm_aes();
#endif

   return hw_backend ? 4 : 1;
   }
449 | | |
/*
* Name of the AES implementation that will be used on this CPU. Backends
* are probed in the order AES-NI, POWER8, ARMv8, vperm; "base" is the
* table-based implementation in this file.
*/
const char* aes_provider()
   {
#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni())
      return "aesni";
#endif

#if defined(BOTAN_HAS_AES_POWER8)
   if(CPUID::has_power_crypto())
      return "power8";
#endif

#if defined(BOTAN_HAS_AES_ARMV8)
   if(CPUID::has_arm_aes())
      return "armv8";
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm())
      return "vperm";
#endif

   return "base";
   }
482 | | |
483 | | } |
484 | | |
485 | 0 | std::string AES_128::provider() const { return aes_provider(); } |
486 | 0 | std::string AES_192::provider() const { return aes_provider(); } |
487 | 0 | std::string AES_256::provider() const { return aes_provider(); } |
488 | | |
489 | 898 | size_t AES_128::parallelism() const { return aes_parallelism(); } |
490 | 0 | size_t AES_192::parallelism() const { return aes_parallelism(); } |
491 | 1.74k | size_t AES_256::parallelism() const { return aes_parallelism(); } |
492 | | |
493 | | void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
494 | 11.4k | { |
495 | 11.4k | verify_key_set(m_EK.empty() == false); |
496 | 11.4k | |
497 | 11.4k | #if defined(BOTAN_HAS_AES_NI) |
498 | 11.4k | if(CPUID::has_aes_ni()) |
499 | 11.4k | { |
500 | 11.4k | return aesni_encrypt_n(in, out, blocks); |
501 | 11.4k | } |
502 | 0 | #endif |
503 | 0 | |
504 | | #if defined(BOTAN_HAS_AES_ARMV8) |
505 | | if(CPUID::has_arm_aes()) |
506 | | { |
507 | | return armv8_encrypt_n(in, out, blocks); |
508 | | } |
509 | | #endif |
510 | | |
511 | | #if defined(BOTAN_HAS_AES_POWER8) |
512 | | if(CPUID::has_power_crypto()) |
513 | | { |
514 | | return power8_encrypt_n(in, out, blocks); |
515 | | } |
516 | | #endif |
517 | | |
518 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
519 | 0 | if(CPUID::has_vperm()) |
520 | 0 | { |
521 | 0 | return vperm_encrypt_n(in, out, blocks); |
522 | 0 | } |
523 | 0 | #endif |
524 | 0 | |
525 | 0 | aes_encrypt_n(in, out, blocks, m_EK, m_ME); |
526 | 0 | } |
527 | | |
528 | | void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
529 | 774 | { |
530 | 774 | verify_key_set(m_DK.empty() == false); |
531 | 774 | |
532 | 774 | #if defined(BOTAN_HAS_AES_NI) |
533 | 774 | if(CPUID::has_aes_ni()) |
534 | 774 | { |
535 | 774 | return aesni_decrypt_n(in, out, blocks); |
536 | 774 | } |
537 | 0 | #endif |
538 | 0 | |
539 | | #if defined(BOTAN_HAS_AES_ARMV8) |
540 | | if(CPUID::has_arm_aes()) |
541 | | { |
542 | | return armv8_decrypt_n(in, out, blocks); |
543 | | } |
544 | | #endif |
545 | | |
546 | | #if defined(BOTAN_HAS_AES_POWER8) |
547 | | if(CPUID::has_power_crypto()) |
548 | | { |
549 | | return power8_decrypt_n(in, out, blocks); |
550 | | } |
551 | | #endif |
552 | | |
553 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
554 | 0 | if(CPUID::has_vperm()) |
555 | 0 | { |
556 | 0 | return vperm_decrypt_n(in, out, blocks); |
557 | 0 | } |
558 | 0 | #endif |
559 | 0 | |
560 | 0 | aes_decrypt_n(in, out, blocks, m_DK, m_MD); |
561 | 0 | } |
562 | | |
563 | | void AES_128::key_schedule(const uint8_t key[], size_t length) |
564 | 618 | { |
565 | 618 | #if defined(BOTAN_HAS_AES_NI) |
566 | 618 | if(CPUID::has_aes_ni()) |
567 | 618 | { |
568 | 618 | return aesni_key_schedule(key, length); |
569 | 618 | } |
570 | 0 | #endif |
571 | 0 | |
572 | | #if defined(BOTAN_HAS_AES_ARMV8) |
573 | | if(CPUID::has_arm_aes()) |
574 | | { |
575 | | return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
576 | | } |
577 | | #endif |
578 | | |
579 | | #if defined(BOTAN_HAS_AES_POWER8) |
580 | | if(CPUID::has_power_crypto()) |
581 | | { |
582 | | return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
583 | | } |
584 | | #endif |
585 | | |
586 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
587 | 0 | if(CPUID::has_vperm()) |
588 | 0 | { |
589 | 0 | return vperm_key_schedule(key, length); |
590 | 0 | } |
591 | 0 | #endif |
592 | 0 | |
593 | 0 | aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
594 | 0 | } |
595 | | |
596 | | void AES_128::clear() |
597 | 0 | { |
598 | 0 | zap(m_EK); |
599 | 0 | zap(m_DK); |
600 | 0 | zap(m_ME); |
601 | 0 | zap(m_MD); |
602 | 0 | } |
603 | | |
604 | | void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
605 | 0 | { |
606 | 0 | verify_key_set(m_EK.empty() == false); |
607 | 0 |
|
608 | 0 | #if defined(BOTAN_HAS_AES_NI) |
609 | 0 | if(CPUID::has_aes_ni()) |
610 | 0 | { |
611 | 0 | return aesni_encrypt_n(in, out, blocks); |
612 | 0 | } |
613 | 0 | #endif |
614 | 0 | |
615 | | #if defined(BOTAN_HAS_AES_ARMV8) |
616 | | if(CPUID::has_arm_aes()) |
617 | | { |
618 | | return armv8_encrypt_n(in, out, blocks); |
619 | | } |
620 | | #endif |
621 | | |
622 | | #if defined(BOTAN_HAS_AES_POWER8) |
623 | | if(CPUID::has_power_crypto()) |
624 | | { |
625 | | return power8_encrypt_n(in, out, blocks); |
626 | | } |
627 | | #endif |
628 | | |
629 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
630 | 0 | if(CPUID::has_vperm()) |
631 | 0 | { |
632 | 0 | return vperm_encrypt_n(in, out, blocks); |
633 | 0 | } |
634 | 0 | #endif |
635 | 0 | |
636 | 0 | aes_encrypt_n(in, out, blocks, m_EK, m_ME); |
637 | 0 | } |
638 | | |
639 | | void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
640 | 0 | { |
641 | 0 | verify_key_set(m_DK.empty() == false); |
642 | 0 |
|
643 | 0 | #if defined(BOTAN_HAS_AES_NI) |
644 | 0 | if(CPUID::has_aes_ni()) |
645 | 0 | { |
646 | 0 | return aesni_decrypt_n(in, out, blocks); |
647 | 0 | } |
648 | 0 | #endif |
649 | 0 | |
650 | | #if defined(BOTAN_HAS_AES_ARMV8) |
651 | | if(CPUID::has_arm_aes()) |
652 | | { |
653 | | return armv8_decrypt_n(in, out, blocks); |
654 | | } |
655 | | #endif |
656 | | |
657 | | #if defined(BOTAN_HAS_AES_POWER8) |
658 | | if(CPUID::has_power_crypto()) |
659 | | { |
660 | | return power8_decrypt_n(in, out, blocks); |
661 | | } |
662 | | #endif |
663 | | |
664 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
665 | 0 | if(CPUID::has_vperm()) |
666 | 0 | { |
667 | 0 | return vperm_decrypt_n(in, out, blocks); |
668 | 0 | } |
669 | 0 | #endif |
670 | 0 | |
671 | 0 | aes_decrypt_n(in, out, blocks, m_DK, m_MD); |
672 | 0 | } |
673 | | |
674 | | void AES_192::key_schedule(const uint8_t key[], size_t length) |
675 | 0 | { |
676 | 0 | #if defined(BOTAN_HAS_AES_NI) |
677 | 0 | if(CPUID::has_aes_ni()) |
678 | 0 | { |
679 | 0 | return aesni_key_schedule(key, length); |
680 | 0 | } |
681 | 0 | #endif |
682 | 0 | |
683 | | #if defined(BOTAN_HAS_AES_ARMV8) |
684 | | if(CPUID::has_arm_aes()) |
685 | | { |
686 | | return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
687 | | } |
688 | | #endif |
689 | | |
690 | | #if defined(BOTAN_HAS_AES_POWER8) |
691 | | if(CPUID::has_power_crypto()) |
692 | | { |
693 | | return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
694 | | } |
695 | | #endif |
696 | | |
697 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
698 | 0 | if(CPUID::has_vperm()) |
699 | 0 | { |
700 | 0 | return vperm_key_schedule(key, length); |
701 | 0 | } |
702 | 0 | #endif |
703 | 0 | |
704 | 0 | aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
705 | 0 | } |
706 | | |
707 | | void AES_192::clear() |
708 | 0 | { |
709 | 0 | zap(m_EK); |
710 | 0 | zap(m_DK); |
711 | 0 | zap(m_ME); |
712 | 0 | zap(m_MD); |
713 | 0 | } |
714 | | |
715 | | void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
716 | 8.96k | { |
717 | 8.96k | verify_key_set(m_EK.empty() == false); |
718 | 8.96k | |
719 | 8.96k | #if defined(BOTAN_HAS_AES_NI) |
720 | 8.96k | if(CPUID::has_aes_ni()) |
721 | 8.96k | { |
722 | 8.96k | return aesni_encrypt_n(in, out, blocks); |
723 | 8.96k | } |
724 | 0 | #endif |
725 | 0 | |
726 | | #if defined(BOTAN_HAS_AES_ARMV8) |
727 | | if(CPUID::has_arm_aes()) |
728 | | { |
729 | | return armv8_encrypt_n(in, out, blocks); |
730 | | } |
731 | | #endif |
732 | | |
733 | | #if defined(BOTAN_HAS_AES_POWER8) |
734 | | if(CPUID::has_power_crypto()) |
735 | | { |
736 | | return power8_encrypt_n(in, out, blocks); |
737 | | } |
738 | | #endif |
739 | | |
740 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
741 | 0 | if(CPUID::has_vperm()) |
742 | 0 | { |
743 | 0 | return vperm_encrypt_n(in, out, blocks); |
744 | 0 | } |
745 | 0 | #endif |
746 | 0 | |
747 | 0 | aes_encrypt_n(in, out, blocks, m_EK, m_ME); |
748 | 0 | } |
749 | | |
750 | | void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const |
751 | 707 | { |
752 | 707 | verify_key_set(m_DK.empty() == false); |
753 | 707 | |
754 | 707 | #if defined(BOTAN_HAS_AES_NI) |
755 | 707 | if(CPUID::has_aes_ni()) |
756 | 707 | { |
757 | 707 | return aesni_decrypt_n(in, out, blocks); |
758 | 707 | } |
759 | 0 | #endif |
760 | 0 | |
761 | | #if defined(BOTAN_HAS_AES_ARMV8) |
762 | | if(CPUID::has_arm_aes()) |
763 | | { |
764 | | return armv8_decrypt_n(in, out, blocks); |
765 | | } |
766 | | #endif |
767 | | |
768 | | #if defined(BOTAN_HAS_AES_POWER8) |
769 | | if(CPUID::has_power_crypto()) |
770 | | { |
771 | | return power8_decrypt_n(in, out, blocks); |
772 | | } |
773 | | #endif |
774 | | |
775 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
776 | 0 | if(CPUID::has_vperm()) |
777 | 0 | { |
778 | 0 | return vperm_decrypt_n(in, out, blocks); |
779 | 0 | } |
780 | 0 | #endif |
781 | 0 | |
782 | 0 | aes_decrypt_n(in, out, blocks, m_DK, m_MD); |
783 | 0 | } |
784 | | |
785 | | void AES_256::key_schedule(const uint8_t key[], size_t length) |
786 | 1.01k | { |
787 | 1.01k | #if defined(BOTAN_HAS_AES_NI) |
788 | 1.01k | if(CPUID::has_aes_ni()) |
789 | 1.01k | { |
790 | 1.01k | return aesni_key_schedule(key, length); |
791 | 1.01k | } |
792 | 0 | #endif |
793 | 0 | |
794 | | #if defined(BOTAN_HAS_AES_ARMV8) |
795 | | if(CPUID::has_arm_aes()) |
796 | | { |
797 | | return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
798 | | } |
799 | | #endif |
800 | | |
801 | | #if defined(BOTAN_HAS_AES_POWER8) |
802 | | if(CPUID::has_power_crypto()) |
803 | | { |
804 | | return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
805 | | } |
806 | | #endif |
807 | | |
808 | 0 | #if defined(BOTAN_HAS_AES_VPERM) |
809 | 0 | if(CPUID::has_vperm()) |
810 | 0 | { |
811 | 0 | return vperm_key_schedule(key, length); |
812 | 0 | } |
813 | 0 | #endif |
814 | 0 | |
815 | 0 | aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD); |
816 | 0 | } |
817 | | |
818 | | void AES_256::clear() |
819 | 0 | { |
820 | 0 | zap(m_EK); |
821 | 0 | zap(m_DK); |
822 | 0 | zap(m_ME); |
823 | 0 | zap(m_MD); |
824 | 0 | } |
825 | | |
826 | | } |