/rust/registry/src/index.crates.io-1949cf8c6b5b557f/aes-0.8.4/src/soft/fixslice64.rs
//! Fixsliced implementations of AES-128, AES-192 and AES-256 (64-bit)
//! adapted from the C implementation.
//!
//! All implementations are fully bitsliced and do not rely on any
//! Look-Up Table (LUT).
//!
//! See the paper at <https://eprint.iacr.org/2020/1123.pdf> for more details.
//!
//! # Author (original C code)
//!
//! Alexandre Adomnicai, Nanyang Technological University, Singapore
//! <alexandre.adomnicai@ntu.edu.sg>
//!
//! Originally licensed MIT. Relicensed as Apache 2.0+MIT with permission.

#![allow(clippy::unreadable_literal)]

use crate::Block;
use cipher::{consts::U4, generic_array::GenericArray};

/// AES block batch size for this implementation
pub(crate) type FixsliceBlocks = U4;

pub(crate) type BatchBlocks = GenericArray<Block, FixsliceBlocks>;

/// AES-128 round keys (11 round keys × 8 bit-slices = 88 words)
pub(crate) type FixsliceKeys128 = [u64; 88];

/// AES-192 round keys (13 round keys × 8 bit-slices = 104 words)
pub(crate) type FixsliceKeys192 = [u64; 104];

/// AES-256 round keys (15 round keys × 8 bit-slices = 120 words)
pub(crate) type FixsliceKeys256 = [u64; 120];

/// 512-bit internal state: four bitsliced 128-bit blocks, where `state[i]`
/// collects bit `i` of every byte
pub(crate) type State = [u64; 8];

/// Fully bitsliced AES-128 key schedule to match the fully-fixsliced representation.
pub(crate) fn aes128_key_schedule(key: &[u8; 16]) -> FixsliceKeys128 {
    let mut rkeys = [0u64; 88];

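    // The 128-bit key is replicated into all four block slots, so the derived
    // bitsliced round keys apply uniformly to every block of a batch.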
    bitslice(&mut rkeys[..8], key, key, key, key);

    let mut rk_off = 0;
    for rcon in 0..10 {
        memshift32(&mut rkeys, rk_off);
        rk_off += 8;

        sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
        sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);

        if rcon < 8 {
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
        } else {
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 8);
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 7);
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 5);
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 4);
        }

        xor_columns(&mut rkeys, rk_off, 8, ror_distance(1, 3));
    }

    // Adjust to match fixslicing format
    #[cfg(aes_compact)]
    {
        for i in (8..88).step_by(16) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
        }
    }
    #[cfg(not(aes_compact))]
    {
        for i in (8..72).step_by(32) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
            inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
            inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
        }
        inv_shift_rows_1(&mut rkeys[72..80]);
    }

    // Account for NOTs removed from sub_bytes
    for i in 1..11 {
        sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
    }

    rkeys
}

/// Fully bitsliced AES-192 key schedule to match the fully-fixsliced representation.
pub(crate) fn aes192_key_schedule(key: &[u8; 24]) -> FixsliceKeys192 {
    let mut rkeys = [0u64; 104];
    let mut tmp = [0u64; 8];

    bitslice(
        &mut rkeys[..8],
        &key[..16],
        &key[..16],
        &key[..16],
        &key[..16],
    );
    bitslice(&mut tmp, &key[8..], &key[8..], &key[8..], &key[8..]);

    let mut rcon = 0;
    let mut rk_off = 8;

    loop {
        for i in 0..8 {
            rkeys[rk_off + i] = (0x00ff00ff00ff00ff & (tmp[i] >> 8))
                | (0xff00ff00ff00ff00 & (rkeys[(rk_off - 8) + i] << 8));
        }

        sub_bytes(&mut tmp);
        sub_bytes_nots(&mut tmp);

        add_round_constant_bit(&mut tmp, rcon);
        rcon += 1;

        for i in 0..8 {
            let mut ti = rkeys[rk_off + i];
            ti ^= 0x0f000f000f000f00 & ror(tmp[i], ror_distance(1, 1));
            ti ^= 0xf000f000f000f000 & (ti << 4);
            tmp[i] = ti;
        }
        rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
        rk_off += 8;

        for i in 0..8 {
            let ui = tmp[i];
            let mut ti = (0x00ff00ff00ff00ff & (rkeys[(rk_off - 16) + i] >> 8))
                | (0xff00ff00ff00ff00 & (ui << 8));
            ti ^= 0x000f000f000f000f & (ui >> 12);
            tmp[i] = ti
                ^ (0xfff0fff0fff0fff0 & (ti << 4))
                ^ (0xff00ff00ff00ff00 & (ti << 8))
                ^ (0xf000f000f000f000 & (ti << 12));
        }
        rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
        rk_off += 8;

        sub_bytes(&mut tmp);
        sub_bytes_nots(&mut tmp);

        add_round_constant_bit(&mut tmp, rcon);
        rcon += 1;

        for i in 0..8 {
            let mut ti = (0x00ff00ff00ff00ff & (rkeys[(rk_off - 16) + i] >> 8))
                | (0xff00ff00ff00ff00 & (rkeys[(rk_off - 8) + i] << 8));
            ti ^= 0x000f000f000f000f & ror(tmp[i], ror_distance(1, 3));
            rkeys[rk_off + i] = ti
                ^ (0xfff0fff0fff0fff0 & (ti << 4))
                ^ (0xff00ff00ff00ff00 & (ti << 8))
                ^ (0xf000f000f000f000 & (ti << 12));
        }
        rk_off += 8;

        if rcon >= 8 {
            break;
        }

        for i in 0..8 {
            let ui = rkeys[(rk_off - 8) + i];
            let mut ti = rkeys[(rk_off - 16) + i];
            ti ^= 0x0f000f000f000f00 & (ui >> 4);
            ti ^= 0xf000f000f000f000 & (ti << 4);
            tmp[i] = ti;
        }
    }

    // Adjust to match fixslicing format
    #[cfg(aes_compact)]
    {
        for i in (8..104).step_by(16) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
        }
    }
    #[cfg(not(aes_compact))]
    {
        for i in (0..96).step_by(32) {
            inv_shift_rows_1(&mut rkeys[(i + 8)..(i + 16)]);
            inv_shift_rows_2(&mut rkeys[(i + 16)..(i + 24)]);
            inv_shift_rows_3(&mut rkeys[(i + 24)..(i + 32)]);
        }
    }

    // Account for NOTs removed from sub_bytes
    for i in 1..13 {
        sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
    }

    rkeys
}

/// Fully bitsliced AES-256 key schedule to match the fully-fixsliced representation.
pub(crate) fn aes256_key_schedule(key: &[u8; 32]) -> FixsliceKeys256 {
    let mut rkeys = [0u64; 120];

    bitslice(
        &mut rkeys[..8],
        &key[..16],
        &key[..16],
        &key[..16],
        &key[..16],
    );
    bitslice(
        &mut rkeys[8..16],
        &key[16..],
        &key[16..],
        &key[16..],
        &key[16..],
    );

    let mut rk_off = 8;

    let mut rcon = 0;
    loop {
        memshift32(&mut rkeys, rk_off);
        rk_off += 8;

        sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
        sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);

        add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
        xor_columns(&mut rkeys, rk_off, 16, ror_distance(1, 3));
        rcon += 1;

        if rcon == 7 {
            break;
        }

        memshift32(&mut rkeys, rk_off);
        rk_off += 8;

        sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
        sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);

        xor_columns(&mut rkeys, rk_off, 16, ror_distance(0, 3));
    }

    // Adjust to match fixslicing format
    #[cfg(aes_compact)]
    {
        for i in (8..120).step_by(16) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
        }
    }
    #[cfg(not(aes_compact))]
    {
        for i in (8..104).step_by(32) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
            inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
            inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
        }
        inv_shift_rows_1(&mut rkeys[104..112]);
    }

    // Account for NOTs removed from sub_bytes
    for i in 1..15 {
        sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
    }

    rkeys
}

/// Fully-fixsliced AES-128 decryption (the InvShiftRows is completely omitted).
///
/// Decrypts four blocks in parallel, returning the decrypted batch.
pub(crate) fn aes128_decrypt(rkeys: &FixsliceKeys128, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[80..]);
    inv_sub_bytes(&mut state);

    #[cfg(not(aes_compact))]
    {
        inv_shift_rows_2(&mut state);
    }

    let mut rk_off = 72;
    loop {
        #[cfg(aes_compact)]
        {
            inv_shift_rows_2(&mut state);
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_1(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        if rk_off == 0 {
            break;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_0(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        #[cfg(not(aes_compact))]
        {
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_3(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;

            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_2(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;
        }
    }

    add_round_key(&mut state, &rkeys[..8]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-128 encryption (the ShiftRows is completely omitted).
///
/// Encrypts four blocks in parallel, returning the encrypted batch.
pub(crate) fn aes128_encrypt(rkeys: &FixsliceKeys128, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[..8]);

    let mut rk_off = 8;
    loop {
        sub_bytes(&mut state);
        mix_columns_1(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;

        #[cfg(aes_compact)]
        {
            shift_rows_2(&mut state);
        }

        if rk_off == 80 {
            break;
        }

        #[cfg(not(aes_compact))]
        {
            sub_bytes(&mut state);
            mix_columns_2(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;

            sub_bytes(&mut state);
            mix_columns_3(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;
        }

        sub_bytes(&mut state);
        mix_columns_0(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;
    }

    #[cfg(not(aes_compact))]
    {
        shift_rows_2(&mut state);
    }

    sub_bytes(&mut state);
    add_round_key(&mut state, &rkeys[80..]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-192 decryption (the InvShiftRows is completely omitted).
///
/// Decrypts four blocks in parallel, returning the decrypted batch.
pub(crate) fn aes192_decrypt(rkeys: &FixsliceKeys192, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[96..]);
    inv_sub_bytes(&mut state);

    let mut rk_off = 88;
    loop {
        #[cfg(aes_compact)]
        {
            inv_shift_rows_2(&mut state);
        }
        #[cfg(not(aes_compact))]
        {
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_3(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;

            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_2(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_1(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        if rk_off == 0 {
            break;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_0(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;
    }

    add_round_key(&mut state, &rkeys[..8]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-192 encryption (the ShiftRows is completely omitted).
///
/// Encrypts four blocks in parallel, returning the encrypted batch.
pub(crate) fn aes192_encrypt(rkeys: &FixsliceKeys192, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[..8]);

    let mut rk_off = 8;
    loop {
        sub_bytes(&mut state);
        mix_columns_1(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;

        #[cfg(aes_compact)]
        {
            shift_rows_2(&mut state);
        }
        #[cfg(not(aes_compact))]
        {
            sub_bytes(&mut state);
            mix_columns_2(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;

            sub_bytes(&mut state);
            mix_columns_3(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;
        }

        if rk_off == 96 {
            break;
        }

        sub_bytes(&mut state);
        mix_columns_0(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;
    }

    sub_bytes(&mut state);
    add_round_key(&mut state, &rkeys[96..]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-256 decryption (the InvShiftRows is completely omitted).
///
/// Decrypts four blocks in parallel, returning the decrypted batch.
pub(crate) fn aes256_decrypt(rkeys: &FixsliceKeys256, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[112..]);
    inv_sub_bytes(&mut state);

    #[cfg(not(aes_compact))]
    {
        inv_shift_rows_2(&mut state);
    }

    let mut rk_off = 104;
    loop {
        #[cfg(aes_compact)]
        {
            inv_shift_rows_2(&mut state);
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_1(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        if rk_off == 0 {
            break;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_0(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        #[cfg(not(aes_compact))]
        {
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_3(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;

            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_2(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;
        }
    }

    add_round_key(&mut state, &rkeys[..8]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-256 encryption (the ShiftRows is completely omitted).
///
/// Encrypts four blocks in parallel, returning the encrypted batch.
pub(crate) fn aes256_encrypt(rkeys: &FixsliceKeys256, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[..8]);

    let mut rk_off = 8;
    loop {
        sub_bytes(&mut state);
        mix_columns_1(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;

        #[cfg(aes_compact)]
        {
            shift_rows_2(&mut state);
        }

        if rk_off == 112 {
            break;
        }

        #[cfg(not(aes_compact))]
        {
            sub_bytes(&mut state);
            mix_columns_2(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;

            sub_bytes(&mut state);
            mix_columns_3(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;
        }

        sub_bytes(&mut state);
        mix_columns_0(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;
    }

    #[cfg(not(aes_compact))]
    {
        shift_rows_2(&mut state);
    }

    sub_bytes(&mut state);
    add_round_key(&mut state, &rkeys[112..]);

    inv_bitslice(&state)
}

/// Note that the four bitwise NOTs (^= 0xffffffffffffffff) are accounted for here
/// so that this is a true inverse of `sub_bytes`.
fn inv_sub_bytes(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);

    // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
    // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)

    let u7 = state[0];
    let u6 = state[1];
    let u5 = state[2];
    let u4 = state[3];
    let u3 = state[4];
    let u2 = state[5];
    let u1 = state[6];
    let u0 = state[7];

    let t23 = u0 ^ u3;
    let t8 = u1 ^ t23;
    let m2 = t23 & t8;
    let t4 = u4 ^ t8;
    let t22 = u1 ^ u3;
    let t2 = u0 ^ u1;
    let t1 = u3 ^ u4;
    // t23 -> stack
    let t9 = u7 ^ t1;
    // t8 -> stack
    let m7 = t22 & t9;
    // t9 -> stack
    let t24 = u4 ^ u7;
    // m7 -> stack
    let t10 = t2 ^ t24;
    // u4 -> stack
    let m14 = t2 & t10;
    let r5 = u6 ^ u7;
    // m2 -> stack
    let t3 = t1 ^ r5;
    // t2 -> stack
    let t13 = t2 ^ r5;
    let t19 = t22 ^ r5;
    // t3 -> stack
    let t17 = u2 ^ t19;
    // t4 -> stack
    let t25 = u2 ^ t1;
    let r13 = u1 ^ u6;
    // t25 -> stack
    let t20 = t24 ^ r13;
    // t17 -> stack
    let m9 = t20 & t17;
    // t20 -> stack
    let r17 = u2 ^ u5;
    // t22 -> stack
    let t6 = t22 ^ r17;
    // t13 -> stack
    let m1 = t13 & t6;
    let y5 = u0 ^ r17;
    let m4 = t19 & y5;
    let m5 = m4 ^ m1;
    let m17 = m5 ^ t24;
    let r18 = u5 ^ u6;
    let t27 = t1 ^ r18;
    let t15 = t10 ^ t27;
    // t6 -> stack
    let m11 = t1 & t15;
    let m15 = m14 ^ m11;
    let m21 = m17 ^ m15;
    // t1 -> stack
    // t4 <- stack
    let m12 = t4 & t27;
    let m13 = m12 ^ m11;
    let t14 = t10 ^ r18;
    let m3 = t14 ^ m1;
    // m2 <- stack
    let m16 = m3 ^ m2;
    let m20 = m16 ^ m13;
    // u4 <- stack
    let r19 = u2 ^ u4;
    let t16 = r13 ^ r19;
    // t3 <- stack
    let t26 = t3 ^ t16;
    let m6 = t3 & t16;
    let m8 = t26 ^ m6;
    // t10 -> stack
    // m7 <- stack
    let m18 = m8 ^ m7;
    let m22 = m18 ^ m13;
    let m25 = m22 & m20;
    let m26 = m21 ^ m25;
    let m10 = m9 ^ m6;
    let m19 = m10 ^ m15;
    // t25 <- stack
    let m23 = m19 ^ t25;
    let m28 = m23 ^ m25;
    let m24 = m22 ^ m23;
    let m30 = m26 & m24;
    let m39 = m23 ^ m30;
    let m48 = m39 & y5;
    let m57 = m39 & t19;
    // m48 -> stack
    let m36 = m24 ^ m25;
    let m31 = m20 & m23;
    let m27 = m20 ^ m21;
    let m32 = m27 & m31;
    let m29 = m28 & m27;
    let m37 = m21 ^ m29;
    // m39 -> stack
    let m42 = m37 ^ m39;
    let m52 = m42 & t15;
    // t27 -> stack
    // t1 <- stack
    let m61 = m42 & t1;
    let p0 = m52 ^ m61;
    let p16 = m57 ^ m61;
    // m57 -> stack
    // t20 <- stack
    let m60 = m37 & t20;
    // p16 -> stack
    // t17 <- stack
    let m51 = m37 & t17;
    let m33 = m27 ^ m25;
    let m38 = m32 ^ m33;
    let m43 = m37 ^ m38;
    let m49 = m43 & t16;
    let p6 = m49 ^ m60;
    let p13 = m49 ^ m51;
    let m58 = m43 & t3;
    // t9 <- stack
    let m50 = m38 & t9;
    // t22 <- stack
    let m59 = m38 & t22;
    // p6 -> stack
    let p1 = m58 ^ m59;
    let p7 = p0 ^ p1;
    let m34 = m21 & m22;
    let m35 = m24 & m34;
    let m40 = m35 ^ m36;
    let m41 = m38 ^ m40;
    let m45 = m42 ^ m41;
    // t27 <- stack
    let m53 = m45 & t27;
    let p8 = m50 ^ m53;
    let p23 = p7 ^ p8;
    // t4 <- stack
    let m62 = m45 & t4;
    let p14 = m49 ^ m62;
    let s6 = p14 ^ p23;
    // t10 <- stack
    let m54 = m41 & t10;
    let p2 = m54 ^ m62;
    let p22 = p2 ^ p7;
    let s0 = p13 ^ p22;
    let p17 = m58 ^ p2;
    let p15 = m54 ^ m59;
    // t2 <- stack
    let m63 = m41 & t2;
    // m39 <- stack
    let m44 = m39 ^ m40;
    // p17 -> stack
    // t6 <- stack
    let m46 = m44 & t6;
    let p5 = m46 ^ m51;
    // p23 -> stack
    let p18 = m63 ^ p5;
    let p24 = p5 ^ p7;
    // m48 <- stack
    let p12 = m46 ^ m48;
    let s3 = p12 ^ p22;
    // t13 <- stack
    let m55 = m44 & t13;
    let p9 = m55 ^ m63;
    // p16 <- stack
    let s7 = p9 ^ p16;
    // t8 <- stack
    let m47 = m40 & t8;
    let p3 = m47 ^ m50;
    let p19 = p2 ^ p3;
    let s5 = p19 ^ p24;
    let p11 = p0 ^ p3;
    let p26 = p9 ^ p11;
    // t23 <- stack
    let m56 = m40 & t23;
    let p4 = m48 ^ m56;
    // p6 <- stack
    let p20 = p4 ^ p6;
    let p29 = p15 ^ p20;
    let s1 = p26 ^ p29;
    // m57 <- stack
    let p10 = m57 ^ p4;
    let p27 = p10 ^ p18;
    // p23 <- stack
    let s4 = p23 ^ p27;
    let p25 = p6 ^ p10;
    let p28 = p11 ^ p25;
    // p17 <- stack
    let s2 = p17 ^ p28;

    state[0] = s7;
    state[1] = s6;
    state[2] = s5;
    state[3] = s4;
    state[4] = s3;
    state[5] = s2;
    state[6] = s1;
    state[7] = s0;
}

/// Bitsliced implementation of the AES S-box, based on Boyar, Peralta and Calik.
///
/// See: <http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt>
///
/// Note that the four bitwise NOTs (^= 0xffffffffffffffff) are moved to the key schedule.
fn sub_bytes(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);

    // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
    // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)

    let u7 = state[0];
    let u6 = state[1];
    let u5 = state[2];
    let u4 = state[3];
    let u3 = state[4];
    let u2 = state[5];
    let u1 = state[6];
    let u0 = state[7];

    let y14 = u3 ^ u5;
    let y13 = u0 ^ u6;
    let y12 = y13 ^ y14;
    let t1 = u4 ^ y12;
    let y15 = t1 ^ u5;
    let t2 = y12 & y15;
    let y6 = y15 ^ u7;
    let y20 = t1 ^ u1;
    // y12 -> stack
    let y9 = u0 ^ u3;
    // y20 -> stack
    let y11 = y20 ^ y9;
    // y9 -> stack
    let t12 = y9 & y11;
    // y6 -> stack
    let y7 = u7 ^ y11;
    let y8 = u0 ^ u5;
    let t0 = u1 ^ u2;
    let y10 = y15 ^ t0;
    // y15 -> stack
    let y17 = y10 ^ y11;
    // y14 -> stack
    let t13 = y14 & y17;
    let t14 = t13 ^ t12;
    // y17 -> stack
    let y19 = y10 ^ y8;
    // y10 -> stack
    let t15 = y8 & y10;
    let t16 = t15 ^ t12;
    let y16 = t0 ^ y11;
    // y11 -> stack
    let y21 = y13 ^ y16;
    // y13 -> stack
    let t7 = y13 & y16;
    // y16 -> stack
    let y18 = u0 ^ y16;
    let y1 = t0 ^ u7;
    let y4 = y1 ^ u3;
    // u7 -> stack
    let t5 = y4 & u7;
    let t6 = t5 ^ t2;
    let t18 = t6 ^ t16;
    let t22 = t18 ^ y19;
    let y2 = y1 ^ u0;
    let t10 = y2 & y7;
    let t11 = t10 ^ t7;
    let t20 = t11 ^ t16;
    let t24 = t20 ^ y18;
    let y5 = y1 ^ u6;
    let t8 = y5 & y1;
    let t9 = t8 ^ t7;
    let t19 = t9 ^ t14;
    let t23 = t19 ^ y21;
    let y3 = y5 ^ y8;
    // y6 <- stack
    let t3 = y3 & y6;
    let t4 = t3 ^ t2;
    // y20 <- stack
    let t17 = t4 ^ y20;
    let t21 = t17 ^ t14;
    let t26 = t21 & t23;
    let t27 = t24 ^ t26;
    let t31 = t22 ^ t26;
    let t25 = t21 ^ t22;
    // y4 -> stack
    let t28 = t25 & t27;
    let t29 = t28 ^ t22;
    let z14 = t29 & y2;
    let z5 = t29 & y7;
    let t30 = t23 ^ t24;
    let t32 = t31 & t30;
    let t33 = t32 ^ t24;
    let t35 = t27 ^ t33;
    let t36 = t24 & t35;
    let t38 = t27 ^ t36;
    let t39 = t29 & t38;
    let t40 = t25 ^ t39;
    let t43 = t29 ^ t40;
    // y16 <- stack
    let z3 = t43 & y16;
    let tc12 = z3 ^ z5;
    // tc12 -> stack
    // y13 <- stack
    let z12 = t43 & y13;
    let z13 = t40 & y5;
    let z4 = t40 & y1;
    let tc6 = z3 ^ z4;
    let t34 = t23 ^ t33;
    let t37 = t36 ^ t34;
    let t41 = t40 ^ t37;
    // y10 <- stack
    let z8 = t41 & y10;
    let z17 = t41 & y8;
    let t44 = t33 ^ t37;
    // y15 <- stack
    let z0 = t44 & y15;
    // z17 -> stack
    // y12 <- stack
    let z9 = t44 & y12;
    let z10 = t37 & y3;
    let z1 = t37 & y6;
    let tc5 = z1 ^ z0;
    let tc11 = tc6 ^ tc5;
    // y4 <- stack
    let z11 = t33 & y4;
    let t42 = t29 ^ t33;
    let t45 = t42 ^ t41;
    // y17 <- stack
    let z7 = t45 & y17;
    let tc8 = z7 ^ tc6;
    // y14 <- stack
    let z16 = t45 & y14;
    // y11 <- stack
    let z6 = t42 & y11;
    let tc16 = z6 ^ tc8;
    // z14 -> stack
    // y9 <- stack
    let z15 = t42 & y9;
    let tc20 = z15 ^ tc16;
    let tc1 = z15 ^ z16;
    let tc2 = z10 ^ tc1;
    let tc21 = tc2 ^ z11;
    let tc3 = z9 ^ tc2;
    let s0 = tc3 ^ tc16;
    let s3 = tc3 ^ tc11;
    let s1 = s3 ^ tc16;
    let tc13 = z13 ^ tc1;
    // u7 <- stack
    let z2 = t33 & u7;
    let tc4 = z0 ^ z2;
    let tc7 = z12 ^ tc4;
    let tc9 = z8 ^ tc7;
    let tc10 = tc8 ^ tc9;
    // z14 <- stack
    let tc17 = z14 ^ tc10;
    let s5 = tc21 ^ tc17;
    let tc26 = tc17 ^ tc20;
    // z17 <- stack
    let s2 = tc26 ^ z17;
    // tc12 <- stack
    let tc14 = tc4 ^ tc12;
    let tc18 = tc13 ^ tc14;
    let s6 = tc10 ^ tc18;
    let s7 = z12 ^ tc18;
    let s4 = tc14 ^ s3;

    state[0] = s7;
    state[1] = s6;
    state[2] = s5;
    state[3] = s4;
    state[4] = s3;
    state[5] = s2;
    state[6] = s1;
    state[7] = s0;
}

/// NOT operations that are omitted in the S-box
#[inline]
fn sub_bytes_nots(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    state[0] ^= 0xffffffffffffffff;
    state[1] ^= 0xffffffffffffffff;
    state[5] ^= 0xffffffffffffffff;
    state[6] ^= 0xffffffffffffffff;
}

/// Computation of the MixColumns transformation in the fixsliced representation, with different
/// rotations used according to the round number mod 4.
///
/// Based on Käsper-Schwabe, similar to <https://github.com/Ko-/aes-armcortexm>.
macro_rules! define_mix_columns {
    (
        $name:ident,
        $name_inv:ident,
        $first_rotate:path,
        $second_rotate:path
    ) => {
        #[rustfmt::skip]
        fn $name(state: &mut State) {
            let (a0, a1, a2, a3, a4, a5, a6, a7) = (
                state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
            );
            let (b0, b1, b2, b3, b4, b5, b6, b7) = (
                $first_rotate(a0),
                $first_rotate(a1),
                $first_rotate(a2),
                $first_rotate(a3),
                $first_rotate(a4),
                $first_rotate(a5),
                $first_rotate(a6),
                $first_rotate(a7),
            );
            let (c0, c1, c2, c3, c4, c5, c6, c7) = (
                a0 ^ b0,
                a1 ^ b1,
                a2 ^ b2,
                a3 ^ b3,
                a4 ^ b4,
                a5 ^ b5,
                a6 ^ b6,
                a7 ^ b7,
            );
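            // The c7 terms folded into slices 0, 1, 3 and 4 below implement the
            // GF(2^8) reduction by the AES polynomial x^8 + x^4 + x^3 + x + 1.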
            state[0] = b0 ^ c7 ^ $second_rotate(c0);
            state[1] = b1 ^ c0 ^ c7 ^ $second_rotate(c1);
            state[2] = b2 ^ c1 ^ $second_rotate(c2);
            state[3] = b3 ^ c2 ^ c7 ^ $second_rotate(c3);
            state[4] = b4 ^ c3 ^ c7 ^ $second_rotate(c4);
            state[5] = b5 ^ c4 ^ $second_rotate(c5);
            state[6] = b6 ^ c5 ^ $second_rotate(c6);
            state[7] = b7 ^ c6 ^ $second_rotate(c7);
        }

        #[rustfmt::skip]
        fn $name_inv(state: &mut State) {
            let (a0, a1, a2, a3, a4, a5, a6, a7) = (
                state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
            );
            let (b0, b1, b2, b3, b4, b5, b6, b7) = (
                $first_rotate(a0),
                $first_rotate(a1),
                $first_rotate(a2),
                $first_rotate(a3),
                $first_rotate(a4),
                $first_rotate(a5),
                $first_rotate(a6),
                $first_rotate(a7),
            );
            let (c0, c1, c2, c3, c4, c5, c6, c7) = (
                a0 ^ b0,
                a1 ^ b1,
                a2 ^ b2,
                a3 ^ b3,
                a4 ^ b4,
                a5 ^ b5,
                a6 ^ b6,
                a7 ^ b7,
            );
            let (d0, d1, d2, d3, d4, d5, d6, d7) = (
                a0 ^ c7,
                a1 ^ c0 ^ c7,
                a2 ^ c1,
                a3 ^ c2 ^ c7,
                a4 ^ c3 ^ c7,
                a5 ^ c4,
                a6 ^ c5,
                a7 ^ c6,
            );
            let (e0, e1, e2, e3, e4, e5, e6, e7) = (
                c0 ^ d6,
                c1 ^ d6 ^ d7,
                c2 ^ d0 ^ d7,
                c3 ^ d1 ^ d6,
                c4 ^ d2 ^ d6 ^ d7,
                c5 ^ d3 ^ d7,
                c6 ^ d4,
                c7 ^ d5,
            );
            state[0] = d0 ^ e0 ^ $second_rotate(e0);
            state[1] = d1 ^ e1 ^ $second_rotate(e1);
            state[2] = d2 ^ e2 ^ $second_rotate(e2);
            state[3] = d3 ^ e3 ^ $second_rotate(e3);
            state[4] = d4 ^ e4 ^ $second_rotate(e4);
            state[5] = d5 ^ e5 ^ $second_rotate(e5);
            state[6] = d6 ^ e6 ^ $second_rotate(e6);
            state[7] = d7 ^ e7 ^ $second_rotate(e7);
        }
    }
}

define_mix_columns!(
    mix_columns_0,
    inv_mix_columns_0,
    rotate_rows_1,
    rotate_rows_2
);

define_mix_columns!(
    mix_columns_1,
    inv_mix_columns_1,
    rotate_rows_and_columns_1_1,
    rotate_rows_and_columns_2_2
);

#[cfg(not(aes_compact))]
define_mix_columns!(
    mix_columns_2,
    inv_mix_columns_2,
    rotate_rows_and_columns_1_2,
    rotate_rows_2
);

#[cfg(not(aes_compact))]
define_mix_columns!(
    mix_columns_3,
    inv_mix_columns_3,
    rotate_rows_and_columns_1_3,
    rotate_rows_and_columns_2_2
);

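/// Swap the bits of `a` selected by `mask` with the bits `shift` positions
/// above them (a standard delta-swap bit permutation step).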
#[inline]
fn delta_swap_1(a: &mut u64, shift: u32, mask: u64) {
    let t = (*a ^ ((*a) >> shift)) & mask;
    *a ^= t ^ (t << shift);
}

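/// Swap the bits of `a` selected by `mask` with the corresponding bits of `b`
/// found `shift` positions above (the two-word form of the delta swap).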
#[inline]
fn delta_swap_2(a: &mut u64, b: &mut u64, shift: u32, mask: u64) {
    let t = (*a ^ ((*b) >> shift)) & mask;
    *a ^= t;
    *b ^= t << shift;
}

/// Applies ShiftRows once on an AES state (or key).
#[cfg(any(not(aes_compact), feature = "hazmat"))]
#[inline]
fn shift_rows_1(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    for x in state.iter_mut() {
        delta_swap_1(x, 8, 0x00f000ff000f0000);
        delta_swap_1(x, 4, 0x0f0f00000f0f0000);
    }
}

/// Applies ShiftRows twice on an AES state (or key).
#[inline]
fn shift_rows_2(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    for x in state.iter_mut() {
        delta_swap_1(x, 8, 0x00ff000000ff0000);
    }
}

/// Applies ShiftRows three times on an AES state (or key).
#[inline]
fn shift_rows_3(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    for x in state.iter_mut() {
        delta_swap_1(x, 8, 0x000f00ff00f00000);
        delta_swap_1(x, 4, 0x0f0f00000f0f0000);
    }
}

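// ShiftRows has order 4, so applying it 4 - n times undoes n applications;
// the inverses below reuse the forward routines accordingly.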
#[inline(always)]
fn inv_shift_rows_1(state: &mut [u64]) {
    shift_rows_3(state);
}

#[inline(always)]
fn inv_shift_rows_2(state: &mut [u64]) {
    shift_rows_2(state);
}

#[cfg(not(aes_compact))]
#[inline(always)]
fn inv_shift_rows_3(state: &mut [u64]) {
    shift_rows_1(state);
}

/// XOR the columns after the S-box during the key schedule round function.
///
/// The `idx_xor` parameter refers to the index of the previous round key that is
/// involved in the XOR computation (should be 8 and 16 for AES-128 and AES-256,
/// respectively).
///
/// The `idx_ror` parameter refers to the rotation value, which varies between the
/// different key schedules.
fn xor_columns(rkeys: &mut [u64], offset: usize, idx_xor: usize, idx_ror: u32) {
    for i in 0..8 {
        let off_i = offset + i;
        let rk = rkeys[off_i - idx_xor] ^ (0x000f000f000f000f & ror(rkeys[off_i], idx_ror));
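        // Propagate the new column through the remaining three 32-bit words of
        // the slice, mirroring w[i] = w[i-1] ^ temp in the textbook key schedule.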
        rkeys[off_i] = rk
            ^ (0xfff0fff0fff0fff0 & (rk << 4))
            ^ (0xff00ff00ff00ff00 & (rk << 8))
            ^ (0xf000f000f000f000 & (rk << 12));
    }
}

/// Bitslice four 128-bit input blocks `input0`, `input1`, `input2`, `input3`
/// into a 512-bit internal state.
fn bitslice(output: &mut [u64], input0: &[u8], input1: &[u8], input2: &[u8], input3: &[u8]) {
    debug_assert_eq!(output.len(), 8);
    debug_assert_eq!(input0.len(), 16);
    debug_assert_eq!(input1.len(), 16);
    debug_assert_eq!(input2.len(), 16);
    debug_assert_eq!(input3.len(), 16);

    // Bitslicing is a bit index manipulation. 512 bits of data means each bit is positioned at a
    // 9-bit index. AES data is 4 blocks, each one a 4x4 column-major matrix of bytes, so the
    // index is initially ([b]lock, [c]olumn, [r]ow, [p]osition):
    //     b1 b0 c1 c0 r1 r0 p2 p1 p0
    //
    // The desired bitsliced data groups first by bit position, then row, column, block:
    //     p2 p1 p0 r1 r0 c1 c0 b1 b0
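    //
    // Concretely, under the output ordering, bit p of the byte at (block b,
    // column c, row r) lands in output[p] at bit index (r << 4) | (c << 2) | b
    // of that 64-bit word.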

    #[rustfmt::skip]
    fn read_reordered(input: &[u8]) -> u64 {
        (u64::from(input[0x0])        ) |
        (u64::from(input[0x1]) << 0x10) |
        (u64::from(input[0x2]) << 0x20) |
        (u64::from(input[0x3]) << 0x30) |
        (u64::from(input[0x8]) << 0x08) |
        (u64::from(input[0x9]) << 0x18) |
        (u64::from(input[0xa]) << 0x28) |
        (u64::from(input[0xb]) << 0x38)
    }

    // Reorder each block's bytes on input
    //     __ __ c1 c0 r1 r0 __ __ __ => __ __ c0 r1 r0 c1 __ __ __
    // Reorder by relabeling (note the order of input)
    //     b1 b0 c0 __ __ __ __ __ __ => c0 b1 b0 __ __ __ __ __ __
    let mut t0 = read_reordered(&input0[0x00..0x0c]);
    let mut t4 = read_reordered(&input0[0x04..0x10]);
    let mut t1 = read_reordered(&input1[0x00..0x0c]);
    let mut t5 = read_reordered(&input1[0x04..0x10]);
    let mut t2 = read_reordered(&input2[0x00..0x0c]);
    let mut t6 = read_reordered(&input2[0x04..0x10]);
    let mut t3 = read_reordered(&input3[0x00..0x0c]);
    let mut t7 = read_reordered(&input3[0x04..0x10]);

    // Bit Index Swap 6 <-> 0:
    //     __ __ b0 __ __ __ __ __ p0 => __ __ p0 __ __ __ __ __ b0
    let m0 = 0x5555555555555555;
    delta_swap_2(&mut t1, &mut t0, 1, m0);
    delta_swap_2(&mut t3, &mut t2, 1, m0);
    delta_swap_2(&mut t5, &mut t4, 1, m0);
    delta_swap_2(&mut t7, &mut t6, 1, m0);

    // Bit Index Swap 7 <-> 1:
    //     __ b1 __ __ __ __ __ p1 __ => __ p1 __ __ __ __ __ b1 __
    let m1 = 0x3333333333333333;
    delta_swap_2(&mut t2, &mut t0, 2, m1);
    delta_swap_2(&mut t3, &mut t1, 2, m1);
    delta_swap_2(&mut t6, &mut t4, 2, m1);
    delta_swap_2(&mut t7, &mut t5, 2, m1);

    // Bit Index Swap 8 <-> 2:
    //     c0 __ __ __ __ __ p2 __ __ => p2 __ __ __ __ __ c0 __ __
    let m2 = 0x0f0f0f0f0f0f0f0f;
    delta_swap_2(&mut t4, &mut t0, 4, m2);
    delta_swap_2(&mut t5, &mut t1, 4, m2);
    delta_swap_2(&mut t6, &mut t2, 4, m2);
    delta_swap_2(&mut t7, &mut t3, 4, m2);

    // Final bitsliced bit index, as desired:
    //     p2 p1 p0 r1 r0 c1 c0 b1 b0
    output[0] = t0;
    output[1] = t1;
    output[2] = t2;
    output[3] = t3;
    output[4] = t4;
    output[5] = t5;
    output[6] = t6;
    output[7] = t7;
}

/// Un-bitslice a 512-bit internal state into four 128-bit blocks of output.
fn inv_bitslice(input: &[u64]) -> BatchBlocks {
    debug_assert_eq!(input.len(), 8);

    // Unbitslicing is a bit index manipulation. 512 bits of data means each bit is positioned at
    // a 9-bit index. AES data is 4 blocks, each one a 4x4 column-major matrix of bytes, so the
    // desired index for the output is ([b]lock, [c]olumn, [r]ow, [p]osition):
    //     b1 b0 c1 c0 r1 r0 p2 p1 p0
    //
    // The initially bitsliced data groups first by bit position, then row, column, block:
    //     p2 p1 p0 r1 r0 c1 c0 b1 b0

    let mut t0 = input[0];
    let mut t1 = input[1];
    let mut t2 = input[2];
    let mut t3 = input[3];
    let mut t4 = input[4];
    let mut t5 = input[5];
    let mut t6 = input[6];
    let mut t7 = input[7];

    // TODO: these bit index swaps are identical to those in 'packing'

    // Bit Index Swap 6 <-> 0:
    //     __ __ p0 __ __ __ __ __ b0 => __ __ b0 __ __ __ __ __ p0
    let m0 = 0x5555555555555555;
    delta_swap_2(&mut t1, &mut t0, 1, m0);
    delta_swap_2(&mut t3, &mut t2, 1, m0);
    delta_swap_2(&mut t5, &mut t4, 1, m0);
    delta_swap_2(&mut t7, &mut t6, 1, m0);

    // Bit Index Swap 7 <-> 1:
    //     __ p1 __ __ __ __ __ b1 __ => __ b1 __ __ __ __ __ p1 __
    let m1 = 0x3333333333333333;
    delta_swap_2(&mut t2, &mut t0, 2, m1);
    delta_swap_2(&mut t3, &mut t1, 2, m1);
    delta_swap_2(&mut t6, &mut t4, 2, m1);
    delta_swap_2(&mut t7, &mut t5, 2, m1);

    // Bit Index Swap 8 <-> 2:
    //     p2 __ __ __ __ __ c0 __ __ => c0 __ __ __ __ __ p2 __ __
    let m2 = 0x0f0f0f0f0f0f0f0f;
    delta_swap_2(&mut t4, &mut t0, 4, m2);
    delta_swap_2(&mut t5, &mut t1, 4, m2);
    delta_swap_2(&mut t6, &mut t2, 4, m2);
    delta_swap_2(&mut t7, &mut t3, 4, m2);

    #[rustfmt::skip]
    fn write_reordered(columns: u64, output: &mut [u8]) {
        output[0x0] = (columns        ) as u8;
        output[0x1] = (columns >> 0x10) as u8;
        output[0x2] = (columns >> 0x20) as u8;
        output[0x3] = (columns >> 0x30) as u8;
        output[0x8] = (columns >> 0x08) as u8;
        output[0x9] = (columns >> 0x18) as u8;
        output[0xa] = (columns >> 0x28) as u8;
        output[0xb] = (columns >> 0x38) as u8;
    }

    let mut output = BatchBlocks::default();
    // Reorder by relabeling (note the order of output)
    //     c0 b1 b0 __ __ __ __ __ __ => b1 b0 c0 __ __ __ __ __ __
    // Reorder each block's bytes on output
    //     __ __ c0 r1 r0 c1 __ __ __ => __ __ c1 c0 r1 r0 __ __ __
    write_reordered(t0, &mut output[0][0x00..0x0c]);
    write_reordered(t4, &mut output[0][0x04..0x10]);
    write_reordered(t1, &mut output[1][0x00..0x0c]);
    write_reordered(t5, &mut output[1][0x04..0x10]);
    write_reordered(t2, &mut output[2][0x00..0x0c]);
    write_reordered(t6, &mut output[2][0x04..0x10]);
    write_reordered(t3, &mut output[3][0x00..0x0c]);
    write_reordered(t7, &mut output[3][0x04..0x10]);

    // Final AES bit index, as desired:
    //     b1 b0 c1 c0 r1 r0 p2 p1 p0
    output
}

/// Copy eight 64-bit words (one bitsliced round key) within the provided
/// slice forward by an eight-word offset.
fn memshift32(buffer: &mut [u64], src_offset: usize) {
    debug_assert_eq!(src_offset % 8, 0);

    let dst_offset = src_offset + 8;
    debug_assert!(dst_offset + 8 <= buffer.len());

    for i in (0..8).rev() {
        buffer[dst_offset + i] = buffer[src_offset + i];
    }
}

/// XOR the round key into the internal state. The round keys are expected to be
/// pre-computed and to be packed in the fixsliced representation.
#[inline]
fn add_round_key(state: &mut State, rkey: &[u64]) {
    debug_assert_eq!(rkey.len(), 8);
    for (a, b) in state.iter_mut().zip(rkey) {
        *a ^= b;
    }
}

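/// XOR one round-constant bit into bit-slice `bit`; the four set bits in the
/// mask hit the same byte position in all four replicated blocks at once.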
#[inline(always)]
fn add_round_constant_bit(state: &mut [u64], bit: usize) {
    state[bit] ^= 0x00000000f0000000;
}

#[inline(always)]
fn ror(x: u64, y: u32) -> u64 {
    x.rotate_right(y)
}

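/// Rotation distance for `rows` row steps plus `cols` column steps in the
/// bitsliced layout: one row spans 16 bits and one column spans 4 bits
/// (see the bit ordering described in `bitslice`).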
#[inline(always)]
fn ror_distance(rows: u32, cols: u32) -> u32 {
    (rows << 4) + (cols << 2)
}

#[inline(always)]
fn rotate_rows_1(x: u64) -> u64 {
    ror(x, ror_distance(1, 0))
}

#[inline(always)]
fn rotate_rows_2(x: u64) -> u64 {
    ror(x, ror_distance(2, 0))
}

#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_1_1(x: u64) -> u64 {
    (ror(x, ror_distance(1, 1)) & 0x0fff0fff0fff0fff) |
    (ror(x, ror_distance(0, 1)) & 0xf000f000f000f000)
}

#[cfg(not(aes_compact))]
#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_1_2(x: u64) -> u64 {
    (ror(x, ror_distance(1, 2)) & 0x00ff00ff00ff00ff) |
    (ror(x, ror_distance(0, 2)) & 0xff00ff00ff00ff00)
}

#[cfg(not(aes_compact))]
#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_1_3(x: u64) -> u64 {
    (ror(x, ror_distance(1, 3)) & 0x000f000f000f000f) |
    (ror(x, ror_distance(0, 3)) & 0xfff0fff0fff0fff0)
}

#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_2_2(x: u64) -> u64 {
    (ror(x, ror_distance(2, 2)) & 0x00ff00ff00ff00ff) |
    (ror(x, ror_distance(1, 2)) & 0xff00ff00ff00ff00)
}

/// Low-level "hazmat" AES functions.
///
/// Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
/// implementations in this crate, but instead provides raw access to
/// the AES round function gated under the `hazmat` crate feature.
#[cfg(feature = "hazmat")]
pub(crate) mod hazmat {
    use super::{
        bitslice, inv_bitslice, inv_mix_columns_0, inv_shift_rows_1, inv_sub_bytes, mix_columns_0,
        shift_rows_1, sub_bytes, sub_bytes_nots, State,
    };
    use crate::{Block, Block8};

    /// XOR the `src` block into the `dst` block in-place.
    fn xor_in_place(dst: &mut Block, src: &Block) {
        for (a, b) in dst.iter_mut().zip(src.as_slice()) {
            *a ^= *b;
        }
    }

    /// Perform a bitslice operation, loading a single block.
    fn bitslice_block(block: &Block) -> State {
        let mut state = State::default();
        bitslice(&mut state, block, block, block, block);
        state
    }

    /// Perform an inverse bitslice operation, extracting a single block.
    fn inv_bitslice_block(block: &mut Block, state: &State) {
        block.copy_from_slice(&inv_bitslice(state)[0]);
    }

    /// AES cipher (encrypt) round function.
    #[inline]
    pub(crate) fn cipher_round(block: &mut Block, round_key: &Block) {
        let mut state = bitslice_block(block);
        sub_bytes(&mut state);
        sub_bytes_nots(&mut state);
        shift_rows_1(&mut state);
        mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
        xor_in_place(block, round_key);
    }

    /// AES cipher (encrypt) round function: parallel version.
    #[inline]
    pub(crate) fn cipher_round_par(blocks: &mut Block8, round_keys: &Block8) {
        for (chunk, keys) in blocks.chunks_exact_mut(4).zip(round_keys.chunks_exact(4)) {
            let mut state = State::default();
            bitslice(&mut state, &chunk[0], &chunk[1], &chunk[2], &chunk[3]);
            sub_bytes(&mut state);
            sub_bytes_nots(&mut state);
            shift_rows_1(&mut state);
            mix_columns_0(&mut state);
            let res = inv_bitslice(&state);

            for i in 0..4 {
                chunk[i] = res[i];
                xor_in_place(&mut chunk[i], &keys[i]);
            }
        }
    }

    /// AES equivalent inverse cipher (decrypt) round function.
    #[inline]
    pub(crate) fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
        let mut state = State::default();
        bitslice(&mut state, block, block, block, block);
        sub_bytes_nots(&mut state);
        inv_sub_bytes(&mut state);
        inv_shift_rows_1(&mut state);
        inv_mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
        xor_in_place(block, round_key);
    }

    /// AES equivalent inverse cipher (decrypt) round function: parallel version.
    #[inline]
    pub(crate) fn equiv_inv_cipher_round_par(blocks: &mut Block8, round_keys: &Block8) {
        for (chunk, keys) in blocks.chunks_exact_mut(4).zip(round_keys.chunks_exact(4)) {
            let mut state = State::default();
            bitslice(&mut state, &chunk[0], &chunk[1], &chunk[2], &chunk[3]);
            sub_bytes_nots(&mut state);
            inv_sub_bytes(&mut state);
            inv_shift_rows_1(&mut state);
            inv_mix_columns_0(&mut state);
            let res = inv_bitslice(&state);

            for i in 0..4 {
                chunk[i] = res[i];
                xor_in_place(&mut chunk[i], &keys[i]);
            }
        }
    }

    /// AES mix columns function.
    #[inline]
    pub(crate) fn mix_columns(block: &mut Block) {
        let mut state = bitslice_block(block);
        mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
    }

    /// AES inverse mix columns function.
    #[inline]
    pub(crate) fn inv_mix_columns(block: &mut Block) {
        let mut state = bitslice_block(block);
        inv_mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
    }
}
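
// A minimal sanity-check sketch (not part of the original file): it drives the
// AES-128 fixsliced key schedule and batch encryption/decryption defined above
// against the FIPS-197 Appendix C.1 test vector. The `fixslice_smoke` module
// name and the use of `GenericArray`'s `From<[T; N]>` impl are illustrative
// assumptions; only items already defined in this module are exercised.
#[cfg(test)]
mod fixslice_smoke {
    use super::*;

    #[test]
    fn aes128_fips197_round_trip() {
        // FIPS-197 Appendix C.1: AES-128 key, plaintext and expected ciphertext.
        let key: [u8; 16] = [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        ];
        let plaintext: [u8; 16] = [
            0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
        ];
        let expected: [u8; 16] = [
            0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
            0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a,
        ];

        let rkeys = aes128_key_schedule(&key);

        // The batch API always processes four blocks; replicate the plaintext.
        let pt = Block::from(plaintext);
        let blocks = BatchBlocks::from([pt, pt, pt, pt]);

        let ct = aes128_encrypt(&rkeys, &blocks);
        for block in ct.iter() {
            assert_eq!(block.as_slice(), &expected[..]);
        }

        let rt = aes128_decrypt(&rkeys, &ct);
        for block in rt.iter() {
            assert_eq!(block.as_slice(), &plaintext[..]);
        }
    }
}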