Coverage Report

Created: 2025-12-31 06:51

/rust/registry/src/index.crates.io-1949cf8c6b5b557f/aes-0.8.4/src/soft/fixslice64.rs

Count column: every executable line below was reported with execution count 0, i.e. this file is entirely uncovered. The per-line Line/Count columns are folded into the plain source listing that follows.

//! Fixsliced implementations of AES-128, AES-192 and AES-256 (64-bit)
//! adapted from the C implementation.
//!
//! All implementations are fully bitsliced and do not rely on any
//! Look-Up Table (LUT).
//!
//! See the paper at <https://eprint.iacr.org/2020/1123.pdf> for more details.
//!
//! # Author (original C code)
//!
//! Alexandre Adomnicai, Nanyang Technological University, Singapore
//! <alexandre.adomnicai@ntu.edu.sg>
//!
//! Originally licensed MIT. Relicensed as Apache 2.0+MIT with permission.

#![allow(clippy::unreadable_literal)]

use crate::Block;
use cipher::{consts::U4, generic_array::GenericArray};

/// AES block batch size for this implementation
pub(crate) type FixsliceBlocks = U4;

pub(crate) type BatchBlocks = GenericArray<Block, FixsliceBlocks>;

/// AES-128 round keys
pub(crate) type FixsliceKeys128 = [u64; 88];

/// AES-192 round keys
pub(crate) type FixsliceKeys192 = [u64; 104];

/// AES-256 round keys
pub(crate) type FixsliceKeys256 = [u64; 120];

/// 512-bit internal state
pub(crate) type State = [u64; 8];

/// Fully bitsliced AES-128 key schedule to match the fully-fixsliced representation.
pub(crate) fn aes128_key_schedule(key: &[u8; 16]) -> FixsliceKeys128 {
    let mut rkeys = [0u64; 88];

    bitslice(&mut rkeys[..8], key, key, key, key);

    let mut rk_off = 0;
    for rcon in 0..10 {
        memshift32(&mut rkeys, rk_off);
        rk_off += 8;

        sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
        sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);

        if rcon < 8 {
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
        } else {
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 8);
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 7);
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 5);
            add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 4);
        }

        xor_columns(&mut rkeys, rk_off, 8, ror_distance(1, 3));
    }

    // Adjust to match fixslicing format
    #[cfg(aes_compact)]
    {
        for i in (8..88).step_by(16) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
        }
    }
    #[cfg(not(aes_compact))]
    {
        for i in (8..72).step_by(32) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
            inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
            inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
        }
        inv_shift_rows_1(&mut rkeys[72..80]);
    }

    // Account for NOTs removed from sub_bytes
    for i in 1..11 {
        sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
    }

    rkeys
}
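
// The 88 words above are 11 round keys of 8 bitsliced words each: round key i
// occupies rkeys[8*i..8*i + 8], which is exactly the slice add_round_key
// consumes. A sketch of a helper making that layout explicit (hypothetical,
// not part of the crate's API):
fn aes128_round_key(rkeys: &FixsliceKeys128, i: usize) -> &[u64] {
    debug_assert!(i <= 10); // AES-128: 10 rounds => 11 round keys
    &rkeys[(i * 8)..(i * 8 + 8)]
}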

/// Fully bitsliced AES-192 key schedule to match the fully-fixsliced representation.
pub(crate) fn aes192_key_schedule(key: &[u8; 24]) -> FixsliceKeys192 {
    let mut rkeys = [0u64; 104];
    let mut tmp = [0u64; 8];

    bitslice(
        &mut rkeys[..8],
        &key[..16],
        &key[..16],
        &key[..16],
        &key[..16],
    );
    bitslice(&mut tmp, &key[8..], &key[8..], &key[8..], &key[8..]);

    let mut rcon = 0;
    let mut rk_off = 8;

    loop {
        for i in 0..8 {
            rkeys[rk_off + i] = (0x00ff00ff00ff00ff & (tmp[i] >> 8))
                | (0xff00ff00ff00ff00 & (rkeys[(rk_off - 8) + i] << 8));
        }

        sub_bytes(&mut tmp);
        sub_bytes_nots(&mut tmp);

        add_round_constant_bit(&mut tmp, rcon);
        rcon += 1;

        for i in 0..8 {
            let mut ti = rkeys[rk_off + i];
            ti ^= 0x0f000f000f000f00 & ror(tmp[i], ror_distance(1, 1));
            ti ^= 0xf000f000f000f000 & (ti << 4);
            tmp[i] = ti;
        }
        rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
        rk_off += 8;

        for i in 0..8 {
            let ui = tmp[i];
            let mut ti = (0x00ff00ff00ff00ff & (rkeys[(rk_off - 16) + i] >> 8))
                | (0xff00ff00ff00ff00 & (ui << 8));
            ti ^= 0x000f000f000f000f & (ui >> 12);
            tmp[i] = ti
                ^ (0xfff0fff0fff0fff0 & (ti << 4))
                ^ (0xff00ff00ff00ff00 & (ti << 8))
                ^ (0xf000f000f000f000 & (ti << 12));
        }
        rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
        rk_off += 8;

        sub_bytes(&mut tmp);
        sub_bytes_nots(&mut tmp);

        add_round_constant_bit(&mut tmp, rcon);
        rcon += 1;

        for i in 0..8 {
            let mut ti = (0x00ff00ff00ff00ff & (rkeys[(rk_off - 16) + i] >> 8))
                | (0xff00ff00ff00ff00 & (rkeys[(rk_off - 8) + i] << 8));
            ti ^= 0x000f000f000f000f & ror(tmp[i], ror_distance(1, 3));
            rkeys[rk_off + i] = ti
                ^ (0xfff0fff0fff0fff0 & (ti << 4))
                ^ (0xff00ff00ff00ff00 & (ti << 8))
                ^ (0xf000f000f000f000 & (ti << 12));
        }
        rk_off += 8;

        if rcon >= 8 {
            break;
        }

        for i in 0..8 {
            let ui = rkeys[(rk_off - 8) + i];
            let mut ti = rkeys[(rk_off - 16) + i];
            ti ^= 0x0f000f000f000f00 & (ui >> 4);
            ti ^= 0xf000f000f000f000 & (ti << 4);
            tmp[i] = ti;
        }
    }

    // Adjust to match fixslicing format
    #[cfg(aes_compact)]
    {
        for i in (8..104).step_by(16) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
        }
    }
    #[cfg(not(aes_compact))]
    {
        for i in (0..96).step_by(32) {
            inv_shift_rows_1(&mut rkeys[(i + 8)..(i + 16)]);
            inv_shift_rows_2(&mut rkeys[(i + 16)..(i + 24)]);
            inv_shift_rows_3(&mut rkeys[(i + 24)..(i + 32)]);
        }
    }

    // Account for NOTs removed from sub_bytes
    for i in 1..13 {
        sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
    }

    rkeys
}

/// Fully bitsliced AES-256 key schedule to match the fully-fixsliced representation.
pub(crate) fn aes256_key_schedule(key: &[u8; 32]) -> FixsliceKeys256 {
    let mut rkeys = [0u64; 120];

    bitslice(
        &mut rkeys[..8],
        &key[..16],
        &key[..16],
        &key[..16],
        &key[..16],
    );
    bitslice(
        &mut rkeys[8..16],
        &key[16..],
        &key[16..],
        &key[16..],
        &key[16..],
    );

    let mut rk_off = 8;

    let mut rcon = 0;
    loop {
        memshift32(&mut rkeys, rk_off);
        rk_off += 8;

        sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
        sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);

        add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
        xor_columns(&mut rkeys, rk_off, 16, ror_distance(1, 3));
        rcon += 1;

        if rcon == 7 {
            break;
        }

        memshift32(&mut rkeys, rk_off);
        rk_off += 8;

        sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
        sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);

        xor_columns(&mut rkeys, rk_off, 16, ror_distance(0, 3));
    }

    // Adjust to match fixslicing format
    #[cfg(aes_compact)]
    {
        for i in (8..120).step_by(16) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
        }
    }
    #[cfg(not(aes_compact))]
    {
        for i in (8..104).step_by(32) {
            inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
            inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
            inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
        }
        inv_shift_rows_1(&mut rkeys[104..112]);
    }

    // Account for NOTs removed from sub_bytes
    for i in 1..15 {
        sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
    }

    rkeys
}

/// Fully-fixsliced AES-128 decryption (the InvShiftRows is completely omitted).
///
/// Decrypts four blocks in parallel.
pub(crate) fn aes128_decrypt(rkeys: &FixsliceKeys128, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[80..]);
    inv_sub_bytes(&mut state);

    #[cfg(not(aes_compact))]
    {
        inv_shift_rows_2(&mut state);
    }

    let mut rk_off = 72;
    loop {
        #[cfg(aes_compact)]
        {
            inv_shift_rows_2(&mut state);
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_1(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        if rk_off == 0 {
            break;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_0(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        #[cfg(not(aes_compact))]
        {
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_3(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;

            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_2(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;
        }
    }

    add_round_key(&mut state, &rkeys[..8]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-128 encryption (the ShiftRows is completely omitted).
///
/// Encrypts four blocks in parallel.
pub(crate) fn aes128_encrypt(rkeys: &FixsliceKeys128, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[..8]);

    let mut rk_off = 8;
    loop {
        sub_bytes(&mut state);
        mix_columns_1(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;

        #[cfg(aes_compact)]
        {
            shift_rows_2(&mut state);
        }

        if rk_off == 80 {
            break;
        }

        #[cfg(not(aes_compact))]
        {
            sub_bytes(&mut state);
            mix_columns_2(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;

            sub_bytes(&mut state);
            mix_columns_3(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;
        }

        sub_bytes(&mut state);
        mix_columns_0(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;
    }

    #[cfg(not(aes_compact))]
    {
        shift_rows_2(&mut state);
    }

    sub_bytes(&mut state);
    add_round_key(&mut state, &rkeys[80..]);

    inv_bitslice(&state)
}
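
// A known-answer check for the schedule/encrypt pair above; a test sketch, not
// part of the original file. It assumes `Block` and `BatchBlocks` implement
// `From` for plain arrays (true for generic-array's `GenericArray`).
#[test]
fn aes128_fips197_known_answer() {
    // FIPS-197 Appendix C.1 vector.
    let key = [
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    ];
    let pt = [
        0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
    ];
    let ct = [
        0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
        0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a,
    ];

    let rkeys = aes128_key_schedule(&key);
    // The batch is four blocks wide; encrypt four copies of the plaintext.
    let blocks = BatchBlocks::from([Block::from(pt); 4]);
    let out = aes128_encrypt(&rkeys, &blocks);
    for block in out.iter() {
        assert_eq!(block.as_slice(), &ct[..]);
    }
    // Decrypting the result must recover the plaintext.
    for block in aes128_decrypt(&rkeys, &out).iter() {
        assert_eq!(block.as_slice(), &pt[..]);
    }
}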

/// Fully-fixsliced AES-192 decryption (the InvShiftRows is completely omitted).
///
/// Decrypts four blocks in parallel.
pub(crate) fn aes192_decrypt(rkeys: &FixsliceKeys192, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[96..]);
    inv_sub_bytes(&mut state);

    let mut rk_off = 88;
    loop {
        #[cfg(aes_compact)]
        {
            inv_shift_rows_2(&mut state);
        }
        #[cfg(not(aes_compact))]
        {
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_3(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;

            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_2(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_1(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        if rk_off == 0 {
            break;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_0(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;
    }

    add_round_key(&mut state, &rkeys[..8]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-192 encryption (the ShiftRows is completely omitted).
///
/// Encrypts four blocks in parallel.
pub(crate) fn aes192_encrypt(rkeys: &FixsliceKeys192, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[..8]);

    let mut rk_off = 8;
    loop {
        sub_bytes(&mut state);
        mix_columns_1(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;

        #[cfg(aes_compact)]
        {
            shift_rows_2(&mut state);
        }
        #[cfg(not(aes_compact))]
        {
            sub_bytes(&mut state);
            mix_columns_2(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;

            sub_bytes(&mut state);
            mix_columns_3(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;
        }

        if rk_off == 96 {
            break;
        }

        sub_bytes(&mut state);
        mix_columns_0(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;
    }

    sub_bytes(&mut state);
    add_round_key(&mut state, &rkeys[96..]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-256 decryption (the InvShiftRows is completely omitted).
///
/// Decrypts four blocks in parallel.
pub(crate) fn aes256_decrypt(rkeys: &FixsliceKeys256, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[112..]);
    inv_sub_bytes(&mut state);

    #[cfg(not(aes_compact))]
    {
        inv_shift_rows_2(&mut state);
    }

    let mut rk_off = 104;
    loop {
        #[cfg(aes_compact)]
        {
            inv_shift_rows_2(&mut state);
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_1(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        if rk_off == 0 {
            break;
        }

        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        inv_mix_columns_0(&mut state);
        inv_sub_bytes(&mut state);
        rk_off -= 8;

        #[cfg(not(aes_compact))]
        {
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_3(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;

            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            inv_mix_columns_2(&mut state);
            inv_sub_bytes(&mut state);
            rk_off -= 8;
        }
    }

    add_round_key(&mut state, &rkeys[..8]);

    inv_bitslice(&state)
}

/// Fully-fixsliced AES-256 encryption (the ShiftRows is completely omitted).
///
/// Encrypts four blocks in parallel.
pub(crate) fn aes256_encrypt(rkeys: &FixsliceKeys256, blocks: &BatchBlocks) -> BatchBlocks {
    let mut state = State::default();

    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);

    add_round_key(&mut state, &rkeys[..8]);

    let mut rk_off = 8;
    loop {
        sub_bytes(&mut state);
        mix_columns_1(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;

        #[cfg(aes_compact)]
        {
            shift_rows_2(&mut state);
        }

        if rk_off == 112 {
            break;
        }

        #[cfg(not(aes_compact))]
        {
            sub_bytes(&mut state);
            mix_columns_2(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;

            sub_bytes(&mut state);
            mix_columns_3(&mut state);
            add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
            rk_off += 8;
        }

        sub_bytes(&mut state);
        mix_columns_0(&mut state);
        add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
        rk_off += 8;
    }

    #[cfg(not(aes_compact))]
    {
        shift_rows_2(&mut state);
    }

    sub_bytes(&mut state);
    add_round_key(&mut state, &rkeys[112..]);

    inv_bitslice(&state)
}
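
// The same shape of check for AES-256, against the FIPS-197 Appendix C.3
// vector; again a sketch under the same assumptions as the AES-128 test above.
#[test]
fn aes256_fips197_known_answer() {
    let mut key = [0u8; 32];
    for (i, b) in key.iter_mut().enumerate() {
        *b = i as u8; // 00 01 02 .. 1f
    }
    let mut pt = [0u8; 16];
    for (i, b) in pt.iter_mut().enumerate() {
        *b = (i as u8) * 0x11; // 00 11 22 .. ff
    }
    let ct = [
        0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf,
        0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89,
    ];

    let rkeys = aes256_key_schedule(&key);
    let out = aes256_encrypt(&rkeys, &BatchBlocks::from([Block::from(pt); 4]));
    assert_eq!(out[0].as_slice(), &ct[..]);
}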

/// Note that the 4 bitwise NOT (^= 0xffffffffffffffff) are accounted for here so that it is a true
/// inverse of 'sub_bytes'.
fn inv_sub_bytes(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);

    // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
    // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)

    let u7 = state[0];
    let u6 = state[1];
    let u5 = state[2];
    let u4 = state[3];
    let u3 = state[4];
    let u2 = state[5];
    let u1 = state[6];
    let u0 = state[7];

    let t23 = u0 ^ u3;
    let t8 = u1 ^ t23;
    let m2 = t23 & t8;
    let t4 = u4 ^ t8;
    let t22 = u1 ^ u3;
    let t2 = u0 ^ u1;
    let t1 = u3 ^ u4;
    // t23 -> stack
    let t9 = u7 ^ t1;
    // t8 -> stack
    let m7 = t22 & t9;
    // t9 -> stack
    let t24 = u4 ^ u7;
    // m7 -> stack
    let t10 = t2 ^ t24;
    // u4 -> stack
    let m14 = t2 & t10;
    let r5 = u6 ^ u7;
    // m2 -> stack
    let t3 = t1 ^ r5;
    // t2 -> stack
    let t13 = t2 ^ r5;
    let t19 = t22 ^ r5;
    // t3 -> stack
    let t17 = u2 ^ t19;
    // t4 -> stack
    let t25 = u2 ^ t1;
    let r13 = u1 ^ u6;
    // t25 -> stack
    let t20 = t24 ^ r13;
    // t17 -> stack
    let m9 = t20 & t17;
    // t20 -> stack
    let r17 = u2 ^ u5;
    // t22 -> stack
    let t6 = t22 ^ r17;
    // t13 -> stack
    let m1 = t13 & t6;
    let y5 = u0 ^ r17;
    let m4 = t19 & y5;
    let m5 = m4 ^ m1;
    let m17 = m5 ^ t24;
    let r18 = u5 ^ u6;
    let t27 = t1 ^ r18;
    let t15 = t10 ^ t27;
    // t6 -> stack
    let m11 = t1 & t15;
    let m15 = m14 ^ m11;
    let m21 = m17 ^ m15;
    // t1 -> stack
    // t4 <- stack
    let m12 = t4 & t27;
    let m13 = m12 ^ m11;
    let t14 = t10 ^ r18;
    let m3 = t14 ^ m1;
    // m2 <- stack
    let m16 = m3 ^ m2;
    let m20 = m16 ^ m13;
    // u4 <- stack
    let r19 = u2 ^ u4;
    let t16 = r13 ^ r19;
    // t3 <- stack
    let t26 = t3 ^ t16;
    let m6 = t3 & t16;
    let m8 = t26 ^ m6;
    // t10 -> stack
    // m7 <- stack
    let m18 = m8 ^ m7;
    let m22 = m18 ^ m13;
    let m25 = m22 & m20;
    let m26 = m21 ^ m25;
    let m10 = m9 ^ m6;
    let m19 = m10 ^ m15;
    // t25 <- stack
    let m23 = m19 ^ t25;
    let m28 = m23 ^ m25;
    let m24 = m22 ^ m23;
    let m30 = m26 & m24;
    let m39 = m23 ^ m30;
    let m48 = m39 & y5;
    let m57 = m39 & t19;
    // m48 -> stack
    let m36 = m24 ^ m25;
    let m31 = m20 & m23;
    let m27 = m20 ^ m21;
    let m32 = m27 & m31;
    let m29 = m28 & m27;
    let m37 = m21 ^ m29;
    // m39 -> stack
    let m42 = m37 ^ m39;
    let m52 = m42 & t15;
    // t27 -> stack
    // t1 <- stack
    let m61 = m42 & t1;
    let p0 = m52 ^ m61;
    let p16 = m57 ^ m61;
    // m57 -> stack
    // t20 <- stack
    let m60 = m37 & t20;
    // p16 -> stack
    // t17 <- stack
    let m51 = m37 & t17;
    let m33 = m27 ^ m25;
    let m38 = m32 ^ m33;
    let m43 = m37 ^ m38;
    let m49 = m43 & t16;
    let p6 = m49 ^ m60;
    let p13 = m49 ^ m51;
    let m58 = m43 & t3;
    // t9 <- stack
    let m50 = m38 & t9;
    // t22 <- stack
    let m59 = m38 & t22;
    // p6 -> stack
    let p1 = m58 ^ m59;
    let p7 = p0 ^ p1;
    let m34 = m21 & m22;
    let m35 = m24 & m34;
    let m40 = m35 ^ m36;
    let m41 = m38 ^ m40;
    let m45 = m42 ^ m41;
    // t27 <- stack
    let m53 = m45 & t27;
    let p8 = m50 ^ m53;
    let p23 = p7 ^ p8;
    // t4 <- stack
    let m62 = m45 & t4;
    let p14 = m49 ^ m62;
    let s6 = p14 ^ p23;
    // t10 <- stack
    let m54 = m41 & t10;
    let p2 = m54 ^ m62;
    let p22 = p2 ^ p7;
    let s0 = p13 ^ p22;
    let p17 = m58 ^ p2;
    let p15 = m54 ^ m59;
    // t2 <- stack
    let m63 = m41 & t2;
    // m39 <- stack
    let m44 = m39 ^ m40;
    // p17 -> stack
    // t6 <- stack
    let m46 = m44 & t6;
    let p5 = m46 ^ m51;
    // p23 -> stack
    let p18 = m63 ^ p5;
    let p24 = p5 ^ p7;
    // m48 <- stack
    let p12 = m46 ^ m48;
    let s3 = p12 ^ p22;
    // t13 <- stack
    let m55 = m44 & t13;
    let p9 = m55 ^ m63;
    // p16 <- stack
    let s7 = p9 ^ p16;
    // t8 <- stack
    let m47 = m40 & t8;
    let p3 = m47 ^ m50;
    let p19 = p2 ^ p3;
    let s5 = p19 ^ p24;
    let p11 = p0 ^ p3;
    let p26 = p9 ^ p11;
    // t23 <- stack
    let m56 = m40 & t23;
    let p4 = m48 ^ m56;
    // p6 <- stack
    let p20 = p4 ^ p6;
    let p29 = p15 ^ p20;
    let s1 = p26 ^ p29;
    // m57 <- stack
    let p10 = m57 ^ p4;
    let p27 = p10 ^ p18;
    // p23 <- stack
    let s4 = p23 ^ p27;
    let p25 = p6 ^ p10;
    let p28 = p11 ^ p25;
    // p17 <- stack
    let s2 = p17 ^ p28;

    state[0] = s7;
    state[1] = s6;
    state[2] = s5;
    state[3] = s4;
    state[4] = s3;
    state[5] = s2;
    state[6] = s1;
    state[7] = s0;
}

/// Bitsliced implementation of the AES Sbox based on Boyar, Peralta and Calik.
///
/// See: <http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt>
///
/// Note that the 4 bitwise NOT (^= 0xffffffffffffffff) are moved to the key schedule.
fn sub_bytes(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);

    // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
    // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)

    let u7 = state[0];
    let u6 = state[1];
    let u5 = state[2];
    let u4 = state[3];
    let u3 = state[4];
    let u2 = state[5];
    let u1 = state[6];
    let u0 = state[7];

    let y14 = u3 ^ u5;
    let y13 = u0 ^ u6;
    let y12 = y13 ^ y14;
    let t1 = u4 ^ y12;
    let y15 = t1 ^ u5;
    let t2 = y12 & y15;
    let y6 = y15 ^ u7;
    let y20 = t1 ^ u1;
    // y12 -> stack
    let y9 = u0 ^ u3;
    // y20 -> stack
    let y11 = y20 ^ y9;
    // y9 -> stack
    let t12 = y9 & y11;
    // y6 -> stack
    let y7 = u7 ^ y11;
    let y8 = u0 ^ u5;
    let t0 = u1 ^ u2;
    let y10 = y15 ^ t0;
    // y15 -> stack
    let y17 = y10 ^ y11;
    // y14 -> stack
    let t13 = y14 & y17;
    let t14 = t13 ^ t12;
    // y17 -> stack
    let y19 = y10 ^ y8;
    // y10 -> stack
    let t15 = y8 & y10;
    let t16 = t15 ^ t12;
    let y16 = t0 ^ y11;
    // y11 -> stack
    let y21 = y13 ^ y16;
    // y13 -> stack
    let t7 = y13 & y16;
    // y16 -> stack
    let y18 = u0 ^ y16;
    let y1 = t0 ^ u7;
    let y4 = y1 ^ u3;
    // u7 -> stack
    let t5 = y4 & u7;
    let t6 = t5 ^ t2;
    let t18 = t6 ^ t16;
    let t22 = t18 ^ y19;
    let y2 = y1 ^ u0;
    let t10 = y2 & y7;
    let t11 = t10 ^ t7;
    let t20 = t11 ^ t16;
    let t24 = t20 ^ y18;
    let y5 = y1 ^ u6;
    let t8 = y5 & y1;
    let t9 = t8 ^ t7;
    let t19 = t9 ^ t14;
    let t23 = t19 ^ y21;
    let y3 = y5 ^ y8;
    // y6 <- stack
    let t3 = y3 & y6;
    let t4 = t3 ^ t2;
    // y20 <- stack
    let t17 = t4 ^ y20;
    let t21 = t17 ^ t14;
    let t26 = t21 & t23;
    let t27 = t24 ^ t26;
    let t31 = t22 ^ t26;
    let t25 = t21 ^ t22;
    // y4 -> stack
    let t28 = t25 & t27;
    let t29 = t28 ^ t22;
    let z14 = t29 & y2;
    let z5 = t29 & y7;
    let t30 = t23 ^ t24;
    let t32 = t31 & t30;
    let t33 = t32 ^ t24;
    let t35 = t27 ^ t33;
    let t36 = t24 & t35;
    let t38 = t27 ^ t36;
    let t39 = t29 & t38;
    let t40 = t25 ^ t39;
    let t43 = t29 ^ t40;
    // y16 <- stack
    let z3 = t43 & y16;
    let tc12 = z3 ^ z5;
    // tc12 -> stack
    // y13 <- stack
    let z12 = t43 & y13;
    let z13 = t40 & y5;
    let z4 = t40 & y1;
    let tc6 = z3 ^ z4;
    let t34 = t23 ^ t33;
    let t37 = t36 ^ t34;
    let t41 = t40 ^ t37;
    // y10 <- stack
    let z8 = t41 & y10;
    let z17 = t41 & y8;
    let t44 = t33 ^ t37;
    // y15 <- stack
    let z0 = t44 & y15;
    // z17 -> stack
    // y12 <- stack
    let z9 = t44 & y12;
    let z10 = t37 & y3;
    let z1 = t37 & y6;
    let tc5 = z1 ^ z0;
    let tc11 = tc6 ^ tc5;
    // y4 <- stack
    let z11 = t33 & y4;
    let t42 = t29 ^ t33;
    let t45 = t42 ^ t41;
    // y17 <- stack
    let z7 = t45 & y17;
    let tc8 = z7 ^ tc6;
    // y14 <- stack
    let z16 = t45 & y14;
    // y11 <- stack
    let z6 = t42 & y11;
    let tc16 = z6 ^ tc8;
    // z14 -> stack
    // y9 <- stack
    let z15 = t42 & y9;
    let tc20 = z15 ^ tc16;
    let tc1 = z15 ^ z16;
    let tc2 = z10 ^ tc1;
    let tc21 = tc2 ^ z11;
    let tc3 = z9 ^ tc2;
    let s0 = tc3 ^ tc16;
    let s3 = tc3 ^ tc11;
    let s1 = s3 ^ tc16;
    let tc13 = z13 ^ tc1;
    // u7 <- stack
    let z2 = t33 & u7;
    let tc4 = z0 ^ z2;
    let tc7 = z12 ^ tc4;
    let tc9 = z8 ^ tc7;
    let tc10 = tc8 ^ tc9;
    // z14 <- stack
    let tc17 = z14 ^ tc10;
    let s5 = tc21 ^ tc17;
    let tc26 = tc17 ^ tc20;
    // z17 <- stack
    let s2 = tc26 ^ z17;
    // tc12 <- stack
    let tc14 = tc4 ^ tc12;
    let tc18 = tc13 ^ tc14;
    let s6 = tc10 ^ tc18;
    let s7 = z12 ^ tc18;
    let s4 = tc14 ^ s3;

    state[0] = s7;
    state[1] = s6;
    state[2] = s5;
    state[3] = s4;
    state[4] = s3;
    state[5] = s2;
    state[6] = s1;
    state[7] = s0;
}

/// NOT operations that are omitted in S-box
#[inline]
fn sub_bytes_nots(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    state[0] ^= 0xffffffffffffffff;
    state[1] ^= 0xffffffffffffffff;
    state[5] ^= 0xffffffffffffffff;
    state[6] ^= 0xffffffffffffffff;
}
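
// sub_bytes followed by sub_bytes_nots computes the full AES S-box on
// bitsliced data (the NOTs complete the circuit above). A spot-check sketch,
// not from the original file, against the FIPS-197 example S(0x53) = 0xed,
// using bitslice/inv_bitslice defined later in this file:
#[test]
fn sbox_circuit_matches_fips197_example() {
    let block = Block::from([0x53u8; 16]);
    let mut state = State::default();
    bitslice(&mut state, &block, &block, &block, &block);
    sub_bytes(&mut state);
    sub_bytes_nots(&mut state);
    let out = inv_bitslice(&state);
    assert!(out[0].iter().all(|&b| b == 0xed));
}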

/// Computation of the MixColumns transformation in the fixsliced representation, with different
/// rotations used according to the round number mod 4.
///
/// Based on Käsper-Schwabe, similar to <https://github.com/Ko-/aes-armcortexm>.
macro_rules! define_mix_columns {
    (
        $name:ident,
        $name_inv:ident,
        $first_rotate:path,
        $second_rotate:path
    ) => {
        #[rustfmt::skip]
        fn $name(state: &mut State) {
            let (a0, a1, a2, a3, a4, a5, a6, a7) = (
                state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
            );
            let (b0, b1, b2, b3, b4, b5, b6, b7) = (
                $first_rotate(a0),
                $first_rotate(a1),
                $first_rotate(a2),
                $first_rotate(a3),
                $first_rotate(a4),
                $first_rotate(a5),
                $first_rotate(a6),
                $first_rotate(a7),
            );
            let (c0, c1, c2, c3, c4, c5, c6, c7) = (
                a0 ^ b0,
                a1 ^ b1,
                a2 ^ b2,
                a3 ^ b3,
                a4 ^ b4,
                a5 ^ b5,
                a6 ^ b6,
                a7 ^ b7,
            );
            state[0] = b0      ^ c7 ^ $second_rotate(c0);
            state[1] = b1 ^ c0 ^ c7 ^ $second_rotate(c1);
            state[2] = b2 ^ c1      ^ $second_rotate(c2);
            state[3] = b3 ^ c2 ^ c7 ^ $second_rotate(c3);
            state[4] = b4 ^ c3 ^ c7 ^ $second_rotate(c4);
            state[5] = b5 ^ c4      ^ $second_rotate(c5);
            state[6] = b6 ^ c5      ^ $second_rotate(c6);
            state[7] = b7 ^ c6      ^ $second_rotate(c7);
        }

        #[rustfmt::skip]
        fn $name_inv(state: &mut State) {
            let (a0, a1, a2, a3, a4, a5, a6, a7) = (
                state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
            );
            let (b0, b1, b2, b3, b4, b5, b6, b7) = (
                $first_rotate(a0),
                $first_rotate(a1),
                $first_rotate(a2),
                $first_rotate(a3),
                $first_rotate(a4),
                $first_rotate(a5),
                $first_rotate(a6),
                $first_rotate(a7),
            );
            let (c0, c1, c2, c3, c4, c5, c6, c7) = (
                a0 ^ b0,
                a1 ^ b1,
                a2 ^ b2,
                a3 ^ b3,
                a4 ^ b4,
                a5 ^ b5,
                a6 ^ b6,
                a7 ^ b7,
            );
            let (d0, d1, d2, d3, d4, d5, d6, d7) = (
                a0      ^ c7,
                a1 ^ c0 ^ c7,
                a2 ^ c1,
                a3 ^ c2 ^ c7,
                a4 ^ c3 ^ c7,
                a5 ^ c4,
                a6 ^ c5,
                a7 ^ c6,
            );
            let (e0, e1, e2, e3, e4, e5, e6, e7) = (
                c0      ^ d6,
                c1      ^ d6 ^ d7,
                c2 ^ d0      ^ d7,
                c3 ^ d1 ^ d6,
                c4 ^ d2 ^ d6 ^ d7,
                c5 ^ d3      ^ d7,
                c6 ^ d4,
                c7 ^ d5,
            );
            state[0] = d0 ^ e0 ^ $second_rotate(e0);
            state[1] = d1 ^ e1 ^ $second_rotate(e1);
            state[2] = d2 ^ e2 ^ $second_rotate(e2);
            state[3] = d3 ^ e3 ^ $second_rotate(e3);
            state[4] = d4 ^ e4 ^ $second_rotate(e4);
            state[5] = d5 ^ e5 ^ $second_rotate(e5);
            state[6] = d6 ^ e6 ^ $second_rotate(e6);
            state[7] = d7 ^ e7 ^ $second_rotate(e7);
        }
    }
}

define_mix_columns!(
    mix_columns_0,
    inv_mix_columns_0,
    rotate_rows_1,
    rotate_rows_2
);

define_mix_columns!(
    mix_columns_1,
    inv_mix_columns_1,
    rotate_rows_and_columns_1_1,
    rotate_rows_and_columns_2_2
);

#[cfg(not(aes_compact))]
define_mix_columns!(
    mix_columns_2,
    inv_mix_columns_2,
    rotate_rows_and_columns_1_2,
    rotate_rows_2
);

#[cfg(not(aes_compact))]
define_mix_columns!(
    mix_columns_3,
    inv_mix_columns_3,
    rotate_rows_and_columns_1_3,
    rotate_rows_and_columns_2_2
);

#[inline]
fn delta_swap_1(a: &mut u64, shift: u32, mask: u64) {
    let t = (*a ^ ((*a) >> shift)) & mask;
    *a ^= t ^ (t << shift);
}

#[inline]
fn delta_swap_2(a: &mut u64, b: &mut u64, shift: u32, mask: u64) {
    let t = (*a ^ ((*b) >> shift)) & mask;
    *a ^= t;
    *b ^= t << shift;
}
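
// A delta swap exchanges the bit group selected by `mask` with the group
// `shift` positions above it: within one word for delta_swap_1, across two
// words for delta_swap_2. A small illustrative check (values chosen here,
// not from the original source):
#[test]
fn delta_swap_exchanges_masked_bits() {
    // Within one word: swap bit 0 and bit 1, so 0b10 becomes 0b01.
    let mut a = 0b10u64;
    delta_swap_1(&mut a, 1, 0b01);
    assert_eq!(a, 0b01);

    // Across two words: swap the low nibble of `x` with bits 4..8 of `y`.
    let (mut x, mut y) = (0x00u64, 0xf0u64);
    delta_swap_2(&mut x, &mut y, 4, 0x0f);
    assert_eq!((x, y), (0x0f, 0x00));
}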

/// Applies ShiftRows once on an AES state (or key).
#[cfg(any(not(aes_compact), feature = "hazmat"))]
#[inline]
fn shift_rows_1(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    for x in state.iter_mut() {
        delta_swap_1(x, 8, 0x00f000ff000f0000);
        delta_swap_1(x, 4, 0x0f0f00000f0f0000);
    }
}

/// Applies ShiftRows twice on an AES state (or key).
#[inline]
fn shift_rows_2(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    for x in state.iter_mut() {
        delta_swap_1(x, 8, 0x00ff000000ff0000);
    }
}

/// Applies ShiftRows three times on an AES state (or key).
#[inline]
fn shift_rows_3(state: &mut [u64]) {
    debug_assert_eq!(state.len(), 8);
    for x in state.iter_mut() {
        delta_swap_1(x, 8, 0x000f00ff00f00000);
        delta_swap_1(x, 4, 0x0f0f00000f0f0000);
    }
}

#[inline(always)]
fn inv_shift_rows_1(state: &mut [u64]) {
    shift_rows_3(state);
}

#[inline(always)]
fn inv_shift_rows_2(state: &mut [u64]) {
    shift_rows_2(state);
}

#[cfg(not(aes_compact))]
#[inline(always)]
fn inv_shift_rows_3(state: &mut [u64]) {
    shift_rows_1(state);
}

/// XOR the columns after the S-box during the key schedule round function.
///
/// The `idx_xor` parameter refers to the index of the previous round key that is
/// involved in the XOR computation (should be 8 and 16 for AES-128 and AES-256,
/// respectively).
///
/// The `idx_ror` parameter refers to the rotation value, which varies between the
/// different key schedules.
fn xor_columns(rkeys: &mut [u64], offset: usize, idx_xor: usize, idx_ror: u32) {
    for i in 0..8 {
        let off_i = offset + i;
        let rk = rkeys[off_i - idx_xor] ^ (0x000f000f000f000f & ror(rkeys[off_i], idx_ror));
        rkeys[off_i] = rk
            ^ (0xfff0fff0fff0fff0 & (rk << 4))
            ^ (0xff00ff00ff00ff00 & (rk << 8))
            ^ (0xf000f000f000f000 & (rk << 12));
    }
}

/// Bitslice four 128-bit input blocks input0, input1, input2, input3 into a 512-bit internal state.
fn bitslice(output: &mut [u64], input0: &[u8], input1: &[u8], input2: &[u8], input3: &[u8]) {
    debug_assert_eq!(output.len(), 8);
    debug_assert_eq!(input0.len(), 16);
    debug_assert_eq!(input1.len(), 16);
    debug_assert_eq!(input2.len(), 16);
    debug_assert_eq!(input3.len(), 16);

    // Bitslicing is a bit index manipulation. 512 bits of data means each bit is positioned at a
    // 9-bit index. AES data is 4 blocks, each one a 4x4 column-major matrix of bytes, so the
    // index is initially ([b]lock, [c]olumn, [r]ow, [p]osition):
    //     b1 b0 c1 c0 r1 r0 p2 p1 p0
    //
    // The desired bitsliced data groups first by bit position, then row, column, block:
    //     p2 p1 p0 r1 r0 c1 c0 b1 b0

    #[rustfmt::skip]
    fn read_reordered(input: &[u8]) -> u64 {
        (u64::from(input[0x0])        ) |
        (u64::from(input[0x1]) << 0x10) |
        (u64::from(input[0x2]) << 0x20) |
        (u64::from(input[0x3]) << 0x30) |
        (u64::from(input[0x8]) << 0x08) |
        (u64::from(input[0x9]) << 0x18) |
        (u64::from(input[0xa]) << 0x28) |
        (u64::from(input[0xb]) << 0x38)
    }

    // Reorder each block's bytes on input
    //     __ __ c1 c0 r1 r0 __ __ __ => __ __ c0 r1 r0 c1 __ __ __
    // Reorder by relabeling (note the order of input)
    //     b1 b0 c0 __ __ __ __ __ __ => c0 b1 b0 __ __ __ __ __ __
    let mut t0 = read_reordered(&input0[0x00..0x0c]);
    let mut t4 = read_reordered(&input0[0x04..0x10]);
    let mut t1 = read_reordered(&input1[0x00..0x0c]);
    let mut t5 = read_reordered(&input1[0x04..0x10]);
    let mut t2 = read_reordered(&input2[0x00..0x0c]);
    let mut t6 = read_reordered(&input2[0x04..0x10]);
    let mut t3 = read_reordered(&input3[0x00..0x0c]);
    let mut t7 = read_reordered(&input3[0x04..0x10]);

    // Bit Index Swap 6 <-> 0:
    //     __ __ b0 __ __ __ __ __ p0 => __ __ p0 __ __ __ __ __ b0
    let m0 = 0x5555555555555555;
    delta_swap_2(&mut t1, &mut t0, 1, m0);
    delta_swap_2(&mut t3, &mut t2, 1, m0);
    delta_swap_2(&mut t5, &mut t4, 1, m0);
    delta_swap_2(&mut t7, &mut t6, 1, m0);

    // Bit Index Swap 7 <-> 1:
    //     __ b1 __ __ __ __ __ p1 __ => __ p1 __ __ __ __ __ b1 __
    let m1 = 0x3333333333333333;
    delta_swap_2(&mut t2, &mut t0, 2, m1);
    delta_swap_2(&mut t3, &mut t1, 2, m1);
    delta_swap_2(&mut t6, &mut t4, 2, m1);
    delta_swap_2(&mut t7, &mut t5, 2, m1);

    // Bit Index Swap 8 <-> 2:
    //     c0 __ __ __ __ __ p2 __ __ => p2 __ __ __ __ __ c0 __ __
    let m2 = 0x0f0f0f0f0f0f0f0f;
    delta_swap_2(&mut t4, &mut t0, 4, m2);
    delta_swap_2(&mut t5, &mut t1, 4, m2);
    delta_swap_2(&mut t6, &mut t2, 4, m2);
    delta_swap_2(&mut t7, &mut t3, 4, m2);

    // Final bitsliced bit index, as desired:
    //     p2 p1 p0 r1 r0 c1 c0 b1 b0
    output[0] = t0;
    output[1] = t1;
    output[2] = t2;
    output[3] = t3;
    output[4] = t4;
    output[5] = t5;
    output[6] = t6;
    output[7] = t7;
}

/// Un-bitslice a 512-bit internal state into four 128-bit blocks of output.
fn inv_bitslice(input: &[u64]) -> BatchBlocks {
    debug_assert_eq!(input.len(), 8);

    // Unbitslicing is a bit index manipulation. 512 bits of data means each bit is positioned at
    // a 9-bit index. AES data is 4 blocks, each one a 4x4 column-major matrix of bytes, so the
    // desired index for the output is ([b]lock, [c]olumn, [r]ow, [p]osition):
    //     b1 b0 c1 c0 r1 r0 p2 p1 p0
    //
    // The initially bitsliced data groups first by bit position, then row, column, block:
    //     p2 p1 p0 r1 r0 c1 c0 b1 b0

    let mut t0 = input[0];
    let mut t1 = input[1];
    let mut t2 = input[2];
    let mut t3 = input[3];
    let mut t4 = input[4];
    let mut t5 = input[5];
    let mut t6 = input[6];
    let mut t7 = input[7];

    // TODO: these bit index swaps are identical to those in 'packing'

    // Bit Index Swap 6 <-> 0:
    //     __ __ p0 __ __ __ __ __ b0 => __ __ b0 __ __ __ __ __ p0
    let m0 = 0x5555555555555555;
    delta_swap_2(&mut t1, &mut t0, 1, m0);
    delta_swap_2(&mut t3, &mut t2, 1, m0);
    delta_swap_2(&mut t5, &mut t4, 1, m0);
    delta_swap_2(&mut t7, &mut t6, 1, m0);

    // Bit Index Swap 7 <-> 1:
    //     __ p1 __ __ __ __ __ b1 __ => __ b1 __ __ __ __ __ p1 __
    let m1 = 0x3333333333333333;
    delta_swap_2(&mut t2, &mut t0, 2, m1);
    delta_swap_2(&mut t3, &mut t1, 2, m1);
    delta_swap_2(&mut t6, &mut t4, 2, m1);
    delta_swap_2(&mut t7, &mut t5, 2, m1);

    // Bit Index Swap 8 <-> 2:
    //     p2 __ __ __ __ __ c0 __ __ => c0 __ __ __ __ __ p2 __ __
    let m2 = 0x0f0f0f0f0f0f0f0f;
    delta_swap_2(&mut t4, &mut t0, 4, m2);
    delta_swap_2(&mut t5, &mut t1, 4, m2);
    delta_swap_2(&mut t6, &mut t2, 4, m2);
    delta_swap_2(&mut t7, &mut t3, 4, m2);

    #[rustfmt::skip]
    fn write_reordered(columns: u64, output: &mut [u8]) {
        output[0x0] = (columns        ) as u8;
        output[0x1] = (columns >> 0x10) as u8;
        output[0x2] = (columns >> 0x20) as u8;
        output[0x3] = (columns >> 0x30) as u8;
        output[0x8] = (columns >> 0x08) as u8;
        output[0x9] = (columns >> 0x18) as u8;
        output[0xa] = (columns >> 0x28) as u8;
        output[0xb] = (columns >> 0x38) as u8;
    }

    let mut output = BatchBlocks::default();
    // Reorder by relabeling (note the order of output)
    //     c0 b1 b0 __ __ __ __ __ __ => b1 b0 c0 __ __ __ __ __ __
    // Reorder each block's bytes on output
    //     __ __ c0 r1 r0 c1 __ __ __ => __ __ c1 c0 r1 r0 __ __ __
    write_reordered(t0, &mut output[0][0x00..0x0c]);
    write_reordered(t4, &mut output[0][0x04..0x10]);
    write_reordered(t1, &mut output[1][0x00..0x0c]);
    write_reordered(t5, &mut output[1][0x04..0x10]);
    write_reordered(t2, &mut output[2][0x00..0x0c]);
    write_reordered(t6, &mut output[2][0x04..0x10]);
    write_reordered(t3, &mut output[3][0x00..0x0c]);
    write_reordered(t7, &mut output[3][0x04..0x10]);

    // Final AES bit index, as desired:
    //     b1 b0 c1 c0 r1 r0 p2 p1 p0
    output
}
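
// inv_bitslice undoes the three bit-index swaps of bitslice, so the pair is
// the identity on any four blocks; a round-trip property sketch (not from the
// original file):
#[test]
fn bitslice_round_trip() {
    let mut blocks = [Block::default(); 4];
    for (j, block) in blocks.iter_mut().enumerate() {
        for (i, b) in block.iter_mut().enumerate() {
            *b = (16 * j + i) as u8; // 64 distinct byte values
        }
    }
    let mut state = State::default();
    bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
    let out = inv_bitslice(&state);
    for (a, b) in blocks.iter().zip(out.iter()) {
        assert_eq!(a, b);
    }
}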

/// Copy the eight `u64` words starting at `src_offset` to `src_offset + 8`,
/// i.e. shift one bitsliced round key up by one slot.
fn memshift32(buffer: &mut [u64], src_offset: usize) {
    debug_assert_eq!(src_offset % 8, 0);

    let dst_offset = src_offset + 8;
    debug_assert!(dst_offset + 8 <= buffer.len());

    for i in (0..8).rev() {
        buffer[dst_offset + i] = buffer[src_offset + i];
    }
}
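
// In the key schedules this copies the previous round key into the next slot,
// which is then transformed in place. A tiny behavioral check on a scratch
// buffer (illustrative values only):
#[test]
fn memshift32_copies_one_round_key_up() {
    let mut buf = [0u64; 16];
    for (i, w) in buf.iter_mut().take(8).enumerate() {
        *w = (i + 1) as u64; // source round key: 1..=8
    }
    memshift32(&mut buf, 0);
    assert_eq!(buf[8..], buf[..8]); // the next 8-word slot is now a copy
}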

/// XOR the round key to the internal state. The round keys are expected to be
/// pre-computed and to be packed in the fixsliced representation.
#[inline]
fn add_round_key(state: &mut State, rkey: &[u64]) {
    debug_assert_eq!(rkey.len(), 8);
    for (a, b) in state.iter_mut().zip(rkey) {
        *a ^= b;
    }
}

#[inline(always)]
fn add_round_constant_bit(state: &mut [u64], bit: usize) {
    state[bit] ^= 0x00000000f0000000;
}

#[inline(always)]
fn ror(x: u64, y: u32) -> u64 {
    x.rotate_right(y)
}

#[inline(always)]
fn ror_distance(rows: u32, cols: u32) -> u32 {
    (rows << 4) + (cols << 2)
}
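
// In the p2 p1 p0 r1 r0 c1 c0 b1 b0 packing described in bitslice() above,
// each row occupies 16 bits of a state word and each column 4 bits, so a
// rotation distance is rows * 16 + cols * 4. A worked check of the values the
// key schedules rely on (a sketch, not from the original file):
#[test]
fn ror_distance_units() {
    assert_eq!(ror_distance(1, 0), 16); // one row
    assert_eq!(ror_distance(0, 1), 4); // one column
    assert_eq!(ror_distance(1, 3), 28); // used with xor_columns above
}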

#[inline(always)]
fn rotate_rows_1(x: u64) -> u64 {
    ror(x, ror_distance(1, 0))
}

#[inline(always)]
fn rotate_rows_2(x: u64) -> u64 {
    ror(x, ror_distance(2, 0))
}

#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_1_1(x: u64) -> u64 {
    (ror(x, ror_distance(1, 1)) & 0x0fff0fff0fff0fff) |
    (ror(x, ror_distance(0, 1)) & 0xf000f000f000f000)
}

#[cfg(not(aes_compact))]
#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_1_2(x: u64) -> u64 {
    (ror(x, ror_distance(1, 2)) & 0x00ff00ff00ff00ff) |
    (ror(x, ror_distance(0, 2)) & 0xff00ff00ff00ff00)
}

#[cfg(not(aes_compact))]
#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_1_3(x: u64) -> u64 {
    (ror(x, ror_distance(1, 3)) & 0x000f000f000f000f) |
    (ror(x, ror_distance(0, 3)) & 0xfff0fff0fff0fff0)
}

#[inline(always)]
#[rustfmt::skip]
fn rotate_rows_and_columns_2_2(x: u64) -> u64 {
    (ror(x, ror_distance(2, 2)) & 0x00ff00ff00ff00ff) |
    (ror(x, ror_distance(1, 2)) & 0xff00ff00ff00ff00)
}

/// Low-level "hazmat" AES functions.
///
/// Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
/// implementations in this crate, but instead provides raw access to
/// the AES round function gated under the `hazmat` crate feature.
#[cfg(feature = "hazmat")]
pub(crate) mod hazmat {
    use super::{
        bitslice, inv_bitslice, inv_mix_columns_0, inv_shift_rows_1, inv_sub_bytes, mix_columns_0,
        shift_rows_1, sub_bytes, sub_bytes_nots, State,
    };
    use crate::{Block, Block8};

    /// XOR the `src` block into the `dst` block in-place.
    fn xor_in_place(dst: &mut Block, src: &Block) {
        for (a, b) in dst.iter_mut().zip(src.as_slice()) {
            *a ^= *b;
        }
    }

    /// Perform a bitslice operation, loading a single block.
    fn bitslice_block(block: &Block) -> State {
        let mut state = State::default();
        bitslice(&mut state, block, block, block, block);
        state
    }

    /// Perform an inverse bitslice operation, extracting a single block.
    fn inv_bitslice_block(block: &mut Block, state: &State) {
        block.copy_from_slice(&inv_bitslice(state)[0]);
    }

    /// AES cipher (encrypt) round function.
    #[inline]
    pub(crate) fn cipher_round(block: &mut Block, round_key: &Block) {
        let mut state = bitslice_block(block);
        sub_bytes(&mut state);
        sub_bytes_nots(&mut state);
        shift_rows_1(&mut state);
        mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
        xor_in_place(block, round_key);
    }

    /// AES cipher (encrypt) round function: parallel version.
    #[inline]
    pub(crate) fn cipher_round_par(blocks: &mut Block8, round_keys: &Block8) {
        for (chunk, keys) in blocks.chunks_exact_mut(4).zip(round_keys.chunks_exact(4)) {
            let mut state = State::default();
            bitslice(&mut state, &chunk[0], &chunk[1], &chunk[2], &chunk[3]);
            sub_bytes(&mut state);
            sub_bytes_nots(&mut state);
            shift_rows_1(&mut state);
            mix_columns_0(&mut state);
            let res = inv_bitslice(&state);

            for i in 0..4 {
                chunk[i] = res[i];
                xor_in_place(&mut chunk[i], &keys[i]);
            }
        }
    }

    /// AES equivalent inverse cipher (decrypt) round function.
    #[inline]
    pub(crate) fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
        let mut state = State::default();
        bitslice(&mut state, block, block, block, block);
        sub_bytes_nots(&mut state);
        inv_sub_bytes(&mut state);
        inv_shift_rows_1(&mut state);
        inv_mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
        xor_in_place(block, round_key);
    }

    /// AES equivalent inverse cipher (decrypt) round function: parallel version.
    #[inline]
    pub(crate) fn equiv_inv_cipher_round_par(blocks: &mut Block8, round_keys: &Block8) {
        for (chunk, keys) in blocks.chunks_exact_mut(4).zip(round_keys.chunks_exact(4)) {
            let mut state = State::default();
            bitslice(&mut state, &chunk[0], &chunk[1], &chunk[2], &chunk[3]);
            sub_bytes_nots(&mut state);
            inv_sub_bytes(&mut state);
            inv_shift_rows_1(&mut state);
            inv_mix_columns_0(&mut state);
            let res = inv_bitslice(&state);

            for i in 0..4 {
                chunk[i] = res[i];
                xor_in_place(&mut chunk[i], &keys[i]);
            }
        }
    }

    /// AES mix columns function.
    #[inline]
    pub(crate) fn mix_columns(block: &mut Block) {
        let mut state = bitslice_block(block);
        mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
    }

    /// AES inverse mix columns function.
    #[inline]
    pub(crate) fn inv_mix_columns(block: &mut Block) {
        let mut state = bitslice_block(block);
        inv_mix_columns_0(&mut state);
        inv_bitslice_block(block, &state);
    }
}
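
// The hazmat helpers above make single transformations easy to spot-check.
// A sketch (requires the `hazmat` feature; not from the original file) against
// the classic MixColumns test columns, where `01..` and `c6..` are fixed points:
#[cfg(feature = "hazmat")]
#[test]
fn hazmat_mix_columns_known_columns() {
    let mut block = Block::from([
        0xdb, 0x13, 0x53, 0x45, // column 0: maps to 8e 4d a1 bc
        0xf2, 0x0a, 0x22, 0x5c, // column 1: maps to 9f dc 58 9d
        0x01, 0x01, 0x01, 0x01, // column 2: fixed point
        0xc6, 0xc6, 0xc6, 0xc6, // column 3: fixed point
    ]);
    hazmat::mix_columns(&mut block);
    assert_eq!(
        block.as_slice(),
        &[
            0x8e, 0x4d, 0xa1, 0xbc, 0x9f, 0xdc, 0x58, 0x9d,
            0x01, 0x01, 0x01, 0x01, 0xc6, 0xc6, 0xc6, 0xc6,
        ][..]
    );
}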