Coverage Report

Created: 2025-10-31 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/sha2-0.10.9/src/sha512/x86.rs
Line
Count
Source
1
//! SHA-512 `x86`/`x86_64` backend
2
3
#![allow(clippy::many_single_char_names)]
4
5
use core::mem::size_of;
6
7
#[cfg(target_arch = "x86")]
8
use core::arch::x86::*;
9
#[cfg(target_arch = "x86_64")]
10
use core::arch::x86_64::*;
11
12
use crate::consts::K64;
13
14
cpufeatures::new!(avx2_cpuid, "avx2");
15
16
0
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
17
    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
18
    // after stabilization
19
0
    if avx2_cpuid::get() {
20
0
        unsafe {
21
0
            sha512_compress_x86_64_avx2(state, blocks);
22
0
        }
23
0
    } else {
24
0
        super::soft::compress(state, blocks);
25
0
    }
26
0
}
27
28
#[target_feature(enable = "avx2")]
29
0
unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
30
0
    let mut start_block = 0;
31
32
0
    if blocks.len() & 0b1 != 0 {
33
0
        sha512_compress_x86_64_avx(state, &blocks[0]);
34
0
        start_block += 1;
35
0
    }
36
37
0
    let mut ms: MsgSchedule = [_mm_setzero_si128(); 8];
38
0
    let mut t2: RoundStates = [_mm_setzero_si128(); 40];
39
0
    let mut x = [_mm256_setzero_si256(); 8];
40
41
0
    for i in (start_block..blocks.len()).step_by(2) {
42
0
        load_data_avx2(&mut x, &mut ms, &mut t2, blocks.as_ptr().add(i) as *const _);
43
0
44
0
        // First block
45
0
        let mut current_state = *state;
46
0
        rounds_0_63_avx2(&mut current_state, &mut x, &mut ms, &mut t2);
47
0
        rounds_64_79(&mut current_state, &ms);
48
0
        accumulate_state(state, &current_state);
49
0
50
0
        // Second block
51
0
        current_state = *state;
52
0
        process_second_block(&mut current_state, &t2);
53
0
        accumulate_state(state, &current_state);
54
0
    }
55
0
}
56
57
#[inline(always)]
58
0
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {
59
0
    let mut ms = [_mm_setzero_si128(); 8];
60
0
    let mut x = [_mm_setzero_si128(); 8];
61
62
    // Reduced to single iteration
63
0
    let mut current_state = *state;
64
0
    load_data_avx(&mut x, &mut ms, block.as_ptr() as *const _);
65
0
    rounds_0_63_avx(&mut current_state, &mut x, &mut ms);
66
0
    rounds_64_79(&mut current_state, &ms);
67
0
    accumulate_state(state, &current_state);
68
0
}
69
70
#[inline(always)]
71
0
unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const __m128i) {
72
    #[allow(non_snake_case)]
73
0
    let MASK = _mm_setr_epi32(0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b);
74
75
    macro_rules! unrolled_iterations {
76
        ($($i:literal),*) => {$(
77
            x[$i] = _mm_loadu_si128(data.add($i) as *const _);
78
            x[$i] = _mm_shuffle_epi8(x[$i], MASK);
79
80
            let y = _mm_add_epi64(
81
                x[$i],
82
                _mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _),
83
            );
84
85
            ms[$i] = y;
86
        )*};
87
    }
88
89
0
    unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7);
90
0
}
91
92
#[inline(always)]
93
0
unsafe fn load_data_avx2(
94
0
    x: &mut [__m256i; 8],
95
0
    ms: &mut MsgSchedule,
96
0
    t2: &mut RoundStates,
97
0
    data: *const __m128i,
98
0
) {
99
    #[allow(non_snake_case)]
100
0
    let MASK = _mm256_set_epi64x(
101
        0x0809_0A0B_0C0D_0E0F_i64,
102
        0x0001_0203_0405_0607_i64,
103
        0x0809_0A0B_0C0D_0E0F_i64,
104
        0x0001_0203_0405_0607_i64,
105
    );
106
107
    macro_rules! unrolled_iterations {
108
        ($($i:literal),*) => {$(
109
            x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add(8 + $i) as *const _), 1);
110
            x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add($i) as *const _), 0);
111
112
            x[$i] = _mm256_shuffle_epi8(x[$i], MASK);
113
114
            let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
115
            let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));
116
117
            ms[$i] = _mm256_extracti128_si256(y, 0);
118
            t2[$i] = _mm256_extracti128_si256(y, 1);
119
        )*};
120
    }
121
122
0
    unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7);
123
0
}
124
125
#[inline(always)]
126
0
unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &mut MsgSchedule) {
127
0
    let mut k64_idx: usize = SHA512_BLOCK_WORDS_NUM;
128
129
0
    for _ in 0..4 {
130
0
        for j in 0..8 {
131
0
            let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
132
0
            let y = sha512_update_x_avx(x, k64);
133
0
134
0
            {
135
0
                let ms = cast_ms(ms);
136
0
                sha_round(current_state, ms[2 * j]);
137
0
                sha_round(current_state, ms[2 * j + 1]);
138
0
            }
139
0
140
0
            ms[j] = y;
141
0
            k64_idx += 2;
142
0
        }
143
    }
144
0
}
145
146
#[inline(always)]
147
0
unsafe fn rounds_0_63_avx2(
148
0
    current_state: &mut State,
149
0
    x: &mut [__m256i; 8],
150
0
    ms: &mut MsgSchedule,
151
0
    t2: &mut RoundStates,
152
0
) {
153
0
    let mut k64x4_idx: usize = SHA512_BLOCK_WORDS_NUM;
154
155
0
    for i in 1..5 {
156
0
        for j in 0..8 {
157
0
            let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _);
158
0
            let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t));
159
0
160
0
            {
161
0
                let ms = cast_ms(ms);
162
0
                sha_round(current_state, ms[2 * j]);
163
0
                sha_round(current_state, ms[2 * j + 1]);
164
0
            }
165
0
166
0
            ms[j] = _mm256_extracti128_si256(y, 0);
167
0
            t2[8 * i + j] = _mm256_extracti128_si256(y, 1);
168
0
169
0
            k64x4_idx += 2;
170
0
        }
171
    }
172
0
}
173
174
#[inline(always)]
175
0
fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) {
176
0
    let ms = cast_ms(ms);
177
0
    for i in 64..80 {
178
0
        sha_round(current_state, ms[i & 0xf]);
179
0
    }
180
0
}
181
182
#[inline(always)]
183
0
fn process_second_block(current_state: &mut State, t2: &RoundStates) {
184
0
    for t2 in cast_rs(t2).iter() {
185
0
        sha_round(current_state, *t2);
186
0
    }
187
0
}
188
189
#[inline(always)]
190
0
fn sha_round(s: &mut State, x: u64) {
191
    macro_rules! big_sigma0 {
192
        ($a:expr) => {
193
            $a.rotate_right(28) ^ $a.rotate_right(34) ^ $a.rotate_right(39)
194
        };
195
    }
196
    macro_rules! big_sigma1 {
197
        ($a:expr) => {
198
            $a.rotate_right(14) ^ $a.rotate_right(18) ^ $a.rotate_right(41)
199
        };
200
    }
201
    macro_rules! bool3ary_202 {
202
        ($a:expr, $b:expr, $c:expr) => {
203
            $c ^ ($a & ($b ^ $c))
204
        };
205
    } // Choose, MD5F, SHA1C
206
    macro_rules! bool3ary_232 {
207
        ($a:expr, $b:expr, $c:expr) => {
208
            ($a & $b) ^ ($a & $c) ^ ($b & $c)
209
        };
210
    } // Majority, SHA1M
211
212
    macro_rules! rotate_state {
213
        ($s:ident) => {{
214
            let tmp = $s[7];
215
            $s[7] = $s[6];
216
            $s[6] = $s[5];
217
            $s[5] = $s[4];
218
            $s[4] = $s[3];
219
            $s[3] = $s[2];
220
            $s[2] = $s[1];
221
            $s[1] = $s[0];
222
            $s[0] = tmp;
223
        }};
224
    }
225
226
0
    let t = x
227
0
        .wrapping_add(s[7])
228
0
        .wrapping_add(big_sigma1!(s[4]))
229
0
        .wrapping_add(bool3ary_202!(s[4], s[5], s[6]));
230
231
0
    s[7] = t
232
0
        .wrapping_add(big_sigma0!(s[0]))
233
0
        .wrapping_add(bool3ary_232!(s[0], s[1], s[2]));
234
0
    s[3] = s[3].wrapping_add(t);
235
236
0
    rotate_state!(s);
237
0
}
238
239
#[inline(always)]
240
0
fn accumulate_state(dst: &mut State, src: &State) {
241
0
    for i in 0..SHA512_HASH_WORDS_NUM {
242
0
        dst[i] = dst[i].wrapping_add(src[i]);
243
0
    }
244
0
}
245
246
macro_rules! fn_sha512_update_x {
247
    ($name:ident, $ty:ident, {
248
        ADD64 = $ADD64:ident,
249
        ALIGNR8 = $ALIGNR8:ident,
250
        SRL64 = $SRL64:ident,
251
        SLL64 = $SLL64:ident,
252
        XOR = $XOR:ident,
253
    }) => {
254
0
        unsafe fn $name(x: &mut [$ty; 8], k64: $ty) -> $ty {
255
            // q[2:1]
256
0
            let mut t0 = $ALIGNR8(x[1], x[0], 8);
257
            // q[10:9]
258
0
            let mut t3 = $ALIGNR8(x[5], x[4], 8);
259
            // q[2:1] >> s0[0]
260
0
            let mut t2 = $SRL64(t0, 1);
261
            // q[1:0] + q[10:9]
262
0
            x[0] = $ADD64(x[0], t3);
263
            // q[2:1] >> s0[2]
264
0
            t3 = $SRL64(t0, 7);
265
            // q[2:1] << (64 - s0[1])
266
0
            let mut t1 = $SLL64(t0, 64 - 8);
267
            // (q[2:1] >> s0[2]) ^
268
            // (q[2:1] >> s0[0])
269
0
            t0 = $XOR(t3, t2);
270
            // q[2:1] >> s0[1]
271
0
            t2 = $SRL64(t2, 8 - 1);
272
            // (q[2:1] >> s0[2]) ^
273
            // (q[2:1] >> s0[0]) ^
274
            // q[2:1] << (64 - s0[1])
275
0
            t0 = $XOR(t0, t1);
276
            // q[2:1] << (64 - s0[0])
277
0
            t1 = $SLL64(t1, 8 - 1);
278
            // sigma1(q[2:1])
279
0
            t0 = $XOR(t0, t2);
280
0
            t0 = $XOR(t0, t1);
281
            // q[15:14] >> s1[2]
282
0
            t3 = $SRL64(x[7], 6);
283
            // q[15:14] >> (64 - s1[1])
284
0
            t2 = $SLL64(x[7], 64 - 61);
285
            // q[1:0] + sigma0(q[2:1])
286
0
            x[0] = $ADD64(x[0], t0);
287
            // q[15:14] >> s1[0]
288
0
            t1 = $SRL64(x[7], 19);
289
            // q[15:14] >> s1[2] ^
290
            // q[15:14] >> (64 - s1[1])
291
0
            t3 = $XOR(t3, t2);
292
            // q[15:14] >> (64 - s1[0])
293
0
            t2 = $SLL64(t2, 61 - 19);
294
            // q[15:14] >> s1[2] ^
295
            // q[15:14] >> (64 - s1[1] ^
296
            // q[15:14] >> s1[0]
297
0
            t3 = $XOR(t3, t1);
298
            // q[15:14] >> s1[1]
299
0
            t1 = $SRL64(t1, 61 - 19);
300
            // sigma1(q[15:14])
301
0
            t3 = $XOR(t3, t2);
302
0
            t3 = $XOR(t3, t1);
303
304
            // q[1:0] + q[10:9] + sigma1(q[15:14]) + sigma0(q[2:1])
305
0
            x[0] = $ADD64(x[0], t3);
306
307
            // rotate
308
0
            let temp = x[0];
309
0
            x[0] = x[1];
310
0
            x[1] = x[2];
311
0
            x[2] = x[3];
312
0
            x[3] = x[4];
313
0
            x[4] = x[5];
314
0
            x[5] = x[6];
315
0
            x[6] = x[7];
316
0
            x[7] = temp;
317
318
0
            $ADD64(x[7], k64)
319
0
        }
Unexecuted instantiation: sha2::sha512::x86::sha512_update_x_avx
Unexecuted instantiation: sha2::sha512::x86::sha512_update_x_avx2
320
    };
321
}
322
323
fn_sha512_update_x!(sha512_update_x_avx, __m128i, {
324
        ADD64 = _mm_add_epi64,
325
        ALIGNR8 = _mm_alignr_epi8,
326
        SRL64 = _mm_srli_epi64,
327
        SLL64 = _mm_slli_epi64,
328
        XOR = _mm_xor_si128,
329
});
330
331
fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
332
        ADD64 = _mm256_add_epi64,
333
        ALIGNR8 = _mm256_alignr_epi8,
334
        SRL64 = _mm256_srli_epi64,
335
        SLL64 = _mm256_slli_epi64,
336
        XOR = _mm256_xor_si256,
337
});
338
339
#[inline(always)]
340
0
fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] {
341
0
    unsafe { &*(ms as *const MsgSchedule as *const _) }
342
0
}
343
344
#[inline(always)]
345
0
fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] {
346
0
    unsafe { &*(rs as *const RoundStates as *const _) }
347
0
}
348
349
type State = [u64; SHA512_HASH_WORDS_NUM];
350
type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2];
351
type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2];
352
353
const SHA512_BLOCK_BYTE_LEN: usize = 128;
354
const SHA512_ROUNDS_NUM: usize = 80;
355
const SHA512_HASH_BYTE_LEN: usize = 64;
356
const SHA512_HASH_WORDS_NUM: usize = SHA512_HASH_BYTE_LEN / size_of::<u64>();
357
const SHA512_BLOCK_WORDS_NUM: usize = SHA512_BLOCK_BYTE_LEN / size_of::<u64>();