Coverage Report

Created: 2021-03-22 08:29

/rust/registry/src/github.com-1ecc6299db9ec823/sha2-0.9.3/src/sha256/x86.rs
Line
Count
Source (jump to first uncovered line)
1
#![allow(clippy::many_single_char_names)]
2
3
#[cfg(target_arch = "x86_64")]
4
use core::arch::x86_64::*;
5
#[cfg(target_arch = "x86")]
6
use core::arch::x86::*;
7
8
/// Produces the next `__m128i` of SHA-256 message-schedule words from the four preceding
/// vectors `v0..v3` (oldest first), using the SHA-extension message intrinsics.
///
/// The result is `_mm_sha256msg2_epu32(_mm_sha256msg1_epu32(v0, v1) + alignr(v3, v2, 4), v3)`.
///
/// # Safety
///
/// This function has no `#[target_feature]` attribute of its own. Presumably the caller must
/// guarantee that the CPU supports the SHA and SSSE3 intrinsics used here; in this file it is
/// only reached through `schedule_rounds4!` inside `digest_blocks`.
unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
9
    let t1 = _mm_sha256msg1_epu32(v0, v1); // intermediate schedule step computed from v0 and v1
10
    let t2 = _mm_alignr_epi8(v3, v2, 4); // upper three dwords of v2 followed by the low dword of v3
11
    let t3 = _mm_add_epi32(t1, t2); // lane-wise wrapping 32-bit add
12
    _mm_sha256msg2_epu32(t3, v3) // final schedule step; this value is returned
13
}
14
15
// Runs one group of SHA-256 rounds on the packed working state.
//
// Arguments:
// - `$abef`, `$cdgh`: identifiers of the two mutable `__m128i` state halves; both are reassigned.
// - `$rest`: expression yielding the `__m128i` of message words for this group.
// - `$i`: index into `crate::consts::K32X4` selecting the round constants for this group.
//
// Must be expanded inside an `unsafe` context, because it calls intrinsics directly.
macro_rules! rounds4 {
16
    ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
17
        let k = crate::consts::K32X4[$i]; // four round constants for group `$i`
18
        let kv = _mm_set_epi32(k[0] as i32, k[1] as i32, k[2] as i32, k[3] as i32); // `_mm_set_epi32` takes lanes high-to-low, so k[0] lands in the top lane; `as i32` only reinterprets the bits
19
        let t1 = _mm_add_epi32($rest, kv); // message words + round constants
20
        $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, t1); // two rounds, fed by the low 64 bits of t1 (per Intel's intrinsic docs)
21
        let t2 = _mm_shuffle_epi32(t1, 0x0E); // move the upper two dwords of t1 into the low two lanes
22
        $abef = _mm_sha256rnds2_epu32($abef, $cdgh, t2); // two more rounds, using the remaining two words
23
    }};
24
}
25
26
// Computes a fresh message-schedule vector and immediately runs a round group on it.
//
// `$w4` is assigned `schedule($w0, $w1, $w2, $w3)` and then passed to `rounds4!` together with
// `$abef`, `$cdgh` and the constant index `$i`. `$w4` must therefore be an assignable place.
// Like `rounds4!`, this must be expanded inside an `unsafe` context.
macro_rules! schedule_rounds4 {
27
    (
28
        $abef:ident, $cdgh:ident,
29
        $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
30
        $i: expr
31
    ) => {{
32
        $w4 = schedule($w0, $w1, $w2, $w3); // overwrite `$w4` with the next schedule vector
33
        rounds4!($abef, $cdgh, $w4, $i); // consume it right away with constant group `$i`
34
    }};
35
}
36
37
/// Compresses every 64-byte block in `blocks` into `state` using the x86 SHA extensions.
///
/// `state` is read once up front, permuted into the two-register layout used by
/// `_mm_sha256rnds2_epu32`, updated block by block, then permuted back and stored.
/// If `blocks` is empty the state is written back unchanged.
///
/// # Safety
///
/// The function is compiled with `sha`, `sse2`, `ssse3` and `sse4.1` enabled through
/// `#[target_feature]`. The caller must ensure the running CPU supports these features;
/// `compress` does so with a runtime check before calling.
// we use unaligned loads with `__m128i` pointers
38
#[allow(clippy::cast_ptr_alignment)]
39
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
40
0
unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
41
    #[allow(non_snake_case)]
42
0
    let MASK: __m128i = _mm_set_epi64x( // `_mm_shuffle_epi8` mask that reverses the bytes within each 32-bit lane
43
0
        0x0C0D_0E0F_0809_0A0Bu64 as i64,
44
0
        0x0405_0607_0001_0203u64 as i64,
45
0
    );
46
0
47
0
    let state_ptr = state.as_ptr() as *const __m128i;
48
0
    let dcba = _mm_loadu_si128(state_ptr.add(0)); // state[0..4]; unaligned load
49
0
    let efgh = _mm_loadu_si128(state_ptr.add(1)); // state[4..8]; unaligned load
50
0
51
0
    let cdab = _mm_shuffle_epi32(dcba, 0xB1); // 0xB1 swaps the two dwords inside each 64-bit half
52
0
    let efgh = _mm_shuffle_epi32(efgh, 0x1B); // 0x1B reverses the order of the four dwords
53
0
    let mut abef = _mm_alignr_epi8(cdab, efgh, 8); // high half = low half of cdab, low half = high half of efgh
54
0
    let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0); // high half from cdab, low half from efgh
55
56
0
    for block in blocks {
57
0
        let abef_save = abef; // keep the incoming state for the add after the rounds
58
0
        let cdgh_save = cdgh;
59
0
60
0
        let data_ptr = block.as_ptr() as *const __m128i;
61
0
        let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(0)), MASK); // block bytes 0..16, byte-swapped per 32-bit word
62
0
        let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(1)), MASK); // block bytes 16..32
63
0
        let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(2)), MASK); // block bytes 32..48
64
0
        let mut w3 = _mm_shuffle_epi8( _mm_loadu_si128(data_ptr.add(3)), MASK); // block bytes 48..64
65
0
        let mut w4; // scratch vector; first assigned by `schedule_rounds4!` below
66
0
67
0
        rounds4!(abef, cdgh, w0, 0); // the first four groups consume the loaded message words directly
68
0
        rounds4!(abef, cdgh, w1, 1);
69
0
        rounds4!(abef, cdgh, w2, 2);
70
0
        rounds4!(abef, cdgh, w3, 3);
71
0
        schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4); // from here on each group first derives its words; w0..w4 rotate as a five-entry window
72
0
        schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
73
0
        schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
74
0
        schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
75
0
        schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
76
0
        schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
77
0
        schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
78
0
        schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
79
0
        schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
80
0
        schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
81
0
        schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
82
0
        schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15); // 16th group: 16 groups x 4 rounds = 64 rounds
83
0
84
0
        abef = _mm_add_epi32(abef, abef_save); // add the pre-block state back in (lane-wise wrapping add)
85
0
        cdgh = _mm_add_epi32(cdgh, cdgh_save);
86
0
    }
87
88
0
    let feba = _mm_shuffle_epi32(abef, 0x1B); // undo the entry permutation: reverse the dwords of abef
89
0
    let dchg = _mm_shuffle_epi32(cdgh, 0xB1); // swap the dwords inside each 64-bit half of cdgh
90
0
    let dcba = _mm_blend_epi16(feba, dchg, 0xF0); // high half from dchg, low half from feba
91
0
    let hgef = _mm_alignr_epi8(dchg, feba, 8); // high half = low half of dchg, low half = high half of feba
92
0
93
0
    let state_ptr_mut = state.as_mut_ptr() as *mut __m128i;
94
0
    _mm_storeu_si128(state_ptr_mut.add(0), dcba); // write back state[0..4]; unaligned store
95
0
    _mm_storeu_si128(state_ptr_mut.add(1), hgef); // write back state[4..8]; unaligned store
96
0
}
97
98
726k
/// Compresses `blocks` into `state`, choosing the implementation at runtime.
///
/// If `cpuid_bool!` reports that the CPU supports `sha`, `sse2`, `ssse3` and `sse4.1`, the
/// work is done by `digest_blocks`; otherwise it is delegated to `super::soft::compress`.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
99
726k
    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
100
726k
    // after stabilization
101
726k
    if cpuid_bool::cpuid_bool!("sha", "sse2", "ssse3", "sse4.1") {
102
0
        unsafe { // SAFETY: the features `digest_blocks` is compiled for were just confirmed by the check above
103
0
            digest_blocks(state, blocks);
104
0
        }
105
726k
    } else {
106
726k
        super::soft::compress(state, blocks); // fallback when the CPU features are not reported
107
726k
    }
108
726k
}