/rust/registry/src/index.crates.io-6f17d22bba15001f/sha1-0.10.6/src/compress/x86.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! SHA-1 `x86`/`x86_64` backend |
2 | | |
3 | | #![cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
4 | | |
5 | | #[cfg(target_arch = "x86")] |
6 | | use core::arch::x86::*; |
7 | | #[cfg(target_arch = "x86_64")] |
8 | | use core::arch::x86_64::*; |
9 | | |
/// Runs four SHA-1 rounds.
///
/// `$e` carries the previous `E` value in its top lane; it is rotated and
/// folded into the message words `$msg` by `_mm_sha1nexte_epu32`, and the
/// result is fed together with the `A..D` vector `$abcd` into
/// `_mm_sha1rnds4_epu32`. `$func` selects the round function/constant pair
/// and must be a constant expression.
macro_rules! rounds4 {
    ($abcd:ident, $e:ident, $msg:expr, $func:expr) => {{
        let e_plus_msg = _mm_sha1nexte_epu32($e, $msg);
        _mm_sha1rnds4_epu32($abcd, e_plus_msg, $func)
    }};
}
15 | | |
/// Produces the next four SHA-1 message-schedule words from the previous
/// sixteen, supplied as four vectors ordered oldest (`$w_a`) to newest
/// (`$w_d`).
macro_rules! schedule {
    ($w_a:expr, $w_b:expr, $w_c:expr, $w_d:expr) => {{
        // msg1 combines the two oldest vectors; the third is XORed in before
        // msg2 finishes the recurrence with the newest vector.
        let partial = _mm_xor_si128(_mm_sha1msg1_epu32($w_a, $w_b), $w_c);
        _mm_sha1msg2_epu32(partial, $w_d)
    }};
}
21 | | |
/// Extends the message schedule by four words and immediately consumes them
/// in four SHA-1 rounds.
///
/// `$next` is assigned `schedule!($m0, $m1, $m2, $m3)`, then `$e` is
/// overwritten with `rounds4!($abcd, $e, $next, $func)`. Both `$next` and
/// `$e` must therefore be assignable places.
macro_rules! schedule_rounds4 {
    (
        $abcd:ident, $e:ident,
        $m0:expr, $m1:expr, $m2:expr, $m3:expr, $next:expr,
        $func:expr
    ) => {
        $next = schedule!($m0, $m1, $m2, $m3);
        $e = rounds4!($abcd, $e, $next, $func);
    };
}
32 | | |
33 | | #[target_feature(enable = "sha,sse2,ssse3,sse4.1")] |
34 | 0 | unsafe fn digest_blocks(state: &mut [u32; 5], blocks: &[[u8; 64]]) { |
35 | 0 | #[allow(non_snake_case)] |
36 | 0 | let MASK: __m128i = _mm_set_epi64x(0x0001_0203_0405_0607, 0x0809_0A0B_0C0D_0E0F); |
37 | 0 |
|
38 | 0 | let mut state_abcd = _mm_set_epi32( |
39 | 0 | state[0] as i32, |
40 | 0 | state[1] as i32, |
41 | 0 | state[2] as i32, |
42 | 0 | state[3] as i32, |
43 | 0 | ); |
44 | 0 | let mut state_e = _mm_set_epi32(state[4] as i32, 0, 0, 0); |
45 | | |
46 | 0 | for block in blocks { |
47 | 0 | // SAFETY: we use only unaligned loads with this pointer |
48 | 0 | #[allow(clippy::cast_ptr_alignment)] |
49 | 0 | let block_ptr = block.as_ptr() as *const __m128i; |
50 | 0 |
|
51 | 0 | let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(0)), MASK); |
52 | 0 | let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(1)), MASK); |
53 | 0 | let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(2)), MASK); |
54 | 0 | let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(3)), MASK); |
55 | 0 | #[allow(clippy::needless_late_init)] |
56 | 0 | let mut w4; |
57 | 0 |
|
58 | 0 | let mut h0 = state_abcd; |
59 | 0 | let mut h1 = _mm_add_epi32(state_e, w0); |
60 | 0 |
|
61 | 0 | // Rounds 0..20 |
62 | 0 | h1 = _mm_sha1rnds4_epu32(h0, h1, 0); |
63 | 0 | h0 = rounds4!(h1, h0, w1, 0); |
64 | 0 | h1 = rounds4!(h0, h1, w2, 0); |
65 | 0 | h0 = rounds4!(h1, h0, w3, 0); |
66 | 0 | schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 0); |
67 | 0 |
|
68 | 0 | // Rounds 20..40 |
69 | 0 | schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 1); |
70 | 0 | schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 1); |
71 | 0 | schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 1); |
72 | 0 | schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 1); |
73 | 0 | schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 1); |
74 | 0 |
|
75 | 0 | // Rounds 40..60 |
76 | 0 | schedule_rounds4!(h0, h1, w1, w2, w3, w4, w0, 2); |
77 | 0 | schedule_rounds4!(h1, h0, w2, w3, w4, w0, w1, 2); |
78 | 0 | schedule_rounds4!(h0, h1, w3, w4, w0, w1, w2, 2); |
79 | 0 | schedule_rounds4!(h1, h0, w4, w0, w1, w2, w3, 2); |
80 | 0 | schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 2); |
81 | 0 |
|
82 | 0 | // Rounds 60..80 |
83 | 0 | schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 3); |
84 | 0 | schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 3); |
85 | 0 | schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 3); |
86 | 0 | schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 3); |
87 | 0 | schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 3); |
88 | 0 |
|
89 | 0 | state_abcd = _mm_add_epi32(state_abcd, h0); |
90 | 0 | state_e = _mm_sha1nexte_epu32(h1, state_e); |
91 | 0 | } |
92 | | |
93 | 0 | state[0] = _mm_extract_epi32(state_abcd, 3) as u32; |
94 | 0 | state[1] = _mm_extract_epi32(state_abcd, 2) as u32; |
95 | 0 | state[2] = _mm_extract_epi32(state_abcd, 1) as u32; |
96 | 0 | state[3] = _mm_extract_epi32(state_abcd, 0) as u32; |
97 | 0 | state[4] = _mm_extract_epi32(state_e, 3) as u32; |
98 | 0 | } |
99 | | |
// Declares the `shani_cpuid` detector that `compress` queries via
// `shani_cpuid::get()`. The feature list mirrors the `#[target_feature]` set
// on `digest_blocks`, so the two must be kept in sync.
cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1");
101 | | |
102 | 0 | pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) { |
103 | 0 | // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725 |
104 | 0 | // after stabilization |
105 | 0 | if shani_cpuid::get() { |
106 | 0 | unsafe { |
107 | 0 | digest_blocks(state, blocks); |
108 | 0 | } |
109 | 0 | } else { |
110 | 0 | super::soft::compress(state, blocks); |
111 | 0 | } |
112 | 0 | } |