Coverage Report

Created: 2025-12-31 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/polyval-0.6.2/src/backend/clmul.rs
Line
Count
Source
1
//! Intel `CLMUL`-accelerated implementation for modern x86/x86_64 CPUs
2
//! (i.e. Intel Sandy Bridge-compatible or newer)
3
4
#[cfg(target_arch = "x86")]
5
use core::arch::x86::*;
6
#[cfg(target_arch = "x86_64")]
7
use core::arch::x86_64::*;
8
9
use universal_hash::{
10
    consts::{U1, U16},
11
    crypto_common::{BlockSizeUser, KeySizeUser, ParBlocksSizeUser},
12
    KeyInit, Reset, UhfBackend,
13
};
14
15
use crate::{Block, Key, Tag};
16
17
/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
///
/// This backend stores its whole state in two 128-bit SIMD values so that
/// each block can be absorbed with carry-less multiply instructions.
#[derive(Clone)]
pub struct Polyval {
    /// The `H` field element, loaded once from the key at construction.
    h: __m128i,
    /// Running accumulator: seeded from the initial block, replaced after
    /// every processed block, and cleared by `reset`.
    y: __m128i,
}
23
24
impl KeySizeUser for Polyval {
25
    type KeySize = U16;
26
}
27
28
impl Polyval {
29
    /// Initialize POLYVAL with the given `H` field element and initial block
30
1.55k
    pub fn new_with_init_block(h: &Key, init_block: u128) -> Self {
31
        unsafe {
32
            // `_mm_loadu_si128` performs an unaligned load
33
            #[allow(clippy::cast_ptr_alignment)]
34
1.55k
            Self {
35
1.55k
                h: _mm_loadu_si128(h.as_ptr() as *const __m128i),
36
1.55k
                y: _mm_loadu_si128(&init_block.to_be_bytes()[..] as *const _ as *const __m128i),
37
1.55k
            }
38
        }
39
1.55k
    }
40
}
41
42
impl KeyInit for Polyval {
43
    /// Initialize POLYVAL with the given `H` field element
44
0
    fn new(h: &Key) -> Self {
45
0
        Self::new_with_init_block(h, 0)
46
0
    }
47
}
48
49
impl BlockSizeUser for Polyval {
50
    type BlockSize = U16;
51
}
52
53
impl ParBlocksSizeUser for Polyval {
54
    type ParBlocksSize = U1;
55
}
56
57
impl UhfBackend for Polyval {
58
90.2k
    fn proc_block(&mut self, x: &Block) {
59
90.2k
        unsafe {
60
90.2k
            self.mul(x);
61
90.2k
        }
62
90.2k
    }
63
}
64
65
impl Polyval {
66
    /// Get GHASH output
67
1.23k
    pub(crate) fn finalize(self) -> Tag {
68
1.23k
        unsafe { core::mem::transmute(self.y) }
69
1.23k
    }
70
}
71
72
impl Polyval {
73
    #[inline]
74
    #[target_feature(enable = "pclmulqdq")]
75
90.2k
    unsafe fn mul(&mut self, x: &Block) {
76
90.2k
        let h = self.h;
77
78
        // `_mm_loadu_si128` performs an unaligned load
79
        #[allow(clippy::cast_ptr_alignment)]
80
90.2k
        let x = _mm_loadu_si128(x.as_ptr() as *const __m128i);
81
90.2k
        let y = _mm_xor_si128(self.y, x);
82
83
90.2k
        let h0 = h;
84
90.2k
        let h1 = _mm_shuffle_epi32(h, 0x0E);
85
90.2k
        let h2 = _mm_xor_si128(h0, h1);
86
90.2k
        let y0 = y;
87
88
        // Multiply values partitioned to 64-bit parts
89
90.2k
        let y1 = _mm_shuffle_epi32(y, 0x0E);
90
90.2k
        let y2 = _mm_xor_si128(y0, y1);
91
90.2k
        let t0 = _mm_clmulepi64_si128(y0, h0, 0x00);
92
90.2k
        let t1 = _mm_clmulepi64_si128(y, h, 0x11);
93
90.2k
        let t2 = _mm_clmulepi64_si128(y2, h2, 0x00);
94
90.2k
        let t2 = _mm_xor_si128(t2, _mm_xor_si128(t0, t1));
95
90.2k
        let v0 = t0;
96
90.2k
        let v1 = _mm_xor_si128(_mm_shuffle_epi32(t0, 0x0E), t2);
97
90.2k
        let v2 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
98
90.2k
        let v3 = _mm_shuffle_epi32(t1, 0x0E);
99
100
        // Polynomial reduction
101
90.2k
        let v2 = xor5(
102
90.2k
            v2,
103
90.2k
            v0,
104
90.2k
            _mm_srli_epi64(v0, 1),
105
90.2k
            _mm_srli_epi64(v0, 2),
106
90.2k
            _mm_srli_epi64(v0, 7),
107
        );
108
109
90.2k
        let v1 = xor4(
110
90.2k
            v1,
111
90.2k
            _mm_slli_epi64(v0, 63),
112
90.2k
            _mm_slli_epi64(v0, 62),
113
90.2k
            _mm_slli_epi64(v0, 57),
114
        );
115
116
90.2k
        let v3 = xor5(
117
90.2k
            v3,
118
90.2k
            v1,
119
90.2k
            _mm_srli_epi64(v1, 1),
120
90.2k
            _mm_srli_epi64(v1, 2),
121
90.2k
            _mm_srli_epi64(v1, 7),
122
        );
123
124
90.2k
        let v2 = xor4(
125
90.2k
            v2,
126
90.2k
            _mm_slli_epi64(v1, 63),
127
90.2k
            _mm_slli_epi64(v1, 62),
128
90.2k
            _mm_slli_epi64(v1, 57),
129
        );
130
131
90.2k
        self.y = _mm_unpacklo_epi64(v2, v3);
132
90.2k
    }
133
}
134
135
impl Reset for Polyval {
136
0
    fn reset(&mut self) {
137
0
        unsafe {
138
0
            self.y = _mm_setzero_si128();
139
0
        }
140
0
    }
141
}
142
143
#[cfg(feature = "zeroize")]
impl Drop for Polyval {
    /// Wipe the key and the accumulator when the hasher is dropped.
    fn drop(&mut self) {
        zeroize::Zeroize::zeroize(&mut self.h);
        zeroize::Zeroize::zeroize(&mut self.y);
    }
}
151
152
/// XOR four 128-bit values together.
///
/// The operands are combined pairwise (`(e1 ^ e2) ^ (e3 ^ e4)`) so the two
/// inner XORs do not depend on each other.
#[inline(always)]
unsafe fn xor4(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i) -> __m128i {
    let first_pair = _mm_xor_si128(e1, e2);
    let second_pair = _mm_xor_si128(e3, e4);
    _mm_xor_si128(first_pair, second_pair)
}
156
157
/// XOR five 128-bit values together.
///
/// The last four operands are combined pairwise first and `e1` is folded in
/// at the end: `e1 ^ ((e2 ^ e3) ^ (e4 ^ e5))`.
#[inline(always)]
unsafe fn xor5(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i, e5: __m128i) -> __m128i {
    let first_pair = _mm_xor_si128(e2, e3);
    let second_pair = _mm_xor_si128(e4, e5);
    let tail = _mm_xor_si128(first_pair, second_pair);
    _mm_xor_si128(e1, tail)
}