Coverage Report

Created: 2025-12-31 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/simd-adler32-0.3.8/src/imp/avx2.rs
Line
Count
Source
1
use super::Adler32Imp;
2
3
/// Resolves update implementation if CPU supports avx2 instructions.
4
222M
pub fn get_imp() -> Option<Adler32Imp> {
5
222M
  get_imp_inner()
6
222M
}
7
8
/// Runtime-detection variant, built when `std` is enabled on x86/x86_64.
///
/// Probes the running CPU for AVX2 and hands out `imp::update` only when the
/// probe succeeds.
#[inline]
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
fn get_imp_inner() -> Option<Adler32Imp> {
  let avx2_available = std::is_x86_feature_detected!("avx2");
  if !avx2_available {
    return None;
  }

  Some(imp::update)
}
17
18
/// Compile-time variant, built when runtime detection is not compiled in but
/// AVX2 is enabled for the whole build.
///
/// No probing is needed here, so the AVX2 routine is returned unconditionally.
#[inline]
#[cfg(all(
  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))),
  target_feature = "avx2"
))]
fn get_imp_inner() -> Option<Adler32Imp> {
  let selected: Adler32Imp = imp::update;
  Some(selected)
}
26
27
#[inline]
28
#[cfg(all(
29
  not(target_feature = "avx2"),
30
  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
31
))]
32
fn get_imp_inner() -> Option<Adler32Imp> {
33
  None
34
}
35
36
/// AVX2 implementation of the checksum update step.
///
/// Compiled only for x86/x86_64 targets on which either runtime feature
/// detection (`std`) or a build-wide `avx2` target feature is available.
#[cfg(all(
  any(target_arch = "x86", target_arch = "x86_64"),
  any(feature = "std", target_feature = "avx2")
))]
mod imp {
  #[cfg(target_arch = "x86")]
  use core::arch::x86::*;
  #[cfg(target_arch = "x86_64")]
  use core::arch::x86_64::*;

  /// Modulus applied to both running sums.
  const MOD: u32 = 65521;
  /// Byte budget from which `CHUNK_SIZE` is derived.
  // NOTE(review): presumably the usual Adler-32 bound on how many bytes may be
  // summed in 32 bits before a modulo is required — confirm.
  const NMAX: usize = 5552;
  /// Number of bytes consumed by one 256-bit load.
  const BLOCK_SIZE: usize = 32;
  /// `NMAX` rounded down to a whole number of blocks.
  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

  /// Safe wrapper around [`update_imp`].
  ///
  /// Takes the current `a` and `b` sums plus the bytes to fold in, and returns
  /// the updated `(a, b)` pair.
  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    // SAFETY: `update_imp` requires AVX2. This function performs no check of
    // its own; NOTE(review): it relies on being handed out only after AVX2
    // support has been established (see `get_imp_inner`) — confirm no other
    // caller reaches it directly.
    unsafe { update_imp(a, b, data) }
  }

  /// Folds `data` into the running sums using AVX2.
  ///
  /// The input is split into `CHUNK_SIZE`-byte pieces, each handled by
  /// [`update_chunk_block`]; whatever is left over goes through
  /// [`update_block`]. Both helpers reduce the sums modulo `MOD`, so the final
  /// narrowing back to `u16` keeps the full value.
  ///
  /// # Safety
  ///
  /// The executing CPU must support AVX2.
  #[inline]
  #[target_feature(enable = "avx2")]
  unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    let (mut sum_a, mut sum_b) = (u32::from(a), u32::from(b));

    let mut full_chunks = data.chunks_exact(CHUNK_SIZE);
    for full in full_chunks.by_ref() {
      update_chunk_block(&mut sum_a, &mut sum_b, full);
    }

    // The remainder is fixed when the iterator is created, so it is still
    // valid after the loop above has drained the full chunks.
    update_block(&mut sum_a, &mut sum_b, full_chunks.remainder());

    (sum_a as u16, sum_b as u16)
  }

  /// Processes exactly one `CHUNK_SIZE`-byte chunk and reduces both sums.
  ///
  /// # Safety
  ///
  /// Uses AVX2 intrinsics; the executing CPU must support AVX2.
  #[inline]
  unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert_eq!(
      chunk.len(),
      CHUNK_SIZE,
      "Unexpected chunk size (expected {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    // A full chunk is a whole number of blocks, so the returned tail is not
    // needed here.
    reduce_add_blocks(a, b, chunk);

    *b %= MOD;
    *a %= MOD;
  }

  /// Processes a chunk of at most `CHUNK_SIZE` bytes and reduces both sums.
  ///
  /// Whole blocks are handled by [`reduce_add_blocks`]; the bytes it leaves
  /// over are added one at a time.
  ///
  /// # Safety
  ///
  /// Uses AVX2 intrinsics; the executing CPU must support AVX2.
  #[inline]
  unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert!(
      chunk.len() <= CHUNK_SIZE,
      "Unexpected chunk size (expected <= {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    let tail = reduce_add_blocks(a, b, chunk);
    for &byte in tail {
      *a += u32::from(byte);
      *b += *a;
    }

    *b %= MOD;
    *a %= MOD;
  }

  /// Adds every whole `BLOCK_SIZE`-byte block of `chunk` into `a` and `b`
  /// and returns the trailing bytes that did not fill a block.
  ///
  /// When `chunk` is shorter than one block it is returned untouched and the
  /// sums are left as they were. No modulo reduction happens here; callers
  /// apply it afterwards.
  ///
  /// # Safety
  ///
  /// Uses AVX2 intrinsics; the executing CPU must support AVX2.
  #[inline(always)]
  unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
    if chunk.len() < BLOCK_SIZE {
      return chunk;
    }

    let blocks = chunk.chunks_exact(BLOCK_SIZE);
    let tail = blocks.remainder();
    let block_count = blocks.len() as u32;

    let zeros = _mm256_setzero_si256();
    let ones_i16 = _mm256_set1_epi16(1);
    let weights = get_weights();

    // Lane 0 starts with the incoming `a` counted once per block; the loop
    // then adds the per-block running byte sums. The total is scaled by 32
    // (shift by 5) after the loop and folded into the `b` accumulator.
    let mut prefix_acc = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * block_count) as _);
    let mut a_acc = zeros;
    // Lane 0 carries the incoming `b`, so the final reduction already
    // includes it.
    let mut b_acc = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _);

    for block in blocks {
      let bytes = _mm256_loadu_si256(block.as_ptr() as *const __m256i);

      // Must see the byte sums of all *previous* blocks, so it runs before
      // `a_acc` is updated for this block.
      prefix_acc = _mm256_add_epi32(prefix_acc, a_acc);

      let byte_sums = _mm256_sad_epu8(bytes, zeros);
      a_acc = _mm256_add_epi32(a_acc, byte_sums);

      let weighted_pairs = _mm256_maddubs_epi16(bytes, weights);
      let weighted_quads = _mm256_madd_epi16(weighted_pairs, ones_i16);
      b_acc = _mm256_add_epi32(b_acc, weighted_quads);
    }

    b_acc = _mm256_add_epi32(b_acc, _mm256_slli_epi32(prefix_acc, 5));

    *a += reduce_add(a_acc);
    *b = reduce_add(b_acc);

    tail
  }

  /// Horizontally adds the eight 32-bit lanes of `v` into a single value.
  ///
  /// # Safety
  ///
  /// Uses AVX2 intrinsics; the executing CPU must support AVX2.
  #[inline(always)]
  unsafe fn reduce_add(v: __m256i) -> u32 {
    // 8 lanes -> 4 lanes: add the upper 128-bit half onto the lower one.
    let low = _mm256_castsi256_si128(v);
    let high = _mm256_extracti128_si256(v, 1);
    let folded4 = _mm_add_epi32(low, high);

    // 4 lanes -> 2 lanes: add the upper 64 bits onto the lower 64 bits.
    let upper_pair = _mm_unpackhi_epi64(folded4, folded4);
    let folded2 = _mm_add_epi32(upper_pair, folded4);

    // 2 lanes -> 1 lane: swap neighbouring lanes and add.
    let swapped = _mm_shuffle_epi32(folded2, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
    let total = _mm_add_epi32(folded2, swapped);

    _mm_cvtsi128_si32(total) as _
  }

  /// Builds the per-byte weight vector used for the `b` sum.
  ///
  /// `_mm256_set_epi8` takes its arguments from the highest byte down, so the
  /// first byte of a block is weighted 32 and the last byte is weighted 1.
  ///
  /// # Safety
  ///
  /// Uses an AVX intrinsic; the executing CPU must support it.
  #[inline(always)]
  unsafe fn get_weights() -> __m256i {
    _mm256_set_epi8(
      1, 2, 3, 4, 5, 6, 7, 8,
      9, 10, 11, 12, 13, 14, 15, 16,
      17, 18, 19, 20, 21, 22, 23, 24,
      25, 26, 27, 28, 29, 30, 31, 32,
    )
  }
}
163
164
#[cfg(test)]
mod tests {
  use rand::Rng;

  /// Largest input length exercised by the tests.
  const MAX_LEN: usize = 1024 * 1024;

  /// Lengths checked for the constant-filled inputs.
  const FILL_LENGTHS: [usize; 6] = [0, 1, 2, 100, 1024, MAX_LEN];

  static ZEROES: [u8; MAX_LEN] = [0; MAX_LEN];
  static ONES: [u8; MAX_LEN] = [1; MAX_LEN];

  #[test]
  fn zeroes() {
    for &len in FILL_LENGTHS.iter() {
      assert_sum_eq(&ZEROES[..len]);
    }
  }

  #[test]
  fn ones() {
    for &len in FILL_LENGTHS.iter() {
      assert_sum_eq(&ONES[..len]);
    }
  }

  #[test]
  fn random() {
    let mut buffer = [0; MAX_LEN];
    rand::thread_rng().fill(&mut buffer[..]);

    for &len in [1, 100, 1024, MAX_LEN].iter() {
      assert_sum_eq(&buffer[..len]);
    }
  }

  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
  #[test]
  fn wiki() {
    assert_sum_eq(b"Wikipedia");
  }

  /// Compares the AVX2 result for `data` against `adler::adler32_slice`.
  ///
  /// Does nothing when `get_imp` reports that no AVX2 implementation is
  /// available.
  fn assert_sum_eq(data: &[u8]) {
    let update = match super::get_imp() {
      Some(update) => update,
      None => return,
    };

    let (a, b) = update(1, 0, data);
    let actual = (u32::from(b) << 16) | u32::from(a);
    let expected = adler::adler32_slice(data);

    assert_eq!(actual, expected, "len({})", data.len());
  }
}