Coverage Report

Created: 2025-09-27 07:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/simd-adler32-0.3.7/src/imp/avx2.rs
Line
Count
Source
1
use super::Adler32Imp;
2
3
/// Resolves the AVX2 update implementation, if one is available.
///
/// Delegates to whichever `get_imp_inner` variant was selected at compile
/// time and returns its result unchanged: `Some(update_fn)` when that variant
/// reports AVX2 as usable, `None` otherwise.
4
37.0k
pub fn get_imp() -> Option<Adler32Imp> {
5
37.0k
  get_imp_inner()
6
37.0k
}
7
8
/// Runtime-detection variant, compiled when the `std` feature is enabled on
/// x86 / x86_64.
///
/// Asks the CPU (via `std::is_x86_feature_detected!`) whether AVX2 is
/// supported and returns `imp::update` only in that case; otherwise `None`.
#[inline]
9
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
10
37.0k
fn get_imp_inner() -> Option<Adler32Imp> {
11
37.0k
  if std::is_x86_feature_detected!("avx2") {
12
37.0k
    Some(imp::update)
13
  } else {
14
0
    None
15
  }
16
37.0k
}
17
18
/// Compile-time variant, used when the runtime-detection variant is not
/// compiled (no `std` feature on x86 / x86_64) but the build itself enables
/// the `avx2` target feature.
///
/// No check is performed here: the `cfg` condition already guarantees the
/// binary was built for AVX2, so `imp::update` is returned unconditionally.
#[inline]
19
#[cfg(all(
20
  target_feature = "avx2",
21
  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
22
))]
23
fn get_imp_inner() -> Option<Adler32Imp> {
24
  Some(imp::update)
25
}
26
27
/// Fallback variant, used when neither runtime detection (the `std` feature
/// on x86 / x86_64) nor a compile-time `avx2` target feature is available.
///
/// Always returns `None`, i.e. no AVX2 implementation is offered.
#[inline]
28
#[cfg(all(
29
  not(target_feature = "avx2"),
30
  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
31
))]
32
fn get_imp_inner() -> Option<Adler32Imp> {
33
  None
34
}
35
36
#[cfg(all(
37
  any(target_arch = "x86", target_arch = "x86_64"),
38
  any(feature = "std", target_feature = "avx2")
39
))]
40
mod imp {
41
  // Modulus both running sums are reduced by; 65521 is the Adler-32 modulus
  // (the largest prime below 2^16).
  const MOD: u32 = 65521;
42
  // NOTE(review): presumably the same bound as zlib's NMAX, i.e. the largest
  // number of bytes that can be accumulated before the 32-bit sums must be
  // reduced modulo MOD -- confirm against the reference implementation.
  const NMAX: usize = 5552;
43
  // Bytes consumed per vector iteration (one 256-bit load).
  const BLOCK_SIZE: usize = 32;
44
  // NMAX rounded down to a whole number of blocks (173 * 32 = 5536), so a
  // full chunk never leaves a partial block behind.
  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
45
46
  #[cfg(target_arch = "x86")]
47
  use core::arch::x86::*;
48
  #[cfg(target_arch = "x86_64")]
49
  use core::arch::x86_64::*;
50
51
11.2k
  /// Safe entry point of the AVX2 implementation.
  ///
  /// Takes the current `a` and `b` sums plus the bytes to process and returns
  /// the updated `(a, b)` pair computed by `update_imp`.
  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
52
11.2k
    // SAFETY (review): `update_imp` is compiled with
    // `#[target_feature(enable = "avx2")]`, so this call is only sound on a
    // CPU that supports AVX2. The `get_imp_inner` variants in this file hand
    // this function out only after a runtime or compile-time AVX2 check;
    // anything calling `imp::update` directly must guarantee the same.
    unsafe { update_imp(a, b, data) }
53
11.2k
  }
54
55
  /// Core driver: folds `data` into the `(a, b)` sums.
  ///
  /// The input is split into `CHUNK_SIZE`-byte chunks, each handled by
  /// `update_chunk_block`; whatever is left over (possibly empty) goes to
  /// `update_block`.
  ///
  /// # Safety
  ///
  /// Compiled with the `avx2` target feature enabled; the caller must ensure
  /// the running CPU supports AVX2.
  #[inline]
56
  #[target_feature(enable = "avx2")]
57
11.2k
  unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
58
11.2k
    // Widen to 32 bits so the sums can grow between modulo reductions.
    let mut a = a as u32;
59
11.2k
    let mut b = b as u32;
60
61
11.2k
    let chunks = data.chunks_exact(CHUNK_SIZE);
62
11.2k
    // Tail shorter than CHUNK_SIZE; captured before the iterator is consumed.
    let remainder = chunks.remainder();
63
16.8k
    for chunk in chunks {
64
5.54k
      update_chunk_block(&mut a, &mut b, chunk);
65
5.54k
    }
66
67
11.2k
    // Always runs, even for an empty remainder, so both sums are reduced
    // modulo MOD before being narrowed below.
    update_block(&mut a, &mut b, remainder);
68
69
11.2k
    // Both values are < MOD (65521) after `update_block`, so they fit in u16.
    (a as u16, b as u16)
70
11.2k
  }
71
72
  /// Processes one full chunk of exactly `CHUNK_SIZE` bytes and reduces both
  /// sums modulo `MOD` afterwards.
  ///
  /// The chunk length is checked in debug builds only.
  ///
  /// # Safety
  ///
  /// Calls the AVX2 helper `reduce_add_blocks`; the caller must ensure AVX2 is
  /// available.
  #[inline]
73
5.54k
  unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
74
5.54k
    debug_assert_eq!(
75
0
      chunk.len(),
76
      CHUNK_SIZE,
77
0
      "Unexpected chunk size (expected {}, got {})",
78
      CHUNK_SIZE,
79
0
      chunk.len()
80
    );
81
82
5.54k
    // The returned remainder is ignored: CHUNK_SIZE is a multiple of
    // BLOCK_SIZE, so a full chunk leaves no leftover bytes.
    reduce_add_blocks(a, b, chunk);
83
84
5.54k
    *a %= MOD;
85
5.54k
    *b %= MOD;
86
5.54k
  }
87
88
  /// Processes a chunk of at most `CHUNK_SIZE` bytes (the tail of the input)
  /// and reduces both sums modulo `MOD` afterwards.
  ///
  /// Whole 32-byte blocks are handled by `reduce_add_blocks`; the bytes it
  /// hands back are folded in one at a time. The length bound is checked in
  /// debug builds only.
  ///
  /// # Safety
  ///
  /// Calls the AVX2 helper `reduce_add_blocks`; the caller must ensure AVX2 is
  /// available.
  #[inline]
89
11.2k
  unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
90
11.2k
    debug_assert!(
91
0
      chunk.len() <= CHUNK_SIZE,
92
0
      "Unexpected chunk size (expected <= {}, got {})",
93
      CHUNK_SIZE,
94
0
      chunk.len()
95
    );
96
97
68.0k
    // Scalar step for the bytes the vector path did not consume (fewer than
    // BLOCK_SIZE of them): add the byte to `a`, then add the new `a` to `b`.
    for byte in reduce_add_blocks(a, b, chunk) {
98
68.0k
      *a += *byte as u32;
99
68.0k
      *b += *a;
100
68.0k
    }
101
102
11.2k
    *a %= MOD;
103
11.2k
    *b %= MOD;
104
11.2k
  }
105
106
  /// Folds every complete `BLOCK_SIZE`-byte block of `chunk` into `a` and `b`
  /// using AVX2 and returns the trailing bytes that did not fill a block.
  ///
  /// If `chunk` is shorter than one block, nothing is changed and the whole
  /// slice is returned. No modulo reduction happens here; callers reduce
  /// afterwards.
  ///
  /// # Safety
  ///
  /// Uses AVX2 intrinsics; the caller must ensure AVX2 is available.
  #[inline(always)]
107
16.8k
  unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
108
16.8k
    if chunk.len() < BLOCK_SIZE {
109
4.59k
      return chunk;
110
12.2k
    }
111
112
12.2k
    let blocks = chunk.chunks_exact(BLOCK_SIZE);
113
12.2k
    let blocks_remainder = blocks.remainder();
114
115
12.2k
    let one_v = _mm256_set1_epi16(1);
116
12.2k
    let zero_v = _mm256_setzero_si256();
117
12.2k
    let weights = get_weights();
118
119
12.2k
    // p_v: lowest lane starts at `a * number_of_blocks`. It is multiplied by
    // 32 after the loop, giving the incoming `a` counted once per byte.
    let mut p_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * blocks.len() as u32) as _);
120
12.2k
    // a_v: running byte sums of the blocks seen so far.
    let mut a_v = _mm256_setzero_si256();
121
12.2k
    // b_v: lowest lane starts at the incoming `b`.
    let mut b_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _);
122
123
1.37M
    for block in blocks {
124
1.36M
      let block_ptr = block.as_ptr() as *const _;
125
1.36M
      // Unaligned load: the slice carries no alignment guarantee.
      let block = _mm256_loadu_si256(block_ptr);
126
1.36M
127
1.36M
      // Add the byte sums of all PREVIOUS blocks (a_v is updated below).
      p_v = _mm256_add_epi32(p_v, a_v);
128
1.36M
129
1.36M
      // Sum of absolute differences against zero == plain byte sums.
      a_v = _mm256_add_epi32(a_v, _mm256_sad_epu8(block, zero_v));
130
1.36M
      // Multiply each byte by its positional weight, then widen the 16-bit
      // pair sums to 32 bits by multiplying with 1 and adding pairs.
      let mad = _mm256_maddubs_epi16(block, weights);
131
1.36M
      b_v = _mm256_add_epi32(b_v, _mm256_madd_epi16(mad, one_v));
132
1.36M
    }
133
134
12.2k
    // Shift left by 5 == multiply by BLOCK_SIZE (32).
    b_v = _mm256_add_epi32(b_v, _mm256_slli_epi32(p_v, 5));
135
136
12.2k
    // `a` gains the byte sums; `b` is replaced because b_v was seeded with it.
    *a += reduce_add(a_v);
137
12.2k
    *b = reduce_add(b_v);
138
139
12.2k
    blocks_remainder
140
16.8k
  }
141
142
  /// Horizontal sum: adds the eight 32-bit lanes of `v` and returns the total
  /// as `u32`.
  ///
  /// # Safety
  ///
  /// Uses AVX2 / SSE2 intrinsics; the caller must ensure AVX2 is available.
  #[inline(always)]
143
24.4k
  unsafe fn reduce_add(v: __m256i) -> u32 {
144
24.4k
    // Fold the upper 128 bits onto the lower 128 bits: 8 lanes -> 4.
    let sum = _mm_add_epi32(_mm256_castsi256_si128(v), _mm256_extracti128_si256(v, 1));
145
24.4k
    // Bring the upper 64 bits down: 4 lanes -> 2.
    let hi = _mm_unpackhi_epi64(sum, sum);
146
147
24.4k
    let sum = _mm_add_epi32(hi, sum);
148
24.4k
    // NOTE(review): assumes `crate::imp::_MM_SHUFFLE` matches the standard
    // macro, in which case (2, 3, 0, 1) swaps adjacent lanes: 2 lanes -> 1.
    let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
149
150
24.4k
    let sum = _mm_add_epi32(sum, hi);
151
152
24.4k
    // The total sits in the lowest lane.
    _mm_cvtsi128_si32(sum) as _
153
24.4k
  }
154
155
  /// Builds the per-byte weight vector used for the `b` sum of one block.
  ///
  /// `_mm256_set_epi8` takes its arguments from the highest byte down to the
  /// lowest, so the first byte of a block gets weight 32 and the last byte
  /// gets weight 1.
  ///
  /// # Safety
  ///
  /// Uses an AVX intrinsic; the caller must ensure AVX2 is available.
  #[inline(always)]
156
12.2k
  unsafe fn get_weights() -> __m256i {
157
12.2k
    _mm256_set_epi8(
158
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
159
      24, 25, 26, 27, 28, 29, 30, 31, 32,
160
    )
161
12.2k
  }
162
}
163
164
#[cfg(test)]
165
mod tests {
166
  use rand::Rng;
167
168
  /// All-zero inputs, from empty up to 1 MiB, so the empty, scalar-only,
  /// single-chunk and multi-chunk paths are all exercised.
  #[test]
169
  fn zeroes() {
170
    assert_sum_eq(&[]);
171
    assert_sum_eq(&[0]);
172
    assert_sum_eq(&[0, 0]);
173
    assert_sum_eq(&[0; 100]);
174
    assert_sum_eq(&[0; 1024]);
175
    assert_sum_eq(&[0; 1024 * 1024]);
176
  }
177
178
  /// Same length ladder as `zeroes`, but with every byte set to 1 so both
  /// sums actually grow.
  #[test]
179
  fn ones() {
180
    assert_sum_eq(&[]);
181
    assert_sum_eq(&[1]);
182
    assert_sum_eq(&[1, 1]);
183
    assert_sum_eq(&[1; 100]);
184
    assert_sum_eq(&[1; 1024]);
185
    assert_sum_eq(&[1; 1024 * 1024]);
186
  }
187
188
  /// Random data: one 1 MiB buffer is filled once and checked at several
  /// prefix lengths.
  #[test]
189
  fn random() {
190
    let mut random = [0; 1024 * 1024];
191
    rand::thread_rng().fill(&mut random[..]);
192
193
    assert_sum_eq(&random[..1]);
194
    assert_sum_eq(&random[..100]);
195
    assert_sum_eq(&random[..1024]);
196
    assert_sum_eq(&random[..1024 * 1024]);
197
  }
198
199
  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32:
  /// checks the checksum of the ASCII string "Wikipedia".
  200
  #[test]
201
  fn wiki() {
202
    assert_sum_eq(b"Wikipedia");
203
  }
204
205
  /// Checks the AVX2 implementation against `adler::adler32_slice` for `data`.
  ///
  /// NOTE: when `get_imp` returns `None` (no AVX2 available) nothing is
  /// asserted, so the calling test passes without exercising any code.
  fn assert_sum_eq(data: &[u8]) {
206
    if let Some(update) = super::get_imp() {
207
      // Start from a = 1, b = 0.
      let (a, b) = update(1, 0, data);
208
      // Pack into a single checksum: `b` in the high 16 bits, `a` in the low.
      let left = u32::from(b) << 16 | u32::from(a);
209
      let right = adler::adler32_slice(data);
210
211
      assert_eq!(left, right, "len({})", data.len());
212
    }
213
  }
214
}