/rust/registry/src/index.crates.io-1949cf8c6b5b557f/simd-adler32-0.3.8/src/imp/wasm.rs
Line | Count | Source |
1 | | use super::Adler32Imp; |
2 | | |
/// Resolves the Adler-32 `update` implementation if the target supports
/// WebAssembly simd128 instructions; returns `None` otherwise.
pub fn get_imp() -> Option<Adler32Imp> {
  // Selection happens at compile time via the cfg-gated `get_imp_inner`
  // variants below — there is no runtime feature detection on wasm.
  get_imp_inner()
}
7 | | |
// Compiled only when simd128 is statically enabled for wasm32 (or for wasm64
// with the `nightly` feature); unconditionally hands back the SIMD update.
#[inline]
#[cfg(all(
  target_feature = "simd128",
  any(
    target_arch = "wasm32",
    all(feature = "nightly", target_arch = "wasm64")
  )
))]
fn get_imp_inner() -> Option<Adler32Imp> {
  Some(imp::update)
}
19 | | |
// Fallback compiled when simd128 is not statically available. Wasm has no
// runtime CPU-feature detection, so the only honest answer is `None`.
#[inline]
#[cfg(not(all(
  target_feature = "simd128",
  any(
    target_arch = "wasm32",
    all(feature = "nightly", target_arch = "wasm64")
  )
)))]
fn get_imp_inner() -> Option<Adler32Imp> {
  None
}
31 | | |
#[cfg(all(
  target_feature = "simd128",
  any(
    target_arch = "wasm32",
    all(feature = "nightly", target_arch = "wasm64")
  )
))]
mod imp {
  /// Adler-32 modulus: the largest prime below 2^16.
  const MOD: u32 = 65521;
  /// Largest n such that 255*n*(n+1)/2 + (n+1)*(MOD-1) < 2^32 — the longest
  /// run of bytes that can be accumulated before a modulo reduction is
  /// required to avoid 32-bit overflow (same bound zlib uses).
  const NMAX: usize = 5552;
  /// Bytes consumed per vector iteration: two 16-byte `v128` loads.
  const BLOCK_SIZE: usize = 32;
  /// NMAX rounded down to a whole number of blocks, so a chunk can be
  /// processed entirely with vector ops before reducing mod MOD.
  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

  #[cfg(target_arch = "wasm32")]
  use core::arch::wasm32::*;
  #[cfg(target_arch = "wasm64")]
  use core::arch::wasm64::*;

  /// Updates the Adler-32 state `(a, b)` with `data` using simd128 vectors.
  ///
  /// The enclosing module is only compiled when simd128 is statically
  /// enabled, so calling the `#[target_feature]` inner function is safe.
  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    update_imp(a, b, data)
  }

  #[inline]
  #[target_feature(enable = "simd128")]
  fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    let mut a = a as u32;
    let mut b = b as u32;

    // Split into NMAX-bounded chunks so the 32-bit accumulators cannot
    // overflow between modulo reductions; the tail goes through the
    // scalar-capable path below.
    let chunks = data.chunks_exact(CHUNK_SIZE);
    let remainder = chunks.remainder();
    for chunk in chunks {
      update_chunk_block(&mut a, &mut b, chunk);
    }

    update_block(&mut a, &mut b, remainder);

    (a as u16, b as u16)
  }

  /// Processes exactly one CHUNK_SIZE chunk, then reduces mod MOD.
  fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert_eq!(
      chunk.len(),
      CHUNK_SIZE,
      "Unexpected chunk size (expected {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    reduce_add_blocks(a, b, chunk);

    *a %= MOD;
    *b %= MOD;
  }

  /// Processes the final partial chunk: vectorizes as many whole blocks as
  /// possible, then folds the leftover bytes in scalar fashion.
  fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert!(
      chunk.len() <= CHUNK_SIZE,
      "Unexpected chunk size (expected <= {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    for byte in reduce_add_blocks(a, b, chunk) {
      *a += *byte as u32;
      *b += *a;
    }

    *a %= MOD;
    *b %= MOD;
  }

  /// Vector core: consumes all whole BLOCK_SIZE blocks of `chunk`,
  /// accumulating into `a` and `b`, and returns the unprocessed tail.
  #[inline(always)]
  fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
    if chunk.len() < BLOCK_SIZE {
      return chunk;
    }

    let blocks = chunk.chunks_exact(BLOCK_SIZE);
    let blocks_remainder = blocks.remainder();

    // Positional weights 32..=1 so that b picks up each byte's contribution
    // multiplied by its distance from the end of the block.
    let weight_hi_v = get_weight_hi();
    let weight_lo_v = get_weight_lo();

    // p_v tracks `a` carried across blocks; each block's running `a` feeds
    // into `b` BLOCK_SIZE times, applied at the end via the `<< 5` below.
    let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
    let mut a_v = u32x4(0, 0, 0, 0);
    let mut b_v = u32x4(*b, 0, 0, 0);

    for block in blocks {
      let block_ptr = block.as_ptr() as *const v128;
      // SAFETY: `chunks_exact(BLOCK_SIZE)` guarantees `block` is exactly 32
      // bytes, so both 16-byte unaligned reads are in bounds, and `v128` is
      // valid for any bit pattern.
      let v_lo = unsafe { block_ptr.read_unaligned() };
      let v_hi = unsafe { block_ptr.add(1).read_unaligned() };

      p_v = u32x4_add(p_v, a_v);

      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
      let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
      b_v = u32x4_add(b_v, mad);

      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
      let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
      b_v = u32x4_add(b_v, mad);
    }

    // << 5 multiplies by BLOCK_SIZE (32): each prior `a` contributed to `b`
    // once per byte of every subsequent block.
    b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));

    // `a` accumulates (a_v started at zero); `b` is replaced because b_v was
    // seeded with the incoming *b above.
    *a += reduce_add(a_v);
    *b = reduce_add(b_v);

    blocks_remainder
  }

  /// Widening byte dot-product: sums a[i] * b[i] over 16 u8 lanes into four
  /// i32 lanes. Products fit i16 since weights are at most 32 (255*32 < 2^15).
  #[inline(always)]
  fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
    let a_lo = u16x8_extend_low_u8x16(a);
    let a_hi = u16x8_extend_high_u8x16(a);

    let b_lo = u16x8_extend_low_u8x16(b);
    let b_hi = u16x8_extend_high_u8x16(b);

    let lo = i32x4_dot_i16x8(a_lo, b_lo);
    let hi = i32x4_dot_i16x8(a_hi, b_hi);

    i32x4_add(lo, hi)
  }

  /// Pairwise-widens 16 u8 lanes twice, yielding four u32 lane sums.
  #[inline(always)]
  fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
    u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
  }

  /// Horizontal sum of the four u32 lanes of `v`.
  #[inline(always)]
  fn reduce_add(v: v128) -> u32 {
    // Safe lane extraction replaces the previous `unsafe` transmute of the
    // vector into a `[u32; 4]`; wrapping adds keep the original semantics.
    u32x4_extract_lane::<0>(v)
      .wrapping_add(u32x4_extract_lane::<1>(v))
      .wrapping_add(u32x4_extract_lane::<2>(v))
      .wrapping_add(u32x4_extract_lane::<3>(v))
  }

  /// Weights for the low 16 bytes of a block: 32 down to 17.
  #[inline(always)]
  fn get_weight_lo() -> v128 {
    u8x16(
      32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
    )
  }

  /// Weights for the high 16 bytes of a block: 16 down to 1.
  #[inline(always)]
  fn get_weight_hi() -> v128 {
    u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
  }
}
184 | | |
#[cfg(test)]
mod tests {
  use rand::Rng;

  #[test]
  fn zeroes() {
    assert_sum_eq(&[]);
    assert_sum_eq(&[0]);
    assert_sum_eq(&[0, 0]);
    assert_sum_eq(&[0; 100]);
    assert_sum_eq(&[0; 1024]);
    assert_sum_eq(&[0; 512 * 1024]);
  }

  #[test]
  fn ones() {
    assert_sum_eq(&[]);
    assert_sum_eq(&[1]);
    assert_sum_eq(&[1, 1]);
    assert_sum_eq(&[1; 100]);
    assert_sum_eq(&[1; 1024]);
    assert_sum_eq(&[1; 512 * 1024]);
  }

  #[test]
  fn random() {
    let mut buf = [0; 512 * 1024];
    rand::thread_rng().fill(&mut buf[..]);

    assert_sum_eq(&buf[..1]);
    assert_sum_eq(&buf[..100]);
    assert_sum_eq(&buf[..1024]);
    assert_sum_eq(&buf[..512 * 1024]);
  }

  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
  #[test]
  fn wiki() {
    assert_sum_eq(b"Wikipedia");
  }

  /// Compares the SIMD sum of `data` against the scalar `adler` crate.
  /// Silently a no-op when no simd128 implementation is available.
  fn assert_sum_eq(data: &[u8]) {
    let update = match super::get_imp() {
      Some(update) => update,
      None => return,
    };

    let (a, b) = update(1, 0, data);
    let actual = u32::from(b) << 16 | u32::from(a);
    let expected = adler::adler32_slice(data);

    assert_eq!(actual, expected, "len({})", data.len());
  }
}