// /rust/registry/src/index.crates.io-6f17d22bba15001f/ahash-0.8.11/src/operations.rs
use crate::convert::*;
#[allow(unused)]
use zerocopy::transmute;

/// This constant comes from Knuth's PRNG (empirically it works better than the constants from splitmix32).
pub(crate) const MULTIPLE: u64 = 6364136223846793005;

/// This is a constant with a lot of special properties, found by automated search.
/// See the unit tests below. (The commented-out values below are alternatives.)
#[cfg(all(target_feature = "ssse3", not(miri)))]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;
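
// Illustrative check added for exposition (not in the original file): the mask
// encodes a byte permutation of the 16 lanes, so every index 0..16 appears
// exactly once in its little-endian byte representation.
#[cfg(all(test, target_feature = "ssse3", not(miri)))]
#[test]
fn shuffle_mask_is_a_permutation() {
    let bytes = SHUFFLE_MASK.to_le_bytes();
    for index in 0..16u8 {
        assert!(bytes.contains(&index), "Mask is missing {}", index);
    }
}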

#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let result = (s as u128).wrapping_mul(by as u128);
    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

// Fallback used when the "folded_multiply" feature is disabled: it avoids a
// 128-bit multiply by combining two byte-swapped 64-bit multiplies.
#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let b1 = s.wrapping_mul(by.swap_bytes());
    let b2 = s.swap_bytes().wrapping_mul(!by);
    b1 ^ b2.swap_bytes()
}
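
// Illustrative check added for exposition (not in the original file): with the
// "folded_multiply" feature enabled, the result is the low half of the full
// 128-bit product XOR-ed with the high half.
#[cfg(all(test, feature = "folded_multiply"))]
#[test]
fn folded_multiply_folds_the_wide_product() {
    for &(s, by) in &[(1u64, MULTIPLE), (u64::MAX, MULTIPLE), (0x0123_4567_89AB_CDEF, 3)] {
        let wide = (s as u128) * (by as u128);
        assert_eq!((wide as u64) ^ ((wide >> 64) as u64), folded_multiply(s, by));
    }
}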

/// Given a small slice (at most 8 bytes), returns the same data spread across two u64s.
/// (Neither the order nor the non-duplication of bytes is guaranteed.)
#[inline(always)]
pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
    debug_assert!(data.len() <= 8);
    if data.len() >= 2 {
        if data.len() >= 4 {
            // len 4-8
            [data.read_u32().0 as u64, data.read_last_u32() as u64]
        } else {
            // len 2-3
            [data.read_u16().0 as u64, data[data.len() - 1] as u64]
        }
    } else {
        if data.len() > 0 {
            [data[0] as u64, data[0] as u64]
        } else {
            [0, 0]
        }
    }
}
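
// Illustrative check added for exposition (not in the original file): empty
// input yields zeroed lanes, a single byte is duplicated into both lanes, and
// 4-8 byte inputs are read as two (possibly overlapping) u32 windows.
#[cfg(test)]
#[test]
fn read_small_examples() {
    assert_eq!([0, 0], read_small(&[]));
    assert_eq!([7, 7], read_small(&[7]));
    let [front, back] = read_small(&b"abcdefgh"[..]);
    assert!(front <= u32::MAX as u64 && back <= u32::MAX as u64);
}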

/// Permutes the bytes of `a`: a `pshufb` with `SHUFFLE_MASK` on SSSE3, a plain byte swap otherwise.
#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
    #[cfg(all(target_feature = "ssse3", not(miri)))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
    }
    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
    {
        a.swap_bytes()
    }
}

#[allow(unused)] // not used by fallback
#[inline(always)]
pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
    let sum = add_by_64s(a.convert(), b.convert());
    shuffle(sum.convert())
}

#[allow(unused)] // not used by fallback
#[inline(always)]
pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
    let shuffled: [u64; 2] = shuffle(base).convert();
    add_by_64s(shuffled, to_add.convert()).convert()
}
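
// Illustrative check added for exposition (not in the original file): with a
// zero addend both helpers reduce to a bare `shuffle`, which makes the
// add/shuffle ordering of each variant easy to see.
#[cfg(test)]
#[test]
fn shuffle_helpers_with_zero_addend() {
    let x = 0x00112233_44556677_8899AABB_CCDDEEFF_u128;
    assert_eq!(shuffle(x), add_and_shuffle(x, 0));
    assert_eq!(shuffle(x), shuffle_and_add(x, 0));
}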

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    unsafe {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
    }
}

#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}
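
// Illustrative check added for exposition (not in the original file): both the
// SSE2 path and the scalar fallback add each 64-bit lane independently,
// wrapping on overflow.
#[cfg(test)]
#[test]
fn add_by_64s_wraps_per_lane() {
    assert_eq!([0, 3], add_by_64s([u64::MAX, 1], [1, 2]));
}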

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesenc_si128(value, transmute!(xor)))
    }
}

#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesdec_si128(value, transmute!(xor)))
    }
}

#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}
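
// Illustrative check added for exposition (not in the original file): the
// `xor` argument is applied linearly after the AES round, so it can be
// recovered by XOR-ing two calls that share the same `value`.
#[cfg(all(
    test,
    any(target_arch = "x86", target_arch = "x86_64"),
    target_feature = "aes",
    not(miri)
))]
#[test]
fn aes_rounds_mix_in_xor_linearly() {
    let value = 0x0F0E0D0C_0B0A0908_07060504_03020100_u128;
    let xor = 0x00112233_44556677_8899AABB_CCDDEEFF_u128;
    assert_eq!(xor, aesenc(value, xor) ^ aesenc(value, 0));
    assert_eq!(xor, aesdec(value, xor) ^ aesdec(value, 0));
}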

/// Adds `len` (wrapping) to the low 64 bits of `enc`; the high 64 bits are left untouched.
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        unsafe {
            let enc = enc as *mut u128;
            let len = _mm_cvtsi64_si128(len as i64);
            let data = _mm_loadu_si128(enc.cast());
            let sum = _mm_add_epi64(data, len);
            _mm_storeu_si128(enc.cast(), sum);
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        let mut t: [u64; 2] = enc.convert();
        t[0] = t[0].wrapping_add(len);
        *enc = t.convert();
    }
}

#[cfg(test)]
mod test {
    use super::*;

    // This is code to search for the shuffle constant
    //
    //thread_local! { static MASK: Cell<u128> = Cell::new(0); }
    //
    // fn shuffle(a: u128) -> u128 {
    //     use std::intrinsics::transmute;
    //     #[cfg(target_arch = "x86")]
    //     use core::arch::x86::*;
    //     #[cfg(target_arch = "x86_64")]
    //     use core::arch::x86_64::*;
    //     MASK.with(|mask| {
    //         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) }
    //     })
    // }
    //
    // #[test]
    // fn find_shuffle() {
    //     use rand::prelude::*;
    //     use SliceRandom;
    //     use std::panic;
    //     use std::io::Write;
    //
    //     let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    //     let mut rand = thread_rng();
    //     let mut successful_list = HashMap::new();
    //     for _attempt in 0..10000000 {
    //         rand.shuffle(&mut value);
    //         let test_val = value.convert();
    //         MASK.with(|mask| {
    //             mask.set(test_val);
    //         });
    //         if let Ok(successful) = panic::catch_unwind(|| {
    //             test_shuffle_does_not_collide_with_aes();
    //             test_shuffle_moves_high_bits();
    //             test_shuffle_moves_every_value();
    //             //test_shuffle_does_not_loop();
    //             value
    //         }) {
    //             let successful: u128 = successful.convert();
    //             successful_list.insert(successful, iters_before_loop());
    //         }
    //     }
    //     let write_file = File::create("/tmp/output").unwrap();
    //     let mut writer = BufWriter::new(&write_file);
    //
    //     for success in successful_list {
    //         writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1);
    //     }
    // }
    //
    // fn iters_before_loop() -> u32 {
    //     let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
    //     let mut shuffled = shuffle(numbered);
    //     let mut count = 0;
    //     loop {
    //         // println!("{:>16x}", shuffled);
    //         if numbered == shuffled {
    //             break;
    //         }
    //         count += 1;
    //         shuffled = shuffle(shuffled);
    //     }
    //     count
    // }

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        target_feature = "aes",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_collide_with_aes() {
        let mut value: [u8; 16] = [0; 16];
        let zero_mask_enc = aesenc(0, 0);
        let zero_mask_dec = aesdec(0, 0);
        for index in 0..16 {
            value[index] = 1;
            let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
            let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
            let actual_location: [u8; 16] = shuffle(value.convert()).convert();
            for pos in 0..16 {
                if actual_location[pos] != 0 {
                    assert_eq!(
                        0, excluded_positions_enc[pos],
                        "Forward Overlap between {:?} and {:?} at {}",
                        excluded_positions_enc, actual_location, index
                    );
                    assert_eq!(
                        0, excluded_positions_dec[pos],
                        "Reverse Overlap between {:?} and {:?} at {}",
                        excluded_positions_dec, actual_location, index
                    );
                }
            }
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_contains_each_value() {
        let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
        let shuffled: [u8; 16] = shuffle(value.convert()).convert();
        for index in 0..16_u8 {
            assert!(shuffled.contains(&index), "Value is missing {}", index);
        }
    }

    #[test]
    fn test_shuffle_moves_every_value() {
        let mut value: [u8; 16] = [0; 16];
        for index in 0..16 {
            value[index] = 1;
            let shuffled: [u8; 16] = shuffle(value.convert()).convert();
            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_moves_high_bits() {
        assert!(
            shuffle(1) > (1_u128 << 80),
            "Low bits must be moved to other half {:?} -> {:?}",
            0,
            shuffle(1)
        );

        assert!(
            shuffle(1_u128 << 58) >= (1_u128 << 64),
            "High bits must be moved to other half {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 58) < (1_u128 << 112),
            "High bits must not remain high {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 64) < (1_u128 << 64),
            "Low bits must be moved to other half {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );
        assert!(
            shuffle(1_u128 << 64) >= (1_u128 << 16),
            "Low bits must not remain low {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );

        assert!(
            shuffle(1_u128 << 120) < (1_u128 << 50),
            "High bits must be moved to low half {:?} -> {:?}",
            15,
            shuffle(1_u128 << 120)
        );
    }

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_loop() {
        let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
        let mut shuffled = shuffle(numbered);
        for count in 0..100 {
            // println!("{:>16x}", shuffled);
            assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
            shuffled = shuffle(shuffled);
        }
    }

    #[test]
    fn test_add_length() {
        let mut enc = (u64::MAX as u128) << 64 | 50;
        add_in_length(&mut enc, u64::MAX);
        assert_eq!(enc >> 64, u64::MAX as u128);
        assert_eq!(enc as u64, 49);
    }
}