/rust/registry/src/index.crates.io-1949cf8c6b5b557f/twox-hash-1.6.3/src/sixty_four.rs
Line | Count | Source |
1 | | use crate::UnalignedBuffer; |
2 | | use core::{cmp, hash::Hasher}; |
3 | | |
4 | | #[cfg(feature = "serialize")] |
5 | | use serde::{Deserialize, Serialize}; |
6 | | |
7 | | const CHUNK_SIZE: usize = 32; |
8 | | |
9 | | pub const PRIME_1: u64 = 11_400_714_785_074_694_791; |
10 | | pub const PRIME_2: u64 = 14_029_467_366_897_019_727; |
11 | | pub const PRIME_3: u64 = 1_609_587_929_392_839_161; |
12 | | pub const PRIME_4: u64 = 9_650_029_242_287_828_579; |
13 | | pub const PRIME_5: u64 = 2_870_177_450_012_600_261; |
14 | | |
15 | | #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] |
16 | | #[derive(Copy, Clone, PartialEq)] |
17 | | struct XxCore { |
18 | | v1: u64, |
19 | | v2: u64, |
20 | | v3: u64, |
21 | | v4: u64, |
22 | | } |
23 | | |
24 | | /// Calculates the 64-bit hash. |
25 | | #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] |
26 | | #[derive(Debug, Copy, Clone, PartialEq)] |
27 | | pub struct XxHash64 { |
28 | | total_len: u64, |
29 | | seed: u64, |
30 | | core: XxCore, |
31 | | #[cfg_attr(feature = "serialize", serde(flatten))] |
32 | | buffer: Buffer, |
33 | | } |
34 | | |
35 | | impl XxCore { |
36 | 0 | fn with_seed(seed: u64) -> XxCore { |
37 | 0 | XxCore { |
38 | 0 | v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), |
39 | 0 | v2: seed.wrapping_add(PRIME_2), |
40 | 0 | v3: seed, |
41 | 0 | v4: seed.wrapping_sub(PRIME_1), |
42 | 0 | } |
43 | 0 | } |
44 | | |
45 | | #[inline(always)] |
46 | 0 | fn ingest_chunks<I>(&mut self, values: I) |
47 | 0 | where |
48 | 0 | I: IntoIterator<Item = [u64; 4]>, |
49 | | { |
50 | | #[inline(always)] |
51 | 0 | fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 { |
52 | 0 | value = value.wrapping_mul(PRIME_2); |
53 | 0 | current_value = current_value.wrapping_add(value); |
54 | 0 | current_value = current_value.rotate_left(31); |
55 | 0 | current_value.wrapping_mul(PRIME_1) |
56 | 0 | } |
57 | | |
58 | | // By drawing these out, we can avoid going back and forth to |
59 | | // memory. It only really helps for large files, when we need |
60 | | // to iterate multiple times here. |
61 | | |
62 | 0 | let mut v1 = self.v1; |
63 | 0 | let mut v2 = self.v2; |
64 | 0 | let mut v3 = self.v3; |
65 | 0 | let mut v4 = self.v4; |
66 | | |
67 | 0 | for [n1, n2, n3, n4] in values { |
68 | 0 | v1 = ingest_one_number(v1, n1.to_le()); |
69 | 0 | v2 = ingest_one_number(v2, n2.to_le()); |
70 | 0 | v3 = ingest_one_number(v3, n3.to_le()); |
71 | 0 | v4 = ingest_one_number(v4, n4.to_le()); |
72 | 0 | } |
73 | | |
74 | 0 | self.v1 = v1; |
75 | 0 | self.v2 = v2; |
76 | 0 | self.v3 = v3; |
77 | 0 | self.v4 = v4; |
78 | 0 | } |
79 | | |
80 | | #[inline(always)] |
81 | 0 | fn finish(&self) -> u64 { |
82 | | // The original code pulls out local vars for v[1234] |
83 | | // here. Performance tests did not show that to be effective |
84 | | // here, presumably because this method is not called in a |
85 | | // tight loop. |
86 | | |
87 | | #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel |
88 | | let mut hash; |
89 | | |
90 | 0 | hash = self.v1.rotate_left(1); |
91 | 0 | hash = hash.wrapping_add(self.v2.rotate_left(7)); |
92 | 0 | hash = hash.wrapping_add(self.v3.rotate_left(12)); |
93 | 0 | hash = hash.wrapping_add(self.v4.rotate_left(18)); |
94 | | |
95 | | #[inline(always)] |
96 | 0 | fn mix_one(mut hash: u64, mut value: u64) -> u64 { |
97 | 0 | value = value.wrapping_mul(PRIME_2); |
98 | 0 | value = value.rotate_left(31); |
99 | 0 | value = value.wrapping_mul(PRIME_1); |
100 | 0 | hash ^= value; |
101 | 0 | hash = hash.wrapping_mul(PRIME_1); |
102 | 0 | hash.wrapping_add(PRIME_4) |
103 | 0 | } |
104 | | |
105 | 0 | hash = mix_one(hash, self.v1); |
106 | 0 | hash = mix_one(hash, self.v2); |
107 | 0 | hash = mix_one(hash, self.v3); |
108 | 0 | hash = mix_one(hash, self.v4); |
109 | | |
110 | 0 | hash |
111 | 0 | } |
112 | | } |
113 | | |
114 | | impl core::fmt::Debug for XxCore { |
115 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { |
116 | 0 | write!( |
117 | 0 | f, |
118 | 0 | "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", |
119 | | self.v1, self.v2, self.v3, self.v4 |
120 | | ) |
121 | 0 | } |
122 | | } |
123 | | |
124 | | #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] |
125 | | #[derive(Debug, Copy, Clone, Default, PartialEq)] |
126 | | #[repr(align(8))] |
127 | | #[cfg_attr(feature = "serialize", serde(transparent))] |
128 | | struct AlignToU64<T>(T); |
129 | | |
130 | | #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] |
131 | | #[derive(Debug, Copy, Clone, Default, PartialEq)] |
132 | | struct Buffer { |
133 | | #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] |
134 | | data: AlignToU64<[u8; CHUNK_SIZE]>, |
135 | | #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] |
136 | | len: usize, |
137 | | } |
138 | | |
139 | | impl Buffer { |
140 | 0 | fn data(&self) -> &[u8] { |
141 | 0 | &self.data.0[..self.len] |
142 | 0 | } |
143 | | |
144 | | /// Consumes as much of the parameter as it can, returning the unused part. |
145 | 0 | fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { |
146 | 0 | let to_use = cmp::min(self.available(), data.len()); |
147 | 0 | let (data, remaining) = data.split_at(to_use); |
148 | 0 | self.data.0[self.len..][..to_use].copy_from_slice(data); |
149 | 0 | self.len += to_use; |
150 | 0 | remaining |
151 | 0 | } |
152 | | |
153 | 0 | fn set_data(&mut self, data: &[u8]) { |
154 | 0 | debug_assert!(self.is_empty()); |
155 | 0 | debug_assert!(data.len() < CHUNK_SIZE); |
156 | 0 | self.data.0[..data.len()].copy_from_slice(data); |
157 | 0 | self.len = data.len(); |
158 | 0 | } |
159 | | |
160 | 0 | fn available(&self) -> usize { |
161 | 0 | CHUNK_SIZE - self.len |
162 | 0 | } |
163 | | |
164 | 0 | fn is_empty(&self) -> bool { |
165 | 0 | self.len == 0 |
166 | 0 | } |
167 | | |
168 | 0 | fn is_full(&self) -> bool { |
169 | 0 | self.len == CHUNK_SIZE |
170 | 0 | } |
171 | | } |
172 | | |
173 | | impl XxHash64 { |
174 | | /// Constructs the hash with an initial seed |
175 | 0 | pub fn with_seed(seed: u64) -> XxHash64 { |
176 | 0 | XxHash64 { |
177 | 0 | total_len: 0, |
178 | 0 | seed, |
179 | 0 | core: XxCore::with_seed(seed), |
180 | 0 | buffer: Buffer::default(), |
181 | 0 | } |
182 | 0 | } |
183 | | |
184 | 0 | pub(crate) fn write(&mut self, bytes: &[u8]) { |
185 | 0 | let remaining = self.maybe_consume_bytes(bytes); |
186 | 0 | if !remaining.is_empty() { |
187 | 0 | let mut remaining = UnalignedBuffer::new(remaining); |
188 | 0 | self.core.ingest_chunks(&mut remaining); |
189 | 0 | self.buffer.set_data(remaining.remaining()); |
190 | 0 | } |
191 | 0 | self.total_len += bytes.len() as u64; |
192 | 0 | } |
193 | | |
194 | | // Consume bytes and try to make `self.buffer` empty. |
195 | | // If there are not enough bytes, `self.buffer` can be non-empty, and this |
196 | | // function returns an empty slice. |
197 | 0 | fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { |
198 | 0 | if self.buffer.is_empty() { |
199 | 0 | data |
200 | | } else { |
201 | 0 | let data = self.buffer.consume(data); |
202 | 0 | if self.buffer.is_full() { |
203 | 0 | let mut u64s = UnalignedBuffer::new(self.buffer.data()); |
204 | 0 | self.core.ingest_chunks(&mut u64s); |
205 | 0 | debug_assert!(u64s.remaining().is_empty()); |
206 | 0 | self.buffer.len = 0; |
207 | 0 | } |
208 | 0 | data |
209 | | } |
210 | 0 | } |
211 | | |
212 | 0 | pub(crate) fn finish(&self) -> u64 { |
213 | 0 | let mut hash = if self.total_len >= CHUNK_SIZE as u64 { |
214 | | // We have processed at least one full chunk |
215 | 0 | self.core.finish() |
216 | | } else { |
217 | 0 | self.seed.wrapping_add(PRIME_5) |
218 | | }; |
219 | | |
220 | 0 | hash = hash.wrapping_add(self.total_len); |
221 | | |
222 | 0 | let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data()); |
223 | 0 | for buffered_u64 in &mut buffered_u64s { |
224 | 0 | let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2); |
225 | 0 | k1 = k1.rotate_left(31); |
226 | 0 | k1 = k1.wrapping_mul(PRIME_1); |
227 | 0 | hash ^= k1; |
228 | 0 | hash = hash.rotate_left(27); |
229 | 0 | hash = hash.wrapping_mul(PRIME_1); |
230 | 0 | hash = hash.wrapping_add(PRIME_4); |
231 | 0 | } |
232 | | |
233 | 0 | let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining()); |
234 | 0 | for buffered_u32 in &mut buffered_u32s { |
235 | 0 | let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1); |
236 | 0 | hash ^= k1; |
237 | 0 | hash = hash.rotate_left(23); |
238 | 0 | hash = hash.wrapping_mul(PRIME_2); |
239 | 0 | hash = hash.wrapping_add(PRIME_3); |
240 | 0 | } |
241 | | |
242 | 0 | let buffered_u8s = buffered_u32s.remaining(); |
243 | 0 | for &buffered_u8 in buffered_u8s { |
244 | 0 | let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5); |
245 | 0 | hash ^= k1; |
246 | 0 | hash = hash.rotate_left(11); |
247 | 0 | hash = hash.wrapping_mul(PRIME_1); |
248 | 0 | } |
249 | | |
250 | | // The final intermixing |
251 | 0 | hash ^= hash >> 33; |
252 | 0 | hash = hash.wrapping_mul(PRIME_2); |
253 | 0 | hash ^= hash >> 29; |
254 | 0 | hash = hash.wrapping_mul(PRIME_3); |
255 | 0 | hash ^= hash >> 32; |
256 | | |
257 | 0 | hash |
258 | 0 | } |
259 | | |
260 | 0 | pub fn seed(&self) -> u64 { |
261 | 0 | self.seed |
262 | 0 | } |
263 | | |
264 | 0 | pub fn total_len(&self) -> u64 { |
265 | 0 | self.total_len |
266 | 0 | } |
267 | | } |
268 | | |
269 | | impl Default for XxHash64 { |
270 | 0 | fn default() -> XxHash64 { |
271 | 0 | XxHash64::with_seed(0) |
272 | 0 | } |
273 | | } |
274 | | |
275 | | impl Hasher for XxHash64 { |
276 | 0 | fn finish(&self) -> u64 { |
277 | 0 | XxHash64::finish(self) |
278 | 0 | } |
279 | | |
280 | 0 | fn write(&mut self, bytes: &[u8]) { |
281 | 0 | XxHash64::write(self, bytes) |
282 | 0 | } |
283 | | } |
284 | | |
285 | | #[cfg(feature = "std")] |
286 | | pub use crate::std_support::sixty_four::RandomXxHashBuilder64; |
287 | | |
288 | | #[cfg(test)] |
289 | | mod test { |
290 | | use super::{RandomXxHashBuilder64, XxHash64}; |
291 | | use std::collections::HashMap; |
292 | | use std::hash::BuildHasherDefault; |
293 | | use std::prelude::v1::*; |
294 | | |
295 | | #[test] |
296 | | fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { |
297 | | let bytes: Vec<_> = (0..32).map(|_| 0).collect(); |
298 | | |
299 | | let mut byte_by_byte = XxHash64::with_seed(0); |
300 | | for byte in bytes.chunks(1) { |
301 | | byte_by_byte.write(byte); |
302 | | } |
303 | | |
304 | | let mut one_chunk = XxHash64::with_seed(0); |
305 | | one_chunk.write(&bytes); |
306 | | |
307 | | assert_eq!(byte_by_byte.core, one_chunk.core); |
308 | | } |
309 | | |
310 | | #[test] |
311 | | fn hash_of_nothing_matches_c_implementation() { |
312 | | let mut hasher = XxHash64::with_seed(0); |
313 | | hasher.write(&[]); |
314 | | assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); |
315 | | } |
316 | | |
317 | | #[test] |
318 | | fn hash_of_single_byte_matches_c_implementation() { |
319 | | let mut hasher = XxHash64::with_seed(0); |
320 | | hasher.write(&[42]); |
321 | | assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); |
322 | | } |
323 | | |
324 | | #[test] |
325 | | fn hash_of_multiple_bytes_matches_c_implementation() { |
326 | | let mut hasher = XxHash64::with_seed(0); |
327 | | hasher.write(b"Hello, world!\0"); |
328 | | assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); |
329 | | } |
330 | | |
331 | | #[test] |
332 | | fn hash_of_multiple_chunks_matches_c_implementation() { |
333 | | let bytes: Vec<_> = (0..100).collect(); |
334 | | let mut hasher = XxHash64::with_seed(0); |
335 | | hasher.write(&bytes); |
336 | | assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); |
337 | | } |
338 | | |
339 | | #[test] |
340 | | fn hash_with_different_seed_matches_c_implementation() { |
341 | | let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); |
342 | | hasher.write(&[]); |
343 | | assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); |
344 | | } |
345 | | |
346 | | #[test] |
347 | | fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { |
348 | | let bytes: Vec<_> = (0..100).collect(); |
349 | | let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); |
350 | | hasher.write(&bytes); |
351 | | assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); |
352 | | } |
353 | | |
354 | | #[test] |
355 | | fn can_be_used_in_a_hashmap_with_a_default_seed() { |
356 | | let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default(); |
357 | | hash.insert(42, "the answer"); |
358 | | assert_eq!(hash.get(&42), Some(&"the answer")); |
359 | | } |
360 | | |
361 | | #[test] |
362 | | fn can_be_used_in_a_hashmap_with_a_random_seed() { |
363 | | let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); |
364 | | hash.insert(42, "the answer"); |
365 | | assert_eq!(hash.get(&42), Some(&"the answer")); |
366 | | } |
367 | | |
368 | | #[cfg(feature = "serialize")] |
369 | | type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>; |
370 | | |
371 | | #[cfg(feature = "serialize")] |
372 | | #[test] |
373 | | fn test_serialization_cycle() -> TestResult { |
374 | | let mut hasher = XxHash64::with_seed(0); |
375 | | hasher.write(b"Hello, world!\0"); |
376 | | hasher.finish(); |
377 | | |
378 | | let serialized = serde_json::to_string(&hasher)?; |
379 | | let unserialized: XxHash64 = serde_json::from_str(&serialized)?; |
380 | | assert_eq!(hasher, unserialized); |
381 | | Ok(()) |
382 | | } |
383 | | |
384 | | #[cfg(feature = "serialize")] |
385 | | #[test] |
386 | | fn test_serialization_stability() -> TestResult { |
387 | | let mut hasher = XxHash64::with_seed(0); |
388 | | hasher.write(b"Hello, world!\0"); |
389 | | hasher.finish(); |
390 | | |
391 | | let serialized = r#"{ |
392 | | "total_len": 14, |
393 | | "seed": 0, |
394 | | "core": { |
395 | | "v1": 6983438078262162902, |
396 | | "v2": 14029467366897019727, |
397 | | "v3": 0, |
398 | | "v4": 7046029288634856825 |
399 | | }, |
400 | | "buffer": [ |
401 | | 72, 101, 108, 108, 111, 44, 32, 119, |
402 | | 111, 114, 108, 100, 33, 0, 0, 0, |
403 | | 0, 0, 0, 0, 0, 0, 0, 0, |
404 | | 0, 0, 0, 0, 0, 0, 0, 0 |
405 | | ], |
406 | | "buffer_usage": 14 |
407 | | }"#; |
408 | | |
409 | | let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); |
410 | | assert_eq!(hasher, unserialized); |
411 | | Ok(()) |
412 | | } |
413 | | } |