Coverage Report

Created: 2025-11-16 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/twox-hash-1.6.3/src/sixty_four.rs
Line
Count
Source
1
use crate::UnalignedBuffer;
2
use core::{cmp, hash::Hasher};
3
4
#[cfg(feature = "serialize")]
5
use serde::{Deserialize, Serialize};
6
7
const CHUNK_SIZE: usize = 32;
8
9
pub const PRIME_1: u64 = 11_400_714_785_074_694_791;
10
pub const PRIME_2: u64 = 14_029_467_366_897_019_727;
11
pub const PRIME_3: u64 = 1_609_587_929_392_839_161;
12
pub const PRIME_4: u64 = 9_650_029_242_287_828_579;
13
pub const PRIME_5: u64 = 2_870_177_450_012_600_261;
14
15
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
16
#[derive(Copy, Clone, PartialEq)]
17
struct XxCore {
18
    v1: u64,
19
    v2: u64,
20
    v3: u64,
21
    v4: u64,
22
}
23
24
/// Calculates the 64-bit hash.
25
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
26
#[derive(Debug, Copy, Clone, PartialEq)]
27
pub struct XxHash64 {
28
    total_len: u64,
29
    seed: u64,
30
    core: XxCore,
31
    #[cfg_attr(feature = "serialize", serde(flatten))]
32
    buffer: Buffer,
33
}
34
35
impl XxCore {
36
0
    fn with_seed(seed: u64) -> XxCore {
37
0
        XxCore {
38
0
            v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
39
0
            v2: seed.wrapping_add(PRIME_2),
40
0
            v3: seed,
41
0
            v4: seed.wrapping_sub(PRIME_1),
42
0
        }
43
0
    }
44
45
    #[inline(always)]
46
0
    fn ingest_chunks<I>(&mut self, values: I)
47
0
    where
48
0
        I: IntoIterator<Item = [u64; 4]>,
49
    {
50
        #[inline(always)]
51
0
        fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 {
52
0
            value = value.wrapping_mul(PRIME_2);
53
0
            current_value = current_value.wrapping_add(value);
54
0
            current_value = current_value.rotate_left(31);
55
0
            current_value.wrapping_mul(PRIME_1)
56
0
        }
57
58
        // By drawing these out, we can avoid going back and forth to
59
        // memory. It only really helps for large files, when we need
60
        // to iterate multiple times here.
61
62
0
        let mut v1 = self.v1;
63
0
        let mut v2 = self.v2;
64
0
        let mut v3 = self.v3;
65
0
        let mut v4 = self.v4;
66
67
0
        for [n1, n2, n3, n4] in values {
68
0
            v1 = ingest_one_number(v1, n1.to_le());
69
0
            v2 = ingest_one_number(v2, n2.to_le());
70
0
            v3 = ingest_one_number(v3, n3.to_le());
71
0
            v4 = ingest_one_number(v4, n4.to_le());
72
0
        }
73
74
0
        self.v1 = v1;
75
0
        self.v2 = v2;
76
0
        self.v3 = v3;
77
0
        self.v4 = v4;
78
0
    }
79
80
    #[inline(always)]
81
0
    fn finish(&self) -> u64 {
82
        // The original code pulls out local vars for v[1234]
83
        // here. Performance tests did not show that to be effective
84
        // here, presumably because this method is not called in a
85
        // tight loop.
86
87
        #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
88
        let mut hash;
89
90
0
        hash = self.v1.rotate_left(1);
91
0
        hash = hash.wrapping_add(self.v2.rotate_left(7));
92
0
        hash = hash.wrapping_add(self.v3.rotate_left(12));
93
0
        hash = hash.wrapping_add(self.v4.rotate_left(18));
94
95
        #[inline(always)]
96
0
        fn mix_one(mut hash: u64, mut value: u64) -> u64 {
97
0
            value = value.wrapping_mul(PRIME_2);
98
0
            value = value.rotate_left(31);
99
0
            value = value.wrapping_mul(PRIME_1);
100
0
            hash ^= value;
101
0
            hash = hash.wrapping_mul(PRIME_1);
102
0
            hash.wrapping_add(PRIME_4)
103
0
        }
104
105
0
        hash = mix_one(hash, self.v1);
106
0
        hash = mix_one(hash, self.v2);
107
0
        hash = mix_one(hash, self.v3);
108
0
        hash = mix_one(hash, self.v4);
109
110
0
        hash
111
0
    }
112
}
113
114
impl core::fmt::Debug for XxCore {
115
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
116
0
        write!(
117
0
            f,
118
0
            "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
119
            self.v1, self.v2, self.v3, self.v4
120
        )
121
0
    }
122
}
123
124
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
125
#[derive(Debug, Copy, Clone, Default, PartialEq)]
126
#[repr(align(8))]
127
#[cfg_attr(feature = "serialize", serde(transparent))]
128
struct AlignToU64<T>(T);
129
130
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
131
#[derive(Debug, Copy, Clone, Default, PartialEq)]
132
struct Buffer {
133
    #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
134
    data: AlignToU64<[u8; CHUNK_SIZE]>,
135
    #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
136
    len: usize,
137
}
138
139
impl Buffer {
140
0
    fn data(&self) -> &[u8] {
141
0
        &self.data.0[..self.len]
142
0
    }
143
144
    /// Consumes as much of the parameter as it can, returning the unused part.
145
0
    fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
146
0
        let to_use = cmp::min(self.available(), data.len());
147
0
        let (data, remaining) = data.split_at(to_use);
148
0
        self.data.0[self.len..][..to_use].copy_from_slice(data);
149
0
        self.len += to_use;
150
0
        remaining
151
0
    }
152
153
0
    fn set_data(&mut self, data: &[u8]) {
154
0
        debug_assert!(self.is_empty());
155
0
        debug_assert!(data.len() < CHUNK_SIZE);
156
0
        self.data.0[..data.len()].copy_from_slice(data);
157
0
        self.len = data.len();
158
0
    }
159
160
0
    fn available(&self) -> usize {
161
0
        CHUNK_SIZE - self.len
162
0
    }
163
164
0
    fn is_empty(&self) -> bool {
165
0
        self.len == 0
166
0
    }
167
168
0
    fn is_full(&self) -> bool {
169
0
        self.len == CHUNK_SIZE
170
0
    }
171
}
172
173
impl XxHash64 {
174
    /// Constructs the hash with an initial seed
175
0
    pub fn with_seed(seed: u64) -> XxHash64 {
176
0
        XxHash64 {
177
0
            total_len: 0,
178
0
            seed,
179
0
            core: XxCore::with_seed(seed),
180
0
            buffer: Buffer::default(),
181
0
        }
182
0
    }
183
184
0
    pub(crate) fn write(&mut self, bytes: &[u8]) {
185
0
        let remaining = self.maybe_consume_bytes(bytes);
186
0
        if !remaining.is_empty() {
187
0
            let mut remaining = UnalignedBuffer::new(remaining);
188
0
            self.core.ingest_chunks(&mut remaining);
189
0
            self.buffer.set_data(remaining.remaining());
190
0
        }
191
0
        self.total_len += bytes.len() as u64;
192
0
    }
193
194
    // Consume bytes and try to make `self.buffer` empty.
195
    // If there are not enough bytes, `self.buffer` can be non-empty, and this
196
    // function returns an empty slice.
197
0
    fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
198
0
        if self.buffer.is_empty() {
199
0
            data
200
        } else {
201
0
            let data = self.buffer.consume(data);
202
0
            if self.buffer.is_full() {
203
0
                let mut u64s = UnalignedBuffer::new(self.buffer.data());
204
0
                self.core.ingest_chunks(&mut u64s);
205
0
                debug_assert!(u64s.remaining().is_empty());
206
0
                self.buffer.len = 0;
207
0
            }
208
0
            data
209
        }
210
0
    }
211
212
0
    pub(crate) fn finish(&self) -> u64 {
213
0
        let mut hash = if self.total_len >= CHUNK_SIZE as u64 {
214
            // We have processed at least one full chunk
215
0
            self.core.finish()
216
        } else {
217
0
            self.seed.wrapping_add(PRIME_5)
218
        };
219
220
0
        hash = hash.wrapping_add(self.total_len);
221
222
0
        let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data());
223
0
        for buffered_u64 in &mut buffered_u64s {
224
0
            let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2);
225
0
            k1 = k1.rotate_left(31);
226
0
            k1 = k1.wrapping_mul(PRIME_1);
227
0
            hash ^= k1;
228
0
            hash = hash.rotate_left(27);
229
0
            hash = hash.wrapping_mul(PRIME_1);
230
0
            hash = hash.wrapping_add(PRIME_4);
231
0
        }
232
233
0
        let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining());
234
0
        for buffered_u32 in &mut buffered_u32s {
235
0
            let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1);
236
0
            hash ^= k1;
237
0
            hash = hash.rotate_left(23);
238
0
            hash = hash.wrapping_mul(PRIME_2);
239
0
            hash = hash.wrapping_add(PRIME_3);
240
0
        }
241
242
0
        let buffered_u8s = buffered_u32s.remaining();
243
0
        for &buffered_u8 in buffered_u8s {
244
0
            let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5);
245
0
            hash ^= k1;
246
0
            hash = hash.rotate_left(11);
247
0
            hash = hash.wrapping_mul(PRIME_1);
248
0
        }
249
250
        // The final intermixing
251
0
        hash ^= hash >> 33;
252
0
        hash = hash.wrapping_mul(PRIME_2);
253
0
        hash ^= hash >> 29;
254
0
        hash = hash.wrapping_mul(PRIME_3);
255
0
        hash ^= hash >> 32;
256
257
0
        hash
258
0
    }
259
260
0
    pub fn seed(&self) -> u64 {
261
0
        self.seed
262
0
    }
263
264
0
    pub fn total_len(&self) -> u64 {
265
0
        self.total_len
266
0
    }
267
}
268
269
impl Default for XxHash64 {
270
0
    fn default() -> XxHash64 {
271
0
        XxHash64::with_seed(0)
272
0
    }
273
}
274
275
impl Hasher for XxHash64 {
276
0
    fn finish(&self) -> u64 {
277
0
        XxHash64::finish(self)
278
0
    }
279
280
0
    fn write(&mut self, bytes: &[u8]) {
281
0
        XxHash64::write(self, bytes)
282
0
    }
283
}
284
285
#[cfg(feature = "std")]
286
pub use crate::std_support::sixty_four::RandomXxHashBuilder64;
287
288
#[cfg(test)]
289
mod test {
290
    use super::{RandomXxHashBuilder64, XxHash64};
291
    use std::collections::HashMap;
292
    use std::hash::BuildHasherDefault;
293
    use std::prelude::v1::*;
294
295
    #[test]
296
    fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
297
        let bytes: Vec<_> = (0..32).map(|_| 0).collect();
298
299
        let mut byte_by_byte = XxHash64::with_seed(0);
300
        for byte in bytes.chunks(1) {
301
            byte_by_byte.write(byte);
302
        }
303
304
        let mut one_chunk = XxHash64::with_seed(0);
305
        one_chunk.write(&bytes);
306
307
        assert_eq!(byte_by_byte.core, one_chunk.core);
308
    }
309
310
    #[test]
311
    fn hash_of_nothing_matches_c_implementation() {
312
        let mut hasher = XxHash64::with_seed(0);
313
        hasher.write(&[]);
314
        assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
315
    }
316
317
    #[test]
318
    fn hash_of_single_byte_matches_c_implementation() {
319
        let mut hasher = XxHash64::with_seed(0);
320
        hasher.write(&[42]);
321
        assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
322
    }
323
324
    #[test]
325
    fn hash_of_multiple_bytes_matches_c_implementation() {
326
        let mut hasher = XxHash64::with_seed(0);
327
        hasher.write(b"Hello, world!\0");
328
        assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
329
    }
330
331
    #[test]
332
    fn hash_of_multiple_chunks_matches_c_implementation() {
333
        let bytes: Vec<_> = (0..100).collect();
334
        let mut hasher = XxHash64::with_seed(0);
335
        hasher.write(&bytes);
336
        assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
337
    }
338
339
    #[test]
340
    fn hash_with_different_seed_matches_c_implementation() {
341
        let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
342
        hasher.write(&[]);
343
        assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
344
    }
345
346
    #[test]
347
    fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
348
        let bytes: Vec<_> = (0..100).collect();
349
        let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
350
        hasher.write(&bytes);
351
        assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
352
    }
353
354
    #[test]
355
    fn can_be_used_in_a_hashmap_with_a_default_seed() {
356
        let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
357
        hash.insert(42, "the answer");
358
        assert_eq!(hash.get(&42), Some(&"the answer"));
359
    }
360
361
    #[test]
362
    fn can_be_used_in_a_hashmap_with_a_random_seed() {
363
        let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
364
        hash.insert(42, "the answer");
365
        assert_eq!(hash.get(&42), Some(&"the answer"));
366
    }
367
368
    #[cfg(feature = "serialize")]
369
    type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
370
371
    #[cfg(feature = "serialize")]
372
    #[test]
373
    fn test_serialization_cycle() -> TestResult {
374
        let mut hasher = XxHash64::with_seed(0);
375
        hasher.write(b"Hello, world!\0");
376
        hasher.finish();
377
378
        let serialized = serde_json::to_string(&hasher)?;
379
        let unserialized: XxHash64 = serde_json::from_str(&serialized)?;
380
        assert_eq!(hasher, unserialized);
381
        Ok(())
382
    }
383
384
    #[cfg(feature = "serialize")]
385
    #[test]
386
    fn test_serialization_stability() -> TestResult {
387
        let mut hasher = XxHash64::with_seed(0);
388
        hasher.write(b"Hello, world!\0");
389
        hasher.finish();
390
391
        let serialized = r#"{
392
            "total_len": 14,
393
            "seed": 0,
394
            "core": {
395
              "v1": 6983438078262162902,
396
              "v2": 14029467366897019727,
397
              "v3": 0,
398
              "v4": 7046029288634856825
399
            },
400
            "buffer": [
401
              72,  101, 108, 108, 111, 44, 32, 119,
402
              111, 114, 108, 100, 33,  0,  0,  0,
403
              0,   0,   0,   0,   0,   0,  0,  0,
404
              0,   0,   0,   0,   0,   0,  0,  0
405
            ],
406
            "buffer_usage": 14
407
        }"#;
408
409
        let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap();
410
        assert_eq!(hasher, unserialized);
411
        Ok(())
412
    }
413
}