/rust/registry/src/index.crates.io-1949cf8c6b5b557f/foldhash-0.1.5/src/fast.rs
Line | Count | Source |
1 | | //! The foldhash implementation optimized for speed. |
2 | | |
3 | | use core::hash::{BuildHasher, Hasher}; |
4 | | |
5 | | use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed}; |
6 | | use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3}; |
7 | | |
/// A [`Hasher`] instance implementing foldhash, optimized for speed.
///
/// While you can create one directly with [`FoldHasher::with_seed`], you
/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
/// [`FixedState`] to create [`FoldHasher`]s.
#[derive(Clone)]
pub struct FoldHasher {
    // Running hash state; folded-multiply of everything absorbed so far.
    accumulator: u64,
    // 128-bit buffer for small integer writes, flushed into `accumulator`
    // once full (see `write_num`).
    sponge: u128,
    // Number of bits of `sponge` currently occupied (0..=128).
    sponge_len: u8,
    // Per-process seeds taken from the `SharedSeed`, used respectively
    // for folding and for expanding/decorrelating byte-string input.
    fold_seed: u64,
    expand_seed: u64,
    expand_seed2: u64,
    expand_seed3: u64,
}
23 | | |
24 | | impl FoldHasher { |
25 | | /// Initializes this [`FoldHasher`] with the given per-hasher seed and |
26 | | /// [`SharedSeed`]. |
27 | | #[inline] |
28 | 1.21M | pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher { |
29 | 1.21M | FoldHasher { |
30 | 1.21M | accumulator: per_hasher_seed, |
31 | 1.21M | sponge: 0, |
32 | 1.21M | sponge_len: 0, |
33 | 1.21M | fold_seed: shared_seed.seeds[0], |
34 | 1.21M | expand_seed: shared_seed.seeds[1], |
35 | 1.21M | expand_seed2: shared_seed.seeds[2], |
36 | 1.21M | expand_seed3: shared_seed.seeds[3], |
37 | 1.21M | } |
38 | 1.21M | } |
39 | | |
40 | | #[inline(always)] |
41 | 3.73M | fn write_num<T: Into<u128>>(&mut self, x: T) { |
42 | 3.73M | let bits: usize = 8 * core::mem::size_of::<T>(); |
43 | 3.73M | if self.sponge_len as usize + bits > 128 { |
44 | 821k | let lo = self.sponge as u64; |
45 | 821k | let hi = (self.sponge >> 64) as u64; |
46 | 821k | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); |
47 | 821k | self.sponge = x.into(); |
48 | 821k | self.sponge_len = bits as u8; |
49 | 2.91M | } else { |
50 | 2.91M | self.sponge |= x.into() << self.sponge_len; |
51 | 2.91M | self.sponge_len += bits as u8; |
52 | 2.91M | } |
53 | 3.73M | } <foldhash::fast::FoldHasher>::write_num::<u32> Line | Count | Source | 41 | 1.64M | fn write_num<T: Into<u128>>(&mut self, x: T) { | 42 | 1.64M | let bits: usize = 8 * core::mem::size_of::<T>(); | 43 | 1.64M | if self.sponge_len as usize + bits > 128 { | 44 | 0 | let lo = self.sponge as u64; | 45 | 0 | let hi = (self.sponge >> 64) as u64; | 46 | 0 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); | 47 | 0 | self.sponge = x.into(); | 48 | 0 | self.sponge_len = bits as u8; | 49 | 1.64M | } else { | 50 | 1.64M | self.sponge |= x.into() << self.sponge_len; | 51 | 1.64M | self.sponge_len += bits as u8; | 52 | 1.64M | } | 53 | 1.64M | } |
<foldhash::fast::FoldHasher>::write_num::<u64> Line | Count | Source | 41 | 2.09M | fn write_num<T: Into<u128>>(&mut self, x: T) { | 42 | 2.09M | let bits: usize = 8 * core::mem::size_of::<T>(); | 43 | 2.09M | if self.sponge_len as usize + bits > 128 { | 44 | 821k | let lo = self.sponge as u64; | 45 | 821k | let hi = (self.sponge >> 64) as u64; | 46 | 821k | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); | 47 | 821k | self.sponge = x.into(); | 48 | 821k | self.sponge_len = bits as u8; | 49 | 1.26M | } else { | 50 | 1.26M | self.sponge |= x.into() << self.sponge_len; | 51 | 1.26M | self.sponge_len += bits as u8; | 52 | 1.26M | } | 53 | 2.09M | } |
Unexecuted instantiation: <foldhash::fast::FoldHasher>::write_num::<_> |
54 | | } |
55 | | |
impl Hasher for FoldHasher {
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) {
        // We perform overlapping reads in the byte hash which could lead to
        // trivial length-extension attacks. These should be defeated by
        // adding a length-dependent rotation on our unpredictable seed
        // which costs only a single cycle (or none if executed with
        // instruction-level parallelism).
        let len = bytes.len();
        let base_seed = rotate_right(self.accumulator, len as u32);
        if len <= 16 {
            let mut s0 = base_seed;
            let mut s1 = self.expand_seed;
            // XOR the input into s0, s1, then multiply and fold.
            if len >= 8 {
                // 8..=16 bytes: two 8-byte reads from each end, which may
                // overlap in the middle.
                s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
                s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
            } else if len >= 4 {
                // 4..=7 bytes: two 4-byte reads from each end, possibly
                // overlapping.
                s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
                s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
            } else if len > 0 {
                // 1..=3 bytes: first, middle and last byte (these may all
                // be the same byte for len == 1).
                let lo = bytes[0];
                let mid = bytes[len / 2];
                let hi = bytes[len - 1];
                s0 ^= lo as u64;
                s1 ^= ((hi as u64) << 8) | mid as u64;
            }
            // len == 0 leaves s0/s1 as pure seed material.
            self.accumulator = folded_multiply(s0, s1);
        } else if len < 256 {
            // Medium-length inputs: dedicated loop in hash_bytes_medium.
            self.accumulator = hash_bytes_medium(
                bytes,
                base_seed,
                base_seed.wrapping_add(self.expand_seed),
                self.fold_seed,
            );
        } else {
            // Long inputs: wider-state loop in hash_bytes_long, seeded with
            // four decorrelated lanes derived from base_seed.
            self.accumulator = hash_bytes_long(
                bytes,
                base_seed,
                base_seed.wrapping_add(self.expand_seed),
                base_seed.wrapping_add(self.expand_seed2),
                base_seed.wrapping_add(self.expand_seed3),
                self.fold_seed,
            );
        }
    }

    #[inline(always)]
    fn write_u8(&mut self, i: u8) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u16(&mut self, i: u16) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u32(&mut self, i: u32) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u64(&mut self, i: u64) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u128(&mut self, i: u128) {
        // A u128 would fill the entire sponge, so fold it straight into
        // the accumulator instead of buffering it.
        let lo = i as u64;
        let hi = (i >> 64) as u64;
        self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
    }

    #[inline(always)]
    fn write_usize(&mut self, i: usize) {
        // u128 doesn't implement From<usize>.
        #[cfg(target_pointer_width = "32")]
        self.write_num(i as u32);
        #[cfg(target_pointer_width = "64")]
        self.write_num(i as u64);
    }

    #[inline(always)]
    fn finish(&self) -> u64 {
        // Flush any bits still buffered in the sponge; otherwise the
        // accumulator already is the final hash.
        if self.sponge_len > 0 {
            let lo = self.sponge as u64;
            let hi = (self.sponge >> 64) as u64;
            folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed)
        } else {
            self.accumulator
        }
    }
}
150 | | |
/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized.
#[derive(Copy, Clone, Debug)]
pub struct RandomState {
    // Seed unique to this RandomState instance.
    per_hasher_seed: u64,
    // Handle to the lazily initialized process-wide shared seed.
    global_seed: GlobalSeed,
}
157 | | |
158 | | impl Default for RandomState { |
159 | | #[inline(always)] |
160 | 179k | fn default() -> Self { |
161 | 179k | Self { |
162 | 179k | per_hasher_seed: gen_per_hasher_seed(), |
163 | 179k | global_seed: GlobalSeed::new(), |
164 | 179k | } |
165 | 179k | } |
166 | | } |
167 | | |
168 | | impl BuildHasher for RandomState { |
169 | | type Hasher = FoldHasher; |
170 | | |
171 | | #[inline(always)] |
172 | 1.21M | fn build_hasher(&self) -> FoldHasher { |
173 | 1.21M | FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) |
174 | 1.21M | } |
175 | | } |
176 | | |
/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly
/// initialized by default, but can also be initialized with a specific seed.
///
/// This can be useful for e.g. testing, but the downside is that this type
/// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is.
#[derive(Copy, Clone, Debug)]
pub struct SeedableRandomState {
    // Seed unique to this state instance.
    per_hasher_seed: u64,
    // Shared seed reference stored directly (rather than looked up lazily
    // as in RandomState), which is what costs the extra 8 bytes.
    shared_seed: &'static SharedSeed,
}
187 | | |
188 | | impl Default for SeedableRandomState { |
189 | | #[inline(always)] |
190 | 0 | fn default() -> Self { |
191 | 0 | Self::random() |
192 | 0 | } |
193 | | } |
194 | | |
195 | | impl SeedableRandomState { |
196 | | /// Generates a random [`SeedableRandomState`], similar to [`RandomState`]. |
197 | | #[inline(always)] |
198 | 0 | pub fn random() -> Self { |
199 | 0 | Self { |
200 | 0 | per_hasher_seed: gen_per_hasher_seed(), |
201 | 0 | shared_seed: SharedSeed::global_random(), |
202 | 0 | } |
203 | 0 | } |
204 | | |
205 | | /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`]. |
206 | | #[inline(always)] |
207 | 0 | pub fn fixed() -> Self { |
208 | 0 | Self { |
209 | 0 | per_hasher_seed: ARBITRARY3, |
210 | 0 | shared_seed: SharedSeed::global_fixed(), |
211 | 0 | } |
212 | 0 | } |
213 | | |
214 | | /// Generates a [`SeedableRandomState`] with the given per-hasher seed |
215 | | /// and [`SharedSeed`]. |
216 | | #[inline(always)] |
217 | 0 | pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self { |
218 | | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
219 | 0 | Self { |
220 | 0 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
221 | 0 | shared_seed, |
222 | 0 | } |
223 | 0 | } |
224 | | } |
225 | | |
226 | | impl BuildHasher for SeedableRandomState { |
227 | | type Hasher = FoldHasher; |
228 | | |
229 | | #[inline(always)] |
230 | 0 | fn build_hasher(&self) -> FoldHasher { |
231 | 0 | FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed) |
232 | 0 | } |
233 | | } |
234 | | |
/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed.
///
/// Not recommended unless you absolutely need determinism.
#[derive(Copy, Clone, Debug)]
pub struct FixedState {
    // Deterministic per-hasher seed; the shared seed comes from
    // SharedSeed::global_fixed() at build_hasher time.
    per_hasher_seed: u64,
}
242 | | |
243 | | impl FixedState { |
244 | | /// Creates a [`FixedState`] with the given per-hasher-seed. |
245 | | #[inline(always)] |
246 | 0 | pub const fn with_seed(per_hasher_seed: u64) -> Self { |
247 | | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
248 | 0 | Self { |
249 | 0 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
250 | 0 | } |
251 | 0 | } |
252 | | } |
253 | | |
254 | | impl Default for FixedState { |
255 | | #[inline(always)] |
256 | 0 | fn default() -> Self { |
257 | 0 | Self { |
258 | 0 | per_hasher_seed: ARBITRARY3, |
259 | 0 | } |
260 | 0 | } |
261 | | } |
262 | | |
263 | | impl BuildHasher for FixedState { |
264 | | type Hasher = FoldHasher; |
265 | | |
266 | | #[inline(always)] |
267 | 0 | fn build_hasher(&self) -> FoldHasher { |
268 | 0 | FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed()) |
269 | 0 | } |
270 | | } |