/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rustc-hash-2.1.1/src/lib.rs
Line | Count | Source |
1 | | //! A speedy, non-cryptographic hashing algorithm used by `rustc`. |
2 | | //! |
3 | | //! # Example |
4 | | //! |
5 | | //! ```rust |
6 | | //! # #[cfg(feature = "std")] |
7 | | //! # fn main() { |
8 | | //! use rustc_hash::FxHashMap; |
9 | | //! |
10 | | //! let mut map: FxHashMap<u32, u32> = FxHashMap::default(); |
11 | | //! map.insert(22, 44); |
12 | | //! # } |
13 | | //! # #[cfg(not(feature = "std"))] |
14 | | //! # fn main() { } |
15 | | //! ``` |
16 | | |
17 | | #![no_std] |
18 | | #![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))] |
19 | | |
20 | | #[cfg(feature = "std")] |
21 | | extern crate std; |
22 | | |
23 | | #[cfg(feature = "rand")] |
24 | | extern crate rand; |
25 | | |
26 | | #[cfg(feature = "rand")] |
27 | | mod random_state; |
28 | | |
29 | | mod seeded_state; |
30 | | |
31 | | use core::default::Default; |
32 | | use core::hash::{BuildHasher, Hasher}; |
33 | | #[cfg(feature = "std")] |
34 | | use std::collections::{HashMap, HashSet}; |
35 | | |
36 | | /// Type alias for a hash map that uses the Fx hashing algorithm. |
37 | | #[cfg(feature = "std")] |
38 | | pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; |
39 | | |
40 | | /// Type alias for a hash set that uses the Fx hashing algorithm. |
41 | | #[cfg(feature = "std")] |
42 | | pub type FxHashSet<V> = HashSet<V, FxBuildHasher>; |
43 | | |
44 | | #[cfg(feature = "rand")] |
45 | | pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState}; |
46 | | |
47 | | pub use seeded_state::FxSeededState; |
48 | | #[cfg(feature = "std")] |
49 | | pub use seeded_state::{FxHashMapSeed, FxHashSetSeed}; |
50 | | |
51 | | /// A speedy hash algorithm for use within rustc. The hashmap in liballoc |
52 | | /// by default uses SipHash which isn't quite as speedy as we want. In the |
53 | | /// compiler we're not really worried about DOS attempts, so we use a fast |
54 | | /// non-cryptographic hash. |
55 | | /// |
56 | | /// The current implementation is a fast polynomial hash with a single |
57 | | /// bit rotation as a finishing step designed by Orson Peters. |
58 | | #[derive(Clone)] |
59 | | pub struct FxHasher { |
60 | | hash: usize, |
61 | | } |
62 | | |
63 | | // One might view a polynomial hash |
64 | | // m[0] * k + m[1] * k^2 + m[2] * k^3 + ... |
65 | | // as a multilinear hash with keystream k[..] |
66 | | // m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ... |
67 | | // where keystream k just happens to be generated using a multiplicative |
68 | | // congruential pseudorandom number generator (MCG). For that reason we chose a |
69 | | // constant that was found to be good for a MCG in: |
70 | | // "Computationally Easy, Spectrally Good Multipliers for Congruential |
71 | | // Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna. |
72 | | #[cfg(target_pointer_width = "64")] |
73 | | const K: usize = 0xf1357aea2e62a9c5; |
74 | | #[cfg(target_pointer_width = "32")] |
75 | | const K: usize = 0x93d765dd; |
76 | | |
77 | | impl FxHasher { |
78 | | /// Creates a `fx` hasher with a given seed. |
79 | | pub const fn with_seed(seed: usize) -> FxHasher { |
80 | | FxHasher { hash: seed } |
81 | | } |
82 | | |
83 | | /// Creates a default `fx` hasher. |
84 | 280M | pub const fn default() -> FxHasher { |
85 | 280M | FxHasher { hash: 0 } |
86 | 280M | } |
87 | | } |
88 | | |
89 | | impl Default for FxHasher { |
90 | | #[inline] |
91 | 98.1M | fn default() -> FxHasher { |
92 | 98.1M | Self::default() |
93 | 98.1M | } |
94 | | } |
95 | | |
96 | | impl FxHasher { |
97 | | #[inline] |
98 | 427M | fn add_to_hash(&mut self, i: usize) { |
99 | 427M | self.hash = self.hash.wrapping_add(i).wrapping_mul(K); |
100 | 427M | } |
101 | | } |
102 | | |
103 | | impl Hasher for FxHasher { |
104 | | #[inline] |
105 | | fn write(&mut self, bytes: &[u8]) { |
106 | | // Compress the byte string to a single u64 and add to our hash. |
107 | | self.write_u64(hash_bytes(bytes)); |
108 | | } |
109 | | |
110 | | #[inline] |
111 | 16.5M | fn write_u8(&mut self, i: u8) { |
112 | 16.5M | self.add_to_hash(i as usize); |
113 | 16.5M | } |
114 | | |
115 | | #[inline] |
116 | 35.5M | fn write_u16(&mut self, i: u16) { |
117 | 35.5M | self.add_to_hash(i as usize); |
118 | 35.5M | } |
119 | | |
120 | | #[inline] |
121 | 312M | fn write_u32(&mut self, i: u32) { |
122 | 312M | self.add_to_hash(i as usize); |
123 | 312M | } |
124 | | |
125 | | #[inline] |
126 | 5.28M | fn write_u64(&mut self, i: u64) { |
127 | 5.28M | self.add_to_hash(i as usize); |
128 | | #[cfg(target_pointer_width = "32")] |
129 | | self.add_to_hash((i >> 32) as usize); |
130 | 5.28M | } |
131 | | |
132 | | #[inline] |
133 | | fn write_u128(&mut self, i: u128) { |
134 | | self.add_to_hash(i as usize); |
135 | | #[cfg(target_pointer_width = "32")] |
136 | | self.add_to_hash((i >> 32) as usize); |
137 | | self.add_to_hash((i >> 64) as usize); |
138 | | #[cfg(target_pointer_width = "32")] |
139 | | self.add_to_hash((i >> 96) as usize); |
140 | | } |
141 | | |
142 | | #[inline] |
143 | 57.8M | fn write_usize(&mut self, i: usize) { |
144 | 57.8M | self.add_to_hash(i); |
145 | 57.8M | } |
146 | | |
147 | | #[cfg(feature = "nightly")] |
148 | | #[inline] |
149 | | fn write_length_prefix(&mut self, _len: usize) { |
150 | | // Most cases will specialize hash_slice to call write(), which encodes |
151 | | // the length already in a more efficient manner than we could here. For |
152 | | // HashDoS-resistance you would still need to include this for the |
153 | | // non-slice collection hashes, but for the purposes of rustc we do not |
154 | | // care and do not wish to pay the performance penalty of mixing in len |
155 | | // for those collections. |
156 | | } |
157 | | |
158 | | #[cfg(feature = "nightly")] |
159 | | #[inline] |
160 | | fn write_str(&mut self, s: &str) { |
161 | | // Similarly here, write already encodes the length, so nothing special |
162 | | // is needed. |
163 | | self.write(s.as_bytes()) |
164 | | } |
165 | | |
166 | | #[inline] |
167 | 280M | fn finish(&self) -> u64 { |
168 | | // Since we used a multiplicative hash our top bits have the most |
169 | | // entropy (with the top bit having the most, decreasing as you go). |
170 | | // As most hash table implementations (including hashbrown) compute |
171 | | // the bucket index from the bottom bits we want to move bits from the |
172 | | // top to the bottom. Ideally we'd rotate left by exactly the hash table |
173 | | // size, but as we don't know this we'll choose 26 bits, giving decent |
174 | | // entropy up until 2^26 table sizes. On 32-bit hosts we'll dial it |
175 | | // back down a bit to 15 bits. |
176 | | |
177 | | #[cfg(target_pointer_width = "64")] |
178 | | const ROTATE: u32 = 26; |
179 | | #[cfg(target_pointer_width = "32")] |
180 | | const ROTATE: u32 = 15; |
181 | | |
182 | 280M | self.hash.rotate_left(ROTATE) as u64 |
183 | | |
184 | | // A bit reversal would be even better, except hashbrown also expects |
185 | | // good entropy in the top 7 bits and a bit reverse would fill those |
186 | | // bits with low entropy. More importantly, bit reversals are very slow |
187 | | // on x86-64. A byte reversal is relatively fast, but still has a 2 |
188 | | // cycle latency on x86-64 compared to the 1 cycle latency of a rotate. |
189 | | // It also suffers from the hashbrown-top-7-bit-issue. |
190 | 280M | } |
191 | | } |
192 | | |
193 | | // Nothing special, digits of pi. |
194 | | const SEED1: u64 = 0x243f6a8885a308d3; |
195 | | const SEED2: u64 = 0x13198a2e03707344; |
196 | | const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0; |
197 | | |
198 | | #[inline] |
199 | | fn multiply_mix(x: u64, y: u64) -> u64 { |
200 | | #[cfg(target_pointer_width = "64")] |
201 | | { |
202 | | // We compute the full u64 x u64 -> u128 product, this is a single mul |
203 | | // instruction on x86-64, one mul plus one mulhi on ARM64. |
204 | | let full = (x as u128) * (y as u128); |
205 | | let lo = full as u64; |
206 | | let hi = (full >> 64) as u64; |
207 | | |
208 | | // The middle bits of the full product fluctuate the most with small |
209 | | // changes in the input. This is the top bits of lo and the bottom bits |
210 | | // of hi. We can thus make the entire output fluctuate with small |
211 | | // changes to the input by XOR'ing these two halves. |
212 | | lo ^ hi |
213 | | |
214 | | // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors, |
215 | | // otherwise combining with + or - could result in a really strong hash, as: |
216 | | // x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi, (mod 2^64 + 1) |
217 | | // x * y = 2^64 * hi + lo = 1 * hi + lo = lo + hi, (mod 2^64 - 1) |
218 | | // Multiplicative hashing is universal in a field (like mod p). |
219 | | } |
220 | | |
221 | | #[cfg(target_pointer_width = "32")] |
222 | | { |
223 | | // u64 x u64 -> u128 product is prohibitively expensive on 32-bit. |
224 | | // Decompose into 32-bit parts. |
225 | | let lx = x as u32; |
226 | | let ly = y as u32; |
227 | | let hx = (x >> 32) as u32; |
228 | | let hy = (y >> 32) as u32; |
229 | | |
230 | | // u32 x u32 -> u64 the low bits of one with the high bits of the other. |
231 | | let afull = (lx as u64) * (hy as u64); |
232 | | let bfull = (hx as u64) * (ly as u64); |
233 | | |
234 | | // Combine, swapping low/high of one of them so the upper bits of the |
235 | | // product of one combine with the lower bits of the other. |
236 | | afull ^ bfull.rotate_right(32) |
237 | | } |
238 | | } |
239 | | |
240 | | /// A wyhash-inspired non-collision-resistant hash for strings/slices designed |
241 | | /// by Orson Peters, with a focus on small strings and small codesize. |
242 | | /// |
243 | | /// The 64-bit version of this hash passes the SMHasher3 test suite on the full |
244 | | /// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching |
245 | | /// permutation f() passed all tests with zero failures. When using the 32-bit |
246 | | /// version of multiply_mix this hash has a few non-catastrophic failures where |
247 | | /// there are a handful more collisions than an optimal hash would give. |
248 | | /// |
249 | | /// We don't bother avalanching here as we'll feed this hash into a |
250 | | /// multiplication after which we take the high bits, which avalanches for us. |
251 | | #[inline] |
252 | | fn hash_bytes(bytes: &[u8]) -> u64 { |
253 | | let len = bytes.len(); |
254 | | let mut s0 = SEED1; |
255 | | let mut s1 = SEED2; |
256 | | |
257 | | if len <= 16 { |
258 | | // XOR the input into s0, s1. |
259 | | if len >= 8 { |
260 | | s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap()); |
261 | | s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap()); |
262 | | } else if len >= 4 { |
263 | | s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64; |
264 | | s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64; |
265 | | } else if len > 0 { |
266 | | let lo = bytes[0]; |
267 | | let mid = bytes[len / 2]; |
268 | | let hi = bytes[len - 1]; |
269 | | s0 ^= lo as u64; |
270 | | s1 ^= ((hi as u64) << 8) | mid as u64; |
271 | | } |
272 | | } else { |
273 | | // Handle bulk (can partially overlap with suffix). |
274 | | let mut off = 0; |
275 | | while off < len - 16 { |
276 | | let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap()); |
277 | | let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap()); |
278 | | |
279 | | // Replace s1 with a mix of s0, x, and y, and s0 with s1. |
280 | | // This ensures the compiler can unroll this loop into two |
281 | | // independent streams, one operating on s0, the other on s1. |
282 | | // |
283 | | // Since zeroes are a common input we prevent an immediate trivial |
284 | | // collapse of the hash function by XOR'ing a constant with y. |
285 | | let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y); |
286 | | s0 = s1; |
287 | | s1 = t; |
288 | | off += 16; |
289 | | } |
290 | | |
291 | | let suffix = &bytes[len - 16..]; |
292 | | s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap()); |
293 | | s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap()); |
294 | | } |
295 | | |
296 | | multiply_mix(s0, s1) ^ (len as u64) |
297 | | } |
298 | | |
299 | | /// An implementation of [`BuildHasher`] that produces [`FxHasher`]s. |
300 | | /// |
301 | | /// ``` |
302 | | /// use std::hash::BuildHasher; |
303 | | /// use rustc_hash::FxBuildHasher; |
304 | | /// assert_ne!(FxBuildHasher.hash_one(1), FxBuildHasher.hash_one(2)); |
305 | | /// ``` |
306 | | #[derive(Copy, Clone, Default)] |
307 | | pub struct FxBuildHasher; |
308 | | |
309 | | impl BuildHasher for FxBuildHasher { |
310 | | type Hasher = FxHasher; |
311 | 145M | fn build_hasher(&self) -> FxHasher { |
312 | 145M | FxHasher::default() |
313 | 145M | } |
314 | | } |
315 | | |
316 | | #[cfg(test)] |
317 | | mod tests { |
318 | | #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))] |
319 | | compile_error!("The test suite only supports 64 bit and 32 bit usize"); |
320 | | |
321 | | use crate::{FxBuildHasher, FxHasher}; |
322 | | use core::hash::{BuildHasher, Hash, Hasher}; |
323 | | |
324 | | macro_rules! test_hash { |
325 | | ( |
326 | | $( |
327 | | hash($value:expr) == $result:expr, |
328 | | )* |
329 | | ) => { |
330 | | $( |
331 | | assert_eq!(FxBuildHasher.hash_one($value), $result); |
332 | | )* |
333 | | }; |
334 | | } |
335 | | |
336 | | const B32: bool = cfg!(target_pointer_width = "32"); |
337 | | |
338 | | #[test] |
339 | | fn unsigned() { |
340 | | test_hash! { |
341 | | hash(0_u8) == 0, |
342 | | hash(1_u8) == if B32 { 3001993707 } else { 12157901119326311915 }, |
343 | | hash(100_u8) == if B32 { 3844759569 } else { 16751747135202103309 }, |
344 | | hash(u8::MAX) == if B32 { 999399879 } else { 1211781028898739645 }, |
345 | | |
346 | | hash(0_u16) == 0, |
347 | | hash(1_u16) == if B32 { 3001993707 } else { 12157901119326311915 }, |
348 | | hash(100_u16) == if B32 { 3844759569 } else { 16751747135202103309 }, |
349 | | hash(u16::MAX) == if B32 { 3440503042 } else { 16279819243059860173 }, |
350 | | |
351 | | hash(0_u32) == 0, |
352 | | hash(1_u32) == if B32 { 3001993707 } else { 12157901119326311915 }, |
353 | | hash(100_u32) == if B32 { 3844759569 } else { 16751747135202103309 }, |
354 | | hash(u32::MAX) == if B32 { 1293006356 } else { 7729994835221066939 }, |
355 | | |
356 | | hash(0_u64) == 0, |
357 | | hash(1_u64) == if B32 { 275023839 } else { 12157901119326311915 }, |
358 | | hash(100_u64) == if B32 { 1732383522 } else { 16751747135202103309 }, |
359 | | hash(u64::MAX) == if B32 { 1017982517 } else { 6288842954450348564 }, |
360 | | |
361 | | hash(0_u128) == 0, |
362 | | hash(1_u128) == if B32 { 1860738631 } else { 13032756267696824044 }, |
363 | | hash(100_u128) == if B32 { 1389515751 } else { 12003541609544029302 }, |
364 | | hash(u128::MAX) == if B32 { 2156022013 } else { 11702830760530184999 }, |
365 | | |
366 | | hash(0_usize) == 0, |
367 | | hash(1_usize) == if B32 { 3001993707 } else { 12157901119326311915 }, |
368 | | hash(100_usize) == if B32 { 3844759569 } else { 16751747135202103309 }, |
369 | | hash(usize::MAX) == if B32 { 1293006356 } else { 6288842954450348564 }, |
370 | | } |
371 | | } |
372 | | |
373 | | #[test] |
374 | | fn signed() { |
375 | | test_hash! { |
376 | | hash(i8::MIN) == if B32 { 2000713177 } else { 6684841074112525780 }, |
377 | | hash(0_i8) == 0, |
378 | | hash(1_i8) == if B32 { 3001993707 } else { 12157901119326311915 }, |
379 | | hash(100_i8) == if B32 { 3844759569 } else { 16751747135202103309 }, |
380 | | hash(i8::MAX) == if B32 { 3293686765 } else { 12973684028562874344 }, |
381 | | |
382 | | hash(i16::MIN) == if B32 { 1073764727 } else { 14218860181193086044 }, |
383 | | hash(0_i16) == 0, |
384 | | hash(1_i16) == if B32 { 3001993707 } else { 12157901119326311915 }, |
385 | | hash(100_i16) == if B32 { 3844759569 } else { 16751747135202103309 }, |
386 | | hash(i16::MAX) == if B32 { 2366738315 } else { 2060959061933882993 }, |
387 | | |
388 | | hash(i32::MIN) == if B32 { 16384 } else { 9943947977240134995 }, |
389 | | hash(0_i32) == 0, |
390 | | hash(1_i32) == if B32 { 3001993707 } else { 12157901119326311915 }, |
391 | | hash(100_i32) == if B32 { 3844759569 } else { 16751747135202103309 }, |
392 | | hash(i32::MAX) == if B32 { 1293022740 } else { 16232790931690483559 }, |
393 | | |
394 | | hash(i64::MIN) == if B32 { 16384 } else { 33554432 }, |
395 | | hash(0_i64) == 0, |
396 | | hash(1_i64) == if B32 { 275023839 } else { 12157901119326311915 }, |
397 | | hash(100_i64) == if B32 { 1732383522 } else { 16751747135202103309 }, |
398 | | hash(i64::MAX) == if B32 { 1017998901 } else { 6288842954483902996 }, |
399 | | |
400 | | hash(i128::MIN) == if B32 { 16384 } else { 33554432 }, |
401 | | hash(0_i128) == 0, |
402 | | hash(1_i128) == if B32 { 1860738631 } else { 13032756267696824044 }, |
403 | | hash(100_i128) == if B32 { 1389515751 } else { 12003541609544029302 }, |
404 | | hash(i128::MAX) == if B32 { 2156005629 } else { 11702830760496630567 }, |
405 | | |
406 | | hash(isize::MIN) == if B32 { 16384 } else { 33554432 }, |
407 | | hash(0_isize) == 0, |
408 | | hash(1_isize) == if B32 { 3001993707 } else { 12157901119326311915 }, |
409 | | hash(100_isize) == if B32 { 3844759569 } else { 16751747135202103309 }, |
410 | | hash(isize::MAX) == if B32 { 1293022740 } else { 6288842954483902996 }, |
411 | | } |
412 | | } |
413 | | |
414 | | // Avoid relying on any `Hash` implementations in the standard library. |
415 | | struct HashBytes(&'static [u8]); |
416 | | impl Hash for HashBytes { |
417 | | fn hash<H: core::hash::Hasher>(&self, state: &mut H) { |
418 | | state.write(self.0); |
419 | | } |
420 | | } |
421 | | |
422 | | #[test] |
423 | | fn bytes() { |
424 | | test_hash! { |
425 | | hash(HashBytes(&[])) == if B32 { 2673204745 } else { 17606491139363777937 }, |
426 | | hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 5448590020104574886 }, |
427 | | hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 16766921560080789783 }, |
428 | | hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 5922447956811044110 }, |
429 | | hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 5229781508510959783 }, |
430 | | hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 7168164714682931527 }, |
431 | | hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 2349210501944688211 }, |
432 | | } |
433 | | } |
434 | | |
435 | | #[test] |
436 | | fn with_seed_actually_different() { |
437 | | let seeds = [ |
438 | | [1, 2], |
439 | | [42, 17], |
440 | | [124436707, 99237], |
441 | | [usize::MIN, usize::MAX], |
442 | | ]; |
443 | | |
444 | | for [a_seed, b_seed] in seeds { |
445 | | let a = || FxHasher::with_seed(a_seed); |
446 | | let b = || FxHasher::with_seed(b_seed); |
447 | | |
448 | | for x in u8::MIN..=u8::MAX { |
449 | | let mut a = a(); |
450 | | let mut b = b(); |
451 | | |
452 | | x.hash(&mut a); |
453 | | x.hash(&mut b); |
454 | | |
455 | | assert_ne!(a.finish(), b.finish()) |
456 | | } |
457 | | } |
458 | | } |
459 | | } |