/rust/registry/src/index.crates.io-6f17d22bba15001f/foldhash-0.1.5/src/seed.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // These constants may end up unused depending on platform support. |
2 | | #[allow(unused)] |
3 | | use crate::{ARBITRARY1, ARBITRARY9}; |
4 | | |
5 | | use super::{folded_multiply, ARBITRARY2, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7}; |
6 | | |
7 | | /// Used for FixedState, and RandomState if atomics for dynamic init are unavailable. |
8 | | const FIXED_GLOBAL_SEED: SharedSeed = SharedSeed { |
9 | | seeds: [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7], |
10 | | }; |
11 | | |
12 | 193k | pub(crate) fn gen_per_hasher_seed() -> u64 { |
13 | 193k | // We initialize the per-hasher seed with the stack pointer to ensure |
14 | 193k | // different threads have different seeds, with as side benefit that |
15 | 193k | // stack address randomization gives us further non-determinism. |
16 | 193k | let mut per_hasher_seed = 0; |
17 | 193k | let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64; |
18 | 193k | per_hasher_seed = stack_ptr; |
19 | 193k | |
20 | 193k | // If we have the standard library available we use a thread-local |
21 | 193k | // state to ensure RandomStates are different with high probability, |
22 | 193k | // even if the call stack is the same. |
23 | 193k | #[cfg(feature = "std")] |
24 | 193k | { |
25 | 193k | use std::cell::Cell; |
26 | 193k | thread_local! { |
27 | 193k | static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) }; |
28 | 193k | } |
29 | 193k | |
30 | 193k | PER_HASHER_NONDETERMINISM.with(|cell| { |
31 | 193k | let nondeterminism = cell.get(); |
32 | 193k | per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); |
33 | 193k | cell.set(per_hasher_seed); |
34 | 193k | }) |
35 | 193k | }; |
36 | 193k | |
37 | 193k | // If we don't have the standard library we instead use a global |
38 | 193k | // atomic instead of a thread-local state. |
39 | 193k | // |
40 | 193k | // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner, |
41 | 193k | // but this doesn't matter in practice - it is impossible that two |
42 | 193k | // different threads have the same stack location, so they'll almost |
43 | 193k | // surely generate different seeds, and provide a different possible |
44 | 193k | // update for PER_HASHER_NONDETERMINISM. If we would use a proper |
45 | 193k | // fetch_add atomic update then there is a larger chance of |
46 | 193k | // problematic contention. |
47 | 193k | // |
48 | 193k | // We use usize instead of 64-bit atomics for best platform support. |
49 | 193k | #[cfg(not(feature = "std"))] |
50 | 193k | { |
51 | 193k | use core::sync::atomic::{AtomicUsize, Ordering}; |
52 | 193k | static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0); |
53 | 193k | |
54 | 193k | let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64; |
55 | 193k | per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); |
56 | 193k | PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed); |
57 | 193k | } |
58 | 193k | |
59 | 193k | // One extra mixing step to ensure good random bits. |
60 | 193k | folded_multiply(per_hasher_seed, ARBITRARY2) |
61 | 193k | } |
62 | | |
63 | | /// A random seed intended to be shared by many different foldhash instances. |
64 | | /// |
65 | | /// This seed is consumed by [`FoldHasher::with_seed`](crate::fast::FoldHasher::with_seed), |
66 | | /// and [`SeedableRandomState::with_seed`](crate::fast::SeedableRandomState::with_seed). |
67 | | #[derive(Clone, Debug)] |
68 | | pub struct SharedSeed { |
69 | | pub(crate) seeds: [u64; 4], |
70 | | } |
71 | | |
72 | | impl SharedSeed { |
73 | | /// Returns the globally shared randomly initialized [`SharedSeed`] as used |
74 | | /// by [`RandomState`](crate::fast::RandomState). |
75 | | #[inline(always)] |
76 | 0 | pub fn global_random() -> &'static SharedSeed { |
77 | 0 | global::GlobalSeed::new().get() |
78 | 0 | } |
79 | | |
80 | | /// Returns the globally shared fixed [`SharedSeed`] as used |
81 | | /// by [`FixedState`](crate::fast::FixedState). |
82 | | #[inline(always)] |
83 | 0 | pub const fn global_fixed() -> &'static SharedSeed { |
84 | 0 | &FIXED_GLOBAL_SEED |
85 | 0 | } |
86 | | |
87 | | /// Generates a new [`SharedSeed`] from a single 64-bit seed. |
88 | | /// |
89 | | /// Note that this is somewhat expensive so it is suggested to re-use the |
90 | | /// [`SharedSeed`] as much as possible, using the per-hasher seed to |
91 | | /// differentiate between hash instances. |
92 | 4 | pub const fn from_u64(seed: u64) -> Self { |
93 | | macro_rules! mix { |
94 | | ($x: expr) => { |
95 | | folded_multiply($x, ARBITRARY9) |
96 | | }; |
97 | | } |
98 | | |
99 | 4 | let seed_a = mix!(mix!(mix!(seed))); |
100 | 4 | let seed_b = mix!(mix!(mix!(seed_a))); |
101 | 4 | let seed_c = mix!(mix!(mix!(seed_b))); |
102 | 4 | let seed_d = mix!(mix!(mix!(seed_c))); |
103 | | |
104 | | // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be |
105 | | // a common input. So we want our global seeds that are XOR'ed with the |
106 | | // input to always be non-zero. To also ensure there is always a good spread |
107 | | // of bits, we give up 3 bits of entropy and simply force some bits on. |
108 | | const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1; |
109 | 4 | Self { |
110 | 4 | seeds: [ |
111 | 4 | seed_a | FORCED_ONES, |
112 | 4 | seed_b | FORCED_ONES, |
113 | 4 | seed_c | FORCED_ONES, |
114 | 4 | seed_d | FORCED_ONES, |
115 | 4 | ], |
116 | 4 | } |
117 | 4 | } |
118 | | } |
119 | | |
120 | | #[cfg(target_has_atomic = "8")] |
121 | | mod global { |
122 | | use super::*; |
123 | | use core::cell::UnsafeCell; |
124 | | use core::sync::atomic::{AtomicU8, Ordering}; |
125 | | |
126 | 4 | fn generate_global_seed() -> SharedSeed { |
127 | 12 | let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9); |
128 | | |
129 | | // Use address space layout randomization as our main randomness source. |
130 | | // This isn't great, but we don't advertise HashDoS resistance in the first |
131 | | // place. This is a whole lot better than nothing, at near zero cost with |
132 | | // no dependencies. |
133 | 4 | let mut seed = 0; |
134 | 4 | let stack_ptr = &seed as *const _; |
135 | 4 | let func_ptr = generate_global_seed; |
136 | 4 | let static_ptr = &GLOBAL_SEED_STORAGE as *const _; |
137 | 4 | seed = mix(seed, stack_ptr as usize as u64); |
138 | 4 | seed = mix(seed, func_ptr as usize as u64); |
139 | 4 | seed = mix(seed, static_ptr as usize as u64); |
140 | 4 | |
141 | 4 | // If we have the standard library available, augment entropy with the |
142 | 4 | // current time and an address from the allocator. |
143 | 4 | #[cfg(feature = "std")] |
144 | 4 | { |
145 | 4 | #[cfg(not(any( |
146 | 4 | miri, |
147 | 4 | all(target_family = "wasm", target_os = "unknown"), |
148 | 4 | target_os = "zkvm" |
149 | 4 | )))] |
150 | 4 | if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() { |
151 | 4 | seed = mix(seed, duration.subsec_nanos() as u64); |
152 | 4 | seed = mix(seed, duration.as_secs()); |
153 | 4 | } |
154 | 4 | |
155 | 4 | let box_ptr = &*Box::new(0u8) as *const _; |
156 | 4 | seed = mix(seed, box_ptr as usize as u64); |
157 | 4 | } |
158 | 4 | |
159 | 4 | SharedSeed::from_u64(seed) |
160 | 4 | } |
161 | | |
162 | | // Now all the below code purely exists to cache the above seed as |
163 | | // efficiently as possible. Even if we weren't a no_std crate and had access to |
164 | | // OnceLock, we don't want to check whether the global is set each time we |
165 | | // hash an object, so we hand-roll a global storage where type safety allows us |
166 | | // to assume the storage is initialized after construction. |
167 | | struct GlobalSeedStorage { |
168 | | state: AtomicU8, |
169 | | seed: UnsafeCell<SharedSeed>, |
170 | | } |
171 | | |
172 | | const UNINIT: u8 = 0; |
173 | | const LOCKED: u8 = 1; |
174 | | const INIT: u8 = 2; |
175 | | |
176 | | // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive |
177 | | // LOCKED state, and only read the UnsafeCells when state is in the |
178 | | // once-achieved-eternally-preserved state INIT. |
179 | | unsafe impl Sync for GlobalSeedStorage {} |
180 | | |
181 | | static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage { |
182 | | state: AtomicU8::new(UNINIT), |
183 | | seed: UnsafeCell::new(SharedSeed { seeds: [0; 4] }), |
184 | | }; |
185 | | |
186 | | /// An object representing an initialized global seed. |
187 | | /// |
188 | | /// Does not actually store the seed inside itself, it is a zero-sized type. |
189 | | /// This prevents inflating the RandomState size and in turn HashMap's size. |
190 | | #[derive(Copy, Clone, Debug)] |
191 | | pub struct GlobalSeed { |
192 | | // So we can't accidentally type GlobalSeed { } within this crate. |
193 | | _no_accidental_unsafe_init: (), |
194 | | } |
195 | | |
196 | | impl GlobalSeed { |
197 | | #[inline(always)] |
198 | 193k | pub fn new() -> Self { |
199 | 193k | if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT { |
200 | 4 | Self::init_slow() |
201 | 193k | } |
202 | 193k | Self { |
203 | 193k | _no_accidental_unsafe_init: (), |
204 | 193k | } |
205 | 193k | } |
206 | | |
207 | | #[cold] |
208 | | #[inline(never)] |
209 | 4 | fn init_slow() { |
210 | 4 | // Generate seed outside of critical section. |
211 | 4 | let seed = generate_global_seed(); |
212 | | |
213 | | loop { |
214 | 4 | match GLOBAL_SEED_STORAGE.state.compare_exchange_weak( |
215 | 4 | UNINIT, |
216 | 4 | LOCKED, |
217 | 4 | Ordering::Acquire, |
218 | 4 | Ordering::Acquire, |
219 | 4 | ) { |
220 | | Ok(_) => unsafe { |
221 | | // SAFETY: we just acquired an exclusive lock. |
222 | 4 | *GLOBAL_SEED_STORAGE.seed.get() = seed; |
223 | 4 | GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release); |
224 | 4 | return; |
225 | | }, |
226 | | |
227 | 0 | Err(INIT) => return, |
228 | | |
229 | | // Yes, it's a spin loop. We need to support no_std (so no easy |
230 | | // access to proper locks), this is a one-time-per-program |
231 | | // initialization, and the critical section is only a few |
232 | | // store instructions, so it'll be fine. |
233 | 0 | _ => core::hint::spin_loop(), |
234 | | } |
235 | | } |
236 | 4 | } |
237 | | |
238 | | #[inline(always)] |
239 | 1.58M | pub fn get(self) -> &'static SharedSeed { |
240 | 1.58M | // SAFETY: our constructor ensured we are in the INIT state and thus |
241 | 1.58M | // this raw read does not race with any write. |
242 | 1.58M | unsafe { &*GLOBAL_SEED_STORAGE.seed.get() } |
243 | 1.58M | } |
244 | | } |
245 | | } |
246 | | |
247 | | #[cfg(not(target_has_atomic = "8"))] |
248 | | mod global { |
249 | | use super::*; |
250 | | |
251 | | #[derive(Copy, Clone, Debug)] |
252 | | pub struct GlobalSeed {} |
253 | | |
254 | | impl GlobalSeed { |
255 | | #[inline(always)] |
256 | | pub fn new() -> Self { |
257 | | Self {} |
258 | | } |
259 | | |
260 | | #[inline(always)] |
261 | | pub fn get(self) -> &'static SharedSeed { |
262 | | &super::FIXED_GLOBAL_SEED |
263 | | } |
264 | | } |
265 | | } |
266 | | |
267 | | pub(crate) use global::GlobalSeed; |