Coverage Report

Created: 2025-07-23 07:29

/rust/registry/src/index.crates.io-6f17d22bba15001f/foldhash-0.1.5/src/seed.rs
Line
Count
Source (jump to first uncovered line)
1
// These constants may end up unused depending on platform support.
2
#[allow(unused)]
3
use crate::{ARBITRARY1, ARBITRARY9};
4
5
use super::{folded_multiply, ARBITRARY2, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7};
6
7
/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
// Built from four of the crate's ARBITRARY* constants (imported from the
// parent module) rather than from runtime entropy, so it is fully const.
const FIXED_GLOBAL_SEED: SharedSeed = SharedSeed {
    seeds: [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7],
};
11
12
193k
/// Generates a fresh 64-bit per-hasher seed.
///
/// Mixes the stack address of a local variable with an evolving
/// "nondeterminism" value (a thread-local with `std`, a global atomic
/// without), then applies a final folded multiply so the returned bits
/// are well mixed.
pub(crate) fn gen_per_hasher_seed() -> u64 {
    // We initialize the per-hasher seed with the stack pointer to ensure
    // different threads have different seeds, with as side benefit that
    // stack address randomization gives us further non-determinism.
    let mut per_hasher_seed = 0;
    let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
    per_hasher_seed = stack_ptr;

    // If we have the standard library available we use a thread-local
    // state to ensure RandomStates are different with high probability,
    // even if the call stack is the same.
    #[cfg(feature = "std")]
    {
        use std::cell::Cell;
        thread_local! {
            static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
        }

        // Fold the previous per-thread value into this seed and write the
        // result back, so consecutive calls on one thread keep diverging.
        PER_HASHER_NONDETERMINISM.with(|cell| {
            let nondeterminism = cell.get();
            per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
            cell.set(per_hasher_seed);
        })
    };

    // If we don't have the standard library we instead use a global
    // atomic instead of a thread-local state.
    //
    // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
    // but this doesn't matter in practice - it is impossible that two
    // different threads have the same stack location, so they'll almost
    // surely generate different seeds, and provide a different possible
    // update for PER_HASHER_NONDETERMINISM. If we would use a proper
    // fetch_add atomic update then there is a larger chance of
    // problematic contention.
    //
    // We use usize instead of 64-bit atomics for best platform support.
    #[cfg(not(feature = "std"))]
    {
        use core::sync::atomic::{AtomicUsize, Ordering};
        static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);

        let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
        per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
        PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
    }

    // One extra mixing step to ensure good random bits.
    folded_multiply(per_hasher_seed, ARBITRARY2)
}
62
63
/// A random seed intended to be shared by many different foldhash instances.
///
/// This seed is consumed by [`FoldHasher::with_seed`](crate::fast::FoldHasher::with_seed),
/// and [`SeedableRandomState::with_seed`](crate::fast::SeedableRandomState::with_seed).
#[derive(Clone, Debug)]
pub struct SharedSeed {
    // Four independent 64-bit seed words, consumed by the hashers elsewhere
    // in the crate.
    pub(crate) seeds: [u64; 4],
}
71
72
impl SharedSeed {
73
    /// Returns the globally shared randomly initialized [`SharedSeed`] as used
74
    /// by [`RandomState`](crate::fast::RandomState).
75
    #[inline(always)]
76
0
    pub fn global_random() -> &'static SharedSeed {
77
0
        global::GlobalSeed::new().get()
78
0
    }
79
80
    /// Returns the globally shared fixed [`SharedSeed`] as used
81
    /// by [`FixedState`](crate::fast::FixedState).
82
    #[inline(always)]
83
0
    pub const fn global_fixed() -> &'static SharedSeed {
84
0
        &FIXED_GLOBAL_SEED
85
0
    }
86
87
    /// Generates a new [`SharedSeed`] from a single 64-bit seed.
88
    ///
89
    /// Note that this is somewhat expensive so it is suggested to re-use the
90
    /// [`SharedSeed`] as much as possible, using the per-hasher seed to
91
    /// differentiate between hash instances.
92
4
    pub const fn from_u64(seed: u64) -> Self {
93
        macro_rules! mix {
94
            ($x: expr) => {
95
                folded_multiply($x, ARBITRARY9)
96
            };
97
        }
98
99
4
        let seed_a = mix!(mix!(mix!(seed)));
100
4
        let seed_b = mix!(mix!(mix!(seed_a)));
101
4
        let seed_c = mix!(mix!(mix!(seed_b)));
102
4
        let seed_d = mix!(mix!(mix!(seed_c)));
103
104
        // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be
105
        // a common input. So we want our global seeds that are XOR'ed with the
106
        // input to always be non-zero. To also ensure there is always a good spread
107
        // of bits, we give up 3 bits of entropy and simply force some bits on.
108
        const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
109
4
        Self {
110
4
            seeds: [
111
4
                seed_a | FORCED_ONES,
112
4
                seed_b | FORCED_ONES,
113
4
                seed_c | FORCED_ONES,
114
4
                seed_d | FORCED_ONES,
115
4
            ],
116
4
        }
117
4
    }
118
}
119
120
#[cfg(target_has_atomic = "8")]
mod global {
    use super::*;
    use core::cell::UnsafeCell;
    use core::sync::atomic::{AtomicU8, Ordering};

    // Builds the process-wide random seed from cheap, dependency-free
    // entropy sources. Only invoked from the cold init path below.
    fn generate_global_seed() -> SharedSeed {
        let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

        // Use address space layout randomization as our main randomness source.
        // This isn't great, but we don't advertise HashDoS resistance in the first
        // place. This is a whole lot better than nothing, at near zero cost with
        // no dependencies.
        let mut seed = 0;
        let stack_ptr = &seed as *const _;
        let func_ptr = generate_global_seed;
        let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
        seed = mix(seed, stack_ptr as usize as u64);
        seed = mix(seed, func_ptr as usize as u64);
        seed = mix(seed, static_ptr as usize as u64);

        // If we have the standard library available, augment entropy with the
        // current time and an address from the allocator.
        #[cfg(feature = "std")]
        {
            // Wall-clock time is unavailable or deterministic on these
            // targets, so skip it there.
            #[cfg(not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            )))]
            if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
                seed = mix(seed, duration.subsec_nanos() as u64);
                seed = mix(seed, duration.as_secs());
            }

            let box_ptr = &*Box::new(0u8) as *const _;
            seed = mix(seed, box_ptr as usize as u64);
        }

        SharedSeed::from_u64(seed)
    }

    // Now all the below code purely exists to cache the above seed as
    // efficiently as possible. Even if we weren't a no_std crate and had access to
    // OnceLock, we don't want to check whether the global is set each time we
    // hash an object, so we hand-roll a global storage where type safety allows us
    // to assume the storage is initialized after construction.
    struct GlobalSeedStorage {
        // Holds one of UNINIT / LOCKED / INIT; guards access to `seed`.
        state: AtomicU8,
        // Only written while `state` is LOCKED, only read once it is INIT.
        seed: UnsafeCell<SharedSeed>,
    }

    // The state machine only ever advances UNINIT -> LOCKED -> INIT.
    const UNINIT: u8 = 0;
    const LOCKED: u8 = 1;
    const INIT: u8 = 2;

    // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
    // LOCKED state, and only read the UnsafeCells when state is in the
    // once-achieved-eternally-preserved state INIT.
    unsafe impl Sync for GlobalSeedStorage {}

    static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
        state: AtomicU8::new(UNINIT),
        // Placeholder seed; replaced during init before anyone can read it.
        seed: UnsafeCell::new(SharedSeed { seeds: [0; 4] }),
    };

    /// An object representing an initialized global seed.
    ///
    /// Does not actually store the seed inside itself, it is a zero-sized type.
    /// This prevents inflating the RandomState size and in turn HashMap's size.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {
        // So we can't accidentally type GlobalSeed { } within this crate.
        _no_accidental_unsafe_init: (),
    }

    impl GlobalSeed {
        /// Returns a handle, running the one-time initialization first if the
        /// global storage has not reached INIT yet.
        #[inline(always)]
        pub fn new() -> Self {
            // Acquire load pairs with the Release store in init_slow, so once
            // we observe INIT the seed write is visible to us.
            if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
                Self::init_slow()
            }
            Self {
                _no_accidental_unsafe_init: (),
            }
        }

        #[cold]
        #[inline(never)]
        fn init_slow() {
            // Generate seed outside of critical section.
            let seed = generate_global_seed();

            loop {
                match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                    UNINIT,
                    LOCKED,
                    Ordering::Acquire,
                    Ordering::Acquire,
                ) {
                    Ok(_) => unsafe {
                        // SAFETY: we just acquired an exclusive lock.
                        *GLOBAL_SEED_STORAGE.seed.get() = seed;
                        GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                        return;
                    },

                    // Another thread finished initialization; nothing to do.
                    Err(INIT) => return,

                    // Yes, it's a spin loop. We need to support no_std (so no easy
                    // access to proper locks), this is a one-time-per-program
                    // initialization, and the critical section is only a few
                    // store instructions, so it'll be fine.
                    _ => core::hint::spin_loop(),
                }
            }
        }

        /// Returns a reference to the initialized global seed.
        #[inline(always)]
        pub fn get(self) -> &'static SharedSeed {
            // SAFETY: our constructor ensured we are in the INIT state and thus
            // this raw read does not race with any write.
            unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
        }
    }
}
246
247
#[cfg(not(target_has_atomic = "8"))]
mod global {
    use super::*;

    /// Fallback seed handle for targets without byte-sized atomics.
    ///
    /// Zero-sized like the atomic-backed variant, but since dynamic one-time
    /// initialization is unavailable here it simply hands out the fixed seed.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {}

    impl GlobalSeed {
        /// Creates a (stateless) handle.
        #[inline(always)]
        pub fn new() -> Self {
            GlobalSeed {}
        }

        /// Returns the compile-time fixed global seed.
        #[inline(always)]
        pub fn get(self) -> &'static SharedSeed {
            &super::FIXED_GLOBAL_SEED
        }
    }
}
266
267
pub(crate) use global::GlobalSeed;