Coverage Report

Created: 2025-07-23 06:21

/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.4.9/src/util/lazy.rs
Every instrumented line in this file has an execution count of 0 in this report (the file is entirely uncovered), so the per-line counts are omitted from the source listing below.

/*!
A lazily initialized value for safe sharing between threads.

The principal type in this module is `Lazy`, which makes it easy to construct
values that are shared safely across multiple threads simultaneously.
*/

use core::fmt;

/// A lazily initialized value that implements `Deref` for `T`.
///
/// A `Lazy` takes an initialization function and permits callers from any
/// thread to access the result of that initialization function in a safe
/// manner. In effect, this permits one-time initialization of global resources
/// in a (possibly) multi-threaded program.
///
/// This type and its functionality are available even when neither the `alloc`
/// nor the `std` features are enabled. In exchange, a `Lazy` does **not**
/// guarantee that the given `create` function is called at most once. It
/// might be called multiple times. Moreover, a call to `Lazy::get` (either
/// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T`
/// is available.
///
/// This is very similar to `lazy_static` or `once_cell`, except it doesn't
/// guarantee that the initialization function will be run once and it works
/// in no-alloc no-std environments. With that said, if you need stronger
/// guarantees or a more flexible API, then it is recommended to use either
/// `lazy_static` or `once_cell`.
///
/// # Warning: may use a spin lock
///
/// When this crate is compiled _without_ the `alloc` feature, then this type
/// may use a spin lock internally. This can have subtle effects that may
/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more
/// thorough treatment of this topic.
///
/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
///
/// # Example
///
/// This type is useful for creating regexes once, and then using them from
/// multiple threads simultaneously without worrying about synchronization.
///
/// ```
/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match};
///
/// static RE: Lazy<Regex> = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap());
///
/// let expected = Some(Match::must(0, 3..14));
/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz"));
/// ```
pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>);

impl<T, F> Lazy<T, F> {
    /// Create a new `Lazy` value that is initialized via the given function.
    ///
    /// The `T` type is automatically inferred from the return type of the
    /// `create` function given.
    pub const fn new(create: F) -> Lazy<T, F> {
        Lazy(lazy::Lazy::new(create))
    }
}

impl<T, F: Fn() -> T> Lazy<T, F> {
    /// Return a reference to the lazily initialized value.
    ///
    /// This routine may block if another thread is initializing a `T`.
    ///
    /// Note that given an `x` which has type `Lazy`, this must be called via
    /// `Lazy::get(x)` and not `x.get()`. This routine is defined this way
    /// because `Lazy` impls `Deref` with a target of `T`.
    ///
    /// # Panics
    ///
    /// This panics if the `create` function inside this lazy value panics.
    /// If the panic occurred in another thread, then this routine _may_ also
    /// panic (but is not guaranteed to do so).
    pub fn get(this: &Lazy<T, F>) -> &T {
        this.0.get()
    }
}

impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> {
    type Target = T;

    fn deref(&self) -> &T {
        Lazy::get(self)
    }
}

impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}

#[cfg(feature = "alloc")]
mod lazy {
    use core::{
        fmt,
        marker::PhantomData,
        sync::atomic::{AtomicPtr, Ordering},
    };

    use alloc::boxed::Box;

    /// A non-std lazy initialized value.
    ///
    /// This might run the initialization function more than once, but will
    /// never block.
    ///
    /// I wish I could get these semantics into the non-alloc non-std Lazy
    /// type below, but I'm not sure how to do it. If you can do an alloc,
    /// then the implementation becomes very simple if you don't care about
    /// redundant work precisely because a pointer can be atomically swapped.
    ///
    /// Perhaps making this approach work in the non-alloc non-std case
    /// requires asking the caller for a pointer? It would make the API less
    /// convenient I think.
    pub(super) struct Lazy<T, F> {
        data: AtomicPtr<T>,
        create: F,
        // This indicates to the compiler that this type can drop T. It's not
        // totally clear how the absence of this marker could lead to trouble,
        // but putting it here doesn't have any downsides, so we hedge until
        // someone from the Unsafe Working Group can tell us definitively that
        // we don't need it.
        //
        // See: https://github.com/BurntSushi/regex-automata/issues/30
        owned: PhantomData<Box<T>>,
    }

    // SAFETY: So long as T and &T (and F and &F) can themselves be safely
    // shared among threads, so too can a Lazy<T, _>. Namely, the Lazy API only
    // permits accessing a &T and initialization is free of data races. So if T
    // is thread safe, then so too is Lazy<T, _>.
    //
    // We specifically require that T: Send in order for Lazy<T> to be Sync.
    // Without that requirement, it's possible to send a T from one thread to
    // another via Lazy's destructor.
    //
    // It's not clear whether we need F: Send+Sync for Lazy to be Sync. But
    // we're conservative for now and keep both.
    unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}

    impl<T, F> Lazy<T, F> {
        /// Create a new alloc but non-std lazy value that is racily
        /// initialized. That is, the 'create' function may be called more than
        /// once.
        pub(super) const fn new(create: F) -> Lazy<T, F> {
            Lazy {
                data: AtomicPtr::new(core::ptr::null_mut()),
                create,
                owned: PhantomData,
            }
        }
    }

    impl<T, F: Fn() -> T> Lazy<T, F> {
        /// Get the underlying lazy value. If it hasn't been initialized
        /// yet, then always attempt to initialize it (even if some other
        /// thread is initializing it) and atomically attach it to this lazy
        /// value before returning it.
        pub(super) fn get(&self) -> &T {
            if let Some(data) = self.poll() {
                return data;
            }
            let data = (self.create)();
            let mut ptr = Box::into_raw(Box::new(data));
            // We attempt to stuff our initialized value into our atomic
            // pointer. Upon success, we don't need to do anything. But if
            // someone else beat us to the punch, then we need to make sure
            // our newly created value is dropped.
            let result = self.data.compare_exchange(
                core::ptr::null_mut(),
                ptr,
                Ordering::AcqRel,
                Ordering::Acquire,
            );
            if let Err(old) = result {
                // SAFETY: We created 'ptr' via Box::into_raw above, so turning
                // it back into a Box via from_raw is safe.
                drop(unsafe { Box::from_raw(ptr) });
                ptr = old;
            }
            // SAFETY: We just set the pointer above to a non-null value, even
            // in the error case, and set it to a fully initialized value
            // returned by 'create'.
            unsafe { &*ptr }
        }

        /// If this lazy value has been initialized successfully, then return
        /// that value. Otherwise return None immediately. This never attempts
        /// to run initialization itself.
        fn poll(&self) -> Option<&T> {
            let ptr = self.data.load(Ordering::Acquire);
            if ptr.is_null() {
                return None;
            }
            // SAFETY: We just checked that the pointer is not null. Since it's
            // not null, it must have been fully initialized by 'get' at some
            // point.
            Some(unsafe { &*ptr })
        }
    }

    impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.debug_struct("Lazy").field("data", &self.poll()).finish()
        }
    }

    impl<T, F> Drop for Lazy<T, F> {
        fn drop(&mut self) {
            let ptr = *self.data.get_mut();
            if !ptr.is_null() {
                // SAFETY: We just checked that 'ptr' is not null. And since
                // we have exclusive access, there are no races to worry about.
                drop(unsafe { Box::from_raw(ptr) });
            }
        }
    }
}

#[cfg(not(feature = "alloc"))]
mod lazy {
    use core::{
        cell::Cell,
        fmt,
        mem::MaybeUninit,
        panic::{RefUnwindSafe, UnwindSafe},
        sync::atomic::{AtomicU8, Ordering},
    };

    /// Our 'Lazy' value can be in one of three states:
    ///
    /// * INIT is where it starts, and also ends up back here if the
    /// 'create' routine panics.
    /// * BUSY is where it sits while initialization is running in exactly
    /// one thread.
    /// * DONE is where it sits after 'create' has completed and 'data' has
    /// been fully initialized.
    const LAZY_STATE_INIT: u8 = 0;
    const LAZY_STATE_BUSY: u8 = 1;
    const LAZY_STATE_DONE: u8 = 2;

    /// A non-alloc non-std lazy initialized value.
    ///
    /// This guarantees initialization only happens once, but uses a spinlock
    /// to block in the case of simultaneous access. Blocking occurs so that
    /// one thread waits while another thread initializes the value.
    ///
    /// I would much rather have the semantics of the 'alloc' Lazy type above.
    /// Namely, that we might run the initialization function more than once,
    /// but we never otherwise block. However, I don't know how to do that in
    /// a non-alloc non-std context.
    pub(super) struct Lazy<T, F> {
        state: AtomicU8,
        create: Cell<Option<F>>,
        data: Cell<MaybeUninit<T>>,
    }

    // SAFETY: So long as T and &T (and F and &F) can themselves be safely
    // shared among threads, so too can a Lazy<T, _>. Namely, the Lazy API only
    // permits accessing a &T and initialization is free of data races. So if T
    // is thread safe, then so too is Lazy<T, _>.
    unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
    // A reference to a Lazy is unwind safe because we specifically take
    // precautions to poison all accesses to a Lazy if the caller-provided
    // 'create' function panics.
    impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
        for Lazy<T, F>
    {
    }

    impl<T, F> Lazy<T, F> {
        /// Create a new non-alloc non-std lazy value that is initialized
        /// exactly once on first use using the given function.
        pub(super) const fn new(create: F) -> Lazy<T, F> {
            Lazy {
                state: AtomicU8::new(LAZY_STATE_INIT),
                create: Cell::new(Some(create)),
                data: Cell::new(MaybeUninit::uninit()),
            }
        }
    }

    impl<T, F: FnOnce() -> T> Lazy<T, F> {
        /// Get the underlying lazy value. If it hasn't been initialized
        /// yet, then either initialize it or block until some other thread
        /// initializes it. If the 'create' function given to Lazy::new panics
        /// (even in another thread), then this panics too.
        pub(super) fn get(&self) -> &T {
            // This is effectively a spinlock. We loop until we enter a DONE
            // state, and if possible, initialize it ourselves. The only way
            // we exit the loop is if 'create' panics, we initialize 'data' or
            // some other thread initializes 'data'.
            //
            // Yes, I have read spinlocks considered harmful[1]. And that
            // article is why this spinlock is only active when 'alloc' isn't
            // enabled. I did this because I don't think there is really
            // another choice without 'alloc', other than not providing this at
            // all. But I think that's a big bummer.
            //
            // [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
            while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE {
                // Check if we're the first ones to get here. If so, we'll be
                // the ones who initialize.
                let result = self.state.compare_exchange(
                    LAZY_STATE_INIT,
                    LAZY_STATE_BUSY,
                    Ordering::AcqRel,
                    Ordering::Acquire,
                );
                // This means we saw the INIT state and nobody else can. So we
                // must take responsibility for initializing. And by virtue of
                // observing INIT, we have also told anyone else trying to
                // get here that we are BUSY. If someone else sees BUSY, then
                // they will spin until we finish initialization.
                if let Ok(_) = result {
                    // Since we are guaranteed to be the only ones here, we
                    // know that 'create' is there... Unless someone else got
                    // here before us and 'create' panicked. In which case,
                    // 'self.create' is now 'None' and we forward the panic
                    // to the caller. (i.e., We implement poisoning.)
                    //
                    // SAFETY: Our use of 'self.state' guarantees that we are
                    // the only thread executing this line, and thus there are
                    // no races.
                    let create = unsafe {
                        (*self.create.as_ptr()).take().expect(
                            "Lazy's create function panicked, \
                             preventing initialization, \
                             poisoning current thread",
                        )
                    };
                    let guard = Guard { state: &self.state };
                    // SAFETY: Our use of 'self.state' guarantees that we are
                    // the only thread executing this line, and thus there are
                    // no races.
                    unsafe {
                        (*self.data.as_ptr()).as_mut_ptr().write(create());
                    }
                    // All is well. 'self.create' ran successfully, so we
                    // forget the guard.
                    core::mem::forget(guard);
                    // Everything is initialized, so we can declare success.
                    self.state.store(LAZY_STATE_DONE, Ordering::Release);
                    break;
                }
                core::hint::spin_loop();
            }
            // We only get here if data is fully initialized, and thus poll
            // will always return something.
            self.poll().unwrap()
        }

        /// If this lazy value has been initialized successfully, then return
        /// that value. Otherwise return None immediately. This never blocks.
        fn poll(&self) -> Option<&T> {
            if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE {
                // SAFETY: The DONE state only occurs when data has been fully
                // initialized.
                Some(unsafe { &*(*self.data.as_ptr()).as_ptr() })
            } else {
                None
            }
        }
    }

    impl<T: fmt::Debug, F: FnMut() -> T> fmt::Debug for Lazy<T, F> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.debug_struct("Lazy")
                .field("state", &self.state.load(Ordering::Acquire))
                .field("create", &"<closure>")
                .field("data", &self.poll())
                .finish()
        }
    }

    impl<T, F> Drop for Lazy<T, F> {
        fn drop(&mut self) {
            if *self.state.get_mut() == LAZY_STATE_DONE {
                // SAFETY: state is DONE if and only if data has been fully
                // initialized. At which point, it is safe to drop.
                unsafe {
                    self.data.get_mut().assume_init_drop();
                }
            }
        }
    }

    /// A guard that will reset a Lazy's state back to INIT when dropped. The
    /// idea here is to 'forget' this guard on success. On failure (when a
    /// panic occurs), the Drop impl runs and causes all in-progress and future
    /// 'get' calls to panic. Without this guard, all in-progress and future
    /// 'get' calls would spin forever. Crashing is much better than getting
    /// stuck in an infinite loop.
    struct Guard<'a> {
        state: &'a AtomicU8,
    }

    impl<'a> Drop for Guard<'a> {
        fn drop(&mut self) {
            // We force ourselves back into an INIT state. This will in turn
            // cause any future 'get' calls to attempt calling 'self.create'
            // again which will in turn panic because 'self.create' will now
            // be 'None'.
            self.state.store(LAZY_STATE_INIT, Ordering::Release);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn assert_send<T: Send>() {}
    fn assert_sync<T: Sync>() {}
    fn assert_unwind<T: core::panic::UnwindSafe>() {}
    fn assert_refunwind<T: core::panic::RefUnwindSafe>() {}

    #[test]
    fn oibits() {
        assert_send::<Lazy<u64>>();
        assert_sync::<Lazy<u64>>();
        assert_unwind::<Lazy<u64>>();
        assert_refunwind::<Lazy<u64>>();
    }
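
    // Note: this check is an illustrative sketch added in this report; it is
    // not part of the upstream regex-automata source. It only exercises the
    // public `Lazy::new`/`Lazy::get`/`Deref` API documented above.
    #[test]
    fn lazy_initializes_on_first_access() {
        // A non-capturing closure coerces to the default `F = fn() -> T`.
        static MSG: Lazy<&'static str> = Lazy::new(|| "hello");
        // The first access runs 'create' (possibly more than once in 'alloc'
        // mode); every access then observes the same initialized value.
        assert_eq!("hello", *Lazy::get(&MSG));
        // Implicit access through the Deref impl.
        assert_eq!(5, MSG.len());
    }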

    // This is a regression test because we used to rely on the inferred Sync
    // impl for the Lazy type defined above (for 'alloc' mode). In the
    // inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But
    // if we have that, we can actually make use of the fact that Lazy<T> drops
    // T to create a value on one thread and drop it on another. This *should*
    // require T: Send, but our previously missing bounds let it sneak by.
    //
    // Basically, this test should not compile, so we... comment it out. We
    // don't have a great way of testing compile-fail tests right now.
    //
    // See: https://github.com/BurntSushi/regex-automata/issues/30
    /*
    #[test]
    fn sync_not_send() {
        #[allow(dead_code)]
        fn inner<T: Sync + Default>() {
            let lazy = Lazy::new(move || T::default());
            std::thread::scope(|scope| {
                scope.spawn(|| {
                    Lazy::get(&lazy); // We create T in this thread.
                });
            });
            // And drop in this thread.
            drop(lazy);
            // So we have sent a !Send type across threads. (With some more
            // legwork, it's possible to even sneak the value out of drop
            // through a thread local.)
        }
    }
    */
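
    // Note: like the sketch above, this test is an illustration added in this
    // report rather than part of the upstream source. It assumes `std` is
    // available when the tests are built (the commented-out test above makes
    // the same assumption) and checks the doc claim that a `Lazy` can be
    // shared across threads, with every reader observing one initialized value.
    #[test]
    fn shared_across_threads() {
        static N: Lazy<u64> = Lazy::new(|| 41 + 1);
        std::thread::scope(|scope| {
            for _ in 0..4 {
                scope.spawn(|| assert_eq!(42, *Lazy::get(&N)));
            }
        });
    }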
}