/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.4.9/src/util/lazy.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /*! |
2 | | A lazily initialized value for safe sharing between threads. |
3 | | |
4 | | The principal type in this module is `Lazy`, which makes it easy to construct |
5 | | values that are shared safely across multiple threads simultaneously. |
6 | | */ |
7 | | |
8 | | use core::fmt; |
9 | | |
10 | | /// A lazily initialized value that implements `Deref` for `T`. |
11 | | /// |
12 | | /// A `Lazy` takes an initialization function and permits callers from any |
13 | | /// thread to access the result of that initialization function in a safe |
14 | | /// manner. In effect, this permits one-time initialization of global resources |
15 | | /// in a (possibly) multi-threaded program. |
16 | | /// |
17 | | /// This type and its functionality are available even when neither the `alloc` |
18 | | /// nor the `std` features are enabled. In exchange, a `Lazy` does **not** |
19 | | /// guarantee that the given `create` function is called at most once. It |
20 | | /// might be called multiple times. Moreover, a call to `Lazy::get` (either |
21 | | /// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T` |
22 | | /// is available. |
23 | | /// |
24 | | /// This is very similar to `lazy_static` or `once_cell`, except it doesn't |
25 | | /// guarantee that the initialization function will be run once and it works |
26 | | /// in no-alloc no-std environments. With that said, if you need stronger |
27 | | /// guarantees or a more flexible API, then it is recommended to use either |
28 | | /// `lazy_static` or `once_cell`. |
29 | | /// |
30 | | /// # Warning: may use a spin lock |
31 | | /// |
32 | | /// When this crate is compiled _without_ the `alloc` feature, then this type |
33 | | /// may used a spin lock internally. This can have subtle effects that may |
34 | | /// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more |
35 | | /// thorough treatment of this topic. |
36 | | /// |
37 | | /// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html |
38 | | /// |
39 | | /// # Example |
40 | | /// |
41 | | /// This type is useful for creating regexes once, and then using them from |
42 | | /// multiple threads simultaneously without worrying about synchronization. |
43 | | /// |
44 | | /// ``` |
45 | | /// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match}; |
46 | | /// |
47 | | /// static RE: Lazy<Regex> = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap()); |
48 | | /// |
49 | | /// let expected = Some(Match::must(0, 3..14)); |
50 | | /// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz")); |
51 | | /// ``` |
52 | | pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>); |
53 | | |
54 | | impl<T, F> Lazy<T, F> { |
55 | | /// Create a new `Lazy` value that is initialized via the given function. |
56 | | /// |
57 | | /// The `T` type is automatically inferred from the return type of the |
58 | | /// `create` function given. |
59 | 0 | pub const fn new(create: F) -> Lazy<T, F> { |
60 | 0 | Lazy(lazy::Lazy::new(create)) |
61 | 0 | } |
62 | | } |
63 | | |
64 | | impl<T, F: Fn() -> T> Lazy<T, F> { |
65 | | /// Return a reference to the lazily initialized value. |
66 | | /// |
67 | | /// This routine may block if another thread is initializing a `T`. |
68 | | /// |
69 | | /// Note that given a `x` which has type `Lazy`, this must be called via |
70 | | /// `Lazy::get(x)` and not `x.get()`. This routine is defined this way |
71 | | /// because `Lazy` impls `Deref` with a target of `T`. |
72 | | /// |
73 | | /// # Panics |
74 | | /// |
75 | | /// This panics if the `create` function inside this lazy value panics. |
76 | | /// If the panic occurred in another thread, then this routine _may_ also |
77 | | /// panic (but is not guaranteed to do so). |
78 | 0 | pub fn get(this: &Lazy<T, F>) -> &T { |
79 | 0 | this.0.get() |
80 | 0 | } |
81 | | } |
82 | | |
83 | | impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> { |
84 | | type Target = T; |
85 | | |
86 | 0 | fn deref(&self) -> &T { |
87 | 0 | Lazy::get(self) |
88 | 0 | } |
89 | | } |
90 | | |
91 | | impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> { |
92 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
93 | 0 | self.0.fmt(f) |
94 | 0 | } |
95 | | } |
96 | | |
97 | | #[cfg(feature = "alloc")] |
98 | | mod lazy { |
99 | | use core::{ |
100 | | fmt, |
101 | | marker::PhantomData, |
102 | | sync::atomic::{AtomicPtr, Ordering}, |
103 | | }; |
104 | | |
105 | | use alloc::boxed::Box; |
106 | | |
107 | | /// A non-std lazy initialized value. |
108 | | /// |
109 | | /// This might run the initialization function more than once, but will |
110 | | /// never block. |
111 | | /// |
112 | | /// I wish I could get these semantics into the non-alloc non-std Lazy |
113 | | /// type below, but I'm not sure how to do it. If you can do an alloc, |
114 | | /// then the implementation becomes very simple if you don't care about |
115 | | /// redundant work precisely because a pointer can be atomically swapped. |
116 | | /// |
117 | | /// Perhaps making this approach work in the non-alloc non-std case |
118 | | /// requires asking the caller for a pointer? It would make the API less |
119 | | /// convenient I think. |
120 | | pub(super) struct Lazy<T, F> { |
121 | | data: AtomicPtr<T>, |
122 | | create: F, |
123 | | // This indicates to the compiler that this type can drop T. It's not |
124 | | // totally clear how the absence of this marker could lead to trouble, |
125 | | // but putting here doesn't have any downsides so we hedge until somone |
126 | | // can from the Unsafe Working Group can tell us definitively that we |
127 | | // don't need it. |
128 | | // |
129 | | // See: https://github.com/BurntSushi/regex-automata/issues/30 |
130 | | owned: PhantomData<Box<T>>, |
131 | | } |
132 | | |
133 | | // SAFETY: So long as T and &T (and F and &F) can themselves be safely |
134 | | // shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only |
135 | | // permits accessing a &T and initialization is free of data races. So if T |
136 | | // is thread safe, then so to is Lazy<T, _>. |
137 | | // |
138 | | // We specifically require that T: Send in order for Lazy<T> to be Sync. |
139 | | // Without that requirement, it's possible to send a T from one thread to |
140 | | // another via Lazy's destructor. |
141 | | // |
142 | | // It's not clear whether we need F: Send+Sync for Lazy to be Sync. But |
143 | | // we're conservative for now and keep both. |
144 | | unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {} |
145 | | |
146 | | impl<T, F> Lazy<T, F> { |
147 | | /// Create a new alloc but non-std lazy value that is racily |
148 | | /// initialized. That is, the 'create' function may be called more than |
149 | | /// once. |
150 | 0 | pub(super) const fn new(create: F) -> Lazy<T, F> { |
151 | 0 | Lazy { |
152 | 0 | data: AtomicPtr::new(core::ptr::null_mut()), |
153 | 0 | create, |
154 | 0 | owned: PhantomData, |
155 | 0 | } |
156 | 0 | } |
157 | | } |
158 | | |
159 | | impl<T, F: Fn() -> T> Lazy<T, F> { |
160 | | /// Get the underlying lazy value. If it hasn't been initialized |
161 | | /// yet, then always attempt to initialize it (even if some other |
162 | | /// thread is initializing it) and atomically attach it to this lazy |
163 | | /// value before returning it. |
164 | 0 | pub(super) fn get(&self) -> &T { |
165 | 0 | if let Some(data) = self.poll() { |
166 | 0 | return data; |
167 | 0 | } |
168 | 0 | let data = (self.create)(); |
169 | 0 | let mut ptr = Box::into_raw(Box::new(data)); |
170 | 0 | // We attempt to stuff our initialized value into our atomic |
171 | 0 | // pointer. Upon success, we don't need to do anything. But if |
172 | 0 | // someone else beat us to the punch, then we need to make sure |
173 | 0 | // our newly created value is dropped. |
174 | 0 | let result = self.data.compare_exchange( |
175 | 0 | core::ptr::null_mut(), |
176 | 0 | ptr, |
177 | 0 | Ordering::AcqRel, |
178 | 0 | Ordering::Acquire, |
179 | 0 | ); |
180 | 0 | if let Err(old) = result { |
181 | 0 | // SAFETY: We created 'ptr' via Box::into_raw above, so turning |
182 | 0 | // it back into a Box via from_raw is safe. |
183 | 0 | drop(unsafe { Box::from_raw(ptr) }); |
184 | 0 | ptr = old; |
185 | 0 | } |
186 | | // SAFETY: We just set the pointer above to a non-null value, even |
187 | | // in the error case, and set it to a fully initialized value |
188 | | // returned by 'create'. |
189 | 0 | unsafe { &*ptr } |
190 | 0 | } |
191 | | |
192 | | /// If this lazy value has been initialized successfully, then return |
193 | | /// that value. Otherwise return None immediately. This never attempts |
194 | | /// to run initialization itself. |
195 | 0 | fn poll(&self) -> Option<&T> { |
196 | 0 | let ptr = self.data.load(Ordering::Acquire); |
197 | 0 | if ptr.is_null() { |
198 | 0 | return None; |
199 | 0 | } |
200 | 0 | // SAFETY: We just checked that the pointer is not null. Since it's |
201 | 0 | // not null, it must have been fully initialized by 'get' at some |
202 | 0 | // point. |
203 | 0 | Some(unsafe { &*ptr }) |
204 | 0 | } |
205 | | } |
206 | | |
207 | | impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> { |
208 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
209 | 0 | f.debug_struct("Lazy").field("data", &self.poll()).finish() |
210 | 0 | } |
211 | | } |
212 | | |
213 | | impl<T, F> Drop for Lazy<T, F> { |
214 | 0 | fn drop(&mut self) { |
215 | 0 | let ptr = *self.data.get_mut(); |
216 | 0 | if !ptr.is_null() { |
217 | 0 | // SAFETY: We just checked that 'ptr' is not null. And since |
218 | 0 | // we have exclusive access, there are no races to worry about. |
219 | 0 | drop(unsafe { Box::from_raw(ptr) }); |
220 | 0 | } |
221 | 0 | } |
222 | | } |
223 | | } |
224 | | |
225 | | #[cfg(not(feature = "alloc"))] |
226 | | mod lazy { |
227 | | use core::{ |
228 | | cell::Cell, |
229 | | fmt, |
230 | | mem::MaybeUninit, |
231 | | panic::{RefUnwindSafe, UnwindSafe}, |
232 | | sync::atomic::{AtomicU8, Ordering}, |
233 | | }; |
234 | | |
235 | | /// Our 'Lazy' value can be in one of three states: |
236 | | /// |
237 | | /// * INIT is where it starts, and also ends up back here if the |
238 | | /// 'create' routine panics. |
239 | | /// * BUSY is where it sits while initialization is running in exactly |
240 | | /// one thread. |
241 | | /// * DONE is where it sits after 'create' has completed and 'data' has |
242 | | /// been fully initialized. |
243 | | const LAZY_STATE_INIT: u8 = 0; |
244 | | const LAZY_STATE_BUSY: u8 = 1; |
245 | | const LAZY_STATE_DONE: u8 = 2; |
246 | | |
247 | | /// A non-alloc non-std lazy initialized value. |
248 | | /// |
249 | | /// This guarantees initialization only happens once, but uses a spinlock |
250 | | /// to block in the case of simultaneous access. Blocking occurs so that |
251 | | /// one thread waits while another thread initializes the value. |
252 | | /// |
253 | | /// I would much rather have the semantics of the 'alloc' Lazy type above. |
254 | | /// Namely, that we might run the initialization function more than once, |
255 | | /// but we never otherwise block. However, I don't know how to do that in |
256 | | /// a non-alloc non-std context. |
257 | | pub(super) struct Lazy<T, F> { |
258 | | state: AtomicU8, |
259 | | create: Cell<Option<F>>, |
260 | | data: Cell<MaybeUninit<T>>, |
261 | | } |
262 | | |
263 | | // SAFETY: So long as T and &T (and F and &F) can themselves be safely |
264 | | // shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only |
265 | | // permits accessing a &T and initialization is free of data races. So if T |
266 | | // is thread safe, then so to is Lazy<T, _>. |
267 | | unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {} |
268 | | // A reference to a Lazy is unwind safe because we specifically take |
269 | | // precautions to poison all accesses to a Lazy if the caller-provided |
270 | | // 'create' function panics. |
271 | | impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe |
272 | | for Lazy<T, F> |
273 | | { |
274 | | } |
275 | | |
276 | | impl<T, F> Lazy<T, F> { |
277 | | /// Create a new non-alloc non-std lazy value that is initialized |
278 | | /// exactly once on first use using the given function. |
279 | | pub(super) const fn new(create: F) -> Lazy<T, F> { |
280 | | Lazy { |
281 | | state: AtomicU8::new(LAZY_STATE_INIT), |
282 | | create: Cell::new(Some(create)), |
283 | | data: Cell::new(MaybeUninit::uninit()), |
284 | | } |
285 | | } |
286 | | } |
287 | | |
288 | | impl<T, F: FnOnce() -> T> Lazy<T, F> { |
289 | | /// Get the underlying lazy value. If it isn't been initialized |
290 | | /// yet, then either initialize it or block until some other thread |
291 | | /// initializes it. If the 'create' function given to Lazy::new panics |
292 | | /// (even in another thread), then this panics too. |
293 | | pub(super) fn get(&self) -> &T { |
294 | | // This is effectively a spinlock. We loop until we enter a DONE |
295 | | // state, and if possible, initialize it ourselves. The only way |
296 | | // we exit the loop is if 'create' panics, we initialize 'data' or |
297 | | // some other thread initializes 'data'. |
298 | | // |
299 | | // Yes, I have read spinlocks considered harmful[1]. And that |
300 | | // article is why this spinlock is only active when 'alloc' isn't |
301 | | // enabled. I did this because I don't think there is really |
302 | | // another choice without 'alloc', other than not providing this at |
303 | | // all. But I think that's a big bummer. |
304 | | // |
305 | | // [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html |
306 | | while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE { |
307 | | // Check if we're the first ones to get here. If so, we'll be |
308 | | // the ones who initialize. |
309 | | let result = self.state.compare_exchange( |
310 | | LAZY_STATE_INIT, |
311 | | LAZY_STATE_BUSY, |
312 | | Ordering::AcqRel, |
313 | | Ordering::Acquire, |
314 | | ); |
315 | | // This means we saw the INIT state and nobody else can. So we |
316 | | // must take responsibility for initializing. And by virtue of |
317 | | // observing INIT, we have also told anyone else trying to |
318 | | // get here that we are BUSY. If someone else sees BUSY, then |
319 | | // they will spin until we finish initialization. |
320 | | if let Ok(_) = result { |
321 | | // Since we are guaranteed to be the only ones here, we |
322 | | // know that 'create' is there... Unless someone else got |
323 | | // here before us and 'create' panicked. In which case, |
324 | | // 'self.create' is now 'None' and we forward the panic |
325 | | // to the caller. (i.e., We implement poisoning.) |
326 | | // |
327 | | // SAFETY: Our use of 'self.state' guarantees that we are |
328 | | // the only thread executing this line, and thus there are |
329 | | // no races. |
330 | | let create = unsafe { |
331 | | (*self.create.as_ptr()).take().expect( |
332 | | "Lazy's create function panicked, \ |
333 | | preventing initialization, |
334 | | poisoning current thread", |
335 | | ) |
336 | | }; |
337 | | let guard = Guard { state: &self.state }; |
338 | | // SAFETY: Our use of 'self.state' guarantees that we are |
339 | | // the only thread executing this line, and thus there are |
340 | | // no races. |
341 | | unsafe { |
342 | | (*self.data.as_ptr()).as_mut_ptr().write(create()); |
343 | | } |
344 | | // All is well. 'self.create' ran successfully, so we |
345 | | // forget the guard. |
346 | | core::mem::forget(guard); |
347 | | // Everything is initialized, so we can declare success. |
348 | | self.state.store(LAZY_STATE_DONE, Ordering::Release); |
349 | | break; |
350 | | } |
351 | | core::hint::spin_loop(); |
352 | | } |
353 | | // We only get here if data is fully initialized, and thus poll |
354 | | // will always return something. |
355 | | self.poll().unwrap() |
356 | | } |
357 | | |
358 | | /// If this lazy value has been initialized successfully, then return |
359 | | /// that value. Otherwise return None immediately. This never blocks. |
360 | | fn poll(&self) -> Option<&T> { |
361 | | if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE { |
362 | | // SAFETY: The DONE state only occurs when data has been fully |
363 | | // initialized. |
364 | | Some(unsafe { &*(*self.data.as_ptr()).as_ptr() }) |
365 | | } else { |
366 | | None |
367 | | } |
368 | | } |
369 | | } |
370 | | |
371 | | impl<T: fmt::Debug, F: FnMut() -> T> fmt::Debug for Lazy<T, F> { |
372 | | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
373 | | f.debug_struct("Lazy") |
374 | | .field("state", &self.state.load(Ordering::Acquire)) |
375 | | .field("create", &"<closure>") |
376 | | .field("data", &self.poll()) |
377 | | .finish() |
378 | | } |
379 | | } |
380 | | |
381 | | impl<T, F> Drop for Lazy<T, F> { |
382 | | fn drop(&mut self) { |
383 | | if *self.state.get_mut() == LAZY_STATE_DONE { |
384 | | // SAFETY: state is DONE if and only if data has been fully |
385 | | // initialized. At which point, it is safe to drop. |
386 | | unsafe { |
387 | | self.data.get_mut().assume_init_drop(); |
388 | | } |
389 | | } |
390 | | } |
391 | | } |
392 | | |
393 | | /// A guard that will reset a Lazy's state back to INIT when dropped. The |
394 | | /// idea here is to 'forget' this guard on success. On failure (when a |
395 | | /// panic occurs), the Drop impl runs and causes all in-progress and future |
396 | | /// 'get' calls to panic. Without this guard, all in-progress and future |
397 | | /// 'get' calls would spin forever. Crashing is much better than getting |
398 | | /// stuck in an infinite loop. |
399 | | struct Guard<'a> { |
400 | | state: &'a AtomicU8, |
401 | | } |
402 | | |
403 | | impl<'a> Drop for Guard<'a> { |
404 | | fn drop(&mut self) { |
405 | | // We force ourselves back into an INIT state. This will in turn |
406 | | // cause any future 'get' calls to attempt calling 'self.create' |
407 | | // again which will in turn panic because 'self.create' will now |
408 | | // be 'None'. |
409 | | self.state.store(LAZY_STATE_INIT, Ordering::Release); |
410 | | } |
411 | | } |
412 | | } |
413 | | |
414 | | #[cfg(test)] |
415 | | mod tests { |
416 | | use super::*; |
417 | | |
418 | | fn assert_send<T: Send>() {} |
419 | | fn assert_sync<T: Sync>() {} |
420 | | fn assert_unwind<T: core::panic::UnwindSafe>() {} |
421 | | fn assert_refunwind<T: core::panic::RefUnwindSafe>() {} |
422 | | |
423 | | #[test] |
424 | | fn oibits() { |
425 | | assert_send::<Lazy<u64>>(); |
426 | | assert_sync::<Lazy<u64>>(); |
427 | | assert_unwind::<Lazy<u64>>(); |
428 | | assert_refunwind::<Lazy<u64>>(); |
429 | | } |
430 | | |
431 | | // This is a regression test because we used to rely on the inferred Sync |
432 | | // impl for the Lazy type defined above (for 'alloc' mode). In the |
433 | | // inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But |
434 | | // if we have that, we can actually make use of the fact that Lazy<T> drops |
435 | | // T to create a value on one thread and drop it on another. This *should* |
436 | | // require T: Send, but our missing bounds before let it sneak by. |
437 | | // |
438 | | // Basically, this test should not compile, so we... comment it out. We |
439 | | // don't have a great way of testing compile-fail tests right now. |
440 | | // |
441 | | // See: https://github.com/BurntSushi/regex-automata/issues/30 |
442 | | /* |
443 | | #[test] |
444 | | fn sync_not_send() { |
445 | | #[allow(dead_code)] |
446 | | fn inner<T: Sync + Default>() { |
447 | | let lazy = Lazy::new(move || T::default()); |
448 | | std::thread::scope(|scope| { |
449 | | scope.spawn(|| { |
450 | | Lazy::get(&lazy); // We create T in this thread |
451 | | }); |
452 | | }); |
453 | | // And drop in this thread. |
454 | | drop(lazy); |
455 | | // So we have send a !Send type over threads. (with some more |
456 | | // legwork, its possible to even sneak the value out of drop |
457 | | // through thread local) |
458 | | } |
459 | | } |
460 | | */ |
461 | | } |