Coverage Report

Created: 2025-08-26 06:30

/rust/registry/src/index.crates.io-6f17d22bba15001f/arbitrary-1.4.2/src/lib.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright © 2019 The Rust Fuzz Project Developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
//! The `Arbitrary` trait crate.
10
//!
11
//! This trait provides an [`Arbitrary`] trait to
12
//! produce well-typed, structured values, from raw, byte buffers. It is
13
//! generally intended to be used with fuzzers like AFL or libFuzzer. See the
14
//! [`Arbitrary`] trait's documentation for details on
15
//! automatically deriving, implementing, and/or using the trait.
16
17
#![deny(bad_style)]
18
#![deny(missing_docs)]
19
#![deny(future_incompatible)]
20
#![deny(nonstandard_style)]
21
#![deny(rust_2018_compatibility)]
22
#![deny(rust_2018_idioms)]
23
#![deny(unused)]
24
25
mod error;
26
mod foreign;
27
pub mod size_hint;
28
pub mod unstructured;
29
30
#[cfg(test)]
31
mod tests;
32
33
pub use error::*;
34
35
#[cfg(feature = "derive_arbitrary")]
36
pub use derive_arbitrary::*;
37
38
#[doc(inline)]
39
pub use unstructured::Unstructured;
40
41
/// Error indicating that the maximum recursion depth has been reached while calculating [`Arbitrary::size_hint`]()
42
#[derive(Debug, Clone)]
43
#[non_exhaustive]
44
pub struct MaxRecursionReached {}
45
46
impl core::fmt::Display for MaxRecursionReached {
47
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
48
0
        f.write_str("Maximum recursion depth has been reached")
49
0
    }
50
}
51
52
impl std::error::Error for MaxRecursionReached {}
53
54
/// Generate arbitrary structured values from raw, unstructured data.
55
///
56
/// The `Arbitrary` trait allows you to generate valid structured values, like
57
/// `HashMap`s, or ASTs, or `MyTomlConfig`, or any other data structure from
58
/// raw, unstructured bytes provided by a fuzzer.
59
///
60
/// # Deriving `Arbitrary`
61
///
62
/// Automatically deriving the `Arbitrary` trait is the recommended way to
63
/// implement `Arbitrary` for your types.
64
///
65
/// Using the custom derive requires that you enable the `"derive"` cargo
66
/// feature in your `Cargo.toml`:
67
///
68
/// ```toml
69
/// [dependencies]
70
/// arbitrary = { version = "1", features = ["derive"] }
71
/// ```
72
///
73
/// Then, you add the `#[derive(Arbitrary)]` annotation to your `struct` or
74
/// `enum` type definition:
75
///
76
/// ```
77
/// # #[cfg(feature = "derive")] mod foo {
78
/// use arbitrary::Arbitrary;
79
/// use std::collections::HashSet;
80
///
81
/// #[derive(Arbitrary)]
82
/// pub struct AddressBook {
83
///     friends: HashSet<Friend>,
84
/// }
85
///
86
/// #[derive(Arbitrary, Hash, Eq, PartialEq)]
87
/// pub enum Friend {
88
///     Buddy { name: String },
89
///     Pal { age: usize },
90
/// }
91
/// # }
92
/// ```
93
///
94
/// Every member of the `struct` or `enum` must also implement `Arbitrary`.
95
///
96
/// It is also possible to change the default bounds added by the derive:
97
///
98
/// ```
99
/// # #[cfg(feature = "derive")] mod foo {
100
/// use arbitrary::Arbitrary;
101
///
102
/// trait Trait {
103
///     type Assoc: for<'a> Arbitrary<'a>;
104
/// }
105
///
106
/// #[derive(Arbitrary)]
107
/// // The bounds are used verbatim, so any existing trait bounds will need to be repeated.
108
/// #[arbitrary(bound = "T: Trait")]
109
/// struct Point<T: Trait> {
110
///     x: T::Assoc,
111
/// }
112
/// # }
113
/// ```
114
///
115
/// # Implementing `Arbitrary` By Hand
116
///
117
/// Implementing `Arbitrary` mostly involves nested calls to other `Arbitrary`
118
/// arbitrary implementations for each of your `struct` or `enum`'s members. But
119
/// sometimes you need some amount of raw data, or you need to generate a
120
/// variably-sized collection type, or something of that sort. The
121
/// [`Unstructured`] type helps you with these tasks.
122
///
123
/// ```
124
/// # #[cfg(feature = "derive")] mod foo {
125
/// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
126
/// # impl<T> MyCollection<T> {
127
/// #     pub fn new() -> Self { MyCollection { _t: std::marker::PhantomData } }
128
/// #     pub fn insert(&mut self, element: T) {}
129
/// # }
130
/// use arbitrary::{Arbitrary, Result, Unstructured};
131
///
132
/// impl<'a, T> Arbitrary<'a> for MyCollection<T>
133
/// where
134
///     T: Arbitrary<'a>,
135
/// {
136
///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
137
///         // Get an iterator of arbitrary `T`s.
138
///         let iter = u.arbitrary_iter::<T>()?;
139
///
140
///         // And then create a collection!
141
///         let mut my_collection = MyCollection::new();
142
///         for elem_result in iter {
143
///             let elem = elem_result?;
144
///             my_collection.insert(elem);
145
///         }
146
///
147
///         Ok(my_collection)
148
///     }
149
/// }
150
/// # }
151
/// ```
152
///
153
/// # A Note On Output Distributions
154
///
155
/// There is no requirement for a particular distribution of the values. For
156
/// example, it is not required that every value appears with the same
157
/// probability. That being said, the main use for `Arbitrary` is for fuzzing,
158
/// so in many cases a uniform distribution will make the most sense in order to
159
/// provide the best coverage of the domain. In other cases this is not
160
/// desirable or even possible, for example when sampling from a uniform
161
/// distribution is computationally expensive or in the case of collections that
162
/// may grow indefinitely.
163
pub trait Arbitrary<'a>: Sized {
164
    /// Generate an arbitrary value of `Self` from the given unstructured data.
165
    ///
166
    /// Calling `Arbitrary::arbitrary` requires that you have some raw data,
167
    /// perhaps given to you by a fuzzer like AFL or libFuzzer. You wrap this
168
    /// raw data in an `Unstructured`, and then you can call `<MyType as
169
    /// Arbitrary>::arbitrary` to construct an arbitrary instance of `MyType`
170
    /// from that unstructured data.
171
    ///
172
    /// Implementations may return an error if there is not enough data to
173
    /// construct a full instance of `Self`, or they may fill out the rest of
174
    /// `Self` with dummy values. Using dummy values when the underlying data is
175
    /// exhausted can help avoid accidentally "defeating" some of the fuzzer's
176
    /// mutations to the underlying byte stream that might otherwise lead to
177
    /// interesting runtime behavior or new code coverage if only we had just a
178
    /// few more bytes. However, it also requires that implementations for
179
    /// recursive types (e.g. `struct Foo(Option<Box<Foo>>)`) avoid infinite
180
    /// recursion when the underlying data is exhausted.
181
    ///
182
    /// ```
183
    /// # #[cfg(feature = "derive")] fn foo() {
184
    /// use arbitrary::{Arbitrary, Unstructured};
185
    ///
186
    /// #[derive(Arbitrary)]
187
    /// pub struct MyType {
188
    ///     // ...
189
    /// }
190
    ///
191
    /// // Get the raw data from the fuzzer or wherever else.
192
    /// # let get_raw_data_from_fuzzer = || &[];
193
    /// let raw_data: &[u8] = get_raw_data_from_fuzzer();
194
    ///
195
    /// // Wrap that raw data in an `Unstructured`.
196
    /// let mut unstructured = Unstructured::new(raw_data);
197
    ///
198
    /// // Generate an arbitrary instance of `MyType` and do stuff with it.
199
    /// if let Ok(value) = MyType::arbitrary(&mut unstructured) {
200
    /// #   let do_stuff = |_| {};
201
    ///     do_stuff(value);
202
    /// }
203
    /// # }
204
    /// ```
205
    ///
206
    /// See also the documentation for [`Unstructured`].
207
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self>;
208
209
    /// Generate an arbitrary value of `Self` from the entirety of the given
210
    /// unstructured data.
211
    ///
212
    /// This is similar to Arbitrary::arbitrary, however it assumes that it is
213
    /// the last consumer of the given data, and is thus able to consume it all
214
    /// if it needs.  See also the documentation for
215
    /// [`Unstructured`].
216
0
    fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result<Self> {
217
0
        Self::arbitrary(&mut u)
218
0
    }
219
220
    /// Get a size hint for how many bytes out of an `Unstructured` this type
221
    /// needs to construct itself.
222
    ///
223
    /// This is useful for determining how many elements we should insert when
224
    /// creating an arbitrary collection.
225
    ///
226
    /// The return value is similar to [`Iterator::size_hint`]: it returns a
227
    /// tuple where the first element is a lower bound on the number of bytes
228
    /// required, and the second element is an optional upper bound.
229
    ///
230
    /// The default implementation return `(0, None)` which is correct for any
231
    /// type, but not ultimately that useful. Using `#[derive(Arbitrary)]` will
232
    /// create a better implementation. If you are writing an `Arbitrary`
233
    /// implementation by hand, and your type can be part of a dynamically sized
234
    /// collection (such as `Vec`), you are strongly encouraged to override this
235
    /// default with a better implementation, and also override
236
    /// [`try_size_hint`].
237
    ///
238
    /// ## How to implement this
239
    ///
240
    /// If the size hint calculation is a trivial constant and does not recurse
241
    /// into any other `size_hint` call, you should implement it in `size_hint`:
242
    ///
243
    /// ```
244
    /// use arbitrary::{size_hint, Arbitrary, Result, Unstructured};
245
    ///
246
    /// struct SomeStruct(u8);
247
    ///
248
    /// impl<'a> Arbitrary<'a> for SomeStruct {
249
    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
250
    ///         let buf = &mut [0];
251
    ///         u.fill_buffer(buf)?;
252
    ///         Ok(SomeStruct(buf[0]))
253
    ///     }
254
    ///
255
    ///     #[inline]
256
    ///     fn size_hint(depth: usize) -> (usize, Option<usize>) {
257
    ///         let _ = depth;
258
    ///         (1, Some(1))
259
    ///     }
260
    /// }
261
    /// ```
262
    ///
263
    /// Otherwise, it should instead be implemented in [`try_size_hint`],
264
    /// and the `size_hint` implementation should forward to it:
265
    ///
266
    /// ```
267
    /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Result, Unstructured};
268
    ///
269
    /// struct SomeStruct<A, B> {
270
    ///     a: A,
271
    ///     b: B,
272
    /// }
273
    ///
274
    /// impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for SomeStruct<A, B> {
275
    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
276
    ///         // ...
277
    /// #       todo!()
278
    ///     }
279
    ///
280
    ///     fn size_hint(depth: usize) -> (usize, Option<usize>) {
281
    ///         // Return the value of try_size_hint
282
    ///         //
283
    ///         // If the recursion fails, return the default, always valid `(0, None)`
284
    ///         Self::try_size_hint(depth).unwrap_or_default()
285
    ///     }
286
    ///
287
    ///     fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
288
    ///         // Protect against potential infinite recursion with
289
    ///         // `try_recursion_guard`.
290
    ///         size_hint::try_recursion_guard(depth, |depth| {
291
    ///             // If we aren't too deep, then `recursion_guard` calls
292
    ///             // this closure, which implements the natural size hint.
293
    ///             // Don't forget to use the new `depth` in all nested
294
    ///             // `try_size_hint` calls! We recommend shadowing the
295
    ///             // parameter, like what is done here, so that you can't
296
    ///             // accidentally use the wrong depth.
297
    ///             Ok(size_hint::and(
298
    ///                 <A as Arbitrary>::try_size_hint(depth)?,
299
    ///                 <B as Arbitrary>::try_size_hint(depth)?,
300
    ///             ))
301
    ///         })
302
    ///     }
303
    /// }
304
    /// ```
305
    ///
306
    /// ## Invariant
307
    ///
308
    /// It must be possible to construct every possible output using only inputs
309
    /// of lengths bounded by these parameters. This applies to both
310
    /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
311
    ///
312
    /// This is trivially true for `(0, None)`. To restrict this further, it
313
    /// must be proven that all inputs that are now excluded produced redundant
314
    /// outputs which are still possible to produce using the reduced input
315
    /// space.
316
    ///
317
    /// [iterator-size-hint]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.size_hint
318
    /// [`try_size_hint`]: Arbitrary::try_size_hint
319
    #[inline]
320
0
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
321
0
        let _ = depth;
322
0
        (0, None)
323
0
    }
324
325
    /// Get a size hint for how many bytes out of an `Unstructured` this type
326
    /// needs to construct itself.
327
    ///
328
    /// Unlike [`size_hint`], this function keeps the information that the
329
    /// recursion limit was reached. This is required to "short circuit" the
330
    /// calculation and avoid exponential blowup with recursive structures.
331
    ///
332
    /// If you are implementing [`size_hint`] for a struct that could be
333
    /// recursive, you should implement `try_size_hint` and call the
334
    /// `try_size_hint` when recursing
335
    ///
336
    ///
337
    /// The return value is similar to [`core::iter::Iterator::size_hint`]: it
338
    /// returns a tuple where the first element is a lower bound on the number
339
    /// of bytes required, and the second element is an optional upper bound.
340
    ///
341
    /// The default implementation returns the value of [`size_hint`] which is
342
    /// correct for any type, but might lead to exponential blowup when dealing
343
    /// with recursive types.
344
    ///
345
    /// ## Invariant
346
    ///
347
    /// It must be possible to construct every possible output using only inputs
348
    /// of lengths bounded by these parameters. This applies to both
349
    /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
350
    ///
351
    /// This is trivially true for `(0, None)`. To restrict this further, it
352
    /// must be proven that all inputs that are now excluded produced redundant
353
    /// outputs which are still possible to produce using the reduced input
354
    /// space.
355
    ///
356
    /// ## When to implement `try_size_hint`
357
    ///
358
    /// If you 100% know that the type you are implementing `Arbitrary` for is
359
    /// not a recursive type, or your implementation is not transitively calling
360
    /// any other `size_hint` methods, you may implement [`size_hint`], and the
361
    /// default `try_size_hint` implementation will use it.
362
    ///
363
    /// Note that if you are implementing `Arbitrary` for a generic type, you
364
    /// cannot guarantee the lack of type recursion!
365
    ///
366
    /// Otherwise, when there is possible type recursion, you should implement
367
    /// `try_size_hint` instead.
368
    ///
369
    /// ## The `depth` parameter
370
    ///
371
    /// When implementing `try_size_hint`, you need to use
372
    /// [`arbitrary::size_hint::try_recursion_guard(depth)`][crate::size_hint::try_recursion_guard]
373
    /// to prevent potential infinite recursion when calculating size hints for
374
    /// potentially recursive types:
375
    ///
376
    /// ```
377
    /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Unstructured};
378
    ///
379
    /// // This can potentially be a recursive type if `L` or `R` contain
380
    /// // something like `Box<Option<MyEither<L, R>>>`!
381
    /// enum MyEither<L, R> {
382
    ///     Left(L),
383
    ///     Right(R),
384
    /// }
385
    ///
386
    /// impl<'a, L, R> Arbitrary<'a> for MyEither<L, R>
387
    /// where
388
    ///     L: Arbitrary<'a>,
389
    ///     R: Arbitrary<'a>,
390
    /// {
391
    ///     fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
392
    ///         // ...
393
    /// #       unimplemented!()
394
    ///     }
395
    ///
396
    ///     fn size_hint(depth: usize) -> (usize, Option<usize>) {
397
    ///         // Return the value of `try_size_hint`
398
    ///         //
399
    ///         // If the recursion fails, return the default `(0, None)` range,
400
    ///         // which is always valid.
401
    ///         Self::try_size_hint(depth).unwrap_or_default()
402
    ///     }
403
    ///
404
    ///     fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
405
    ///         // Protect against potential infinite recursion with
406
    ///         // `try_recursion_guard`.
407
    ///         size_hint::try_recursion_guard(depth, |depth| {
408
    ///             // If we aren't too deep, then `recursion_guard` calls
409
    ///             // this closure, which implements the natural size hint.
410
    ///             // Don't forget to use the new `depth` in all nested
411
    ///             // `try_size_hint` calls! We recommend shadowing the
412
    ///             // parameter, like what is done here, so that you can't
413
    ///             // accidentally use the wrong depth.
414
    ///             Ok(size_hint::or(
415
    ///                 <L as Arbitrary>::try_size_hint(depth)?,
416
    ///                 <R as Arbitrary>::try_size_hint(depth)?,
417
    ///             ))
418
    ///         })
419
    ///     }
420
    /// }
421
    /// ```
422
    #[inline]
423
0
    fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
424
0
        Ok(Self::size_hint(depth))
425
0
    }
426
}
427
428
#[cfg(test)]
429
mod test {
430
    use super::*;
431
432
    #[test]
433
    fn exhausted_entropy() {
434
        let mut u = Unstructured::new(&[]);
435
        assert_eq!(u.arbitrary::<bool>().unwrap(), false);
436
        assert_eq!(u.arbitrary::<u8>().unwrap(), 0);
437
        assert_eq!(u.arbitrary::<usize>().unwrap(), 0);
438
        assert_eq!(u.arbitrary::<f32>().unwrap(), 0.0);
439
        assert_eq!(u.arbitrary::<f64>().unwrap(), 0.0);
440
        assert_eq!(u.arbitrary::<Option<u32>>().unwrap(), None);
441
        assert_eq!(u.int_in_range(4..=100).unwrap(), 4);
442
        assert_eq!(u.choose_index(10).unwrap(), 0);
443
        assert_eq!(u.ratio(5, 7).unwrap(), true);
444
    }
445
}
446
447
/// Multiple conflicting arbitrary attributes are used on the same field:
448
/// ```compile_fail
449
/// #[derive(::arbitrary::Arbitrary)]
450
/// struct Point {
451
///     #[arbitrary(value = 2)]
452
///     #[arbitrary(value = 2)]
453
///     x: i32,
454
/// }
455
/// ```
456
///
457
/// An unknown attribute:
458
/// ```compile_fail
459
/// #[derive(::arbitrary::Arbitrary)]
460
/// struct Point {
461
///     #[arbitrary(unknown_attr)]
462
///     x: i32,
463
/// }
464
/// ```
465
///
466
/// An unknown attribute with a value:
467
/// ```compile_fail
468
/// #[derive(::arbitrary::Arbitrary)]
469
/// struct Point {
470
///     #[arbitrary(unknown_attr = 13)]
471
///     x: i32,
472
/// }
473
/// ```
474
///
475
/// `value` without RHS:
476
/// ```compile_fail
477
/// #[derive(::arbitrary::Arbitrary)]
478
/// struct Point {
479
///     #[arbitrary(value)]
480
///     x: i32,
481
/// }
482
/// ```
483
///
484
/// `with` without RHS:
485
/// ```compile_fail
486
/// #[derive(::arbitrary::Arbitrary)]
487
/// struct Point {
488
///     #[arbitrary(with)]
489
///     x: i32,
490
/// }
491
/// ```
492
///
493
/// Multiple conflicting bounds at the container-level:
494
/// ```compile_fail
495
/// #[derive(::arbitrary::Arbitrary)]
496
/// #[arbitrary(bound = "T: Default")]
497
/// #[arbitrary(bound = "T: Default")]
498
/// struct Point<T: Default> {
499
///     #[arbitrary(default)]
500
///     x: T,
501
/// }
502
/// ```
503
///
504
/// Multiple conflicting bounds in a single bound attribute:
505
/// ```compile_fail
506
/// #[derive(::arbitrary::Arbitrary)]
507
/// #[arbitrary(bound = "T: Default, T: Default")]
508
/// struct Point<T: Default> {
509
///     #[arbitrary(default)]
510
///     x: T,
511
/// }
512
/// ```
513
///
514
/// Multiple conflicting bounds in multiple bound attributes:
515
/// ```compile_fail
516
/// #[derive(::arbitrary::Arbitrary)]
517
/// #[arbitrary(bound = "T: Default", bound = "T: Default")]
518
/// struct Point<T: Default> {
519
///     #[arbitrary(default)]
520
///     x: T,
521
/// }
522
/// ```
523
///
524
/// Too many bounds supplied:
525
/// ```compile_fail
526
/// #[derive(::arbitrary::Arbitrary)]
527
/// #[arbitrary(bound = "T: Default")]
528
/// struct Point {
529
///     x: i32,
530
/// }
531
/// ```
532
///
533
/// Too many bounds supplied across multiple attributes:
534
/// ```compile_fail
535
/// #[derive(::arbitrary::Arbitrary)]
536
/// #[arbitrary(bound = "T: Default")]
537
/// #[arbitrary(bound = "U: Default")]
538
/// struct Point<T: Default> {
539
///     #[arbitrary(default)]
540
///     x: T,
541
/// }
542
/// ```
543
///
544
/// Attempt to use the derive attribute on an enum variant:
545
/// ```compile_fail
546
/// #[derive(::arbitrary::Arbitrary)]
547
/// enum Enum<T: Default> {
548
///     #[arbitrary(default)]
549
///     Variant(T),
550
/// }
551
/// ```
552
#[cfg(all(doctest, feature = "derive"))]
553
pub struct CompileFailTests;
554
555
// Support for `#[derive(Arbitrary)]`.
556
#[doc(hidden)]
557
#[cfg(feature = "derive")]
558
pub mod details {
559
    use super::*;
560
561
    // Hidden trait that papers over the difference between `&mut Unstructured` and
562
    // `Unstructured` arguments so that `with_recursive_count` can be used for both
563
    // `arbitrary` and `arbitrary_take_rest`.
564
    pub trait IsEmpty {
565
        fn is_empty(&self) -> bool;
566
    }
567
568
    impl IsEmpty for Unstructured<'_> {
569
        fn is_empty(&self) -> bool {
570
            Unstructured::is_empty(self)
571
        }
572
    }
573
574
    impl IsEmpty for &mut Unstructured<'_> {
575
        fn is_empty(&self) -> bool {
576
            Unstructured::is_empty(self)
577
        }
578
    }
579
580
    // Calls `f` with a recursive count guard.
581
    #[inline]
582
    pub fn with_recursive_count<U: IsEmpty, R>(
583
        u: U,
584
        recursive_count: &'static std::thread::LocalKey<std::cell::Cell<u32>>,
585
        f: impl FnOnce(U) -> Result<R>,
586
    ) -> Result<R> {
587
        let guard_against_recursion = u.is_empty();
588
        if guard_against_recursion {
589
            recursive_count.with(|count| {
590
                if count.get() > 0 {
591
                    return Err(Error::NotEnoughData);
592
                }
593
                count.set(count.get() + 1);
594
                Ok(())
595
            })?;
596
        }
597
598
        let result = f(u);
599
600
        if guard_against_recursion {
601
            recursive_count.with(|count| {
602
                count.set(count.get() - 1);
603
            });
604
        }
605
606
        result
607
    }
608
}