Coverage Report

Created: 2024-09-08 06:35

/rust/registry/src/index.crates.io-6f17d22bba15001f/arbitrary-1.1.6/src/unstructured.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright © 2019 The Rust Fuzz Project Developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
//! Wrappers around raw, unstructured bytes.
10
11
use crate::{Arbitrary, Error, Result};
12
use std::marker::PhantomData;
13
use std::ops::ControlFlow;
14
use std::{mem, ops};
15
16
/// A source of unstructured data.
17
///
18
/// An `Unstructured` helps `Arbitrary` implementations interpret raw data
19
/// (typically provided by a fuzzer) as a "DNA string" that describes how to
20
/// construct the `Arbitrary` type. The goal is that a small change to the "DNA
21
/// string" (the raw data wrapped by an `Unstructured`) results in a small
22
/// change to the generated `Arbitrary` instance. This helps a fuzzer
23
/// efficiently explore the `Arbitrary`'s input space.
24
///
25
/// `Unstructured` is deterministic: given the same raw data, the same series of
26
/// API calls will return the same results (modulo system resource constraints,
27
/// like running out of memory). However, `Unstructured` does not guarantee
28
/// anything beyond that: it makes not guarantee that it will yield bytes from
29
/// the underlying data in any particular order.
30
///
31
/// You shouldn't generally need to use an `Unstructured` unless you are writing
32
/// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly,
33
/// you should just be passing it through to nested `Arbitrary::arbitrary`
34
/// calls.
35
///
36
/// # Example
37
///
38
/// Imagine you were writing a color conversion crate. You might want to write
39
/// fuzz tests that take a random RGB color and assert various properties, run
40
/// functions and make sure nothing panics, etc.
41
///
42
/// Below is what translating the fuzzer's raw input into an `Unstructured` and
43
/// using that to generate an arbitrary RGB color might look like:
44
///
45
/// ```
46
/// # #[cfg(feature = "derive")] fn foo() {
47
/// use arbitrary::{Arbitrary, Unstructured};
48
///
49
/// /// An RGB color.
50
/// #[derive(Arbitrary)]
51
/// pub struct Rgb {
52
///     r: u8,
53
///     g: u8,
54
///     b: u8,
55
/// }
56
///
57
/// // Get the raw bytes from the fuzzer.
58
/// #   let get_input_from_fuzzer = || &[];
59
/// let raw_data: &[u8] = get_input_from_fuzzer();
60
///
61
/// // Wrap it in an `Unstructured`.
62
/// let mut unstructured = Unstructured::new(raw_data);
63
///
64
/// // Generate an `Rgb` color and run our checks.
65
/// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) {
66
/// #   let run_my_color_conversion_checks = |_| {};
67
///     run_my_color_conversion_checks(rgb);
68
/// }
69
/// # }
70
/// ```
71
pub struct Unstructured<'a> {
72
    data: &'a [u8],
73
}
74
75
impl<'a> Unstructured<'a> {
76
    /// Create a new `Unstructured` from the given raw data.
77
    ///
78
    /// # Example
79
    ///
80
    /// ```
81
    /// use arbitrary::Unstructured;
82
    ///
83
    /// let u = Unstructured::new(&[1, 2, 3, 4]);
84
    /// ```
85
0
    pub fn new(data: &'a [u8]) -> Self {
86
0
        Unstructured { data }
87
0
    }
88
89
    /// Get the number of remaining bytes of underlying data that are still
90
    /// available.
91
    ///
92
    /// # Example
93
    ///
94
    /// ```
95
    /// use arbitrary::{Arbitrary, Unstructured};
96
    ///
97
    /// let mut u = Unstructured::new(&[1, 2, 3]);
98
    ///
99
    /// // Initially have three bytes of data.
100
    /// assert_eq!(u.len(), 3);
101
    ///
102
    /// // Generating a `bool` consumes one byte from the underlying data, so
103
    /// // we are left with two bytes afterwards.
104
    /// let _ = bool::arbitrary(&mut u);
105
    /// assert_eq!(u.len(), 2);
106
    /// ```
107
    #[inline]
108
0
    pub fn len(&self) -> usize {
109
0
        self.data.len()
110
0
    }
111
112
    /// Is the underlying unstructured data exhausted?
113
    ///
114
    /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`.
115
    ///
116
    /// # Example
117
    ///
118
    /// ```
119
    /// use arbitrary::{Arbitrary, Unstructured};
120
    ///
121
    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
122
    ///
123
    /// // Initially, we are not empty.
124
    /// assert!(!u.is_empty());
125
    ///
126
    /// // Generating a `u32` consumes all four bytes of the underlying data, so
127
    /// // we become empty afterwards.
128
    /// let _ = u32::arbitrary(&mut u);
129
    /// assert!(u.is_empty());
130
    /// ```
131
    #[inline]
132
0
    pub fn is_empty(&self) -> bool {
133
0
        self.len() == 0
134
0
    }
135
136
    /// Generate an arbitrary instance of `A`.
137
    ///
138
    /// This is simply a helper method that is equivalent to `<A as
139
    /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise,
140
    /// and can be used in situations where Rust's type inference will figure
141
    /// out what `A` should be.
142
    ///
143
    /// # Example
144
    ///
145
    /// ```
146
    /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> {
147
    /// use arbitrary::{Arbitrary, Unstructured};
148
    ///
149
    /// #[derive(Arbitrary)]
150
    /// struct MyType {
151
    ///     // ...
152
    /// }
153
    ///
154
    /// fn do_stuff(value: MyType) {
155
    /// #   let _ = value;
156
    ///     // ...
157
    /// }
158
    ///
159
    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
160
    ///
161
    /// // Rust's type inference can figure out that `value` should be of type
162
    /// // `MyType` here:
163
    /// let value = u.arbitrary()?;
164
    /// do_stuff(value);
165
    /// # Ok(()) }
166
    /// ```
167
0
    pub fn arbitrary<A>(&mut self) -> Result<A>
168
0
    where
169
0
        A: Arbitrary<'a>,
170
0
    {
171
0
        <A as Arbitrary<'a>>::arbitrary(self)
172
0
    }
173
174
    /// Get the number of elements to insert when building up a collection of
175
    /// arbitrary `ElementType`s.
176
    ///
177
    /// This uses the [`<ElementType as
178
    /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly
179
    /// choose a length such that we most likely have enough underlying bytes to
180
    /// construct that many arbitrary `ElementType`s.
181
    ///
182
    /// This should only be called within an `Arbitrary` implementation.
183
    ///
184
    /// # Example
185
    ///
186
    /// ```
187
    /// use arbitrary::{Arbitrary, Result, Unstructured};
188
    /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
189
    /// # impl<T> MyCollection<T> {
190
    /// #     pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } }
191
    /// #     pub fn insert(&mut self, element: T) {}
192
    /// # }
193
    ///
194
    /// impl<'a, T> Arbitrary<'a> for MyCollection<T>
195
    /// where
196
    ///     T: Arbitrary<'a>,
197
    /// {
198
    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
199
    ///         // Get the number of `T`s we should insert into our collection.
200
    ///         let len = u.arbitrary_len::<T>()?;
201
    ///
202
    ///         // And then create a collection of that length!
203
    ///         let mut my_collection = MyCollection::with_capacity(len);
204
    ///         for _ in 0..len {
205
    ///             let element = T::arbitrary(u)?;
206
    ///             my_collection.insert(element);
207
    ///         }
208
    ///
209
    ///         Ok(my_collection)
210
    ///     }
211
    /// }
212
    /// ```
213
0
    pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize>
214
0
    where
215
0
        ElementType: Arbitrary<'a>,
216
0
    {
217
0
        let byte_size = self.arbitrary_byte_size()?;
218
0
        let (lower, upper) = <ElementType as Arbitrary>::size_hint(0);
219
0
        let elem_size = upper.unwrap_or_else(|| lower * 2);
220
0
        let elem_size = std::cmp::max(1, elem_size);
221
0
        Ok(byte_size / elem_size)
222
0
    }
223
224
0
    fn arbitrary_byte_size(&mut self) -> Result<usize> {
225
0
        if self.data.is_empty() {
226
0
            Ok(0)
227
0
        } else if self.data.len() == 1 {
228
0
            self.data = &[];
229
0
            Ok(0)
230
        } else {
231
            // Take lengths from the end of the data, since the `libFuzzer` folks
232
            // found that this lets fuzzers more efficiently explore the input
233
            // space.
234
            //
235
            // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97
236
237
            // We only consume as many bytes as necessary to cover the entire
238
            // range of the byte string.
239
            // Note: We cast to u64 so we don't overflow when checking std::u32::MAX + 4 on 32-bit archs
240
0
            let len = if self.data.len() as u64 <= std::u8::MAX as u64 + 1 {
241
0
                let bytes = 1;
242
0
                let max_size = self.data.len() - bytes;
243
0
                let (rest, for_size) = self.data.split_at(max_size);
244
0
                self.data = rest;
245
0
                Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize
246
0
            } else if self.data.len() as u64 <= std::u16::MAX as u64 + 2 {
247
0
                let bytes = 2;
248
0
                let max_size = self.data.len() - bytes;
249
0
                let (rest, for_size) = self.data.split_at(max_size);
250
0
                self.data = rest;
251
0
                Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize
252
0
            } else if self.data.len() as u64 <= std::u32::MAX as u64 + 4 {
253
0
                let bytes = 4;
254
0
                let max_size = self.data.len() - bytes;
255
0
                let (rest, for_size) = self.data.split_at(max_size);
256
0
                self.data = rest;
257
0
                Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize
258
            } else {
259
0
                let bytes = 8;
260
0
                let max_size = self.data.len() - bytes;
261
0
                let (rest, for_size) = self.data.split_at(max_size);
262
0
                self.data = rest;
263
0
                Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize
264
            };
265
266
0
            Ok(len)
267
        }
268
0
    }
269
270
    /// Generate an integer within the given range.
271
    ///
272
    /// Do not use this to generate the size of a collection. Use
273
    /// `arbitrary_len` instead.
274
    ///
275
    /// # Panics
276
    ///
277
    /// Panics if `range.start > range.end`. That is, the given range must be
278
    /// non-empty.
279
    ///
280
    /// # Example
281
    ///
282
    /// ```
283
    /// use arbitrary::{Arbitrary, Unstructured};
284
    ///
285
    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
286
    ///
287
    /// let x: i32 = u.int_in_range(-5_000..=-1_000)
288
    ///     .expect("constructed `u` with enough bytes to generate an `i32`");
289
    ///
290
    /// assert!(-5_000 <= x);
291
    /// assert!(x <= -1_000);
292
    /// ```
293
0
    pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T>
294
0
    where
295
0
        T: Int,
296
0
    {
297
0
        let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?;
298
0
        self.data = &self.data[bytes_consumed..];
299
0
        Ok(result)
300
0
    }
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range::<usize>
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range::<u32>
301
302
0
    fn int_in_range_impl<T>(
303
0
        range: ops::RangeInclusive<T>,
304
0
        mut bytes: impl Iterator<Item = u8>,
305
0
    ) -> Result<(T, usize)>
306
0
    where
307
0
        T: Int,
308
0
    {
309
0
        let start = *range.start();
310
0
        let end = *range.end();
311
0
        assert!(
312
0
            start <= end,
313
0
            "`arbitrary::Unstructured::int_in_range` requires a non-empty range"
314
0
        );
315
316
        // When there is only one possible choice, don't waste any entropy from
317
        // the underlying data.
318
0
        if start == end {
319
0
            return Ok((start, 0));
320
0
        }
321
0
322
0
        // From here on out we work with the unsigned representation. All of the
323
0
        // operations performed below work out just as well whether or not `T`
324
0
        // is a signed or unsigned integer.
325
0
        let start = start.to_unsigned();
326
0
        let end = end.to_unsigned();
327
0
328
0
        let delta = end.wrapping_sub(start);
329
0
        debug_assert_ne!(delta, T::Unsigned::ZERO);
330
331
        // Compute an arbitrary integer offset from the start of the range. We
332
        // do this by consuming `size_of(T)` bytes from the input to create an
333
        // arbitrary integer and then clamping that int into our range bounds
334
        // with a modulo operation.
335
0
        let mut arbitrary_int = T::Unsigned::ZERO;
336
0
        let mut bytes_consumed: usize = 0;
337
338
0
        while (bytes_consumed < mem::size_of::<T>())
339
0
            && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO
340
        {
341
0
            let byte = match bytes.next() {
342
0
                None => break,
343
0
                Some(b) => b,
344
0
            };
345
0
            bytes_consumed += 1;
346
0
347
0
            // Combine this byte into our arbitrary integer, but avoid
348
0
            // overflowing the shift for `u8` and `i8`.
349
0
            arbitrary_int = if mem::size_of::<T>() == 1 {
350
0
                T::Unsigned::from_u8(byte)
351
            } else {
352
0
                (arbitrary_int << 8) | T::Unsigned::from_u8(byte)
353
            };
354
        }
355
356
0
        let offset = if delta == T::Unsigned::MAX {
357
0
            arbitrary_int
358
        } else {
359
0
            arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap())
360
        };
361
362
        // Finally, we add `start` to our offset from `start` to get the result
363
        // actual value within the range.
364
0
        let result = start.wrapping_add(offset);
365
0
366
0
        // And convert back to our maybe-signed representation.
367
0
        let result = T::from_unsigned(result);
368
0
        debug_assert!(*range.start() <= result);
369
0
        debug_assert!(result <= *range.end());
370
371
0
        Ok((result, bytes_consumed))
372
0
    }
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range_impl::<u8, core::iter::adapters::copied::Copied<core::slice::iter::Iter<u8>>>
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range_impl::<usize, core::iter::adapters::cloned::Cloned<core::slice::iter::Iter<u8>>>
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range_impl::<u32, core::iter::adapters::cloned::Cloned<core::slice::iter::Iter<u8>>>
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range_impl::<u32, core::iter::adapters::copied::Copied<core::slice::iter::Iter<u8>>>
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range_impl::<u16, core::iter::adapters::copied::Copied<core::slice::iter::Iter<u8>>>
Unexecuted instantiation: <arbitrary::unstructured::Unstructured>::int_in_range_impl::<u64, core::iter::adapters::copied::Copied<core::slice::iter::Iter<u8>>>
373
374
    /// Choose one of the given choices.
375
    ///
376
    /// This should only be used inside of `Arbitrary` implementations.
377
    ///
378
    /// Returns an error if there is not enough underlying data to make a
379
    /// choice or if no choices are provided.
380
    ///
381
    /// # Examples
382
    ///
383
    /// Selecting from an array of choices:
384
    ///
385
    /// ```
386
    /// use arbitrary::Unstructured;
387
    ///
388
    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
389
    /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
390
    ///
391
    /// let choice = u.choose(&choices).unwrap();
392
    ///
393
    /// println!("chose {}", choice);
394
    /// ```
395
    ///
396
    /// An error is returned if no choices are provided:
397
    ///
398
    /// ```
399
    /// use arbitrary::Unstructured;
400
    ///
401
    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
402
    /// let choices: [char; 0] = [];
403
    ///
404
    /// let result = u.choose(&choices);
405
    ///
406
    /// assert!(result.is_err());
407
    /// ```
408
0
    pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> {
409
0
        let idx = self.choose_index(choices.len())?;
410
0
        Ok(&choices[idx])
411
0
    }
412
413
    /// Choose a value in `0..len`.
414
    ///
415
    /// Returns an error if the `len` is zero.
416
    ///
417
    /// # Examples
418
    ///
419
    /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation.
420
    ///
421
    /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle
422
    ///
423
    /// ```
424
    /// use arbitrary::Unstructured;
425
    ///
426
    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
427
    /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
428
    /// let mut to_permute = &mut permutation[..];
429
    /// while to_permute.len() > 1 {
430
    ///     let idx = u.choose_index(to_permute.len()).unwrap();
431
    ///     to_permute.swap(0, idx);
432
    ///     to_permute = &mut to_permute[1..];
433
    /// }
434
    ///
435
    /// println!("permutation: {:?}", permutation);
436
    /// ```
437
    ///
438
    /// An error is returned if the length is zero:
439
    ///
440
    /// ```
441
    /// use arbitrary::Unstructured;
442
    ///
443
    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
444
    /// let array: [i32; 0] = [];
445
    ///
446
    /// let result = u.choose_index(array.len());
447
    ///
448
    /// assert!(result.is_err());
449
    /// ```
450
0
    pub fn choose_index(&mut self, len: usize) -> Result<usize> {
451
0
        if len == 0 {
452
0
            return Err(Error::EmptyChoose);
453
0
        }
454
0
        let idx = self.int_in_range(0..=len - 1)?;
455
0
        Ok(idx)
456
0
    }
457
458
    /// Generate a boolean according to the given ratio.
459
    ///
460
    /// # Panics
461
    ///
462
    /// Panics when the numerator and denominator do not meet these constraints:
463
    ///
464
    /// * `0 < numerator <= denominator`
465
    ///
466
    /// # Example
467
    ///
468
    /// Generate a boolean that is `true` five sevenths of the time:
469
    ///
470
    /// ```
471
    /// # fn foo() -> arbitrary::Result<()> {
472
    /// use arbitrary::Unstructured;
473
    ///
474
    /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
475
    /// let mut u = Unstructured::new(&my_data);
476
    ///
477
    /// if u.ratio(5, 7)? {
478
    ///     // Take this branch 5/7 of the time.
479
    /// }
480
    /// # Ok(())
481
    /// # }
482
    /// ```
483
0
    pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool>
484
0
    where
485
0
        T: Int,
486
0
    {
487
0
        assert!(T::ZERO < numerator);
488
0
        assert!(numerator <= denominator);
489
0
        let x = self.int_in_range(T::ONE..=denominator)?;
490
0
        Ok(x <= numerator)
491
0
    }
492
493
    /// Fill a `buffer` with bytes from the underlying raw data.
494
    ///
495
    /// This should only be called within an `Arbitrary` implementation. This is
496
    /// a very low-level operation. You should generally prefer calling nested
497
    /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
498
    /// `String::arbitrary` over using this method directly.
499
    ///
500
    /// If this `Unstructured` does not have enough underlying data to fill the
501
    /// whole `buffer`, it pads the buffer out with zeros.
502
    ///
503
    /// # Example
504
    ///
505
    /// ```
506
    /// use arbitrary::Unstructured;
507
    ///
508
    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
509
    ///
510
    /// let mut buf = [0; 2];
511
    ///
512
    /// assert!(u.fill_buffer(&mut buf).is_ok());
513
    /// assert_eq!(buf, [1, 2]);
514
    ///
515
    /// assert!(u.fill_buffer(&mut buf).is_ok());
516
    /// assert_eq!(buf, [3, 4]);
517
    ///
518
    /// assert!(u.fill_buffer(&mut buf).is_ok());
519
    /// assert_eq!(buf, [0, 0]);
520
    /// ```
521
0
    pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> {
522
0
        let n = std::cmp::min(buffer.len(), self.data.len());
523
0
        buffer[..n].copy_from_slice(&self.data[..n]);
524
0
        for byte in buffer[n..].iter_mut() {
525
0
            *byte = 0;
526
0
        }
527
0
        self.data = &self.data[n..];
528
0
        Ok(())
529
0
    }
530
531
    /// Provide `size` bytes from the underlying raw data.
532
    ///
533
    /// This should only be called within an `Arbitrary` implementation. This is
534
    /// a very low-level operation. You should generally prefer calling nested
535
    /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
536
    /// `String::arbitrary` over using this method directly.
537
    ///
538
    /// # Example
539
    ///
540
    /// ```
541
    /// use arbitrary::Unstructured;
542
    ///
543
    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
544
    ///
545
    /// assert!(u.bytes(2).unwrap() == &[1, 2]);
546
    /// assert!(u.bytes(2).unwrap() == &[3, 4]);
547
    /// ```
548
0
    pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> {
549
0
        if self.data.len() < size {
550
0
            return Err(Error::NotEnoughData);
551
0
        }
552
0
553
0
        let (for_buf, rest) = self.data.split_at(size);
554
0
        self.data = rest;
555
0
        Ok(for_buf)
556
0
    }
557
558
    /// Peek at `size` number of bytes of the underlying raw input.
559
    ///
560
    /// Does not consume the bytes, only peeks at them.
561
    ///
562
    /// Returns `None` if there are not `size` bytes left in the underlying raw
563
    /// input.
564
    ///
565
    /// # Example
566
    ///
567
    /// ```
568
    /// use arbitrary::Unstructured;
569
    ///
570
    /// let u = Unstructured::new(&[1, 2, 3]);
571
    ///
572
    /// assert_eq!(u.peek_bytes(0).unwrap(), []);
573
    /// assert_eq!(u.peek_bytes(1).unwrap(), [1]);
574
    /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]);
575
    /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]);
576
    ///
577
    /// assert!(u.peek_bytes(4).is_none());
578
    /// ```
579
0
    pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> {
580
0
        self.data.get(..size)
581
0
    }
582
583
    /// Consume all of the rest of the remaining underlying bytes.
584
    ///
585
    /// Returns a slice of all the remaining, unconsumed bytes.
586
    ///
587
    /// # Example
588
    ///
589
    /// ```
590
    /// use arbitrary::Unstructured;
591
    ///
592
    /// let mut u = Unstructured::new(&[1, 2, 3]);
593
    ///
594
    /// let mut remaining = u.take_rest();
595
    ///
596
    /// assert_eq!(remaining, [1, 2, 3]);
597
    /// ```
598
0
    pub fn take_rest(mut self) -> &'a [u8] {
599
0
        mem::replace(&mut self.data, &[])
600
0
    }
601
602
    /// Provide an iterator over elements for constructing a collection
603
    ///
604
    /// This is useful for implementing [`Arbitrary::arbitrary`] on collections
605
    /// since the implementation is simply `u.arbitrary_iter()?.collect()`
606
0
    pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>(
607
0
        &'b mut self,
608
0
    ) -> Result<ArbitraryIter<'a, 'b, ElementType>> {
609
0
        Ok(ArbitraryIter {
610
0
            u: &mut *self,
611
0
            _marker: PhantomData,
612
0
        })
613
0
    }
614
615
    /// Provide an iterator over elements for constructing a collection from
616
    /// all the remaining bytes.
617
    ///
618
    /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections
619
    /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()`
620
0
    pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
621
0
        self,
622
0
    ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
623
0
        let (lower, upper) = ElementType::size_hint(0);
624
0
625
0
        let elem_size = upper.unwrap_or(lower * 2);
626
0
        let elem_size = std::cmp::max(1, elem_size);
627
0
        let size = self.len() / elem_size;
628
0
        Ok(ArbitraryTakeRestIter {
629
0
            size,
630
0
            u: Some(self),
631
0
            _marker: PhantomData,
632
0
        })
633
0
    }
634
635
    /// Call the given function an arbitrary number of times.
636
    ///
637
    /// The function is given this `Unstructured` so that it can continue to
638
    /// generate arbitrary data and structures.
639
    ///
640
    /// You may optionaly specify minimum and maximum bounds on the number of
641
    /// times the function is called.
642
    ///
643
    /// You may break out of the loop early by returning
644
    /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return
645
    /// `Ok(std::ops::ControlFlow::Continue)`.
646
    ///
647
    /// # Panics
648
    ///
649
    /// Panics if `min > max`.
650
    ///
651
    /// # Example
652
    ///
653
    /// Call a closure that generates an arbitrary type inside a context an
654
    /// arbitrary number of times:
655
    ///
656
    /// ```
657
    /// use arbitrary::{Result, Unstructured};
658
    /// use std::ops::ControlFlow;
659
    ///
660
    /// enum Type {
661
    ///     /// A boolean type.
662
    ///     Bool,
663
    ///
664
    ///     /// An integer type.
665
    ///     Int,
666
    ///
667
    ///     /// A list of the `i`th type in this type's context.
668
    ///     List(usize),
669
    /// }
670
    ///
671
    /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> {
672
    ///     let mut context = vec![];
673
    ///
674
    ///     u.arbitrary_loop(Some(10), Some(20), |u| {
675
    ///         let num_choices = if context.is_empty() {
676
    ///             2
677
    ///         } else {
678
    ///             3
679
    ///         };
680
    ///         let ty = match u.int_in_range::<u8>(1..=num_choices)? {
681
    ///             1 => Type::Bool,
682
    ///             2 => Type::Int,
683
    ///             3 => Type::List(u.int_in_range(0..=context.len() - 1)?),
684
    ///             _ => unreachable!(),
685
    ///         };
686
    ///         context.push(ty);
687
    ///         Ok(ControlFlow::Continue(()))
688
    ///     })?;
689
    ///
690
    ///     // The number of loop iterations are constrained by the min/max
691
    ///     // bounds that we provided.
692
    ///     assert!(context.len() >= 10);
693
    ///     assert!(context.len() <= 20);
694
    ///
695
    ///     Ok(context)
696
    /// }
697
    /// ```
698
0
    pub fn arbitrary_loop(
699
0
        &mut self,
700
0
        min: Option<u32>,
701
0
        max: Option<u32>,
702
0
        mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>,
703
0
    ) -> Result<()> {
704
0
        let min = min.unwrap_or(0);
705
0
        let max = max.unwrap_or(u32::MAX);
706
0
707
0
        for _ in 0..self.int_in_range(min..=max)? {
708
0
            match f(self)? {
709
0
                ControlFlow::Continue(_) => continue,
710
0
                ControlFlow::Break(_) => break,
711
            }
712
        }
713
714
0
        Ok(())
715
0
    }
716
}
717
718
/// Utility iterator produced by [`Unstructured::arbitrary_iter`]
719
pub struct ArbitraryIter<'a, 'b, ElementType> {
720
    u: &'b mut Unstructured<'a>,
721
    _marker: PhantomData<ElementType>,
722
}
723
724
impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> {
725
    type Item = Result<ElementType>;
726
0
    fn next(&mut self) -> Option<Result<ElementType>> {
727
0
        let keep_going = self.u.arbitrary().unwrap_or(false);
728
0
        if keep_going {
729
0
            Some(Arbitrary::arbitrary(self.u))
730
        } else {
731
0
            None
732
        }
733
0
    }
734
}
735
736
/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
737
pub struct ArbitraryTakeRestIter<'a, ElementType> {
738
    u: Option<Unstructured<'a>>,
739
    size: usize,
740
    _marker: PhantomData<ElementType>,
741
}
742
743
impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
744
    type Item = Result<ElementType>;
745
0
    fn next(&mut self) -> Option<Result<ElementType>> {
746
0
        if let Some(mut u) = self.u.take() {
747
0
            if self.size == 1 {
748
0
                Some(Arbitrary::arbitrary_take_rest(u))
749
0
            } else if self.size == 0 {
750
0
                None
751
            } else {
752
0
                self.size -= 1;
753
0
                let ret = Arbitrary::arbitrary(&mut u);
754
0
                self.u = Some(u);
755
0
                Some(ret)
756
            }
757
        } else {
758
0
            None
759
        }
760
0
    }
761
}
762
763
/// A trait that is implemented for all of the primitive integers:
764
///
765
/// * `u8`
766
/// * `u16`
767
/// * `u32`
768
/// * `u64`
769
/// * `u128`
770
/// * `usize`
771
/// * `i8`
772
/// * `i16`
773
/// * `i32`
774
/// * `i64`
775
/// * `i128`
776
/// * `isize`
777
///
778
/// Don't implement this trait yourself.
779
pub trait Int:
780
    Copy
781
    + std::fmt::Debug
782
    + PartialOrd
783
    + Ord
784
    + ops::Sub<Self, Output = Self>
785
    + ops::Rem<Self, Output = Self>
786
    + ops::Shr<Self, Output = Self>
787
    + ops::Shl<usize, Output = Self>
788
    + ops::BitOr<Self, Output = Self>
789
{
790
    #[doc(hidden)]
791
    type Unsigned: Int;
792
793
    #[doc(hidden)]
794
    const ZERO: Self;
795
796
    #[doc(hidden)]
797
    const ONE: Self;
798
799
    #[doc(hidden)]
800
    const MAX: Self;
801
802
    #[doc(hidden)]
803
    fn from_u8(b: u8) -> Self;
804
805
    #[doc(hidden)]
806
    fn from_usize(u: usize) -> Self;
807
808
    #[doc(hidden)]
809
    fn checked_add(self, rhs: Self) -> Option<Self>;
810
811
    #[doc(hidden)]
812
    fn wrapping_add(self, rhs: Self) -> Self;
813
814
    #[doc(hidden)]
815
    fn wrapping_sub(self, rhs: Self) -> Self;
816
817
    #[doc(hidden)]
818
    fn to_unsigned(self) -> Self::Unsigned;
819
820
    #[doc(hidden)]
821
    fn from_unsigned(unsigned: Self::Unsigned) -> Self;
822
}
823
824
macro_rules! impl_int {
825
    ( $( $ty:ty : $unsigned_ty: ty ; )* ) => {
826
        $(
827
            impl Int for $ty {
828
                type Unsigned = $unsigned_ty;
829
830
                const ZERO: Self = 0;
831
832
                const ONE: Self = 1;
833
834
                const MAX: Self = Self::MAX;
835
836
0
                fn from_u8(b: u8) -> Self {
837
0
                    b as Self
838
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::from_u8
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::from_u8
839
840
0
                fn from_usize(u: usize) -> Self {
841
0
                    u as Self
842
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::from_usize
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::from_usize
843
844
0
                fn checked_add(self, rhs: Self) -> Option<Self> {
845
0
                    <$ty>::checked_add(self, rhs)
846
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::checked_add
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::checked_add
847
848
0
                fn wrapping_add(self, rhs: Self) -> Self {
849
0
                    <$ty>::wrapping_add(self, rhs)
850
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::wrapping_add
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::wrapping_add
851
852
0
                fn wrapping_sub(self, rhs: Self) -> Self {
853
0
                    <$ty>::wrapping_sub(self, rhs)
854
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::wrapping_sub
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::wrapping_sub
855
856
0
                fn to_unsigned(self) -> Self::Unsigned {
857
0
                    self as $unsigned_ty
858
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::to_unsigned
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::to_unsigned
859
860
0
                fn from_unsigned(unsigned: $unsigned_ty) -> Self {
861
0
                    unsigned as Self
862
0
                }
Unexecuted instantiation: <u8 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <u16 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <u32 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <u64 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <u128 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <usize as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <i8 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <i16 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <i32 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <i64 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <i128 as arbitrary::unstructured::Int>::from_unsigned
Unexecuted instantiation: <isize as arbitrary::unstructured::Int>::from_unsigned
863
            }
864
        )*
865
    }
866
}
867
868
impl_int! {
869
    u8: u8;
870
    u16: u16;
871
    u32: u32;
872
    u64: u64;
873
    u128: u128;
874
    usize: usize;
875
    i8: u8;
876
    i16: u16;
877
    i32: u32;
878
    i64: u64;
879
    i128: u128;
880
    isize: usize;
881
}
882
883
#[cfg(test)]
884
mod tests {
885
    use super::*;
886
887
    #[test]
888
    fn test_byte_size() {
889
        let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
890
        // Should take one byte off the end
891
        assert_eq!(u.arbitrary_byte_size().unwrap(), 6);
892
        assert_eq!(u.len(), 9);
893
        let mut v = vec![];
894
        v.resize(260, 0);
895
        v.push(1);
896
        v.push(4);
897
        let mut u = Unstructured::new(&v);
898
        // Should read two bytes off the end
899
        assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104);
900
        assert_eq!(u.len(), 260);
901
    }
902
903
    #[test]
904
    fn int_in_range_of_one() {
905
        let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
906
        let x = u.int_in_range(0..=0).unwrap();
907
        assert_eq!(x, 0);
908
        let choice = *u.choose(&[42]).unwrap();
909
        assert_eq!(choice, 42)
910
    }
911
912
    #[test]
913
    fn int_in_range_uses_minimal_amount_of_bytes() {
914
        let mut u = Unstructured::new(&[1, 2]);
915
        assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap());
916
        assert_eq!(u.len(), 1);
917
918
        let mut u = Unstructured::new(&[1, 2]);
919
        assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap());
920
        assert_eq!(u.len(), 1);
921
922
        let mut u = Unstructured::new(&[1]);
923
        assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap());
924
        assert!(u.is_empty());
925
    }
926
927
    #[test]
928
    fn int_in_range_in_bounds() {
929
        for input in u8::MIN..=u8::MAX {
930
            let input = [input];
931
932
            let mut u = Unstructured::new(&input);
933
            let x = u.int_in_range(1..=u8::MAX).unwrap();
934
            assert_ne!(x, 0);
935
936
            let mut u = Unstructured::new(&input);
937
            let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
938
            assert_ne!(x, u8::MAX);
939
        }
940
    }
941
942
    #[test]
943
    fn int_in_range_covers_unsigned_range() {
944
        // Test that we generate all values within the range given to
945
        // `int_in_range`.
946
947
        let mut full = [false; u8::MAX as usize + 1];
948
        let mut no_zero = [false; u8::MAX as usize];
949
        let mut no_max = [false; u8::MAX as usize];
950
        let mut narrow = [false; 10];
951
952
        for input in u8::MIN..=u8::MAX {
953
            let input = [input];
954
955
            let mut u = Unstructured::new(&input);
956
            let x = u.int_in_range(0..=u8::MAX).unwrap();
957
            full[x as usize] = true;
958
959
            let mut u = Unstructured::new(&input);
960
            let x = u.int_in_range(1..=u8::MAX).unwrap();
961
            no_zero[x as usize - 1] = true;
962
963
            let mut u = Unstructured::new(&input);
964
            let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
965
            no_max[x as usize] = true;
966
967
            let mut u = Unstructured::new(&input);
968
            let x = u.int_in_range(100..=109).unwrap();
969
            narrow[x as usize - 100] = true;
970
        }
971
972
        for (i, covered) in full.iter().enumerate() {
973
            assert!(covered, "full[{}] should have been generated", i);
974
        }
975
        for (i, covered) in no_zero.iter().enumerate() {
976
            assert!(covered, "no_zero[{}] should have been generated", i);
977
        }
978
        for (i, covered) in no_max.iter().enumerate() {
979
            assert!(covered, "no_max[{}] should have been generated", i);
980
        }
981
        for (i, covered) in narrow.iter().enumerate() {
982
            assert!(covered, "narrow[{}] should have been generated", i);
983
        }
984
    }
985
986
    #[test]
987
    fn int_in_range_covers_signed_range() {
988
        // Test that we generate all values within the range given to
989
        // `int_in_range`.
990
991
        let mut full = [false; u8::MAX as usize + 1];
992
        let mut no_min = [false; u8::MAX as usize];
993
        let mut no_max = [false; u8::MAX as usize];
994
        let mut narrow = [false; 21];
995
996
        let abs_i8_min: isize = 128;
997
998
        for input in 0..=u8::MAX {
999
            let input = [input];
1000
1001
            let mut u = Unstructured::new(&input);
1002
            let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap();
1003
            full[(x as isize + abs_i8_min) as usize] = true;
1004
1005
            let mut u = Unstructured::new(&input);
1006
            let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap();
1007
            no_min[(x as isize + abs_i8_min - 1) as usize] = true;
1008
1009
            let mut u = Unstructured::new(&input);
1010
            let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap();
1011
            no_max[(x as isize + abs_i8_min) as usize] = true;
1012
1013
            let mut u = Unstructured::new(&input);
1014
            let x = u.int_in_range(-10..=10).unwrap();
1015
            narrow[(x as isize + 10) as usize] = true;
1016
        }
1017
1018
        for (i, covered) in full.iter().enumerate() {
1019
            assert!(covered, "full[{}] should have been generated", i);
1020
        }
1021
        for (i, covered) in no_min.iter().enumerate() {
1022
            assert!(covered, "no_min[{}] should have been generated", i);
1023
        }
1024
        for (i, covered) in no_max.iter().enumerate() {
1025
            assert!(covered, "no_max[{}] should have been generated", i);
1026
        }
1027
        for (i, covered) in narrow.iter().enumerate() {
1028
            assert!(covered, "narrow[{}] should have been generated", i);
1029
        }
1030
    }
1031
}