/src/wasmtime/cranelift/fuzzgen/src/lib.rs

Source
use crate::config::Config;
use crate::function_generator::FunctionGenerator;
use crate::settings::{Flags, OptLevel};
use anyhow::Result;
use arbitrary::{Arbitrary, Unstructured};
use cranelift::codegen::Context;
use cranelift::codegen::data_value::DataValue;
use cranelift::codegen::ir::{Function, LibCall};
use cranelift::codegen::ir::{UserExternalName, UserFuncName};
use cranelift::codegen::isa::Builder;
use cranelift::prelude::isa::{OwnedTargetIsa, TargetIsa};
use cranelift::prelude::settings::SettingKind;
use cranelift::prelude::*;
use cranelift_arbitrary::CraneliftArbitrary;
use cranelift_native::builder_with_options;
use rand::{Rng, SeedableRng, rngs::SmallRng};
use target_isa_extras::TargetIsaExtras;
use target_lexicon::Architecture;

mod config;
mod cranelift_arbitrary;
mod function_generator;
mod passes;
mod print;
mod target_isa_extras;

pub use print::PrintableTestCase;

/// The argument values for one invocation of a generated function: one
/// `DataValue` per signature parameter, in parameter order.
pub type TestCaseInput = Vec<DataValue>;

/// Selects which ISA flags `FuzzGen::set_isa_flags` is allowed to generate.
pub enum IsaFlagGen {
    /// When generating ISA flags, ensure that they are all supported by
    /// the current host.
    Host,
    /// All flags available in cranelift are allowed to be generated.
    /// We also allow generating all possible values for each enum flag.
    All,
}

/// Generator for fuzzing inputs: function signatures, functions, test inputs,
/// compiler flags and ISA flags.
///
/// Every generator method consumes bytes from `u` and reads its ranges and
/// limits from `config`.
pub struct FuzzGen<'r, 'data>
where
    'data: 'r,
{
    /// Unstructured fuzzer input that the generator methods consume.
    pub u: &'r mut Unstructured<'data>,
    /// Tunables (value ranges, limits, flag ratios) read during generation.
    pub config: Config,
}

impl<'r, 'data> FuzzGen<'r, 'data>
where
    'data: 'r,
{
    /// Creates a generator that consumes its input from `u` and starts out
    /// with the default [`Config`].
    pub fn new(u: &'r mut Unstructured<'data>) -> Self {
        let config = Config::default();
        Self { u, config }
    }

    /// Generates a random function signature for `isa`.
    ///
    /// The parameter and return limits handed to the signature generator are
    /// each drawn from the corresponding range in `self.config`. SIMD support
    /// and the architecture are taken from `isa`.
    ///
    /// # Errors
    ///
    /// Propagates any error produced while drawing the limits or the
    /// signature itself from `self.u`.
    pub fn generate_signature(&mut self, isa: &dyn TargetIsa) -> Result<Signature> {
        // Draw order matters: params first, then returns, then the signature.
        let param_range = self.config.signature_params.clone();
        let max_params = self.u.int_in_range(param_range)?;

        let ret_range = self.config.signature_rets.clone();
        let max_rets = self.u.int_in_range(ret_range)?;

        let simd = isa.supports_simd();
        let arch = isa.triple().architecture;
        let signature = self.u.signature(simd, arch, max_params, max_rets)?;
        Ok(signature)
    }

    pub fn generate_test_inputs(mut self, signature: &Signature) -> Result<Vec<TestCaseInput>> {
        let mut inputs = Vec::new();

        // Generate up to "max_test_case_inputs" inputs, we need an upper bound here since
        // the fuzzer at some point starts trying to feed us way too many inputs. (I found one
        // test case with 130k inputs!)
        for _ in 0..self.config.max_test_case_inputs {
            let last_len = self.u.len();

            let test_args = signature
                .params
                .iter()
                .map(|p| self.u.datavalue(p.value_type))
                .collect::<Result<TestCaseInput>>()?;

            inputs.push(test_args);

            // Continue generating input as long as we just consumed some of self.u. Otherwise
            // we'll generate the same test input again and again, forever. Note that once self.u
            // becomes empty we obviously can't consume any more of it, so this check is more
            // general. Also note that we need to generate at least one input or the fuzz target
            // won't actually test anything, so checking at the end of the loop is good, even if
            // self.u is empty from the start and we end up with all zeros in test_args.
            assert!(self.u.len() <= last_len);
            if self.u.len() == last_len {
                break;
            }
        }

        Ok(inputs)
    }

    fn run_func_passes(&mut self, func: Function, isa: &dyn TargetIsa) -> Result<Function> {
        // Do a NaN Canonicalization pass on the generated function.
        //
        // Both IEEE754 and the Wasm spec are somewhat loose about what is allowed
        // to be returned from NaN producing operations. And in practice this changes
        // from X86 to Aarch64 and others. Even in the same host machine, the
        // interpreter may produce a code sequence different from cranelift that
        // generates different NaN's but produces legal results according to the spec.
        //
        // These differences cause spurious failures in the fuzzer. To fix this
        // we enable the NaN Canonicalization pass that replaces any NaN's produced
        // with a single fixed canonical NaN value.
        //
        // This is something that we can enable via flags for the compiled version, however
        // the interpreter won't get that version, so call that pass manually here.

        let mut ctx = Context::for_function(func);

        // We disable the verifier here, since if it fails it prevents a test case from
        // being generated and formatted by `cargo fuzz fmt`.
        // We run the verifier before compiling the code, so it always gets verified.
        let flags = settings::Flags::new({
            let mut builder = settings::builder();
            builder.set("enable_verifier", "false").unwrap();
            builder
        });

        // Create a new TargetISA from the given ISA, this ensures that we copy all ISA
        // flags, which may have an effect on the code generated by the passes below.
        let isa = Builder::from_target_isa(isa)
            .finish(flags)
            .expect("Failed to build TargetISA");

        // Finally run the NaN canonicalization pass
        ctx.canonicalize_nans(isa.as_ref())
            .expect("Failed NaN canonicalization pass");

        // Run the int_divz pass
        //
        // This pass replaces divs and rems with sequences that do not trap
        passes::do_int_divz_pass(self, &mut ctx.func)?;

        // This pass replaces fcvt* instructions with sequences that do not trap
        passes::do_fcvt_trap_pass(self, &mut ctx.func)?;

        Ok(ctx.func)
    }

    /// Generates a random function called `name` for `isa`.
    ///
    /// A signature is generated first, then the function body is produced by
    /// [`FunctionGenerator`] (which is handed `usercalls` and `libcalls`), and
    /// finally the result goes through `run_func_passes`.
    ///
    /// # Errors
    ///
    /// Propagates errors from signature generation, function generation and
    /// the post-processing passes.
    pub fn generate_func(
        &mut self,
        name: UserFuncName,
        isa: OwnedTargetIsa,
        usercalls: Vec<(UserExternalName, Signature)>,
        libcalls: Vec<LibCall>,
    ) -> Result<Function> {
        let signature = self.generate_signature(isa.as_ref())?;

        let generated = FunctionGenerator::new(
            &mut self.u,
            &self.config,
            isa.clone(),
            name,
            signature,
            usercalls,
            libcalls,
        )
        .generate()?;

        self.run_func_passes(generated, isa.as_ref())
    }

    /// Generate a random set of cranelift flags.
    /// Only semantics preserving flags are considered
    ///
    /// `target_arch` decides whether inline stack probes may be enabled and
    /// whether `enable_llvm_abi_extensions` is forced on.
    ///
    /// Values are drawn from `self.u` in a fixed order: optimization level,
    /// each boolean flag, the optional stack probe settings, and the basic
    /// block padding.
    ///
    /// # Errors
    ///
    /// Returns an error if a value cannot be drawn from `self.u`.
    ///
    /// # Panics
    ///
    /// Panics if the settings builder rejects one of the flag names or values
    /// set here.
    pub fn generate_flags(&mut self, target_arch: Architecture) -> arbitrary::Result<Flags> {
        let mut builder = settings::builder();

        let opt = self.u.choose(OptLevel::all())?;
        builder.set("opt_level", &opt.to_string()).unwrap();

        // Boolean flags
        // TODO: enable_pinned_reg does not work with our current trampolines. See: #4376
        // TODO: is_pic has issues:
        //   x86: https://github.com/bytecodealliance/wasmtime/issues/5005
        //   aarch64: https://github.com/bytecodealliance/wasmtime/issues/2735
        let bool_settings = [
            "enable_alias_analysis",
            "unwind_info",
            "preserve_frame_pointers",
            "enable_heap_access_spectre_mitigation",
            "enable_table_access_spectre_mitigation",
            "enable_incremental_compilation_cache_checks",
            "regalloc_checker",
            "enable_llvm_abi_extensions",
        ];
        for flag_name in bool_settings {
            // Flags with a configured ratio are biased accordingly; all other
            // flags are an unbiased coin flip.
            let enabled = match self.config.compile_flag_ratio.get(&flag_name) {
                Some(&(num, denum)) => self.u.ratio(num, denum)?,
                None => bool::arbitrary(self.u)?,
            };

            builder.set(flag_name, &enabled.to_string()).unwrap();
        }

        let supports_inline_probestack = matches!(
            target_arch,
            Architecture::X86_64 | Architecture::Aarch64(_) | Architecture::Riscv64(_)
        );

        // Optionally test inline stackprobes on supported platforms
        // TODO: Test outlined stack probes.
        if supports_inline_probestack && bool::arbitrary(self.u)? {
            builder.enable("enable_probestack").unwrap();
            builder.set("probestack_strategy", "inline").unwrap();

            let size = self
                .u
                .int_in_range(self.config.stack_probe_size_log2.clone())?;
            builder
                .set("probestack_size_log2", &size.to_string())
                .unwrap();
        }

        // Generate random basic block padding. A failure to draw the value is
        // propagated like every other draw in this function instead of
        // panicking.
        let bb_padding = self
            .u
            .int_in_range(self.config.bb_padding_log2_size.clone())?;
        builder
            .set("bb_padding_log2_minus_one", &bb_padding.to_string())
            .unwrap();

        // Fixed settings

        // We need llvm ABI extensions for i128 values on x86, so enable it regardless of
        // what we picked above.
        if target_arch == Architecture::X86_64 {
            builder.enable("enable_llvm_abi_extensions").unwrap();
        }

        // FIXME(#9510) remove once this option is permanently disabled
        builder.enable("enable_multi_ret_implicit_sret").unwrap();

        // This is the default, but we should ensure that it wasn't accidentally turned off anywhere.
        builder.enable("enable_verifier").unwrap();

        // `machine_code_cfg_info` generates additional metadata for the embedder but this doesn't feed back
        // into compilation anywhere, we leave it on unconditionally to make sure the generation doesn't panic.
        builder.enable("machine_code_cfg_info").unwrap();

        // Differential fuzzing between the interpreter and the host will only
        // really work if NaN payloads are canonicalized, so enable this.
        builder.enable("enable_nan_canonicalization").unwrap();

        Ok(Flags::new(builder))
    }

    /// Picks a random set of ISA flags and applies them to `builder`.
    ///
    /// `mode` selects the pool of candidate flags: either every flag cranelift
    /// knows for the builder's triple, or only what the current host supports.
    ///
    /// Whatever the mode, only a random subset of the candidates is copied
    /// onto `builder`; the remaining flags keep their defaults.
    ///
    /// # Errors
    ///
    /// Propagates ISA lookup, flag-setting and ISA build failures, as well as
    /// errors from drawing values out of `self.u`.
    ///
    /// # Panics
    ///
    /// Panics if no ISA builder is available for the current host in `Host`
    /// mode, or if a flag of an unhandled kind is encountered in `All` mode.
    pub fn set_isa_flags(&mut self, builder: &mut Builder, mode: IsaFlagGen) -> Result<()> {
        // `max_builder` holds the maximal set of flags that we can use.
        let max_builder = match mode {
            // Use `cranelift-native` to do feature detection for us.
            IsaFlagGen::Host => builder_with_options(true)
                .expect("Unable to build a TargetIsa for the current host"),
            IsaFlagGen::All => {
                let triple = builder.triple().clone();
                let mut all_flags = isa::lookup(triple)?;

                for flag in all_flags.iter() {
                    match flag.kind {
                        // Presets are just special flags that combine other
                        // flags. We only want the underlying flags, so presets
                        // themselves are never enabled directly.
                        SettingKind::Preset => {}
                        SettingKind::Bool => all_flags.enable(flag.name)?,
                        SettingKind::Enum => {
                            // Enums have no "max" value per se, so pick one at random.
                            let choice = self.u.choose(flag.values.unwrap())?;
                            all_flags.set(flag.name, choice)?;
                        }
                        _ => todo!(),
                    }
                }

                all_flags
            }
        };

        // Cranelift has a somewhat weird API for this: the final `TargetIsa`
        // has to be built before the ISA flag values can be read back. Only
        // then is `value_string()` available, which formats each value in a
        // form that can be passed into the builder again.
        let default_flags = Flags::new(settings::builder());
        let max_isa = max_builder.finish(default_flags)?;

        // Every flag gets a chance of being copied over; the ones that are not
        // stay at their default. A constant amount of data is taken from
        // `self.u` to seed a `SmallRng`, and that RNG makes the per-flag
        // decisions. This way the same test case generates similarly across
        // machines with different CPUs when `Host` is used above.
        let mut rng = SmallRng::from_seed(self.u.arbitrary()?);
        for flag in max_isa.isa_flags().iter() {
            let keep_default: bool = rng.random();
            if !keep_default {
                builder.set(flag.name, &flag.value_string())?;
            }
        }

        Ok(())
    }
}

Coverage Report

Created: 2025-12-04 07:01

Line	Count	Source
1		use crate::config::Config;
2		use crate::function_generator::FunctionGenerator;
3		use crate::settings::{Flags, OptLevel};
4		use anyhow::Result;
5		use arbitrary::{Arbitrary, Unstructured};
6		use cranelift::codegen::Context;
7		use cranelift::codegen::data_value::DataValue;
8		use cranelift::codegen::ir::{Function, LibCall};
9		use cranelift::codegen::ir::{UserExternalName, UserFuncName};
10		use cranelift::codegen::isa::Builder;
11		use cranelift::prelude::isa::{OwnedTargetIsa, TargetIsa};
12		use cranelift::prelude::settings::SettingKind;
13		use cranelift::prelude::*;
14		use cranelift_arbitrary::CraneliftArbitrary;
15		use cranelift_native::builder_with_options;
16		use rand::{Rng, SeedableRng, rngs::SmallRng};
17		use target_isa_extras::TargetIsaExtras;
18		use target_lexicon::Architecture;
19
20		mod config;
21		mod cranelift_arbitrary;
22		mod function_generator;
23		mod passes;
24		mod print;
25		mod target_isa_extras;
26
27		pub use print::PrintableTestCase;
28
29		pub type TestCaseInput = Vec<DataValue>;
30
31		pub enum IsaFlagGen {
32		/// When generating ISA flags, ensure that they are all supported by
33		/// the current host.
34		Host,
35		/// All flags available in cranelift are allowed to be generated.
36		/// We also allow generating all possible values for each enum flag.
37		All,
38		}
39
40		pub struct FuzzGen<'r, 'data>
41		where
42		'data: 'r,
43		{
44		pub u: &'r mut Unstructured<'data>,
45		pub config: Config,
46		}
47
48		impl<'r, 'data> FuzzGen<'r, 'data>
49		where
50		'data: 'r,
51		{
52	15.0k	pub fn new(u: &'r mut Unstructured<'data>) -> Self {
53	15.0k	Self {
54	15.0k	u,
55	15.0k	config: Config::default(),
56	15.0k	}
57	15.0k	}
58
59	40.4k	pub fn generate_signature(&mut self, isa: &dyn TargetIsa) -> Result<Signature> {
60	40.4k	let max_params = self.u.int_in_range(self.config.signature_params.clone())?;
61	40.4k	let max_rets = self.u.int_in_range(self.config.signature_rets.clone())?;
62	40.4k	Ok(self.u.signature(
63	40.4k	isa.supports_simd(),
64	40.4k	isa.triple().architecture,
65	40.4k	max_params,
66	40.4k	max_rets,
67	0	)?)
68	40.4k	}
69
70	7.26k	pub fn generate_test_inputs(mut self, signature: &Signature) -> Result<Vec<TestCaseInput>> {
71	7.26k	let mut inputs = Vec::new();
72
73		// Generate up to "max_test_case_inputs" inputs, we need an upper bound here since
74		// the fuzzer at some point starts trying to feed us way too many inputs. (I found one
75		// test case with 130k inputs!)
76	7.26k	for _ in 0..self.config.max_test_case_inputs {
77	16.1k	let last_len = self.u.len();
78
79	16.1k	let test_args = signature
80	16.1k	.params
81	16.1k	.iter()
82	157k	.map(\|p\| self.u.datavalue(p.value_type))
83	16.1k	.collect::<Result<TestCaseInput>>()?;
84
85	16.1k	inputs.push(test_args);
86
87		// Continue generating input as long as we just consumed some of self.u. Otherwise
88		// we'll generate the same test input again and again, forever. Note that once self.u
89		// becomes empty we obviously can't consume any more of it, so this check is more
90		// general. Also note that we need to generate at least one input or the fuzz target
91		// won't actually test anything, so checking at the end of the loop is good, even if
92		// self.u is empty from the start and we end up with all zeros in test_args.
93	16.1k	assert!(self.u.len() <= last_len);
94	16.1k	if self.u.len() == last_len {
95	7.24k	break;
96	8.85k	}
97		}
98
99	7.26k	Ok(inputs)
100	7.26k	}
101
102	24.0k	fn run_func_passes(&mut self, func: Function, isa: &dyn TargetIsa) -> Result<Function> {
103		// Do a NaN Canonicalization pass on the generated function.
104		//
105		// Both IEEE754 and the Wasm spec are somewhat loose about what is allowed
106		// to be returned from NaN producing operations. And in practice this changes
107		// from X86 to Aarch64 and others. Even in the same host machine, the
108		// interpreter may produce a code sequence different from cranelift that
109		// generates different NaN's but produces legal results according to the spec.
110		//
111		// These differences cause spurious failures in the fuzzer. To fix this
112		// we enable the NaN Canonicalization pass that replaces any NaN's produced
113		// with a single fixed canonical NaN value.
114		//
115		// This is something that we can enable via flags for the compiled version, however
116		// the interpreter won't get that version, so call that pass manually here.
117
118	24.0k	let mut ctx = Context::for_function(func);
119
120		// We disable the verifier here, since if it fails it prevents a test case from
121		// being generated and formatted by `cargo fuzz fmt`.
122		// We run the verifier before compiling the code, so it always gets verified.
123	24.0k	let flags = settings::Flags::new({
124	24.0k	let mut builder = settings::builder();
125	24.0k	builder.set("enable_verifier", "false").unwrap();
126	24.0k	builder
127		});
128
129		// Create a new TargetISA from the given ISA, this ensures that we copy all ISA
130		// flags, which may have an effect on the code generated by the passes below.
131	24.0k	let isa = Builder::from_target_isa(isa)
132	24.0k	.finish(flags)
133	24.0k	.expect("Failed to build TargetISA");
134
135		// Finally run the NaN canonicalization pass
136	24.0k	ctx.canonicalize_nans(isa.as_ref())
137	24.0k	.expect("Failed NaN canonicalization pass");
138
139		// Run the int_divz pass
140		//
141		// This pass replaces divs and rems with sequences that do not trap
142	24.0k	passes::do_int_divz_pass(self, &mut ctx.func)?;
143
144		// This pass replaces fcvt* instructions with sequences that do not trap
145	24.0k	passes::do_fcvt_trap_pass(self, &mut ctx.func)?;
146
147	24.0k	Ok(ctx.func)
148	24.0k	}
149
150	25.6k	pub fn generate_func(
151	25.6k	&mut self,
152	25.6k	name: UserFuncName,
153	25.6k	isa: OwnedTargetIsa,
154	25.6k	usercalls: Vec<(UserExternalName, Signature)>,
155	25.6k	libcalls: Vec<LibCall>,
156	25.6k	) -> Result<Function> {
157	25.6k	let sig = self.generate_signature(&*isa)?;
158
159	25.6k	let func = FunctionGenerator::new(
160	25.6k	&mut self.u,
161	25.6k	&self.config,
162	25.6k	isa.clone(),
163	25.6k	name,
164	25.6k	sig,
165	25.6k	usercalls,
166	25.6k	libcalls,
167		)
168	25.6k	.generate()?;
169
170	24.0k	self.run_func_passes(func, &*isa)
171	25.6k	}
172
173		/// Generate a random set of cranelift flags.
174		/// Only semantics preserving flags are considered
175	15.0k	pub fn generate_flags(&mut self, target_arch: Architecture) -> arbitrary::Result<Flags> {
176	15.0k	let mut builder = settings::builder();
177
178	15.0k	let opt = self.u.choose(OptLevel::all())?;
179	15.0k	builder.set("opt_level", &format!("{opt}")[..]).unwrap();
180
181		// Boolean flags
182		// TODO: enable_pinned_reg does not work with our current trampolines. See: #4376
183		// TODO: is_pic has issues:
184		// x86: https://github.com/bytecodealliance/wasmtime/issues/5005
185		// aarch64: https://github.com/bytecodealliance/wasmtime/issues/2735
186	15.0k	let bool_settings = [
187	15.0k	"enable_alias_analysis",
188	15.0k	"unwind_info",
189	15.0k	"preserve_frame_pointers",
190	15.0k	"enable_heap_access_spectre_mitigation",
191	15.0k	"enable_table_access_spectre_mitigation",
192	15.0k	"enable_incremental_compilation_cache_checks",
193	15.0k	"regalloc_checker",
194	15.0k	"enable_llvm_abi_extensions",
195	15.0k	];
196	135k	for flag_name in bool_settings {
197	120k	let enabled = self
198	120k	.config
199	120k	.compile_flag_ratio
200	120k	.get(&flag_name)
201	120k	.map(\|&(num, denum)\| self.u.ratio(num, denum))
202	120k	.unwrap_or_else(\|\| bool::arbitrary(self.u))?;
203
204	120k	let value = format!("{enabled}");
205	120k	builder.set(flag_name, value.as_str()).unwrap();
206		}
207
208	15.0k	let supports_inline_probestack = match target_arch {
209	10.0k	Architecture::X86_64 => true,
210	1.38k	Architecture::Aarch64(_) => true,
211	1.87k	Architecture::Riscv64(_) => true,
212	1.77k	_ => false,
213		};
214
215		// Optionally test inline stackprobes on supported platforms
216		// TODO: Test outlined stack probes.
217	15.0k	if supports_inline_probestack && bool::arbitrary(self.u)? {
218	4.04k	builder.enable("enable_probestack").unwrap();
219	4.04k	builder.set("probestack_strategy", "inline").unwrap();
220
221	4.04k	let size = self
222	4.04k	.u
223	4.04k	.int_in_range(self.config.stack_probe_size_log2.clone())?;
224	4.04k	builder
225	4.04k	.set("probestack_size_log2", &format!("{size}"))
226	4.04k	.unwrap();
227	11.0k	}
228
229		// Generate random basic block padding
230	15.0k	let bb_padding = self
231	15.0k	.u
232	15.0k	.int_in_range(self.config.bb_padding_log2_size.clone())
233	15.0k	.unwrap();
234	15.0k	builder
235	15.0k	.set("bb_padding_log2_minus_one", &format!("{bb_padding}"))
236	15.0k	.unwrap();
237
238		// Fixed settings
239
240		// We need llvm ABI extensions for i128 values on x86, so enable it regardless of
241		// what we picked above.
242	15.0k	if target_arch == Architecture::X86_64 {
243	10.0k	builder.enable("enable_llvm_abi_extensions").unwrap();
244	10.0k	}
245
246		// FIXME(#9510) remove once this option is permanently disabled
247	15.0k	builder.enable("enable_multi_ret_implicit_sret").unwrap();
248
249		// This is the default, but we should ensure that it wasn't accidentally turned off anywhere.
250	15.0k	builder.enable("enable_verifier").unwrap();
251
252		// `machine_code_cfg_info` generates additional metadata for the embedder but this doesn't feed back
253		// into compilation anywhere, we leave it on unconditionally to make sure the generation doesn't panic.
254	15.0k	builder.enable("machine_code_cfg_info").unwrap();
255
256		// Differential fuzzing between the interpreter and the host will only
257		// really work if NaN payloads are canonicalized, so enable this.
258	15.0k	builder.enable("enable_nan_canonicalization").unwrap();
259
260	15.0k	Ok(Flags::new(builder))
261	15.0k	}
262
263		/// Generate a random set of ISA flags and apply them to a Builder.
264		///
265		/// Based on `mode` we can either allow all flags, or just the subset that is
266		/// supported by the current host.
267		///
268		/// In all cases only a subset of the allowed flags is applied to the builder.
269	15.0k	pub fn set_isa_flags(&mut self, builder: &mut Builder, mode: IsaFlagGen) -> Result<()> {
270		// `max_isa` is the maximal set of flags that we can use.
271	15.0k	let max_builder = match mode {
272		IsaFlagGen::All => {
273	7.04k	let mut max_builder = isa::lookup(builder.triple().clone())?;
274
275	280k	for flag in max_builder.iter() {
276	280k	match flag.kind {
277		SettingKind::Bool => {
278	110k	max_builder.enable(flag.name)?;
279		}
280		SettingKind::Enum => {
281		// Since these are enums there isn't a "max" value per se, pick one at random.
282	0	let value = self.u.choose(flag.values.unwrap())?;
283	0	max_builder.set(flag.name, value)?;
284		}
285	169k	SettingKind::Preset => {
286	169k	// Presets are just special flags that combine other flags, we don't
287	169k	// want to enable them directly, just the underlying flags.
288	169k	}
289	0	_ => todo!(),
290		};
291		}
292	7.04k	max_builder
293		}
294		// Use `cranelift-native` to do feature detection for us.
295	8.02k	IsaFlagGen::Host => builder_with_options(true)
296	8.02k	.expect("Unable to build a TargetIsa for the current host"),
297		};
298		// Cranelift has a somewhat weird API for this, but we need to build the final `TargetIsa` to be able
299		// to extract the values for the ISA flags. We need that to use the `string_value()` that formats
300		// the values so that we can pass it into the builder again.
301	15.0k	let max_isa = max_builder.finish(Flags::new(settings::builder()))?;
302
303		// We give each of the flags a chance of being copied over. Otherwise we
304		// keep the default. Note that a constant amount of data is taken from
305		// `self.u` as a seed for a `SmallRng` which is then transitively used
306		// to make decisions about what flags to include. This is done to ensure
307		// that the same test case generates similarly across different machines
308		// with different CPUs when `Host` is used above.
309	15.0k	let mut rng = SmallRng::from_seed(self.u.arbitrary()?);
310	247k	for value in max_isa.isa_flags().iter() {
311	247k	if rng.random() {
312	114k	continue;
313	132k	}
314	132k	builder.set(value.name, &value.value_string())?;
315		}
316
317	15.0k	Ok(())
318	15.0k	}
319		}