/src/wasm-tools/fuzz/src/incremental_parse.rs

Source
use Payload::*;
use arbitrary::{Result, Unstructured};
use wasmparser::*;

/// Fuzz entry point checking that incremental parsing agrees with a one-shot
/// parse.
///
/// Simulates receiving chunks of data by fuzzing over a `Vec<Vec<u8>>` where
/// each element of the outer vec is a chunk of data we received.
///
/// The assertion here is that parsing everything in one go should always
/// produce the exact same results as an incremental parse.
///
/// # Errors
///
/// Returns an error only when `u` cannot produce the chunked input; every
/// other path ends in `Ok(())`.
///
/// # Panics
///
/// Panics when the incremental parse and the full parse disagree (different
/// payload kinds, ranges, or error offset/message). When debug logging is
/// enabled it also panics if `input.wasm` cannot be written.
pub fn run(u: &mut Unstructured<'_>) -> Result<()> {
    let data: Vec<Vec<u8>> = u.arbitrary()?;

    // Concatenate everything together, create our expected iterator of
    // payloads, and then write out `input.wasm` if debugging is enabled.
    let everything = data.concat();
    let mut expected = Parser::new(0).parse_all(&everything);
    if log::log_enabled!(log::Level::Debug) {
        std::fs::write("input.wasm", &everything).unwrap();
    }

    // Create our parser as well as a stack of nested parsers for parsing nested
    // modules.
    let mut stack = Vec::new();
    let mut parser = Parser::new(0);

    // We'll be parsing data from `buf` starting at `pos`, and we translate
    // `data` into an iterator of chunks so when requested we'll take another
    // chunk of data and feed it in.
    let mut pos = 0;
    let mut buf = Vec::new();
    let mut data = data.into_iter().peekable();
    loop {
        log::debug!("parsing {}..{}", pos, buf.len());
        // The second argument is `true` once no chunks remain, i.e. `buf`
        // already holds every byte the parser will ever be given.
        let payload = match parser.parse(&buf[pos..], data.peek().is_none()) {
            // If more data is requested then we're guaranteed that `data`
            // should have another element in its iterator, so pull that off
            // and add it to the end of the `buf`.
            Ok(Chunk::NeedMoreData(_n)) => {
                buf.extend(data.next().unwrap());
                continue;
            }
            Ok(Chunk::Parsed { consumed, payload }) => {
                log::debug!("parsed {consumed} bytes");
                pos += consumed;
                payload
            }

            // On failure we should receive the same failure as if we did a full
            // parse.
            Err(actual) => {
                let expected_err = expected
                    .next()
                    .expect("full parse stopped early")
                    .err()
                    .expect("full parse didn't return an error");
                assert_eq!(expected_err.offset(), actual.offset());
                assert_eq!(expected_err.message(), actual.message());
                break;
            }
        };
        log::debug!("parsed payload {payload:?}");
        let expected_payload = expected
            .next()
            .expect("full parse stopped early")
            .expect("full parse failed but incremental succeeded");
        match (payload, expected_payload) {
            // The end of a nested module/component resumes the parent parser;
            // the end of the outermost one must also exhaust the full parse.
            (End(_), End(_)) => match stack.pop() {
                Some(p) => parser = p,
                None => {
                    log::debug!("no more parsers");
                    assert!(expected.next().is_none());
                    break;
                }
            },
            (
                Version {
                    num: a,
                    encoding: ae,
                    range: ar,
                },
                Version {
                    num: b,
                    encoding: be,
                    range: br,
                },
            ) => {
                assert_eq!(a, b);
                assert_eq!(ae, be);
                assert_eq!(ar, br);
            }

            (TypeSection(a), TypeSection(b)) => assert_eq!(a.range(), b.range()),
            (ImportSection(a), ImportSection(b)) => assert_eq!(a.range(), b.range()),
            (FunctionSection(a), FunctionSection(b)) => assert_eq!(a.range(), b.range()),
            (TableSection(a), TableSection(b)) => assert_eq!(a.range(), b.range()),
            (MemorySection(a), MemorySection(b)) => assert_eq!(a.range(), b.range()),
            (GlobalSection(a), GlobalSection(b)) => assert_eq!(a.range(), b.range()),
            (ExportSection(a), ExportSection(b)) => assert_eq!(a.range(), b.range()),
            (TagSection(a), TagSection(b)) => assert_eq!(a.range(), b.range()),
            (StartSection { func: a, range: ar }, StartSection { func: b, range: br }) => {
                assert_eq!(a, b);
                assert_eq!(ar, br);
            }
            (ElementSection(a), ElementSection(b)) => assert_eq!(a.range(), b.range()),
            (
                DataCountSection {
                    count: a,
                    range: ar,
                },
                DataCountSection {
                    count: b,
                    range: br,
                },
            ) => {
                assert_eq!(a, b);
                assert_eq!(ar, br);
            }
            (DataSection(a), DataSection(b)) => assert_eq!(a.range(), b.range()),
            (CustomSection(ca), CustomSection(cb)) => {
                assert_eq!(ca.name(), cb.name());
                assert_eq!(ca.data_offset(), cb.data_offset());
                assert_eq!(ca.data(), cb.data());
                assert_eq!(ca.range(), cb.range());
            }
            (
                CodeSectionStart {
                    count: a,
                    range: ar,
                    size: asz,
                },
                CodeSectionStart {
                    count: b,
                    range: br,
                    size: bsz,
                },
            ) => {
                assert_eq!(a, b);
                assert_eq!(ar, br);
                assert_eq!(asz, bsz);
            }

            (CodeSectionEntry(a), CodeSectionEntry(b)) => {
                assert_eq!(a.get_binary_reader().range(), b.get_binary_reader().range());
            }

            // Nested module: park the current parser on the stack and continue
            // with the parser handed back in the payload until its `End`.
            (
                ModuleSection {
                    parser: p,
                    unchecked_range: a,
                },
                ModuleSection {
                    unchecked_range: b, ..
                },
            ) => {
                assert_eq!(a, b);
                stack.push(parser);
                parser = p;
            }
            (InstanceSection(a), InstanceSection(b)) => assert_eq!(a.range(), b.range()),
            // Nested component: handled the same way as a nested module.
            (
                ComponentSection {
                    parser: p,
                    unchecked_range: a,
                },
                ComponentSection {
                    unchecked_range: b, ..
                },
            ) => {
                assert_eq!(a, b);
                stack.push(parser);
                parser = p;
            }
            (ComponentInstanceSection(a), ComponentInstanceSection(b)) => {
                assert_eq!(a.range(), b.range())
            }
            (ComponentAliasSection(a), ComponentAliasSection(b)) => {
                assert_eq!(a.range(), b.range())
            }
            (ComponentTypeSection(a), ComponentTypeSection(b)) => assert_eq!(a.range(), b.range()),
            (ComponentCanonicalSection(a), ComponentCanonicalSection(b)) => {
                assert_eq!(a.range(), b.range())
            }
            (ComponentStartSection { range: a, .. }, ComponentStartSection { range: b, .. }) => {
                assert_eq!(a, b)
            }
            (ComponentImportSection(a), ComponentImportSection(b)) => {
                assert_eq!(a.range(), b.range())
            }
            (ComponentExportSection(a), ComponentExportSection(b)) => {
                assert_eq!(a.range(), b.range())
            }
            (CoreTypeSection(a), CoreTypeSection(b)) => {
                assert_eq!(a.range(), b.range())
            }

            (
                UnknownSection {
                    id: a,
                    contents: ac,
                    range: ar,
                },
                UnknownSection {
                    id: b,
                    contents: bc,
                    range: br,
                },
            ) => {
                assert_eq!(a, b);
                assert_eq!(ar, br);
                assert_eq!(ac, bc);
            }

            // Any other pairing means the two parses produced different
            // payload kinds (or a kind this comparison does not handle).
            (a, b) => {
                panic!("expected {b:?}\ngot {a:?}");
            }
        }
    }
    Ok(())
}

/// Smoke test: hands `run` to the shared test helper, requesting 100
/// iterations.
#[test]
fn smoke() {
    let iterations = 100;
    super::test::test_n_times(iterations, run);
}

Coverage Report

Created: 2025-12-04 07:01

Line	Count	Source
1		use Payload::*;
2		use arbitrary::{Result, Unstructured};
3		use wasmparser::*;
4
5		// Simulate receiving chunks of data by fuzzing over a `Vec<Vec<u8>>` where each
6		// element of the outer vec is a chunk of data we received.
7		//
8		// The assertion here is that parsing everything in one go should always produce
9		// the exact same results as an incremental parse.
10	73	pub fn run(u: &mut Unstructured<'_>) -> Result<()> {
11	73	let data: Vec<Vec<u8>> = u.arbitrary()?;
12
13		// Concatenate everything together, create our expected iterator of
14		// payloads, and then write out `input.wasm` if debugging is enabled.
15	73	let everything = data.iter().flat_map(\|a\| a).copied().collect::<Vec<_>>();
16	73	let mut expected = Parser::new(0).parse_all(&everything);
17	73	if log::log_enabled!(log::Level::Debug) {
18	0	std::fs::write("input.wasm", &everything).unwrap();
19	73	}
20
21		// Create our parser as well as a stack of nested parsers for parsing nested
22		// modules.
23	73	let mut stack = Vec::new();
24	73	let mut parser = Parser::new(0);
25
26		// We'll be parsing data from `buf` starting at `pos`, and we translate
27		// `data` into an iterator of chunks so when requested we'll take another
28		// chunk of data and feed it in.
29	73	let mut pos = 0;
30	73	let mut buf = Vec::new();
31	73	let mut data = data.into_iter().peekable();
32		loop {
33	487	log::debug!("parsing {}..{}", pos, buf.len());
34	487	let payload = match parser.parse(&buf[pos..], data.peek().is_none()) {
35		// If more data is requested then we're guaranteed that `data`
36		// should have another element in its iterato, so pull that off and
37		// add it to the end of the `buf`.
38	414	Ok(Chunk::NeedMoreData(_n)) => {
39	414	buf.extend(data.next().unwrap());
40	414	continue;
41		}
42	0	Ok(Chunk::Parsed { consumed, payload }) => {
43	0	log::debug!("parsed {consumed} bytes");
44	0	pos += consumed;
45	0	payload
46		}
47
48		// On failure we should receive the same failure as if we did a full
49		// parse.
50	73	Err(actual) => {
51	73	let expected = expected
52	73	.next()
53	73	.expect("full parse stopped early")
54	73	.err()
55	73	.expect("full parse didn't return an error");
56	73	assert_eq!(expected.offset(), actual.offset());
57	73	assert_eq!(expected.message(), actual.message());
58	73	break;
59		}
60		};
61	0	log::debug!("parsed payload {payload:?}");
62	0	let expected_payload = expected
63	0	.next()
64	0	.expect("full parse stopped early")
65	0	.expect("full parse failed but incremental succeeded");
66	0	match (payload, expected_payload) {
67	0	(End(_), End(_)) => match stack.pop() {
68	0	Some(p) => parser = p,
69		None => {
70	0	log::debug!("no more parsers");
71	0	assert!(expected.next().is_none());
72	0	break;
73		}
74		},
75		(
76		Version {
77	0	num: a,
78	0	encoding: ae,
79	0	range: ar,
80		},
81		Version {
82	0	num: b,
83	0	encoding: be,
84	0	range: br,
85		},
86		) => {
87	0	assert_eq!(a, b);
88	0	assert_eq!(ae, be);
89	0	assert_eq!(ar, br);
90		}
91
92	0	(TypeSection(a), TypeSection(b)) => assert_eq!(a.range(), b.range()),
93	0	(ImportSection(a), ImportSection(b)) => assert_eq!(a.range(), b.range()),
94	0	(FunctionSection(a), FunctionSection(b)) => assert_eq!(a.range(), b.range()),
95	0	(TableSection(a), TableSection(b)) => assert_eq!(a.range(), b.range()),
96	0	(MemorySection(a), MemorySection(b)) => assert_eq!(a.range(), b.range()),
97	0	(GlobalSection(a), GlobalSection(b)) => assert_eq!(a.range(), b.range()),
98	0	(ExportSection(a), ExportSection(b)) => assert_eq!(a.range(), b.range()),
99	0	(TagSection(a), TagSection(b)) => assert_eq!(a.range(), b.range()),
100	0	(StartSection { func: a, range: ar }, StartSection { func: b, range: br }) => {
101	0	assert_eq!(a, b);
102	0	assert_eq!(ar, br);
103		}
104	0	(ElementSection(a), ElementSection(b)) => assert_eq!(a.range(), b.range()),
105		(
106		DataCountSection {
107	0	count: a,
108	0	range: ar,
109		},
110		DataCountSection {
111	0	count: b,
112	0	range: br,
113		},
114		) => {
115	0	assert_eq!(a, b);
116	0	assert_eq!(ar, br);
117		}
118	0	(DataSection(a), DataSection(b)) => assert_eq!(a.range(), b.range()),
119	0	(CustomSection(ca), CustomSection(cb)) => {
120	0	assert_eq!(ca.name(), cb.name());
121	0	assert_eq!(ca.data_offset(), cb.data_offset());
122	0	assert_eq!(ca.data(), cb.data());
123	0	assert_eq!(ca.range(), cb.range());
124		}
125		(
126		CodeSectionStart {
127	0	count: a,
128	0	range: ar,
129	0	size: asz,
130		},
131		CodeSectionStart {
132	0	count: b,
133	0	range: br,
134	0	size: bsz,
135		},
136		) => {
137	0	assert_eq!(a, b);
138	0	assert_eq!(ar, br);
139	0	assert_eq!(asz, bsz);
140		}
141
142	0	(CodeSectionEntry(a), CodeSectionEntry(b)) => {
143	0	assert_eq!(a.get_binary_reader().range(), b.get_binary_reader().range());
144		}
145
146		(
147		ModuleSection {
148	0	parser: p,
149	0	unchecked_range: a,
150		},
151		ModuleSection {
152	0	unchecked_range: b, ..
153		},
154		) => {
155	0	assert_eq!(a, b);
156	0	stack.push(parser);
157	0	parser = p;
158		}
159	0	(InstanceSection(a), InstanceSection(b)) => assert_eq!(a.range(), b.range()),
160		(
161		ComponentSection {
162	0	parser: p,
163	0	unchecked_range: a,
164		},
165		ComponentSection {
166	0	unchecked_range: b, ..
167		},
168		) => {
169	0	assert_eq!(a, b);
170	0	stack.push(parser);
171	0	parser = p;
172		}
173	0	(ComponentInstanceSection(a), ComponentInstanceSection(b)) => {
174	0	assert_eq!(a.range(), b.range())
175		}
176	0	(ComponentAliasSection(a), ComponentAliasSection(b)) => {
177	0	assert_eq!(a.range(), b.range())
178		}
179	0	(ComponentTypeSection(a), ComponentTypeSection(b)) => assert_eq!(a.range(), b.range()),
180	0	(ComponentCanonicalSection(a), ComponentCanonicalSection(b)) => {
181	0	assert_eq!(a.range(), b.range())
182		}
183	0	(ComponentStartSection { range: a, .. }, ComponentStartSection { range: b, .. }) => {
184	0	assert_eq!(a, b)
185		}
186	0	(ComponentImportSection(a), ComponentImportSection(b)) => {
187	0	assert_eq!(a.range(), b.range())
188		}
189	0	(ComponentExportSection(a), ComponentExportSection(b)) => {
190	0	assert_eq!(a.range(), b.range())
191		}
192	0	(CoreTypeSection(a), CoreTypeSection(b)) => {
193	0	assert_eq!(a.range(), b.range())
194		}
195
196		(
197		UnknownSection {
198	0	id: a,
199	0	contents: ac,
200	0	range: ar,
201		},
202		UnknownSection {
203	0	id: b,
204	0	contents: bc,
205	0	range: br,
206		},
207		) => {
208	0	assert_eq!(a, b);
209	0	assert_eq!(ar, br);
210	0	assert_eq!(ac, bc);
211		}
212
213	0	(a, b) => {
214	0	panic!("expected {b:?}\ngot {a:?}");
215		}
216		}
217		}
218	73	Ok(())
219	73	}
220
221		#[test]
222		fn smoke() {
223		super::test::test_n_times(100, run);
224		}