/src/wasm-tools/fuzz/src/incremental_parse.rs
Line | Count | Source |
1 | | use Payload::*; |
2 | | use arbitrary::{Result, Unstructured}; |
3 | | use wasmparser::*; |
4 | | |
5 | | // Simulate receiving chunks of data by fuzzing over a `Vec<Vec<u8>>` where each |
6 | | // element of the outer vec is a chunk of data we received. |
7 | | // |
8 | | // The assertion here is that parsing everything in one go should always produce |
9 | | // the exact same results as an incremental parse. |
10 | 73 | pub fn run(u: &mut Unstructured<'_>) -> Result<()> { |
11 | 73 | let data: Vec<Vec<u8>> = u.arbitrary()?; |
12 | | |
13 | | // Concatenate everything together, create our expected iterator of |
14 | | // payloads, and then write out `input.wasm` if debugging is enabled. |
15 | 73 | let everything = data.iter().flat_map(|a| a).copied().collect::<Vec<_>>(); |
16 | 73 | let mut expected = Parser::new(0).parse_all(&everything); |
17 | 73 | if log::log_enabled!(log::Level::Debug) { |
18 | 0 | std::fs::write("input.wasm", &everything).unwrap(); |
19 | 73 | } |
20 | | |
21 | | // Create our parser as well as a stack of nested parsers for parsing nested |
22 | | // modules. |
23 | 73 | let mut stack = Vec::new(); |
24 | 73 | let mut parser = Parser::new(0); |
25 | | |
26 | | // We'll be parsing data from `buf` starting at `pos`, and we translate |
27 | | // `data` into an iterator of chunks so when requested we'll take another |
28 | | // chunk of data and feed it in. |
29 | 73 | let mut pos = 0; |
30 | 73 | let mut buf = Vec::new(); |
31 | 73 | let mut data = data.into_iter().peekable(); |
32 | | loop { |
33 | 487 | log::debug!("parsing {}..{}", pos, buf.len()); |
34 | 487 | let payload = match parser.parse(&buf[pos..], data.peek().is_none()) { |
35 | | // If more data is requested then we're guaranteed that `data` |
36 | | // should have another element in its iterator, so pull that off and |
37 | | // add it to the end of the `buf`. |
38 | 414 | Ok(Chunk::NeedMoreData(_n)) => { |
39 | 414 | buf.extend(data.next().unwrap()); |
40 | 414 | continue; |
41 | | } |
42 | 0 | Ok(Chunk::Parsed { consumed, payload }) => { |
43 | 0 | log::debug!("parsed {consumed} bytes"); |
44 | 0 | pos += consumed; |
45 | 0 | payload |
46 | | } |
47 | | |
48 | | // On failure we should receive the same failure as if we did a full |
49 | | // parse. |
50 | 73 | Err(actual) => { |
51 | 73 | let expected = expected |
52 | 73 | .next() |
53 | 73 | .expect("full parse stopped early") |
54 | 73 | .err() |
55 | 73 | .expect("full parse didn't return an error"); |
56 | 73 | assert_eq!(expected.offset(), actual.offset()); |
57 | 73 | assert_eq!(expected.message(), actual.message()); |
58 | 73 | break; |
59 | | } |
60 | | }; |
61 | 0 | log::debug!("parsed payload {payload:?}"); |
62 | 0 | let expected_payload = expected |
63 | 0 | .next() |
64 | 0 | .expect("full parse stopped early") |
65 | 0 | .expect("full parse failed but incremental succeeded"); |
66 | 0 | match (payload, expected_payload) { |
67 | 0 | (End(_), End(_)) => match stack.pop() { |
68 | 0 | Some(p) => parser = p, |
69 | | None => { |
70 | 0 | log::debug!("no more parsers"); |
71 | 0 | assert!(expected.next().is_none()); |
72 | 0 | break; |
73 | | } |
74 | | }, |
75 | | ( |
76 | | Version { |
77 | 0 | num: a, |
78 | 0 | encoding: ae, |
79 | 0 | range: ar, |
80 | | }, |
81 | | Version { |
82 | 0 | num: b, |
83 | 0 | encoding: be, |
84 | 0 | range: br, |
85 | | }, |
86 | | ) => { |
87 | 0 | assert_eq!(a, b); |
88 | 0 | assert_eq!(ae, be); |
89 | 0 | assert_eq!(ar, br); |
90 | | } |
91 | | |
92 | 0 | (TypeSection(a), TypeSection(b)) => assert_eq!(a.range(), b.range()), |
93 | 0 | (ImportSection(a), ImportSection(b)) => assert_eq!(a.range(), b.range()), |
94 | 0 | (FunctionSection(a), FunctionSection(b)) => assert_eq!(a.range(), b.range()), |
95 | 0 | (TableSection(a), TableSection(b)) => assert_eq!(a.range(), b.range()), |
96 | 0 | (MemorySection(a), MemorySection(b)) => assert_eq!(a.range(), b.range()), |
97 | 0 | (GlobalSection(a), GlobalSection(b)) => assert_eq!(a.range(), b.range()), |
98 | 0 | (ExportSection(a), ExportSection(b)) => assert_eq!(a.range(), b.range()), |
99 | 0 | (TagSection(a), TagSection(b)) => assert_eq!(a.range(), b.range()), |
100 | 0 | (StartSection { func: a, range: ar }, StartSection { func: b, range: br }) => { |
101 | 0 | assert_eq!(a, b); |
102 | 0 | assert_eq!(ar, br); |
103 | | } |
104 | 0 | (ElementSection(a), ElementSection(b)) => assert_eq!(a.range(), b.range()), |
105 | | ( |
106 | | DataCountSection { |
107 | 0 | count: a, |
108 | 0 | range: ar, |
109 | | }, |
110 | | DataCountSection { |
111 | 0 | count: b, |
112 | 0 | range: br, |
113 | | }, |
114 | | ) => { |
115 | 0 | assert_eq!(a, b); |
116 | 0 | assert_eq!(ar, br); |
117 | | } |
118 | 0 | (DataSection(a), DataSection(b)) => assert_eq!(a.range(), b.range()), |
119 | 0 | (CustomSection(ca), CustomSection(cb)) => { |
120 | 0 | assert_eq!(ca.name(), cb.name()); |
121 | 0 | assert_eq!(ca.data_offset(), cb.data_offset()); |
122 | 0 | assert_eq!(ca.data(), cb.data()); |
123 | 0 | assert_eq!(ca.range(), cb.range()); |
124 | | } |
125 | | ( |
126 | | CodeSectionStart { |
127 | 0 | count: a, |
128 | 0 | range: ar, |
129 | 0 | size: asz, |
130 | | }, |
131 | | CodeSectionStart { |
132 | 0 | count: b, |
133 | 0 | range: br, |
134 | 0 | size: bsz, |
135 | | }, |
136 | | ) => { |
137 | 0 | assert_eq!(a, b); |
138 | 0 | assert_eq!(ar, br); |
139 | 0 | assert_eq!(asz, bsz); |
140 | | } |
141 | | |
142 | 0 | (CodeSectionEntry(a), CodeSectionEntry(b)) => { |
143 | 0 | assert_eq!(a.get_binary_reader().range(), b.get_binary_reader().range()); |
144 | | } |
145 | | |
146 | | ( |
147 | | ModuleSection { |
148 | 0 | parser: p, |
149 | 0 | unchecked_range: a, |
150 | | }, |
151 | | ModuleSection { |
152 | 0 | unchecked_range: b, .. |
153 | | }, |
154 | | ) => { |
155 | 0 | assert_eq!(a, b); |
156 | 0 | stack.push(parser); |
157 | 0 | parser = p; |
158 | | } |
159 | 0 | (InstanceSection(a), InstanceSection(b)) => assert_eq!(a.range(), b.range()), |
160 | | ( |
161 | | ComponentSection { |
162 | 0 | parser: p, |
163 | 0 | unchecked_range: a, |
164 | | }, |
165 | | ComponentSection { |
166 | 0 | unchecked_range: b, .. |
167 | | }, |
168 | | ) => { |
169 | 0 | assert_eq!(a, b); |
170 | 0 | stack.push(parser); |
171 | 0 | parser = p; |
172 | | } |
173 | 0 | (ComponentInstanceSection(a), ComponentInstanceSection(b)) => { |
174 | 0 | assert_eq!(a.range(), b.range()) |
175 | | } |
176 | 0 | (ComponentAliasSection(a), ComponentAliasSection(b)) => { |
177 | 0 | assert_eq!(a.range(), b.range()) |
178 | | } |
179 | 0 | (ComponentTypeSection(a), ComponentTypeSection(b)) => assert_eq!(a.range(), b.range()), |
180 | 0 | (ComponentCanonicalSection(a), ComponentCanonicalSection(b)) => { |
181 | 0 | assert_eq!(a.range(), b.range()) |
182 | | } |
183 | 0 | (ComponentStartSection { range: a, .. }, ComponentStartSection { range: b, .. }) => { |
184 | 0 | assert_eq!(a, b) |
185 | | } |
186 | 0 | (ComponentImportSection(a), ComponentImportSection(b)) => { |
187 | 0 | assert_eq!(a.range(), b.range()) |
188 | | } |
189 | 0 | (ComponentExportSection(a), ComponentExportSection(b)) => { |
190 | 0 | assert_eq!(a.range(), b.range()) |
191 | | } |
192 | 0 | (CoreTypeSection(a), CoreTypeSection(b)) => { |
193 | 0 | assert_eq!(a.range(), b.range()) |
194 | | } |
195 | | |
196 | | ( |
197 | | UnknownSection { |
198 | 0 | id: a, |
199 | 0 | contents: ac, |
200 | 0 | range: ar, |
201 | | }, |
202 | | UnknownSection { |
203 | 0 | id: b, |
204 | 0 | contents: bc, |
205 | 0 | range: br, |
206 | | }, |
207 | | ) => { |
208 | 0 | assert_eq!(a, b); |
209 | 0 | assert_eq!(ar, br); |
210 | 0 | assert_eq!(ac, bc); |
211 | | } |
212 | | |
213 | 0 | (a, b) => { |
214 | 0 | panic!("expected {b:?}\ngot {a:?}"); |
215 | | } |
216 | | } |
217 | | } |
218 | 73 | Ok(()) |
219 | 73 | } |
220 | | |
221 | | #[test] |
222 | | fn smoke() { |
223 | | super::test::test_n_times(100, run); |
224 | | } |