/src/wasm-tools/crates/wit-component/src/metadata.rs
Line | Count | Source |
1 | | //! Definition for encoding of custom sections within core wasm modules of |
2 | | //! component-model related data. |
3 | | //! |
4 | | //! When creating a component from a source language the high-level process for |
5 | | //! doing this is that code will be generated into the source language by |
6 | | //! `wit-bindgen` or a similar tool which will be compiled down to core wasm. |
7 | | //! The core wasm file is then fed into `wit-component` and a component is |
8 | | //! created. This means that the componentization process is decoupled from the |
9 | | //! binding generation process and intentionally affords for linking together |
10 | | //! libraries into the main core wasm module that import different interfaces. |
11 | | //! |
12 | | //! The purpose of this module is to define an intermediate format to reside in |
13 | | //! a custom section in the core wasm output. This intermediate format is |
14 | | //! carried through the wasm linker through a custom section whose name starts |
15 | | //! with `component-type`. This custom section is created |
16 | | //! per-language-binding-generation and consumed by slurping up all the |
17 | | //! sections during the component creation process. |
18 | | //! |
19 | | //! Currently the encoding of this custom section is itself a component. The |
20 | | //! component has a single export which is a component type which represents the |
21 | | //! `world` that was bound during bindings generation. This single export is |
22 | | //! used to decode back into a `Resolve` with a WIT representation. |
23 | | //! |
24 | | //! Currently the component additionally has a custom section named |
25 | | //! `wit-component-encoding` (see `CUSTOM_SECTION_NAME`). This section is |
26 | | //! currently defined as 2 bytes: |
27 | | //! |
28 | | //! * The first byte is `CURRENT_VERSION` to help protect against future and |
29 | | //! past changes. |
30 | | //! * The second byte indicates the string encoding used for imports/exports as |
31 | | //! part of the bindings process. The mapping is defined by |
32 | | //! `encode_string_encoding`. |
33 | | //! |
34 | | //! This means that the top-level `encode` function takes a `Resolve`, a |
35 | | //! `WorldId`, a `StringEncoding`, and optional extra `Producers`. Note that the |
36 | | //! top-level `decode` function is slightly different because it's taking all |
37 | | //! custom sections in a core wasm binary, possibly from multiple invocations |
38 | | //! of bindgen, and unioning them all together. This means that the output is |
39 | | //! a `Bindgen` which represents the union of all previous bindings. |
40 | | //! |
41 | | //! The dual of `encode` is the `decode_custom_section` function which decodes |
42 | | //! the `Resolve`, `WorldId`, and `StringEncoding` originally passed to `encode`. |
43 | | |
44 | | use crate::{DecodedWasm, StringEncoding}; |
45 | | use anyhow::{Context, Result, bail}; |
46 | | use indexmap::{IndexMap, IndexSet}; |
47 | | use std::borrow::Cow; |
48 | | use wasm_encoder::{ |
49 | | ComponentBuilder, ComponentExportKind, ComponentType, ComponentTypeRef, CustomSection, |
50 | | }; |
51 | | use wasm_metadata::Producers; |
52 | | use wasmparser::{BinaryReader, Encoding, Parser, Payload}; |
53 | | use wit_parser::{CloneMaps, Package, PackageName, Resolve, World, WorldId, WorldItem, WorldKey}; |
54 | | |
/// Format version written as the first byte of the `CUSTOM_SECTION_NAME`
/// custom section; decoding rejects sections carrying any other version.
const CURRENT_VERSION: u8 = 0x04;

/// Name of the custom section, embedded in the encoded component, that carries
/// the format version and the string encoding used by the bindings.
const CUSTOM_SECTION_NAME: &str = "wit-component-encoding";
57 | | |
58 | | /// The result of decoding binding information from a WebAssembly binary. |
59 | | /// |
60 | | /// This structure is returned by [`decode`] and represents the interface of a |
61 | | /// WebAssembly binary. |
62 | | pub struct Bindgen { |
63 | | /// Interface and type information for this binary. |
64 | | pub resolve: Resolve, |
65 | | /// The world that was bound. |
66 | | pub world: WorldId, |
67 | | /// Metadata about this specific module that was bound. |
68 | | pub metadata: ModuleMetadata, |
69 | | /// Producer information about tools used to produce this specific module. |
70 | | pub producers: Option<Producers>, |
71 | | } |
72 | | |
73 | | impl Default for Bindgen { |
74 | 12.0k | fn default() -> Bindgen { |
75 | 12.0k | let mut resolve = Resolve::default(); |
76 | 12.0k | let package = resolve.packages.alloc(Package { |
77 | 12.0k | name: PackageName { |
78 | 12.0k | namespace: "root".to_string(), |
79 | 12.0k | name: "root".to_string(), |
80 | 12.0k | version: None, |
81 | 12.0k | }, |
82 | 12.0k | docs: Default::default(), |
83 | 12.0k | interfaces: Default::default(), |
84 | 12.0k | worlds: Default::default(), |
85 | 12.0k | }); |
86 | 12.0k | let world = resolve.worlds.alloc(World { |
87 | 12.0k | name: "root".to_string(), |
88 | 12.0k | docs: Default::default(), |
89 | 12.0k | imports: Default::default(), |
90 | 12.0k | exports: Default::default(), |
91 | 12.0k | includes: Default::default(), |
92 | 12.0k | include_names: Default::default(), |
93 | 12.0k | package: Some(package), |
94 | 12.0k | stability: Default::default(), |
95 | 12.0k | }); |
96 | 12.0k | resolve.packages[package] |
97 | 12.0k | .worlds |
98 | 12.0k | .insert("root".to_string(), world); |
99 | 12.0k | Bindgen { |
100 | 12.0k | resolve, |
101 | 12.0k | world, |
102 | 12.0k | metadata: ModuleMetadata::default(), |
103 | 12.0k | producers: None, |
104 | 12.0k | } |
105 | 12.0k | } |
106 | | } |
107 | | |
108 | | /// Module-level metadata that's specific to one core WebAssembly module. This |
109 | | /// is extracted with a [`Bindgen`]. |
110 | | #[derive(Default)] |
111 | | pub struct ModuleMetadata { |
112 | | /// Per-function options imported into the core wasm module, currently only |
113 | | /// related to string encoding. |
114 | | pub import_encodings: EncodingMap, |
115 | | |
116 | | /// Per-function options exported from the core wasm module, currently only |
117 | | /// related to string encoding. |
118 | | pub export_encodings: EncodingMap, |
119 | | } |
120 | | |
121 | | /// Internal map that keeps track of encodings for various world imports and |
122 | | /// exports. |
123 | | /// |
124 | | /// Stored in [`ModuleMetadata`]. |
125 | | #[derive(Default)] |
126 | | pub struct EncodingMap { |
127 | | /// A map of an "identifying string" for world items to what string |
128 | | /// encoding the import or export is using. |
129 | | /// |
130 | | /// The keys of this map are created by `EncodingMap::key` and are |
131 | | /// specifically chosen to be able to be looked up during both insertion and |
132 | | /// fetching. Note that in particular this map does not use `*Id` types such |
133 | | /// as `InterfaceId` from `wit_parser`. This is due to the fact that during |
134 | | /// world merging new interfaces are created for named imports (e.g. `import |
135 | | /// x: interface { ... }`) as inline interfaces are copied from one world to |
136 | | /// another. Additionally during world merging different interfaces at the |
137 | | /// same version may be deduplicated. |
138 | | /// |
139 | | /// For these reasons a string-based key is chosen to avoid juggling IDs |
140 | | /// through the world merging process. Additionally versions are chopped off |
141 | | /// for now to help with a problem such as: |
142 | | /// |
143 | | /// * The main module imports a:b/c@0.1.0 |
144 | | /// * An adapter imports a:b/c@0.1.1 |
145 | | /// * The final world uses a:b/c@0.1.1, but the main module has no |
146 | | /// encoding listed for that exact item. |
147 | | /// |
148 | | /// By chopping off versions this is able to get everything registered |
149 | | /// correctly even in the fact of merging interfaces and worlds. |
150 | | encodings: IndexMap<String, StringEncoding>, |
151 | | } |
152 | | |
153 | | impl EncodingMap { |
154 | 16.1k | fn insert_all( |
155 | 16.1k | &mut self, |
156 | 16.1k | resolve: &Resolve, |
157 | 16.1k | set: &IndexMap<WorldKey, WorldItem>, |
158 | 16.1k | encoding: StringEncoding, |
159 | 16.1k | ) { |
160 | 28.2k | for (name, item) in set { |
161 | 12.1k | match item { |
162 | 2.46k | WorldItem::Function(func) => { |
163 | 2.46k | let key = self.key(resolve, name, &func.name); |
164 | 2.46k | self.encodings.insert(key, encoding); |
165 | 2.46k | } |
166 | 2.22k | WorldItem::Interface { id, .. } => { |
167 | 3.58k | for (func, _) in resolve.interfaces[*id].functions.iter() { |
168 | 3.58k | let key = self.key(resolve, name, func); |
169 | 3.58k | self.encodings.insert(key, encoding); |
170 | 3.58k | } |
171 | | } |
172 | 7.42k | WorldItem::Type(_) => {} |
173 | | } |
174 | | } |
175 | 16.1k | } |
176 | | |
177 | | /// Looks up the encoding of the function `func` which is scoped under `key` |
178 | | /// in the world in question. |
179 | 2.08k | pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option<StringEncoding> { |
180 | 2.08k | let key = self.key(resolve, key, func); |
181 | 2.08k | self.encodings.get(&key).copied() |
182 | 2.08k | } |
183 | | |
184 | 8.12k | fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String { |
185 | 8.12k | format!( |
186 | 8.12k | "{}/{func}", |
187 | 8.12k | match key { |
188 | 6.30k | WorldKey::Name(name) => name.to_string(), |
189 | 1.82k | WorldKey::Interface(id) => { |
190 | 1.82k | let iface = &resolve.interfaces[*id]; |
191 | 1.82k | let pkg = &resolve.packages[iface.package.unwrap()]; |
192 | 1.82k | format!( |
193 | 1.82k | "{}:{}/{}", |
194 | | pkg.name.namespace, |
195 | | pkg.name.name, |
196 | 1.82k | iface.name.as_ref().unwrap() |
197 | | ) |
198 | | } |
199 | | } |
200 | | ) |
201 | 8.12k | } |
202 | | |
203 | 24.3k | fn merge(&mut self, other: EncodingMap) -> Result<()> { |
204 | 33.5k | for (key, encoding) in other.encodings { |
205 | 9.13k | if let Some(prev) = self.encodings.insert(key.clone(), encoding) { |
206 | 17 | if prev != encoding { |
207 | 0 | bail!("conflicting string encodings specified for `{key}`"); |
208 | 17 | } |
209 | 9.11k | } |
210 | | } |
211 | 24.3k | Ok(()) |
212 | 24.3k | } |
213 | | } |
214 | | |
215 | | /// This function will parse the core `wasm` binary given as input and return a |
216 | | /// [`Bindgen`] which extracts the custom sections describing component-level |
217 | | /// types from within the binary itself. |
218 | | /// |
219 | | /// This is used to parse the output of `wit-bindgen`-generated modules and is |
220 | | /// one of the earliest phases in transitioning such a module to a component. |
221 | | /// The extraction here provides the metadata necessary to continue the process |
222 | | /// later on. |
223 | | /// |
224 | | /// This will return an error if `wasm` is not a valid WebAssembly module. |
225 | | /// |
226 | | /// If a `component-type` custom section was found then a new binary is |
227 | | /// optionally returned with the custom sections stripped out. If no |
228 | | /// `component-type` custom sections are found then `None` is returned. |
229 | 8.06k | pub fn decode(wasm: &[u8]) -> Result<(Option<Vec<u8>>, Bindgen)> { |
230 | 8.06k | let mut ret = Bindgen::default(); |
231 | 8.06k | let mut new_module = wasm_encoder::Module::new(); |
232 | | |
233 | 8.06k | let mut found_custom = false; |
234 | 88.1k | for payload in wasmparser::Parser::new(0).parse_all(wasm) { |
235 | 88.1k | let payload = payload.context("decoding item in module")?; |
236 | 8.06k | match payload { |
237 | 8.06k | wasmparser::Payload::CustomSection(cs) if cs.name().starts_with("component-type") => { |
238 | 8.06k | let data = Bindgen::decode_custom_section(cs.data()) |
239 | 8.06k | .with_context(|| format!("decoding custom section {}", cs.name()))?; |
240 | 8.06k | ret.merge(data) |
241 | 8.06k | .with_context(|| format!("updating metadata for section {}", cs.name()))?; |
242 | 8.06k | found_custom = true; |
243 | | } |
244 | 8.06k | wasmparser::Payload::Version { encoding, .. } if encoding != Encoding::Module => { |
245 | 0 | bail!("decoding a component is not supported") |
246 | | } |
247 | | _ => { |
248 | 80.0k | if let Some((id, range)) = payload.as_section() { |
249 | 43.1k | new_module.section(&wasm_encoder::RawSection { |
250 | 43.1k | id, |
251 | 43.1k | data: &wasm[range], |
252 | 43.1k | }); |
253 | 43.1k | } |
254 | | } |
255 | | } |
256 | | } |
257 | | |
258 | 8.06k | if found_custom { |
259 | 8.06k | Ok((Some(new_module.finish()), ret)) |
260 | | } else { |
261 | 0 | Ok((None, ret)) |
262 | | } |
263 | 8.06k | } |
264 | | |
265 | | /// Creates a `component-type*` custom section to be decoded by `decode` above. |
266 | | /// |
267 | | /// This is primarily created by wit-bindgen-based guest generators to embed |
268 | | /// into the final core wasm binary. The core wasm binary is later fed |
269 | | /// through `wit-component` to produce the actual component where this returned |
270 | | /// section will be decoded. |
271 | 4.03k | pub fn encode( |
272 | 4.03k | resolve: &Resolve, |
273 | 4.03k | world: WorldId, |
274 | 4.03k | string_encoding: StringEncoding, |
275 | 4.03k | extra_producers: Option<&Producers>, |
276 | 4.03k | ) -> Result<Vec<u8>> { |
277 | 4.03k | let ty = crate::encoding::encode_world(resolve, world)?; |
278 | | |
279 | 4.03k | let world = &resolve.worlds[world]; |
280 | 4.03k | let mut outer_ty = ComponentType::new(); |
281 | 4.03k | outer_ty.ty().component(&ty); |
282 | 4.03k | outer_ty.export( |
283 | 4.03k | &resolve.id_of_name(world.package.unwrap(), &world.name), |
284 | 4.03k | ComponentTypeRef::Component(0), |
285 | | ); |
286 | | |
287 | 4.03k | let mut builder = ComponentBuilder::default(); |
288 | | |
289 | 4.03k | let string_encoding = encode_string_encoding(string_encoding); |
290 | 4.03k | builder.custom_section(&CustomSection { |
291 | 4.03k | name: CUSTOM_SECTION_NAME.into(), |
292 | 4.03k | data: Cow::Borrowed(&[CURRENT_VERSION, string_encoding]), |
293 | 4.03k | }); |
294 | | |
295 | 4.03k | let ty = builder.type_component(None, &outer_ty); |
296 | 4.03k | builder.export(&world.name, ComponentExportKind::Type, ty, None); |
297 | | |
298 | 4.03k | let mut producers = crate::base_producers(); |
299 | 4.03k | if let Some(p) = extra_producers { |
300 | 0 | producers.merge(&p); |
301 | 4.03k | } |
302 | 4.03k | builder.raw_custom_section(&producers.raw_custom_section()); |
303 | 4.03k | Ok(builder.finish()) |
304 | 4.03k | } |
305 | | |
306 | 8.06k | fn decode_custom_section(wasm: &[u8]) -> Result<(Resolve, WorldId, StringEncoding)> { |
307 | 8.06k | let (resolve, world) = wit_parser::decoding::decode_world(wasm)?; |
308 | 8.06k | let mut custom_section = None; |
309 | | |
310 | 48.3k | for payload in Parser::new(0).parse_all(wasm) { |
311 | 48.3k | match payload? { |
312 | 16.1k | Payload::CustomSection(s) if s.name() == CUSTOM_SECTION_NAME => { |
313 | 8.06k | custom_section = Some(s.data()); |
314 | 8.06k | } |
315 | 40.3k | _ => {} |
316 | | } |
317 | | } |
318 | 8.06k | let string_encoding = match custom_section { |
319 | 0 | None => bail!("missing custom section of name `{CUSTOM_SECTION_NAME}`"), |
320 | 8.06k | Some([CURRENT_VERSION, byte]) => decode_string_encoding(*byte)?, |
321 | 0 | Some([]) => bail!("custom section `{CUSTOM_SECTION_NAME}` in unknown format"), |
322 | 0 | Some([version, ..]) => bail!( |
323 | 0 | "custom section `{CUSTOM_SECTION_NAME}` uses format {version} but only {CURRENT_VERSION} is supported" |
324 | | ), |
325 | | }; |
326 | 8.06k | Ok((resolve, world, string_encoding)) |
327 | 8.06k | } |
328 | | |
329 | 4.03k | fn encode_string_encoding(e: StringEncoding) -> u8 { |
330 | 4.03k | match e { |
331 | 4.03k | StringEncoding::UTF8 => 0x00, |
332 | 0 | StringEncoding::UTF16 => 0x01, |
333 | 0 | StringEncoding::CompactUTF16 => 0x02, |
334 | | } |
335 | 4.03k | } |
336 | | |
337 | 8.06k | fn decode_string_encoding(byte: u8) -> Result<StringEncoding> { |
338 | 8.06k | match byte { |
339 | 8.06k | 0x00 => Ok(StringEncoding::UTF8), |
340 | 0 | 0x01 => Ok(StringEncoding::UTF16), |
341 | 0 | 0x02 => Ok(StringEncoding::CompactUTF16), |
342 | 0 | byte => bail!("invalid string encoding {byte:#x}"), |
343 | | } |
344 | 8.06k | } |
345 | | |
346 | | impl Bindgen { |
347 | 8.06k | fn decode_custom_section(data: &[u8]) -> Result<Bindgen> { |
348 | | let wasm; |
349 | | let world; |
350 | | let resolve; |
351 | | let encoding; |
352 | | |
353 | 8.06k | let mut reader = BinaryReader::new(data, 0); |
354 | 8.06k | match reader.read_u8()? { |
355 | | // Historical 0x03 format where the support here will be deleted in |
356 | | // the future |
357 | | 0x03 => { |
358 | 0 | encoding = decode_string_encoding(reader.read_u8()?)?; |
359 | 0 | let world_name = reader.read_string()?; |
360 | 0 | wasm = &data[reader.original_position()..]; |
361 | | |
362 | 0 | let (r, pkg) = match crate::decode(wasm)? { |
363 | 0 | DecodedWasm::WitPackage(resolve, pkgs) => (resolve, pkgs), |
364 | 0 | DecodedWasm::Component(..) => bail!("expected encoded wit package(s)"), |
365 | | }; |
366 | 0 | resolve = r; |
367 | 0 | world = resolve.select_world(&[pkg], Some(world_name.into()))?; |
368 | | } |
369 | | |
370 | | // Current format where `data` is a wasm component itself. |
371 | | _ => { |
372 | 8.06k | wasm = data; |
373 | 8.06k | (resolve, world, encoding) = decode_custom_section(wasm)?; |
374 | | } |
375 | | } |
376 | | |
377 | | Ok(Bindgen { |
378 | 8.06k | metadata: ModuleMetadata::new(&resolve, world, encoding), |
379 | 8.06k | producers: wasm_metadata::Producers::from_wasm(wasm)?, |
380 | 8.06k | resolve, |
381 | 8.06k | world, |
382 | | }) |
383 | 8.06k | } |
384 | | |
385 | | /// Merges another `BindgenMetadata` into this one. |
386 | | /// |
387 | | /// This operation is intended to be akin to "merging worlds" when the |
388 | | /// abstraction level for that is what we're working at here. For now the |
389 | | /// merge operation only succeeds if the two metadata descriptions are |
390 | | /// entirely disjoint. |
391 | | /// |
392 | | /// Note that at this time there's no support for changing string encodings |
393 | | /// between metadata. |
394 | | /// |
395 | | /// This function returns the set of exports that the main world of |
396 | | /// `other` added to the world in `self`. |
397 | 12.1k | pub fn merge(&mut self, other: Bindgen) -> Result<IndexSet<WorldKey>> { |
398 | | let Bindgen { |
399 | 12.1k | resolve, |
400 | 12.1k | world, |
401 | | metadata: |
402 | | ModuleMetadata { |
403 | 12.1k | import_encodings, |
404 | 12.1k | export_encodings, |
405 | | }, |
406 | 12.1k | producers, |
407 | 12.1k | } = other; |
408 | | |
409 | 12.1k | let remap = self |
410 | 12.1k | .resolve |
411 | 12.1k | .merge(resolve) |
412 | 12.1k | .context("failed to merge WIT package sets together")?; |
413 | 12.1k | let world = remap.map_world(world, None)?; |
414 | 12.1k | let exports = self.resolve.worlds[world].exports.keys().cloned().collect(); |
415 | 12.1k | self.resolve |
416 | 12.1k | .merge_worlds(world, self.world, &mut CloneMaps::default()) |
417 | 12.1k | .context("failed to merge worlds from two documents")?; |
418 | | |
419 | 12.1k | self.metadata.import_encodings.merge(import_encodings)?; |
420 | 12.1k | self.metadata.export_encodings.merge(export_encodings)?; |
421 | 12.1k | if let Some(producers) = producers { |
422 | 12.1k | if let Some(mine) = &mut self.producers { |
423 | 89 | mine.merge(&producers); |
424 | 12.0k | } else { |
425 | 12.0k | self.producers = Some(producers); |
426 | 12.0k | } |
427 | 0 | } |
428 | | |
429 | 12.1k | Ok(exports) |
430 | 12.1k | } |
431 | | } |
432 | | |
433 | | impl ModuleMetadata { |
434 | | /// Creates a new `ModuleMetadata` instance holding the given set of |
435 | | /// interfaces which are expected to all use the `encoding` specified. |
436 | 8.06k | pub fn new(resolve: &Resolve, world: WorldId, encoding: StringEncoding) -> ModuleMetadata { |
437 | 8.06k | let mut ret = ModuleMetadata::default(); |
438 | | |
439 | 8.06k | let world = &resolve.worlds[world]; |
440 | 8.06k | ret.export_encodings |
441 | 8.06k | .insert_all(resolve, &world.exports, encoding); |
442 | 8.06k | ret.import_encodings |
443 | 8.06k | .insert_all(resolve, &world.imports, encoding); |
444 | | |
445 | 8.06k | ret |
446 | 8.06k | } |
447 | | } |