Coverage Report

Created: 2025-12-09 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wasm-tools/crates/wit-component/src/metadata.rs
Line
Count
Source
1
//! Definition for encoding of custom sections within core wasm modules of
2
//! component-model related data.
3
//!
4
//! When creating a component from a source language the high-level process for
5
//! doing this is that code will be generated into the source language by
6
//! `wit-bindgen` or a similar tool which will be compiled down to core wasm.
7
//! The core wasm file is then fed into `wit-component` and a component is
8
//! created. This means that the componentization process is decoupled from the
9
//! binding generation process and intentionally affords for linking together
10
//! libraries into the main core wasm module that import different interfaces.
11
//!
12
//! The purpose of this module is to define an intermediate format to reside in
13
//! a custom section in the core wasm output. This intermediate format is
14
//! carried through the wasm linker through a custom section whose name starts
15
//! with `component-type`. This custom section is created
16
//! per-language-binding-generation and consumed by slurping up all the
17
//! sections during the component creation process.
18
//!
19
//! Currently the encoding of this custom section is itself a component. The
20
//! component has a single export which is a component type which represents the
21
//! `world` that was bound during bindings generation. This single export is
22
//! used to decode back into a `Resolve` with a WIT representation.
23
//!
24
//! Currently the component additionally has a custom section named
25
//! `wit-component-encoding` (see `CUSTOM_SECTION_NAME`). This section is
26
//! currently defined as 2 bytes:
27
//!
28
//! * The first byte is `CURRENT_VERSION` to help protect against future and
29
//!   past changes.
30
//! * The second byte indicates the string encoding used for imports/exports as
31
//!   part of the bindings process. The mapping is defined by
32
//!   `encode_string_encoding`.
33
//!
34
//! This means that the top-level `encode` function takes a `Resolve`, a
35
//! `WorldId`, and a `StringEncoding`. Note that the top-level `decode` function
36
//! is slightly different because it's taking all custom sections in a core
37
//! wasm binary, possibly from multiple invocations of bindgen, and unioning
38
//! them all together. This means that the output is a `Bindgen` which
39
//! represents the union of all previous bindings.
40
//!
41
//! The dual of `encode` is the `decode_custom_section` function which decodes
42
//! the three arguments originally passed to `encode`.
43
44
use crate::{DecodedWasm, StringEncoding};
45
use anyhow::{Context, Result, bail};
46
use indexmap::{IndexMap, IndexSet};
47
use std::borrow::Cow;
48
use wasm_encoder::{
49
    ComponentBuilder, ComponentExportKind, ComponentType, ComponentTypeRef, CustomSection,
50
};
51
use wasm_metadata::Producers;
52
use wasmparser::{BinaryReader, Encoding, Parser, Payload};
53
use wit_parser::{CloneMaps, Package, PackageName, Resolve, World, WorldId, WorldItem, WorldKey};
54
55
/// Version byte written as the first byte of the `wit-component-encoding`
/// custom section by `encode` and checked again when decoding.
const CURRENT_VERSION: u8 = 0x04;
/// Name of the custom section, stored inside the encoded component, that
/// carries the format version and the string-encoding byte.
const CUSTOM_SECTION_NAME: &str = "wit-component-encoding";
57
58
/// The result of decoding binding information from a WebAssembly binary.
59
///
60
/// This structure is returned by [`decode`] and represents the interface of a
61
/// WebAssembly binary.
62
pub struct Bindgen {
63
    /// Interface and type information for this binary.
64
    pub resolve: Resolve,
65
    /// The world that was bound.
66
    pub world: WorldId,
67
    /// Metadata about this specific module that was bound.
68
    pub metadata: ModuleMetadata,
69
    /// Producer information about tools used to produce this specific module.
70
    pub producers: Option<Producers>,
71
}
72
73
impl Default for Bindgen {
74
12.0k
    fn default() -> Bindgen {
75
12.0k
        let mut resolve = Resolve::default();
76
12.0k
        let package = resolve.packages.alloc(Package {
77
12.0k
            name: PackageName {
78
12.0k
                namespace: "root".to_string(),
79
12.0k
                name: "root".to_string(),
80
12.0k
                version: None,
81
12.0k
            },
82
12.0k
            docs: Default::default(),
83
12.0k
            interfaces: Default::default(),
84
12.0k
            worlds: Default::default(),
85
12.0k
        });
86
12.0k
        let world = resolve.worlds.alloc(World {
87
12.0k
            name: "root".to_string(),
88
12.0k
            docs: Default::default(),
89
12.0k
            imports: Default::default(),
90
12.0k
            exports: Default::default(),
91
12.0k
            includes: Default::default(),
92
12.0k
            include_names: Default::default(),
93
12.0k
            package: Some(package),
94
12.0k
            stability: Default::default(),
95
12.0k
        });
96
12.0k
        resolve.packages[package]
97
12.0k
            .worlds
98
12.0k
            .insert("root".to_string(), world);
99
12.0k
        Bindgen {
100
12.0k
            resolve,
101
12.0k
            world,
102
12.0k
            metadata: ModuleMetadata::default(),
103
12.0k
            producers: None,
104
12.0k
        }
105
12.0k
    }
106
}
107
108
/// Module-level metadata that's specific to one core WebAssembly module. This
109
/// is extracted with a [`Bindgen`].
110
#[derive(Default)]
111
pub struct ModuleMetadata {
112
    /// Per-function options imported into the core wasm module, currently only
113
    /// related to string encoding.
114
    pub import_encodings: EncodingMap,
115
116
    /// Per-function options exported from the core wasm module, currently only
117
    /// related to string encoding.
118
    pub export_encodings: EncodingMap,
119
}
120
121
/// Internal map that keeps track of encodings for various world imports and
122
/// exports.
123
///
124
/// Stored in [`ModuleMetadata`].
125
#[derive(Default)]
126
pub struct EncodingMap {
127
    /// A map of an "identifying string" for world items to what string
128
    /// encoding the import or export is using.
129
    ///
130
    /// The keys of this map are created by `EncodingMap::key` and are
131
    /// specifically chosen to be able to be looked up during both insertion and
132
    /// fetching. Note that in particular this map does not use `*Id` types such
133
    /// as `InterfaceId` from `wit_parser`. This is due to the fact that during
134
    /// world merging new interfaces are created for named imports (e.g. `import
135
    /// x: interface { ... }`) as inline interfaces are copied from one world to
136
    /// another. Additionally during world merging different interfaces at the
137
    /// same version may be deduplicated.
138
    ///
139
    /// For these reasons a string-based key is chosen to avoid juggling IDs
140
    /// through the world merging process. Additionally versions are chopped off
141
    /// for now to help with a problem such as:
142
    ///
143
    /// * The main module imports a:b/c@0.1.0
144
    /// * An adapter imports a:b/c@0.1.1
145
    /// * The final world uses a:b/c@0.1.1, but the main module has no
146
    ///   encoding listed for that exact item.
147
    ///
148
    /// By chopping off versions this is able to get everything registered
149
    /// correctly even in the fact of merging interfaces and worlds.
150
    encodings: IndexMap<String, StringEncoding>,
151
}
152
153
impl EncodingMap {
154
16.1k
    fn insert_all(
155
16.1k
        &mut self,
156
16.1k
        resolve: &Resolve,
157
16.1k
        set: &IndexMap<WorldKey, WorldItem>,
158
16.1k
        encoding: StringEncoding,
159
16.1k
    ) {
160
28.2k
        for (name, item) in set {
161
12.1k
            match item {
162
2.46k
                WorldItem::Function(func) => {
163
2.46k
                    let key = self.key(resolve, name, &func.name);
164
2.46k
                    self.encodings.insert(key, encoding);
165
2.46k
                }
166
2.22k
                WorldItem::Interface { id, .. } => {
167
3.58k
                    for (func, _) in resolve.interfaces[*id].functions.iter() {
168
3.58k
                        let key = self.key(resolve, name, func);
169
3.58k
                        self.encodings.insert(key, encoding);
170
3.58k
                    }
171
                }
172
7.42k
                WorldItem::Type(_) => {}
173
            }
174
        }
175
16.1k
    }
176
177
    /// Looks up the encoding of the function `func` which is scoped under `key`
178
    /// in the world in question.
179
2.08k
    pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option<StringEncoding> {
180
2.08k
        let key = self.key(resolve, key, func);
181
2.08k
        self.encodings.get(&key).copied()
182
2.08k
    }
183
184
8.12k
    fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String {
185
8.12k
        format!(
186
8.12k
            "{}/{func}",
187
8.12k
            match key {
188
6.30k
                WorldKey::Name(name) => name.to_string(),
189
1.82k
                WorldKey::Interface(id) => {
190
1.82k
                    let iface = &resolve.interfaces[*id];
191
1.82k
                    let pkg = &resolve.packages[iface.package.unwrap()];
192
1.82k
                    format!(
193
1.82k
                        "{}:{}/{}",
194
                        pkg.name.namespace,
195
                        pkg.name.name,
196
1.82k
                        iface.name.as_ref().unwrap()
197
                    )
198
                }
199
            }
200
        )
201
8.12k
    }
202
203
24.3k
    fn merge(&mut self, other: EncodingMap) -> Result<()> {
204
33.5k
        for (key, encoding) in other.encodings {
205
9.13k
            if let Some(prev) = self.encodings.insert(key.clone(), encoding) {
206
17
                if prev != encoding {
207
0
                    bail!("conflicting string encodings specified for `{key}`");
208
17
                }
209
9.11k
            }
210
        }
211
24.3k
        Ok(())
212
24.3k
    }
213
}
214
215
/// This function will parse the core `wasm` binary given as input and return a
216
/// [`Bindgen`] which extracts the custom sections describing component-level
217
/// types from within the binary itself.
218
///
219
/// This is used to parse the output of `wit-bindgen`-generated modules and is
220
/// one of the earliest phases in transitioning such a module to a component.
221
/// The extraction here provides the metadata necessary to continue the process
222
/// later on.
223
///
224
/// This will return an error if `wasm` is not a valid WebAssembly module.
225
///
226
/// If a `component-type` custom section was found then a new binary is
227
/// optionally returned with the custom sections stripped out. If no
228
/// `component-type` custom sections are found then `None` is returned.
229
8.06k
pub fn decode(wasm: &[u8]) -> Result<(Option<Vec<u8>>, Bindgen)> {
230
8.06k
    let mut ret = Bindgen::default();
231
8.06k
    let mut new_module = wasm_encoder::Module::new();
232
233
8.06k
    let mut found_custom = false;
234
88.1k
    for payload in wasmparser::Parser::new(0).parse_all(wasm) {
235
88.1k
        let payload = payload.context("decoding item in module")?;
236
8.06k
        match payload {
237
8.06k
            wasmparser::Payload::CustomSection(cs) if cs.name().starts_with("component-type") => {
238
8.06k
                let data = Bindgen::decode_custom_section(cs.data())
239
8.06k
                    .with_context(|| format!("decoding custom section {}", cs.name()))?;
240
8.06k
                ret.merge(data)
241
8.06k
                    .with_context(|| format!("updating metadata for section {}", cs.name()))?;
242
8.06k
                found_custom = true;
243
            }
244
8.06k
            wasmparser::Payload::Version { encoding, .. } if encoding != Encoding::Module => {
245
0
                bail!("decoding a component is not supported")
246
            }
247
            _ => {
248
80.0k
                if let Some((id, range)) = payload.as_section() {
249
43.1k
                    new_module.section(&wasm_encoder::RawSection {
250
43.1k
                        id,
251
43.1k
                        data: &wasm[range],
252
43.1k
                    });
253
43.1k
                }
254
            }
255
        }
256
    }
257
258
8.06k
    if found_custom {
259
8.06k
        Ok((Some(new_module.finish()), ret))
260
    } else {
261
0
        Ok((None, ret))
262
    }
263
8.06k
}
264
265
/// Creates a `component-type*` custom section to be decoded by `decode` above.
266
///
267
/// This is primarily created by wit-bindgen-based guest generators to embed
268
/// into the final core wasm binary. The core wasm binary is later fed
269
/// through `wit-component` to produce the actual component where this returned
270
/// section will be decoded.
271
4.03k
pub fn encode(
272
4.03k
    resolve: &Resolve,
273
4.03k
    world: WorldId,
274
4.03k
    string_encoding: StringEncoding,
275
4.03k
    extra_producers: Option<&Producers>,
276
4.03k
) -> Result<Vec<u8>> {
277
4.03k
    let ty = crate::encoding::encode_world(resolve, world)?;
278
279
4.03k
    let world = &resolve.worlds[world];
280
4.03k
    let mut outer_ty = ComponentType::new();
281
4.03k
    outer_ty.ty().component(&ty);
282
4.03k
    outer_ty.export(
283
4.03k
        &resolve.id_of_name(world.package.unwrap(), &world.name),
284
4.03k
        ComponentTypeRef::Component(0),
285
    );
286
287
4.03k
    let mut builder = ComponentBuilder::default();
288
289
4.03k
    let string_encoding = encode_string_encoding(string_encoding);
290
4.03k
    builder.custom_section(&CustomSection {
291
4.03k
        name: CUSTOM_SECTION_NAME.into(),
292
4.03k
        data: Cow::Borrowed(&[CURRENT_VERSION, string_encoding]),
293
4.03k
    });
294
295
4.03k
    let ty = builder.type_component(None, &outer_ty);
296
4.03k
    builder.export(&world.name, ComponentExportKind::Type, ty, None);
297
298
4.03k
    let mut producers = crate::base_producers();
299
4.03k
    if let Some(p) = extra_producers {
300
0
        producers.merge(&p);
301
4.03k
    }
302
4.03k
    builder.raw_custom_section(&producers.raw_custom_section());
303
4.03k
    Ok(builder.finish())
304
4.03k
}
305
306
8.06k
fn decode_custom_section(wasm: &[u8]) -> Result<(Resolve, WorldId, StringEncoding)> {
307
8.06k
    let (resolve, world) = wit_parser::decoding::decode_world(wasm)?;
308
8.06k
    let mut custom_section = None;
309
310
48.3k
    for payload in Parser::new(0).parse_all(wasm) {
311
48.3k
        match payload? {
312
16.1k
            Payload::CustomSection(s) if s.name() == CUSTOM_SECTION_NAME => {
313
8.06k
                custom_section = Some(s.data());
314
8.06k
            }
315
40.3k
            _ => {}
316
        }
317
    }
318
8.06k
    let string_encoding = match custom_section {
319
0
        None => bail!("missing custom section of name `{CUSTOM_SECTION_NAME}`"),
320
8.06k
        Some([CURRENT_VERSION, byte]) => decode_string_encoding(*byte)?,
321
0
        Some([]) => bail!("custom section `{CUSTOM_SECTION_NAME}` in unknown format"),
322
0
        Some([version, ..]) => bail!(
323
0
            "custom section `{CUSTOM_SECTION_NAME}` uses format {version} but only {CURRENT_VERSION} is supported"
324
        ),
325
    };
326
8.06k
    Ok((resolve, world, string_encoding))
327
8.06k
}
328
329
4.03k
fn encode_string_encoding(e: StringEncoding) -> u8 {
330
4.03k
    match e {
331
4.03k
        StringEncoding::UTF8 => 0x00,
332
0
        StringEncoding::UTF16 => 0x01,
333
0
        StringEncoding::CompactUTF16 => 0x02,
334
    }
335
4.03k
}
336
337
8.06k
fn decode_string_encoding(byte: u8) -> Result<StringEncoding> {
338
8.06k
    match byte {
339
8.06k
        0x00 => Ok(StringEncoding::UTF8),
340
0
        0x01 => Ok(StringEncoding::UTF16),
341
0
        0x02 => Ok(StringEncoding::CompactUTF16),
342
0
        byte => bail!("invalid string encoding {byte:#x}"),
343
    }
344
8.06k
}
345
346
impl Bindgen {
347
8.06k
    fn decode_custom_section(data: &[u8]) -> Result<Bindgen> {
348
        let wasm;
349
        let world;
350
        let resolve;
351
        let encoding;
352
353
8.06k
        let mut reader = BinaryReader::new(data, 0);
354
8.06k
        match reader.read_u8()? {
355
            // Historical 0x03 format where the support here will be deleted in
356
            // the future
357
            0x03 => {
358
0
                encoding = decode_string_encoding(reader.read_u8()?)?;
359
0
                let world_name = reader.read_string()?;
360
0
                wasm = &data[reader.original_position()..];
361
362
0
                let (r, pkg) = match crate::decode(wasm)? {
363
0
                    DecodedWasm::WitPackage(resolve, pkgs) => (resolve, pkgs),
364
0
                    DecodedWasm::Component(..) => bail!("expected encoded wit package(s)"),
365
                };
366
0
                resolve = r;
367
0
                world = resolve.select_world(&[pkg], Some(world_name.into()))?;
368
            }
369
370
            // Current format where `data` is a wasm component itself.
371
            _ => {
372
8.06k
                wasm = data;
373
8.06k
                (resolve, world, encoding) = decode_custom_section(wasm)?;
374
            }
375
        }
376
377
        Ok(Bindgen {
378
8.06k
            metadata: ModuleMetadata::new(&resolve, world, encoding),
379
8.06k
            producers: wasm_metadata::Producers::from_wasm(wasm)?,
380
8.06k
            resolve,
381
8.06k
            world,
382
        })
383
8.06k
    }
384
385
    /// Merges another `BindgenMetadata` into this one.
386
    ///
387
    /// This operation is intended to be akin to "merging worlds" when the
388
    /// abstraction level for that is what we're working at here. For now the
389
    /// merge operation only succeeds if the two metadata descriptions are
390
    /// entirely disjoint.
391
    ///
392
    /// Note that at this time there's no support for changing string encodings
393
    /// between metadata.
394
    ///
395
    /// This function returns the set of exports that the main world of
396
    /// `other` added to the world in `self`.
397
12.1k
    pub fn merge(&mut self, other: Bindgen) -> Result<IndexSet<WorldKey>> {
398
        let Bindgen {
399
12.1k
            resolve,
400
12.1k
            world,
401
            metadata:
402
                ModuleMetadata {
403
12.1k
                    import_encodings,
404
12.1k
                    export_encodings,
405
                },
406
12.1k
            producers,
407
12.1k
        } = other;
408
409
12.1k
        let remap = self
410
12.1k
            .resolve
411
12.1k
            .merge(resolve)
412
12.1k
            .context("failed to merge WIT package sets together")?;
413
12.1k
        let world = remap.map_world(world, None)?;
414
12.1k
        let exports = self.resolve.worlds[world].exports.keys().cloned().collect();
415
12.1k
        self.resolve
416
12.1k
            .merge_worlds(world, self.world, &mut CloneMaps::default())
417
12.1k
            .context("failed to merge worlds from two documents")?;
418
419
12.1k
        self.metadata.import_encodings.merge(import_encodings)?;
420
12.1k
        self.metadata.export_encodings.merge(export_encodings)?;
421
12.1k
        if let Some(producers) = producers {
422
12.1k
            if let Some(mine) = &mut self.producers {
423
89
                mine.merge(&producers);
424
12.0k
            } else {
425
12.0k
                self.producers = Some(producers);
426
12.0k
            }
427
0
        }
428
429
12.1k
        Ok(exports)
430
12.1k
    }
431
}
432
433
impl ModuleMetadata {
434
    /// Creates a new `ModuleMetadata` instance holding the given set of
435
    /// interfaces which are expected to all use the `encoding` specified.
436
8.06k
    pub fn new(resolve: &Resolve, world: WorldId, encoding: StringEncoding) -> ModuleMetadata {
437
8.06k
        let mut ret = ModuleMetadata::default();
438
439
8.06k
        let world = &resolve.worlds[world];
440
8.06k
        ret.export_encodings
441
8.06k
            .insert_all(resolve, &world.exports, encoding);
442
8.06k
        ret.import_encodings
443
8.06k
            .insert_all(resolve, &world.imports, encoding);
444
445
8.06k
        ret
446
8.06k
    }
447
}