/src/wasm-tools/crates/wit-component/src/encoding/dedupe.rs

Source
//! Support for deduplicating module imports when turning them into a component.
//!
//! Core wasm allows for duplicate imports of the same name/field in a core wasm
//! module. This is not allowed in the component model, however, meaning that
//! such a core wasm module cannot be inserted into a component as-is. This
//! module is tasked with tackling this problem.
//!
//! The general idea of this module is to rewrite imports in-place to a
//! different and unique name. The original name is then recorded in
//! [`ModuleImportMap`] which is then plumbed through to the location that
//! classifies all imports. The classification then uses the original names for
//! what an import should be resolved to but records it under the unique names
//! generated here.

use anyhow::Result;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use wasm_encoder::reencode::{Reencode, RoundtripReencoder};
use wasm_encoder::{ImportSection, RawSection};
use wasmparser::{Parser, Payload::*};

/// A map of current names (possibly new) to original names, if any.
///
/// The map is keyed by the names that appear in the *rewritten* module and
/// answers the question "what was this import called before deduplication?".
#[derive(Debug, Default)]
pub struct ModuleImportMap {
    /// A two-level map for what names map to what.
    ///
    /// * The first level of the map is the "module" field, or the first string,
    ///   in a wasm import.
    /// * The second level of the map is the "name" field, or the second string,
    ///   in a wasm import, as it appears in the rewritten module.
    /// * The value of the second level is what this name was originally found
    ///   under in the original module. `None` means "same as the hash map key"
    ///   (the import was not renamed) while `Some` holds the original name of
    ///   an import that was renamed to the hash map key.
    ///
    /// This map is built during `ModuleImportMap::new` and serves double-duty
    /// to actually track duplicate imports.
    renamed_to_original: HashMap<String, HashMap<String, Option<String>>>,
}

impl ModuleImportMap {
    /// Creates a new [`ModuleImportMap`] tracking duplicate imports, if any,
    /// from the `wasm` provided.
    ///
    /// Upon success a new wasm binary (possibly the same as the original) is
    /// returned which is the new source of truth for this module. If duplicate
    /// imports were found then a [`ModuleImportMap`] is returned, otherwise
    /// `None` is returned and the input bytes are handed back untouched.
    ///
    /// If `wasm` turns out to be a component rather than a core module, parsing
    /// stops immediately and the input is returned as-is with `None`.
    ///
    /// # Errors
    ///
    /// Returns an error if `wasm` could not be parsed as a wasm module, or if
    /// an import could not be re-encoded.
    pub fn new<'a>(wasm: Cow<'a, [u8]>) -> Result<(Cow<'a, [u8]>, Option<ModuleImportMap>)> {
        let mut module = wasm_encoder::Module::new();
        let mut ret = ModuleImportMap::default();
        let mut found_duplicate_imports = false;

        // For each (module, name) pair that has been seen duplicated, the next
        // `[vN]` suffix worth trying. This lets `push_import` resume its search
        // where the previous duplicate left off instead of restarting at 2.
        let mut next_suffix = HashMap::new();

        for payload in Parser::new(0).parse_all(&wasm) {
            let payload = payload?;
            match &payload {
                Version { encoding, .. } if *encoding == wasmparser::Encoding::Component => {
                    // if this is a component let someone else deal with the
                    // error, we'll punt that up the stack.
                    assert!(!found_duplicate_imports);
                    break;
                }

                // This is the section we're interested in. Go over each import
                // and delegate to `ModuleImportMap::push_import` for figuring
                // out what to do with this import. At the end put the new
                // import section in the `module` we're building.
                ImportSection(i) => {
                    let mut new_import_section = ImportSection::new();
                    for import in i.clone().into_imports() {
                        if ret.push_import(&mut new_import_section, &mut next_suffix, import?)? {
                            found_duplicate_imports = true;
                        }
                    }
                    module.section(&new_import_section);
                }

                // All other sections get plumbed through as-is. This ensures we
                // don't tamper with binary offsets anywhere in the module
                // except the import section, for example.
                _ => {
                    if let Some((id, range)) = payload.as_section() {
                        module.section(&RawSection {
                            id,
                            data: &wasm[range],
                        });
                    }
                }
            }
        }

        // Only hand back the re-encoded module when something actually
        // changed; otherwise the caller keeps the original bytes.
        if found_duplicate_imports {
            Ok((module.finish().into(), Some(ret)))
        } else {
            Ok((wasm, None))
        }
    }

    /// Encodes `import` into `new_import_section`, renaming it first if an
    /// import with the same module/name pair has already been recorded.
    ///
    /// `next_suffix` remembers, per `(module, name)` pair, the next `[vN]`
    /// suffix to try so that repeated duplicates of one import do not rescan
    /// suffixes that are already known to be taken.
    ///
    /// Returns `Ok(true)` if this is a duplicate import, or `Ok(false)` if it's
    /// a unique import for the first time.
    fn push_import(
        &mut self,
        new_import_section: &mut ImportSection,
        next_suffix: &mut HashMap<(String, String), u32>,
        import: wasmparser::Import<'_>,
    ) -> Result<bool> {
        let module_map = self
            .renamed_to_original
            .entry(import.module.to_string())
            .or_default();

        // If this import hasn't yet been seen, then great! Record that it is
        // using its original name and encode it with its original name.
        if !module_map.contains_key(import.name) {
            let prev = module_map.insert(import.name.to_string(), None);
            assert!(prev.is_none());
            RoundtripReencoder.parse_import(new_import_section, import)?;
            return Ok(false);
        }

        // Determine a unique name for the new import, something that hasn't
        // previously been seen before.
        //
        // The search resumes from the suffix after the last one tried for this
        // exact (module, name) pair. Entries are never removed from
        // `module_map`, so every suffix below the cursor is still taken and
        // the name chosen here is the same one a scan from `[v2]` would find,
        // without the quadratic rescan.
        let cursor = next_suffix
            .entry((import.module.to_string(), import.name.to_string()))
            .or_insert(2);
        let mut new_name = import.name.to_string();
        loop {
            let i = *cursor;
            *cursor += 1;
            new_name.truncate(import.name.len());
            write!(new_name, " [v{i}]").expect("writing to a `String` cannot fail");
            if !module_map.contains_key(&new_name) {
                break;
            }
        }

        // Now that `new_name` is unique, record the import in the new import
        // section.
        let mut new_import = import;
        new_import.name = &new_name;
        RoundtripReencoder.parse_import(new_import_section, new_import)?;

        // Also record that `new_name` was originally known as `import.name`
        // for later lookup in `original_name` below.
        let prev = module_map.insert(new_name, Some(import.name.to_string()));
        assert!(prev.is_none());
        Ok(true)
    }

    /// Returns the original `name` that `import` should use, if any.
    ///
    /// `import` is looked up by its module and name as they appear in the
    /// rewritten module. If `None` is returned then `import.name` should be
    /// used.
    pub fn original_name(&self, import: &wasmparser::Import<'_>) -> Option<&str> {
        self.renamed_to_original
            .get(import.module)?
            .get(import.name)?
            .as_deref()
    }
}

Line	Count	Source
1		//! Support for deduplicating module imports when turning them into a component.
2		//!
3		//! Core wasm allows for duplicate imports of the same name/field in a core wasm
4		//! module. This is not allowed in the component model, however, meaning that
5		//! such a core wasm module cannot be inserted into a component as-is. This
6		//! module is tasked with tackling this problem.
7		//!
8		//! The general idea of this module is to rewrite imports in-place to a
9		//! different and unique name. The original name is then recorded in
10		//! [`ModuleImportMap`] which is then plumbed through to the location that
11		//! classifies all imports. The classification then uses the original names for
12		//! what an import should be resolved to but records it under the unique names
13		//! generated here.
14
15		use anyhow::Result;
16		use std::borrow::Cow;
17		use std::collections::HashMap;
18		use std::fmt::Write;
19		use wasm_encoder::reencode::{Reencode, RoundtripReencoder};
20		use wasm_encoder::{ImportSection, RawSection};
21		use wasmparser::{Parser, Payload::*};
22
23		/// A map of current names (possibly new) to original names, if any.
24		#[derive(Default)]
25		pub struct ModuleImportMap {
26		/// A two-level map for what names map to what.
27		///
28		/// * The first level of the map is the "module" field, or the first string,
29		/// in a wasm import.
30		/// * The second level of the map is the "name" field, or the second string,
31		/// in a wasm import.
32		/// * The value of the second level is what this name was originally found
33		/// under in the original module. `None` means "same as the hash map key"
34		/// while `Some` means "it's this new name".
35		///
36		/// This map is built during `ModuleImportMap::new` and serves double-duty
37		/// to actually track duplicate imports.
38		renamed_to_original: HashMap<String, HashMap<String, Option<String>>>,
39		}
40
41		impl ModuleImportMap {
42		/// Creates a new [`ModuleImportMap`] tracking duplicate imports, if any,
43		/// from the `wasm` provided.
44		///
45		/// Upon success a new wasm binary (possibly the same as the original) is
46		/// returned which is the new source of truth for this module. If duplicate
47		/// imports were found then a [`ModuleImportMap`] is returned, otherwise
48		/// `None` is returned.
49		///
50		/// # Errors
51		///
52		/// Returns an error if `wasm` could not be parsed as a wasm module.
53	3.74k	pub fn new<'a>(wasm: Cow<'a, [u8]>) -> Result<(Cow<'a, [u8]>, Option<ModuleImportMap>)> {
54	3.74k	let mut module = wasm_encoder::Module::new();
55	3.74k	let mut ret = ModuleImportMap::default();
56	3.74k	let mut found_duplicate_imports = false;
57
58	37.3k	for payload in Parser::new(0).parse_all(&wasm) {
59	37.3k	let payload = payload?;
60	3.74k	match &payload {
61	3.74k	Version { encoding, .. } if *encoding == wasmparser::Encoding::Component => {
62		// if this is a component let someone else deal with the
63		// error, we'll punt that up the stack.
64	0	assert!(!found_duplicate_imports);
65	0	break;
66		}
67
68		// This is the section we're interested in. Go over each import
69		// and delegate to `ModuleImportMap::push_import` for figuring
70		// out what to do with this import. At the end put the new
71		// import section in the `module` we're building.
72	1.33k	ImportSection(i) => {
73	1.33k	let mut new_import_section = ImportSection::new();
74	10.8k	for import in i.clone().into_imports() {
75	10.8k	found_duplicate_imports = ret
76	10.8k	.push_import(&mut new_import_section, import?)?
77	10.8k	\|\| found_duplicate_imports;
78		}
79	1.33k	module.section(&new_import_section);
80		}
81
82		// All other sections get plumbed through as-is. This ensures we
83		// don't tamper with binary offsets anywhere in the module
84		// except the import section, for example.
85		_ => {
86	36.0k	if let Some((id, range)) = payload.as_section() {
87	18.7k	module.section(&RawSection {
88	18.7k	id,
89	18.7k	data: &wasm[range],
90	18.7k	});
91	18.7k	}
92		}
93		}
94		}
95
96	3.74k	if found_duplicate_imports {
97	1	Ok((module.finish().into(), Some(ret)))
98		} else {
99	3.74k	Ok((wasm, None))
100		}
101	3.74k	}
102
103		/// Returns `Ok(true)` if this is a duplicate import, or `Ok(false)` if it's
104		/// a unique import for the first time.
105	10.8k	fn push_import(
106	10.8k	&mut self,
107	10.8k	new_import_section: &mut ImportSection,
108	10.8k	import: wasmparser::Import<'_>,
109	10.8k	) -> Result<bool> {
110	10.8k	let module_map = self
111	10.8k	.renamed_to_original
112	10.8k	.entry(import.module.to_string())
113	10.8k	.or_insert(HashMap::new());
114
115		// If this import hasn't yet been seen, then great! Record that it is
116		// using its original name and encode it with its original name.
117	10.8k	if !module_map.contains_key(import.name) {
118	10.8k	let prev = module_map.insert(import.name.to_string(), None);
119	10.8k	assert!(prev.is_none());
120	10.8k	RoundtripReencoder.parse_import(new_import_section, import)?;
121	10.8k	return Ok(false);
122	9	}
123
124		// FIXME: this is technically O(n^2), but it's also only applicable when
125		// a module has lots of imports, and surely that won't happen often...
126		// right? ... right?
127		//
128		// If this isn't fixed by the time someone reads this and is angry about
129		// O(n^2), sorry.
130		//
131		// Otherwise the purpose of this loop is to determine a unique name for
132		// the new import, something that hasn't previously been seen before.
133	9	let mut new_name = import.name.to_string();
134	9	for i in 2.. {
135	9	new_name.truncate(import.name.len());
136	9	write!(new_name, " [v{i}]").unwrap();
137	9	if !module_map.contains_key(&new_name) {
138	9	break;
139	0	}
140		}
141
142		// Now that `new_name` is unique, record the import in the new import
143		// section.
144	9	let mut new_import = import;
145	9	new_import.name = &new_name;
146	9	RoundtripReencoder.parse_import(new_import_section, new_import)?;
147
148		// Also record that `new_name` was originally known as `import.name`
149		// for later lookup in `original_names` below.
150	9	let prev = module_map.insert(new_name, Some(import.name.to_string()));
151	9	assert!(prev.is_none());
152	9	Ok(true)
153	10.8k	}
154
155		/// Returns the original `name` that `import` should use, if any.
156		///
157		/// If `None` is returned then `import.name` should be used.
158	97	pub fn original_name(&self, import: &wasmparser::Import<'_>) -> Option<&str> {
159	97	self.renamed_to_original
160	97	.get(import.module)?
161	97	.get(import.name)?
162	97	.as_deref()
163	97	}
164		}

Coverage Report

Created: 2026-04-24 07:45