/src/wasm-tools/crates/wit-component/src/encoding/dedupe.rs
Line | Count | Source |
1 | | //! Support for deduplicating module imports when turning them into a component. |
2 | | //! |
3 | | //! Core wasm allows for duplicate imports of the same name/field in a core wasm |
4 | | //! module. This is not allowed in the component model, however, meaning that |
5 | | //! such a core wasm module cannot be inserted into a component as-is. This |
6 | | //! module is tasked with tackling this problem. |
7 | | //! |
8 | | //! The general idea of this module is to rewrite imports in-place to a |
9 | | //! different and unique name. The original name is then recorded in |
10 | | //! [`ModuleImportMap`] which is then plumbed through to the location that |
11 | | //! classifies all imports. The classification then uses the original names for |
12 | | //! what an import should be resolved to but records it under the unique names |
13 | | //! generated here. |
14 | | |
15 | | use anyhow::Result; |
16 | | use std::borrow::Cow; |
17 | | use std::collections::HashMap; |
18 | | use std::fmt::Write; |
19 | | use wasm_encoder::reencode::{Reencode, RoundtripReencoder}; |
20 | | use wasm_encoder::{ImportSection, RawSection}; |
21 | | use wasmparser::{Parser, Payload::*}; |
22 | | |
/// A map of current names (possibly new) to original names, if any.
///
/// Built by `ModuleImportMap::new` while scanning a core module's import
/// section; consulted afterwards through `ModuleImportMap::original_name`.
#[derive(Debug, Default)]
pub struct ModuleImportMap {
    /// A two-level map for what names map to what.
    ///
    /// * The first level of the map is the "module" field, or the first string,
    ///   in a wasm import.
    /// * The second level of the map is the "name" field, or the second string,
    ///   in a wasm import, as it appears in the (possibly rewritten) module.
    /// * The value of the second level is the name this import had in the
    ///   original module. `None` means the import was not renamed, so the
    ///   original name is the hash map key itself. `Some(original)` means the
    ///   hash map key is a freshly generated unique name and `original` is
    ///   what the import was called before it was renamed.
    ///
    /// This map is built during `ModuleImportMap::new` and serves double-duty
    /// to actually track duplicate imports.
    renamed_to_original: HashMap<String, HashMap<String, Option<String>>>,
}
40 | | |
41 | | impl ModuleImportMap { |
42 | | /// Creates a new [`ModuleImportMap`] tracking duplicate imports, if any, |
43 | | /// from the `wasm` provided. |
44 | | /// |
45 | | /// Upon success a new wasm binary (possibly the same as the original) is |
46 | | /// returned which is the new source of truth for this module. If duplicate |
47 | | /// imports were found then a [`ModuleImportMap`] is returned, otherwise |
48 | | /// `None` is returned. |
49 | | /// |
50 | | /// # Errors |
51 | | /// |
52 | | /// Returns an error if `wasm` could not be parsed as a wasm module. |
53 | 3.74k | pub fn new<'a>(wasm: Cow<'a, [u8]>) -> Result<(Cow<'a, [u8]>, Option<ModuleImportMap>)> { |
54 | 3.74k | let mut module = wasm_encoder::Module::new(); |
55 | 3.74k | let mut ret = ModuleImportMap::default(); |
56 | 3.74k | let mut found_duplicate_imports = false; |
57 | | |
58 | 37.3k | for payload in Parser::new(0).parse_all(&wasm) { |
59 | 37.3k | let payload = payload?; |
60 | 3.74k | match &payload { |
61 | 3.74k | Version { encoding, .. } if *encoding == wasmparser::Encoding::Component => { |
62 | | // if this is a component let someone else deal with the |
63 | | // error, we'll punt that up the stack. |
64 | 0 | assert!(!found_duplicate_imports); |
65 | 0 | break; |
66 | | } |
67 | | |
68 | | // This is the section we're interested in. Go over each import |
69 | | // and delegate to `ModuleImportMap::push_import` for figuring |
70 | | // out what to do with this import. At the end put the new |
71 | | // import section in the `module` we're building. |
72 | 1.33k | ImportSection(i) => { |
73 | 1.33k | let mut new_import_section = ImportSection::new(); |
74 | 10.8k | for import in i.clone().into_imports() { |
75 | 10.8k | found_duplicate_imports = ret |
76 | 10.8k | .push_import(&mut new_import_section, import?)? |
77 | 10.8k | || found_duplicate_imports; |
78 | | } |
79 | 1.33k | module.section(&new_import_section); |
80 | | } |
81 | | |
82 | | // All other sections get plumbed through as-is. This ensures we |
83 | | // don't tamper with binary offsets anywhere in the module |
84 | | // except the import section, for example. |
85 | | _ => { |
86 | 36.0k | if let Some((id, range)) = payload.as_section() { |
87 | 18.7k | module.section(&RawSection { |
88 | 18.7k | id, |
89 | 18.7k | data: &wasm[range], |
90 | 18.7k | }); |
91 | 18.7k | } |
92 | | } |
93 | | } |
94 | | } |
95 | | |
96 | 3.74k | if found_duplicate_imports { |
97 | 1 | Ok((module.finish().into(), Some(ret))) |
98 | | } else { |
99 | 3.74k | Ok((wasm, None)) |
100 | | } |
101 | 3.74k | } |
102 | | |
103 | | /// Returns `Ok(true)` if this is a duplicate import, or `Ok(false)` if it's |
104 | | /// a unique import for the first time. |
105 | 10.8k | fn push_import( |
106 | 10.8k | &mut self, |
107 | 10.8k | new_import_section: &mut ImportSection, |
108 | 10.8k | import: wasmparser::Import<'_>, |
109 | 10.8k | ) -> Result<bool> { |
110 | 10.8k | let module_map = self |
111 | 10.8k | .renamed_to_original |
112 | 10.8k | .entry(import.module.to_string()) |
113 | 10.8k | .or_insert(HashMap::new()); |
114 | | |
115 | | // If this import hasn't yet been seen, then great! Record that it is |
116 | | // using its original name and encode it with its original name. |
117 | 10.8k | if !module_map.contains_key(import.name) { |
118 | 10.8k | let prev = module_map.insert(import.name.to_string(), None); |
119 | 10.8k | assert!(prev.is_none()); |
120 | 10.8k | RoundtripReencoder.parse_import(new_import_section, import)?; |
121 | 10.8k | return Ok(false); |
122 | 9 | } |
123 | | |
124 | | // FIXME: this is technically O(n^2), but it's also only applicable when |
125 | | // a module has lots of imports, and surely that won't happen often... |
126 | | // right? ... right? |
127 | | // |
128 | | // If this isn't fixed by the time someone reads this and is angry about |
129 | | // O(n^2), sorry. |
130 | | // |
131 | | // Otherwise the purpose of this loop is to determine a unique name for |
132 | | // the new import, something that hasn't previously been seen before. |
133 | 9 | let mut new_name = import.name.to_string(); |
134 | 9 | for i in 2.. { |
135 | 9 | new_name.truncate(import.name.len()); |
136 | 9 | write!(new_name, " [v{i}]").unwrap(); |
137 | 9 | if !module_map.contains_key(&new_name) { |
138 | 9 | break; |
139 | 0 | } |
140 | | } |
141 | | |
142 | | // Now that `new_name` is unique, record the import in the new import |
143 | | // section. |
144 | 9 | let mut new_import = import; |
145 | 9 | new_import.name = &new_name; |
146 | 9 | RoundtripReencoder.parse_import(new_import_section, new_import)?; |
147 | | |
148 | | // Also record that `new_name` was originally known as `import.name` |
149 | | // for later lookup in `original_names` below. |
150 | 9 | let prev = module_map.insert(new_name, Some(import.name.to_string())); |
151 | 9 | assert!(prev.is_none()); |
152 | 9 | Ok(true) |
153 | 10.8k | } |
154 | | |
155 | | /// Returns the original `name` that `import` should use, if any. |
156 | | /// |
157 | | /// If `None` is returned then `import.name` should be used. |
158 | 97 | pub fn original_name(&self, import: &wasmparser::Import<'_>) -> Option<&str> { |
159 | 97 | self.renamed_to_original |
160 | 97 | .get(import.module)? |
161 | 97 | .get(import.name)? |
162 | 97 | .as_deref() |
163 | 97 | } |
164 | | } |