/rust/registry/src/index.crates.io-6f17d22bba15001f/wasmparser-0.121.2/src/validator/core/canonical.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Canonicalization of types. |
2 | | //! |
3 | | //! The unit of canonicalization is a recursion group. Having "unnecessary" |
4 | | //! types in a recursion group can "break" canonicalization of other types |
5 | | //! within that same recursion group, as can reordering types within a recursion |
6 | | //! group. |
7 | | //! |
8 | | //! It is an invariant that all types defined before the recursion group we are |
9 | | //! currently canonicalizing have already been canonicalized themselves. |
10 | | //! |
11 | | //! Canonicalizing a recursion group then proceeds as follows: |
12 | | //! |
13 | | //! * First we walk each of its `SubType` elements and put their type references |
14 | | //! (i.e. their `PackedIndex`es) into canonical form. Canonicalizing a |
15 | | //! `PackedIndex` means switching it from indexing into the Wasm module's |
16 | | //! types space into either |
17 | | //! |
18 | | //! 1. Referencing an already-canonicalized type, for types outside of this |
19 | | //! recursion group. Because inter-group type references can only go |
20 | | //! towards types defined before this recursion group, we know the type is |
21 | | //! already canonicalized and we have a `CoreTypeId` for each of those |
22 | | //! types. This updates the `PackedIndex` into a `CoreTypeId`. |
23 | | //! |
24 | | //! 2. Indexing into the current recursion group, for intra-group type |
25 | | //! references. |
26 | | //! |
27 | | //! Note that (2) has the effect of making the "same" structure of mutual type |
28 | | //! recursion look identical across recursion groups: |
29 | | //! |
30 | | //! ```wat |
31 | | //! ;; Before |
32 | | //! (rec (struct (field (module-type 1))) (struct (field (module-type 0)))) |
33 | | //! (rec (struct (field (module-type 3))) (struct (field (module-type 2)))) |
34 | | //! |
35 | | //! ;; After |
36 | | //! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0)))) |
37 | | //! (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0)))) |
38 | | //! ``` |
39 | | //! |
40 | | //! * Now that the recursion group's elements are in canonical form, we can |
41 | | //! "simply" hash cons whole rec groups at a time. The `TypesList` morally |
42 | | //! maintains a hash map from `Vec<SubType>` to `RecGroupId` and we can do |
43 | | //! get-or-create operations on it. I say "morally" because we don't actually |
44 | | //! duplicate the `Vec<SubType>` key in that hash map since those elements are |
45 | | //! already stored in the `TypeList`'s internal `SnapshotList<CoreType>`. This |
46 | | //! means we need to do some low-level hash table fiddling with the |
47 | | //! `hashbrown` crate. |
48 | | //! |
49 | | //! And that's it! That is the whole canonicalization algorithm. |
50 | | //! |
51 | | //! Some more random things to note: |
52 | | //! |
53 | | //! * Because we essentially already have to do the check to canonicalize, and |
54 | | //! to avoid additional passes over the types, the canonicalization pass also |
55 | | //! checks that type references are in bounds. These are the only errors that |
56 | | //! can be returned from canonicalization. |
57 | | //! |
58 | | //! * Canonicalizing requires the `Module` to translate type indices to |
59 | | //! actual `CoreTypeId`s. |
60 | | //! |
61 | | //! * It is important that *after* we have canonicalized all types, we don't |
62 | | //! need the `Module` anymore. This makes sure that we can, for example, |
63 | | //! intern all types from the same store into the same `TypeList`. Which in |
64 | | //! turn lets us type check function imports of a same-store instance's |
65 | | //! exported functions and we don't need to translate from one module's |
66 | | //! canonical representation to another module's canonical representation or |
67 | | //! perform additional expensive checks to see if the types match or not |
68 | | //! (since the whole point of canonicalization is to avoid that!). |
69 | | |
70 | | use super::{Module, RecGroupId, TypeAlloc}; |
71 | | use crate::{ |
72 | | types::{CoreTypeId, TypeIdentifier}, |
73 | | PackedIndex, RecGroup, Result, UnpackedIndex, WasmFeatures, |
74 | | }; |
75 | | |
76 | | /// Canonicalize the rec group and return its id and whether it is a new group |
77 | | /// (we added its types to the `TypeAlloc`) or not (we deduplicated it with an |
78 | | /// existing canonical rec group). |
79 | 181k | pub(crate) fn canonicalize_and_intern_rec_group( |
80 | 181k | features: &WasmFeatures, |
81 | 181k | types: &mut TypeAlloc, |
82 | 181k | module: &Module, |
83 | 181k | mut rec_group: RecGroup, |
84 | 181k | offset: usize, |
85 | 181k | ) -> Result<(bool, RecGroupId)> { |
86 | 181k | TypeCanonicalizer::new(module, offset) |
87 | 181k | .with_features(features) |
88 | 181k | .canonicalize_rec_group(&mut rec_group)?; |
89 | 181k | Ok(types.intern_canonical_rec_group(rec_group)) |
90 | 181k | } |
91 | | |
92 | | /// The kind of canonicalization we are doing. |
93 | | #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
94 | | enum CanonicalizationMode { |
95 | | /// Standard canonicalization: turns module indices into either (1) |
96 | | /// `CoreTypeId`s for inter-group references or (2) rec-group-local indices |
97 | | /// for intra-group references. |
98 | | HashConsing, |
99 | | |
100 | | /// Turns all type reference indices into `CoreTypeId`s, even from within |
101 | | /// the same rec group. Not useful for hash consing, but useful when |
102 | | /// exposing types to end users so they don't have to deal with |
103 | | /// rec-group-local indices. |
104 | | OnlyIds, |
105 | | } |
106 | | |
107 | | pub(crate) struct TypeCanonicalizer<'a> { |
108 | | module: &'a Module, |
109 | | features: Option<&'a WasmFeatures>, |
110 | | rec_group_start: u32, |
111 | | rec_group_len: u32, |
112 | | offset: usize, |
113 | | mode: CanonicalizationMode, |
114 | | within_rec_group: Option<std::ops::Range<CoreTypeId>>, |
115 | | } |
116 | | |
117 | | impl<'a> TypeCanonicalizer<'a> { |
118 | 181k | pub fn new(module: &'a Module, offset: usize) -> Self { |
119 | 181k | // These defaults will work for when we are canonicalizing types from |
120 | 181k | // outside of a rec group definition, forcing all `PackedIndex`es to be |
121 | 181k | // canonicalized to `CoreTypeId`s. |
122 | 181k | let rec_group_start = u32::MAX; |
123 | 181k | let rec_group_len = 0; |
124 | 181k | |
125 | 181k | Self { |
126 | 181k | module, |
127 | 181k | features: None, |
128 | 181k | rec_group_start, |
129 | 181k | rec_group_len, |
130 | 181k | offset, |
131 | 181k | mode: CanonicalizationMode::HashConsing, |
132 | 181k | within_rec_group: None, |
133 | 181k | } |
134 | 181k | } |
135 | | |
136 | 181k | pub fn with_features(&mut self, features: &'a WasmFeatures) -> &mut Self { |
137 | 181k | debug_assert!(self.features.is_none()); |
138 | 181k | self.features = Some(features); |
139 | 181k | self |
140 | 181k | } |
141 | | |
142 | 0 | fn allow_gc(&self) -> bool { |
143 | 0 | self.features.map_or(true, |f| f.gc) |
144 | 0 | } |
145 | | |
146 | 181k | fn canonicalize_rec_group(&mut self, rec_group: &mut RecGroup) -> Result<()> { |
147 | 181k | // Re-initialize these fields so that we properly canonicalize |
148 | 181k | // intra-rec-group type references into indices into the rec group |
149 | 181k | // rather than as `CoreTypeId`s. |
150 | 181k | self.rec_group_start = u32::try_from(self.module.types.len()).unwrap(); |
151 | 181k | self.rec_group_len = u32::try_from(rec_group.types().len()).unwrap(); |
152 | | |
153 | 181k | for (rec_group_local_index, ty) in rec_group.types_mut().enumerate() { |
154 | 181k | let rec_group_local_index = u32::try_from(rec_group_local_index).unwrap(); |
155 | 181k | let type_index = self.rec_group_start + rec_group_local_index; |
156 | | |
157 | 181k | if let Some(sup) = ty.supertype_idx.as_mut() { |
158 | 0 | if sup.as_module_index().map_or(false, |i| i >= type_index) { |
159 | 0 | bail!(self.offset, "supertypes must be defined before subtypes"); |
160 | 0 | } |
161 | 181k | } |
162 | | |
163 | 181k | ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?; |
164 | | } |
165 | | |
166 | 181k | Ok(()) |
167 | 181k | } |
168 | | |
169 | 0 | fn canonicalize_type_index(&self, ty: &mut PackedIndex) -> Result<()> { |
170 | 0 | match ty.unpack() { |
171 | 0 | UnpackedIndex::Id(_) => return Ok(()), |
172 | 0 | UnpackedIndex::Module(index) => { |
173 | 0 | if index < self.rec_group_start || self.mode == CanonicalizationMode::OnlyIds { |
174 | 0 | let id = self.module.type_id_at(index, self.offset)?; |
175 | 0 | if let Some(id) = PackedIndex::from_id(id) { |
176 | 0 | *ty = id; |
177 | 0 | return Ok(()); |
178 | | } else { |
179 | 0 | bail!( |
180 | 0 | self.offset, |
181 | 0 | "implementation limit: too many types in `TypeList`" |
182 | 0 | ) |
183 | | } |
184 | 0 | } |
185 | 0 |
|
186 | 0 | // When GC is not enabled the `rec_group_len == 1` so any rec group |
187 | 0 | // local type references will be direct self references. But any kind of |
188 | 0 | // type recursion, including self references, is not allowed in the |
189 | 0 | // typed function references proposal, only the GC proposal. |
190 | 0 | debug_assert!(self.allow_gc() || self.rec_group_len == 1); |
191 | 0 | let local = index - self.rec_group_start; |
192 | 0 | if self.allow_gc() && local < self.rec_group_len { |
193 | 0 | if let Some(id) = PackedIndex::from_rec_group_index(local) { |
194 | 0 | *ty = id; |
195 | 0 | return Ok(()); |
196 | | } else { |
197 | 0 | bail!( |
198 | 0 | self.offset, |
199 | 0 | "implementation limit: too many types in a recursion group" |
200 | 0 | ) |
201 | | } |
202 | 0 | } |
203 | 0 |
|
204 | 0 | bail!( |
205 | 0 | self.offset, |
206 | 0 | "unknown type {index}: type index out of bounds" |
207 | 0 | ) |
208 | | } |
209 | 0 | UnpackedIndex::RecGroup(local_index) => match self.mode { |
210 | 0 | CanonicalizationMode::HashConsing => Ok(()), |
211 | | CanonicalizationMode::OnlyIds => { |
212 | 0 | let rec_group_elems = self.within_rec_group.as_ref().expect( |
213 | 0 | "configured to canonicalize all type reference indices to `CoreTypeId`s \ |
214 | 0 | and found rec-group-local index, but missing `within_rec_group` context", |
215 | 0 | ); |
216 | 0 |
|
217 | 0 | let rec_group_len = rec_group_elems.end.index() - rec_group_elems.start.index(); |
218 | 0 | let rec_group_len = u32::try_from(rec_group_len).unwrap(); |
219 | 0 | assert!(local_index < rec_group_len); |
220 | | |
221 | 0 | let rec_group_start = u32::try_from(rec_group_elems.start.index()).unwrap(); |
222 | 0 |
|
223 | 0 | let id = CoreTypeId::from_index(rec_group_start + local_index); |
224 | 0 | *ty = PackedIndex::from_id(id).expect( |
225 | 0 | "should fit in impl limits since we already have the end of the rec group \ |
226 | 0 | constructed successfully", |
227 | 0 | ); |
228 | 0 | Ok(()) |
229 | | } |
230 | | }, |
231 | | } |
232 | 0 | } |
233 | | } |