Coverage Report

Created: 2024-10-16 07:58

/rust/registry/src/index.crates.io-6f17d22bba15001f/wasmparser-0.121.2/src/validator/core/canonical.rs
Line
Count
Source (jump to first uncovered line)
1
//! Canonicalization of types.
2
//!
3
//! The unit of canonicalization is a recursion group. Having "unnecessary"
4
//! types in a recursion group can "break" canonicalization of other types
5
//! within that same recursion group, as can reordering types within a recursion
6
//! group.
7
//!
8
//! It is an invariant that all types defined before the recursion group we are
9
//! currently canonicalizing have already been canonicalized themselves.
10
//!
11
//! Canonicalizing a recursion group then proceeds as follows:
12
//!
13
//! * First we walk each of its `SubType` elements and put their type references
14
//!   (i.e. their `PackedIndex`es) into canonical form. Canonicalizing a
15
//!   `PackedIndex` means switching it from indexing into the Wasm module's
16
//!   types space into either
17
//!
18
//!   1. Referencing an already-canonicalized type, for types outside of this
19
//!      recursion group. Because inter-group type references can only go
20
//!      towards types defined before this recursion group, we know the type is
21
//!      already canonicalized and we have a `CoreTypeId` for each of those
22
//!      types. This updates the `PackedIndex` into a `CoreTypeId`.
23
//!
24
//!   2. Indexing into the current recursion group, for intra-group type
25
//!      references.
26
//!
27
//!   Note that (2) has the effect of making the "same" structure of mutual type
28
//!   recursion look identical across recursion groups:
29
//!
30
//!   ```wat
31
//!   ;; Before
32
//!   (rec (struct (field (module-type 1))) (struct (field (module-type 0))))
33
//!   (rec (struct (field (module-type 3))) (struct (field (module-type 2))))
34
//!
35
//!   ;; After
36
//!   (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0))))
37
//!   (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0))))
38
//!   ```
39
//!
40
//! * Now that the recursion group's elements are in canonical form, we can
41
//!   "simply" hash cons whole rec groups at a time. The `TypesList` morally
42
//!   maintains a hash map from `Vec<SubType>` to `RecGroupId` and we can do
43
//!   get-or-create operations on it. I say "morally" because we don't actually
44
//!   duplicate the `Vec<SubType>` key in that hash map since those elements are
45
//!   already stored in the `TypeList`'s internal `SnapshotList<CoreType>`. This
46
//!   means we need to do some low-level hash table fiddling with the
47
//!   `hashbrown` crate.
48
//!
49
//! And that's it! That is the whole canonicalization algorithm.
50
//!
51
//! Some more random things to note:
52
//!
53
//! * Because we essentially already have to do the check to canonicalize, and
54
//!   to avoid additional passes over the types, the canonicalization pass also
55
//!   checks that type references are in bounds. These are the only errors that
56
//!   can be returned from canonicalization.
57
//!
58
//! * Canonicalizing requires the `Module` to translate type indices to
59
//!   actual `CoreTypeId`s.
60
//!
61
//! * It is important that *after* we have canonicalized all types, we don't
62
//!   need the `Module` anymore. This makes sure that we can, for example,
63
//!   intern all types from the same store into the same `TypeList`. Which in
64
//!   turn lets us type check function imports of a same-store instance's
65
//!   exported functions and we don't need to translate from one module's
66
//!   canonical representation to another module's canonical representation or
67
//!   perform additional expensive checks to see if the types match or not
68
//!   (since the whole point of canonicalization is to avoid that!).
69
70
use super::{Module, RecGroupId, TypeAlloc};
71
use crate::{
72
    types::{CoreTypeId, TypeIdentifier},
73
    PackedIndex, RecGroup, Result, UnpackedIndex, WasmFeatures,
74
};
75
76
/// Canonicalize the rec group and return its id and whether it is a new group
77
/// (we added its types to the `TypeAlloc`) or not (we deduplicated it with an
78
/// existing canonical rec group).
79
181k
pub(crate) fn canonicalize_and_intern_rec_group(
80
181k
    features: &WasmFeatures,
81
181k
    types: &mut TypeAlloc,
82
181k
    module: &Module,
83
181k
    mut rec_group: RecGroup,
84
181k
    offset: usize,
85
181k
) -> Result<(bool, RecGroupId)> {
86
181k
    TypeCanonicalizer::new(module, offset)
87
181k
        .with_features(features)
88
181k
        .canonicalize_rec_group(&mut rec_group)?;
89
181k
    Ok(types.intern_canonical_rec_group(rec_group))
90
181k
}
91
92
/// The kind of canonicalization we are doing.
93
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
94
enum CanonicalizationMode {
95
    /// Standard canonicalization: turns module indices into either (1)
96
    /// `CoreTypeId`s for inter-group references or (2) rec-group-local indices
97
    /// for intra-group references.
98
    HashConsing,
99
100
    /// Turns all type reference indices into `CoreTypeId`s, even from within
101
    /// the same rec group. Not useful for hash consing, but useful when
102
    /// exposing types to end users so they don't have to deal with
103
    /// rec-group-local indices.
104
    OnlyIds,
105
}
106
107
pub(crate) struct TypeCanonicalizer<'a> {
108
    module: &'a Module,
109
    features: Option<&'a WasmFeatures>,
110
    rec_group_start: u32,
111
    rec_group_len: u32,
112
    offset: usize,
113
    mode: CanonicalizationMode,
114
    within_rec_group: Option<std::ops::Range<CoreTypeId>>,
115
}
116
117
impl<'a> TypeCanonicalizer<'a> {
118
181k
    pub fn new(module: &'a Module, offset: usize) -> Self {
119
181k
        // These defaults will work for when we are canonicalizing types from
120
181k
        // outside of a rec group definition, forcing all `PackedIndex`es to be
121
181k
        // canonicalized to `CoreTypeId`s.
122
181k
        let rec_group_start = u32::MAX;
123
181k
        let rec_group_len = 0;
124
181k
125
181k
        Self {
126
181k
            module,
127
181k
            features: None,
128
181k
            rec_group_start,
129
181k
            rec_group_len,
130
181k
            offset,
131
181k
            mode: CanonicalizationMode::HashConsing,
132
181k
            within_rec_group: None,
133
181k
        }
134
181k
    }
135
136
181k
    pub fn with_features(&mut self, features: &'a WasmFeatures) -> &mut Self {
137
181k
        debug_assert!(self.features.is_none());
138
181k
        self.features = Some(features);
139
181k
        self
140
181k
    }
141
142
0
    fn allow_gc(&self) -> bool {
143
0
        self.features.map_or(true, |f| f.gc)
144
0
    }
145
146
181k
    fn canonicalize_rec_group(&mut self, rec_group: &mut RecGroup) -> Result<()> {
147
181k
        // Re-initialize these fields so that we properly canonicalize
148
181k
        // intra-rec-group type references into indices into the rec group
149
181k
        // rather than as `CoreTypeId`s.
150
181k
        self.rec_group_start = u32::try_from(self.module.types.len()).unwrap();
151
181k
        self.rec_group_len = u32::try_from(rec_group.types().len()).unwrap();
152
153
181k
        for (rec_group_local_index, ty) in rec_group.types_mut().enumerate() {
154
181k
            let rec_group_local_index = u32::try_from(rec_group_local_index).unwrap();
155
181k
            let type_index = self.rec_group_start + rec_group_local_index;
156
157
181k
            if let Some(sup) = ty.supertype_idx.as_mut() {
158
0
                if sup.as_module_index().map_or(false, |i| i >= type_index) {
159
0
                    bail!(self.offset, "supertypes must be defined before subtypes");
160
0
                }
161
181k
            }
162
163
181k
            ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?;
164
        }
165
166
181k
        Ok(())
167
181k
    }
168
169
0
    fn canonicalize_type_index(&self, ty: &mut PackedIndex) -> Result<()> {
170
0
        match ty.unpack() {
171
0
            UnpackedIndex::Id(_) => return Ok(()),
172
0
            UnpackedIndex::Module(index) => {
173
0
                if index < self.rec_group_start || self.mode == CanonicalizationMode::OnlyIds {
174
0
                    let id = self.module.type_id_at(index, self.offset)?;
175
0
                    if let Some(id) = PackedIndex::from_id(id) {
176
0
                        *ty = id;
177
0
                        return Ok(());
178
                    } else {
179
0
                        bail!(
180
0
                            self.offset,
181
0
                            "implementation limit: too many types in `TypeList`"
182
0
                        )
183
                    }
184
0
                }
185
0
186
0
                // When GC is not enabled the `rec_group_len == 1` so any rec group
187
0
                // local type references will be direct self references. But any kind of
188
0
                // type recursion, including self references, is not allowed in the
189
0
                // typed function references proposal, only the GC proposal.
190
0
                debug_assert!(self.allow_gc() || self.rec_group_len == 1);
191
0
                let local = index - self.rec_group_start;
192
0
                if self.allow_gc() && local < self.rec_group_len {
193
0
                    if let Some(id) = PackedIndex::from_rec_group_index(local) {
194
0
                        *ty = id;
195
0
                        return Ok(());
196
                    } else {
197
0
                        bail!(
198
0
                            self.offset,
199
0
                            "implementation limit: too many types in a recursion group"
200
0
                        )
201
                    }
202
0
                }
203
0
204
0
                bail!(
205
0
                    self.offset,
206
0
                    "unknown type {index}: type index out of bounds"
207
0
                )
208
            }
209
0
            UnpackedIndex::RecGroup(local_index) => match self.mode {
210
0
                CanonicalizationMode::HashConsing => Ok(()),
211
                CanonicalizationMode::OnlyIds => {
212
0
                    let rec_group_elems = self.within_rec_group.as_ref().expect(
213
0
                        "configured to canonicalize all type reference indices to `CoreTypeId`s \
214
0
                         and found rec-group-local index, but missing `within_rec_group` context",
215
0
                    );
216
0
217
0
                    let rec_group_len = rec_group_elems.end.index() - rec_group_elems.start.index();
218
0
                    let rec_group_len = u32::try_from(rec_group_len).unwrap();
219
0
                    assert!(local_index < rec_group_len);
220
221
0
                    let rec_group_start = u32::try_from(rec_group_elems.start.index()).unwrap();
222
0
223
0
                    let id = CoreTypeId::from_index(rec_group_start + local_index);
224
0
                    *ty = PackedIndex::from_id(id).expect(
225
0
                        "should fit in impl limits since we already have the end of the rec group \
226
0
                         constructed successfully",
227
0
                    );
228
0
                    Ok(())
229
                }
230
            },
231
        }
232
0
    }
233
}