/src/wasmtime/cranelift/codegen/src/context.rs
Line | Count | Source |
1 | | //! Cranelift compilation context and main entry point. |
2 | | //! |
3 | | //! When compiling many small functions, it is important to avoid repeatedly allocating and |
4 | | //! deallocating the data structures needed for compilation. The `Context` struct is used to hold |
5 | | //! on to memory allocations between function compilations. |
6 | | //! |
7 | | //! The context does not hold a `TargetIsa` instance which has to be provided as an argument |
8 | | //! instead. This is because an ISA instance is immutable and can be used by multiple compilation |
9 | | //! contexts concurrently. Typically, you would have one context per compilation thread and only a |
10 | | //! single ISA instance. |
11 | | |
12 | | use crate::alias_analysis::AliasAnalysis; |
13 | | use crate::dominator_tree::DominatorTree; |
14 | | use crate::egraph::EgraphPass; |
15 | | use crate::flowgraph::ControlFlowGraph; |
16 | | use crate::inline::{Inline, do_inlining}; |
17 | | use crate::ir::Function; |
18 | | use crate::isa::TargetIsa; |
19 | | use crate::legalizer::simple_legalize; |
20 | | use crate::loop_analysis::LoopAnalysis; |
21 | | use crate::machinst::{CompiledCode, CompiledCodeStencil}; |
22 | | use crate::nan_canonicalization::do_nan_canonicalization; |
23 | | use crate::remove_constant_phis::do_remove_constant_phis; |
24 | | use crate::result::{CodegenResult, CompileResult}; |
25 | | use crate::settings::{FlagsOrIsa, OptLevel}; |
26 | | use crate::trace; |
27 | | use crate::unreachable_code::eliminate_unreachable_code; |
28 | | use crate::verifier::{VerifierErrors, VerifierResult, verify_context}; |
29 | | use crate::{CompileError, timing}; |
30 | | #[cfg(feature = "souper-harvest")] |
31 | | use alloc::string::String; |
32 | | use alloc::vec::Vec; |
33 | | use cranelift_control::ControlPlane; |
34 | | use target_lexicon::Architecture; |
35 | | |
36 | | #[cfg(feature = "souper-harvest")] |
37 | | use crate::souper_harvest::do_souper_harvest; |
38 | | |
39 | | /// Persistent data structures and compilation pipeline. |
40 | | pub struct Context { |
41 | | /// The function we're compiling. |
42 | | pub func: Function, |
43 | | |
44 | | /// The control flow graph of `func`. |
45 | | pub cfg: ControlFlowGraph, |
46 | | |
47 | | /// Dominator tree for `func`. |
48 | | pub domtree: DominatorTree, |
49 | | |
50 | | /// Loop analysis of `func`. |
51 | | pub loop_analysis: LoopAnalysis, |
52 | | |
53 | | /// Result of MachBackend compilation, if computed. |
54 | | pub(crate) compiled_code: Option<CompiledCode>, |
55 | | |
56 | | /// Flag: do we want a disassembly with the CompiledCode? |
57 | | pub want_disasm: bool, |
58 | | } |
59 | | |
60 | | impl Context { |
61 | | /// Allocate a new compilation context. |
62 | | /// |
63 | | /// The returned instance should be reused for compiling multiple functions in order to avoid |
64 | | /// needless allocator thrashing. |
65 | 633k | pub fn new() -> Self { |
66 | 633k | Self::for_function(Function::new()) |
67 | 633k | } |
68 | | |
69 | | /// Allocate a new compilation context with an existing Function. |
70 | | /// |
71 | | /// The returned instance should be reused for compiling multiple functions in order to avoid |
72 | | /// needless allocator thrashing. |
73 | 687k | pub fn for_function(func: Function) -> Self { |
74 | 687k | Self { |
75 | 687k | func, |
76 | 687k | cfg: ControlFlowGraph::new(), |
77 | 687k | domtree: DominatorTree::new(), |
78 | 687k | loop_analysis: LoopAnalysis::new(), |
79 | 687k | compiled_code: None, |
80 | 687k | want_disasm: false, |
81 | 687k | } |
82 | 687k | } |
83 | | |
84 | | /// Clear all data structures in this context. |
85 | 1.13M | pub fn clear(&mut self) { |
86 | 1.13M | self.func.clear(); |
87 | 1.13M | self.cfg.clear(); |
88 | 1.13M | self.domtree.clear(); |
89 | 1.13M | self.loop_analysis.clear(); |
90 | 1.13M | self.compiled_code = None; |
91 | 1.13M | self.want_disasm = false; |
92 | 1.13M | } |
93 | | |
94 | | /// Returns the compilation result for this function, available after any `compile` function |
95 | | /// has been called. |
96 | 37.7k | pub fn compiled_code(&self) -> Option<&CompiledCode> { |
97 | 37.7k | self.compiled_code.as_ref() |
98 | 37.7k | } |
99 | | |
100 | | /// Takes the compilation result for this function out of the context, leaving `None` in its |
101 | | /// place. A result is available after any `compile` function has been called. |
102 | 1.65M | pub fn take_compiled_code(&mut self) -> Option<CompiledCode> { |
103 | 1.65M | self.compiled_code.take() |
104 | 1.65M | } |
105 | | |
106 | | /// Set the flag to request a disassembly when compiling with a |
107 | | /// `MachBackend` backend. |
108 | 0 | pub fn set_disasm(&mut self, val: bool) { |
109 | 0 | self.want_disasm = val; |
110 | 0 | } |
111 | | |
112 | | /// Compile the function, and emit machine code into a `Vec<u8>`. |
113 | | #[deprecated = "use Context::compile"] |
114 | 0 | pub fn compile_and_emit( |
115 | 0 | &mut self, |
116 | 0 | isa: &dyn TargetIsa, |
117 | 0 | mem: &mut Vec<u8>, |
118 | 0 | ctrl_plane: &mut ControlPlane, |
119 | 0 | ) -> CompileResult<'_, &CompiledCode> { |
120 | 0 | let compiled_code = self.compile(isa, ctrl_plane)?; |
121 | 0 | mem.extend_from_slice(compiled_code.code_buffer()); |
122 | 0 | Ok(compiled_code) |
123 | 0 | } |
124 | | |
125 | | /// Internally compiles the function into a stencil. |
126 | | /// |
127 | | /// Public only for testing and fuzzing purposes. |
128 | 1.71M | pub fn compile_stencil( |
129 | 1.71M | &mut self, |
130 | 1.71M | isa: &dyn TargetIsa, |
131 | 1.71M | ctrl_plane: &mut ControlPlane, |
132 | 1.71M | ) -> CodegenResult<CompiledCodeStencil> { |
133 | | let result; |
134 | 1.71M | trace!("****** START compiling {}", self.func.display_spec()); |
135 | | { |
136 | 1.71M | let _tt = timing::compile(); |
137 | | |
138 | 1.71M | self.verify_if(isa)?; |
139 | 1.71M | self.optimize(isa, ctrl_plane)?; |
140 | 1.71M | result = isa.compile_function(&self.func, &self.domtree, self.want_disasm, ctrl_plane); |
141 | | } |
142 | 1.71M | trace!("****** DONE compiling {}\n", self.func.display_spec()); |
143 | 1.71M | result |
144 | 1.71M | } |
145 | | |
146 | | /// Optimize the function, performing all compilation steps up to |
147 | | /// but not including machine-code lowering and register |
148 | | /// allocation. |
149 | | /// |
150 | | /// Public only for testing purposes. |
151 | 1.71M | pub fn optimize( |
152 | 1.71M | &mut self, |
153 | 1.71M | isa: &dyn TargetIsa, |
154 | 1.71M | ctrl_plane: &mut ControlPlane, |
155 | 1.71M | ) -> CodegenResult<()> { |
156 | 1.71M | log::debug!( |
157 | | "Number of CLIF instructions to optimize: {}", |
158 | 0 | self.func.dfg.num_insts() |
159 | | ); |
160 | 1.71M | log::debug!( |
161 | | "Number of CLIF blocks to optimize: {}", |
162 | 0 | self.func.dfg.num_blocks() |
163 | | ); |
164 | | |
165 | 1.71M | let opt_level = isa.flags().opt_level(); |
166 | 1.71M | crate::trace!( |
167 | | "Optimizing (opt level {:?}):\n{}", |
168 | | opt_level, |
169 | 0 | self.func.display() |
170 | | ); |
171 | | |
172 | 1.71M | if isa.flags().enable_nan_canonicalization() { |
173 | 1.29M | self.canonicalize_nans(isa)?; |
174 | 415k | } |
175 | | |
176 | 1.71M | self.legalize(isa)?; |
177 | | |
178 | 1.71M | self.compute_cfg(); |
179 | 1.71M | self.compute_domtree(); |
180 | 1.71M | self.eliminate_unreachable_code(isa)?; |
181 | 1.71M | self.remove_constant_phis(isa)?; |
182 | | |
183 | 1.71M | self.func.dfg.resolve_all_aliases(); |
184 | | |
185 | 1.71M | if opt_level != OptLevel::None { |
186 | 1.03M | self.egraph_pass(isa, ctrl_plane)?; |
187 | 677k | } |
188 | | |
189 | 1.71M | Ok(()) |
190 | 1.71M | } |
191 | | |
192 | | /// Perform function call inlining. |
193 | | /// |
194 | | /// Returns `true` if any function call was inlined, `false` otherwise. |
195 | 273k | pub fn inline(&mut self, inliner: impl Inline) -> CodegenResult<bool> { |
196 | 273k | do_inlining(&mut self.func, inliner) |
197 | 273k | } <cranelift_codegen::context::Context>::inline::<<wasmtime_internal_cranelift::compiler::Compiler as wasmtime_environ::compile::InliningCompiler>::inline::Inliner> Line | Count | Source | 195 | 273k | pub fn inline(&mut self, inliner: impl Inline) -> CodegenResult<bool> { | 196 | 273k | do_inlining(&mut self.func, inliner) | 197 | 273k | } |
Unexecuted instantiation: <cranelift_codegen::context::Context>::inline::<_> Unexecuted instantiation: <cranelift_codegen::context::Context>::inline::<cranelift_filetests::test_inline::Inliner> |
198 | | |
199 | | /// Compile the function. |
200 | | /// |
201 | | /// Run the function through all the passes necessary to generate |
202 | | /// code for the target ISA represented by `isa`. The generated |
203 | | /// machine code is not relocated. Instead, any relocations can be |
204 | | /// obtained from `compiled_code.buffer.relocs()`. |
205 | | /// |
206 | | /// Performs any optimizations that are enabled. Note that `optimize()` |
207 | | /// is always run as part of compilation, even if it was already invoked. |
208 | | /// |
209 | | /// Returns the generated machine code as well as information about |
210 | | /// the function's code and read-only data. |
211 | 1.70M | pub fn compile( |
212 | 1.70M | &mut self, |
213 | 1.70M | isa: &dyn TargetIsa, |
214 | 1.70M | ctrl_plane: &mut ControlPlane, |
215 | 1.70M | ) -> CompileResult<'_, &CompiledCode> { |
216 | 1.70M | let stencil = self |
217 | 1.70M | .compile_stencil(isa, ctrl_plane) |
218 | 1.70M | .map_err(|error| CompileError { |
219 | 0 | inner: error, |
220 | 0 | func: &self.func, |
221 | 0 | })?; |
222 | 1.70M | Ok(self |
223 | 1.70M | .compiled_code |
224 | 1.70M | .insert(stencil.apply_params(&self.func.params))) |
225 | 1.70M | } |
226 | | |
227 | | /// If available, return information about the code layout in the |
228 | | /// final machine code: the offsets (in bytes) of each basic-block |
229 | | /// start, and all basic-block edges. |
230 | | #[deprecated = "use CompiledCode::get_code_bb_layout"] |
231 | 0 | pub fn get_code_bb_layout(&self) -> Option<(Vec<usize>, Vec<(usize, usize)>)> { |
232 | 0 | self.compiled_code().map(CompiledCode::get_code_bb_layout) |
233 | 0 | } |
234 | | |
235 | | /// Creates unwind information for the function. |
236 | | /// |
237 | | /// Returns `None` if the function has no unwind information. |
238 | | #[cfg(feature = "unwind")] |
239 | | #[deprecated = "use CompiledCode::create_unwind_info"] |
240 | 0 | pub fn create_unwind_info( |
241 | 0 | &self, |
242 | 0 | isa: &dyn TargetIsa, |
243 | 0 | ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> { |
244 | 0 | self.compiled_code().unwrap().create_unwind_info(isa) |
245 | 0 | } |
246 | | |
247 | | /// Run the verifier on the function. |
248 | | /// |
249 | | /// Also check that the dominator tree and control flow graph are consistent with the function. |
250 | | /// |
251 | | /// TODO: rename to "CLIF validate" or similar. |
252 | 328k | pub fn verify<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> VerifierResult<()> { |
253 | 328k | let mut errors = VerifierErrors::default(); |
254 | 328k | let _ = verify_context(&self.func, &self.cfg, &self.domtree, fisa, &mut errors); |
255 | | |
256 | 328k | if errors.is_empty() { |
257 | 328k | Ok(()) |
258 | | } else { |
259 | 0 | Err(errors) |
260 | | } |
261 | 328k | } |
262 | | |
263 | | /// Run the verifier only if the `enable_verifier` setting is true. |
264 | 9.50M | pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CodegenResult<()> { |
265 | 9.50M | let fisa = fisa.into(); |
266 | 9.50M | if fisa.flags.enable_verifier() { |
267 | 328k | self.verify(fisa)?; |
268 | 9.17M | } |
269 | 9.50M | Ok(()) |
270 | 9.50M | } <cranelift_codegen::context::Context>::verify_if::<cranelift_codegen::settings::FlagsOrIsa> Line | Count | Source | 264 | 1.03M | pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CodegenResult<()> { | 265 | 1.03M | let fisa = fisa.into(); | 266 | 1.03M | if fisa.flags.enable_verifier() { | 267 | 43.7k | self.verify(fisa)?; | 268 | 994k | } | 269 | 1.03M | Ok(()) | 270 | 1.03M | } |
<cranelift_codegen::context::Context>::verify_if::<&dyn cranelift_codegen::isa::TargetIsa> Line | Count | Source | 264 | 8.46M | pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CodegenResult<()> { | 265 | 8.46M | let fisa = fisa.into(); | 266 | 8.46M | if fisa.flags.enable_verifier() { | 267 | 284k | self.verify(fisa)?; | 268 | 8.18M | } | 269 | 8.46M | Ok(()) | 270 | 8.46M | } |
|
271 | | |
272 | | /// Perform constant-phi removal on the function. |
273 | 1.71M | pub fn remove_constant_phis<'a, FOI: Into<FlagsOrIsa<'a>>>( |
274 | 1.71M | &mut self, |
275 | 1.71M | fisa: FOI, |
276 | 1.71M | ) -> CodegenResult<()> { |
277 | 1.71M | do_remove_constant_phis(&mut self.func, &mut self.domtree); |
278 | 1.71M | self.verify_if(fisa)?; |
279 | 1.71M | Ok(()) |
280 | 1.71M | } |
281 | | |
282 | | /// Perform NaN canonicalizing rewrites on the function. |
283 | 1.33M | pub fn canonicalize_nans(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { |
284 | | // Currently RiscV64 is the only arch that may not have vector support. |
285 | 1.33M | let has_vector_support = match isa.triple().architecture { |
286 | 51.2k | Architecture::Riscv64(_) => match isa.isa_flags().iter().find(|f| f.name == "has_v") { |
287 | 6.40k | Some(value) => value.as_bool().unwrap_or(false), |
288 | 0 | None => false, |
289 | | }, |
290 | 1.32M | _ => true, |
291 | | }; |
292 | 1.33M | do_nan_canonicalization(&mut self.func, has_vector_support); |
293 | 1.33M | self.verify_if(isa) |
294 | 1.33M | } |
295 | | |
296 | | /// Run the legalizer for `isa` on the function. |
297 | 1.98M | pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { |
298 | | // Legalization invalidates the domtree and loop_analysis by mutating the CFG. |
299 | | // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. |
300 | 1.98M | self.domtree.clear(); |
301 | 1.98M | self.loop_analysis.clear(); |
302 | 1.98M | self.cfg.clear(); |
303 | | |
304 | | // Run some specific legalizations only. |
305 | 1.98M | simple_legalize(&mut self.func, isa); |
306 | 1.98M | self.verify_if(isa) |
307 | 1.98M | } |
308 | | |
309 | | /// Compute the control flow graph. |
310 | 1.71M | pub fn compute_cfg(&mut self) { |
311 | 1.71M | self.cfg.compute(&self.func) |
312 | 1.71M | } |
313 | | |
314 | | /// Compute dominator tree. |
315 | 1.71M | pub fn compute_domtree(&mut self) { |
316 | 1.71M | self.domtree.compute(&self.func, &self.cfg); |
317 | 1.71M | } |
318 | | |
319 | | /// Compute the loop analysis. |
320 | 1.03M | pub fn compute_loop_analysis(&mut self) { |
321 | 1.03M | self.loop_analysis |
322 | 1.03M | .compute(&self.func, &self.cfg, &self.domtree) |
323 | 1.03M | } |
324 | | |
325 | | /// Compute the control flow graph and dominator tree. |
326 | 0 | pub fn flowgraph(&mut self) { |
327 | 0 | self.compute_cfg(); |
328 | 0 | self.compute_domtree() |
329 | 0 | } |
330 | | |
331 | | /// Perform unreachable code elimination. |
332 | 1.71M | pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()> |
333 | 1.71M | where |
334 | 1.71M | FOI: Into<FlagsOrIsa<'a>>, |
335 | | { |
336 | 1.71M | eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree); |
337 | 1.71M | self.verify_if(fisa) |
338 | 1.71M | } |
339 | | |
340 | | /// Replace all redundant loads with the known values in |
341 | | /// memory. These are loads whose values were already loaded by |
342 | | /// other loads earlier, as well as loads whose values were stored |
343 | | /// by a store instruction to the same address (so-called |
344 | | /// "store-to-load forwarding"). |
345 | 0 | pub fn replace_redundant_loads(&mut self) -> CodegenResult<()> { |
346 | 0 | let mut analysis = AliasAnalysis::new(&self.func, &self.domtree); |
347 | 0 | analysis.compute_and_update_aliases(&mut self.func); |
348 | 0 | Ok(()) |
349 | 0 | } |
350 | | |
351 | | /// Harvest candidate left-hand sides for superoptimization with Souper. |
352 | | #[cfg(feature = "souper-harvest")] |
353 | | pub fn souper_harvest( |
354 | | &mut self, |
355 | | out: &mut std::sync::mpsc::Sender<String>, |
356 | | ) -> CodegenResult<()> { |
357 | | do_souper_harvest(&self.func, out); |
358 | | Ok(()) |
359 | | } |
360 | | |
361 | | /// Run optimizations via the egraph infrastructure. |
362 | 1.03M | pub fn egraph_pass<'a, FOI>( |
363 | 1.03M | &mut self, |
364 | 1.03M | fisa: FOI, |
365 | 1.03M | ctrl_plane: &mut ControlPlane, |
366 | 1.03M | ) -> CodegenResult<()> |
367 | 1.03M | where |
368 | 1.03M | FOI: Into<FlagsOrIsa<'a>>, |
369 | | { |
370 | 1.03M | let _tt = timing::egraph(); |
371 | | |
372 | 1.03M | trace!( |
373 | | "About to optimize with egraph phase:\n{}", |
374 | 0 | self.func.display() |
375 | | ); |
376 | 1.03M | let fisa = fisa.into(); |
377 | 1.03M | self.compute_loop_analysis(); |
378 | 1.03M | let mut alias_analysis = AliasAnalysis::new(&self.func, &self.domtree); |
379 | 1.03M | let mut pass = EgraphPass::new( |
380 | 1.03M | &mut self.func, |
381 | 1.03M | &self.domtree, |
382 | 1.03M | &self.loop_analysis, |
383 | 1.03M | &mut alias_analysis, |
384 | 1.03M | ctrl_plane, |
385 | | ); |
386 | 1.03M | pass.run(); |
387 | 1.03M | log::debug!("egraph stats: {:?}", pass.stats); |
388 | 1.03M | trace!("After egraph optimization:\n{}", self.func.display()); |
389 | | |
390 | 1.03M | self.verify_if(fisa) |
391 | 1.03M | } |
392 | | } |