/rust/registry/src/index.crates.io-6f17d22bba15001f/cranelift-codegen-0.91.1/src/isa/x64/abi.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Implementation of the standard x64 ABI. |
2 | | |
3 | | use crate::ir::{self, types, LibCall, MemFlags, Opcode, Signature, TrapCode, Type}; |
4 | | use crate::ir::{types::*, ExternalName}; |
5 | | use crate::isa; |
6 | | use crate::isa::{unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings, CallConv}; |
7 | | use crate::machinst::abi::*; |
8 | | use crate::machinst::*; |
9 | | use crate::settings; |
10 | | use crate::{CodegenError, CodegenResult}; |
11 | | use alloc::boxed::Box; |
12 | | use alloc::vec::Vec; |
13 | | use args::*; |
14 | | use regalloc2::{PRegSet, VReg}; |
15 | | use smallvec::{smallvec, SmallVec}; |
16 | | use std::convert::TryFrom; |
17 | | |
/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;

/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;

/// Support for the x64 ABI from the caller side (at a callsite).
pub(crate) type X64Caller = Caller<X64ABIMachineSpec>;

/// Implementation of ABI primitives for x64.
///
/// A stateless marker type: it exists only to implement `ABIMachineSpec`
/// (plus a few private probestack helpers) for the x64 backend.
pub struct X64ABIMachineSpec;
31 | | |
32 | | impl X64ABIMachineSpec { |
33 | 0 | fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) { |
34 | 0 | insts.reserve(probe_count as usize); |
35 | 0 | for i in 0..probe_count { |
36 | 0 | let offset = (guard_size * (i + 1)) as i64; |
37 | 0 |
|
38 | 0 | // TODO: It would be nice if we could store the imm 0, but we don't have insts for those |
39 | 0 | // so store the stack pointer. Any register will do, since the stack is undefined at this point |
40 | 0 | insts.push(Self::gen_store_stack( |
41 | 0 | StackAMode::SPOffset(-offset, I8), |
42 | 0 | regs::rsp(), |
43 | 0 | I32, |
44 | 0 | )); |
45 | 0 | } |
46 | 0 | } |
47 | 0 | fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) { |
48 | 0 | // We have to use a caller saved register since clobbering only happens |
49 | 0 | // after stack probing. |
50 | 0 | // |
51 | 0 | // R11 is caller saved on both Fastcall and SystemV, and not used for argument |
52 | 0 | // passing, so it's pretty much free. It is also not used by the stacklimit mechanism. |
53 | 0 | let tmp = regs::r11(); |
54 | 0 | debug_assert!({ |
55 | 0 | let real_reg = tmp.to_real_reg().unwrap(); |
56 | 0 | !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false) |
57 | | }); |
58 | | |
59 | 0 | insts.push(Inst::StackProbeLoop { |
60 | 0 | tmp: Writable::from_reg(tmp), |
61 | 0 | frame_size, |
62 | 0 | guard_size, |
63 | 0 | }); |
64 | 0 | } |
65 | | } |
66 | | |
/// The x64-specific settings satisfy the generic `IsaFlags` contract with no
/// additional methods.
impl IsaFlags for x64_settings::Flags {}
68 | | |
impl ABIMachineSpec for X64ABIMachineSpec {
    /// The machine-instruction type for this backend.
    type I = Inst;

    /// The ISA-specific flags type for x64.
    type F = x64_settings::Flags;
    /// Machine word size on x64: 64 bits.
    fn word_bits() -> u32 {
        64
    }
77 | | |
    /// Return required stack alignment in bytes.
    ///
    /// Always 16 bytes; the calling convention argument is ignored.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }
82 | | |
    /// Assign locations (registers and/or stack slots) to the parameters or
    /// return values described by `params`, pushing one `ABIArg` per entry
    /// into `args`.
    ///
    /// Returns the total stack-area size in bytes (16-byte aligned) and, when
    /// `add_ret_area_ptr` is set, the index of the synthetic return-area
    /// pointer argument appended at the end.
    ///
    /// Errors with `CodegenError::ImplLimitExceeded` if the stack area would
    /// exceed `STACK_ARG_RET_SIZE_LIMIT`.
    fn compute_arg_locs<'a, I>(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: I,
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator<'_>,
    ) -> CodegenResult<(i64, Option<usize>)>
    where
        I: IntoIterator<Item = &'a ir::AbiParam>,
    {
        let is_fastcall = call_conv.extends_windows_fastcall();

        // Running counters: next free integer register, next free vector
        // register, next free stack byte, and the overall parameter index.
        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u64 = 0;
        let mut next_param_idx = 0; // Fastcall cares about overall param index

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
            next_stack = 32;
        }

        for param in params {
            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                // Struct arguments occupy a block of stack memory directly;
                // the pointer is filled in later (hence `pointer: None`).
                let offset = next_stack as i64;
                let size = size as u64;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    pointer: None,
                    offset,
                    size,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args. This is consistent with LLVM's
            //   behavior, and is needed for some uses of Cranelift (e.g., the
            //   rustc backend).
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of
            //   vector register, a register pair, or passing by reference
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob

            if param.value_type.bits() > 64
                && !param.value_type.is_vector()
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }

            let mut slots = ABIArgSlotVec::new();
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
                let intreg = *rc == RegClass::Int;
                // Ask the calling convention for the next available register
                // of the right class, for args or rets as appropriate.
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
                        }
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => {
                            get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
                        }
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    // No register available: spill this part to the stack.
                    //
                    // Compute size. For the wasmtime ABI it differs from native
                    // ABIs in how multiple values are returned, so we take a
                    // leaf out of arm64's book by not rounding everything up to
                    // 8 bytes. For all ABI arguments, and other ABI returns,
                    // though, each slot takes a minimum of 8 bytes.
                    //
                    // Note that in all cases 16-byte stack alignment happens
                    // separately after all args.
                    let size = (reg_ty.bits() / 8) as u64;
                    let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
                        size
                    } else {
                        std::cmp::max(size, 8)
                    };
                    // Align.
                    debug_assert!(size.is_power_of_two());
                    next_stack = align_to(next_stack, size);
                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Optionally append a hidden pointer argument addressing the return
        // area, placed in the next free int register or on the stack.
        let extra_arg = if add_ret_area_ptr {
            debug_assert!(args_or_rets == ArgsOrRets::Args);
            if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
                args.push(ABIArg::reg(
                    reg.to_real_reg().unwrap(),
                    types::I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ));
            } else {
                args.push(ABIArg::stack(
                    next_stack as i64,
                    types::I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ));
                next_stack += 8;
            }
            Some(args.args().len() - 1)
        } else {
            None
        };

        next_stack = align_to(next_stack, 16);

        // To avoid overflow issues, limit the arg/return size to something reasonable.
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        Ok((next_stack as i64, extra_arg))
    }
256 | | |
    /// Offset from the frame pointer to the first stack argument: the saved
    /// RBP plus the return address (8 bytes each).
    fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
        16 // frame pointer + return address.
    }
260 | | |
    /// Load a value of type `ty` from stack slot `mem` into `into_reg`.
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
            types::I8 | types::I16 | types::I32 => types::I64,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }
270 | | |
    /// Store `from_reg` (at type `ty`) to stack slot `mem`.
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        Inst::store(ty, from_reg, mem)
    }
274 | | |
    /// Generate a register-to-register move of type `ty`.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }
278 | | |
279 | | /// Generate an integer-extend operation. |
280 | 0 | fn gen_extend( |
281 | 0 | to_reg: Writable<Reg>, |
282 | 0 | from_reg: Reg, |
283 | 0 | is_signed: bool, |
284 | 0 | from_bits: u8, |
285 | 0 | to_bits: u8, |
286 | 0 | ) -> Self::I { |
287 | 0 | let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16) |
288 | 0 | .unwrap_or_else(|| panic!("invalid extension: {} -> {}", from_bits, to_bits)); |
289 | 0 | if is_signed { |
290 | 0 | Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg) |
291 | | } else { |
292 | 0 | Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg) |
293 | | } |
294 | 0 | } |
295 | | |
    /// Generate the pseudo-instruction that defines the incoming argument
    /// registers at the top of a function body.
    fn gen_args(_isa_flags: &x64_settings::Flags, args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }
299 | | |
    /// Generate a return instruction carrying the return-value pairs.
    fn gen_ret(
        _setup_frame: bool,
        _isa_flags: &x64_settings::Flags,
        rets: Vec<RetPair>,
    ) -> Self::I {
        Inst::ret(rets)
    }
307 | | |
308 | 0 | fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Self::I> { |
309 | 0 | let mut ret = SmallVec::new(); |
310 | 0 | if from_reg != into_reg.to_reg() { |
311 | 0 | ret.push(Inst::gen_move(into_reg, from_reg, I64)); |
312 | 0 | } |
313 | 0 | ret.push(Inst::alu_rmi_r( |
314 | 0 | OperandSize::Size64, |
315 | 0 | AluRmiROpcode::Add, |
316 | 0 | RegMemImm::imm(imm), |
317 | 0 | into_reg, |
318 | 0 | )); |
319 | 0 | ret |
320 | 0 | } |
321 | | |
    /// Emit a stack-overflow check that traps when `limit_reg` > RSP.
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(regs::rsp()), limit_reg),
            Inst::TrapIf {
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::StackOverflow,
            },
        ]
    }
332 | | |
    /// Materialize the address of stack slot `mem` into `into_reg` via LEA.
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::lea(mem, into_reg)
    }
337 | | |
    /// Return a scratch register usable for the stack-limit check (R10).
    fn get_stacklimit_reg() -> Reg {
        // R10 must be caller-save; check that under SysV in debug builds.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));

        // As per comment on trait definition, we must return a caller-save
        // register here.
        regs::r10()
    }
348 | | |
    /// Load a value of type `ty` from `[base + offset]` into `into_reg`.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s; if that changes, see if the ExtKind below needs to be changed.
        assert_eq!(ty, I64);
        // Reinterpret the signed offset as the u32 bit pattern `Amode` expects.
        let simm32 = offset as u32;
        let mem = Amode::imm_reg(simm32, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }
356 | | |
357 | 0 | fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I { |
358 | 0 | let simm32 = offset as u32; |
359 | 0 | let mem = Amode::imm_reg(simm32, base); |
360 | 0 | Inst::store(ty, from_reg, mem) |
361 | 0 | } |
362 | | |
363 | 2.46k | fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> { |
364 | 2.46k | let (alu_op, amount) = if amount >= 0 { |
365 | 1.23k | (AluRmiROpcode::Add, amount) |
366 | | } else { |
367 | 1.23k | (AluRmiROpcode::Sub, -amount) |
368 | | }; |
369 | | |
370 | 2.46k | let amount = amount as u32; |
371 | 2.46k | |
372 | 2.46k | smallvec![Inst::alu_rmi_r( |
373 | 2.46k | OperandSize::Size64, |
374 | 2.46k | alu_op, |
375 | 2.46k | RegMemImm::imm(amount), |
376 | 2.46k | Writable::from_reg(regs::rsp()), |
377 | | )] |
378 | 2.46k | } |
379 | | |
    /// Record a change to the virtual-SP offset by emitting a
    /// `VirtualSPOffsetAdj` pseudo-instruction.
    fn gen_nominal_sp_adj(offset: i32) -> Self::I {
        Inst::VirtualSPOffsetAdj {
            offset: offset as i64,
        }
    }
385 | | |
    /// Emit the frame-setup prologue: `push %rbp; mov %rsp, %rbp`, plus
    /// unwind-info metadata when the `unwind_info` flag is set.
    fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Self::I> {
        let r_rsp = regs::rsp();
        let r_rbp = regs::rbp();
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16. So here, it is 8 % 16.
        insts.push(Inst::push64(RegMemImm::reg(r_rbp)));

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: 16, // RBP, return address
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16
        insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp));
        insts
    }
408 | | |
409 | 99.2k | fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Self::I> { |
410 | 99.2k | let mut insts = SmallVec::new(); |
411 | 99.2k | // `mov %rbp, %rsp` |
412 | 99.2k | insts.push(Inst::mov_r_r( |
413 | 99.2k | OperandSize::Size64, |
414 | 99.2k | regs::rbp(), |
415 | 99.2k | Writable::from_reg(regs::rsp()), |
416 | 99.2k | )); |
417 | 99.2k | // `pop %rbp` |
418 | 99.2k | insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); |
419 | 99.2k | insts |
420 | 99.2k | } |
421 | | |
    /// Probe the stack by loading the frame size into RAX and calling the
    /// `Probestack` libcall.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            dest: ExternalName::LibCall(LibCall::Probestack),
            info: Box::new(CallInfo {
                // No need to include arg here: we are post-regalloc
                // so no constraints will be seen anyway.
                uses: smallvec![],
                defs: smallvec![],
                clobbers: PRegSet::empty(),
                opcode: Opcode::Call,
            }),
        });
    }
440 | | |
    /// Probe the stack inline (no libcall): unrolled stores for small frames,
    /// a probe loop otherwise.
    fn gen_inline_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32, guard_size: u32) {
        // Unroll at most n consecutive probes, before falling back to using a loop
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 5 inline probes in that space, so unroll if it's beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 5;

        // Number of probes that we need to perform
        let probe_count = align_to(frame_size, guard_size) / guard_size;

        if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, frame_size, guard_size)
        }
    }
457 | | |
    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        setup_frame: bool,
        flags: &settings::Flags,
        clobbered_callee_saves: &[Writable<RealReg>],
        fixed_frame_storage_size: u32,
        _outgoing_args_size: u32,
    ) -> (u64, SmallVec<[Self::I; 16]>) {
        // Prologue body: allocate the fixed frame plus the clobber area in
        // one RSP adjustment, store each clobbered callee-save above the
        // fixed frame slots, and emit unwind metadata as we go. Returns
        // the clobber-area size in bytes along with the instructions.
        let mut insts = SmallVec::new();
        let clobbered_size = compute_clobber_size(&clobbered_callee_saves);

        if flags.unwind_info() && setup_frame {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: clobbered_size,
                    offset_upward_to_caller_sp: 16, // RBP, return address
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots and storage slots).
        let stack_size = fixed_frame_storage_size + clobbered_size;
        if stack_size > 0 {
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Sub,
                RegMemImm::imm(stack_size),
                Writable::from_reg(regs::rsp()),
            ));
        }
        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let mut cur_offset = fixed_frame_storage_size;
        for reg in clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let off = cur_offset;
            match r_reg.class() {
                RegClass::Int => {
                    // GPRs take one 8-byte slot each.
                    insts.push(Inst::store(
                        types::I64,
                        r_reg.into(),
                        Amode::imm_reg(cur_offset, regs::rsp()),
                    ));
                    cur_offset += 8;
                }
                RegClass::Float => {
                    // Vector registers take a full 16-byte, 16-aligned slot.
                    cur_offset = align_to(cur_offset, 16);
                    insts.push(Inst::store(
                        types::I8X16,
                        r_reg.into(),
                        Amode::imm_reg(cur_offset, regs::rsp()),
                    ));
                    cur_offset += 16;
                }
            };
            if flags.unwind_info() {
                // Record the save for unwinders, relative to the start of
                // the clobber area rather than the whole frame.
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off - fixed_frame_storage_size,
                        reg: r_reg,
                    },
                });
            }
        }

        (clobbered_size as u64, insts)
    }
530 | | |
    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        sig: &Signature,
        flags: &settings::Flags,
        clobbers: &[Writable<RealReg>],
        fixed_frame_storage_size: u32,
        _outgoing_args_size: u32,
    ) -> SmallVec<[Self::I; 16]> {
        // Epilogue counterpart of `gen_clobber_save`: reload each clobbered
        // callee-save from the same RSP-relative offsets used when saving,
        // then release the fixed frame + clobber area in one RSP adjustment.
        let mut insts = SmallVec::new();

        // Recompute the callee-save set; it must match the set used on the
        // save path so the offsets line up.
        let clobbered_callee_saves =
            Self::get_clobbered_callee_saves(call_conv, flags, sig, clobbers);
        let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);

        // Restore regs by loading from offsets of RSP. RSP will be
        // returned to nominal-RSP at this point, so we can use the
        // same offsets that we used when saving clobbers above.
        let mut cur_offset = fixed_frame_storage_size;
        for reg in &clobbered_callee_saves {
            let rreg = reg.to_reg();
            match rreg.class() {
                RegClass::Int => {
                    // GPRs occupy one 8-byte slot each.
                    insts.push(Inst::mov64_m_r(
                        Amode::imm_reg(cur_offset, regs::rsp()),
                        Writable::from_reg(rreg.into()),
                    ));
                    cur_offset += 8;
                }
                RegClass::Float => {
                    // Vector slots are 16 bytes and 16-aligned, mirroring
                    // the save path.
                    cur_offset = align_to(cur_offset, 16);
                    insts.push(Inst::load(
                        types::I8X16,
                        Amode::imm_reg(cur_offset, regs::rsp()),
                        Writable::from_reg(rreg.into()),
                        ExtKind::None,
                    ));
                    cur_offset += 16;
                }
            }
        }
        // Adjust RSP back upward.
        if stack_size > 0 {
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Add,
                RegMemImm::imm(stack_size),
                Writable::from_reg(regs::rsp()),
            ));
        }

        insts
    }
583 | | |
584 | | /// Generate a call instruction/sequence. |
585 | 104k | fn gen_call( |
586 | 104k | dest: &CallDest, |
587 | 104k | uses: CallArgList, |
588 | 104k | defs: CallRetList, |
589 | 104k | clobbers: PRegSet, |
590 | 104k | opcode: ir::Opcode, |
591 | 104k | tmp: Writable<Reg>, |
592 | 104k | _callee_conv: isa::CallConv, |
593 | 104k | _caller_conv: isa::CallConv, |
594 | 104k | ) -> SmallVec<[Self::I; 2]> { |
595 | 104k | let mut insts = SmallVec::new(); |
596 | 26.7k | match dest { |
597 | 26.7k | &CallDest::ExtName(ref name, RelocDistance::Near) => { |
598 | 26.7k | insts.push(Inst::call_known(name.clone(), uses, defs, clobbers, opcode)); |
599 | 26.7k | } |
600 | 0 | &CallDest::ExtName(ref name, RelocDistance::Far) => { |
601 | 0 | insts.push(Inst::LoadExtName { |
602 | 0 | dst: tmp, |
603 | 0 | name: Box::new(name.clone()), |
604 | 0 | offset: 0, |
605 | 0 | }); |
606 | 0 | insts.push(Inst::call_unknown( |
607 | 0 | RegMem::reg(tmp.to_reg()), |
608 | 0 | uses, |
609 | 0 | defs, |
610 | 0 | clobbers, |
611 | 0 | opcode, |
612 | 0 | )); |
613 | 0 | } |
614 | 77.9k | &CallDest::Reg(reg) => { |
615 | 77.9k | insts.push(Inst::call_unknown( |
616 | 77.9k | RegMem::reg(reg), |
617 | 77.9k | uses, |
618 | 77.9k | defs, |
619 | 77.9k | clobbers, |
620 | 77.9k | opcode, |
621 | 77.9k | )); |
622 | 77.9k | } |
623 | | } |
624 | 104k | insts |
625 | 104k | } |
626 | | |
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        // Emit a call to the `memcpy` libcall with a compile-time-known
        // `size`, marshaling (dst, src, size) into the calling convention's
        // first three integer argument registers.
        let mut insts = SmallVec::new();
        let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
        // `temp` holds the constant `size`; `temp2` holds the callee address.
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.extend(
            Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| {
                panic!("tmp should not be needed")
            })
            .into_iter(),
        );
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2,
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
        });
        insts.push(Inst::call_unknown(
            RegMem::reg(temp2.to_reg()),
            /* uses = */
            smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            /* defs = */ smallvec![],
            /* clobbers = */ Self::get_regs_clobbered_by_call(call_conv),
            Opcode::Call,
        ));
        insts
    }
677 | | |
678 | 1.15M | fn get_number_of_spillslots_for_value(rc: RegClass, vector_scale: u32) -> u32 { |
679 | 1.15M | // We allocate in terms of 8-byte slots. |
680 | 1.15M | match rc { |
681 | 768k | RegClass::Int => 1, |
682 | 385k | RegClass::Float => vector_scale / 8, |
683 | | } |
684 | 1.15M | } |
685 | | |
    fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64 {
        // Expose the emit state's virtual-SP offset to the shared ABI code.
        s.virtual_sp_offset
    }
689 | | |
    fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64 {
        // Expose the emit state's nominal-SP-to-FP distance to the shared
        // ABI code.
        s.nominal_sp_to_fp
    }
693 | | |
694 | 104k | fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet { |
695 | 104k | if call_conv_of_callee.extends_windows_fastcall() { |
696 | 0 | WINDOWS_CLOBBERS |
697 | | } else { |
698 | 104k | SYSV_CLOBBERS |
699 | | } |
700 | 104k | } |
701 | | |
    fn get_ext_mode(
        _call_conv: isa::CallConv,
        _specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        // Always report `None`: the requested extension mode is ignored
        // for this backend regardless of calling convention.
        ir::ArgumentExtension::None
    }
708 | | |
709 | 238k | fn get_clobbered_callee_saves( |
710 | 238k | call_conv: CallConv, |
711 | 238k | flags: &settings::Flags, |
712 | 238k | _sig: &Signature, |
713 | 238k | regs: &[Writable<RealReg>], |
714 | 238k | ) -> Vec<Writable<RealReg>> { |
715 | 238k | let mut regs: Vec<Writable<RealReg>> = match call_conv { |
716 | 238k | CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs |
717 | 238k | .iter() |
718 | 238k | .cloned() |
719 | 3.04M | .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg())) |
720 | 238k | .collect(), |
721 | 0 | CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs |
722 | 0 | .iter() |
723 | 0 | .cloned() |
724 | 0 | .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg())) |
725 | 0 | .collect(), |
726 | 0 | CallConv::Probestack => todo!("probestack?"), |
727 | 0 | CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(), |
728 | | }; |
729 | | // Sort registers for deterministic code output. We can do an unstable sort because the |
730 | | // registers will be unique (there are no dups). |
731 | 238k | regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); |
732 | 238k | regs |
733 | 238k | } |
734 | | |
    fn is_frame_setup_needed(
        _is_leaf: bool,
        _stack_args_size: u32,
        _num_clobbered_callee_saves: usize,
        _frame_storage_size: u32,
    ) -> bool {
        // This backend always sets up a frame; every input is ignored.
        true
    }
743 | | } |
744 | | |
745 | | impl From<StackAMode> for SyntheticAmode { |
746 | 83.5k | fn from(amode: StackAMode) -> Self { |
747 | 83.5k | // We enforce a 128 MB stack-frame size limit above, so these |
748 | 83.5k | // `expect()`s should never fail. |
749 | 83.5k | match amode { |
750 | 71.8k | StackAMode::FPOffset(off, _ty) => { |
751 | 71.8k | let off = i32::try_from(off) |
752 | 71.8k | .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first"); |
753 | 71.8k | let simm32 = off as u32; |
754 | 71.8k | SyntheticAmode::Real(Amode::ImmReg { |
755 | 71.8k | simm32, |
756 | 71.8k | base: regs::rbp(), |
757 | 71.8k | flags: MemFlags::trusted(), |
758 | 71.8k | }) |
759 | | } |
760 | 6.38k | StackAMode::NominalSPOffset(off, _ty) => { |
761 | 6.38k | let off = i32::try_from(off).expect( |
762 | 6.38k | "Offset in NominalSPOffset is greater than 2GB; should hit impl limit first", |
763 | 6.38k | ); |
764 | 6.38k | let simm32 = off as u32; |
765 | 6.38k | SyntheticAmode::nominal_sp_offset(simm32) |
766 | | } |
767 | 5.27k | StackAMode::SPOffset(off, _ty) => { |
768 | 5.27k | let off = i32::try_from(off) |
769 | 5.27k | .expect("Offset in SPOffset is greater than 2GB; should hit impl limit first"); |
770 | 5.27k | let simm32 = off as u32; |
771 | 5.27k | SyntheticAmode::Real(Amode::ImmReg { |
772 | 5.27k | simm32, |
773 | 5.27k | base: regs::rsp(), |
774 | 5.27k | flags: MemFlags::trusted(), |
775 | 5.27k | }) |
776 | | } |
777 | | } |
778 | 83.5k | } |
779 | | } |
780 | | |
781 | 474k | fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> { |
782 | 474k | let is_fastcall = call_conv.extends_windows_fastcall(); |
783 | | |
784 | | // Fastcall counts by absolute argument number; SysV counts by argument of |
785 | | // this (integer) class. |
786 | 474k | let i = if is_fastcall { arg_idx } else { idx }; |
787 | 474k | match (i, is_fastcall) { |
788 | 199k | (0, false) => Some(regs::rdi()), |
789 | 126k | (1, false) => Some(regs::rsi()), |
790 | 78.9k | (2, false) => Some(regs::rdx()), |
791 | 12.6k | (3, false) => Some(regs::rcx()), |
792 | 8.60k | (4, false) => Some(regs::r8()), |
793 | 6.16k | (5, false) => Some(regs::r9()), |
794 | 0 | (0, true) => Some(regs::rcx()), |
795 | 0 | (1, true) => Some(regs::rdx()), |
796 | 0 | (2, true) => Some(regs::r8()), |
797 | 0 | (3, true) => Some(regs::r9()), |
798 | 41.8k | _ => None, |
799 | | } |
800 | 474k | } |
801 | | |
802 | 179k | fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> { |
803 | 179k | let is_fastcall = call_conv.extends_windows_fastcall(); |
804 | | |
805 | | // Fastcall counts by absolute argument number; SysV counts by argument of |
806 | | // this (floating-point) class. |
807 | 179k | let i = if is_fastcall { arg_idx } else { idx }; |
808 | 179k | match (i, is_fastcall) { |
809 | 54.8k | (0, false) => Some(regs::xmm0()), |
810 | 27.5k | (1, false) => Some(regs::xmm1()), |
811 | 16.3k | (2, false) => Some(regs::xmm2()), |
812 | 12.5k | (3, false) => Some(regs::xmm3()), |
813 | 10.1k | (4, false) => Some(regs::xmm4()), |
814 | 8.35k | (5, false) => Some(regs::xmm5()), |
815 | 7.62k | (6, false) => Some(regs::xmm6()), |
816 | 6.87k | (7, false) => Some(regs::xmm7()), |
817 | 0 | (0, true) => Some(regs::xmm0()), |
818 | 0 | (1, true) => Some(regs::xmm1()), |
819 | 0 | (2, true) => Some(regs::xmm2()), |
820 | 0 | (3, true) => Some(regs::xmm3()), |
821 | 35.2k | _ => None, |
822 | | } |
823 | 179k | } |
824 | | |
825 | 29.2k | fn get_intreg_for_retval( |
826 | 29.2k | call_conv: &CallConv, |
827 | 29.2k | intreg_idx: usize, |
828 | 29.2k | retval_idx: usize, |
829 | 29.2k | ) -> Option<Reg> { |
830 | 29.2k | match call_conv { |
831 | 29.2k | CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx { |
832 | 29.2k | 0 => Some(regs::rax()), |
833 | 0 | 1 => Some(regs::rdx()), |
834 | 0 | _ => None, |
835 | | }, |
836 | | CallConv::WasmtimeSystemV | CallConv::WasmtimeFastcall => { |
837 | 0 | if intreg_idx == 0 && retval_idx == 0 { |
838 | 0 | Some(regs::rax()) |
839 | | } else { |
840 | 0 | None |
841 | | } |
842 | | } |
843 | 0 | CallConv::WindowsFastcall => match intreg_idx { |
844 | 0 | 0 => Some(regs::rax()), |
845 | 0 | 1 => Some(regs::rdx()), // The Rust ABI for i128s needs this. |
846 | 0 | _ => None, |
847 | | }, |
848 | 0 | CallConv::Probestack => todo!(), |
849 | 0 | CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(), |
850 | | } |
851 | 29.2k | } |
852 | | |
853 | 28.7k | fn get_fltreg_for_retval( |
854 | 28.7k | call_conv: &CallConv, |
855 | 28.7k | fltreg_idx: usize, |
856 | 28.7k | retval_idx: usize, |
857 | 28.7k | ) -> Option<Reg> { |
858 | 28.7k | match call_conv { |
859 | 28.7k | CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx { |
860 | 28.7k | 0 => Some(regs::xmm0()), |
861 | 0 | 1 => Some(regs::xmm1()), |
862 | 0 | _ => None, |
863 | | }, |
864 | | CallConv::WasmtimeFastcall | CallConv::WasmtimeSystemV => { |
865 | 0 | if fltreg_idx == 0 && retval_idx == 0 { |
866 | 0 | Some(regs::xmm0()) |
867 | | } else { |
868 | 0 | None |
869 | | } |
870 | | } |
871 | 0 | CallConv::WindowsFastcall => match fltreg_idx { |
872 | 0 | 0 => Some(regs::xmm0()), |
873 | 0 | _ => None, |
874 | | }, |
875 | 0 | CallConv::Probestack => todo!(), |
876 | 0 | CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(), |
877 | | } |
878 | 28.7k | } |
879 | | |
880 | 3.04M | fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool { |
881 | 3.04M | use regs::*; |
882 | 3.04M | match r.class() { |
883 | 1.15M | RegClass::Int => match r.hw_enc() { |
884 | 78.5k | ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 => true, |
885 | | // R15 is the pinned register; if we're using it that way, |
886 | | // it is effectively globally-allocated, and is not |
887 | | // callee-saved. |
888 | 11.2k | ENC_R15 => !enable_pinned_reg, |
889 | 1.06M | _ => false, |
890 | | }, |
891 | 1.88M | RegClass::Float => false, |
892 | | } |
893 | 3.04M | } |
894 | | |
895 | 0 | fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool { |
896 | 0 | use regs::*; |
897 | 0 | match r.class() { |
898 | 0 | RegClass::Int => match r.hw_enc() { |
899 | 0 | ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 => true, |
900 | | // See above for SysV: we must treat the pinned reg specially. |
901 | 0 | ENC_R15 => !enable_pinned_reg, |
902 | 0 | _ => false, |
903 | | }, |
904 | 0 | RegClass::Float => match r.hw_enc() { |
905 | 0 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true, |
906 | 0 | _ => false, |
907 | | }, |
908 | | } |
909 | 0 | } |
910 | | |
911 | 238k | fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 { |
912 | 238k | let mut clobbered_size = 0; |
913 | 328k | for reg in clobbers { |
914 | 89.8k | match reg.to_reg().class() { |
915 | 89.8k | RegClass::Int => { |
916 | 89.8k | clobbered_size += 8; |
917 | 89.8k | } |
918 | 0 | RegClass::Float => { |
919 | 0 | clobbered_size = align_to(clobbered_size, 16); |
920 | 0 | clobbered_size += 16; |
921 | 0 | } |
922 | | } |
923 | | } |
924 | 238k | align_to(clobbered_size, 16) |
925 | 238k | } |
926 | | |
// Precomputed caller-saved ("clobbered by call") register sets for the two
// ABI families this backend distinguishes.
const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
929 | | |
/// Build the Windows-fastcall clobber set: GPRs RAX, RCX, RDX, R8-R11 and
/// XMM0-XMM5.
const fn windows_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
}
946 | | |
/// Build the System V clobber set: GPRs RAX, RCX, RDX, RSI, RDI, R8-R11 and
/// all of XMM0-XMM15.
const fn sysv_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_RSI))
        .with(regs::gpr_preg(regs::ENC_RDI))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
        .with(regs::fpr_preg(6))
        .with(regs::fpr_preg(7))
        .with(regs::fpr_preg(8))
        .with(regs::fpr_preg(9))
        .with(regs::fpr_preg(10))
        .with(regs::fpr_preg(11))
        .with(regs::fpr_preg(12))
        .with(regs::fpr_preg(13))
        .with(regs::fpr_preg(14))
        .with(regs::fpr_preg(15))
}