Coverage Report

Created: 2024-10-16 07:58

/rust/registry/src/index.crates.io-6f17d22bba15001f/cranelift-codegen-0.91.1/src/isa/x64/abi.rs
Line
Count
Source (jump to first uncovered line)
1
//! Implementation of the standard x64 ABI.
2
3
use crate::ir::{self, types, LibCall, MemFlags, Opcode, Signature, TrapCode, Type};
4
use crate::ir::{types::*, ExternalName};
5
use crate::isa;
6
use crate::isa::{unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings, CallConv};
7
use crate::machinst::abi::*;
8
use crate::machinst::*;
9
use crate::settings;
10
use crate::{CodegenError, CodegenResult};
11
use alloc::boxed::Box;
12
use alloc::vec::Vec;
13
use args::*;
14
use regalloc2::{PRegSet, VReg};
15
use smallvec::{smallvec, SmallVec};
16
use std::convert::TryFrom;
17
18
/// This is the limit for the size of argument and return-value areas on the
19
/// stack. We place a reasonable limit here to avoid integer overflow issues
20
/// with 32-bit arithmetic: for now, 128 MB.
21
static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
22
23
/// Support for the x64 ABI from the callee side (within a function body).
24
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;
25
26
/// Support for the x64 ABI from the caller side (at a callsite).
27
pub(crate) type X64Caller = Caller<X64ABIMachineSpec>;
28
29
/// Implementation of ABI primitives for x64.
30
pub struct X64ABIMachineSpec;
31
32
impl X64ABIMachineSpec {
33
0
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
34
0
        insts.reserve(probe_count as usize);
35
0
        for i in 0..probe_count {
36
0
            let offset = (guard_size * (i + 1)) as i64;
37
0
38
0
            // TODO: It would be nice if we could store the imm 0, but we don't have insts for those
39
0
            // so store the stack pointer. Any register will do, since the stack is undefined at this point
40
0
            insts.push(Self::gen_store_stack(
41
0
                StackAMode::SPOffset(-offset, I8),
42
0
                regs::rsp(),
43
0
                I32,
44
0
            ));
45
0
        }
46
0
    }
47
0
    fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
48
0
        // We have to use a caller saved register since clobbering only happens
49
0
        // after stack probing.
50
0
        //
51
0
        // R11 is caller saved on both Fastcall and SystemV, and not used for argument
52
0
        // passing, so it's pretty much free. It is also not used by the stacklimit mechanism.
53
0
        let tmp = regs::r11();
54
0
        debug_assert!({
55
0
            let real_reg = tmp.to_real_reg().unwrap();
56
0
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
57
        });
58
59
0
        insts.push(Inst::StackProbeLoop {
60
0
            tmp: Writable::from_reg(tmp),
61
0
            frame_size,
62
0
            guard_size,
63
0
        });
64
0
    }
65
}
66
67
impl IsaFlags for x64_settings::Flags {}
68
69
impl ABIMachineSpec for X64ABIMachineSpec {
70
    type I = Inst;
71
72
    type F = x64_settings::Flags;
73
74
463k
    fn word_bits() -> u32 {
75
463k
        64
76
463k
    }
77
78
    /// Return required stack alignment in bytes.
79
139k
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
80
139k
        16
81
139k
    }
82
83
398k
    fn compute_arg_locs<'a, I>(
84
398k
        call_conv: isa::CallConv,
85
398k
        flags: &settings::Flags,
86
398k
        params: I,
87
398k
        args_or_rets: ArgsOrRets,
88
398k
        add_ret_area_ptr: bool,
89
398k
        mut args: ArgsAccumulator<'_>,
90
398k
    ) -> CodegenResult<(i64, Option<usize>)>
91
398k
    where
92
398k
        I: IntoIterator<Item = &'a ir::AbiParam>,
93
398k
    {
94
398k
        let is_fastcall = call_conv.extends_windows_fastcall();
95
398k
96
398k
        let mut next_gpr = 0;
97
398k
        let mut next_vreg = 0;
98
398k
        let mut next_stack: u64 = 0;
99
398k
        let mut next_param_idx = 0; // Fastcall cares about overall param index
100
398k
101
398k
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
102
0
            // Fastcall always reserves 32 bytes of shadow space corresponding to
103
0
            // the four initial in-arg parameters.
104
0
            //
105
0
            // (See:
106
0
            // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
107
0
            next_stack = 32;
108
398k
        }
109
110
1.11M
        for param in params {
111
711k
            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
112
0
                let offset = next_stack as i64;
113
0
                let size = size as u64;
114
0
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
115
0
                next_stack += size;
116
0
                args.push(ABIArg::StructArg {
117
0
                    pointer: None,
118
0
                    offset,
119
0
                    size,
120
0
                    purpose: param.purpose,
121
0
                });
122
0
                continue;
123
711k
            }
124
125
            // Find regclass(es) of the register(s) used to store a value of this type.
126
711k
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
127
128
            // Now assign ABIArgSlots for each register-sized part.
129
            //
130
            // Note that the handling of `i128` values is unique here:
131
            //
132
            // - If `enable_llvm_abi_extensions` is set in the flags, each
133
            //   `i128` is split into two `i64`s and assigned exactly as if it
134
            //   were two consecutive 64-bit args. This is consistent with LLVM's
135
            //   behavior, and is needed for some uses of Cranelift (e.g., the
136
            //   rustc backend).
137
            //
138
            // - Otherwise, both SysV and Fastcall specify behavior (use of
139
            //   vector register, a register pair, or passing by reference
140
            //   depending on the case), but for simplicity, we will just panic if
141
            //   an i128 type appears in a signature and the LLVM extensions flag
142
            //   is not set.
143
            //
144
            // For examples of how rustc compiles i128 args and return values on
145
            // both SysV and Fastcall platforms, see:
146
            // https://godbolt.org/z/PhG3ob
147
148
711k
            if param.value_type.bits() > 64
149
0
                && !param.value_type.is_vector()
150
0
                && !flags.enable_llvm_abi_extensions()
151
            {
152
0
                panic!(
153
0
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
154
0
                );
155
711k
            }
156
711k
157
711k
            let mut slots = ABIArgSlotVec::new();
158
711k
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
159
711k
                let intreg = *rc == RegClass::Int;
160
711k
                let nextreg = if intreg {
161
503k
                    match args_or_rets {
162
                        ArgsOrRets::Args => {
163
474k
                            get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
164
                        }
165
                        ArgsOrRets::Rets => {
166
29.2k
                            get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
167
                        }
168
                    }
169
                } else {
170
208k
                    match args_or_rets {
171
                        ArgsOrRets::Args => {
172
179k
                            get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
173
                        }
174
                        ArgsOrRets::Rets => {
175
28.7k
                            get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
176
                        }
177
                    }
178
                };
179
711k
                next_param_idx += 1;
180
711k
                if let Some(reg) = nextreg {
181
634k
                    if intreg {
182
461k
                        next_gpr += 1;
183
461k
                    } else {
184
173k
                        next_vreg += 1;
185
173k
                    }
186
634k
                    slots.push(ABIArgSlot::Reg {
187
634k
                        reg: reg.to_real_reg().unwrap(),
188
634k
                        ty: *reg_ty,
189
634k
                        extension: param.extension,
190
634k
                    });
191
                } else {
192
                    // Compute size. For the wasmtime ABI it differs from native
193
                    // ABIs in how multiple values are returned, so we take a
194
                    // leaf out of arm64's book by not rounding everything up to
195
                    // 8 bytes. For all ABI arguments, and other ABI returns,
196
                    // though, each slot takes a minimum of 8 bytes.
197
                    //
198
                    // Note that in all cases 16-byte stack alignment happens
199
                    // separately after all args.
200
77.1k
                    let size = (reg_ty.bits() / 8) as u64;
201
77.1k
                    let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
202
0
                        size
203
                    } else {
204
77.1k
                        std::cmp::max(size, 8)
205
                    };
206
                    // Align.
207
77.1k
                    debug_assert!(size.is_power_of_two());
208
77.1k
                    next_stack = align_to(next_stack, size);
209
77.1k
                    slots.push(ABIArgSlot::Stack {
210
77.1k
                        offset: next_stack as i64,
211
77.1k
                        ty: *reg_ty,
212
77.1k
                        extension: param.extension,
213
77.1k
                    });
214
77.1k
                    next_stack += size;
215
                }
216
            }
217
218
711k
            args.push(ABIArg::Slots {
219
711k
                slots,
220
711k
                purpose: param.purpose,
221
711k
            });
222
        }
223
224
398k
        let extra_arg = if add_ret_area_ptr {
225
0
            debug_assert!(args_or_rets == ArgsOrRets::Args);
226
0
            if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
227
0
                args.push(ABIArg::reg(
228
0
                    reg.to_real_reg().unwrap(),
229
0
                    types::I64,
230
0
                    ir::ArgumentExtension::None,
231
0
                    ir::ArgumentPurpose::Normal,
232
0
                ));
233
0
            } else {
234
0
                args.push(ABIArg::stack(
235
0
                    next_stack as i64,
236
0
                    types::I64,
237
0
                    ir::ArgumentExtension::None,
238
0
                    ir::ArgumentPurpose::Normal,
239
0
                ));
240
0
                next_stack += 8;
241
0
            }
242
0
            Some(args.args().len() - 1)
243
        } else {
244
398k
            None
245
        };
246
247
398k
        next_stack = align_to(next_stack, 16);
248
398k
249
398k
        // To avoid overflow issues, limit the arg/return size to something reasonable.
250
398k
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
251
0
            return Err(CodegenError::ImplLimitExceeded);
252
398k
        }
253
398k
254
398k
        Ok((next_stack as i64, extra_arg))
255
398k
    }
<cranelift_codegen::isa::x64::abi::X64ABIMachineSpec as cranelift_codegen::machinst::abi::ABIMachineSpec>::compute_arg_locs::<core::iter::adapters::chain::Chain<core::option::IntoIter<&cranelift_codegen::ir::extfunc::AbiParam>, core::slice::iter::Iter<cranelift_codegen::ir::extfunc::AbiParam>>>
Line
Count
Source
83
199k
    fn compute_arg_locs<'a, I>(
84
199k
        call_conv: isa::CallConv,
85
199k
        flags: &settings::Flags,
86
199k
        params: I,
87
199k
        args_or_rets: ArgsOrRets,
88
199k
        add_ret_area_ptr: bool,
89
199k
        mut args: ArgsAccumulator<'_>,
90
199k
    ) -> CodegenResult<(i64, Option<usize>)>
91
199k
    where
92
199k
        I: IntoIterator<Item = &'a ir::AbiParam>,
93
199k
    {
94
199k
        let is_fastcall = call_conv.extends_windows_fastcall();
95
199k
96
199k
        let mut next_gpr = 0;
97
199k
        let mut next_vreg = 0;
98
199k
        let mut next_stack: u64 = 0;
99
199k
        let mut next_param_idx = 0; // Fastcall cares about overall param index
100
199k
101
199k
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
102
0
            // Fastcall always reserves 32 bytes of shadow space corresponding to
103
0
            // the four initial in-arg parameters.
104
0
            //
105
0
            // (See:
106
0
            // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
107
0
            next_stack = 32;
108
199k
        }
109
110
257k
        for param in params {
111
57.9k
            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
112
0
                let offset = next_stack as i64;
113
0
                let size = size as u64;
114
0
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
115
0
                next_stack += size;
116
0
                args.push(ABIArg::StructArg {
117
0
                    pointer: None,
118
0
                    offset,
119
0
                    size,
120
0
                    purpose: param.purpose,
121
0
                });
122
0
                continue;
123
57.9k
            }
124
125
            // Find regclass(es) of the register(s) used to store a value of this type.
126
57.9k
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
127
128
            // Now assign ABIArgSlots for each register-sized part.
129
            //
130
            // Note that the handling of `i128` values is unique here:
131
            //
132
            // - If `enable_llvm_abi_extensions` is set in the flags, each
133
            //   `i128` is split into two `i64`s and assigned exactly as if it
134
            //   were two consecutive 64-bit args. This is consistent with LLVM's
135
            //   behavior, and is needed for some uses of Cranelift (e.g., the
136
            //   rustc backend).
137
            //
138
            // - Otherwise, both SysV and Fastcall specify behavior (use of
139
            //   vector register, a register pair, or passing by reference
140
            //   depending on the case), but for simplicity, we will just panic if
141
            //   an i128 type appears in a signature and the LLVM extensions flag
142
            //   is not set.
143
            //
144
            // For examples of how rustc compiles i128 args and return values on
145
            // both SysV and Fastcall platforms, see:
146
            // https://godbolt.org/z/PhG3ob
147
148
57.9k
            if param.value_type.bits() > 64
149
0
                && !param.value_type.is_vector()
150
0
                && !flags.enable_llvm_abi_extensions()
151
            {
152
0
                panic!(
153
0
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
154
0
                );
155
57.9k
            }
156
57.9k
157
57.9k
            let mut slots = ABIArgSlotVec::new();
158
57.9k
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
159
57.9k
                let intreg = *rc == RegClass::Int;
160
57.9k
                let nextreg = if intreg {
161
29.2k
                    match args_or_rets {
162
                        ArgsOrRets::Args => {
163
0
                            get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
164
                        }
165
                        ArgsOrRets::Rets => {
166
29.2k
                            get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
167
                        }
168
                    }
169
                } else {
170
28.7k
                    match args_or_rets {
171
                        ArgsOrRets::Args => {
172
0
                            get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
173
                        }
174
                        ArgsOrRets::Rets => {
175
28.7k
                            get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
176
                        }
177
                    }
178
                };
179
57.9k
                next_param_idx += 1;
180
57.9k
                if let Some(reg) = nextreg {
181
57.9k
                    if intreg {
182
29.2k
                        next_gpr += 1;
183
29.2k
                    } else {
184
28.7k
                        next_vreg += 1;
185
28.7k
                    }
186
57.9k
                    slots.push(ABIArgSlot::Reg {
187
57.9k
                        reg: reg.to_real_reg().unwrap(),
188
57.9k
                        ty: *reg_ty,
189
57.9k
                        extension: param.extension,
190
57.9k
                    });
191
                } else {
192
                    // Compute size. For the wasmtime ABI it differs from native
193
                    // ABIs in how multiple values are returned, so we take a
194
                    // leaf out of arm64's book by not rounding everything up to
195
                    // 8 bytes. For all ABI arguments, and other ABI returns,
196
                    // though, each slot takes a minimum of 8 bytes.
197
                    //
198
                    // Note that in all cases 16-byte stack alignment happens
199
                    // separately after all args.
200
0
                    let size = (reg_ty.bits() / 8) as u64;
201
0
                    let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
202
0
                        size
203
                    } else {
204
0
                        std::cmp::max(size, 8)
205
                    };
206
                    // Align.
207
0
                    debug_assert!(size.is_power_of_two());
208
0
                    next_stack = align_to(next_stack, size);
209
0
                    slots.push(ABIArgSlot::Stack {
210
0
                        offset: next_stack as i64,
211
0
                        ty: *reg_ty,
212
0
                        extension: param.extension,
213
0
                    });
214
0
                    next_stack += size;
215
                }
216
            }
217
218
57.9k
            args.push(ABIArg::Slots {
219
57.9k
                slots,
220
57.9k
                purpose: param.purpose,
221
57.9k
            });
222
        }
223
224
199k
        let extra_arg = if add_ret_area_ptr {
225
0
            debug_assert!(args_or_rets == ArgsOrRets::Args);
226
0
            if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
227
0
                args.push(ABIArg::reg(
228
0
                    reg.to_real_reg().unwrap(),
229
0
                    types::I64,
230
0
                    ir::ArgumentExtension::None,
231
0
                    ir::ArgumentPurpose::Normal,
232
0
                ));
233
0
            } else {
234
0
                args.push(ABIArg::stack(
235
0
                    next_stack as i64,
236
0
                    types::I64,
237
0
                    ir::ArgumentExtension::None,
238
0
                    ir::ArgumentPurpose::Normal,
239
0
                ));
240
0
                next_stack += 8;
241
0
            }
242
0
            Some(args.args().len() - 1)
243
        } else {
244
199k
            None
245
        };
246
247
199k
        next_stack = align_to(next_stack, 16);
248
199k
249
199k
        // To avoid overflow issues, limit the arg/return size to something reasonable.
250
199k
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
251
0
            return Err(CodegenError::ImplLimitExceeded);
252
199k
        }
253
199k
254
199k
        Ok((next_stack as i64, extra_arg))
255
199k
    }
<cranelift_codegen::isa::x64::abi::X64ABIMachineSpec as cranelift_codegen::machinst::abi::ABIMachineSpec>::compute_arg_locs::<&alloc::vec::Vec<cranelift_codegen::ir::extfunc::AbiParam>>
Line
Count
Source
83
199k
    fn compute_arg_locs<'a, I>(
84
199k
        call_conv: isa::CallConv,
85
199k
        flags: &settings::Flags,
86
199k
        params: I,
87
199k
        args_or_rets: ArgsOrRets,
88
199k
        add_ret_area_ptr: bool,
89
199k
        mut args: ArgsAccumulator<'_>,
90
199k
    ) -> CodegenResult<(i64, Option<usize>)>
91
199k
    where
92
199k
        I: IntoIterator<Item = &'a ir::AbiParam>,
93
199k
    {
94
199k
        let is_fastcall = call_conv.extends_windows_fastcall();
95
199k
96
199k
        let mut next_gpr = 0;
97
199k
        let mut next_vreg = 0;
98
199k
        let mut next_stack: u64 = 0;
99
199k
        let mut next_param_idx = 0; // Fastcall cares about overall param index
100
199k
101
199k
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
102
0
            // Fastcall always reserves 32 bytes of shadow space corresponding to
103
0
            // the four initial in-arg parameters.
104
0
            //
105
0
            // (See:
106
0
            // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
107
0
            next_stack = 32;
108
199k
        }
109
110
853k
        for param in params {
111
653k
            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
112
0
                let offset = next_stack as i64;
113
0
                let size = size as u64;
114
0
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
115
0
                next_stack += size;
116
0
                args.push(ABIArg::StructArg {
117
0
                    pointer: None,
118
0
                    offset,
119
0
                    size,
120
0
                    purpose: param.purpose,
121
0
                });
122
0
                continue;
123
653k
            }
124
125
            // Find regclass(es) of the register(s) used to store a value of this type.
126
653k
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
127
128
            // Now assign ABIArgSlots for each register-sized part.
129
            //
130
            // Note that the handling of `i128` values is unique here:
131
            //
132
            // - If `enable_llvm_abi_extensions` is set in the flags, each
133
            //   `i128` is split into two `i64`s and assigned exactly as if it
134
            //   were two consecutive 64-bit args. This is consistent with LLVM's
135
            //   behavior, and is needed for some uses of Cranelift (e.g., the
136
            //   rustc backend).
137
            //
138
            // - Otherwise, both SysV and Fastcall specify behavior (use of
139
            //   vector register, a register pair, or passing by reference
140
            //   depending on the case), but for simplicity, we will just panic if
141
            //   an i128 type appears in a signature and the LLVM extensions flag
142
            //   is not set.
143
            //
144
            // For examples of how rustc compiles i128 args and return values on
145
            // both SysV and Fastcall platforms, see:
146
            // https://godbolt.org/z/PhG3ob
147
148
653k
            if param.value_type.bits() > 64
149
0
                && !param.value_type.is_vector()
150
0
                && !flags.enable_llvm_abi_extensions()
151
            {
152
0
                panic!(
153
0
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
154
0
                );
155
653k
            }
156
653k
157
653k
            let mut slots = ABIArgSlotVec::new();
158
653k
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
159
653k
                let intreg = *rc == RegClass::Int;
160
653k
                let nextreg = if intreg {
161
474k
                    match args_or_rets {
162
                        ArgsOrRets::Args => {
163
474k
                            get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
164
                        }
165
                        ArgsOrRets::Rets => {
166
0
                            get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
167
                        }
168
                    }
169
                } else {
170
179k
                    match args_or_rets {
171
                        ArgsOrRets::Args => {
172
179k
                            get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
173
                        }
174
                        ArgsOrRets::Rets => {
175
0
                            get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
176
                        }
177
                    }
178
                };
179
653k
                next_param_idx += 1;
180
653k
                if let Some(reg) = nextreg {
181
576k
                    if intreg {
182
432k
                        next_gpr += 1;
183
432k
                    } else {
184
144k
                        next_vreg += 1;
185
144k
                    }
186
576k
                    slots.push(ABIArgSlot::Reg {
187
576k
                        reg: reg.to_real_reg().unwrap(),
188
576k
                        ty: *reg_ty,
189
576k
                        extension: param.extension,
190
576k
                    });
191
                } else {
192
                    // Compute size. For the wasmtime ABI it differs from native
193
                    // ABIs in how multiple values are returned, so we take a
194
                    // leaf out of arm64's book by not rounding everything up to
195
                    // 8 bytes. For all ABI arguments, and other ABI returns,
196
                    // though, each slot takes a minimum of 8 bytes.
197
                    //
198
                    // Note that in all cases 16-byte stack alignment happens
199
                    // separately after all args.
200
77.1k
                    let size = (reg_ty.bits() / 8) as u64;
201
77.1k
                    let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
202
0
                        size
203
                    } else {
204
77.1k
                        std::cmp::max(size, 8)
205
                    };
206
                    // Align.
207
77.1k
                    debug_assert!(size.is_power_of_two());
208
77.1k
                    next_stack = align_to(next_stack, size);
209
77.1k
                    slots.push(ABIArgSlot::Stack {
210
77.1k
                        offset: next_stack as i64,
211
77.1k
                        ty: *reg_ty,
212
77.1k
                        extension: param.extension,
213
77.1k
                    });
214
77.1k
                    next_stack += size;
215
                }
216
            }
217
218
653k
            args.push(ABIArg::Slots {
219
653k
                slots,
220
653k
                purpose: param.purpose,
221
653k
            });
222
        }
223
224
199k
        let extra_arg = if add_ret_area_ptr {
225
0
            debug_assert!(args_or_rets == ArgsOrRets::Args);
226
0
            if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
227
0
                args.push(ABIArg::reg(
228
0
                    reg.to_real_reg().unwrap(),
229
0
                    types::I64,
230
0
                    ir::ArgumentExtension::None,
231
0
                    ir::ArgumentPurpose::Normal,
232
0
                ));
233
0
            } else {
234
0
                args.push(ABIArg::stack(
235
0
                    next_stack as i64,
236
0
                    types::I64,
237
0
                    ir::ArgumentExtension::None,
238
0
                    ir::ArgumentPurpose::Normal,
239
0
                ));
240
0
                next_stack += 8;
241
0
            }
242
0
            Some(args.args().len() - 1)
243
        } else {
244
199k
            None
245
        };
246
247
199k
        next_stack = align_to(next_stack, 16);
248
199k
249
199k
        // To avoid overflow issues, limit the arg/return size to something reasonable.
250
199k
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
251
0
            return Err(CodegenError::ImplLimitExceeded);
252
199k
        }
253
199k
254
199k
        Ok((next_stack as i64, extra_arg))
255
199k
    }
256
257
71.8k
    fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
258
71.8k
        16 // frame pointer + return address.
259
71.8k
    }
260
261
76.3k
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
262
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
263
        // bits as well -- see `Inst::store()`).
264
76.3k
        let ty = match ty {
265
1.44k
            types::I8 | types::I16 | types::I32 => types::I64,
266
74.9k
            _ => ty,
267
        };
268
76.3k
        Inst::load(ty, mem, into_reg, ExtKind::None)
269
76.3k
    }
270
271
7.15k
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
272
7.15k
        Inst::store(ty, from_reg, mem)
273
7.15k
    }
274
275
0
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
276
0
        Inst::gen_move(to_reg, from_reg, ty)
277
0
    }
278
279
    /// Generate an integer-extend operation.
280
0
    fn gen_extend(
281
0
        to_reg: Writable<Reg>,
282
0
        from_reg: Reg,
283
0
        is_signed: bool,
284
0
        from_bits: u8,
285
0
        to_bits: u8,
286
0
    ) -> Self::I {
287
0
        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
288
0
            .unwrap_or_else(|| panic!("invalid extension: {} -> {}", from_bits, to_bits));
289
0
        if is_signed {
290
0
            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
291
        } else {
292
0
            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
293
        }
294
0
    }
295
296
139k
    fn gen_args(_isa_flags: &x64_settings::Flags, args: Vec<ArgPair>) -> Inst {
297
139k
        Inst::Args { args }
298
139k
    }
299
300
198k
    fn gen_ret(
301
198k
        _setup_frame: bool,
302
198k
        _isa_flags: &x64_settings::Flags,
303
198k
        rets: Vec<RetPair>,
304
198k
    ) -> Self::I {
305
198k
        Inst::ret(rets)
306
198k
    }
307
308
0
    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Self::I> {
309
0
        let mut ret = SmallVec::new();
310
0
        if from_reg != into_reg.to_reg() {
311
0
            ret.push(Inst::gen_move(into_reg, from_reg, I64));
312
0
        }
313
0
        ret.push(Inst::alu_rmi_r(
314
0
            OperandSize::Size64,
315
0
            AluRmiROpcode::Add,
316
0
            RegMemImm::imm(imm),
317
0
            into_reg,
318
0
        ));
319
0
        ret
320
0
    }
321
322
0
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
323
0
        smallvec![
324
0
            Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(regs::rsp()), limit_reg),
325
            Inst::TrapIf {
326
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
327
0
                cc: CC::NBE,
328
0
                trap_code: TrapCode::StackOverflow,
329
            },
330
        ]
331
0
    }
332
333
0
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Self::I {
334
0
        let mem: SyntheticAmode = mem.into();
335
0
        Inst::lea(mem, into_reg)
336
0
    }
337
338
0
    /// Scratch register used while computing the stack limit; must be
    /// caller-save so no save/restore is needed around its use. We use r10.
    fn get_stacklimit_reg() -> Reg {
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));

        // As per comment on trait definition, we must return a caller-save
        // register here.
        regs::r10()
    }
348
349
0
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
350
0
        // Only ever used for I64s; if that changes, see if the ExtKind below needs to be changed.
351
0
        assert_eq!(ty, I64);
352
0
        let simm32 = offset as u32;
353
0
        let mem = Amode::imm_reg(simm32, base);
354
0
        Inst::load(ty, mem, into_reg, ExtKind::None)
355
0
    }
356
357
0
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
358
0
        let simm32 = offset as u32;
359
0
        let mem = Amode::imm_reg(simm32, base);
360
0
        Inst::store(ty, from_reg, mem)
361
0
    }
362
363
2.46k
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
364
2.46k
        let (alu_op, amount) = if amount >= 0 {
365
1.23k
            (AluRmiROpcode::Add, amount)
366
        } else {
367
1.23k
            (AluRmiROpcode::Sub, -amount)
368
        };
369
370
2.46k
        let amount = amount as u32;
371
2.46k
372
2.46k
        smallvec![Inst::alu_rmi_r(
373
2.46k
            OperandSize::Size64,
374
2.46k
            alu_op,
375
2.46k
            RegMemImm::imm(amount),
376
2.46k
            Writable::from_reg(regs::rsp()),
377
        )]
378
2.46k
    }
379
380
2.46k
    /// Record a change in the nominal-SP offset. This is a virtual
    /// (bookkeeping-only) instruction; it emits no machine code.
    fn gen_nominal_sp_adj(offset: i32) -> Self::I {
        Inst::VirtualSPOffsetAdj {
            offset: offset as i64,
        }
    }
385
386
139k
    /// Standard frame prologue: `push %rbp; mov %rsp, %rbp`, with unwind
    /// metadata emitted between the two when unwind info is enabled.
    fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Self::I> {
        let r_rsp = regs::rsp();
        let r_rbp = regs::rbp();
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16.  So here, it is 8 % 16.
        insts.push(Inst::push64(RegMemImm::reg(r_rbp)));

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: 16, // RBP, return address
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16
        insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp));
        insts
    }
408
409
99.2k
    fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Self::I> {
410
99.2k
        let mut insts = SmallVec::new();
411
99.2k
        // `mov %rbp, %rsp`
412
99.2k
        insts.push(Inst::mov_r_r(
413
99.2k
            OperandSize::Size64,
414
99.2k
            regs::rbp(),
415
99.2k
            Writable::from_reg(regs::rsp()),
416
99.2k
        ));
417
99.2k
        // `pop %rbp`
418
99.2k
        insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
419
99.2k
        insts
420
99.2k
    }
421
422
0
    /// Probe the stack via the `Probestack` libcall, which takes the frame
    /// size in %rax (its fixed, out-of-band argument register).
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            dest: ExternalName::LibCall(LibCall::Probestack),
            info: Box::new(CallInfo {
                // No need to include arg here: we are post-regalloc
                // so no constraints will be seen anyway.
                uses: smallvec![],
                defs: smallvec![],
                clobbers: PRegSet::empty(),
                opcode: Opcode::Call,
            }),
        });
    }
440
441
0
    /// Probe the stack inline (no libcall): a short unrolled sequence for
    /// small frames, a loop otherwise.
    fn gen_inline_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32, guard_size: u32) {
        // Unroll at most n consecutive probes, before falling back to using a loop
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 5 inline probes in that space, so unroll if its beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 5;

        // Number of probes that we need to perform
        let probe_count = align_to(frame_size, guard_size) / guard_size;

        if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, frame_size, guard_size)
        }
    }
457
458
139k
    /// Allocate the fixed frame plus a clobber-save area, and store each
    /// clobbered callee-save register into that area (with unwind metadata
    /// when enabled). Returns the clobber-area size and the instructions.
    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        setup_frame: bool,
        flags: &settings::Flags,
        clobbered_callee_saves: &[Writable<RealReg>],
        fixed_frame_storage_size: u32,
        _outgoing_args_size: u32,
    ) -> (u64, SmallVec<[Self::I; 16]>) {
        let mut insts = SmallVec::new();
        let clobbered_size = compute_clobber_size(&clobbered_callee_saves);

        if flags.unwind_info() && setup_frame {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: clobbered_size,
                    offset_upward_to_caller_sp: 16, // RBP, return address
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots and storage slots).
        let stack_size = fixed_frame_storage_size + clobbered_size;
        if stack_size > 0 {
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Sub,
                RegMemImm::imm(stack_size),
                Writable::from_reg(regs::rsp()),
            ));
        }
        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let mut cur_offset = fixed_frame_storage_size;
        for reg in clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let off = cur_offset;
            match r_reg.class() {
                RegClass::Int => {
                    insts.push(Inst::store(
                        types::I64,
                        r_reg.into(),
                        Amode::imm_reg(cur_offset, regs::rsp()),
                    ));
                    cur_offset += 8;
                }
                RegClass::Float => {
                    // Vector saves are 16-byte aligned within the area.
                    cur_offset = align_to(cur_offset, 16);
                    insts.push(Inst::store(
                        types::I8X16,
                        r_reg.into(),
                        Amode::imm_reg(cur_offset, regs::rsp()),
                    ));
                    cur_offset += 16;
                }
            };
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        // Offset is relative to the clobber area's base.
                        clobber_offset: off - fixed_frame_storage_size,
                        reg: r_reg,
                    },
                });
            }
        }

        (clobbered_size as u64, insts)
    }
530
531
99.2k
    /// Reload clobbered callee-save registers from the clobber-save area and
    /// pop the fixed frame (mirror of `gen_clobber_save`).
    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        sig: &Signature,
        flags: &settings::Flags,
        clobbers: &[Writable<RealReg>],
        fixed_frame_storage_size: u32,
        _outgoing_args_size: u32,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        let clobbered_callee_saves =
            Self::get_clobbered_callee_saves(call_conv, flags, sig, clobbers);
        let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);

        // Restore regs by loading from offsets of RSP. RSP will be
        // returned to nominal-RSP at this point, so we can use the
        // same offsets that we used when saving clobbers above.
        let mut cur_offset = fixed_frame_storage_size;
        for reg in &clobbered_callee_saves {
            let rreg = reg.to_reg();
            match rreg.class() {
                RegClass::Int => {
                    insts.push(Inst::mov64_m_r(
                        Amode::imm_reg(cur_offset, regs::rsp()),
                        Writable::from_reg(rreg.into()),
                    ));
                    cur_offset += 8;
                }
                RegClass::Float => {
                    // Must match the 16-byte alignment used when saving.
                    cur_offset = align_to(cur_offset, 16);
                    insts.push(Inst::load(
                        types::I8X16,
                        Amode::imm_reg(cur_offset, regs::rsp()),
                        Writable::from_reg(rreg.into()),
                        ExtKind::None,
                    ));
                    cur_offset += 16;
                }
            }
        }
        // Adjust RSP back upward.
        if stack_size > 0 {
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Add,
                RegMemImm::imm(stack_size),
                Writable::from_reg(regs::rsp()),
            ));
        }

        insts
    }
583
584
    /// Generate a call instruction/sequence: a direct call for nearby known
    /// names, and an indirect call (optionally after loading the target
    /// address into `tmp`) for far names and register targets.
    fn gen_call(
        dest: &CallDest,
        uses: CallArgList,
        defs: CallRetList,
        clobbers: PRegSet,
        opcode: ir::Opcode,
        tmp: Writable<Reg>,
        _callee_conv: isa::CallConv,
        _caller_conv: isa::CallConv,
    ) -> SmallVec<[Self::I; 2]> {
        let mut insts = SmallVec::new();
        match dest {
            // Near external name: direct relocated call.
            &CallDest::ExtName(ref name, RelocDistance::Near) => {
                insts.push(Inst::call_known(name.clone(), uses, defs, clobbers, opcode));
            }
            // Far external name: load the absolute address, then call
            // indirectly through `tmp`.
            &CallDest::ExtName(ref name, RelocDistance::Far) => {
                insts.push(Inst::LoadExtName {
                    dst: tmp,
                    name: Box::new(name.clone()),
                    offset: 0,
                });
                insts.push(Inst::call_unknown(
                    RegMem::reg(tmp.to_reg()),
                    uses,
                    defs,
                    clobbers,
                    opcode,
                ));
            }
            // Register target: indirect call.
            &CallDest::Reg(reg) => {
                insts.push(Inst::call_unknown(
                    RegMem::reg(reg),
                    uses,
                    defs,
                    clobbers,
                    opcode,
                ));
            }
        }
        insts
    }
626
627
0
    /// Emit a call to the `memcpy` libcall copying `size` bytes from `src`
    /// to `dst`. `alloc_tmp` provides scratch vregs for the size constant
    /// and the callee address.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        // memcpy(dst, src, size): first three integer argument registers.
        let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.extend(
            Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| {
                panic!("tmp should not be needed")
            })
            .into_iter(),
        );
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2,
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
        });
        insts.push(Inst::call_unknown(
            RegMem::reg(temp2.to_reg()),
            /* uses = */
            smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            /* defs = */ smallvec![],
            /* clobbers = */ Self::get_regs_clobbered_by_call(call_conv),
            Opcode::Call,
        ));
        insts
    }
677
678
1.15M
    fn get_number_of_spillslots_for_value(rc: RegClass, vector_scale: u32) -> u32 {
679
1.15M
        // We allocate in terms of 8-byte slots.
680
1.15M
        match rc {
681
768k
            RegClass::Int => 1,
682
385k
            RegClass::Float => vector_scale / 8,
683
        }
684
1.15M
    }
685
686
0
    /// Read the current virtual-SP adjustment from the emission state.
    fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64 {
        s.virtual_sp_offset
    }
689
690
0
    /// Read the nominal-SP-to-FP distance from the emission state.
    fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64 {
        s.nominal_sp_to_fp
    }
693
694
104k
    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
695
104k
        if call_conv_of_callee.extends_windows_fastcall() {
696
0
            WINDOWS_CLOBBERS
697
        } else {
698
104k
            SYSV_CLOBBERS
699
        }
700
104k
    }
701
702
284k
    /// x64 performs no implicit argument extension at the ABI level; any
    /// requested extension is ignored here.
    fn get_ext_mode(
        _call_conv: isa::CallConv,
        _specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        ir::ArgumentExtension::None
    }
708
709
238k
    /// Filter the clobbered-register list down to the registers that are
    /// callee-saved under `call_conv`, sorted deterministically by vreg
    /// index.
    fn get_clobbered_callee_saves(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
    ) -> Vec<Writable<RealReg>> {
        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::Probestack => todo!("probestack?"),
            // AArch64-only conventions cannot reach the x64 backend.
            CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because the
        // registers will be unique (there are no dups).
        regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg());
        regs
    }
734
735
139k
    /// This backend always establishes an RBP-based frame, regardless of
    /// leaf-ness or frame contents.
    fn is_frame_setup_needed(
        _is_leaf: bool,
        _stack_args_size: u32,
        _num_clobbered_callee_saves: usize,
        _frame_storage_size: u32,
    ) -> bool {
        true
    }
743
}
744
745
/// Lower an abstract stack address (FP-, nominal-SP-, or SP-relative) to a
/// concrete x64 addressing mode.
impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            StackAMode::FPOffset(off, _ty) => {
                let off = i32::try_from(off)
                    .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first");
                let simm32 = off as u32;
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32,
                    base: regs::rbp(),
                    flags: MemFlags::trusted(),
                })
            }
            StackAMode::NominalSPOffset(off, _ty) => {
                let off = i32::try_from(off).expect(
                    "Offset in NominalSPOffset is greater than 2GB; should hit impl limit first",
                );
                let simm32 = off as u32;
                // Resolved to a real SP offset later, once the final
                // virtual-SP adjustment is known.
                SyntheticAmode::nominal_sp_offset(simm32)
            }
            StackAMode::SPOffset(off, _ty) => {
                let off = i32::try_from(off)
                    .expect("Offset in SPOffset is greater than 2GB; should hit impl limit first");
                let simm32 = off as u32;
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32,
                    base: regs::rsp(),
                    flags: MemFlags::trusted(),
                })
            }
        }
    }
}
780
781
474k
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
782
474k
    let is_fastcall = call_conv.extends_windows_fastcall();
783
784
    // Fastcall counts by absolute argument number; SysV counts by argument of
785
    // this (integer) class.
786
474k
    let i = if is_fastcall { arg_idx } else { idx };
787
474k
    match (i, is_fastcall) {
788
199k
        (0, false) => Some(regs::rdi()),
789
126k
        (1, false) => Some(regs::rsi()),
790
78.9k
        (2, false) => Some(regs::rdx()),
791
12.6k
        (3, false) => Some(regs::rcx()),
792
8.60k
        (4, false) => Some(regs::r8()),
793
6.16k
        (5, false) => Some(regs::r9()),
794
0
        (0, true) => Some(regs::rcx()),
795
0
        (1, true) => Some(regs::rdx()),
796
0
        (2, true) => Some(regs::r8()),
797
0
        (3, true) => Some(regs::r9()),
798
41.8k
        _ => None,
799
    }
800
474k
}
801
802
179k
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
803
179k
    let is_fastcall = call_conv.extends_windows_fastcall();
804
805
    // Fastcall counts by absolute argument number; SysV counts by argument of
806
    // this (floating-point) class.
807
179k
    let i = if is_fastcall { arg_idx } else { idx };
808
179k
    match (i, is_fastcall) {
809
54.8k
        (0, false) => Some(regs::xmm0()),
810
27.5k
        (1, false) => Some(regs::xmm1()),
811
16.3k
        (2, false) => Some(regs::xmm2()),
812
12.5k
        (3, false) => Some(regs::xmm3()),
813
10.1k
        (4, false) => Some(regs::xmm4()),
814
8.35k
        (5, false) => Some(regs::xmm5()),
815
7.62k
        (6, false) => Some(regs::xmm6()),
816
6.87k
        (7, false) => Some(regs::xmm7()),
817
0
        (0, true) => Some(regs::xmm0()),
818
0
        (1, true) => Some(regs::xmm1()),
819
0
        (2, true) => Some(regs::xmm2()),
820
0
        (3, true) => Some(regs::xmm3()),
821
35.2k
        _ => None,
822
    }
823
179k
}
824
825
29.2k
/// Integer return-value register for the given position under `call_conv`,
/// or `None` if that position must be returned another way.
fn get_intreg_for_retval(
    call_conv: &CallConv,
    intreg_idx: usize,
    retval_idx: usize,
) -> Option<Reg> {
    match call_conv {
        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            _ => None,
        },
        // Wasmtime conventions allow only a single in-register return value.
        CallConv::WasmtimeSystemV | CallConv::WasmtimeFastcall => {
            if intreg_idx == 0 && retval_idx == 0 {
                Some(regs::rax())
            } else {
                None
            }
        }
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },
        CallConv::Probestack => todo!(),
        // AArch64-only conventions cannot reach the x64 backend.
        CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
    }
}
852
853
28.7k
/// Floating-point return-value register for the given position under
/// `call_conv`, or `None` if that position must be returned another way.
fn get_fltreg_for_retval(
    call_conv: &CallConv,
    fltreg_idx: usize,
    retval_idx: usize,
) -> Option<Reg> {
    match call_conv {
        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        // Wasmtime conventions allow only a single in-register return value.
        CallConv::WasmtimeFastcall | CallConv::WasmtimeSystemV => {
            if fltreg_idx == 0 && retval_idx == 0 {
                Some(regs::xmm0())
            } else {
                None
            }
        }
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Probestack => todo!(),
        // AArch64-only conventions cannot reach the x64 backend.
        CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
    }
}
879
880
3.04M
fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
881
3.04M
    use regs::*;
882
3.04M
    match r.class() {
883
1.15M
        RegClass::Int => match r.hw_enc() {
884
78.5k
            ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 => true,
885
            // R15 is the pinned register; if we're using it that way,
886
            // it is effectively globally-allocated, and is not
887
            // callee-saved.
888
11.2k
            ENC_R15 => !enable_pinned_reg,
889
1.06M
            _ => false,
890
        },
891
1.88M
        RegClass::Float => false,
892
    }
893
3.04M
}
894
895
0
fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
896
0
    use regs::*;
897
0
    match r.class() {
898
0
        RegClass::Int => match r.hw_enc() {
899
0
            ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 => true,
900
            // See above for SysV: we must treat the pinned reg specially.
901
0
            ENC_R15 => !enable_pinned_reg,
902
0
            _ => false,
903
        },
904
0
        RegClass::Float => match r.hw_enc() {
905
0
            6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
906
0
            _ => false,
907
        },
908
    }
909
0
}
910
911
238k
fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
912
238k
    let mut clobbered_size = 0;
913
328k
    for reg in clobbers {
914
89.8k
        match reg.to_reg().class() {
915
89.8k
            RegClass::Int => {
916
89.8k
                clobbered_size += 8;
917
89.8k
            }
918
0
            RegClass::Float => {
919
0
                clobbered_size = align_to(clobbered_size, 16);
920
0
                clobbered_size += 16;
921
0
            }
922
        }
923
    }
924
238k
    align_to(clobbered_size, 16)
925
238k
}
926
927
/// Registers clobbered by a call under the Windows fastcall convention.
const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
/// Registers clobbered by a call under the System V convention.
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
929
930
0
/// Build the caller-save (clobbered-by-call) set for Windows fastcall:
/// volatile GPRs plus XMM0-XMM5.
const fn windows_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
}
946
947
0
/// Build the caller-save (clobbered-by-call) set for System V: volatile
/// GPRs plus all sixteen XMM registers.
const fn sysv_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_RSI))
        .with(regs::gpr_preg(regs::ENC_RDI))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
        .with(regs::fpr_preg(6))
        .with(regs::fpr_preg(7))
        .with(regs::fpr_preg(8))
        .with(regs::fpr_preg(9))
        .with(regs::fpr_preg(10))
        .with(regs::fpr_preg(11))
        .with(regs::fpr_preg(12))
        .with(regs::fpr_preg(13))
        .with(regs::fpr_preg(14))
        .with(regs::fpr_preg(15))
}