/src/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Line | Count | Source
1 | | //===--- AArch64CallLowering.cpp - Call lowering --------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements the lowering of LLVM calls to machine code calls for |
11 | | /// GlobalISel. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AArch64CallLowering.h" |
16 | | #include "AArch64ISelLowering.h" |
17 | | #include "AArch64MachineFunctionInfo.h" |
18 | | #include "AArch64RegisterInfo.h" |
19 | | #include "AArch64Subtarget.h" |
20 | | #include "llvm/ADT/ArrayRef.h" |
21 | | #include "llvm/ADT/SmallVector.h" |
22 | | #include "llvm/Analysis/ObjCARCUtil.h" |
23 | | #include "llvm/CodeGen/Analysis.h" |
24 | | #include "llvm/CodeGen/CallingConvLower.h" |
25 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
26 | | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
27 | | #include "llvm/CodeGen/GlobalISel/Utils.h" |
28 | | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
29 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
30 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
31 | | #include "llvm/CodeGen/MachineFunction.h" |
32 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
33 | | #include "llvm/CodeGen/MachineMemOperand.h" |
34 | | #include "llvm/CodeGen/MachineOperand.h" |
35 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
36 | | #include "llvm/CodeGen/MachineValueType.h" |
37 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
38 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
39 | | #include "llvm/CodeGen/ValueTypes.h" |
40 | | #include "llvm/IR/Argument.h" |
41 | | #include "llvm/IR/Attributes.h" |
42 | | #include "llvm/IR/Function.h" |
43 | | #include "llvm/IR/Type.h" |
44 | | #include "llvm/IR/Value.h" |
45 | | #include <algorithm> |
46 | | #include <cassert> |
47 | | #include <cstdint> |
48 | | #include <iterator> |
49 | | |
50 | | #define DEBUG_TYPE "aarch64-call-lowering" |
51 | | |
52 | | using namespace llvm; |
53 | | |
54 | | AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) |
55 | 25 | : CallLowering(&TLI) {} |
56 | | |
57 | | static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT, |
58 | 26.0k | MVT &LocVT) { |
59 | | // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy |
60 | | // hack because the DAG calls the assignment function with pre-legalized |
61 | | // register typed values, not the raw type. |
62 | | // |
63 | | // This hack is not applied to return values which are not passed on the |
64 | | // stack. |
65 | 26.0k | if (OrigVT == MVT::i1 || OrigVT == MVT::i8) |
66 | 2.48k | ValVT = LocVT = MVT::i8; |
67 | 23.5k | else if (OrigVT == MVT::i16) |
68 | 193 | ValVT = LocVT = MVT::i16; |
69 | 26.0k | } |
70 | | |
71 | | // Account for i1/i8/i16 stack passed value hack |
72 | 517 | static LLT getStackValueStoreTypeHack(const CCValAssign &VA) { |
73 | 517 | const MVT ValVT = VA.getValVT(); |
74 | 517 | return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT) |
75 | 517 | : LLT(VA.getLocVT()); |
76 | 517 | } |
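
Taken together, the two helpers above amount to a small remapping; roughly, as an illustrative sketch:

  // OrigVT i1 or i8 -> ValVT = LocVT = MVT::i8,  stack store type LLT(s8)
  // OrigVT i16      -> ValVT = LocVT = MVT::i16, stack store type LLT(s16)
  // any other type  -> left untouched; the store type falls back to LLT(LocVT)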
77 | | |
78 | | namespace { |
79 | | |
80 | | struct AArch64IncomingValueAssigner |
81 | | : public CallLowering::IncomingValueAssigner { |
82 | | AArch64IncomingValueAssigner(CCAssignFn *AssignFn_, |
83 | | CCAssignFn *AssignFnVarArg_) |
84 | 14.8k | : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {} |
85 | | |
86 | | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
87 | | CCValAssign::LocInfo LocInfo, |
88 | | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
89 | 22.0k | CCState &State) override { |
90 | 22.0k | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
91 | 22.0k | return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT, |
92 | 22.0k | LocInfo, Info, Flags, State); |
93 | 22.0k | } |
94 | | }; |
95 | | |
96 | | struct AArch64OutgoingValueAssigner |
97 | | : public CallLowering::OutgoingValueAssigner { |
98 | | const AArch64Subtarget &Subtarget; |
99 | | |
100 | | /// Track if this is used for a return instead of function argument |
101 | | /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use |
102 | | /// stack passed returns for them and cannot apply the type adjustment. |
103 | | bool IsReturn; |
104 | | |
105 | | AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_, |
106 | | CCAssignFn *AssignFnVarArg_, |
107 | | const AArch64Subtarget &Subtarget_, |
108 | | bool IsReturn) |
109 | | : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_), |
110 | 16.5k | Subtarget(Subtarget_), IsReturn(IsReturn) {} |
111 | | |
112 | | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
113 | | CCValAssign::LocInfo LocInfo, |
114 | | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
115 | 22.1k | CCState &State) override { |
116 | 22.1k | bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv()); |
117 | 22.1k | bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg(); |
118 | | |
119 | 22.1k | bool Res; |
120 | 22.1k | if (Info.IsFixed && !UseVarArgsCCForFixed) { |
121 | 22.1k | if (!IsReturn) |
122 | 3.97k | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
123 | 22.1k | Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
124 | 22.1k | } else |
125 | 17 | Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
126 | | |
127 | 22.1k | StackSize = State.getStackSize(); |
128 | 22.1k | return Res; |
129 | 22.1k | } |
130 | | }; |
131 | | |
132 | | struct IncomingArgHandler : public CallLowering::IncomingValueHandler { |
133 | | IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
134 | 16.6k | : IncomingValueHandler(MIRBuilder, MRI) {} |
135 | | |
136 | | Register getStackAddress(uint64_t Size, int64_t Offset, |
137 | | MachinePointerInfo &MPO, |
138 | 549 | ISD::ArgFlagsTy Flags) override { |
139 | 549 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
140 | | |
141 | | // Byval is assumed to be writable memory, but other stack passed arguments |
142 | | // are not. |
143 | 549 | const bool IsImmutable = !Flags.isByVal(); |
144 | | |
145 | 549 | int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable); |
146 | 549 | MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); |
147 | 549 | auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI); |
148 | 549 | return AddrReg.getReg(0); |
149 | 549 | } |
150 | | |
151 | | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
152 | 549 | ISD::ArgFlagsTy Flags) const override { |
153 | | // For pointers, we just need to fixup the integer types reported in the |
154 | | // CCValAssign. |
155 | 549 | if (Flags.isPointer()) |
156 | 56 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
157 | 493 | return getStackValueStoreTypeHack(VA); |
158 | 549 | } |
159 | | |
160 | | void assignValueToReg(Register ValVReg, Register PhysReg, |
161 | 22.4k | const CCValAssign &VA) override { |
162 | 22.4k | markPhysRegUsed(PhysReg); |
163 | 22.4k | IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); |
164 | 22.4k | } |
165 | | |
166 | | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
167 | | const MachinePointerInfo &MPO, |
168 | 549 | const CCValAssign &VA) override { |
169 | 549 | MachineFunction &MF = MIRBuilder.getMF(); |
170 | | |
171 | 549 | LLT ValTy(VA.getValVT()); |
172 | 549 | LLT LocTy(VA.getLocVT()); |
173 | | |
174 | | // Fixup the types for the DAG compatibility hack. |
175 | 549 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) |
176 | 0 | std::swap(ValTy, LocTy); |
177 | 549 | else { |
178 | | // The calling code knows if this is a pointer or not; we're only touching |
179 | | // the LocTy for the i8/i16 hack. |
180 | 549 | assert(LocTy.getSizeInBits() == MemTy.getSizeInBits()); |
181 | 0 | LocTy = MemTy; |
182 | 549 | } |
183 | | |
184 | 0 | auto MMO = MF.getMachineMemOperand( |
185 | 549 | MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy, |
186 | 549 | inferAlignFromPtrInfo(MF, MPO)); |
187 | | |
188 | 549 | switch (VA.getLocInfo()) { |
189 | 0 | case CCValAssign::LocInfo::ZExt: |
190 | 0 | MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO); |
191 | 0 | return; |
192 | 0 | case CCValAssign::LocInfo::SExt: |
193 | 0 | MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO); |
194 | 0 | return; |
195 | 549 | default: |
196 | 549 | MIRBuilder.buildLoad(ValVReg, Addr, *MMO); |
197 | 549 | return; |
198 | 549 | } |
199 | 549 | } |
200 | | |
201 | | /// How the physical register gets marked varies between formal |
202 | | /// parameters (it's a basic-block live-in), and a call instruction |
203 | | /// (it's an implicit-def of the BL). |
204 | | virtual void markPhysRegUsed(MCRegister PhysReg) = 0; |
205 | | }; |
206 | | |
207 | | struct FormalArgHandler : public IncomingArgHandler { |
208 | | FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
209 | 14.8k | : IncomingArgHandler(MIRBuilder, MRI) {} |
210 | | |
211 | 21.5k | void markPhysRegUsed(MCRegister PhysReg) override { |
212 | 21.5k | MIRBuilder.getMRI()->addLiveIn(PhysReg); |
213 | 21.5k | MIRBuilder.getMBB().addLiveIn(PhysReg); |
214 | 21.5k | } |
215 | | }; |
216 | | |
217 | | struct CallReturnHandler : public IncomingArgHandler { |
218 | | CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
219 | | MachineInstrBuilder MIB) |
220 | 1.74k | : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {} |
221 | | |
222 | 964 | void markPhysRegUsed(MCRegister PhysReg) override { |
223 | 964 | MIB.addDef(PhysReg, RegState::Implicit); |
224 | 964 | } |
225 | | |
226 | | MachineInstrBuilder MIB; |
227 | | }; |
228 | | |
229 | | /// A special return arg handler for "returned" attribute arg calls. |
230 | | struct ReturnedArgCallReturnHandler : public CallReturnHandler { |
231 | | ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder, |
232 | | MachineRegisterInfo &MRI, |
233 | | MachineInstrBuilder MIB) |
234 | 873 | : CallReturnHandler(MIRBuilder, MRI, MIB) {} |
235 | | |
236 | 0 | void markPhysRegUsed(MCRegister PhysReg) override {} |
237 | | }; |
238 | | |
239 | | struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { |
240 | | OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
241 | | MachineInstrBuilder MIB, bool IsTailCall = false, |
242 | | int FPDiff = 0) |
243 | | : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall), |
244 | | FPDiff(FPDiff), |
245 | 15.6k | Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {} |
246 | | |
247 | | Register getStackAddress(uint64_t Size, int64_t Offset, |
248 | | MachinePointerInfo &MPO, |
249 | 29 | ISD::ArgFlagsTy Flags) override { |
250 | 29 | MachineFunction &MF = MIRBuilder.getMF(); |
251 | 29 | LLT p0 = LLT::pointer(0, 64); |
252 | 29 | LLT s64 = LLT::scalar(64); |
253 | | |
254 | 29 | if (IsTailCall) { |
255 | 0 | assert(!Flags.isByVal() && "byval unhandled with tail calls"); |
256 | | |
257 | 0 | Offset += FPDiff; |
258 | 0 | int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); |
259 | 0 | auto FIReg = MIRBuilder.buildFrameIndex(p0, FI); |
260 | 0 | MPO = MachinePointerInfo::getFixedStack(MF, FI); |
261 | 0 | return FIReg.getReg(0); |
262 | 0 | } |
263 | | |
264 | 29 | if (!SPReg) |
265 | 11 | SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0); |
266 | | |
267 | 29 | auto OffsetReg = MIRBuilder.buildConstant(s64, Offset); |
268 | | |
269 | 29 | auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg); |
270 | | |
271 | 29 | MPO = MachinePointerInfo::getStack(MF, Offset); |
272 | 29 | return AddrReg.getReg(0); |
273 | 29 | } |
274 | | |
275 | | /// We need to fixup the reported store size for certain value types because |
276 | | /// we invert the interpretation of ValVT and LocVT in certain cases. This is |
277 | | /// for compatibility with the DAG call lowering implementation, which we're |
278 | | /// currently building on top of. |
279 | | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
280 | 24 | ISD::ArgFlagsTy Flags) const override { |
281 | 24 | if (Flags.isPointer()) |
282 | 0 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
283 | 24 | return getStackValueStoreTypeHack(VA); |
284 | 24 | } |
285 | | |
286 | | void assignValueToReg(Register ValVReg, Register PhysReg, |
287 | 21.0k | const CCValAssign &VA) override { |
288 | 21.0k | MIB.addUse(PhysReg, RegState::Implicit); |
289 | 21.0k | Register ExtReg = extendRegister(ValVReg, VA); |
290 | 21.0k | MIRBuilder.buildCopy(PhysReg, ExtReg); |
291 | 21.0k | } |
292 | | |
293 | | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
294 | | const MachinePointerInfo &MPO, |
295 | 24 | const CCValAssign &VA) override { |
296 | 24 | MachineFunction &MF = MIRBuilder.getMF(); |
297 | 24 | auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy, |
298 | 24 | inferAlignFromPtrInfo(MF, MPO)); |
299 | 24 | MIRBuilder.buildStore(ValVReg, Addr, *MMO); |
300 | 24 | } |
301 | | |
302 | | void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex, |
303 | | Register Addr, LLT MemTy, |
304 | | const MachinePointerInfo &MPO, |
305 | 24 | const CCValAssign &VA) override { |
306 | 24 | unsigned MaxSize = MemTy.getSizeInBytes() * 8; |
307 | | // For varargs, we always want to extend them to 8 bytes, in which case |
308 | | // we disable setting a max. |
309 | 24 | if (!Arg.IsFixed) |
310 | 0 | MaxSize = 0; |
311 | | |
312 | 24 | Register ValVReg = Arg.Regs[RegIndex]; |
313 | 24 | if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) { |
314 | 24 | MVT LocVT = VA.getLocVT(); |
315 | 24 | MVT ValVT = VA.getValVT(); |
316 | | |
317 | 24 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) { |
318 | 0 | std::swap(ValVT, LocVT); |
319 | 0 | MemTy = LLT(VA.getValVT()); |
320 | 0 | } |
321 | | |
322 | 24 | ValVReg = extendRegister(ValVReg, VA, MaxSize); |
323 | 24 | } else { |
324 | | // The store does not cover the full allocated stack slot. |
325 | 0 | MemTy = LLT(VA.getValVT()); |
326 | 0 | } |
327 | | |
328 | 24 | assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); |
329 | 24 | } |
330 | | |
331 | | MachineInstrBuilder MIB; |
332 | | |
333 | | bool IsTailCall; |
334 | | |
335 | | /// For tail calls, the byte offset of the call's argument area from the |
336 | | /// callee's. Unused elsewhere. |
337 | | int FPDiff; |
338 | | |
339 | | // Cache the SP register vreg if we need it more than once in this call site. |
340 | | Register SPReg; |
341 | | |
342 | | const AArch64Subtarget &Subtarget; |
343 | | }; |
344 | | } // namespace |
345 | | |
346 | 17.1k | static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { |
347 | 17.1k | return (CallConv == CallingConv::Fast && TailCallOpt) || |
348 | 17.1k | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
349 | 17.1k | } |
350 | | |
351 | | bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, |
352 | | const Value *Val, |
353 | | ArrayRef<Register> VRegs, |
354 | | FunctionLoweringInfo &FLI, |
355 | 15.9k | Register SwiftErrorVReg) const { |
356 | 15.9k | auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR); |
357 | 15.9k | assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && |
358 | 15.9k | "Return value without a vreg"); |
359 | | |
360 | 0 | bool Success = true; |
361 | 15.9k | if (!FLI.CanLowerReturn) { |
362 | 9 | insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister); |
363 | 15.9k | } else if (!VRegs.empty()) { |
364 | 13.3k | MachineFunction &MF = MIRBuilder.getMF(); |
365 | 13.3k | const Function &F = MF.getFunction(); |
366 | 13.3k | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
367 | | |
368 | 13.3k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
369 | 13.3k | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
370 | 13.3k | CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); |
371 | 13.3k | auto &DL = F.getParent()->getDataLayout(); |
372 | 13.3k | LLVMContext &Ctx = Val->getType()->getContext(); |
373 | | |
374 | 13.3k | SmallVector<EVT, 4> SplitEVTs; |
375 | 13.3k | ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs); |
376 | 13.3k | assert(VRegs.size() == SplitEVTs.size() && |
377 | 13.3k | "For each split Type there should be exactly one VReg."); |
378 | | |
379 | 0 | SmallVector<ArgInfo, 8> SplitArgs; |
380 | 13.3k | CallingConv::ID CC = F.getCallingConv(); |
381 | | |
382 | 26.8k | for (unsigned i = 0; i < SplitEVTs.size(); ++i) { |
383 | 13.4k | Register CurVReg = VRegs[i]; |
384 | 13.4k | ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0}; |
385 | 13.4k | setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); |
386 | | |
387 | | // i1 is a special case because SDAG i1 true is naturally zero extended |
388 | | // when widened using ANYEXT. We need to do it explicitly here. |
389 | 13.4k | auto &Flags = CurArgInfo.Flags[0]; |
390 | 13.4k | if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() && |
391 | 13.4k | !Flags.isZExt()) { |
392 | 2.78k | CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0); |
393 | 10.6k | } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) == |
394 | 10.6k | 1) { |
395 | | // Some types will need extending as specified by the CC. |
396 | 8.34k | MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]); |
397 | 8.34k | if (EVT(NewVT) != SplitEVTs[i]) { |
398 | 1.19k | unsigned ExtendOp = TargetOpcode::G_ANYEXT; |
399 | 1.19k | if (F.getAttributes().hasRetAttr(Attribute::SExt)) |
400 | 2 | ExtendOp = TargetOpcode::G_SEXT; |
401 | 1.18k | else if (F.getAttributes().hasRetAttr(Attribute::ZExt)) |
402 | 26 | ExtendOp = TargetOpcode::G_ZEXT; |
403 | | |
404 | 1.19k | LLT NewLLT(NewVT); |
405 | 1.19k | LLT OldLLT(MVT::getVT(CurArgInfo.Ty)); |
406 | 1.19k | CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx); |
407 | | // Instead of an extend, we might have a vector type which needs |
408 | | // padding with more elements, e.g. <2 x half> -> <4 x half>. |
409 | 1.19k | if (NewVT.isVector()) { |
410 | 562 | if (OldLLT.isVector()) { |
411 | 554 | if (NewLLT.getNumElements() > OldLLT.getNumElements()) { |
412 | | |
413 | 90 | CurVReg = |
414 | 90 | MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg) |
415 | 90 | .getReg(0); |
416 | 464 | } else { |
417 | | // Just do a vector extend. |
418 | 464 | CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}) |
419 | 464 | .getReg(0); |
420 | 464 | } |
421 | 554 | } else if (NewLLT.getNumElements() >= 2 && |
422 | 8 | NewLLT.getNumElements() <= 8) { |
423 | | // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't |
424 | | // have <1 x S> vector types in GISel we use a build_vector |
425 | | // instead of a vector merge/concat. |
426 | 8 | CurVReg = |
427 | 8 | MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg) |
428 | 8 | .getReg(0); |
429 | 8 | } else { |
430 | 0 | LLVM_DEBUG(dbgs() << "Could not handle ret ty\n"); |
431 | 0 | return false; |
432 | 0 | } |
433 | 629 | } else { |
434 | | // If the split EVT was a <1 x T> vector, and NewVT is T, then we |
435 | | // don't have to do anything since we don't distinguish between the |
436 | | // two. |
437 | 629 | if (NewLLT != MRI.getType(CurVReg)) { |
438 | | // A scalar extend. |
439 | 629 | CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}) |
440 | 629 | .getReg(0); |
441 | 629 | } |
442 | 629 | } |
443 | 1.19k | } |
444 | 8.34k | } |
445 | 13.4k | if (CurVReg != CurArgInfo.Regs[0]) { |
446 | 3.97k | CurArgInfo.Regs[0] = CurVReg; |
447 | | // Reset the arg flags after modifying CurVReg. |
448 | 3.97k | setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); |
449 | 3.97k | } |
450 | 13.4k | splitToValueTypes(CurArgInfo, SplitArgs, DL, CC); |
451 | 13.4k | } |
452 | | |
453 | 13.3k | AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget, |
454 | 13.3k | /*IsReturn*/ true); |
455 | 13.3k | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB); |
456 | 13.3k | Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs, |
457 | 13.3k | MIRBuilder, CC, F.isVarArg()); |
458 | 13.3k | } |
459 | | |
460 | 15.9k | if (SwiftErrorVReg) { |
461 | 0 | MIB.addUse(AArch64::X21, RegState::Implicit); |
462 | 0 | MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg); |
463 | 0 | } |
464 | | |
465 | 15.9k | MIRBuilder.insertInstr(MIB); |
466 | 15.9k | return Success; |
467 | 15.9k | } |
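
As a concrete illustration of the i1 special case above (schematic only, not taken from an actual test):

  // define i1 @f() ... ret i1 %c
  //   %z:_(s8) = G_ZEXT %c:_(s1)   ; the explicit zero-extension built above
  //   ... %z is then extended again per the calling convention and copied into $w0 ...
  //   RET_ReallyLR implicit $w0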
468 | | |
469 | | bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, |
470 | | CallingConv::ID CallConv, |
471 | | SmallVectorImpl<BaseArgInfo> &Outs, |
472 | 16.8k | bool IsVarArg) const { |
473 | 16.8k | SmallVector<CCValAssign, 16> ArgLocs; |
474 | 16.8k | const auto &TLI = *getTLI<AArch64TargetLowering>(); |
475 | 16.8k | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, |
476 | 16.8k | MF.getFunction().getContext()); |
477 | | |
478 | 16.8k | return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv)); |
479 | 16.8k | } |
480 | | |
481 | | /// Helper function to compute forwarded registers for musttail calls. Computes |
482 | | /// the forwarded registers, sets MBB liveness, and emits COPY instructions that |
483 | | /// can be used to save + restore registers later. |
484 | | static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, |
485 | 14.8k | CCAssignFn *AssignFn) { |
486 | 14.8k | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
487 | 14.8k | MachineFunction &MF = MIRBuilder.getMF(); |
488 | 14.8k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
489 | | |
490 | 14.8k | if (!MFI.hasMustTailInVarArgFunc()) |
491 | 14.8k | return; |
492 | | |
493 | 0 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
494 | 0 | const Function &F = MF.getFunction(); |
495 | 0 | assert(F.isVarArg() && "Expected F to be vararg?"); |
496 | | |
497 | | // Compute the set of forwarded registers. The rest are scratch. |
498 | 0 | SmallVector<CCValAssign, 16> ArgLocs; |
499 | 0 | CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, |
500 | 0 | F.getContext()); |
501 | 0 | SmallVector<MVT, 2> RegParmTypes; |
502 | 0 | RegParmTypes.push_back(MVT::i64); |
503 | 0 | RegParmTypes.push_back(MVT::f128); |
504 | | |
505 | | // Later on, we can use this vector to restore the registers if necessary. |
506 | 0 | SmallVectorImpl<ForwardedRegister> &Forwards = |
507 | 0 | FuncInfo->getForwardedMustTailRegParms(); |
508 | 0 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn); |
509 | | |
510 | | // Conservatively forward X8, since it might be used for an aggregate |
511 | | // return. |
512 | 0 | if (!CCInfo.isAllocated(AArch64::X8)) { |
513 | 0 | Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); |
514 | 0 | Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); |
515 | 0 | } |
516 | | |
517 | | // Add the forwards to the MachineBasicBlock and MachineFunction. |
518 | 0 | for (const auto &F : Forwards) { |
519 | 0 | MBB.addLiveIn(F.PReg); |
520 | 0 | MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg)); |
521 | 0 | } |
522 | 0 | } |
523 | | |
524 | 14.8k | bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { |
525 | 14.8k | auto &F = MF.getFunction(); |
526 | 14.8k | if (F.getReturnType()->isScalableTy() || |
527 | 18.9k | llvm::any_of(F.args(), [](const Argument &A) { |
528 | 18.9k | return A.getType()->isScalableTy(); |
529 | 18.9k | })) |
530 | 0 | return true; |
531 | 14.8k | const auto &ST = MF.getSubtarget<AArch64Subtarget>(); |
532 | 14.8k | if (!ST.hasNEON() || !ST.hasFPARMv8()) { |
533 | 0 | LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n"); |
534 | 0 | return true; |
535 | 0 | } |
536 | | |
537 | 14.8k | SMEAttrs Attrs(F); |
538 | 14.8k | if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() || |
539 | 14.8k | Attrs.hasStreamingCompatibleInterface()) |
540 | 0 | return true; |
541 | | |
542 | 14.8k | return false; |
543 | 14.8k | } |
544 | | |
545 | | void AArch64CallLowering::saveVarArgRegisters( |
546 | | MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler, |
547 | 2 | CCState &CCInfo) const { |
548 | 2 | auto GPRArgRegs = AArch64::getGPRArgRegs(); |
549 | 2 | auto FPRArgRegs = AArch64::getFPRArgRegs(); |
550 | | |
551 | 2 | MachineFunction &MF = MIRBuilder.getMF(); |
552 | 2 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
553 | 2 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
554 | 2 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
555 | 2 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
556 | 2 | bool IsWin64CC = |
557 | 2 | Subtarget.isCallingConvWin64(CCInfo.getCallingConv()); |
558 | 2 | const LLT p0 = LLT::pointer(0, 64); |
559 | 2 | const LLT s64 = LLT::scalar(64); |
560 | | |
561 | 2 | unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); |
562 | 2 | unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1; |
563 | | |
564 | 2 | unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR); |
565 | 2 | int GPRIdx = 0; |
566 | 2 | if (GPRSaveSize != 0) { |
567 | 2 | if (IsWin64CC) { |
568 | 0 | GPRIdx = MFI.CreateFixedObject(GPRSaveSize, |
569 | 0 | -static_cast<int>(GPRSaveSize), false); |
570 | 0 | if (GPRSaveSize & 15) |
571 | | // The extra size here, if triggered, will always be 8. |
572 | 0 | MFI.CreateFixedObject(16 - (GPRSaveSize & 15), |
573 | 0 | -static_cast<int>(alignTo(GPRSaveSize, 16)), |
574 | 0 | false); |
575 | 0 | } else |
576 | 2 | GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false); |
577 | | |
578 | 2 | auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx); |
579 | 2 | auto Offset = |
580 | 2 | MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8); |
581 | | |
582 | 16 | for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) { |
583 | 14 | Register Val = MRI.createGenericVirtualRegister(s64); |
584 | 14 | Handler.assignValueToReg( |
585 | 14 | Val, GPRArgRegs[i], |
586 | 14 | CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64, |
587 | 14 | GPRArgRegs[i], MVT::i64, CCValAssign::Full)); |
588 | 14 | auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack( |
589 | 0 | MF, GPRIdx, (i - FirstVariadicGPR) * 8) |
590 | 14 | : MachinePointerInfo::getStack(MF, i * 8); |
591 | 14 | MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO)); |
592 | | |
593 | 14 | FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0), |
594 | 14 | FIN.getReg(0), Offset); |
595 | 14 | } |
596 | 2 | } |
597 | 2 | FuncInfo->setVarArgsGPRIndex(GPRIdx); |
598 | 2 | FuncInfo->setVarArgsGPRSize(GPRSaveSize); |
599 | | |
600 | 2 | if (Subtarget.hasFPARMv8() && !IsWin64CC) { |
601 | 2 | unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs); |
602 | | |
603 | 2 | unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR); |
604 | 2 | int FPRIdx = 0; |
605 | 2 | if (FPRSaveSize != 0) { |
606 | 2 | FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false); |
607 | | |
608 | 2 | auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx); |
609 | 2 | auto Offset = |
610 | 2 | MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16); |
611 | | |
612 | 18 | for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) { |
613 | 16 | Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128)); |
614 | 16 | Handler.assignValueToReg( |
615 | 16 | Val, FPRArgRegs[i], |
616 | 16 | CCValAssign::getReg( |
617 | 16 | i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs, |
618 | 16 | MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full)); |
619 | | |
620 | 16 | auto MPO = MachinePointerInfo::getStack(MF, i * 16); |
621 | 16 | MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO)); |
622 | | |
623 | 16 | FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0), |
624 | 16 | FIN.getReg(0), Offset); |
625 | 16 | } |
626 | 2 | } |
627 | 2 | FuncInfo->setVarArgsFPRIndex(FPRIdx); |
628 | 2 | FuncInfo->setVarArgsFPRSize(FPRSaveSize); |
629 | 2 | } |
630 | 2 | } |
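
For a sense of the sizes involved, a rough worked example, assuming the usual x0-x7 and q0-q7 argument registers:

  // void f(int a, ...) under AAPCS (non-Win64, FP available):
  //   FirstVariadicGPR = 1  ->  GPRSaveSize = 8 * (8 - 1) = 56 bytes   (x1-x7)
  //   FirstVariadicFPR = 0  ->  FPRSaveSize = 16 * (8 - 0) = 128 bytes (q0-q7)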
631 | | |
632 | | bool AArch64CallLowering::lowerFormalArguments( |
633 | | MachineIRBuilder &MIRBuilder, const Function &F, |
634 | 14.8k | ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { |
635 | 14.8k | MachineFunction &MF = MIRBuilder.getMF(); |
636 | 14.8k | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
637 | 14.8k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
638 | 14.8k | auto &DL = F.getParent()->getDataLayout(); |
639 | 14.8k | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
640 | | // TODO: Support Arm64EC |
641 | 14.8k | bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC(); |
642 | | |
643 | 14.8k | SmallVector<ArgInfo, 8> SplitArgs; |
644 | 14.8k | SmallVector<std::pair<Register, Register>> BoolArgs; |
645 | | |
646 | | // Insert the hidden sret parameter if the return value won't fit in the |
647 | | // return registers. |
648 | 14.8k | if (!FLI.CanLowerReturn) |
649 | 9 | insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL); |
650 | | |
651 | 14.8k | unsigned i = 0; |
652 | 18.9k | for (auto &Arg : F.args()) { |
653 | 18.9k | if (DL.getTypeStoreSize(Arg.getType()).isZero()) |
654 | 0 | continue; |
655 | | |
656 | 18.9k | ArgInfo OrigArg{VRegs[i], Arg, i}; |
657 | 18.9k | setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F); |
658 | | |
659 | | // i1 arguments are zero-extended to i8 by the caller. Emit a |
660 | | // hint to reflect this. |
661 | 18.9k | if (OrigArg.Ty->isIntegerTy(1)) { |
662 | 716 | assert(OrigArg.Regs.size() == 1 && |
663 | 716 | MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 && |
664 | 716 | "Unexpected registers used for i1 arg"); |
665 | | |
666 | 0 | auto &Flags = OrigArg.Flags[0]; |
667 | 716 | if (!Flags.isZExt() && !Flags.isSExt()) { |
668 | | // Lower i1 argument as i8, and insert AssertZExt + Trunc later. |
669 | 626 | Register OrigReg = OrigArg.Regs[0]; |
670 | 626 | Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8)); |
671 | 626 | OrigArg.Regs[0] = WideReg; |
672 | 626 | BoolArgs.push_back({OrigReg, WideReg}); |
673 | 626 | } |
674 | 716 | } |
675 | | |
676 | 18.9k | if (Arg.hasAttribute(Attribute::SwiftAsync)) |
677 | 0 | MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
678 | | |
679 | 18.9k | splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); |
680 | 18.9k | ++i; |
681 | 18.9k | } |
682 | | |
683 | 14.8k | if (!MBB.empty()) |
684 | 2 | MIRBuilder.setInstr(*MBB.begin()); |
685 | | |
686 | 14.8k | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
687 | 14.8k | CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg()); |
688 | | |
689 | 14.8k | AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn); |
690 | 14.8k | FormalArgHandler Handler(MIRBuilder, MRI); |
691 | 14.8k | SmallVector<CCValAssign, 16> ArgLocs; |
692 | 14.8k | CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); |
693 | 14.8k | if (!determineAssignments(Assigner, SplitArgs, CCInfo) || |
694 | 14.8k | !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder)) |
695 | 0 | return false; |
696 | | |
697 | 14.8k | if (!BoolArgs.empty()) { |
698 | 626 | for (auto &KV : BoolArgs) { |
699 | 626 | Register OrigReg = KV.first; |
700 | 626 | Register WideReg = KV.second; |
701 | 626 | LLT WideTy = MRI.getType(WideReg); |
702 | 626 | assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 && |
703 | 626 | "Unexpected bit size of a bool arg"); |
704 | 0 | MIRBuilder.buildTrunc( |
705 | 626 | OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0)); |
706 | 626 | } |
707 | 459 | } |
708 | | |
709 | 14.8k | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
710 | 14.8k | uint64_t StackSize = Assigner.StackSize; |
711 | 14.8k | if (F.isVarArg()) { |
712 | 2 | if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) { |
713 | | // The AAPCS variadic function ABI is identical to the non-variadic |
714 | | // one. As a result there may be more arguments in registers and we should |
715 | | // save them for future reference. |
716 | | // Win64 variadic functions also pass arguments in registers, but all |
717 | | // float arguments are passed in integer registers. |
718 | 2 | saveVarArgRegisters(MIRBuilder, Handler, CCInfo); |
719 | 2 | } else if (Subtarget.isWindowsArm64EC()) { |
720 | 0 | return false; |
721 | 0 | } |
722 | | |
723 | | // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. |
724 | 2 | StackSize = alignTo(Assigner.StackSize, Subtarget.isTargetILP32() ? 4 : 8); |
725 | | |
726 | 2 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
727 | 2 | FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackSize, true)); |
728 | 2 | } |
729 | | |
730 | 14.8k | if (doesCalleeRestoreStack(F.getCallingConv(), |
731 | 14.8k | MF.getTarget().Options.GuaranteedTailCallOpt)) { |
732 | | // We have a non-standard ABI, so why not make full use of the stack that |
733 | | // we're going to pop? It must be aligned to 16 B in any case. |
734 | 0 | StackSize = alignTo(StackSize, 16); |
735 | | |
736 | | // If we're expected to restore the stack (e.g. fastcc), then we'll be |
737 | | // adding a multiple of 16. |
738 | 0 | FuncInfo->setArgumentStackToRestore(StackSize); |
739 | | |
740 | | // Our own callers will guarantee that the space is free by giving an |
741 | | // aligned value to CALLSEQ_START. |
742 | 0 | } |
743 | | |
744 | | // When we tail call, we need to check if the callee's arguments |
745 | | // will fit on the caller's stack. So, whenever we lower formal arguments, |
746 | | // we should keep track of this information, since we might lower a tail call |
747 | | // in this function later. |
748 | 14.8k | FuncInfo->setBytesInStackArgArea(StackSize); |
749 | | |
750 | 14.8k | if (Subtarget.hasCustomCallingConv()) |
751 | 0 | Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); |
752 | | |
753 | 14.8k | handleMustTailForwardedRegisters(MIRBuilder, AssignFn); |
754 | | |
755 | | // Move back to the end of the basic block. |
756 | 14.8k | MIRBuilder.setMBB(MBB); |
757 | | |
758 | 14.8k | return true; |
759 | 14.8k | } |
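
Schematically, the BoolArgs fixup above leaves the following for an incoming i1 argument (illustrative only):

  //   %w:_(s8) = ... value assigned from the calling convention ...
  //   %a:_(s8) = G_ASSERT_ZEXT %w, 1
  //   %b:_(s1) = G_TRUNC %a        ; %b is the original i1 argument vreg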
760 | | |
761 | | /// Return true if the calling convention is one that we can guarantee TCO for. |
762 | 71 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
763 | 71 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
764 | 71 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
765 | 71 | } |
766 | | |
767 | | /// Return true if we might ever do TCO for calls with this calling convention. |
768 | 71 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
769 | 71 | switch (CC) { |
770 | 71 | case CallingConv::C: |
771 | 71 | case CallingConv::PreserveMost: |
772 | 71 | case CallingConv::PreserveAll: |
773 | 71 | case CallingConv::Swift: |
774 | 71 | case CallingConv::SwiftTail: |
775 | 71 | case CallingConv::Tail: |
776 | 71 | case CallingConv::Fast: |
777 | 71 | return true; |
778 | 0 | default: |
779 | 0 | return false; |
780 | 71 | } |
781 | 71 | } |
782 | | |
783 | | /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for |
784 | | /// CC. |
785 | | static std::pair<CCAssignFn *, CCAssignFn *> |
786 | 2.36k | getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { |
787 | 2.36k | return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)}; |
788 | 2.36k | } |
789 | | |
790 | | bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( |
791 | | CallLoweringInfo &Info, MachineFunction &MF, |
792 | 71 | SmallVectorImpl<ArgInfo> &InArgs) const { |
793 | 71 | const Function &CallerF = MF.getFunction(); |
794 | 71 | CallingConv::ID CalleeCC = Info.CallConv; |
795 | 71 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
796 | | |
797 | | // If the calling conventions match, then everything must be the same. |
798 | 71 | if (CalleeCC == CallerCC) |
799 | 71 | return true; |
800 | | |
801 | | // Check if the caller and callee will handle arguments in the same way. |
802 | 0 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
803 | 0 | CCAssignFn *CalleeAssignFnFixed; |
804 | 0 | CCAssignFn *CalleeAssignFnVarArg; |
805 | 0 | std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = |
806 | 0 | getAssignFnsForCC(CalleeCC, TLI); |
807 | |
808 | 0 | CCAssignFn *CallerAssignFnFixed; |
809 | 0 | CCAssignFn *CallerAssignFnVarArg; |
810 | 0 | std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = |
811 | 0 | getAssignFnsForCC(CallerCC, TLI); |
812 | |
813 | 0 | AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed, |
814 | 0 | CalleeAssignFnVarArg); |
815 | 0 | AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed, |
816 | 0 | CallerAssignFnVarArg); |
817 | |
818 | 0 | if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner)) |
819 | 0 | return false; |
820 | | |
821 | | // Make sure that the caller and callee preserve all of the same registers. |
822 | 0 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
823 | 0 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
824 | 0 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
825 | 0 | if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) { |
826 | 0 | TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved); |
827 | 0 | TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved); |
828 | 0 | } |
829 | |
830 | 0 | return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved); |
831 | 0 | } |
832 | | |
833 | | bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( |
834 | | CallLoweringInfo &Info, MachineFunction &MF, |
835 | 71 | SmallVectorImpl<ArgInfo> &OrigOutArgs) const { |
836 | | // If there are no outgoing arguments, then we are done. |
837 | 71 | if (OrigOutArgs.empty()) |
838 | 2 | return true; |
839 | | |
840 | 69 | const Function &CallerF = MF.getFunction(); |
841 | 69 | LLVMContext &Ctx = CallerF.getContext(); |
842 | 69 | CallingConv::ID CalleeCC = Info.CallConv; |
843 | 69 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
844 | 69 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
845 | 69 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
846 | | |
847 | 69 | CCAssignFn *AssignFnFixed; |
848 | 69 | CCAssignFn *AssignFnVarArg; |
849 | 69 | std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); |
850 | | |
851 | | // We have outgoing arguments. Make sure that we can tail call with them. |
852 | 69 | SmallVector<CCValAssign, 16> OutLocs; |
853 | 69 | CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx); |
854 | | |
855 | 69 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
856 | 69 | Subtarget, /*IsReturn*/ false); |
857 | | // determineAssignments() may modify argument flags, so make a copy. |
858 | 69 | SmallVector<ArgInfo, 8> OutArgs; |
859 | 69 | append_range(OutArgs, OrigOutArgs); |
860 | 69 | if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) { |
861 | 0 | LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); |
862 | 0 | return false; |
863 | 0 | } |
864 | | |
865 | | // Make sure that they can fit on the caller's stack. |
866 | 69 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
867 | 69 | if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) { |
868 | 0 | LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n"); |
869 | 0 | return false; |
870 | 0 | } |
871 | | |
872 | | // Verify that the parameters in callee-saved registers match. |
873 | | // TODO: Port this over to CallLowering as general code once swiftself is |
874 | | // supported. |
875 | 69 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
876 | 69 | const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); |
877 | 69 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
878 | | |
879 | 69 | if (Info.IsVarArg) { |
880 | | // Be conservative and disallow variadic memory operands to match SDAG's |
881 | | // behaviour. |
882 | | // FIXME: If the caller's calling convention is C, then we can |
883 | | // potentially use its argument area. However, for cases like fastcc, |
884 | | // we can't do anything. |
885 | 0 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
886 | 0 | auto &ArgLoc = OutLocs[i]; |
887 | 0 | if (ArgLoc.isRegLoc()) |
888 | 0 | continue; |
889 | | |
890 | 0 | LLVM_DEBUG( |
891 | 0 | dbgs() |
892 | 0 | << "... Cannot tail call vararg function with stack arguments\n"); |
893 | 0 | return false; |
894 | 0 | } |
895 | 0 | } |
896 | | |
897 | 69 | return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); |
898 | 69 | } |
899 | | |
900 | | bool AArch64CallLowering::isEligibleForTailCallOptimization( |
901 | | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
902 | | SmallVectorImpl<ArgInfo> &InArgs, |
903 | 2.29k | SmallVectorImpl<ArgInfo> &OutArgs) const { |
904 | | |
905 | | // Must pass all target-independent checks in order to tail call optimize. |
906 | 2.29k | if (!Info.IsTailCall) |
907 | 2.22k | return false; |
908 | | |
909 | 71 | CallingConv::ID CalleeCC = Info.CallConv; |
910 | 71 | MachineFunction &MF = MIRBuilder.getMF(); |
911 | 71 | const Function &CallerF = MF.getFunction(); |
912 | | |
913 | 71 | LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n"); |
914 | | |
915 | 71 | if (Info.SwiftErrorVReg) { |
916 | | // TODO: We should handle this. |
917 | | // Note that this is also handled by the check for no outgoing arguments. |
918 | | // Proactively disabling this though, because the swifterror handling in |
919 | | // lowerCall inserts a COPY *after* the location of the call. |
920 | 0 | LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n"); |
921 | 0 | return false; |
922 | 0 | } |
923 | | |
924 | 71 | if (!mayTailCallThisCC(CalleeCC)) { |
925 | 0 | LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n"); |
926 | 0 | return false; |
927 | 0 | } |
928 | | |
929 | | // Byval parameters hand the function a pointer directly into the stack area |
930 | | // we want to reuse during a tail call. Working around this *is* possible (see |
931 | | // X86). |
932 | | // |
933 | | // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try |
934 | | // it? |
935 | | // |
936 | | // On Windows, "inreg" attributes signify non-aggregate indirect returns. |
937 | | // In this case, it is necessary to save/restore X0 in the callee. Tail |
938 | | // call opt interferes with this. So we disable tail call opt when the |
939 | | // caller has an argument with "inreg" attribute. |
940 | | // |
941 | | // FIXME: Check whether the callee also has an "inreg" argument. |
942 | | // |
943 | | // When the caller has a swifterror argument, we don't want to tail call |
944 | | // because we would have to move into the swifterror register before the |
945 | | // tail call. |
946 | 71 | if (any_of(CallerF.args(), [](const Argument &A) { |
947 | 64 | return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); |
948 | 64 | })) { |
949 | 0 | LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " |
950 | 0 | "inreg, or swifterror arguments\n"); |
951 | 0 | return false; |
952 | 0 | } |
953 | | |
954 | | // Externally-defined functions with weak linkage should not be |
955 | | // tail-called on AArch64 when the OS does not support dynamic |
956 | | // pre-emption of symbols, as the AAELF spec requires normal calls |
957 | | // to undefined weak functions to be replaced with a NOP or jump to the |
958 | | // next instruction. The behaviour of branch instructions in this |
959 | | // situation (as used for tail calls) is implementation-defined, so we |
960 | | // cannot rely on the linker replacing the tail call with a return. |
961 | 71 | if (Info.Callee.isGlobal()) { |
962 | 58 | const GlobalValue *GV = Info.Callee.getGlobal(); |
963 | 58 | const Triple &TT = MF.getTarget().getTargetTriple(); |
964 | 58 | if (GV->hasExternalWeakLinkage() && |
965 | 58 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || |
966 | 0 | TT.isOSBinFormatMachO())) { |
967 | 0 | LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " |
968 | 0 | "with weak linkage for this OS.\n"); |
969 | 0 | return false; |
970 | 0 | } |
971 | 58 | } |
972 | | |
973 | | // If we have -tailcallopt, then we're done. |
974 | 71 | if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt)) |
975 | 0 | return CalleeCC == CallerF.getCallingConv(); |
976 | | |
977 | | // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). |
978 | | // Try to find cases where we can do that. |
979 | | |
980 | | // I want anyone implementing a new calling convention to think long and hard |
981 | | // about this assert. |
982 | 71 | assert((!Info.IsVarArg || CalleeCC == CallingConv::C) && |
983 | 71 | "Unexpected variadic calling convention"); |
984 | | |
985 | | // Verify that the incoming and outgoing arguments from the callee are |
986 | | // safe to tail call. |
987 | 71 | if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { |
988 | 0 | LLVM_DEBUG( |
989 | 0 | dbgs() |
990 | 0 | << "... Caller and callee have incompatible calling conventions.\n"); |
991 | 0 | return false; |
992 | 0 | } |
993 | | |
994 | 71 | if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs)) |
995 | 0 | return false; |
996 | | |
997 | 71 | LLVM_DEBUG( |
998 | 71 | dbgs() << "... Call is eligible for tail call optimization.\n"); |
999 | 71 | return true; |
1000 | 71 | } |
1001 | | |
1002 | | static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, |
1003 | 2.29k | bool IsTailCall) { |
1004 | 2.29k | if (!IsTailCall) |
1005 | 2.22k | return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL; |
1006 | | |
1007 | 71 | if (!IsIndirect) |
1008 | 71 | return AArch64::TCRETURNdi; |
1009 | | |
1010 | | // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use |
1011 | | // x16 or x17. |
1012 | 0 | if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) |
1013 | 0 | return AArch64::TCRETURNriBTI; |
1014 | | |
1015 | 0 | return AArch64::TCRETURNri; |
1016 | 0 | } |
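
In table form, the opcode choice above is roughly:

  //   not a tail call, direct      -> AArch64::BL
  //   not a tail call, indirect    -> getBLRCallOpcode(CallerF)
  //   tail call, direct            -> AArch64::TCRETURNdi
  //   tail call, indirect, BTI     -> AArch64::TCRETURNriBTI (restricted to x16/x17)
  //   tail call, indirect, no BTI  -> AArch64::TCRETURNri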
1017 | | |
1018 | | static const uint32_t * |
1019 | | getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs, |
1020 | | AArch64CallLowering::CallLoweringInfo &Info, |
1021 | 2.29k | const AArch64RegisterInfo &TRI, MachineFunction &MF) { |
1022 | 2.29k | const uint32_t *Mask; |
1023 | 2.29k | if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) { |
1024 | | // For 'this' returns, use the X0-preserving mask if applicable |
1025 | 48 | Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv); |
1026 | 48 | if (!Mask) { |
1027 | 0 | OutArgs[0].Flags[0].setReturned(false); |
1028 | 0 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
1029 | 0 | } |
1030 | 2.24k | } else { |
1031 | 2.24k | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
1032 | 2.24k | } |
1033 | 2.29k | return Mask; |
1034 | 2.29k | } |
1035 | | |
1036 | | bool AArch64CallLowering::lowerTailCall( |
1037 | | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
1038 | 71 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
1039 | 71 | MachineFunction &MF = MIRBuilder.getMF(); |
1040 | 71 | const Function &F = MF.getFunction(); |
1041 | 71 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1042 | 71 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
1043 | 71 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
1044 | | |
1045 | | // True when we're tail calling, but without -tailcallopt. |
1046 | 71 | bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && |
1047 | 71 | Info.CallConv != CallingConv::Tail && |
1048 | 71 | Info.CallConv != CallingConv::SwiftTail; |
1049 | | |
1050 | | // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 |
1051 | | // register class. Until we can do that, we should fall back here. |
1052 | 71 | if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { |
1053 | 0 | LLVM_DEBUG( |
1054 | 0 | dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n"); |
1055 | 0 | return false; |
1056 | 0 | } |
1057 | | |
1058 | | // Find out which ABI gets to decide where things go. |
1059 | 71 | CallingConv::ID CalleeCC = Info.CallConv; |
1060 | 71 | CCAssignFn *AssignFnFixed; |
1061 | 71 | CCAssignFn *AssignFnVarArg; |
1062 | 71 | std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); |
1063 | | |
1064 | 71 | MachineInstrBuilder CallSeqStart; |
1065 | 71 | if (!IsSibCall) |
1066 | 0 | CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); |
1067 | | |
1068 | 71 | unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true); |
1069 | 71 | auto MIB = MIRBuilder.buildInstrNoInsert(Opc); |
1070 | 71 | MIB.add(Info.Callee); |
1071 | | |
1072 | | // Byte offset for the tail call. When we are sibcalling, this will always |
1073 | | // be 0. |
1074 | 71 | MIB.addImm(0); |
1075 | | |
1076 | | // Tell the call which registers are clobbered. |
1077 | 71 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
1078 | 71 | auto TRI = Subtarget.getRegisterInfo(); |
1079 | 71 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC); |
1080 | 71 | if (Subtarget.hasCustomCallingConv()) |
1081 | 0 | TRI->UpdateCustomCallPreservedMask(MF, &Mask); |
1082 | 71 | MIB.addRegMask(Mask); |
1083 | | |
1084 | 71 | if (Info.CFIType) |
1085 | 0 | MIB->setCFIType(MF, Info.CFIType->getZExtValue()); |
1086 | | |
1087 | 71 | if (TRI->isAnyArgRegReserved(MF)) |
1088 | 0 | TRI->emitReservedArgRegCallError(MF); |
1089 | | |
1090 | | // FPDiff is the byte offset of the call's argument area from the callee's. |
1091 | | // Stores to callee stack arguments will be placed in FixedStackSlots offset |
1092 | | // by this amount for a tail call. In a sibling call it must be 0 because the |
1093 | | // caller will deallocate the entire stack and the callee still expects its |
1094 | | // arguments to begin at SP+0. |
1095 | 71 | int FPDiff = 0; |
1096 | | |
1097 | | // This will be 0 for sibcalls, potentially nonzero for tail calls produced |
1098 | | // by -tailcallopt. For sibcalls, the memory operands for the call are |
1099 | | // already available in the caller's incoming argument space. |
1100 | 71 | unsigned NumBytes = 0; |
1101 | 71 | if (!IsSibCall) { |
1102 | | // We aren't sibcalling, so we need to compute FPDiff. We need to do this |
1103 | | // before handling assignments, because FPDiff must be known for memory |
1104 | | // arguments. |
1105 | 0 | unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); |
1106 | 0 | SmallVector<CCValAssign, 16> OutLocs; |
1107 | 0 | CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); |
1108 | |
1109 | 0 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
1110 | 0 | Subtarget, /*IsReturn*/ false); |
1111 | 0 | if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) |
1112 | 0 | return false; |
1113 | | |
1114 | | // The callee will pop the argument stack as a tail call. Thus, we must |
1115 | | // keep it 16-byte aligned. |
1116 | 0 | NumBytes = alignTo(OutInfo.getStackSize(), 16); |
1117 | | |
1118 | | // FPDiff will be negative if this tail call requires more space than we |
1119 | | // would automatically have in our incoming argument space. Positive if we |
1120 | | // actually shrink the stack. |
1121 | 0 | FPDiff = NumReusableBytes - NumBytes; |
1122 | |
1123 | | // Update the required reserved area if this is the tail call requiring the |
1124 | | // most argument stack space. |
1125 | 0 | if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff) |
1126 | 0 | FuncInfo->setTailCallReservedStack(-FPDiff); |
1127 | | |
1128 | | // The stack pointer must be 16-byte aligned at all times it's used for a |
1129 | | // memory operation, which in practice means at *all* times and in |
1130 | | // particular across call boundaries. Therefore our own arguments started at |
1131 | | // a 16-byte aligned SP and the delta applied for the tail call should |
1132 | | // satisfy the same constraint. |
1133 | 0 | assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); |
1134 | 0 | } |
1135 | | |
1136 | 71 | const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); |
1137 | | |
1138 | 71 | AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg, |
1139 | 71 | Subtarget, /*IsReturn*/ false); |
1140 | | |
1141 | | // Do the actual argument marshalling. |
1142 | 71 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, |
1143 | 71 | /*IsTailCall*/ true, FPDiff); |
1144 | 71 | if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder, |
1145 | 71 | CalleeCC, Info.IsVarArg)) |
1146 | 0 | return false; |
1147 | | |
1148 | 71 | Mask = getMaskForArgs(OutArgs, Info, *TRI, MF); |
1149 | | |
1150 | 71 | if (Info.IsVarArg && Info.IsMustTailCall) { |
1151 | | // Now we know what's being passed to the function. Add uses to the call for |
1152 | | // the forwarded registers that we *aren't* passing as parameters. This will |
1153 | | // preserve the copies we built earlier. |
1154 | 0 | for (const auto &F : Forwards) { |
1155 | 0 | Register ForwardedReg = F.PReg; |
1156 | | // If the register is already passed, or aliases a register which is |
1157 | | // already being passed, then skip it. |
1158 | 0 | if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) { |
1159 | 0 | if (!Use.isReg()) |
1160 | 0 | return false; |
1161 | 0 | return TRI->regsOverlap(Use.getReg(), ForwardedReg); |
1162 | 0 | })) |
1163 | 0 | continue; |
1164 | | |
1165 | | // We aren't passing it already, so we should add it to the call. |
1166 | 0 | MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg)); |
1167 | 0 | MIB.addReg(ForwardedReg, RegState::Implicit); |
1168 | 0 | } |
1169 | 0 | } |
1170 | | |
1171 | | // If we have -tailcallopt, we need to adjust the stack. We'll do the call |
1172 | | // sequence start and end here. |
1173 | 71 | if (!IsSibCall) { |
1174 | 0 | MIB->getOperand(1).setImm(FPDiff); |
1175 | 0 | CallSeqStart.addImm(0).addImm(0); |
1176 | | // End the call sequence *before* emitting the call. Normally, we would |
1177 | | // tidy the frame up after the call. However, here, we've laid out the |
1178 | | // parameters so that when SP is reset, they will be in the correct |
1179 | | // location. |
1180 | 0 | MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0); |
1181 | 0 | } |
1182 | | |
1183 | | // Now we can add the actual call instruction to the correct basic block. |
1184 | 71 | MIRBuilder.insertInstr(MIB); |
1185 | | |
1186 | | // If Callee is a reg, since it is used by a target specific instruction, |
1187 | | // it must have a register class matching the constraint of that instruction. |
1188 | 71 | if (MIB->getOperand(0).isReg()) |
1189 | 0 | constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), |
1190 | 0 | *MF.getSubtarget().getRegBankInfo(), *MIB, |
1191 | 0 | MIB->getDesc(), MIB->getOperand(0), 0); |
1192 | | |
1193 | 71 | MF.getFrameInfo().setHasTailCall(); |
1194 | 71 | Info.LoweredTailCall = true; |
1195 | 71 | return true; |
1196 | 71 | } |
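
As a worked example of the FPDiff bookkeeping above, with invented numbers:

  //   NumReusableBytes = 32 (caller's incoming argument area)
  //   OutInfo.getStackSize() = 40  ->  NumBytes = alignTo(40, 16) = 48
  //   FPDiff = 32 - 48 = -16       ->  TailCallReservedStack raised to 16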
1197 | | |
1198 | | bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, |
1199 | 2.29k | CallLoweringInfo &Info) const { |
1200 | 2.29k | MachineFunction &MF = MIRBuilder.getMF(); |
1201 | 2.29k | const Function &F = MF.getFunction(); |
1202 | 2.29k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1203 | 2.29k | auto &DL = F.getParent()->getDataLayout(); |
1204 | 2.29k | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
1205 | 2.29k | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
1206 | | |
1207 | | // Arm64EC has extra requirements for varargs calls; bail out for now. |
1208 | 2.29k | if (Info.IsVarArg && Subtarget.isWindowsArm64EC()) |
1209 | 0 | return false; |
1210 | | |
1211 | 2.29k | SmallVector<ArgInfo, 8> OutArgs; |
1212 | 2.71k | for (auto &OrigArg : Info.OrigArgs) { |
1213 | 2.71k | splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv); |
1214 | | // AAPCS requires that i1 arguments be zero-extended to 8 bits by the caller.
1215 | 2.71k | auto &Flags = OrigArg.Flags[0]; |
1216 | 2.71k | if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) { |
1217 | 696 | ArgInfo &OutArg = OutArgs.back(); |
1218 | 696 | assert(OutArg.Regs.size() == 1 && |
1219 | 696 | MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 && |
1220 | 696 | "Unexpected registers used for i1 arg"); |
1221 | | |
1222 | | // We cannot use a ZExt ArgInfo flag here, because it will |
1223 | | // zero-extend the argument to i32 instead of just i8. |
1224 | 0 | OutArg.Regs[0] = |
1225 | 696 | MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0); |
1226 | 696 | LLVMContext &Ctx = MF.getFunction().getContext(); |
1227 | 696 | OutArg.Ty = Type::getInt8Ty(Ctx); |
1228 | 696 | } |
1229 | 2.71k | } |
1230 | | |
1231 | 2.29k | SmallVector<ArgInfo, 8> InArgs; |
1232 | 2.29k | if (!Info.OrigRet.Ty->isVoidTy()) |
1233 | 942 | splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv); |
1234 | | |
1235 | | // If we can lower as a tail call, do that instead. |
1236 | 2.29k | bool CanTailCallOpt = |
1237 | 2.29k | isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs); |
1238 | | |
1239 | | // We must emit a tail call if we have musttail. |
1240 | 2.29k | if (Info.IsMustTailCall && !CanTailCallOpt) { |
1241 | | // There are types of incoming/outgoing arguments we can't handle yet, so |
1242 | | // it doesn't make sense to actually die here like in ISelLowering. Instead, |
1243 | | // fall back to SelectionDAG and let it try to handle this. |
1244 | 0 | LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n"); |
1245 | 0 | return false; |
1246 | 0 | } |
1247 | | |
1248 | 2.29k | Info.IsTailCall = CanTailCallOpt; |
1249 | 2.29k | if (CanTailCallOpt) |
1250 | 71 | return lowerTailCall(MIRBuilder, Info, OutArgs); |
1251 | | |
1252 | | // Find out which ABI gets to decide where things go. |
1253 | 2.22k | CCAssignFn *AssignFnFixed; |
1254 | 2.22k | CCAssignFn *AssignFnVarArg; |
1255 | 2.22k | std::tie(AssignFnFixed, AssignFnVarArg) = |
1256 | 2.22k | getAssignFnsForCC(Info.CallConv, TLI); |
1257 | | |
1258 | 2.22k | MachineInstrBuilder CallSeqStart; |
1259 | 2.22k | CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); |
1260 | | |
1261 | | // Create a temporarily-floating call instruction so we can add the implicit |
1262 | | // uses of arg registers. |
1263 | | |
1264 | 2.22k | unsigned Opc = 0; |
1265 | | // Calls with operand bundle "clang.arc.attachedcall" are special. They should |
1266 | | // be expanded to the call, directly followed by a special marker sequence and |
1267 | | // a call to an ObjC library function. |
1268 | 2.22k | if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB)) |
1269 | 0 | Opc = AArch64::BLR_RVMARKER; |
1270 | | // A call to a returns-twice function like setjmp must be followed by a BTI
1271 | | // instruction. |
1272 | 2.22k | else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) && |
1273 | 2.22k | !Subtarget.noBTIAtReturnTwice() && |
1274 | 2.22k | MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) |
1275 | 0 | Opc = AArch64::BLR_BTI; |
1276 | 2.22k | else |
1277 | 2.22k | Opc = getCallOpcode(MF, Info.Callee.isReg(), false); |
1278 | | |
1279 | 2.22k | auto MIB = MIRBuilder.buildInstrNoInsert(Opc); |
1280 | 2.22k | unsigned CalleeOpNo = 0; |
1281 | | |
1282 | 2.22k | if (Opc == AArch64::BLR_RVMARKER) { |
1283 | | // Add a target global address for the retainRV/claimRV runtime function |
1284 | | // just before the call target. |
1285 | 0 | Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB); |
1286 | 0 | MIB.addGlobalAddress(ARCFn); |
1287 | 0 | ++CalleeOpNo; |
1288 | 2.22k | } else if (Info.CFIType) { |
1289 | 0 | MIB->setCFIType(MF, Info.CFIType->getZExtValue()); |
1290 | 0 | } |
1291 | | |
1292 | 2.22k | MIB.add(Info.Callee); |
1293 | | |
1294 | | // Tell the call which registers are clobbered. |
1295 | 2.22k | const uint32_t *Mask; |
1296 | 2.22k | const auto *TRI = Subtarget.getRegisterInfo(); |
1297 | | |
1298 | 2.22k | AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg, |
1299 | 2.22k | Subtarget, /*IsReturn*/ false); |
1300 | | // Do the actual argument marshalling. |
1301 | 2.22k | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false); |
1302 | 2.22k | if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder, |
1303 | 2.22k | Info.CallConv, Info.IsVarArg)) |
1304 | 0 | return false; |
1305 | | |
1306 | 2.22k | Mask = getMaskForArgs(OutArgs, Info, *TRI, MF); |
1307 | | |
1308 | 2.22k | if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) |
1309 | 0 | TRI->UpdateCustomCallPreservedMask(MF, &Mask); |
1310 | 2.22k | MIB.addRegMask(Mask); |
1311 | | |
1312 | 2.22k | if (TRI->isAnyArgRegReserved(MF)) |
1313 | 0 | TRI->emitReservedArgRegCallError(MF); |
1314 | | |
1315 | | // Now we can add the actual call instruction to the correct basic block. |
1316 | 2.22k | MIRBuilder.insertInstr(MIB); |
1317 | | |
1318 | 2.22k | uint64_t CalleePopBytes = |
1319 | 2.22k | doesCalleeRestoreStack(Info.CallConv, |
1320 | 2.22k | MF.getTarget().Options.GuaranteedTailCallOpt) |
1321 | 2.22k | ? alignTo(Assigner.StackSize, 16) |
1322 | 2.22k | : 0; |
1323 | | |
1324 | 2.22k | CallSeqStart.addImm(Assigner.StackSize).addImm(0); |
1325 | 2.22k | MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP) |
1326 | 2.22k | .addImm(Assigner.StackSize) |
1327 | 2.22k | .addImm(CalleePopBytes); |
1328 | | |
1329 | | // If Callee is a reg, since it is used by a target-specific
1330 | | // instruction, it must have a register class matching the |
1331 | | // constraint of that instruction. |
1332 | 2.22k | if (MIB->getOperand(CalleeOpNo).isReg()) |
1333 | 70 | constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(), |
1334 | 70 | *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(), |
1335 | 70 | MIB->getOperand(CalleeOpNo), CalleeOpNo); |
1336 | | |
1337 | | // Finally, we can copy the returned value back into its virtual register. In
1338 | | // symmetry with the arguments, the physical register must be an
1339 | | // implicit define of the call instruction.
1340 | 2.22k | if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) { |
1341 | 873 | CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv); |
1342 | 873 | CallReturnHandler Handler(MIRBuilder, MRI, MIB); |
1343 | 873 | bool UsingReturnedArg = |
1344 | 873 | !OutArgs.empty() && OutArgs[0].Flags[0].isReturned(); |
1345 | | |
1346 | 873 | AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget, |
1347 | 873 | /*IsReturn*/ false); |
1348 | 873 | ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB); |
1349 | 873 | if (!determineAndHandleAssignments( |
1350 | 873 | UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs, |
1351 | 873 | MIRBuilder, Info.CallConv, Info.IsVarArg, |
1352 | 873 | UsingReturnedArg ? ArrayRef(OutArgs[0].Regs) : std::nullopt)) |
1353 | 0 | return false; |
1354 | 873 | } |
1355 | | |
1356 | 2.22k | if (Info.SwiftErrorVReg) { |
1357 | 0 | MIB.addDef(AArch64::X21, RegState::Implicit); |
1358 | 0 | MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21)); |
1359 | 0 | } |
1360 | | |
1361 | 2.22k | if (!Info.CanLowerReturn) { |
1362 | 0 | insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs, |
1363 | 0 | Info.DemoteRegister, Info.DemoteStackIndex); |
1364 | 0 | } |
1365 | 2.22k | return true; |
1366 | 2.22k | } |
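// The CalleePopBytes value computed near the end of lowerCall above combines
// two rules: only calling conventions that restore their own stack (with
// -tailcallopt in effect) pop the outgoing argument area, and AArch64 SP
// adjustments must be multiples of 16 bytes. A minimal sketch of that rule;
// the free-function name is hypothetical.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

static uint64_t computeCalleePopBytes(bool CalleeRestoresStack,
                                      uint64_t OutgoingStackSize) {
  // Round the outgoing argument area up to the 16-byte stack alignment; a
  // caller-cleanup convention pops nothing.
  return CalleeRestoresStack ? llvm::alignTo(OutgoingStackSize, 16) : 0;
}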
1367 | | |
1368 | 0 | bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const { |
1369 | 0 | return Ty.getSizeInBits() == 64; |
1370 | 0 | } |
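// lowerCall above widens an i1 argument to i8 with an explicit G_ZEXT instead
// of setting the ZExt flag, which would widen it to i32. A minimal sketch of
// that single step; the helper name is hypothetical.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/Register.h"

static llvm::Register zeroExtendI1ArgToI8(llvm::MachineIRBuilder &MIRBuilder,
                                          llvm::Register I1Reg) {
  // Build a G_ZEXT to s8 and hand back the widened virtual register.
  return MIRBuilder.buildZExt(llvm::LLT::scalar(8), I1Reg).getReg(0);
}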