/src/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
Line | Count | Source
1 | | //===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements some simple delegations needed for call lowering. |
11 | | /// |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
15 | | #include "llvm/CodeGen/Analysis.h" |
16 | | #include "llvm/CodeGen/CallingConvLower.h" |
17 | | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
18 | | #include "llvm/CodeGen/GlobalISel/Utils.h" |
19 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
20 | | #include "llvm/CodeGen/MachineOperand.h" |
21 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
22 | | #include "llvm/CodeGen/TargetLowering.h" |
23 | | #include "llvm/IR/DataLayout.h" |
24 | | #include "llvm/IR/LLVMContext.h" |
25 | | #include "llvm/IR/Module.h" |
26 | | #include "llvm/Target/TargetMachine.h" |
27 | | |
28 | | #define DEBUG_TYPE "call-lowering" |
29 | | |
30 | | using namespace llvm; |
31 | | |
32 | 0 | void CallLowering::anchor() {} |
33 | | |
34 | | /// Helper function which updates \p Flags when \p AttrFn returns true. |
35 | | static void |
36 | | addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, |
37 | 59.8k | const std::function<bool(Attribute::AttrKind)> &AttrFn) { |
38 | 59.8k | if (AttrFn(Attribute::SExt)) |
39 | 8 | Flags.setSExt(); |
40 | 59.8k | if (AttrFn(Attribute::ZExt)) |
41 | 179 | Flags.setZExt(); |
42 | 59.8k | if (AttrFn(Attribute::InReg)) |
43 | 0 | Flags.setInReg(); |
44 | 59.8k | if (AttrFn(Attribute::StructRet)) |
45 | 0 | Flags.setSRet(); |
46 | 59.8k | if (AttrFn(Attribute::Nest)) |
47 | 0 | Flags.setNest(); |
48 | 59.8k | if (AttrFn(Attribute::ByVal)) |
49 | 10 | Flags.setByVal(); |
50 | 59.8k | if (AttrFn(Attribute::Preallocated)) |
51 | 0 | Flags.setPreallocated(); |
52 | 59.8k | if (AttrFn(Attribute::InAlloca)) |
53 | 0 | Flags.setInAlloca(); |
54 | 59.8k | if (AttrFn(Attribute::Returned)) |
55 | 0 | Flags.setReturned(); |
56 | 59.8k | if (AttrFn(Attribute::SwiftSelf)) |
57 | 0 | Flags.setSwiftSelf(); |
58 | 59.8k | if (AttrFn(Attribute::SwiftAsync)) |
59 | 0 | Flags.setSwiftAsync(); |
60 | 59.8k | if (AttrFn(Attribute::SwiftError)) |
61 | 0 | Flags.setSwiftError(); |
62 | 59.8k | } |
63 | | |
64 | | ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, |
65 | 2.01k | unsigned ArgIdx) const { |
66 | 2.01k | ISD::ArgFlagsTy Flags; |
67 | 24.2k | addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { |
68 | 24.2k | return Call.paramHasAttr(ArgIdx, Attr); |
69 | 24.2k | }); |
70 | 2.01k | return Flags; |
71 | 2.01k | } |
72 | | |
73 | | ISD::ArgFlagsTy |
74 | 1.93k | CallLowering::getAttributesForReturn(const CallBase &Call) const { |
75 | 1.93k | ISD::ArgFlagsTy Flags; |
76 | 23.1k | addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) { |
77 | 23.1k | return Call.hasRetAttr(Attr); |
78 | 23.1k | }); |
79 | 1.93k | return Flags; |
80 | 1.93k | } |
81 | | |
82 | | void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, |
83 | | const AttributeList &Attrs, |
84 | 55.9k | unsigned OpIdx) const { |
85 | 670k | addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { |
86 | 670k | return Attrs.hasAttributeAtIndex(OpIdx, Attr); |
87 | 670k | }); |
88 | 55.9k | } |
89 | | |
90 | | bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, |
91 | | ArrayRef<Register> ResRegs, |
92 | | ArrayRef<ArrayRef<Register>> ArgRegs, |
93 | | Register SwiftErrorVReg, |
94 | 1.93k | std::function<unsigned()> GetCalleeReg) const { |
95 | 1.93k | CallLoweringInfo Info; |
96 | 1.93k | const DataLayout &DL = MIRBuilder.getDataLayout(); |
97 | 1.93k | MachineFunction &MF = MIRBuilder.getMF(); |
98 | 1.93k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
99 | 1.93k | bool CanBeTailCalled = CB.isTailCall() && |
100 | 1.93k | isInTailCallPosition(CB, MF.getTarget()) && |
101 | 1.93k | (MF.getFunction() |
102 | 58 | .getFnAttribute("disable-tail-calls") |
103 | 58 | .getValueAsString() != "true"); |
104 | | |
105 | 1.93k | CallingConv::ID CallConv = CB.getCallingConv(); |
106 | 1.93k | Type *RetTy = CB.getType(); |
107 | 1.93k | bool IsVarArg = CB.getFunctionType()->isVarArg(); |
108 | | |
109 | 1.93k | SmallVector<BaseArgInfo, 4> SplitArgs; |
110 | 1.93k | getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL); |
111 | 1.93k | Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg); |
112 | | |
113 | 1.93k | Info.IsConvergent = CB.isConvergent(); |
114 | | |
115 | 1.93k | if (!Info.CanLowerReturn) { |
116 | | // Callee requires sret demotion. |
117 | 0 | insertSRetOutgoingArgument(MIRBuilder, CB, Info); |
118 | | |
119 | | // The sret demotion isn't compatible with tail-calls, since the sret |
120 | | // argument points into the caller's stack frame. |
121 | 0 | CanBeTailCalled = false; |
122 | 0 | } |
123 | | |
124 | | |
125 | | // First step is to marshall all the function's parameters into the correct |
126 | | // physregs and memory locations. Gather the sequence of argument types that |
127 | | // we'll pass to the assigner function. |
128 | 1.93k | unsigned i = 0; |
129 | 1.93k | unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); |
130 | 2.01k | for (const auto &Arg : CB.args()) { |
131 | 2.01k | ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i), |
132 | 2.01k | i < NumFixedArgs}; |
133 | 2.01k | setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); |
134 | | |
135 | | // If we have an explicit sret argument that is an Instruction, (i.e., it |
136 | | // might point to function-local memory), we can't meaningfully tail-call. |
137 | 2.01k | if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) |
138 | 0 | CanBeTailCalled = false; |
139 | | |
140 | 2.01k | Info.OrigArgs.push_back(OrigArg); |
141 | 2.01k | ++i; |
142 | 2.01k | } |
143 | | |
144 | | // Try looking through a bitcast from one function type to another. |
145 | | // Commonly happens with calls to objc_msgSend(). |
146 | 1.93k | const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts(); |
147 | 1.93k | if (const Function *F = dyn_cast<Function>(CalleeV)) |
148 | 1.85k | Info.Callee = MachineOperand::CreateGA(F, 0); |
149 | 76 | else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) { |
150 | | // IR IFuncs and Aliases can't be forward declared (only defined), so the |
151 | | // callee must be in the same TU and therefore we can direct-call it without |
152 | | // worrying about it being out of range. |
153 | 6 | Info.Callee = MachineOperand::CreateGA(cast<GlobalValue>(CalleeV), 0); |
154 | 6 | } else |
155 | 70 | Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); |
156 | | |
157 | 1.93k | Register ReturnHintAlignReg; |
158 | 1.93k | Align ReturnHintAlign; |
159 | | |
160 | 1.93k | Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)}; |
161 | | |
162 | 1.93k | if (!Info.OrigRet.Ty->isVoidTy()) { |
163 | 635 | setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); |
164 | | |
165 | 635 | if (MaybeAlign Alignment = CB.getRetAlign()) { |
166 | 5 | if (*Alignment > Align(1)) { |
167 | 5 | ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]); |
168 | 5 | Info.OrigRet.Regs[0] = ReturnHintAlignReg; |
169 | 5 | ReturnHintAlign = *Alignment; |
170 | 5 | } |
171 | 5 | } |
172 | 635 | } |
173 | | |
174 | 1.93k | auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi); |
175 | 1.93k | if (Bundle && CB.isIndirectCall()) { |
176 | 0 | Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]); |
177 | 0 | assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type"); |
178 | 0 | } |
179 | | |
180 | 0 | Info.CB = &CB; |
181 | 1.93k | Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); |
182 | 1.93k | Info.CallConv = CallConv; |
183 | 1.93k | Info.SwiftErrorVReg = SwiftErrorVReg; |
184 | 1.93k | Info.IsMustTailCall = CB.isMustTailCall(); |
185 | 1.93k | Info.IsTailCall = CanBeTailCalled; |
186 | 1.93k | Info.IsVarArg = IsVarArg; |
187 | 1.93k | if (!lowerCall(MIRBuilder, Info)) |
188 | 0 | return false; |
189 | | |
190 | 1.93k | if (ReturnHintAlignReg && !Info.IsTailCall) { |
191 | 1 | MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg, |
192 | 1 | ReturnHintAlign); |
193 | 1 | } |
194 | | |
195 | 1.93k | return true; |
196 | 1.93k | } |
197 | | |
198 | | template <typename FuncInfoTy> |
199 | | void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, |
200 | | const DataLayout &DL, |
201 | 39.0k | const FuncInfoTy &FuncInfo) const { |
202 | 39.0k | auto &Flags = Arg.Flags[0]; |
203 | 39.0k | const AttributeList &Attrs = FuncInfo.getAttributes(); |
204 | 39.0k | addArgFlagsFromAttributes(Flags, Attrs, OpIdx); |
205 | | |
206 | 39.0k | PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType()); |
207 | 39.0k | if (PtrTy) { |
208 | 2.89k | Flags.setPointer(); |
209 | 2.89k | Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace()); |
210 | 2.89k | } |
211 | | |
212 | 39.0k | Align MemAlign = DL.getABITypeAlign(Arg.Ty); |
213 | 39.0k | if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { |
214 | 5 | assert(OpIdx >= AttributeList::FirstArgIndex); |
215 | 0 | unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex; |
216 | | |
217 | 5 | Type *ElementTy = FuncInfo.getParamByValType(ParamIdx); |
218 | 5 | if (!ElementTy) |
219 | 0 | ElementTy = FuncInfo.getParamInAllocaType(ParamIdx); |
220 | 5 | if (!ElementTy) |
221 | 0 | ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx); |
222 | 5 | assert(ElementTy && "Must have byval, inalloca or preallocated type"); |
223 | 0 | Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); |
224 | | |
225 | | // For ByVal, alignment should be passed from FE. BE will guess if |
226 | | // this info is not there but there are cases it cannot get right. |
227 | 5 | if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx)) |
228 | 0 | MemAlign = *ParamAlign; |
229 | 5 | else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx))) |
230 | 0 | MemAlign = *ParamAlign; |
231 | 5 | else |
232 | 5 | MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); |
233 | 39.0k | } else if (OpIdx >= AttributeList::FirstArgIndex) { |
234 | 20.9k | if (auto ParamAlign = |
235 | 20.9k | FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) |
236 | 0 | MemAlign = *ParamAlign; |
237 | 20.9k | } |
238 | 0 | Flags.setMemAlign(MemAlign); |
239 | 39.0k | Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); |
240 | | |
241 | | // Don't try to use the returned attribute if the argument is marked as |
242 | | // swiftself, since it won't be passed in x0. |
243 | 39.0k | if (Flags.isSwiftSelf()) |
244 | 0 | Flags.setReturned(false); |
245 | 39.0k | }
Instantiation: void llvm::CallLowering::setArgFlags<llvm::Function>(llvm::CallLowering::ArgInfo&, unsigned int, llvm::DataLayout const&, llvm::Function const&) const, 36.4k executions (per-instantiation expansion of lines 201-245 omitted)
Instantiation: void llvm::CallLowering::setArgFlags<llvm::CallBase>(llvm::CallLowering::ArgInfo&, unsigned int, llvm::DataLayout const&, llvm::CallBase const&) const, 2.65k executions (per-instantiation expansion of lines 201-245 omitted)
246 | | |
247 | | template void |
248 | | CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx, |
249 | | const DataLayout &DL, |
250 | | const Function &FuncInfo) const; |
251 | | |
252 | | template void |
253 | | CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx, |
254 | | const DataLayout &DL, |
255 | | const CallBase &FuncInfo) const; |
256 | | |
257 | | void CallLowering::splitToValueTypes(const ArgInfo &OrigArg, |
258 | | SmallVectorImpl<ArgInfo> &SplitArgs, |
259 | | const DataLayout &DL, |
260 | | CallingConv::ID CallConv, |
261 | 36.1k | SmallVectorImpl<uint64_t> *Offsets) const { |
262 | 36.1k | LLVMContext &Ctx = OrigArg.Ty->getContext(); |
263 | | |
264 | 36.1k | SmallVector<EVT, 4> SplitVTs; |
265 | 36.1k | ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0); |
266 | | |
267 | 36.1k | if (SplitVTs.size() == 0) |
268 | 0 | return; |
269 | | |
270 | 36.1k | if (SplitVTs.size() == 1) { |
271 | | // No splitting to do, but we want to replace the original type (e.g. [1 x |
272 | | // double] -> double). |
273 | 36.1k | SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), |
274 | 36.1k | OrigArg.OrigArgIndex, OrigArg.Flags[0], |
275 | 36.1k | OrigArg.IsFixed, OrigArg.OrigValue); |
276 | 36.1k | return; |
277 | 36.1k | } |
278 | | |
279 | | // Create one ArgInfo for each virtual register in the original ArgInfo. |
280 | 13 | assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); |
281 | | |
282 | 0 | bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( |
283 | 13 | OrigArg.Ty, CallConv, false, DL); |
284 | 39 | for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { |
285 | 26 | Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); |
286 | 26 | SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex, |
287 | 26 | OrigArg.Flags[0], OrigArg.IsFixed); |
288 | 26 | if (NeedsRegBlock) |
289 | 16 | SplitArgs.back().Flags[0].setInConsecutiveRegs(); |
290 | 26 | } |
291 | | |
292 | 13 | SplitArgs.back().Flags[0].setInConsecutiveRegsLast(); |
293 | 13 | } |
294 | | |
295 | | /// Pack values \p SrcRegs to cover the vector type result \p DstRegs. |
296 | | static MachineInstrBuilder |
297 | | mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, |
298 | 1.41k | ArrayRef<Register> SrcRegs) { |
299 | 1.41k | MachineRegisterInfo &MRI = *B.getMRI(); |
300 | 1.41k | LLT LLTy = MRI.getType(DstRegs[0]); |
301 | 1.41k | LLT PartLLT = MRI.getType(SrcRegs[0]); |
302 | | |
303 | | // Deal with v3s16 split into v2s16 |
304 | 1.41k | LLT LCMTy = getCoverTy(LLTy, PartLLT); |
305 | 1.41k | if (LCMTy == LLTy) { |
306 | | // Common case where no padding is needed. |
307 | 1.13k | assert(DstRegs.size() == 1); |
308 | 0 | return B.buildConcatVectors(DstRegs[0], SrcRegs); |
309 | 1.13k | } |
310 | | |
311 | | // We need to create an unmerge to the result registers, which may require |
312 | | // widening the original value. |
313 | 279 | Register UnmergeSrcReg; |
314 | 279 | if (LCMTy != PartLLT) { |
315 | 0 | assert(DstRegs.size() == 1); |
316 | 0 | return B.buildDeleteTrailingVectorElements( |
317 | 0 | DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs)); |
318 | 279 | } else { |
319 | | // We don't need to widen anything if we're extracting a scalar which was |
320 | | // promoted to a vector e.g. s8 -> v4s8 -> s8 |
321 | 279 | assert(SrcRegs.size() == 1); |
322 | 0 | UnmergeSrcReg = SrcRegs[0]; |
323 | 279 | } |
324 | | |
325 | 279 | int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits(); |
326 | | |
327 | 279 | SmallVector<Register, 8> PadDstRegs(NumDst); |
328 | 279 | std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin()); |
329 | | |
330 | | // Create the excess dead defs for the unmerge. |
331 | 309 | for (int I = DstRegs.size(); I != NumDst; ++I) |
332 | 30 | PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy); |
333 | | |
334 | 279 | if (PadDstRegs.size() == 1) |
335 | 249 | return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg); |
336 | 30 | return B.buildUnmerge(PadDstRegs, UnmergeSrcReg); |
337 | 279 | } |
338 | | |
339 | | /// Create a sequence of instructions to combine pieces split into register |
340 | | /// typed values to the original IR value. \p OrigRegs contains the destination |
341 | | /// value registers of type \p LLTy, and \p Regs contains the legalized pieces |
342 | | /// with type \p PartLLT. This is used for incoming values (physregs to vregs). |
343 | | static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs, |
344 | | ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, |
345 | 6.65k | const ISD::ArgFlagsTy Flags) { |
346 | 6.65k | MachineRegisterInfo &MRI = *B.getMRI(); |
347 | | |
348 | 6.65k | if (PartLLT == LLTy) { |
349 | | // We should have avoided introducing a new virtual register, and just |
350 | | // directly assigned here. |
351 | 974 | assert(OrigRegs[0] == Regs[0]); |
352 | 0 | return; |
353 | 974 | } |
354 | | |
355 | 5.68k | if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 && |
356 | 5.68k | Regs.size() == 1) { |
357 | 805 | B.buildBitcast(OrigRegs[0], Regs[0]); |
358 | 805 | return; |
359 | 805 | } |
360 | | |
361 | | // A vector PartLLT needs extending to LLTy's element size. |
362 | | // E.g. <2 x s64> = G_SEXT <2 x s32>. |
363 | 4.87k | if (PartLLT.isVector() == LLTy.isVector() && |
364 | 4.87k | PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() && |
365 | 4.87k | (!PartLLT.isVector() || |
366 | 3.41k | PartLLT.getElementCount() == LLTy.getElementCount()) && |
367 | 4.87k | OrigRegs.size() == 1 && Regs.size() == 1) { |
368 | 3.24k | Register SrcReg = Regs[0]; |
369 | | |
370 | 3.24k | LLT LocTy = MRI.getType(SrcReg); |
371 | | |
372 | 3.24k | if (Flags.isSExt()) { |
373 | 2 | SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits()) |
374 | 2 | .getReg(0); |
375 | 3.24k | } else if (Flags.isZExt()) { |
376 | 101 | SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits()) |
377 | 101 | .getReg(0); |
378 | 101 | } |
379 | | |
380 | | // Sometimes pointers are passed zero extended. |
381 | 3.24k | LLT OrigTy = MRI.getType(OrigRegs[0]); |
382 | 3.24k | if (OrigTy.isPointer()) { |
383 | 0 | LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits()); |
384 | 0 | B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg)); |
385 | 0 | return; |
386 | 0 | } |
387 | | |
388 | 3.24k | B.buildTrunc(OrigRegs[0], SrcReg); |
389 | 3.24k | return; |
390 | 3.24k | } |
391 | | |
392 | 1.63k | if (!LLTy.isVector() && !PartLLT.isVector()) { |
393 | 42 | assert(OrigRegs.size() == 1); |
394 | 0 | LLT OrigTy = MRI.getType(OrigRegs[0]); |
395 | | |
396 | 42 | unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size(); |
397 | 42 | if (SrcSize == OrigTy.getSizeInBits()) |
398 | 40 | B.buildMergeValues(OrigRegs[0], Regs); |
399 | 2 | else { |
400 | 2 | auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs); |
401 | 2 | B.buildTrunc(OrigRegs[0], Widened); |
402 | 2 | } |
403 | | |
404 | 42 | return; |
405 | 42 | } |
406 | | |
407 | 1.59k | if (PartLLT.isVector()) { |
408 | 1.41k | assert(OrigRegs.size() == 1); |
409 | 0 | SmallVector<Register> CastRegs(Regs.begin(), Regs.end()); |
410 | | |
411 | | // If PartLLT is a mismatched vector in both number of elements and element |
412 | | // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to |
413 | | // have the same elt type, i.e. v4s32. |
414 | | // TODO: Extend this coercion to element multiples other than just 2.
415 | 1.41k | if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() && |
416 | 1.41k | PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 && |
417 | 1.41k | Regs.size() == 1) { |
418 | 40 | LLT NewTy = PartLLT.changeElementType(LLTy.getElementType()) |
419 | 40 | .changeElementCount(PartLLT.getElementCount() * 2); |
420 | 40 | CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0); |
421 | 40 | PartLLT = NewTy; |
422 | 40 | } |
423 | | |
424 | 1.41k | if (LLTy.getScalarType() == PartLLT.getElementType()) { |
425 | 1.28k | mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs); |
426 | 1.28k | } else { |
427 | 129 | unsigned I = 0; |
428 | 129 | LLT GCDTy = getGCDType(LLTy, PartLLT); |
429 | | |
430 | | // We are both splitting a vector, and bitcasting its element types. Cast |
431 | | // the source pieces into the appropriate number of pieces with the result |
432 | | // element type. |
433 | 129 | for (Register SrcReg : CastRegs) |
434 | 466 | CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0); |
435 | 129 | mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs); |
436 | 129 | } |
437 | | |
438 | 1.41k | return; |
439 | 1.41k | } |
440 | | |
441 | 179 | assert(LLTy.isVector() && !PartLLT.isVector()); |
442 | | |
443 | 0 | LLT DstEltTy = LLTy.getElementType(); |
444 | | |
445 | | // Pointer information was discarded. We'll need to coerce some register types |
446 | | // to avoid violating type constraints. |
447 | 179 | LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType(); |
448 | | |
449 | 179 | assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits()); |
450 | | |
451 | 179 | if (DstEltTy == PartLLT) { |
452 | | // Vector was trivially scalarized. |
453 | | |
454 | 92 | if (RealDstEltTy.isPointer()) { |
455 | 0 | for (Register Reg : Regs) |
456 | 0 | MRI.setType(Reg, RealDstEltTy); |
457 | 0 | } |
458 | | |
459 | 92 | B.buildBuildVector(OrigRegs[0], Regs); |
460 | 92 | } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) { |
461 | | // Deal with vector with 64-bit elements decomposed to 32-bit |
462 | | // registers. Need to create intermediate 64-bit elements. |
463 | 0 | SmallVector<Register, 8> EltMerges; |
464 | 0 | int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits(); |
465 | |
466 | 0 | assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0); |
467 | | |
468 | 0 | for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { |
469 | 0 | auto Merge = |
470 | 0 | B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt)); |
471 | | // Fix the type in case this is really a vector of pointers. |
472 | 0 | MRI.setType(Merge.getReg(0), RealDstEltTy); |
473 | 0 | EltMerges.push_back(Merge.getReg(0)); |
474 | 0 | Regs = Regs.drop_front(PartsPerElt); |
475 | 0 | } |
476 | |
477 | 0 | B.buildBuildVector(OrigRegs[0], EltMerges); |
478 | 87 | } else { |
479 | | // Vector was split, and elements promoted to a wider type. |
480 | | // FIXME: Should handle floating point promotions. |
481 | 87 | unsigned NumElts = LLTy.getNumElements(); |
482 | 87 | LLT BVType = LLT::fixed_vector(NumElts, PartLLT); |
483 | | |
484 | 87 | Register BuildVec; |
485 | 87 | if (NumElts == Regs.size()) |
486 | 87 | BuildVec = B.buildBuildVector(BVType, Regs).getReg(0); |
487 | 0 | else { |
488 | | // Vector elements are packed in the inputs. |
489 | | // e.g. we have a <4 x s16> but 2 x s32 in regs. |
490 | 0 | assert(NumElts > Regs.size()); |
491 | 0 | LLT SrcEltTy = MRI.getType(Regs[0]); |
492 | |
493 | 0 | LLT OriginalEltTy = MRI.getType(OrigRegs[0]).getElementType(); |
494 | | |
495 | | // Input registers contain packed elements. |
496 | | // Determine how many elements per reg. |
497 | 0 | assert((SrcEltTy.getSizeInBits() % OriginalEltTy.getSizeInBits()) == 0); |
498 | 0 | unsigned EltPerReg = |
499 | 0 | (SrcEltTy.getSizeInBits() / OriginalEltTy.getSizeInBits()); |
500 | |
501 | 0 | SmallVector<Register, 0> BVRegs; |
502 | 0 | BVRegs.reserve(Regs.size() * EltPerReg); |
503 | 0 | for (Register R : Regs) { |
504 | 0 | auto Unmerge = B.buildUnmerge(OriginalEltTy, R); |
505 | 0 | for (unsigned K = 0; K < EltPerReg; ++K) |
506 | 0 | BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0)); |
507 | 0 | } |
508 | | |
509 | | // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces |
510 | | // for a <3 x s16> vector. We should have less than EltPerReg extra items. |
511 | 0 | if (BVRegs.size() > NumElts) { |
512 | 0 | assert((BVRegs.size() - NumElts) < EltPerReg); |
513 | 0 | BVRegs.truncate(NumElts); |
514 | 0 | } |
515 | 0 | BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0); |
516 | 0 | } |
517 | 0 | B.buildTrunc(OrigRegs[0], BuildVec); |
518 | 87 | } |
519 | 179 | } |
520 | | |
521 | | /// Create a sequence of instructions to expand the value in \p SrcReg (of type |
522 | | /// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should |
523 | | /// contain the type of scalar value extension if necessary. |
524 | | /// |
525 | | /// This is used for outgoing values (vregs to physregs) |
526 | | static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, |
527 | | Register SrcReg, LLT SrcTy, LLT PartTy, |
528 | 5.20k | unsigned ExtendOp = TargetOpcode::G_ANYEXT) { |
529 | | // We could just insert a regular copy, but this is unreachable at the moment. |
530 | 5.20k | assert(SrcTy != PartTy && "identical part types shouldn't reach here"); |
531 | | |
532 | 0 | const unsigned PartSize = PartTy.getSizeInBits(); |
533 | | |
534 | 5.20k | if (PartTy.isVector() == SrcTy.isVector() && |
535 | 5.20k | PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) { |
536 | 2.78k | assert(DstRegs.size() == 1); |
537 | 0 | B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg}); |
538 | 2.78k | return; |
539 | 2.78k | } |
540 | | |
541 | 2.42k | if (SrcTy.isVector() && !PartTy.isVector() && |
542 | 2.42k | PartSize > SrcTy.getElementType().getSizeInBits()) { |
543 | | // Vector was scalarized, and the elements extended. |
544 | 329 | auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg); |
545 | 1.31k | for (int i = 0, e = DstRegs.size(); i != e; ++i) |
546 | 987 | B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i)); |
547 | 329 | return; |
548 | 329 | } |
549 | | |
550 | 2.09k | if (SrcTy.isVector() && PartTy.isVector() && |
551 | 2.09k | PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() && |
552 | 2.09k | SrcTy.getNumElements() < PartTy.getNumElements()) { |
553 | | // A coercion like: v2f32 -> v4f32. |
554 | 0 | Register DstReg = DstRegs.front(); |
555 | 0 | B.buildPadVectorWithUndefElements(DstReg, SrcReg); |
556 | 0 | return; |
557 | 0 | } |
558 | | |
559 | 2.09k | LLT GCDTy = getGCDType(SrcTy, PartTy); |
560 | 2.09k | if (GCDTy == PartTy) { |
561 | | // If this already evenly divisible, we can create a simple unmerge. |
562 | 2.09k | B.buildUnmerge(DstRegs, SrcReg); |
563 | 2.09k | return; |
564 | 2.09k | } |
565 | | |
566 | 2 | MachineRegisterInfo &MRI = *B.getMRI(); |
567 | 2 | LLT DstTy = MRI.getType(DstRegs[0]); |
568 | 2 | LLT LCMTy = getCoverTy(SrcTy, PartTy); |
569 | | |
570 | 2 | if (PartTy.isVector() && LCMTy == PartTy) { |
571 | 0 | assert(DstRegs.size() == 1); |
572 | 0 | B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg); |
573 | 0 | return; |
574 | 0 | } |
575 | | |
576 | 2 | const unsigned DstSize = DstTy.getSizeInBits(); |
577 | 2 | const unsigned SrcSize = SrcTy.getSizeInBits(); |
578 | 2 | unsigned CoveringSize = LCMTy.getSizeInBits(); |
579 | | |
580 | 2 | Register UnmergeSrc = SrcReg; |
581 | | |
582 | 2 | if (!LCMTy.isVector() && CoveringSize != SrcSize) { |
583 | | // For scalars, it's common to be able to use a simple extension. |
584 | 2 | if (SrcTy.isScalar() && DstTy.isScalar()) { |
585 | 2 | CoveringSize = alignTo(SrcSize, DstSize); |
586 | 2 | LLT CoverTy = LLT::scalar(CoveringSize); |
587 | 2 | UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0); |
588 | 2 | } else { |
589 | | // Widen to the common type. |
590 | | // FIXME: This should respect the extend type |
591 | 0 | Register Undef = B.buildUndef(SrcTy).getReg(0); |
592 | 0 | SmallVector<Register, 8> MergeParts(1, SrcReg); |
593 | 0 | for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize) |
594 | 0 | MergeParts.push_back(Undef); |
595 | 0 | UnmergeSrc = B.buildMergeLikeInstr(LCMTy, MergeParts).getReg(0); |
596 | 0 | } |
597 | 2 | } |
598 | | |
599 | 2 | if (LCMTy.isVector() && CoveringSize != SrcSize) |
600 | 0 | UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0); |
601 | | |
602 | 2 | B.buildUnmerge(DstRegs, UnmergeSrc); |
603 | 2 | } |
604 | | |
605 | | bool CallLowering::determineAndHandleAssignments( |
606 | | ValueHandler &Handler, ValueAssigner &Assigner, |
607 | | SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder, |
608 | | CallingConv::ID CallConv, bool IsVarArg, |
609 | 16.5k | ArrayRef<Register> ThisReturnRegs) const { |
610 | 16.5k | MachineFunction &MF = MIRBuilder.getMF(); |
611 | 16.5k | const Function &F = MF.getFunction(); |
612 | 16.5k | SmallVector<CCValAssign, 16> ArgLocs; |
613 | | |
614 | 16.5k | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext()); |
615 | 16.5k | if (!determineAssignments(Assigner, Args, CCInfo)) |
616 | 0 | return false; |
617 | | |
618 | 16.5k | return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder, |
619 | 16.5k | ThisReturnRegs); |
620 | 16.5k | } |
621 | | |
622 | 5.20k | static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) { |
623 | 5.20k | if (Flags.isSExt()) |
624 | 0 | return TargetOpcode::G_SEXT; |
625 | 5.20k | if (Flags.isZExt()) |
626 | 0 | return TargetOpcode::G_ZEXT; |
627 | 5.20k | return TargetOpcode::G_ANYEXT; |
628 | 5.20k | } |
629 | | |
630 | | bool CallLowering::determineAssignments(ValueAssigner &Assigner, |
631 | | SmallVectorImpl<ArgInfo> &Args, |
632 | 31.4k | CCState &CCInfo) const { |
633 | 31.4k | LLVMContext &Ctx = CCInfo.getContext(); |
634 | 31.4k | const CallingConv::ID CallConv = CCInfo.getCallingConv(); |
635 | | |
636 | 31.4k | unsigned NumArgs = Args.size(); |
637 | 67.6k | for (unsigned i = 0; i != NumArgs; ++i) { |
638 | 36.1k | EVT CurVT = EVT::getEVT(Args[i].Ty); |
639 | | |
640 | 36.1k | MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT); |
641 | | |
642 | | // If we need to split the type over multiple regs, check it's a scenario |
643 | | // we currently support. |
644 | 36.1k | unsigned NumParts = |
645 | 36.1k | TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT); |
646 | | |
647 | 36.1k | if (NumParts == 1) { |
648 | | // Try to use the register type if we couldn't assign the VT. |
649 | 32.3k | if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i], |
650 | 32.3k | Args[i].Flags[0], CCInfo)) |
651 | 0 | return false; |
652 | 32.3k | continue; |
653 | 32.3k | } |
654 | | |
655 | | // For incoming arguments (physregs to vregs), we could have values in |
656 | | // physregs (or memlocs) which we want to extract and copy to vregs. |
657 | | // During this, we might have to deal with the LLT being split across |
658 | | // multiple regs, so we have to record this information for later. |
659 | | // |
660 | | // If we have outgoing args, then we have the opposite case. We have a |
661 | | // vreg with an LLT which we want to assign to a physical location, and |
662 | | // we might have to record that the value has to be split later. |
663 | | |
664 | | // We're handling an incoming arg which is split over multiple regs. |
665 | | // E.g. passing an s128 on AArch64. |
666 | 3.78k | ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; |
667 | 3.78k | Args[i].Flags.clear(); |
668 | | |
669 | 15.5k | for (unsigned Part = 0; Part < NumParts; ++Part) { |
670 | 11.7k | ISD::ArgFlagsTy Flags = OrigFlags; |
671 | 11.7k | if (Part == 0) { |
672 | 3.78k | Flags.setSplit(); |
673 | 7.98k | } else { |
674 | 7.98k | Flags.setOrigAlign(Align(1)); |
675 | 7.98k | if (Part == NumParts - 1) |
676 | 3.78k | Flags.setSplitEnd(); |
677 | 7.98k | } |
678 | | |
679 | 11.7k | Args[i].Flags.push_back(Flags); |
680 | 11.7k | if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i], |
681 | 11.7k | Args[i].Flags[Part], CCInfo)) { |
682 | | // Still couldn't assign this smaller part type for some reason. |
683 | 0 | return false; |
684 | 0 | } |
685 | 11.7k | } |
686 | 3.78k | } |
687 | | |
688 | 31.4k | return true; |
689 | 31.4k | } |
690 | | |
691 | | bool CallLowering::handleAssignments(ValueHandler &Handler, |
692 | | SmallVectorImpl<ArgInfo> &Args, |
693 | | CCState &CCInfo, |
694 | | SmallVectorImpl<CCValAssign> &ArgLocs, |
695 | | MachineIRBuilder &MIRBuilder, |
696 | 31.3k | ArrayRef<Register> ThisReturnRegs) const { |
697 | 31.3k | MachineFunction &MF = MIRBuilder.getMF(); |
698 | 31.3k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
699 | 31.3k | const Function &F = MF.getFunction(); |
700 | 31.3k | const DataLayout &DL = F.getParent()->getDataLayout(); |
701 | | |
702 | 31.3k | const unsigned NumArgs = Args.size(); |
703 | | |
704 | | // Stores thunks for outgoing register assignments. This is used so we delay |
705 | | // generating register copies until mem loc assignments are done. We do this |
706 | | // so that if the target is using the delayed stack protector feature, we can |
707 | | // find the split point of the block accurately. E.g. if we have: |
708 | | // G_STORE %val, %memloc |
709 | | // $x0 = COPY %foo |
710 | | // $x1 = COPY %bar |
711 | | // CALL func |
712 | | // ... then the split point for the block will correctly be at, and including, |
713 | | // the copy to $x0. If instead the G_STORE instruction immediately precedes |
714 | | // the CALL, then we'd prematurely choose the CALL as the split point, thus |
715 | | // generating a split block with a CALL that uses undefined physregs. |
716 | 31.3k | SmallVector<std::function<void()>> DelayedOutgoingRegAssignments; |
717 | | |
718 | 67.4k | for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) { |
719 | 36.0k | assert(j < ArgLocs.size() && "Skipped too many arg locs"); |
720 | 0 | CCValAssign &VA = ArgLocs[j]; |
721 | 36.0k | assert(VA.getValNo() == i && "Location doesn't correspond to current arg"); |
722 | | |
723 | 36.0k | if (VA.needsCustom()) { |
724 | 0 | std::function<void()> Thunk; |
725 | 0 | unsigned NumArgRegs = Handler.assignCustomValue( |
726 | 0 | Args[i], ArrayRef(ArgLocs).slice(j), &Thunk); |
727 | 0 | if (Thunk) |
728 | 0 | DelayedOutgoingRegAssignments.emplace_back(Thunk); |
729 | 0 | if (!NumArgRegs) |
730 | 0 | return false; |
731 | 0 | j += (NumArgRegs - 1); |
732 | 0 | continue; |
733 | 0 | } |
734 | | |
735 | 36.0k | const MVT ValVT = VA.getValVT(); |
736 | 36.0k | const MVT LocVT = VA.getLocVT(); |
737 | | |
738 | 36.0k | const LLT LocTy(LocVT); |
739 | 36.0k | const LLT ValTy(ValVT); |
740 | 36.0k | const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy; |
741 | 36.0k | const EVT OrigVT = EVT::getEVT(Args[i].Ty); |
742 | 36.0k | const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); |
743 | | |
744 | | // Expected to be multiple regs for a single incoming arg. |
745 | | // There should be Regs.size() ArgLocs per argument. |
746 | | // This should be the same as getNumRegistersForCallingConv |
747 | 36.0k | const unsigned NumParts = Args[i].Flags.size(); |
748 | | |
749 | | // Now split the registers into the assigned types. |
750 | 36.0k | Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end()); |
751 | | |
752 | 36.0k | if (NumParts != 1 || NewLLT != OrigTy) { |
753 | | // If we can't directly assign the register, we need one or more |
754 | | // intermediate values. |
755 | 10.8k | Args[i].Regs.resize(NumParts); |
756 | | |
757 | | // For each split register, create and assign a vreg that will store |
758 | | // the incoming component of the larger value. These will later be |
759 | | // merged to form the final vreg. |
760 | 29.7k | for (unsigned Part = 0; Part < NumParts; ++Part) |
761 | 18.8k | Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT); |
762 | 10.8k | } |
763 | | |
764 | 36.0k | assert((j + (NumParts - 1)) < ArgLocs.size() && |
765 | 36.0k | "Too many regs for number of args"); |
766 | | |
767 | | // Coerce into outgoing value types before register assignment. |
768 | 36.0k | if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) { |
769 | 5.20k | assert(Args[i].OrigRegs.size() == 1); |
770 | 0 | buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy, |
771 | 5.20k | ValTy, extendOpFromFlags(Args[i].Flags[0])); |
772 | 5.20k | } |
773 | | |
774 | 0 | bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL); |
775 | 80.1k | for (unsigned Part = 0; Part < NumParts; ++Part) { |
776 | 44.0k | Register ArgReg = Args[i].Regs[Part]; |
777 | | // There should be Regs.size() ArgLocs per argument. |
778 | 44.0k | unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part; |
779 | 44.0k | CCValAssign &VA = ArgLocs[j + Idx]; |
780 | 44.0k | const ISD::ArgFlagsTy Flags = Args[i].Flags[Part]; |
781 | | |
782 | 44.0k | if (VA.isMemLoc() && !Flags.isByVal()) { |
783 | | // Individual pieces may have been spilled to the stack and others |
784 | | // passed in registers. |
785 | | |
786 | | // TODO: The memory size may be larger than the value we need to |
787 | | // store. We may need to adjust the offset for big endian targets. |
788 | 573 | LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags); |
789 | | |
790 | 573 | MachinePointerInfo MPO; |
791 | 573 | Register StackAddr = Handler.getStackAddress( |
792 | 573 | MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags); |
793 | | |
794 | 573 | Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA); |
795 | 573 | continue; |
796 | 573 | } |
797 | | |
798 | 43.4k | if (VA.isMemLoc() && Flags.isByVal()) { |
799 | 5 | assert(Args[i].Regs.size() == 1 && |
800 | 5 | "didn't expect split byval pointer"); |
801 | | |
802 | 5 | if (Handler.isIncomingArgumentHandler()) { |
803 | | // We just need to copy the frame index value to the pointer. |
804 | 0 | MachinePointerInfo MPO; |
805 | 0 | Register StackAddr = Handler.getStackAddress( |
806 | 0 | Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags); |
807 | 0 | MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr); |
808 | 5 | } else { |
809 | | // For outgoing byval arguments, insert the implicit copy byval |
810 | | // implies, such that writes in the callee do not modify the caller's |
811 | | // value. |
812 | 5 | uint64_t MemSize = Flags.getByValSize(); |
813 | 5 | int64_t Offset = VA.getLocMemOffset(); |
814 | | |
815 | 5 | MachinePointerInfo DstMPO; |
816 | 5 | Register StackAddr = |
817 | 5 | Handler.getStackAddress(MemSize, Offset, DstMPO, Flags); |
818 | | |
819 | 5 | MachinePointerInfo SrcMPO(Args[i].OrigValue); |
820 | 5 | if (!Args[i].OrigValue) { |
821 | | // We still need to accurately track the stack address space if we |
822 | | // don't know the underlying value. |
823 | 0 | const LLT PtrTy = MRI.getType(StackAddr); |
824 | 0 | SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace()); |
825 | 0 | } |
826 | | |
827 | 5 | Align DstAlign = std::max(Flags.getNonZeroByValAlign(), |
828 | 5 | inferAlignFromPtrInfo(MF, DstMPO)); |
829 | | |
830 | 5 | Align SrcAlign = std::max(Flags.getNonZeroByValAlign(), |
831 | 5 | inferAlignFromPtrInfo(MF, SrcMPO)); |
832 | | |
833 | 5 | Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0], |
834 | 5 | DstMPO, DstAlign, SrcMPO, SrcAlign, |
835 | 5 | MemSize, VA); |
836 | 5 | } |
837 | 5 | continue; |
838 | 5 | } |
839 | | |
840 | 43.4k | assert(!VA.needsCustom() && "custom loc should have been handled already"); |
841 | | |
842 | 43.4k | if (i == 0 && !ThisReturnRegs.empty() && |
843 | 43.4k | Handler.isIncomingArgumentHandler() && |
844 | 43.4k | isTypeIsValidForThisReturn(ValVT)) { |
845 | 0 | Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA); |
846 | 0 | continue; |
847 | 0 | } |
848 | | |
849 | 43.4k | if (Handler.isIncomingArgumentHandler()) |
850 | 22.4k | Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); |
851 | 21.0k | else { |
852 | 21.0k | DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() { |
853 | 21.0k | Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); |
854 | 21.0k | }); |
855 | 21.0k | } |
856 | 43.4k | } |
857 | | |
858 | | // Now that all pieces have been assigned, re-pack the register typed values |
859 | | // into the original value typed registers. |
860 | 36.0k | if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) { |
861 | | // Merge the split registers into the expected larger result vregs of |
862 | | // the original call. |
863 | 6.65k | buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy, |
864 | 6.65k | LocTy, Args[i].Flags[0]); |
865 | 6.65k | } |
866 | | |
867 | 36.0k | j += NumParts - 1; |
868 | 36.0k | } |
869 | 31.3k | for (auto &Fn : DelayedOutgoingRegAssignments) |
870 | 21.0k | Fn(); |
871 | | |
872 | 31.3k | return true; |
873 | 31.3k | } |
874 | | |
875 | | void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, |
876 | | ArrayRef<Register> VRegs, Register DemoteReg, |
877 | 0 | int FI) const { |
878 | 0 | MachineFunction &MF = MIRBuilder.getMF(); |
879 | 0 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
880 | 0 | const DataLayout &DL = MF.getDataLayout(); |
881 | |
882 | 0 | SmallVector<EVT, 4> SplitVTs; |
883 | 0 | SmallVector<uint64_t, 4> Offsets; |
884 | 0 | ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); |
885 | |
886 | 0 | assert(VRegs.size() == SplitVTs.size()); |
887 | | |
888 | 0 | unsigned NumValues = SplitVTs.size(); |
889 | 0 | Align BaseAlign = DL.getPrefTypeAlign(RetTy); |
890 | 0 | Type *RetPtrTy = |
891 | 0 | PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace()); |
892 | 0 | LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL); |
893 | |
894 | 0 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); |
895 | |
896 | 0 | for (unsigned I = 0; I < NumValues; ++I) { |
897 | 0 | Register Addr; |
898 | 0 | MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); |
899 | 0 | auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, |
900 | 0 | MRI.getType(VRegs[I]), |
901 | 0 | commonAlignment(BaseAlign, Offsets[I])); |
902 | 0 | MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); |
903 | 0 | } |
904 | 0 | } |
905 | | |
906 | | void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, |
907 | | ArrayRef<Register> VRegs, |
908 | 9 | Register DemoteReg) const { |
909 | 9 | MachineFunction &MF = MIRBuilder.getMF(); |
910 | 9 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
911 | 9 | const DataLayout &DL = MF.getDataLayout(); |
912 | | |
913 | 9 | SmallVector<EVT, 4> SplitVTs; |
914 | 9 | SmallVector<uint64_t, 4> Offsets; |
915 | 9 | ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); |
916 | | |
917 | 9 | assert(VRegs.size() == SplitVTs.size()); |
918 | | |
919 | 0 | unsigned NumValues = SplitVTs.size(); |
920 | 9 | Align BaseAlign = DL.getPrefTypeAlign(RetTy); |
921 | 9 | unsigned AS = DL.getAllocaAddrSpace(); |
922 | 9 | LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL); |
923 | | |
924 | 9 | MachinePointerInfo PtrInfo(AS); |
925 | | |
926 | 18 | for (unsigned I = 0; I < NumValues; ++I) { |
927 | 9 | Register Addr; |
928 | 9 | MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); |
929 | 9 | auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, |
930 | 9 | MRI.getType(VRegs[I]), |
931 | 9 | commonAlignment(BaseAlign, Offsets[I])); |
932 | 9 | MIRBuilder.buildStore(VRegs[I], Addr, *MMO); |
933 | 9 | } |
934 | 9 | } |
935 | | |
936 | | void CallLowering::insertSRetIncomingArgument( |
937 | | const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, |
938 | 9 | MachineRegisterInfo &MRI, const DataLayout &DL) const { |
939 | 9 | unsigned AS = DL.getAllocaAddrSpace(); |
940 | 9 | DemoteReg = MRI.createGenericVirtualRegister( |
941 | 9 | LLT::pointer(AS, DL.getPointerSizeInBits(AS))); |
942 | | |
943 | 9 | Type *PtrTy = PointerType::get(F.getReturnType(), AS); |
944 | | |
945 | 9 | SmallVector<EVT, 1> ValueVTs; |
946 | 9 | ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs); |
947 | | |
948 | | // NOTE: Assume that a pointer won't get split into more than one VT. |
949 | 9 | assert(ValueVTs.size() == 1); |
950 | | |
951 | 0 | ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()), |
952 | 9 | ArgInfo::NoArgIndex); |
953 | 9 | setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); |
954 | 9 | DemoteArg.Flags[0].setSRet(); |
955 | 9 | SplitArgs.insert(SplitArgs.begin(), DemoteArg); |
956 | 9 | } |
957 | | |
958 | | void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, |
959 | | const CallBase &CB, |
960 | 0 | CallLoweringInfo &Info) const { |
961 | 0 | const DataLayout &DL = MIRBuilder.getDataLayout(); |
962 | 0 | Type *RetTy = CB.getType(); |
963 | 0 | unsigned AS = DL.getAllocaAddrSpace(); |
964 | 0 | LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); |
965 | |
966 | 0 | int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( |
967 | 0 | DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); |
968 | |
969 | 0 | Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); |
970 | 0 | ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS), |
971 | 0 | ArgInfo::NoArgIndex); |
972 | 0 | setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); |
973 | 0 | DemoteArg.Flags[0].setSRet(); |
974 | |
975 | 0 | Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg); |
976 | 0 | Info.DemoteStackIndex = FI; |
977 | 0 | Info.DemoteRegister = DemoteReg; |
978 | 0 | } |
979 | | |
980 | | bool CallLowering::checkReturn(CCState &CCInfo, |
981 | | SmallVectorImpl<BaseArgInfo> &Outs, |
982 | 16.8k | CCAssignFn *Fn) const { |
983 | 35.6k | for (unsigned I = 0, E = Outs.size(); I < E; ++I) { |
984 | 18.8k | MVT VT = MVT::getVT(Outs[I].Ty); |
985 | 18.8k | if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) |
986 | 9 | return false; |
987 | 18.8k | } |
988 | 16.8k | return true; |
989 | 16.8k | } |
990 | | |
991 | | void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, |
992 | | AttributeList Attrs, |
993 | | SmallVectorImpl<BaseArgInfo> &Outs, |
994 | 16.8k | const DataLayout &DL) const { |
995 | 16.8k | LLVMContext &Context = RetTy->getContext(); |
996 | 16.8k | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
997 | | |
998 | 16.8k | SmallVector<EVT, 4> SplitVTs; |
999 | 16.8k | ComputeValueVTs(*TLI, DL, RetTy, SplitVTs); |
1000 | 16.8k | addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex); |
1001 | | |
1002 | 16.8k | for (EVT VT : SplitVTs) { |
1003 | 14.0k | unsigned NumParts = |
1004 | 14.0k | TLI->getNumRegistersForCallingConv(Context, CallConv, VT); |
1005 | 14.0k | MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT); |
1006 | 14.0k | Type *PartTy = EVT(RegVT).getTypeForEVT(Context); |
1007 | | |
1008 | 33.0k | for (unsigned I = 0; I < NumParts; ++I) { |
1009 | 19.0k | Outs.emplace_back(PartTy, Flags); |
1010 | 19.0k | } |
1011 | 14.0k | } |
1012 | 16.8k | } |
1013 | | |
1014 | 14.8k | bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { |
1015 | 14.8k | const auto &F = MF.getFunction(); |
1016 | 14.8k | Type *ReturnType = F.getReturnType(); |
1017 | 14.8k | CallingConv::ID CallConv = F.getCallingConv(); |
1018 | | |
1019 | 14.8k | SmallVector<BaseArgInfo, 4> SplitArgs; |
1020 | 14.8k | getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs, |
1021 | 14.8k | MF.getDataLayout()); |
1022 | 14.8k | return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); |
1023 | 14.8k | } |
1024 | | |
1025 | | bool CallLowering::parametersInCSRMatch( |
1026 | | const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, |
1027 | | const SmallVectorImpl<CCValAssign> &OutLocs, |
1028 | 69 | const SmallVectorImpl<ArgInfo> &OutArgs) const { |
1029 | 184 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
1030 | 115 | const auto &ArgLoc = OutLocs[i]; |
1031 | | // If it's not a register, it's fine. |
1032 | 115 | if (!ArgLoc.isRegLoc()) |
1033 | 0 | continue; |
1034 | | |
1035 | 115 | MCRegister PhysReg = ArgLoc.getLocReg(); |
1036 | | |
1037 | | // Only look at callee-saved registers. |
1038 | 115 | if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg)) |
1039 | 115 | continue; |
1040 | | |
1041 | 0 | LLVM_DEBUG( |
1042 | 0 | dbgs() |
1043 | 0 | << "... Call has an argument passed in a callee-saved register.\n"); |
1044 | | |
1045 | | // Check if it was copied from. |
1046 | 0 | const ArgInfo &OutInfo = OutArgs[i]; |
1047 | |
1048 | 0 | if (OutInfo.Regs.size() > 1) { |
1049 | 0 | LLVM_DEBUG( |
1050 | 0 | dbgs() << "... Cannot handle arguments in multiple registers.\n"); |
1051 | 0 | return false; |
1052 | 0 | } |
1053 | | |
1054 | | // Check if we copy the register, walking through copies from virtual |
1055 | | // registers. Note that getDefIgnoringCopies does not ignore copies from |
1056 | | // physical registers. |
1057 | 0 | MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); |
1058 | 0 | if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { |
1059 | 0 | LLVM_DEBUG( |
1060 | 0 | dbgs() |
1061 | 0 | << "... Parameter was not copied into a VReg, cannot tail call.\n"); |
1062 | 0 | return false; |
1063 | 0 | } |
1064 | | |
1065 | | // Got a copy. Verify that it's the same as the register we want. |
1066 | 0 | Register CopyRHS = RegDef->getOperand(1).getReg(); |
1067 | 0 | if (CopyRHS != PhysReg) { |
1068 | 0 | LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " |
1069 | 0 | "VReg, cannot tail call.\n"); |
1070 | 0 | return false; |
1071 | 0 | } |
1072 | 0 | } |
1073 | | |
1074 | 69 | return true; |
1075 | 69 | } |
1076 | | |
1077 | | bool CallLowering::resultsCompatible(CallLoweringInfo &Info, |
1078 | | MachineFunction &MF, |
1079 | | SmallVectorImpl<ArgInfo> &InArgs, |
1080 | | ValueAssigner &CalleeAssigner, |
1081 | 0 | ValueAssigner &CallerAssigner) const { |
1082 | 0 | const Function &F = MF.getFunction(); |
1083 | 0 | CallingConv::ID CalleeCC = Info.CallConv; |
1084 | 0 | CallingConv::ID CallerCC = F.getCallingConv(); |
1085 | |
1086 | 0 | if (CallerCC == CalleeCC) |
1087 | 0 | return true; |
1088 | | |
1089 | 0 | SmallVector<CCValAssign, 16> ArgLocs1; |
1090 | 0 | CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext()); |
1091 | 0 | if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1)) |
1092 | 0 | return false; |
1093 | | |
1094 | 0 | SmallVector<CCValAssign, 16> ArgLocs2; |
1095 | 0 | CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext()); |
1096 | 0 | if (!determineAssignments(CallerAssigner, InArgs, CCInfo2)) |
1097 | 0 | return false; |
1098 | | |
1099 | | // We need the argument locations to match up exactly. If there's more in |
1100 | | // one than the other, then we are done. |
1101 | 0 | if (ArgLocs1.size() != ArgLocs2.size()) |
1102 | 0 | return false; |
1103 | | |
1104 | | // Make sure that each location is passed in exactly the same way. |
1105 | 0 | for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { |
1106 | 0 | const CCValAssign &Loc1 = ArgLocs1[i]; |
1107 | 0 | const CCValAssign &Loc2 = ArgLocs2[i]; |
1108 | | |
1109 | | // We need both of them to be the same. So if one is a register and one |
1110 | | // isn't, we're done. |
1111 | 0 | if (Loc1.isRegLoc() != Loc2.isRegLoc()) |
1112 | 0 | return false; |
1113 | | |
1114 | 0 | if (Loc1.isRegLoc()) { |
1115 | | // If they don't have the same register location, we're done. |
1116 | 0 | if (Loc1.getLocReg() != Loc2.getLocReg()) |
1117 | 0 | return false; |
1118 | | |
1119 | | // They matched, so we can move to the next ArgLoc. |
1120 | 0 | continue; |
1121 | 0 | } |
1122 | | |
1123 | | // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. |
1124 | 0 | if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) |
1125 | 0 | return false; |
1126 | 0 | } |
1127 | | |
1128 | 0 | return true; |
1129 | 0 | } |
1130 | | |
1131 | | LLT CallLowering::ValueHandler::getStackValueStoreType( |
1132 | 56 | const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { |
1133 | 56 | const MVT ValVT = VA.getValVT(); |
1134 | 56 | if (ValVT != MVT::iPTR) { |
1135 | 56 | LLT ValTy(ValVT); |
1136 | | |
1137 | | // We lost the pointeriness going through CCValAssign, so try to restore it |
1138 | | // based on the flags. |
1139 | 56 | if (Flags.isPointer()) { |
1140 | 56 | LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(), |
1141 | 56 | ValTy.getScalarSizeInBits()); |
1142 | 56 | if (ValVT.isVector()) |
1143 | 0 | return LLT::vector(ValTy.getElementCount(), PtrTy); |
1144 | 56 | return PtrTy; |
1145 | 56 | } |
1146 | | |
1147 | 0 | return ValTy; |
1148 | 56 | } |
1149 | | |
1150 | 0 | unsigned AddrSpace = Flags.getPointerAddrSpace(); |
1151 | 0 | return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace)); |
1152 | 56 | } |
1153 | | |
1154 | | void CallLowering::ValueHandler::copyArgumentMemory( |
1155 | | const ArgInfo &Arg, Register DstPtr, Register SrcPtr, |
1156 | | const MachinePointerInfo &DstPtrInfo, Align DstAlign, |
1157 | | const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize, |
1158 | 5 | CCValAssign &VA) const { |
1159 | 5 | MachineFunction &MF = MIRBuilder.getMF(); |
1160 | 5 | MachineMemOperand *SrcMMO = MF.getMachineMemOperand( |
1161 | 5 | SrcPtrInfo, |
1162 | 5 | MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize, |
1163 | 5 | SrcAlign); |
1164 | | |
1165 | 5 | MachineMemOperand *DstMMO = MF.getMachineMemOperand( |
1166 | 5 | DstPtrInfo, |
1167 | 5 | MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable, |
1168 | 5 | MemSize, DstAlign); |
1169 | | |
1170 | 5 | const LLT PtrTy = MRI.getType(DstPtr); |
1171 | 5 | const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits()); |
1172 | | |
1173 | 5 | auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize); |
1174 | 5 | MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO); |
1175 | 5 | } |
1176 | | |
1177 | | Register CallLowering::ValueHandler::extendRegister(Register ValReg, |
1178 | | const CCValAssign &VA, |
1179 | 21.0k | unsigned MaxSizeBits) { |
1180 | 21.0k | LLT LocTy{VA.getLocVT()}; |
1181 | 21.0k | LLT ValTy{VA.getValVT()}; |
1182 | | |
1183 | 21.0k | if (LocTy.getSizeInBits() == ValTy.getSizeInBits()) |
1184 | 20.2k | return ValReg; |
1185 | | |
1186 | 763 | if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) { |
1187 | 0 | if (MaxSizeBits <= ValTy.getSizeInBits()) |
1188 | 0 | return ValReg; |
1189 | 0 | LocTy = LLT::scalar(MaxSizeBits); |
1190 | 0 | } |
1191 | | |
1192 | 763 | const LLT ValRegTy = MRI.getType(ValReg); |
1193 | 763 | if (ValRegTy.isPointer()) { |
1194 | | // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so |
1195 | | // we have to cast to do the extension. |
1196 | 0 | LLT IntPtrTy = LLT::scalar(ValRegTy.getSizeInBits()); |
1197 | 0 | ValReg = MIRBuilder.buildPtrToInt(IntPtrTy, ValReg).getReg(0); |
1198 | 0 | } |
1199 | | |
1200 | 763 | switch (VA.getLocInfo()) { |
1201 | 0 | default: break; |
1202 | 0 | case CCValAssign::Full: |
1203 | 0 | case CCValAssign::BCvt: |
1204 | | // FIXME: bitconverting between vector types may or may not be a |
1205 | | // nop in big-endian situations. |
1206 | 0 | return ValReg; |
1207 | 763 | case CCValAssign::AExt: { |
1208 | 763 | auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg); |
1209 | 763 | return MIB.getReg(0); |
1210 | 0 | } |
1211 | 0 | case CCValAssign::SExt: { |
1212 | 0 | Register NewReg = MRI.createGenericVirtualRegister(LocTy); |
1213 | 0 | MIRBuilder.buildSExt(NewReg, ValReg); |
1214 | 0 | return NewReg; |
1215 | 0 | } |
1216 | 0 | case CCValAssign::ZExt: { |
1217 | 0 | Register NewReg = MRI.createGenericVirtualRegister(LocTy); |
1218 | 0 | MIRBuilder.buildZExt(NewReg, ValReg); |
1219 | 0 | return NewReg; |
1220 | 0 | } |
1221 | 763 | } |
1222 | 0 | llvm_unreachable("unable to extend register"); |
1223 | 0 | } |
1224 | | |
1225 | 0 | void CallLowering::ValueAssigner::anchor() {} |
1226 | | |
1227 | | Register CallLowering::IncomingValueHandler::buildExtensionHint( |
1228 | 0 | const CCValAssign &VA, Register SrcReg, LLT NarrowTy) { |
1229 | 0 | switch (VA.getLocInfo()) { |
1230 | 0 | case CCValAssign::LocInfo::ZExt: { |
1231 | 0 | return MIRBuilder |
1232 | 0 | .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg, |
1233 | 0 | NarrowTy.getScalarSizeInBits()) |
1234 | 0 | .getReg(0); |
1235 | 0 | } |
1236 | 0 | case CCValAssign::LocInfo::SExt: { |
1237 | 0 | return MIRBuilder |
1238 | 0 | .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg, |
1239 | 0 | NarrowTy.getScalarSizeInBits()) |
1240 | 0 | .getReg(0); |
1241 | 0 | break; |
1242 | 0 | } |
1243 | 0 | default: |
1244 | 0 | return SrcReg; |
1245 | 0 | } |
1246 | 0 | } |
1247 | | |
1248 | | /// Check if we can use a basic COPY instruction between the two types. |
1249 | | /// |
1250 | | /// We're currently building on top of the infrastructure using MVT, which loses |
1251 | | /// pointer information in the CCValAssign. We accept copies from physical |
1252 | | /// registers that have been reported as integers if it's to an equivalent sized |
1253 | | /// pointer LLT. |
1254 | 22.4k | static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) { |
1255 | 22.4k | if (SrcTy == DstTy) |
1256 | 20.0k | return true; |
1257 | | |
1258 | 2.46k | if (SrcTy.getSizeInBits() != DstTy.getSizeInBits()) |
1259 | 0 | return false; |
1260 | | |
1261 | 2.46k | SrcTy = SrcTy.getScalarType(); |
1262 | 2.46k | DstTy = DstTy.getScalarType(); |
1263 | | |
1264 | 2.46k | return (SrcTy.isPointer() && DstTy.isScalar()) || |
1265 | 2.46k | (DstTy.isPointer() && SrcTy.isScalar()); |
1266 | 2.46k | } |
1267 | | |
1268 | | void CallLowering::IncomingValueHandler::assignValueToReg( |
1269 | 22.4k | Register ValVReg, Register PhysReg, const CCValAssign &VA) { |
1270 | 22.4k | const MVT LocVT = VA.getLocVT(); |
1271 | 22.4k | const LLT LocTy(LocVT); |
1272 | 22.4k | const LLT RegTy = MRI.getType(ValVReg); |
1273 | | |
1274 | 22.4k | if (isCopyCompatibleType(RegTy, LocTy)) { |
1275 | 22.4k | MIRBuilder.buildCopy(ValVReg, PhysReg); |
1276 | 22.4k | return; |
1277 | 22.4k | } |
1278 | | |
1279 | 0 | auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg); |
1280 | 0 | auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy); |
1281 | 0 | MIRBuilder.buildTrunc(ValVReg, Hint); |
1282 | 0 | } |