/src/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Generates code for built-in GPU calls which are not runtime-specific. |
10 | | // (Runtime-specific codegen lives in programming model specific files.) |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "CodeGenFunction.h" |
15 | | #include "clang/Basic/Builtins.h" |
16 | | #include "llvm/IR/DataLayout.h" |
17 | | #include "llvm/IR/Instruction.h" |
18 | | #include "llvm/Support/MathExtras.h" |
19 | | #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" |
20 | | |
21 | | using namespace clang; |
22 | | using namespace CodeGen; |
23 | | |
24 | | namespace { |
25 | 0 | llvm::Function *GetVprintfDeclaration(llvm::Module &M) { |
26 | 0 | llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), |
27 | 0 | llvm::PointerType::getUnqual(M.getContext())}; |
28 | 0 | llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( |
29 | 0 | llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); |
30 | |
|
31 | 0 | if (auto *F = M.getFunction("vprintf")) { |
32 | | // Our CUDA system header declares vprintf with the right signature, so |
33 | | // nobody else should have been able to declare vprintf with a bogus |
34 | | // signature. |
35 | 0 | assert(F->getFunctionType() == VprintfFuncType); |
36 | 0 | return F; |
37 | 0 | } |
38 | | |
39 | | // vprintf doesn't already exist; create a declaration and insert it into the |
40 | | // module. |
41 | 0 | return llvm::Function::Create( |
42 | 0 | VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); |
43 | 0 | } |
44 | | |
45 | 0 | llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) { |
46 | 0 | const char *Name = "__llvm_omp_vprintf"; |
47 | 0 | llvm::Module &M = CGM.getModule(); |
48 | 0 | llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), |
49 | 0 | llvm::PointerType::getUnqual(M.getContext()), |
50 | 0 | llvm::Type::getInt32Ty(M.getContext())}; |
51 | 0 | llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( |
52 | 0 | llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); |
53 | |
|
54 | 0 | if (auto *F = M.getFunction(Name)) { |
55 | 0 | if (F->getFunctionType() != VprintfFuncType) { |
56 | 0 | CGM.Error(SourceLocation(), |
57 | 0 | "Invalid type declaration for __llvm_omp_vprintf"); |
58 | 0 | return nullptr; |
59 | 0 | } |
60 | 0 | return F; |
61 | 0 | } |
62 | | |
63 | 0 | return llvm::Function::Create( |
64 | 0 | VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M); |
65 | 0 | } |
66 | | |
67 | | // Transforms a call to printf into a call to the NVPTX vprintf syscall (which |
68 | | // isn't particularly special; it's invoked just like a regular function). |
69 | | // vprintf takes two args: A format string, and a pointer to a buffer containing |
70 | | // the varargs. |
71 | | // |
72 | | // For example, the call |
73 | | // |
74 | | // printf("format string", arg1, arg2, arg3); |
75 | | // |
76 | | // is converted into something resembling |
77 | | // |
78 | | // struct Tmp { |
79 | | // Arg1 a1; |
80 | | // Arg2 a2; |
81 | | // Arg3 a3; |
82 | | // }; |
83 | | // char* buf = alloca(sizeof(Tmp)); |
84 | | // *(Tmp*)buf = {a1, a2, a3}; |
85 | | // vprintf("format string", buf); |
86 | | // |
87 | | // buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the |
88 | | // args is itself aligned to its preferred alignment. |
89 | | // |
90 | | // Note that by the time this function runs, E's args have already undergone the |
91 | | // standard C vararg promotion (short -> int, float -> double, etc.). |
92 | | |
93 | | std::pair<llvm::Value *, llvm::TypeSize> |
94 | 0 | packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) { |
95 | 0 | const llvm::DataLayout &DL = CGF->CGM.getDataLayout(); |
96 | 0 | llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext(); |
97 | 0 | CGBuilderTy &Builder = CGF->Builder; |
98 | | |
99 | | // Construct and fill the args buffer that we'll pass to vprintf. |
100 | 0 | if (Args.size() <= 1) { |
101 | | // If there are no args, pass a null pointer and size 0 |
102 | 0 | llvm::Value *BufferPtr = |
103 | 0 | llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)); |
104 | 0 | return {BufferPtr, llvm::TypeSize::getFixed(0)}; |
105 | 0 | } else { |
106 | 0 | llvm::SmallVector<llvm::Type *, 8> ArgTypes; |
107 | 0 | for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) |
108 | 0 | ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType()); |
109 | | |
110 | | // Using llvm::StructType is correct only because printf doesn't accept |
111 | | // aggregates. If we had to handle aggregates here, we'd have to manually |
112 | | // compute the offsets within the alloca -- we wouldn't be able to assume |
113 | | // that the alignment of the llvm type was the same as the alignment of the |
114 | | // clang type. |
115 | 0 | llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); |
116 | 0 | llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy); |
117 | |
|
118 | 0 | for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { |
119 | 0 | llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); |
120 | 0 | llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal(); |
121 | 0 | Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); |
122 | 0 | } |
123 | 0 | llvm::Value *BufferPtr = |
124 | 0 | Builder.CreatePointerCast(Alloca, llvm::PointerType::getUnqual(Ctx)); |
125 | 0 | return {BufferPtr, DL.getTypeAllocSize(AllocaTy)}; |
126 | 0 | } |
127 | 0 | } |
128 | | |
129 | 0 | bool containsNonScalarVarargs(CodeGenFunction *CGF, const CallArgList &Args) { |
130 | 0 | return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) { |
131 | 0 | return !A.getRValue(*CGF).isScalar(); |
132 | 0 | }); |
133 | 0 | } |
134 | | |
135 | | RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF, |
136 | 0 | llvm::Function *Decl, bool WithSizeArg) { |
137 | 0 | CodeGenModule &CGM = CGF->CGM; |
138 | 0 | CGBuilderTy &Builder = CGF->Builder; |
139 | 0 | assert(E->getBuiltinCallee() == Builtin::BIprintf); |
140 | 0 | assert(E->getNumArgs() >= 1); // printf always has at least one arg. |
141 | | |
142 | | // Uses the same format as nvptx for the argument packing, but also passes |
143 | | // an i32 for the total size of the passed pointer |
144 | 0 | CallArgList Args; |
145 | 0 | CGF->EmitCallArgs(Args, |
146 | 0 | E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), |
147 | 0 | E->arguments(), E->getDirectCallee(), |
148 | 0 | /* ParamsToSkip = */ 0); |
149 | | |
150 | | // We don't know how to emit non-scalar varargs. |
151 | 0 | if (containsNonScalarVarargs(CGF, Args)) { |
152 | 0 | CGM.ErrorUnsupported(E, "non-scalar arg to printf"); |
153 | 0 | return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0)); |
154 | 0 | } |
155 | | |
156 | 0 | auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args); |
157 | 0 | llvm::Value *BufferPtr = r.first; |
158 | |
|
159 | 0 | llvm::SmallVector<llvm::Value *, 3> Vec = { |
160 | 0 | Args[0].getRValue(*CGF).getScalarVal(), BufferPtr}; |
161 | 0 | if (WithSizeArg) { |
162 | | // Passing > 32bit of data as a local alloca doesn't work for nvptx or |
163 | | // amdgpu |
164 | 0 | llvm::Constant *Size = |
165 | 0 | llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()), |
166 | 0 | static_cast<uint32_t>(r.second.getFixedValue())); |
167 | |
|
168 | 0 | Vec.push_back(Size); |
169 | 0 | } |
170 | 0 | return RValue::get(Builder.CreateCall(Decl, Vec)); |
171 | 0 | } |
172 | | } // namespace |
173 | | |
174 | 0 | RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) { |
175 | 0 | assert(getTarget().getTriple().isNVPTX()); |
176 | 0 | return EmitDevicePrintfCallExpr( |
177 | 0 | E, this, GetVprintfDeclaration(CGM.getModule()), false); |
178 | 0 | } |
179 | | |
180 | 0 | RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { |
181 | 0 | assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn); |
182 | 0 | assert(E->getBuiltinCallee() == Builtin::BIprintf || |
183 | 0 | E->getBuiltinCallee() == Builtin::BI__builtin_printf); |
184 | 0 | assert(E->getNumArgs() >= 1); // printf always has at least one arg. |
185 | | |
186 | 0 | CallArgList CallArgs; |
187 | 0 | EmitCallArgs(CallArgs, |
188 | 0 | E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), |
189 | 0 | E->arguments(), E->getDirectCallee(), |
190 | 0 | /* ParamsToSkip = */ 0); |
191 | |
|
192 | 0 | SmallVector<llvm::Value *, 8> Args; |
193 | 0 | for (const auto &A : CallArgs) { |
194 | | // We don't know how to emit non-scalar varargs. |
195 | 0 | if (!A.getRValue(*this).isScalar()) { |
196 | 0 | CGM.ErrorUnsupported(E, "non-scalar arg to printf"); |
197 | 0 | return RValue::get(llvm::ConstantInt::get(IntTy, -1)); |
198 | 0 | } |
199 | | |
200 | 0 | llvm::Value *Arg = A.getRValue(*this).getScalarVal(); |
201 | 0 | Args.push_back(Arg); |
202 | 0 | } |
203 | | |
204 | 0 | llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); |
205 | 0 | IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); |
206 | |
|
207 | 0 | bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal == |
208 | 0 | clang::TargetOptions::AMDGPUPrintfKind::Buffered); |
209 | 0 | auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered); |
210 | 0 | Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); |
211 | 0 | return RValue::get(Printf); |
212 | 0 | } |
213 | | |
214 | 0 | RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) { |
215 | 0 | assert(getTarget().getTriple().isNVPTX() || |
216 | 0 | getTarget().getTriple().isAMDGCN()); |
217 | 0 | return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM), |
218 | 0 | true); |
219 | 0 | } |