Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- NVPTX.cpp ----------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "ABIInfoImpl.h"
10
#include "TargetInfo.h"
11
#include "llvm/IR/IntrinsicsNVPTX.h"
12
13
using namespace clang;
14
using namespace clang::CodeGen;
15
16
//===----------------------------------------------------------------------===//
17
// NVPTX ABI Implementation
18
//===----------------------------------------------------------------------===//
19
20
namespace {
21
22
class NVPTXTargetCodeGenInfo;
23
24
class NVPTXABIInfo : public ABIInfo {
25
  NVPTXTargetCodeGenInfo &CGInfo;
26
27
public:
28
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
29
0
      : ABIInfo(CGT), CGInfo(Info) {}
30
31
  ABIArgInfo classifyReturnType(QualType RetTy) const;
32
  ABIArgInfo classifyArgumentType(QualType Ty) const;
33
34
  void computeInfo(CGFunctionInfo &FI) const override;
35
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
36
                    QualType Ty) const override;
37
  bool isUnsupportedType(QualType T) const;
38
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
39
};
40
41
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
42
public:
43
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
44
0
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
45
46
  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
47
                           CodeGen::CodeGenModule &M) const override;
48
  bool shouldEmitStaticExternCAliases() const override;
49
50
0
  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
51
    // On the device side, surface reference is represented as an object handle
52
    // in 64-bit integer.
53
0
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
54
0
  }
55
56
0
  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
57
    // On the device side, texture reference is represented as an object handle
58
    // in 64-bit integer.
59
0
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
60
0
  }
61
62
  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
63
0
                                              LValue Src) const override {
64
0
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
65
0
    return true;
66
0
  }
67
68
  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
69
0
                                              LValue Src) const override {
70
0
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
71
0
    return true;
72
0
  }
73
74
  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
75
  // resulting MDNode to the nvvm.annotations MDNode.
76
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
77
                              int Operand);
78
79
private:
80
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
81
0
                                           LValue Src) {
82
0
    llvm::Value *Handle = nullptr;
83
0
    llvm::Constant *C =
84
0
        llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
85
    // Lookup `addrspacecast` through the constant pointer if any.
86
0
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
87
0
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
88
0
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
89
      // Load the handle from the specific global variable using
90
      // `nvvm.texsurf.handle.internal` intrinsic.
91
0
      Handle = CGF.EmitRuntimeCall(
92
0
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
93
0
                               {GV->getType()}),
94
0
          {GV}, "texsurf_handle");
95
0
    } else
96
0
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
97
0
    CGF.EmitStoreOfScalar(Handle, Dst);
98
0
  }
99
};
100
101
/// Checks if the type is unsupported directly by the current target.
102
0
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
103
0
  ASTContext &Context = getContext();
104
0
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
105
0
    return true;
106
0
  if (!Context.getTargetInfo().hasFloat128Type() &&
107
0
      (T->isFloat128Type() ||
108
0
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
109
0
    return true;
110
0
  if (const auto *EIT = T->getAs<BitIntType>())
111
0
    return EIT->getNumBits() >
112
0
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
113
0
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
114
0
      Context.getTypeSize(T) > 64U)
115
0
    return true;
116
0
  if (const auto *AT = T->getAsArrayTypeUnsafe())
117
0
    return isUnsupportedType(AT->getElementType());
118
0
  const auto *RT = T->getAs<RecordType>();
119
0
  if (!RT)
120
0
    return false;
121
0
  const RecordDecl *RD = RT->getDecl();
122
123
  // If this is a C++ record, check the bases first.
124
0
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
125
0
    for (const CXXBaseSpecifier &I : CXXRD->bases())
126
0
      if (isUnsupportedType(I.getType()))
127
0
        return true;
128
129
0
  for (const FieldDecl *I : RD->fields())
130
0
    if (isUnsupportedType(I->getType()))
131
0
      return true;
132
0
  return false;
133
0
}
134
135
/// Coerce the given type into an array with maximum allowed size of elements.
136
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
137
0
                                                   unsigned MaxSize) const {
138
  // Alignment and Size are measured in bits.
139
0
  const uint64_t Size = getContext().getTypeSize(Ty);
140
0
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
141
0
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
142
0
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
143
0
  const uint64_t NumElements = (Size + Div - 1) / Div;
144
0
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
145
0
}
146
147
0
ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
148
0
  if (RetTy->isVoidType())
149
0
    return ABIArgInfo::getIgnore();
150
151
0
  if (getContext().getLangOpts().OpenMP &&
152
0
      getContext().getLangOpts().OpenMPIsTargetDevice &&
153
0
      isUnsupportedType(RetTy))
154
0
    return coerceToIntArrayWithLimit(RetTy, 64);
155
156
  // note: this is different from default ABI
157
0
  if (!RetTy->isScalarType())
158
0
    return ABIArgInfo::getDirect();
159
160
  // Treat an enum type as its underlying type.
161
0
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
162
0
    RetTy = EnumTy->getDecl()->getIntegerType();
163
164
0
  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
165
0
                                               : ABIArgInfo::getDirect());
166
0
}
167
168
0
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
169
  // Treat an enum type as its underlying type.
170
0
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
171
0
    Ty = EnumTy->getDecl()->getIntegerType();
172
173
  // Return aggregates type as indirect by value
174
0
  if (isAggregateTypeForABI(Ty)) {
175
    // Under CUDA device compilation, tex/surf builtin types are replaced with
176
    // object types and passed directly.
177
0
    if (getContext().getLangOpts().CUDAIsDevice) {
178
0
      if (Ty->isCUDADeviceBuiltinSurfaceType())
179
0
        return ABIArgInfo::getDirect(
180
0
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
181
0
      if (Ty->isCUDADeviceBuiltinTextureType())
182
0
        return ABIArgInfo::getDirect(
183
0
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
184
0
    }
185
0
    return getNaturalAlignIndirect(Ty, /* byval */ true);
186
0
  }
187
188
0
  if (const auto *EIT = Ty->getAs<BitIntType>()) {
189
0
    if ((EIT->getNumBits() > 128) ||
190
0
        (!getContext().getTargetInfo().hasInt128Type() &&
191
0
         EIT->getNumBits() > 64))
192
0
      return getNaturalAlignIndirect(Ty, /* byval */ true);
193
0
  }
194
195
0
  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
196
0
                                            : ABIArgInfo::getDirect());
197
0
}
198
199
0
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
200
0
  if (!getCXXABI().classifyReturnType(FI))
201
0
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
202
0
  for (auto &I : FI.arguments())
203
0
    I.info = classifyArgumentType(I.type);
204
205
  // Always honor user-specified calling convention.
206
0
  if (FI.getCallingConvention() != llvm::CallingConv::C)
207
0
    return;
208
209
0
  FI.setEffectiveCallingConvention(getRuntimeCC());
210
0
}
211
212
Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
213
0
                                QualType Ty) const {
214
0
  llvm_unreachable("NVPTX does not support varargs");
215
0
}
216
217
void NVPTXTargetCodeGenInfo::setTargetAttributes(
218
0
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
219
0
  if (GV->isDeclaration())
220
0
    return;
221
0
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
222
0
  if (VD) {
223
0
    if (M.getLangOpts().CUDA) {
224
0
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
225
0
        addNVVMMetadata(GV, "surface", 1);
226
0
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
227
0
        addNVVMMetadata(GV, "texture", 1);
228
0
      return;
229
0
    }
230
0
  }
231
232
0
  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
233
0
  if (!FD) return;
234
235
0
  llvm::Function *F = cast<llvm::Function>(GV);
236
237
  // Perform special handling in OpenCL mode
238
0
  if (M.getLangOpts().OpenCL) {
239
    // Use OpenCL function attributes to check for kernel functions
240
    // By default, all functions are device functions
241
0
    if (FD->hasAttr<OpenCLKernelAttr>()) {
242
      // OpenCL __kernel functions get kernel metadata
243
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
244
0
      addNVVMMetadata(F, "kernel", 1);
245
      // And kernel functions are not subject to inlining
246
0
      F->addFnAttr(llvm::Attribute::NoInline);
247
0
    }
248
0
  }
249
250
  // Perform special handling in CUDA mode.
251
0
  if (M.getLangOpts().CUDA) {
252
    // CUDA __global__ functions get a kernel metadata entry.  Since
253
    // __global__ functions cannot be called from the device, we do not
254
    // need to set the noinline attribute.
255
0
    if (FD->hasAttr<CUDAGlobalAttr>()) {
256
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
257
0
      addNVVMMetadata(F, "kernel", 1);
258
0
    }
259
0
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
260
0
      M.handleCUDALaunchBoundsAttr(F, Attr);
261
0
  }
262
263
  // Attach kernel metadata directly if compiling for NVPTX.
264
0
  if (FD->hasAttr<NVPTXKernelAttr>()) {
265
0
    addNVVMMetadata(F, "kernel", 1);
266
0
  }
267
0
}
268
269
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
270
0
                                             StringRef Name, int Operand) {
271
0
  llvm::Module *M = GV->getParent();
272
0
  llvm::LLVMContext &Ctx = M->getContext();
273
274
  // Get "nvvm.annotations" metadata node
275
0
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
276
277
0
  llvm::Metadata *MDVals[] = {
278
0
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
279
0
      llvm::ConstantAsMetadata::get(
280
0
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
281
  // Append metadata to nvvm.annotations
282
0
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
283
0
}
284
285
0
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
286
0
  return false;
287
0
}
288
}
289
290
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
291
                                               const CUDALaunchBoundsAttr *Attr,
292
                                               int32_t *MaxThreadsVal,
293
                                               int32_t *MinBlocksVal,
294
0
                                               int32_t *MaxClusterRankVal) {
295
  // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
296
0
  llvm::APSInt MaxThreads(32);
297
0
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
298
0
  if (MaxThreads > 0) {
299
0
    if (MaxThreadsVal)
300
0
      *MaxThreadsVal = MaxThreads.getExtValue();
301
0
    if (F) {
302
      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
303
0
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
304
0
                                              MaxThreads.getExtValue());
305
0
    }
306
0
  }
307
308
  // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
309
  // was not specified in __launch_bounds__ or if the user specified a 0 value,
310
  // we don't have to add a PTX directive.
311
0
  if (Attr->getMinBlocks()) {
312
0
    llvm::APSInt MinBlocks(32);
313
0
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
314
0
    if (MinBlocks > 0) {
315
0
      if (MinBlocksVal)
316
0
        *MinBlocksVal = MinBlocks.getExtValue();
317
0
      if (F) {
318
        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
319
0
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
320
0
                                                MinBlocks.getExtValue());
321
0
      }
322
0
    }
323
0
  }
324
0
  if (Attr->getMaxBlocks()) {
325
0
    llvm::APSInt MaxBlocks(32);
326
0
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
327
0
    if (MaxBlocks > 0) {
328
0
      if (MaxClusterRankVal)
329
0
        *MaxClusterRankVal = MaxBlocks.getExtValue();
330
0
      if (F) {
331
        // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
332
0
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
333
0
                                                MaxBlocks.getExtValue());
334
0
      }
335
0
    }
336
0
  }
337
0
}
338
339
std::unique_ptr<TargetCodeGenInfo>
340
0
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
341
0
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
342
0
}