/src/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp
Line | Count | Source
1 | | //===- NVPTX.cpp ----------------------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "ABIInfoImpl.h" |
10 | | #include "TargetInfo.h" |
11 | | #include "llvm/IR/IntrinsicsNVPTX.h" |
12 | | |
13 | | using namespace clang; |
14 | | using namespace clang::CodeGen; |
15 | | |
16 | | //===----------------------------------------------------------------------===// |
17 | | // NVPTX ABI Implementation |
18 | | //===----------------------------------------------------------------------===// |
19 | | |
20 | | namespace { |
21 | | |
22 | | class NVPTXTargetCodeGenInfo; |
23 | | |
24 | | class NVPTXABIInfo : public ABIInfo { |
25 | | NVPTXTargetCodeGenInfo &CGInfo; |
26 | | |
27 | | public: |
28 | | NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) |
29 | 0 | : ABIInfo(CGT), CGInfo(Info) {} |
30 | | |
31 | | ABIArgInfo classifyReturnType(QualType RetTy) const; |
32 | | ABIArgInfo classifyArgumentType(QualType Ty) const; |
33 | | |
34 | | void computeInfo(CGFunctionInfo &FI) const override; |
35 | | Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, |
36 | | QualType Ty) const override; |
37 | | bool isUnsupportedType(QualType T) const; |
38 | | ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; |
39 | | }; |
40 | | |
41 | | class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { |
42 | | public: |
43 | | NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) |
44 | 0 | : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {} |
45 | | |
46 | | void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, |
47 | | CodeGen::CodeGenModule &M) const override; |
48 | | bool shouldEmitStaticExternCAliases() const override; |
49 | | |
50 | 0 | llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { |
51 | | // On the device side, a surface reference is represented as an object
52 | | // handle held in a 64-bit integer.
53 | 0 | return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); |
54 | 0 | } |
55 | | |
56 | 0 | llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { |
57 | | // On the device side, a texture reference is represented as an object
58 | | // handle held in a 64-bit integer.
59 | 0 | return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); |
60 | 0 | } |
61 | | |
62 | | bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, |
63 | 0 | LValue Src) const override { |
64 | 0 | emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); |
65 | 0 | return true; |
66 | 0 | } |
67 | | |
68 | | bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, |
69 | 0 | LValue Src) const override { |
70 | 0 | emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); |
71 | 0 | return true; |
72 | 0 | } |
73 | | |
74 | | // Builds an MDNode with GV, Name, and Operand as operands and appends it to
75 | | // the nvvm.annotations named metadata node.
76 | | static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, |
77 | | int Operand); |
78 | | |
79 | | private: |
80 | | static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, |
81 | 0 | LValue Src) { |
82 | 0 | llvm::Value *Handle = nullptr; |
83 | 0 | llvm::Constant *C = |
84 | 0 | llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer()); |
85 | | // Look through an `addrspacecast` on the constant pointer, if any.
86 | 0 | if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) |
87 | 0 | C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); |
88 | 0 | if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { |
89 | | // Load the handle from the specific global variable using |
90 | | // `nvvm.texsurf.handle.internal` intrinsic. |
91 | 0 | Handle = CGF.EmitRuntimeCall( |
92 | 0 | CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, |
93 | 0 | {GV->getType()}), |
94 | 0 | {GV}, "texsurf_handle"); |
95 | 0 | } else |
96 | 0 | Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); |
97 | 0 | CGF.EmitStoreOfScalar(Handle, Dst); |
98 | 0 | } |
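  // [Editor note] Two cases are handled above: when the source resolves to a
  // global texture/surface variable (possibly behind an addrspacecast), the
  // 64-bit handle is obtained via the llvm.nvvm.texsurf.handle.internal
  // intrinsic; otherwise the handle value itself is loaded from the source
  // lvalue. Either way, the handle is then stored to the destination.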
99 | | }; |
100 | | |
101 | | /// Checks whether the type is not directly supported by the current target.
102 | 0 | bool NVPTXABIInfo::isUnsupportedType(QualType T) const { |
103 | 0 | ASTContext &Context = getContext(); |
104 | 0 | if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) |
105 | 0 | return true; |
106 | 0 | if (!Context.getTargetInfo().hasFloat128Type() && |
107 | 0 | (T->isFloat128Type() || |
108 | 0 | (T->isRealFloatingType() && Context.getTypeSize(T) == 128))) |
109 | 0 | return true; |
110 | 0 | if (const auto *EIT = T->getAs<BitIntType>()) |
111 | 0 | return EIT->getNumBits() > |
112 | 0 | (Context.getTargetInfo().hasInt128Type() ? 128U : 64U); |
113 | 0 | if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && |
114 | 0 | Context.getTypeSize(T) > 64U) |
115 | 0 | return true; |
116 | 0 | if (const auto *AT = T->getAsArrayTypeUnsafe()) |
117 | 0 | return isUnsupportedType(AT->getElementType()); |
118 | 0 | const auto *RT = T->getAs<RecordType>(); |
119 | 0 | if (!RT) |
120 | 0 | return false; |
121 | 0 | const RecordDecl *RD = RT->getDecl(); |
122 | | |
123 | | // If this is a C++ record, check the bases first. |
124 | 0 | if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) |
125 | 0 | for (const CXXBaseSpecifier &I : CXXRD->bases()) |
126 | 0 | if (isUnsupportedType(I.getType())) |
127 | 0 | return true; |
128 | | |
129 | 0 | for (const FieldDecl *I : RD->fields()) |
130 | 0 | if (isUnsupportedType(I->getType())) |
131 | 0 | return true; |
132 | 0 | return false; |
133 | 0 | } |
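// [Editor sketch, not from the upstream file] Examples of declarations the
// check above would reject on a target where hasFloat16Type(),
// hasFloat128Type() and hasInt128Type() all return false:
//
//   _Float16 h;                    // no native fp16 support
//   __float128 q;                  // no native fp128 support
//   _BitInt(96) b;                 // wider than 64 bits without i128 support
//   struct S { __float128 q; };    // rejected transitively via its field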
134 | | |
135 | | /// Coerce the given type into an array of integers, each at most MaxSize bits wide.
136 | | ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty, |
137 | 0 | unsigned MaxSize) const { |
138 | | // Alignment and Size are measured in bits. |
139 | 0 | const uint64_t Size = getContext().getTypeSize(Ty); |
140 | 0 | const uint64_t Alignment = getContext().getTypeAlign(Ty); |
141 | 0 | const unsigned Div = std::min<unsigned>(MaxSize, Alignment); |
142 | 0 | llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div); |
143 | 0 | const uint64_t NumElements = (Size + Div - 1) / Div; |
144 | 0 | return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); |
145 | 0 | } |
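// [Editor sketch] A minimal, standalone illustration of the arithmetic used by
// coerceToIntArrayWithLimit above; it mirrors the formula rather than calling
// into clang, and the example sizes and alignments are assumed, not drawn from
// real types. (classifyReturnType below calls the real routine with MaxSize = 64.)
#include <algorithm>
#include <cstdint>
#include <cstdio>

static void coercedShape(std::uint64_t SizeBits, std::uint64_t AlignBits,
                         unsigned MaxSizeBits) {
  // Element width is capped at MaxSizeBits and never exceeds the alignment.
  const std::uint64_t Div = std::min<std::uint64_t>(MaxSizeBits, AlignBits);
  // Round the total size up to a whole number of elements.
  const std::uint64_t NumElements = (SizeBits + Div - 1) / Div;
  std::printf("[%llu x i%llu]\n", (unsigned long long)NumElements,
              (unsigned long long)Div);
}

int main() {
  coercedShape(96, 32, 64);  // prints [3 x i32]
  coercedShape(128, 64, 64); // prints [2 x i64]
  return 0;
}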
146 | | |
147 | 0 | ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { |
148 | 0 | if (RetTy->isVoidType()) |
149 | 0 | return ABIArgInfo::getIgnore(); |
150 | | |
151 | 0 | if (getContext().getLangOpts().OpenMP && |
152 | 0 | getContext().getLangOpts().OpenMPIsTargetDevice && |
153 | 0 | isUnsupportedType(RetTy)) |
154 | 0 | return coerceToIntArrayWithLimit(RetTy, 64); |
155 | | |
156 | | // Note: unlike the default ABI, non-scalar return types are returned directly.
157 | 0 | if (!RetTy->isScalarType()) |
158 | 0 | return ABIArgInfo::getDirect(); |
159 | | |
160 | | // Treat an enum type as its underlying type. |
161 | 0 | if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) |
162 | 0 | RetTy = EnumTy->getDecl()->getIntegerType(); |
163 | |
164 | 0 | return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) |
165 | 0 | : ABIArgInfo::getDirect()); |
166 | 0 | } |
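// [Editor sketch] Illustrative return-type classifications under the rules
// above (types hypothetical): a bool or short return value is extended, an int
// or float is returned direct, and a small struct is also returned direct
// rather than via an sret pointer, which is the difference from the default
// ABI noted above.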
167 | | |
168 | 0 | ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { |
169 | | // Treat an enum type as its underlying type. |
170 | 0 | if (const EnumType *EnumTy = Ty->getAs<EnumType>()) |
171 | 0 | Ty = EnumTy->getDecl()->getIntegerType(); |
172 | | |
173 | | // Classify aggregate types as indirect, passed byval.
174 | 0 | if (isAggregateTypeForABI(Ty)) { |
175 | | // Under CUDA device compilation, tex/surf builtin types are replaced with |
176 | | // object types and passed directly. |
177 | 0 | if (getContext().getLangOpts().CUDAIsDevice) { |
178 | 0 | if (Ty->isCUDADeviceBuiltinSurfaceType()) |
179 | 0 | return ABIArgInfo::getDirect( |
180 | 0 | CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); |
181 | 0 | if (Ty->isCUDADeviceBuiltinTextureType()) |
182 | 0 | return ABIArgInfo::getDirect( |
183 | 0 | CGInfo.getCUDADeviceBuiltinTextureDeviceType()); |
184 | 0 | } |
185 | 0 | return getNaturalAlignIndirect(Ty, /* byval */ true); |
186 | 0 | } |
187 | | |
188 | 0 | if (const auto *EIT = Ty->getAs<BitIntType>()) { |
189 | 0 | if ((EIT->getNumBits() > 128) || |
190 | 0 | (!getContext().getTargetInfo().hasInt128Type() && |
191 | 0 | EIT->getNumBits() > 64)) |
192 | 0 | return getNaturalAlignIndirect(Ty, /* byval */ true); |
193 | 0 | } |
194 | | |
195 | 0 | return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) |
196 | 0 | : ABIArgInfo::getDirect()); |
197 | 0 | } |
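// [Editor sketch] Illustrative argument classifications under the rules above
// (declarations hypothetical):
//
//   enum E : short            -> extended as its underlying integer type
//   struct Pair { int a, b; } -> indirect, passed byval at natural alignment
//   _BitInt(256)              -> indirect byval, since it exceeds 128 bits
//   CUDA builtin texture type -> direct, lowered to the i64 handle type when
//                                compiling for the device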
198 | | |
199 | 0 | void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { |
200 | 0 | if (!getCXXABI().classifyReturnType(FI)) |
201 | 0 | FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); |
202 | 0 | for (auto &I : FI.arguments()) |
203 | 0 | I.info = classifyArgumentType(I.type); |
204 | | |
205 | | // Always honor user-specified calling convention. |
206 | 0 | if (FI.getCallingConvention() != llvm::CallingConv::C) |
207 | 0 | return; |
208 | | |
209 | 0 | FI.setEffectiveCallingConvention(getRuntimeCC()); |
210 | 0 | } |
211 | | |
212 | | Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, |
213 | 0 | QualType Ty) const { |
214 | 0 | llvm_unreachable("NVPTX does not support varargs"); |
215 | 0 | } |
216 | | |
217 | | void NVPTXTargetCodeGenInfo::setTargetAttributes( |
218 | 0 | const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { |
219 | 0 | if (GV->isDeclaration()) |
220 | 0 | return; |
221 | 0 | const VarDecl *VD = dyn_cast_or_null<VarDecl>(D); |
222 | 0 | if (VD) { |
223 | 0 | if (M.getLangOpts().CUDA) { |
224 | 0 | if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) |
225 | 0 | addNVVMMetadata(GV, "surface", 1); |
226 | 0 | else if (VD->getType()->isCUDADeviceBuiltinTextureType()) |
227 | 0 | addNVVMMetadata(GV, "texture", 1); |
228 | 0 | return; |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | 0 | const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); |
233 | 0 | if (!FD) return; |
234 | | |
235 | 0 | llvm::Function *F = cast<llvm::Function>(GV); |
236 | | |
237 | | // Perform special handling in OpenCL mode |
238 | 0 | if (M.getLangOpts().OpenCL) { |
239 | | // Use OpenCL function attributes to check for kernel functions |
240 | | // By default, all functions are device functions |
241 | 0 | if (FD->hasAttr<OpenCLKernelAttr>()) { |
242 | | // OpenCL __kernel functions get kernel metadata |
243 | | // Create !{<func-ref>, metadata !"kernel", i32 1} node |
244 | 0 | addNVVMMetadata(F, "kernel", 1); |
245 | | // And kernel functions are not subject to inlining |
246 | 0 | F->addFnAttr(llvm::Attribute::NoInline); |
247 | 0 | } |
248 | 0 | } |
249 | | |
250 | | // Perform special handling in CUDA mode. |
251 | 0 | if (M.getLangOpts().CUDA) { |
252 | | // CUDA __global__ functions get a kernel metadata entry. Since |
253 | | // __global__ functions cannot be called from the device, we do not |
254 | | // need to set the noinline attribute. |
255 | 0 | if (FD->hasAttr<CUDAGlobalAttr>()) { |
256 | | // Create !{<func-ref>, metadata !"kernel", i32 1} node |
257 | 0 | addNVVMMetadata(F, "kernel", 1); |
258 | 0 | } |
259 | 0 | if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) |
260 | 0 | M.handleCUDALaunchBoundsAttr(F, Attr); |
261 | 0 | } |
262 | | |
263 | | // Attach kernel metadata directly if compiling for NVPTX. |
264 | 0 | if (FD->hasAttr<NVPTXKernelAttr>()) { |
265 | 0 | addNVVMMetadata(F, "kernel", 1); |
266 | 0 | } |
267 | 0 | } |
268 | | |
269 | | void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, |
270 | 0 | StringRef Name, int Operand) { |
271 | 0 | llvm::Module *M = GV->getParent(); |
272 | 0 | llvm::LLVMContext &Ctx = M->getContext(); |
273 | | |
274 | | // Get "nvvm.annotations" metadata node |
275 | 0 | llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); |
276 | |
277 | 0 | llvm::Metadata *MDVals[] = { |
278 | 0 | llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), |
279 | 0 | llvm::ConstantAsMetadata::get( |
280 | 0 | llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; |
281 | | // Append metadata to nvvm.annotations |
282 | 0 | MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); |
283 | 0 | } |
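// [Editor sketch, function name assumed] The node appended above typically
// shows up in the emitted IR as an operand of the nvvm.annotations named
// metadata, e.g. for a kernel annotation:
//
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @my_kernel, !"kernel", i32 1}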
284 | | |
285 | 0 | bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { |
286 | 0 | return false; |
287 | 0 | } |
288 | | } |
289 | | |
290 | | void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, |
291 | | const CUDALaunchBoundsAttr *Attr, |
292 | | int32_t *MaxThreadsVal, |
293 | | int32_t *MinBlocksVal, |
294 | 0 | int32_t *MaxClusterRankVal) { |
295 | | // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node |
296 | 0 | llvm::APSInt MaxThreads(32); |
297 | 0 | MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext()); |
298 | 0 | if (MaxThreads > 0) { |
299 | 0 | if (MaxThreadsVal) |
300 | 0 | *MaxThreadsVal = MaxThreads.getExtValue(); |
301 | 0 | if (F) { |
302 | | // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node |
303 | 0 | NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", |
304 | 0 | MaxThreads.getExtValue()); |
305 | 0 | } |
306 | 0 | } |
307 | | |
308 | | // Min and max blocks are optional arguments for CUDALaunchBoundsAttr. If they
309 | | // were not specified in __launch_bounds__, or if the user specified a 0 value,
310 | | // we don't have to add a PTX directive.
311 | 0 | if (Attr->getMinBlocks()) { |
312 | 0 | llvm::APSInt MinBlocks(32); |
313 | 0 | MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext()); |
314 | 0 | if (MinBlocks > 0) { |
315 | 0 | if (MinBlocksVal) |
316 | 0 | *MinBlocksVal = MinBlocks.getExtValue(); |
317 | 0 | if (F) { |
318 | | // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node |
319 | 0 | NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", |
320 | 0 | MinBlocks.getExtValue()); |
321 | 0 | } |
322 | 0 | } |
323 | 0 | } |
324 | 0 | if (Attr->getMaxBlocks()) { |
325 | 0 | llvm::APSInt MaxBlocks(32); |
326 | 0 | MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext()); |
327 | 0 | if (MaxBlocks > 0) { |
328 | 0 | if (MaxClusterRankVal) |
329 | 0 | *MaxClusterRankVal = MaxBlocks.getExtValue(); |
330 | 0 | if (F) { |
331 | | // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node |
332 | 0 | NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", |
333 | 0 | MaxBlocks.getExtValue()); |
334 | 0 | } |
335 | 0 | } |
336 | 0 | } |
337 | 0 | } |
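// [Editor sketch, kernel name and bounds assumed] For a CUDA kernel declared as
//
//   __global__ void __launch_bounds__(256, 2) my_kernel();
//
// the routine above records MaxThreads = 256 and MinBlocks = 2 and, when a
// function is supplied, attaches the corresponding annotations via
// addNVVMMetadata:
//
//   !{ptr @my_kernel, !"maxntidx", i32 256}
//   !{ptr @my_kernel, !"minctasm", i32 2}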
338 | | |
339 | | std::unique_ptr<TargetCodeGenInfo> |
340 | 0 | CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) { |
341 | 0 | return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes()); |
342 | 0 | } |