Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/Basic/Targets/NVPTX.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements NVPTX TargetInfo objects.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "NVPTX.h"
14
#include "Targets.h"
15
#include "clang/Basic/Builtins.h"
16
#include "clang/Basic/MacroBuilder.h"
17
#include "clang/Basic/TargetBuiltins.h"
18
#include "llvm/ADT/StringSwitch.h"
19
20
using namespace clang;
21
using namespace clang::targets;
22
23
// Table of builtin descriptors for this target. Its entries are produced by
// expanding clang/Basic/BuiltinsNVPTX.def with the three macros defined below:
//  - BUILTIN:        fourth field is nullptr, header is HeaderDesc::NO_HEADER.
//  - LIBBUILTIN:     fourth field is nullptr, header is HeaderDesc::<HEADER>.
//  - TARGET_BUILTIN: fourth field is FEATURE, header is HeaderDesc::NO_HEADER.
// Every entry stringifies ID as its first field and uses ALL_LANGUAGES.
static constexpr Builtin::Info BuiltinInfo[] = {
24
#define BUILTIN(ID, TYPE, ATTRS)                                               \
25
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
26
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
27
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
28
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
29
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
30
#include "clang/Basic/BuiltinsNVPTX.def"
31
};
32
33
// Register-name table handed out by getGCCRegNames(); it lists a single
// name, "r0".
const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};
34
35
/// Builds the NVPTX target description.
///
/// \param Triple             Target triple, forwarded to the TargetInfo base.
/// \param Opts               Target options. Read here for the written
///                           feature list ("+ptx<N>"), NVPTXUseShortPointers
///                           and HostTriple.
/// \param TargetPointerWidth Pointer width in bits; asserted to be 32 or 64.
///
/// If the host triple is not itself NVPTX and a host TargetInfo can be
/// allocated for it, the basic type widths, alignments and integer typedef
/// kinds are copied from that host target. Otherwise they are derived from
/// \p TargetPointerWidth and the constructor returns early.
NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  // Default PTX version; every well-formed "+ptx<N>" feature overrides it, so
  // the last such entry in the list wins.
  PTXVersion = 32;
  for (const StringRef Written : Opts.FeaturesAsWritten) {
    if (Written.starts_with("+ptx")) {
      int Parsed;
      // getAsInteger() returns true on failure; only accept clean parses.
      if (!Written.drop_front(4).getAsInteger(10, Parsed))
        PTXVersion = Parsed; // TODO: should it be max(PTXVersion, Parsed)?
    }
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;

  // __bf16 is always available as a load/store only type.
  BFloat16Align = 16;
  BFloat16Width = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  NoAsmVariants = true;
  GPU = CudaArch::SM_20;

  // Pick the data layout string: 32-bit pointers, 64-bit with short pointers
  // for address spaces 3/4/5, or the plain 64-bit layout.
  const char *Layout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64";
  if (TargetPointerWidth == 32)
    Layout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64";
  else if (Opts.NVPTXUseShortPointers)
    Layout =
        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64";
  resetDataLayout(Layout);

  // If possible, get a TargetInfo for the host triple so its types can be
  // matched. An NVPTX "host" is never used for this.
  const llvm::Triple HostTT(Opts.HostTriple);
  if (!HostTT.isNVPTX())
    HostTarget = AllocateTarget(HostTT, Opts);

  // Without a host target, guess the layout from the pointer width and stop.
  if (!HostTarget) {
    LongAlign = TargetPointerWidth;
    LongWidth = LongAlign;
    PointerAlign = TargetPointerWidth;
    PointerWidth = PointerAlign;

    if (TargetPointerWidth == 32) {
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
    } else if (TargetPointerWidth == 64) {
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
    } else {
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from the host target.
  const TargetInfo &Host = *HostTarget;

  // Widths and alignments of the fundamental types.
  PointerWidth = Host.getPointerWidth(LangAS::Default);
  PointerAlign = Host.getPointerAlign(LangAS::Default);
  BoolWidth = Host.getBoolWidth();
  BoolAlign = Host.getBoolAlign();
  IntWidth = Host.getIntWidth();
  IntAlign = Host.getIntAlign();
  HalfWidth = Host.getHalfWidth();
  HalfAlign = Host.getHalfAlign();
  FloatWidth = Host.getFloatWidth();
  FloatAlign = Host.getFloatAlign();
  DoubleWidth = Host.getDoubleWidth();
  DoubleAlign = Host.getDoubleAlign();
  LongWidth = Host.getLongWidth();
  LongAlign = Host.getLongAlign();
  LongLongWidth = Host.getLongLongWidth();
  LongLongAlign = Host.getLongLongAlign();
  MinGlobalAlign = Host.getMinGlobalAlign(/* TypeSize = */ 0);
  NewAlign = Host.getNewAlign();
  DefaultAlignForAttributeAligned = Host.getDefaultAlignForAttributeAligned();

  // Integer kinds behind the standard typedefs.
  SizeType = Host.getSizeType();
  IntMaxType = Host.getIntMaxType();
  PtrDiffType = Host.getPtrDiffType(LangAS::Default);
  IntPtrType = Host.getIntPtrType();
  WCharType = Host.getWCharType();
  WIntType = Host.getWIntType();
  Char16Type = Host.getChar16Type();
  Char32Type = Host.getChar32Type();
  Int64Type = Host.getInt64Type();
  SigAtomicType = Host.getSigAtomicType();
  ProcessIDType = Host.getProcessIDType();

  // Bit-field layout rules.
  UseBitFieldTypeAlignment = Host.useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = Host.useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = Host.useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = Host.getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // those macros must be identical on host and device because (among other
  // things) they affect which standard library classes are defined, and all
  // classes need to be defined on both the host and device.
  MaxAtomicInlineWidth = Host.getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}
156
157
0
/// Returns a view over the static GCCRegNames table.
ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return ArrayRef<const char *>(GCCRegNames);
}
160
161
0
/// Returns true only when \p Feature is exactly "ptx" or "nvptx".
bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return Feature == "ptx" || Feature == "nvptx";
}
166
167
void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
168
0
                                       MacroBuilder &Builder) const {
169
0
  Builder.defineMacro("__PTX__");
170
0
  Builder.defineMacro("__NVPTX__");
171
0
  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
172
    // Set __CUDA_ARCH__ for the GPU specified.
173
0
    std::string CUDAArchCode = [this] {
174
0
      switch (GPU) {
175
0
      case CudaArch::GFX600:
176
0
      case CudaArch::GFX601:
177
0
      case CudaArch::GFX602:
178
0
      case CudaArch::GFX700:
179
0
      case CudaArch::GFX701:
180
0
      case CudaArch::GFX702:
181
0
      case CudaArch::GFX703:
182
0
      case CudaArch::GFX704:
183
0
      case CudaArch::GFX705:
184
0
      case CudaArch::GFX801:
185
0
      case CudaArch::GFX802:
186
0
      case CudaArch::GFX803:
187
0
      case CudaArch::GFX805:
188
0
      case CudaArch::GFX810:
189
0
      case CudaArch::GFX900:
190
0
      case CudaArch::GFX902:
191
0
      case CudaArch::GFX904:
192
0
      case CudaArch::GFX906:
193
0
      case CudaArch::GFX908:
194
0
      case CudaArch::GFX909:
195
0
      case CudaArch::GFX90a:
196
0
      case CudaArch::GFX90c:
197
0
      case CudaArch::GFX940:
198
0
      case CudaArch::GFX941:
199
0
      case CudaArch::GFX942:
200
0
      case CudaArch::GFX1010:
201
0
      case CudaArch::GFX1011:
202
0
      case CudaArch::GFX1012:
203
0
      case CudaArch::GFX1013:
204
0
      case CudaArch::GFX1030:
205
0
      case CudaArch::GFX1031:
206
0
      case CudaArch::GFX1032:
207
0
      case CudaArch::GFX1033:
208
0
      case CudaArch::GFX1034:
209
0
      case CudaArch::GFX1035:
210
0
      case CudaArch::GFX1036:
211
0
      case CudaArch::GFX1100:
212
0
      case CudaArch::GFX1101:
213
0
      case CudaArch::GFX1102:
214
0
      case CudaArch::GFX1103:
215
0
      case CudaArch::GFX1150:
216
0
      case CudaArch::GFX1151:
217
0
      case CudaArch::GFX1200:
218
0
      case CudaArch::GFX1201:
219
0
      case CudaArch::Generic:
220
0
      case CudaArch::LAST:
221
0
        break;
222
0
      case CudaArch::UNUSED:
223
0
      case CudaArch::UNKNOWN:
224
0
        assert(false && "No GPU arch when compiling CUDA device code.");
225
0
        return "";
226
0
      case CudaArch::SM_20:
227
0
        return "200";
228
0
      case CudaArch::SM_21:
229
0
        return "210";
230
0
      case CudaArch::SM_30:
231
0
        return "300";
232
0
      case CudaArch::SM_32:
233
0
        return "320";
234
0
      case CudaArch::SM_35:
235
0
        return "350";
236
0
      case CudaArch::SM_37:
237
0
        return "370";
238
0
      case CudaArch::SM_50:
239
0
        return "500";
240
0
      case CudaArch::SM_52:
241
0
        return "520";
242
0
      case CudaArch::SM_53:
243
0
        return "530";
244
0
      case CudaArch::SM_60:
245
0
        return "600";
246
0
      case CudaArch::SM_61:
247
0
        return "610";
248
0
      case CudaArch::SM_62:
249
0
        return "620";
250
0
      case CudaArch::SM_70:
251
0
        return "700";
252
0
      case CudaArch::SM_72:
253
0
        return "720";
254
0
      case CudaArch::SM_75:
255
0
        return "750";
256
0
      case CudaArch::SM_80:
257
0
        return "800";
258
0
      case CudaArch::SM_86:
259
0
        return "860";
260
0
      case CudaArch::SM_87:
261
0
        return "870";
262
0
      case CudaArch::SM_89:
263
0
        return "890";
264
0
      case CudaArch::SM_90:
265
0
      case CudaArch::SM_90a:
266
0
        return "900";
267
0
      }
268
0
      llvm_unreachable("unhandled CudaArch");
269
0
    }();
270
0
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
271
0
    if (GPU == CudaArch::SM_90a)
272
0
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
273
0
  }
274
0
}
275
276
0
/// Returns a view over the static BuiltinInfo table, sized as
/// NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin entries.
ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  constexpr unsigned NumTargetBuiltins =
      clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;
  return llvm::ArrayRef(BuiltinInfo, NumTargetBuiltins);
}