/src/llvm-project/clang/lib/Basic/Targets/NVPTX.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements NVPTX TargetInfo objects. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "NVPTX.h" |
14 | | #include "Targets.h" |
15 | | #include "clang/Basic/Builtins.h" |
16 | | #include "clang/Basic/MacroBuilder.h" |
17 | | #include "clang/Basic/TargetBuiltins.h" |
18 | | #include "llvm/ADT/StringSwitch.h" |
19 | | |
20 | | using namespace clang; |
21 | | using namespace clang::targets; |
22 | | |
23 | | static constexpr Builtin::Info BuiltinInfo[] = { |
24 | | #define BUILTIN(ID, TYPE, ATTRS) \ |
25 | | {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, |
26 | | #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ |
27 | | {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES}, |
28 | | #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ |
29 | | {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, |
30 | | #include "clang/Basic/BuiltinsNVPTX.def" |
31 | | }; |
32 | | |
33 | | const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"}; |
34 | | |
35 | | NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, |
36 | | const TargetOptions &Opts, |
37 | | unsigned TargetPointerWidth) |
38 | 0 | : TargetInfo(Triple) { |
39 | 0 | assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && |
40 | 0 | "NVPTX only supports 32- and 64-bit modes."); |
41 | | |
42 | 0 | PTXVersion = 32; |
43 | 0 | for (const StringRef Feature : Opts.FeaturesAsWritten) { |
44 | 0 | int PTXV; |
45 | 0 | if (!Feature.starts_with("+ptx") || |
46 | 0 | Feature.drop_front(4).getAsInteger(10, PTXV)) |
47 | 0 | continue; |
48 | 0 | PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)? |
49 | 0 | } |
50 | |
|
51 | 0 | TLSSupported = false; |
52 | 0 | VLASupported = false; |
53 | 0 | AddrSpaceMap = &NVPTXAddrSpaceMap; |
54 | 0 | UseAddrSpaceMapMangling = true; |
55 | | // __bf16 is always available as a load/store only type. |
56 | 0 | BFloat16Width = BFloat16Align = 16; |
57 | 0 | BFloat16Format = &llvm::APFloat::BFloat(); |
58 | | |
59 | | // Define available target features |
60 | | // These must be defined in sorted order! |
61 | 0 | NoAsmVariants = true; |
62 | 0 | GPU = CudaArch::SM_20; |
63 | |
|
64 | 0 | if (TargetPointerWidth == 32) |
65 | 0 | resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); |
66 | 0 | else if (Opts.NVPTXUseShortPointers) |
67 | 0 | resetDataLayout( |
68 | 0 | "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); |
69 | 0 | else |
70 | 0 | resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); |
71 | | |
72 | | // If possible, get a TargetInfo for our host triple, so we can match its |
73 | | // types. |
74 | 0 | llvm::Triple HostTriple(Opts.HostTriple); |
75 | 0 | if (!HostTriple.isNVPTX()) |
76 | 0 | HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts); |
77 | | |
78 | | // If no host target, make some guesses about the data layout and return. |
79 | 0 | if (!HostTarget) { |
80 | 0 | LongWidth = LongAlign = TargetPointerWidth; |
81 | 0 | PointerWidth = PointerAlign = TargetPointerWidth; |
82 | 0 | switch (TargetPointerWidth) { |
83 | 0 | case 32: |
84 | 0 | SizeType = TargetInfo::UnsignedInt; |
85 | 0 | PtrDiffType = TargetInfo::SignedInt; |
86 | 0 | IntPtrType = TargetInfo::SignedInt; |
87 | 0 | break; |
88 | 0 | case 64: |
89 | 0 | SizeType = TargetInfo::UnsignedLong; |
90 | 0 | PtrDiffType = TargetInfo::SignedLong; |
91 | 0 | IntPtrType = TargetInfo::SignedLong; |
92 | 0 | break; |
93 | 0 | default: |
94 | 0 | llvm_unreachable("TargetPointerWidth must be 32 or 64"); |
95 | 0 | } |
96 | | |
97 | 0 | MaxAtomicInlineWidth = TargetPointerWidth; |
98 | 0 | return; |
99 | 0 | } |
100 | | |
101 | | // Copy properties from host target. |
102 | 0 | PointerWidth = HostTarget->getPointerWidth(LangAS::Default); |
103 | 0 | PointerAlign = HostTarget->getPointerAlign(LangAS::Default); |
104 | 0 | BoolWidth = HostTarget->getBoolWidth(); |
105 | 0 | BoolAlign = HostTarget->getBoolAlign(); |
106 | 0 | IntWidth = HostTarget->getIntWidth(); |
107 | 0 | IntAlign = HostTarget->getIntAlign(); |
108 | 0 | HalfWidth = HostTarget->getHalfWidth(); |
109 | 0 | HalfAlign = HostTarget->getHalfAlign(); |
110 | 0 | FloatWidth = HostTarget->getFloatWidth(); |
111 | 0 | FloatAlign = HostTarget->getFloatAlign(); |
112 | 0 | DoubleWidth = HostTarget->getDoubleWidth(); |
113 | 0 | DoubleAlign = HostTarget->getDoubleAlign(); |
114 | 0 | LongWidth = HostTarget->getLongWidth(); |
115 | 0 | LongAlign = HostTarget->getLongAlign(); |
116 | 0 | LongLongWidth = HostTarget->getLongLongWidth(); |
117 | 0 | LongLongAlign = HostTarget->getLongLongAlign(); |
118 | 0 | MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0); |
119 | 0 | NewAlign = HostTarget->getNewAlign(); |
120 | 0 | DefaultAlignForAttributeAligned = |
121 | 0 | HostTarget->getDefaultAlignForAttributeAligned(); |
122 | 0 | SizeType = HostTarget->getSizeType(); |
123 | 0 | IntMaxType = HostTarget->getIntMaxType(); |
124 | 0 | PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default); |
125 | 0 | IntPtrType = HostTarget->getIntPtrType(); |
126 | 0 | WCharType = HostTarget->getWCharType(); |
127 | 0 | WIntType = HostTarget->getWIntType(); |
128 | 0 | Char16Type = HostTarget->getChar16Type(); |
129 | 0 | Char32Type = HostTarget->getChar32Type(); |
130 | 0 | Int64Type = HostTarget->getInt64Type(); |
131 | 0 | SigAtomicType = HostTarget->getSigAtomicType(); |
132 | 0 | ProcessIDType = HostTarget->getProcessIDType(); |
133 | |
|
134 | 0 | UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); |
135 | 0 | UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment(); |
136 | 0 | UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); |
137 | 0 | ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); |
138 | | |
139 | | // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and |
140 | | // we need those macros to be identical on host and device, because (among |
141 | | // other things) they affect which standard library classes are defined, and |
142 | | // we need all classes to be defined on both the host and device. |
143 | 0 | MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); |
144 | | |
145 | | // Properties intentionally not copied from host: |
146 | | // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the |
147 | | // host/device boundary. |
148 | | // - SuitableAlign: Not visible across the host/device boundary, and may |
149 | | // correctly be different on host/device, e.g. if host has wider vector |
150 | | // types than device. |
151 | | // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same |
152 | | // as its double type, but that's not necessarily true on the host. |
153 | | // TODO: nvcc emits a warning when using long double on device; we should |
154 | | // do the same. |
155 | 0 | } |
156 | | |
157 | 0 | ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { |
158 | 0 | return llvm::ArrayRef(GCCRegNames); |
159 | 0 | } |
160 | | |
161 | 0 | bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { |
162 | 0 | return llvm::StringSwitch<bool>(Feature) |
163 | 0 | .Cases("ptx", "nvptx", true) |
164 | 0 | .Default(false); |
165 | 0 | } |
166 | | |
167 | | void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, |
168 | 0 | MacroBuilder &Builder) const { |
169 | 0 | Builder.defineMacro("__PTX__"); |
170 | 0 | Builder.defineMacro("__NVPTX__"); |
171 | 0 | if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { |
172 | | // Set __CUDA_ARCH__ for the GPU specified. |
173 | 0 | std::string CUDAArchCode = [this] { |
174 | 0 | switch (GPU) { |
175 | 0 | case CudaArch::GFX600: |
176 | 0 | case CudaArch::GFX601: |
177 | 0 | case CudaArch::GFX602: |
178 | 0 | case CudaArch::GFX700: |
179 | 0 | case CudaArch::GFX701: |
180 | 0 | case CudaArch::GFX702: |
181 | 0 | case CudaArch::GFX703: |
182 | 0 | case CudaArch::GFX704: |
183 | 0 | case CudaArch::GFX705: |
184 | 0 | case CudaArch::GFX801: |
185 | 0 | case CudaArch::GFX802: |
186 | 0 | case CudaArch::GFX803: |
187 | 0 | case CudaArch::GFX805: |
188 | 0 | case CudaArch::GFX810: |
189 | 0 | case CudaArch::GFX900: |
190 | 0 | case CudaArch::GFX902: |
191 | 0 | case CudaArch::GFX904: |
192 | 0 | case CudaArch::GFX906: |
193 | 0 | case CudaArch::GFX908: |
194 | 0 | case CudaArch::GFX909: |
195 | 0 | case CudaArch::GFX90a: |
196 | 0 | case CudaArch::GFX90c: |
197 | 0 | case CudaArch::GFX940: |
198 | 0 | case CudaArch::GFX941: |
199 | 0 | case CudaArch::GFX942: |
200 | 0 | case CudaArch::GFX1010: |
201 | 0 | case CudaArch::GFX1011: |
202 | 0 | case CudaArch::GFX1012: |
203 | 0 | case CudaArch::GFX1013: |
204 | 0 | case CudaArch::GFX1030: |
205 | 0 | case CudaArch::GFX1031: |
206 | 0 | case CudaArch::GFX1032: |
207 | 0 | case CudaArch::GFX1033: |
208 | 0 | case CudaArch::GFX1034: |
209 | 0 | case CudaArch::GFX1035: |
210 | 0 | case CudaArch::GFX1036: |
211 | 0 | case CudaArch::GFX1100: |
212 | 0 | case CudaArch::GFX1101: |
213 | 0 | case CudaArch::GFX1102: |
214 | 0 | case CudaArch::GFX1103: |
215 | 0 | case CudaArch::GFX1150: |
216 | 0 | case CudaArch::GFX1151: |
217 | 0 | case CudaArch::GFX1200: |
218 | 0 | case CudaArch::GFX1201: |
219 | 0 | case CudaArch::Generic: |
220 | 0 | case CudaArch::LAST: |
221 | 0 | break; |
222 | 0 | case CudaArch::UNUSED: |
223 | 0 | case CudaArch::UNKNOWN: |
224 | 0 | assert(false && "No GPU arch when compiling CUDA device code."); |
225 | 0 | return ""; |
226 | 0 | case CudaArch::SM_20: |
227 | 0 | return "200"; |
228 | 0 | case CudaArch::SM_21: |
229 | 0 | return "210"; |
230 | 0 | case CudaArch::SM_30: |
231 | 0 | return "300"; |
232 | 0 | case CudaArch::SM_32: |
233 | 0 | return "320"; |
234 | 0 | case CudaArch::SM_35: |
235 | 0 | return "350"; |
236 | 0 | case CudaArch::SM_37: |
237 | 0 | return "370"; |
238 | 0 | case CudaArch::SM_50: |
239 | 0 | return "500"; |
240 | 0 | case CudaArch::SM_52: |
241 | 0 | return "520"; |
242 | 0 | case CudaArch::SM_53: |
243 | 0 | return "530"; |
244 | 0 | case CudaArch::SM_60: |
245 | 0 | return "600"; |
246 | 0 | case CudaArch::SM_61: |
247 | 0 | return "610"; |
248 | 0 | case CudaArch::SM_62: |
249 | 0 | return "620"; |
250 | 0 | case CudaArch::SM_70: |
251 | 0 | return "700"; |
252 | 0 | case CudaArch::SM_72: |
253 | 0 | return "720"; |
254 | 0 | case CudaArch::SM_75: |
255 | 0 | return "750"; |
256 | 0 | case CudaArch::SM_80: |
257 | 0 | return "800"; |
258 | 0 | case CudaArch::SM_86: |
259 | 0 | return "860"; |
260 | 0 | case CudaArch::SM_87: |
261 | 0 | return "870"; |
262 | 0 | case CudaArch::SM_89: |
263 | 0 | return "890"; |
264 | 0 | case CudaArch::SM_90: |
265 | 0 | case CudaArch::SM_90a: |
266 | 0 | return "900"; |
267 | 0 | } |
268 | 0 | llvm_unreachable("unhandled CudaArch"); |
269 | 0 | }(); |
270 | 0 | Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); |
271 | 0 | if (GPU == CudaArch::SM_90a) |
272 | 0 | Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); |
273 | 0 | } |
274 | 0 | } |
275 | | |
276 | 0 | ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const { |
277 | 0 | return llvm::ArrayRef(BuiltinInfo, |
278 | 0 | clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin); |
279 | 0 | } |