/src/llvm-project/clang/lib/Basic/Targets/AMDGPU.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file declares AMDGPU TargetInfo objects. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
14 | | #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
15 | | |
16 | | #include "clang/Basic/TargetID.h" |
17 | | #include "clang/Basic/TargetInfo.h" |
18 | | #include "clang/Basic/TargetOptions.h" |
19 | | #include "llvm/ADT/StringSet.h" |
20 | | #include "llvm/Support/AMDGPUAddrSpace.h" |
21 | | #include "llvm/Support/Compiler.h" |
22 | | #include "llvm/TargetParser/TargetParser.h" |
23 | | #include "llvm/TargetParser/Triple.h" |
24 | | #include <optional> |
25 | | |
26 | | namespace clang { |
27 | | namespace targets { |
28 | | |
29 | | class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { |
30 | | |
31 | | static const char *const GCCRegNames[]; |
32 | | |
33 | | static const LangASMap AMDGPUDefIsGenMap; |
34 | | static const LangASMap AMDGPUDefIsPrivMap; |
35 | | |
36 | | llvm::AMDGPU::GPUKind GPUKind; |
37 | | unsigned GPUFeatures; |
38 | | unsigned WavefrontSize; |
39 | | |
40 | | /// Whether to use cumode or WGP mode. True for cumode. False for WGP mode. |
41 | | bool CUMode; |
42 | | |
43 | | /// Whether the target has image instructions. |
44 | | bool HasImage = false; |
45 | | |
46 | | /// Target ID is the device name followed by optional feature names, each with |
47 | | /// a plus or minus suffix, delimited by colons, e.g. gfx908:xnack+:sramecc-. |
48 | | /// If the target ID contains feature+, map it to true. |
49 | | /// If the target ID contains feature-, map it to false. |
50 | | /// If the target ID does not contain a feature (default), do not map it. |
51 | | llvm::StringMap<bool> OffloadArchFeatures; |
52 | | std::string TargetID; |
53 | | |
54 | 0 | bool hasFP64() const { |
55 | 0 | return getTriple().getArch() == llvm::Triple::amdgcn || |
56 | 0 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); |
57 | 0 | } |
58 | | |
59 | | /// Whether f32 FMA is fast, i.e. GPUFeatures has FEATURE_FAST_FMA_F32 set. |
60 | 0 | bool hasFastFMAF() const {
61 | 0 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32);
62 | 0 | }
63 | | |
64 | | /// Whether f64 FMA is fast; true exactly when the target arch is amdgcn. |
65 | 0 | bool hasFastFMA() const {
66 | 0 | return getTriple().getArch() == llvm::Triple::amdgcn;
67 | 0 | }
68 | | |
69 | 0 | bool hasFMAF() const { |
70 | 0 | return getTriple().getArch() == llvm::Triple::amdgcn || |
71 | 0 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA); |
72 | 0 | } |
73 | | |
74 | 0 | bool hasFullRateDenormalsF32() const { |
75 | 0 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
76 | 0 | } |
77 | | |
78 | 0 | bool hasLDEXPF() const { |
79 | 0 | return getTriple().getArch() == llvm::Triple::amdgcn || |
80 | 0 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP); |
81 | 0 | } |
82 | | |
83 | 0 | static bool isAMDGCN(const llvm::Triple &TT) { |
84 | 0 | return TT.getArch() == llvm::Triple::amdgcn; |
85 | 0 | } |
86 | | |
87 | 0 | static bool isR600(const llvm::Triple &TT) { |
88 | 0 | return TT.getArch() == llvm::Triple::r600; |
89 | 0 | } |
90 | | |
91 | | public: |
92 | | AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); |
93 | | |
94 | | void setAddressSpaceMap(bool DefaultIsPrivate); |
95 | | |
96 | | void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; |
97 | | |
98 | 0 | uint64_t getPointerWidthV(LangAS AS) const override { |
99 | 0 | if (isR600(getTriple())) |
100 | 0 | return 32; |
101 | 0 | unsigned TargetAS = getTargetAddressSpace(AS); |
102 | |
|
103 | 0 | if (TargetAS == llvm::AMDGPUAS::PRIVATE_ADDRESS || |
104 | 0 | TargetAS == llvm::AMDGPUAS::LOCAL_ADDRESS) |
105 | 0 | return 32; |
106 | | |
107 | 0 | return 64; |
108 | 0 | } |
109 | | |
110 | 0 | uint64_t getPointerAlignV(LangAS AddrSpace) const override { |
111 | 0 | return getPointerWidthV(AddrSpace); |
112 | 0 | } |
113 | | |
114 | 0 | uint64_t getMaxPointerWidth() const override { |
115 | 0 | return getTriple().getArch() == llvm::Triple::amdgcn ? 64 : 32; |
116 | 0 | } |
117 | | |
118 | 0 | bool hasBFloat16Type() const override { return isAMDGCN(getTriple()); } |
119 | | |
120 | 0 | std::string_view getClobbers() const override { return ""; } |
121 | | |
122 | | ArrayRef<const char *> getGCCRegNames() const override; |
123 | | |
124 | 0 | ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override { |
125 | 0 | return std::nullopt; |
126 | 0 | } |
127 | | |
128 | | /// Accepted register names: (n and m are unsigned integers, n < m) |
129 | | /// v |
130 | | /// s |
131 | | /// a |
132 | | /// {vn}, {v[n]} |
133 | | /// {sn}, {s[n]} |
134 | | /// {an}, {a[n]} |
135 | | /// {S} , where S is a special register name |
136 | | /// {v[n:m]} |
137 | | /// {s[n:m]} |
138 | | /// {a[n:m]} |
139 | | bool validateAsmConstraint(const char *&Name, |
140 | 0 | TargetInfo::ConstraintInfo &Info) const override { |
141 | 0 | static const ::llvm::StringSet<> SpecialRegs({ |
142 | 0 | "exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma", |
143 | 0 | "flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo", |
144 | 0 | "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", |
145 | 0 | }); |
146 | |
|
147 | 0 | switch (*Name) { |
148 | 0 | case 'I': |
149 | 0 | Info.setRequiresImmediate(-16, 64); |
150 | 0 | return true; |
151 | 0 | case 'J': |
152 | 0 | Info.setRequiresImmediate(-32768, 32767); |
153 | 0 | return true; |
154 | 0 | case 'A': |
155 | 0 | case 'B': |
156 | 0 | case 'C': |
157 | 0 | Info.setRequiresImmediate(); |
158 | 0 | return true; |
159 | 0 | default: |
160 | 0 | break; |
161 | 0 | } |
162 | | |
163 | 0 | StringRef S(Name); |
164 | |
|
165 | 0 | if (S == "DA" || S == "DB") { |
166 | 0 | Name++; |
167 | 0 | Info.setRequiresImmediate(); |
168 | 0 | return true; |
169 | 0 | } |
170 | | |
171 | 0 | bool HasLeftParen = false; |
172 | 0 | if (S.consume_front("{")) |
173 | 0 | HasLeftParen = true; |
174 | 0 | if (S.empty()) |
175 | 0 | return false; |
176 | 0 | if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') { |
177 | 0 | if (!HasLeftParen) |
178 | 0 | return false; |
179 | 0 | auto E = S.find('}'); |
180 | 0 | if (!SpecialRegs.count(S.substr(0, E))) |
181 | 0 | return false; |
182 | 0 | S = S.drop_front(E + 1); |
183 | 0 | if (!S.empty()) |
184 | 0 | return false; |
185 | | // Found {S} where S is a special register. |
186 | 0 | Info.setAllowsRegister(); |
187 | 0 | Name = S.data() - 1; |
188 | 0 | return true; |
189 | 0 | } |
190 | 0 | S = S.drop_front(); |
191 | 0 | if (!HasLeftParen) { |
192 | 0 | if (!S.empty()) |
193 | 0 | return false; |
194 | | // Found s, v or a. |
195 | 0 | Info.setAllowsRegister(); |
196 | 0 | Name = S.data() - 1; |
197 | 0 | return true; |
198 | 0 | } |
199 | 0 | bool HasLeftBracket = false; |
200 | 0 | if (S.consume_front("[")) |
201 | 0 | HasLeftBracket = true; |
202 | 0 | unsigned long long N; |
203 | 0 | if (S.empty() || consumeUnsignedInteger(S, 10, N)) |
204 | 0 | return false; |
205 | 0 | if (S.consume_front(":")) { |
206 | 0 | if (!HasLeftBracket) |
207 | 0 | return false; |
208 | 0 | unsigned long long M; |
209 | 0 | if (consumeUnsignedInteger(S, 10, M) || N >= M) |
210 | 0 | return false; |
211 | 0 | } |
212 | 0 | if (HasLeftBracket) { |
213 | 0 | if (!S.consume_front("]")) |
214 | 0 | return false; |
215 | 0 | } |
216 | 0 | if (!S.consume_front("}")) |
217 | 0 | return false; |
218 | 0 | if (!S.empty()) |
219 | 0 | return false; |
220 | | // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]} |
221 | | // or {a[n:m]}. |
222 | 0 | Info.setAllowsRegister(); |
223 | 0 | Name = S.data() - 1; |
224 | 0 | return true; |
225 | 0 | } |
226 | | |
227 | | // \p Constraint will be left pointing at the last character of |
228 | | // the constraint. In practice, it won't be changed unless the |
229 | | // constraint is longer than one character. |
230 | 0 | std::string convertConstraint(const char *&Constraint) const override { |
231 | |
|
232 | 0 | StringRef S(Constraint); |
233 | 0 | if (S == "DA" || S == "DB") { |
234 | 0 | return std::string("^") + std::string(Constraint++, 2); |
235 | 0 | } |
236 | | |
237 | 0 | const char *Begin = Constraint; |
238 | 0 | TargetInfo::ConstraintInfo Info("", ""); |
239 | 0 | if (validateAsmConstraint(Constraint, Info)) |
240 | 0 | return std::string(Begin).substr(0, Constraint - Begin + 1); |
241 | | |
242 | 0 | Constraint = Begin; |
243 | 0 | return std::string(1, *Constraint); |
244 | 0 | } |
245 | | |
246 | | bool |
247 | | initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, |
248 | | StringRef CPU, |
249 | | const std::vector<std::string> &FeatureVec) const override; |
250 | | |
251 | | ArrayRef<Builtin::Info> getTargetBuiltins() const override; |
252 | | |
253 | 0 | bool useFP16ConversionIntrinsics() const override { return false; } |
254 | | |
255 | | void getTargetDefines(const LangOptions &Opts, |
256 | | MacroBuilder &Builder) const override; |
257 | | |
258 | 0 | BuiltinVaListKind getBuiltinVaListKind() const override { |
259 | 0 | return TargetInfo::CharPtrBuiltinVaList; |
260 | 0 | } |
261 | | |
262 | 0 | bool isValidCPUName(StringRef Name) const override { |
263 | 0 | if (getTriple().getArch() == llvm::Triple::amdgcn) |
264 | 0 | return llvm::AMDGPU::parseArchAMDGCN(Name) != llvm::AMDGPU::GK_NONE; |
265 | 0 | return llvm::AMDGPU::parseArchR600(Name) != llvm::AMDGPU::GK_NONE; |
266 | 0 | } |
267 | | |
268 | | void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; |
269 | | |
270 | 0 | bool setCPU(const std::string &Name) override { |
271 | 0 | if (getTriple().getArch() == llvm::Triple::amdgcn) { |
272 | 0 | GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name); |
273 | 0 | GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind); |
274 | 0 | } else { |
275 | 0 | GPUKind = llvm::AMDGPU::parseArchR600(Name); |
276 | 0 | GPUFeatures = llvm::AMDGPU::getArchAttrR600(GPUKind); |
277 | 0 | } |
278 | |
|
279 | 0 | return GPUKind != llvm::AMDGPU::GK_NONE; |
280 | 0 | } |
281 | | |
282 | 0 | void setSupportedOpenCLOpts() override { |
283 | 0 | auto &Opts = getSupportedOpenCLOpts(); |
284 | 0 | Opts["cl_clang_storage_class_specifiers"] = true; |
285 | 0 | Opts["__cl_clang_variadic_functions"] = true; |
286 | 0 | Opts["__cl_clang_function_pointers"] = true; |
287 | 0 | Opts["__cl_clang_non_portable_kernel_param_types"] = true; |
288 | 0 | Opts["__cl_clang_bitfields"] = true; |
289 | |
|
290 | 0 | bool IsAMDGCN = isAMDGCN(getTriple()); |
291 | |
|
292 | 0 | Opts["cl_khr_fp64"] = hasFP64(); |
293 | 0 | Opts["__opencl_c_fp64"] = hasFP64(); |
294 | |
|
295 | 0 | if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) { |
296 | 0 | Opts["cl_khr_byte_addressable_store"] = true; |
297 | 0 | Opts["cl_khr_global_int32_base_atomics"] = true; |
298 | 0 | Opts["cl_khr_global_int32_extended_atomics"] = true; |
299 | 0 | Opts["cl_khr_local_int32_base_atomics"] = true; |
300 | 0 | Opts["cl_khr_local_int32_extended_atomics"] = true; |
301 | 0 | } |
302 | |
|
303 | 0 | if (IsAMDGCN) { |
304 | 0 | Opts["cl_khr_fp16"] = true; |
305 | 0 | Opts["cl_khr_int64_base_atomics"] = true; |
306 | 0 | Opts["cl_khr_int64_extended_atomics"] = true; |
307 | 0 | Opts["cl_khr_mipmap_image"] = true; |
308 | 0 | Opts["cl_khr_mipmap_image_writes"] = true; |
309 | 0 | Opts["cl_khr_subgroups"] = true; |
310 | 0 | Opts["cl_amd_media_ops"] = true; |
311 | 0 | Opts["cl_amd_media_ops2"] = true; |
312 | |
|
313 | 0 | Opts["__opencl_c_images"] = true; |
314 | 0 | Opts["__opencl_c_3d_image_writes"] = true; |
315 | 0 | Opts["cl_khr_3d_image_writes"] = true; |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | 0 | LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override { |
320 | 0 | switch (TK) { |
321 | 0 | case OCLTK_Image: |
322 | 0 | return LangAS::opencl_constant; |
323 | | |
324 | 0 | case OCLTK_ClkEvent: |
325 | 0 | case OCLTK_Queue: |
326 | 0 | case OCLTK_ReserveID: |
327 | 0 | return LangAS::opencl_global; |
328 | | |
329 | 0 | default: |
330 | 0 | return TargetInfo::getOpenCLTypeAddrSpace(TK); |
331 | 0 | } |
332 | 0 | } |
333 | | |
334 | 0 | LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override { |
335 | 0 | switch (AS) { |
336 | 0 | case 0: |
337 | 0 | return LangAS::opencl_generic; |
338 | 0 | case 1: |
339 | 0 | return LangAS::opencl_global; |
340 | 0 | case 3: |
341 | 0 | return LangAS::opencl_local; |
342 | 0 | case 4: |
343 | 0 | return LangAS::opencl_constant; |
344 | 0 | case 5: |
345 | 0 | return LangAS::opencl_private; |
346 | 0 | default: |
347 | 0 | return getLangASFromTargetAS(AS); |
348 | 0 | } |
349 | 0 | } |
350 | | |
351 | 0 | LangAS getCUDABuiltinAddressSpace(unsigned AS) const override { |
352 | 0 | switch (AS) { |
353 | 0 | case 0: |
354 | 0 | return LangAS::Default; |
355 | 0 | case 1: |
356 | 0 | return LangAS::cuda_device; |
357 | 0 | case 3: |
358 | 0 | return LangAS::cuda_shared; |
359 | 0 | case 4: |
360 | 0 | return LangAS::cuda_constant; |
361 | 0 | default: |
362 | 0 | return getLangASFromTargetAS(AS); |
363 | 0 | } |
364 | 0 | } |
365 | | |
366 | 0 | std::optional<LangAS> getConstantAddressSpace() const override { |
367 | 0 | return getLangASFromTargetAS(llvm::AMDGPUAS::CONSTANT_ADDRESS); |
368 | 0 | } |
369 | | |
370 | 0 | const llvm::omp::GV &getGridValue() const override { |
371 | 0 | switch (WavefrontSize) { |
372 | 0 | case 32: |
373 | 0 | return llvm::omp::getAMDGPUGridValues<32>(); |
374 | 0 | case 64: |
375 | 0 | return llvm::omp::getAMDGPUGridValues<64>(); |
376 | 0 | default: |
377 | 0 | llvm_unreachable("getGridValue not implemented for this wavesize"); |
378 | 0 | } |
379 | 0 | } |
380 | | |
381 | | /// \returns The address space used for vtable pointers (CONSTANT_ADDRESS). |
382 | 0 | unsigned getVtblPtrAddressSpace() const override {
383 | 0 | return static_cast<unsigned>(llvm::AMDGPUAS::CONSTANT_ADDRESS);
384 | 0 | }
385 | | |
386 | | /// \returns The target-specific DWARF address space to use for addresses in |
387 | | /// the target address space \p AddressSpace: 1 for PRIVATE_ADDRESS and 2 for |
388 | | /// LOCAL_ADDRESS. |
389 | | /// |
390 | | /// \returns std::nullopt for every other address space, in which case no |
391 | | /// conversion will be emitted in the DWARF. |
392 | | std::optional<unsigned> |
393 | 0 | getDWARFAddressSpace(unsigned AddressSpace) const override {
394 | 0 | const unsigned DWARF_Private = 1;
395 | 0 | const unsigned DWARF_Local = 2;
396 | 0 | if (AddressSpace == llvm::AMDGPUAS::PRIVATE_ADDRESS) {
397 | 0 | return DWARF_Private;
398 | 0 | } else if (AddressSpace == llvm::AMDGPUAS::LOCAL_ADDRESS) {
399 | 0 | return DWARF_Local;
400 | 0 | } else {
401 | 0 | return std::nullopt;
402 | 0 | }
403 | 0 | }
404 | | |
405 | 0 | CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { |
406 | 0 | switch (CC) { |
407 | 0 | default: |
408 | 0 | return CCCR_Warning; |
409 | 0 | case CC_C: |
410 | 0 | case CC_OpenCLKernel: |
411 | 0 | case CC_AMDGPUKernelCall: |
412 | 0 | return CCCR_OK; |
413 | 0 | } |
414 | 0 | } |
415 | | |
416 | | // On amdgcn the null pointer has value 0 in the global, constant, and generic |
417 | | // address spaces, but value ~0 in the private and local address spaces. |
418 | | // Only the OpenCL local and private LangAS values are checked here. |
419 | 0 | uint64_t getNullPointerValue(LangAS AS) const override {
420 | | // FIXME: Should also handle region. |
421 | 0 | return (AS == LangAS::opencl_local || AS == LangAS::opencl_private)
422 | 0 | ? ~0 : 0;
423 | 0 | }
424 | | |
425 | | void setAuxTarget(const TargetInfo *Aux) override; |
426 | | |
427 | 0 | bool hasBitIntType() const override { return true; } |
428 | | |
429 | | // Apply the wavefront size, cumode and image-insts settings in \p Features and |
430 | | // record target ID features, which are needed to define the predefined macros. |
431 | | bool handleTargetFeatures(std::vector<std::string> &Features, |
432 | 0 | DiagnosticsEngine &Diags) override {
433 | 0 | auto TargetIDFeatures =
434 | 0 | getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
435 | 0 | for (const auto &F : Features) {
436 | 0 | assert(F.front() == '+' || F.front() == '-');
437 | 0 | if (F == "+wavefrontsize64")
438 | 0 | WavefrontSize = 64;
439 | 0 | else if (F == "+cumode")
440 | 0 | CUMode = true;
441 | 0 | else if (F == "-cumode")
442 | 0 | CUMode = false;
443 | 0 | else if (F == "+image-insts")
444 | 0 | HasImage = true;
445 | 0 | bool IsOn = F.front() == '+';
446 | 0 | StringRef Name = StringRef(F).drop_front();
447 | 0 | if (!llvm::is_contained(TargetIDFeatures, Name))
448 | 0 | continue;
449 | 0 | assert(!OffloadArchFeatures.contains(Name));
450 | 0 | OffloadArchFeatures[Name] = IsOn;
451 | 0 | }
452 | 0 | return true;
453 | 0 | }
454 | | |
455 | 0 | std::optional<std::string> getTargetID() const override { |
456 | 0 | if (!isAMDGCN(getTriple())) |
457 | 0 | return std::nullopt; |
458 | | // When -target-cpu is not set, we assume generic code that is valid for |
459 | | // all GPUs and use an empty string as the target ID to represent that. |
460 | 0 | if (GPUKind == llvm::AMDGPU::GK_NONE) |
461 | 0 | return std::string(""); |
462 | 0 | return getCanonicalTargetID(getArchNameAMDGCN(GPUKind), |
463 | 0 | OffloadArchFeatures); |
464 | 0 | } |
465 | | |
466 | 0 | bool hasHIPImageSupport() const override { return HasImage; } |
467 | | }; |
468 | | |
469 | | } // namespace targets |
470 | | } // namespace clang |
471 | | |
472 | | #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |