/src/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "AMDGPU.h" |
10 | | #include "CommonArgs.h" |
11 | | #include "clang/Basic/TargetID.h" |
12 | | #include "clang/Config/config.h" |
13 | | #include "clang/Driver/Compilation.h" |
14 | | #include "clang/Driver/DriverDiagnostic.h" |
15 | | #include "clang/Driver/InputInfo.h" |
16 | | #include "clang/Driver/Options.h" |
17 | | #include "llvm/ADT/StringExtras.h" |
18 | | #include "llvm/Option/ArgList.h" |
19 | | #include "llvm/Support/Error.h" |
20 | | #include "llvm/Support/LineIterator.h" |
21 | | #include "llvm/Support/Path.h" |
22 | | #include "llvm/Support/Process.h" |
23 | | #include "llvm/Support/VirtualFileSystem.h" |
24 | | #include "llvm/TargetParser/Host.h" |
25 | | #include <optional> |
26 | | #include <system_error> |
27 | | |
28 | | using namespace clang::driver; |
29 | | using namespace clang::driver::tools; |
30 | | using namespace clang::driver::toolchains; |
31 | | using namespace clang; |
32 | | using namespace llvm::opt; |
33 | | |
34 | | // Look for sub-directory starts with PackageName under ROCm candidate path. |
35 | | // If there is one and only one matching sub-directory found, append the |
36 | | // sub-directory to Path. If there is no matching sub-directory or there are |
37 | | // more than one matching sub-directories, diagnose them. Returns the full |
38 | | // path of the package if there is only one matching sub-directory, otherwise |
39 | | // returns an empty string. |
40 | | llvm::SmallString<0> |
41 | | RocmInstallationDetector::findSPACKPackage(const Candidate &Cand, |
42 | 0 | StringRef PackageName) { |
43 | 0 | if (!Cand.isSPACK()) |
44 | 0 | return {}; |
45 | 0 | std::error_code EC; |
46 | 0 | std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str(); |
47 | 0 | llvm::SmallVector<llvm::SmallString<0>> SubDirs; |
48 | 0 | for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC), |
49 | 0 | FileEnd; |
50 | 0 | File != FileEnd && !EC; File.increment(EC)) { |
51 | 0 | llvm::StringRef FileName = llvm::sys::path::filename(File->path()); |
52 | 0 | if (FileName.starts_with(Prefix)) { |
53 | 0 | SubDirs.push_back(FileName); |
54 | 0 | if (SubDirs.size() > 1) |
55 | 0 | break; |
56 | 0 | } |
57 | 0 | } |
58 | 0 | if (SubDirs.size() == 1) { |
59 | 0 | auto PackagePath = Cand.Path; |
60 | 0 | llvm::sys::path::append(PackagePath, SubDirs[0]); |
61 | 0 | return PackagePath; |
62 | 0 | } |
63 | 0 | if (SubDirs.size() == 0 && Verbose) { |
64 | 0 | llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path |
65 | 0 | << '\n'; |
66 | 0 | return {}; |
67 | 0 | } |
68 | | |
69 | 0 | if (SubDirs.size() > 1 && Verbose) { |
70 | 0 | llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path |
71 | 0 | << " due to multiple installations for the same version\n"; |
72 | 0 | } |
73 | 0 | return {}; |
74 | 0 | } |
75 | | |
76 | 0 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
77 | 0 | assert(!Path.empty()); |
78 | | |
79 | 0 | const StringRef Suffix(".bc"); |
80 | 0 | const StringRef Suffix2(".amdgcn.bc"); |
81 | |
|
82 | 0 | std::error_code EC; |
83 | 0 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE; |
84 | 0 | !EC && LI != LE; LI = LI.increment(EC)) { |
85 | 0 | StringRef FilePath = LI->path(); |
86 | 0 | StringRef FileName = llvm::sys::path::filename(FilePath); |
87 | 0 | if (!FileName.ends_with(Suffix)) |
88 | 0 | continue; |
89 | | |
90 | 0 | StringRef BaseName; |
91 | 0 | if (FileName.ends_with(Suffix2)) |
92 | 0 | BaseName = FileName.drop_back(Suffix2.size()); |
93 | 0 | else if (FileName.ends_with(Suffix)) |
94 | 0 | BaseName = FileName.drop_back(Suffix.size()); |
95 | |
|
96 | 0 | const StringRef ABIVersionPrefix = "oclc_abi_version_"; |
97 | 0 | if (BaseName == "ocml") { |
98 | 0 | OCML = FilePath; |
99 | 0 | } else if (BaseName == "ockl") { |
100 | 0 | OCKL = FilePath; |
101 | 0 | } else if (BaseName == "opencl") { |
102 | 0 | OpenCL = FilePath; |
103 | 0 | } else if (BaseName == "hip") { |
104 | 0 | HIP = FilePath; |
105 | 0 | } else if (BaseName == "asanrtl") { |
106 | 0 | AsanRTL = FilePath; |
107 | 0 | } else if (BaseName == "oclc_finite_only_off") { |
108 | 0 | FiniteOnly.Off = FilePath; |
109 | 0 | } else if (BaseName == "oclc_finite_only_on") { |
110 | 0 | FiniteOnly.On = FilePath; |
111 | 0 | } else if (BaseName == "oclc_daz_opt_on") { |
112 | 0 | DenormalsAreZero.On = FilePath; |
113 | 0 | } else if (BaseName == "oclc_daz_opt_off") { |
114 | 0 | DenormalsAreZero.Off = FilePath; |
115 | 0 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { |
116 | 0 | CorrectlyRoundedSqrt.On = FilePath; |
117 | 0 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { |
118 | 0 | CorrectlyRoundedSqrt.Off = FilePath; |
119 | 0 | } else if (BaseName == "oclc_unsafe_math_on") { |
120 | 0 | UnsafeMath.On = FilePath; |
121 | 0 | } else if (BaseName == "oclc_unsafe_math_off") { |
122 | 0 | UnsafeMath.Off = FilePath; |
123 | 0 | } else if (BaseName == "oclc_wavefrontsize64_on") { |
124 | 0 | WavefrontSize64.On = FilePath; |
125 | 0 | } else if (BaseName == "oclc_wavefrontsize64_off") { |
126 | 0 | WavefrontSize64.Off = FilePath; |
127 | 0 | } else if (BaseName.starts_with(ABIVersionPrefix)) { |
128 | 0 | unsigned ABIVersionNumber; |
129 | 0 | if (BaseName.drop_front(ABIVersionPrefix.size()) |
130 | 0 | .getAsInteger(/*Redex=*/0, ABIVersionNumber)) |
131 | 0 | continue; |
132 | 0 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
133 | 0 | } else { |
134 | | // Process all bitcode filenames that look like |
135 | | // ocl_isa_version_XXX.amdgcn.bc |
136 | 0 | const StringRef DeviceLibPrefix = "oclc_isa_version_"; |
137 | 0 | if (!BaseName.starts_with(DeviceLibPrefix)) |
138 | 0 | continue; |
139 | | |
140 | 0 | StringRef IsaVersionNumber = |
141 | 0 | BaseName.drop_front(DeviceLibPrefix.size()); |
142 | |
|
143 | 0 | llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber; |
144 | 0 | SmallString<8> Tmp; |
145 | 0 | LibDeviceMap.insert( |
146 | 0 | std::make_pair(GfxName.toStringRef(Tmp), FilePath.str())); |
147 | 0 | } |
148 | 0 | } |
149 | 0 | } |
150 | | |
151 | | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
152 | | // the parsing fails. |
153 | 0 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
154 | 0 | SmallVector<StringRef, 4> VersionParts; |
155 | 0 | V.split(VersionParts, '\n'); |
156 | 0 | unsigned Major = ~0U; |
157 | 0 | unsigned Minor = ~0U; |
158 | 0 | for (auto Part : VersionParts) { |
159 | 0 | auto Splits = Part.rtrim().split('='); |
160 | 0 | if (Splits.first == "HIP_VERSION_MAJOR") { |
161 | 0 | if (Splits.second.getAsInteger(0, Major)) |
162 | 0 | return true; |
163 | 0 | } else if (Splits.first == "HIP_VERSION_MINOR") { |
164 | 0 | if (Splits.second.getAsInteger(0, Minor)) |
165 | 0 | return true; |
166 | 0 | } else if (Splits.first == "HIP_VERSION_PATCH") |
167 | 0 | VersionPatch = Splits.second.str(); |
168 | 0 | } |
169 | 0 | if (Major == ~0U || Minor == ~0U) |
170 | 0 | return true; |
171 | 0 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
172 | 0 | DetectedVersion = |
173 | 0 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
174 | 0 | return false; |
175 | 0 | } |
176 | | |
177 | | /// \returns a list of candidate directories for ROCm installation, which is |
178 | | /// cached and populated only once. |
179 | | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
180 | 0 | RocmInstallationDetector::getInstallationPathCandidates() { |
181 | | |
182 | | // Return the cached candidate list if it has already been populated. |
183 | 0 | if (!ROCmSearchDirs.empty()) |
184 | 0 | return ROCmSearchDirs; |
185 | | |
186 | 0 | auto DoPrintROCmSearchDirs = [&]() { |
187 | 0 | if (PrintROCmSearchDirs) |
188 | 0 | for (auto Cand : ROCmSearchDirs) { |
189 | 0 | llvm::errs() << "ROCm installation search path"; |
190 | 0 | if (Cand.isSPACK()) |
191 | 0 | llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")"; |
192 | 0 | llvm::errs() << ": " << Cand.Path << '\n'; |
193 | 0 | } |
194 | 0 | }; |
195 | | |
196 | | // For candidate specified by --rocm-path we do not do strict check, i.e., |
197 | | // checking existence of HIP version file and device library files. |
198 | 0 | if (!RocmPathArg.empty()) { |
199 | 0 | ROCmSearchDirs.emplace_back(RocmPathArg.str()); |
200 | 0 | DoPrintROCmSearchDirs(); |
201 | 0 | return ROCmSearchDirs; |
202 | 0 | } else if (std::optional<std::string> RocmPathEnv = |
203 | 0 | llvm::sys::Process::GetEnv("ROCM_PATH")) { |
204 | 0 | if (!RocmPathEnv->empty()) { |
205 | 0 | ROCmSearchDirs.emplace_back(std::move(*RocmPathEnv)); |
206 | 0 | DoPrintROCmSearchDirs(); |
207 | 0 | return ROCmSearchDirs; |
208 | 0 | } |
209 | 0 | } |
210 | | |
211 | | // Try to find relative to the compiler binary. |
212 | 0 | const char *InstallDir = D.getInstalledDir(); |
213 | | |
214 | | // Check both a normal Unix prefix position of the clang binary, as well as |
215 | | // the Windows-esque layout the ROCm packages use with the host architecture |
216 | | // subdirectory of bin. |
217 | 0 | auto DeduceROCmPath = [](StringRef ClangPath) { |
218 | | // Strip off directory (usually bin) |
219 | 0 | StringRef ParentDir = llvm::sys::path::parent_path(ClangPath); |
220 | 0 | StringRef ParentName = llvm::sys::path::filename(ParentDir); |
221 | | |
222 | | // Some builds use bin/{host arch}, so go up again. |
223 | 0 | if (ParentName == "bin") { |
224 | 0 | ParentDir = llvm::sys::path::parent_path(ParentDir); |
225 | 0 | ParentName = llvm::sys::path::filename(ParentDir); |
226 | 0 | } |
227 | | |
228 | | // Detect ROCm packages built with SPACK. |
229 | | // clang is installed at |
230 | | // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory. |
231 | | // We only consider the parent directory of llvm-amdgpu package as ROCm |
232 | | // installation candidate for SPACK. |
233 | 0 | if (ParentName.starts_with("llvm-amdgpu-")) { |
234 | 0 | auto SPACKPostfix = |
235 | 0 | ParentName.drop_front(strlen("llvm-amdgpu-")).split('-'); |
236 | 0 | auto SPACKReleaseStr = SPACKPostfix.first; |
237 | 0 | if (!SPACKReleaseStr.empty()) { |
238 | 0 | ParentDir = llvm::sys::path::parent_path(ParentDir); |
239 | 0 | return Candidate(ParentDir.str(), /*StrictChecking=*/true, |
240 | 0 | SPACKReleaseStr); |
241 | 0 | } |
242 | 0 | } |
243 | | |
244 | | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
245 | | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
246 | 0 | if (ParentName == "llvm" || ParentName.starts_with("aomp")) |
247 | 0 | ParentDir = llvm::sys::path::parent_path(ParentDir); |
248 | |
|
249 | 0 | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
250 | 0 | }; |
251 | | |
252 | | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
253 | | // link of clang itself. |
254 | 0 | ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir)); |
255 | | |
256 | | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
257 | | // link of clang itself. |
258 | 0 | llvm::SmallString<256> RealClangPath; |
259 | 0 | llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath); |
260 | 0 | auto ParentPath = llvm::sys::path::parent_path(RealClangPath); |
261 | 0 | if (ParentPath != InstallDir) |
262 | 0 | ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath)); |
263 | | |
264 | | // Device library may be installed in clang or resource directory. |
265 | 0 | auto ClangRoot = llvm::sys::path::parent_path(InstallDir); |
266 | 0 | auto RealClangRoot = llvm::sys::path::parent_path(ParentPath); |
267 | 0 | ROCmSearchDirs.emplace_back(ClangRoot.str(), /*StrictChecking=*/true); |
268 | 0 | if (RealClangRoot != ClangRoot) |
269 | 0 | ROCmSearchDirs.emplace_back(RealClangRoot.str(), /*StrictChecking=*/true); |
270 | 0 | ROCmSearchDirs.emplace_back(D.ResourceDir, |
271 | 0 | /*StrictChecking=*/true); |
272 | |
|
273 | 0 | ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm", |
274 | 0 | /*StrictChecking=*/true); |
275 | | |
276 | | // Find the latest /opt/rocm-{release} directory. |
277 | 0 | std::error_code EC; |
278 | 0 | std::string LatestROCm; |
279 | 0 | llvm::VersionTuple LatestVer; |
280 | | // Get ROCm version from ROCm directory name. |
281 | 0 | auto GetROCmVersion = [](StringRef DirName) { |
282 | 0 | llvm::VersionTuple V; |
283 | 0 | std::string VerStr = DirName.drop_front(strlen("rocm-")).str(); |
284 | | // The ROCm directory name follows the format of |
285 | | // rocm-{major}.{minor}.{subMinor}[-{build}] |
286 | 0 | std::replace(VerStr.begin(), VerStr.end(), '-', '.'); |
287 | 0 | V.tryParse(VerStr); |
288 | 0 | return V; |
289 | 0 | }; |
290 | 0 | for (llvm::vfs::directory_iterator |
291 | 0 | File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC), |
292 | 0 | FileEnd; |
293 | 0 | File != FileEnd && !EC; File.increment(EC)) { |
294 | 0 | llvm::StringRef FileName = llvm::sys::path::filename(File->path()); |
295 | 0 | if (!FileName.starts_with("rocm-")) |
296 | 0 | continue; |
297 | 0 | if (LatestROCm.empty()) { |
298 | 0 | LatestROCm = FileName.str(); |
299 | 0 | LatestVer = GetROCmVersion(LatestROCm); |
300 | 0 | continue; |
301 | 0 | } |
302 | 0 | auto Ver = GetROCmVersion(FileName); |
303 | 0 | if (LatestVer < Ver) { |
304 | 0 | LatestROCm = FileName.str(); |
305 | 0 | LatestVer = Ver; |
306 | 0 | } |
307 | 0 | } |
308 | 0 | if (!LatestROCm.empty()) |
309 | 0 | ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm, |
310 | 0 | /*StrictChecking=*/true); |
311 | |
|
312 | 0 | ROCmSearchDirs.emplace_back(D.SysRoot + "/usr/local", |
313 | 0 | /*StrictChecking=*/true); |
314 | 0 | ROCmSearchDirs.emplace_back(D.SysRoot + "/usr", |
315 | 0 | /*StrictChecking=*/true); |
316 | |
|
317 | 0 | DoPrintROCmSearchDirs(); |
318 | 0 | return ROCmSearchDirs; |
319 | 0 | } |
320 | | |
321 | | RocmInstallationDetector::RocmInstallationDetector( |
322 | | const Driver &D, const llvm::Triple &HostTriple, |
323 | | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
324 | 0 | : D(D) { |
325 | 0 | Verbose = Args.hasArg(options::OPT_v); |
326 | 0 | RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ); |
327 | 0 | PrintROCmSearchDirs = |
328 | 0 | Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs); |
329 | 0 | RocmDeviceLibPathArg = |
330 | 0 | Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); |
331 | 0 | HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); |
332 | 0 | HIPStdParPathArg = |
333 | 0 | Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ); |
334 | 0 | HasHIPStdParLibrary = |
335 | 0 | !HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg + |
336 | 0 | "/hipstdpar_lib.hpp"); |
337 | 0 | HIPRocThrustPathArg = |
338 | 0 | Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ); |
339 | 0 | HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && |
340 | 0 | D.getVFS().exists(HIPRocThrustPathArg + "/thrust"); |
341 | 0 | HIPRocPrimPathArg = |
342 | 0 | Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ); |
343 | 0 | HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && |
344 | 0 | D.getVFS().exists(HIPRocPrimPathArg + "/rocprim"); |
345 | |
|
346 | 0 | if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { |
347 | 0 | HIPVersionArg = A->getValue(); |
348 | 0 | unsigned Major = ~0U; |
349 | 0 | unsigned Minor = ~0U; |
350 | 0 | SmallVector<StringRef, 3> Parts; |
351 | 0 | HIPVersionArg.split(Parts, '.'); |
352 | 0 | if (Parts.size()) |
353 | 0 | Parts[0].getAsInteger(0, Major); |
354 | 0 | if (Parts.size() > 1) |
355 | 0 | Parts[1].getAsInteger(0, Minor); |
356 | 0 | if (Parts.size() > 2) |
357 | 0 | VersionPatch = Parts[2].str(); |
358 | 0 | if (VersionPatch.empty()) |
359 | 0 | VersionPatch = "0"; |
360 | 0 | if (Major != ~0U && Minor == ~0U) |
361 | 0 | Minor = 0; |
362 | 0 | if (Major == ~0U || Minor == ~0U) |
363 | 0 | D.Diag(diag::err_drv_invalid_value) |
364 | 0 | << A->getAsString(Args) << HIPVersionArg; |
365 | |
|
366 | 0 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
367 | 0 | DetectedVersion = |
368 | 0 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
369 | 0 | } else { |
370 | 0 | VersionPatch = DefaultVersionPatch; |
371 | 0 | VersionMajorMinor = |
372 | 0 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
373 | 0 | DetectedVersion = (Twine(DefaultVersionMajor) + "." + |
374 | 0 | Twine(DefaultVersionMinor) + "." + VersionPatch) |
375 | 0 | .str(); |
376 | 0 | } |
377 | |
|
378 | 0 | if (DetectHIPRuntime) |
379 | 0 | detectHIPRuntime(); |
380 | 0 | if (DetectDeviceLib) |
381 | 0 | detectDeviceLibrary(); |
382 | 0 | } |
383 | | |
384 | 0 | void RocmInstallationDetector::detectDeviceLibrary() { |
385 | 0 | assert(LibDevicePath.empty()); |
386 | | |
387 | 0 | if (!RocmDeviceLibPathArg.empty()) |
388 | 0 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
389 | 0 | else if (std::optional<std::string> LibPathEnv = |
390 | 0 | llvm::sys::Process::GetEnv("HIP_DEVICE_LIB_PATH")) |
391 | 0 | LibDevicePath = std::move(*LibPathEnv); |
392 | |
|
393 | 0 | auto &FS = D.getVFS(); |
394 | 0 | if (!LibDevicePath.empty()) { |
395 | | // Maintain compatability with HIP flag/envvar pointing directly at the |
396 | | // bitcode library directory. This points directly at the library path instead |
397 | | // of the rocm root installation. |
398 | 0 | if (!FS.exists(LibDevicePath)) |
399 | 0 | return; |
400 | | |
401 | 0 | scanLibDevicePath(LibDevicePath); |
402 | 0 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); |
403 | 0 | return; |
404 | 0 | } |
405 | | |
406 | | // Check device library exists at the given path. |
407 | 0 | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) { |
408 | 0 | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking); |
409 | 0 | if (CheckLibDevice && !FS.exists(Path)) |
410 | 0 | return false; |
411 | | |
412 | 0 | scanLibDevicePath(Path); |
413 | |
|
414 | 0 | if (!NoBuiltinLibs) { |
415 | | // Check that the required non-target libraries are all available. |
416 | 0 | if (!allGenericLibsValid()) |
417 | 0 | return false; |
418 | | |
419 | | // Check that we have found at least one libdevice that we can link in |
420 | | // if -nobuiltinlib hasn't been specified. |
421 | 0 | if (LibDeviceMap.empty()) |
422 | 0 | return false; |
423 | 0 | } |
424 | 0 | return true; |
425 | 0 | }; |
426 | | |
427 | | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
428 | 0 | LibDevicePath = D.ResourceDir; |
429 | 0 | llvm::sys::path::append(LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
430 | 0 | "amdgcn", "bitcode"); |
431 | 0 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
432 | 0 | if (HasDeviceLibrary) |
433 | 0 | return; |
434 | | |
435 | | // Find device libraries in a legacy ROCm directory structure |
436 | | // ${ROCM_ROOT}/amdgcn/bitcode/* |
437 | 0 | auto &ROCmDirs = getInstallationPathCandidates(); |
438 | 0 | for (const auto &Candidate : ROCmDirs) { |
439 | 0 | LibDevicePath = Candidate.Path; |
440 | 0 | llvm::sys::path::append(LibDevicePath, "amdgcn", "bitcode"); |
441 | 0 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
442 | 0 | if (HasDeviceLibrary) |
443 | 0 | return; |
444 | 0 | } |
445 | 0 | } |
446 | | |
447 | 0 | void RocmInstallationDetector::detectHIPRuntime() { |
448 | 0 | SmallVector<Candidate, 4> HIPSearchDirs; |
449 | 0 | if (!HIPPathArg.empty()) |
450 | 0 | HIPSearchDirs.emplace_back(HIPPathArg.str()); |
451 | 0 | else if (std::optional<std::string> HIPPathEnv = |
452 | 0 | llvm::sys::Process::GetEnv("HIP_PATH")) { |
453 | 0 | if (!HIPPathEnv->empty()) |
454 | 0 | HIPSearchDirs.emplace_back(std::move(*HIPPathEnv)); |
455 | 0 | } |
456 | 0 | if (HIPSearchDirs.empty()) |
457 | 0 | HIPSearchDirs.append(getInstallationPathCandidates()); |
458 | 0 | auto &FS = D.getVFS(); |
459 | |
|
460 | 0 | for (const auto &Candidate : HIPSearchDirs) { |
461 | 0 | InstallPath = Candidate.Path; |
462 | 0 | if (InstallPath.empty() || !FS.exists(InstallPath)) |
463 | 0 | continue; |
464 | | // HIP runtime built by SPACK is installed to |
465 | | // <rocm_root>/hip-<rocm_release_string>-<hash> directory. |
466 | 0 | auto SPACKPath = findSPACKPackage(Candidate, "hip"); |
467 | 0 | InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath; |
468 | |
|
469 | 0 | BinPath = InstallPath; |
470 | 0 | llvm::sys::path::append(BinPath, "bin"); |
471 | 0 | IncludePath = InstallPath; |
472 | 0 | llvm::sys::path::append(IncludePath, "include"); |
473 | 0 | LibPath = InstallPath; |
474 | 0 | llvm::sys::path::append(LibPath, "lib"); |
475 | 0 | SharePath = InstallPath; |
476 | 0 | llvm::sys::path::append(SharePath, "share"); |
477 | | |
478 | | // Get parent of InstallPath and append "share" |
479 | 0 | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(InstallPath); |
480 | 0 | llvm::sys::path::append(ParentSharePath, "share"); |
481 | |
|
482 | 0 | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "", |
483 | 0 | const Twine &c = "", const Twine &d = "") { |
484 | 0 | SmallString<0> newpath = path; |
485 | 0 | llvm::sys::path::append(newpath, a, b, c, d); |
486 | 0 | return newpath; |
487 | 0 | }; |
488 | | // If HIP version file can be found and parsed, use HIP version from there. |
489 | 0 | for (const auto &VersionFilePath : |
490 | 0 | {Append(SharePath, "hip", "version"), |
491 | 0 | Append(ParentSharePath, "hip", "version"), |
492 | 0 | Append(BinPath, ".hipVersion")}) { |
493 | 0 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
494 | 0 | FS.getBufferForFile(VersionFilePath); |
495 | 0 | if (!VersionFile) |
496 | 0 | continue; |
497 | 0 | if (HIPVersionArg.empty() && VersionFile) |
498 | 0 | if (parseHIPVersionFile((*VersionFile)->getBuffer())) |
499 | 0 | continue; |
500 | | |
501 | 0 | HasHIPRuntime = true; |
502 | 0 | return; |
503 | 0 | } |
504 | | // Otherwise, if -rocm-path is specified (no strict checking), use the |
505 | | // default HIP version or specified by --hip-version. |
506 | 0 | if (!Candidate.StrictChecking) { |
507 | 0 | HasHIPRuntime = true; |
508 | 0 | return; |
509 | 0 | } |
510 | 0 | } |
511 | 0 | HasHIPRuntime = false; |
512 | 0 | } |
513 | | |
514 | 0 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
515 | 0 | if (hasHIPRuntime()) |
516 | 0 | OS << "Found HIP installation: " << InstallPath << ", version " |
517 | 0 | << DetectedVersion << '\n'; |
518 | 0 | } |
519 | | |
520 | | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
521 | 0 | ArgStringList &CC1Args) const { |
522 | 0 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
523 | 0 | !DriverArgs.hasArg(options::OPT_nohipwrapperinc); |
524 | 0 | bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar); |
525 | |
|
526 | 0 | if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { |
527 | | // HIP header includes standard library wrapper headers under clang |
528 | | // cuda_wrappers directory. Since these wrapper headers include_next |
529 | | // standard C++ headers, whereas libc++ headers include_next other clang |
530 | | // headers. The include paths have to follow this order: |
531 | | // - wrapper include path |
532 | | // - standard C++ include path |
533 | | // - other clang include path |
534 | | // Since standard C++ and other clang include paths are added in other |
535 | | // places after this function, here we only need to make sure wrapper |
536 | | // include path is added. |
537 | | // |
538 | | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
539 | | // a workaround. |
540 | 0 | SmallString<128> P(D.ResourceDir); |
541 | 0 | if (UsesRuntimeWrapper) |
542 | 0 | llvm::sys::path::append(P, "include", "cuda_wrappers"); |
543 | 0 | CC1Args.push_back("-internal-isystem"); |
544 | 0 | CC1Args.push_back(DriverArgs.MakeArgString(P)); |
545 | 0 | } |
546 | |
|
547 | 0 | const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { |
548 | 0 | if (!hasHIPStdParLibrary()) { |
549 | 0 | D.Diag(diag::err_drv_no_hipstdpar_lib); |
550 | 0 | return; |
551 | 0 | } |
552 | 0 | if (!HasRocThrustLibrary && |
553 | 0 | !D.getVFS().exists(getIncludePath() + "/thrust")) { |
554 | 0 | D.Diag(diag::err_drv_no_hipstdpar_thrust_lib); |
555 | 0 | return; |
556 | 0 | } |
557 | 0 | if (!HasRocPrimLibrary && |
558 | 0 | !D.getVFS().exists(getIncludePath() + "/rocprim")) { |
559 | 0 | D.Diag(diag::err_drv_no_hipstdpar_prim_lib); |
560 | 0 | return; |
561 | 0 | } |
562 | | |
563 | 0 | const char *ThrustPath; |
564 | 0 | if (HasRocThrustLibrary) |
565 | 0 | ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg); |
566 | 0 | else |
567 | 0 | ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust"); |
568 | |
|
569 | 0 | const char *PrimPath; |
570 | 0 | if (HasRocPrimLibrary) |
571 | 0 | PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg); |
572 | 0 | else |
573 | 0 | PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim"); |
574 | |
|
575 | 0 | CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath, |
576 | 0 | "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg), |
577 | 0 | "-include", "hipstdpar_lib.hpp"}); |
578 | 0 | }; |
579 | |
|
580 | 0 | if (DriverArgs.hasArg(options::OPT_nogpuinc)) { |
581 | 0 | if (HasHipStdPar) |
582 | 0 | HandleHipStdPar(); |
583 | |
|
584 | 0 | return; |
585 | 0 | } |
586 | | |
587 | 0 | if (!hasHIPRuntime()) { |
588 | 0 | D.Diag(diag::err_drv_no_hip_runtime); |
589 | 0 | return; |
590 | 0 | } |
591 | | |
592 | 0 | CC1Args.push_back("-idirafter"); |
593 | 0 | CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); |
594 | 0 | if (UsesRuntimeWrapper) |
595 | 0 | CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); |
596 | 0 | if (HasHipStdPar) |
597 | 0 | HandleHipStdPar(); |
598 | 0 | } |
599 | | |
600 | | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
601 | | const InputInfo &Output, |
602 | | const InputInfoList &Inputs, |
603 | | const ArgList &Args, |
604 | 0 | const char *LinkingOutput) const { |
605 | |
|
606 | 0 | std::string Linker = getToolChain().GetProgramPath(getShortName()); |
607 | 0 | ArgStringList CmdArgs; |
608 | 0 | CmdArgs.push_back("--no-undefined"); |
609 | 0 | CmdArgs.push_back("-shared"); |
610 | |
|
611 | 0 | addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); |
612 | 0 | Args.AddAllArgs(CmdArgs, options::OPT_L); |
613 | 0 | AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); |
614 | 0 | if (C.getDriver().isUsingLTO()) |
615 | 0 | addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], |
616 | 0 | C.getDriver().getLTOMode() == LTOK_Thin); |
617 | 0 | else if (Args.hasArg(options::OPT_mcpu_EQ)) |
618 | 0 | CmdArgs.push_back(Args.MakeArgString( |
619 | 0 | "-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ))); |
620 | 0 | CmdArgs.push_back("-o"); |
621 | 0 | CmdArgs.push_back(Output.getFilename()); |
622 | 0 | C.addCommand(std::make_unique<Command>( |
623 | 0 | JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), |
624 | 0 | CmdArgs, Inputs, Output)); |
625 | 0 | } |
626 | | |
627 | | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
628 | | const llvm::Triple &Triple, |
629 | | const llvm::opt::ArgList &Args, |
630 | 0 | std::vector<StringRef> &Features) { |
631 | | // Add target ID features to -target-feature options. No diagnostics should |
632 | | // be emitted here since invalid target ID is diagnosed at other places. |
633 | 0 | StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); |
634 | 0 | if (!TargetID.empty()) { |
635 | 0 | llvm::StringMap<bool> FeatureMap; |
636 | 0 | auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap); |
637 | 0 | if (OptionalGpuArch) { |
638 | 0 | StringRef GpuArch = *OptionalGpuArch; |
639 | | // Iterate through all possible target ID features for the given GPU. |
640 | | // If it is mapped to true, add +feature. |
641 | | // If it is mapped to false, add -feature. |
642 | | // If it is not in the map (default), do not add it |
643 | 0 | for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) { |
644 | 0 | auto Pos = FeatureMap.find(Feature); |
645 | 0 | if (Pos == FeatureMap.end()) |
646 | 0 | continue; |
647 | 0 | Features.push_back(Args.MakeArgStringRef( |
648 | 0 | (Twine(Pos->second ? "+" : "-") + Feature).str())); |
649 | 0 | } |
650 | 0 | } |
651 | 0 | } |
652 | |
|
653 | 0 | if (Args.hasFlag(options::OPT_mwavefrontsize64, |
654 | 0 | options::OPT_mno_wavefrontsize64, false)) |
655 | 0 | Features.push_back("+wavefrontsize64"); |
656 | |
|
657 | 0 | handleTargetFeaturesGroup(D, Triple, Args, Features, |
658 | 0 | options::OPT_m_amdgpu_Features_Group); |
659 | 0 | } |
660 | | |
661 | | /// AMDGPU Toolchain |
662 | | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
663 | | const ArgList &Args) |
664 | | : Generic_ELF(D, Triple, Args), |
665 | | OptionsDefault( |
666 | 0 | {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) { |
667 | | // Check code object version options. Emit warnings for legacy options |
668 | | // and errors for the last invalid code object version options. |
669 | | // It is done here to avoid repeated warning or error messages for |
670 | | // each tool invocation. |
671 | 0 | checkAMDGPUCodeObjectVersion(D, Args); |
672 | 0 | } |
673 | | |
674 | 0 | Tool *AMDGPUToolChain::buildLinker() const { |
675 | 0 | return new tools::amdgpu::Linker(*this); |
676 | 0 | } |
677 | | |
678 | | DerivedArgList * |
679 | | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
680 | 0 | Action::OffloadKind DeviceOffloadKind) const { |
681 | |
|
682 | 0 | DerivedArgList *DAL = |
683 | 0 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
684 | |
|
685 | 0 | const OptTable &Opts = getDriver().getOpts(); |
686 | |
|
687 | 0 | if (!DAL) |
688 | 0 | DAL = new DerivedArgList(Args.getBaseArgs()); |
689 | |
|
690 | 0 | for (Arg *A : Args) |
691 | 0 | DAL->append(A); |
692 | | |
693 | | // Replace -mcpu=native with detected GPU. |
694 | 0 | Arg *LastMCPUArg = DAL->getLastArg(options::OPT_mcpu_EQ); |
695 | 0 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native") { |
696 | 0 | DAL->eraseArg(options::OPT_mcpu_EQ); |
697 | 0 | auto GPUsOrErr = getSystemGPUArchs(Args); |
698 | 0 | if (!GPUsOrErr) { |
699 | 0 | getDriver().Diag(diag::err_drv_undetermined_gpu_arch) |
700 | 0 | << llvm::Triple::getArchTypeName(getArch()) |
701 | 0 | << llvm::toString(GPUsOrErr.takeError()) << "-mcpu"; |
702 | 0 | } else { |
703 | 0 | auto &GPUs = *GPUsOrErr; |
704 | 0 | if (GPUs.size() > 1) { |
705 | 0 | getDriver().Diag(diag::warn_drv_multi_gpu_arch) |
706 | 0 | << llvm::Triple::getArchTypeName(getArch()) |
707 | 0 | << llvm::join(GPUs, ", ") << "-mcpu"; |
708 | 0 | } |
709 | 0 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), |
710 | 0 | Args.MakeArgString(GPUs.front())); |
711 | 0 | } |
712 | 0 | } |
713 | |
|
714 | 0 | checkTargetID(*DAL); |
715 | |
|
716 | 0 | if (!Args.getLastArgValue(options::OPT_x).equals("cl")) |
717 | 0 | return DAL; |
718 | | |
719 | | // Phase 1 (.cl -> .bc) |
720 | 0 | if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { |
721 | 0 | DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit() |
722 | 0 | ? options::OPT_m64 |
723 | 0 | : options::OPT_m32)); |
724 | | |
725 | | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
726 | | // as they defined that way in Options.td |
727 | 0 | if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, |
728 | 0 | options::OPT_Ofast)) |
729 | 0 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), |
730 | 0 | getOptionDefault(options::OPT_O)); |
731 | 0 | } |
732 | |
|
733 | 0 | return DAL; |
734 | 0 | } |
735 | | |
736 | | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
737 | 0 | llvm::AMDGPU::GPUKind Kind) { |
738 | | |
739 | | // Assume nothing without a specific target. |
740 | 0 | if (Kind == llvm::AMDGPU::GK_NONE) |
741 | 0 | return false; |
742 | | |
743 | 0 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); |
744 | | |
745 | | // Default to enabling f32 denormals by default on subtargets where fma is |
746 | | // fast with denormals |
747 | 0 | const bool BothDenormAndFMAFast = |
748 | 0 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
749 | 0 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
750 | 0 | return !BothDenormAndFMAFast; |
751 | 0 | } |
752 | | |
753 | | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
754 | | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
755 | 0 | const llvm::fltSemantics *FPType) const { |
756 | | // Denormals should always be enabled for f16 and f64. |
757 | 0 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) |
758 | 0 | return llvm::DenormalMode::getIEEE(); |
759 | | |
760 | 0 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
761 | 0 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
762 | 0 | auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); |
763 | 0 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); |
764 | 0 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
765 | 0 | DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
766 | 0 | options::OPT_fno_gpu_flush_denormals_to_zero, |
767 | 0 | getDefaultDenormsAreZeroForTarget(Kind))) |
768 | 0 | return llvm::DenormalMode::getPreserveSign(); |
769 | | |
770 | 0 | return llvm::DenormalMode::getIEEE(); |
771 | 0 | } |
772 | | |
773 | 0 | const StringRef GpuArch = getGPUArch(DriverArgs); |
774 | 0 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); |
775 | | |
776 | | // TODO: There are way too many flags that change this. Do we need to check |
777 | | // them all? |
778 | 0 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
779 | 0 | getDefaultDenormsAreZeroForTarget(Kind); |
780 | | |
781 | | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
782 | | // also implicit treated as zero (DAZ). |
783 | 0 | return DAZ ? llvm::DenormalMode::getPreserveSign() : |
784 | 0 | llvm::DenormalMode::getIEEE(); |
785 | 0 | } |
786 | | |
787 | | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
788 | 0 | llvm::AMDGPU::GPUKind Kind) { |
789 | 0 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); |
790 | 0 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
791 | |
|
792 | 0 | return !HasWave32 || DriverArgs.hasFlag( |
793 | 0 | options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); |
794 | 0 | } |
795 | | |
796 | | |
797 | | /// ROCM Toolchain |
798 | | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
799 | | const ArgList &Args) |
800 | 0 | : AMDGPUToolChain(D, Triple, Args) { |
801 | 0 | RocmInstallation->detectDeviceLibrary(); |
802 | 0 | } |
803 | | |
804 | | void AMDGPUToolChain::addClangTargetOptions( |
805 | | const llvm::opt::ArgList &DriverArgs, |
806 | | llvm::opt::ArgStringList &CC1Args, |
807 | 0 | Action::OffloadKind DeviceOffloadingKind) const { |
808 | | // Default to "hidden" visibility, as object level linking will not be |
809 | | // supported for the foreseeable future. |
810 | 0 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, |
811 | 0 | options::OPT_fvisibility_ms_compat)) { |
812 | 0 | CC1Args.push_back("-fvisibility=hidden"); |
813 | 0 | CC1Args.push_back("-fapply-global-visibility-to-externs"); |
814 | 0 | } |
815 | 0 | } |
816 | | |
817 | | StringRef |
818 | 0 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
819 | 0 | return getProcessorFromTargetID( |
820 | 0 | getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); |
821 | 0 | } |
822 | | |
823 | | AMDGPUToolChain::ParsedTargetIDType |
824 | 0 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
825 | 0 | StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); |
826 | 0 | if (TargetID.empty()) |
827 | 0 | return {std::nullopt, std::nullopt, std::nullopt}; |
828 | | |
829 | 0 | llvm::StringMap<bool> FeatureMap; |
830 | 0 | auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); |
831 | 0 | if (!OptionalGpuArch) |
832 | 0 | return {TargetID.str(), std::nullopt, std::nullopt}; |
833 | | |
834 | 0 | return {TargetID.str(), OptionalGpuArch->str(), FeatureMap}; |
835 | 0 | } |
836 | | |
837 | | void AMDGPUToolChain::checkTargetID( |
838 | 0 | const llvm::opt::ArgList &DriverArgs) const { |
839 | 0 | auto PTID = getParsedTargetID(DriverArgs); |
840 | 0 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
841 | 0 | getDriver().Diag(clang::diag::err_drv_bad_target_id) |
842 | 0 | << *PTID.OptionalTargetID; |
843 | 0 | } |
844 | 0 | } |
845 | | |
846 | | Expected<SmallVector<std::string>> |
847 | 0 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
848 | | // Detect AMD GPUs availible on the system. |
849 | 0 | std::string Program; |
850 | 0 | if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) |
851 | 0 | Program = A->getValue(); |
852 | 0 | else |
853 | 0 | Program = GetProgramPath("amdgpu-arch"); |
854 | |
|
855 | 0 | auto StdoutOrErr = executeToolChainProgram(Program); |
856 | 0 | if (!StdoutOrErr) |
857 | 0 | return StdoutOrErr.takeError(); |
858 | | |
859 | 0 | SmallVector<std::string, 1> GPUArchs; |
860 | 0 | for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) |
861 | 0 | if (!Arch.empty()) |
862 | 0 | GPUArchs.push_back(Arch.str()); |
863 | |
|
864 | 0 | if (GPUArchs.empty()) |
865 | 0 | return llvm::createStringError(std::error_code(), |
866 | 0 | "No AMD GPU detected in the system"); |
867 | | |
868 | 0 | return std::move(GPUArchs); |
869 | 0 | } |
870 | | |
871 | | void ROCMToolChain::addClangTargetOptions( |
872 | | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
873 | 0 | Action::OffloadKind DeviceOffloadingKind) const { |
874 | 0 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
875 | 0 | DeviceOffloadingKind); |
876 | | |
877 | | // For the OpenCL case where there is no offload target, accept -nostdlib to |
878 | | // disable bitcode linking. |
879 | 0 | if (DeviceOffloadingKind == Action::OFK_None && |
880 | 0 | DriverArgs.hasArg(options::OPT_nostdlib)) |
881 | 0 | return; |
882 | | |
883 | 0 | if (DriverArgs.hasArg(options::OPT_nogpulib)) |
884 | 0 | return; |
885 | | |
886 | | // Get the device name and canonicalize it |
887 | 0 | const StringRef GpuArch = getGPUArch(DriverArgs); |
888 | 0 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); |
889 | 0 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); |
890 | 0 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch); |
891 | 0 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
892 | 0 | getAMDGPUCodeObjectVersion(getDriver(), DriverArgs)); |
893 | 0 | if (!RocmInstallation->checkCommonBitcodeLibs(CanonArch, LibDeviceFile, |
894 | 0 | ABIVer)) |
895 | 0 | return; |
896 | | |
897 | 0 | bool Wave64 = isWave64(DriverArgs, Kind); |
898 | | |
899 | | // TODO: There are way too many flags that change this. Do we need to check |
900 | | // them all? |
901 | 0 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
902 | 0 | getDefaultDenormsAreZeroForTarget(Kind); |
903 | 0 | bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); |
904 | |
|
905 | 0 | bool UnsafeMathOpt = |
906 | 0 | DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); |
907 | 0 | bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); |
908 | 0 | bool CorrectSqrt = |
909 | 0 | DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
910 | | |
911 | | // Add the OpenCL specific bitcode library. |
912 | 0 | llvm::SmallVector<std::string, 12> BCLibs; |
913 | 0 | BCLibs.push_back(RocmInstallation->getOpenCLPath().str()); |
914 | | |
915 | | // Add the generic set of libraries. |
916 | 0 | BCLibs.append(RocmInstallation->getCommonBitcodeLibs( |
917 | 0 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
918 | 0 | FastRelaxedMath, CorrectSqrt, ABIVer, false)); |
919 | |
|
920 | 0 | for (StringRef BCFile : BCLibs) { |
921 | 0 | CC1Args.push_back("-mlink-builtin-bitcode"); |
922 | 0 | CC1Args.push_back(DriverArgs.MakeArgString(BCFile)); |
923 | 0 | } |
924 | 0 | } |
925 | | |
926 | | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
927 | | StringRef GPUArch, StringRef LibDeviceFile, |
928 | 0 | DeviceLibABIVersion ABIVer) const { |
929 | 0 | if (!hasDeviceLibrary()) { |
930 | 0 | D.Diag(diag::err_drv_no_rocm_device_lib) << 0; |
931 | 0 | return false; |
932 | 0 | } |
933 | 0 | if (LibDeviceFile.empty()) { |
934 | 0 | D.Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
935 | 0 | return false; |
936 | 0 | } |
937 | 0 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) { |
938 | 0 | D.Diag(diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString(); |
939 | 0 | return false; |
940 | 0 | } |
941 | 0 | return true; |
942 | 0 | } |
943 | | |
944 | | llvm::SmallVector<std::string, 12> |
945 | | RocmInstallationDetector::getCommonBitcodeLibs( |
946 | | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
947 | | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
948 | 0 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool isOpenMP = false) const { |
949 | 0 | llvm::SmallVector<std::string, 12> BCLibs; |
950 | |
|
951 | 0 | auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); }; |
952 | |
|
953 | 0 | AddBCLib(getOCMLPath()); |
954 | 0 | if (!isOpenMP) |
955 | 0 | AddBCLib(getOCKLPath()); |
956 | 0 | AddBCLib(getDenormalsAreZeroPath(DAZ)); |
957 | 0 | AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)); |
958 | 0 | AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); |
959 | 0 | AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); |
960 | 0 | AddBCLib(getWavefrontSize64Path(Wave64)); |
961 | 0 | AddBCLib(LibDeviceFile); |
962 | 0 | auto ABIVerPath = getABIVersionPath(ABIVer); |
963 | 0 | if (!ABIVerPath.empty()) |
964 | 0 | AddBCLib(ABIVerPath); |
965 | |
|
966 | 0 | return BCLibs; |
967 | 0 | } |
968 | | |
969 | | llvm::SmallVector<std::string, 12> |
970 | | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
971 | | const std::string &GPUArch, |
972 | 0 | bool isOpenMP) const { |
973 | 0 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); |
974 | 0 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); |
975 | |
|
976 | 0 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch); |
977 | 0 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
978 | 0 | getAMDGPUCodeObjectVersion(getDriver(), DriverArgs)); |
979 | 0 | if (!RocmInstallation->checkCommonBitcodeLibs(CanonArch, LibDeviceFile, |
980 | 0 | ABIVer)) |
981 | 0 | return {}; |
982 | | |
983 | | // If --hip-device-lib is not set, add the default bitcode libraries. |
984 | | // TODO: There are way too many flags that change this. Do we need to check |
985 | | // them all? |
986 | 0 | bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
987 | 0 | options::OPT_fno_gpu_flush_denormals_to_zero, |
988 | 0 | getDefaultDenormsAreZeroForTarget(Kind)); |
989 | 0 | bool FiniteOnly = DriverArgs.hasFlag( |
990 | 0 | options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); |
991 | 0 | bool UnsafeMathOpt = |
992 | 0 | DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, |
993 | 0 | options::OPT_fno_unsafe_math_optimizations, false); |
994 | 0 | bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, |
995 | 0 | options::OPT_fno_fast_math, false); |
996 | 0 | bool CorrectSqrt = DriverArgs.hasFlag( |
997 | 0 | options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
998 | 0 | options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); |
999 | 0 | bool Wave64 = isWave64(DriverArgs, Kind); |
1000 | |
|
1001 | 0 | return RocmInstallation->getCommonBitcodeLibs( |
1002 | 0 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
1003 | 0 | FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP); |
1004 | 0 | } |