xref: /freebsd/contrib/llvm-project/clang/lib/Driver/ToolChains/HIPAMD.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HIPAMD.h"
10 #include "AMDGPU.h"
11 #include "HIPUtility.h"
12 #include "SPIRV.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Driver/CommonArgs.h"
15 #include "clang/Driver/Compilation.h"
16 #include "clang/Driver/Driver.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "clang/Driver/SanitizerArgs.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/TargetParser/TargetParser.h"
23 
24 using namespace clang::driver;
25 using namespace clang::driver::toolchains;
26 using namespace clang::driver::tools;
27 using namespace clang;
28 using namespace llvm::opt;
29 
30 #if defined(_WIN32) || defined(_WIN64)
31 #define NULL_FILE "nul"
32 #else
33 #define NULL_FILE "/dev/null"
34 #endif
35 
constructLlvmLinkCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args) const36 void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C,
37                                          const JobAction &JA,
38                                          const InputInfoList &Inputs,
39                                          const InputInfo &Output,
40                                          const llvm::opt::ArgList &Args) const {
41   // Construct llvm-link command.
42   // The output from llvm-link is a bitcode file.
43   ArgStringList LlvmLinkArgs;
44 
45   assert(!Inputs.empty() && "Must have at least one input.");
46 
47   LlvmLinkArgs.append({"-o", Output.getFilename()});
48   for (auto Input : Inputs)
49     LlvmLinkArgs.push_back(Input.getFilename());
50 
51   // Look for archive of bundled bitcode in arguments, and add temporary files
52   // for the extracted archive of bitcode to inputs.
53   auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
54   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn",
55                              TargetID, /*IsBitCodeSDL=*/true);
56 
57   const char *LlvmLink =
58     Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
59   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
60                                          LlvmLink, LlvmLinkArgs, Inputs,
61                                          Output));
62 }
63 
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args) const64 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
65                                          const InputInfoList &Inputs,
66                                          const InputInfo &Output,
67                                          const llvm::opt::ArgList &Args) const {
68   // Construct lld command.
69   // The output from ld.lld is an HSA code object file.
70   ArgStringList LldArgs{"-flavor",
71                         "gnu",
72                         "-m",
73                         "elf64_amdgpu",
74                         "--no-undefined",
75                         "-shared",
76                         "-plugin-opt=-amdgpu-internalize-symbols"};
77   if (Args.hasArg(options::OPT_hipstdpar))
78     LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");
79 
80   auto &TC = getToolChain();
81   auto &D = TC.getDriver();
82   bool IsThinLTO = D.getOffloadLTOMode() == LTOK_Thin;
83   addLTOOptions(TC, Args, LldArgs, Output, Inputs, IsThinLTO);
84 
85   // Extract all the -m options
86   std::vector<llvm::StringRef> Features;
87   amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
88 
89   // Add features to mattr such as cumode
90   std::string MAttrString = "-plugin-opt=-mattr=";
91   for (auto OneFeature : unifyTargetFeatures(Features)) {
92     MAttrString.append(Args.MakeArgString(OneFeature));
93     if (OneFeature != Features.back())
94       MAttrString.append(",");
95   }
96   if (!Features.empty())
97     LldArgs.push_back(Args.MakeArgString(MAttrString));
98 
99   // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
100   // Since AMDGPU backend currently does not support ISA-level linking, all
101   // called functions need to be imported.
102   if (IsThinLTO) {
103     LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
104     LldArgs.push_back(Args.MakeArgString("-plugin-opt=-avail-extern-to-local"));
105     LldArgs.push_back(Args.MakeArgString(
106         "-plugin-opt=-avail-extern-gv-in-addrspace-to-local=3"));
107   }
108 
109   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
110     LldArgs.push_back(
111         Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
112   }
113 
114   if (C.getDriver().isSaveTempsEnabled())
115     LldArgs.push_back("-save-temps");
116 
117   addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
118 
119   // Given that host and device linking happen in separate processes, the device
120   // linker doesn't always have the visibility as to which device symbols are
121   // needed by a program, especially for the device symbol dependencies that are
122   // introduced through the host symbol resolution.
123   // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B()
124   // (B.obj) In this case, the device linker doesn't know that A.obj actually
125   // depends on the kernel functions in B.obj.  When linking to static device
126   // library, the device linker may drop some of the device global symbols if
127   // they aren't referenced.  As a workaround, we are adding to the
128   // --whole-archive flag such that all global symbols would be linked in.
129   LldArgs.push_back("--whole-archive");
130 
131   for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) {
132     StringRef ArgVal = Arg->getValue(1);
133     auto SplitArg = ArgVal.split("-mllvm=");
134     if (!SplitArg.second.empty()) {
135       LldArgs.push_back(
136           Args.MakeArgString(Twine("-plugin-opt=") + SplitArg.second));
137     } else {
138       LldArgs.push_back(Args.MakeArgString(ArgVal));
139     }
140     Arg->claim();
141   }
142 
143   LldArgs.append({"-o", Output.getFilename()});
144   for (auto Input : Inputs)
145     LldArgs.push_back(Input.getFilename());
146 
147   // Look for archive of bundled bitcode in arguments, and add temporary files
148   // for the extracted archive of bitcode to inputs.
149   auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
150   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LldArgs, "amdgcn",
151                              TargetID, /*IsBitCodeSDL=*/true);
152 
153   LldArgs.push_back("--no-whole-archive");
154 
155   const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
156   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
157                                          Lld, LldArgs, Inputs, Output));
158 }
159 
160 // For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode
161 // and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It
162 // calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will
163 // be promoted from experimental, we will switch to using that. TODO: consider
164 // if we want to run any targeted optimisations over IR here, over generic
165 // SPIR-V.
constructLinkAndEmitSpirvCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args) const166 void AMDGCN::Linker::constructLinkAndEmitSpirvCommand(
167     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
168     const InputInfo &Output, const llvm::opt::ArgList &Args) const {
169   assert(!Inputs.empty() && "Must have at least one input.");
170 
171   constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
172 
173   // Linked BC is now in Output
174 
175   // Emit SPIR-V binary.
176   llvm::opt::ArgStringList TrArgs{
177       "--spirv-max-version=1.6",
178       "--spirv-ext=+all",
179       "--spirv-allow-unknown-intrinsics",
180       "--spirv-lower-const-expr",
181       "--spirv-preserve-auxdata",
182       "--spirv-debug-info-version=nonsemantic-shader-200"};
183   SPIRV::constructTranslateCommand(C, *this, JA, Output, Output, TrArgs);
184 }
185 
186 // For amdgcn the inputs of the linker job are device bitcode and output is
187 // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
188 // llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const189 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
190                                   const InputInfo &Output,
191                                   const InputInfoList &Inputs,
192                                   const ArgList &Args,
193                                   const char *LinkingOutput) const {
194   if (Inputs.size() > 0 &&
195       Inputs[0].getType() == types::TY_Image &&
196       JA.getType() == types::TY_Object)
197     return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs,
198                                                          Args, JA, *this);
199 
200   if (JA.getType() == types::TY_HIP_FATBIN)
201     return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs,
202                                           Args, *this);
203 
204   if (JA.getType() == types::TY_LLVM_BC)
205     return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
206 
207   if (getToolChain().getEffectiveTriple().isSPIRV())
208     return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args);
209 
210   return constructLldCommand(C, JA, Inputs, Output, Args);
211 }
212 
HIPAMDToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)213 HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple,
214                                  const ToolChain &HostTC, const ArgList &Args)
215     : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
216   // Lookup binaries into the driver directory, this is used to
217   // discover the clang-offload-bundler executable.
218   getProgramPaths().push_back(getDriver().Dir);
219   // Diagnose unsupported sanitizer options only once.
220   diagnoseUnsupportedSanitizers(Args);
221 }
222 
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const223 void HIPAMDToolChain::addClangTargetOptions(
224     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
225     Action::OffloadKind DeviceOffloadingKind) const {
226   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
227 
228   assert(DeviceOffloadingKind == Action::OFK_HIP &&
229          "Only HIP offloading kinds are supported for GPUs.");
230 
231   CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});
232 
233   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
234                           false)) {
235     CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
236     if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
237       CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
238   }
239 
240   StringRef MaxThreadsPerBlock =
241       DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
242   if (!MaxThreadsPerBlock.empty()) {
243     std::string ArgStr =
244         (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock).str();
245     CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
246   }
247 
248   CC1Args.push_back("-fcuda-allow-variadic-functions");
249 
250   // Default to "hidden" visibility, as object level linking will not be
251   // supported for the foreseeable future.
252   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
253                          options::OPT_fvisibility_ms_compat)) {
254     CC1Args.append({"-fvisibility=hidden"});
255     CC1Args.push_back("-fapply-global-visibility-to-externs");
256   }
257 
258   if (getEffectiveTriple().isSPIRV()) {
259     // For SPIR-V we embed the command-line into the generated binary, in order
260     // to retrieve it at JIT time and be able to do target specific compilation
261     // with options that match the user-supplied ones.
262     if (!DriverArgs.hasArg(options::OPT_fembed_bitcode_marker))
263       CC1Args.push_back("-fembed-bitcode=marker");
264     return; // No DeviceLibs for SPIR-V.
265   }
266 
267   for (auto BCFile : getDeviceLibs(DriverArgs)) {
268     CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
269                                                : "-mlink-bitcode-file");
270     CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
271   }
272 }
273 
274 llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const275 HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
276                                StringRef BoundArch,
277                                Action::OffloadKind DeviceOffloadKind) const {
278   DerivedArgList *DAL =
279       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
280   if (!DAL)
281     DAL = new DerivedArgList(Args.getBaseArgs());
282 
283   const OptTable &Opts = getDriver().getOpts();
284 
285   for (Arg *A : Args) {
286     if (!shouldSkipSanitizeOption(*this, Args, BoundArch, A))
287       DAL->append(A);
288   }
289 
290   if (!BoundArch.empty()) {
291     DAL->eraseArg(options::OPT_mcpu_EQ);
292     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
293     checkTargetID(*DAL);
294   }
295 
296   if (!Args.hasArg(options::OPT_flto_partitions_EQ))
297     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
298                       "8");
299 
300   return DAL;
301 }
302 
buildLinker() const303 Tool *HIPAMDToolChain::buildLinker() const {
304   assert(getTriple().isAMDGCN() ||
305          getTriple().getArch() == llvm::Triple::spirv64);
306   return new tools::AMDGCN::Linker(*this);
307 }
308 
addClangWarningOptions(ArgStringList & CC1Args) const309 void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
310   AMDGPUToolChain::addClangWarningOptions(CC1Args);
311   HostTC.addClangWarningOptions(CC1Args);
312 }
313 
314 ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const315 HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const {
316   return HostTC.GetCXXStdlibType(Args);
317 }
318 
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const319 void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
320                                                 ArgStringList &CC1Args) const {
321   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
322 }
323 
AddClangCXXStdlibIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const324 void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs(
325     const ArgList &Args, ArgStringList &CC1Args) const {
326   HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
327 }
328 
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const329 void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
330                                           ArgStringList &CC1Args) const {
331   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
332 }
333 
AddHIPIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const334 void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
335                                         ArgStringList &CC1Args) const {
336   RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args);
337 }
338 
getSupportedSanitizers() const339 SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const {
340   // The HIPAMDToolChain only supports sanitizers in the sense that it allows
341   // sanitizer arguments on the command line if they are supported by the host
342   // toolchain. The HIPAMDToolChain will actually ignore any command line
343   // arguments for any of these "supported" sanitizers. That means that no
344   // sanitization of device code is actually supported at this time.
345   //
346   // This behavior is necessary because the host and device toolchains
347   // invocations often share the command line, so the device toolchain must
348   // tolerate flags meant only for the host toolchain.
349   return HostTC.getSupportedSanitizers();
350 }
351 
computeMSVCVersion(const Driver * D,const ArgList & Args) const352 VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
353                                                  const ArgList &Args) const {
354   return HostTC.computeMSVCVersion(D, Args);
355 }
356 
357 llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
getDeviceLibs(const llvm::opt::ArgList & DriverArgs) const358 HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
359   llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
360   if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
361                           true) ||
362       getGPUArch(DriverArgs) == "amdgcnspirv")
363     return {};
364   ArgStringList LibraryPaths;
365 
366   // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
367   for (StringRef Path : RocmInstallation->getRocmDeviceLibPathArg())
368     LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
369 
370   addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
371 
372   // Maintain compatability with --hip-device-lib.
373   auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
374   if (!BCLibArgs.empty()) {
375     for (StringRef BCName : BCLibArgs) {
376       StringRef FullName;
377       bool Found = false;
378       for (StringRef LibraryPath : LibraryPaths) {
379         SmallString<128> Path(LibraryPath);
380         llvm::sys::path::append(Path, BCName);
381         FullName = Path;
382         if (llvm::sys::fs::exists(FullName)) {
383           BCLibs.emplace_back(FullName);
384           Found = true;
385           break;
386         }
387       }
388       if (!Found)
389         getDriver().Diag(diag::err_drv_no_such_file) << BCName;
390     }
391   } else {
392     if (!RocmInstallation->hasDeviceLibrary()) {
393       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
394       return {};
395     }
396     StringRef GpuArch = getGPUArch(DriverArgs);
397     assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
398 
399     // Add common device libraries like ocml etc.
400     for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
401       BCLibs.emplace_back(N);
402 
403     // Add instrument lib.
404     auto InstLib =
405         DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
406     if (InstLib.empty())
407       return BCLibs;
408     if (llvm::sys::fs::exists(InstLib))
409       BCLibs.emplace_back(InstLib);
410     else
411       getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
412   }
413 
414   return BCLibs;
415 }
416 
checkTargetID(const llvm::opt::ArgList & DriverArgs) const417 void HIPAMDToolChain::checkTargetID(
418     const llvm::opt::ArgList &DriverArgs) const {
419   auto PTID = getParsedTargetID(DriverArgs);
420   if (PTID.OptionalTargetID && !PTID.OptionalGPUArch &&
421       PTID.OptionalTargetID != "amdgcnspirv")
422     getDriver().Diag(clang::diag::err_drv_bad_target_id)
423         << *PTID.OptionalTargetID;
424 }
425 
SPIRVAMDToolChain(const Driver & D,const llvm::Triple & Triple,const ArgList & Args)426 SPIRVAMDToolChain::SPIRVAMDToolChain(const Driver &D,
427                                      const llvm::Triple &Triple,
428                                      const ArgList &Args)
429     : ROCMToolChain(D, Triple, Args) {
430   getProgramPaths().push_back(getDriver().Dir);
431 }
432 
buildLinker() const433 Tool *SPIRVAMDToolChain::buildLinker() const {
434   assert(getTriple().getArch() == llvm::Triple::spirv64);
435   return new tools::AMDGCN::Linker(*this);
436 }
437