xref: /freebsd/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp (revision 3a9a9c0ca44ec535dcf73fe8462bee458e54814b)
1 //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUOpenMP.h"
10 #include "AMDGPU.h"
11 #include "CommonArgs.h"
12 #include "ToolChains/ROCm.h"
13 #include "clang/Basic/DiagnosticDriver.h"
14 #include "clang/Driver/Compilation.h"
15 #include "clang/Driver/Driver.h"
16 #include "clang/Driver/DriverDiagnostic.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "clang/Driver/Tool.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/FormatAdapters.h"
23 #include "llvm/Support/FormatVariadic.h"
24 #include "llvm/Support/Path.h"
25 
26 using namespace clang::driver;
27 using namespace clang::driver::toolchains;
28 using namespace clang::driver::tools;
29 using namespace clang;
30 using namespace llvm::opt;
31 
32 namespace {
33 
34 static const char *getOutputFileName(Compilation &C, StringRef Base,
35                                      const char *Postfix,
36                                      const char *Extension) {
37   const char *OutputFileName;
38   if (C.getDriver().isSaveTempsEnabled()) {
39     OutputFileName =
40         C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
41   } else {
42     std::string TmpName =
43         C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
44     OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
45   }
46   return OutputFileName;
47 }
48 
49 static void addLLCOptArg(const llvm::opt::ArgList &Args,
50                          llvm::opt::ArgStringList &CmdArgs) {
51   if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
52     StringRef OOpt = "0";
53     if (A->getOption().matches(options::OPT_O4) ||
54         A->getOption().matches(options::OPT_Ofast))
55       OOpt = "3";
56     else if (A->getOption().matches(options::OPT_O0))
57       OOpt = "0";
58     else if (A->getOption().matches(options::OPT_O)) {
59       // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
60       // so we map -Os/-Oz to -O2.
61       // Only clang supports -Og, and maps it to -O1.
62       // We map anything else to -O2.
63       OOpt = llvm::StringSwitch<const char *>(A->getValue())
64                  .Case("1", "1")
65                  .Case("2", "2")
66                  .Case("3", "3")
67                  .Case("s", "2")
68                  .Case("z", "2")
69                  .Case("g", "1")
70                  .Default("0");
71     }
72     CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
73   }
74 }
75 
76 static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
77                                  std::string &GPUArch) {
78   if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
79     std::string ErrMsg =
80         llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
81     TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
82     return false;
83   }
84 
85   return true;
86 }
87 } // namespace
88 
89 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
90     const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
91     const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
92     StringRef SubArchName, StringRef OutputFilePrefix) const {
93   ArgStringList CmdArgs;
94 
95   for (const auto &II : Inputs)
96     if (II.isFilename())
97       CmdArgs.push_back(II.getFilename());
98 
99   bool HasLibm = false;
100   if (Args.hasArg(options::OPT_l)) {
101     auto Lm = Args.getAllArgValues(options::OPT_l);
102     for (auto &Lib : Lm) {
103       if (Lib == "m") {
104         HasLibm = true;
105         break;
106       }
107     }
108 
109     if (HasLibm) {
110       // This is not certain to work. The device libs added here, and passed to
111       // llvm-link, are missing attributes that they expect to be inserted when
112       // passed to mlink-builtin-bitcode. The amdgpu backend does not generate
113       // conservatively correct code when attributes are missing, so this may
114       // be the root cause of miscompilations. Passing via mlink-builtin-bitcode
115       // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes
116       // on each function, see D28538 for context.
117       // Potential workarounds:
118       //  - unconditionally link all of the device libs to every translation
119       //    unit in clang via mlink-builtin-bitcode
120       //  - build a libm bitcode file as part of the DeviceRTL and explictly
121       //    mlink-builtin-bitcode the rocm device libs components at build time
122       //  - drop this llvm-link fork in favour or some calls into LLVM, chosen
123       //    to do basically the same work as llvm-link but with that call first
124       //  - write an opt pass that sets that on every function it sees and pipe
125       //    the device-libs bitcode through that on the way to this llvm-link
126       SmallVector<std::string, 12> BCLibs =
127           AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
128       llvm::for_each(BCLibs, [&](StringRef BCFile) {
129         CmdArgs.push_back(Args.MakeArgString(BCFile));
130       });
131     }
132   }
133 
134   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
135                              SubArchName, /*isBitCodeSDL=*/true,
136                              /*postClangLink=*/false);
137   // Add an intermediate output file.
138   CmdArgs.push_back("-o");
139   const char *OutputFileName =
140       getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
141   CmdArgs.push_back(OutputFileName);
142   const char *Exec =
143       Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
144   C.addCommand(std::make_unique<Command>(
145       JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
146       InputInfo(&JA, Args.MakeArgString(OutputFileName))));
147 
148   // If we linked in libm definitions late we run another round of optimizations
149   // to inline the definitions and fold what is foldable.
150   if (HasLibm) {
151     ArgStringList OptCmdArgs;
152     const char *OptOutputFileName =
153         getOutputFileName(C, OutputFilePrefix, "-linked-opt", "bc");
154     addLLCOptArg(Args, OptCmdArgs);
155     OptCmdArgs.push_back(OutputFileName);
156     OptCmdArgs.push_back("-o");
157     OptCmdArgs.push_back(OptOutputFileName);
158     const char *OptExec =
159         Args.MakeArgString(getToolChain().GetProgramPath("opt"));
160     C.addCommand(std::make_unique<Command>(
161         JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptCmdArgs,
162         InputInfo(&JA, Args.MakeArgString(OutputFileName)),
163         InputInfo(&JA, Args.MakeArgString(OptOutputFileName))));
164     OutputFileName = OptOutputFileName;
165   }
166 
167   return OutputFileName;
168 }
169 
170 const char *AMDGCN::OpenMPLinker::constructLlcCommand(
171     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
172     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
173     llvm::StringRef OutputFilePrefix, const char *InputFileName,
174     bool OutputIsAsm) const {
175   // Construct llc command.
176   ArgStringList LlcArgs;
177   // The input to llc is the output from opt.
178   LlcArgs.push_back(InputFileName);
179   // Pass optimization arg to llc.
180   addLLCOptArg(Args, LlcArgs);
181   LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
182   LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
183   LlcArgs.push_back(
184       Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
185 
186   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
187     LlcArgs.push_back(A->getValue(0));
188   }
189 
190   // Add output filename
191   LlcArgs.push_back("-o");
192   const char *LlcOutputFile =
193       getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
194   LlcArgs.push_back(LlcOutputFile);
195   const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
196   C.addCommand(std::make_unique<Command>(
197       JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
198       InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
199   return LlcOutputFile;
200 }
201 
202 void AMDGCN::OpenMPLinker::constructLldCommand(
203     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
204     const InputInfo &Output, const llvm::opt::ArgList &Args,
205     const char *InputFileName) const {
206   // Construct lld command.
207   // The output from ld.lld is an HSA code object file.
208   ArgStringList LldArgs{"-flavor",    "gnu", "--no-undefined",
209                         "-shared",    "-o",  Output.getFilename(),
210                         InputFileName};
211 
212   const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
213   C.addCommand(std::make_unique<Command>(
214       JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
215       InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
216 }
217 
218 // For amdgcn the inputs of the linker job are device bitcode and output is
219 // object file. It calls llvm-link, opt, llc, then lld steps.
220 void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
221                                         const InputInfo &Output,
222                                         const InputInfoList &Inputs,
223                                         const ArgList &Args,
224                                         const char *LinkingOutput) const {
225   const ToolChain &TC = getToolChain();
226   assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
227 
228   const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
229       static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
230 
231   std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
232   if (GPUArch.empty()) {
233     if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
234       return;
235   }
236 
237   // Prefix for temporary file name.
238   std::string Prefix;
239   for (const auto &II : Inputs)
240     if (II.isFilename())
241       Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
242   assert(Prefix.length() && "no linker inputs are files ");
243 
244   // Each command outputs different files.
245   const char *LLVMLinkCommand = constructLLVMLinkCommand(
246       AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
247 
248   // Produce readable assembly if save-temps is enabled.
249   if (C.getDriver().isSaveTempsEnabled())
250     constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
251                         /*OutputIsAsm=*/true);
252   const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
253                                                Prefix, LLVMLinkCommand);
254   constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
255 }
256 
257 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
258                                              const llvm::Triple &Triple,
259                                              const ToolChain &HostTC,
260                                              const ArgList &Args)
261     : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
262   // Lookup binaries into the driver directory, this is used to
263   // discover the clang-offload-bundler executable.
264   getProgramPaths().push_back(getDriver().Dir);
265 }
266 
267 void AMDGPUOpenMPToolChain::addClangTargetOptions(
268     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
269     Action::OffloadKind DeviceOffloadingKind) const {
270   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
271 
272   std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
273   if (GPUArch.empty()) {
274     if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
275       return;
276   }
277 
278   assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
279          "Only OpenMP offloading kinds are supported.");
280 
281   CC1Args.push_back("-target-cpu");
282   CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
283   CC1Args.push_back("-fcuda-is-device");
284 
285   if (DriverArgs.hasArg(options::OPT_nogpulib))
286     return;
287 
288   // Link the bitcode library late if we're using device LTO.
289   if (getDriver().isUsingLTO(/* IsOffload */ true))
290     return;
291 
292   std::string BitcodeSuffix;
293   if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
294                          options::OPT_fno_openmp_target_new_runtime, true))
295     BitcodeSuffix = "new-amdgpu-" + GPUArch;
296   else
297     BitcodeSuffix = "amdgcn-" + GPUArch;
298 
299   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
300                      getTriple());
301 }
302 
303 llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
304     const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
305     Action::OffloadKind DeviceOffloadKind) const {
306   DerivedArgList *DAL =
307       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
308   if (!DAL)
309     DAL = new DerivedArgList(Args.getBaseArgs());
310 
311   const OptTable &Opts = getDriver().getOpts();
312 
313   if (DeviceOffloadKind == Action::OFK_OpenMP) {
314     for (Arg *A : Args)
315       if (!llvm::is_contained(*DAL, A))
316         DAL->append(A);
317 
318     std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str();
319     if (Arch.empty()) {
320       checkSystemForAMDGPU(Args, *this, Arch);
321       DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
322     }
323 
324     return DAL;
325   }
326 
327   for (Arg *A : Args) {
328     DAL->append(A);
329   }
330 
331   if (!BoundArch.empty()) {
332     DAL->eraseArg(options::OPT_march_EQ);
333     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
334                       BoundArch);
335   }
336 
337   return DAL;
338 }
339 
340 Tool *AMDGPUOpenMPToolChain::buildLinker() const {
341   assert(getTriple().isAMDGCN());
342   return new tools::AMDGCN::OpenMPLinker(*this);
343 }
344 
345 void AMDGPUOpenMPToolChain::addClangWarningOptions(
346     ArgStringList &CC1Args) const {
347   HostTC.addClangWarningOptions(CC1Args);
348 }
349 
350 ToolChain::CXXStdlibType
351 AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
352   return HostTC.GetCXXStdlibType(Args);
353 }
354 
355 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
356     const ArgList &DriverArgs, ArgStringList &CC1Args) const {
357   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
358 }
359 
360 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
361                                                 ArgStringList &CC1Args) const {
362   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
363 }
364 
365 SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
366   // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
367   // allows sanitizer arguments on the command line if they are supported by the
368   // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
369   // line arguments for any of these "supported" sanitizers. That means that no
370   // sanitization of device code is actually supported at this time.
371   //
372   // This behavior is necessary because the host and device toolchains
373   // invocations often share the command line, so the device toolchain must
374   // tolerate flags meant only for the host toolchain.
375   return HostTC.getSupportedSanitizers();
376 }
377 
378 VersionTuple
379 AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
380                                           const ArgList &Args) const {
381   return HostTC.computeMSVCVersion(D, Args);
382 }
383