1 //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUOpenMP.h" 10 #include "AMDGPU.h" 11 #include "CommonArgs.h" 12 #include "ToolChains/ROCm.h" 13 #include "clang/Basic/DiagnosticDriver.h" 14 #include "clang/Driver/Compilation.h" 15 #include "clang/Driver/Driver.h" 16 #include "clang/Driver/DriverDiagnostic.h" 17 #include "clang/Driver/InputInfo.h" 18 #include "clang/Driver/Options.h" 19 #include "clang/Driver/Tool.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Support/FileSystem.h" 22 #include "llvm/Support/FormatAdapters.h" 23 #include "llvm/Support/FormatVariadic.h" 24 #include "llvm/Support/Path.h" 25 26 using namespace clang::driver; 27 using namespace clang::driver::toolchains; 28 using namespace clang::driver::tools; 29 using namespace clang; 30 using namespace llvm::opt; 31 32 namespace { 33 34 static const char *getOutputFileName(Compilation &C, StringRef Base, 35 const char *Postfix, 36 const char *Extension) { 37 const char *OutputFileName; 38 if (C.getDriver().isSaveTempsEnabled()) { 39 OutputFileName = 40 C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension); 41 } else { 42 std::string TmpName = 43 C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); 44 OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); 45 } 46 return OutputFileName; 47 } 48 49 static void addLLCOptArg(const llvm::opt::ArgList &Args, 50 llvm::opt::ArgStringList &CmdArgs) { 51 if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { 52 StringRef OOpt = "0"; 53 if (A->getOption().matches(options::OPT_O4) || 54 A->getOption().matches(options::OPT_Ofast)) 55 OOpt = "3"; 56 else if (A->getOption().matches(options::OPT_O0)) 57 OOpt = "0"; 58 else if (A->getOption().matches(options::OPT_O)) { 59 // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3 60 // so we map -Os/-Oz to -O2. 61 // Only clang supports -Og, and maps it to -O1. 62 // We map anything else to -O2. 63 OOpt = llvm::StringSwitch<const char *>(A->getValue()) 64 .Case("1", "1") 65 .Case("2", "2") 66 .Case("3", "3") 67 .Case("s", "2") 68 .Case("z", "2") 69 .Case("g", "1") 70 .Default("0"); 71 } 72 CmdArgs.push_back(Args.MakeArgString("-O" + OOpt)); 73 } 74 } 75 76 static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, 77 std::string &GPUArch) { 78 if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { 79 std::string ErrMsg = 80 llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); 81 TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; 82 return false; 83 } 84 85 return true; 86 } 87 } // namespace 88 89 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( 90 const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, 91 const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, 92 StringRef SubArchName, StringRef OutputFilePrefix) const { 93 ArgStringList CmdArgs; 94 95 for (const auto &II : Inputs) 96 if (II.isFilename()) 97 CmdArgs.push_back(II.getFilename()); 98 99 bool HasLibm = false; 100 if (Args.hasArg(options::OPT_l)) { 101 auto Lm = Args.getAllArgValues(options::OPT_l); 102 for (auto &Lib : Lm) { 103 if (Lib == "m") { 104 HasLibm = true; 105 break; 106 } 107 } 108 109 if (HasLibm) { 110 // This is not certain to work. The device libs added here, and passed to 111 // llvm-link, are missing attributes that they expect to be inserted when 112 // passed to mlink-builtin-bitcode. The amdgpu backend does not generate 113 // conservatively correct code when attributes are missing, so this may 114 // be the root cause of miscompilations. Passing via mlink-builtin-bitcode 115 // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes 116 // on each function, see D28538 for context. 117 // Potential workarounds: 118 // - unconditionally link all of the device libs to every translation 119 // unit in clang via mlink-builtin-bitcode 120 // - build a libm bitcode file as part of the DeviceRTL and explictly 121 // mlink-builtin-bitcode the rocm device libs components at build time 122 // - drop this llvm-link fork in favour or some calls into LLVM, chosen 123 // to do basically the same work as llvm-link but with that call first 124 // - write an opt pass that sets that on every function it sees and pipe 125 // the device-libs bitcode through that on the way to this llvm-link 126 SmallVector<std::string, 12> BCLibs = 127 AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); 128 llvm::for_each(BCLibs, [&](StringRef BCFile) { 129 CmdArgs.push_back(Args.MakeArgString(BCFile)); 130 }); 131 } 132 } 133 134 AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn", 135 SubArchName, /*isBitCodeSDL=*/true, 136 /*postClangLink=*/false); 137 // Add an intermediate output file. 138 CmdArgs.push_back("-o"); 139 const char *OutputFileName = 140 getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); 141 CmdArgs.push_back(OutputFileName); 142 const char *Exec = 143 Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); 144 C.addCommand(std::make_unique<Command>( 145 JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, 146 InputInfo(&JA, Args.MakeArgString(OutputFileName)))); 147 148 // If we linked in libm definitions late we run another round of optimizations 149 // to inline the definitions and fold what is foldable. 150 if (HasLibm) { 151 ArgStringList OptCmdArgs; 152 const char *OptOutputFileName = 153 getOutputFileName(C, OutputFilePrefix, "-linked-opt", "bc"); 154 addLLCOptArg(Args, OptCmdArgs); 155 OptCmdArgs.push_back(OutputFileName); 156 OptCmdArgs.push_back("-o"); 157 OptCmdArgs.push_back(OptOutputFileName); 158 const char *OptExec = 159 Args.MakeArgString(getToolChain().GetProgramPath("opt")); 160 C.addCommand(std::make_unique<Command>( 161 JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptCmdArgs, 162 InputInfo(&JA, Args.MakeArgString(OutputFileName)), 163 InputInfo(&JA, Args.MakeArgString(OptOutputFileName)))); 164 OutputFileName = OptOutputFileName; 165 } 166 167 return OutputFileName; 168 } 169 170 const char *AMDGCN::OpenMPLinker::constructLlcCommand( 171 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 172 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, 173 llvm::StringRef OutputFilePrefix, const char *InputFileName, 174 bool OutputIsAsm) const { 175 // Construct llc command. 176 ArgStringList LlcArgs; 177 // The input to llc is the output from opt. 178 LlcArgs.push_back(InputFileName); 179 // Pass optimization arg to llc. 180 addLLCOptArg(Args, LlcArgs); 181 LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); 182 LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); 183 LlcArgs.push_back( 184 Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj"))); 185 186 for (const Arg *A : Args.filtered(options::OPT_mllvm)) { 187 LlcArgs.push_back(A->getValue(0)); 188 } 189 190 // Add output filename 191 LlcArgs.push_back("-o"); 192 const char *LlcOutputFile = 193 getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o"); 194 LlcArgs.push_back(LlcOutputFile); 195 const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc")); 196 C.addCommand(std::make_unique<Command>( 197 JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs, 198 InputInfo(&JA, Args.MakeArgString(LlcOutputFile)))); 199 return LlcOutputFile; 200 } 201 202 void AMDGCN::OpenMPLinker::constructLldCommand( 203 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 204 const InputInfo &Output, const llvm::opt::ArgList &Args, 205 const char *InputFileName) const { 206 // Construct lld command. 207 // The output from ld.lld is an HSA code object file. 208 ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", 209 "-shared", "-o", Output.getFilename(), 210 InputFileName}; 211 212 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); 213 C.addCommand(std::make_unique<Command>( 214 JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs, 215 InputInfo(&JA, Args.MakeArgString(Output.getFilename())))); 216 } 217 218 // For amdgcn the inputs of the linker job are device bitcode and output is 219 // object file. It calls llvm-link, opt, llc, then lld steps. 220 void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, 221 const InputInfo &Output, 222 const InputInfoList &Inputs, 223 const ArgList &Args, 224 const char *LinkingOutput) const { 225 const ToolChain &TC = getToolChain(); 226 assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target"); 227 228 const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC = 229 static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC); 230 231 std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str(); 232 if (GPUArch.empty()) { 233 if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch)) 234 return; 235 } 236 237 // Prefix for temporary file name. 238 std::string Prefix; 239 for (const auto &II : Inputs) 240 if (II.isFilename()) 241 Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch; 242 assert(Prefix.length() && "no linker inputs are files "); 243 244 // Each command outputs different files. 245 const char *LLVMLinkCommand = constructLLVMLinkCommand( 246 AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix); 247 248 // Produce readable assembly if save-temps is enabled. 249 if (C.getDriver().isSaveTempsEnabled()) 250 constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand, 251 /*OutputIsAsm=*/true); 252 const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch, 253 Prefix, LLVMLinkCommand); 254 constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); 255 } 256 257 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, 258 const llvm::Triple &Triple, 259 const ToolChain &HostTC, 260 const ArgList &Args) 261 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { 262 // Lookup binaries into the driver directory, this is used to 263 // discover the clang-offload-bundler executable. 264 getProgramPaths().push_back(getDriver().Dir); 265 } 266 267 void AMDGPUOpenMPToolChain::addClangTargetOptions( 268 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, 269 Action::OffloadKind DeviceOffloadingKind) const { 270 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); 271 272 std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str(); 273 if (GPUArch.empty()) { 274 if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch)) 275 return; 276 } 277 278 assert(DeviceOffloadingKind == Action::OFK_OpenMP && 279 "Only OpenMP offloading kinds are supported."); 280 281 CC1Args.push_back("-target-cpu"); 282 CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch)); 283 CC1Args.push_back("-fcuda-is-device"); 284 285 if (DriverArgs.hasArg(options::OPT_nogpulib)) 286 return; 287 288 // Link the bitcode library late if we're using device LTO. 289 if (getDriver().isUsingLTO(/* IsOffload */ true)) 290 return; 291 292 std::string BitcodeSuffix; 293 if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, 294 options::OPT_fno_openmp_target_new_runtime, true)) 295 BitcodeSuffix = "new-amdgpu-" + GPUArch; 296 else 297 BitcodeSuffix = "amdgcn-" + GPUArch; 298 299 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, 300 getTriple()); 301 } 302 303 llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( 304 const llvm::opt::DerivedArgList &Args, StringRef BoundArch, 305 Action::OffloadKind DeviceOffloadKind) const { 306 DerivedArgList *DAL = 307 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); 308 if (!DAL) 309 DAL = new DerivedArgList(Args.getBaseArgs()); 310 311 const OptTable &Opts = getDriver().getOpts(); 312 313 if (DeviceOffloadKind == Action::OFK_OpenMP) { 314 for (Arg *A : Args) 315 if (!llvm::is_contained(*DAL, A)) 316 DAL->append(A); 317 318 std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str(); 319 if (Arch.empty()) { 320 checkSystemForAMDGPU(Args, *this, Arch); 321 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch); 322 } 323 324 return DAL; 325 } 326 327 for (Arg *A : Args) { 328 DAL->append(A); 329 } 330 331 if (!BoundArch.empty()) { 332 DAL->eraseArg(options::OPT_march_EQ); 333 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), 334 BoundArch); 335 } 336 337 return DAL; 338 } 339 340 Tool *AMDGPUOpenMPToolChain::buildLinker() const { 341 assert(getTriple().isAMDGCN()); 342 return new tools::AMDGCN::OpenMPLinker(*this); 343 } 344 345 void AMDGPUOpenMPToolChain::addClangWarningOptions( 346 ArgStringList &CC1Args) const { 347 HostTC.addClangWarningOptions(CC1Args); 348 } 349 350 ToolChain::CXXStdlibType 351 AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { 352 return HostTC.GetCXXStdlibType(Args); 353 } 354 355 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( 356 const ArgList &DriverArgs, ArgStringList &CC1Args) const { 357 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); 358 } 359 360 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, 361 ArgStringList &CC1Args) const { 362 HostTC.AddIAMCUIncludeArgs(Args, CC1Args); 363 } 364 365 SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const { 366 // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it 367 // allows sanitizer arguments on the command line if they are supported by the 368 // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command 369 // line arguments for any of these "supported" sanitizers. That means that no 370 // sanitization of device code is actually supported at this time. 371 // 372 // This behavior is necessary because the host and device toolchains 373 // invocations often share the command line, so the device toolchain must 374 // tolerate flags meant only for the host toolchain. 375 return HostTC.getSupportedSanitizers(); 376 } 377 378 VersionTuple 379 AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, 380 const ArgList &Args) const { 381 return HostTC.computeMSVCVersion(D, Args); 382 } 383