1 //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HIPAMD.h" 10 #include "AMDGPU.h" 11 #include "CommonArgs.h" 12 #include "HIPUtility.h" 13 #include "clang/Basic/Cuda.h" 14 #include "clang/Basic/TargetID.h" 15 #include "clang/Driver/Compilation.h" 16 #include "clang/Driver/Driver.h" 17 #include "clang/Driver/DriverDiagnostic.h" 18 #include "clang/Driver/InputInfo.h" 19 #include "clang/Driver/Options.h" 20 #include "clang/Driver/SanitizerArgs.h" 21 #include "llvm/Support/Alignment.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/Path.h" 24 #include "llvm/TargetParser/TargetParser.h" 25 26 using namespace clang::driver; 27 using namespace clang::driver::toolchains; 28 using namespace clang::driver::tools; 29 using namespace clang; 30 using namespace llvm::opt; 31 32 #if defined(_WIN32) || defined(_WIN64) 33 #define NULL_FILE "nul" 34 #else 35 #define NULL_FILE "/dev/null" 36 #endif 37 38 static bool shouldSkipSanitizeOption(const ToolChain &TC, 39 const llvm::opt::ArgList &DriverArgs, 40 StringRef TargetID, 41 const llvm::opt::Arg *A) { 42 // For actions without targetID, do nothing. 43 if (TargetID.empty()) 44 return false; 45 Option O = A->getOption(); 46 if (!O.matches(options::OPT_fsanitize_EQ)) 47 return false; 48 49 if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize, 50 options::OPT_fno_gpu_sanitize, true)) 51 return true; 52 53 auto &Diags = TC.getDriver().getDiags(); 54 55 // For simplicity, we only allow -fsanitize=address 56 SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false); 57 if (K != SanitizerKind::Address) 58 return true; 59 60 llvm::StringMap<bool> FeatureMap; 61 auto OptionalGpuArch = parseTargetID(TC.getTriple(), TargetID, &FeatureMap); 62 63 assert(OptionalGpuArch && "Invalid Target ID"); 64 (void)OptionalGpuArch; 65 auto Loc = FeatureMap.find("xnack"); 66 if (Loc == FeatureMap.end() || !Loc->second) { 67 Diags.Report( 68 clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature) 69 << A->getAsString(DriverArgs) << TargetID << "xnack+"; 70 return true; 71 } 72 return false; 73 } 74 75 void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C, 76 const JobAction &JA, 77 const InputInfoList &Inputs, 78 const InputInfo &Output, 79 const llvm::opt::ArgList &Args) const { 80 // Construct llvm-link command. 81 // The output from llvm-link is a bitcode file. 82 ArgStringList LlvmLinkArgs; 83 84 assert(!Inputs.empty() && "Must have at least one input."); 85 86 LlvmLinkArgs.append({"-o", Output.getFilename()}); 87 for (auto Input : Inputs) 88 LlvmLinkArgs.push_back(Input.getFilename()); 89 90 // Look for archive of bundled bitcode in arguments, and add temporary files 91 // for the extracted archive of bitcode to inputs. 92 auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); 93 AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn", 94 TargetID, 95 /*IsBitCodeSDL=*/true, 96 /*PostClangLink=*/false); 97 98 const char *LlvmLink = 99 Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); 100 C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), 101 LlvmLink, LlvmLinkArgs, Inputs, 102 Output)); 103 } 104 105 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, 106 const InputInfoList &Inputs, 107 const InputInfo &Output, 108 const llvm::opt::ArgList &Args) const { 109 // Construct lld command. 110 // The output from ld.lld is an HSA code object file. 111 ArgStringList LldArgs{"-flavor", 112 "gnu", 113 "-m", 114 "elf64_amdgpu", 115 "--no-undefined", 116 "-shared", 117 "-plugin-opt=-amdgpu-internalize-symbols"}; 118 119 auto &TC = getToolChain(); 120 auto &D = TC.getDriver(); 121 assert(!Inputs.empty() && "Must have at least one input."); 122 bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin; 123 addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO); 124 125 // Extract all the -m options 126 std::vector<llvm::StringRef> Features; 127 amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features); 128 129 // Add features to mattr such as cumode 130 std::string MAttrString = "-plugin-opt=-mattr="; 131 for (auto OneFeature : unifyTargetFeatures(Features)) { 132 MAttrString.append(Args.MakeArgString(OneFeature)); 133 if (OneFeature != Features.back()) 134 MAttrString.append(","); 135 } 136 if (!Features.empty()) 137 LldArgs.push_back(Args.MakeArgString(MAttrString)); 138 139 // ToDo: Remove this option after AMDGPU backend supports ISA-level linking. 140 // Since AMDGPU backend currently does not support ISA-level linking, all 141 // called functions need to be imported. 142 if (IsThinLTO) 143 LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all")); 144 145 if (C.getDriver().isSaveTempsEnabled()) 146 LldArgs.push_back("-save-temps"); 147 148 addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); 149 150 // Given that host and device linking happen in separate processes, the device 151 // linker doesn't always have the visibility as to which device symbols are 152 // needed by a program, especially for the device symbol dependencies that are 153 // introduced through the host symbol resolution. 154 // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B() 155 // (B.obj) In this case, the device linker doesn't know that A.obj actually 156 // depends on the kernel functions in B.obj. When linking to static device 157 // library, the device linker may drop some of the device global symbols if 158 // they aren't referenced. As a workaround, we are adding to the 159 // --whole-archive flag such that all global symbols would be linked in. 160 LldArgs.push_back("--whole-archive"); 161 162 for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { 163 StringRef ArgVal = Arg->getValue(1); 164 auto SplitArg = ArgVal.split("-mllvm="); 165 if (!SplitArg.second.empty()) { 166 LldArgs.push_back( 167 Args.MakeArgString(Twine("-plugin-opt=") + SplitArg.second)); 168 } else { 169 LldArgs.push_back(Args.MakeArgString(ArgVal)); 170 } 171 Arg->claim(); 172 } 173 174 LldArgs.append({"-o", Output.getFilename()}); 175 for (auto Input : Inputs) 176 LldArgs.push_back(Input.getFilename()); 177 178 // Look for archive of bundled bitcode in arguments, and add temporary files 179 // for the extracted archive of bitcode to inputs. 180 auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); 181 AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LldArgs, "amdgcn", 182 TargetID, 183 /*IsBitCodeSDL=*/true, 184 /*PostClangLink=*/false); 185 186 LldArgs.push_back("--no-whole-archive"); 187 188 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); 189 C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), 190 Lld, LldArgs, Inputs, Output)); 191 } 192 193 // For amdgcn the inputs of the linker job are device bitcode and output is 194 // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt, 195 // llc, then lld steps. 196 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, 197 const InputInfo &Output, 198 const InputInfoList &Inputs, 199 const ArgList &Args, 200 const char *LinkingOutput) const { 201 if (Inputs.size() > 0 && 202 Inputs[0].getType() == types::TY_Image && 203 JA.getType() == types::TY_Object) 204 return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, 205 Args, JA, *this); 206 207 if (JA.getType() == types::TY_HIP_FATBIN) 208 return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, 209 Args, *this); 210 211 if (JA.getType() == types::TY_LLVM_BC) 212 return constructLlvmLinkCommand(C, JA, Inputs, Output, Args); 213 214 return constructLldCommand(C, JA, Inputs, Output, Args); 215 } 216 217 HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple, 218 const ToolChain &HostTC, const ArgList &Args) 219 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { 220 // Lookup binaries into the driver directory, this is used to 221 // discover the clang-offload-bundler executable. 222 getProgramPaths().push_back(getDriver().Dir); 223 224 // Diagnose unsupported sanitizer options only once. 225 if (!Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, 226 true)) 227 return; 228 for (auto *A : Args.filtered(options::OPT_fsanitize_EQ)) { 229 SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false); 230 if (K != SanitizerKind::Address) 231 D.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target) 232 << A->getAsString(Args) << getTriple().str(); 233 } 234 } 235 236 void HIPAMDToolChain::addClangTargetOptions( 237 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, 238 Action::OffloadKind DeviceOffloadingKind) const { 239 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); 240 241 assert(DeviceOffloadingKind == Action::OFK_HIP && 242 "Only HIP offloading kinds are supported for GPUs."); 243 244 CC1Args.push_back("-fcuda-is-device"); 245 246 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, 247 options::OPT_fno_cuda_approx_transcendentals, false)) 248 CC1Args.push_back("-fcuda-approx-transcendentals"); 249 250 if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, 251 false)) 252 CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); 253 254 StringRef MaxThreadsPerBlock = 255 DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); 256 if (!MaxThreadsPerBlock.empty()) { 257 std::string ArgStr = 258 (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock).str(); 259 CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr)); 260 } 261 262 CC1Args.push_back("-fcuda-allow-variadic-functions"); 263 264 // Default to "hidden" visibility, as object level linking will not be 265 // supported for the foreseeable future. 266 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, 267 options::OPT_fvisibility_ms_compat)) { 268 CC1Args.append({"-fvisibility=hidden"}); 269 CC1Args.push_back("-fapply-global-visibility-to-externs"); 270 } 271 272 for (auto BCFile : getDeviceLibs(DriverArgs)) { 273 CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" 274 : "-mlink-bitcode-file"); 275 CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); 276 } 277 } 278 279 llvm::opt::DerivedArgList * 280 HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, 281 StringRef BoundArch, 282 Action::OffloadKind DeviceOffloadKind) const { 283 DerivedArgList *DAL = 284 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); 285 if (!DAL) 286 DAL = new DerivedArgList(Args.getBaseArgs()); 287 288 const OptTable &Opts = getDriver().getOpts(); 289 290 for (Arg *A : Args) { 291 if (!shouldSkipSanitizeOption(*this, Args, BoundArch, A)) 292 DAL->append(A); 293 } 294 295 if (!BoundArch.empty()) { 296 DAL->eraseArg(options::OPT_mcpu_EQ); 297 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); 298 checkTargetID(*DAL); 299 } 300 301 return DAL; 302 } 303 304 Tool *HIPAMDToolChain::buildLinker() const { 305 assert(getTriple().getArch() == llvm::Triple::amdgcn); 306 return new tools::AMDGCN::Linker(*this); 307 } 308 309 void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { 310 HostTC.addClangWarningOptions(CC1Args); 311 } 312 313 ToolChain::CXXStdlibType 314 HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const { 315 return HostTC.GetCXXStdlibType(Args); 316 } 317 318 void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, 319 ArgStringList &CC1Args) const { 320 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); 321 } 322 323 void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs( 324 const ArgList &Args, ArgStringList &CC1Args) const { 325 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); 326 } 327 328 void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args, 329 ArgStringList &CC1Args) const { 330 HostTC.AddIAMCUIncludeArgs(Args, CC1Args); 331 } 332 333 void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, 334 ArgStringList &CC1Args) const { 335 RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args); 336 } 337 338 SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const { 339 // The HIPAMDToolChain only supports sanitizers in the sense that it allows 340 // sanitizer arguments on the command line if they are supported by the host 341 // toolchain. The HIPAMDToolChain will actually ignore any command line 342 // arguments for any of these "supported" sanitizers. That means that no 343 // sanitization of device code is actually supported at this time. 344 // 345 // This behavior is necessary because the host and device toolchains 346 // invocations often share the command line, so the device toolchain must 347 // tolerate flags meant only for the host toolchain. 348 return HostTC.getSupportedSanitizers(); 349 } 350 351 VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, 352 const ArgList &Args) const { 353 return HostTC.computeMSVCVersion(D, Args); 354 } 355 356 llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> 357 HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { 358 llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; 359 if (DriverArgs.hasArg(options::OPT_nogpulib)) 360 return {}; 361 ArgStringList LibraryPaths; 362 363 // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. 364 for (StringRef Path : RocmInstallation->getRocmDeviceLibPathArg()) 365 LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); 366 367 addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH"); 368 369 // Maintain compatability with --hip-device-lib. 370 auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); 371 if (!BCLibArgs.empty()) { 372 llvm::for_each(BCLibArgs, [&](StringRef BCName) { 373 StringRef FullName; 374 for (StringRef LibraryPath : LibraryPaths) { 375 SmallString<128> Path(LibraryPath); 376 llvm::sys::path::append(Path, BCName); 377 FullName = Path; 378 if (llvm::sys::fs::exists(FullName)) { 379 BCLibs.push_back(FullName); 380 return; 381 } 382 } 383 getDriver().Diag(diag::err_drv_no_such_file) << BCName; 384 }); 385 } else { 386 if (!RocmInstallation->hasDeviceLibrary()) { 387 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; 388 return {}; 389 } 390 StringRef GpuArch = getGPUArch(DriverArgs); 391 assert(!GpuArch.empty() && "Must have an explicit GPU arch."); 392 393 // If --hip-device-lib is not set, add the default bitcode libraries. 394 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, 395 options::OPT_fno_gpu_sanitize, true) && 396 getSanitizerArgs(DriverArgs).needsAsanRt()) { 397 auto AsanRTL = RocmInstallation->getAsanRTLPath(); 398 if (AsanRTL.empty()) { 399 unsigned DiagID = getDriver().getDiags().getCustomDiagID( 400 DiagnosticsEngine::Error, 401 "AMDGPU address sanitizer runtime library (asanrtl) is not found. " 402 "Please install ROCm device library which supports address " 403 "sanitizer"); 404 getDriver().Diag(DiagID); 405 return {}; 406 } else 407 BCLibs.emplace_back(AsanRTL, /*ShouldInternalize=*/false); 408 } 409 410 // Add the HIP specific bitcode library. 411 BCLibs.push_back(RocmInstallation->getHIPPath()); 412 413 // Add common device libraries like ocml etc. 414 for (StringRef N : getCommonDeviceLibNames(DriverArgs, GpuArch.str())) 415 BCLibs.emplace_back(N); 416 417 // Add instrument lib. 418 auto InstLib = 419 DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); 420 if (InstLib.empty()) 421 return BCLibs; 422 if (llvm::sys::fs::exists(InstLib)) 423 BCLibs.push_back(InstLib); 424 else 425 getDriver().Diag(diag::err_drv_no_such_file) << InstLib; 426 } 427 428 return BCLibs; 429 } 430 431 void HIPAMDToolChain::checkTargetID( 432 const llvm::opt::ArgList &DriverArgs) const { 433 auto PTID = getParsedTargetID(DriverArgs); 434 if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { 435 getDriver().Diag(clang::diag::err_drv_bad_target_id) 436 << *PTID.OptionalTargetID; 437 } 438 } 439