//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/SmallString.h"
using namespace clang;
using namespace clang::targets;

namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

/// Data layout string installed by the constructor when the triple is not
/// AMDGCN (i.e. the R600 path).
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

/// Data layout string installed by the constructor when the triple is AMDGCN.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
    "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
    "v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";

/// Language-address-space to target-address-space table in which the default
/// language address space maps to FLAT_ADDRESS. Selected by
/// setAddressSpaceMap(/*DefaultIsPrivate=*/false).
///
/// Each entry is labelled with the language address space it stands for; the
/// entries are positional, so their order must not be changed independently of
/// the labels (presumably the LangAS enumeration order -- confirm against the
/// LangAS definition when editing).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
    // FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
    // will break loudly.
    llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
    llvm::AMDGPUAS::GLOBAL_ADDRESS,  // hlsl_device
    llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
};

/// Language-address-space to target-address-space table in which the default
/// language address space maps to PRIVATE_ADDRESS. Selected by
/// setAddressSpaceMap(/*DefaultIsPrivate=*/true).
///
/// The entries are positional and parallel to AMDGPUDefIsGenMap; the two
/// tables differ in the Default entry and in the SYCL entries, which are
/// placeholders here.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    // SYCL address space values for this map are dummy
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_device
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_host
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_local
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // hlsl_private
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // hlsl_device
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // hlsl_input
};
} // namespace targets
} // namespace clang

/// Number of AMDGPU target-specific builtins, computed as the distance between
/// the AMDGPU "last" builtin ID and the first target-specific builtin ID.
static constexpr int NumBuiltins =
    clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;

/// String table for the AMDGPU builtins. It is generated by expanding every
/// BUILTIN / TARGET_BUILTIN entry of BuiltinsAMDGPU.def through the
/// CLANG_*_STR_TABLE macros; the #define lines must stay immediately before
/// the #include so the .def file sees them.
static constexpr llvm::StringTable BuiltinStrings =
    CLANG_BUILTIN_STR_TABLE_START
#define BUILTIN CLANG_BUILTIN_STR_TABLE
#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsAMDGPU.def"
    ;

/// Builtin info records for the AMDGPU builtins, generated from the same
/// BuiltinsAMDGPU.def file as BuiltinStrings (this time through the
/// CLANG_*_ENTRY macros) and sized by NumBuiltins.
static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
#define BUILTIN CLANG_BUILTIN_ENTRY
#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
#include "clang/Basic/BuiltinsAMDGPU.def"
});

/// Register names exposed through getGCCRegNames(). The table lists, in order:
/// v0-v255, s0-s127, a group of named special registers (exec, vcc, scc, m0,
/// flat_scratch and their _lo/_hi halves), and a0-a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};

/// Returns a non-owning view over the static GCCRegNames table.
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

/// Populates \p Features for \p CPU.
///
/// The map is first seeded by fillAMDGPUFeatureMap(), then the generic
/// TargetInfo::initFeatureMap() is applied with \p FeatureVec, and finally
/// insertWaveSizeFeature() is run on the result.
///
/// \returns false if the base implementation fails or if
/// insertWaveSizeFeature() reports INVALID_FEATURE_COMBINATION or
/// UNSUPPORTED_TARGET_FEATURE (a diagnostic is emitted through \p Diags in
/// those two cases); true otherwise.
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  using namespace llvm::AMDGPU;
  fillAMDGPUFeatureMap(CPU, getTriple(), Features);
  if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
    return false;

  // TODO: Should move this logic into TargetParser
  // HasError.first is the status code; HasError.second is streamed into the
  // diagnostic as its argument.
  auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
  switch (HasError.first) {
  default:
    // Any other status is treated as success.
    break;
  case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
    Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
    return false;
  case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
    Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
    return false;
  }

  return true;
}

/// Fills \p Values with the valid architecture names for this target: the
/// AMDGCN list when the triple is AMDGCN, the R600 list otherwise.
void AMDGPUTargetInfo::fillValidCPUList(
    SmallVectorImpl<StringRef> &Values) const {
  if (isAMDGCN(getTriple()))
    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  else
    llvm::AMDGPU::fillValidArchListR600(Values);
}

/// Points AddrSpaceMap at AMDGPUDefIsPrivMap when \p DefaultIsPrivate is true
/// and at AMDGPUDefIsGenMap otherwise.
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
}

/// Constructs the target info for \p Triple.
///
/// GPUKind is parsed from Opts.CPU and GPUFeatures is looked up from GPUKind,
/// using the AMDGCN or R600 helpers depending on the triple.
/// NOTE(review): GPUFeatures' initializer reads GPUKind, so this presumably
/// relies on GPUKind being declared before GPUFeatures in the class -- confirm
/// in AMDGPU.h.
AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      GPUKind(isAMDGCN(Triple) ?
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
              llvm::AMDGPU::parseArchR600(Opts.CPU)),
      GPUFeatures(isAMDGCN(Triple) ?
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);

  // The default address space starts out as private for Mesa3D and for
  // non-AMDGCN triples; adjust() may change the choice later.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  if (isAMDGCN(Triple)) {
    // __bf16 is always available as a load/store only type on AMDGCN.
    BFloat16Width = BFloat16Align = 16;
    BFloat16Format = &llvm::APFloat::BFloat();
  }

  HasLegalHalfType = true;
  HasFloat16 = true;
  // 32 when the arch attributes include FEATURE_WAVE32, otherwise 64.
  WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;

  // Set pointer width and alignment for the generic address space.
  PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
  if (getMaxPointerWidth() == 64) {
    // With 64-bit pointers, `long` becomes 64-bit and backs size_t,
    // ptrdiff_t and intptr_t.
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
  // CU mode is on unless the arch attributes include FEATURE_WGP.
  CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
  // Register these feature names in ReadOnlyFeatures (how that set is
  // consumed is defined outside this file).
  for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"})
    ReadOnlyFeatures.insert(F);
  HalfArgsAndReturns = true;
}

/// Applies language-option dependent adjustments after the generic
/// TargetInfo::adjust(): re-selects the address space map and rebuilds
/// AtomicOpts from \p Opts.
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts,
                              const TargetInfo *Aux) {
  TargetInfo::adjust(Diags, Opts, Aux);
  // ToDo: There are still a few places using default address space as private
  // address space in OpenCL, which needs to be cleaned up, then the references
  // to OpenCL can be removed from the following line.
  setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
                     !isAMDGCN(getTriple()));

  AtomicOpts = AtomicOptions(Opts);
}

/// Returns the AMDGPU builtins as a single shard pairing the file-local
/// BuiltinStrings table with BuiltinInfos.
llvm::SmallVector<Builtin::InfosShard>
AMDGPUTargetInfo::getTargetBuiltins() const {
  return {{&BuiltinStrings, BuiltinInfos}};
}

/// Defines the AMDGPU predefined macros on \p Builder.
///
/// __AMD__, __AMDGPU__ and one of __AMDGCN__ / __R600__ are always defined.
/// Everything else is skipped when no GPU kind is set, unless compiling HIP
/// host code (Opts.HIP without Opts.CUDAIsDevice).
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
  Builder.defineMacro("__AMD__");
  Builder.defineMacro("__AMDGPU__");

  if (isAMDGCN(getTriple()))
    Builder.defineMacro("__AMDGCN__");
  else
    Builder.defineMacro("__R600__");

  // Legacy HIP host code relies on these default attributes to be defined.
  bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
  if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
    return;

  llvm::SmallString<16> CanonName =
      (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
                             : getArchNameR600(GPUKind));

  // Sanitize the name of generic targets.
  // e.g. gfx10-1-generic -> gfx10_1_generic
  if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
      GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
    llvm::replace(CanonName, '-', '_');
  }

  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
  // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
  if (isAMDGCN(getTriple()) && !IsHIPHost) {
    assert(StringRef(CanonName).starts_with("gfx") &&
           "Invalid amdgcn canonical name");
    StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
    Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
                        Twine("__"));
    // Both macros below expand to quoted string literals.
    Builder.defineMacro("__amdgcn_processor__",
                        Twine("\"") + Twine(CanonName) + Twine("\""));
    // NOTE(review): getTargetID() is dereferenced unchecked; this assumes it
    // always yields a value for AMDGCN triples -- confirm in AMDGPU.h.
    Builder.defineMacro("__amdgcn_target_id__",
                        Twine("\"") + Twine(*getTargetID()) + Twine("\""));
    // For every possible target-ID feature that has an entry in
    // OffloadArchFeatures, define __amdgcn_feature_<name>__ as 1 or 0
    // according to the recorded value, with '-' in the name replaced by '_'.
    for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
      auto Loc = OffloadArchFeatures.find(F);
      if (Loc != OffloadArchFeatures.end()) {
        std::string NewF = F.str();
        llvm::replace(NewF, '-', '_');
        Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
                                Twine("__"),
                            Loc->second ? "1" : "0");
      }
    }
  }

  if (Opts.AtomicIgnoreDenormalMode)
    Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");

  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  // removed in the near future.
  if (hasFMAF())
    Builder.defineMacro("__HAS_FMAF__");
  if (hasFastFMAF())
    Builder.defineMacro("FP_FAST_FMAF");
  if (hasLDEXPF())
    Builder.defineMacro("__HAS_LDEXPF__");
  if (hasFP64())
    Builder.defineMacro("__HAS_FP64__");
  if (hasFastFMA())
    Builder.defineMacro("FP_FAST_FMA");

  // The third argument is presumably a deprecation message attached to the
  // macro -- confirm against MacroBuilder::defineMacro.
  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
                      "compile-time-constant access to the wavefront size will "
                      "be removed in a future release");
  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
                      "compile-time-constant access to the wavefront size will "
                      "be removed in a future release");
  Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
}

/// Copies type information from the auxiliary target \p Aux via
/// copyAuxTarget(), while keeping this target's own long double format,
/// width and alignment and its own float128 format.
///
/// \p Aux is dereferenced unconditionally, so it must not be null. The half,
/// float and double formats of both targets are asserted to match.
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
  assert(HalfFormat == Aux->HalfFormat);
  assert(FloatFormat == Aux->FloatFormat);
  assert(DoubleFormat == Aux->DoubleFormat);

  // On x86_64 long double is 80-bit extended precision format, which is
  // not supported by AMDGPU. 128-bit floating point format is also not
  // supported by AMDGPU. Therefore keep its own format for these two types.
  auto SaveLongDoubleFormat = LongDoubleFormat;
  auto SaveFloat128Format = Float128Format;
  auto SaveLongDoubleWidth = LongDoubleWidth;
  auto SaveLongDoubleAlign = LongDoubleAlign;
  copyAuxTarget(Aux);
  LongDoubleFormat = SaveLongDoubleFormat;
  Float128Format = SaveFloat128Format;
  LongDoubleWidth = SaveLongDoubleWidth;
  LongDoubleAlign = SaveLongDoubleAlign;
  // For certain builtin types supported on the host target, claim they are
  // supported so that the host code passes compilation during the device-side
  // compilation.
  // FIXME: As the side effect, we also accept `__float128` uses in the device
  // code. To reject these builtin types supported in the host target but not in
  // the device target, one approach would be to support a `device_builtin`
  // attribute so that we could tell the device builtin types from the host
  // ones. This also solves the different representations of the same builtin
  // type, such as `size_t` in the MSVC environment.
  if (Aux->hasFloat128Type()) {
    HasFloat128 = true;
    // float128 is given the double format on the device side.
    Float128Format = DoubleFormat;
  }
}