1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/LangOptions.h" 18 #include "clang/Basic/MacroBuilder.h" 19 #include "clang/Basic/TargetBuiltins.h" 20 #include "llvm/ADT/SmallString.h" 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" 37 "32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 39 "-ni:7:8:9"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 llvm::AMDGPUAS::FLAT_ADDRESS, // Default 43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global 44 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local 45 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant 46 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private 47 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic 48 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device 49 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host 50 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device 51 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant 52 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared 53 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global 54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device 55 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host 56 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local 57 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private 58 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr 59 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr 60 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 61 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared 62 }; 63 64 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 65 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default 66 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global 67 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local 68 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant 69 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private 70 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic 71 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device 72 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host 73 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device 74 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant 75 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared 76 // SYCL address space values for this map are dummy 77 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global 78 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device 79 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host 80 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local 81 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private 82 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr 83 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr 84 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 85 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared 86 87 }; 88 } // namespace targets 89 } // namespace clang 90 91 static constexpr Builtin::Info BuiltinInfo[] = { 92 #define BUILTIN(ID, TYPE, ATTRS) \ 93 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 94 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 95 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 96 #include "clang/Basic/BuiltinsAMDGPU.def" 97 }; 98 99 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 100 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 101 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 102 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 103 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 104 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 105 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 106 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 107 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 108 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 109 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 110 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 111 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 112 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 113 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 114 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 115 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 116 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 117 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 118 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 119 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 120 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 121 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 122 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 123 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 124 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 125 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 126 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 127 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 128 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 129 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 130 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 131 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 132 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 133 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 134 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 135 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 136 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 137 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 138 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 139 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 140 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 141 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 142 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 143 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 144 "flat_scratch_lo", "flat_scratch_hi", 145 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 146 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 147 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 148 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 149 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 150 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 151 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 152 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 153 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 154 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 155 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 156 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 157 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 158 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 159 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 160 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 161 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 162 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 163 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 164 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 165 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 166 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 167 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 168 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 169 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 170 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 171 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 172 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 173 "a252", "a253", "a254", "a255" 174 }; 175 176 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 177 return llvm::ArrayRef(GCCRegNames); 178 } 179 180 bool AMDGPUTargetInfo::initFeatureMap( 181 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 182 const std::vector<std::string> &FeatureVec) const { 183 184 using namespace llvm::AMDGPU; 185 fillAMDGPUFeatureMap(CPU, getTriple(), Features); 186 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) 187 return false; 188 189 // TODO: Should move this logic into TargetParser 190 auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features); 191 switch (HasError.first) { 192 default: 193 break; 194 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION: 195 Diags.Report(diag::err_invalid_feature_combination) << HasError.second; 196 return false; 197 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE: 198 Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second; 199 return false; 200 } 201 202 return true; 203 } 204 205 void AMDGPUTargetInfo::fillValidCPUList( 206 SmallVectorImpl<StringRef> &Values) const { 207 if (isAMDGCN(getTriple())) 208 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 209 else 210 llvm::AMDGPU::fillValidArchListR600(Values); 211 } 212 213 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 214 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 215 } 216 217 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 218 const TargetOptions &Opts) 219 : TargetInfo(Triple), 220 GPUKind(isAMDGCN(Triple) ? 221 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 222 llvm::AMDGPU::parseArchR600(Opts.CPU)), 223 GPUFeatures(isAMDGCN(Triple) ? 224 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 225 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 226 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 227 : DataLayoutStringR600); 228 229 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 230 !isAMDGCN(Triple)); 231 UseAddrSpaceMapMangling = true; 232 233 if (isAMDGCN(Triple)) { 234 // __bf16 is always available as a load/store only type on AMDGCN. 235 BFloat16Width = BFloat16Align = 16; 236 BFloat16Format = &llvm::APFloat::BFloat(); 237 } 238 239 HasLegalHalfType = true; 240 HasFloat16 = true; 241 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64; 242 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 243 244 // Set pointer width and alignment for the generic address space. 245 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default); 246 if (getMaxPointerWidth() == 64) { 247 LongWidth = LongAlign = 64; 248 SizeType = UnsignedLong; 249 PtrDiffType = SignedLong; 250 IntPtrType = SignedLong; 251 } 252 253 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 254 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); 255 for (auto F : {"image-insts", "gws"}) 256 ReadOnlyFeatures.insert(F); 257 HalfArgsAndReturns = true; 258 } 259 260 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 261 TargetInfo::adjust(Diags, Opts); 262 // ToDo: There are still a few places using default address space as private 263 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 264 // can be removed from the following line. 265 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 266 !isAMDGCN(getTriple())); 267 } 268 269 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 270 return llvm::ArrayRef(BuiltinInfo, 271 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin); 272 } 273 274 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 275 MacroBuilder &Builder) const { 276 Builder.defineMacro("__AMD__"); 277 Builder.defineMacro("__AMDGPU__"); 278 279 if (isAMDGCN(getTriple())) 280 Builder.defineMacro("__AMDGCN__"); 281 else 282 Builder.defineMacro("__R600__"); 283 284 // Legacy HIP host code relies on these default attributes to be defined. 285 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice; 286 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost) 287 return; 288 289 llvm::SmallString<16> CanonName = 290 (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) 291 : getArchNameR600(GPUKind)); 292 293 // Sanitize the name of generic targets. 294 // e.g. gfx10-1-generic -> gfx10_1_generic 295 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && 296 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { 297 std::replace(CanonName.begin(), CanonName.end(), '-', '_'); 298 } 299 300 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 301 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ 302 if (isAMDGCN(getTriple()) && !IsHIPHost) { 303 assert(StringRef(CanonName).starts_with("gfx") && 304 "Invalid amdgcn canonical name"); 305 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind); 306 Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) + 307 Twine("__")); 308 Builder.defineMacro("__amdgcn_processor__", 309 Twine("\"") + Twine(CanonName) + Twine("\"")); 310 Builder.defineMacro("__amdgcn_target_id__", 311 Twine("\"") + Twine(*getTargetID()) + Twine("\"")); 312 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 313 auto Loc = OffloadArchFeatures.find(F); 314 if (Loc != OffloadArchFeatures.end()) { 315 std::string NewF = F.str(); 316 std::replace(NewF.begin(), NewF.end(), '-', '_'); 317 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 318 Twine("__"), 319 Loc->second ? "1" : "0"); 320 } 321 } 322 } 323 324 if (AllowAMDGPUUnsafeFPAtomics) 325 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__"); 326 327 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 328 // removed in the near future. 329 if (hasFMAF()) 330 Builder.defineMacro("__HAS_FMAF__"); 331 if (hasFastFMAF()) 332 Builder.defineMacro("FP_FAST_FMAF"); 333 if (hasLDEXPF()) 334 Builder.defineMacro("__HAS_LDEXPF__"); 335 if (hasFP64()) 336 Builder.defineMacro("__HAS_FP64__"); 337 if (hasFastFMA()) 338 Builder.defineMacro("FP_FAST_FMA"); 339 340 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize)); 341 // ToDo: deprecate this macro for naming consistency. 342 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 343 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); 344 } 345 346 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 347 assert(HalfFormat == Aux->HalfFormat); 348 assert(FloatFormat == Aux->FloatFormat); 349 assert(DoubleFormat == Aux->DoubleFormat); 350 351 // On x86_64 long double is 80-bit extended precision format, which is 352 // not supported by AMDGPU. 128-bit floating point format is also not 353 // supported by AMDGPU. Therefore keep its own format for these two types. 354 auto SaveLongDoubleFormat = LongDoubleFormat; 355 auto SaveFloat128Format = Float128Format; 356 auto SaveLongDoubleWidth = LongDoubleWidth; 357 auto SaveLongDoubleAlign = LongDoubleAlign; 358 copyAuxTarget(Aux); 359 LongDoubleFormat = SaveLongDoubleFormat; 360 Float128Format = SaveFloat128Format; 361 LongDoubleWidth = SaveLongDoubleWidth; 362 LongDoubleAlign = SaveLongDoubleAlign; 363 // For certain builtin types support on the host target, claim they are 364 // support to pass the compilation of the host code during the device-side 365 // compilation. 366 // FIXME: As the side effect, we also accept `__float128` uses in the device 367 // code. To rejct these builtin types supported in the host target but not in 368 // the device target, one approach would support `device_builtin` attribute 369 // so that we could tell the device builtin types from the host ones. The 370 // also solves the different representations of the same builtin type, such 371 // as `size_t` in the MSVC environment. 372 if (Aux->hasFloat128Type()) { 373 HasFloat128 = true; 374 Float128Format = DoubleFormat; 375 } 376 } 377