1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/LangOptions.h" 18 #include "clang/Basic/MacroBuilder.h" 19 #include "clang/Basic/TargetBuiltins.h" 20 using namespace clang; 21 using namespace clang::targets; 22 23 namespace clang { 24 namespace targets { 25 26 // If you edit the description strings, make sure you update 27 // getPointerWidthV(). 28 29 static const char *const DataLayoutStringR600 = 30 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 31 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 32 33 static const char *const DataLayoutStringAMDGCN = 34 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 35 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" 36 "32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 38 "-ni:7:8:9"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 llvm::AMDGPUAS::FLAT_ADDRESS, // Default 42 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global 43 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local 44 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant 45 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private 46 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic 47 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device 48 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host 49 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device 50 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant 51 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared 52 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global 53 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device 54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host 55 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local 56 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private 57 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr 58 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr 59 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 60 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared 61 }; 62 63 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 64 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default 65 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global 66 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local 67 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant 68 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private 69 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic 70 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device 71 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host 72 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device 73 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant 74 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared 75 // SYCL address space values for this map are dummy 76 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global 77 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device 78 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host 79 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local 80 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private 81 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr 82 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr 83 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 84 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared 85 86 }; 87 } // namespace targets 88 } // namespace clang 89 90 static constexpr Builtin::Info BuiltinInfo[] = { 91 #define BUILTIN(ID, TYPE, ATTRS) \ 92 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 93 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 94 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 95 #include "clang/Basic/BuiltinsAMDGPU.def" 96 }; 97 98 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 99 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 100 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 101 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 102 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 103 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 104 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 105 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 106 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 107 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 108 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 109 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 110 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 111 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 112 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 113 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 114 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 115 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 116 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 117 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 118 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 119 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 120 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 121 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 122 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 123 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 124 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 125 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 126 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 127 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 128 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 129 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 130 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 131 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 132 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 133 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 134 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 135 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 136 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 137 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 138 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 139 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 140 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 141 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 142 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 143 "flat_scratch_lo", "flat_scratch_hi", 144 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 145 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 146 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 147 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 148 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 149 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 150 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 151 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 152 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 153 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 154 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 155 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 156 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 157 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 158 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 159 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 160 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 161 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 162 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 163 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 164 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 165 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 166 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 167 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 168 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 169 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 170 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 171 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 172 "a252", "a253", "a254", "a255" 173 }; 174 175 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 176 return llvm::ArrayRef(GCCRegNames); 177 } 178 179 bool AMDGPUTargetInfo::initFeatureMap( 180 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 181 const std::vector<std::string> &FeatureVec) const { 182 183 using namespace llvm::AMDGPU; 184 fillAMDGPUFeatureMap(CPU, getTriple(), Features); 185 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) 186 return false; 187 188 // TODO: Should move this logic into TargetParser 189 std::string ErrorMsg; 190 if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) { 191 Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg; 192 return false; 193 } 194 195 return true; 196 } 197 198 void AMDGPUTargetInfo::fillValidCPUList( 199 SmallVectorImpl<StringRef> &Values) const { 200 if (isAMDGCN(getTriple())) 201 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 202 else 203 llvm::AMDGPU::fillValidArchListR600(Values); 204 } 205 206 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 207 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 208 } 209 210 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 211 const TargetOptions &Opts) 212 : TargetInfo(Triple), 213 GPUKind(isAMDGCN(Triple) ? 214 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 215 llvm::AMDGPU::parseArchR600(Opts.CPU)), 216 GPUFeatures(isAMDGCN(Triple) ? 217 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 218 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 219 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 220 : DataLayoutStringR600); 221 222 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 223 !isAMDGCN(Triple)); 224 UseAddrSpaceMapMangling = true; 225 226 if (isAMDGCN(Triple)) { 227 // __bf16 is always available as a load/store only type on AMDGCN. 228 BFloat16Width = BFloat16Align = 16; 229 BFloat16Format = &llvm::APFloat::BFloat(); 230 } 231 232 HasLegalHalfType = true; 233 HasFloat16 = true; 234 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 235 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 236 237 // Set pointer width and alignment for the generic address space. 238 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default); 239 if (getMaxPointerWidth() == 64) { 240 LongWidth = LongAlign = 64; 241 SizeType = UnsignedLong; 242 PtrDiffType = SignedLong; 243 IntPtrType = SignedLong; 244 } 245 246 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 247 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); 248 for (auto F : {"image-insts", "gws"}) 249 ReadOnlyFeatures.insert(F); 250 HalfArgsAndReturns = true; 251 } 252 253 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 254 TargetInfo::adjust(Diags, Opts); 255 // ToDo: There are still a few places using default address space as private 256 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 257 // can be removed from the following line. 258 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 259 !isAMDGCN(getTriple())); 260 } 261 262 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 263 return llvm::ArrayRef(BuiltinInfo, 264 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin); 265 } 266 267 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 268 MacroBuilder &Builder) const { 269 Builder.defineMacro("__AMD__"); 270 Builder.defineMacro("__AMDGPU__"); 271 272 if (isAMDGCN(getTriple())) 273 Builder.defineMacro("__AMDGCN__"); 274 else 275 Builder.defineMacro("__R600__"); 276 277 if (GPUKind != llvm::AMDGPU::GK_NONE) { 278 StringRef CanonName = isAMDGCN(getTriple()) ? 279 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 280 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 281 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ 282 if (isAMDGCN(getTriple())) { 283 assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name"); 284 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) + 285 Twine("__")); 286 } 287 if (isAMDGCN(getTriple())) { 288 Builder.defineMacro("__amdgcn_processor__", 289 Twine("\"") + Twine(CanonName) + Twine("\"")); 290 Builder.defineMacro("__amdgcn_target_id__", 291 Twine("\"") + Twine(*getTargetID()) + Twine("\"")); 292 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 293 auto Loc = OffloadArchFeatures.find(F); 294 if (Loc != OffloadArchFeatures.end()) { 295 std::string NewF = F.str(); 296 std::replace(NewF.begin(), NewF.end(), '-', '_'); 297 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 298 Twine("__"), 299 Loc->second ? "1" : "0"); 300 } 301 } 302 } 303 } 304 305 if (AllowAMDGPUUnsafeFPAtomics) 306 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__"); 307 308 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 309 // removed in the near future. 310 if (hasFMAF()) 311 Builder.defineMacro("__HAS_FMAF__"); 312 if (hasFastFMAF()) 313 Builder.defineMacro("FP_FAST_FMAF"); 314 if (hasLDEXPF()) 315 Builder.defineMacro("__HAS_LDEXPF__"); 316 if (hasFP64()) 317 Builder.defineMacro("__HAS_FP64__"); 318 if (hasFastFMA()) 319 Builder.defineMacro("FP_FAST_FMA"); 320 321 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize)); 322 // ToDo: deprecate this macro for naming consistency. 323 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 324 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); 325 } 326 327 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 328 assert(HalfFormat == Aux->HalfFormat); 329 assert(FloatFormat == Aux->FloatFormat); 330 assert(DoubleFormat == Aux->DoubleFormat); 331 332 // On x86_64 long double is 80-bit extended precision format, which is 333 // not supported by AMDGPU. 128-bit floating point format is also not 334 // supported by AMDGPU. Therefore keep its own format for these two types. 335 auto SaveLongDoubleFormat = LongDoubleFormat; 336 auto SaveFloat128Format = Float128Format; 337 auto SaveLongDoubleWidth = LongDoubleWidth; 338 auto SaveLongDoubleAlign = LongDoubleAlign; 339 copyAuxTarget(Aux); 340 LongDoubleFormat = SaveLongDoubleFormat; 341 Float128Format = SaveFloat128Format; 342 LongDoubleWidth = SaveLongDoubleWidth; 343 LongDoubleAlign = SaveLongDoubleAlign; 344 // For certain builtin types support on the host target, claim they are 345 // support to pass the compilation of the host code during the device-side 346 // compilation. 347 // FIXME: As the side effect, we also accept `__float128` uses in the device 348 // code. To rejct these builtin types supported in the host target but not in 349 // the device target, one approach would support `device_builtin` attribute 350 // so that we could tell the device builtin types from the host ones. The 351 // also solves the different representations of the same builtin type, such 352 // as `size_t` in the MSVC environment. 353 if (Aux->hasFloat128Type()) { 354 HasFloat128 = true; 355 Float128Format = DoubleFormat; 356 } 357 } 358