1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/LangOptions.h" 18 #include "clang/Basic/MacroBuilder.h" 19 #include "clang/Basic/TargetBuiltins.h" 20 using namespace clang; 21 using namespace clang::targets; 22 23 namespace clang { 24 namespace targets { 25 26 // If you edit the description strings, make sure you update 27 // getPointerWidthV(). 28 29 static const char *const DataLayoutStringR600 = 30 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 31 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 32 33 static const char *const DataLayoutStringAMDGCN = 34 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 35 "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 36 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 37 "-ni:7:8"; 38 39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 40 Generic, // Default 41 Global, // opencl_global 42 Local, // opencl_local 43 Constant, // opencl_constant 44 Private, // opencl_private 45 Generic, // opencl_generic 46 Global, // opencl_global_device 47 Global, // opencl_global_host 48 Global, // cuda_device 49 Constant, // cuda_constant 50 Local, // cuda_shared 51 Global, // sycl_global 52 Global, // sycl_global_device 53 Global, // sycl_global_host 54 Local, // sycl_local 55 Private, // sycl_private 56 Generic, // ptr32_sptr 57 Generic, // ptr32_uptr 58 Generic, // ptr64 59 Generic, // hlsl_groupshared 60 }; 61 62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 63 Private, // Default 64 Global, // opencl_global 65 Local, // opencl_local 66 Constant, // opencl_constant 67 Private, // opencl_private 68 Generic, // opencl_generic 69 Global, // opencl_global_device 70 Global, // opencl_global_host 71 Global, // cuda_device 72 Constant, // cuda_constant 73 Local, // cuda_shared 74 // SYCL address space values for this map are dummy 75 Generic, // sycl_global 76 Generic, // sycl_global_device 77 Generic, // sycl_global_host 78 Generic, // sycl_local 79 Generic, // sycl_private 80 Generic, // ptr32_sptr 81 Generic, // ptr32_uptr 82 Generic, // ptr64 83 Generic, // hlsl_groupshared 84 85 }; 86 } // namespace targets 87 } // namespace clang 88 89 static constexpr Builtin::Info BuiltinInfo[] = { 90 #define BUILTIN(ID, TYPE, ATTRS) \ 91 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 93 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 94 #include "clang/Basic/BuiltinsAMDGPU.def" 95 }; 96 97 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 98 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 99 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 100 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 101 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 102 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 103 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 104 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 105 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 106 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 107 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 108 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 109 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 110 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 111 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 112 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 113 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 114 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 115 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 116 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 117 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 118 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 119 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 120 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 121 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 122 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 123 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 124 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 125 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 126 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 127 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 128 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 129 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 130 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 131 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 132 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 133 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 134 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 135 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 136 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 137 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 138 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 139 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 140 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 141 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 142 "flat_scratch_lo", "flat_scratch_hi", 143 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 144 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 145 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 146 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 147 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 148 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 149 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 150 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 151 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 152 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 153 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 154 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 155 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 156 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 157 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 158 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 159 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 160 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 161 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 162 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 163 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 164 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 165 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 166 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 167 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 168 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 169 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 170 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 171 "a252", "a253", "a254", "a255" 172 }; 173 174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 175 return llvm::ArrayRef(GCCRegNames); 176 } 177 178 bool AMDGPUTargetInfo::initFeatureMap( 179 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 180 const std::vector<std::string> &FeatureVec) const { 181 182 using namespace llvm::AMDGPU; 183 fillAMDGPUFeatureMap(CPU, getTriple(), Features); 184 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) 185 return false; 186 187 // TODO: Should move this logic into TargetParser 188 std::string ErrorMsg; 189 if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) { 190 Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg; 191 return false; 192 } 193 194 return true; 195 } 196 197 void AMDGPUTargetInfo::fillValidCPUList( 198 SmallVectorImpl<StringRef> &Values) const { 199 if (isAMDGCN(getTriple())) 200 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 201 else 202 llvm::AMDGPU::fillValidArchListR600(Values); 203 } 204 205 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 206 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 207 } 208 209 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 210 const TargetOptions &Opts) 211 : TargetInfo(Triple), 212 GPUKind(isAMDGCN(Triple) ? 213 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 214 llvm::AMDGPU::parseArchR600(Opts.CPU)), 215 GPUFeatures(isAMDGCN(Triple) ? 216 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 217 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 218 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 219 : DataLayoutStringR600); 220 221 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 222 !isAMDGCN(Triple)); 223 UseAddrSpaceMapMangling = true; 224 225 if (isAMDGCN(Triple)) { 226 // __bf16 is always available as a load/store only type on AMDGCN. 227 BFloat16Width = BFloat16Align = 16; 228 BFloat16Format = &llvm::APFloat::BFloat(); 229 } 230 231 HasLegalHalfType = true; 232 HasFloat16 = true; 233 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 234 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 235 236 // Set pointer width and alignment for the generic address space. 237 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default); 238 if (getMaxPointerWidth() == 64) { 239 LongWidth = LongAlign = 64; 240 SizeType = UnsignedLong; 241 PtrDiffType = SignedLong; 242 IntPtrType = SignedLong; 243 } 244 245 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 246 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); 247 ReadOnlyFeatures.insert("image-insts"); 248 } 249 250 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 251 TargetInfo::adjust(Diags, Opts); 252 // ToDo: There are still a few places using default address space as private 253 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 254 // can be removed from the following line. 255 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 256 !isAMDGCN(getTriple())); 257 } 258 259 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 260 return llvm::ArrayRef(BuiltinInfo, 261 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin); 262 } 263 264 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 265 MacroBuilder &Builder) const { 266 Builder.defineMacro("__AMD__"); 267 Builder.defineMacro("__AMDGPU__"); 268 269 if (isAMDGCN(getTriple())) 270 Builder.defineMacro("__AMDGCN__"); 271 else 272 Builder.defineMacro("__R600__"); 273 274 if (GPUKind != llvm::AMDGPU::GK_NONE) { 275 StringRef CanonName = isAMDGCN(getTriple()) ? 276 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 277 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 278 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ 279 if (isAMDGCN(getTriple())) { 280 assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name"); 281 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) + 282 Twine("__")); 283 } 284 if (isAMDGCN(getTriple())) { 285 Builder.defineMacro("__amdgcn_processor__", 286 Twine("\"") + Twine(CanonName) + Twine("\"")); 287 Builder.defineMacro("__amdgcn_target_id__", 288 Twine("\"") + Twine(*getTargetID()) + Twine("\"")); 289 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 290 auto Loc = OffloadArchFeatures.find(F); 291 if (Loc != OffloadArchFeatures.end()) { 292 std::string NewF = F.str(); 293 std::replace(NewF.begin(), NewF.end(), '-', '_'); 294 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 295 Twine("__"), 296 Loc->second ? "1" : "0"); 297 } 298 } 299 } 300 } 301 302 if (AllowAMDGPUUnsafeFPAtomics) 303 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__"); 304 305 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 306 // removed in the near future. 307 if (hasFMAF()) 308 Builder.defineMacro("__HAS_FMAF__"); 309 if (hasFastFMAF()) 310 Builder.defineMacro("FP_FAST_FMAF"); 311 if (hasLDEXPF()) 312 Builder.defineMacro("__HAS_LDEXPF__"); 313 if (hasFP64()) 314 Builder.defineMacro("__HAS_FP64__"); 315 if (hasFastFMA()) 316 Builder.defineMacro("FP_FAST_FMA"); 317 318 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize)); 319 // ToDo: deprecate this macro for naming consistency. 320 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 321 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); 322 } 323 324 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 325 assert(HalfFormat == Aux->HalfFormat); 326 assert(FloatFormat == Aux->FloatFormat); 327 assert(DoubleFormat == Aux->DoubleFormat); 328 329 // On x86_64 long double is 80-bit extended precision format, which is 330 // not supported by AMDGPU. 128-bit floating point format is also not 331 // supported by AMDGPU. Therefore keep its own format for these two types. 332 auto SaveLongDoubleFormat = LongDoubleFormat; 333 auto SaveFloat128Format = Float128Format; 334 auto SaveLongDoubleWidth = LongDoubleWidth; 335 auto SaveLongDoubleAlign = LongDoubleAlign; 336 copyAuxTarget(Aux); 337 LongDoubleFormat = SaveLongDoubleFormat; 338 Float128Format = SaveFloat128Format; 339 LongDoubleWidth = SaveLongDoubleWidth; 340 LongDoubleAlign = SaveLongDoubleAlign; 341 // For certain builtin types support on the host target, claim they are 342 // support to pass the compilation of the host code during the device-side 343 // compilation. 344 // FIXME: As the side effect, we also accept `__float128` uses in the device 345 // code. To rejct these builtin types supported in the host target but not in 346 // the device target, one approach would support `device_builtin` attribute 347 // so that we could tell the device builtin types from the host ones. The 348 // also solves the different representations of the same builtin type, such 349 // as `size_t` in the MSVC environment. 350 if (Aux->hasFloat128Type()) { 351 HasFloat128 = true; 352 Float128Format = DoubleFormat; 353 } 354 } 355