1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // opencl_global_device 48 Global, // opencl_global_host 49 Global, // cuda_device 50 Constant, // cuda_constant 51 Local, // cuda_shared 52 Global, // sycl_global 53 Global, // sycl_global_device 54 Global, // sycl_global_host 55 Local, // sycl_local 56 Private, // sycl_private 57 Generic, // ptr32_sptr 58 Generic, // ptr32_uptr 59 Generic // ptr64 60 }; 61 62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 63 Private, // Default 64 Global, // opencl_global 65 Local, // opencl_local 66 Constant, // opencl_constant 67 Private, // opencl_private 68 Generic, // opencl_generic 69 Global, // opencl_global_device 70 Global, // opencl_global_host 71 Global, // cuda_device 72 Constant, // cuda_constant 73 Local, // cuda_shared 74 // SYCL address space values for this map are dummy 75 Generic, // sycl_global 76 Generic, // sycl_global_device 77 Generic, // sycl_global_host 78 Generic, // sycl_local 79 Generic, // sycl_private 80 Generic, // ptr32_sptr 81 Generic, // ptr32_uptr 82 Generic // ptr64 83 84 }; 85 } // namespace targets 86 } // namespace clang 87 88 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 89 #define BUILTIN(ID, TYPE, ATTRS) \ 90 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 91 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 92 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 93 #include "clang/Basic/BuiltinsAMDGPU.def" 94 }; 95 96 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 97 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 98 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 99 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 100 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 101 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 102 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 103 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 104 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 105 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 106 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 107 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 108 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 109 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 110 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 111 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 112 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 113 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 114 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 115 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 116 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 117 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 118 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 119 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 120 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 121 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 122 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 123 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 124 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 125 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 126 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 127 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 128 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 129 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 130 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 131 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 132 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 133 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 134 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 135 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 136 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 137 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 138 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 139 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 140 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 141 "flat_scratch_lo", "flat_scratch_hi", 142 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 143 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 144 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 145 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 146 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 147 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 148 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 149 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 150 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 151 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 152 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 153 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 154 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 155 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 156 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 157 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 158 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 159 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 160 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 161 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 162 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 163 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 164 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 165 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 166 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 167 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 168 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 169 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 170 "a252", "a253", "a254", "a255" 171 }; 172 173 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 174 return llvm::makeArrayRef(GCCRegNames); 175 } 176 177 bool AMDGPUTargetInfo::initFeatureMap( 178 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 179 const std::vector<std::string> &FeatureVec) const { 180 181 using namespace llvm::AMDGPU; 182 183 // XXX - What does the member GPU mean if device name string passed here? 184 if (isAMDGCN(getTriple())) { 185 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 186 case GK_GFX1103: 187 case GK_GFX1102: 188 case GK_GFX1101: 189 case GK_GFX1100: 190 Features["ci-insts"] = true; 191 Features["dot1-insts"] = true; 192 Features["dot5-insts"] = true; 193 Features["dot6-insts"] = true; 194 Features["dot7-insts"] = true; 195 Features["dot8-insts"] = true; 196 Features["dl-insts"] = true; 197 Features["flat-address-space"] = true; 198 Features["16-bit-insts"] = true; 199 Features["dpp"] = true; 200 Features["gfx8-insts"] = true; 201 Features["gfx9-insts"] = true; 202 Features["gfx10-insts"] = true; 203 Features["gfx10-3-insts"] = true; 204 Features["gfx11-insts"] = true; 205 break; 206 case GK_GFX1036: 207 case GK_GFX1035: 208 case GK_GFX1034: 209 case GK_GFX1033: 210 case GK_GFX1032: 211 case GK_GFX1031: 212 case GK_GFX1030: 213 Features["ci-insts"] = true; 214 Features["dot1-insts"] = true; 215 Features["dot2-insts"] = true; 216 Features["dot5-insts"] = true; 217 Features["dot6-insts"] = true; 218 Features["dot7-insts"] = true; 219 Features["dl-insts"] = true; 220 Features["flat-address-space"] = true; 221 Features["16-bit-insts"] = true; 222 Features["dpp"] = true; 223 Features["gfx8-insts"] = true; 224 Features["gfx9-insts"] = true; 225 Features["gfx10-insts"] = true; 226 Features["gfx10-3-insts"] = true; 227 Features["s-memrealtime"] = true; 228 Features["s-memtime-inst"] = true; 229 break; 230 case GK_GFX1012: 231 case GK_GFX1011: 232 Features["dot1-insts"] = true; 233 Features["dot2-insts"] = true; 234 Features["dot5-insts"] = true; 235 Features["dot6-insts"] = true; 236 Features["dot7-insts"] = true; 237 LLVM_FALLTHROUGH; 238 case GK_GFX1013: 239 case GK_GFX1010: 240 Features["dl-insts"] = true; 241 Features["ci-insts"] = true; 242 Features["flat-address-space"] = true; 243 Features["16-bit-insts"] = true; 244 Features["dpp"] = true; 245 Features["gfx8-insts"] = true; 246 Features["gfx9-insts"] = true; 247 Features["gfx10-insts"] = true; 248 Features["s-memrealtime"] = true; 249 Features["s-memtime-inst"] = true; 250 break; 251 case GK_GFX940: 252 Features["gfx940-insts"] = true; 253 Features["fp8-insts"] = true; 254 LLVM_FALLTHROUGH; 255 case GK_GFX90A: 256 Features["gfx90a-insts"] = true; 257 LLVM_FALLTHROUGH; 258 case GK_GFX908: 259 Features["dot3-insts"] = true; 260 Features["dot4-insts"] = true; 261 Features["dot5-insts"] = true; 262 Features["dot6-insts"] = true; 263 Features["mai-insts"] = true; 264 LLVM_FALLTHROUGH; 265 case GK_GFX906: 266 Features["dl-insts"] = true; 267 Features["dot1-insts"] = true; 268 Features["dot2-insts"] = true; 269 Features["dot7-insts"] = true; 270 LLVM_FALLTHROUGH; 271 case GK_GFX90C: 272 case GK_GFX909: 273 case GK_GFX904: 274 case GK_GFX902: 275 case GK_GFX900: 276 Features["gfx9-insts"] = true; 277 LLVM_FALLTHROUGH; 278 case GK_GFX810: 279 case GK_GFX805: 280 case GK_GFX803: 281 case GK_GFX802: 282 case GK_GFX801: 283 Features["gfx8-insts"] = true; 284 Features["16-bit-insts"] = true; 285 Features["dpp"] = true; 286 Features["s-memrealtime"] = true; 287 LLVM_FALLTHROUGH; 288 case GK_GFX705: 289 case GK_GFX704: 290 case GK_GFX703: 291 case GK_GFX702: 292 case GK_GFX701: 293 case GK_GFX700: 294 Features["ci-insts"] = true; 295 Features["flat-address-space"] = true; 296 LLVM_FALLTHROUGH; 297 case GK_GFX602: 298 case GK_GFX601: 299 case GK_GFX600: 300 Features["s-memtime-inst"] = true; 301 break; 302 case GK_NONE: 303 break; 304 default: 305 llvm_unreachable("Unhandled GPU!"); 306 } 307 } else { 308 if (CPU.empty()) 309 CPU = "r600"; 310 311 switch (llvm::AMDGPU::parseArchR600(CPU)) { 312 case GK_CAYMAN: 313 case GK_CYPRESS: 314 case GK_RV770: 315 case GK_RV670: 316 // TODO: Add fp64 when implemented. 317 break; 318 case GK_TURKS: 319 case GK_CAICOS: 320 case GK_BARTS: 321 case GK_SUMO: 322 case GK_REDWOOD: 323 case GK_JUNIPER: 324 case GK_CEDAR: 325 case GK_RV730: 326 case GK_RV710: 327 case GK_RS880: 328 case GK_R630: 329 case GK_R600: 330 break; 331 default: 332 llvm_unreachable("Unhandled GPU!"); 333 } 334 } 335 336 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 337 } 338 339 void AMDGPUTargetInfo::fillValidCPUList( 340 SmallVectorImpl<StringRef> &Values) const { 341 if (isAMDGCN(getTriple())) 342 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 343 else 344 llvm::AMDGPU::fillValidArchListR600(Values); 345 } 346 347 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 348 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 349 } 350 351 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 352 const TargetOptions &Opts) 353 : TargetInfo(Triple), 354 GPUKind(isAMDGCN(Triple) ? 355 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 356 llvm::AMDGPU::parseArchR600(Opts.CPU)), 357 GPUFeatures(isAMDGCN(Triple) ? 358 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 359 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 360 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 361 : DataLayoutStringR600); 362 363 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 364 !isAMDGCN(Triple)); 365 UseAddrSpaceMapMangling = true; 366 367 HasLegalHalfType = true; 368 HasFloat16 = true; 369 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 370 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 371 372 // Set pointer width and alignment for target address space 0. 373 PointerWidth = PointerAlign = getPointerWidthV(Generic); 374 if (getMaxPointerWidth() == 64) { 375 LongWidth = LongAlign = 64; 376 SizeType = UnsignedLong; 377 PtrDiffType = SignedLong; 378 IntPtrType = SignedLong; 379 } 380 381 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 382 } 383 384 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 385 TargetInfo::adjust(Diags, Opts); 386 // ToDo: There are still a few places using default address space as private 387 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 388 // can be removed from the following line. 389 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 390 !isAMDGCN(getTriple())); 391 } 392 393 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 394 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 395 Builtin::FirstTSBuiltin); 396 } 397 398 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 399 MacroBuilder &Builder) const { 400 Builder.defineMacro("__AMD__"); 401 Builder.defineMacro("__AMDGPU__"); 402 403 if (isAMDGCN(getTriple())) 404 Builder.defineMacro("__AMDGCN__"); 405 else 406 Builder.defineMacro("__R600__"); 407 408 if (GPUKind != llvm::AMDGPU::GK_NONE) { 409 StringRef CanonName = isAMDGCN(getTriple()) ? 410 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 411 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 412 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ 413 if (isAMDGCN(getTriple())) { 414 assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name"); 415 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) + 416 Twine("__")); 417 } 418 if (isAMDGCN(getTriple())) { 419 Builder.defineMacro("__amdgcn_processor__", 420 Twine("\"") + Twine(CanonName) + Twine("\"")); 421 Builder.defineMacro("__amdgcn_target_id__", 422 Twine("\"") + Twine(*getTargetID()) + Twine("\"")); 423 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 424 auto Loc = OffloadArchFeatures.find(F); 425 if (Loc != OffloadArchFeatures.end()) { 426 std::string NewF = F.str(); 427 std::replace(NewF.begin(), NewF.end(), '-', '_'); 428 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 429 Twine("__"), 430 Loc->second ? "1" : "0"); 431 } 432 } 433 } 434 } 435 436 if (AllowAMDGPUUnsafeFPAtomics) 437 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__"); 438 439 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 440 // removed in the near future. 441 if (hasFMAF()) 442 Builder.defineMacro("__HAS_FMAF__"); 443 if (hasFastFMAF()) 444 Builder.defineMacro("FP_FAST_FMAF"); 445 if (hasLDEXPF()) 446 Builder.defineMacro("__HAS_LDEXPF__"); 447 if (hasFP64()) 448 Builder.defineMacro("__HAS_FP64__"); 449 if (hasFastFMA()) 450 Builder.defineMacro("FP_FAST_FMA"); 451 452 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 453 } 454 455 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 456 assert(HalfFormat == Aux->HalfFormat); 457 assert(FloatFormat == Aux->FloatFormat); 458 assert(DoubleFormat == Aux->DoubleFormat); 459 460 // On x86_64 long double is 80-bit extended precision format, which is 461 // not supported by AMDGPU. 128-bit floating point format is also not 462 // supported by AMDGPU. Therefore keep its own format for these two types. 463 auto SaveLongDoubleFormat = LongDoubleFormat; 464 auto SaveFloat128Format = Float128Format; 465 auto SaveLongDoubleWidth = LongDoubleWidth; 466 auto SaveLongDoubleAlign = LongDoubleAlign; 467 copyAuxTarget(Aux); 468 LongDoubleFormat = SaveLongDoubleFormat; 469 Float128Format = SaveFloat128Format; 470 LongDoubleWidth = SaveLongDoubleWidth; 471 LongDoubleAlign = SaveLongDoubleAlign; 472 // For certain builtin types support on the host target, claim they are 473 // support to pass the compilation of the host code during the device-side 474 // compilation. 475 // FIXME: As the side effect, we also accept `__float128` uses in the device 476 // code. To rejct these builtin types supported in the host target but not in 477 // the device target, one approach would support `device_builtin` attribute 478 // so that we could tell the device builtin types from the host ones. The 479 // also solves the different representations of the same builtin type, such 480 // as `size_t` in the MSVC environment. 481 if (Aux->hasFloat128Type()) { 482 HasFloat128 = true; 483 Float128Format = DoubleFormat; 484 } 485 } 486