1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // cuda_device 50 Constant, // cuda_constant 51 Local, // cuda_shared 52 Generic, // ptr32_sptr 53 Generic, // ptr32_uptr 54 Generic // ptr64 55 }; 56 57 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 58 Private, // Default 59 Global, // opencl_global 60 Local, // opencl_local 61 Constant, // opencl_constant 62 Private, // opencl_private 63 Generic, // opencl_generic 64 Global, // cuda_device 65 Constant, // cuda_constant 66 Local, // cuda_shared 67 Generic, // ptr32_sptr 68 Generic, // ptr32_uptr 69 Generic // ptr64 70 71 }; 72 } // namespace targets 73 } // namespace clang 74 75 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 76 #define BUILTIN(ID, TYPE, ATTRS) \ 77 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 78 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 79 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 80 #include "clang/Basic/BuiltinsAMDGPU.def" 81 }; 82 83 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 84 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 85 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 86 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 87 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 88 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 89 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 90 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 91 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 92 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 93 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 94 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 95 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 96 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 97 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 98 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 99 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 100 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 101 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 102 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 103 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 104 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 105 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 106 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 107 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 108 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 109 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 110 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 111 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 112 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 113 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 114 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 115 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 116 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 117 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 118 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 119 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 120 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 121 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 122 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 123 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 124 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 125 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 126 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 127 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 128 "flat_scratch_lo", "flat_scratch_hi", 129 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 130 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 131 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 132 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 133 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 134 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 135 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 136 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 137 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 138 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 139 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 140 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 141 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 142 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 143 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 144 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 145 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 146 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 147 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 148 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 149 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 150 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 151 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 152 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 153 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 154 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 155 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 156 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 157 "a252", "a253", "a254", "a255" 158 }; 159 160 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 161 return llvm::makeArrayRef(GCCRegNames); 162 } 163 164 bool AMDGPUTargetInfo::initFeatureMap( 165 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 166 const std::vector<std::string> &FeatureVec) const { 167 168 using namespace llvm::AMDGPU; 169 170 // XXX - What does the member GPU mean if device name string passed here? 171 if (isAMDGCN(getTriple())) { 172 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 173 case GK_GFX1030: 174 Features["ci-insts"] = true; 175 Features["dot1-insts"] = true; 176 Features["dot2-insts"] = true; 177 Features["dot5-insts"] = true; 178 Features["dot6-insts"] = true; 179 Features["dl-insts"] = true; 180 Features["flat-address-space"] = true; 181 Features["16-bit-insts"] = true; 182 Features["dpp"] = true; 183 Features["gfx8-insts"] = true; 184 Features["gfx9-insts"] = true; 185 Features["gfx10-insts"] = true; 186 Features["gfx10-3-insts"] = true; 187 Features["s-memrealtime"] = true; 188 break; 189 case GK_GFX1012: 190 case GK_GFX1011: 191 Features["dot1-insts"] = true; 192 Features["dot2-insts"] = true; 193 Features["dot5-insts"] = true; 194 Features["dot6-insts"] = true; 195 LLVM_FALLTHROUGH; 196 case GK_GFX1010: 197 Features["dl-insts"] = true; 198 Features["ci-insts"] = true; 199 Features["flat-address-space"] = true; 200 Features["16-bit-insts"] = true; 201 Features["dpp"] = true; 202 Features["gfx8-insts"] = true; 203 Features["gfx9-insts"] = true; 204 Features["gfx10-insts"] = true; 205 Features["s-memrealtime"] = true; 206 break; 207 case GK_GFX908: 208 Features["dot3-insts"] = true; 209 Features["dot4-insts"] = true; 210 Features["dot5-insts"] = true; 211 Features["dot6-insts"] = true; 212 Features["mai-insts"] = true; 213 LLVM_FALLTHROUGH; 214 case GK_GFX906: 215 Features["dl-insts"] = true; 216 Features["dot1-insts"] = true; 217 Features["dot2-insts"] = true; 218 LLVM_FALLTHROUGH; 219 case GK_GFX909: 220 case GK_GFX904: 221 case GK_GFX902: 222 case GK_GFX900: 223 Features["gfx9-insts"] = true; 224 LLVM_FALLTHROUGH; 225 case GK_GFX810: 226 case GK_GFX803: 227 case GK_GFX802: 228 case GK_GFX801: 229 Features["gfx8-insts"] = true; 230 Features["16-bit-insts"] = true; 231 Features["dpp"] = true; 232 Features["s-memrealtime"] = true; 233 LLVM_FALLTHROUGH; 234 case GK_GFX704: 235 case GK_GFX703: 236 case GK_GFX702: 237 case GK_GFX701: 238 case GK_GFX700: 239 Features["ci-insts"] = true; 240 Features["flat-address-space"] = true; 241 LLVM_FALLTHROUGH; 242 case GK_GFX601: 243 case GK_GFX600: 244 break; 245 case GK_NONE: 246 break; 247 default: 248 llvm_unreachable("Unhandled GPU!"); 249 } 250 } else { 251 if (CPU.empty()) 252 CPU = "r600"; 253 254 switch (llvm::AMDGPU::parseArchR600(CPU)) { 255 case GK_CAYMAN: 256 case GK_CYPRESS: 257 case GK_RV770: 258 case GK_RV670: 259 // TODO: Add fp64 when implemented. 260 break; 261 case GK_TURKS: 262 case GK_CAICOS: 263 case GK_BARTS: 264 case GK_SUMO: 265 case GK_REDWOOD: 266 case GK_JUNIPER: 267 case GK_CEDAR: 268 case GK_RV730: 269 case GK_RV710: 270 case GK_RS880: 271 case GK_R630: 272 case GK_R600: 273 break; 274 default: 275 llvm_unreachable("Unhandled GPU!"); 276 } 277 } 278 279 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 280 } 281 282 void AMDGPUTargetInfo::fillValidCPUList( 283 SmallVectorImpl<StringRef> &Values) const { 284 if (isAMDGCN(getTriple())) 285 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 286 else 287 llvm::AMDGPU::fillValidArchListR600(Values); 288 } 289 290 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 291 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 292 } 293 294 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 295 const TargetOptions &Opts) 296 : TargetInfo(Triple), 297 GPUKind(isAMDGCN(Triple) ? 298 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 299 llvm::AMDGPU::parseArchR600(Opts.CPU)), 300 GPUFeatures(isAMDGCN(Triple) ? 301 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 302 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 303 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 304 : DataLayoutStringR600); 305 assert(DataLayout->getAllocaAddrSpace() == Private); 306 GridValues = llvm::omp::AMDGPUGpuGridValues; 307 308 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 309 !isAMDGCN(Triple)); 310 UseAddrSpaceMapMangling = true; 311 312 HasLegalHalfType = true; 313 HasFloat16 = true; 314 315 // Set pointer width and alignment for target address space 0. 316 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 317 if (getMaxPointerWidth() == 64) { 318 LongWidth = LongAlign = 64; 319 SizeType = UnsignedLong; 320 PtrDiffType = SignedLong; 321 IntPtrType = SignedLong; 322 } 323 324 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 325 } 326 327 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 328 TargetInfo::adjust(Opts); 329 // ToDo: There are still a few places using default address space as private 330 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 331 // can be removed from the following line. 332 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 333 !isAMDGCN(getTriple())); 334 } 335 336 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 337 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 338 Builtin::FirstTSBuiltin); 339 } 340 341 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 342 MacroBuilder &Builder) const { 343 Builder.defineMacro("__AMD__"); 344 Builder.defineMacro("__AMDGPU__"); 345 346 if (isAMDGCN(getTriple())) 347 Builder.defineMacro("__AMDGCN__"); 348 else 349 Builder.defineMacro("__R600__"); 350 351 if (GPUKind != llvm::AMDGPU::GK_NONE) { 352 StringRef CanonName = isAMDGCN(getTriple()) ? 353 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 354 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 355 } 356 357 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 358 // removed in the near future. 359 if (hasFMAF()) 360 Builder.defineMacro("__HAS_FMAF__"); 361 if (hasFastFMAF()) 362 Builder.defineMacro("FP_FAST_FMAF"); 363 if (hasLDEXPF()) 364 Builder.defineMacro("__HAS_LDEXPF__"); 365 if (hasFP64()) 366 Builder.defineMacro("__HAS_FP64__"); 367 if (hasFastFMA()) 368 Builder.defineMacro("FP_FAST_FMA"); 369 } 370 371 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 372 assert(HalfFormat == Aux->HalfFormat); 373 assert(FloatFormat == Aux->FloatFormat); 374 assert(DoubleFormat == Aux->DoubleFormat); 375 376 // On x86_64 long double is 80-bit extended precision format, which is 377 // not supported by AMDGPU. 128-bit floating point format is also not 378 // supported by AMDGPU. Therefore keep its own format for these two types. 379 auto SaveLongDoubleFormat = LongDoubleFormat; 380 auto SaveFloat128Format = Float128Format; 381 copyAuxTarget(Aux); 382 LongDoubleFormat = SaveLongDoubleFormat; 383 Float128Format = SaveFloat128Format; 384 // For certain builtin types support on the host target, claim they are 385 // support to pass the compilation of the host code during the device-side 386 // compilation. 387 // FIXME: As the side effect, we also accept `__float128` uses in the device 388 // code. To rejct these builtin types supported in the host target but not in 389 // the device target, one approach would support `device_builtin` attribute 390 // so that we could tell the device builtin types from the host ones. The 391 // also solves the different representations of the same builtin type, such 392 // as `size_t` in the MSVC environment. 393 if (Aux->hasFloat128Type()) { 394 HasFloat128 = true; 395 Float128Format = DoubleFormat; 396 } 397 } 398