1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/IR/DataLayout.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // cuda_device 49 Constant, // cuda_constant 50 Local // cuda_shared 51 }; 52 53 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 54 Private, // Default 55 Global, // opencl_global 56 Local, // opencl_local 57 Constant, // opencl_constant 58 Private, // opencl_private 59 Generic, // opencl_generic 60 Global, // cuda_device 61 Constant, // cuda_constant 62 Local // cuda_shared 63 }; 64 } // namespace targets 65 } // namespace clang 66 67 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 68 #define BUILTIN(ID, TYPE, ATTRS) \ 69 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 70 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 71 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 72 #include "clang/Basic/BuiltinsAMDGPU.def" 73 }; 74 75 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 76 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 77 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 78 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 79 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 80 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 81 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 82 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 83 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 84 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 85 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 86 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 87 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 88 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 89 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 90 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 91 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 92 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 93 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 94 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 95 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 96 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 97 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 98 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 99 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 100 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 101 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 102 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 103 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 104 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 105 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 106 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 107 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 108 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 109 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 110 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 111 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 112 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 113 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 114 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 115 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 116 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 117 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 118 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 119 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 120 "flat_scratch_lo", "flat_scratch_hi" 121 }; 122 123 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 124 return llvm::makeArrayRef(GCCRegNames); 125 } 126 127 bool AMDGPUTargetInfo::initFeatureMap( 128 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 129 const std::vector<std::string> &FeatureVec) const { 130 131 using namespace llvm::AMDGPU; 132 133 // XXX - What does the member GPU mean if device name string passed here? 134 if (isAMDGCN(getTriple())) { 135 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 136 case GK_GFX1012: 137 case GK_GFX1011: 138 Features["dot1-insts"] = true; 139 Features["dot2-insts"] = true; 140 Features["dot5-insts"] = true; 141 Features["dot6-insts"] = true; 142 LLVM_FALLTHROUGH; 143 case GK_GFX1010: 144 Features["dl-insts"] = true; 145 Features["ci-insts"] = true; 146 Features["flat-address-space"] = true; 147 Features["16-bit-insts"] = true; 148 Features["dpp"] = true; 149 Features["gfx8-insts"] = true; 150 Features["gfx9-insts"] = true; 151 Features["gfx10-insts"] = true; 152 Features["s-memrealtime"] = true; 153 break; 154 case GK_GFX908: 155 Features["dot3-insts"] = true; 156 Features["dot4-insts"] = true; 157 Features["dot5-insts"] = true; 158 Features["dot6-insts"] = true; 159 LLVM_FALLTHROUGH; 160 case GK_GFX906: 161 Features["dl-insts"] = true; 162 Features["dot1-insts"] = true; 163 Features["dot2-insts"] = true; 164 LLVM_FALLTHROUGH; 165 case GK_GFX909: 166 case GK_GFX904: 167 case GK_GFX902: 168 case GK_GFX900: 169 Features["gfx9-insts"] = true; 170 LLVM_FALLTHROUGH; 171 case GK_GFX810: 172 case GK_GFX803: 173 case GK_GFX802: 174 case GK_GFX801: 175 Features["gfx8-insts"] = true; 176 Features["16-bit-insts"] = true; 177 Features["dpp"] = true; 178 Features["s-memrealtime"] = true; 179 LLVM_FALLTHROUGH; 180 case GK_GFX704: 181 case GK_GFX703: 182 case GK_GFX702: 183 case GK_GFX701: 184 case GK_GFX700: 185 Features["ci-insts"] = true; 186 Features["flat-address-space"] = true; 187 LLVM_FALLTHROUGH; 188 case GK_GFX601: 189 case GK_GFX600: 190 break; 191 case GK_NONE: 192 break; 193 default: 194 llvm_unreachable("Unhandled GPU!"); 195 } 196 } else { 197 if (CPU.empty()) 198 CPU = "r600"; 199 200 switch (llvm::AMDGPU::parseArchR600(CPU)) { 201 case GK_CAYMAN: 202 case GK_CYPRESS: 203 case GK_RV770: 204 case GK_RV670: 205 // TODO: Add fp64 when implemented. 206 break; 207 case GK_TURKS: 208 case GK_CAICOS: 209 case GK_BARTS: 210 case GK_SUMO: 211 case GK_REDWOOD: 212 case GK_JUNIPER: 213 case GK_CEDAR: 214 case GK_RV730: 215 case GK_RV710: 216 case GK_RS880: 217 case GK_R630: 218 case GK_R600: 219 break; 220 default: 221 llvm_unreachable("Unhandled GPU!"); 222 } 223 } 224 225 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 226 } 227 228 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 229 TargetOptions &TargetOpts) const { 230 bool hasFP32Denormals = false; 231 bool hasFP64Denormals = false; 232 233 for (auto &I : TargetOpts.FeaturesAsWritten) { 234 if (I == "+fp32-denormals" || I == "-fp32-denormals") 235 hasFP32Denormals = true; 236 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 237 hasFP64Denormals = true; 238 } 239 if (!hasFP32Denormals) 240 TargetOpts.Features.push_back( 241 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 242 ? '+' : '-') + Twine("fp32-denormals")) 243 .str()); 244 // Always do not flush fp64 or fp16 denorms. 245 if (!hasFP64Denormals && hasFP64()) 246 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 247 } 248 249 void AMDGPUTargetInfo::fillValidCPUList( 250 SmallVectorImpl<StringRef> &Values) const { 251 if (isAMDGCN(getTriple())) 252 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 253 else 254 llvm::AMDGPU::fillValidArchListR600(Values); 255 } 256 257 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 258 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 259 } 260 261 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 262 const TargetOptions &Opts) 263 : TargetInfo(Triple), 264 GPUKind(isAMDGCN(Triple) ? 265 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 266 llvm::AMDGPU::parseArchR600(Opts.CPU)), 267 GPUFeatures(isAMDGCN(Triple) ? 268 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 269 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 270 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 271 : DataLayoutStringR600); 272 assert(DataLayout->getAllocaAddrSpace() == Private); 273 274 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 275 !isAMDGCN(Triple)); 276 UseAddrSpaceMapMangling = true; 277 278 HasLegalHalfType = true; 279 HasFloat16 = true; 280 281 // Set pointer width and alignment for target address space 0. 282 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 283 if (getMaxPointerWidth() == 64) { 284 LongWidth = LongAlign = 64; 285 SizeType = UnsignedLong; 286 PtrDiffType = SignedLong; 287 IntPtrType = SignedLong; 288 } 289 290 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 291 } 292 293 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 294 TargetInfo::adjust(Opts); 295 // ToDo: There are still a few places using default address space as private 296 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 297 // can be removed from the following line. 298 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 299 !isAMDGCN(getTriple())); 300 } 301 302 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 303 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 304 Builtin::FirstTSBuiltin); 305 } 306 307 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 308 MacroBuilder &Builder) const { 309 Builder.defineMacro("__AMD__"); 310 Builder.defineMacro("__AMDGPU__"); 311 312 if (isAMDGCN(getTriple())) 313 Builder.defineMacro("__AMDGCN__"); 314 else 315 Builder.defineMacro("__R600__"); 316 317 if (GPUKind != llvm::AMDGPU::GK_NONE) { 318 StringRef CanonName = isAMDGCN(getTriple()) ? 319 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 320 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 321 } 322 323 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 324 // removed in the near future. 325 if (hasFMAF()) 326 Builder.defineMacro("__HAS_FMAF__"); 327 if (hasFastFMAF()) 328 Builder.defineMacro("FP_FAST_FMAF"); 329 if (hasLDEXPF()) 330 Builder.defineMacro("__HAS_LDEXPF__"); 331 if (hasFP64()) 332 Builder.defineMacro("__HAS_FP64__"); 333 if (hasFastFMA()) 334 Builder.defineMacro("FP_FAST_FMA"); 335 } 336 337 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 338 assert(HalfFormat == Aux->HalfFormat); 339 assert(FloatFormat == Aux->FloatFormat); 340 assert(DoubleFormat == Aux->DoubleFormat); 341 342 // On x86_64 long double is 80-bit extended precision format, which is 343 // not supported by AMDGPU. 128-bit floating point format is also not 344 // supported by AMDGPU. Therefore keep its own format for these two types. 345 auto SaveLongDoubleFormat = LongDoubleFormat; 346 auto SaveFloat128Format = Float128Format; 347 copyAuxTarget(Aux); 348 LongDoubleFormat = SaveLongDoubleFormat; 349 Float128Format = SaveFloat128Format; 350 } 351