1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 if (CPU.empty()) 135 CPU = "gfx600"; 136 137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 138 case GK_GFX1012: 139 case GK_GFX1011: 140 Features["dot1-insts"] = true; 141 Features["dot2-insts"] = true; 142 Features["dot5-insts"] = true; 143 Features["dot6-insts"] = true; 144 LLVM_FALLTHROUGH; 145 case GK_GFX1010: 146 Features["dl-insts"] = true; 147 Features["ci-insts"] = true; 148 Features["16-bit-insts"] = true; 149 Features["dpp"] = true; 150 Features["gfx8-insts"] = true; 151 Features["gfx9-insts"] = true; 152 Features["gfx10-insts"] = true; 153 Features["s-memrealtime"] = true; 154 break; 155 case GK_GFX908: 156 Features["dot3-insts"] = true; 157 Features["dot4-insts"] = true; 158 Features["dot5-insts"] = true; 159 Features["dot6-insts"] = true; 160 LLVM_FALLTHROUGH; 161 case GK_GFX906: 162 Features["dl-insts"] = true; 163 Features["dot1-insts"] = true; 164 Features["dot2-insts"] = true; 165 LLVM_FALLTHROUGH; 166 case GK_GFX909: 167 case GK_GFX904: 168 case GK_GFX902: 169 case GK_GFX900: 170 Features["gfx9-insts"] = true; 171 LLVM_FALLTHROUGH; 172 case GK_GFX810: 173 case GK_GFX803: 174 case GK_GFX802: 175 case GK_GFX801: 176 Features["gfx8-insts"] = true; 177 Features["16-bit-insts"] = true; 178 Features["dpp"] = true; 179 Features["s-memrealtime"] = true; 180 LLVM_FALLTHROUGH; 181 case GK_GFX704: 182 case GK_GFX703: 183 case GK_GFX702: 184 case GK_GFX701: 185 case GK_GFX700: 186 Features["ci-insts"] = true; 187 LLVM_FALLTHROUGH; 188 case GK_GFX601: 189 case GK_GFX600: 190 break; 191 case GK_NONE: 192 return false; 193 default: 194 llvm_unreachable("Unhandled GPU!"); 195 } 196 } else { 197 if (CPU.empty()) 198 CPU = "r600"; 199 200 switch (llvm::AMDGPU::parseArchR600(CPU)) { 201 case GK_CAYMAN: 202 case GK_CYPRESS: 203 case GK_RV770: 204 case GK_RV670: 205 // TODO: Add fp64 when implemented. 206 break; 207 case GK_TURKS: 208 case GK_CAICOS: 209 case GK_BARTS: 210 case GK_SUMO: 211 case GK_REDWOOD: 212 case GK_JUNIPER: 213 case GK_CEDAR: 214 case GK_RV730: 215 case GK_RV710: 216 case GK_RS880: 217 case GK_R630: 218 case GK_R600: 219 break; 220 default: 221 llvm_unreachable("Unhandled GPU!"); 222 } 223 } 224 225 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 226 } 227 228 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 229 TargetOptions &TargetOpts) const { 230 bool hasFP32Denormals = false; 231 bool hasFP64Denormals = false; 232 233 for (auto &I : TargetOpts.FeaturesAsWritten) { 234 if (I == "+fp32-denormals" || I == "-fp32-denormals") 235 hasFP32Denormals = true; 236 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 237 hasFP64Denormals = true; 238 } 239 if (!hasFP32Denormals) 240 TargetOpts.Features.push_back( 241 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 242 ? '+' : '-') + Twine("fp32-denormals")) 243 .str()); 244 // Always do not flush fp64 or fp16 denorms. 245 if (!hasFP64Denormals && hasFP64()) 246 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 247 } 248 249 void AMDGPUTargetInfo::fillValidCPUList( 250 SmallVectorImpl<StringRef> &Values) const { 251 if (isAMDGCN(getTriple())) 252 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 253 else 254 llvm::AMDGPU::fillValidArchListR600(Values); 255 } 256 257 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 258 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 259 } 260 261 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 262 const TargetOptions &Opts) 263 : TargetInfo(Triple), 264 GPUKind(isAMDGCN(Triple) ? 265 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 266 llvm::AMDGPU::parseArchR600(Opts.CPU)), 267 GPUFeatures(isAMDGCN(Triple) ? 268 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 269 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 270 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 271 : DataLayoutStringR600); 272 assert(DataLayout->getAllocaAddrSpace() == Private); 273 274 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 275 !isAMDGCN(Triple)); 276 UseAddrSpaceMapMangling = true; 277 278 HasLegalHalfType = true; 279 HasFloat16 = true; 280 281 // Set pointer width and alignment for target address space 0. 282 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 283 if (getMaxPointerWidth() == 64) { 284 LongWidth = LongAlign = 64; 285 SizeType = UnsignedLong; 286 PtrDiffType = SignedLong; 287 IntPtrType = SignedLong; 288 } 289 290 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 291 } 292 293 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 294 TargetInfo::adjust(Opts); 295 // ToDo: There are still a few places using default address space as private 296 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 297 // can be removed from the following line. 298 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 299 !isAMDGCN(getTriple())); 300 } 301 302 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 303 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 304 Builtin::FirstTSBuiltin); 305 } 306 307 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 308 MacroBuilder &Builder) const { 309 Builder.defineMacro("__AMD__"); 310 Builder.defineMacro("__AMDGPU__"); 311 312 if (isAMDGCN(getTriple())) 313 Builder.defineMacro("__AMDGCN__"); 314 else 315 Builder.defineMacro("__R600__"); 316 317 if (GPUKind != llvm::AMDGPU::GK_NONE) { 318 StringRef CanonName = isAMDGCN(getTriple()) ? 319 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 320 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 321 } 322 323 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 324 // removed in the near future. 325 if (hasFMAF()) 326 Builder.defineMacro("__HAS_FMAF__"); 327 if (hasFastFMAF()) 328 Builder.defineMacro("FP_FAST_FMAF"); 329 if (hasLDEXPF()) 330 Builder.defineMacro("__HAS_LDEXPF__"); 331 if (hasFP64()) 332 Builder.defineMacro("__HAS_FP64__"); 333 if (hasFastFMA()) 334 Builder.defineMacro("FP_FAST_FMA"); 335 } 336 337 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 338 assert(HalfFormat == Aux->HalfFormat); 339 assert(FloatFormat == Aux->FloatFormat); 340 assert(DoubleFormat == Aux->DoubleFormat); 341 342 // On x86_64 long double is 80-bit extended precision format, which is 343 // not supported by AMDGPU. 128-bit floating point format is also not 344 // supported by AMDGPU. Therefore keep its own format for these two types. 345 auto SaveLongDoubleFormat = LongDoubleFormat; 346 auto SaveFloat128Format = Float128Format; 347 copyAuxTarget(Aux); 348 LongDoubleFormat = SaveLongDoubleFormat; 349 Float128Format = SaveFloat128Format; 350 } 351