1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/IR/DataLayout.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // cuda_device 49 Constant, // cuda_constant 50 Local, // cuda_shared 51 Generic, // ptr32_sptr 52 Generic, // ptr32_uptr 53 Generic // ptr64 54 }; 55 56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 57 Private, // Default 58 Global, // opencl_global 59 Local, // opencl_local 60 Constant, // opencl_constant 61 Private, // opencl_private 62 Generic, // opencl_generic 63 Global, // cuda_device 64 Constant, // cuda_constant 65 Local, // cuda_shared 66 Generic, // ptr32_sptr 67 Generic, // ptr32_uptr 68 Generic // ptr64 69 70 }; 71 } // namespace targets 72 } // namespace clang 73 74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 75 #define BUILTIN(ID, TYPE, ATTRS) \ 76 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 78 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 79 #include "clang/Basic/BuiltinsAMDGPU.def" 80 }; 81 82 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 83 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 84 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 85 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 86 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 87 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 88 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 89 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 90 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 91 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 92 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 93 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 94 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 95 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 96 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 97 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 98 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 99 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 100 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 101 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 102 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 103 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 104 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 105 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 106 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 107 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 108 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 109 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 110 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 111 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 112 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 113 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 114 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 115 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 116 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 117 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 118 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 119 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 120 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 121 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 122 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 123 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 124 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 125 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 126 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 127 "flat_scratch_lo", "flat_scratch_hi" 128 }; 129 130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 131 return llvm::makeArrayRef(GCCRegNames); 132 } 133 134 bool AMDGPUTargetInfo::initFeatureMap( 135 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 136 const std::vector<std::string> &FeatureVec) const { 137 138 using namespace llvm::AMDGPU; 139 140 // XXX - What does the member GPU mean if device name string passed here? 141 if (isAMDGCN(getTriple())) { 142 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 143 case GK_GFX1012: 144 case GK_GFX1011: 145 Features["dot1-insts"] = true; 146 Features["dot2-insts"] = true; 147 Features["dot5-insts"] = true; 148 Features["dot6-insts"] = true; 149 LLVM_FALLTHROUGH; 150 case GK_GFX1010: 151 Features["dl-insts"] = true; 152 Features["ci-insts"] = true; 153 Features["flat-address-space"] = true; 154 Features["16-bit-insts"] = true; 155 Features["dpp"] = true; 156 Features["gfx8-insts"] = true; 157 Features["gfx9-insts"] = true; 158 Features["gfx10-insts"] = true; 159 Features["s-memrealtime"] = true; 160 break; 161 case GK_GFX908: 162 Features["dot3-insts"] = true; 163 Features["dot4-insts"] = true; 164 Features["dot5-insts"] = true; 165 Features["dot6-insts"] = true; 166 LLVM_FALLTHROUGH; 167 case GK_GFX906: 168 Features["dl-insts"] = true; 169 Features["dot1-insts"] = true; 170 Features["dot2-insts"] = true; 171 LLVM_FALLTHROUGH; 172 case GK_GFX909: 173 case GK_GFX904: 174 case GK_GFX902: 175 case GK_GFX900: 176 Features["gfx9-insts"] = true; 177 LLVM_FALLTHROUGH; 178 case GK_GFX810: 179 case GK_GFX803: 180 case GK_GFX802: 181 case GK_GFX801: 182 Features["gfx8-insts"] = true; 183 Features["16-bit-insts"] = true; 184 Features["dpp"] = true; 185 Features["s-memrealtime"] = true; 186 LLVM_FALLTHROUGH; 187 case GK_GFX704: 188 case GK_GFX703: 189 case GK_GFX702: 190 case GK_GFX701: 191 case GK_GFX700: 192 Features["ci-insts"] = true; 193 Features["flat-address-space"] = true; 194 LLVM_FALLTHROUGH; 195 case GK_GFX601: 196 case GK_GFX600: 197 break; 198 case GK_NONE: 199 break; 200 default: 201 llvm_unreachable("Unhandled GPU!"); 202 } 203 } else { 204 if (CPU.empty()) 205 CPU = "r600"; 206 207 switch (llvm::AMDGPU::parseArchR600(CPU)) { 208 case GK_CAYMAN: 209 case GK_CYPRESS: 210 case GK_RV770: 211 case GK_RV670: 212 // TODO: Add fp64 when implemented. 213 break; 214 case GK_TURKS: 215 case GK_CAICOS: 216 case GK_BARTS: 217 case GK_SUMO: 218 case GK_REDWOOD: 219 case GK_JUNIPER: 220 case GK_CEDAR: 221 case GK_RV730: 222 case GK_RV710: 223 case GK_RS880: 224 case GK_R630: 225 case GK_R600: 226 break; 227 default: 228 llvm_unreachable("Unhandled GPU!"); 229 } 230 } 231 232 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 233 } 234 235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 236 TargetOptions &TargetOpts) const { 237 bool hasFP32Denormals = false; 238 bool hasFP64Denormals = false; 239 240 for (auto &I : TargetOpts.FeaturesAsWritten) { 241 if (I == "+fp32-denormals" || I == "-fp32-denormals") 242 hasFP32Denormals = true; 243 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 244 hasFP64Denormals = true; 245 } 246 if (!hasFP32Denormals) 247 TargetOpts.Features.push_back( 248 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 249 ? '+' : '-') + Twine("fp32-denormals")) 250 .str()); 251 // Always do not flush fp64 or fp16 denorms. 252 if (!hasFP64Denormals && hasFP64()) 253 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 254 } 255 256 void AMDGPUTargetInfo::fillValidCPUList( 257 SmallVectorImpl<StringRef> &Values) const { 258 if (isAMDGCN(getTriple())) 259 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 260 else 261 llvm::AMDGPU::fillValidArchListR600(Values); 262 } 263 264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 265 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 266 } 267 268 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 269 const TargetOptions &Opts) 270 : TargetInfo(Triple), 271 GPUKind(isAMDGCN(Triple) ? 272 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 273 llvm::AMDGPU::parseArchR600(Opts.CPU)), 274 GPUFeatures(isAMDGCN(Triple) ? 275 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 276 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 277 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 278 : DataLayoutStringR600); 279 assert(DataLayout->getAllocaAddrSpace() == Private); 280 281 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 282 !isAMDGCN(Triple)); 283 UseAddrSpaceMapMangling = true; 284 285 HasLegalHalfType = true; 286 HasFloat16 = true; 287 288 // Set pointer width and alignment for target address space 0. 289 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 290 if (getMaxPointerWidth() == 64) { 291 LongWidth = LongAlign = 64; 292 SizeType = UnsignedLong; 293 PtrDiffType = SignedLong; 294 IntPtrType = SignedLong; 295 } 296 297 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 298 } 299 300 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 301 TargetInfo::adjust(Opts); 302 // ToDo: There are still a few places using default address space as private 303 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 304 // can be removed from the following line. 305 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 306 !isAMDGCN(getTriple())); 307 } 308 309 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 310 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 311 Builtin::FirstTSBuiltin); 312 } 313 314 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 315 MacroBuilder &Builder) const { 316 Builder.defineMacro("__AMD__"); 317 Builder.defineMacro("__AMDGPU__"); 318 319 if (isAMDGCN(getTriple())) 320 Builder.defineMacro("__AMDGCN__"); 321 else 322 Builder.defineMacro("__R600__"); 323 324 if (GPUKind != llvm::AMDGPU::GK_NONE) { 325 StringRef CanonName = isAMDGCN(getTriple()) ? 326 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 327 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 328 } 329 330 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 331 // removed in the near future. 332 if (hasFMAF()) 333 Builder.defineMacro("__HAS_FMAF__"); 334 if (hasFastFMAF()) 335 Builder.defineMacro("FP_FAST_FMAF"); 336 if (hasLDEXPF()) 337 Builder.defineMacro("__HAS_LDEXPF__"); 338 if (hasFP64()) 339 Builder.defineMacro("__HAS_FP64__"); 340 if (hasFastFMA()) 341 Builder.defineMacro("FP_FAST_FMA"); 342 } 343 344 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 345 assert(HalfFormat == Aux->HalfFormat); 346 assert(FloatFormat == Aux->FloatFormat); 347 assert(DoubleFormat == Aux->DoubleFormat); 348 349 // On x86_64 long double is 80-bit extended precision format, which is 350 // not supported by AMDGPU. 128-bit floating point format is also not 351 // supported by AMDGPU. Therefore keep its own format for these two types. 352 auto SaveLongDoubleFormat = LongDoubleFormat; 353 auto SaveFloat128Format = Float128Format; 354 copyAuxTarget(Aux); 355 LongDoubleFormat = SaveLongDoubleFormat; 356 Float128Format = SaveFloat128Format; 357 } 358