//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        TargetTriple(TM->getTargetTriple()),
        ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
        TLI(ST->getTargetLowering()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

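/// TTI implementation used for the GCN (amdgcn) subtargets. It refines cost,
/// divergence, vectorization, and address-space queries using GCNSubtarget
/// details and defers everything it does not override to BasicTTIImplBase.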
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and on others
  // quarter rate. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders
    // which don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const;

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
};

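/// TTI implementation used for the R600/Evergreen/Northern Islands (r600)
/// subtargets. It carries an AMDGPUTTIImpl member (CommonTTI) so the unrolling
/// preferences shared with the GCN implementation can be reused, and overrides
/// only the queries declared below.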
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H