//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetLowering;
class GCNSubtarget;
class InstCombiner;
class Loop;
class R600Subtarget;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  unsigned MaxVGPRs;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  int get64BitInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(unsigned RCID) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
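    // Note: the -1 below wraps to ~0u for the unsigned return type, which the
    // TTI interface treats as "no flat address space", so InferAddressSpaces
    // bails out early for graphics shaders.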
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                          VectorType *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, bool IsPairwise,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                            TTI::TargetCostKind CostKind);
  int getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H