//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetLowering;
class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class R600Subtarget;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  unsigned MaxVGPRs;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
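  // For example, a half-rate part would be expected to pay
  // getHalfRateInstrCost(CostKind) (2 * TCC_Basic for throughput) and a
  // quarter-rate part getQuarterRateInstrCost(CostKind) (4 * TCC_Basic);
  // the exact choice is made per-subtarget in the implementation.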
  int get64BitInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
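    // Returning an invalid address space signals callers such as the
    // InferAddressSpaces pass that there is nothing to rewrite.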
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H