//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  unsigned MaxVGPRs;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and on others
  // quarter rate. This also applies to some integer operations.
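  // The exact split is a subtarget property; get64BitInstrCost presumably
  // selects between the half-rate and quarter-rate helpers above, so a 64-bit
  // operation is costed at 2 * TCC_Basic on half-rate parts and 4 * TCC_Basic
  // on quarter-rate parts for throughput-oriented cost kinds.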
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
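    // Returning -1 signals that no flat address space is available, so
    // passes such as InferAddressSpaces will skip these functions.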
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H