//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and on others
  // quarter rate. This also applies to some integer operations.
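  //
  // The definition lives in the corresponding .cpp file; a plausible sketch
  // (an assumption based on the comment above, not something this header
  // guarantees) is to key the rate off the subtarget, e.g.:
  //   return ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
  //                                 : getQuarterRateInstrCost(CostKind);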
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign,
                                  Optional<uint32_t> AtomicElementSize) const;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicCpySize) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
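    // Returning -1 (~0u for the unsigned return type) follows the base TTI
    // convention for "no flat address space to optimize away", so passes such
    // as InferAddressSpaces bail out early for these functions.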
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = None);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H