//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }
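
  // For reference: with TargetTransformInfo::TCC_Basic equal to 1, the
  // helpers above report a relative throughput cost of 1 for full-rate, 2 for
  // half-rate and 4 for quarter-rate instructions, while TCK_CodeSize queries
  // get 2 from both the half- and quarter-rate helpers.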

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;
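
  // For illustration (not an exhaustive rule): values derived from
  // workitem-id intrinsics are typically reported as divergent, whereas
  // values computed only from kernel arguments or workgroup ids are treated
  // as uniform across a wavefront. The authoritative logic lives in
  // AMDGPUTargetTransformInfo.cpp.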

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H