//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  unsigned MaxVGPRs;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

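  // A note on the cost helpers below: costs are expressed relative to a
  // "full rate" instruction. Half- and quarter-rate instructions issue at
  // 1/2 and 1/4 of full throughput, so their reciprocal-throughput costs
  // scale to 2x and 4x TCC_Basic, while the TCK_CodeSize cost stays a raw
  // size estimate. For example, at TTI::TCK_RecipThroughput:
  //   getQuarterRateInstrCost(CostKind) == 4 * TCC_Basic == 4
  // but at TTI::TCK_CodeSize it returns 2.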
  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: These instructions are usually 8 bytes but take 4x as many cycles,
  // so the code-size cost should perhaps be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts normal fp64 operations are half rate, and on others quarter
  // rate. This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

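  // Control flow on AMDGPU may always be divergent: lanes of a wavefront can
  // take different branch directions, so divergence analysis is always
  // meaningful here.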
  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

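  // AMDGPU has native bit-count instructions (e.g. S_BCNT1_I32_B32 and
  // V_BCNT_U32_B32), so population count is reported as fast hardware
  // support for any power-of-2 width.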
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

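  // Register and vectorization queries. Background note: GCN SGPRs and VGPRs
  // are 32 bits wide, and wider values are split across consecutive
  // registers, so the register counts and widths below are expressed in
  // 32-bit units.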
  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
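  // Memcpy lowering hooks. A sketch of the expected behavior (an assumption,
  // not verified against the implementation): for sufficiently aligned copies
  // these return a wide type such as <4 x i32> so the memcpy loop uses
  // 16-byte accesses, falling back to narrower integer types for the
  // residual bytes.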
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
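  // Divergence queries: for example, llvm.amdgcn.workitem.id.x is a source of
  // divergence (each lane sees a different value), while
  // llvm.amdgcn.workgroup.id.x is always uniform across a wavefront.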
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
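    // Returning an invalid address space (-1) signals to callers such as
    // InferAddressSpaces that no flat address space is available here.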
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

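  // LDS (local), GDS (region), and scratch (private) memory are allocated per
  // workgroup or per thread at dispatch time, so globals placed there cannot
  // carry a meaningful static initializer.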
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

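  // Scale the generic inlining threshold aggressively (the base multiplier
  // is 1): calls are expensive on AMDGPU, so inlining is usually profitable.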
  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

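  // No extra inlining bonus for vector-heavy callees (the generic default is
  // a 150% bonus): IR vectors are largely scalarized per lane on AMDGPU, so
  // their presence does not indicate additional benefit from inlining.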
  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H