//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetLowering;
class GCNSubtarget;
class InstCombiner;
class Loop;
class R600Subtarget;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

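// Common TTI implementation shared by the AMDGPU subtarget families. It is
// embedded as the CommonTTI member of GCNTTIImpl and R600TTIImpl below, and
// currently only supplies loop unrolling and peeling preferences.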
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

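// TTI implementation for GCN-based subtargets (GCNSubtarget). Costs below are
// expressed in multiples of a full-rate instruction (TCC_Basic); the half- and
// quarter-rate helpers scale that base cost. A target machine would typically
// hand this out wrapped in a TargetTransformInfo; a minimal sketch, assuming
// the usual BasicTTIImpl wiring:
//
//   TargetTransformInfo
//   GCNTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(GCNTTIImpl(this, F));
//   }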
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  unsigned MaxVGPRs;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, while on others they
  // are quarter rate. This also applies to some integer operations.
  int get64BitInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(unsigned RCID) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                          VectorType *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(
      unsigned Opcode,
      VectorType *Ty,
      bool IsPairwise,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                            TTI::TargetCostKind CostKind);
  int getMinMaxReductionCost(
    VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
};

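// TTI implementation for the pre-GCN R600 family of subtargets
// (R600Subtarget). It exposes a smaller set of hooks than GCNTTIImpl and,
// like it, carries an AMDGPUTTIImpl member (CommonTTI) for the shared loop
// unrolling and peeling preference logic.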
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H