xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1e8d8bef9SDimitry Andric //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //==-----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric /// \file
10e8d8bef9SDimitry Andric /// AMD GCN specific subclass of TargetSubtarget.
11e8d8bef9SDimitry Andric //
12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
13e8d8bef9SDimitry Andric 
14e8d8bef9SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15e8d8bef9SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16e8d8bef9SDimitry Andric 
17e8d8bef9SDimitry Andric #include "AMDGPUCallLowering.h"
18e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
19e8d8bef9SDimitry Andric #include "SIFrameLowering.h"
20e8d8bef9SDimitry Andric #include "SIISelLowering.h"
21e8d8bef9SDimitry Andric #include "SIInstrInfo.h"
22e8d8bef9SDimitry Andric #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
23e8d8bef9SDimitry Andric 
24e8d8bef9SDimitry Andric namespace llvm {
25e8d8bef9SDimitry Andric 
26e8d8bef9SDimitry Andric class MCInst;
27e8d8bef9SDimitry Andric class MCInstrInfo;
28e8d8bef9SDimitry Andric 
29e8d8bef9SDimitry Andric } // namespace llvm
30e8d8bef9SDimitry Andric 
31e8d8bef9SDimitry Andric #define GET_SUBTARGETINFO_HEADER
32e8d8bef9SDimitry Andric #include "AMDGPUGenSubtargetInfo.inc"
33e8d8bef9SDimitry Andric 
34e8d8bef9SDimitry Andric namespace llvm {
35e8d8bef9SDimitry Andric 
36e8d8bef9SDimitry Andric class GCNTargetMachine;
37e8d8bef9SDimitry Andric 
38e8d8bef9SDimitry Andric class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
39e8d8bef9SDimitry Andric                            public AMDGPUSubtarget {
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric   using AMDGPUSubtarget::getMaxWavesPerEU;
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric public:
44*fe6060f1SDimitry Andric   // Following 2 enums are documented at:
45*fe6060f1SDimitry Andric   //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector. NONE has value 0 and AMDHSA has value 1.
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
50e8d8bef9SDimitry Andric 
/// Trap identifiers. LLVMAMDHSATrap has value 2 and LLVMAMDHSADebugTrap has
/// value 3.
enum class TrapID {
  LLVMAMDHSATrap = 2,
  LLVMAMDHSADebugTrap = 3,
};
55e8d8bef9SDimitry Andric 
56e8d8bef9SDimitry Andric private:
57e8d8bef9SDimitry Andric   /// GlobalISel related APIs.
58e8d8bef9SDimitry Andric   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
59e8d8bef9SDimitry Andric   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
60e8d8bef9SDimitry Andric   std::unique_ptr<InstructionSelector> InstSelector;
61e8d8bef9SDimitry Andric   std::unique_ptr<LegalizerInfo> Legalizer;
62e8d8bef9SDimitry Andric   std::unique_ptr<RegisterBankInfo> RegBankInfo;
63e8d8bef9SDimitry Andric 
64e8d8bef9SDimitry Andric protected:
65e8d8bef9SDimitry Andric   // Basic subtarget description.
66e8d8bef9SDimitry Andric   Triple TargetTriple;
67e8d8bef9SDimitry Andric   AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
68e8d8bef9SDimitry Andric   unsigned Gen;
69e8d8bef9SDimitry Andric   InstrItineraryData InstrItins;
70e8d8bef9SDimitry Andric   int LDSBankCount;
71e8d8bef9SDimitry Andric   unsigned MaxPrivateElementSize;
72e8d8bef9SDimitry Andric 
73e8d8bef9SDimitry Andric   // Possibly statically set by tablegen, but may want to be overridden.
74e8d8bef9SDimitry Andric   bool FastFMAF32;
75e8d8bef9SDimitry Andric   bool FastDenormalF32;
76e8d8bef9SDimitry Andric   bool HalfRate64Ops;
77*fe6060f1SDimitry Andric   bool FullRate64Ops;
78e8d8bef9SDimitry Andric 
79e8d8bef9SDimitry Andric   // Dynamically set bits that enable features.
80e8d8bef9SDimitry Andric   bool FlatForGlobal;
81e8d8bef9SDimitry Andric   bool AutoWaitcntBeforeBarrier;
82e8d8bef9SDimitry Andric   bool UnalignedScratchAccess;
83e8d8bef9SDimitry Andric   bool UnalignedAccessMode;
84e8d8bef9SDimitry Andric   bool HasApertureRegs;
85e8d8bef9SDimitry Andric   bool SupportsXNACK;
86e8d8bef9SDimitry Andric 
87e8d8bef9SDimitry Andric   // This should not be used directly. 'TargetID' tracks the dynamic settings
88e8d8bef9SDimitry Andric   // for XNACK.
89e8d8bef9SDimitry Andric   bool EnableXNACK;
90e8d8bef9SDimitry Andric 
91*fe6060f1SDimitry Andric   bool EnableTgSplit;
92e8d8bef9SDimitry Andric   bool EnableCuMode;
93e8d8bef9SDimitry Andric   bool TrapHandler;
94e8d8bef9SDimitry Andric 
95e8d8bef9SDimitry Andric   // Used as options.
96e8d8bef9SDimitry Andric   bool EnableLoadStoreOpt;
97e8d8bef9SDimitry Andric   bool EnableUnsafeDSOffsetFolding;
98e8d8bef9SDimitry Andric   bool EnableSIScheduler;
99e8d8bef9SDimitry Andric   bool EnableDS128;
100e8d8bef9SDimitry Andric   bool EnablePRTStrictNull;
101e8d8bef9SDimitry Andric   bool DumpCode;
102e8d8bef9SDimitry Andric 
103e8d8bef9SDimitry Andric   // Subtarget properties statically set by tablegen
104e8d8bef9SDimitry Andric   bool FP64;
105e8d8bef9SDimitry Andric   bool FMA;
106e8d8bef9SDimitry Andric   bool MIMG_R128;
107*fe6060f1SDimitry Andric   bool IsGCN;
108e8d8bef9SDimitry Andric   bool CIInsts;
109e8d8bef9SDimitry Andric   bool GFX8Insts;
110e8d8bef9SDimitry Andric   bool GFX9Insts;
111*fe6060f1SDimitry Andric   bool GFX90AInsts;
112e8d8bef9SDimitry Andric   bool GFX10Insts;
113e8d8bef9SDimitry Andric   bool GFX10_3Insts;
114e8d8bef9SDimitry Andric   bool GFX7GFX8GFX9Insts;
115e8d8bef9SDimitry Andric   bool SGPRInitBug;
116*fe6060f1SDimitry Andric   bool NegativeScratchOffsetBug;
117*fe6060f1SDimitry Andric   bool NegativeUnalignedScratchOffsetBug;
118e8d8bef9SDimitry Andric   bool HasSMemRealTime;
119e8d8bef9SDimitry Andric   bool HasIntClamp;
120e8d8bef9SDimitry Andric   bool HasFmaMixInsts;
121e8d8bef9SDimitry Andric   bool HasMovrel;
122e8d8bef9SDimitry Andric   bool HasVGPRIndexMode;
123e8d8bef9SDimitry Andric   bool HasScalarStores;
124e8d8bef9SDimitry Andric   bool HasScalarAtomics;
125e8d8bef9SDimitry Andric   bool HasSDWAOmod;
126e8d8bef9SDimitry Andric   bool HasSDWAScalar;
127e8d8bef9SDimitry Andric   bool HasSDWASdst;
128e8d8bef9SDimitry Andric   bool HasSDWAMac;
129e8d8bef9SDimitry Andric   bool HasSDWAOutModsVOPC;
130e8d8bef9SDimitry Andric   bool HasDPP;
131e8d8bef9SDimitry Andric   bool HasDPP8;
132*fe6060f1SDimitry Andric   bool Has64BitDPP;
133*fe6060f1SDimitry Andric   bool HasPackedFP32Ops;
134*fe6060f1SDimitry Andric   bool HasExtendedImageInsts;
135e8d8bef9SDimitry Andric   bool HasR128A16;
136e8d8bef9SDimitry Andric   bool HasGFX10A16;
137e8d8bef9SDimitry Andric   bool HasG16;
138e8d8bef9SDimitry Andric   bool HasNSAEncoding;
139*fe6060f1SDimitry Andric   unsigned NSAMaxSize;
140*fe6060f1SDimitry Andric   bool GFX10_AEncoding;
141e8d8bef9SDimitry Andric   bool GFX10_BEncoding;
142e8d8bef9SDimitry Andric   bool HasDLInsts;
143e8d8bef9SDimitry Andric   bool HasDot1Insts;
144e8d8bef9SDimitry Andric   bool HasDot2Insts;
145e8d8bef9SDimitry Andric   bool HasDot3Insts;
146e8d8bef9SDimitry Andric   bool HasDot4Insts;
147e8d8bef9SDimitry Andric   bool HasDot5Insts;
148e8d8bef9SDimitry Andric   bool HasDot6Insts;
149*fe6060f1SDimitry Andric   bool HasDot7Insts;
150e8d8bef9SDimitry Andric   bool HasMAIInsts;
151e8d8bef9SDimitry Andric   bool HasPkFmacF16Inst;
152e8d8bef9SDimitry Andric   bool HasAtomicFaddInsts;
153e8d8bef9SDimitry Andric   bool SupportsSRAMECC;
154e8d8bef9SDimitry Andric 
155e8d8bef9SDimitry Andric   // This should not be used directly. 'TargetID' tracks the dynamic settings
156e8d8bef9SDimitry Andric   // for SRAMECC.
157e8d8bef9SDimitry Andric   bool EnableSRAMECC;
158e8d8bef9SDimitry Andric 
159e8d8bef9SDimitry Andric   bool HasNoSdstCMPX;
160e8d8bef9SDimitry Andric   bool HasVscnt;
161e8d8bef9SDimitry Andric   bool HasGetWaveIdInst;
162e8d8bef9SDimitry Andric   bool HasSMemTimeInst;
163*fe6060f1SDimitry Andric   bool HasShaderCyclesRegister;
164e8d8bef9SDimitry Andric   bool HasRegisterBanking;
165e8d8bef9SDimitry Andric   bool HasVOP3Literal;
166e8d8bef9SDimitry Andric   bool HasNoDataDepHazard;
167e8d8bef9SDimitry Andric   bool FlatAddressSpace;
168e8d8bef9SDimitry Andric   bool FlatInstOffsets;
169e8d8bef9SDimitry Andric   bool FlatGlobalInsts;
170e8d8bef9SDimitry Andric   bool FlatScratchInsts;
171e8d8bef9SDimitry Andric   bool ScalarFlatScratchInsts;
172*fe6060f1SDimitry Andric   bool HasArchitectedFlatScratch;
173e8d8bef9SDimitry Andric   bool AddNoCarryInsts;
174e8d8bef9SDimitry Andric   bool HasUnpackedD16VMem;
175*fe6060f1SDimitry Andric   bool R600ALUInst;
176*fe6060f1SDimitry Andric   bool CaymanISA;
177*fe6060f1SDimitry Andric   bool CFALUBug;
178e8d8bef9SDimitry Andric   bool LDSMisalignedBug;
179e8d8bef9SDimitry Andric   bool HasMFMAInlineLiteralBug;
180*fe6060f1SDimitry Andric   bool HasVertexCache;
181*fe6060f1SDimitry Andric   short TexVTXClauseSize;
182e8d8bef9SDimitry Andric   bool UnalignedBufferAccess;
183e8d8bef9SDimitry Andric   bool UnalignedDSAccess;
184*fe6060f1SDimitry Andric   bool HasPackedTID;
185e8d8bef9SDimitry Andric   bool ScalarizeGlobal;
186e8d8bef9SDimitry Andric 
187e8d8bef9SDimitry Andric   bool HasVcmpxPermlaneHazard;
188e8d8bef9SDimitry Andric   bool HasVMEMtoScalarWriteHazard;
189e8d8bef9SDimitry Andric   bool HasSMEMtoVectorWriteHazard;
190e8d8bef9SDimitry Andric   bool HasInstFwdPrefetchBug;
191e8d8bef9SDimitry Andric   bool HasVcmpxExecWARHazard;
192e8d8bef9SDimitry Andric   bool HasLdsBranchVmemWARHazard;
193e8d8bef9SDimitry Andric   bool HasNSAtoVMEMBug;
194*fe6060f1SDimitry Andric   bool HasNSAClauseBug;
195e8d8bef9SDimitry Andric   bool HasOffset3fBug;
196e8d8bef9SDimitry Andric   bool HasFlatSegmentOffsetBug;
197e8d8bef9SDimitry Andric   bool HasImageStoreD16Bug;
198e8d8bef9SDimitry Andric   bool HasImageGather4D16Bug;
199e8d8bef9SDimitry Andric 
200e8d8bef9SDimitry Andric   // Dummy feature to use for assembler in tablegen.
201e8d8bef9SDimitry Andric   bool FeatureDisable;
202e8d8bef9SDimitry Andric 
203e8d8bef9SDimitry Andric   SelectionDAGTargetInfo TSInfo;
204e8d8bef9SDimitry Andric private:
205e8d8bef9SDimitry Andric   SIInstrInfo InstrInfo;
206e8d8bef9SDimitry Andric   SITargetLowering TLInfo;
207e8d8bef9SDimitry Andric   SIFrameLowering FrameLowering;
208e8d8bef9SDimitry Andric 
209e8d8bef9SDimitry Andric public:
// See COMPUTE_TMPRING_SIZE.WAVESIZE: a 13-bit field in units of 256 dwords.
// The value is the field maximum (2^13 - 1) scaled by 256 * 4.
static const unsigned MaxWaveScratchSize = ((1 << 13) - 1) * (4 * 256);
212e8d8bef9SDimitry Andric 
213e8d8bef9SDimitry Andric   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
214e8d8bef9SDimitry Andric                const GCNTargetMachine &TM);
215e8d8bef9SDimitry Andric   ~GCNSubtarget() override;
216e8d8bef9SDimitry Andric 
217e8d8bef9SDimitry Andric   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
218e8d8bef9SDimitry Andric                                                    StringRef GPU, StringRef FS);
219e8d8bef9SDimitry Andric 
220e8d8bef9SDimitry Andric   const SIInstrInfo *getInstrInfo() const override {
221e8d8bef9SDimitry Andric     return &InstrInfo;
222e8d8bef9SDimitry Andric   }
223e8d8bef9SDimitry Andric 
224e8d8bef9SDimitry Andric   const SIFrameLowering *getFrameLowering() const override {
225e8d8bef9SDimitry Andric     return &FrameLowering;
226e8d8bef9SDimitry Andric   }
227e8d8bef9SDimitry Andric 
228e8d8bef9SDimitry Andric   const SITargetLowering *getTargetLowering() const override {
229e8d8bef9SDimitry Andric     return &TLInfo;
230e8d8bef9SDimitry Andric   }
231e8d8bef9SDimitry Andric 
232e8d8bef9SDimitry Andric   const SIRegisterInfo *getRegisterInfo() const override {
233e8d8bef9SDimitry Andric     return &InstrInfo.getRegisterInfo();
234e8d8bef9SDimitry Andric   }
235e8d8bef9SDimitry Andric 
236e8d8bef9SDimitry Andric   const CallLowering *getCallLowering() const override {
237e8d8bef9SDimitry Andric     return CallLoweringInfo.get();
238e8d8bef9SDimitry Andric   }
239e8d8bef9SDimitry Andric 
240e8d8bef9SDimitry Andric   const InlineAsmLowering *getInlineAsmLowering() const override {
241e8d8bef9SDimitry Andric     return InlineAsmLoweringInfo.get();
242e8d8bef9SDimitry Andric   }
243e8d8bef9SDimitry Andric 
244e8d8bef9SDimitry Andric   InstructionSelector *getInstructionSelector() const override {
245e8d8bef9SDimitry Andric     return InstSelector.get();
246e8d8bef9SDimitry Andric   }
247e8d8bef9SDimitry Andric 
248e8d8bef9SDimitry Andric   const LegalizerInfo *getLegalizerInfo() const override {
249e8d8bef9SDimitry Andric     return Legalizer.get();
250e8d8bef9SDimitry Andric   }
251e8d8bef9SDimitry Andric 
252e8d8bef9SDimitry Andric   const RegisterBankInfo *getRegBankInfo() const override {
253e8d8bef9SDimitry Andric     return RegBankInfo.get();
254e8d8bef9SDimitry Andric   }
255e8d8bef9SDimitry Andric 
256*fe6060f1SDimitry Andric   const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
257*fe6060f1SDimitry Andric     return TargetID;
258*fe6060f1SDimitry Andric   }
259*fe6060f1SDimitry Andric 
260e8d8bef9SDimitry Andric   // Nothing implemented, just prevent crashes on use.
261e8d8bef9SDimitry Andric   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
262e8d8bef9SDimitry Andric     return &TSInfo;
263e8d8bef9SDimitry Andric   }
264e8d8bef9SDimitry Andric 
265e8d8bef9SDimitry Andric   const InstrItineraryData *getInstrItineraryData() const override {
266e8d8bef9SDimitry Andric     return &InstrItins;
267e8d8bef9SDimitry Andric   }
268e8d8bef9SDimitry Andric 
269e8d8bef9SDimitry Andric   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
270e8d8bef9SDimitry Andric 
271e8d8bef9SDimitry Andric   Generation getGeneration() const {
272e8d8bef9SDimitry Andric     return (Generation)Gen;
273e8d8bef9SDimitry Andric   }
274e8d8bef9SDimitry Andric 
275e8d8bef9SDimitry Andric   /// Return the number of high bits known to be zero fror a frame index.
276e8d8bef9SDimitry Andric   unsigned getKnownHighZeroBitsForFrameIndex() const {
277e8d8bef9SDimitry Andric     return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
278e8d8bef9SDimitry Andric   }
279e8d8bef9SDimitry Andric 
280e8d8bef9SDimitry Andric   int getLDSBankCount() const {
281e8d8bef9SDimitry Andric     return LDSBankCount;
282e8d8bef9SDimitry Andric   }
283e8d8bef9SDimitry Andric 
284e8d8bef9SDimitry Andric   unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
285e8d8bef9SDimitry Andric     return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
286e8d8bef9SDimitry Andric   }
287e8d8bef9SDimitry Andric 
288e8d8bef9SDimitry Andric   unsigned getConstantBusLimit(unsigned Opcode) const;
289e8d8bef9SDimitry Andric 
290*fe6060f1SDimitry Andric   /// Returns if the result of this instruction with a 16-bit result returned in
291*fe6060f1SDimitry Andric   /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
292*fe6060f1SDimitry Andric   /// the original value.
293*fe6060f1SDimitry Andric   bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
294*fe6060f1SDimitry Andric 
295e8d8bef9SDimitry Andric   bool hasIntClamp() const {
296e8d8bef9SDimitry Andric     return HasIntClamp;
297e8d8bef9SDimitry Andric   }
298e8d8bef9SDimitry Andric 
299e8d8bef9SDimitry Andric   bool hasFP64() const {
300e8d8bef9SDimitry Andric     return FP64;
301e8d8bef9SDimitry Andric   }
302e8d8bef9SDimitry Andric 
303e8d8bef9SDimitry Andric   bool hasMIMG_R128() const {
304e8d8bef9SDimitry Andric     return MIMG_R128;
305e8d8bef9SDimitry Andric   }
306e8d8bef9SDimitry Andric 
307e8d8bef9SDimitry Andric   bool hasHWFP64() const {
308e8d8bef9SDimitry Andric     return FP64;
309e8d8bef9SDimitry Andric   }
310e8d8bef9SDimitry Andric 
311e8d8bef9SDimitry Andric   bool hasFastFMAF32() const {
312e8d8bef9SDimitry Andric     return FastFMAF32;
313e8d8bef9SDimitry Andric   }
314e8d8bef9SDimitry Andric 
315e8d8bef9SDimitry Andric   bool hasHalfRate64Ops() const {
316e8d8bef9SDimitry Andric     return HalfRate64Ops;
317e8d8bef9SDimitry Andric   }
318e8d8bef9SDimitry Andric 
319*fe6060f1SDimitry Andric   bool hasFullRate64Ops() const {
320*fe6060f1SDimitry Andric     return FullRate64Ops;
321*fe6060f1SDimitry Andric   }
322*fe6060f1SDimitry Andric 
323e8d8bef9SDimitry Andric   bool hasAddr64() const {
324e8d8bef9SDimitry Andric     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
325e8d8bef9SDimitry Andric   }
326e8d8bef9SDimitry Andric 
327e8d8bef9SDimitry Andric   bool hasFlat() const {
328e8d8bef9SDimitry Andric     return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
329e8d8bef9SDimitry Andric   }
330e8d8bef9SDimitry Andric 
331e8d8bef9SDimitry Andric   // Return true if the target only has the reverse operand versions of VALU
332e8d8bef9SDimitry Andric   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
333e8d8bef9SDimitry Andric   bool hasOnlyRevVALUShifts() const {
334e8d8bef9SDimitry Andric     return getGeneration() >= VOLCANIC_ISLANDS;
335e8d8bef9SDimitry Andric   }
336e8d8bef9SDimitry Andric 
337e8d8bef9SDimitry Andric   bool hasFractBug() const {
338e8d8bef9SDimitry Andric     return getGeneration() == SOUTHERN_ISLANDS;
339e8d8bef9SDimitry Andric   }
340e8d8bef9SDimitry Andric 
341e8d8bef9SDimitry Andric   bool hasBFE() const {
342e8d8bef9SDimitry Andric     return true;
343e8d8bef9SDimitry Andric   }
344e8d8bef9SDimitry Andric 
345e8d8bef9SDimitry Andric   bool hasBFI() const {
346e8d8bef9SDimitry Andric     return true;
347e8d8bef9SDimitry Andric   }
348e8d8bef9SDimitry Andric 
349e8d8bef9SDimitry Andric   bool hasBFM() const {
350e8d8bef9SDimitry Andric     return hasBFE();
351e8d8bef9SDimitry Andric   }
352e8d8bef9SDimitry Andric 
353e8d8bef9SDimitry Andric   bool hasBCNT(unsigned Size) const {
354e8d8bef9SDimitry Andric     return true;
355e8d8bef9SDimitry Andric   }
356e8d8bef9SDimitry Andric 
357e8d8bef9SDimitry Andric   bool hasFFBL() const {
358e8d8bef9SDimitry Andric     return true;
359e8d8bef9SDimitry Andric   }
360e8d8bef9SDimitry Andric 
361e8d8bef9SDimitry Andric   bool hasFFBH() const {
362e8d8bef9SDimitry Andric     return true;
363e8d8bef9SDimitry Andric   }
364e8d8bef9SDimitry Andric 
365e8d8bef9SDimitry Andric   bool hasMed3_16() const {
366e8d8bef9SDimitry Andric     return getGeneration() >= AMDGPUSubtarget::GFX9;
367e8d8bef9SDimitry Andric   }
368e8d8bef9SDimitry Andric 
369e8d8bef9SDimitry Andric   bool hasMin3Max3_16() const {
370e8d8bef9SDimitry Andric     return getGeneration() >= AMDGPUSubtarget::GFX9;
371e8d8bef9SDimitry Andric   }
372e8d8bef9SDimitry Andric 
373e8d8bef9SDimitry Andric   bool hasFmaMixInsts() const {
374e8d8bef9SDimitry Andric     return HasFmaMixInsts;
375e8d8bef9SDimitry Andric   }
376e8d8bef9SDimitry Andric 
377e8d8bef9SDimitry Andric   bool hasCARRY() const {
378e8d8bef9SDimitry Andric     return true;
379e8d8bef9SDimitry Andric   }
380e8d8bef9SDimitry Andric 
381e8d8bef9SDimitry Andric   bool hasFMA() const {
382e8d8bef9SDimitry Andric     return FMA;
383e8d8bef9SDimitry Andric   }
384e8d8bef9SDimitry Andric 
385e8d8bef9SDimitry Andric   bool hasSwap() const {
386e8d8bef9SDimitry Andric     return GFX9Insts;
387e8d8bef9SDimitry Andric   }
388e8d8bef9SDimitry Andric 
389e8d8bef9SDimitry Andric   bool hasScalarPackInsts() const {
390e8d8bef9SDimitry Andric     return GFX9Insts;
391e8d8bef9SDimitry Andric   }
392e8d8bef9SDimitry Andric 
393e8d8bef9SDimitry Andric   bool hasScalarMulHiInsts() const {
394e8d8bef9SDimitry Andric     return GFX9Insts;
395e8d8bef9SDimitry Andric   }
396e8d8bef9SDimitry Andric 
397e8d8bef9SDimitry Andric   TrapHandlerAbi getTrapHandlerAbi() const {
398*fe6060f1SDimitry Andric     return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
399*fe6060f1SDimitry Andric   }
400*fe6060f1SDimitry Andric 
401*fe6060f1SDimitry Andric   bool supportsGetDoorbellID() const {
402*fe6060f1SDimitry Andric     // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
403*fe6060f1SDimitry Andric     return getGeneration() >= GFX9;
404e8d8bef9SDimitry Andric   }
405e8d8bef9SDimitry Andric 
406e8d8bef9SDimitry Andric   /// True if the offset field of DS instructions works as expected. On SI, the
407e8d8bef9SDimitry Andric   /// offset uses a 16-bit adder and does not always wrap properly.
408e8d8bef9SDimitry Andric   bool hasUsableDSOffset() const {
409e8d8bef9SDimitry Andric     return getGeneration() >= SEA_ISLANDS;
410e8d8bef9SDimitry Andric   }
411e8d8bef9SDimitry Andric 
412e8d8bef9SDimitry Andric   bool unsafeDSOffsetFoldingEnabled() const {
413e8d8bef9SDimitry Andric     return EnableUnsafeDSOffsetFolding;
414e8d8bef9SDimitry Andric   }
415e8d8bef9SDimitry Andric 
416e8d8bef9SDimitry Andric   /// Condition output from div_scale is usable.
417e8d8bef9SDimitry Andric   bool hasUsableDivScaleConditionOutput() const {
418e8d8bef9SDimitry Andric     return getGeneration() != SOUTHERN_ISLANDS;
419e8d8bef9SDimitry Andric   }
420e8d8bef9SDimitry Andric 
421e8d8bef9SDimitry Andric   /// Extra wait hazard is needed in some cases before
422e8d8bef9SDimitry Andric   /// s_cbranch_vccnz/s_cbranch_vccz.
423e8d8bef9SDimitry Andric   bool hasReadVCCZBug() const {
424e8d8bef9SDimitry Andric     return getGeneration() <= SEA_ISLANDS;
425e8d8bef9SDimitry Andric   }
426e8d8bef9SDimitry Andric 
427e8d8bef9SDimitry Andric   /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
428e8d8bef9SDimitry Andric   bool partialVCCWritesUpdateVCCZ() const {
429e8d8bef9SDimitry Andric     return getGeneration() >= GFX10;
430e8d8bef9SDimitry Andric   }
431e8d8bef9SDimitry Andric 
432e8d8bef9SDimitry Andric   /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
433e8d8bef9SDimitry Andric   /// was written by a VALU instruction.
434e8d8bef9SDimitry Andric   bool hasSMRDReadVALUDefHazard() const {
435e8d8bef9SDimitry Andric     return getGeneration() == SOUTHERN_ISLANDS;
436e8d8bef9SDimitry Andric   }
437e8d8bef9SDimitry Andric 
438e8d8bef9SDimitry Andric   /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
439e8d8bef9SDimitry Andric   /// SGPR was written by a VALU Instruction.
440e8d8bef9SDimitry Andric   bool hasVMEMReadSGPRVALUDefHazard() const {
441e8d8bef9SDimitry Andric     return getGeneration() >= VOLCANIC_ISLANDS;
442e8d8bef9SDimitry Andric   }
443e8d8bef9SDimitry Andric 
444e8d8bef9SDimitry Andric   bool hasRFEHazards() const {
445e8d8bef9SDimitry Andric     return getGeneration() >= VOLCANIC_ISLANDS;
446e8d8bef9SDimitry Andric   }
447e8d8bef9SDimitry Andric 
448e8d8bef9SDimitry Andric   /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
449e8d8bef9SDimitry Andric   unsigned getSetRegWaitStates() const {
450e8d8bef9SDimitry Andric     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
451e8d8bef9SDimitry Andric   }
452e8d8bef9SDimitry Andric 
453e8d8bef9SDimitry Andric   bool dumpCode() const {
454e8d8bef9SDimitry Andric     return DumpCode;
455e8d8bef9SDimitry Andric   }
456e8d8bef9SDimitry Andric 
457e8d8bef9SDimitry Andric   /// Return the amount of LDS that can be used that will not restrict the
458e8d8bef9SDimitry Andric   /// occupancy lower than WaveCount.
459e8d8bef9SDimitry Andric   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
460e8d8bef9SDimitry Andric                                            const Function &) const;
461e8d8bef9SDimitry Andric 
462e8d8bef9SDimitry Andric   bool supportsMinMaxDenormModes() const {
463e8d8bef9SDimitry Andric     return getGeneration() >= AMDGPUSubtarget::GFX9;
464e8d8bef9SDimitry Andric   }
465e8d8bef9SDimitry Andric 
466e8d8bef9SDimitry Andric   /// \returns If target supports S_DENORM_MODE.
467e8d8bef9SDimitry Andric   bool hasDenormModeInst() const {
468e8d8bef9SDimitry Andric     return getGeneration() >= AMDGPUSubtarget::GFX10;
469e8d8bef9SDimitry Andric   }
470e8d8bef9SDimitry Andric 
471e8d8bef9SDimitry Andric   bool useFlatForGlobal() const {
472e8d8bef9SDimitry Andric     return FlatForGlobal;
473e8d8bef9SDimitry Andric   }
474e8d8bef9SDimitry Andric 
475e8d8bef9SDimitry Andric   /// \returns If target supports ds_read/write_b128 and user enables generation
476e8d8bef9SDimitry Andric   /// of ds_read/write_b128.
477e8d8bef9SDimitry Andric   bool useDS128() const {
478e8d8bef9SDimitry Andric     return CIInsts && EnableDS128;
479e8d8bef9SDimitry Andric   }
480e8d8bef9SDimitry Andric 
481e8d8bef9SDimitry Andric   /// \return If target supports ds_read/write_b96/128.
482e8d8bef9SDimitry Andric   bool hasDS96AndDS128() const {
483e8d8bef9SDimitry Andric     return CIInsts;
484e8d8bef9SDimitry Andric   }
485e8d8bef9SDimitry Andric 
486e8d8bef9SDimitry Andric   /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
487e8d8bef9SDimitry Andric   bool haveRoundOpsF64() const {
488e8d8bef9SDimitry Andric     return CIInsts;
489e8d8bef9SDimitry Andric   }
490e8d8bef9SDimitry Andric 
491e8d8bef9SDimitry Andric   /// \returns If MUBUF instructions always perform range checking, even for
492e8d8bef9SDimitry Andric   /// buffer resources used for private memory access.
493e8d8bef9SDimitry Andric   bool privateMemoryResourceIsRangeChecked() const {
494e8d8bef9SDimitry Andric     return getGeneration() < AMDGPUSubtarget::GFX9;
495e8d8bef9SDimitry Andric   }
496e8d8bef9SDimitry Andric 
497e8d8bef9SDimitry Andric   /// \returns If target requires PRT Struct NULL support (zero result registers
498e8d8bef9SDimitry Andric   /// for sparse texture support).
499e8d8bef9SDimitry Andric   bool usePRTStrictNull() const {
500e8d8bef9SDimitry Andric     return EnablePRTStrictNull;
501e8d8bef9SDimitry Andric   }
502e8d8bef9SDimitry Andric 
503e8d8bef9SDimitry Andric   bool hasAutoWaitcntBeforeBarrier() const {
504e8d8bef9SDimitry Andric     return AutoWaitcntBeforeBarrier;
505e8d8bef9SDimitry Andric   }
506e8d8bef9SDimitry Andric 
507e8d8bef9SDimitry Andric   bool hasUnalignedBufferAccess() const {
508e8d8bef9SDimitry Andric     return UnalignedBufferAccess;
509e8d8bef9SDimitry Andric   }
510e8d8bef9SDimitry Andric 
511e8d8bef9SDimitry Andric   bool hasUnalignedBufferAccessEnabled() const {
512e8d8bef9SDimitry Andric     return UnalignedBufferAccess && UnalignedAccessMode;
513e8d8bef9SDimitry Andric   }
514e8d8bef9SDimitry Andric 
515e8d8bef9SDimitry Andric   bool hasUnalignedDSAccess() const {
516e8d8bef9SDimitry Andric     return UnalignedDSAccess;
517e8d8bef9SDimitry Andric   }
518e8d8bef9SDimitry Andric 
519e8d8bef9SDimitry Andric   bool hasUnalignedDSAccessEnabled() const {
520e8d8bef9SDimitry Andric     return UnalignedDSAccess && UnalignedAccessMode;
521e8d8bef9SDimitry Andric   }
522e8d8bef9SDimitry Andric 
523e8d8bef9SDimitry Andric   bool hasUnalignedScratchAccess() const {
524e8d8bef9SDimitry Andric     return UnalignedScratchAccess;
525e8d8bef9SDimitry Andric   }
526e8d8bef9SDimitry Andric 
527e8d8bef9SDimitry Andric   bool hasUnalignedAccessMode() const {
528e8d8bef9SDimitry Andric     return UnalignedAccessMode;
529e8d8bef9SDimitry Andric   }
530e8d8bef9SDimitry Andric 
531e8d8bef9SDimitry Andric   bool hasApertureRegs() const {
532e8d8bef9SDimitry Andric     return HasApertureRegs;
533e8d8bef9SDimitry Andric   }
534e8d8bef9SDimitry Andric 
535e8d8bef9SDimitry Andric   bool isTrapHandlerEnabled() const {
536e8d8bef9SDimitry Andric     return TrapHandler;
537e8d8bef9SDimitry Andric   }
538e8d8bef9SDimitry Andric 
539e8d8bef9SDimitry Andric   bool isXNACKEnabled() const {
540e8d8bef9SDimitry Andric     return TargetID.isXnackOnOrAny();
541e8d8bef9SDimitry Andric   }
542e8d8bef9SDimitry Andric 
543*fe6060f1SDimitry Andric   bool isTgSplitEnabled() const {
544*fe6060f1SDimitry Andric     return EnableTgSplit;
545*fe6060f1SDimitry Andric   }
546*fe6060f1SDimitry Andric 
547e8d8bef9SDimitry Andric   bool isCuModeEnabled() const {
548e8d8bef9SDimitry Andric     return EnableCuMode;
549e8d8bef9SDimitry Andric   }
550e8d8bef9SDimitry Andric 
551e8d8bef9SDimitry Andric   bool hasFlatAddressSpace() const {
552e8d8bef9SDimitry Andric     return FlatAddressSpace;
553e8d8bef9SDimitry Andric   }
554e8d8bef9SDimitry Andric 
555e8d8bef9SDimitry Andric   bool hasFlatScrRegister() const {
556e8d8bef9SDimitry Andric     return hasFlatAddressSpace();
557e8d8bef9SDimitry Andric   }
558e8d8bef9SDimitry Andric 
559e8d8bef9SDimitry Andric   bool hasFlatInstOffsets() const {
560e8d8bef9SDimitry Andric     return FlatInstOffsets;
561e8d8bef9SDimitry Andric   }
562e8d8bef9SDimitry Andric 
563e8d8bef9SDimitry Andric   bool hasFlatGlobalInsts() const {
564e8d8bef9SDimitry Andric     return FlatGlobalInsts;
565e8d8bef9SDimitry Andric   }
566e8d8bef9SDimitry Andric 
567e8d8bef9SDimitry Andric   bool hasFlatScratchInsts() const {
568e8d8bef9SDimitry Andric     return FlatScratchInsts;
569e8d8bef9SDimitry Andric   }
570e8d8bef9SDimitry Andric 
571e8d8bef9SDimitry Andric   // Check if target supports ST addressing mode with FLAT scratch instructions.
572e8d8bef9SDimitry Andric   // The ST addressing mode means no registers are used, either VGPR or SGPR,
573e8d8bef9SDimitry Andric   // but only immediate offset is swizzled and added to the FLAT scratch base.
574e8d8bef9SDimitry Andric   bool hasFlatScratchSTMode() const {
575e8d8bef9SDimitry Andric     return hasFlatScratchInsts() && hasGFX10_3Insts();
576e8d8bef9SDimitry Andric   }
577e8d8bef9SDimitry Andric 
578e8d8bef9SDimitry Andric   bool hasScalarFlatScratchInsts() const {
579e8d8bef9SDimitry Andric     return ScalarFlatScratchInsts;
580e8d8bef9SDimitry Andric   }
581e8d8bef9SDimitry Andric 
582e8d8bef9SDimitry Andric   bool hasGlobalAddTidInsts() const {
583e8d8bef9SDimitry Andric     return GFX10_BEncoding;
584e8d8bef9SDimitry Andric   }
585e8d8bef9SDimitry Andric 
586e8d8bef9SDimitry Andric   bool hasAtomicCSub() const {
587e8d8bef9SDimitry Andric     return GFX10_BEncoding;
588e8d8bef9SDimitry Andric   }
589e8d8bef9SDimitry Andric 
590e8d8bef9SDimitry Andric   bool hasMultiDwordFlatScratchAddressing() const {
591e8d8bef9SDimitry Andric     return getGeneration() >= GFX9;
592e8d8bef9SDimitry Andric   }
593e8d8bef9SDimitry Andric 
594e8d8bef9SDimitry Andric   bool hasFlatSegmentOffsetBug() const {
595e8d8bef9SDimitry Andric     return HasFlatSegmentOffsetBug;
596e8d8bef9SDimitry Andric   }
597e8d8bef9SDimitry Andric 
598e8d8bef9SDimitry Andric   bool hasFlatLgkmVMemCountInOrder() const {
599e8d8bef9SDimitry Andric     return getGeneration() > GFX9;
600e8d8bef9SDimitry Andric   }
601e8d8bef9SDimitry Andric 
602e8d8bef9SDimitry Andric   bool hasD16LoadStore() const {
603e8d8bef9SDimitry Andric     return getGeneration() >= GFX9;
604e8d8bef9SDimitry Andric   }
605e8d8bef9SDimitry Andric 
606e8d8bef9SDimitry Andric   bool d16PreservesUnusedBits() const {
607e8d8bef9SDimitry Andric     return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
608e8d8bef9SDimitry Andric   }
609e8d8bef9SDimitry Andric 
610e8d8bef9SDimitry Andric   bool hasD16Images() const {
611e8d8bef9SDimitry Andric     return getGeneration() >= VOLCANIC_ISLANDS;
612e8d8bef9SDimitry Andric   }
613e8d8bef9SDimitry Andric 
614e8d8bef9SDimitry Andric   /// Return if most LDS instructions have an m0 use that require m0 to be
615e8d8bef9SDimitry Andric   /// iniitalized.
616e8d8bef9SDimitry Andric   bool ldsRequiresM0Init() const {
617e8d8bef9SDimitry Andric     return getGeneration() < GFX9;
618e8d8bef9SDimitry Andric   }
619e8d8bef9SDimitry Andric 
620e8d8bef9SDimitry Andric   // True if the hardware rewinds and replays GWS operations if a wave is
621e8d8bef9SDimitry Andric   // preempted.
622e8d8bef9SDimitry Andric   //
623e8d8bef9SDimitry Andric   // If this is false, a GWS operation requires testing if a nack set the
624e8d8bef9SDimitry Andric   // MEM_VIOL bit, and repeating if so.
625e8d8bef9SDimitry Andric   bool hasGWSAutoReplay() const {
626e8d8bef9SDimitry Andric     return getGeneration() >= GFX9;
627e8d8bef9SDimitry Andric   }
628e8d8bef9SDimitry Andric 
629e8d8bef9SDimitry Andric   /// \returns if target has ds_gws_sema_release_all instruction.
630e8d8bef9SDimitry Andric   bool hasGWSSemaReleaseAll() const {
631e8d8bef9SDimitry Andric     return CIInsts;
632e8d8bef9SDimitry Andric   }
633e8d8bef9SDimitry Andric 
634e8d8bef9SDimitry Andric   /// \returns true if the target has integer add/sub instructions that do not
635e8d8bef9SDimitry Andric   /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
636e8d8bef9SDimitry Andric   /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
637e8d8bef9SDimitry Andric   /// for saturation.
638e8d8bef9SDimitry Andric   bool hasAddNoCarry() const {
639e8d8bef9SDimitry Andric     return AddNoCarryInsts;
640e8d8bef9SDimitry Andric   }
641e8d8bef9SDimitry Andric 
642e8d8bef9SDimitry Andric   bool hasUnpackedD16VMem() const {
643e8d8bef9SDimitry Andric     return HasUnpackedD16VMem;
644e8d8bef9SDimitry Andric   }
645e8d8bef9SDimitry Andric 
646e8d8bef9SDimitry Andric   // Covers VS/PS/CS graphics shaders
647e8d8bef9SDimitry Andric   bool isMesaGfxShader(const Function &F) const {
648e8d8bef9SDimitry Andric     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
649e8d8bef9SDimitry Andric   }
650e8d8bef9SDimitry Andric 
651e8d8bef9SDimitry Andric   bool hasMad64_32() const {
652e8d8bef9SDimitry Andric     return getGeneration() >= SEA_ISLANDS;
653e8d8bef9SDimitry Andric   }
654e8d8bef9SDimitry Andric 
655e8d8bef9SDimitry Andric   bool hasSDWAOmod() const {
656e8d8bef9SDimitry Andric     return HasSDWAOmod;
657e8d8bef9SDimitry Andric   }
658e8d8bef9SDimitry Andric 
659e8d8bef9SDimitry Andric   bool hasSDWAScalar() const {
660e8d8bef9SDimitry Andric     return HasSDWAScalar;
661e8d8bef9SDimitry Andric   }
662e8d8bef9SDimitry Andric 
663e8d8bef9SDimitry Andric   bool hasSDWASdst() const {
664e8d8bef9SDimitry Andric     return HasSDWASdst;
665e8d8bef9SDimitry Andric   }
666e8d8bef9SDimitry Andric 
667e8d8bef9SDimitry Andric   bool hasSDWAMac() const {
668e8d8bef9SDimitry Andric     return HasSDWAMac;
669e8d8bef9SDimitry Andric   }
670e8d8bef9SDimitry Andric 
671e8d8bef9SDimitry Andric   bool hasSDWAOutModsVOPC() const {
672e8d8bef9SDimitry Andric     return HasSDWAOutModsVOPC;
673e8d8bef9SDimitry Andric   }
674e8d8bef9SDimitry Andric 
675e8d8bef9SDimitry Andric   bool hasDLInsts() const {
676e8d8bef9SDimitry Andric     return HasDLInsts;
677e8d8bef9SDimitry Andric   }
678e8d8bef9SDimitry Andric 
679e8d8bef9SDimitry Andric   bool hasDot1Insts() const {
680e8d8bef9SDimitry Andric     return HasDot1Insts;
681e8d8bef9SDimitry Andric   }
682e8d8bef9SDimitry Andric 
683e8d8bef9SDimitry Andric   bool hasDot2Insts() const {
684e8d8bef9SDimitry Andric     return HasDot2Insts;
685e8d8bef9SDimitry Andric   }
686e8d8bef9SDimitry Andric 
687e8d8bef9SDimitry Andric   bool hasDot3Insts() const {
688e8d8bef9SDimitry Andric     return HasDot3Insts;
689e8d8bef9SDimitry Andric   }
690e8d8bef9SDimitry Andric 
691e8d8bef9SDimitry Andric   bool hasDot4Insts() const {
692e8d8bef9SDimitry Andric     return HasDot4Insts;
693e8d8bef9SDimitry Andric   }
694e8d8bef9SDimitry Andric 
695e8d8bef9SDimitry Andric   bool hasDot5Insts() const {
696e8d8bef9SDimitry Andric     return HasDot5Insts;
697e8d8bef9SDimitry Andric   }
698e8d8bef9SDimitry Andric 
699e8d8bef9SDimitry Andric   bool hasDot6Insts() const {
700e8d8bef9SDimitry Andric     return HasDot6Insts;
701e8d8bef9SDimitry Andric   }
702e8d8bef9SDimitry Andric 
703*fe6060f1SDimitry Andric   bool hasDot7Insts() const {
704*fe6060f1SDimitry Andric     return HasDot7Insts;
705*fe6060f1SDimitry Andric   }
706*fe6060f1SDimitry Andric 
707e8d8bef9SDimitry Andric   bool hasMAIInsts() const {
708e8d8bef9SDimitry Andric     return HasMAIInsts;
709e8d8bef9SDimitry Andric   }
710e8d8bef9SDimitry Andric 
711e8d8bef9SDimitry Andric   bool hasPkFmacF16Inst() const {
712e8d8bef9SDimitry Andric     return HasPkFmacF16Inst;
713e8d8bef9SDimitry Andric   }
714e8d8bef9SDimitry Andric 
715e8d8bef9SDimitry Andric   bool hasAtomicFaddInsts() const {
716e8d8bef9SDimitry Andric     return HasAtomicFaddInsts;
717e8d8bef9SDimitry Andric   }
718e8d8bef9SDimitry Andric 
719e8d8bef9SDimitry Andric   bool hasNoSdstCMPX() const {
720e8d8bef9SDimitry Andric     return HasNoSdstCMPX;
721e8d8bef9SDimitry Andric   }
722e8d8bef9SDimitry Andric 
723e8d8bef9SDimitry Andric   bool hasVscnt() const {
724e8d8bef9SDimitry Andric     return HasVscnt;
725e8d8bef9SDimitry Andric   }
726e8d8bef9SDimitry Andric 
727e8d8bef9SDimitry Andric   bool hasGetWaveIdInst() const {
728e8d8bef9SDimitry Andric     return HasGetWaveIdInst;
729e8d8bef9SDimitry Andric   }
730e8d8bef9SDimitry Andric 
731e8d8bef9SDimitry Andric   bool hasSMemTimeInst() const {
732e8d8bef9SDimitry Andric     return HasSMemTimeInst;
733e8d8bef9SDimitry Andric   }
734e8d8bef9SDimitry Andric 
735*fe6060f1SDimitry Andric   bool hasShaderCyclesRegister() const {
736*fe6060f1SDimitry Andric     return HasShaderCyclesRegister;
737*fe6060f1SDimitry Andric   }
738*fe6060f1SDimitry Andric 
739e8d8bef9SDimitry Andric   bool hasRegisterBanking() const {
740e8d8bef9SDimitry Andric     return HasRegisterBanking;
741e8d8bef9SDimitry Andric   }
742e8d8bef9SDimitry Andric 
743e8d8bef9SDimitry Andric   bool hasVOP3Literal() const {
744e8d8bef9SDimitry Andric     return HasVOP3Literal;
745e8d8bef9SDimitry Andric   }
746e8d8bef9SDimitry Andric 
747e8d8bef9SDimitry Andric   bool hasNoDataDepHazard() const {
748e8d8bef9SDimitry Andric     return HasNoDataDepHazard;
749e8d8bef9SDimitry Andric   }
750e8d8bef9SDimitry Andric 
751e8d8bef9SDimitry Andric   bool vmemWriteNeedsExpWaitcnt() const {
752e8d8bef9SDimitry Andric     return getGeneration() < SEA_ISLANDS;
753e8d8bef9SDimitry Andric   }
754e8d8bef9SDimitry Andric 
755e8d8bef9SDimitry Andric   // Scratch is allocated in 256 dword per wave blocks for the entire
756e8d8bef9SDimitry Andric   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
757e8d8bef9SDimitry Andric   // is 4-byte aligned.
758e8d8bef9SDimitry Andric   //
759e8d8bef9SDimitry Andric   // Only 4-byte alignment is really needed to access anything. Transformations
760e8d8bef9SDimitry Andric   // on the pointer value itself may rely on the alignment / known low bits of
761e8d8bef9SDimitry Andric   // the pointer. Set this to something above the minimum to avoid needing
762e8d8bef9SDimitry Andric   // dynamic realignment in common cases.
763e8d8bef9SDimitry Andric   Align getStackAlignment() const { return Align(16); }
764e8d8bef9SDimitry Andric 
765e8d8bef9SDimitry Andric   bool enableMachineScheduler() const override {
766e8d8bef9SDimitry Andric     return true;
767e8d8bef9SDimitry Andric   }
768e8d8bef9SDimitry Andric 
769e8d8bef9SDimitry Andric   bool useAA() const override;
770e8d8bef9SDimitry Andric 
771e8d8bef9SDimitry Andric   bool enableSubRegLiveness() const override {
772e8d8bef9SDimitry Andric     return true;
773e8d8bef9SDimitry Andric   }
774e8d8bef9SDimitry Andric 
775e8d8bef9SDimitry Andric   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
776e8d8bef9SDimitry Andric   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
777e8d8bef9SDimitry Andric 
778e8d8bef9SDimitry Andric   // static wrappers
779e8d8bef9SDimitry Andric   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
780e8d8bef9SDimitry Andric 
781e8d8bef9SDimitry Andric   // XXX - Why is this here if it isn't in the default pass set?
782e8d8bef9SDimitry Andric   bool enableEarlyIfConversion() const override {
783e8d8bef9SDimitry Andric     return true;
784e8d8bef9SDimitry Andric   }
785e8d8bef9SDimitry Andric 
786e8d8bef9SDimitry Andric   bool enableFlatScratch() const;
787e8d8bef9SDimitry Andric 
788e8d8bef9SDimitry Andric   void overrideSchedPolicy(MachineSchedPolicy &Policy,
789e8d8bef9SDimitry Andric                            unsigned NumRegionInstrs) const override;
790e8d8bef9SDimitry Andric 
791e8d8bef9SDimitry Andric   unsigned getMaxNumUserSGPRs() const {
792e8d8bef9SDimitry Andric     return 16;
793e8d8bef9SDimitry Andric   }
794e8d8bef9SDimitry Andric 
795e8d8bef9SDimitry Andric   bool hasSMemRealTime() const {
796e8d8bef9SDimitry Andric     return HasSMemRealTime;
797e8d8bef9SDimitry Andric   }
798e8d8bef9SDimitry Andric 
799e8d8bef9SDimitry Andric   bool hasMovrel() const {
800e8d8bef9SDimitry Andric     return HasMovrel;
801e8d8bef9SDimitry Andric   }
802e8d8bef9SDimitry Andric 
803e8d8bef9SDimitry Andric   bool hasVGPRIndexMode() const {
804e8d8bef9SDimitry Andric     return HasVGPRIndexMode;
805e8d8bef9SDimitry Andric   }
806e8d8bef9SDimitry Andric 
807e8d8bef9SDimitry Andric   bool useVGPRIndexMode() const;
808e8d8bef9SDimitry Andric 
809e8d8bef9SDimitry Andric   bool hasScalarCompareEq64() const {
810e8d8bef9SDimitry Andric     return getGeneration() >= VOLCANIC_ISLANDS;
811e8d8bef9SDimitry Andric   }
812e8d8bef9SDimitry Andric 
813e8d8bef9SDimitry Andric   bool hasScalarStores() const {
814e8d8bef9SDimitry Andric     return HasScalarStores;
815e8d8bef9SDimitry Andric   }
816e8d8bef9SDimitry Andric 
817e8d8bef9SDimitry Andric   bool hasScalarAtomics() const {
818e8d8bef9SDimitry Andric     return HasScalarAtomics;
819e8d8bef9SDimitry Andric   }
820e8d8bef9SDimitry Andric 
821e8d8bef9SDimitry Andric   bool hasLDSFPAtomics() const {
822e8d8bef9SDimitry Andric     return GFX8Insts;
823e8d8bef9SDimitry Andric   }
824e8d8bef9SDimitry Andric 
825*fe6060f1SDimitry Andric   /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
826*fe6060f1SDimitry Andric   bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
827*fe6060f1SDimitry Andric 
828e8d8bef9SDimitry Andric   bool hasDPP() const {
829e8d8bef9SDimitry Andric     return HasDPP;
830e8d8bef9SDimitry Andric   }
831e8d8bef9SDimitry Andric 
832e8d8bef9SDimitry Andric   bool hasDPPBroadcasts() const {
833e8d8bef9SDimitry Andric     return HasDPP && getGeneration() < GFX10;
834e8d8bef9SDimitry Andric   }
835e8d8bef9SDimitry Andric 
836e8d8bef9SDimitry Andric   bool hasDPPWavefrontShifts() const {
837e8d8bef9SDimitry Andric     return HasDPP && getGeneration() < GFX10;
838e8d8bef9SDimitry Andric   }
839e8d8bef9SDimitry Andric 
840e8d8bef9SDimitry Andric   bool hasDPP8() const {
841e8d8bef9SDimitry Andric     return HasDPP8;
842e8d8bef9SDimitry Andric   }
843e8d8bef9SDimitry Andric 
844*fe6060f1SDimitry Andric   bool has64BitDPP() const {
845*fe6060f1SDimitry Andric     return Has64BitDPP;
846*fe6060f1SDimitry Andric   }
847*fe6060f1SDimitry Andric 
848*fe6060f1SDimitry Andric   bool hasPackedFP32Ops() const {
849*fe6060f1SDimitry Andric     return HasPackedFP32Ops;
850*fe6060f1SDimitry Andric   }
851*fe6060f1SDimitry Andric 
852*fe6060f1SDimitry Andric   bool hasFmaakFmamkF32Insts() const {
853*fe6060f1SDimitry Andric     return getGeneration() >= GFX10;
854*fe6060f1SDimitry Andric   }
855*fe6060f1SDimitry Andric 
856*fe6060f1SDimitry Andric   bool hasExtendedImageInsts() const {
857*fe6060f1SDimitry Andric     return HasExtendedImageInsts;
858*fe6060f1SDimitry Andric   }
859*fe6060f1SDimitry Andric 
860e8d8bef9SDimitry Andric   bool hasR128A16() const {
861e8d8bef9SDimitry Andric     return HasR128A16;
862e8d8bef9SDimitry Andric   }
863e8d8bef9SDimitry Andric 
864e8d8bef9SDimitry Andric   bool hasGFX10A16() const {
865e8d8bef9SDimitry Andric     return HasGFX10A16;
866e8d8bef9SDimitry Andric   }
867e8d8bef9SDimitry Andric 
868e8d8bef9SDimitry Andric   bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
869e8d8bef9SDimitry Andric 
870e8d8bef9SDimitry Andric   bool hasG16() const { return HasG16; }
871e8d8bef9SDimitry Andric 
872e8d8bef9SDimitry Andric   bool hasOffset3fBug() const {
873e8d8bef9SDimitry Andric     return HasOffset3fBug;
874e8d8bef9SDimitry Andric   }
875e8d8bef9SDimitry Andric 
876e8d8bef9SDimitry Andric   bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
877e8d8bef9SDimitry Andric 
878e8d8bef9SDimitry Andric   bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
879e8d8bef9SDimitry Andric 
880e8d8bef9SDimitry Andric   bool hasNSAEncoding() const { return HasNSAEncoding; }
881e8d8bef9SDimitry Andric 
882*fe6060f1SDimitry Andric   unsigned getNSAMaxSize() const { return NSAMaxSize; }
883*fe6060f1SDimitry Andric 
884*fe6060f1SDimitry Andric   bool hasGFX10_AEncoding() const {
885*fe6060f1SDimitry Andric     return GFX10_AEncoding;
886*fe6060f1SDimitry Andric   }
887*fe6060f1SDimitry Andric 
888e8d8bef9SDimitry Andric   bool hasGFX10_BEncoding() const {
889e8d8bef9SDimitry Andric     return GFX10_BEncoding;
890e8d8bef9SDimitry Andric   }
891e8d8bef9SDimitry Andric 
892e8d8bef9SDimitry Andric   bool hasGFX10_3Insts() const {
893e8d8bef9SDimitry Andric     return GFX10_3Insts;
894e8d8bef9SDimitry Andric   }
895e8d8bef9SDimitry Andric 
896e8d8bef9SDimitry Andric   bool hasMadF16() const;
897e8d8bef9SDimitry Andric 
898e8d8bef9SDimitry Andric   bool enableSIScheduler() const {
899e8d8bef9SDimitry Andric     return EnableSIScheduler;
900e8d8bef9SDimitry Andric   }
901e8d8bef9SDimitry Andric 
902e8d8bef9SDimitry Andric   bool loadStoreOptEnabled() const {
903e8d8bef9SDimitry Andric     return EnableLoadStoreOpt;
904e8d8bef9SDimitry Andric   }
905e8d8bef9SDimitry Andric 
906e8d8bef9SDimitry Andric   bool hasSGPRInitBug() const {
907e8d8bef9SDimitry Andric     return SGPRInitBug;
908e8d8bef9SDimitry Andric   }
909e8d8bef9SDimitry Andric 
910*fe6060f1SDimitry Andric   bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
911*fe6060f1SDimitry Andric 
912*fe6060f1SDimitry Andric   bool hasNegativeUnalignedScratchOffsetBug() const {
913*fe6060f1SDimitry Andric     return NegativeUnalignedScratchOffsetBug;
914*fe6060f1SDimitry Andric   }
915*fe6060f1SDimitry Andric 
916e8d8bef9SDimitry Andric   bool hasMFMAInlineLiteralBug() const {
917e8d8bef9SDimitry Andric     return HasMFMAInlineLiteralBug;
918e8d8bef9SDimitry Andric   }
919e8d8bef9SDimitry Andric 
920e8d8bef9SDimitry Andric   bool has12DWordStoreHazard() const {
921e8d8bef9SDimitry Andric     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
922e8d8bef9SDimitry Andric   }
923e8d8bef9SDimitry Andric 
924e8d8bef9SDimitry Andric   // \returns true if the subtarget supports DWORDX3 load/store instructions.
925e8d8bef9SDimitry Andric   bool hasDwordx3LoadStores() const {
926e8d8bef9SDimitry Andric     return CIInsts;
927e8d8bef9SDimitry Andric   }
928e8d8bef9SDimitry Andric 
929e8d8bef9SDimitry Andric   bool hasReadM0MovRelInterpHazard() const {
930e8d8bef9SDimitry Andric     return getGeneration() == AMDGPUSubtarget::GFX9;
931e8d8bef9SDimitry Andric   }
932e8d8bef9SDimitry Andric 
933e8d8bef9SDimitry Andric   bool hasReadM0SendMsgHazard() const {
934e8d8bef9SDimitry Andric     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
935e8d8bef9SDimitry Andric            getGeneration() <= AMDGPUSubtarget::GFX9;
936e8d8bef9SDimitry Andric   }
937e8d8bef9SDimitry Andric 
938e8d8bef9SDimitry Andric   bool hasVcmpxPermlaneHazard() const {
939e8d8bef9SDimitry Andric     return HasVcmpxPermlaneHazard;
940e8d8bef9SDimitry Andric   }
941e8d8bef9SDimitry Andric 
942e8d8bef9SDimitry Andric   bool hasVMEMtoScalarWriteHazard() const {
943e8d8bef9SDimitry Andric     return HasVMEMtoScalarWriteHazard;
944e8d8bef9SDimitry Andric   }
945e8d8bef9SDimitry Andric 
946e8d8bef9SDimitry Andric   bool hasSMEMtoVectorWriteHazard() const {
947e8d8bef9SDimitry Andric     return HasSMEMtoVectorWriteHazard;
948e8d8bef9SDimitry Andric   }
949e8d8bef9SDimitry Andric 
950e8d8bef9SDimitry Andric   bool hasLDSMisalignedBug() const {
951e8d8bef9SDimitry Andric     return LDSMisalignedBug && !EnableCuMode;
952e8d8bef9SDimitry Andric   }
953e8d8bef9SDimitry Andric 
954e8d8bef9SDimitry Andric   bool hasInstFwdPrefetchBug() const {
955e8d8bef9SDimitry Andric     return HasInstFwdPrefetchBug;
956e8d8bef9SDimitry Andric   }
957e8d8bef9SDimitry Andric 
958e8d8bef9SDimitry Andric   bool hasVcmpxExecWARHazard() const {
959e8d8bef9SDimitry Andric     return HasVcmpxExecWARHazard;
960e8d8bef9SDimitry Andric   }
961e8d8bef9SDimitry Andric 
962e8d8bef9SDimitry Andric   bool hasLdsBranchVmemWARHazard() const {
963e8d8bef9SDimitry Andric     return HasLdsBranchVmemWARHazard;
964e8d8bef9SDimitry Andric   }
965e8d8bef9SDimitry Andric 
966e8d8bef9SDimitry Andric   bool hasNSAtoVMEMBug() const {
967e8d8bef9SDimitry Andric     return HasNSAtoVMEMBug;
968e8d8bef9SDimitry Andric   }
969e8d8bef9SDimitry Andric 
970*fe6060f1SDimitry Andric   bool hasNSAClauseBug() const { return HasNSAClauseBug; }
971*fe6060f1SDimitry Andric 
972e8d8bef9SDimitry Andric   bool hasHardClauses() const { return getGeneration() >= GFX10; }
973e8d8bef9SDimitry Andric 
974*fe6060f1SDimitry Andric   bool hasGFX90AInsts() const { return GFX90AInsts; }
975*fe6060f1SDimitry Andric 
976*fe6060f1SDimitry Andric   /// Return if operations acting on VGPR tuples require even alignment.
977*fe6060f1SDimitry Andric   bool needsAlignedVGPRs() const { return GFX90AInsts; }
978*fe6060f1SDimitry Andric 
979*fe6060f1SDimitry Andric   bool hasPackedTID() const { return HasPackedTID; }
980*fe6060f1SDimitry Andric 
981e8d8bef9SDimitry Andric   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
982e8d8bef9SDimitry Andric   /// SGPRs
983e8d8bef9SDimitry Andric   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
984e8d8bef9SDimitry Andric 
985e8d8bef9SDimitry Andric   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
986e8d8bef9SDimitry Andric   /// VGPRs
987e8d8bef9SDimitry Andric   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
988e8d8bef9SDimitry Andric 
989e8d8bef9SDimitry Andric   /// Return occupancy for the given function. Used LDS and a number of
990e8d8bef9SDimitry Andric   /// registers if provided.
991e8d8bef9SDimitry Andric   /// Note, occupancy can be affected by the scratch allocation as well, but
992e8d8bef9SDimitry Andric   /// we do not have enough information to compute it.
993e8d8bef9SDimitry Andric   unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
994e8d8bef9SDimitry Andric                             unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
995e8d8bef9SDimitry Andric 
996e8d8bef9SDimitry Andric   /// \returns true if the flat_scratch register should be initialized with the
997e8d8bef9SDimitry Andric   /// pointer to the wave's scratch memory rather than a size and offset.
998e8d8bef9SDimitry Andric   bool flatScratchIsPointer() const {
999e8d8bef9SDimitry Andric     return getGeneration() >= AMDGPUSubtarget::GFX9;
1000e8d8bef9SDimitry Andric   }
1001e8d8bef9SDimitry Andric 
1002*fe6060f1SDimitry Andric   /// \returns true if the flat_scratch register is initialized by the HW.
1003*fe6060f1SDimitry Andric   /// In this case it is readonly.
1004*fe6060f1SDimitry Andric   bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
1005*fe6060f1SDimitry Andric 
1006e8d8bef9SDimitry Andric   /// \returns true if the machine has merged shaders in which s0-s7 are
1007e8d8bef9SDimitry Andric   /// reserved by the hardware and user SGPRs start at s8
1008e8d8bef9SDimitry Andric   bool hasMergedShaders() const {
1009e8d8bef9SDimitry Andric     return getGeneration() >= GFX9;
1010e8d8bef9SDimitry Andric   }
1011e8d8bef9SDimitry Andric 
1012e8d8bef9SDimitry Andric   /// \returns SGPR allocation granularity supported by the subtarget.
1013e8d8bef9SDimitry Andric   unsigned getSGPRAllocGranule() const {
1014e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
1015e8d8bef9SDimitry Andric   }
1016e8d8bef9SDimitry Andric 
1017e8d8bef9SDimitry Andric   /// \returns SGPR encoding granularity supported by the subtarget.
1018e8d8bef9SDimitry Andric   unsigned getSGPREncodingGranule() const {
1019e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
1020e8d8bef9SDimitry Andric   }
1021e8d8bef9SDimitry Andric 
1022e8d8bef9SDimitry Andric   /// \returns Total number of SGPRs supported by the subtarget.
1023e8d8bef9SDimitry Andric   unsigned getTotalNumSGPRs() const {
1024e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1025e8d8bef9SDimitry Andric   }
1026e8d8bef9SDimitry Andric 
1027e8d8bef9SDimitry Andric   /// \returns Addressable number of SGPRs supported by the subtarget.
1028e8d8bef9SDimitry Andric   unsigned getAddressableNumSGPRs() const {
1029e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
1030e8d8bef9SDimitry Andric   }
1031e8d8bef9SDimitry Andric 
1032e8d8bef9SDimitry Andric   /// \returns Minimum number of SGPRs that meets the given number of waves per
1033e8d8bef9SDimitry Andric   /// execution unit requirement supported by the subtarget.
1034e8d8bef9SDimitry Andric   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1035e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1036e8d8bef9SDimitry Andric   }
1037e8d8bef9SDimitry Andric 
1038e8d8bef9SDimitry Andric   /// \returns Maximum number of SGPRs that meets the given number of waves per
1039e8d8bef9SDimitry Andric   /// execution unit requirement supported by the subtarget.
1040e8d8bef9SDimitry Andric   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1041e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1042e8d8bef9SDimitry Andric   }
1043e8d8bef9SDimitry Andric 
1044*fe6060f1SDimitry Andric   /// \returns Reserved number of SGPRs. This is common
1045*fe6060f1SDimitry Andric   /// utility function called by MachineFunction and
1046*fe6060f1SDimitry Andric   /// Function variants of getReservedNumSGPRs.
1047*fe6060f1SDimitry Andric   unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
1048*fe6060f1SDimitry Andric   /// \returns Reserved number of SGPRs for given machine function \p MF.
1049e8d8bef9SDimitry Andric   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1050e8d8bef9SDimitry Andric 
1051*fe6060f1SDimitry Andric   /// \returns Reserved number of SGPRs for given function \p F.
1052*fe6060f1SDimitry Andric   unsigned getReservedNumSGPRs(const Function &F) const;
1053*fe6060f1SDimitry Andric 
1054*fe6060f1SDimitry Andric   /// \returns max num SGPRs. This is the common utility
1055*fe6060f1SDimitry Andric   /// function called by MachineFunction and Function
1056*fe6060f1SDimitry Andric   /// variants of getMaxNumSGPRs.
1057*fe6060f1SDimitry Andric   unsigned getBaseMaxNumSGPRs(const Function &F,
1058*fe6060f1SDimitry Andric                               std::pair<unsigned, unsigned> WavesPerEU,
1059*fe6060f1SDimitry Andric                               unsigned PreloadedSGPRs,
1060*fe6060f1SDimitry Andric                               unsigned ReservedNumSGPRs) const;
1061*fe6060f1SDimitry Andric 
1062e8d8bef9SDimitry Andric   /// \returns Maximum number of SGPRs that meets number of waves per execution
1063e8d8bef9SDimitry Andric   /// unit requirement for function \p MF, or number of SGPRs explicitly
1064e8d8bef9SDimitry Andric   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1065e8d8bef9SDimitry Andric   ///
1066e8d8bef9SDimitry Andric   /// \returns Value that meets number of waves per execution unit requirement
1067e8d8bef9SDimitry Andric   /// if explicitly requested value cannot be converted to integer, violates
1068e8d8bef9SDimitry Andric   /// subtarget's specifications, or does not meet number of waves per execution
1069e8d8bef9SDimitry Andric   /// unit requirement.
1070e8d8bef9SDimitry Andric   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1071e8d8bef9SDimitry Andric 
1072*fe6060f1SDimitry Andric   /// \returns Maximum number of SGPRs that meets number of waves per execution
1073*fe6060f1SDimitry Andric   /// unit requirement for function \p F, or number of SGPRs explicitly
1074*fe6060f1SDimitry Andric   /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1075*fe6060f1SDimitry Andric   ///
1076*fe6060f1SDimitry Andric   /// \returns Value that meets number of waves per execution unit requirement
1077*fe6060f1SDimitry Andric   /// if explicitly requested value cannot be converted to integer, violates
1078*fe6060f1SDimitry Andric   /// subtarget's specifications, or does not meet number of waves per execution
1079*fe6060f1SDimitry Andric   /// unit requirement.
1080*fe6060f1SDimitry Andric   unsigned getMaxNumSGPRs(const Function &F) const;
1081*fe6060f1SDimitry Andric 
1082e8d8bef9SDimitry Andric   /// \returns VGPR allocation granularity supported by the subtarget.
1083e8d8bef9SDimitry Andric   unsigned getVGPRAllocGranule() const {
1084e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
1085e8d8bef9SDimitry Andric   }
1086e8d8bef9SDimitry Andric 
1087e8d8bef9SDimitry Andric   /// \returns VGPR encoding granularity supported by the subtarget.
1088e8d8bef9SDimitry Andric   unsigned getVGPREncodingGranule() const {
1089e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
1090e8d8bef9SDimitry Andric   }
1091e8d8bef9SDimitry Andric 
1092e8d8bef9SDimitry Andric   /// \returns Total number of VGPRs supported by the subtarget.
1093e8d8bef9SDimitry Andric   unsigned getTotalNumVGPRs() const {
1094e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1095e8d8bef9SDimitry Andric   }
1096e8d8bef9SDimitry Andric 
1097e8d8bef9SDimitry Andric   /// \returns Addressable number of VGPRs supported by the subtarget.
1098e8d8bef9SDimitry Andric   unsigned getAddressableNumVGPRs() const {
1099e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
1100e8d8bef9SDimitry Andric   }
1101e8d8bef9SDimitry Andric 
1102e8d8bef9SDimitry Andric   /// \returns Minimum number of VGPRs that meets given number of waves per
1103e8d8bef9SDimitry Andric   /// execution unit requirement supported by the subtarget.
1104e8d8bef9SDimitry Andric   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1105e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1106e8d8bef9SDimitry Andric   }
1107e8d8bef9SDimitry Andric 
1108e8d8bef9SDimitry Andric   /// \returns Maximum number of VGPRs that meets given number of waves per
1109e8d8bef9SDimitry Andric   /// execution unit requirement supported by the subtarget.
1110e8d8bef9SDimitry Andric   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1111e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1112e8d8bef9SDimitry Andric   }
1113e8d8bef9SDimitry Andric 
1114*fe6060f1SDimitry Andric   /// \returns max num VGPRs. This is the common utility function
1115*fe6060f1SDimitry Andric   /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1116*fe6060f1SDimitry Andric   unsigned getBaseMaxNumVGPRs(const Function &F,
1117*fe6060f1SDimitry Andric                               std::pair<unsigned, unsigned> WavesPerEU) const;
1118*fe6060f1SDimitry Andric   /// \returns Maximum number of VGPRs that meets number of waves per execution
1119*fe6060f1SDimitry Andric   /// unit requirement for function \p F, or number of VGPRs explicitly
1120*fe6060f1SDimitry Andric   /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1121*fe6060f1SDimitry Andric   ///
1122*fe6060f1SDimitry Andric   /// \returns Value that meets number of waves per execution unit requirement
1123*fe6060f1SDimitry Andric   /// if explicitly requested value cannot be converted to integer, violates
1124*fe6060f1SDimitry Andric   /// subtarget's specifications, or does not meet number of waves per execution
1125*fe6060f1SDimitry Andric   /// unit requirement.
1126*fe6060f1SDimitry Andric   unsigned getMaxNumVGPRs(const Function &F) const;
1127*fe6060f1SDimitry Andric 
1128e8d8bef9SDimitry Andric   /// \returns Maximum number of VGPRs that meets number of waves per execution
1129e8d8bef9SDimitry Andric   /// unit requirement for function \p MF, or number of VGPRs explicitly
1130e8d8bef9SDimitry Andric   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1131e8d8bef9SDimitry Andric   ///
1132e8d8bef9SDimitry Andric   /// \returns Value that meets number of waves per execution unit requirement
1133e8d8bef9SDimitry Andric   /// if explicitly requested value cannot be converted to integer, violates
1134e8d8bef9SDimitry Andric   /// subtarget's specifications, or does not meet number of waves per execution
1135e8d8bef9SDimitry Andric   /// unit requirement.
1136e8d8bef9SDimitry Andric   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1137e8d8bef9SDimitry Andric 
1138e8d8bef9SDimitry Andric   void getPostRAMutations(
1139e8d8bef9SDimitry Andric       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1140e8d8bef9SDimitry Andric       const override;
1141e8d8bef9SDimitry Andric 
1142e8d8bef9SDimitry Andric   bool isWave32() const {
1143e8d8bef9SDimitry Andric     return getWavefrontSize() == 32;
1144e8d8bef9SDimitry Andric   }
1145e8d8bef9SDimitry Andric 
1146e8d8bef9SDimitry Andric   bool isWave64() const {
1147e8d8bef9SDimitry Andric     return getWavefrontSize() == 64;
1148e8d8bef9SDimitry Andric   }
1149e8d8bef9SDimitry Andric 
1150e8d8bef9SDimitry Andric   const TargetRegisterClass *getBoolRC() const {
1151e8d8bef9SDimitry Andric     return getRegisterInfo()->getBoolRC();
1152e8d8bef9SDimitry Andric   }
1153e8d8bef9SDimitry Andric 
1154e8d8bef9SDimitry Andric   /// \returns Maximum number of work groups per compute unit supported by the
1155e8d8bef9SDimitry Andric   /// subtarget and limited by given \p FlatWorkGroupSize.
1156e8d8bef9SDimitry Andric   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1157e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1158e8d8bef9SDimitry Andric   }
1159e8d8bef9SDimitry Andric 
1160e8d8bef9SDimitry Andric   /// \returns Minimum flat work group size supported by the subtarget.
1161e8d8bef9SDimitry Andric   unsigned getMinFlatWorkGroupSize() const override {
1162e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1163e8d8bef9SDimitry Andric   }
1164e8d8bef9SDimitry Andric 
1165e8d8bef9SDimitry Andric   /// \returns Maximum flat work group size supported by the subtarget.
1166e8d8bef9SDimitry Andric   unsigned getMaxFlatWorkGroupSize() const override {
1167e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1168e8d8bef9SDimitry Andric   }
1169e8d8bef9SDimitry Andric 
1170e8d8bef9SDimitry Andric   /// \returns Number of waves per execution unit required to support the given
1171e8d8bef9SDimitry Andric   /// \p FlatWorkGroupSize.
1172e8d8bef9SDimitry Andric   unsigned
1173e8d8bef9SDimitry Andric   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1174e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1175e8d8bef9SDimitry Andric   }
1176e8d8bef9SDimitry Andric 
1177e8d8bef9SDimitry Andric   /// \returns Minimum number of waves per execution unit supported by the
1178e8d8bef9SDimitry Andric   /// subtarget.
1179e8d8bef9SDimitry Andric   unsigned getMinWavesPerEU() const override {
1180e8d8bef9SDimitry Andric     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1181e8d8bef9SDimitry Andric   }
1182e8d8bef9SDimitry Andric 
1183e8d8bef9SDimitry Andric   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1184e8d8bef9SDimitry Andric                              SDep &Dep) const override;
1185e8d8bef9SDimitry Andric };
1186e8d8bef9SDimitry Andric 
1187e8d8bef9SDimitry Andric } // end namespace llvm
1188e8d8bef9SDimitry Andric 
1189e8d8bef9SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
1190