xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
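  // MFMA (AGPR) hazards can require many more wait states than any other
  // hazard, so use a deeper lookahead window when the function uses AGPRs.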
  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
  TSchedModel.init(&ST);
}

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

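// True for instructions that implicitly read M0: s_sendmsg, s_sendmsghalt,
// s_ttracedata, and any DS instruction that accesses GDS.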
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}

static bool isPermlane(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == AMDGPU::V_PERMLANE16_B32 ||
         Opcode == AMDGPU::V_PERMLANEX16_B32;
}

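// Extract the hardware register id encoded in the simm16 operand of an
// s_getreg/s_setreg instruction.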
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}

ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
    return NoopHazard;

  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
}

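// Run hazard detection on every instruction inside the current bundle,
// inserting the required s_nop wait states into the bundle and recording the
// emitted instructions for later hazard checks.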
void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}

unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}

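// Returns the number of wait states (to be filled with s_nop instructions)
// required before \p MI can be issued without a hazard.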
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (SIInstrInfo::isMAI(*MI))
    return std::max(WaitStates, checkMAIHazards(MI));

  if (MI->mayLoad() || MI->mayStore())
    return std::max(WaitStates, checkMAILdStHazards(MI));

  return WaitStates;
}

void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can lead to buffer overflow such that
  // detectable hazards are missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  if (CurrCycleInstr->isBundle()) {
    processBundle();
    return;
  }

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first.  Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since the most recent instruction
// matching \p IsHazard, scanning backwards from \p I through all predecessor
// blocks. Scanning along a path stops once \p IsExpired returns true.
// Can only be run in hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

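// Scan backwards from the current instruction for one matching \p IsHazard.
// In hazard recognizer mode the machine function itself is walked (across
// block boundaries); otherwise the EmittedInstrs history maintained by
// AdvanceCycle() is used.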
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions.  The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non-SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}

int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn,
                                                   SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates = ST.getSetRegWaitStates();
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

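// If \p MI is a store whose store-data register could be clobbered by the
// next instruction (the 12-dword-store hazard), return the index of the data
// operand; otherwise return -1.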
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next
  // instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  unsigned LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

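// In hazard recognizer mode, try to fix hazards that require inserting real
// instructions (rather than just s_nop wait states) before \p MI.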
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}

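// A v_permlane* that follows a v_cmpx (VOPC) instruction with no intervening
// VALU needs a fix; a v_mov_b32 of the permlane's src0 VGPR is inserted to
// break the hazard.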
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVOPC(*MI);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    unsigned Opc = MI->getOpcode();
    return SIInstrInfo::isVALU(*MI) &&
           Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 &&
           Opc != AMDGPU::V_NOP_sdwa;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. The register must be alive, so use src0 of
  // V_PERMLANE*, which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  unsigned Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
    .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
    .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}

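// If a SALU or SMEM instruction writes a register that is still in use by an
// outstanding VMEM, DS or FLAT access, insert a v_nop to break the hazard
// (an intervening VALU or "s_waitcnt 0" also clears it).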
904*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
905*0b57cec5SDimitry Andric   if (!ST.hasVMEMtoScalarWriteHazard())
906*0b57cec5SDimitry Andric     return false;
907*0b57cec5SDimitry Andric 
908*0b57cec5SDimitry Andric   if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
909*0b57cec5SDimitry Andric     return false;
910*0b57cec5SDimitry Andric 
911*0b57cec5SDimitry Andric   if (MI->getNumDefs() == 0)
912*0b57cec5SDimitry Andric     return false;
913*0b57cec5SDimitry Andric 
914*0b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
915*0b57cec5SDimitry Andric 
916*0b57cec5SDimitry Andric   auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
917*0b57cec5SDimitry Andric     if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
918*0b57cec5SDimitry Andric         !SIInstrInfo::isFLAT(*I))
919*0b57cec5SDimitry Andric       return false;
920*0b57cec5SDimitry Andric 
921*0b57cec5SDimitry Andric     for (const MachineOperand &Def : MI->defs()) {
922*0b57cec5SDimitry Andric       MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
923*0b57cec5SDimitry Andric       if (!Op)
924*0b57cec5SDimitry Andric         continue;
925*0b57cec5SDimitry Andric       return true;
926*0b57cec5SDimitry Andric     }
927*0b57cec5SDimitry Andric     return false;
928*0b57cec5SDimitry Andric   };
929*0b57cec5SDimitry Andric 
930*0b57cec5SDimitry Andric   auto IsExpiredFn = [] (MachineInstr *MI, int) {
931*0b57cec5SDimitry Andric     return MI && (SIInstrInfo::isVALU(*MI) ||
932*0b57cec5SDimitry Andric                   (MI->getOpcode() == AMDGPU::S_WAITCNT &&
933*0b57cec5SDimitry Andric                    !MI->getOperand(0).getImm()));
934*0b57cec5SDimitry Andric   };
935*0b57cec5SDimitry Andric 
936*0b57cec5SDimitry Andric   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
937*0b57cec5SDimitry Andric       std::numeric_limits<int>::max())
938*0b57cec5SDimitry Andric     return false;
939*0b57cec5SDimitry Andric 
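  // For example, given
  //   buffer_load_dword v0, off, s[0:3], s4
  //   s_mov_b32 s4, 0
  // the v_nop below is inserted in front of the s_mov_b32.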
940*0b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
941*0b57cec5SDimitry Andric   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
942*0b57cec5SDimitry Andric   return true;
943*0b57cec5SDimitry Andric }
944*0b57cec5SDimitry Andric 
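// Mitigate the SMEM-to-vector-write hazard: MI is a VALU instruction writing
// an SGPR (sdst, or vdst for v_readlane/v_readfirstlane) that an earlier SMEM
// load still reads. Unless an intervening instruction already resolves the
// dependency (see IsExpiredFn below), an "s_mov_b32 null, 0" is inserted in
// front of MI as the workaround.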
945*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
946*0b57cec5SDimitry Andric   if (!ST.hasSMEMtoVectorWriteHazard())
947*0b57cec5SDimitry Andric     return false;
948*0b57cec5SDimitry Andric 
949*0b57cec5SDimitry Andric   if (!SIInstrInfo::isVALU(*MI))
950*0b57cec5SDimitry Andric     return false;
951*0b57cec5SDimitry Andric 
952*0b57cec5SDimitry Andric   unsigned SDSTName;
953*0b57cec5SDimitry Andric   switch (MI->getOpcode()) {
954*0b57cec5SDimitry Andric   case AMDGPU::V_READLANE_B32:
955*0b57cec5SDimitry Andric   case AMDGPU::V_READFIRSTLANE_B32:
956*0b57cec5SDimitry Andric     SDSTName = AMDGPU::OpName::vdst;
957*0b57cec5SDimitry Andric     break;
958*0b57cec5SDimitry Andric   default:
959*0b57cec5SDimitry Andric     SDSTName = AMDGPU::OpName::sdst;
960*0b57cec5SDimitry Andric     break;
961*0b57cec5SDimitry Andric   }
962*0b57cec5SDimitry Andric 
963*0b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
964*0b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
965*0b57cec5SDimitry Andric   const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
966*0b57cec5SDimitry Andric   const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
967*0b57cec5SDimitry Andric   if (!SDST) {
968*0b57cec5SDimitry Andric     for (const auto &MO : MI->implicit_operands()) {
969*0b57cec5SDimitry Andric       if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
970*0b57cec5SDimitry Andric         SDST = &MO;
971*0b57cec5SDimitry Andric         break;
972*0b57cec5SDimitry Andric       }
973*0b57cec5SDimitry Andric     }
974*0b57cec5SDimitry Andric   }
975*0b57cec5SDimitry Andric 
976*0b57cec5SDimitry Andric   if (!SDST)
977*0b57cec5SDimitry Andric     return false;
978*0b57cec5SDimitry Andric 
979*0b57cec5SDimitry Andric   const unsigned SDSTReg = SDST->getReg();
980*0b57cec5SDimitry Andric   auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
981*0b57cec5SDimitry Andric     return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
982*0b57cec5SDimitry Andric   };
983*0b57cec5SDimitry Andric 
984*0b57cec5SDimitry Andric   auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
985*0b57cec5SDimitry Andric     if (MI) {
986*0b57cec5SDimitry Andric       if (TII->isSALU(*MI)) {
987*0b57cec5SDimitry Andric         switch (MI->getOpcode()) {
988*0b57cec5SDimitry Andric         case AMDGPU::S_SETVSKIP:
989*0b57cec5SDimitry Andric         case AMDGPU::S_VERSION:
990*0b57cec5SDimitry Andric         case AMDGPU::S_WAITCNT_VSCNT:
991*0b57cec5SDimitry Andric         case AMDGPU::S_WAITCNT_VMCNT:
992*0b57cec5SDimitry Andric         case AMDGPU::S_WAITCNT_EXPCNT:
993*0b57cec5SDimitry Andric           // These instructions cannot mitigate the hazard.
994*0b57cec5SDimitry Andric           return false;
995*0b57cec5SDimitry Andric         case AMDGPU::S_WAITCNT_LGKMCNT:
996*0b57cec5SDimitry Andric           // Reducing the lgkmcnt counter to 0 always mitigates the hazard.
997*0b57cec5SDimitry Andric           return (MI->getOperand(1).getImm() == 0) &&
998*0b57cec5SDimitry Andric                  (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
999*0b57cec5SDimitry Andric         case AMDGPU::S_WAITCNT: {
1000*0b57cec5SDimitry Andric           const int64_t Imm = MI->getOperand(0).getImm();
1001*0b57cec5SDimitry Andric           AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
1002*0b57cec5SDimitry Andric           return (Decoded.LgkmCnt == 0);
1003*0b57cec5SDimitry Andric         }
1004*0b57cec5SDimitry Andric         default:
1005*0b57cec5SDimitry Andric           // SOPP instructions cannot mitigate the hazard.
1006*0b57cec5SDimitry Andric           if (TII->isSOPP(*MI))
1007*0b57cec5SDimitry Andric             return false;
1008*0b57cec5SDimitry Andric           // At this point the SALU can be assumed to mitigate the hazard
1009*0b57cec5SDimitry Andric           // because either:
1010*0b57cec5SDimitry Andric           // (a) it is independent of the at-risk SMEM (breaking the chain),
1011*0b57cec5SDimitry Andric           // or
1012*0b57cec5SDimitry Andric           // (b) it is dependent on the SMEM, in which case an appropriate
1013*0b57cec5SDimitry Andric           //     s_waitcnt lgkmcnt _must_ exist between it and the at-risk
1014*0b57cec5SDimitry Andric           //     SMEM instruction.
1015*0b57cec5SDimitry Andric           return true;
1016*0b57cec5SDimitry Andric         }
1017*0b57cec5SDimitry Andric       }
1018*0b57cec5SDimitry Andric     }
1019*0b57cec5SDimitry Andric     return false;
1020*0b57cec5SDimitry Andric   };
1021*0b57cec5SDimitry Andric 
1022*0b57cec5SDimitry Andric   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1023*0b57cec5SDimitry Andric       std::numeric_limits<int>::max())
1024*0b57cec5SDimitry Andric     return false;
1025*0b57cec5SDimitry Andric 
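  // For example, given
  //   s_load_dword s6, s[4:5], 0x0
  //   v_readlane_b32 s5, v1, 0
  // the "s_mov_b32 null, 0" below is inserted in front of the v_readlane.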
1026*0b57cec5SDimitry Andric   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1027*0b57cec5SDimitry Andric           TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
1028*0b57cec5SDimitry Andric       .addImm(0);
1029*0b57cec5SDimitry Andric   return true;
1030*0b57cec5SDimitry Andric }
1031*0b57cec5SDimitry Andric 
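// Mitigate the VCMPX-EXEC-WAR hazard: MI is a VALU instruction that writes
// EXEC while an earlier non-VALU instruction still reads EXEC. A VALU that
// writes an SGPR, or an existing s_waitcnt_depctr covering the same bits,
// already resolves it; otherwise an "s_waitcnt_depctr 0xfffe" is inserted in
// front of MI.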
1032*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
1033*0b57cec5SDimitry Andric   if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
1034*0b57cec5SDimitry Andric     return false;
1035*0b57cec5SDimitry Andric 
1036*0b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
1037*0b57cec5SDimitry Andric   if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
1038*0b57cec5SDimitry Andric     return false;
1039*0b57cec5SDimitry Andric 
1040*0b57cec5SDimitry Andric   auto IsHazardFn = [TRI] (MachineInstr *I) {
1041*0b57cec5SDimitry Andric     if (SIInstrInfo::isVALU(*I))
1042*0b57cec5SDimitry Andric       return false;
1043*0b57cec5SDimitry Andric     return I->readsRegister(AMDGPU::EXEC, TRI);
1044*0b57cec5SDimitry Andric   };
1045*0b57cec5SDimitry Andric 
1046*0b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
1047*0b57cec5SDimitry Andric   auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
1048*0b57cec5SDimitry Andric     if (!MI)
1049*0b57cec5SDimitry Andric       return false;
1050*0b57cec5SDimitry Andric     if (SIInstrInfo::isVALU(*MI)) {
1051*0b57cec5SDimitry Andric       if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
1052*0b57cec5SDimitry Andric         return true;
1053*0b57cec5SDimitry Andric       for (const MachineOperand &MO : MI->implicit_operands())
1054*0b57cec5SDimitry Andric         if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
1055*0b57cec5SDimitry Andric           return true;
1056*0b57cec5SDimitry Andric     }
1057*0b57cec5SDimitry Andric     if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1058*0b57cec5SDimitry Andric         (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
1059*0b57cec5SDimitry Andric       return true;
1060*0b57cec5SDimitry Andric     return false;
1061*0b57cec5SDimitry Andric   };
1062*0b57cec5SDimitry Andric 
1063*0b57cec5SDimitry Andric   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1064*0b57cec5SDimitry Andric       std::numeric_limits<int>::max())
1065*0b57cec5SDimitry Andric     return false;
1066*0b57cec5SDimitry Andric 
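  // For example, an s_cbranch_execz (which reads EXEC) followed by a V_CMPX
  // (which writes EXEC) gets the "s_waitcnt_depctr 0xfffe" below inserted in
  // front of the V_CMPX.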
1067*0b57cec5SDimitry Andric   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1068*0b57cec5SDimitry Andric           TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1069*0b57cec5SDimitry Andric     .addImm(0xfffe);
1070*0b57cec5SDimitry Andric   return true;
1071*0b57cec5SDimitry Andric }
1072*0b57cec5SDimitry Andric 
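// Mitigate the LDS-branch-VMEM WAR hazard: MI is a DS or VMEM access and,
// across a preceding branch, the most recent access of the other kind has not
// yet been separated from it by an "s_waitcnt_vscnt null, 0". In that case
// such a waitcnt is inserted in front of MI.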
1073*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
1074*0b57cec5SDimitry Andric   if (!ST.hasLdsBranchVmemWARHazard())
1075*0b57cec5SDimitry Andric     return false;
1076*0b57cec5SDimitry Andric 
1077*0b57cec5SDimitry Andric   auto IsHazardInst = [] (const MachineInstr *MI) {
1078*0b57cec5SDimitry Andric     if (SIInstrInfo::isDS(*MI))
1079*0b57cec5SDimitry Andric       return 1;
1080*0b57cec5SDimitry Andric     if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
1081*0b57cec5SDimitry Andric       return 2;
1082*0b57cec5SDimitry Andric     return 0;
1083*0b57cec5SDimitry Andric   };
1084*0b57cec5SDimitry Andric 
1085*0b57cec5SDimitry Andric   auto InstType = IsHazardInst(MI);
1086*0b57cec5SDimitry Andric   if (!InstType)
1087*0b57cec5SDimitry Andric     return false;
1088*0b57cec5SDimitry Andric 
1089*0b57cec5SDimitry Andric   auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
1090*0b57cec5SDimitry Andric     return I && (IsHazardInst(I) ||
1091*0b57cec5SDimitry Andric                  (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1092*0b57cec5SDimitry Andric                   I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1093*0b57cec5SDimitry Andric                   !I->getOperand(1).getImm()));
1094*0b57cec5SDimitry Andric   };
1095*0b57cec5SDimitry Andric 
1096*0b57cec5SDimitry Andric   auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
1097*0b57cec5SDimitry Andric     if (!I->isBranch())
1098*0b57cec5SDimitry Andric       return false;
1099*0b57cec5SDimitry Andric 
1100*0b57cec5SDimitry Andric     auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
1101*0b57cec5SDimitry Andric       auto InstType2 = IsHazardInst(I);
1102*0b57cec5SDimitry Andric       return InstType2 && InstType != InstType2;
1103*0b57cec5SDimitry Andric     };
1104*0b57cec5SDimitry Andric 
1105*0b57cec5SDimitry Andric     auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
1106*0b57cec5SDimitry Andric       if (!I)
1107*0b57cec5SDimitry Andric         return false;
1108*0b57cec5SDimitry Andric 
1109*0b57cec5SDimitry Andric       auto InstType2 = IsHazardInst(I);
1110*0b57cec5SDimitry Andric       if (InstType == InstType2)
1111*0b57cec5SDimitry Andric         return true;
1112*0b57cec5SDimitry Andric 
1113*0b57cec5SDimitry Andric       return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1114*0b57cec5SDimitry Andric              I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1115*0b57cec5SDimitry Andric              !I->getOperand(1).getImm();
1116*0b57cec5SDimitry Andric     };
1117*0b57cec5SDimitry Andric 
1118*0b57cec5SDimitry Andric     return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
1119*0b57cec5SDimitry Andric            std::numeric_limits<int>::max();
1120*0b57cec5SDimitry Andric   };
1121*0b57cec5SDimitry Andric 
1122*0b57cec5SDimitry Andric   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1123*0b57cec5SDimitry Andric       std::numeric_limits<int>::max())
1124*0b57cec5SDimitry Andric     return false;
1125*0b57cec5SDimitry Andric 
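  // For example, a ds_write before the branch followed by a buffer_load after
  // it, with no "s_waitcnt_vscnt null, 0" in between, gets that waitcnt
  // inserted in front of the buffer_load.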
1126*0b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
1127*0b57cec5SDimitry Andric   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1128*0b57cec5SDimitry Andric           TII->get(AMDGPU::S_WAITCNT_VSCNT))
1129*0b57cec5SDimitry Andric     .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1130*0b57cec5SDimitry Andric     .addImm(0);
1131*0b57cec5SDimitry Andric 
1132*0b57cec5SDimitry Andric   return true;
1133*0b57cec5SDimitry Andric }
1134*0b57cec5SDimitry Andric 
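// Require one wait state between a large (>= 16 byte) NSA-encoded MIMG
// instruction and a following MUBUF/MTBUF access whose offset has bit 1 or
// bit 2 set.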
1135*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
1136*0b57cec5SDimitry Andric   int NSAtoVMEMWaitStates = 1;
1137*0b57cec5SDimitry Andric 
1138*0b57cec5SDimitry Andric   if (!ST.hasNSAtoVMEMBug())
1139*0b57cec5SDimitry Andric     return 0;
1140*0b57cec5SDimitry Andric 
1141*0b57cec5SDimitry Andric   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
1142*0b57cec5SDimitry Andric     return 0;
1143*0b57cec5SDimitry Andric 
1144*0b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
1145*0b57cec5SDimitry Andric   const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1146*0b57cec5SDimitry Andric   if (!Offset || (Offset->getImm() & 6) == 0)
1147*0b57cec5SDimitry Andric     return 0;
1148*0b57cec5SDimitry Andric 
1149*0b57cec5SDimitry Andric   auto IsHazardFn = [TII] (MachineInstr *I) {
1150*0b57cec5SDimitry Andric     if (!SIInstrInfo::isMIMG(*I))
1151*0b57cec5SDimitry Andric       return false;
1152*0b57cec5SDimitry Andric     const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
1153*0b57cec5SDimitry Andric     return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1154*0b57cec5SDimitry Andric            TII->getInstSizeInBytes(*I) >= 16;
1155*0b57cec5SDimitry Andric   };
1156*0b57cec5SDimitry Andric 
1157*0b57cec5SDimitry Andric   return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
1158*0b57cec5SDimitry Andric }
1159*0b57cec5SDimitry Andric 
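// Require three wait states between a VMEM/FLAT floating-point atomic and a
// following s_denorm_mode, unless an intervening VALU or s_waitcnt*
// instruction already covers the gap.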
1160*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
1161*0b57cec5SDimitry Andric   int FPAtomicToDenormModeWaitStates = 3;
1162*0b57cec5SDimitry Andric 
1163*0b57cec5SDimitry Andric   if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
1164*0b57cec5SDimitry Andric     return 0;
1165*0b57cec5SDimitry Andric 
1166*0b57cec5SDimitry Andric   auto IsHazardFn = [] (MachineInstr *I) {
1167*0b57cec5SDimitry Andric     if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
1168*0b57cec5SDimitry Andric       return false;
1169*0b57cec5SDimitry Andric     return SIInstrInfo::isFPAtomic(*I);
1170*0b57cec5SDimitry Andric   };
1171*0b57cec5SDimitry Andric 
1172*0b57cec5SDimitry Andric   auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
1173*0b57cec5SDimitry Andric     if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
1174*0b57cec5SDimitry Andric       return true;
1175*0b57cec5SDimitry Andric 
1176*0b57cec5SDimitry Andric     switch (MI->getOpcode()) {
1177*0b57cec5SDimitry Andric     case AMDGPU::S_WAITCNT:
1178*0b57cec5SDimitry Andric     case AMDGPU::S_WAITCNT_VSCNT:
1179*0b57cec5SDimitry Andric     case AMDGPU::S_WAITCNT_VMCNT:
1180*0b57cec5SDimitry Andric     case AMDGPU::S_WAITCNT_EXPCNT:
1181*0b57cec5SDimitry Andric     case AMDGPU::S_WAITCNT_LGKMCNT:
1182*0b57cec5SDimitry Andric     case AMDGPU::S_WAITCNT_IDLE:
1183*0b57cec5SDimitry Andric       return true;
1184*0b57cec5SDimitry Andric     default:
1185*0b57cec5SDimitry Andric       break;
1186*0b57cec5SDimitry Andric     }
1187*0b57cec5SDimitry Andric 
1188*0b57cec5SDimitry Andric     return false;
1189*0b57cec5SDimitry Andric   };
1190*0b57cec5SDimitry Andric 
1191*0b57cec5SDimitry Andric 
1192*0b57cec5SDimitry Andric   return FPAtomicToDenormModeWaitStates -
1193*0b57cec5SDimitry Andric          ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
1194*0b57cec5SDimitry Andric }
1195*0b57cec5SDimitry Andric 
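// Compute the wait states required before an MAI instruction (MFMA or
// v_accvgpr_read/write) to account for earlier VALU writes of EXEC or of its
// VGPR sources, overlapping AGPR writes by other MFMAs, and preceding
// v_accvgpr_write defs; the exact counts depend on the latency of the
// defining MFMA (4x4, 16x16 or 32x32 variants).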
1196*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
1197*0b57cec5SDimitry Andric   assert(SIInstrInfo::isMAI(*MI));
1198*0b57cec5SDimitry Andric 
1199*0b57cec5SDimitry Andric   int WaitStatesNeeded = 0;
1200*0b57cec5SDimitry Andric   unsigned Opc = MI->getOpcode();
1201*0b57cec5SDimitry Andric 
1202*0b57cec5SDimitry Andric   auto IsVALUFn = [] (MachineInstr *MI) {
1203*0b57cec5SDimitry Andric     return SIInstrInfo::isVALU(*MI);
1204*0b57cec5SDimitry Andric   };
1205*0b57cec5SDimitry Andric 
1206*0b57cec5SDimitry Andric   if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
1207*0b57cec5SDimitry Andric     const int LegacyVALUWritesVGPRWaitStates = 2;
1208*0b57cec5SDimitry Andric     const int VALUWritesExecWaitStates = 4;
1209*0b57cec5SDimitry Andric     const int MaxWaitStates = 4;
1210*0b57cec5SDimitry Andric 
1211*0b57cec5SDimitry Andric     int WaitStatesNeededForUse = VALUWritesExecWaitStates -
1212*0b57cec5SDimitry Andric       getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
1213*0b57cec5SDimitry Andric     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1214*0b57cec5SDimitry Andric 
1215*0b57cec5SDimitry Andric     if (WaitStatesNeeded < MaxWaitStates) {
1216*0b57cec5SDimitry Andric       for (const MachineOperand &Use : MI->explicit_uses()) {
1217*0b57cec5SDimitry Andric         const int MaxWaitStates = 2;
1218*0b57cec5SDimitry Andric 
1219*0b57cec5SDimitry Andric         if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
1220*0b57cec5SDimitry Andric           continue;
1221*0b57cec5SDimitry Andric 
1222*0b57cec5SDimitry Andric         int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
1223*0b57cec5SDimitry Andric           getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
1224*0b57cec5SDimitry Andric         WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1225*0b57cec5SDimitry Andric 
1226*0b57cec5SDimitry Andric         if (WaitStatesNeeded == MaxWaitStates)
1227*0b57cec5SDimitry Andric           break;
1228*0b57cec5SDimitry Andric       }
1229*0b57cec5SDimitry Andric     }
1230*0b57cec5SDimitry Andric   }
1231*0b57cec5SDimitry Andric 
1232*0b57cec5SDimitry Andric   auto IsMFMAFn = [] (MachineInstr *MI) {
1233*0b57cec5SDimitry Andric     return SIInstrInfo::isMAI(*MI) &&
1234*0b57cec5SDimitry Andric            MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
1235*0b57cec5SDimitry Andric            MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
1236*0b57cec5SDimitry Andric   };
1237*0b57cec5SDimitry Andric 
1238*0b57cec5SDimitry Andric   for (const MachineOperand &Op : MI->explicit_operands()) {
1239*0b57cec5SDimitry Andric     if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
1240*0b57cec5SDimitry Andric       continue;
1241*0b57cec5SDimitry Andric 
1242*0b57cec5SDimitry Andric     if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
1243*0b57cec5SDimitry Andric       continue;
1244*0b57cec5SDimitry Andric 
1245*0b57cec5SDimitry Andric     const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
1246*0b57cec5SDimitry Andric     const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
1247*0b57cec5SDimitry Andric     const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
1248*0b57cec5SDimitry Andric     const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
1249*0b57cec5SDimitry Andric     const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
1250*0b57cec5SDimitry Andric     const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
1251*0b57cec5SDimitry Andric     const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
1252*0b57cec5SDimitry Andric     const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
1253*0b57cec5SDimitry Andric     const int MaxWaitStates = 18;
1254*0b57cec5SDimitry Andric     unsigned Reg = Op.getReg();
1255*0b57cec5SDimitry Andric     unsigned HazardDefLatency = 0;
1256*0b57cec5SDimitry Andric 
1257*0b57cec5SDimitry Andric     auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
1258*0b57cec5SDimitry Andric                               (MachineInstr *MI) {
1259*0b57cec5SDimitry Andric       if (!IsMFMAFn(MI))
1260*0b57cec5SDimitry Andric         return false;
1261*0b57cec5SDimitry Andric       unsigned DstReg = MI->getOperand(0).getReg();
1262*0b57cec5SDimitry Andric       if (DstReg == Reg)
1263*0b57cec5SDimitry Andric         return false;
1264*0b57cec5SDimitry Andric       HazardDefLatency = std::max(HazardDefLatency,
1265*0b57cec5SDimitry Andric                                   TSchedModel.computeInstrLatency(MI));
1266*0b57cec5SDimitry Andric       return TRI.regsOverlap(DstReg, Reg);
1267*0b57cec5SDimitry Andric     };
1268*0b57cec5SDimitry Andric 
1269*0b57cec5SDimitry Andric     int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
1270*0b57cec5SDimitry Andric                                                    MaxWaitStates);
1271*0b57cec5SDimitry Andric     int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
1272*0b57cec5SDimitry Andric     int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1273*0b57cec5SDimitry Andric     int OpNo = MI->getOperandNo(&Op);
1274*0b57cec5SDimitry Andric     if (OpNo == SrcCIdx) {
1275*0b57cec5SDimitry Andric       NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
1276*0b57cec5SDimitry Andric     } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
1277*0b57cec5SDimitry Andric       switch (HazardDefLatency) {
1278*0b57cec5SDimitry Andric       case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
1279*0b57cec5SDimitry Andric                break;
1280*0b57cec5SDimitry Andric       case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
1281*0b57cec5SDimitry Andric                break;
1282*0b57cec5SDimitry Andric       case 16: LLVM_FALLTHROUGH;
1283*0b57cec5SDimitry Andric       default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
1284*0b57cec5SDimitry Andric                break;
1285*0b57cec5SDimitry Andric       }
1286*0b57cec5SDimitry Andric     } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
1287*0b57cec5SDimitry Andric       switch (HazardDefLatency) {
1288*0b57cec5SDimitry Andric       case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
1289*0b57cec5SDimitry Andric                break;
1290*0b57cec5SDimitry Andric       case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
1291*0b57cec5SDimitry Andric                break;
1292*0b57cec5SDimitry Andric       case 16: LLVM_FALLTHROUGH;
1293*0b57cec5SDimitry Andric       default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
1294*0b57cec5SDimitry Andric                break;
1295*0b57cec5SDimitry Andric       }
1296*0b57cec5SDimitry Andric     }
1297*0b57cec5SDimitry Andric 
1298*0b57cec5SDimitry Andric     int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1299*0b57cec5SDimitry Andric     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1300*0b57cec5SDimitry Andric 
1301*0b57cec5SDimitry Andric     if (WaitStatesNeeded == MaxWaitStates)
1302*0b57cec5SDimitry Andric       return WaitStatesNeeded; // Early exit.
1303*0b57cec5SDimitry Andric 
1304*0b57cec5SDimitry Andric     auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
1305*0b57cec5SDimitry Andric       if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
1306*0b57cec5SDimitry Andric         return false;
1307*0b57cec5SDimitry Andric       unsigned DstReg = MI->getOperand(0).getReg();
1308*0b57cec5SDimitry Andric       return TRI.regsOverlap(Reg, DstReg);
1309*0b57cec5SDimitry Andric     };
1310*0b57cec5SDimitry Andric 
1311*0b57cec5SDimitry Andric     const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
1312*0b57cec5SDimitry Andric     const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
1313*0b57cec5SDimitry Andric     const int AccVGPRWriteAccVgprReadWaitStates = 3;
1314*0b57cec5SDimitry Andric     NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
1315*0b57cec5SDimitry Andric     if (OpNo == SrcCIdx)
1316*0b57cec5SDimitry Andric       NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
1317*0b57cec5SDimitry Andric     else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
1318*0b57cec5SDimitry Andric       NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
1319*0b57cec5SDimitry Andric 
1320*0b57cec5SDimitry Andric     WaitStatesNeededForUse = NeedWaitStates -
1321*0b57cec5SDimitry Andric       getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
1322*0b57cec5SDimitry Andric     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1323*0b57cec5SDimitry Andric 
1324*0b57cec5SDimitry Andric     if (WaitStatesNeeded == MaxWaitStates)
1325*0b57cec5SDimitry Andric       return WaitStatesNeeded; // Early exit.
1326*0b57cec5SDimitry Andric   }
1327*0b57cec5SDimitry Andric 
1328*0b57cec5SDimitry Andric   if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
1329*0b57cec5SDimitry Andric     const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
1330*0b57cec5SDimitry Andric     const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
1331*0b57cec5SDimitry Andric     const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
1332*0b57cec5SDimitry Andric     const int MaxWaitStates = 13;
1333*0b57cec5SDimitry Andric     unsigned DstReg = MI->getOperand(0).getReg();
1334*0b57cec5SDimitry Andric     unsigned HazardDefLatency = 0;
1335*0b57cec5SDimitry Andric 
1336*0b57cec5SDimitry Andric     auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
1337*0b57cec5SDimitry Andric                          (MachineInstr *MI) {
1338*0b57cec5SDimitry Andric       if (!IsMFMAFn(MI))
1339*0b57cec5SDimitry Andric         return false;
1340*0b57cec5SDimitry Andric       unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
1341*0b57cec5SDimitry Andric       HazardDefLatency = std::max(HazardDefLatency,
1342*0b57cec5SDimitry Andric                                   TSchedModel.computeInstrLatency(MI));
1343*0b57cec5SDimitry Andric       return TRI.regsOverlap(Reg, DstReg);
1344*0b57cec5SDimitry Andric     };
1345*0b57cec5SDimitry Andric 
1346*0b57cec5SDimitry Andric     int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
1347*0b57cec5SDimitry Andric     int NeedWaitStates;
1348*0b57cec5SDimitry Andric     switch (HazardDefLatency) {
1349*0b57cec5SDimitry Andric     case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
1350*0b57cec5SDimitry Andric              break;
1351*0b57cec5SDimitry Andric     case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
1352*0b57cec5SDimitry Andric              break;
1353*0b57cec5SDimitry Andric     case 16: LLVM_FALLTHROUGH;
1354*0b57cec5SDimitry Andric     default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
1355*0b57cec5SDimitry Andric              break;
1356*0b57cec5SDimitry Andric     }
1357*0b57cec5SDimitry Andric 
1358*0b57cec5SDimitry Andric     int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
1359*0b57cec5SDimitry Andric     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1360*0b57cec5SDimitry Andric   }
1361*0b57cec5SDimitry Andric 
1362*0b57cec5SDimitry Andric   return WaitStatesNeeded;
1363*0b57cec5SDimitry Andric }
1364*0b57cec5SDimitry Andric 
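// Compute the wait states required before a load or store whose VGPR operand
// was produced by a v_accvgpr_read, with an extra case for a v_accvgpr_read
// that itself closely follows a VALU write of that register.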
1365*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
1366*0b57cec5SDimitry Andric   if (!ST.hasMAIInsts())
1367*0b57cec5SDimitry Andric     return 0;
1368*0b57cec5SDimitry Andric 
1369*0b57cec5SDimitry Andric   int WaitStatesNeeded = 0;
1370*0b57cec5SDimitry Andric 
1371*0b57cec5SDimitry Andric   auto IsAccVgprReadFn = [] (MachineInstr *MI) {
1372*0b57cec5SDimitry Andric     return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
1373*0b57cec5SDimitry Andric   };
1374*0b57cec5SDimitry Andric 
1375*0b57cec5SDimitry Andric   for (const MachineOperand &Op : MI->explicit_uses()) {
1376*0b57cec5SDimitry Andric     if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
1377*0b57cec5SDimitry Andric       continue;
1378*0b57cec5SDimitry Andric 
1379*0b57cec5SDimitry Andric     unsigned Reg = Op.getReg();
1380*0b57cec5SDimitry Andric 
1381*0b57cec5SDimitry Andric     const int AccVgprReadLdStWaitStates = 2;
1382*0b57cec5SDimitry Andric     const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
1383*0b57cec5SDimitry Andric     const int MaxWaitStates = 2;
1384*0b57cec5SDimitry Andric 
1385*0b57cec5SDimitry Andric     int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
1386*0b57cec5SDimitry Andric       getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
1387*0b57cec5SDimitry Andric     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1388*0b57cec5SDimitry Andric 
1389*0b57cec5SDimitry Andric     if (WaitStatesNeeded == MaxWaitStates)
1390*0b57cec5SDimitry Andric       return WaitStatesNeeded; // Early exit.
1391*0b57cec5SDimitry Andric 
1392*0b57cec5SDimitry Andric     auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
1393*0b57cec5SDimitry Andric       if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
1394*0b57cec5SDimitry Andric         return false;
1395*0b57cec5SDimitry Andric       auto IsVALUFn = [] (MachineInstr *MI) {
1396*0b57cec5SDimitry Andric         return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
1397*0b57cec5SDimitry Andric       };
1398*0b57cec5SDimitry Andric       return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
1399*0b57cec5SDimitry Andric              std::numeric_limits<int>::max();
1400*0b57cec5SDimitry Andric     };
1401*0b57cec5SDimitry Andric 
1402*0b57cec5SDimitry Andric     WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
1403*0b57cec5SDimitry Andric       getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
1404*0b57cec5SDimitry Andric     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1405*0b57cec5SDimitry Andric   }
1406*0b57cec5SDimitry Andric 
1407*0b57cec5SDimitry Andric   return WaitStatesNeeded;
1408*0b57cec5SDimitry Andric }
1409