10b57cec5SDimitry Andric //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements hazard recognizers for scheduling on GCN processors. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "GCNHazardRecognizer.h" 14e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 150b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 16*81ad6265SDimitry Andric #include "SIMachineFunctionInfo.h" 170b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 19e8d8bef9SDimitry Andric #include "llvm/Support/TargetParser.h" 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric using namespace llvm; 220b57cec5SDimitry Andric 23*81ad6265SDimitry Andric namespace { 24*81ad6265SDimitry Andric 25*81ad6265SDimitry Andric struct MFMAPaddingRatioParser : public cl::parser<unsigned> { 26*81ad6265SDimitry Andric MFMAPaddingRatioParser(cl::Option &O) : cl::parser<unsigned>(O) {} 27*81ad6265SDimitry Andric 28*81ad6265SDimitry Andric bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) { 29*81ad6265SDimitry Andric if (Arg.getAsInteger(0, Value)) 30*81ad6265SDimitry Andric return O.error("'" + Arg + "' value invalid for uint argument!"); 31*81ad6265SDimitry Andric 32*81ad6265SDimitry Andric if (Value > 100) 
33*81ad6265SDimitry Andric return O.error("'" + Arg + "' value must be in the range [0, 100]!"); 34*81ad6265SDimitry Andric 35*81ad6265SDimitry Andric return false; 36*81ad6265SDimitry Andric } 37*81ad6265SDimitry Andric }; 38*81ad6265SDimitry Andric 39*81ad6265SDimitry Andric } // end anonymous namespace 40*81ad6265SDimitry Andric 41*81ad6265SDimitry Andric static cl::opt<unsigned, false, MFMAPaddingRatioParser> 42*81ad6265SDimitry Andric MFMAPaddingRatio("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden, 43*81ad6265SDimitry Andric cl::desc("Fill a percentage of the latency between " 44*81ad6265SDimitry Andric "neighboring MFMA with s_nops.")); 45*81ad6265SDimitry Andric 460b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 47*81ad6265SDimitry Andric // Hazard Recognizer Implementation 480b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 490b57cec5SDimitry Andric 50fe6060f1SDimitry Andric static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, 51fe6060f1SDimitry Andric const GCNSubtarget &ST); 52fe6060f1SDimitry Andric 530b57cec5SDimitry Andric GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 540b57cec5SDimitry Andric IsHazardRecognizerMode(false), 550b57cec5SDimitry Andric CurrCycleInstr(nullptr), 560b57cec5SDimitry Andric MF(MF), 570b57cec5SDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), 580b57cec5SDimitry Andric TII(*ST.getInstrInfo()), 590b57cec5SDimitry Andric TRI(TII.getRegisterInfo()), 600b57cec5SDimitry Andric ClauseUses(TRI.getNumRegUnits()), 610b57cec5SDimitry Andric ClauseDefs(TRI.getNumRegUnits()) { 62fe6060f1SDimitry Andric MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 
19 : 5; 630b57cec5SDimitry Andric TSchedModel.init(&ST); 64fe6060f1SDimitry Andric RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 67e8d8bef9SDimitry Andric void GCNHazardRecognizer::Reset() { 68e8d8bef9SDimitry Andric EmittedInstrs.clear(); 69e8d8bef9SDimitry Andric } 70e8d8bef9SDimitry Andric 710b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 720b57cec5SDimitry Andric EmitInstruction(SU->getInstr()); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 760b57cec5SDimitry Andric CurrCycleInstr = MI; 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static bool isDivFMas(unsigned Opcode) { 80e8d8bef9SDimitry Andric return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64; 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric static bool isSGetReg(unsigned Opcode) { 840b57cec5SDimitry Andric return Opcode == AMDGPU::S_GETREG_B32; 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric static bool isSSetReg(unsigned Opcode) { 88e8d8bef9SDimitry Andric switch (Opcode) { 89e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_B32: 90e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_B32_mode: 91e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_IMM32_B32: 92e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_IMM32_B32_mode: 93e8d8bef9SDimitry Andric return true; 94e8d8bef9SDimitry Andric } 95e8d8bef9SDimitry Andric return false; 960b57cec5SDimitry Andric } 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric static bool isRWLane(unsigned Opcode) { 990b57cec5SDimitry Andric return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32; 1000b57cec5SDimitry Andric } 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric static bool isRFE(unsigned Opcode) { 1030b57cec5SDimitry 
Andric return Opcode == AMDGPU::S_RFE_B64; 1040b57cec5SDimitry Andric } 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric static bool isSMovRel(unsigned Opcode) { 1070b57cec5SDimitry Andric switch (Opcode) { 1080b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B32: 1090b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B64: 1100b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B32: 1110b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B64: 1120b57cec5SDimitry Andric return true; 1130b57cec5SDimitry Andric default: 1140b57cec5SDimitry Andric return false; 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric 118fe6060f1SDimitry Andric static bool isDGEMM(unsigned Opcode) { 119*81ad6265SDimitry Andric return AMDGPU::getMAIIsDGEMM(Opcode); 120fe6060f1SDimitry Andric } 121fe6060f1SDimitry Andric 122fe6060f1SDimitry Andric static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) { 123fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 124fe6060f1SDimitry Andric 125fe6060f1SDimitry Andric if (!SIInstrInfo::isMAI(MI) || 126fe6060f1SDimitry Andric isDGEMM(Opcode) || 127fe6060f1SDimitry Andric Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || 128fe6060f1SDimitry Andric Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) 129fe6060f1SDimitry Andric return false; 130fe6060f1SDimitry Andric 131*81ad6265SDimitry Andric if (!ST.hasGFX940Insts()) 132fe6060f1SDimitry Andric return true; 133*81ad6265SDimitry Andric 134*81ad6265SDimitry Andric return AMDGPU::getMAIIsGFX940XDL(Opcode); 135fe6060f1SDimitry Andric } 136fe6060f1SDimitry Andric 1370b57cec5SDimitry Andric static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, 1380b57cec5SDimitry Andric const MachineInstr &MI) { 1390b57cec5SDimitry Andric if (TII.isAlwaysGDS(MI.getOpcode())) 1400b57cec5SDimitry Andric return true; 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric switch (MI.getOpcode()) { 1430b57cec5SDimitry Andric case AMDGPU::S_SENDMSG: 1440b57cec5SDimitry Andric case AMDGPU::S_SENDMSGHALT: 
1450b57cec5SDimitry Andric case AMDGPU::S_TTRACEDATA: 1460b57cec5SDimitry Andric return true; 1470b57cec5SDimitry Andric // These DS opcodes don't support GDS. 1480b57cec5SDimitry Andric case AMDGPU::DS_NOP: 1490b57cec5SDimitry Andric case AMDGPU::DS_PERMUTE_B32: 1500b57cec5SDimitry Andric case AMDGPU::DS_BPERMUTE_B32: 1510b57cec5SDimitry Andric return false; 1520b57cec5SDimitry Andric default: 1530b57cec5SDimitry Andric if (TII.isDS(MI.getOpcode())) { 1540b57cec5SDimitry Andric int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 1550b57cec5SDimitry Andric AMDGPU::OpName::gds); 1560b57cec5SDimitry Andric if (MI.getOperand(GDS).getImm()) 1570b57cec5SDimitry Andric return true; 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric return false; 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric static bool isPermlane(const MachineInstr &MI) { 1640b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 165e8d8bef9SDimitry Andric return Opcode == AMDGPU::V_PERMLANE16_B32_e64 || 166e8d8bef9SDimitry Andric Opcode == AMDGPU::V_PERMLANEX16_B32_e64; 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 169*81ad6265SDimitry Andric static bool isLdsDma(const MachineInstr &MI) { 170*81ad6265SDimitry Andric return SIInstrInfo::isVALU(MI) && 171*81ad6265SDimitry Andric (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI)); 172*81ad6265SDimitry Andric } 173*81ad6265SDimitry Andric 1740b57cec5SDimitry Andric static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 1750b57cec5SDimitry Andric const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 1760b57cec5SDimitry Andric AMDGPU::OpName::simm16); 1770b57cec5SDimitry Andric return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric ScheduleHazardRecognizer::HazardType 1810b57cec5SDimitry Andric GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 
1820b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 183e8d8bef9SDimitry Andric // If we are not in "HazardRecognizerMode" and therefore not being run from 184e8d8bef9SDimitry Andric // the scheduler, track possible stalls from hazards but don't insert noops. 185e8d8bef9SDimitry Andric auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard; 186e8d8bef9SDimitry Andric 1870b57cec5SDimitry Andric if (MI->isBundle()) 1880b57cec5SDimitry Andric return NoHazard; 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 191e8d8bef9SDimitry Andric return HazardType; 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0) 194e8d8bef9SDimitry Andric return HazardType; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric if (checkFPAtomicToDenormModeHazard(MI) > 0) 197e8d8bef9SDimitry Andric return HazardType; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 2000b57cec5SDimitry Andric return NoHazard; 2010b57cec5SDimitry Andric 202fe6060f1SDimitry Andric // FIXME: Should flat be considered vmem? 
203fe6060f1SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 204fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI)) 205fe6060f1SDimitry Andric && checkVMEMHazards(MI) > 0) 206fe6060f1SDimitry Andric return HazardType; 207fe6060f1SDimitry Andric 2080b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 209e8d8bef9SDimitry Andric return HazardType; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 212e8d8bef9SDimitry Andric return HazardType; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 215e8d8bef9SDimitry Andric return HazardType; 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) 218e8d8bef9SDimitry Andric return HazardType; 2190b57cec5SDimitry Andric 220fe6060f1SDimitry Andric if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) || 221fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) || 222fe6060f1SDimitry Andric SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0) 223fe6060f1SDimitry Andric return HazardType; 224fe6060f1SDimitry Andric 2250b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 226e8d8bef9SDimitry Andric return HazardType; 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 229e8d8bef9SDimitry Andric return HazardType; 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) 232e8d8bef9SDimitry Andric return HazardType; 2330b57cec5SDimitry Andric 234*81ad6265SDimitry Andric if (((ST.hasReadM0MovRelInterpHazard() && 235*81ad6265SDimitry Andric (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) || 236*81ad6265SDimitry Andric (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) || 237*81ad6265SDimitry Andric (ST.hasReadM0LdsDmaHazard() && 
isLdsDma(*MI)) || 238*81ad6265SDimitry Andric (ST.hasReadM0LdsDirectHazard() && 239*81ad6265SDimitry Andric MI->readsRegister(AMDGPU::LDS_DIRECT))) && 2400b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 241e8d8bef9SDimitry Andric return HazardType; 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0) 244e8d8bef9SDimitry Andric return HazardType; 2450b57cec5SDimitry Andric 246e8d8bef9SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 247e8d8bef9SDimitry Andric SIInstrInfo::isFLAT(*MI) || 248e8d8bef9SDimitry Andric SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0) 249e8d8bef9SDimitry Andric return HazardType; 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) 252e8d8bef9SDimitry Andric return HazardType; 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric return NoHazard; 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 257e8d8bef9SDimitry Andric static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII, 258e8d8bef9SDimitry Andric unsigned Quantity) { 259e8d8bef9SDimitry Andric while (Quantity > 0) { 260e8d8bef9SDimitry Andric unsigned Arg = std::min(Quantity, 8u); 261e8d8bef9SDimitry Andric Quantity -= Arg; 2620b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)) 263e8d8bef9SDimitry Andric .addImm(Arg - 1); 264e8d8bef9SDimitry Andric } 2650b57cec5SDimitry Andric } 2660b57cec5SDimitry Andric 267*81ad6265SDimitry Andric unsigned 268*81ad6265SDimitry Andric GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const { 269*81ad6265SDimitry Andric const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI); 270*81ad6265SDimitry Andric assert(TSchedModel.getWriteProcResBegin(SC) != 271*81ad6265SDimitry Andric TSchedModel.getWriteProcResEnd(SC)); 272*81ad6265SDimitry Andric return TSchedModel.getWriteProcResBegin(SC)->Cycles; 273*81ad6265SDimitry Andric } 
274*81ad6265SDimitry Andric 2750b57cec5SDimitry Andric void GCNHazardRecognizer::processBundle() { 2760b57cec5SDimitry Andric MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator()); 2770b57cec5SDimitry Andric MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end(); 2780b57cec5SDimitry Andric // Check bundled MachineInstr's for hazards. 2790b57cec5SDimitry Andric for (; MI != E && MI->isInsideBundle(); ++MI) { 2800b57cec5SDimitry Andric CurrCycleInstr = &*MI; 2810b57cec5SDimitry Andric unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr); 2820b57cec5SDimitry Andric 283e8d8bef9SDimitry Andric if (IsHazardRecognizerMode) { 2840b57cec5SDimitry Andric fixHazards(CurrCycleInstr); 2850b57cec5SDimitry Andric 286e8d8bef9SDimitry Andric insertNoopsInBundle(CurrCycleInstr, TII, WaitStates); 287e8d8bef9SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric // It’s unnecessary to track more than MaxLookAhead instructions. Since we 2900b57cec5SDimitry Andric // include the bundled MI directly after, only add a maximum of 2910b57cec5SDimitry Andric // (MaxLookAhead - 1) noops to EmittedInstrs. 
2920b57cec5SDimitry Andric for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i) 2930b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 2940b57cec5SDimitry Andric 2950b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 2960b57cec5SDimitry Andric EmittedInstrs.resize(MaxLookAhead); 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric CurrCycleInstr = nullptr; 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 3020b57cec5SDimitry Andric IsHazardRecognizerMode = true; 3030b57cec5SDimitry Andric CurrCycleInstr = MI; 3040b57cec5SDimitry Andric unsigned W = PreEmitNoopsCommon(MI); 3050b57cec5SDimitry Andric fixHazards(MI); 3060b57cec5SDimitry Andric CurrCycleInstr = nullptr; 3070b57cec5SDimitry Andric return W; 3080b57cec5SDimitry Andric } 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { 3110b57cec5SDimitry Andric if (MI->isBundle()) 3120b57cec5SDimitry Andric return 0; 3130b57cec5SDimitry Andric 314e8d8bef9SDimitry Andric int WaitStates = 0; 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI)) 3170b57cec5SDimitry Andric return std::max(WaitStates, checkSMRDHazards(MI)); 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug()) 3200b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 3250b57cec5SDimitry Andric return WaitStates; 3260b57cec5SDimitry Andric 327fe6060f1SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) 328fe6060f1SDimitry Andric WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 329fe6060f1SDimitry Andric 3300b57cec5SDimitry 
Andric if (SIInstrInfo::isVALU(*MI)) 3310b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkVALUHazards(MI)); 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI)) 3340b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode())) 3370b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric if (isRWLane(MI->getOpcode())) 3400b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); 3410b57cec5SDimitry Andric 342fe6060f1SDimitry Andric if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) || 343fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) || 344fe6060f1SDimitry Andric SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0) 345fe6060f1SDimitry Andric WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI)); 346fe6060f1SDimitry Andric 3470b57cec5SDimitry Andric if (MI->isInlineAsm()) 3480b57cec5SDimitry Andric return std::max(WaitStates, checkInlineAsmHazards(MI)); 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode())) 3510b57cec5SDimitry Andric return std::max(WaitStates, checkGetRegHazards(MI)); 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode())) 3540b57cec5SDimitry Andric return std::max(WaitStates, checkSetRegHazards(MI)); 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric if (isRFE(MI->getOpcode())) 3570b57cec5SDimitry Andric return std::max(WaitStates, checkRFEHazards(MI)); 3580b57cec5SDimitry Andric 359*81ad6265SDimitry Andric if ((ST.hasReadM0MovRelInterpHazard() && 360*81ad6265SDimitry Andric (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) || 361*81ad6265SDimitry Andric (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) || 362*81ad6265SDimitry Andric (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) 
|| 363*81ad6265SDimitry Andric (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT))) 3640b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI)) 3670b57cec5SDimitry Andric return std::max(WaitStates, checkMAIHazards(MI)); 3680b57cec5SDimitry Andric 369e8d8bef9SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || 370e8d8bef9SDimitry Andric SIInstrInfo::isFLAT(*MI) || 371e8d8bef9SDimitry Andric SIInstrInfo::isDS(*MI)) 3720b57cec5SDimitry Andric return std::max(WaitStates, checkMAILdStHazards(MI)); 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric return WaitStates; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric void GCNHazardRecognizer::EmitNoop() { 3780b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric void GCNHazardRecognizer::AdvanceCycle() { 3820b57cec5SDimitry Andric // When the scheduler detects a stall, it will call AdvanceCycle() without 3830b57cec5SDimitry Andric // emitting any instructions. 
384e8d8bef9SDimitry Andric if (!CurrCycleInstr) { 385e8d8bef9SDimitry Andric EmittedInstrs.push_front(nullptr); 3860b57cec5SDimitry Andric return; 387e8d8bef9SDimitry Andric } 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric if (CurrCycleInstr->isBundle()) { 3900b57cec5SDimitry Andric processBundle(); 3910b57cec5SDimitry Andric return; 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); 395349cc55cSDimitry Andric if (!NumWaitStates) { 396349cc55cSDimitry Andric CurrCycleInstr = nullptr; 397349cc55cSDimitry Andric return; 398349cc55cSDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric // Keep track of emitted instructions 4010b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric // Add a nullptr for each additional wait state after the first. Make sure 4040b57cec5SDimitry Andric // not to add more than getMaxLookAhead() items to the list, since we 4050b57cec5SDimitry Andric // truncate the list to that size right after this loop. 4060b57cec5SDimitry Andric for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 4070b57cec5SDimitry Andric i < e; ++i) { 4080b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 4090b57cec5SDimitry Andric } 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric // getMaxLookahead() is the largest number of wait states we will ever need 4120b57cec5SDimitry Andric // to insert, so there is no point in keeping track of more than that many 4130b57cec5SDimitry Andric // wait states. 
4140b57cec5SDimitry Andric EmittedInstrs.resize(getMaxLookAhead()); 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric CurrCycleInstr = nullptr; 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric void GCNHazardRecognizer::RecedeCycle() { 4200b57cec5SDimitry Andric llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 4210b57cec5SDimitry Andric } 4220b57cec5SDimitry Andric 4230b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4240b57cec5SDimitry Andric // Helper Functions 4250b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4260b57cec5SDimitry Andric 427*81ad6265SDimitry Andric typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult; 428*81ad6265SDimitry Andric 429fe6060f1SDimitry Andric typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn; 430*81ad6265SDimitry Andric typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn; 431*81ad6265SDimitry Andric 432*81ad6265SDimitry Andric // Search for a hazard in a block and its predecessors. 433*81ad6265SDimitry Andric template <typename StateT> 434*81ad6265SDimitry Andric static bool 435*81ad6265SDimitry Andric hasHazard(StateT State, 436*81ad6265SDimitry Andric function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard, 437*81ad6265SDimitry Andric function_ref<void(StateT &, const MachineInstr &)> UpdateState, 438*81ad6265SDimitry Andric const MachineBasicBlock *MBB, 439*81ad6265SDimitry Andric MachineBasicBlock::const_reverse_instr_iterator I, 440*81ad6265SDimitry Andric DenseSet<const MachineBasicBlock *> &Visited) { 441*81ad6265SDimitry Andric for (auto E = MBB->instr_rend(); I != E; ++I) { 442*81ad6265SDimitry Andric // No need to look at parent BUNDLE instructions. 
443*81ad6265SDimitry Andric if (I->isBundle()) 444*81ad6265SDimitry Andric continue; 445*81ad6265SDimitry Andric 446*81ad6265SDimitry Andric switch (IsHazard(State, *I)) { 447*81ad6265SDimitry Andric case HazardFound: 448*81ad6265SDimitry Andric return true; 449*81ad6265SDimitry Andric case HazardExpired: 450*81ad6265SDimitry Andric return false; 451*81ad6265SDimitry Andric default: 452*81ad6265SDimitry Andric // Continue search 453*81ad6265SDimitry Andric break; 454*81ad6265SDimitry Andric } 455*81ad6265SDimitry Andric 456*81ad6265SDimitry Andric if (I->isInlineAsm() || I->isMetaInstruction()) 457*81ad6265SDimitry Andric continue; 458*81ad6265SDimitry Andric 459*81ad6265SDimitry Andric UpdateState(State, *I); 460*81ad6265SDimitry Andric } 461*81ad6265SDimitry Andric 462*81ad6265SDimitry Andric for (MachineBasicBlock *Pred : MBB->predecessors()) { 463*81ad6265SDimitry Andric if (!Visited.insert(Pred).second) 464*81ad6265SDimitry Andric continue; 465*81ad6265SDimitry Andric 466*81ad6265SDimitry Andric if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(), 467*81ad6265SDimitry Andric Visited)) 468*81ad6265SDimitry Andric return true; 469*81ad6265SDimitry Andric } 470*81ad6265SDimitry Andric 471*81ad6265SDimitry Andric return false; 472*81ad6265SDimitry Andric } 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric // Returns a minimum wait states since \p I walking all predecessors. 4750b57cec5SDimitry Andric // Only scans until \p IsExpired does not return true. 4760b57cec5SDimitry Andric // Can only be run in a hazard recognizer mode. 
477*81ad6265SDimitry Andric static int getWaitStatesSince( 478*81ad6265SDimitry Andric GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB, 479*81ad6265SDimitry Andric MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates, 480*81ad6265SDimitry Andric IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited, 481*81ad6265SDimitry Andric GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) { 4820b57cec5SDimitry Andric for (auto E = MBB->instr_rend(); I != E; ++I) { 4830b57cec5SDimitry Andric // Don't add WaitStates for parent BUNDLE instructions. 4840b57cec5SDimitry Andric if (I->isBundle()) 4850b57cec5SDimitry Andric continue; 4860b57cec5SDimitry Andric 487fe6060f1SDimitry Andric if (IsHazard(*I)) 4880b57cec5SDimitry Andric return WaitStates; 4890b57cec5SDimitry Andric 490349cc55cSDimitry Andric if (I->isInlineAsm()) 4910b57cec5SDimitry Andric continue; 4920b57cec5SDimitry Andric 493*81ad6265SDimitry Andric WaitStates += GetNumWaitStates(*I); 4940b57cec5SDimitry Andric 495fe6060f1SDimitry Andric if (IsExpired(*I, WaitStates)) 4960b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 499fe6060f1SDimitry Andric int MinWaitStates = std::numeric_limits<int>::max(); 5000b57cec5SDimitry Andric for (MachineBasicBlock *Pred : MBB->predecessors()) { 5010b57cec5SDimitry Andric if (!Visited.insert(Pred).second) 5020b57cec5SDimitry Andric continue; 5030b57cec5SDimitry Andric 504*81ad6265SDimitry Andric int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates, 505*81ad6265SDimitry Andric IsExpired, Visited, GetNumWaitStates); 5060b57cec5SDimitry Andric 507fe6060f1SDimitry Andric MinWaitStates = std::min(MinWaitStates, W); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric return MinWaitStates; 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric static int 
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 514fe6060f1SDimitry Andric const MachineInstr *MI, IsExpiredFn IsExpired) { 5150b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 5160b57cec5SDimitry Andric return getWaitStatesSince(IsHazard, MI->getParent(), 5170b57cec5SDimitry Andric std::next(MI->getReverseIterator()), 5180b57cec5SDimitry Andric 0, IsExpired, Visited); 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) { 5220b57cec5SDimitry Andric if (IsHazardRecognizerMode) { 523fe6060f1SDimitry Andric auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) { 5240b57cec5SDimitry Andric return WaitStates >= Limit; 5250b57cec5SDimitry Andric }; 5260b57cec5SDimitry Andric return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn); 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric int WaitStates = 0; 5300b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 5310b57cec5SDimitry Andric if (MI) { 532fe6060f1SDimitry Andric if (IsHazard(*MI)) 5330b57cec5SDimitry Andric return WaitStates; 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric if (MI->isInlineAsm()) 5360b57cec5SDimitry Andric continue; 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric ++WaitStates; 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric if (WaitStates >= Limit) 5410b57cec5SDimitry Andric break; 5420b57cec5SDimitry Andric } 5430b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg, 5470b57cec5SDimitry Andric IsHazardFn IsHazardDef, 5480b57cec5SDimitry Andric int Limit) { 5490b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 5500b57cec5SDimitry Andric 551fe6060f1SDimitry Andric auto IsHazardFn = [IsHazardDef, TRI, 
Reg](const MachineInstr &MI) { 552fe6060f1SDimitry Andric return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI); 5530b57cec5SDimitry Andric }; 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 5560b57cec5SDimitry Andric } 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard, 5590b57cec5SDimitry Andric int Limit) { 560fe6060f1SDimitry Andric auto IsHazardFn = [IsHazard](const MachineInstr &MI) { 561fe6060f1SDimitry Andric return isSSetReg(MI.getOpcode()) && IsHazard(MI); 5620b57cec5SDimitry Andric }; 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 5650b57cec5SDimitry Andric } 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5680b57cec5SDimitry Andric // No-op Hazard Detection 5690b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5700b57cec5SDimitry Andric 571e8d8bef9SDimitry Andric static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV, 572e8d8bef9SDimitry Andric MCRegister Reg) { 5730b57cec5SDimitry Andric for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) 5740b57cec5SDimitry Andric BV.set(*RUI); 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric static void addRegsToSet(const SIRegisterInfo &TRI, 5780b57cec5SDimitry Andric iterator_range<MachineInstr::const_mop_iterator> Ops, 5790b57cec5SDimitry Andric BitVector &Set) { 5800b57cec5SDimitry Andric for (const MachineOperand &Op : Ops) { 5810b57cec5SDimitry Andric if (Op.isReg()) 582e8d8bef9SDimitry Andric addRegUnits(TRI, Set, Op.getReg().asMCReg()); 5830b57cec5SDimitry Andric } 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { 5870b57cec5SDimitry Andric // 
XXX: Do we need to worry about implicit operands 5880b57cec5SDimitry Andric addRegsToSet(TRI, MI.defs(), ClauseDefs); 5890b57cec5SDimitry Andric addRegsToSet(TRI, MI.uses(), ClauseUses); 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5925ffd83dbSDimitry Andric static bool breaksSMEMSoftClause(MachineInstr *MI) { 5935ffd83dbSDimitry Andric return !SIInstrInfo::isSMRD(*MI); 5945ffd83dbSDimitry Andric } 5955ffd83dbSDimitry Andric 5965ffd83dbSDimitry Andric static bool breaksVMEMSoftClause(MachineInstr *MI) { 5975ffd83dbSDimitry Andric return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI); 5985ffd83dbSDimitry Andric } 5995ffd83dbSDimitry Andric 6000b57cec5SDimitry Andric int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { 6010b57cec5SDimitry Andric // SMEM soft clause are only present on VI+, and only matter if xnack is 6020b57cec5SDimitry Andric // enabled. 6030b57cec5SDimitry Andric if (!ST.isXNACKEnabled()) 6040b57cec5SDimitry Andric return 0; 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric bool IsSMRD = TII.isSMRD(*MEM); 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric resetClause(); 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric // A soft-clause is any group of consecutive SMEM instructions. The 6110b57cec5SDimitry Andric // instructions in this group may return out of order and/or may be 6120b57cec5SDimitry Andric // replayed (i.e. the same instruction issued more than once). 6130b57cec5SDimitry Andric // 6140b57cec5SDimitry Andric // In order to handle these situations correctly we need to make sure that 6150b57cec5SDimitry Andric // when a clause has more than one instruction, no instruction in the clause 6160b57cec5SDimitry Andric // writes to a register that is read by another instruction in the clause 617*81ad6265SDimitry Andric // (including itself). If we encounter this situation, we need to break the 6180b57cec5SDimitry Andric // clause by inserting a non SMEM instruction. 
6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 6210b57cec5SDimitry Andric // When we hit a non-SMEM instruction then we have passed the start of the 6220b57cec5SDimitry Andric // clause and we can stop. 6230b57cec5SDimitry Andric if (!MI) 6240b57cec5SDimitry Andric break; 6250b57cec5SDimitry Andric 6265ffd83dbSDimitry Andric if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI)) 6270b57cec5SDimitry Andric break; 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric addClauseInst(*MI); 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric if (ClauseDefs.none()) 6330b57cec5SDimitry Andric return 0; 6340b57cec5SDimitry Andric 6350b57cec5SDimitry Andric // We need to make sure not to put loads and stores in the same clause if they 6360b57cec5SDimitry Andric // use the same address. For now, just start a new clause whenever we see a 6370b57cec5SDimitry Andric // store. 6380b57cec5SDimitry Andric if (MEM->mayStore()) 6390b57cec5SDimitry Andric return 1; 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric addClauseInst(*MEM); 6420b57cec5SDimitry Andric 6430b57cec5SDimitry Andric // If the set of defs and uses intersect then we cannot add this instruction 6440b57cec5SDimitry Andric // to the clause, so we have a hazard. 6450b57cec5SDimitry Andric return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0; 6460b57cec5SDimitry Andric } 6470b57cec5SDimitry Andric 6480b57cec5SDimitry Andric int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 6490b57cec5SDimitry Andric int WaitStatesNeeded = 0; 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric WaitStatesNeeded = checkSoftClauseHazards(SMRD); 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric // This SMRD hazard only affects SI. 
6540b57cec5SDimitry Andric if (!ST.hasSMRDReadVALUDefHazard()) 6550b57cec5SDimitry Andric return WaitStatesNeeded; 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric // A read of an SGPR by SMRD instruction requires 4 wait states when the 6580b57cec5SDimitry Andric // SGPR was written by a VALU instruction. 6590b57cec5SDimitry Andric int SmrdSgprWaitStates = 4; 660fe6060f1SDimitry Andric auto IsHazardDefFn = [this](const MachineInstr &MI) { 661fe6060f1SDimitry Andric return TII.isVALU(MI); 662fe6060f1SDimitry Andric }; 663fe6060f1SDimitry Andric auto IsBufferHazardDefFn = [this](const MachineInstr &MI) { 664fe6060f1SDimitry Andric return TII.isSALU(MI); 665fe6060f1SDimitry Andric }; 6660b57cec5SDimitry Andric 6670b57cec5SDimitry Andric bool IsBufferSMRD = TII.isBufferSMRD(*SMRD); 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric for (const MachineOperand &Use : SMRD->uses()) { 6700b57cec5SDimitry Andric if (!Use.isReg()) 6710b57cec5SDimitry Andric continue; 6720b57cec5SDimitry Andric int WaitStatesNeededForUse = 6730b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 6740b57cec5SDimitry Andric SmrdSgprWaitStates); 6750b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6760b57cec5SDimitry Andric 6770b57cec5SDimitry Andric // This fixes what appears to be undocumented hardware behavior in SI where 6780b57cec5SDimitry Andric // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor 6790b57cec5SDimitry Andric // needs some number of nops in between. We don't know how many we need, but 6800b57cec5SDimitry Andric // let's use 4. 
This wasn't discovered before probably because the only 6810b57cec5SDimitry Andric // case when this happens is when we expand a 64-bit pointer into a full 6820b57cec5SDimitry Andric // descriptor and use s_buffer_load_dword instead of s_load_dword, which was 6830b57cec5SDimitry Andric // probably never encountered in the closed-source land. 6840b57cec5SDimitry Andric if (IsBufferSMRD) { 6850b57cec5SDimitry Andric int WaitStatesNeededForUse = 6860b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 6870b57cec5SDimitry Andric IsBufferHazardDefFn, 6880b57cec5SDimitry Andric SmrdSgprWaitStates); 6890b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6900b57cec5SDimitry Andric } 6910b57cec5SDimitry Andric } 6920b57cec5SDimitry Andric 6930b57cec5SDimitry Andric return WaitStatesNeeded; 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 6970b57cec5SDimitry Andric if (!ST.hasVMEMReadSGPRVALUDefHazard()) 6980b57cec5SDimitry Andric return 0; 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric int WaitStatesNeeded = checkSoftClauseHazards(VMEM); 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric // A read of an SGPR by a VMEM instruction requires 5 wait states when the 7030b57cec5SDimitry Andric // SGPR was written by a VALU Instruction. 
7040b57cec5SDimitry Andric const int VmemSgprWaitStates = 5; 705fe6060f1SDimitry Andric auto IsHazardDefFn = [this](const MachineInstr &MI) { 706fe6060f1SDimitry Andric return TII.isVALU(MI); 707fe6060f1SDimitry Andric }; 7080b57cec5SDimitry Andric for (const MachineOperand &Use : VMEM->uses()) { 709fe6060f1SDimitry Andric if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg())) 7100b57cec5SDimitry Andric continue; 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric int WaitStatesNeededForUse = 7130b57cec5SDimitry Andric VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 7140b57cec5SDimitry Andric VmemSgprWaitStates); 7150b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric return WaitStatesNeeded; 7180b57cec5SDimitry Andric } 7190b57cec5SDimitry Andric 7200b57cec5SDimitry Andric int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 7210b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 7220b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric // Check for DPP VGPR read after VALU VGPR write and EXEC write. 
7250b57cec5SDimitry Andric int DppVgprWaitStates = 2; 7260b57cec5SDimitry Andric int DppExecWaitStates = 5; 7270b57cec5SDimitry Andric int WaitStatesNeeded = 0; 728fe6060f1SDimitry Andric auto IsHazardDefFn = [TII](const MachineInstr &MI) { 729fe6060f1SDimitry Andric return TII->isVALU(MI); 730fe6060f1SDimitry Andric }; 7310b57cec5SDimitry Andric 7320b57cec5SDimitry Andric for (const MachineOperand &Use : DPP->uses()) { 7330b57cec5SDimitry Andric if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 7340b57cec5SDimitry Andric continue; 7350b57cec5SDimitry Andric int WaitStatesNeededForUse = 736fe6060f1SDimitry Andric DppVgprWaitStates - getWaitStatesSinceDef( 737fe6060f1SDimitry Andric Use.getReg(), 738fe6060f1SDimitry Andric [](const MachineInstr &) { return true; }, 7390b57cec5SDimitry Andric DppVgprWaitStates); 7400b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 7410b57cec5SDimitry Andric } 7420b57cec5SDimitry Andric 7430b57cec5SDimitry Andric WaitStatesNeeded = std::max( 7440b57cec5SDimitry Andric WaitStatesNeeded, 7450b57cec5SDimitry Andric DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn, 7460b57cec5SDimitry Andric DppExecWaitStates)); 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric return WaitStatesNeeded; 7490b57cec5SDimitry Andric } 7500b57cec5SDimitry Andric 7510b57cec5SDimitry Andric int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 7520b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7530b57cec5SDimitry Andric 7540b57cec5SDimitry Andric // v_div_fmas requires 4 wait states after a write to vcc from a VALU 7550b57cec5SDimitry Andric // instruction. 
7560b57cec5SDimitry Andric const int DivFMasWaitStates = 4; 757fe6060f1SDimitry Andric auto IsHazardDefFn = [TII](const MachineInstr &MI) { 758fe6060f1SDimitry Andric return TII->isVALU(MI); 759fe6060f1SDimitry Andric }; 7600b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn, 7610b57cec5SDimitry Andric DivFMasWaitStates); 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric return DivFMasWaitStates - WaitStatesNeeded; 7640b57cec5SDimitry Andric } 7650b57cec5SDimitry Andric 7660b57cec5SDimitry Andric int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 7670b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7680b57cec5SDimitry Andric unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric const int GetRegWaitStates = 2; 771fe6060f1SDimitry Andric auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) { 772fe6060f1SDimitry Andric return GetRegHWReg == getHWReg(TII, MI); 7730b57cec5SDimitry Andric }; 7740b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates); 7750b57cec5SDimitry Andric 7760b57cec5SDimitry Andric return GetRegWaitStates - WaitStatesNeeded; 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 7800b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7810b57cec5SDimitry Andric unsigned HWReg = getHWReg(TII, *SetRegInstr); 7820b57cec5SDimitry Andric 7830b57cec5SDimitry Andric const int SetRegWaitStates = ST.getSetRegWaitStates(); 784fe6060f1SDimitry Andric auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) { 785fe6060f1SDimitry Andric return HWReg == getHWReg(TII, MI); 7860b57cec5SDimitry Andric }; 7870b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates); 7880b57cec5SDimitry Andric return SetRegWaitStates - 
WaitStatesNeeded; 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 7920b57cec5SDimitry Andric if (!MI.mayStore()) 7930b57cec5SDimitry Andric return -1; 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7960b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 7970b57cec5SDimitry Andric const MCInstrDesc &Desc = MI.getDesc(); 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 8000b57cec5SDimitry Andric int VDataRCID = -1; 8010b57cec5SDimitry Andric if (VDataIdx != -1) 8020b57cec5SDimitry Andric VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 8050b57cec5SDimitry Andric // There is no hazard if the instruction does not use vector regs 8060b57cec5SDimitry Andric // (like wbinvl1) 8070b57cec5SDimitry Andric if (VDataIdx == -1) 8080b57cec5SDimitry Andric return -1; 8090b57cec5SDimitry Andric // For MUBUF/MTBUF instructions this hazard only exists if the 8100b57cec5SDimitry Andric // instruction is not using a register in the soffset field. 8110b57cec5SDimitry Andric const MachineOperand *SOffset = 8120b57cec5SDimitry Andric TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 8130b57cec5SDimitry Andric // If we have no soffset operand, then assume this field has been 8140b57cec5SDimitry Andric // hardcoded to zero. 
8150b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 8160b57cec5SDimitry Andric (!SOffset || !SOffset->isReg())) 8170b57cec5SDimitry Andric return VDataIdx; 8180b57cec5SDimitry Andric } 8190b57cec5SDimitry Andric 8200b57cec5SDimitry Andric // MIMG instructions create a hazard if they don't use a 256-bit T# and 8210b57cec5SDimitry Andric // the store size is greater than 8 bytes and they have more than two bits 8220b57cec5SDimitry Andric // of their dmask set. 8230b57cec5SDimitry Andric // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 8240b57cec5SDimitry Andric if (TII->isMIMG(MI)) { 8250b57cec5SDimitry Andric int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 8260b57cec5SDimitry Andric assert(SRsrcIdx != -1 && 8270b57cec5SDimitry Andric AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 8280b57cec5SDimitry Andric (void)SRsrcIdx; 8290b57cec5SDimitry Andric } 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric if (TII->isFLAT(MI)) { 8320b57cec5SDimitry Andric int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 8330b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 8340b57cec5SDimitry Andric return DataIdx; 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric return -1; 8380b57cec5SDimitry Andric } 8390b57cec5SDimitry Andric 840e8d8bef9SDimitry Andric int 841e8d8bef9SDimitry Andric GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, 8420b57cec5SDimitry Andric const MachineRegisterInfo &MRI) { 8430b57cec5SDimitry Andric // Helper to check for the hazard where VMEM instructions that store more than 8440b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 8450b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 8460b57cec5SDimitry Andric 847*81ad6265SDimitry Andric const int VALUWaitStates = ST.hasGFX940Insts() ? 
2 : 1; 8480b57cec5SDimitry Andric int WaitStatesNeeded = 0; 8490b57cec5SDimitry Andric 850fe6060f1SDimitry Andric if (!TRI->isVectorRegister(MRI, Def.getReg())) 8510b57cec5SDimitry Andric return WaitStatesNeeded; 8528bcb0991SDimitry Andric Register Reg = Def.getReg(); 853fe6060f1SDimitry Andric auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) { 854fe6060f1SDimitry Andric int DataIdx = createsVALUHazard(MI); 8550b57cec5SDimitry Andric return DataIdx >= 0 && 856fe6060f1SDimitry Andric TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg); 8570b57cec5SDimitry Andric }; 8580b57cec5SDimitry Andric int WaitStatesNeededForDef = 8590b57cec5SDimitry Andric VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates); 8600b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 8610b57cec5SDimitry Andric 8620b57cec5SDimitry Andric return WaitStatesNeeded; 8630b57cec5SDimitry Andric } 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 866*81ad6265SDimitry Andric int WaitStatesNeeded = 0; 867*81ad6265SDimitry Andric 868*81ad6265SDimitry Andric if (ST.hasTransForwardingHazard() && !SIInstrInfo::isTRANS(*VALU)) { 869*81ad6265SDimitry Andric const int TransDefWaitstates = 1; 870*81ad6265SDimitry Andric 871*81ad6265SDimitry Andric auto IsTransDefFn = [this, VALU](const MachineInstr &MI) { 872*81ad6265SDimitry Andric if (!SIInstrInfo::isTRANS(MI)) 873*81ad6265SDimitry Andric return false; 874*81ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 875*81ad6265SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 876*81ad6265SDimitry Andric Register Def = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)->getReg(); 877*81ad6265SDimitry Andric 878*81ad6265SDimitry Andric for (const MachineOperand &Use : VALU->explicit_uses()) { 879*81ad6265SDimitry Andric if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) 880*81ad6265SDimitry Andric return 
true; 881*81ad6265SDimitry Andric } 882*81ad6265SDimitry Andric 883*81ad6265SDimitry Andric return false; 884*81ad6265SDimitry Andric }; 885*81ad6265SDimitry Andric 886*81ad6265SDimitry Andric int WaitStatesNeededForDef = 887*81ad6265SDimitry Andric TransDefWaitstates - 888*81ad6265SDimitry Andric getWaitStatesSince(IsTransDefFn, TransDefWaitstates); 889*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 890*81ad6265SDimitry Andric } 891*81ad6265SDimitry Andric 892*81ad6265SDimitry Andric if (ST.hasDstSelForwardingHazard()) { 893*81ad6265SDimitry Andric const int Shift16DefWaitstates = 1; 894*81ad6265SDimitry Andric 895*81ad6265SDimitry Andric auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) { 896*81ad6265SDimitry Andric if (!SIInstrInfo::isVALU(MI)) 897*81ad6265SDimitry Andric return false; 898*81ad6265SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 899*81ad6265SDimitry Andric if (SIInstrInfo::isSDWA(MI)) { 900*81ad6265SDimitry Andric if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel)) 901*81ad6265SDimitry Andric if (DstSel->getImm() == AMDGPU::SDWA::DWORD) 902*81ad6265SDimitry Andric return false; 903*81ad6265SDimitry Andric } else { 904*81ad6265SDimitry Andric if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(), 905*81ad6265SDimitry Andric AMDGPU::OpName::op_sel) == -1) || 906*81ad6265SDimitry Andric !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers) 907*81ad6265SDimitry Andric ->getImm() & 908*81ad6265SDimitry Andric SISrcMods::DST_OP_SEL)) 909*81ad6265SDimitry Andric return false; 910*81ad6265SDimitry Andric } 911*81ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 912*81ad6265SDimitry Andric if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { 913*81ad6265SDimitry Andric Register Def = Dst->getReg(); 914*81ad6265SDimitry Andric 915*81ad6265SDimitry Andric for (const MachineOperand &Use : VALU->explicit_uses()) { 916*81ad6265SDimitry 
Andric if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) 917*81ad6265SDimitry Andric return true; 918*81ad6265SDimitry Andric } 919*81ad6265SDimitry Andric } 920*81ad6265SDimitry Andric 921*81ad6265SDimitry Andric return false; 922*81ad6265SDimitry Andric }; 923*81ad6265SDimitry Andric 924*81ad6265SDimitry Andric int WaitStatesNeededForDef = 925*81ad6265SDimitry Andric Shift16DefWaitstates - 926*81ad6265SDimitry Andric getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates); 927*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 928*81ad6265SDimitry Andric } 929*81ad6265SDimitry Andric 930*81ad6265SDimitry Andric if (ST.hasVDecCoExecHazard()) { 931*81ad6265SDimitry Andric const int VALUWriteSGPRVALUReadWaitstates = 2; 932*81ad6265SDimitry Andric const int VALUWriteEXECRWLane = 4; 933*81ad6265SDimitry Andric const int VALUWriteVGPRReadlaneRead = 1; 934*81ad6265SDimitry Andric 935*81ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 936*81ad6265SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 937*81ad6265SDimitry Andric Register UseReg; 938*81ad6265SDimitry Andric auto IsVALUDefSGPRFn = [&UseReg, TRI](const MachineInstr &MI) { 939*81ad6265SDimitry Andric if (!SIInstrInfo::isVALU(MI)) 940*81ad6265SDimitry Andric return false; 941*81ad6265SDimitry Andric return MI.modifiesRegister(UseReg, TRI); 942*81ad6265SDimitry Andric }; 943*81ad6265SDimitry Andric 944*81ad6265SDimitry Andric for (const MachineOperand &Use : VALU->explicit_uses()) { 945*81ad6265SDimitry Andric if (!Use.isReg()) 946*81ad6265SDimitry Andric continue; 947*81ad6265SDimitry Andric 948*81ad6265SDimitry Andric UseReg = Use.getReg(); 949*81ad6265SDimitry Andric if (TRI->isSGPRReg(MRI, UseReg)) { 950*81ad6265SDimitry Andric int WaitStatesNeededForDef = 951*81ad6265SDimitry Andric VALUWriteSGPRVALUReadWaitstates - 952*81ad6265SDimitry Andric getWaitStatesSince(IsVALUDefSGPRFn, 953*81ad6265SDimitry Andric 
VALUWriteSGPRVALUReadWaitstates); 954*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 955*81ad6265SDimitry Andric } 956*81ad6265SDimitry Andric } 957*81ad6265SDimitry Andric 958*81ad6265SDimitry Andric if (VALU->readsRegister(AMDGPU::VCC, TRI)) { 959*81ad6265SDimitry Andric UseReg = AMDGPU::VCC; 960*81ad6265SDimitry Andric int WaitStatesNeededForDef = 961*81ad6265SDimitry Andric VALUWriteSGPRVALUReadWaitstates - 962*81ad6265SDimitry Andric getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteSGPRVALUReadWaitstates); 963*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 964*81ad6265SDimitry Andric } 965*81ad6265SDimitry Andric 966*81ad6265SDimitry Andric switch (VALU->getOpcode()) { 967*81ad6265SDimitry Andric case AMDGPU::V_READLANE_B32: 968*81ad6265SDimitry Andric case AMDGPU::V_READFIRSTLANE_B32: { 969*81ad6265SDimitry Andric MachineOperand *Src = TII.getNamedOperand(*VALU, AMDGPU::OpName::src0); 970*81ad6265SDimitry Andric UseReg = Src->getReg(); 971*81ad6265SDimitry Andric int WaitStatesNeededForDef = 972*81ad6265SDimitry Andric VALUWriteVGPRReadlaneRead - 973*81ad6265SDimitry Andric getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteVGPRReadlaneRead); 974*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 975*81ad6265SDimitry Andric } 976*81ad6265SDimitry Andric LLVM_FALLTHROUGH; 977*81ad6265SDimitry Andric case AMDGPU::V_WRITELANE_B32: { 978*81ad6265SDimitry Andric UseReg = AMDGPU::EXEC; 979*81ad6265SDimitry Andric int WaitStatesNeededForDef = 980*81ad6265SDimitry Andric VALUWriteEXECRWLane - 981*81ad6265SDimitry Andric getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteEXECRWLane); 982*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 983*81ad6265SDimitry Andric break; 984*81ad6265SDimitry Andric } 985*81ad6265SDimitry Andric default: 986*81ad6265SDimitry Andric break; 987*81ad6265SDimitry Andric } 
988*81ad6265SDimitry Andric } 989*81ad6265SDimitry Andric 9900b57cec5SDimitry Andric // This checks for the hazard where VMEM instructions that store more than 9910b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 9920b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 993*81ad6265SDimitry Andric return WaitStatesNeeded; 9940b57cec5SDimitry Andric 9950b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 9960b57cec5SDimitry Andric 9970b57cec5SDimitry Andric for (const MachineOperand &Def : VALU->defs()) { 9980b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric return WaitStatesNeeded; 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric 10040b57cec5SDimitry Andric int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { 10050b57cec5SDimitry Andric // This checks for hazards associated with inline asm statements. 10060b57cec5SDimitry Andric // Since inline asms can contain just about anything, we use this 10070b57cec5SDimitry Andric // to call/leverage other check*Hazard routines. Note that 10080b57cec5SDimitry Andric // this function doesn't attempt to address all possible inline asm 10090b57cec5SDimitry Andric // hazards (good luck), but is a collection of what has been 10100b57cec5SDimitry Andric // problematic thus far. 
10110b57cec5SDimitry Andric 10120b57cec5SDimitry Andric // see checkVALUHazards() 10130b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 10140b57cec5SDimitry Andric return 0; 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 10170b57cec5SDimitry Andric int WaitStatesNeeded = 0; 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); 10200b57cec5SDimitry Andric I != E; ++I) { 10210b57cec5SDimitry Andric const MachineOperand &Op = IA->getOperand(I); 10220b57cec5SDimitry Andric if (Op.isReg() && Op.isDef()) { 10230b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); 10240b57cec5SDimitry Andric } 10250b57cec5SDimitry Andric } 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric return WaitStatesNeeded; 10280b57cec5SDimitry Andric } 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { 10310b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 10320b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 10330b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 10340b57cec5SDimitry Andric 10350b57cec5SDimitry Andric const MachineOperand *LaneSelectOp = 10360b57cec5SDimitry Andric TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); 10370b57cec5SDimitry Andric 10380b57cec5SDimitry Andric if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) 10390b57cec5SDimitry Andric return 0; 10400b57cec5SDimitry Andric 10418bcb0991SDimitry Andric Register LaneSelectReg = LaneSelectOp->getReg(); 1042fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); }; 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric const int RWLaneWaitStates = 4; 10450b57cec5SDimitry Andric int WaitStatesSince = 
getWaitStatesSinceDef(LaneSelectReg, IsHazardFn, 10460b57cec5SDimitry Andric RWLaneWaitStates); 10470b57cec5SDimitry Andric return RWLaneWaitStates - WaitStatesSince; 10480b57cec5SDimitry Andric } 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) { 10510b57cec5SDimitry Andric if (!ST.hasRFEHazards()) 10520b57cec5SDimitry Andric return 0; 10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 10550b57cec5SDimitry Andric 10560b57cec5SDimitry Andric const int RFEWaitStates = 1; 10570b57cec5SDimitry Andric 1058fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { 1059fe6060f1SDimitry Andric return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS; 10600b57cec5SDimitry Andric }; 10610b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates); 10620b57cec5SDimitry Andric return RFEWaitStates - WaitStatesNeeded; 10630b57cec5SDimitry Andric } 10640b57cec5SDimitry Andric 10650b57cec5SDimitry Andric int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { 10660b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1067*81ad6265SDimitry Andric const int ReadM0WaitStates = 1; 1068fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); }; 1069*81ad6265SDimitry Andric return ReadM0WaitStates - 1070*81ad6265SDimitry Andric getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates); 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { 10740b57cec5SDimitry Andric fixVMEMtoScalarWriteHazards(MI); 10750b57cec5SDimitry Andric fixVcmpxPermlaneHazards(MI); 10760b57cec5SDimitry Andric fixSMEMtoVectorWriteHazards(MI); 10770b57cec5SDimitry Andric fixVcmpxExecWARHazard(MI); 10780b57cec5SDimitry Andric fixLdsBranchVmemWARHazard(MI); 1079*81ad6265SDimitry Andric if 
(ST.hasLdsDirect()) { 1080*81ad6265SDimitry Andric fixLdsDirectVALUHazard(MI); 1081*81ad6265SDimitry Andric fixLdsDirectVMEMHazard(MI); 1082*81ad6265SDimitry Andric } 1083*81ad6265SDimitry Andric fixVALUPartialForwardingHazard(MI); 1084*81ad6265SDimitry Andric fixVALUTransUseHazard(MI); 1085*81ad6265SDimitry Andric fixWMMAHazards(MI); 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { 10890b57cec5SDimitry Andric if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) 10900b57cec5SDimitry Andric return false; 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1093*81ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1094*81ad6265SDimitry Andric auto IsHazardFn = [TII, TRI](const MachineInstr &MI) { 1095*81ad6265SDimitry Andric return (TII->isVOPC(MI) || 1096*81ad6265SDimitry Andric ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) && 1097*81ad6265SDimitry Andric MI.modifiesRegister(AMDGPU::EXEC, TRI); 1098*81ad6265SDimitry Andric }; 10990b57cec5SDimitry Andric 1100fe6060f1SDimitry Andric auto IsExpiredFn = [](const MachineInstr &MI, int) { 1101fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 1102fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 && 1103fe6060f1SDimitry Andric Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa; 11040b57cec5SDimitry Andric }; 11050b57cec5SDimitry Andric 11060b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 11070b57cec5SDimitry Andric std::numeric_limits<int>::max()) 11080b57cec5SDimitry Andric return false; 11090b57cec5SDimitry Andric 11100b57cec5SDimitry Andric // V_NOP will be discarded by SQ. 1111*81ad6265SDimitry Andric // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* 11120b57cec5SDimitry Andric // which is always a VGPR and available. 
11130b57cec5SDimitry Andric auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); 11148bcb0991SDimitry Andric Register Reg = Src0->getReg(); 11150b57cec5SDimitry Andric bool IsUndef = Src0->isUndef(); 11160b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 11170b57cec5SDimitry Andric TII->get(AMDGPU::V_MOV_B32_e32)) 11180b57cec5SDimitry Andric .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0)) 11190b57cec5SDimitry Andric .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill); 11200b57cec5SDimitry Andric 11210b57cec5SDimitry Andric return true; 11220b57cec5SDimitry Andric } 11230b57cec5SDimitry Andric 11240b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { 11250b57cec5SDimitry Andric if (!ST.hasVMEMtoScalarWriteHazard()) 11260b57cec5SDimitry Andric return false; 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI)) 11290b57cec5SDimitry Andric return false; 11300b57cec5SDimitry Andric 11310b57cec5SDimitry Andric if (MI->getNumDefs() == 0) 11320b57cec5SDimitry Andric return false; 11330b57cec5SDimitry Andric 11340b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 11350b57cec5SDimitry Andric 1136fe6060f1SDimitry Andric auto IsHazardFn = [TRI, MI](const MachineInstr &I) { 1137fe6060f1SDimitry Andric if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) && 1138fe6060f1SDimitry Andric !SIInstrInfo::isFLAT(I)) 11390b57cec5SDimitry Andric return false; 11400b57cec5SDimitry Andric 11410b57cec5SDimitry Andric for (const MachineOperand &Def : MI->defs()) { 1142fe6060f1SDimitry Andric const MachineOperand *Op = 1143fe6060f1SDimitry Andric I.findRegisterUseOperand(Def.getReg(), false, TRI); 11440b57cec5SDimitry Andric if (!Op) 11450b57cec5SDimitry Andric continue; 11460b57cec5SDimitry Andric return true; 11470b57cec5SDimitry Andric } 11480b57cec5SDimitry Andric return false; 11490b57cec5SDimitry Andric }; 
11500b57cec5SDimitry Andric 1151fe6060f1SDimitry Andric auto IsExpiredFn = [](const MachineInstr &MI, int) { 1152fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) || 1153fe6060f1SDimitry Andric (MI.getOpcode() == AMDGPU::S_WAITCNT && 1154fe6060f1SDimitry Andric !MI.getOperand(0).getImm()) || 1155fe6060f1SDimitry Andric (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1156fe6060f1SDimitry Andric MI.getOperand(0).getImm() == 0xffe3); 11570b57cec5SDimitry Andric }; 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 11600b57cec5SDimitry Andric std::numeric_limits<int>::max()) 11610b57cec5SDimitry Andric return false; 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1164e8d8bef9SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1165e8d8bef9SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 1166e8d8bef9SDimitry Andric .addImm(0xffe3); 11670b57cec5SDimitry Andric return true; 11680b57cec5SDimitry Andric } 11690b57cec5SDimitry Andric 11700b57cec5SDimitry Andric bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { 11710b57cec5SDimitry Andric if (!ST.hasSMEMtoVectorWriteHazard()) 11720b57cec5SDimitry Andric return false; 11730b57cec5SDimitry Andric 11740b57cec5SDimitry Andric if (!SIInstrInfo::isVALU(*MI)) 11750b57cec5SDimitry Andric return false; 11760b57cec5SDimitry Andric 11770b57cec5SDimitry Andric unsigned SDSTName; 11780b57cec5SDimitry Andric switch (MI->getOpcode()) { 11790b57cec5SDimitry Andric case AMDGPU::V_READLANE_B32: 11800b57cec5SDimitry Andric case AMDGPU::V_READFIRSTLANE_B32: 11810b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::vdst; 11820b57cec5SDimitry Andric break; 11830b57cec5SDimitry Andric default: 11840b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::sdst; 11850b57cec5SDimitry Andric break; 11860b57cec5SDimitry Andric } 11870b57cec5SDimitry Andric 11880b57cec5SDimitry Andric const SIInstrInfo *TII = 
ST.getInstrInfo(); 11890b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 11900b57cec5SDimitry Andric const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU()); 11910b57cec5SDimitry Andric const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName); 11920b57cec5SDimitry Andric if (!SDST) { 11930b57cec5SDimitry Andric for (const auto &MO : MI->implicit_operands()) { 11940b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) { 11950b57cec5SDimitry Andric SDST = &MO; 11960b57cec5SDimitry Andric break; 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric } 11990b57cec5SDimitry Andric } 12000b57cec5SDimitry Andric 12010b57cec5SDimitry Andric if (!SDST) 12020b57cec5SDimitry Andric return false; 12030b57cec5SDimitry Andric 12048bcb0991SDimitry Andric const Register SDSTReg = SDST->getReg(); 1205fe6060f1SDimitry Andric auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) { 1206fe6060f1SDimitry Andric return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI); 12070b57cec5SDimitry Andric }; 12080b57cec5SDimitry Andric 1209fe6060f1SDimitry Andric auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) { 1210fe6060f1SDimitry Andric if (TII->isSALU(MI)) { 1211fe6060f1SDimitry Andric switch (MI.getOpcode()) { 12120b57cec5SDimitry Andric case AMDGPU::S_SETVSKIP: 12130b57cec5SDimitry Andric case AMDGPU::S_VERSION: 12140b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 12150b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 12160b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 12170b57cec5SDimitry Andric // These instructions cannot not mitigate the hazard. 12180b57cec5SDimitry Andric return false; 12190b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 12200b57cec5SDimitry Andric // Reducing lgkmcnt count to 0 always mitigates the hazard. 
1221fe6060f1SDimitry Andric return (MI.getOperand(1).getImm() == 0) && 1222fe6060f1SDimitry Andric (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL); 12230b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: { 1224fe6060f1SDimitry Andric const int64_t Imm = MI.getOperand(0).getImm(); 12250b57cec5SDimitry Andric AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm); 12260b57cec5SDimitry Andric return (Decoded.LgkmCnt == 0); 12270b57cec5SDimitry Andric } 12280b57cec5SDimitry Andric default: 12290b57cec5SDimitry Andric // SOPP instructions cannot mitigate the hazard. 1230fe6060f1SDimitry Andric if (TII->isSOPP(MI)) 12310b57cec5SDimitry Andric return false; 12320b57cec5SDimitry Andric // At this point the SALU can be assumed to mitigate the hazard 12330b57cec5SDimitry Andric // because either: 12340b57cec5SDimitry Andric // (a) it is independent of the at risk SMEM (breaking chain), 12350b57cec5SDimitry Andric // or 12360b57cec5SDimitry Andric // (b) it is dependent on the SMEM, in which case an appropriate 12370b57cec5SDimitry Andric // s_waitcnt lgkmcnt _must_ exist between it and the at risk 12380b57cec5SDimitry Andric // SMEM instruction. 
12390b57cec5SDimitry Andric return true; 12400b57cec5SDimitry Andric } 12410b57cec5SDimitry Andric } 12420b57cec5SDimitry Andric return false; 12430b57cec5SDimitry Andric }; 12440b57cec5SDimitry Andric 12450b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 12460b57cec5SDimitry Andric std::numeric_limits<int>::max()) 12470b57cec5SDimitry Andric return false; 12480b57cec5SDimitry Andric 12490b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 12500b57cec5SDimitry Andric TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL) 12510b57cec5SDimitry Andric .addImm(0); 12520b57cec5SDimitry Andric return true; 12530b57cec5SDimitry Andric } 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) { 12560b57cec5SDimitry Andric if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI)) 12570b57cec5SDimitry Andric return false; 12580b57cec5SDimitry Andric 12590b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 12600b57cec5SDimitry Andric if (!MI->modifiesRegister(AMDGPU::EXEC, TRI)) 12610b57cec5SDimitry Andric return false; 12620b57cec5SDimitry Andric 1263fe6060f1SDimitry Andric auto IsHazardFn = [TRI](const MachineInstr &I) { 1264fe6060f1SDimitry Andric if (SIInstrInfo::isVALU(I)) 12650b57cec5SDimitry Andric return false; 1266fe6060f1SDimitry Andric return I.readsRegister(AMDGPU::EXEC, TRI); 12670b57cec5SDimitry Andric }; 12680b57cec5SDimitry Andric 12690b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1270fe6060f1SDimitry Andric auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) { 1271fe6060f1SDimitry Andric if (SIInstrInfo::isVALU(MI)) { 1272fe6060f1SDimitry Andric if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) 12730b57cec5SDimitry Andric return true; 1274fe6060f1SDimitry Andric for (auto MO : MI.implicit_operands()) 12750b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) 
12760b57cec5SDimitry Andric return true; 12770b57cec5SDimitry Andric } 1278fe6060f1SDimitry Andric if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1279fe6060f1SDimitry Andric (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe) 12800b57cec5SDimitry Andric return true; 12810b57cec5SDimitry Andric return false; 12820b57cec5SDimitry Andric }; 12830b57cec5SDimitry Andric 12840b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 12850b57cec5SDimitry Andric std::numeric_limits<int>::max()) 12860b57cec5SDimitry Andric return false; 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 12890b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 12900b57cec5SDimitry Andric .addImm(0xfffe); 12910b57cec5SDimitry Andric return true; 12920b57cec5SDimitry Andric } 12930b57cec5SDimitry Andric 1294fe6060f1SDimitry Andric static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, 1295fe6060f1SDimitry Andric const GCNSubtarget &ST) { 12960b57cec5SDimitry Andric if (!ST.hasLdsBranchVmemWARHazard()) 12970b57cec5SDimitry Andric return false; 12980b57cec5SDimitry Andric 1299fe6060f1SDimitry Andric // Check if the necessary condition for the hazard is met: both LDS and VMEM 1300fe6060f1SDimitry Andric // instructions need to appear in the same function. 
1301fe6060f1SDimitry Andric bool HasLds = false; 1302fe6060f1SDimitry Andric bool HasVmem = false; 1303fe6060f1SDimitry Andric for (auto &MBB : MF) { 1304fe6060f1SDimitry Andric for (auto &MI : MBB) { 1305fe6060f1SDimitry Andric HasLds |= SIInstrInfo::isDS(MI); 1306fe6060f1SDimitry Andric HasVmem |= 1307fe6060f1SDimitry Andric SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI); 1308fe6060f1SDimitry Andric if (HasLds && HasVmem) 1309fe6060f1SDimitry Andric return true; 1310fe6060f1SDimitry Andric } 1311fe6060f1SDimitry Andric } 1312fe6060f1SDimitry Andric return false; 1313fe6060f1SDimitry Andric } 1314fe6060f1SDimitry Andric 1315fe6060f1SDimitry Andric bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { 1316fe6060f1SDimitry Andric if (!RunLdsBranchVmemWARHazardFixup) 1317fe6060f1SDimitry Andric return false; 1318fe6060f1SDimitry Andric 1319fe6060f1SDimitry Andric assert(ST.hasLdsBranchVmemWARHazard()); 1320fe6060f1SDimitry Andric 1321fe6060f1SDimitry Andric auto IsHazardInst = [](const MachineInstr &MI) { 1322fe6060f1SDimitry Andric if (SIInstrInfo::isDS(MI)) 13230b57cec5SDimitry Andric return 1; 1324fe6060f1SDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 13250b57cec5SDimitry Andric return 2; 13260b57cec5SDimitry Andric return 0; 13270b57cec5SDimitry Andric }; 13280b57cec5SDimitry Andric 1329fe6060f1SDimitry Andric auto InstType = IsHazardInst(*MI); 13300b57cec5SDimitry Andric if (!InstType) 13310b57cec5SDimitry Andric return false; 13320b57cec5SDimitry Andric 1333fe6060f1SDimitry Andric auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) { 1334fe6060f1SDimitry Andric return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 1335fe6060f1SDimitry Andric I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && 1336fe6060f1SDimitry Andric !I.getOperand(1).getImm()); 13370b57cec5SDimitry Andric }; 13380b57cec5SDimitry Andric 1339fe6060f1SDimitry Andric auto IsHazardFn = [InstType, 
&IsHazardInst](const MachineInstr &I) { 1340fe6060f1SDimitry Andric if (!I.isBranch()) 13410b57cec5SDimitry Andric return false; 13420b57cec5SDimitry Andric 1343fe6060f1SDimitry Andric auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) { 13440b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 13450b57cec5SDimitry Andric return InstType2 && InstType != InstType2; 13460b57cec5SDimitry Andric }; 13470b57cec5SDimitry Andric 1348fe6060f1SDimitry Andric auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) { 13490b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 13500b57cec5SDimitry Andric if (InstType == InstType2) 13510b57cec5SDimitry Andric return true; 13520b57cec5SDimitry Andric 1353fe6060f1SDimitry Andric return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 1354fe6060f1SDimitry Andric I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && 1355fe6060f1SDimitry Andric !I.getOperand(1).getImm(); 13560b57cec5SDimitry Andric }; 13570b57cec5SDimitry Andric 1358fe6060f1SDimitry Andric return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) != 13590b57cec5SDimitry Andric std::numeric_limits<int>::max(); 13600b57cec5SDimitry Andric }; 13610b57cec5SDimitry Andric 13620b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 13630b57cec5SDimitry Andric std::numeric_limits<int>::max()) 13640b57cec5SDimitry Andric return false; 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 13670b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 13680b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_VSCNT)) 13690b57cec5SDimitry Andric .addReg(AMDGPU::SGPR_NULL, RegState::Undef) 13700b57cec5SDimitry Andric .addImm(0); 13710b57cec5SDimitry Andric 13720b57cec5SDimitry Andric return true; 13730b57cec5SDimitry Andric } 13740b57cec5SDimitry Andric 1375*81ad6265SDimitry Andric bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) { 1376*81ad6265SDimitry 
Andric if (!SIInstrInfo::isLDSDIR(*MI)) 1377*81ad6265SDimitry Andric return false; 1378*81ad6265SDimitry Andric 1379*81ad6265SDimitry Andric const int NoHazardWaitStates = 15; 1380*81ad6265SDimitry Andric const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); 1381*81ad6265SDimitry Andric const Register VDSTReg = VDST->getReg(); 1382*81ad6265SDimitry Andric 1383*81ad6265SDimitry Andric bool VisitedTrans = false; 1384*81ad6265SDimitry Andric auto IsHazardFn = [this, VDSTReg, &VisitedTrans](const MachineInstr &I) { 1385*81ad6265SDimitry Andric if (!SIInstrInfo::isVALU(I)) 1386*81ad6265SDimitry Andric return false; 1387*81ad6265SDimitry Andric VisitedTrans = VisitedTrans || SIInstrInfo::isTRANS(I); 1388*81ad6265SDimitry Andric // Cover both WAR and WAW 1389*81ad6265SDimitry Andric return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); 1390*81ad6265SDimitry Andric }; 1391*81ad6265SDimitry Andric auto IsExpiredFn = [&](const MachineInstr &I, int WaitStates) { 1392*81ad6265SDimitry Andric if (WaitStates >= NoHazardWaitStates) 1393*81ad6265SDimitry Andric return true; 1394*81ad6265SDimitry Andric // Instructions which cause va_vdst==0 expire hazard 1395*81ad6265SDimitry Andric return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) || 1396*81ad6265SDimitry Andric SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I); 1397*81ad6265SDimitry Andric }; 1398*81ad6265SDimitry Andric auto GetWaitStatesFn = [](const MachineInstr &MI) { 1399*81ad6265SDimitry Andric return SIInstrInfo::isVALU(MI) ? 
1 : 0; 1400*81ad6265SDimitry Andric }; 1401*81ad6265SDimitry Andric 1402*81ad6265SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 1403*81ad6265SDimitry Andric auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(), 1404*81ad6265SDimitry Andric std::next(MI->getReverseIterator()), 0, 1405*81ad6265SDimitry Andric IsExpiredFn, Visited, GetWaitStatesFn); 1406*81ad6265SDimitry Andric 1407*81ad6265SDimitry Andric // Transcendentals can execute in parallel to other VALUs. 1408*81ad6265SDimitry Andric // This makes va_vdst count unusable with a mixture of VALU and TRANS. 1409*81ad6265SDimitry Andric if (VisitedTrans) 1410*81ad6265SDimitry Andric Count = 0; 1411*81ad6265SDimitry Andric 1412*81ad6265SDimitry Andric MachineOperand *WaitVdstOp = 1413*81ad6265SDimitry Andric TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst); 1414*81ad6265SDimitry Andric WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates)); 1415*81ad6265SDimitry Andric 1416*81ad6265SDimitry Andric return true; 1417*81ad6265SDimitry Andric } 1418*81ad6265SDimitry Andric 1419*81ad6265SDimitry Andric bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { 1420*81ad6265SDimitry Andric if (!SIInstrInfo::isLDSDIR(*MI)) 1421*81ad6265SDimitry Andric return false; 1422*81ad6265SDimitry Andric 1423*81ad6265SDimitry Andric const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); 1424*81ad6265SDimitry Andric const Register VDSTReg = VDST->getReg(); 1425*81ad6265SDimitry Andric 1426*81ad6265SDimitry Andric auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) { 1427*81ad6265SDimitry Andric if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) && 1428*81ad6265SDimitry Andric !SIInstrInfo::isDS(I)) 1429*81ad6265SDimitry Andric return false; 1430*81ad6265SDimitry Andric return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); 1431*81ad6265SDimitry Andric }; 1432*81ad6265SDimitry Andric auto IsExpiredFn = [](const MachineInstr &I, int) { 
1433*81ad6265SDimitry Andric return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || 1434*81ad6265SDimitry Andric (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || 1435*81ad6265SDimitry Andric (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1436*81ad6265SDimitry Andric I.getOperand(0).getImm() == 0xffe3); 1437*81ad6265SDimitry Andric }; 1438*81ad6265SDimitry Andric 1439*81ad6265SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 1440*81ad6265SDimitry Andric std::numeric_limits<int>::max()) 1441*81ad6265SDimitry Andric return false; 1442*81ad6265SDimitry Andric 1443*81ad6265SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1444*81ad6265SDimitry Andric TII.get(AMDGPU::S_WAITCNT_DEPCTR)) 1445*81ad6265SDimitry Andric .addImm(0xffe3); 1446*81ad6265SDimitry Andric 1447*81ad6265SDimitry Andric return true; 1448*81ad6265SDimitry Andric } 1449*81ad6265SDimitry Andric 1450*81ad6265SDimitry Andric bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { 1451*81ad6265SDimitry Andric if (!ST.isWave64()) 1452*81ad6265SDimitry Andric return false; 1453*81ad6265SDimitry Andric if (!ST.hasVALUPartialForwardingHazard()) 1454*81ad6265SDimitry Andric return false; 1455*81ad6265SDimitry Andric if (!SIInstrInfo::isVALU(*MI)) 1456*81ad6265SDimitry Andric return false; 1457*81ad6265SDimitry Andric 1458*81ad6265SDimitry Andric SmallSetVector<Register, 4> SrcVGPRs; 1459*81ad6265SDimitry Andric 1460*81ad6265SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 1461*81ad6265SDimitry Andric if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 1462*81ad6265SDimitry Andric SrcVGPRs.insert(Use.getReg()); 1463*81ad6265SDimitry Andric } 1464*81ad6265SDimitry Andric 1465*81ad6265SDimitry Andric // Only applies with >= 2 unique VGPR sources 1466*81ad6265SDimitry Andric if (SrcVGPRs.size() <= 1) 1467*81ad6265SDimitry Andric return false; 1468*81ad6265SDimitry Andric 1469*81ad6265SDimitry Andric // 
Look for the following pattern: 1470*81ad6265SDimitry Andric // Va <- VALU [PreExecPos] 1471*81ad6265SDimitry Andric // intv1 1472*81ad6265SDimitry Andric // Exec <- SALU [ExecPos] 1473*81ad6265SDimitry Andric // intv2 1474*81ad6265SDimitry Andric // Vb <- VALU [PostExecPos] 1475*81ad6265SDimitry Andric // intv3 1476*81ad6265SDimitry Andric // MI Va, Vb (WaitState = 0) 1477*81ad6265SDimitry Andric // 1478*81ad6265SDimitry Andric // Where: 1479*81ad6265SDimitry Andric // intv1 + intv2 <= 2 VALUs 1480*81ad6265SDimitry Andric // intv3 <= 4 VALUs 1481*81ad6265SDimitry Andric // 1482*81ad6265SDimitry Andric // If found, insert an appropriate S_WAITCNT_DEPCTR before MI. 1483*81ad6265SDimitry Andric 1484*81ad6265SDimitry Andric const int Intv1plus2MaxVALUs = 2; 1485*81ad6265SDimitry Andric const int Intv3MaxVALUs = 4; 1486*81ad6265SDimitry Andric const int IntvMaxVALUs = 6; 1487*81ad6265SDimitry Andric const int NoHazardVALUWaitStates = IntvMaxVALUs + 2; 1488*81ad6265SDimitry Andric 1489*81ad6265SDimitry Andric struct StateType { 1490*81ad6265SDimitry Andric SmallDenseMap<Register, int, 4> DefPos; 1491*81ad6265SDimitry Andric int ExecPos = std::numeric_limits<int>::max(); 1492*81ad6265SDimitry Andric int VALUs = 0; 1493*81ad6265SDimitry Andric }; 1494*81ad6265SDimitry Andric 1495*81ad6265SDimitry Andric StateType State; 1496*81ad6265SDimitry Andric 1497*81ad6265SDimitry Andric // This overloads expiry testing with all the hazard detection 1498*81ad6265SDimitry Andric auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) { 1499*81ad6265SDimitry Andric // Too many VALU states have passed 1500*81ad6265SDimitry Andric if (State.VALUs > NoHazardVALUWaitStates) 1501*81ad6265SDimitry Andric return HazardExpired; 1502*81ad6265SDimitry Andric 1503*81ad6265SDimitry Andric // Instructions which cause va_vdst==0 expire hazard 1504*81ad6265SDimitry Andric if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) || 1505*81ad6265SDimitry Andric SIInstrInfo::isDS(I) || 
SIInstrInfo::isEXP(I) || 1506*81ad6265SDimitry Andric (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1507*81ad6265SDimitry Andric I.getOperand(0).getImm() == 0x0fff)) 1508*81ad6265SDimitry Andric return HazardExpired; 1509*81ad6265SDimitry Andric 1510*81ad6265SDimitry Andric // Track registers writes 1511*81ad6265SDimitry Andric bool Changed = false; 1512*81ad6265SDimitry Andric if (SIInstrInfo::isVALU(I)) { 1513*81ad6265SDimitry Andric for (Register Src : SrcVGPRs) { 1514*81ad6265SDimitry Andric if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) { 1515*81ad6265SDimitry Andric State.DefPos[Src] = State.VALUs; 1516*81ad6265SDimitry Andric Changed = true; 1517*81ad6265SDimitry Andric } 1518*81ad6265SDimitry Andric } 1519*81ad6265SDimitry Andric } else if (SIInstrInfo::isSALU(I)) { 1520*81ad6265SDimitry Andric if (State.ExecPos == std::numeric_limits<int>::max()) { 1521*81ad6265SDimitry Andric if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) { 1522*81ad6265SDimitry Andric State.ExecPos = State.VALUs; 1523*81ad6265SDimitry Andric Changed = true; 1524*81ad6265SDimitry Andric } 1525*81ad6265SDimitry Andric } 1526*81ad6265SDimitry Andric } 1527*81ad6265SDimitry Andric 1528*81ad6265SDimitry Andric // Early expiration: too many VALUs in intv3 1529*81ad6265SDimitry Andric if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty()) 1530*81ad6265SDimitry Andric return HazardExpired; 1531*81ad6265SDimitry Andric 1532*81ad6265SDimitry Andric // Only evaluate state if something changed 1533*81ad6265SDimitry Andric if (!Changed) 1534*81ad6265SDimitry Andric return NoHazardFound; 1535*81ad6265SDimitry Andric 1536*81ad6265SDimitry Andric // Determine positions of VALUs pre/post exec change 1537*81ad6265SDimitry Andric if (State.ExecPos == std::numeric_limits<int>::max()) 1538*81ad6265SDimitry Andric return NoHazardFound; 1539*81ad6265SDimitry Andric 1540*81ad6265SDimitry Andric int PreExecPos = std::numeric_limits<int>::max(); 1541*81ad6265SDimitry Andric 
int PostExecPos = std::numeric_limits<int>::max(); 1542*81ad6265SDimitry Andric 1543*81ad6265SDimitry Andric for (auto Entry : State.DefPos) { 1544*81ad6265SDimitry Andric int DefVALUs = Entry.second; 1545*81ad6265SDimitry Andric if (DefVALUs != std::numeric_limits<int>::max()) { 1546*81ad6265SDimitry Andric if (DefVALUs >= State.ExecPos) 1547*81ad6265SDimitry Andric PreExecPos = std::min(PreExecPos, DefVALUs); 1548*81ad6265SDimitry Andric else if (DefVALUs < State.ExecPos) 1549*81ad6265SDimitry Andric PostExecPos = std::min(PostExecPos, DefVALUs); 1550*81ad6265SDimitry Andric } 1551*81ad6265SDimitry Andric } 1552*81ad6265SDimitry Andric 1553*81ad6265SDimitry Andric // Need a VALUs post exec change 1554*81ad6265SDimitry Andric if (PostExecPos == std::numeric_limits<int>::max()) 1555*81ad6265SDimitry Andric return NoHazardFound; 1556*81ad6265SDimitry Andric 1557*81ad6265SDimitry Andric // Too many VALUs in intv3? 1558*81ad6265SDimitry Andric int Intv3VALUs = PostExecPos; 1559*81ad6265SDimitry Andric if (Intv3VALUs > Intv3MaxVALUs) 1560*81ad6265SDimitry Andric return HazardExpired; 1561*81ad6265SDimitry Andric 1562*81ad6265SDimitry Andric // Too many VALUs in intv2? 1563*81ad6265SDimitry Andric int Intv2VALUs = (State.ExecPos - PostExecPos) - 1; 1564*81ad6265SDimitry Andric if (Intv2VALUs > Intv1plus2MaxVALUs) 1565*81ad6265SDimitry Andric return HazardExpired; 1566*81ad6265SDimitry Andric 1567*81ad6265SDimitry Andric // Need a VALUs pre exec change 1568*81ad6265SDimitry Andric if (PreExecPos == std::numeric_limits<int>::max()) 1569*81ad6265SDimitry Andric return NoHazardFound; 1570*81ad6265SDimitry Andric 1571*81ad6265SDimitry Andric // Too many VALUs in intv1? 
1572*81ad6265SDimitry Andric int Intv1VALUs = PreExecPos - State.ExecPos; 1573*81ad6265SDimitry Andric if (Intv1VALUs > Intv1plus2MaxVALUs) 1574*81ad6265SDimitry Andric return HazardExpired; 1575*81ad6265SDimitry Andric 1576*81ad6265SDimitry Andric // Too many VALUs in intv1 + intv2 1577*81ad6265SDimitry Andric if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs) 1578*81ad6265SDimitry Andric return HazardExpired; 1579*81ad6265SDimitry Andric 1580*81ad6265SDimitry Andric return HazardFound; 1581*81ad6265SDimitry Andric }; 1582*81ad6265SDimitry Andric auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) { 1583*81ad6265SDimitry Andric if (SIInstrInfo::isVALU(MI)) 1584*81ad6265SDimitry Andric State.VALUs += 1; 1585*81ad6265SDimitry Andric }; 1586*81ad6265SDimitry Andric 1587*81ad6265SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 1588*81ad6265SDimitry Andric if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(), 1589*81ad6265SDimitry Andric std::next(MI->getReverseIterator()), Visited)) 1590*81ad6265SDimitry Andric return false; 1591*81ad6265SDimitry Andric 1592*81ad6265SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1593*81ad6265SDimitry Andric TII.get(AMDGPU::S_WAITCNT_DEPCTR)) 1594*81ad6265SDimitry Andric .addImm(0x0fff); 1595*81ad6265SDimitry Andric 1596*81ad6265SDimitry Andric return true; 1597*81ad6265SDimitry Andric } 1598*81ad6265SDimitry Andric 1599*81ad6265SDimitry Andric bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { 1600*81ad6265SDimitry Andric if (!ST.hasVALUTransUseHazard()) 1601*81ad6265SDimitry Andric return false; 1602*81ad6265SDimitry Andric if (!SIInstrInfo::isVALU(*MI)) 1603*81ad6265SDimitry Andric return false; 1604*81ad6265SDimitry Andric 1605*81ad6265SDimitry Andric SmallSet<Register, 4> SrcVGPRs; 1606*81ad6265SDimitry Andric 1607*81ad6265SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 1608*81ad6265SDimitry Andric if (Use.isReg() && 
TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 1609*81ad6265SDimitry Andric SrcVGPRs.insert(Use.getReg()); 1610*81ad6265SDimitry Andric } 1611*81ad6265SDimitry Andric 1612*81ad6265SDimitry Andric // Look for the following pattern: 1613*81ad6265SDimitry Andric // Va <- TRANS VALU 1614*81ad6265SDimitry Andric // intv 1615*81ad6265SDimitry Andric // MI Va (WaitState = 0) 1616*81ad6265SDimitry Andric // 1617*81ad6265SDimitry Andric // Where: 1618*81ad6265SDimitry Andric // intv <= 5 VALUs / 1 TRANS 1619*81ad6265SDimitry Andric // 1620*81ad6265SDimitry Andric // If found, insert an appropriate S_WAITCNT_DEPCTR before MI. 1621*81ad6265SDimitry Andric 1622*81ad6265SDimitry Andric const int IntvMaxVALUs = 5; 1623*81ad6265SDimitry Andric const int IntvMaxTRANS = 1; 1624*81ad6265SDimitry Andric 1625*81ad6265SDimitry Andric struct StateType { 1626*81ad6265SDimitry Andric int VALUs = 0; 1627*81ad6265SDimitry Andric int TRANS = 0; 1628*81ad6265SDimitry Andric }; 1629*81ad6265SDimitry Andric 1630*81ad6265SDimitry Andric StateType State; 1631*81ad6265SDimitry Andric 1632*81ad6265SDimitry Andric // This overloads expiry testing with all the hazard detection 1633*81ad6265SDimitry Andric auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) { 1634*81ad6265SDimitry Andric // Too many VALU states have passed 1635*81ad6265SDimitry Andric if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS) 1636*81ad6265SDimitry Andric return HazardExpired; 1637*81ad6265SDimitry Andric 1638*81ad6265SDimitry Andric // Instructions which cause va_vdst==0 expire hazard 1639*81ad6265SDimitry Andric if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) || 1640*81ad6265SDimitry Andric SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) || 1641*81ad6265SDimitry Andric (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1642*81ad6265SDimitry Andric I.getOperand(0).getImm() == 0x0fff)) 1643*81ad6265SDimitry Andric return HazardExpired; 1644*81ad6265SDimitry Andric 1645*81ad6265SDimitry Andric // Track 
registers writes 1646*81ad6265SDimitry Andric if (SIInstrInfo::isTRANS(I)) { 1647*81ad6265SDimitry Andric for (Register Src : SrcVGPRs) { 1648*81ad6265SDimitry Andric if (I.modifiesRegister(Src, &TRI)) { 1649*81ad6265SDimitry Andric return HazardFound; 1650*81ad6265SDimitry Andric } 1651*81ad6265SDimitry Andric } 1652*81ad6265SDimitry Andric } 1653*81ad6265SDimitry Andric 1654*81ad6265SDimitry Andric return NoHazardFound; 1655*81ad6265SDimitry Andric }; 1656*81ad6265SDimitry Andric auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) { 1657*81ad6265SDimitry Andric if (SIInstrInfo::isVALU(MI)) 1658*81ad6265SDimitry Andric State.VALUs += 1; 1659*81ad6265SDimitry Andric if (SIInstrInfo::isTRANS(MI)) 1660*81ad6265SDimitry Andric State.TRANS += 1; 1661*81ad6265SDimitry Andric }; 1662*81ad6265SDimitry Andric 1663*81ad6265SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 1664*81ad6265SDimitry Andric if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(), 1665*81ad6265SDimitry Andric std::next(MI->getReverseIterator()), Visited)) 1666*81ad6265SDimitry Andric return false; 1667*81ad6265SDimitry Andric 1668*81ad6265SDimitry Andric // Hazard is observed - insert a wait on va_dst counter to ensure hazard is 1669*81ad6265SDimitry Andric // avoided (mask 0x0fff achieves this). 
1670*81ad6265SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1671*81ad6265SDimitry Andric TII.get(AMDGPU::S_WAITCNT_DEPCTR)) 1672*81ad6265SDimitry Andric .addImm(0x0fff); 1673*81ad6265SDimitry Andric 1674*81ad6265SDimitry Andric return true; 1675*81ad6265SDimitry Andric } 1676*81ad6265SDimitry Andric 1677*81ad6265SDimitry Andric bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) { 1678*81ad6265SDimitry Andric if (!SIInstrInfo::isWMMA(*MI)) 1679*81ad6265SDimitry Andric return false; 1680*81ad6265SDimitry Andric 1681*81ad6265SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1682*81ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1683*81ad6265SDimitry Andric 1684*81ad6265SDimitry Andric auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) { 1685*81ad6265SDimitry Andric if (!SIInstrInfo::isWMMA(I)) 1686*81ad6265SDimitry Andric return false; 1687*81ad6265SDimitry Andric 1688*81ad6265SDimitry Andric // Src0 or Src1 of the current wmma instruction overlaps with the dest of 1689*81ad6265SDimitry Andric // the previous wmma. 1690*81ad6265SDimitry Andric const Register CurSrc0Reg = 1691*81ad6265SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg(); 1692*81ad6265SDimitry Andric const Register CurSrc1Reg = 1693*81ad6265SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg(); 1694*81ad6265SDimitry Andric 1695*81ad6265SDimitry Andric const Register PrevDstReg = 1696*81ad6265SDimitry Andric TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg(); 1697*81ad6265SDimitry Andric 1698*81ad6265SDimitry Andric if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) || 1699*81ad6265SDimitry Andric TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) { 1700*81ad6265SDimitry Andric return true; 1701*81ad6265SDimitry Andric } 1702*81ad6265SDimitry Andric 1703*81ad6265SDimitry Andric // Src2 of the current wmma instruction overlaps with the dest of the 1704*81ad6265SDimitry Andric // previous wmma. 
1705*81ad6265SDimitry Andric const MachineOperand *Src2 = 1706*81ad6265SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::src2); 1707*81ad6265SDimitry Andric const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register(); 1708*81ad6265SDimitry Andric 1709*81ad6265SDimitry Andric if (CurSrc2Reg != AMDGPU::NoRegister && 1710*81ad6265SDimitry Andric TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) { 1711*81ad6265SDimitry Andric 1712*81ad6265SDimitry Andric const MachineOperand *Src2Mods = 1713*81ad6265SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers); 1714*81ad6265SDimitry Andric const bool NoSrc2Mods = 1715*81ad6265SDimitry Andric (Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0; 1716*81ad6265SDimitry Andric // Exception: there is no hazard if the wmma instructions are of the same 1717*81ad6265SDimitry Andric // type and there is no input modifier on src2 of the current instruction. 1718*81ad6265SDimitry Andric return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) == 1719*81ad6265SDimitry Andric TII->pseudoToMCOpcode(MI->getOpcode()))); 1720*81ad6265SDimitry Andric } 1721*81ad6265SDimitry Andric 1722*81ad6265SDimitry Andric return false; 1723*81ad6265SDimitry Andric }; 1724*81ad6265SDimitry Andric 1725*81ad6265SDimitry Andric auto IsExpiredFn = [](const MachineInstr &I, int) { 1726*81ad6265SDimitry Andric return SIInstrInfo::isVALU(I); 1727*81ad6265SDimitry Andric }; 1728*81ad6265SDimitry Andric 1729*81ad6265SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 1730*81ad6265SDimitry Andric std::numeric_limits<int>::max()) 1731*81ad6265SDimitry Andric return false; 1732*81ad6265SDimitry Andric 1733*81ad6265SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32)); 1734*81ad6265SDimitry Andric 1735*81ad6265SDimitry Andric return true; 1736*81ad6265SDimitry Andric } 1737*81ad6265SDimitry Andric 17380b57cec5SDimitry Andric int 
GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { 17390b57cec5SDimitry Andric int NSAtoVMEMWaitStates = 1; 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric if (!ST.hasNSAtoVMEMBug()) 17420b57cec5SDimitry Andric return 0; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI)) 17450b57cec5SDimitry Andric return 0; 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 17480b57cec5SDimitry Andric const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset); 17490b57cec5SDimitry Andric if (!Offset || (Offset->getImm() & 6) == 0) 17500b57cec5SDimitry Andric return 0; 17510b57cec5SDimitry Andric 1752fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &I) { 1753fe6060f1SDimitry Andric if (!SIInstrInfo::isMIMG(I)) 17540b57cec5SDimitry Andric return false; 1755fe6060f1SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode()); 17560b57cec5SDimitry Andric return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA && 1757fe6060f1SDimitry Andric TII->getInstSizeInBytes(I) >= 16; 17580b57cec5SDimitry Andric }; 17590b57cec5SDimitry Andric 17600b57cec5SDimitry Andric return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); 17610b57cec5SDimitry Andric } 17620b57cec5SDimitry Andric 17630b57cec5SDimitry Andric int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { 17640b57cec5SDimitry Andric int FPAtomicToDenormModeWaitStates = 3; 17650b57cec5SDimitry Andric 17660b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) 17670b57cec5SDimitry Andric return 0; 17680b57cec5SDimitry Andric 1769fe6060f1SDimitry Andric auto IsHazardFn = [](const MachineInstr &I) { 1770fe6060f1SDimitry Andric if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I)) 17710b57cec5SDimitry Andric return false; 1772fe6060f1SDimitry Andric return SIInstrInfo::isFPAtomic(I); 17730b57cec5SDimitry Andric }; 
17740b57cec5SDimitry Andric 1775fe6060f1SDimitry Andric auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) { 1776fe6060f1SDimitry Andric if (WaitStates >= 3 || SIInstrInfo::isVALU(MI)) 17770b57cec5SDimitry Andric return true; 17780b57cec5SDimitry Andric 1779fe6060f1SDimitry Andric switch (MI.getOpcode()) { 17800b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: 17810b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 17820b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 17830b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 17840b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 1785e8d8bef9SDimitry Andric case AMDGPU::S_WAIT_IDLE: 17860b57cec5SDimitry Andric return true; 17870b57cec5SDimitry Andric default: 17880b57cec5SDimitry Andric break; 17890b57cec5SDimitry Andric } 17900b57cec5SDimitry Andric 17910b57cec5SDimitry Andric return false; 17920b57cec5SDimitry Andric }; 17930b57cec5SDimitry Andric 17940b57cec5SDimitry Andric return FPAtomicToDenormModeWaitStates - 17950b57cec5SDimitry Andric ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric 17980b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { 17990b57cec5SDimitry Andric assert(SIInstrInfo::isMAI(*MI)); 18000b57cec5SDimitry Andric 1801fe6060f1SDimitry Andric return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI); 1802fe6060f1SDimitry Andric } 1803fe6060f1SDimitry Andric 1804*81ad6265SDimitry Andric int GCNHazardRecognizer::checkMFMAPadding(MachineInstr *MI) { 1805*81ad6265SDimitry Andric // Early exit if no padding is requested. 
1806*81ad6265SDimitry Andric if (MFMAPaddingRatio == 0) 1807*81ad6265SDimitry Andric return 0; 1808*81ad6265SDimitry Andric 1809*81ad6265SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1810*81ad6265SDimitry Andric if (!SIInstrInfo::isMFMA(*MI) || MFI->getOccupancy() < 2) 1811*81ad6265SDimitry Andric return 0; 1812*81ad6265SDimitry Andric 1813*81ad6265SDimitry Andric int NeighborMFMALatency = 0; 1814*81ad6265SDimitry Andric auto IsNeighboringMFMA = [&NeighborMFMALatency, 1815*81ad6265SDimitry Andric this](const MachineInstr &MI) { 1816*81ad6265SDimitry Andric if (!SIInstrInfo::isMFMA(MI)) 1817*81ad6265SDimitry Andric return false; 1818*81ad6265SDimitry Andric 1819*81ad6265SDimitry Andric NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI); 1820*81ad6265SDimitry Andric return true; 1821*81ad6265SDimitry Andric }; 1822*81ad6265SDimitry Andric 1823*81ad6265SDimitry Andric const int MaxMFMAPipelineWaitStates = 16; 1824*81ad6265SDimitry Andric int WaitStatesSinceNeighborMFMA = 1825*81ad6265SDimitry Andric getWaitStatesSince(IsNeighboringMFMA, MaxMFMAPipelineWaitStates); 1826*81ad6265SDimitry Andric 1827*81ad6265SDimitry Andric int NeighborMFMAPaddingNeeded = 1828*81ad6265SDimitry Andric (NeighborMFMALatency * MFMAPaddingRatio / 100) - 1829*81ad6265SDimitry Andric WaitStatesSinceNeighborMFMA; 1830*81ad6265SDimitry Andric 1831*81ad6265SDimitry Andric return std::max(0, NeighborMFMAPaddingNeeded); 1832*81ad6265SDimitry Andric } 1833*81ad6265SDimitry Andric 1834fe6060f1SDimitry Andric int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { 18350b57cec5SDimitry Andric int WaitStatesNeeded = 0; 18360b57cec5SDimitry Andric unsigned Opc = MI->getOpcode(); 18370b57cec5SDimitry Andric 1838fe6060f1SDimitry Andric auto IsVALUFn = [](const MachineInstr &MI) { 1839fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI); 18400b57cec5SDimitry Andric }; 18410b57cec5SDimitry Andric 1842e8d8bef9SDimitry Andric if (Opc != 
AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write 18430b57cec5SDimitry Andric const int LegacyVALUWritesVGPRWaitStates = 2; 18440b57cec5SDimitry Andric const int VALUWritesExecWaitStates = 4; 18450b57cec5SDimitry Andric const int MaxWaitStates = 4; 18460b57cec5SDimitry Andric 18470b57cec5SDimitry Andric int WaitStatesNeededForUse = VALUWritesExecWaitStates - 18480b57cec5SDimitry Andric getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates); 18490b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 18500b57cec5SDimitry Andric 18510b57cec5SDimitry Andric if (WaitStatesNeeded < MaxWaitStates) { 18520b57cec5SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 18530b57cec5SDimitry Andric const int MaxWaitStates = 2; 18540b57cec5SDimitry Andric 18550b57cec5SDimitry Andric if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 18560b57cec5SDimitry Andric continue; 18570b57cec5SDimitry Andric 18580b57cec5SDimitry Andric int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates - 18590b57cec5SDimitry Andric getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates); 18600b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 18610b57cec5SDimitry Andric 18620b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 18630b57cec5SDimitry Andric break; 18640b57cec5SDimitry Andric } 18650b57cec5SDimitry Andric } 18660b57cec5SDimitry Andric } 18670b57cec5SDimitry Andric 18680b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_operands()) { 18690b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg())) 18700b57cec5SDimitry Andric continue; 18710b57cec5SDimitry Andric 1872e8d8bef9SDimitry Andric if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 18730b57cec5SDimitry Andric continue; 18740b57cec5SDimitry Andric 18750b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcABWaitStates = 4; 
18760b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcCWaitStates = 2; 18770b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4; 18780b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10; 18790b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18; 18800b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1; 18810b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7; 18820b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15; 18830b57cec5SDimitry Andric const int MaxWaitStates = 18; 18848bcb0991SDimitry Andric Register Reg = Op.getReg(); 18850b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 18860b57cec5SDimitry Andric 1887*81ad6265SDimitry Andric auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency, 1888fe6060f1SDimitry Andric this](const MachineInstr &MI) { 1889*81ad6265SDimitry Andric if (!SIInstrInfo::isMFMA(MI)) 18900b57cec5SDimitry Andric return false; 1891fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 18920b57cec5SDimitry Andric if (DstReg == Reg) 18930b57cec5SDimitry Andric return false; 1894fe6060f1SDimitry Andric HazardDefLatency = 1895fe6060f1SDimitry Andric std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI)); 18960b57cec5SDimitry Andric return TRI.regsOverlap(DstReg, Reg); 18970b57cec5SDimitry Andric }; 18980b57cec5SDimitry Andric 18990b57cec5SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, 19000b57cec5SDimitry Andric MaxWaitStates); 19010b57cec5SDimitry Andric int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates; 19020b57cec5SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 19030b57cec5SDimitry Andric int OpNo = MI->getOperandNo(&Op); 19040b57cec5SDimitry Andric if (OpNo == SrcCIdx) { 19050b57cec5SDimitry Andric NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates; 
1906e8d8bef9SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) { 19070b57cec5SDimitry Andric switch (HazardDefLatency) { 19080b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates; 19090b57cec5SDimitry Andric break; 19100b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates; 19110b57cec5SDimitry Andric break; 19120b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 19130b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates; 19140b57cec5SDimitry Andric break; 19150b57cec5SDimitry Andric } 1916e8d8bef9SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 19170b57cec5SDimitry Andric switch (HazardDefLatency) { 19180b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates; 19190b57cec5SDimitry Andric break; 19200b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates; 19210b57cec5SDimitry Andric break; 19220b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 19230b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates; 19240b57cec5SDimitry Andric break; 19250b57cec5SDimitry Andric } 19260b57cec5SDimitry Andric } 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 19290b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 19300b57cec5SDimitry Andric 19310b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 19320b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 
19330b57cec5SDimitry Andric 1934fe6060f1SDimitry Andric auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) { 1935fe6060f1SDimitry Andric if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 19360b57cec5SDimitry Andric return false; 1937fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 19380b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 19390b57cec5SDimitry Andric }; 19400b57cec5SDimitry Andric 19410b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcCWaitStates = 1; 19420b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcABWaitStates = 3; 19430b57cec5SDimitry Andric const int AccVGPRWriteAccVgprReadWaitStates = 3; 19440b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates; 19450b57cec5SDimitry Andric if (OpNo == SrcCIdx) 19460b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates; 1947e8d8bef9SDimitry Andric else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) 19480b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates; 19490b57cec5SDimitry Andric 19500b57cec5SDimitry Andric WaitStatesNeededForUse = NeedWaitStates - 19510b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates); 19520b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 19530b57cec5SDimitry Andric 19540b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 19550b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 
19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric 1958e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 19590b57cec5SDimitry Andric const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0; 19600b57cec5SDimitry Andric const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5; 19610b57cec5SDimitry Andric const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13; 19620b57cec5SDimitry Andric const int MaxWaitStates = 13; 19638bcb0991SDimitry Andric Register DstReg = MI->getOperand(0).getReg(); 19640b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 19650b57cec5SDimitry Andric 1966*81ad6265SDimitry Andric auto IsSrcCMFMAFn = [DstReg, &HazardDefLatency, 1967fe6060f1SDimitry Andric this](const MachineInstr &MI) { 1968*81ad6265SDimitry Andric if (!SIInstrInfo::isMFMA(MI)) 19690b57cec5SDimitry Andric return false; 1970fe6060f1SDimitry Andric Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg(); 1971fe6060f1SDimitry Andric HazardDefLatency = 1972fe6060f1SDimitry Andric std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI)); 19730b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 19740b57cec5SDimitry Andric }; 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates); 19770b57cec5SDimitry Andric int NeedWaitStates; 19780b57cec5SDimitry Andric switch (HazardDefLatency) { 19790b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates; 19800b57cec5SDimitry Andric break; 19810b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates; 19820b57cec5SDimitry Andric break; 19830b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 19840b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates; 19850b57cec5SDimitry Andric break; 19860b57cec5SDimitry Andric } 19870b57cec5SDimitry Andric 19880b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince; 
19890b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 19900b57cec5SDimitry Andric } 19910b57cec5SDimitry Andric 1992*81ad6265SDimitry Andric // Pad neighboring MFMA with noops for better inter-wave performance. 1993*81ad6265SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkMFMAPadding(MI)); 1994*81ad6265SDimitry Andric 19950b57cec5SDimitry Andric return WaitStatesNeeded; 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric 1998fe6060f1SDimitry Andric int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { 1999fe6060f1SDimitry Andric int WaitStatesNeeded = 0; 2000fe6060f1SDimitry Andric unsigned Opc = MI->getOpcode(); 2001fe6060f1SDimitry Andric 2002*81ad6265SDimitry Andric auto IsLegacyVALUFn = [](const MachineInstr &MI) { 2003*81ad6265SDimitry Andric return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI); 2004fe6060f1SDimitry Andric }; 2005fe6060f1SDimitry Andric 2006*81ad6265SDimitry Andric auto IsLegacyVALUNotDotFn = [](const MachineInstr &MI) { 2007*81ad6265SDimitry Andric return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI) && 2008*81ad6265SDimitry Andric !SIInstrInfo::isDOT(MI); 2009fe6060f1SDimitry Andric }; 2010fe6060f1SDimitry Andric 2011*81ad6265SDimitry Andric if (!SIInstrInfo::isMFMA(*MI)) 2012fe6060f1SDimitry Andric return WaitStatesNeeded; 2013fe6060f1SDimitry Andric 2014fe6060f1SDimitry Andric const int VALUWritesExecWaitStates = 4; 2015fe6060f1SDimitry Andric int WaitStatesNeededForUse = VALUWritesExecWaitStates - 2016fe6060f1SDimitry Andric getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn, 2017fe6060f1SDimitry Andric VALUWritesExecWaitStates); 2018fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 2019fe6060f1SDimitry Andric 2020fe6060f1SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 2021fe6060f1SDimitry Andric 2022fe6060f1SDimitry Andric // Loop for both DGEMM and S/HGEMM 2nd 
// instruction.
  for (const MachineOperand &Use : MI->explicit_uses()) {
    // Required wait states, keyed by producer kind (S/D MFMA, XDL, number of
    // passes) and by which source operand of MI the producer's result feeds.
    const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
    const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
    const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3;
    const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5;
    const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4;
    const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9;
    const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8;
    const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17;
    const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16;
    const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
    const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
    const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
    const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
    const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
    const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
    const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
    const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
    const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
    const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
    const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4;
    const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6;
    const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10;
    const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18;
    const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5;
    const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7;
    const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11;
    const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19;
    const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
    const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
    const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
    const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
    const int MaxWaitStates = 19;

    if (!Use.isReg())
      continue;
    Register Reg = Use.getReg();
    // Both are written by IsOverlappedMFMAFn and only read after the search
    // below has reported a hit.
    bool FullReg;
    const MachineInstr *MI1;

    // Matches an MFMA whose destination overlaps Reg. For every MFMA visited
    // it records the instruction in MI1 and whether its destination is
    // exactly Reg in FullReg.
    auto IsOverlappedMFMAFn = [Reg, &FullReg, &MI1,
                               this](const MachineInstr &MI) {
      if (!SIInstrInfo::isMFMA(MI))
        return false;
      Register DstReg = MI.getOperand(0).getReg();
      FullReg = (DstReg == Reg);
      MI1 = &MI;
      return TRI.regsOverlap(DstReg, Reg);
    };

    // Producer kind 1: a VALU that is neither an MFMA nor a DOT.
    WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates -
      getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // Producer kind 2: an earlier MFMA.
    int NumWaitStates =
        getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
    // NOTE(review): int max is presumably the "nothing found" result of
    // getWaitStatesSinceDef - its definition is outside this chunk.
    if (NumWaitStates == std::numeric_limits<int>::max())
      continue;

    int OpNo = MI->getOperandNo(&Use);
    unsigned Opc1 = MI1->getOpcode();
    int NeedWaitStates = 0;
    if (OpNo == SrcCIdx) {
      // The earlier MFMA result feeds src2 (SrcC) of MI.
      if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
        NeedWaitStates = 0;
      } else if (FullReg) {
        // Exact register match: only the two cases below require waiting;
        // otherwise NeedWaitStates stays 0.
        if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
             Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
            (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
             Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
          NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
        else if (ST.hasGFX940Insts() &&
                 TSchedModel.computeInstrLatency(MI1) == 2)
          NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
      } else {
        // Partial overlap with SrcC.
        switch (Opc1) {
        case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
        case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
        case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
        case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
          if (!isXDL(ST, *MI))
            NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
          break;
        case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
        case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
          if (!isXDL(ST, *MI))
            NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
          break;
        default:
          // On GFX940, an XDL consumer after a non-XDL producer needs no
          // wait states here.
          if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1))
            break;
          // Select by the producer's scheduling-model latency.
          switch (TSchedModel.computeInstrLatency(MI1)) {
          case 2:
            NeedWaitStates = ST.hasGFX940Insts()
              ? isXDL(ST, *MI1)
                ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
                : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
              : isDGEMM(Opc)
                ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
                : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
            break;
          case 4:
            assert(ST.hasGFX940Insts());
            NeedWaitStates = isXDL(ST, *MI1)
              ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
              : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
            break;
          case 8:
            NeedWaitStates = ST.hasGFX940Insts()
              ? isXDL(ST, *MI1)
                ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
                : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
              : isDGEMM(Opc)
                ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
                : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
            break;
          case 16: LLVM_FALLTHROUGH;
          default:
            NeedWaitStates = ST.hasGFX940Insts()
              ? isXDL(ST, *MI1)
                ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
                : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
              : isDGEMM(Opc)
                ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
                : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
          }
        }
      }
    } else {
      // The earlier MFMA result feeds an operand other than src2 (the
      // SrcAB constants).
      switch (Opc1) {
      case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
        NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
        break;
      case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
      case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
        NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates;
        break;
      default:
        // Select by the producer's scheduling-model latency.
        switch (TSchedModel.computeInstrLatency(MI1)) {
        case 2:
          NeedWaitStates = ST.hasGFX940Insts()
            ? isXDL(ST, *MI1)
              ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
              : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
            : SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
          break;
        case 4:
          assert(ST.hasGFX940Insts());
          NeedWaitStates = isXDL(ST, *MI1)
            ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
            : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
          break;
        case 8:
          NeedWaitStates = ST.hasGFX940Insts()
            ? isXDL(ST, *MI1)
              ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
              : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
            : SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
          break;
        case 16: LLVM_FALLTHROUGH;
        default:
          NeedWaitStates = ST.hasGFX940Insts()
            ? isXDL(ST, *MI1)
              ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
              : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
            : SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
        }
      }
    }
    // Already waiting at least as long as this hazard could require.
    if (WaitStatesNeeded >= NeedWaitStates)
      continue;

    WaitStatesNeededForUse = NeedWaitStates - NumWaitStates;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      break;
  }

  return WaitStatesNeeded;
}

/// Compute the wait states required before \p MI because of accvgpr
/// read/write instructions feeding its VGPR uses.
///
/// For every explicit VGPR use of \p MI two producers are checked:
///  * a V_ACCVGPR_READ that defines the register (2 wait states), and
///  * a V_ACCVGPR_READ or V_ACCVGPR_WRITE that was itself preceded by a
///    non-MAI VALU definition of the register (1 wait state).
///
/// \returns 0 if the subtarget has no MAI instructions or has GFX90A
/// instructions; otherwise the largest requirement found.
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
  if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
    return 0;

  int WaitStatesNeeded = 0;

  auto IsAccVgprReadFn = [](const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
  };

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    Register Reg = Op.getReg();

    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;

    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Matches an accvgpr read/write for which a nested search finds a
    // non-MAI VALU definition of Reg within 2 wait states.
    auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) {
      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
          MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
        return false;
      auto IsVALUFn = [](const MachineInstr &MI) {
        return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMAI(MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
             std::numeric_limits<int>::max();
    };

    WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
  if (!ST.hasGFX90AInsts())
    return 0;

  auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
    return isDGEMM(MI.getOpcode());
  };

  // This is checked in checkMAIHazards90A()
  if (SIInstrInfo::isMFMA(*MI))
    return 0;

  int WaitStatesNeeded = 0;

  bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) ||
                       SIInstrInfo::isFLAT(*MI) ||
                       SIInstrInfo::isDS(*MI) ||
                       SIInstrInfo::isEXP(*MI);
  bool IsVALU = SIInstrInfo::isVALU(*MI);

  const MachineInstr *MFMA = nullptr;
  unsigned Reg;
  auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
    if (!SIInstrInfo::isMFMA(MI) ||
        !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
      return false;
    MFMA = &MI;
    return true;
  };

  const MachineInstr *DOT = nullptr;
  auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
    if (!SIInstrInfo::isDOT(MI) ||
        !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
      return false;
    DOT = &MI;
    return true;
  };

  int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  if (IsMemOrExport ||
IsVALU) { 2298fe6060f1SDimitry Andric const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5; 2299fe6060f1SDimitry Andric const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11; 2300fe6060f1SDimitry Andric const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19; 2301*81ad6265SDimitry Andric const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4; 2302*81ad6265SDimitry Andric const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6; 2303*81ad6265SDimitry Andric const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10; 2304*81ad6265SDimitry Andric const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18; 2305*81ad6265SDimitry Andric const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5; 2306*81ad6265SDimitry Andric const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7; 2307*81ad6265SDimitry Andric const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11; 2308*81ad6265SDimitry Andric const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19; 2309fe6060f1SDimitry Andric const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9; 2310fe6060f1SDimitry Andric const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; 2311fe6060f1SDimitry Andric const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; 2312fe6060f1SDimitry Andric const int DMFMA16x16WriteVgprVALUReadWaitStates = 11; 2313fe6060f1SDimitry Andric const int DotWriteSameDotReadSrcAB = 3; 2314fe6060f1SDimitry Andric const int DotWriteDifferentVALURead = 3; 2315fe6060f1SDimitry Andric const int MaxWaitStates = 19; 2316fe6060f1SDimitry Andric 2317fe6060f1SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 2318fe6060f1SDimitry Andric if (!Use.isReg()) 2319fe6060f1SDimitry Andric continue; 2320fe6060f1SDimitry Andric Reg = Use.getReg(); 2321fe6060f1SDimitry Andric 2322fe6060f1SDimitry Andric DOT = nullptr; 2323fe6060f1SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn, 2324fe6060f1SDimitry Andric 
MaxWaitStates); 2325fe6060f1SDimitry Andric if (DOT) { 2326fe6060f1SDimitry Andric int NeedWaitStates = 0; 2327fe6060f1SDimitry Andric if (DOT->getOpcode() == MI->getOpcode()) { 2328fe6060f1SDimitry Andric if (&Use - &MI->getOperand(0) != SrcCIdx) 2329fe6060f1SDimitry Andric NeedWaitStates = DotWriteSameDotReadSrcAB; 2330fe6060f1SDimitry Andric } else { 2331fe6060f1SDimitry Andric NeedWaitStates = DotWriteDifferentVALURead; 2332fe6060f1SDimitry Andric } 2333fe6060f1SDimitry Andric 2334fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 2335fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 2336fe6060f1SDimitry Andric } 2337fe6060f1SDimitry Andric 2338fe6060f1SDimitry Andric MFMA = nullptr; 23394824e7fdSDimitry Andric WaitStatesSinceDef = 23404824e7fdSDimitry Andric getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); 2341fe6060f1SDimitry Andric if (!MFMA) 2342fe6060f1SDimitry Andric continue; 2343fe6060f1SDimitry Andric 2344fe6060f1SDimitry Andric unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); 2345fe6060f1SDimitry Andric int NeedWaitStates = MaxWaitStates; 2346fe6060f1SDimitry Andric switch (HazardDefLatency) { 2347fe6060f1SDimitry Andric case 2: 2348*81ad6265SDimitry Andric NeedWaitStates = 2349*81ad6265SDimitry Andric ST.hasGFX940Insts() 2350*81ad6265SDimitry Andric ? isXDL(ST, *MFMA) 2351*81ad6265SDimitry Andric ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates 2352*81ad6265SDimitry Andric : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates 2353*81ad6265SDimitry Andric : SMFMA4x4WriteVgprVALUMemExpReadWaitStates; 2354fe6060f1SDimitry Andric break; 2355fe6060f1SDimitry Andric case 4: 2356*81ad6265SDimitry Andric assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); 2357fe6060f1SDimitry Andric NeedWaitStates = 2358*81ad6265SDimitry Andric isDGEMM(MFMA->getOpcode()) 2359*81ad6265SDimitry Andric ? IsMemOrExport ? 
DMFMA4x4WriteVgprMemExpReadWaitStates 2360*81ad6265SDimitry Andric : DMFMA4x4WriteVgprVALUReadWaitStates 2361*81ad6265SDimitry Andric : isXDL(ST, *MFMA) 2362*81ad6265SDimitry Andric ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates 2363*81ad6265SDimitry Andric : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates; 2364fe6060f1SDimitry Andric break; 2365fe6060f1SDimitry Andric case 8: 2366*81ad6265SDimitry Andric NeedWaitStates = 2367*81ad6265SDimitry Andric ST.hasGFX940Insts() 2368*81ad6265SDimitry Andric ? isXDL(ST, *MFMA) 2369*81ad6265SDimitry Andric ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates 2370*81ad6265SDimitry Andric : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates 2371*81ad6265SDimitry Andric : SMFMA16x16WriteVgprVALUMemExpReadWaitStates; 2372fe6060f1SDimitry Andric break; 2373fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 2374fe6060f1SDimitry Andric default: 2375fe6060f1SDimitry Andric NeedWaitStates = 2376fe6060f1SDimitry Andric isDGEMM(MFMA->getOpcode()) 2377fe6060f1SDimitry Andric ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates 2378fe6060f1SDimitry Andric : DMFMA16x16WriteVgprVALUReadWaitStates 2379*81ad6265SDimitry Andric : ST.hasGFX940Insts() 2380*81ad6265SDimitry Andric ? isXDL(ST, *MFMA) 2381*81ad6265SDimitry Andric ? 
GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates 2382*81ad6265SDimitry Andric : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates 2383fe6060f1SDimitry Andric : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; 2384fe6060f1SDimitry Andric break; 2385fe6060f1SDimitry Andric } 2386fe6060f1SDimitry Andric 2387fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 2388fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 2389fe6060f1SDimitry Andric 2390fe6060f1SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 2391fe6060f1SDimitry Andric break; 2392fe6060f1SDimitry Andric } 2393fe6060f1SDimitry Andric } 2394fe6060f1SDimitry Andric 2395fe6060f1SDimitry Andric unsigned Opc = MI->getOpcode(); 2396fe6060f1SDimitry Andric const int DMFMAToFMA64WaitStates = 2; 2397fe6060f1SDimitry Andric if ((Opc == AMDGPU::V_FMA_F64_e64 || 2398fe6060f1SDimitry Andric Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 || 2399fe6060f1SDimitry Andric Opc == AMDGPU::V_FMAC_F64_dpp) && 2400fe6060f1SDimitry Andric WaitStatesNeeded < DMFMAToFMA64WaitStates) { 2401fe6060f1SDimitry Andric int WaitStatesNeededForUse = DMFMAToFMA64WaitStates - 2402fe6060f1SDimitry Andric getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates); 2403fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 2404fe6060f1SDimitry Andric } 2405fe6060f1SDimitry Andric 2406fe6060f1SDimitry Andric if (!IsVALU && !IsMemOrExport) 2407fe6060f1SDimitry Andric return WaitStatesNeeded; 2408fe6060f1SDimitry Andric 2409fe6060f1SDimitry Andric for (const MachineOperand &Def : MI->defs()) { 2410fe6060f1SDimitry Andric const int SMFMA4x4WriteVgprVALUWawWaitStates = 5; 2411fe6060f1SDimitry Andric const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; 2412fe6060f1SDimitry Andric const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; 2413*81ad6265SDimitry Andric const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4; 
2414*81ad6265SDimitry Andric const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6; 2415*81ad6265SDimitry Andric const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10; 2416*81ad6265SDimitry Andric const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18; 2417*81ad6265SDimitry Andric const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5; 2418*81ad6265SDimitry Andric const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7; 2419*81ad6265SDimitry Andric const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11; 2420*81ad6265SDimitry Andric const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19; 2421fe6060f1SDimitry Andric const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; 2422*81ad6265SDimitry Andric const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; 2423fe6060f1SDimitry Andric const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; 2424fe6060f1SDimitry Andric const int SMFMA32x32ReadVgprVALUWarWaitStates = 15; 2425fe6060f1SDimitry Andric const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6; 2426fe6060f1SDimitry Andric const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11; 2427fe6060f1SDimitry Andric const int DotWriteDifferentVALUWrite = 3; 2428fe6060f1SDimitry Andric const int MaxWaitStates = 19; 2429fe6060f1SDimitry Andric const int MaxWarWaitStates = 15; 2430fe6060f1SDimitry Andric 2431fe6060f1SDimitry Andric Reg = Def.getReg(); 2432fe6060f1SDimitry Andric 2433fe6060f1SDimitry Andric DOT = nullptr; 2434fe6060f1SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn, 2435fe6060f1SDimitry Andric MaxWaitStates); 2436fe6060f1SDimitry Andric if (DOT && DOT->getOpcode() != MI->getOpcode()) 2437fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite - 2438fe6060f1SDimitry Andric WaitStatesSinceDef); 2439fe6060f1SDimitry Andric 2440fe6060f1SDimitry Andric MFMA = nullptr; 24414824e7fdSDimitry Andric WaitStatesSinceDef = 24424824e7fdSDimitry Andric getWaitStatesSinceDef(Reg, IsMFMAWriteFn, 
MaxWaitStates); 2443fe6060f1SDimitry Andric if (MFMA) { 2444fe6060f1SDimitry Andric int NeedWaitStates = MaxWaitStates; 2445fe6060f1SDimitry Andric switch (TSchedModel.computeInstrLatency(MFMA)) { 2446fe6060f1SDimitry Andric case 2: 2447*81ad6265SDimitry Andric NeedWaitStates = ST.hasGFX940Insts() 2448*81ad6265SDimitry Andric ? isXDL(ST, *MFMA) 2449*81ad6265SDimitry Andric ? GFX940_XDL2PassWriteVgprVALUWawWaitStates 2450*81ad6265SDimitry Andric : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates 2451*81ad6265SDimitry Andric : SMFMA4x4WriteVgprVALUWawWaitStates; 2452fe6060f1SDimitry Andric break; 2453fe6060f1SDimitry Andric case 4: 2454*81ad6265SDimitry Andric assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); 2455*81ad6265SDimitry Andric NeedWaitStates = isDGEMM(MFMA->getOpcode()) 2456*81ad6265SDimitry Andric ? DMFMA4x4WriteVgprVALUWriteWaitStates 2457*81ad6265SDimitry Andric : isXDL(ST, *MFMA) 2458*81ad6265SDimitry Andric ? GFX940_XDL4PassWriteVgprVALUWawWaitStates 2459*81ad6265SDimitry Andric : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates; 2460fe6060f1SDimitry Andric break; 2461fe6060f1SDimitry Andric case 8: 2462*81ad6265SDimitry Andric NeedWaitStates = ST.hasGFX940Insts() 2463*81ad6265SDimitry Andric ? isXDL(ST, *MFMA) 2464*81ad6265SDimitry Andric ? GFX940_XDL8PassWriteVgprVALUWawWaitStates 2465*81ad6265SDimitry Andric : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates 2466*81ad6265SDimitry Andric : SMFMA16x16WriteVgprVALUWawWaitStates; 2467fe6060f1SDimitry Andric break; 2468fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 2469fe6060f1SDimitry Andric default: 2470fe6060f1SDimitry Andric NeedWaitStates = isDGEMM(MFMA->getOpcode()) 2471fe6060f1SDimitry Andric ? DMFMA16x16WriteVgprVALUWriteWaitStates 2472*81ad6265SDimitry Andric : ST.hasGFX940Insts() 2473*81ad6265SDimitry Andric ? isXDL(ST, *MFMA) 2474*81ad6265SDimitry Andric ? 
GFX940_XDL16PassWriteVgprVALUWawWaitStates 2475*81ad6265SDimitry Andric : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates 2476fe6060f1SDimitry Andric : SMFMA32x32WriteVgprVALUWawWaitStates; 2477fe6060f1SDimitry Andric break; 2478fe6060f1SDimitry Andric } 2479fe6060f1SDimitry Andric 2480fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 2481fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 2482fe6060f1SDimitry Andric 2483fe6060f1SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 2484fe6060f1SDimitry Andric break; 2485fe6060f1SDimitry Andric } 2486fe6060f1SDimitry Andric 2487*81ad6265SDimitry Andric auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) { 2488*81ad6265SDimitry Andric if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) || 2489fe6060f1SDimitry Andric !MI.readsRegister(Reg, &TRI)) 2490fe6060f1SDimitry Andric return false; 2491fe6060f1SDimitry Andric 2492*81ad6265SDimitry Andric if (ST.hasGFX940Insts() && !isXDL(ST, MI)) 2493*81ad6265SDimitry Andric return false; 2494*81ad6265SDimitry Andric 2495fe6060f1SDimitry Andric const MachineOperand *SrcC = 2496fe6060f1SDimitry Andric TII.getNamedOperand(MI, AMDGPU::OpName::src2); 2497fe6060f1SDimitry Andric assert(SrcC); 2498fe6060f1SDimitry Andric if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg)) 2499fe6060f1SDimitry Andric return false; 2500fe6060f1SDimitry Andric 2501fe6060f1SDimitry Andric MFMA = &MI; 2502fe6060f1SDimitry Andric return true; 2503fe6060f1SDimitry Andric }; 2504fe6060f1SDimitry Andric 2505fe6060f1SDimitry Andric MFMA = nullptr; 2506fe6060f1SDimitry Andric int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn, 2507fe6060f1SDimitry Andric MaxWarWaitStates); 2508fe6060f1SDimitry Andric if (!MFMA) 2509fe6060f1SDimitry Andric continue; 2510fe6060f1SDimitry Andric 2511fe6060f1SDimitry Andric unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); 2512fe6060f1SDimitry 
Andric int NeedWaitStates = MaxWaitStates; 2513fe6060f1SDimitry Andric switch (HazardDefLatency) { 2514fe6060f1SDimitry Andric case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; 2515fe6060f1SDimitry Andric break; 2516*81ad6265SDimitry Andric case 4: assert(ST.hasGFX940Insts()); 2517*81ad6265SDimitry Andric NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates; 2518*81ad6265SDimitry Andric break; 2519fe6060f1SDimitry Andric case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; 2520fe6060f1SDimitry Andric break; 2521fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 2522fe6060f1SDimitry Andric default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates; 2523fe6060f1SDimitry Andric break; 2524fe6060f1SDimitry Andric } 2525fe6060f1SDimitry Andric 2526fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse; 2527fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 2528fe6060f1SDimitry Andric } 2529fe6060f1SDimitry Andric 2530fe6060f1SDimitry Andric return WaitStatesNeeded; 2531fe6060f1SDimitry Andric } 2532fe6060f1SDimitry Andric 2533e8d8bef9SDimitry Andric bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 2534e8d8bef9SDimitry Andric if (!SU->isInstr()) 2535e8d8bef9SDimitry Andric return false; 2536e8d8bef9SDimitry Andric 2537fe6060f1SDimitry Andric const MachineInstr *MAI = nullptr; 2538*81ad6265SDimitry Andric 2539fe6060f1SDimitry Andric auto IsMFMAFn = [&MAI](const MachineInstr &MI) { 2540e8d8bef9SDimitry Andric MAI = nullptr; 2541*81ad6265SDimitry Andric if (SIInstrInfo::isMFMA(MI)) 2542fe6060f1SDimitry Andric MAI = &MI; 2543e8d8bef9SDimitry Andric return MAI != nullptr; 2544e8d8bef9SDimitry Andric }; 2545e8d8bef9SDimitry Andric 2546e8d8bef9SDimitry Andric MachineInstr *MI = SU->getInstr(); 2547fe6060f1SDimitry Andric if (IsMFMAFn(*MI)) { 2548e8d8bef9SDimitry Andric int W = getWaitStatesSince(IsMFMAFn, 16); 2549e8d8bef9SDimitry Andric if (MAI) 
2550e8d8bef9SDimitry Andric return W < (int)TSchedModel.computeInstrLatency(MAI); 2551e8d8bef9SDimitry Andric } 2552e8d8bef9SDimitry Andric 2553e8d8bef9SDimitry Andric return false; 2554e8d8bef9SDimitry Andric } 2555