1*0b57cec5SDimitry Andric //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file implements hazard recognizers for scheduling on GCN processors. 10*0b57cec5SDimitry Andric // 11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #include "GCNHazardRecognizer.h" 14*0b57cec5SDimitry Andric #include "AMDGPUSubtarget.h" 15*0b57cec5SDimitry Andric #include "SIDefines.h" 16*0b57cec5SDimitry Andric #include "SIInstrInfo.h" 17*0b57cec5SDimitry Andric #include "SIRegisterInfo.h" 18*0b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19*0b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 20*0b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h" 21*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 22*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 23*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 24*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 25*0b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 26*0b57cec5SDimitry Andric #include "llvm/MC/MCInstrDesc.h" 27*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 28*0b57cec5SDimitry Andric #include <algorithm> 29*0b57cec5SDimitry Andric #include <cassert> 30*0b57cec5SDimitry Andric #include <limits> 31*0b57cec5SDimitry Andric #include <set> 32*0b57cec5SDimitry Andric #include <vector> 33*0b57cec5SDimitry Andric 34*0b57cec5SDimitry Andric using namespace llvm; 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 37*0b57cec5SDimitry Andric // Hazard Recoginizer Implementation 38*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 39*0b57cec5SDimitry Andric 40*0b57cec5SDimitry Andric GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 41*0b57cec5SDimitry Andric IsHazardRecognizerMode(false), 42*0b57cec5SDimitry Andric CurrCycleInstr(nullptr), 43*0b57cec5SDimitry Andric MF(MF), 44*0b57cec5SDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), 45*0b57cec5SDimitry Andric TII(*ST.getInstrInfo()), 46*0b57cec5SDimitry Andric TRI(TII.getRegisterInfo()), 47*0b57cec5SDimitry Andric ClauseUses(TRI.getNumRegUnits()), 48*0b57cec5SDimitry Andric ClauseDefs(TRI.getNumRegUnits()) { 49*0b57cec5SDimitry Andric MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5; 50*0b57cec5SDimitry Andric TSchedModel.init(&ST); 51*0b57cec5SDimitry Andric } 52*0b57cec5SDimitry Andric 53*0b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 54*0b57cec5SDimitry Andric EmitInstruction(SU->getInstr()); 55*0b57cec5SDimitry Andric } 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 58*0b57cec5SDimitry Andric CurrCycleInstr = MI; 59*0b57cec5SDimitry Andric } 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric static bool isDivFMas(unsigned Opcode) { 62*0b57cec5SDimitry Andric return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64; 63*0b57cec5SDimitry Andric } 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric static bool isSGetReg(unsigned Opcode) { 66*0b57cec5SDimitry Andric return Opcode == AMDGPU::S_GETREG_B32; 67*0b57cec5SDimitry Andric } 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric static bool isSSetReg(unsigned Opcode) { 70*0b57cec5SDimitry Andric return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32; 71*0b57cec5SDimitry Andric } 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric static bool isRWLane(unsigned Opcode) { 74*0b57cec5SDimitry Andric return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32; 75*0b57cec5SDimitry Andric } 76*0b57cec5SDimitry Andric 77*0b57cec5SDimitry Andric static bool isRFE(unsigned Opcode) { 78*0b57cec5SDimitry Andric return Opcode == AMDGPU::S_RFE_B64; 79*0b57cec5SDimitry Andric } 80*0b57cec5SDimitry Andric 81*0b57cec5SDimitry Andric static bool isSMovRel(unsigned Opcode) { 82*0b57cec5SDimitry Andric switch (Opcode) { 83*0b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B32: 84*0b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B64: 85*0b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B32: 86*0b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B64: 87*0b57cec5SDimitry Andric return true; 88*0b57cec5SDimitry Andric default: 89*0b57cec5SDimitry Andric return false; 90*0b57cec5SDimitry Andric } 91*0b57cec5SDimitry Andric } 92*0b57cec5SDimitry Andric 93*0b57cec5SDimitry Andric static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, 94*0b57cec5SDimitry Andric const MachineInstr &MI) { 95*0b57cec5SDimitry Andric if (TII.isAlwaysGDS(MI.getOpcode())) 96*0b57cec5SDimitry Andric return true; 97*0b57cec5SDimitry Andric 98*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 99*0b57cec5SDimitry Andric case AMDGPU::S_SENDMSG: 100*0b57cec5SDimitry Andric case AMDGPU::S_SENDMSGHALT: 101*0b57cec5SDimitry Andric case AMDGPU::S_TTRACEDATA: 102*0b57cec5SDimitry Andric return true; 103*0b57cec5SDimitry Andric // These DS opcodes don't support GDS. 104*0b57cec5SDimitry Andric case AMDGPU::DS_NOP: 105*0b57cec5SDimitry Andric case AMDGPU::DS_PERMUTE_B32: 106*0b57cec5SDimitry Andric case AMDGPU::DS_BPERMUTE_B32: 107*0b57cec5SDimitry Andric return false; 108*0b57cec5SDimitry Andric default: 109*0b57cec5SDimitry Andric if (TII.isDS(MI.getOpcode())) { 110*0b57cec5SDimitry Andric int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 111*0b57cec5SDimitry Andric AMDGPU::OpName::gds); 112*0b57cec5SDimitry Andric if (MI.getOperand(GDS).getImm()) 113*0b57cec5SDimitry Andric return true; 114*0b57cec5SDimitry Andric } 115*0b57cec5SDimitry Andric return false; 116*0b57cec5SDimitry Andric } 117*0b57cec5SDimitry Andric } 118*0b57cec5SDimitry Andric 119*0b57cec5SDimitry Andric static bool isPermlane(const MachineInstr &MI) { 120*0b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 121*0b57cec5SDimitry Andric return Opcode == AMDGPU::V_PERMLANE16_B32 || 122*0b57cec5SDimitry Andric Opcode == AMDGPU::V_PERMLANEX16_B32; 123*0b57cec5SDimitry Andric } 124*0b57cec5SDimitry Andric 125*0b57cec5SDimitry Andric static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 126*0b57cec5SDimitry Andric const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 127*0b57cec5SDimitry Andric AMDGPU::OpName::simm16); 128*0b57cec5SDimitry Andric return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 129*0b57cec5SDimitry Andric } 130*0b57cec5SDimitry Andric 131*0b57cec5SDimitry Andric ScheduleHazardRecognizer::HazardType 132*0b57cec5SDimitry Andric GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 133*0b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 134*0b57cec5SDimitry Andric if (MI->isBundle()) 135*0b57cec5SDimitry Andric return NoHazard; 136*0b57cec5SDimitry Andric 137*0b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 138*0b57cec5SDimitry Andric return NoopHazard; 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric // FIXME: Should flat be considered vmem? 141*0b57cec5SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 142*0b57cec5SDimitry Andric SIInstrInfo::isFLAT(*MI)) 143*0b57cec5SDimitry Andric && checkVMEMHazards(MI) > 0) 144*0b57cec5SDimitry Andric return NoopHazard; 145*0b57cec5SDimitry Andric 146*0b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0) 147*0b57cec5SDimitry Andric return NoopHazard; 148*0b57cec5SDimitry Andric 149*0b57cec5SDimitry Andric if (checkFPAtomicToDenormModeHazard(MI) > 0) 150*0b57cec5SDimitry Andric return NoopHazard; 151*0b57cec5SDimitry Andric 152*0b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 153*0b57cec5SDimitry Andric return NoHazard; 154*0b57cec5SDimitry Andric 155*0b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 156*0b57cec5SDimitry Andric return NoopHazard; 157*0b57cec5SDimitry Andric 158*0b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 159*0b57cec5SDimitry Andric return NoopHazard; 160*0b57cec5SDimitry Andric 161*0b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 162*0b57cec5SDimitry Andric return NoopHazard; 163*0b57cec5SDimitry Andric 164*0b57cec5SDimitry Andric if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) 165*0b57cec5SDimitry Andric return NoopHazard; 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 168*0b57cec5SDimitry Andric return NoopHazard; 169*0b57cec5SDimitry Andric 170*0b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 171*0b57cec5SDimitry Andric return NoopHazard; 172*0b57cec5SDimitry Andric 173*0b57cec5SDimitry Andric if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) 174*0b57cec5SDimitry Andric return NoopHazard; 175*0b57cec5SDimitry Andric 176*0b57cec5SDimitry Andric if (ST.hasReadM0MovRelInterpHazard() && 177*0b57cec5SDimitry Andric (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && 178*0b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 179*0b57cec5SDimitry Andric return NoopHazard; 180*0b57cec5SDimitry Andric 181*0b57cec5SDimitry Andric if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) && 182*0b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 183*0b57cec5SDimitry Andric return NoopHazard; 184*0b57cec5SDimitry Andric 185*0b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0) 186*0b57cec5SDimitry Andric return NoopHazard; 187*0b57cec5SDimitry Andric 188*0b57cec5SDimitry Andric if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0) 189*0b57cec5SDimitry Andric return NoopHazard; 190*0b57cec5SDimitry Andric 191*0b57cec5SDimitry Andric if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) 192*0b57cec5SDimitry Andric return NoopHazard; 193*0b57cec5SDimitry Andric 194*0b57cec5SDimitry Andric if (checkAnyInstHazards(MI) > 0) 195*0b57cec5SDimitry Andric return NoopHazard; 196*0b57cec5SDimitry Andric 197*0b57cec5SDimitry Andric return NoHazard; 198*0b57cec5SDimitry Andric } 199*0b57cec5SDimitry Andric 200*0b57cec5SDimitry Andric static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) { 201*0b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)) 202*0b57cec5SDimitry Andric .addImm(0); 203*0b57cec5SDimitry Andric } 204*0b57cec5SDimitry Andric 205*0b57cec5SDimitry Andric void GCNHazardRecognizer::processBundle() { 206*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator()); 207*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end(); 208*0b57cec5SDimitry Andric // Check bundled MachineInstr's for hazards. 209*0b57cec5SDimitry Andric for (; MI != E && MI->isInsideBundle(); ++MI) { 210*0b57cec5SDimitry Andric CurrCycleInstr = &*MI; 211*0b57cec5SDimitry Andric unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr); 212*0b57cec5SDimitry Andric 213*0b57cec5SDimitry Andric if (IsHazardRecognizerMode) 214*0b57cec5SDimitry Andric fixHazards(CurrCycleInstr); 215*0b57cec5SDimitry Andric 216*0b57cec5SDimitry Andric for (unsigned i = 0; i < WaitStates; ++i) 217*0b57cec5SDimitry Andric insertNoopInBundle(CurrCycleInstr, TII); 218*0b57cec5SDimitry Andric 219*0b57cec5SDimitry Andric // It’s unnecessary to track more than MaxLookAhead instructions. Since we 220*0b57cec5SDimitry Andric // include the bundled MI directly after, only add a maximum of 221*0b57cec5SDimitry Andric // (MaxLookAhead - 1) noops to EmittedInstrs. 222*0b57cec5SDimitry Andric for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i) 223*0b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 224*0b57cec5SDimitry Andric 225*0b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 226*0b57cec5SDimitry Andric EmittedInstrs.resize(MaxLookAhead); 227*0b57cec5SDimitry Andric } 228*0b57cec5SDimitry Andric CurrCycleInstr = nullptr; 229*0b57cec5SDimitry Andric } 230*0b57cec5SDimitry Andric 231*0b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { 232*0b57cec5SDimitry Andric IsHazardRecognizerMode = false; 233*0b57cec5SDimitry Andric return PreEmitNoopsCommon(SU->getInstr()); 234*0b57cec5SDimitry Andric } 235*0b57cec5SDimitry Andric 236*0b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 237*0b57cec5SDimitry Andric IsHazardRecognizerMode = true; 238*0b57cec5SDimitry Andric CurrCycleInstr = MI; 239*0b57cec5SDimitry Andric unsigned W = PreEmitNoopsCommon(MI); 240*0b57cec5SDimitry Andric fixHazards(MI); 241*0b57cec5SDimitry Andric CurrCycleInstr = nullptr; 242*0b57cec5SDimitry Andric return W; 243*0b57cec5SDimitry Andric } 244*0b57cec5SDimitry Andric 245*0b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { 246*0b57cec5SDimitry Andric if (MI->isBundle()) 247*0b57cec5SDimitry Andric return 0; 248*0b57cec5SDimitry Andric 249*0b57cec5SDimitry Andric int WaitStates = std::max(0, checkAnyInstHazards(MI)); 250*0b57cec5SDimitry Andric 251*0b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI)) 252*0b57cec5SDimitry Andric return std::max(WaitStates, checkSMRDHazards(MI)); 253*0b57cec5SDimitry Andric 254*0b57cec5SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) 255*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 256*0b57cec5SDimitry Andric 257*0b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug()) 258*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); 259*0b57cec5SDimitry Andric 260*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); 261*0b57cec5SDimitry Andric 262*0b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 263*0b57cec5SDimitry Andric return WaitStates; 264*0b57cec5SDimitry Andric 265*0b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI)) 266*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkVALUHazards(MI)); 267*0b57cec5SDimitry Andric 268*0b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI)) 269*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 270*0b57cec5SDimitry Andric 271*0b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode())) 272*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 273*0b57cec5SDimitry Andric 274*0b57cec5SDimitry Andric if (isRWLane(MI->getOpcode())) 275*0b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); 276*0b57cec5SDimitry Andric 277*0b57cec5SDimitry Andric if (MI->isInlineAsm()) 278*0b57cec5SDimitry Andric return std::max(WaitStates, checkInlineAsmHazards(MI)); 279*0b57cec5SDimitry Andric 280*0b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode())) 281*0b57cec5SDimitry Andric return std::max(WaitStates, checkGetRegHazards(MI)); 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode())) 284*0b57cec5SDimitry Andric return std::max(WaitStates, checkSetRegHazards(MI)); 285*0b57cec5SDimitry Andric 286*0b57cec5SDimitry Andric if (isRFE(MI->getOpcode())) 287*0b57cec5SDimitry Andric return std::max(WaitStates, checkRFEHazards(MI)); 288*0b57cec5SDimitry Andric 289*0b57cec5SDimitry Andric if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || 290*0b57cec5SDimitry Andric isSMovRel(MI->getOpcode()))) 291*0b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 292*0b57cec5SDimitry Andric 293*0b57cec5SDimitry Andric if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) 294*0b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 295*0b57cec5SDimitry Andric 296*0b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI)) 297*0b57cec5SDimitry Andric return std::max(WaitStates, checkMAIHazards(MI)); 298*0b57cec5SDimitry Andric 299*0b57cec5SDimitry Andric if (MI->mayLoad() || MI->mayStore()) 300*0b57cec5SDimitry Andric return std::max(WaitStates, checkMAILdStHazards(MI)); 301*0b57cec5SDimitry Andric 302*0b57cec5SDimitry Andric return WaitStates; 303*0b57cec5SDimitry Andric } 304*0b57cec5SDimitry Andric 305*0b57cec5SDimitry Andric void GCNHazardRecognizer::EmitNoop() { 306*0b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 307*0b57cec5SDimitry Andric } 308*0b57cec5SDimitry Andric 309*0b57cec5SDimitry Andric void GCNHazardRecognizer::AdvanceCycle() { 310*0b57cec5SDimitry Andric // When the scheduler detects a stall, it will call AdvanceCycle() without 311*0b57cec5SDimitry Andric // emitting any instructions. 312*0b57cec5SDimitry Andric if (!CurrCycleInstr) 313*0b57cec5SDimitry Andric return; 314*0b57cec5SDimitry Andric 315*0b57cec5SDimitry Andric // Do not track non-instructions which do not affect the wait states. 316*0b57cec5SDimitry Andric // If included, these instructions can lead to buffer overflow such that 317*0b57cec5SDimitry Andric // detectable hazards are missed. 318*0b57cec5SDimitry Andric if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() || 319*0b57cec5SDimitry Andric CurrCycleInstr->isKill()) 320*0b57cec5SDimitry Andric return; 321*0b57cec5SDimitry Andric 322*0b57cec5SDimitry Andric if (CurrCycleInstr->isBundle()) { 323*0b57cec5SDimitry Andric processBundle(); 324*0b57cec5SDimitry Andric return; 325*0b57cec5SDimitry Andric } 326*0b57cec5SDimitry Andric 327*0b57cec5SDimitry Andric unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); 328*0b57cec5SDimitry Andric 329*0b57cec5SDimitry Andric // Keep track of emitted instructions 330*0b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 331*0b57cec5SDimitry Andric 332*0b57cec5SDimitry Andric // Add a nullptr for each additional wait state after the first. Make sure 333*0b57cec5SDimitry Andric // not to add more than getMaxLookAhead() items to the list, since we 334*0b57cec5SDimitry Andric // truncate the list to that size right after this loop. 335*0b57cec5SDimitry Andric for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 336*0b57cec5SDimitry Andric i < e; ++i) { 337*0b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 338*0b57cec5SDimitry Andric } 339*0b57cec5SDimitry Andric 340*0b57cec5SDimitry Andric // getMaxLookahead() is the largest number of wait states we will ever need 341*0b57cec5SDimitry Andric // to insert, so there is no point in keeping track of more than that many 342*0b57cec5SDimitry Andric // wait states. 343*0b57cec5SDimitry Andric EmittedInstrs.resize(getMaxLookAhead()); 344*0b57cec5SDimitry Andric 345*0b57cec5SDimitry Andric CurrCycleInstr = nullptr; 346*0b57cec5SDimitry Andric } 347*0b57cec5SDimitry Andric 348*0b57cec5SDimitry Andric void GCNHazardRecognizer::RecedeCycle() { 349*0b57cec5SDimitry Andric llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 350*0b57cec5SDimitry Andric } 351*0b57cec5SDimitry Andric 352*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 353*0b57cec5SDimitry Andric // Helper Functions 354*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 355*0b57cec5SDimitry Andric 356*0b57cec5SDimitry Andric typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn; 357*0b57cec5SDimitry Andric 358*0b57cec5SDimitry Andric // Returns a minimum wait states since \p I walking all predecessors. 359*0b57cec5SDimitry Andric // Only scans until \p IsExpired does not return true. 360*0b57cec5SDimitry Andric // Can only be run in a hazard recognizer mode. 361*0b57cec5SDimitry Andric static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 362*0b57cec5SDimitry Andric MachineBasicBlock *MBB, 363*0b57cec5SDimitry Andric MachineBasicBlock::reverse_instr_iterator I, 364*0b57cec5SDimitry Andric int WaitStates, 365*0b57cec5SDimitry Andric IsExpiredFn IsExpired, 366*0b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> &Visited) { 367*0b57cec5SDimitry Andric for (auto E = MBB->instr_rend(); I != E; ++I) { 368*0b57cec5SDimitry Andric // Don't add WaitStates for parent BUNDLE instructions. 369*0b57cec5SDimitry Andric if (I->isBundle()) 370*0b57cec5SDimitry Andric continue; 371*0b57cec5SDimitry Andric 372*0b57cec5SDimitry Andric if (IsHazard(&*I)) 373*0b57cec5SDimitry Andric return WaitStates; 374*0b57cec5SDimitry Andric 375*0b57cec5SDimitry Andric if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr()) 376*0b57cec5SDimitry Andric continue; 377*0b57cec5SDimitry Andric 378*0b57cec5SDimitry Andric WaitStates += SIInstrInfo::getNumWaitStates(*I); 379*0b57cec5SDimitry Andric 380*0b57cec5SDimitry Andric if (IsExpired(&*I, WaitStates)) 381*0b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 382*0b57cec5SDimitry Andric } 383*0b57cec5SDimitry Andric 384*0b57cec5SDimitry Andric int MinWaitStates = WaitStates; 385*0b57cec5SDimitry Andric bool Found = false; 386*0b57cec5SDimitry Andric for (MachineBasicBlock *Pred : MBB->predecessors()) { 387*0b57cec5SDimitry Andric if (!Visited.insert(Pred).second) 388*0b57cec5SDimitry Andric continue; 389*0b57cec5SDimitry Andric 390*0b57cec5SDimitry Andric int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), 391*0b57cec5SDimitry Andric WaitStates, IsExpired, Visited); 392*0b57cec5SDimitry Andric 393*0b57cec5SDimitry Andric if (W == std::numeric_limits<int>::max()) 394*0b57cec5SDimitry Andric continue; 395*0b57cec5SDimitry Andric 396*0b57cec5SDimitry Andric MinWaitStates = Found ? std::min(MinWaitStates, W) : W; 397*0b57cec5SDimitry Andric if (IsExpired(nullptr, MinWaitStates)) 398*0b57cec5SDimitry Andric return MinWaitStates; 399*0b57cec5SDimitry Andric 400*0b57cec5SDimitry Andric Found = true; 401*0b57cec5SDimitry Andric } 402*0b57cec5SDimitry Andric 403*0b57cec5SDimitry Andric if (Found) 404*0b57cec5SDimitry Andric return MinWaitStates; 405*0b57cec5SDimitry Andric 406*0b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 407*0b57cec5SDimitry Andric } 408*0b57cec5SDimitry Andric 409*0b57cec5SDimitry Andric static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 410*0b57cec5SDimitry Andric MachineInstr *MI, 411*0b57cec5SDimitry Andric IsExpiredFn IsExpired) { 412*0b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 413*0b57cec5SDimitry Andric return getWaitStatesSince(IsHazard, MI->getParent(), 414*0b57cec5SDimitry Andric std::next(MI->getReverseIterator()), 415*0b57cec5SDimitry Andric 0, IsExpired, Visited); 416*0b57cec5SDimitry Andric } 417*0b57cec5SDimitry Andric 418*0b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) { 419*0b57cec5SDimitry Andric if (IsHazardRecognizerMode) { 420*0b57cec5SDimitry Andric auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) { 421*0b57cec5SDimitry Andric return WaitStates >= Limit; 422*0b57cec5SDimitry Andric }; 423*0b57cec5SDimitry Andric return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn); 424*0b57cec5SDimitry Andric } 425*0b57cec5SDimitry Andric 426*0b57cec5SDimitry Andric int WaitStates = 0; 427*0b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 428*0b57cec5SDimitry Andric if (MI) { 429*0b57cec5SDimitry Andric if (IsHazard(MI)) 430*0b57cec5SDimitry Andric return WaitStates; 431*0b57cec5SDimitry Andric 432*0b57cec5SDimitry Andric if (MI->isInlineAsm()) 433*0b57cec5SDimitry Andric continue; 434*0b57cec5SDimitry Andric } 435*0b57cec5SDimitry Andric ++WaitStates; 436*0b57cec5SDimitry Andric 437*0b57cec5SDimitry Andric if (WaitStates >= Limit) 438*0b57cec5SDimitry Andric break; 439*0b57cec5SDimitry Andric } 440*0b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 441*0b57cec5SDimitry Andric } 442*0b57cec5SDimitry Andric 443*0b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg, 444*0b57cec5SDimitry Andric IsHazardFn IsHazardDef, 445*0b57cec5SDimitry Andric int Limit) { 446*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 447*0b57cec5SDimitry Andric 448*0b57cec5SDimitry Andric auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) { 449*0b57cec5SDimitry Andric return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI); 450*0b57cec5SDimitry Andric }; 451*0b57cec5SDimitry Andric 452*0b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 453*0b57cec5SDimitry Andric } 454*0b57cec5SDimitry Andric 455*0b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard, 456*0b57cec5SDimitry Andric int Limit) { 457*0b57cec5SDimitry Andric auto IsHazardFn = [IsHazard] (MachineInstr *MI) { 458*0b57cec5SDimitry Andric return isSSetReg(MI->getOpcode()) && IsHazard(MI); 459*0b57cec5SDimitry Andric }; 460*0b57cec5SDimitry Andric 461*0b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 462*0b57cec5SDimitry Andric } 463*0b57cec5SDimitry Andric 464*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 465*0b57cec5SDimitry Andric // No-op Hazard Detection 466*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 467*0b57cec5SDimitry Andric 468*0b57cec5SDimitry Andric static void addRegUnits(const SIRegisterInfo &TRI, 469*0b57cec5SDimitry Andric BitVector &BV, unsigned Reg) { 470*0b57cec5SDimitry Andric for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) 471*0b57cec5SDimitry Andric BV.set(*RUI); 472*0b57cec5SDimitry Andric } 473*0b57cec5SDimitry Andric 474*0b57cec5SDimitry Andric static void addRegsToSet(const SIRegisterInfo &TRI, 475*0b57cec5SDimitry Andric iterator_range<MachineInstr::const_mop_iterator> Ops, 476*0b57cec5SDimitry Andric BitVector &Set) { 477*0b57cec5SDimitry Andric for (const MachineOperand &Op : Ops) { 478*0b57cec5SDimitry Andric if (Op.isReg()) 479*0b57cec5SDimitry Andric addRegUnits(TRI, Set, Op.getReg()); 480*0b57cec5SDimitry Andric } 481*0b57cec5SDimitry Andric } 482*0b57cec5SDimitry Andric 483*0b57cec5SDimitry Andric void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { 484*0b57cec5SDimitry Andric // XXX: Do we need to worry about implicit operands 485*0b57cec5SDimitry Andric addRegsToSet(TRI, MI.defs(), ClauseDefs); 486*0b57cec5SDimitry Andric addRegsToSet(TRI, MI.uses(), ClauseUses); 487*0b57cec5SDimitry Andric } 488*0b57cec5SDimitry Andric 489*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { 490*0b57cec5SDimitry Andric // SMEM soft clause are only present on VI+, and only matter if xnack is 491*0b57cec5SDimitry Andric // enabled. 492*0b57cec5SDimitry Andric if (!ST.isXNACKEnabled()) 493*0b57cec5SDimitry Andric return 0; 494*0b57cec5SDimitry Andric 495*0b57cec5SDimitry Andric bool IsSMRD = TII.isSMRD(*MEM); 496*0b57cec5SDimitry Andric 497*0b57cec5SDimitry Andric resetClause(); 498*0b57cec5SDimitry Andric 499*0b57cec5SDimitry Andric // A soft-clause is any group of consecutive SMEM instructions. The 500*0b57cec5SDimitry Andric // instructions in this group may return out of order and/or may be 501*0b57cec5SDimitry Andric // replayed (i.e. the same instruction issued more than once). 502*0b57cec5SDimitry Andric // 503*0b57cec5SDimitry Andric // In order to handle these situations correctly we need to make sure that 504*0b57cec5SDimitry Andric // when a clause has more than one instruction, no instruction in the clause 505*0b57cec5SDimitry Andric // writes to a register that is read by another instruction in the clause 506*0b57cec5SDimitry Andric // (including itself). If we encounter this situaion, we need to break the 507*0b57cec5SDimitry Andric // clause by inserting a non SMEM instruction. 508*0b57cec5SDimitry Andric 509*0b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 510*0b57cec5SDimitry Andric // When we hit a non-SMEM instruction then we have passed the start of the 511*0b57cec5SDimitry Andric // clause and we can stop. 512*0b57cec5SDimitry Andric if (!MI) 513*0b57cec5SDimitry Andric break; 514*0b57cec5SDimitry Andric 515*0b57cec5SDimitry Andric if (IsSMRD != SIInstrInfo::isSMRD(*MI)) 516*0b57cec5SDimitry Andric break; 517*0b57cec5SDimitry Andric 518*0b57cec5SDimitry Andric addClauseInst(*MI); 519*0b57cec5SDimitry Andric } 520*0b57cec5SDimitry Andric 521*0b57cec5SDimitry Andric if (ClauseDefs.none()) 522*0b57cec5SDimitry Andric return 0; 523*0b57cec5SDimitry Andric 524*0b57cec5SDimitry Andric // We need to make sure not to put loads and stores in the same clause if they 525*0b57cec5SDimitry Andric // use the same address. For now, just start a new clause whenever we see a 526*0b57cec5SDimitry Andric // store. 527*0b57cec5SDimitry Andric if (MEM->mayStore()) 528*0b57cec5SDimitry Andric return 1; 529*0b57cec5SDimitry Andric 530*0b57cec5SDimitry Andric addClauseInst(*MEM); 531*0b57cec5SDimitry Andric 532*0b57cec5SDimitry Andric // If the set of defs and uses intersect then we cannot add this instruction 533*0b57cec5SDimitry Andric // to the clause, so we have a hazard. 534*0b57cec5SDimitry Andric return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0; 535*0b57cec5SDimitry Andric } 536*0b57cec5SDimitry Andric 537*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 538*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 539*0b57cec5SDimitry Andric 540*0b57cec5SDimitry Andric WaitStatesNeeded = checkSoftClauseHazards(SMRD); 541*0b57cec5SDimitry Andric 542*0b57cec5SDimitry Andric // This SMRD hazard only affects SI. 543*0b57cec5SDimitry Andric if (!ST.hasSMRDReadVALUDefHazard()) 544*0b57cec5SDimitry Andric return WaitStatesNeeded; 545*0b57cec5SDimitry Andric 546*0b57cec5SDimitry Andric // A read of an SGPR by SMRD instruction requires 4 wait states when the 547*0b57cec5SDimitry Andric // SGPR was written by a VALU instruction. 548*0b57cec5SDimitry Andric int SmrdSgprWaitStates = 4; 549*0b57cec5SDimitry Andric auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; 550*0b57cec5SDimitry Andric auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); }; 551*0b57cec5SDimitry Andric 552*0b57cec5SDimitry Andric bool IsBufferSMRD = TII.isBufferSMRD(*SMRD); 553*0b57cec5SDimitry Andric 554*0b57cec5SDimitry Andric for (const MachineOperand &Use : SMRD->uses()) { 555*0b57cec5SDimitry Andric if (!Use.isReg()) 556*0b57cec5SDimitry Andric continue; 557*0b57cec5SDimitry Andric int WaitStatesNeededForUse = 558*0b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 559*0b57cec5SDimitry Andric SmrdSgprWaitStates); 560*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 561*0b57cec5SDimitry Andric 562*0b57cec5SDimitry Andric // This fixes what appears to be undocumented hardware behavior in SI where 563*0b57cec5SDimitry Andric // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor 564*0b57cec5SDimitry Andric // needs some number of nops in between. We don't know how many we need, but 565*0b57cec5SDimitry Andric // let's use 4. This wasn't discovered before probably because the only 566*0b57cec5SDimitry Andric // case when this happens is when we expand a 64-bit pointer into a full 567*0b57cec5SDimitry Andric // descriptor and use s_buffer_load_dword instead of s_load_dword, which was 568*0b57cec5SDimitry Andric // probably never encountered in the closed-source land. 569*0b57cec5SDimitry Andric if (IsBufferSMRD) { 570*0b57cec5SDimitry Andric int WaitStatesNeededForUse = 571*0b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 572*0b57cec5SDimitry Andric IsBufferHazardDefFn, 573*0b57cec5SDimitry Andric SmrdSgprWaitStates); 574*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 575*0b57cec5SDimitry Andric } 576*0b57cec5SDimitry Andric } 577*0b57cec5SDimitry Andric 578*0b57cec5SDimitry Andric return WaitStatesNeeded; 579*0b57cec5SDimitry Andric } 580*0b57cec5SDimitry Andric 581*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 582*0b57cec5SDimitry Andric if (!ST.hasVMEMReadSGPRVALUDefHazard()) 583*0b57cec5SDimitry Andric return 0; 584*0b57cec5SDimitry Andric 585*0b57cec5SDimitry Andric int WaitStatesNeeded = checkSoftClauseHazards(VMEM); 586*0b57cec5SDimitry Andric 587*0b57cec5SDimitry Andric // A read of an SGPR by a VMEM instruction requires 5 wait states when the 588*0b57cec5SDimitry Andric // SGPR was written by a VALU Instruction. 589*0b57cec5SDimitry Andric const int VmemSgprWaitStates = 5; 590*0b57cec5SDimitry Andric auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; 591*0b57cec5SDimitry Andric for (const MachineOperand &Use : VMEM->uses()) { 592*0b57cec5SDimitry Andric if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 593*0b57cec5SDimitry Andric continue; 594*0b57cec5SDimitry Andric 595*0b57cec5SDimitry Andric int WaitStatesNeededForUse = 596*0b57cec5SDimitry Andric VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 597*0b57cec5SDimitry Andric VmemSgprWaitStates); 598*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 599*0b57cec5SDimitry Andric } 600*0b57cec5SDimitry Andric return WaitStatesNeeded; 601*0b57cec5SDimitry Andric } 602*0b57cec5SDimitry Andric 603*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 604*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 605*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 606*0b57cec5SDimitry Andric 607*0b57cec5SDimitry Andric // Check for DPP VGPR read after VALU VGPR write and EXEC write. 608*0b57cec5SDimitry Andric int DppVgprWaitStates = 2; 609*0b57cec5SDimitry Andric int DppExecWaitStates = 5; 610*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 611*0b57cec5SDimitry Andric auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 612*0b57cec5SDimitry Andric 613*0b57cec5SDimitry Andric for (const MachineOperand &Use : DPP->uses()) { 614*0b57cec5SDimitry Andric if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 615*0b57cec5SDimitry Andric continue; 616*0b57cec5SDimitry Andric int WaitStatesNeededForUse = 617*0b57cec5SDimitry Andric DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 618*0b57cec5SDimitry Andric [](MachineInstr *) { return true; }, 619*0b57cec5SDimitry Andric DppVgprWaitStates); 620*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 621*0b57cec5SDimitry Andric } 622*0b57cec5SDimitry Andric 623*0b57cec5SDimitry Andric WaitStatesNeeded = std::max( 624*0b57cec5SDimitry Andric WaitStatesNeeded, 625*0b57cec5SDimitry Andric DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn, 626*0b57cec5SDimitry Andric DppExecWaitStates)); 627*0b57cec5SDimitry Andric 628*0b57cec5SDimitry Andric return WaitStatesNeeded; 629*0b57cec5SDimitry Andric } 630*0b57cec5SDimitry Andric 631*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 632*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 633*0b57cec5SDimitry Andric 634*0b57cec5SDimitry Andric // v_div_fmas requires 4 wait states after a write to vcc from a VALU 635*0b57cec5SDimitry Andric // instruction. 636*0b57cec5SDimitry Andric const int DivFMasWaitStates = 4; 637*0b57cec5SDimitry Andric auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 638*0b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn, 639*0b57cec5SDimitry Andric DivFMasWaitStates); 640*0b57cec5SDimitry Andric 641*0b57cec5SDimitry Andric return DivFMasWaitStates - WaitStatesNeeded; 642*0b57cec5SDimitry Andric } 643*0b57cec5SDimitry Andric 644*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 645*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 646*0b57cec5SDimitry Andric unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 647*0b57cec5SDimitry Andric 648*0b57cec5SDimitry Andric const int GetRegWaitStates = 2; 649*0b57cec5SDimitry Andric auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) { 650*0b57cec5SDimitry Andric return GetRegHWReg == getHWReg(TII, *MI); 651*0b57cec5SDimitry Andric }; 652*0b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates); 653*0b57cec5SDimitry Andric 654*0b57cec5SDimitry Andric return GetRegWaitStates - WaitStatesNeeded; 655*0b57cec5SDimitry Andric } 656*0b57cec5SDimitry Andric 657*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 658*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 659*0b57cec5SDimitry Andric unsigned HWReg = getHWReg(TII, *SetRegInstr); 660*0b57cec5SDimitry Andric 661*0b57cec5SDimitry Andric const int SetRegWaitStates = ST.getSetRegWaitStates(); 662*0b57cec5SDimitry Andric auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) { 663*0b57cec5SDimitry Andric return HWReg == getHWReg(TII, *MI); 664*0b57cec5SDimitry Andric }; 665*0b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates); 666*0b57cec5SDimitry Andric return SetRegWaitStates - WaitStatesNeeded; 667*0b57cec5SDimitry Andric } 668*0b57cec5SDimitry Andric 669*0b57cec5SDimitry Andric int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 670*0b57cec5SDimitry Andric if (!MI.mayStore()) 671*0b57cec5SDimitry Andric return -1; 672*0b57cec5SDimitry Andric 673*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 674*0b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 675*0b57cec5SDimitry Andric const MCInstrDesc &Desc = MI.getDesc(); 676*0b57cec5SDimitry Andric 677*0b57cec5SDimitry Andric int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 678*0b57cec5SDimitry Andric int VDataRCID = -1; 679*0b57cec5SDimitry Andric if (VDataIdx != -1) 680*0b57cec5SDimitry Andric VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 681*0b57cec5SDimitry Andric 682*0b57cec5SDimitry Andric if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 683*0b57cec5SDimitry Andric // There is no hazard if the instruction does not use vector regs 684*0b57cec5SDimitry Andric // (like wbinvl1) 685*0b57cec5SDimitry Andric if (VDataIdx == -1) 686*0b57cec5SDimitry Andric return -1; 687*0b57cec5SDimitry Andric // For MUBUF/MTBUF instructions this hazard only exists if the 688*0b57cec5SDimitry Andric // instruction is not using a register in the soffset field. 689*0b57cec5SDimitry Andric const MachineOperand *SOffset = 690*0b57cec5SDimitry Andric TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 691*0b57cec5SDimitry Andric // If we have no soffset operand, then assume this field has been 692*0b57cec5SDimitry Andric // hardcoded to zero. 693*0b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 694*0b57cec5SDimitry Andric (!SOffset || !SOffset->isReg())) 695*0b57cec5SDimitry Andric return VDataIdx; 696*0b57cec5SDimitry Andric } 697*0b57cec5SDimitry Andric 698*0b57cec5SDimitry Andric // MIMG instructions create a hazard if they don't use a 256-bit T# and 699*0b57cec5SDimitry Andric // the store size is greater than 8 bytes and they have more than two bits 700*0b57cec5SDimitry Andric // of their dmask set. 701*0b57cec5SDimitry Andric // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 702*0b57cec5SDimitry Andric if (TII->isMIMG(MI)) { 703*0b57cec5SDimitry Andric int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 704*0b57cec5SDimitry Andric assert(SRsrcIdx != -1 && 705*0b57cec5SDimitry Andric AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 706*0b57cec5SDimitry Andric (void)SRsrcIdx; 707*0b57cec5SDimitry Andric } 708*0b57cec5SDimitry Andric 709*0b57cec5SDimitry Andric if (TII->isFLAT(MI)) { 710*0b57cec5SDimitry Andric int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 711*0b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 712*0b57cec5SDimitry Andric return DataIdx; 713*0b57cec5SDimitry Andric } 714*0b57cec5SDimitry Andric 715*0b57cec5SDimitry Andric return -1; 716*0b57cec5SDimitry Andric } 717*0b57cec5SDimitry Andric 718*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, 719*0b57cec5SDimitry Andric const MachineRegisterInfo &MRI) { 720*0b57cec5SDimitry Andric // Helper to check for the hazard where VMEM instructions that store more than 721*0b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 722*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 723*0b57cec5SDimitry Andric 724*0b57cec5SDimitry Andric const int VALUWaitStates = 1; 725*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 726*0b57cec5SDimitry Andric 727*0b57cec5SDimitry Andric if (!TRI->isVGPR(MRI, Def.getReg())) 728*0b57cec5SDimitry Andric return WaitStatesNeeded; 729*0b57cec5SDimitry Andric unsigned Reg = Def.getReg(); 730*0b57cec5SDimitry Andric auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { 731*0b57cec5SDimitry Andric int DataIdx = createsVALUHazard(*MI); 732*0b57cec5SDimitry Andric return DataIdx >= 0 && 733*0b57cec5SDimitry Andric TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); 734*0b57cec5SDimitry Andric }; 735*0b57cec5SDimitry Andric int WaitStatesNeededForDef = 736*0b57cec5SDimitry Andric VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates); 737*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 738*0b57cec5SDimitry Andric 739*0b57cec5SDimitry Andric return WaitStatesNeeded; 740*0b57cec5SDimitry Andric } 741*0b57cec5SDimitry Andric 742*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 743*0b57cec5SDimitry Andric // This checks for the hazard where VMEM instructions that store more than 744*0b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 745*0b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 746*0b57cec5SDimitry Andric return 0; 747*0b57cec5SDimitry Andric 748*0b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 749*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 750*0b57cec5SDimitry Andric 751*0b57cec5SDimitry Andric for (const MachineOperand &Def : VALU->defs()) { 752*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); 753*0b57cec5SDimitry Andric } 754*0b57cec5SDimitry Andric 755*0b57cec5SDimitry Andric return WaitStatesNeeded; 756*0b57cec5SDimitry Andric } 757*0b57cec5SDimitry Andric 758*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { 759*0b57cec5SDimitry Andric // This checks for hazards associated with inline asm statements. 760*0b57cec5SDimitry Andric // Since inline asms can contain just about anything, we use this 761*0b57cec5SDimitry Andric // to call/leverage other check*Hazard routines. Note that 762*0b57cec5SDimitry Andric // this function doesn't attempt to address all possible inline asm 763*0b57cec5SDimitry Andric // hazards (good luck), but is a collection of what has been 764*0b57cec5SDimitry Andric // problematic thus far. 765*0b57cec5SDimitry Andric 766*0b57cec5SDimitry Andric // see checkVALUHazards() 767*0b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 768*0b57cec5SDimitry Andric return 0; 769*0b57cec5SDimitry Andric 770*0b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 771*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 772*0b57cec5SDimitry Andric 773*0b57cec5SDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); 774*0b57cec5SDimitry Andric I != E; ++I) { 775*0b57cec5SDimitry Andric const MachineOperand &Op = IA->getOperand(I); 776*0b57cec5SDimitry Andric if (Op.isReg() && Op.isDef()) { 777*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); 778*0b57cec5SDimitry Andric } 779*0b57cec5SDimitry Andric } 780*0b57cec5SDimitry Andric 781*0b57cec5SDimitry Andric return WaitStatesNeeded; 782*0b57cec5SDimitry Andric } 783*0b57cec5SDimitry Andric 784*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { 785*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 786*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 787*0b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 788*0b57cec5SDimitry Andric 789*0b57cec5SDimitry Andric const MachineOperand *LaneSelectOp = 790*0b57cec5SDimitry Andric TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); 791*0b57cec5SDimitry Andric 792*0b57cec5SDimitry Andric if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) 793*0b57cec5SDimitry Andric return 0; 794*0b57cec5SDimitry Andric 795*0b57cec5SDimitry Andric unsigned LaneSelectReg = LaneSelectOp->getReg(); 796*0b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 797*0b57cec5SDimitry Andric return TII->isVALU(*MI); 798*0b57cec5SDimitry Andric }; 799*0b57cec5SDimitry Andric 800*0b57cec5SDimitry Andric const int RWLaneWaitStates = 4; 801*0b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn, 802*0b57cec5SDimitry Andric RWLaneWaitStates); 803*0b57cec5SDimitry Andric return RWLaneWaitStates - WaitStatesSince; 804*0b57cec5SDimitry Andric } 805*0b57cec5SDimitry Andric 806*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) { 807*0b57cec5SDimitry Andric if (!ST.hasRFEHazards()) 808*0b57cec5SDimitry Andric return 0; 809*0b57cec5SDimitry Andric 810*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 811*0b57cec5SDimitry Andric 812*0b57cec5SDimitry Andric const int RFEWaitStates = 1; 813*0b57cec5SDimitry Andric 814*0b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 815*0b57cec5SDimitry Andric return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS; 816*0b57cec5SDimitry Andric }; 817*0b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates); 818*0b57cec5SDimitry Andric return RFEWaitStates - WaitStatesNeeded; 819*0b57cec5SDimitry Andric } 820*0b57cec5SDimitry Andric 821*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) { 822*0b57cec5SDimitry Andric if (MI->isDebugInstr()) 823*0b57cec5SDimitry Andric return 0; 824*0b57cec5SDimitry Andric 825*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 826*0b57cec5SDimitry Andric if (!ST.hasSMovFedHazard()) 827*0b57cec5SDimitry Andric return 0; 828*0b57cec5SDimitry Andric 829*0b57cec5SDimitry Andric // Check for any instruction reading an SGPR after a write from 830*0b57cec5SDimitry Andric // s_mov_fed_b32. 831*0b57cec5SDimitry Andric int MovFedWaitStates = 1; 832*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 833*0b57cec5SDimitry Andric 834*0b57cec5SDimitry Andric for (const MachineOperand &Use : MI->uses()) { 835*0b57cec5SDimitry Andric if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 836*0b57cec5SDimitry Andric continue; 837*0b57cec5SDimitry Andric auto IsHazardFn = [] (MachineInstr *MI) { 838*0b57cec5SDimitry Andric return MI->getOpcode() == AMDGPU::S_MOV_FED_B32; 839*0b57cec5SDimitry Andric }; 840*0b57cec5SDimitry Andric int WaitStatesNeededForUse = 841*0b57cec5SDimitry Andric MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn, 842*0b57cec5SDimitry Andric MovFedWaitStates); 843*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 844*0b57cec5SDimitry Andric } 845*0b57cec5SDimitry Andric 846*0b57cec5SDimitry Andric return WaitStatesNeeded; 847*0b57cec5SDimitry Andric } 848*0b57cec5SDimitry Andric 849*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { 850*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 851*0b57cec5SDimitry Andric const int SMovRelWaitStates = 1; 852*0b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 853*0b57cec5SDimitry Andric return TII->isSALU(*MI); 854*0b57cec5SDimitry Andric }; 855*0b57cec5SDimitry Andric return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, 856*0b57cec5SDimitry Andric SMovRelWaitStates); 857*0b57cec5SDimitry Andric } 858*0b57cec5SDimitry Andric 859*0b57cec5SDimitry Andric void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { 860*0b57cec5SDimitry Andric fixVMEMtoScalarWriteHazards(MI); 861*0b57cec5SDimitry Andric fixVcmpxPermlaneHazards(MI); 862*0b57cec5SDimitry Andric fixSMEMtoVectorWriteHazards(MI); 863*0b57cec5SDimitry Andric fixVcmpxExecWARHazard(MI); 864*0b57cec5SDimitry Andric fixLdsBranchVmemWARHazard(MI); 865*0b57cec5SDimitry Andric } 866*0b57cec5SDimitry Andric 867*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { 868*0b57cec5SDimitry Andric if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) 869*0b57cec5SDimitry Andric return false; 870*0b57cec5SDimitry Andric 871*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 872*0b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 873*0b57cec5SDimitry Andric return TII->isVOPC(*MI); 874*0b57cec5SDimitry Andric }; 875*0b57cec5SDimitry Andric 876*0b57cec5SDimitry Andric auto IsExpiredFn = [] (MachineInstr *MI, int) { 877*0b57cec5SDimitry Andric if (!MI) 878*0b57cec5SDimitry Andric return false; 879*0b57cec5SDimitry Andric unsigned Opc = MI->getOpcode(); 880*0b57cec5SDimitry Andric return SIInstrInfo::isVALU(*MI) && 881*0b57cec5SDimitry Andric Opc != AMDGPU::V_NOP_e32 && 882*0b57cec5SDimitry Andric Opc != AMDGPU::V_NOP_e64 && 883*0b57cec5SDimitry Andric Opc != AMDGPU::V_NOP_sdwa; 884*0b57cec5SDimitry Andric }; 885*0b57cec5SDimitry Andric 886*0b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 887*0b57cec5SDimitry Andric std::numeric_limits<int>::max()) 888*0b57cec5SDimitry Andric return false; 889*0b57cec5SDimitry Andric 890*0b57cec5SDimitry Andric // V_NOP will be discarded by SQ. 891*0b57cec5SDimitry Andric // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* 892*0b57cec5SDimitry Andric // which is always a VGPR and available. 893*0b57cec5SDimitry Andric auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); 894*0b57cec5SDimitry Andric unsigned Reg = Src0->getReg(); 895*0b57cec5SDimitry Andric bool IsUndef = Src0->isUndef(); 896*0b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 897*0b57cec5SDimitry Andric TII->get(AMDGPU::V_MOV_B32_e32)) 898*0b57cec5SDimitry Andric .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0)) 899*0b57cec5SDimitry Andric .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill); 900*0b57cec5SDimitry Andric 901*0b57cec5SDimitry Andric return true; 902*0b57cec5SDimitry Andric } 903*0b57cec5SDimitry Andric 904*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { 905*0b57cec5SDimitry Andric if (!ST.hasVMEMtoScalarWriteHazard()) 906*0b57cec5SDimitry Andric return false; 907*0b57cec5SDimitry Andric 908*0b57cec5SDimitry Andric if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI)) 909*0b57cec5SDimitry Andric return false; 910*0b57cec5SDimitry Andric 911*0b57cec5SDimitry Andric if (MI->getNumDefs() == 0) 912*0b57cec5SDimitry Andric return false; 913*0b57cec5SDimitry Andric 914*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 915*0b57cec5SDimitry Andric 916*0b57cec5SDimitry Andric auto IsHazardFn = [TRI, MI] (MachineInstr *I) { 917*0b57cec5SDimitry Andric if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) && 918*0b57cec5SDimitry Andric !SIInstrInfo::isFLAT(*I)) 919*0b57cec5SDimitry Andric return false; 920*0b57cec5SDimitry Andric 921*0b57cec5SDimitry Andric for (const MachineOperand &Def : MI->defs()) { 922*0b57cec5SDimitry Andric MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI); 923*0b57cec5SDimitry Andric if (!Op) 924*0b57cec5SDimitry Andric continue; 925*0b57cec5SDimitry Andric return true; 926*0b57cec5SDimitry Andric } 927*0b57cec5SDimitry Andric return false; 928*0b57cec5SDimitry Andric }; 929*0b57cec5SDimitry Andric 930*0b57cec5SDimitry Andric auto IsExpiredFn = [] (MachineInstr *MI, int) { 931*0b57cec5SDimitry Andric return MI && (SIInstrInfo::isVALU(*MI) || 932*0b57cec5SDimitry Andric (MI->getOpcode() == AMDGPU::S_WAITCNT && 933*0b57cec5SDimitry Andric !MI->getOperand(0).getImm())); 934*0b57cec5SDimitry Andric }; 935*0b57cec5SDimitry Andric 936*0b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 937*0b57cec5SDimitry Andric std::numeric_limits<int>::max()) 938*0b57cec5SDimitry Andric return false; 939*0b57cec5SDimitry Andric 940*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 941*0b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32)); 942*0b57cec5SDimitry Andric return true; 943*0b57cec5SDimitry Andric } 944*0b57cec5SDimitry Andric 945*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { 946*0b57cec5SDimitry Andric if (!ST.hasSMEMtoVectorWriteHazard()) 947*0b57cec5SDimitry Andric return false; 948*0b57cec5SDimitry Andric 949*0b57cec5SDimitry Andric if (!SIInstrInfo::isVALU(*MI)) 950*0b57cec5SDimitry Andric return false; 951*0b57cec5SDimitry Andric 952*0b57cec5SDimitry Andric unsigned SDSTName; 953*0b57cec5SDimitry Andric switch (MI->getOpcode()) { 954*0b57cec5SDimitry Andric case AMDGPU::V_READLANE_B32: 955*0b57cec5SDimitry Andric case AMDGPU::V_READFIRSTLANE_B32: 956*0b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::vdst; 957*0b57cec5SDimitry Andric break; 958*0b57cec5SDimitry Andric default: 959*0b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::sdst; 960*0b57cec5SDimitry Andric break; 961*0b57cec5SDimitry Andric } 962*0b57cec5SDimitry Andric 963*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 964*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 965*0b57cec5SDimitry Andric const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU()); 966*0b57cec5SDimitry Andric const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName); 967*0b57cec5SDimitry Andric if (!SDST) { 968*0b57cec5SDimitry Andric for (const auto &MO : MI->implicit_operands()) { 969*0b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) { 970*0b57cec5SDimitry Andric SDST = &MO; 971*0b57cec5SDimitry Andric break; 972*0b57cec5SDimitry Andric } 973*0b57cec5SDimitry Andric } 974*0b57cec5SDimitry Andric } 975*0b57cec5SDimitry Andric 976*0b57cec5SDimitry Andric if (!SDST) 977*0b57cec5SDimitry Andric return false; 978*0b57cec5SDimitry Andric 979*0b57cec5SDimitry Andric const unsigned SDSTReg = SDST->getReg(); 980*0b57cec5SDimitry Andric auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) { 981*0b57cec5SDimitry Andric return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI); 982*0b57cec5SDimitry Andric }; 983*0b57cec5SDimitry Andric 984*0b57cec5SDimitry Andric auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) { 985*0b57cec5SDimitry Andric if (MI) { 986*0b57cec5SDimitry Andric if (TII->isSALU(*MI)) { 987*0b57cec5SDimitry Andric switch (MI->getOpcode()) { 988*0b57cec5SDimitry Andric case AMDGPU::S_SETVSKIP: 989*0b57cec5SDimitry Andric case AMDGPU::S_VERSION: 990*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 991*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 992*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 993*0b57cec5SDimitry Andric // These instructions cannot not mitigate the hazard. 994*0b57cec5SDimitry Andric return false; 995*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 996*0b57cec5SDimitry Andric // Reducing lgkmcnt count to 0 always mitigates the hazard. 997*0b57cec5SDimitry Andric return (MI->getOperand(1).getImm() == 0) && 998*0b57cec5SDimitry Andric (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL); 999*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: { 1000*0b57cec5SDimitry Andric const int64_t Imm = MI->getOperand(0).getImm(); 1001*0b57cec5SDimitry Andric AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm); 1002*0b57cec5SDimitry Andric return (Decoded.LgkmCnt == 0); 1003*0b57cec5SDimitry Andric } 1004*0b57cec5SDimitry Andric default: 1005*0b57cec5SDimitry Andric // SOPP instructions cannot mitigate the hazard. 1006*0b57cec5SDimitry Andric if (TII->isSOPP(*MI)) 1007*0b57cec5SDimitry Andric return false; 1008*0b57cec5SDimitry Andric // At this point the SALU can be assumed to mitigate the hazard 1009*0b57cec5SDimitry Andric // because either: 1010*0b57cec5SDimitry Andric // (a) it is independent of the at risk SMEM (breaking chain), 1011*0b57cec5SDimitry Andric // or 1012*0b57cec5SDimitry Andric // (b) it is dependent on the SMEM, in which case an appropriate 1013*0b57cec5SDimitry Andric // s_waitcnt lgkmcnt _must_ exist between it and the at risk 1014*0b57cec5SDimitry Andric // SMEM instruction. 1015*0b57cec5SDimitry Andric return true; 1016*0b57cec5SDimitry Andric } 1017*0b57cec5SDimitry Andric } 1018*0b57cec5SDimitry Andric } 1019*0b57cec5SDimitry Andric return false; 1020*0b57cec5SDimitry Andric }; 1021*0b57cec5SDimitry Andric 1022*0b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 1023*0b57cec5SDimitry Andric std::numeric_limits<int>::max()) 1024*0b57cec5SDimitry Andric return false; 1025*0b57cec5SDimitry Andric 1026*0b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1027*0b57cec5SDimitry Andric TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL) 1028*0b57cec5SDimitry Andric .addImm(0); 1029*0b57cec5SDimitry Andric return true; 1030*0b57cec5SDimitry Andric } 1031*0b57cec5SDimitry Andric 1032*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) { 1033*0b57cec5SDimitry Andric if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI)) 1034*0b57cec5SDimitry Andric return false; 1035*0b57cec5SDimitry Andric 1036*0b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1037*0b57cec5SDimitry Andric if (!MI->modifiesRegister(AMDGPU::EXEC, TRI)) 1038*0b57cec5SDimitry Andric return false; 1039*0b57cec5SDimitry Andric 1040*0b57cec5SDimitry Andric auto IsHazardFn = [TRI] (MachineInstr *I) { 1041*0b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*I)) 1042*0b57cec5SDimitry Andric return false; 1043*0b57cec5SDimitry Andric return I->readsRegister(AMDGPU::EXEC, TRI); 1044*0b57cec5SDimitry Andric }; 1045*0b57cec5SDimitry Andric 1046*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1047*0b57cec5SDimitry Andric auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) { 1048*0b57cec5SDimitry Andric if (!MI) 1049*0b57cec5SDimitry Andric return false; 1050*0b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI)) { 1051*0b57cec5SDimitry Andric if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst)) 1052*0b57cec5SDimitry Andric return true; 1053*0b57cec5SDimitry Andric for (auto MO : MI->implicit_operands()) 1054*0b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) 1055*0b57cec5SDimitry Andric return true; 1056*0b57cec5SDimitry Andric } 1057*0b57cec5SDimitry Andric if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1058*0b57cec5SDimitry Andric (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe) 1059*0b57cec5SDimitry Andric return true; 1060*0b57cec5SDimitry Andric return false; 1061*0b57cec5SDimitry Andric }; 1062*0b57cec5SDimitry Andric 1063*0b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 1064*0b57cec5SDimitry Andric std::numeric_limits<int>::max()) 1065*0b57cec5SDimitry Andric return false; 1066*0b57cec5SDimitry Andric 1067*0b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1068*0b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 1069*0b57cec5SDimitry Andric .addImm(0xfffe); 1070*0b57cec5SDimitry Andric return true; 1071*0b57cec5SDimitry Andric } 1072*0b57cec5SDimitry Andric 1073*0b57cec5SDimitry Andric bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { 1074*0b57cec5SDimitry Andric if (!ST.hasLdsBranchVmemWARHazard()) 1075*0b57cec5SDimitry Andric return false; 1076*0b57cec5SDimitry Andric 1077*0b57cec5SDimitry Andric auto IsHazardInst = [] (const MachineInstr *MI) { 1078*0b57cec5SDimitry Andric if (SIInstrInfo::isDS(*MI)) 1079*0b57cec5SDimitry Andric return 1; 1080*0b57cec5SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI)) 1081*0b57cec5SDimitry Andric return 2; 1082*0b57cec5SDimitry Andric return 0; 1083*0b57cec5SDimitry Andric }; 1084*0b57cec5SDimitry Andric 1085*0b57cec5SDimitry Andric auto InstType = IsHazardInst(MI); 1086*0b57cec5SDimitry Andric if (!InstType) 1087*0b57cec5SDimitry Andric return false; 1088*0b57cec5SDimitry Andric 1089*0b57cec5SDimitry Andric auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) { 1090*0b57cec5SDimitry Andric return I && (IsHazardInst(I) || 1091*0b57cec5SDimitry Andric (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 1092*0b57cec5SDimitry Andric I->getOperand(0).getReg() == AMDGPU::SGPR_NULL && 1093*0b57cec5SDimitry Andric !I->getOperand(1).getImm())); 1094*0b57cec5SDimitry Andric }; 1095*0b57cec5SDimitry Andric 1096*0b57cec5SDimitry Andric auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) { 1097*0b57cec5SDimitry Andric if (!I->isBranch()) 1098*0b57cec5SDimitry Andric return false; 1099*0b57cec5SDimitry Andric 1100*0b57cec5SDimitry Andric auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) { 1101*0b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 1102*0b57cec5SDimitry Andric return InstType2 && InstType != InstType2; 1103*0b57cec5SDimitry Andric }; 1104*0b57cec5SDimitry Andric 1105*0b57cec5SDimitry Andric auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) { 1106*0b57cec5SDimitry Andric if (!I) 1107*0b57cec5SDimitry Andric return false; 1108*0b57cec5SDimitry Andric 1109*0b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 1110*0b57cec5SDimitry Andric if (InstType == InstType2) 1111*0b57cec5SDimitry Andric return true; 1112*0b57cec5SDimitry Andric 1113*0b57cec5SDimitry Andric return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 1114*0b57cec5SDimitry Andric I->getOperand(0).getReg() == AMDGPU::SGPR_NULL && 1115*0b57cec5SDimitry Andric !I->getOperand(1).getImm(); 1116*0b57cec5SDimitry Andric }; 1117*0b57cec5SDimitry Andric 1118*0b57cec5SDimitry Andric return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) != 1119*0b57cec5SDimitry Andric std::numeric_limits<int>::max(); 1120*0b57cec5SDimitry Andric }; 1121*0b57cec5SDimitry Andric 1122*0b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 1123*0b57cec5SDimitry Andric std::numeric_limits<int>::max()) 1124*0b57cec5SDimitry Andric return false; 1125*0b57cec5SDimitry Andric 1126*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1127*0b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1128*0b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_VSCNT)) 1129*0b57cec5SDimitry Andric .addReg(AMDGPU::SGPR_NULL, RegState::Undef) 1130*0b57cec5SDimitry Andric .addImm(0); 1131*0b57cec5SDimitry Andric 1132*0b57cec5SDimitry Andric return true; 1133*0b57cec5SDimitry Andric } 1134*0b57cec5SDimitry Andric 1135*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { 1136*0b57cec5SDimitry Andric int NSAtoVMEMWaitStates = 1; 1137*0b57cec5SDimitry Andric 1138*0b57cec5SDimitry Andric if (!ST.hasNSAtoVMEMBug()) 1139*0b57cec5SDimitry Andric return 0; 1140*0b57cec5SDimitry Andric 1141*0b57cec5SDimitry Andric if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI)) 1142*0b57cec5SDimitry Andric return 0; 1143*0b57cec5SDimitry Andric 1144*0b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1145*0b57cec5SDimitry Andric const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset); 1146*0b57cec5SDimitry Andric if (!Offset || (Offset->getImm() & 6) == 0) 1147*0b57cec5SDimitry Andric return 0; 1148*0b57cec5SDimitry Andric 1149*0b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *I) { 1150*0b57cec5SDimitry Andric if (!SIInstrInfo::isMIMG(*I)) 1151*0b57cec5SDimitry Andric return false; 1152*0b57cec5SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode()); 1153*0b57cec5SDimitry Andric return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA && 1154*0b57cec5SDimitry Andric TII->getInstSizeInBytes(*I) >= 16; 1155*0b57cec5SDimitry Andric }; 1156*0b57cec5SDimitry Andric 1157*0b57cec5SDimitry Andric return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); 1158*0b57cec5SDimitry Andric } 1159*0b57cec5SDimitry Andric 1160*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { 1161*0b57cec5SDimitry Andric int FPAtomicToDenormModeWaitStates = 3; 1162*0b57cec5SDimitry Andric 1163*0b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) 1164*0b57cec5SDimitry Andric return 0; 1165*0b57cec5SDimitry Andric 1166*0b57cec5SDimitry Andric auto IsHazardFn = [] (MachineInstr *I) { 1167*0b57cec5SDimitry Andric if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I)) 1168*0b57cec5SDimitry Andric return false; 1169*0b57cec5SDimitry Andric return SIInstrInfo::isFPAtomic(*I); 1170*0b57cec5SDimitry Andric }; 1171*0b57cec5SDimitry Andric 1172*0b57cec5SDimitry Andric auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) { 1173*0b57cec5SDimitry Andric if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI)) 1174*0b57cec5SDimitry Andric return true; 1175*0b57cec5SDimitry Andric 1176*0b57cec5SDimitry Andric switch (MI->getOpcode()) { 1177*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: 1178*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 1179*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 1180*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 1181*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 1182*0b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_IDLE: 1183*0b57cec5SDimitry Andric return true; 1184*0b57cec5SDimitry Andric default: 1185*0b57cec5SDimitry Andric break; 1186*0b57cec5SDimitry Andric } 1187*0b57cec5SDimitry Andric 1188*0b57cec5SDimitry Andric return false; 1189*0b57cec5SDimitry Andric }; 1190*0b57cec5SDimitry Andric 1191*0b57cec5SDimitry Andric 1192*0b57cec5SDimitry Andric return FPAtomicToDenormModeWaitStates - 1193*0b57cec5SDimitry Andric ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); 1194*0b57cec5SDimitry Andric } 1195*0b57cec5SDimitry Andric 1196*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { 1197*0b57cec5SDimitry Andric assert(SIInstrInfo::isMAI(*MI)); 1198*0b57cec5SDimitry Andric 1199*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 1200*0b57cec5SDimitry Andric unsigned Opc = MI->getOpcode(); 1201*0b57cec5SDimitry Andric 1202*0b57cec5SDimitry Andric auto IsVALUFn = [] (MachineInstr *MI) { 1203*0b57cec5SDimitry Andric return SIInstrInfo::isVALU(*MI); 1204*0b57cec5SDimitry Andric }; 1205*0b57cec5SDimitry Andric 1206*0b57cec5SDimitry Andric if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write 1207*0b57cec5SDimitry Andric const int LegacyVALUWritesVGPRWaitStates = 2; 1208*0b57cec5SDimitry Andric const int VALUWritesExecWaitStates = 4; 1209*0b57cec5SDimitry Andric const int MaxWaitStates = 4; 1210*0b57cec5SDimitry Andric 1211*0b57cec5SDimitry Andric int WaitStatesNeededForUse = VALUWritesExecWaitStates - 1212*0b57cec5SDimitry Andric getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates); 1213*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1214*0b57cec5SDimitry Andric 1215*0b57cec5SDimitry Andric if (WaitStatesNeeded < MaxWaitStates) { 1216*0b57cec5SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 1217*0b57cec5SDimitry Andric const int MaxWaitStates = 2; 1218*0b57cec5SDimitry Andric 1219*0b57cec5SDimitry Andric if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 1220*0b57cec5SDimitry Andric continue; 1221*0b57cec5SDimitry Andric 1222*0b57cec5SDimitry Andric int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates - 1223*0b57cec5SDimitry Andric getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates); 1224*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1225*0b57cec5SDimitry Andric 1226*0b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1227*0b57cec5SDimitry Andric break; 1228*0b57cec5SDimitry Andric } 1229*0b57cec5SDimitry Andric } 1230*0b57cec5SDimitry Andric } 1231*0b57cec5SDimitry Andric 1232*0b57cec5SDimitry Andric auto IsMFMAFn = [] (MachineInstr *MI) { 1233*0b57cec5SDimitry Andric return SIInstrInfo::isMAI(*MI) && 1234*0b57cec5SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 && 1235*0b57cec5SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32; 1236*0b57cec5SDimitry Andric }; 1237*0b57cec5SDimitry Andric 1238*0b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_operands()) { 1239*0b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg())) 1240*0b57cec5SDimitry Andric continue; 1241*0b57cec5SDimitry Andric 1242*0b57cec5SDimitry Andric if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32) 1243*0b57cec5SDimitry Andric continue; 1244*0b57cec5SDimitry Andric 1245*0b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcABWaitStates = 4; 1246*0b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcCWaitStates = 2; 1247*0b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4; 1248*0b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10; 1249*0b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18; 1250*0b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1; 1251*0b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7; 1252*0b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15; 1253*0b57cec5SDimitry Andric const int MaxWaitStates = 18; 1254*0b57cec5SDimitry Andric unsigned Reg = Op.getReg(); 1255*0b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 1256*0b57cec5SDimitry Andric 1257*0b57cec5SDimitry Andric auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this] 1258*0b57cec5SDimitry Andric (MachineInstr *MI) { 1259*0b57cec5SDimitry Andric if (!IsMFMAFn(MI)) 1260*0b57cec5SDimitry Andric return false; 1261*0b57cec5SDimitry Andric unsigned DstReg = MI->getOperand(0).getReg(); 1262*0b57cec5SDimitry Andric if (DstReg == Reg) 1263*0b57cec5SDimitry Andric return false; 1264*0b57cec5SDimitry Andric HazardDefLatency = std::max(HazardDefLatency, 1265*0b57cec5SDimitry Andric TSchedModel.computeInstrLatency(MI)); 1266*0b57cec5SDimitry Andric return TRI.regsOverlap(DstReg, Reg); 1267*0b57cec5SDimitry Andric }; 1268*0b57cec5SDimitry Andric 1269*0b57cec5SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, 1270*0b57cec5SDimitry Andric MaxWaitStates); 1271*0b57cec5SDimitry Andric int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates; 1272*0b57cec5SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 1273*0b57cec5SDimitry Andric int OpNo = MI->getOperandNo(&Op); 1274*0b57cec5SDimitry Andric if (OpNo == SrcCIdx) { 1275*0b57cec5SDimitry Andric NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates; 1276*0b57cec5SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) { 1277*0b57cec5SDimitry Andric switch (HazardDefLatency) { 1278*0b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates; 1279*0b57cec5SDimitry Andric break; 1280*0b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates; 1281*0b57cec5SDimitry Andric break; 1282*0b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 1283*0b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates; 1284*0b57cec5SDimitry Andric break; 1285*0b57cec5SDimitry Andric } 1286*0b57cec5SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) { 1287*0b57cec5SDimitry Andric switch (HazardDefLatency) { 1288*0b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates; 1289*0b57cec5SDimitry Andric break; 1290*0b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates; 1291*0b57cec5SDimitry Andric break; 1292*0b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 1293*0b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates; 1294*0b57cec5SDimitry Andric break; 1295*0b57cec5SDimitry Andric } 1296*0b57cec5SDimitry Andric } 1297*0b57cec5SDimitry Andric 1298*0b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 1299*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1300*0b57cec5SDimitry Andric 1301*0b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1302*0b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 1303*0b57cec5SDimitry Andric 1304*0b57cec5SDimitry Andric auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) { 1305*0b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32) 1306*0b57cec5SDimitry Andric return false; 1307*0b57cec5SDimitry Andric unsigned DstReg = MI->getOperand(0).getReg(); 1308*0b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 1309*0b57cec5SDimitry Andric }; 1310*0b57cec5SDimitry Andric 1311*0b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcCWaitStates = 1; 1312*0b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcABWaitStates = 3; 1313*0b57cec5SDimitry Andric const int AccVGPRWriteAccVgprReadWaitStates = 3; 1314*0b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates; 1315*0b57cec5SDimitry Andric if (OpNo == SrcCIdx) 1316*0b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates; 1317*0b57cec5SDimitry Andric else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) 1318*0b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates; 1319*0b57cec5SDimitry Andric 1320*0b57cec5SDimitry Andric WaitStatesNeededForUse = NeedWaitStates - 1321*0b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates); 1322*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1323*0b57cec5SDimitry Andric 1324*0b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1325*0b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 1326*0b57cec5SDimitry Andric } 1327*0b57cec5SDimitry Andric 1328*0b57cec5SDimitry Andric if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) { 1329*0b57cec5SDimitry Andric const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0; 1330*0b57cec5SDimitry Andric const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5; 1331*0b57cec5SDimitry Andric const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13; 1332*0b57cec5SDimitry Andric const int MaxWaitStates = 13; 1333*0b57cec5SDimitry Andric unsigned DstReg = MI->getOperand(0).getReg(); 1334*0b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 1335*0b57cec5SDimitry Andric 1336*0b57cec5SDimitry Andric auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this] 1337*0b57cec5SDimitry Andric (MachineInstr *MI) { 1338*0b57cec5SDimitry Andric if (!IsMFMAFn(MI)) 1339*0b57cec5SDimitry Andric return false; 1340*0b57cec5SDimitry Andric unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); 1341*0b57cec5SDimitry Andric HazardDefLatency = std::max(HazardDefLatency, 1342*0b57cec5SDimitry Andric TSchedModel.computeInstrLatency(MI)); 1343*0b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 1344*0b57cec5SDimitry Andric }; 1345*0b57cec5SDimitry Andric 1346*0b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates); 1347*0b57cec5SDimitry Andric int NeedWaitStates; 1348*0b57cec5SDimitry Andric switch (HazardDefLatency) { 1349*0b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates; 1350*0b57cec5SDimitry Andric break; 1351*0b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates; 1352*0b57cec5SDimitry Andric break; 1353*0b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 1354*0b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates; 1355*0b57cec5SDimitry Andric break; 1356*0b57cec5SDimitry Andric } 1357*0b57cec5SDimitry Andric 1358*0b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince; 1359*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1360*0b57cec5SDimitry Andric } 1361*0b57cec5SDimitry Andric 1362*0b57cec5SDimitry Andric return WaitStatesNeeded; 1363*0b57cec5SDimitry Andric } 1364*0b57cec5SDimitry Andric 1365*0b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { 1366*0b57cec5SDimitry Andric if (!ST.hasMAIInsts()) 1367*0b57cec5SDimitry Andric return 0; 1368*0b57cec5SDimitry Andric 1369*0b57cec5SDimitry Andric int WaitStatesNeeded = 0; 1370*0b57cec5SDimitry Andric 1371*0b57cec5SDimitry Andric auto IsAccVgprReadFn = [] (MachineInstr *MI) { 1372*0b57cec5SDimitry Andric return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32; 1373*0b57cec5SDimitry Andric }; 1374*0b57cec5SDimitry Andric 1375*0b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_uses()) { 1376*0b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg())) 1377*0b57cec5SDimitry Andric continue; 1378*0b57cec5SDimitry Andric 1379*0b57cec5SDimitry Andric unsigned Reg = Op.getReg(); 1380*0b57cec5SDimitry Andric 1381*0b57cec5SDimitry Andric const int AccVgprReadLdStWaitStates = 2; 1382*0b57cec5SDimitry Andric const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1; 1383*0b57cec5SDimitry Andric const int MaxWaitStates = 2; 1384*0b57cec5SDimitry Andric 1385*0b57cec5SDimitry Andric int WaitStatesNeededForUse = AccVgprReadLdStWaitStates - 1386*0b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates); 1387*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1388*0b57cec5SDimitry Andric 1389*0b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1390*0b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 1391*0b57cec5SDimitry Andric 1392*0b57cec5SDimitry Andric auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) { 1393*0b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32) 1394*0b57cec5SDimitry Andric return false; 1395*0b57cec5SDimitry Andric auto IsVALUFn = [] (MachineInstr *MI) { 1396*0b57cec5SDimitry Andric return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI); 1397*0b57cec5SDimitry Andric }; 1398*0b57cec5SDimitry Andric return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) < 1399*0b57cec5SDimitry Andric std::numeric_limits<int>::max(); 1400*0b57cec5SDimitry Andric }; 1401*0b57cec5SDimitry Andric 1402*0b57cec5SDimitry Andric WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates - 1403*0b57cec5SDimitry Andric getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates); 1404*0b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1405*0b57cec5SDimitry Andric } 1406*0b57cec5SDimitry Andric 1407*0b57cec5SDimitry Andric return WaitStatesNeeded; 1408*0b57cec5SDimitry Andric } 1409