10b57cec5SDimitry Andric //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements hazard recognizers for scheduling on GCN processors. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "GCNHazardRecognizer.h" 14*e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 150b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 160b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 170b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 18*e8d8bef9SDimitry Andric #include "llvm/Support/TargetParser.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 230b57cec5SDimitry Andric // Hazard Recoginizer Implementation 240b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 270b57cec5SDimitry Andric IsHazardRecognizerMode(false), 280b57cec5SDimitry Andric CurrCycleInstr(nullptr), 290b57cec5SDimitry Andric MF(MF), 300b57cec5SDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), 310b57cec5SDimitry Andric TII(*ST.getInstrInfo()), 320b57cec5SDimitry Andric TRI(TII.getRegisterInfo()), 330b57cec5SDimitry Andric ClauseUses(TRI.getNumRegUnits()), 340b57cec5SDimitry Andric ClauseDefs(TRI.getNumRegUnits()) { 350b57cec5SDimitry Andric MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5; 360b57cec5SDimitry Andric TSchedModel.init(&ST); 370b57cec5SDimitry Andric } 380b57cec5SDimitry Andric 39*e8d8bef9SDimitry Andric void GCNHazardRecognizer::Reset() { 40*e8d8bef9SDimitry Andric EmittedInstrs.clear(); 41*e8d8bef9SDimitry Andric } 42*e8d8bef9SDimitry Andric 430b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 440b57cec5SDimitry Andric EmitInstruction(SU->getInstr()); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 480b57cec5SDimitry Andric CurrCycleInstr = MI; 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric static bool isDivFMas(unsigned Opcode) { 52*e8d8bef9SDimitry Andric return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64; 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric static bool isSGetReg(unsigned Opcode) { 560b57cec5SDimitry Andric return Opcode == AMDGPU::S_GETREG_B32; 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static bool isSSetReg(unsigned Opcode) { 60*e8d8bef9SDimitry Andric switch (Opcode) { 61*e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_B32: 62*e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_B32_mode: 63*e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_IMM32_B32: 64*e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_IMM32_B32_mode: 65*e8d8bef9SDimitry Andric return true; 66*e8d8bef9SDimitry Andric } 67*e8d8bef9SDimitry Andric return false; 680b57cec5SDimitry Andric } 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric static bool isRWLane(unsigned Opcode) { 710b57cec5SDimitry Andric return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32; 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric static bool isRFE(unsigned Opcode) { 750b57cec5SDimitry Andric return Opcode == AMDGPU::S_RFE_B64; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric static bool isSMovRel(unsigned Opcode) { 790b57cec5SDimitry Andric switch (Opcode) { 800b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B32: 810b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B64: 820b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B32: 830b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B64: 840b57cec5SDimitry Andric return true; 850b57cec5SDimitry Andric default: 860b57cec5SDimitry Andric return false; 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric } 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, 910b57cec5SDimitry Andric const MachineInstr &MI) { 920b57cec5SDimitry Andric if (TII.isAlwaysGDS(MI.getOpcode())) 930b57cec5SDimitry Andric return true; 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric switch (MI.getOpcode()) { 960b57cec5SDimitry Andric case AMDGPU::S_SENDMSG: 970b57cec5SDimitry Andric case AMDGPU::S_SENDMSGHALT: 980b57cec5SDimitry Andric case AMDGPU::S_TTRACEDATA: 990b57cec5SDimitry Andric return true; 1000b57cec5SDimitry Andric // These DS opcodes don't support GDS. 1010b57cec5SDimitry Andric case AMDGPU::DS_NOP: 1020b57cec5SDimitry Andric case AMDGPU::DS_PERMUTE_B32: 1030b57cec5SDimitry Andric case AMDGPU::DS_BPERMUTE_B32: 1040b57cec5SDimitry Andric return false; 1050b57cec5SDimitry Andric default: 1060b57cec5SDimitry Andric if (TII.isDS(MI.getOpcode())) { 1070b57cec5SDimitry Andric int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 1080b57cec5SDimitry Andric AMDGPU::OpName::gds); 1090b57cec5SDimitry Andric if (MI.getOperand(GDS).getImm()) 1100b57cec5SDimitry Andric return true; 1110b57cec5SDimitry Andric } 1120b57cec5SDimitry Andric return false; 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric static bool isPermlane(const MachineInstr &MI) { 1170b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 118*e8d8bef9SDimitry Andric return Opcode == AMDGPU::V_PERMLANE16_B32_e64 || 119*e8d8bef9SDimitry Andric Opcode == AMDGPU::V_PERMLANEX16_B32_e64; 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 1230b57cec5SDimitry Andric const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 1240b57cec5SDimitry Andric AMDGPU::OpName::simm16); 1250b57cec5SDimitry Andric return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric ScheduleHazardRecognizer::HazardType 1290b57cec5SDimitry Andric GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 1300b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 131*e8d8bef9SDimitry Andric // If we are not in "HazardRecognizerMode" and therefore not being run from 132*e8d8bef9SDimitry Andric // the scheduler, track possible stalls from hazards but don't insert noops. 133*e8d8bef9SDimitry Andric auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard; 134*e8d8bef9SDimitry Andric 1350b57cec5SDimitry Andric if (MI->isBundle()) 1360b57cec5SDimitry Andric return NoHazard; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 139*e8d8bef9SDimitry Andric return HazardType; 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric // FIXME: Should flat be considered vmem? 1420b57cec5SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 1430b57cec5SDimitry Andric SIInstrInfo::isFLAT(*MI)) 1440b57cec5SDimitry Andric && checkVMEMHazards(MI) > 0) 145*e8d8bef9SDimitry Andric return HazardType; 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0) 148*e8d8bef9SDimitry Andric return HazardType; 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric if (checkFPAtomicToDenormModeHazard(MI) > 0) 151*e8d8bef9SDimitry Andric return HazardType; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 1540b57cec5SDimitry Andric return NoHazard; 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 157*e8d8bef9SDimitry Andric return HazardType; 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 160*e8d8bef9SDimitry Andric return HazardType; 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 163*e8d8bef9SDimitry Andric return HazardType; 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) 166*e8d8bef9SDimitry Andric return HazardType; 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 169*e8d8bef9SDimitry Andric return HazardType; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 172*e8d8bef9SDimitry Andric return HazardType; 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) 175*e8d8bef9SDimitry Andric return HazardType; 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric if (ST.hasReadM0MovRelInterpHazard() && 1780b57cec5SDimitry Andric (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && 1790b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 180*e8d8bef9SDimitry Andric return HazardType; 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) && 1830b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 184*e8d8bef9SDimitry Andric return HazardType; 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0) 187*e8d8bef9SDimitry Andric return HazardType; 1880b57cec5SDimitry Andric 189*e8d8bef9SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 190*e8d8bef9SDimitry Andric SIInstrInfo::isFLAT(*MI) || 191*e8d8bef9SDimitry Andric SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0) 192*e8d8bef9SDimitry Andric return HazardType; 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) 195*e8d8bef9SDimitry Andric return HazardType; 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric return NoHazard; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 200*e8d8bef9SDimitry Andric static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII, 201*e8d8bef9SDimitry Andric unsigned Quantity) { 202*e8d8bef9SDimitry Andric while (Quantity > 0) { 203*e8d8bef9SDimitry Andric unsigned Arg = std::min(Quantity, 8u); 204*e8d8bef9SDimitry Andric Quantity -= Arg; 2050b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)) 206*e8d8bef9SDimitry Andric .addImm(Arg - 1); 207*e8d8bef9SDimitry Andric } 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric void GCNHazardRecognizer::processBundle() { 2110b57cec5SDimitry Andric MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator()); 2120b57cec5SDimitry Andric MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end(); 2130b57cec5SDimitry Andric // Check bundled MachineInstr's for hazards. 2140b57cec5SDimitry Andric for (; MI != E && MI->isInsideBundle(); ++MI) { 2150b57cec5SDimitry Andric CurrCycleInstr = &*MI; 2160b57cec5SDimitry Andric unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr); 2170b57cec5SDimitry Andric 218*e8d8bef9SDimitry Andric if (IsHazardRecognizerMode) { 2190b57cec5SDimitry Andric fixHazards(CurrCycleInstr); 2200b57cec5SDimitry Andric 221*e8d8bef9SDimitry Andric insertNoopsInBundle(CurrCycleInstr, TII, WaitStates); 222*e8d8bef9SDimitry Andric } 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric // It’s unnecessary to track more than MaxLookAhead instructions. Since we 2250b57cec5SDimitry Andric // include the bundled MI directly after, only add a maximum of 2260b57cec5SDimitry Andric // (MaxLookAhead - 1) noops to EmittedInstrs. 2270b57cec5SDimitry Andric for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i) 2280b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 2310b57cec5SDimitry Andric EmittedInstrs.resize(MaxLookAhead); 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric CurrCycleInstr = nullptr; 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 2370b57cec5SDimitry Andric IsHazardRecognizerMode = true; 2380b57cec5SDimitry Andric CurrCycleInstr = MI; 2390b57cec5SDimitry Andric unsigned W = PreEmitNoopsCommon(MI); 2400b57cec5SDimitry Andric fixHazards(MI); 2410b57cec5SDimitry Andric CurrCycleInstr = nullptr; 2420b57cec5SDimitry Andric return W; 2430b57cec5SDimitry Andric } 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { 2460b57cec5SDimitry Andric if (MI->isBundle()) 2470b57cec5SDimitry Andric return 0; 2480b57cec5SDimitry Andric 249*e8d8bef9SDimitry Andric int WaitStates = 0; 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI)) 2520b57cec5SDimitry Andric return std::max(WaitStates, checkSMRDHazards(MI)); 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) 2550b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug()) 2580b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 2630b57cec5SDimitry Andric return WaitStates; 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI)) 2660b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkVALUHazards(MI)); 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI)) 2690b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode())) 2720b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric if (isRWLane(MI->getOpcode())) 2750b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric if (MI->isInlineAsm()) 2780b57cec5SDimitry Andric return std::max(WaitStates, checkInlineAsmHazards(MI)); 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode())) 2810b57cec5SDimitry Andric return std::max(WaitStates, checkGetRegHazards(MI)); 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode())) 2840b57cec5SDimitry Andric return std::max(WaitStates, checkSetRegHazards(MI)); 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric if (isRFE(MI->getOpcode())) 2870b57cec5SDimitry Andric return std::max(WaitStates, checkRFEHazards(MI)); 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || 2900b57cec5SDimitry Andric isSMovRel(MI->getOpcode()))) 2910b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) 2940b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI)) 2970b57cec5SDimitry Andric return std::max(WaitStates, checkMAIHazards(MI)); 2980b57cec5SDimitry Andric 299*e8d8bef9SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || 300*e8d8bef9SDimitry Andric SIInstrInfo::isFLAT(*MI) || 301*e8d8bef9SDimitry Andric SIInstrInfo::isDS(*MI)) 3020b57cec5SDimitry Andric return std::max(WaitStates, checkMAILdStHazards(MI)); 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric return WaitStates; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric void GCNHazardRecognizer::EmitNoop() { 3080b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric void GCNHazardRecognizer::AdvanceCycle() { 3120b57cec5SDimitry Andric // When the scheduler detects a stall, it will call AdvanceCycle() without 3130b57cec5SDimitry Andric // emitting any instructions. 314*e8d8bef9SDimitry Andric if (!CurrCycleInstr) { 315*e8d8bef9SDimitry Andric EmittedInstrs.push_front(nullptr); 3160b57cec5SDimitry Andric return; 317*e8d8bef9SDimitry Andric } 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric // Do not track non-instructions which do not affect the wait states. 3200b57cec5SDimitry Andric // If included, these instructions can lead to buffer overflow such that 3210b57cec5SDimitry Andric // detectable hazards are missed. 3220b57cec5SDimitry Andric if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() || 323*e8d8bef9SDimitry Andric CurrCycleInstr->isKill()) { 324*e8d8bef9SDimitry Andric CurrCycleInstr = nullptr; 3250b57cec5SDimitry Andric return; 326*e8d8bef9SDimitry Andric } 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric if (CurrCycleInstr->isBundle()) { 3290b57cec5SDimitry Andric processBundle(); 3300b57cec5SDimitry Andric return; 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric // Keep track of emitted instructions 3360b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric // Add a nullptr for each additional wait state after the first. Make sure 3390b57cec5SDimitry Andric // not to add more than getMaxLookAhead() items to the list, since we 3400b57cec5SDimitry Andric // truncate the list to that size right after this loop. 3410b57cec5SDimitry Andric for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 3420b57cec5SDimitry Andric i < e; ++i) { 3430b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 3440b57cec5SDimitry Andric } 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric // getMaxLookahead() is the largest number of wait states we will ever need 3470b57cec5SDimitry Andric // to insert, so there is no point in keeping track of more than that many 3480b57cec5SDimitry Andric // wait states. 3490b57cec5SDimitry Andric EmittedInstrs.resize(getMaxLookAhead()); 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric CurrCycleInstr = nullptr; 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric void GCNHazardRecognizer::RecedeCycle() { 3550b57cec5SDimitry Andric llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3590b57cec5SDimitry Andric // Helper Functions 3600b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn; 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric // Returns a minimum wait states since \p I walking all predecessors. 3650b57cec5SDimitry Andric // Only scans until \p IsExpired does not return true. 3660b57cec5SDimitry Andric // Can only be run in a hazard recognizer mode. 3670b57cec5SDimitry Andric static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 3680b57cec5SDimitry Andric MachineBasicBlock *MBB, 3690b57cec5SDimitry Andric MachineBasicBlock::reverse_instr_iterator I, 3700b57cec5SDimitry Andric int WaitStates, 3710b57cec5SDimitry Andric IsExpiredFn IsExpired, 3720b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> &Visited) { 3730b57cec5SDimitry Andric for (auto E = MBB->instr_rend(); I != E; ++I) { 3740b57cec5SDimitry Andric // Don't add WaitStates for parent BUNDLE instructions. 3750b57cec5SDimitry Andric if (I->isBundle()) 3760b57cec5SDimitry Andric continue; 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric if (IsHazard(&*I)) 3790b57cec5SDimitry Andric return WaitStates; 3800b57cec5SDimitry Andric 381*e8d8bef9SDimitry Andric if (I->isInlineAsm() || I->isMetaInstruction()) 3820b57cec5SDimitry Andric continue; 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric WaitStates += SIInstrInfo::getNumWaitStates(*I); 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric if (IsExpired(&*I, WaitStates)) 3870b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 3880b57cec5SDimitry Andric } 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric int MinWaitStates = WaitStates; 3910b57cec5SDimitry Andric bool Found = false; 3920b57cec5SDimitry Andric for (MachineBasicBlock *Pred : MBB->predecessors()) { 3930b57cec5SDimitry Andric if (!Visited.insert(Pred).second) 3940b57cec5SDimitry Andric continue; 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), 3970b57cec5SDimitry Andric WaitStates, IsExpired, Visited); 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric if (W == std::numeric_limits<int>::max()) 4000b57cec5SDimitry Andric continue; 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric MinWaitStates = Found ? std::min(MinWaitStates, W) : W; 4030b57cec5SDimitry Andric if (IsExpired(nullptr, MinWaitStates)) 4040b57cec5SDimitry Andric return MinWaitStates; 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric Found = true; 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric if (Found) 4100b57cec5SDimitry Andric return MinWaitStates; 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 4160b57cec5SDimitry Andric MachineInstr *MI, 4170b57cec5SDimitry Andric IsExpiredFn IsExpired) { 4180b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 4190b57cec5SDimitry Andric return getWaitStatesSince(IsHazard, MI->getParent(), 4200b57cec5SDimitry Andric std::next(MI->getReverseIterator()), 4210b57cec5SDimitry Andric 0, IsExpired, Visited); 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) { 4250b57cec5SDimitry Andric if (IsHazardRecognizerMode) { 4260b57cec5SDimitry Andric auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) { 4270b57cec5SDimitry Andric return WaitStates >= Limit; 4280b57cec5SDimitry Andric }; 4290b57cec5SDimitry Andric return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn); 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric int WaitStates = 0; 4330b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 4340b57cec5SDimitry Andric if (MI) { 4350b57cec5SDimitry Andric if (IsHazard(MI)) 4360b57cec5SDimitry Andric return WaitStates; 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric if (MI->isInlineAsm()) 4390b57cec5SDimitry Andric continue; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric ++WaitStates; 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric if (WaitStates >= Limit) 4440b57cec5SDimitry Andric break; 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg, 4500b57cec5SDimitry Andric IsHazardFn IsHazardDef, 4510b57cec5SDimitry Andric int Limit) { 4520b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) { 4550b57cec5SDimitry Andric return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI); 4560b57cec5SDimitry Andric }; 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard, 4620b57cec5SDimitry Andric int Limit) { 4630b57cec5SDimitry Andric auto IsHazardFn = [IsHazard] (MachineInstr *MI) { 4640b57cec5SDimitry Andric return isSSetReg(MI->getOpcode()) && IsHazard(MI); 4650b57cec5SDimitry Andric }; 4660b57cec5SDimitry Andric 4670b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4710b57cec5SDimitry Andric // No-op Hazard Detection 4720b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4730b57cec5SDimitry Andric 474*e8d8bef9SDimitry Andric static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV, 475*e8d8bef9SDimitry Andric MCRegister Reg) { 4760b57cec5SDimitry Andric for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) 4770b57cec5SDimitry Andric BV.set(*RUI); 4780b57cec5SDimitry Andric } 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric static void addRegsToSet(const SIRegisterInfo &TRI, 4810b57cec5SDimitry Andric iterator_range<MachineInstr::const_mop_iterator> Ops, 4820b57cec5SDimitry Andric BitVector &Set) { 4830b57cec5SDimitry Andric for (const MachineOperand &Op : Ops) { 4840b57cec5SDimitry Andric if (Op.isReg()) 485*e8d8bef9SDimitry Andric addRegUnits(TRI, Set, Op.getReg().asMCReg()); 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric } 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { 4900b57cec5SDimitry Andric // XXX: Do we need to worry about implicit operands 4910b57cec5SDimitry Andric addRegsToSet(TRI, MI.defs(), ClauseDefs); 4920b57cec5SDimitry Andric addRegsToSet(TRI, MI.uses(), ClauseUses); 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric 4955ffd83dbSDimitry Andric static bool breaksSMEMSoftClause(MachineInstr *MI) { 4965ffd83dbSDimitry Andric return !SIInstrInfo::isSMRD(*MI); 4975ffd83dbSDimitry Andric } 4985ffd83dbSDimitry Andric 4995ffd83dbSDimitry Andric static bool breaksVMEMSoftClause(MachineInstr *MI) { 5005ffd83dbSDimitry Andric return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI); 5015ffd83dbSDimitry Andric } 5025ffd83dbSDimitry Andric 5030b57cec5SDimitry Andric int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { 5040b57cec5SDimitry Andric // SMEM soft clause are only present on VI+, and only matter if xnack is 5050b57cec5SDimitry Andric // enabled. 5060b57cec5SDimitry Andric if (!ST.isXNACKEnabled()) 5070b57cec5SDimitry Andric return 0; 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric bool IsSMRD = TII.isSMRD(*MEM); 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric resetClause(); 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric // A soft-clause is any group of consecutive SMEM instructions. The 5140b57cec5SDimitry Andric // instructions in this group may return out of order and/or may be 5150b57cec5SDimitry Andric // replayed (i.e. the same instruction issued more than once). 5160b57cec5SDimitry Andric // 5170b57cec5SDimitry Andric // In order to handle these situations correctly we need to make sure that 5180b57cec5SDimitry Andric // when a clause has more than one instruction, no instruction in the clause 5190b57cec5SDimitry Andric // writes to a register that is read by another instruction in the clause 5200b57cec5SDimitry Andric // (including itself). If we encounter this situaion, we need to break the 5210b57cec5SDimitry Andric // clause by inserting a non SMEM instruction. 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 5240b57cec5SDimitry Andric // When we hit a non-SMEM instruction then we have passed the start of the 5250b57cec5SDimitry Andric // clause and we can stop. 5260b57cec5SDimitry Andric if (!MI) 5270b57cec5SDimitry Andric break; 5280b57cec5SDimitry Andric 5295ffd83dbSDimitry Andric if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI)) 5300b57cec5SDimitry Andric break; 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric addClauseInst(*MI); 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric if (ClauseDefs.none()) 5360b57cec5SDimitry Andric return 0; 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric // We need to make sure not to put loads and stores in the same clause if they 5390b57cec5SDimitry Andric // use the same address. For now, just start a new clause whenever we see a 5400b57cec5SDimitry Andric // store. 5410b57cec5SDimitry Andric if (MEM->mayStore()) 5420b57cec5SDimitry Andric return 1; 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric addClauseInst(*MEM); 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric // If the set of defs and uses intersect then we cannot add this instruction 5470b57cec5SDimitry Andric // to the clause, so we have a hazard. 5480b57cec5SDimitry Andric return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0; 5490b57cec5SDimitry Andric } 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 5520b57cec5SDimitry Andric int WaitStatesNeeded = 0; 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric WaitStatesNeeded = checkSoftClauseHazards(SMRD); 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric // This SMRD hazard only affects SI. 5570b57cec5SDimitry Andric if (!ST.hasSMRDReadVALUDefHazard()) 5580b57cec5SDimitry Andric return WaitStatesNeeded; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // A read of an SGPR by SMRD instruction requires 4 wait states when the 5610b57cec5SDimitry Andric // SGPR was written by a VALU instruction. 5620b57cec5SDimitry Andric int SmrdSgprWaitStates = 4; 5630b57cec5SDimitry Andric auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; 5640b57cec5SDimitry Andric auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); }; 5650b57cec5SDimitry Andric 5660b57cec5SDimitry Andric bool IsBufferSMRD = TII.isBufferSMRD(*SMRD); 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric for (const MachineOperand &Use : SMRD->uses()) { 5690b57cec5SDimitry Andric if (!Use.isReg()) 5700b57cec5SDimitry Andric continue; 5710b57cec5SDimitry Andric int WaitStatesNeededForUse = 5720b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 5730b57cec5SDimitry Andric SmrdSgprWaitStates); 5740b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric // This fixes what appears to be undocumented hardware behavior in SI where 5770b57cec5SDimitry Andric // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor 5780b57cec5SDimitry Andric // needs some number of nops in between. We don't know how many we need, but 5790b57cec5SDimitry Andric // let's use 4. This wasn't discovered before probably because the only 5800b57cec5SDimitry Andric // case when this happens is when we expand a 64-bit pointer into a full 5810b57cec5SDimitry Andric // descriptor and use s_buffer_load_dword instead of s_load_dword, which was 5820b57cec5SDimitry Andric // probably never encountered in the closed-source land. 5830b57cec5SDimitry Andric if (IsBufferSMRD) { 5840b57cec5SDimitry Andric int WaitStatesNeededForUse = 5850b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 5860b57cec5SDimitry Andric IsBufferHazardDefFn, 5870b57cec5SDimitry Andric SmrdSgprWaitStates); 5880b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric return WaitStatesNeeded; 5930b57cec5SDimitry Andric } 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 5960b57cec5SDimitry Andric if (!ST.hasVMEMReadSGPRVALUDefHazard()) 5970b57cec5SDimitry Andric return 0; 5980b57cec5SDimitry Andric 5990b57cec5SDimitry Andric int WaitStatesNeeded = checkSoftClauseHazards(VMEM); 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric // A read of an SGPR by a VMEM instruction requires 5 wait states when the 6020b57cec5SDimitry Andric // SGPR was written by a VALU Instruction. 6030b57cec5SDimitry Andric const int VmemSgprWaitStates = 5; 6040b57cec5SDimitry Andric auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; 6050b57cec5SDimitry Andric for (const MachineOperand &Use : VMEM->uses()) { 6060b57cec5SDimitry Andric if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 6070b57cec5SDimitry Andric continue; 6080b57cec5SDimitry Andric 6090b57cec5SDimitry Andric int WaitStatesNeededForUse = 6100b57cec5SDimitry Andric VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 6110b57cec5SDimitry Andric VmemSgprWaitStates); 6120b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6130b57cec5SDimitry Andric } 6140b57cec5SDimitry Andric return WaitStatesNeeded; 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 6180b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 6190b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6200b57cec5SDimitry Andric 6210b57cec5SDimitry Andric // Check for DPP VGPR read after VALU VGPR write and EXEC write. 6220b57cec5SDimitry Andric int DppVgprWaitStates = 2; 6230b57cec5SDimitry Andric int DppExecWaitStates = 5; 6240b57cec5SDimitry Andric int WaitStatesNeeded = 0; 6250b57cec5SDimitry Andric auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 6260b57cec5SDimitry Andric 6270b57cec5SDimitry Andric for (const MachineOperand &Use : DPP->uses()) { 6280b57cec5SDimitry Andric if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 6290b57cec5SDimitry Andric continue; 6300b57cec5SDimitry Andric int WaitStatesNeededForUse = 6310b57cec5SDimitry Andric DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 6320b57cec5SDimitry Andric [](MachineInstr *) { return true; }, 6330b57cec5SDimitry Andric DppVgprWaitStates); 6340b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric WaitStatesNeeded = std::max( 6380b57cec5SDimitry Andric WaitStatesNeeded, 6390b57cec5SDimitry Andric DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn, 6400b57cec5SDimitry Andric DppExecWaitStates)); 6410b57cec5SDimitry Andric 6420b57cec5SDimitry Andric return WaitStatesNeeded; 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric 6450b57cec5SDimitry Andric int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 6460b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6470b57cec5SDimitry Andric 6480b57cec5SDimitry Andric // v_div_fmas requires 4 wait states after a write to vcc from a VALU 6490b57cec5SDimitry Andric // instruction. 6500b57cec5SDimitry Andric const int DivFMasWaitStates = 4; 6510b57cec5SDimitry Andric auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 6520b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn, 6530b57cec5SDimitry Andric DivFMasWaitStates); 6540b57cec5SDimitry Andric 6550b57cec5SDimitry Andric return DivFMasWaitStates - WaitStatesNeeded; 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 6590b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6600b57cec5SDimitry Andric unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric const int GetRegWaitStates = 2; 6630b57cec5SDimitry Andric auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) { 6640b57cec5SDimitry Andric return GetRegHWReg == getHWReg(TII, *MI); 6650b57cec5SDimitry Andric }; 6660b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates); 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric return GetRegWaitStates - WaitStatesNeeded; 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 6720b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6730b57cec5SDimitry Andric unsigned HWReg = getHWReg(TII, *SetRegInstr); 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric const int SetRegWaitStates = ST.getSetRegWaitStates(); 6760b57cec5SDimitry Andric auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) { 6770b57cec5SDimitry Andric return HWReg == getHWReg(TII, *MI); 6780b57cec5SDimitry Andric }; 6790b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates); 6800b57cec5SDimitry Andric return SetRegWaitStates - WaitStatesNeeded; 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 6840b57cec5SDimitry Andric if (!MI.mayStore()) 6850b57cec5SDimitry Andric return -1; 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6880b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 6890b57cec5SDimitry Andric const MCInstrDesc &Desc = MI.getDesc(); 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 6920b57cec5SDimitry Andric int VDataRCID = -1; 6930b57cec5SDimitry Andric if (VDataIdx != -1) 6940b57cec5SDimitry Andric VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 6970b57cec5SDimitry Andric // There is no hazard if the instruction does not use vector regs 6980b57cec5SDimitry Andric // (like wbinvl1) 6990b57cec5SDimitry Andric if (VDataIdx == -1) 7000b57cec5SDimitry Andric return -1; 7010b57cec5SDimitry Andric // For MUBUF/MTBUF instructions this hazard only exists if the 7020b57cec5SDimitry Andric // instruction is not using a register in the soffset field. 7030b57cec5SDimitry Andric const MachineOperand *SOffset = 7040b57cec5SDimitry Andric TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 7050b57cec5SDimitry Andric // If we have no soffset operand, then assume this field has been 7060b57cec5SDimitry Andric // hardcoded to zero. 7070b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 7080b57cec5SDimitry Andric (!SOffset || !SOffset->isReg())) 7090b57cec5SDimitry Andric return VDataIdx; 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric // MIMG instructions create a hazard if they don't use a 256-bit T# and 7130b57cec5SDimitry Andric // the store size is greater than 8 bytes and they have more than two bits 7140b57cec5SDimitry Andric // of their dmask set. 7150b57cec5SDimitry Andric // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 7160b57cec5SDimitry Andric if (TII->isMIMG(MI)) { 7170b57cec5SDimitry Andric int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 7180b57cec5SDimitry Andric assert(SRsrcIdx != -1 && 7190b57cec5SDimitry Andric AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 7200b57cec5SDimitry Andric (void)SRsrcIdx; 7210b57cec5SDimitry Andric } 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric if (TII->isFLAT(MI)) { 7240b57cec5SDimitry Andric int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 7250b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 7260b57cec5SDimitry Andric return DataIdx; 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric return -1; 7300b57cec5SDimitry Andric } 7310b57cec5SDimitry Andric 732*e8d8bef9SDimitry Andric int 733*e8d8bef9SDimitry Andric GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, 7340b57cec5SDimitry Andric const MachineRegisterInfo &MRI) { 7350b57cec5SDimitry Andric // Helper to check for the hazard where VMEM instructions that store more than 7360b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 7370b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric const int VALUWaitStates = 1; 7400b57cec5SDimitry Andric int WaitStatesNeeded = 0; 7410b57cec5SDimitry Andric 7420b57cec5SDimitry Andric if (!TRI->isVGPR(MRI, Def.getReg())) 7430b57cec5SDimitry Andric return WaitStatesNeeded; 7448bcb0991SDimitry Andric Register Reg = Def.getReg(); 7450b57cec5SDimitry Andric auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { 7460b57cec5SDimitry Andric int DataIdx = createsVALUHazard(*MI); 7470b57cec5SDimitry Andric return DataIdx >= 0 && 7480b57cec5SDimitry Andric TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); 7490b57cec5SDimitry Andric }; 7500b57cec5SDimitry Andric int WaitStatesNeededForDef = 7510b57cec5SDimitry Andric VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates); 7520b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 7530b57cec5SDimitry Andric 7540b57cec5SDimitry Andric return WaitStatesNeeded; 7550b57cec5SDimitry Andric } 7560b57cec5SDimitry Andric 7570b57cec5SDimitry Andric int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 7580b57cec5SDimitry Andric // This checks for the hazard where VMEM instructions that store more than 7590b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 7600b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 7610b57cec5SDimitry Andric return 0; 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 7640b57cec5SDimitry Andric int WaitStatesNeeded = 0; 7650b57cec5SDimitry Andric 7660b57cec5SDimitry Andric for (const MachineOperand &Def : VALU->defs()) { 7670b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); 7680b57cec5SDimitry Andric } 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric return WaitStatesNeeded; 7710b57cec5SDimitry Andric } 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { 7740b57cec5SDimitry Andric // This checks for hazards associated with inline asm statements. 7750b57cec5SDimitry Andric // Since inline asms can contain just about anything, we use this 7760b57cec5SDimitry Andric // to call/leverage other check*Hazard routines. Note that 7770b57cec5SDimitry Andric // this function doesn't attempt to address all possible inline asm 7780b57cec5SDimitry Andric // hazards (good luck), but is a collection of what has been 7790b57cec5SDimitry Andric // problematic thus far. 7800b57cec5SDimitry Andric 7810b57cec5SDimitry Andric // see checkVALUHazards() 7820b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 7830b57cec5SDimitry Andric return 0; 7840b57cec5SDimitry Andric 7850b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 7860b57cec5SDimitry Andric int WaitStatesNeeded = 0; 7870b57cec5SDimitry Andric 7880b57cec5SDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); 7890b57cec5SDimitry Andric I != E; ++I) { 7900b57cec5SDimitry Andric const MachineOperand &Op = IA->getOperand(I); 7910b57cec5SDimitry Andric if (Op.isReg() && Op.isDef()) { 7920b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric } 7950b57cec5SDimitry Andric 7960b57cec5SDimitry Andric return WaitStatesNeeded; 7970b57cec5SDimitry Andric } 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { 8000b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8010b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 8020b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric const MachineOperand *LaneSelectOp = 8050b57cec5SDimitry Andric TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); 8060b57cec5SDimitry Andric 8070b57cec5SDimitry Andric if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) 8080b57cec5SDimitry Andric return 0; 8090b57cec5SDimitry Andric 8108bcb0991SDimitry Andric Register LaneSelectReg = LaneSelectOp->getReg(); 8110b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 8120b57cec5SDimitry Andric return TII->isVALU(*MI); 8130b57cec5SDimitry Andric }; 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric const int RWLaneWaitStates = 4; 8160b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn, 8170b57cec5SDimitry Andric RWLaneWaitStates); 8180b57cec5SDimitry Andric return RWLaneWaitStates - WaitStatesSince; 8190b57cec5SDimitry Andric } 8200b57cec5SDimitry Andric 8210b57cec5SDimitry Andric int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) { 8220b57cec5SDimitry Andric if (!ST.hasRFEHazards()) 8230b57cec5SDimitry Andric return 0; 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8260b57cec5SDimitry Andric 8270b57cec5SDimitry Andric const int RFEWaitStates = 1; 8280b57cec5SDimitry Andric 8290b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 8300b57cec5SDimitry Andric return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS; 8310b57cec5SDimitry Andric }; 8320b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates); 8330b57cec5SDimitry Andric return RFEWaitStates - WaitStatesNeeded; 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric 8360b57cec5SDimitry Andric int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { 8370b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8380b57cec5SDimitry Andric const int SMovRelWaitStates = 1; 8390b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 8400b57cec5SDimitry Andric return TII->isSALU(*MI); 8410b57cec5SDimitry Andric }; 8420b57cec5SDimitry Andric return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, 8430b57cec5SDimitry Andric SMovRelWaitStates); 8440b57cec5SDimitry Andric } 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { 8470b57cec5SDimitry Andric fixVMEMtoScalarWriteHazards(MI); 8480b57cec5SDimitry Andric fixVcmpxPermlaneHazards(MI); 8490b57cec5SDimitry Andric fixSMEMtoVectorWriteHazards(MI); 8500b57cec5SDimitry Andric fixVcmpxExecWARHazard(MI); 8510b57cec5SDimitry Andric fixLdsBranchVmemWARHazard(MI); 8520b57cec5SDimitry Andric } 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { 8550b57cec5SDimitry Andric if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) 8560b57cec5SDimitry Andric return false; 8570b57cec5SDimitry Andric 8580b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8590b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *MI) { 8600b57cec5SDimitry Andric return TII->isVOPC(*MI); 8610b57cec5SDimitry Andric }; 8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric auto IsExpiredFn = [] (MachineInstr *MI, int) { 8640b57cec5SDimitry Andric if (!MI) 8650b57cec5SDimitry Andric return false; 8660b57cec5SDimitry Andric unsigned Opc = MI->getOpcode(); 8670b57cec5SDimitry Andric return SIInstrInfo::isVALU(*MI) && 8680b57cec5SDimitry Andric Opc != AMDGPU::V_NOP_e32 && 8690b57cec5SDimitry Andric Opc != AMDGPU::V_NOP_e64 && 8700b57cec5SDimitry Andric Opc != AMDGPU::V_NOP_sdwa; 8710b57cec5SDimitry Andric }; 8720b57cec5SDimitry Andric 8730b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 8740b57cec5SDimitry Andric std::numeric_limits<int>::max()) 8750b57cec5SDimitry Andric return false; 8760b57cec5SDimitry Andric 8770b57cec5SDimitry Andric // V_NOP will be discarded by SQ. 8780b57cec5SDimitry Andric // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* 8790b57cec5SDimitry Andric // which is always a VGPR and available. 8800b57cec5SDimitry Andric auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); 8818bcb0991SDimitry Andric Register Reg = Src0->getReg(); 8820b57cec5SDimitry Andric bool IsUndef = Src0->isUndef(); 8830b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 8840b57cec5SDimitry Andric TII->get(AMDGPU::V_MOV_B32_e32)) 8850b57cec5SDimitry Andric .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0)) 8860b57cec5SDimitry Andric .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill); 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric return true; 8890b57cec5SDimitry Andric } 8900b57cec5SDimitry Andric 8910b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { 8920b57cec5SDimitry Andric if (!ST.hasVMEMtoScalarWriteHazard()) 8930b57cec5SDimitry Andric return false; 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI)) 8960b57cec5SDimitry Andric return false; 8970b57cec5SDimitry Andric 8980b57cec5SDimitry Andric if (MI->getNumDefs() == 0) 8990b57cec5SDimitry Andric return false; 9000b57cec5SDimitry Andric 9010b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 9020b57cec5SDimitry Andric 9030b57cec5SDimitry Andric auto IsHazardFn = [TRI, MI] (MachineInstr *I) { 9040b57cec5SDimitry Andric if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) && 9050b57cec5SDimitry Andric !SIInstrInfo::isFLAT(*I)) 9060b57cec5SDimitry Andric return false; 9070b57cec5SDimitry Andric 9080b57cec5SDimitry Andric for (const MachineOperand &Def : MI->defs()) { 9090b57cec5SDimitry Andric MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI); 9100b57cec5SDimitry Andric if (!Op) 9110b57cec5SDimitry Andric continue; 9120b57cec5SDimitry Andric return true; 9130b57cec5SDimitry Andric } 9140b57cec5SDimitry Andric return false; 9150b57cec5SDimitry Andric }; 9160b57cec5SDimitry Andric 9170b57cec5SDimitry Andric auto IsExpiredFn = [](MachineInstr *MI, int) { 9180b57cec5SDimitry Andric return MI && (SIInstrInfo::isVALU(*MI) || 9190b57cec5SDimitry Andric (MI->getOpcode() == AMDGPU::S_WAITCNT && 920*e8d8bef9SDimitry Andric !MI->getOperand(0).getImm()) || 921*e8d8bef9SDimitry Andric (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 922*e8d8bef9SDimitry Andric MI->getOperand(0).getImm() == 0xffe3)); 9230b57cec5SDimitry Andric }; 9240b57cec5SDimitry Andric 9250b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 9260b57cec5SDimitry Andric std::numeric_limits<int>::max()) 9270b57cec5SDimitry Andric return false; 9280b57cec5SDimitry Andric 9290b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 930*e8d8bef9SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 931*e8d8bef9SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 932*e8d8bef9SDimitry Andric .addImm(0xffe3); 9330b57cec5SDimitry Andric return true; 9340b57cec5SDimitry Andric } 9350b57cec5SDimitry Andric 9360b57cec5SDimitry Andric bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { 9370b57cec5SDimitry Andric if (!ST.hasSMEMtoVectorWriteHazard()) 9380b57cec5SDimitry Andric return false; 9390b57cec5SDimitry Andric 9400b57cec5SDimitry Andric if (!SIInstrInfo::isVALU(*MI)) 9410b57cec5SDimitry Andric return false; 9420b57cec5SDimitry Andric 9430b57cec5SDimitry Andric unsigned SDSTName; 9440b57cec5SDimitry Andric switch (MI->getOpcode()) { 9450b57cec5SDimitry Andric case AMDGPU::V_READLANE_B32: 9460b57cec5SDimitry Andric case AMDGPU::V_READFIRSTLANE_B32: 9470b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::vdst; 9480b57cec5SDimitry Andric break; 9490b57cec5SDimitry Andric default: 9500b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::sdst; 9510b57cec5SDimitry Andric break; 9520b57cec5SDimitry Andric } 9530b57cec5SDimitry Andric 9540b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 9550b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 9560b57cec5SDimitry Andric const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU()); 9570b57cec5SDimitry Andric const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName); 9580b57cec5SDimitry Andric if (!SDST) { 9590b57cec5SDimitry Andric for (const auto &MO : MI->implicit_operands()) { 9600b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) { 9610b57cec5SDimitry Andric SDST = &MO; 9620b57cec5SDimitry Andric break; 9630b57cec5SDimitry Andric } 9640b57cec5SDimitry Andric } 9650b57cec5SDimitry Andric } 9660b57cec5SDimitry Andric 9670b57cec5SDimitry Andric if (!SDST) 9680b57cec5SDimitry Andric return false; 9690b57cec5SDimitry Andric 9708bcb0991SDimitry Andric const Register SDSTReg = SDST->getReg(); 9710b57cec5SDimitry Andric auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) { 9720b57cec5SDimitry Andric return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI); 9730b57cec5SDimitry Andric }; 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) { 9760b57cec5SDimitry Andric if (MI) { 9770b57cec5SDimitry Andric if (TII->isSALU(*MI)) { 9780b57cec5SDimitry Andric switch (MI->getOpcode()) { 9790b57cec5SDimitry Andric case AMDGPU::S_SETVSKIP: 9800b57cec5SDimitry Andric case AMDGPU::S_VERSION: 9810b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 9820b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 9830b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 9840b57cec5SDimitry Andric // These instructions cannot not mitigate the hazard. 9850b57cec5SDimitry Andric return false; 9860b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 9870b57cec5SDimitry Andric // Reducing lgkmcnt count to 0 always mitigates the hazard. 9880b57cec5SDimitry Andric return (MI->getOperand(1).getImm() == 0) && 9890b57cec5SDimitry Andric (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL); 9900b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: { 9910b57cec5SDimitry Andric const int64_t Imm = MI->getOperand(0).getImm(); 9920b57cec5SDimitry Andric AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm); 9930b57cec5SDimitry Andric return (Decoded.LgkmCnt == 0); 9940b57cec5SDimitry Andric } 9950b57cec5SDimitry Andric default: 9960b57cec5SDimitry Andric // SOPP instructions cannot mitigate the hazard. 9970b57cec5SDimitry Andric if (TII->isSOPP(*MI)) 9980b57cec5SDimitry Andric return false; 9990b57cec5SDimitry Andric // At this point the SALU can be assumed to mitigate the hazard 10000b57cec5SDimitry Andric // because either: 10010b57cec5SDimitry Andric // (a) it is independent of the at risk SMEM (breaking chain), 10020b57cec5SDimitry Andric // or 10030b57cec5SDimitry Andric // (b) it is dependent on the SMEM, in which case an appropriate 10040b57cec5SDimitry Andric // s_waitcnt lgkmcnt _must_ exist between it and the at risk 10050b57cec5SDimitry Andric // SMEM instruction. 10060b57cec5SDimitry Andric return true; 10070b57cec5SDimitry Andric } 10080b57cec5SDimitry Andric } 10090b57cec5SDimitry Andric } 10100b57cec5SDimitry Andric return false; 10110b57cec5SDimitry Andric }; 10120b57cec5SDimitry Andric 10130b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 10140b57cec5SDimitry Andric std::numeric_limits<int>::max()) 10150b57cec5SDimitry Andric return false; 10160b57cec5SDimitry Andric 10170b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 10180b57cec5SDimitry Andric TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL) 10190b57cec5SDimitry Andric .addImm(0); 10200b57cec5SDimitry Andric return true; 10210b57cec5SDimitry Andric } 10220b57cec5SDimitry Andric 10230b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) { 10240b57cec5SDimitry Andric if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI)) 10250b57cec5SDimitry Andric return false; 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 10280b57cec5SDimitry Andric if (!MI->modifiesRegister(AMDGPU::EXEC, TRI)) 10290b57cec5SDimitry Andric return false; 10300b57cec5SDimitry Andric 10310b57cec5SDimitry Andric auto IsHazardFn = [TRI] (MachineInstr *I) { 10320b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*I)) 10330b57cec5SDimitry Andric return false; 10340b57cec5SDimitry Andric return I->readsRegister(AMDGPU::EXEC, TRI); 10350b57cec5SDimitry Andric }; 10360b57cec5SDimitry Andric 10370b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 10380b57cec5SDimitry Andric auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) { 10390b57cec5SDimitry Andric if (!MI) 10400b57cec5SDimitry Andric return false; 10410b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI)) { 10420b57cec5SDimitry Andric if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst)) 10430b57cec5SDimitry Andric return true; 10440b57cec5SDimitry Andric for (auto MO : MI->implicit_operands()) 10450b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) 10460b57cec5SDimitry Andric return true; 10470b57cec5SDimitry Andric } 10480b57cec5SDimitry Andric if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 10490b57cec5SDimitry Andric (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe) 10500b57cec5SDimitry Andric return true; 10510b57cec5SDimitry Andric return false; 10520b57cec5SDimitry Andric }; 10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 10550b57cec5SDimitry Andric std::numeric_limits<int>::max()) 10560b57cec5SDimitry Andric return false; 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 10590b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 10600b57cec5SDimitry Andric .addImm(0xfffe); 10610b57cec5SDimitry Andric return true; 10620b57cec5SDimitry Andric } 10630b57cec5SDimitry Andric 10640b57cec5SDimitry Andric bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { 10650b57cec5SDimitry Andric if (!ST.hasLdsBranchVmemWARHazard()) 10660b57cec5SDimitry Andric return false; 10670b57cec5SDimitry Andric 10680b57cec5SDimitry Andric auto IsHazardInst = [] (const MachineInstr *MI) { 10690b57cec5SDimitry Andric if (SIInstrInfo::isDS(*MI)) 10700b57cec5SDimitry Andric return 1; 10710b57cec5SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI)) 10720b57cec5SDimitry Andric return 2; 10730b57cec5SDimitry Andric return 0; 10740b57cec5SDimitry Andric }; 10750b57cec5SDimitry Andric 10760b57cec5SDimitry Andric auto InstType = IsHazardInst(MI); 10770b57cec5SDimitry Andric if (!InstType) 10780b57cec5SDimitry Andric return false; 10790b57cec5SDimitry Andric 10800b57cec5SDimitry Andric auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) { 10810b57cec5SDimitry Andric return I && (IsHazardInst(I) || 10820b57cec5SDimitry Andric (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 10830b57cec5SDimitry Andric I->getOperand(0).getReg() == AMDGPU::SGPR_NULL && 10840b57cec5SDimitry Andric !I->getOperand(1).getImm())); 10850b57cec5SDimitry Andric }; 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) { 10880b57cec5SDimitry Andric if (!I->isBranch()) 10890b57cec5SDimitry Andric return false; 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) { 10920b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 10930b57cec5SDimitry Andric return InstType2 && InstType != InstType2; 10940b57cec5SDimitry Andric }; 10950b57cec5SDimitry Andric 10960b57cec5SDimitry Andric auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) { 10970b57cec5SDimitry Andric if (!I) 10980b57cec5SDimitry Andric return false; 10990b57cec5SDimitry Andric 11000b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 11010b57cec5SDimitry Andric if (InstType == InstType2) 11020b57cec5SDimitry Andric return true; 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 11050b57cec5SDimitry Andric I->getOperand(0).getReg() == AMDGPU::SGPR_NULL && 11060b57cec5SDimitry Andric !I->getOperand(1).getImm(); 11070b57cec5SDimitry Andric }; 11080b57cec5SDimitry Andric 11090b57cec5SDimitry Andric return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) != 11100b57cec5SDimitry Andric std::numeric_limits<int>::max(); 11110b57cec5SDimitry Andric }; 11120b57cec5SDimitry Andric 11130b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 11140b57cec5SDimitry Andric std::numeric_limits<int>::max()) 11150b57cec5SDimitry Andric return false; 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 11180b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 11190b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_VSCNT)) 11200b57cec5SDimitry Andric .addReg(AMDGPU::SGPR_NULL, RegState::Undef) 11210b57cec5SDimitry Andric .addImm(0); 11220b57cec5SDimitry Andric 11230b57cec5SDimitry Andric return true; 11240b57cec5SDimitry Andric } 11250b57cec5SDimitry Andric 11260b57cec5SDimitry Andric int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { 11270b57cec5SDimitry Andric int NSAtoVMEMWaitStates = 1; 11280b57cec5SDimitry Andric 11290b57cec5SDimitry Andric if (!ST.hasNSAtoVMEMBug()) 11300b57cec5SDimitry Andric return 0; 11310b57cec5SDimitry Andric 11320b57cec5SDimitry Andric if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI)) 11330b57cec5SDimitry Andric return 0; 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 11360b57cec5SDimitry Andric const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset); 11370b57cec5SDimitry Andric if (!Offset || (Offset->getImm() & 6) == 0) 11380b57cec5SDimitry Andric return 0; 11390b57cec5SDimitry Andric 11400b57cec5SDimitry Andric auto IsHazardFn = [TII] (MachineInstr *I) { 11410b57cec5SDimitry Andric if (!SIInstrInfo::isMIMG(*I)) 11420b57cec5SDimitry Andric return false; 11430b57cec5SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode()); 11440b57cec5SDimitry Andric return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA && 11450b57cec5SDimitry Andric TII->getInstSizeInBytes(*I) >= 16; 11460b57cec5SDimitry Andric }; 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); 11490b57cec5SDimitry Andric } 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { 11520b57cec5SDimitry Andric int FPAtomicToDenormModeWaitStates = 3; 11530b57cec5SDimitry Andric 11540b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) 11550b57cec5SDimitry Andric return 0; 11560b57cec5SDimitry Andric 11570b57cec5SDimitry Andric auto IsHazardFn = [] (MachineInstr *I) { 11580b57cec5SDimitry Andric if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I)) 11590b57cec5SDimitry Andric return false; 11600b57cec5SDimitry Andric return SIInstrInfo::isFPAtomic(*I); 11610b57cec5SDimitry Andric }; 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) { 11640b57cec5SDimitry Andric if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI)) 11650b57cec5SDimitry Andric return true; 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric switch (MI->getOpcode()) { 11680b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: 11690b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 11700b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 11710b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 11720b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 1173*e8d8bef9SDimitry Andric case AMDGPU::S_WAIT_IDLE: 11740b57cec5SDimitry Andric return true; 11750b57cec5SDimitry Andric default: 11760b57cec5SDimitry Andric break; 11770b57cec5SDimitry Andric } 11780b57cec5SDimitry Andric 11790b57cec5SDimitry Andric return false; 11800b57cec5SDimitry Andric }; 11810b57cec5SDimitry Andric 11820b57cec5SDimitry Andric 11830b57cec5SDimitry Andric return FPAtomicToDenormModeWaitStates - 11840b57cec5SDimitry Andric ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); 11850b57cec5SDimitry Andric } 11860b57cec5SDimitry Andric 11870b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { 11880b57cec5SDimitry Andric assert(SIInstrInfo::isMAI(*MI)); 11890b57cec5SDimitry Andric 11900b57cec5SDimitry Andric int WaitStatesNeeded = 0; 11910b57cec5SDimitry Andric unsigned Opc = MI->getOpcode(); 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric auto IsVALUFn = [] (MachineInstr *MI) { 11940b57cec5SDimitry Andric return SIInstrInfo::isVALU(*MI); 11950b57cec5SDimitry Andric }; 11960b57cec5SDimitry Andric 1197*e8d8bef9SDimitry Andric if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write 11980b57cec5SDimitry Andric const int LegacyVALUWritesVGPRWaitStates = 2; 11990b57cec5SDimitry Andric const int VALUWritesExecWaitStates = 4; 12000b57cec5SDimitry Andric const int MaxWaitStates = 4; 12010b57cec5SDimitry Andric 12020b57cec5SDimitry Andric int WaitStatesNeededForUse = VALUWritesExecWaitStates - 12030b57cec5SDimitry Andric getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates); 12040b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric if (WaitStatesNeeded < MaxWaitStates) { 12070b57cec5SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 12080b57cec5SDimitry Andric const int MaxWaitStates = 2; 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 12110b57cec5SDimitry Andric continue; 12120b57cec5SDimitry Andric 12130b57cec5SDimitry Andric int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates - 12140b57cec5SDimitry Andric getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates); 12150b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 12160b57cec5SDimitry Andric 12170b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 12180b57cec5SDimitry Andric break; 12190b57cec5SDimitry Andric } 12200b57cec5SDimitry Andric } 12210b57cec5SDimitry Andric } 12220b57cec5SDimitry Andric 12230b57cec5SDimitry Andric auto IsMFMAFn = [] (MachineInstr *MI) { 12240b57cec5SDimitry Andric return SIInstrInfo::isMAI(*MI) && 1225*e8d8bef9SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 1226*e8d8bef9SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; 12270b57cec5SDimitry Andric }; 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_operands()) { 12300b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg())) 12310b57cec5SDimitry Andric continue; 12320b57cec5SDimitry Andric 1233*e8d8bef9SDimitry Andric if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 12340b57cec5SDimitry Andric continue; 12350b57cec5SDimitry Andric 12360b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcABWaitStates = 4; 12370b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcCWaitStates = 2; 12380b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4; 12390b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10; 12400b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18; 12410b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1; 12420b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7; 12430b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15; 12440b57cec5SDimitry Andric const int MaxWaitStates = 18; 12458bcb0991SDimitry Andric Register Reg = Op.getReg(); 12460b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this] 12490b57cec5SDimitry Andric (MachineInstr *MI) { 12500b57cec5SDimitry Andric if (!IsMFMAFn(MI)) 12510b57cec5SDimitry Andric return false; 12528bcb0991SDimitry Andric Register DstReg = MI->getOperand(0).getReg(); 12530b57cec5SDimitry Andric if (DstReg == Reg) 12540b57cec5SDimitry Andric return false; 12550b57cec5SDimitry Andric HazardDefLatency = std::max(HazardDefLatency, 12560b57cec5SDimitry Andric TSchedModel.computeInstrLatency(MI)); 12570b57cec5SDimitry Andric return TRI.regsOverlap(DstReg, Reg); 12580b57cec5SDimitry Andric }; 12590b57cec5SDimitry Andric 12600b57cec5SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, 12610b57cec5SDimitry Andric MaxWaitStates); 12620b57cec5SDimitry Andric int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates; 12630b57cec5SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 12640b57cec5SDimitry Andric int OpNo = MI->getOperandNo(&Op); 12650b57cec5SDimitry Andric if (OpNo == SrcCIdx) { 12660b57cec5SDimitry Andric NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates; 1267*e8d8bef9SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) { 12680b57cec5SDimitry Andric switch (HazardDefLatency) { 12690b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates; 12700b57cec5SDimitry Andric break; 12710b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates; 12720b57cec5SDimitry Andric break; 12730b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 12740b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates; 12750b57cec5SDimitry Andric break; 12760b57cec5SDimitry Andric } 1277*e8d8bef9SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 12780b57cec5SDimitry Andric switch (HazardDefLatency) { 12790b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates; 12800b57cec5SDimitry Andric break; 12810b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates; 12820b57cec5SDimitry Andric break; 12830b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 12840b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates; 12850b57cec5SDimitry Andric break; 12860b57cec5SDimitry Andric } 12870b57cec5SDimitry Andric } 12880b57cec5SDimitry Andric 12890b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 12900b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 12930b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 12940b57cec5SDimitry Andric 12950b57cec5SDimitry Andric auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) { 1296*e8d8bef9SDimitry Andric if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 12970b57cec5SDimitry Andric return false; 12988bcb0991SDimitry Andric Register DstReg = MI->getOperand(0).getReg(); 12990b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 13000b57cec5SDimitry Andric }; 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcCWaitStates = 1; 13030b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcABWaitStates = 3; 13040b57cec5SDimitry Andric const int AccVGPRWriteAccVgprReadWaitStates = 3; 13050b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates; 13060b57cec5SDimitry Andric if (OpNo == SrcCIdx) 13070b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates; 1308*e8d8bef9SDimitry Andric else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) 13090b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates; 13100b57cec5SDimitry Andric 13110b57cec5SDimitry Andric WaitStatesNeededForUse = NeedWaitStates - 13120b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates); 13130b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13140b57cec5SDimitry Andric 13150b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 13160b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric 1319*e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 13200b57cec5SDimitry Andric const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0; 13210b57cec5SDimitry Andric const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5; 13220b57cec5SDimitry Andric const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13; 13230b57cec5SDimitry Andric const int MaxWaitStates = 13; 13248bcb0991SDimitry Andric Register DstReg = MI->getOperand(0).getReg(); 13250b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 13260b57cec5SDimitry Andric 13270b57cec5SDimitry Andric auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this] 13280b57cec5SDimitry Andric (MachineInstr *MI) { 13290b57cec5SDimitry Andric if (!IsMFMAFn(MI)) 13300b57cec5SDimitry Andric return false; 13318bcb0991SDimitry Andric Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); 13320b57cec5SDimitry Andric HazardDefLatency = std::max(HazardDefLatency, 13330b57cec5SDimitry Andric TSchedModel.computeInstrLatency(MI)); 13340b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 13350b57cec5SDimitry Andric }; 13360b57cec5SDimitry Andric 13370b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates); 13380b57cec5SDimitry Andric int NeedWaitStates; 13390b57cec5SDimitry Andric switch (HazardDefLatency) { 13400b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates; 13410b57cec5SDimitry Andric break; 13420b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates; 13430b57cec5SDimitry Andric break; 13440b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 13450b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates; 13460b57cec5SDimitry Andric break; 13470b57cec5SDimitry Andric } 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince; 13500b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric 13530b57cec5SDimitry Andric return WaitStatesNeeded; 13540b57cec5SDimitry Andric } 13550b57cec5SDimitry Andric 13560b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { 13570b57cec5SDimitry Andric if (!ST.hasMAIInsts()) 13580b57cec5SDimitry Andric return 0; 13590b57cec5SDimitry Andric 13600b57cec5SDimitry Andric int WaitStatesNeeded = 0; 13610b57cec5SDimitry Andric 13620b57cec5SDimitry Andric auto IsAccVgprReadFn = [] (MachineInstr *MI) { 1363*e8d8bef9SDimitry Andric return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64; 13640b57cec5SDimitry Andric }; 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_uses()) { 13670b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg())) 13680b57cec5SDimitry Andric continue; 13690b57cec5SDimitry Andric 13708bcb0991SDimitry Andric Register Reg = Op.getReg(); 13710b57cec5SDimitry Andric 13720b57cec5SDimitry Andric const int AccVgprReadLdStWaitStates = 2; 1373*e8d8bef9SDimitry Andric const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1; 13740b57cec5SDimitry Andric const int MaxWaitStates = 2; 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric int WaitStatesNeededForUse = AccVgprReadLdStWaitStates - 13770b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates); 13780b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 13810b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 13820b57cec5SDimitry Andric 1383*e8d8bef9SDimitry Andric auto IsVALUAccVgprRdWrCheckFn = [Reg, this](MachineInstr *MI) { 1384*e8d8bef9SDimitry Andric if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 && 1385*e8d8bef9SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 13860b57cec5SDimitry Andric return false; 13870b57cec5SDimitry Andric auto IsVALUFn = [] (MachineInstr *MI) { 13880b57cec5SDimitry Andric return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI); 13890b57cec5SDimitry Andric }; 13900b57cec5SDimitry Andric return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) < 13910b57cec5SDimitry Andric std::numeric_limits<int>::max(); 13920b57cec5SDimitry Andric }; 13930b57cec5SDimitry Andric 1394*e8d8bef9SDimitry Andric WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates - 1395*e8d8bef9SDimitry Andric getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates); 13960b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13970b57cec5SDimitry Andric } 13980b57cec5SDimitry Andric 13990b57cec5SDimitry Andric return WaitStatesNeeded; 14000b57cec5SDimitry Andric } 1401*e8d8bef9SDimitry Andric 1402*e8d8bef9SDimitry Andric bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 1403*e8d8bef9SDimitry Andric if (!SU->isInstr()) 1404*e8d8bef9SDimitry Andric return false; 1405*e8d8bef9SDimitry Andric 1406*e8d8bef9SDimitry Andric MachineInstr *MAI = nullptr; 1407*e8d8bef9SDimitry Andric auto IsMFMAFn = [&MAI] (MachineInstr *MI) { 1408*e8d8bef9SDimitry Andric MAI = nullptr; 1409*e8d8bef9SDimitry Andric if (SIInstrInfo::isMAI(*MI) && 1410*e8d8bef9SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 1411*e8d8bef9SDimitry Andric MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64) 1412*e8d8bef9SDimitry Andric MAI = MI; 1413*e8d8bef9SDimitry Andric return MAI != nullptr; 1414*e8d8bef9SDimitry Andric }; 1415*e8d8bef9SDimitry Andric 1416*e8d8bef9SDimitry Andric MachineInstr *MI = SU->getInstr(); 1417*e8d8bef9SDimitry Andric if (IsMFMAFn(MI)) { 1418*e8d8bef9SDimitry Andric int W = getWaitStatesSince(IsMFMAFn, 16); 1419*e8d8bef9SDimitry Andric if (MAI) 1420*e8d8bef9SDimitry Andric return W < (int)TSchedModel.computeInstrLatency(MAI); 1421*e8d8bef9SDimitry Andric } 1422*e8d8bef9SDimitry Andric 1423*e8d8bef9SDimitry Andric return false; 1424*e8d8bef9SDimitry Andric } 1425