10b57cec5SDimitry Andric //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements hazard recognizers for scheduling on GCN processors. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "GCNHazardRecognizer.h" 14e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 150b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 160b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 170b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 18e8d8bef9SDimitry Andric #include "llvm/Support/TargetParser.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 230b57cec5SDimitry Andric // Hazard Recoginizer Implementation 240b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 250b57cec5SDimitry Andric 26fe6060f1SDimitry Andric static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, 27fe6060f1SDimitry Andric const GCNSubtarget &ST); 28fe6060f1SDimitry Andric 290b57cec5SDimitry Andric GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 300b57cec5SDimitry Andric IsHazardRecognizerMode(false), 310b57cec5SDimitry Andric CurrCycleInstr(nullptr), 320b57cec5SDimitry Andric MF(MF), 330b57cec5SDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), 340b57cec5SDimitry Andric TII(*ST.getInstrInfo()), 350b57cec5SDimitry Andric TRI(TII.getRegisterInfo()), 360b57cec5SDimitry Andric ClauseUses(TRI.getNumRegUnits()), 370b57cec5SDimitry Andric ClauseDefs(TRI.getNumRegUnits()) { 38fe6060f1SDimitry Andric MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 19 : 5; 390b57cec5SDimitry Andric TSchedModel.init(&ST); 40fe6060f1SDimitry Andric RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST); 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric 43e8d8bef9SDimitry Andric void GCNHazardRecognizer::Reset() { 44e8d8bef9SDimitry Andric EmittedInstrs.clear(); 45e8d8bef9SDimitry Andric } 46e8d8bef9SDimitry Andric 470b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 480b57cec5SDimitry Andric EmitInstruction(SU->getInstr()); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 520b57cec5SDimitry Andric CurrCycleInstr = MI; 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric static bool isDivFMas(unsigned Opcode) { 56e8d8bef9SDimitry Andric return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64; 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static bool isSGetReg(unsigned Opcode) { 600b57cec5SDimitry Andric return Opcode == AMDGPU::S_GETREG_B32; 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric static bool isSSetReg(unsigned Opcode) { 64e8d8bef9SDimitry Andric switch (Opcode) { 65e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_B32: 66e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_B32_mode: 67e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_IMM32_B32: 68e8d8bef9SDimitry Andric case AMDGPU::S_SETREG_IMM32_B32_mode: 69e8d8bef9SDimitry Andric return true; 70e8d8bef9SDimitry Andric } 71e8d8bef9SDimitry Andric return false; 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric static bool isRWLane(unsigned Opcode) { 750b57cec5SDimitry Andric return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric static bool isRFE(unsigned Opcode) { 790b57cec5SDimitry Andric return Opcode == AMDGPU::S_RFE_B64; 800b57cec5SDimitry Andric } 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric static bool isSMovRel(unsigned Opcode) { 830b57cec5SDimitry Andric switch (Opcode) { 840b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B32: 850b57cec5SDimitry Andric case AMDGPU::S_MOVRELS_B64: 860b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B32: 870b57cec5SDimitry Andric case AMDGPU::S_MOVRELD_B64: 880b57cec5SDimitry Andric return true; 890b57cec5SDimitry Andric default: 900b57cec5SDimitry Andric return false; 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric 94fe6060f1SDimitry Andric static bool isDGEMM(unsigned Opcode) { 95fe6060f1SDimitry Andric return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || 96fe6060f1SDimitry Andric Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 || 97fe6060f1SDimitry Andric Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 || 98*04eeddc0SDimitry Andric Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64 || 99*04eeddc0SDimitry Andric Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64 || 100*04eeddc0SDimitry Andric Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64; 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric 103fe6060f1SDimitry Andric static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) { 104fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 105fe6060f1SDimitry Andric 106fe6060f1SDimitry Andric if (!SIInstrInfo::isMAI(MI) || 107fe6060f1SDimitry Andric isDGEMM(Opcode) || 108fe6060f1SDimitry Andric Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || 109fe6060f1SDimitry Andric Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) 110fe6060f1SDimitry Andric return false; 111fe6060f1SDimitry Andric 112fe6060f1SDimitry Andric return true; 113fe6060f1SDimitry Andric } 114fe6060f1SDimitry Andric 1150b57cec5SDimitry Andric static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, 1160b57cec5SDimitry Andric const MachineInstr &MI) { 1170b57cec5SDimitry Andric if (TII.isAlwaysGDS(MI.getOpcode())) 1180b57cec5SDimitry Andric return true; 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric switch (MI.getOpcode()) { 1210b57cec5SDimitry Andric case AMDGPU::S_SENDMSG: 1220b57cec5SDimitry Andric case AMDGPU::S_SENDMSGHALT: 1230b57cec5SDimitry Andric case AMDGPU::S_TTRACEDATA: 1240b57cec5SDimitry Andric return true; 1250b57cec5SDimitry Andric // These DS opcodes don't support GDS. 1260b57cec5SDimitry Andric case AMDGPU::DS_NOP: 1270b57cec5SDimitry Andric case AMDGPU::DS_PERMUTE_B32: 1280b57cec5SDimitry Andric case AMDGPU::DS_BPERMUTE_B32: 1290b57cec5SDimitry Andric return false; 1300b57cec5SDimitry Andric default: 1310b57cec5SDimitry Andric if (TII.isDS(MI.getOpcode())) { 1320b57cec5SDimitry Andric int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 1330b57cec5SDimitry Andric AMDGPU::OpName::gds); 1340b57cec5SDimitry Andric if (MI.getOperand(GDS).getImm()) 1350b57cec5SDimitry Andric return true; 1360b57cec5SDimitry Andric } 1370b57cec5SDimitry Andric return false; 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric static bool isPermlane(const MachineInstr &MI) { 1420b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 143e8d8bef9SDimitry Andric return Opcode == AMDGPU::V_PERMLANE16_B32_e64 || 144e8d8bef9SDimitry Andric Opcode == AMDGPU::V_PERMLANEX16_B32_e64; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 1480b57cec5SDimitry Andric const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 1490b57cec5SDimitry Andric AMDGPU::OpName::simm16); 1500b57cec5SDimitry Andric return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric ScheduleHazardRecognizer::HazardType 1540b57cec5SDimitry Andric GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 1550b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 156e8d8bef9SDimitry Andric // If we are not in "HazardRecognizerMode" and therefore not being run from 157e8d8bef9SDimitry Andric // the scheduler, track possible stalls from hazards but don't insert noops. 158e8d8bef9SDimitry Andric auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard; 159e8d8bef9SDimitry Andric 1600b57cec5SDimitry Andric if (MI->isBundle()) 1610b57cec5SDimitry Andric return NoHazard; 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 164e8d8bef9SDimitry Andric return HazardType; 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0) 167e8d8bef9SDimitry Andric return HazardType; 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric if (checkFPAtomicToDenormModeHazard(MI) > 0) 170e8d8bef9SDimitry Andric return HazardType; 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 1730b57cec5SDimitry Andric return NoHazard; 1740b57cec5SDimitry Andric 175fe6060f1SDimitry Andric // FIXME: Should flat be considered vmem? 176fe6060f1SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 177fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI)) 178fe6060f1SDimitry Andric && checkVMEMHazards(MI) > 0) 179fe6060f1SDimitry Andric return HazardType; 180fe6060f1SDimitry Andric 1810b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 182e8d8bef9SDimitry Andric return HazardType; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 185e8d8bef9SDimitry Andric return HazardType; 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 188e8d8bef9SDimitry Andric return HazardType; 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) 191e8d8bef9SDimitry Andric return HazardType; 1920b57cec5SDimitry Andric 193fe6060f1SDimitry Andric if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) || 194fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) || 195fe6060f1SDimitry Andric SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0) 196fe6060f1SDimitry Andric return HazardType; 197fe6060f1SDimitry Andric 1980b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 199e8d8bef9SDimitry Andric return HazardType; 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 202e8d8bef9SDimitry Andric return HazardType; 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) 205e8d8bef9SDimitry Andric return HazardType; 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric if (ST.hasReadM0MovRelInterpHazard() && 2080b57cec5SDimitry Andric (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && 2090b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 210e8d8bef9SDimitry Andric return HazardType; 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) && 2130b57cec5SDimitry Andric checkReadM0Hazards(MI) > 0) 214e8d8bef9SDimitry Andric return HazardType; 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0) 217e8d8bef9SDimitry Andric return HazardType; 2180b57cec5SDimitry Andric 219e8d8bef9SDimitry Andric if ((SIInstrInfo::isVMEM(*MI) || 220e8d8bef9SDimitry Andric SIInstrInfo::isFLAT(*MI) || 221e8d8bef9SDimitry Andric SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0) 222e8d8bef9SDimitry Andric return HazardType; 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) 225e8d8bef9SDimitry Andric return HazardType; 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric return NoHazard; 2280b57cec5SDimitry Andric } 2290b57cec5SDimitry Andric 230e8d8bef9SDimitry Andric static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII, 231e8d8bef9SDimitry Andric unsigned Quantity) { 232e8d8bef9SDimitry Andric while (Quantity > 0) { 233e8d8bef9SDimitry Andric unsigned Arg = std::min(Quantity, 8u); 234e8d8bef9SDimitry Andric Quantity -= Arg; 2350b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)) 236e8d8bef9SDimitry Andric .addImm(Arg - 1); 237e8d8bef9SDimitry Andric } 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric void GCNHazardRecognizer::processBundle() { 2410b57cec5SDimitry Andric MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator()); 2420b57cec5SDimitry Andric MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end(); 2430b57cec5SDimitry Andric // Check bundled MachineInstr's for hazards. 2440b57cec5SDimitry Andric for (; MI != E && MI->isInsideBundle(); ++MI) { 2450b57cec5SDimitry Andric CurrCycleInstr = &*MI; 2460b57cec5SDimitry Andric unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr); 2470b57cec5SDimitry Andric 248e8d8bef9SDimitry Andric if (IsHazardRecognizerMode) { 2490b57cec5SDimitry Andric fixHazards(CurrCycleInstr); 2500b57cec5SDimitry Andric 251e8d8bef9SDimitry Andric insertNoopsInBundle(CurrCycleInstr, TII, WaitStates); 252e8d8bef9SDimitry Andric } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric // It’s unnecessary to track more than MaxLookAhead instructions. Since we 2550b57cec5SDimitry Andric // include the bundled MI directly after, only add a maximum of 2560b57cec5SDimitry Andric // (MaxLookAhead - 1) noops to EmittedInstrs. 2570b57cec5SDimitry Andric for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i) 2580b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 2610b57cec5SDimitry Andric EmittedInstrs.resize(MaxLookAhead); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric CurrCycleInstr = nullptr; 2640b57cec5SDimitry Andric } 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 2670b57cec5SDimitry Andric IsHazardRecognizerMode = true; 2680b57cec5SDimitry Andric CurrCycleInstr = MI; 2690b57cec5SDimitry Andric unsigned W = PreEmitNoopsCommon(MI); 2700b57cec5SDimitry Andric fixHazards(MI); 2710b57cec5SDimitry Andric CurrCycleInstr = nullptr; 2720b57cec5SDimitry Andric return W; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { 2760b57cec5SDimitry Andric if (MI->isBundle()) 2770b57cec5SDimitry Andric return 0; 2780b57cec5SDimitry Andric 279e8d8bef9SDimitry Andric int WaitStates = 0; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric if (SIInstrInfo::isSMRD(*MI)) 2820b57cec5SDimitry Andric return std::max(WaitStates, checkSMRDHazards(MI)); 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric if (ST.hasNSAtoVMEMBug()) 2850b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric if (ST.hasNoDataDepHazard()) 2900b57cec5SDimitry Andric return WaitStates; 2910b57cec5SDimitry Andric 292fe6060f1SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) 293fe6060f1SDimitry Andric WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 294fe6060f1SDimitry Andric 2950b57cec5SDimitry Andric if (SIInstrInfo::isVALU(*MI)) 2960b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkVALUHazards(MI)); 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric if (SIInstrInfo::isDPP(*MI)) 2990b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric if (isDivFMas(MI->getOpcode())) 3020b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric if (isRWLane(MI->getOpcode())) 3050b57cec5SDimitry Andric WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); 3060b57cec5SDimitry Andric 307fe6060f1SDimitry Andric if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) || 308fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) || 309fe6060f1SDimitry Andric SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0) 310fe6060f1SDimitry Andric WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI)); 311fe6060f1SDimitry Andric 3120b57cec5SDimitry Andric if (MI->isInlineAsm()) 3130b57cec5SDimitry Andric return std::max(WaitStates, checkInlineAsmHazards(MI)); 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric if (isSGetReg(MI->getOpcode())) 3160b57cec5SDimitry Andric return std::max(WaitStates, checkGetRegHazards(MI)); 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric if (isSSetReg(MI->getOpcode())) 3190b57cec5SDimitry Andric return std::max(WaitStates, checkSetRegHazards(MI)); 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric if (isRFE(MI->getOpcode())) 3220b57cec5SDimitry Andric return std::max(WaitStates, checkRFEHazards(MI)); 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || 3250b57cec5SDimitry Andric isSMovRel(MI->getOpcode()))) 3260b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) 3290b57cec5SDimitry Andric return std::max(WaitStates, checkReadM0Hazards(MI)); 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric if (SIInstrInfo::isMAI(*MI)) 3320b57cec5SDimitry Andric return std::max(WaitStates, checkMAIHazards(MI)); 3330b57cec5SDimitry Andric 334e8d8bef9SDimitry Andric if (SIInstrInfo::isVMEM(*MI) || 335e8d8bef9SDimitry Andric SIInstrInfo::isFLAT(*MI) || 336e8d8bef9SDimitry Andric SIInstrInfo::isDS(*MI)) 3370b57cec5SDimitry Andric return std::max(WaitStates, checkMAILdStHazards(MI)); 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric return WaitStates; 3400b57cec5SDimitry Andric } 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric void GCNHazardRecognizer::EmitNoop() { 3430b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 3440b57cec5SDimitry Andric } 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric void GCNHazardRecognizer::AdvanceCycle() { 3470b57cec5SDimitry Andric // When the scheduler detects a stall, it will call AdvanceCycle() without 3480b57cec5SDimitry Andric // emitting any instructions. 349e8d8bef9SDimitry Andric if (!CurrCycleInstr) { 350e8d8bef9SDimitry Andric EmittedInstrs.push_front(nullptr); 3510b57cec5SDimitry Andric return; 352e8d8bef9SDimitry Andric } 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric if (CurrCycleInstr->isBundle()) { 3550b57cec5SDimitry Andric processBundle(); 3560b57cec5SDimitry Andric return; 3570b57cec5SDimitry Andric } 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); 360349cc55cSDimitry Andric if (!NumWaitStates) { 361349cc55cSDimitry Andric CurrCycleInstr = nullptr; 362349cc55cSDimitry Andric return; 363349cc55cSDimitry Andric } 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric // Keep track of emitted instructions 3660b57cec5SDimitry Andric EmittedInstrs.push_front(CurrCycleInstr); 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric // Add a nullptr for each additional wait state after the first. Make sure 3690b57cec5SDimitry Andric // not to add more than getMaxLookAhead() items to the list, since we 3700b57cec5SDimitry Andric // truncate the list to that size right after this loop. 3710b57cec5SDimitry Andric for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 3720b57cec5SDimitry Andric i < e; ++i) { 3730b57cec5SDimitry Andric EmittedInstrs.push_front(nullptr); 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric // getMaxLookahead() is the largest number of wait states we will ever need 3770b57cec5SDimitry Andric // to insert, so there is no point in keeping track of more than that many 3780b57cec5SDimitry Andric // wait states. 3790b57cec5SDimitry Andric EmittedInstrs.resize(getMaxLookAhead()); 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric CurrCycleInstr = nullptr; 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric void GCNHazardRecognizer::RecedeCycle() { 3850b57cec5SDimitry Andric llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3890b57cec5SDimitry Andric // Helper Functions 3900b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3910b57cec5SDimitry Andric 392fe6060f1SDimitry Andric typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn; 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric // Returns a minimum wait states since \p I walking all predecessors. 3950b57cec5SDimitry Andric // Only scans until \p IsExpired does not return true. 3960b57cec5SDimitry Andric // Can only be run in a hazard recognizer mode. 3970b57cec5SDimitry Andric static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 398fe6060f1SDimitry Andric const MachineBasicBlock *MBB, 399fe6060f1SDimitry Andric MachineBasicBlock::const_reverse_instr_iterator I, 400fe6060f1SDimitry Andric int WaitStates, IsExpiredFn IsExpired, 4010b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> &Visited) { 4020b57cec5SDimitry Andric for (auto E = MBB->instr_rend(); I != E; ++I) { 4030b57cec5SDimitry Andric // Don't add WaitStates for parent BUNDLE instructions. 4040b57cec5SDimitry Andric if (I->isBundle()) 4050b57cec5SDimitry Andric continue; 4060b57cec5SDimitry Andric 407fe6060f1SDimitry Andric if (IsHazard(*I)) 4080b57cec5SDimitry Andric return WaitStates; 4090b57cec5SDimitry Andric 410349cc55cSDimitry Andric if (I->isInlineAsm()) 4110b57cec5SDimitry Andric continue; 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric WaitStates += SIInstrInfo::getNumWaitStates(*I); 4140b57cec5SDimitry Andric 415fe6060f1SDimitry Andric if (IsExpired(*I, WaitStates)) 4160b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric 419fe6060f1SDimitry Andric int MinWaitStates = std::numeric_limits<int>::max(); 4200b57cec5SDimitry Andric for (MachineBasicBlock *Pred : MBB->predecessors()) { 4210b57cec5SDimitry Andric if (!Visited.insert(Pred).second) 4220b57cec5SDimitry Andric continue; 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), 4250b57cec5SDimitry Andric WaitStates, IsExpired, Visited); 4260b57cec5SDimitry Andric 427fe6060f1SDimitry Andric MinWaitStates = std::min(MinWaitStates, W); 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric return MinWaitStates; 4310b57cec5SDimitry Andric } 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, 434fe6060f1SDimitry Andric const MachineInstr *MI, IsExpiredFn IsExpired) { 4350b57cec5SDimitry Andric DenseSet<const MachineBasicBlock *> Visited; 4360b57cec5SDimitry Andric return getWaitStatesSince(IsHazard, MI->getParent(), 4370b57cec5SDimitry Andric std::next(MI->getReverseIterator()), 4380b57cec5SDimitry Andric 0, IsExpired, Visited); 4390b57cec5SDimitry Andric } 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) { 4420b57cec5SDimitry Andric if (IsHazardRecognizerMode) { 443fe6060f1SDimitry Andric auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) { 4440b57cec5SDimitry Andric return WaitStates >= Limit; 4450b57cec5SDimitry Andric }; 4460b57cec5SDimitry Andric return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric int WaitStates = 0; 4500b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 4510b57cec5SDimitry Andric if (MI) { 452fe6060f1SDimitry Andric if (IsHazard(*MI)) 4530b57cec5SDimitry Andric return WaitStates; 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric if (MI->isInlineAsm()) 4560b57cec5SDimitry Andric continue; 4570b57cec5SDimitry Andric } 4580b57cec5SDimitry Andric ++WaitStates; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric if (WaitStates >= Limit) 4610b57cec5SDimitry Andric break; 4620b57cec5SDimitry Andric } 4630b57cec5SDimitry Andric return std::numeric_limits<int>::max(); 4640b57cec5SDimitry Andric } 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg, 4670b57cec5SDimitry Andric IsHazardFn IsHazardDef, 4680b57cec5SDimitry Andric int Limit) { 4690b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 4700b57cec5SDimitry Andric 471fe6060f1SDimitry Andric auto IsHazardFn = [IsHazardDef, TRI, Reg](const MachineInstr &MI) { 472fe6060f1SDimitry Andric return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI); 4730b57cec5SDimitry Andric }; 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard, 4790b57cec5SDimitry Andric int Limit) { 480fe6060f1SDimitry Andric auto IsHazardFn = [IsHazard](const MachineInstr &MI) { 481fe6060f1SDimitry Andric return isSSetReg(MI.getOpcode()) && IsHazard(MI); 4820b57cec5SDimitry Andric }; 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric return getWaitStatesSince(IsHazardFn, Limit); 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4880b57cec5SDimitry Andric // No-op Hazard Detection 4890b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 4900b57cec5SDimitry Andric 491e8d8bef9SDimitry Andric static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV, 492e8d8bef9SDimitry Andric MCRegister Reg) { 4930b57cec5SDimitry Andric for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) 4940b57cec5SDimitry Andric BV.set(*RUI); 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric static void addRegsToSet(const SIRegisterInfo &TRI, 4980b57cec5SDimitry Andric iterator_range<MachineInstr::const_mop_iterator> Ops, 4990b57cec5SDimitry Andric BitVector &Set) { 5000b57cec5SDimitry Andric for (const MachineOperand &Op : Ops) { 5010b57cec5SDimitry Andric if (Op.isReg()) 502e8d8bef9SDimitry Andric addRegUnits(TRI, Set, Op.getReg().asMCReg()); 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric } 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { 5070b57cec5SDimitry Andric // XXX: Do we need to worry about implicit operands 5080b57cec5SDimitry Andric addRegsToSet(TRI, MI.defs(), ClauseDefs); 5090b57cec5SDimitry Andric addRegsToSet(TRI, MI.uses(), ClauseUses); 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5125ffd83dbSDimitry Andric static bool breaksSMEMSoftClause(MachineInstr *MI) { 5135ffd83dbSDimitry Andric return !SIInstrInfo::isSMRD(*MI); 5145ffd83dbSDimitry Andric } 5155ffd83dbSDimitry Andric 5165ffd83dbSDimitry Andric static bool breaksVMEMSoftClause(MachineInstr *MI) { 5175ffd83dbSDimitry Andric return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI); 5185ffd83dbSDimitry Andric } 5195ffd83dbSDimitry Andric 5200b57cec5SDimitry Andric int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { 5210b57cec5SDimitry Andric // SMEM soft clause are only present on VI+, and only matter if xnack is 5220b57cec5SDimitry Andric // enabled. 5230b57cec5SDimitry Andric if (!ST.isXNACKEnabled()) 5240b57cec5SDimitry Andric return 0; 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric bool IsSMRD = TII.isSMRD(*MEM); 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric resetClause(); 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric // A soft-clause is any group of consecutive SMEM instructions. The 5310b57cec5SDimitry Andric // instructions in this group may return out of order and/or may be 5320b57cec5SDimitry Andric // replayed (i.e. the same instruction issued more than once). 5330b57cec5SDimitry Andric // 5340b57cec5SDimitry Andric // In order to handle these situations correctly we need to make sure that 5350b57cec5SDimitry Andric // when a clause has more than one instruction, no instruction in the clause 5360b57cec5SDimitry Andric // writes to a register that is read by another instruction in the clause 5370b57cec5SDimitry Andric // (including itself). If we encounter this situaion, we need to break the 5380b57cec5SDimitry Andric // clause by inserting a non SMEM instruction. 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric for (MachineInstr *MI : EmittedInstrs) { 5410b57cec5SDimitry Andric // When we hit a non-SMEM instruction then we have passed the start of the 5420b57cec5SDimitry Andric // clause and we can stop. 5430b57cec5SDimitry Andric if (!MI) 5440b57cec5SDimitry Andric break; 5450b57cec5SDimitry Andric 5465ffd83dbSDimitry Andric if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI)) 5470b57cec5SDimitry Andric break; 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric addClauseInst(*MI); 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric if (ClauseDefs.none()) 5530b57cec5SDimitry Andric return 0; 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric // We need to make sure not to put loads and stores in the same clause if they 5560b57cec5SDimitry Andric // use the same address. For now, just start a new clause whenever we see a 5570b57cec5SDimitry Andric // store. 5580b57cec5SDimitry Andric if (MEM->mayStore()) 5590b57cec5SDimitry Andric return 1; 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric addClauseInst(*MEM); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric // If the set of defs and uses intersect then we cannot add this instruction 5640b57cec5SDimitry Andric // to the clause, so we have a hazard. 5650b57cec5SDimitry Andric return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0; 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 5690b57cec5SDimitry Andric int WaitStatesNeeded = 0; 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric WaitStatesNeeded = checkSoftClauseHazards(SMRD); 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric // This SMRD hazard only affects SI. 5740b57cec5SDimitry Andric if (!ST.hasSMRDReadVALUDefHazard()) 5750b57cec5SDimitry Andric return WaitStatesNeeded; 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric // A read of an SGPR by SMRD instruction requires 4 wait states when the 5780b57cec5SDimitry Andric // SGPR was written by a VALU instruction. 5790b57cec5SDimitry Andric int SmrdSgprWaitStates = 4; 580fe6060f1SDimitry Andric auto IsHazardDefFn = [this](const MachineInstr &MI) { 581fe6060f1SDimitry Andric return TII.isVALU(MI); 582fe6060f1SDimitry Andric }; 583fe6060f1SDimitry Andric auto IsBufferHazardDefFn = [this](const MachineInstr &MI) { 584fe6060f1SDimitry Andric return TII.isSALU(MI); 585fe6060f1SDimitry Andric }; 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric bool IsBufferSMRD = TII.isBufferSMRD(*SMRD); 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric for (const MachineOperand &Use : SMRD->uses()) { 5900b57cec5SDimitry Andric if (!Use.isReg()) 5910b57cec5SDimitry Andric continue; 5920b57cec5SDimitry Andric int WaitStatesNeededForUse = 5930b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 5940b57cec5SDimitry Andric SmrdSgprWaitStates); 5950b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric // This fixes what appears to be undocumented hardware behavior in SI where 5980b57cec5SDimitry Andric // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor 5990b57cec5SDimitry Andric // needs some number of nops in between. We don't know how many we need, but 6000b57cec5SDimitry Andric // let's use 4. This wasn't discovered before probably because the only 6010b57cec5SDimitry Andric // case when this happens is when we expand a 64-bit pointer into a full 6020b57cec5SDimitry Andric // descriptor and use s_buffer_load_dword instead of s_load_dword, which was 6030b57cec5SDimitry Andric // probably never encountered in the closed-source land. 6040b57cec5SDimitry Andric if (IsBufferSMRD) { 6050b57cec5SDimitry Andric int WaitStatesNeededForUse = 6060b57cec5SDimitry Andric SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), 6070b57cec5SDimitry Andric IsBufferHazardDefFn, 6080b57cec5SDimitry Andric SmrdSgprWaitStates); 6090b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6100b57cec5SDimitry Andric } 6110b57cec5SDimitry Andric } 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric return WaitStatesNeeded; 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 6160b57cec5SDimitry Andric int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 6170b57cec5SDimitry Andric if (!ST.hasVMEMReadSGPRVALUDefHazard()) 6180b57cec5SDimitry Andric return 0; 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric int WaitStatesNeeded = checkSoftClauseHazards(VMEM); 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric // A read of an SGPR by a VMEM instruction requires 5 wait states when the 6230b57cec5SDimitry Andric // SGPR was written by a VALU Instruction. 6240b57cec5SDimitry Andric const int VmemSgprWaitStates = 5; 625fe6060f1SDimitry Andric auto IsHazardDefFn = [this](const MachineInstr &MI) { 626fe6060f1SDimitry Andric return TII.isVALU(MI); 627fe6060f1SDimitry Andric }; 6280b57cec5SDimitry Andric for (const MachineOperand &Use : VMEM->uses()) { 629fe6060f1SDimitry Andric if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg())) 6300b57cec5SDimitry Andric continue; 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric int WaitStatesNeededForUse = 6330b57cec5SDimitry Andric VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, 6340b57cec5SDimitry Andric VmemSgprWaitStates); 6350b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric return WaitStatesNeeded; 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6400b57cec5SDimitry Andric int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 6410b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 6420b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6430b57cec5SDimitry Andric 6440b57cec5SDimitry Andric // Check for DPP VGPR read after VALU VGPR write and EXEC write. 6450b57cec5SDimitry Andric int DppVgprWaitStates = 2; 6460b57cec5SDimitry Andric int DppExecWaitStates = 5; 6470b57cec5SDimitry Andric int WaitStatesNeeded = 0; 648fe6060f1SDimitry Andric auto IsHazardDefFn = [TII](const MachineInstr &MI) { 649fe6060f1SDimitry Andric return TII->isVALU(MI); 650fe6060f1SDimitry Andric }; 6510b57cec5SDimitry Andric 6520b57cec5SDimitry Andric for (const MachineOperand &Use : DPP->uses()) { 6530b57cec5SDimitry Andric if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 6540b57cec5SDimitry Andric continue; 6550b57cec5SDimitry Andric int WaitStatesNeededForUse = 656fe6060f1SDimitry Andric DppVgprWaitStates - getWaitStatesSinceDef( 657fe6060f1SDimitry Andric Use.getReg(), 658fe6060f1SDimitry Andric [](const MachineInstr &) { return true; }, 6590b57cec5SDimitry Andric DppVgprWaitStates); 6600b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric 6630b57cec5SDimitry Andric WaitStatesNeeded = std::max( 6640b57cec5SDimitry Andric WaitStatesNeeded, 6650b57cec5SDimitry Andric DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn, 6660b57cec5SDimitry Andric DppExecWaitStates)); 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric return WaitStatesNeeded; 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 6720b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6730b57cec5SDimitry Andric 6740b57cec5SDimitry Andric // v_div_fmas requires 4 wait states after a write to vcc from a VALU 6750b57cec5SDimitry Andric // instruction. 6760b57cec5SDimitry Andric const int DivFMasWaitStates = 4; 677fe6060f1SDimitry Andric auto IsHazardDefFn = [TII](const MachineInstr &MI) { 678fe6060f1SDimitry Andric return TII->isVALU(MI); 679fe6060f1SDimitry Andric }; 6800b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn, 6810b57cec5SDimitry Andric DivFMasWaitStates); 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric return DivFMasWaitStates - WaitStatesNeeded; 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 6870b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 6880b57cec5SDimitry Andric unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 6890b57cec5SDimitry Andric 6900b57cec5SDimitry Andric const int GetRegWaitStates = 2; 691fe6060f1SDimitry Andric auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) { 692fe6060f1SDimitry Andric return GetRegHWReg == getHWReg(TII, MI); 6930b57cec5SDimitry Andric }; 6940b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates); 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric return GetRegWaitStates - WaitStatesNeeded; 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 7000b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7010b57cec5SDimitry Andric unsigned HWReg = getHWReg(TII, *SetRegInstr); 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric const int SetRegWaitStates = ST.getSetRegWaitStates(); 704fe6060f1SDimitry Andric auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) { 705fe6060f1SDimitry Andric return HWReg == getHWReg(TII, MI); 7060b57cec5SDimitry Andric }; 7070b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates); 7080b57cec5SDimitry Andric return SetRegWaitStates - WaitStatesNeeded; 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 7120b57cec5SDimitry Andric if (!MI.mayStore()) 7130b57cec5SDimitry Andric return -1; 7140b57cec5SDimitry Andric 7150b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 7160b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 7170b57cec5SDimitry Andric const MCInstrDesc &Desc = MI.getDesc(); 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 7200b57cec5SDimitry Andric int VDataRCID = -1; 7210b57cec5SDimitry Andric if (VDataIdx != -1) 7220b57cec5SDimitry Andric VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 7250b57cec5SDimitry Andric // There is no hazard if the instruction does not use vector regs 7260b57cec5SDimitry Andric // (like wbinvl1) 7270b57cec5SDimitry Andric if (VDataIdx == -1) 7280b57cec5SDimitry Andric return -1; 7290b57cec5SDimitry Andric // For MUBUF/MTBUF instructions this hazard only exists if the 7300b57cec5SDimitry Andric // instruction is not using a register in the soffset field. 7310b57cec5SDimitry Andric const MachineOperand *SOffset = 7320b57cec5SDimitry Andric TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 7330b57cec5SDimitry Andric // If we have no soffset operand, then assume this field has been 7340b57cec5SDimitry Andric // hardcoded to zero. 7350b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 7360b57cec5SDimitry Andric (!SOffset || !SOffset->isReg())) 7370b57cec5SDimitry Andric return VDataIdx; 7380b57cec5SDimitry Andric } 7390b57cec5SDimitry Andric 7400b57cec5SDimitry Andric // MIMG instructions create a hazard if they don't use a 256-bit T# and 7410b57cec5SDimitry Andric // the store size is greater than 8 bytes and they have more than two bits 7420b57cec5SDimitry Andric // of their dmask set. 7430b57cec5SDimitry Andric // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 7440b57cec5SDimitry Andric if (TII->isMIMG(MI)) { 7450b57cec5SDimitry Andric int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 7460b57cec5SDimitry Andric assert(SRsrcIdx != -1 && 7470b57cec5SDimitry Andric AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 7480b57cec5SDimitry Andric (void)SRsrcIdx; 7490b57cec5SDimitry Andric } 7500b57cec5SDimitry Andric 7510b57cec5SDimitry Andric if (TII->isFLAT(MI)) { 7520b57cec5SDimitry Andric int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 7530b57cec5SDimitry Andric if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 7540b57cec5SDimitry Andric return DataIdx; 7550b57cec5SDimitry Andric } 7560b57cec5SDimitry Andric 7570b57cec5SDimitry Andric return -1; 7580b57cec5SDimitry Andric } 7590b57cec5SDimitry Andric 760e8d8bef9SDimitry Andric int 761e8d8bef9SDimitry Andric GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, 7620b57cec5SDimitry Andric const MachineRegisterInfo &MRI) { 7630b57cec5SDimitry Andric // Helper to check for the hazard where VMEM instructions that store more than 7640b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 7650b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric const int VALUWaitStates = 1; 7680b57cec5SDimitry Andric int WaitStatesNeeded = 0; 7690b57cec5SDimitry Andric 770fe6060f1SDimitry Andric if (!TRI->isVectorRegister(MRI, Def.getReg())) 7710b57cec5SDimitry Andric return WaitStatesNeeded; 7728bcb0991SDimitry Andric Register Reg = Def.getReg(); 773fe6060f1SDimitry Andric auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) { 774fe6060f1SDimitry Andric int DataIdx = createsVALUHazard(MI); 7750b57cec5SDimitry Andric return DataIdx >= 0 && 776fe6060f1SDimitry Andric TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg); 7770b57cec5SDimitry Andric }; 7780b57cec5SDimitry Andric int WaitStatesNeededForDef = 7790b57cec5SDimitry Andric VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates); 7800b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 7810b57cec5SDimitry Andric 7820b57cec5SDimitry Andric return WaitStatesNeeded; 7830b57cec5SDimitry Andric } 7840b57cec5SDimitry Andric 7850b57cec5SDimitry Andric int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 7860b57cec5SDimitry Andric // This checks for the hazard where VMEM instructions that store more than 7870b57cec5SDimitry Andric // 8 bytes can have there store data over written by the next instruction. 7880b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 7890b57cec5SDimitry Andric return 0; 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 7920b57cec5SDimitry Andric int WaitStatesNeeded = 0; 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric for (const MachineOperand &Def : VALU->defs()) { 7950b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); 7960b57cec5SDimitry Andric } 7970b57cec5SDimitry Andric 7980b57cec5SDimitry Andric return WaitStatesNeeded; 7990b57cec5SDimitry Andric } 8000b57cec5SDimitry Andric 8010b57cec5SDimitry Andric int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { 8020b57cec5SDimitry Andric // This checks for hazards associated with inline asm statements. 8030b57cec5SDimitry Andric // Since inline asms can contain just about anything, we use this 8040b57cec5SDimitry Andric // to call/leverage other check*Hazard routines. Note that 8050b57cec5SDimitry Andric // this function doesn't attempt to address all possible inline asm 8060b57cec5SDimitry Andric // hazards (good luck), but is a collection of what has been 8070b57cec5SDimitry Andric // problematic thus far. 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric // see checkVALUHazards() 8100b57cec5SDimitry Andric if (!ST.has12DWordStoreHazard()) 8110b57cec5SDimitry Andric return 0; 8120b57cec5SDimitry Andric 8130b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 8140b57cec5SDimitry Andric int WaitStatesNeeded = 0; 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); 8170b57cec5SDimitry Andric I != E; ++I) { 8180b57cec5SDimitry Andric const MachineOperand &Op = IA->getOperand(I); 8190b57cec5SDimitry Andric if (Op.isReg() && Op.isDef()) { 8200b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); 8210b57cec5SDimitry Andric } 8220b57cec5SDimitry Andric } 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric return WaitStatesNeeded; 8250b57cec5SDimitry Andric } 8260b57cec5SDimitry Andric 8270b57cec5SDimitry Andric int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { 8280b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8290b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 8300b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric const MachineOperand *LaneSelectOp = 8330b57cec5SDimitry Andric TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); 8340b57cec5SDimitry Andric 8350b57cec5SDimitry Andric if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) 8360b57cec5SDimitry Andric return 0; 8370b57cec5SDimitry Andric 8388bcb0991SDimitry Andric Register LaneSelectReg = LaneSelectOp->getReg(); 839fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); }; 8400b57cec5SDimitry Andric 8410b57cec5SDimitry Andric const int RWLaneWaitStates = 4; 8420b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn, 8430b57cec5SDimitry Andric RWLaneWaitStates); 8440b57cec5SDimitry Andric return RWLaneWaitStates - WaitStatesSince; 8450b57cec5SDimitry Andric } 8460b57cec5SDimitry Andric 8470b57cec5SDimitry Andric int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) { 8480b57cec5SDimitry Andric if (!ST.hasRFEHazards()) 8490b57cec5SDimitry Andric return 0; 8500b57cec5SDimitry Andric 8510b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8520b57cec5SDimitry Andric 8530b57cec5SDimitry Andric const int RFEWaitStates = 1; 8540b57cec5SDimitry Andric 855fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { 856fe6060f1SDimitry Andric return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS; 8570b57cec5SDimitry Andric }; 8580b57cec5SDimitry Andric int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates); 8590b57cec5SDimitry Andric return RFEWaitStates - WaitStatesNeeded; 8600b57cec5SDimitry Andric } 8610b57cec5SDimitry Andric 8620b57cec5SDimitry Andric int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { 8630b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 8640b57cec5SDimitry Andric const int SMovRelWaitStates = 1; 865fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); }; 8660b57cec5SDimitry Andric return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, 8670b57cec5SDimitry Andric SMovRelWaitStates); 8680b57cec5SDimitry Andric } 8690b57cec5SDimitry Andric 8700b57cec5SDimitry Andric void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { 8710b57cec5SDimitry Andric fixVMEMtoScalarWriteHazards(MI); 8720b57cec5SDimitry Andric fixVcmpxPermlaneHazards(MI); 8730b57cec5SDimitry Andric fixSMEMtoVectorWriteHazards(MI); 8740b57cec5SDimitry Andric fixVcmpxExecWARHazard(MI); 8750b57cec5SDimitry Andric fixLdsBranchVmemWARHazard(MI); 8760b57cec5SDimitry Andric } 8770b57cec5SDimitry Andric 8780b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { 8790b57cec5SDimitry Andric if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) 8800b57cec5SDimitry Andric return false; 8810b57cec5SDimitry Andric 8820b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 883fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVOPC(MI); }; 8840b57cec5SDimitry Andric 885fe6060f1SDimitry Andric auto IsExpiredFn = [](const MachineInstr &MI, int) { 886fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 887fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 && 888fe6060f1SDimitry Andric Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa; 8890b57cec5SDimitry Andric }; 8900b57cec5SDimitry Andric 8910b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 8920b57cec5SDimitry Andric std::numeric_limits<int>::max()) 8930b57cec5SDimitry Andric return false; 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric // V_NOP will be discarded by SQ. 8960b57cec5SDimitry Andric // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* 8970b57cec5SDimitry Andric // which is always a VGPR and available. 8980b57cec5SDimitry Andric auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); 8998bcb0991SDimitry Andric Register Reg = Src0->getReg(); 9000b57cec5SDimitry Andric bool IsUndef = Src0->isUndef(); 9010b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 9020b57cec5SDimitry Andric TII->get(AMDGPU::V_MOV_B32_e32)) 9030b57cec5SDimitry Andric .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0)) 9040b57cec5SDimitry Andric .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill); 9050b57cec5SDimitry Andric 9060b57cec5SDimitry Andric return true; 9070b57cec5SDimitry Andric } 9080b57cec5SDimitry Andric 9090b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { 9100b57cec5SDimitry Andric if (!ST.hasVMEMtoScalarWriteHazard()) 9110b57cec5SDimitry Andric return false; 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI)) 9140b57cec5SDimitry Andric return false; 9150b57cec5SDimitry Andric 9160b57cec5SDimitry Andric if (MI->getNumDefs() == 0) 9170b57cec5SDimitry Andric return false; 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 9200b57cec5SDimitry Andric 921fe6060f1SDimitry Andric auto IsHazardFn = [TRI, MI](const MachineInstr &I) { 922fe6060f1SDimitry Andric if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) && 923fe6060f1SDimitry Andric !SIInstrInfo::isFLAT(I)) 9240b57cec5SDimitry Andric return false; 9250b57cec5SDimitry Andric 9260b57cec5SDimitry Andric for (const MachineOperand &Def : MI->defs()) { 927fe6060f1SDimitry Andric const MachineOperand *Op = 928fe6060f1SDimitry Andric I.findRegisterUseOperand(Def.getReg(), false, TRI); 9290b57cec5SDimitry Andric if (!Op) 9300b57cec5SDimitry Andric continue; 9310b57cec5SDimitry Andric return true; 9320b57cec5SDimitry Andric } 9330b57cec5SDimitry Andric return false; 9340b57cec5SDimitry Andric }; 9350b57cec5SDimitry Andric 936fe6060f1SDimitry Andric auto IsExpiredFn = [](const MachineInstr &MI, int) { 937fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) || 938fe6060f1SDimitry Andric (MI.getOpcode() == AMDGPU::S_WAITCNT && 939fe6060f1SDimitry Andric !MI.getOperand(0).getImm()) || 940fe6060f1SDimitry Andric (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 941fe6060f1SDimitry Andric MI.getOperand(0).getImm() == 0xffe3); 9420b57cec5SDimitry Andric }; 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 9450b57cec5SDimitry Andric std::numeric_limits<int>::max()) 9460b57cec5SDimitry Andric return false; 9470b57cec5SDimitry Andric 9480b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 949e8d8bef9SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 950e8d8bef9SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 951e8d8bef9SDimitry Andric .addImm(0xffe3); 9520b57cec5SDimitry Andric return true; 9530b57cec5SDimitry Andric } 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { 9560b57cec5SDimitry Andric if (!ST.hasSMEMtoVectorWriteHazard()) 9570b57cec5SDimitry Andric return false; 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric if (!SIInstrInfo::isVALU(*MI)) 9600b57cec5SDimitry Andric return false; 9610b57cec5SDimitry Andric 9620b57cec5SDimitry Andric unsigned SDSTName; 9630b57cec5SDimitry Andric switch (MI->getOpcode()) { 9640b57cec5SDimitry Andric case AMDGPU::V_READLANE_B32: 9650b57cec5SDimitry Andric case AMDGPU::V_READFIRSTLANE_B32: 9660b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::vdst; 9670b57cec5SDimitry Andric break; 9680b57cec5SDimitry Andric default: 9690b57cec5SDimitry Andric SDSTName = AMDGPU::OpName::sdst; 9700b57cec5SDimitry Andric break; 9710b57cec5SDimitry Andric } 9720b57cec5SDimitry Andric 9730b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 9740b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 9750b57cec5SDimitry Andric const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU()); 9760b57cec5SDimitry Andric const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName); 9770b57cec5SDimitry Andric if (!SDST) { 9780b57cec5SDimitry Andric for (const auto &MO : MI->implicit_operands()) { 9790b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) { 9800b57cec5SDimitry Andric SDST = &MO; 9810b57cec5SDimitry Andric break; 9820b57cec5SDimitry Andric } 9830b57cec5SDimitry Andric } 9840b57cec5SDimitry Andric } 9850b57cec5SDimitry Andric 9860b57cec5SDimitry Andric if (!SDST) 9870b57cec5SDimitry Andric return false; 9880b57cec5SDimitry Andric 9898bcb0991SDimitry Andric const Register SDSTReg = SDST->getReg(); 990fe6060f1SDimitry Andric auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) { 991fe6060f1SDimitry Andric return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI); 9920b57cec5SDimitry Andric }; 9930b57cec5SDimitry Andric 994fe6060f1SDimitry Andric auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) { 995fe6060f1SDimitry Andric if (TII->isSALU(MI)) { 996fe6060f1SDimitry Andric switch (MI.getOpcode()) { 9970b57cec5SDimitry Andric case AMDGPU::S_SETVSKIP: 9980b57cec5SDimitry Andric case AMDGPU::S_VERSION: 9990b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 10000b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 10010b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 10020b57cec5SDimitry Andric // These instructions cannot not mitigate the hazard. 10030b57cec5SDimitry Andric return false; 10040b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 10050b57cec5SDimitry Andric // Reducing lgkmcnt count to 0 always mitigates the hazard. 1006fe6060f1SDimitry Andric return (MI.getOperand(1).getImm() == 0) && 1007fe6060f1SDimitry Andric (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL); 10080b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: { 1009fe6060f1SDimitry Andric const int64_t Imm = MI.getOperand(0).getImm(); 10100b57cec5SDimitry Andric AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm); 10110b57cec5SDimitry Andric return (Decoded.LgkmCnt == 0); 10120b57cec5SDimitry Andric } 10130b57cec5SDimitry Andric default: 10140b57cec5SDimitry Andric // SOPP instructions cannot mitigate the hazard. 1015fe6060f1SDimitry Andric if (TII->isSOPP(MI)) 10160b57cec5SDimitry Andric return false; 10170b57cec5SDimitry Andric // At this point the SALU can be assumed to mitigate the hazard 10180b57cec5SDimitry Andric // because either: 10190b57cec5SDimitry Andric // (a) it is independent of the at risk SMEM (breaking chain), 10200b57cec5SDimitry Andric // or 10210b57cec5SDimitry Andric // (b) it is dependent on the SMEM, in which case an appropriate 10220b57cec5SDimitry Andric // s_waitcnt lgkmcnt _must_ exist between it and the at risk 10230b57cec5SDimitry Andric // SMEM instruction. 10240b57cec5SDimitry Andric return true; 10250b57cec5SDimitry Andric } 10260b57cec5SDimitry Andric } 10270b57cec5SDimitry Andric return false; 10280b57cec5SDimitry Andric }; 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 10310b57cec5SDimitry Andric std::numeric_limits<int>::max()) 10320b57cec5SDimitry Andric return false; 10330b57cec5SDimitry Andric 10340b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 10350b57cec5SDimitry Andric TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL) 10360b57cec5SDimitry Andric .addImm(0); 10370b57cec5SDimitry Andric return true; 10380b57cec5SDimitry Andric } 10390b57cec5SDimitry Andric 10400b57cec5SDimitry Andric bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) { 10410b57cec5SDimitry Andric if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI)) 10420b57cec5SDimitry Andric return false; 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 10450b57cec5SDimitry Andric if (!MI->modifiesRegister(AMDGPU::EXEC, TRI)) 10460b57cec5SDimitry Andric return false; 10470b57cec5SDimitry Andric 1048fe6060f1SDimitry Andric auto IsHazardFn = [TRI](const MachineInstr &I) { 1049fe6060f1SDimitry Andric if (SIInstrInfo::isVALU(I)) 10500b57cec5SDimitry Andric return false; 1051fe6060f1SDimitry Andric return I.readsRegister(AMDGPU::EXEC, TRI); 10520b57cec5SDimitry Andric }; 10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 1055fe6060f1SDimitry Andric auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) { 1056fe6060f1SDimitry Andric if (SIInstrInfo::isVALU(MI)) { 1057fe6060f1SDimitry Andric if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) 10580b57cec5SDimitry Andric return true; 1059fe6060f1SDimitry Andric for (auto MO : MI.implicit_operands()) 10600b57cec5SDimitry Andric if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) 10610b57cec5SDimitry Andric return true; 10620b57cec5SDimitry Andric } 1063fe6060f1SDimitry Andric if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && 1064fe6060f1SDimitry Andric (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe) 10650b57cec5SDimitry Andric return true; 10660b57cec5SDimitry Andric return false; 10670b57cec5SDimitry Andric }; 10680b57cec5SDimitry Andric 10690b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 10700b57cec5SDimitry Andric std::numeric_limits<int>::max()) 10710b57cec5SDimitry Andric return false; 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 10740b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_DEPCTR)) 10750b57cec5SDimitry Andric .addImm(0xfffe); 10760b57cec5SDimitry Andric return true; 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric 1079fe6060f1SDimitry Andric static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, 1080fe6060f1SDimitry Andric const GCNSubtarget &ST) { 10810b57cec5SDimitry Andric if (!ST.hasLdsBranchVmemWARHazard()) 10820b57cec5SDimitry Andric return false; 10830b57cec5SDimitry Andric 1084fe6060f1SDimitry Andric // Check if the necessary condition for the hazard is met: both LDS and VMEM 1085fe6060f1SDimitry Andric // instructions need to appear in the same function. 1086fe6060f1SDimitry Andric bool HasLds = false; 1087fe6060f1SDimitry Andric bool HasVmem = false; 1088fe6060f1SDimitry Andric for (auto &MBB : MF) { 1089fe6060f1SDimitry Andric for (auto &MI : MBB) { 1090fe6060f1SDimitry Andric HasLds |= SIInstrInfo::isDS(MI); 1091fe6060f1SDimitry Andric HasVmem |= 1092fe6060f1SDimitry Andric SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI); 1093fe6060f1SDimitry Andric if (HasLds && HasVmem) 1094fe6060f1SDimitry Andric return true; 1095fe6060f1SDimitry Andric } 1096fe6060f1SDimitry Andric } 1097fe6060f1SDimitry Andric return false; 1098fe6060f1SDimitry Andric } 1099fe6060f1SDimitry Andric 1100fe6060f1SDimitry Andric bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { 1101fe6060f1SDimitry Andric if (!RunLdsBranchVmemWARHazardFixup) 1102fe6060f1SDimitry Andric return false; 1103fe6060f1SDimitry Andric 1104fe6060f1SDimitry Andric assert(ST.hasLdsBranchVmemWARHazard()); 1105fe6060f1SDimitry Andric 1106fe6060f1SDimitry Andric auto IsHazardInst = [](const MachineInstr &MI) { 1107fe6060f1SDimitry Andric if (SIInstrInfo::isDS(MI)) 11080b57cec5SDimitry Andric return 1; 1109fe6060f1SDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 11100b57cec5SDimitry Andric return 2; 11110b57cec5SDimitry Andric return 0; 11120b57cec5SDimitry Andric }; 11130b57cec5SDimitry Andric 1114fe6060f1SDimitry Andric auto InstType = IsHazardInst(*MI); 11150b57cec5SDimitry Andric if (!InstType) 11160b57cec5SDimitry Andric return false; 11170b57cec5SDimitry Andric 1118fe6060f1SDimitry Andric auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) { 1119fe6060f1SDimitry Andric return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 1120fe6060f1SDimitry Andric I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && 1121fe6060f1SDimitry Andric !I.getOperand(1).getImm()); 11220b57cec5SDimitry Andric }; 11230b57cec5SDimitry Andric 1124fe6060f1SDimitry Andric auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) { 1125fe6060f1SDimitry Andric if (!I.isBranch()) 11260b57cec5SDimitry Andric return false; 11270b57cec5SDimitry Andric 1128fe6060f1SDimitry Andric auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) { 11290b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 11300b57cec5SDimitry Andric return InstType2 && InstType != InstType2; 11310b57cec5SDimitry Andric }; 11320b57cec5SDimitry Andric 1133fe6060f1SDimitry Andric auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) { 11340b57cec5SDimitry Andric auto InstType2 = IsHazardInst(I); 11350b57cec5SDimitry Andric if (InstType == InstType2) 11360b57cec5SDimitry Andric return true; 11370b57cec5SDimitry Andric 1138fe6060f1SDimitry Andric return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && 1139fe6060f1SDimitry Andric I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && 1140fe6060f1SDimitry Andric !I.getOperand(1).getImm(); 11410b57cec5SDimitry Andric }; 11420b57cec5SDimitry Andric 1143fe6060f1SDimitry Andric return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) != 11440b57cec5SDimitry Andric std::numeric_limits<int>::max(); 11450b57cec5SDimitry Andric }; 11460b57cec5SDimitry Andric 11470b57cec5SDimitry Andric if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == 11480b57cec5SDimitry Andric std::numeric_limits<int>::max()) 11490b57cec5SDimitry Andric return false; 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 11520b57cec5SDimitry Andric BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 11530b57cec5SDimitry Andric TII->get(AMDGPU::S_WAITCNT_VSCNT)) 11540b57cec5SDimitry Andric .addReg(AMDGPU::SGPR_NULL, RegState::Undef) 11550b57cec5SDimitry Andric .addImm(0); 11560b57cec5SDimitry Andric 11570b57cec5SDimitry Andric return true; 11580b57cec5SDimitry Andric } 11590b57cec5SDimitry Andric 11600b57cec5SDimitry Andric int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { 11610b57cec5SDimitry Andric int NSAtoVMEMWaitStates = 1; 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric if (!ST.hasNSAtoVMEMBug()) 11640b57cec5SDimitry Andric return 0; 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI)) 11670b57cec5SDimitry Andric return 0; 11680b57cec5SDimitry Andric 11690b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 11700b57cec5SDimitry Andric const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset); 11710b57cec5SDimitry Andric if (!Offset || (Offset->getImm() & 6) == 0) 11720b57cec5SDimitry Andric return 0; 11730b57cec5SDimitry Andric 1174fe6060f1SDimitry Andric auto IsHazardFn = [TII](const MachineInstr &I) { 1175fe6060f1SDimitry Andric if (!SIInstrInfo::isMIMG(I)) 11760b57cec5SDimitry Andric return false; 1177fe6060f1SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode()); 11780b57cec5SDimitry Andric return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA && 1179fe6060f1SDimitry Andric TII->getInstSizeInBytes(I) >= 16; 11800b57cec5SDimitry Andric }; 11810b57cec5SDimitry Andric 11820b57cec5SDimitry Andric return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); 11830b57cec5SDimitry Andric } 11840b57cec5SDimitry Andric 11850b57cec5SDimitry Andric int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { 11860b57cec5SDimitry Andric int FPAtomicToDenormModeWaitStates = 3; 11870b57cec5SDimitry Andric 11880b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) 11890b57cec5SDimitry Andric return 0; 11900b57cec5SDimitry Andric 1191fe6060f1SDimitry Andric auto IsHazardFn = [](const MachineInstr &I) { 1192fe6060f1SDimitry Andric if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I)) 11930b57cec5SDimitry Andric return false; 1194fe6060f1SDimitry Andric return SIInstrInfo::isFPAtomic(I); 11950b57cec5SDimitry Andric }; 11960b57cec5SDimitry Andric 1197fe6060f1SDimitry Andric auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) { 1198fe6060f1SDimitry Andric if (WaitStates >= 3 || SIInstrInfo::isVALU(MI)) 11990b57cec5SDimitry Andric return true; 12000b57cec5SDimitry Andric 1201fe6060f1SDimitry Andric switch (MI.getOpcode()) { 12020b57cec5SDimitry Andric case AMDGPU::S_WAITCNT: 12030b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VSCNT: 12040b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_VMCNT: 12050b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_EXPCNT: 12060b57cec5SDimitry Andric case AMDGPU::S_WAITCNT_LGKMCNT: 1207e8d8bef9SDimitry Andric case AMDGPU::S_WAIT_IDLE: 12080b57cec5SDimitry Andric return true; 12090b57cec5SDimitry Andric default: 12100b57cec5SDimitry Andric break; 12110b57cec5SDimitry Andric } 12120b57cec5SDimitry Andric 12130b57cec5SDimitry Andric return false; 12140b57cec5SDimitry Andric }; 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric return FPAtomicToDenormModeWaitStates - 12170b57cec5SDimitry Andric ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); 12180b57cec5SDimitry Andric } 12190b57cec5SDimitry Andric 12200b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { 12210b57cec5SDimitry Andric assert(SIInstrInfo::isMAI(*MI)); 12220b57cec5SDimitry Andric 1223fe6060f1SDimitry Andric return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI); 1224fe6060f1SDimitry Andric } 1225fe6060f1SDimitry Andric 1226fe6060f1SDimitry Andric int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { 12270b57cec5SDimitry Andric int WaitStatesNeeded = 0; 12280b57cec5SDimitry Andric unsigned Opc = MI->getOpcode(); 12290b57cec5SDimitry Andric 1230fe6060f1SDimitry Andric auto IsVALUFn = [](const MachineInstr &MI) { 1231fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI); 12320b57cec5SDimitry Andric }; 12330b57cec5SDimitry Andric 1234e8d8bef9SDimitry Andric if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write 12350b57cec5SDimitry Andric const int LegacyVALUWritesVGPRWaitStates = 2; 12360b57cec5SDimitry Andric const int VALUWritesExecWaitStates = 4; 12370b57cec5SDimitry Andric const int MaxWaitStates = 4; 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric int WaitStatesNeededForUse = VALUWritesExecWaitStates - 12400b57cec5SDimitry Andric getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates); 12410b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric if (WaitStatesNeeded < MaxWaitStates) { 12440b57cec5SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 12450b57cec5SDimitry Andric const int MaxWaitStates = 2; 12460b57cec5SDimitry Andric 12470b57cec5SDimitry Andric if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 12480b57cec5SDimitry Andric continue; 12490b57cec5SDimitry Andric 12500b57cec5SDimitry Andric int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates - 12510b57cec5SDimitry Andric getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates); 12520b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 12530b57cec5SDimitry Andric 12540b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 12550b57cec5SDimitry Andric break; 12560b57cec5SDimitry Andric } 12570b57cec5SDimitry Andric } 12580b57cec5SDimitry Andric } 12590b57cec5SDimitry Andric 1260fe6060f1SDimitry Andric auto IsMFMAFn = [](const MachineInstr &MI) { 1261fe6060f1SDimitry Andric return SIInstrInfo::isMAI(MI) && 1262fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 1263fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; 12640b57cec5SDimitry Andric }; 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_operands()) { 12670b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg())) 12680b57cec5SDimitry Andric continue; 12690b57cec5SDimitry Andric 1270e8d8bef9SDimitry Andric if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 12710b57cec5SDimitry Andric continue; 12720b57cec5SDimitry Andric 12730b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcABWaitStates = 4; 12740b57cec5SDimitry Andric const int MFMAWritesAGPROverlappedSrcCWaitStates = 2; 12750b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4; 12760b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10; 12770b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18; 12780b57cec5SDimitry Andric const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1; 12790b57cec5SDimitry Andric const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7; 12800b57cec5SDimitry Andric const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15; 12810b57cec5SDimitry Andric const int MaxWaitStates = 18; 12828bcb0991SDimitry Andric Register Reg = Op.getReg(); 12830b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 12840b57cec5SDimitry Andric 1285fe6060f1SDimitry Andric auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, 1286fe6060f1SDimitry Andric this](const MachineInstr &MI) { 12870b57cec5SDimitry Andric if (!IsMFMAFn(MI)) 12880b57cec5SDimitry Andric return false; 1289fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 12900b57cec5SDimitry Andric if (DstReg == Reg) 12910b57cec5SDimitry Andric return false; 1292fe6060f1SDimitry Andric HazardDefLatency = 1293fe6060f1SDimitry Andric std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI)); 12940b57cec5SDimitry Andric return TRI.regsOverlap(DstReg, Reg); 12950b57cec5SDimitry Andric }; 12960b57cec5SDimitry Andric 12970b57cec5SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, 12980b57cec5SDimitry Andric MaxWaitStates); 12990b57cec5SDimitry Andric int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates; 13000b57cec5SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 13010b57cec5SDimitry Andric int OpNo = MI->getOperandNo(&Op); 13020b57cec5SDimitry Andric if (OpNo == SrcCIdx) { 13030b57cec5SDimitry Andric NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates; 1304e8d8bef9SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) { 13050b57cec5SDimitry Andric switch (HazardDefLatency) { 13060b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates; 13070b57cec5SDimitry Andric break; 13080b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates; 13090b57cec5SDimitry Andric break; 13100b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 13110b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates; 13120b57cec5SDimitry Andric break; 13130b57cec5SDimitry Andric } 1314e8d8bef9SDimitry Andric } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 13150b57cec5SDimitry Andric switch (HazardDefLatency) { 13160b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates; 13170b57cec5SDimitry Andric break; 13180b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates; 13190b57cec5SDimitry Andric break; 13200b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 13210b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates; 13220b57cec5SDimitry Andric break; 13230b57cec5SDimitry Andric } 13240b57cec5SDimitry Andric } 13250b57cec5SDimitry Andric 13260b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 13270b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13280b57cec5SDimitry Andric 13290b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 13300b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 13310b57cec5SDimitry Andric 1332fe6060f1SDimitry Andric auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) { 1333fe6060f1SDimitry Andric if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 13340b57cec5SDimitry Andric return false; 1335fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 13360b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 13370b57cec5SDimitry Andric }; 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcCWaitStates = 1; 13400b57cec5SDimitry Andric const int AccVGPRWriteMFMAReadSrcABWaitStates = 3; 13410b57cec5SDimitry Andric const int AccVGPRWriteAccVgprReadWaitStates = 3; 13420b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates; 13430b57cec5SDimitry Andric if (OpNo == SrcCIdx) 13440b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates; 1345e8d8bef9SDimitry Andric else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) 13460b57cec5SDimitry Andric NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates; 13470b57cec5SDimitry Andric 13480b57cec5SDimitry Andric WaitStatesNeededForUse = NeedWaitStates - 13490b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates); 13500b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13510b57cec5SDimitry Andric 13520b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 13530b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 13540b57cec5SDimitry Andric } 13550b57cec5SDimitry Andric 1356e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 13570b57cec5SDimitry Andric const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0; 13580b57cec5SDimitry Andric const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5; 13590b57cec5SDimitry Andric const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13; 13600b57cec5SDimitry Andric const int MaxWaitStates = 13; 13618bcb0991SDimitry Andric Register DstReg = MI->getOperand(0).getReg(); 13620b57cec5SDimitry Andric unsigned HazardDefLatency = 0; 13630b57cec5SDimitry Andric 1364fe6060f1SDimitry Andric auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, 1365fe6060f1SDimitry Andric this](const MachineInstr &MI) { 13660b57cec5SDimitry Andric if (!IsMFMAFn(MI)) 13670b57cec5SDimitry Andric return false; 1368fe6060f1SDimitry Andric Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg(); 1369fe6060f1SDimitry Andric HazardDefLatency = 1370fe6060f1SDimitry Andric std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI)); 13710b57cec5SDimitry Andric return TRI.regsOverlap(Reg, DstReg); 13720b57cec5SDimitry Andric }; 13730b57cec5SDimitry Andric 13740b57cec5SDimitry Andric int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates); 13750b57cec5SDimitry Andric int NeedWaitStates; 13760b57cec5SDimitry Andric switch (HazardDefLatency) { 13770b57cec5SDimitry Andric case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates; 13780b57cec5SDimitry Andric break; 13790b57cec5SDimitry Andric case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates; 13800b57cec5SDimitry Andric break; 13810b57cec5SDimitry Andric case 16: LLVM_FALLTHROUGH; 13820b57cec5SDimitry Andric default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates; 13830b57cec5SDimitry Andric break; 13840b57cec5SDimitry Andric } 13850b57cec5SDimitry Andric 13860b57cec5SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince; 13870b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric return WaitStatesNeeded; 13910b57cec5SDimitry Andric } 13920b57cec5SDimitry Andric 1393fe6060f1SDimitry Andric int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { 1394fe6060f1SDimitry Andric int WaitStatesNeeded = 0; 1395fe6060f1SDimitry Andric unsigned Opc = MI->getOpcode(); 1396fe6060f1SDimitry Andric 1397fe6060f1SDimitry Andric auto IsMFMAFn = [](const MachineInstr &MI) { 1398fe6060f1SDimitry Andric return SIInstrInfo::isMAI(MI) && 1399fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 1400fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; 1401fe6060f1SDimitry Andric }; 1402fe6060f1SDimitry Andric 1403fe6060f1SDimitry Andric auto IsLegacyVALUFn = [&IsMFMAFn](const MachineInstr &MI) { 1404fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI); 1405fe6060f1SDimitry Andric }; 1406fe6060f1SDimitry Andric 1407fe6060f1SDimitry Andric auto IsLegacyVALUNotDotFn = [&IsMFMAFn](const MachineInstr &MI) { 1408fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI) && !SIInstrInfo::isDOT(MI); 1409fe6060f1SDimitry Andric }; 1410fe6060f1SDimitry Andric 1411fe6060f1SDimitry Andric if (!IsMFMAFn(*MI)) 1412fe6060f1SDimitry Andric return WaitStatesNeeded; 1413fe6060f1SDimitry Andric 1414fe6060f1SDimitry Andric const int VALUWritesExecWaitStates = 4; 1415fe6060f1SDimitry Andric int WaitStatesNeededForUse = VALUWritesExecWaitStates - 1416fe6060f1SDimitry Andric getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn, 1417fe6060f1SDimitry Andric VALUWritesExecWaitStates); 1418fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1419fe6060f1SDimitry Andric 1420fe6060f1SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 1421fe6060f1SDimitry Andric 1422fe6060f1SDimitry Andric // Loop for both DGEMM and S/HGEMM 2nd instruction. 1423fe6060f1SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 1424fe6060f1SDimitry Andric const int LegacyVALUNotDotWritesVGPRWaitStates = 2; 1425fe6060f1SDimitry Andric const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2; 1426fe6060f1SDimitry Andric const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8; 1427fe6060f1SDimitry Andric const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16; 1428fe6060f1SDimitry Andric const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3; 1429fe6060f1SDimitry Andric const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9; 1430fe6060f1SDimitry Andric const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17; 1431fe6060f1SDimitry Andric const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9; 1432fe6060f1SDimitry Andric const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4; 1433fe6060f1SDimitry Andric const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; 1434fe6060f1SDimitry Andric const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; 1435fe6060f1SDimitry Andric const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; 1436fe6060f1SDimitry Andric const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; 1437fe6060f1SDimitry Andric const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; 1438fe6060f1SDimitry Andric const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; 1439fe6060f1SDimitry Andric const int MaxWaitStates = 19; 1440fe6060f1SDimitry Andric 1441fe6060f1SDimitry Andric if (!Use.isReg()) 1442fe6060f1SDimitry Andric continue; 1443*04eeddc0SDimitry Andric Register Reg = Use.getReg(); 1444fe6060f1SDimitry Andric bool FullReg; 1445fe6060f1SDimitry Andric const MachineInstr *MI1; 1446fe6060f1SDimitry Andric 14474824e7fdSDimitry Andric auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1, 1448fe6060f1SDimitry Andric this](const MachineInstr &MI) { 1449fe6060f1SDimitry Andric if (!IsMFMAFn(MI)) 1450fe6060f1SDimitry Andric return false; 1451fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1452fe6060f1SDimitry Andric FullReg = (DstReg == Reg); 1453fe6060f1SDimitry Andric MI1 = &MI; 1454fe6060f1SDimitry Andric return TRI.regsOverlap(DstReg, Reg); 1455fe6060f1SDimitry Andric }; 1456fe6060f1SDimitry Andric 1457fe6060f1SDimitry Andric WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates - 1458fe6060f1SDimitry Andric getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates); 1459fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1460fe6060f1SDimitry Andric 14614824e7fdSDimitry Andric int NumWaitStates = 14624824e7fdSDimitry Andric getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates); 1463fe6060f1SDimitry Andric if (NumWaitStates == std::numeric_limits<int>::max()) 1464fe6060f1SDimitry Andric continue; 1465fe6060f1SDimitry Andric 1466fe6060f1SDimitry Andric int OpNo = MI->getOperandNo(&Use); 1467fe6060f1SDimitry Andric unsigned Opc1 = MI1->getOpcode(); 1468fe6060f1SDimitry Andric int NeedWaitStates = 0; 1469fe6060f1SDimitry Andric if (OpNo == SrcCIdx) { 1470fe6060f1SDimitry Andric if (!isDGEMM(Opc) && isDGEMM(Opc1)) { 1471fe6060f1SDimitry Andric NeedWaitStates = 0; 1472fe6060f1SDimitry Andric } else if (FullReg) { 1473fe6060f1SDimitry Andric if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || 1474fe6060f1SDimitry Andric Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) && 1475fe6060f1SDimitry Andric (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || 1476fe6060f1SDimitry Andric Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64)) 1477fe6060f1SDimitry Andric NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates; 1478fe6060f1SDimitry Andric } else { 1479fe6060f1SDimitry Andric switch (Opc1) { 1480fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_e64: 1481fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: 1482*04eeddc0SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: 1483*04eeddc0SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: 1484fe6060f1SDimitry Andric if (!isXDL(ST, *MI)) 1485fe6060f1SDimitry Andric NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates; 1486fe6060f1SDimitry Andric break; 1487fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_4X4X4F64_e64: 1488fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: 1489fe6060f1SDimitry Andric if (!isXDL(ST, *MI)) 1490fe6060f1SDimitry Andric NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; 1491fe6060f1SDimitry Andric break; 1492fe6060f1SDimitry Andric default: 1493fe6060f1SDimitry Andric switch (TSchedModel.computeInstrLatency(MI1)) { 1494fe6060f1SDimitry Andric case 2: 1495fe6060f1SDimitry Andric NeedWaitStates = isDGEMM(Opc) 1496fe6060f1SDimitry Andric ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates 1497fe6060f1SDimitry Andric : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; 1498fe6060f1SDimitry Andric break; 1499fe6060f1SDimitry Andric case 8: 1500fe6060f1SDimitry Andric NeedWaitStates = isDGEMM(Opc) 1501fe6060f1SDimitry Andric ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates 1502fe6060f1SDimitry Andric : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; 1503fe6060f1SDimitry Andric break; 1504fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 1505fe6060f1SDimitry Andric default: 1506fe6060f1SDimitry Andric NeedWaitStates = isDGEMM(Opc) 1507fe6060f1SDimitry Andric ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates 1508fe6060f1SDimitry Andric : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; 1509fe6060f1SDimitry Andric } 1510fe6060f1SDimitry Andric } 1511fe6060f1SDimitry Andric } 1512fe6060f1SDimitry Andric } else { 1513fe6060f1SDimitry Andric switch (Opc1) { 1514fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_e64: 1515fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: 1516*04eeddc0SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: 1517*04eeddc0SDimitry Andric case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: 1518fe6060f1SDimitry Andric NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; 1519fe6060f1SDimitry Andric break; 1520fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_4X4X4F64_e64: 1521fe6060f1SDimitry Andric case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: 1522fe6060f1SDimitry Andric NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates; 1523fe6060f1SDimitry Andric break; 1524fe6060f1SDimitry Andric default: 1525fe6060f1SDimitry Andric switch (TSchedModel.computeInstrLatency(MI1)) { 1526fe6060f1SDimitry Andric case 2: 1527fe6060f1SDimitry Andric NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates; 1528fe6060f1SDimitry Andric break; 1529fe6060f1SDimitry Andric case 8: 1530fe6060f1SDimitry Andric NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates; 1531fe6060f1SDimitry Andric break; 1532fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 1533fe6060f1SDimitry Andric default: 1534fe6060f1SDimitry Andric NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates; 1535fe6060f1SDimitry Andric } 1536fe6060f1SDimitry Andric } 1537fe6060f1SDimitry Andric } 1538fe6060f1SDimitry Andric if (WaitStatesNeeded >= NeedWaitStates) 1539fe6060f1SDimitry Andric continue; 1540fe6060f1SDimitry Andric 1541fe6060f1SDimitry Andric WaitStatesNeededForUse = NeedWaitStates - NumWaitStates; 1542fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1543fe6060f1SDimitry Andric 1544fe6060f1SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1545fe6060f1SDimitry Andric break; 1546fe6060f1SDimitry Andric } 1547fe6060f1SDimitry Andric 1548fe6060f1SDimitry Andric return WaitStatesNeeded; 1549fe6060f1SDimitry Andric } 1550fe6060f1SDimitry Andric 15510b57cec5SDimitry Andric int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { 1552349cc55cSDimitry Andric // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards() 1553fe6060f1SDimitry Andric if (!ST.hasMAIInsts() || ST.hasGFX90AInsts()) 15540b57cec5SDimitry Andric return 0; 15550b57cec5SDimitry Andric 15560b57cec5SDimitry Andric int WaitStatesNeeded = 0; 15570b57cec5SDimitry Andric 1558fe6060f1SDimitry Andric auto IsAccVgprReadFn = [](const MachineInstr &MI) { 1559fe6060f1SDimitry Andric return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64; 15600b57cec5SDimitry Andric }; 15610b57cec5SDimitry Andric 15620b57cec5SDimitry Andric for (const MachineOperand &Op : MI->explicit_uses()) { 15630b57cec5SDimitry Andric if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg())) 15640b57cec5SDimitry Andric continue; 15650b57cec5SDimitry Andric 15668bcb0991SDimitry Andric Register Reg = Op.getReg(); 15670b57cec5SDimitry Andric 15680b57cec5SDimitry Andric const int AccVgprReadLdStWaitStates = 2; 1569e8d8bef9SDimitry Andric const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1; 15700b57cec5SDimitry Andric const int MaxWaitStates = 2; 15710b57cec5SDimitry Andric 15720b57cec5SDimitry Andric int WaitStatesNeededForUse = AccVgprReadLdStWaitStates - 15730b57cec5SDimitry Andric getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates); 15740b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 15750b57cec5SDimitry Andric 15760b57cec5SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 15770b57cec5SDimitry Andric return WaitStatesNeeded; // Early exit. 15780b57cec5SDimitry Andric 1579fe6060f1SDimitry Andric auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) { 1580fe6060f1SDimitry Andric if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 && 1581fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) 15820b57cec5SDimitry Andric return false; 1583fe6060f1SDimitry Andric auto IsVALUFn = [](const MachineInstr &MI) { 1584fe6060f1SDimitry Andric return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMAI(MI); 15850b57cec5SDimitry Andric }; 15860b57cec5SDimitry Andric return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) < 15870b57cec5SDimitry Andric std::numeric_limits<int>::max(); 15880b57cec5SDimitry Andric }; 15890b57cec5SDimitry Andric 1590e8d8bef9SDimitry Andric WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates - 1591e8d8bef9SDimitry Andric getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates); 15920b57cec5SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 15930b57cec5SDimitry Andric } 15940b57cec5SDimitry Andric 15950b57cec5SDimitry Andric return WaitStatesNeeded; 15960b57cec5SDimitry Andric } 1597e8d8bef9SDimitry Andric 1598fe6060f1SDimitry Andric int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { 1599fe6060f1SDimitry Andric if (!ST.hasGFX90AInsts()) 1600fe6060f1SDimitry Andric return 0; 1601fe6060f1SDimitry Andric 1602fe6060f1SDimitry Andric auto IsMFMAFn = [](const MachineInstr &MI) -> bool { 1603fe6060f1SDimitry Andric return SIInstrInfo::isMAI(MI) && 1604fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 1605fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; 1606fe6060f1SDimitry Andric }; 1607fe6060f1SDimitry Andric 1608fe6060f1SDimitry Andric auto IsDGEMMFn = [](const MachineInstr &MI) -> bool { 1609fe6060f1SDimitry Andric return isDGEMM(MI.getOpcode()); 1610fe6060f1SDimitry Andric }; 1611fe6060f1SDimitry Andric 1612fe6060f1SDimitry Andric // This is checked in checkMAIHazards90A() 1613fe6060f1SDimitry Andric if (IsMFMAFn(*MI)) 1614fe6060f1SDimitry Andric return 0; 1615fe6060f1SDimitry Andric 1616fe6060f1SDimitry Andric int WaitStatesNeeded = 0; 1617fe6060f1SDimitry Andric 1618fe6060f1SDimitry Andric bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) || 1619fe6060f1SDimitry Andric SIInstrInfo::isFLAT(*MI) || 1620fe6060f1SDimitry Andric SIInstrInfo::isDS(*MI) || 1621fe6060f1SDimitry Andric SIInstrInfo::isEXP(*MI); 1622fe6060f1SDimitry Andric bool IsVALU = SIInstrInfo::isVALU(*MI); 1623fe6060f1SDimitry Andric 1624fe6060f1SDimitry Andric const MachineInstr *MFMA = nullptr; 1625fe6060f1SDimitry Andric unsigned Reg; 16264824e7fdSDimitry Andric auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) { 1627fe6060f1SDimitry Andric if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg)) 1628fe6060f1SDimitry Andric return false; 1629fe6060f1SDimitry Andric MFMA = &MI; 1630fe6060f1SDimitry Andric return true; 1631fe6060f1SDimitry Andric }; 1632fe6060f1SDimitry Andric 1633fe6060f1SDimitry Andric const MachineInstr *DOT = nullptr; 1634fe6060f1SDimitry Andric auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) { 1635fe6060f1SDimitry Andric if (!SIInstrInfo::isDOT(MI) || 1636fe6060f1SDimitry Andric !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg)) 1637fe6060f1SDimitry Andric return false; 1638fe6060f1SDimitry Andric DOT = &MI; 1639fe6060f1SDimitry Andric return true; 1640fe6060f1SDimitry Andric }; 1641fe6060f1SDimitry Andric 1642fe6060f1SDimitry Andric int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1643fe6060f1SDimitry Andric AMDGPU::OpName::src2); 1644fe6060f1SDimitry Andric 1645fe6060f1SDimitry Andric if (IsMemOrExport || IsVALU) { 1646fe6060f1SDimitry Andric const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5; 1647fe6060f1SDimitry Andric const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11; 1648fe6060f1SDimitry Andric const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19; 1649fe6060f1SDimitry Andric const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9; 1650fe6060f1SDimitry Andric const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; 1651fe6060f1SDimitry Andric const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; 1652fe6060f1SDimitry Andric const int DMFMA16x16WriteVgprVALUReadWaitStates = 11; 1653fe6060f1SDimitry Andric const int DotWriteSameDotReadSrcAB = 3; 1654fe6060f1SDimitry Andric const int DotWriteDifferentVALURead = 3; 1655fe6060f1SDimitry Andric const int MaxWaitStates = 19; 1656fe6060f1SDimitry Andric 1657fe6060f1SDimitry Andric for (const MachineOperand &Use : MI->explicit_uses()) { 1658fe6060f1SDimitry Andric if (!Use.isReg()) 1659fe6060f1SDimitry Andric continue; 1660fe6060f1SDimitry Andric Reg = Use.getReg(); 1661fe6060f1SDimitry Andric 1662fe6060f1SDimitry Andric DOT = nullptr; 1663fe6060f1SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn, 1664fe6060f1SDimitry Andric MaxWaitStates); 1665fe6060f1SDimitry Andric if (DOT) { 1666fe6060f1SDimitry Andric int NeedWaitStates = 0; 1667fe6060f1SDimitry Andric if (DOT->getOpcode() == MI->getOpcode()) { 1668fe6060f1SDimitry Andric if (&Use - &MI->getOperand(0) != SrcCIdx) 1669fe6060f1SDimitry Andric NeedWaitStates = DotWriteSameDotReadSrcAB; 1670fe6060f1SDimitry Andric } else { 1671fe6060f1SDimitry Andric NeedWaitStates = DotWriteDifferentVALURead; 1672fe6060f1SDimitry Andric } 1673fe6060f1SDimitry Andric 1674fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 1675fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1676fe6060f1SDimitry Andric } 1677fe6060f1SDimitry Andric 1678fe6060f1SDimitry Andric MFMA = nullptr; 16794824e7fdSDimitry Andric WaitStatesSinceDef = 16804824e7fdSDimitry Andric getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); 1681fe6060f1SDimitry Andric if (!MFMA) 1682fe6060f1SDimitry Andric continue; 1683fe6060f1SDimitry Andric 1684fe6060f1SDimitry Andric unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); 1685fe6060f1SDimitry Andric int NeedWaitStates = MaxWaitStates; 1686fe6060f1SDimitry Andric switch (HazardDefLatency) { 1687fe6060f1SDimitry Andric case 2: 1688fe6060f1SDimitry Andric NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates; 1689fe6060f1SDimitry Andric break; 1690fe6060f1SDimitry Andric case 4: 1691fe6060f1SDimitry Andric assert(isDGEMM(MFMA->getOpcode())); 1692fe6060f1SDimitry Andric NeedWaitStates = 1693fe6060f1SDimitry Andric IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates 1694fe6060f1SDimitry Andric : DMFMA4x4WriteVgprVALUReadWaitStates; 1695fe6060f1SDimitry Andric break; 1696fe6060f1SDimitry Andric case 8: 1697fe6060f1SDimitry Andric NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates; 1698fe6060f1SDimitry Andric break; 1699fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 1700fe6060f1SDimitry Andric default: 1701fe6060f1SDimitry Andric NeedWaitStates = 1702fe6060f1SDimitry Andric isDGEMM(MFMA->getOpcode()) 1703fe6060f1SDimitry Andric ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates 1704fe6060f1SDimitry Andric : DMFMA16x16WriteVgprVALUReadWaitStates 1705fe6060f1SDimitry Andric : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; 1706fe6060f1SDimitry Andric break; 1707fe6060f1SDimitry Andric } 1708fe6060f1SDimitry Andric 1709fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 1710fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1711fe6060f1SDimitry Andric 1712fe6060f1SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1713fe6060f1SDimitry Andric break; 1714fe6060f1SDimitry Andric } 1715fe6060f1SDimitry Andric } 1716fe6060f1SDimitry Andric 1717fe6060f1SDimitry Andric unsigned Opc = MI->getOpcode(); 1718fe6060f1SDimitry Andric const int DMFMAToFMA64WaitStates = 2; 1719fe6060f1SDimitry Andric if ((Opc == AMDGPU::V_FMA_F64_e64 || 1720fe6060f1SDimitry Andric Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 || 1721fe6060f1SDimitry Andric Opc == AMDGPU::V_FMAC_F64_dpp) && 1722fe6060f1SDimitry Andric WaitStatesNeeded < DMFMAToFMA64WaitStates) { 1723fe6060f1SDimitry Andric int WaitStatesNeededForUse = DMFMAToFMA64WaitStates - 1724fe6060f1SDimitry Andric getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates); 1725fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1726fe6060f1SDimitry Andric } 1727fe6060f1SDimitry Andric 1728fe6060f1SDimitry Andric if (!IsVALU && !IsMemOrExport) 1729fe6060f1SDimitry Andric return WaitStatesNeeded; 1730fe6060f1SDimitry Andric 1731fe6060f1SDimitry Andric for (const MachineOperand &Def : MI->defs()) { 1732fe6060f1SDimitry Andric const int SMFMA4x4WriteVgprVALUWawWaitStates = 5; 1733fe6060f1SDimitry Andric const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; 1734fe6060f1SDimitry Andric const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; 1735fe6060f1SDimitry Andric const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; 1736fe6060f1SDimitry Andric const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; 1737fe6060f1SDimitry Andric const int SMFMA32x32ReadVgprVALUWarWaitStates = 15; 1738fe6060f1SDimitry Andric const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6; 1739fe6060f1SDimitry Andric const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11; 1740fe6060f1SDimitry Andric const int DotWriteDifferentVALUWrite = 3; 1741fe6060f1SDimitry Andric const int MaxWaitStates = 19; 1742fe6060f1SDimitry Andric const int MaxWarWaitStates = 15; 1743fe6060f1SDimitry Andric 1744fe6060f1SDimitry Andric Reg = Def.getReg(); 1745fe6060f1SDimitry Andric 1746fe6060f1SDimitry Andric DOT = nullptr; 1747fe6060f1SDimitry Andric int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn, 1748fe6060f1SDimitry Andric MaxWaitStates); 1749fe6060f1SDimitry Andric if (DOT && DOT->getOpcode() != MI->getOpcode()) 1750fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite - 1751fe6060f1SDimitry Andric WaitStatesSinceDef); 1752fe6060f1SDimitry Andric 1753fe6060f1SDimitry Andric MFMA = nullptr; 17544824e7fdSDimitry Andric WaitStatesSinceDef = 17554824e7fdSDimitry Andric getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); 1756fe6060f1SDimitry Andric if (MFMA) { 1757fe6060f1SDimitry Andric int NeedWaitStates = MaxWaitStates; 1758fe6060f1SDimitry Andric switch (TSchedModel.computeInstrLatency(MFMA)) { 1759fe6060f1SDimitry Andric case 2: 1760fe6060f1SDimitry Andric NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates; 1761fe6060f1SDimitry Andric break; 1762fe6060f1SDimitry Andric case 4: 1763fe6060f1SDimitry Andric assert(isDGEMM(MFMA->getOpcode())); 1764fe6060f1SDimitry Andric NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; 1765fe6060f1SDimitry Andric break; 1766fe6060f1SDimitry Andric case 8: 1767fe6060f1SDimitry Andric NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates; 1768fe6060f1SDimitry Andric break; 1769fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 1770fe6060f1SDimitry Andric default: 1771fe6060f1SDimitry Andric NeedWaitStates = isDGEMM(MFMA->getOpcode()) 1772fe6060f1SDimitry Andric ? DMFMA16x16WriteVgprVALUWriteWaitStates 1773fe6060f1SDimitry Andric : SMFMA32x32WriteVgprVALUWawWaitStates; 1774fe6060f1SDimitry Andric break; 1775fe6060f1SDimitry Andric } 1776fe6060f1SDimitry Andric 1777fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; 1778fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1779fe6060f1SDimitry Andric 1780fe6060f1SDimitry Andric if (WaitStatesNeeded == MaxWaitStates) 1781fe6060f1SDimitry Andric break; 1782fe6060f1SDimitry Andric } 1783fe6060f1SDimitry Andric 1784fe6060f1SDimitry Andric auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA, 1785fe6060f1SDimitry Andric this](const MachineInstr &MI) { 1786fe6060f1SDimitry Andric if (!IsMFMAFn(MI) || isDGEMM(MI.getOpcode()) || 1787fe6060f1SDimitry Andric !MI.readsRegister(Reg, &TRI)) 1788fe6060f1SDimitry Andric return false; 1789fe6060f1SDimitry Andric 1790fe6060f1SDimitry Andric const MachineOperand *SrcC = 1791fe6060f1SDimitry Andric TII.getNamedOperand(MI, AMDGPU::OpName::src2); 1792fe6060f1SDimitry Andric assert(SrcC); 1793fe6060f1SDimitry Andric if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg)) 1794fe6060f1SDimitry Andric return false; 1795fe6060f1SDimitry Andric 1796fe6060f1SDimitry Andric MFMA = &MI; 1797fe6060f1SDimitry Andric return true; 1798fe6060f1SDimitry Andric }; 1799fe6060f1SDimitry Andric 1800fe6060f1SDimitry Andric MFMA = nullptr; 1801fe6060f1SDimitry Andric int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn, 1802fe6060f1SDimitry Andric MaxWarWaitStates); 1803fe6060f1SDimitry Andric if (!MFMA) 1804fe6060f1SDimitry Andric continue; 1805fe6060f1SDimitry Andric 1806fe6060f1SDimitry Andric unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); 1807fe6060f1SDimitry Andric int NeedWaitStates = MaxWaitStates; 1808fe6060f1SDimitry Andric switch (HazardDefLatency) { 1809fe6060f1SDimitry Andric case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; 1810fe6060f1SDimitry Andric break; 1811fe6060f1SDimitry Andric case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; 1812fe6060f1SDimitry Andric break; 1813fe6060f1SDimitry Andric case 16: LLVM_FALLTHROUGH; 1814fe6060f1SDimitry Andric default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates; 1815fe6060f1SDimitry Andric break; 1816fe6060f1SDimitry Andric } 1817fe6060f1SDimitry Andric 1818fe6060f1SDimitry Andric int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse; 1819fe6060f1SDimitry Andric WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 1820fe6060f1SDimitry Andric } 1821fe6060f1SDimitry Andric 1822fe6060f1SDimitry Andric return WaitStatesNeeded; 1823fe6060f1SDimitry Andric } 1824fe6060f1SDimitry Andric 1825e8d8bef9SDimitry Andric bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 1826e8d8bef9SDimitry Andric if (!SU->isInstr()) 1827e8d8bef9SDimitry Andric return false; 1828e8d8bef9SDimitry Andric 1829fe6060f1SDimitry Andric const MachineInstr *MAI = nullptr; 1830fe6060f1SDimitry Andric auto IsMFMAFn = [&MAI](const MachineInstr &MI) { 1831e8d8bef9SDimitry Andric MAI = nullptr; 1832fe6060f1SDimitry Andric if (SIInstrInfo::isMAI(MI) && 1833fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 1834fe6060f1SDimitry Andric MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64) 1835fe6060f1SDimitry Andric MAI = &MI; 1836e8d8bef9SDimitry Andric return MAI != nullptr; 1837e8d8bef9SDimitry Andric }; 1838e8d8bef9SDimitry Andric 1839e8d8bef9SDimitry Andric MachineInstr *MI = SU->getInstr(); 1840fe6060f1SDimitry Andric if (IsMFMAFn(*MI)) { 1841e8d8bef9SDimitry Andric int W = getWaitStatesSince(IsMFMAFn, 16); 1842e8d8bef9SDimitry Andric if (MAI) 1843e8d8bef9SDimitry Andric return W < (int)TSchedModel.computeInstrLatency(MAI); 1844e8d8bef9SDimitry Andric } 1845e8d8bef9SDimitry Andric 1846e8d8bef9SDimitry Andric return false; 1847e8d8bef9SDimitry Andric } 1848