Lines Matching +full:smem +full:- +full:part

1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
43 MFMAPaddingRatio("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden,
47 //===----------------------------------------------------------------------===//
49 //===----------------------------------------------------------------------===//
73 EmitInstruction(SU->getInstr()); in EmitInstruction()
179 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, in getHWReg()
181 return std::get<0>(AMDGPU::Hwreg::HwregEncoding::decode(RegOp->getImm())); in getHWReg()
186 MachineInstr *MI = SU->getInstr(); in getHazardType()
191 if (MI->isBundle()) in getHazardType()
218 if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) in getHazardType()
221 if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) in getHazardType()
229 if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) in getHazardType()
232 if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) in getHazardType()
235 if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) in getHazardType()
239 (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()) || in getHazardType()
240 MI->getOpcode() == AMDGPU::DS_WRITE_ADDTID_B32 || in getHazardType()
241 MI->getOpcode() == AMDGPU::DS_READ_ADDTID_B32)) || in getHazardType()
245 MI->readsRegister(AMDGPU::LDS_DIRECT, /*TRI=*/nullptr))) && in getHazardType()
257 if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) in getHazardType()
267 Quantity -= Arg; in insertNoopsInBundle()
268 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)) in insertNoopsInBundle()
269 .addImm(Arg - 1); in insertNoopsInBundle()
278 return TSchedModel.getWriteProcResBegin(SC)->ReleaseAtCycle; in getMFMAPipelineWaitStates()
282 MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator()); in processBundle()
283 MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end(); in processBundle()
285 for (; MI != E && MI->isInsideBundle(); ++MI) { in processBundle()
297 // (MaxLookAhead - 1) noops to EmittedInstrs. in processBundle()
298 for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i) in processBundle()
312 if (MI->isInsideBundle()) in runOnInstruction()
315 TII.insertNoops(*MI->getParent(), MachineBasicBlock::iterator(MI), in runOnInstruction()
331 if (MI->isBundle()) in PreEmitNoopsCommon()
356 if (isDivFMas(MI->getOpcode())) in PreEmitNoopsCommon()
359 if (isRWLane(MI->getOpcode())) in PreEmitNoopsCommon()
367 if (MI->isInlineAsm()) in PreEmitNoopsCommon()
370 if (isSGetReg(MI->getOpcode())) in PreEmitNoopsCommon()
373 if (isSSetReg(MI->getOpcode())) in PreEmitNoopsCommon()
376 if (isRFE(MI->getOpcode())) in PreEmitNoopsCommon()
380 (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()) || in PreEmitNoopsCommon()
381 MI->getOpcode() == AMDGPU::DS_WRITE_ADDTID_B32 || in PreEmitNoopsCommon()
382 MI->getOpcode() == AMDGPU::DS_READ_ADDTID_B32)) || in PreEmitNoopsCommon()
386 MI->readsRegister(AMDGPU::LDS_DIRECT, /*TRI=*/nullptr))) in PreEmitNoopsCommon()
412 if (CurrCycleInstr->isBundle()) { in AdvanceCycle()
443 llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); in RecedeCycle()
446 //===----------------------------------------------------------------------===//
448 //===----------------------------------------------------------------------===//
464 for (auto E = MBB->instr_rend(); I != E; ++I) { in hasHazard()
466 if (I->isBundle()) in hasHazard()
479 if (I->isInlineAsm() || I->isMetaInstruction()) in hasHazard()
485 for (MachineBasicBlock *Pred : MBB->predecessors()) { in hasHazard()
489 if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(), in hasHazard()
505 for (auto E = MBB->instr_rend(); I != E; ++I) { in getWaitStatesSince()
507 if (I->isBundle()) in getWaitStatesSince()
513 if (I->isInlineAsm()) in getWaitStatesSince()
523 for (MachineBasicBlock *Pred : MBB->predecessors()) { in getWaitStatesSince()
527 int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates, in getWaitStatesSince()
539 return getWaitStatesSince(IsHazard, MI->getParent(), in getWaitStatesSince()
540 std::next(MI->getReverseIterator()), in getWaitStatesSince()
558 if (MI->isInlineAsm()) in getWaitStatesSince()
590 //===----------------------------------------------------------------------===//
591 // No-op Hazard Detection
592 //===----------------------------------------------------------------------===//
622 // SMEM soft clause are only present on VI+, and only matter if xnack is in checkSoftClauseHazards()
631 // A soft-clause is any group of consecutive SMEM instructions. The in checkSoftClauseHazards()
639 // clause by inserting a non SMEM instruction. in checkSoftClauseHazards()
642 // When we hit a non-SMEM instruction then we have passed the start of the in checkSoftClauseHazards()
659 if (MEM->mayStore()) in checkSoftClauseHazards()
690 for (const MachineOperand &Use : SMRD->uses()) { in checkSMRDHazards()
694 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, in checkSMRDHazards()
702 // case when this happens is when we expand a 64-bit pointer into a full in checkSMRDHazards()
704 // probably never encountered in the closed-source land. in checkSMRDHazards()
707 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), in checkSMRDHazards()
729 for (const MachineOperand &Use : VMEM->uses()) { in checkVMEMHazards()
734 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn, in checkVMEMHazards()
750 return TII->isVALU(MI); in checkDPPHazards()
753 for (const MachineOperand &Use : DPP->uses()) { in checkDPPHazards()
754 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) in checkDPPHazards()
757 DppVgprWaitStates - getWaitStatesSinceDef( in checkDPPHazards()
766 DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn, in checkDPPHazards()
779 return TII->isVALU(MI); in checkDivFMasHazards()
784 return DivFMasWaitStates - WaitStatesNeeded; in checkDivFMasHazards()
797 return GetRegWaitStates - WaitStatesNeeded; in checkGetRegHazards()
809 return SetRegWaitStates - WaitStatesNeeded; in checkSetRegHazards()
814 return -1; in createsVALUHazard()
821 int VDataRCID = -1; in createsVALUHazard()
822 if (VDataIdx != -1) in createsVALUHazard()
825 if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { in createsVALUHazard()
828 if (VDataIdx == -1) in createsVALUHazard()
829 return -1; in createsVALUHazard()
833 TII->getNamedOperand(MI, AMDGPU::OpName::soffset); in createsVALUHazard()
837 (!SOffset || !SOffset->isReg())) in createsVALUHazard()
841 // MIMG instructions create a hazard if they don't use a 256-bit T# and in createsVALUHazard()
844 // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. in createsVALUHazard()
845 if (TII->isMIMG(MI)) { in createsVALUHazard()
847 assert(SRsrcIdx != -1 && in createsVALUHazard()
852 if (TII->isFLAT(MI)) { in createsVALUHazard()
858 return -1; in createsVALUHazard()
871 if (!TRI->isVectorRegister(MRI, Def.getReg())) in checkVALUHazardsHelper()
877 TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg); in checkVALUHazardsHelper()
880 VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates); in checkVALUHazardsHelper()
897 Register Def = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)->getReg(); in checkVALUHazards()
899 for (const MachineOperand &Use : VALU->explicit_uses()) { in checkVALUHazards()
900 if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) in checkVALUHazards()
908 TransDefWaitstates - in checkVALUHazards()
921 if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel)) in checkVALUHazards()
922 if (DstSel->getImm() == AMDGPU::SDWA::DWORD) in checkVALUHazards()
926 !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers) in checkVALUHazards()
927 ->getImm() & in checkVALUHazards()
932 if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { in checkVALUHazards()
933 Register Def = Dst->getReg(); in checkVALUHazards()
935 for (const MachineOperand &Use : VALU->explicit_uses()) { in checkVALUHazards()
936 if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) in checkVALUHazards()
945 Shift16DefWaitstates - in checkVALUHazards()
964 for (const MachineOperand &Use : VALU->explicit_uses()) { in checkVALUHazards()
969 if (TRI->isSGPRReg(MRI, UseReg)) { in checkVALUHazards()
971 VALUWriteSGPRVALUReadWaitstates - in checkVALUHazards()
978 if (VALU->readsRegister(AMDGPU::VCC, TRI)) { in checkVALUHazards()
981 VALUWriteSGPRVALUReadWaitstates - in checkVALUHazards()
986 switch (VALU->getOpcode()) { in checkVALUHazards()
990 UseReg = Src->getReg(); in checkVALUHazards()
992 VALUWriteVGPRReadlaneRead - in checkVALUHazards()
1000 VALUWriteEXECRWLane - in checkVALUHazards()
1017 for (const MachineOperand &Def : VALU->defs()) { in checkVALUHazards()
1040 llvm::drop_begin(IA->operands(), InlineAsm::MIOp_FirstOperand)) { in checkInlineAsmHazards()
1056 TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); in checkRWLaneHazards()
1058 if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) in checkRWLaneHazards()
1061 Register LaneSelectReg = LaneSelectOp->getReg(); in checkRWLaneHazards()
1062 auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); }; in checkRWLaneHazards()
1067 return RWLaneWaitStates - WaitStatesSince; in checkRWLaneHazards()
1082 return RFEWaitStates - WaitStatesNeeded; in checkRFEHazards()
1088 auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); }; in checkReadM0Hazards()
1089 return ReadM0WaitStates - in checkReadM0Hazards()
1118 return (TII->isVOPC(MI) || in fixVcmpxPermlaneHazards()
1119 ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) && in fixVcmpxPermlaneHazards()
1136 auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); in fixVcmpxPermlaneHazards()
1137 Register Reg = Src0->getReg(); in fixVcmpxPermlaneHazards()
1138 bool IsUndef = Src0->isUndef(); in fixVcmpxPermlaneHazards()
1139 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixVcmpxPermlaneHazards()
1140 TII->get(AMDGPU::V_MOV_B32_e32)) in fixVcmpxPermlaneHazards()
1155 if (MI->getNumDefs() == 0) in fixVMEMtoScalarWriteHazards()
1165 for (const MachineOperand &Def : MI->defs()) { in fixVMEMtoScalarWriteHazards()
1188 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixVMEMtoScalarWriteHazards()
1189 TII->get(AMDGPU::S_WAITCNT_DEPCTR)) in fixVMEMtoScalarWriteHazards()
1203 switch (MI->getOpcode()) { in fixSMEMtoVectorWriteHazards()
1216 const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName); in fixSMEMtoVectorWriteHazards()
1218 for (const auto &MO : MI->implicit_operands()) { in fixSMEMtoVectorWriteHazards()
1219 if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg()))) { in fixSMEMtoVectorWriteHazards()
1229 const Register SDSTReg = SDST->getReg(); in fixSMEMtoVectorWriteHazards()
1235 if (TII->isSALU(MI)) { in fixSMEMtoVectorWriteHazards()
1256 if (TII->isSOPP(MI)) in fixSMEMtoVectorWriteHazards()
1260 // (a) it is independent of the at risk SMEM (breaking chain), in fixSMEMtoVectorWriteHazards()
1262 // (b) it is dependent on the SMEM, in which case an appropriate in fixSMEMtoVectorWriteHazards()
1264 // SMEM instruction. in fixSMEMtoVectorWriteHazards()
1275 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixSMEMtoVectorWriteHazards()
1276 TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL) in fixSMEMtoVectorWriteHazards()
1290 if (!MI->modifiesRegister(AMDGPU::EXEC, TRI)) in fixVcmpxExecWARHazard()
1302 if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) in fixVcmpxExecWARHazard()
1305 if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg()))) in fixVcmpxExecWARHazard()
1318 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixVcmpxExecWARHazard()
1319 TII->get(AMDGPU::S_WAITCNT_DEPCTR)) in fixVcmpxExecWARHazard()
1400 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixLdsBranchVmemWARHazard()
1401 TII->get(AMDGPU::S_WAITCNT_VSCNT)) in fixLdsBranchVmemWARHazard()
1414 const Register VDSTReg = VDST->getReg(); in fixLdsDirectVALUHazard()
1436 auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(), in fixLdsDirectVALUHazard()
1437 std::next(MI->getReverseIterator()), 0, in fixLdsDirectVALUHazard()
1447 WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates)); in fixLdsDirectVALUHazard()
1457 const Register VDSTReg = VDST->getReg(); in fixLdsDirectVMEMHazard()
1474 !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm()); in fixLdsDirectVMEMHazard()
1482 TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0); in fixLdsDirectVMEMHazard()
1484 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixLdsDirectVMEMHazard()
1502 for (const MachineOperand &Use : MI->explicit_uses()) { in fixVALUPartialForwardingHazard()
1512 // Va <- VALU [PreExecPos] in fixVALUPartialForwardingHazard()
1514 // Exec <- SALU [ExecPos] in fixVALUPartialForwardingHazard()
1516 // Vb <- VALU [PostExecPos] in fixVALUPartialForwardingHazard()
1605 int Intv2VALUs = (State.ExecPos - PostExecPos) - 1; in fixVALUPartialForwardingHazard()
1614 int Intv1VALUs = PreExecPos - State.ExecPos; in fixVALUPartialForwardingHazard()
1630 if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(), in fixVALUPartialForwardingHazard()
1631 std::next(MI->getReverseIterator()), Visited)) in fixVALUPartialForwardingHazard()
1634 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixVALUPartialForwardingHazard()
1651 for (const MachineOperand &Use : MI->explicit_uses()) { in fixVALUTransUseHazard()
1657 // Va <- TRANS VALU in fixVALUTransUseHazard()
1708 if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(), in fixVALUTransUseHazard()
1709 std::next(MI->getReverseIterator()), Visited)) in fixVALUTransUseHazard()
1712 // Hazard is observed - insert a wait on va_dst counter to ensure hazard is in fixVALUTransUseHazard()
1714 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), in fixVALUTransUseHazard()
1735 TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg(); in fixWMMAHazards()
1737 TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg(); in fixWMMAHazards()
1740 TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg(); in fixWMMAHazards()
1742 if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) || in fixWMMAHazards()
1743 TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) { in fixWMMAHazards()
1752 TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); in fixWMMAHazards()
1753 if (TRI->regsOverlap(PrevDstReg, CurIndex)) in fixWMMAHazards()
1770 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32)); in fixWMMAHazards()
1780 switch (MI->getOpcode()) { in fixShift64HighRegBug()
1790 if (!Amt->isReg()) in fixShift64HighRegBug()
1793 Register AmtReg = Amt->getReg(); in fixShift64HighRegBug()
1796 if (!TRI.isVGPR(MRI, AmtReg) || ((AmtReg - AMDGPU::VGPR0) & 7) != 7) in fixShift64HighRegBug()
1803 bool OverlappedSrc = Src1->isReg() && TRI.regsOverlap(Src1->getReg(), AmtReg); in fixShift64HighRegBug()
1804 bool OverlappedDst = MI->modifiesRegister(AmtReg, &TRI); in fixShift64HighRegBug()
1808 Src1->getReg() == MI->getOperand(0).getReg()); in fixShift64HighRegBug()
1815 if (!MI->modifiesRegister(Reg, &TRI) && !MI->readsRegister(Reg, &TRI)) { in fixShift64HighRegBug()
1828 DebugLoc DL = MI->getDebugLoc(); in fixShift64HighRegBug()
1829 MachineBasicBlock *MBB = MI->getParent(); in fixShift64HighRegBug()
1838 .addDef(AmtReg - 1) in fixShift64HighRegBug()
1839 .addReg(AmtReg - 1, RegState::Undef) in fixShift64HighRegBug()
1848 BuildMI(*MBB, std::next(MI->getIterator()), DL, TII.get(AMDGPU::V_SWAP_B32), in fixShift64HighRegBug()
1854 BuildMI(*MBB, std::next(MI->getIterator()), DL, TII.get(AMDGPU::V_SWAP_B32), in fixShift64HighRegBug()
1855 AmtReg - 1) in fixShift64HighRegBug()
1858 .addReg(AmtReg - 1); in fixShift64HighRegBug()
1860 // Re-running hazard recognizer on the modified instruction is not necessary, in fixShift64HighRegBug()
1863 Amt->setReg(NewAmt); in fixShift64HighRegBug()
1864 Amt->setIsKill(false); in fixShift64HighRegBug()
1866 Amt->setIsUndef(); in fixShift64HighRegBug()
1868 MI->getOperand(0).setReg(NewReg); in fixShift64HighRegBug()
1870 Src1->setReg(NewReg); in fixShift64HighRegBug()
1871 Src1->setIsKill(false); in fixShift64HighRegBug()
1872 Src1->setIsUndef(); in fixShift64HighRegBug()
1888 const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset); in checkNSAtoVMEMHazard()
1889 if (!Offset || (Offset->getImm() & 6) == 0) in checkNSAtoVMEMHazard()
1896 return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA && in checkNSAtoVMEMHazard()
1897 TII->getInstSizeInBytes(I) >= 16; in checkNSAtoVMEMHazard()
1900 return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); in checkNSAtoVMEMHazard()
1910 if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) in checkFPAtomicToDenormModeHazard()
1938 return FPAtomicToDenormModeWaitStates - in checkFPAtomicToDenormModeHazard()
1954 if (!SIInstrInfo::isMFMA(*MI) || MFI->getOccupancy() < 2) in checkMFMAPadding()
1963 NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI); in checkMFMAPadding()
1972 (NeighborMFMALatency * MFMAPaddingRatio / 100) - in checkMFMAPadding()
1980 unsigned Opc = MI->getOpcode(); in checkMAIHazards908()
1991 int WaitStatesNeededForUse = VALUWritesExecWaitStates - in checkMAIHazards908()
1996 for (const MachineOperand &Use : MI->explicit_uses()) { in checkMAIHazards908()
2002 int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates - in checkMAIHazards908()
2012 for (const MachineOperand &Op : MI->explicit_operands()) { in checkMAIHazards908()
2072 int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; in checkMAIHazards908()
2094 WaitStatesNeededForUse = NeedWaitStates - in checkMAIHazards908()
2107 Register DstReg = MI->getOperand(0).getReg(); in checkMAIHazards908()
2114 Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg(); in checkMAIHazards908()
2132 int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince; in checkMAIHazards908()
2136 // Pad neighboring MFMA with noops for better inter-wave performance. in checkMAIHazards908()
2144 // 2 pass -> 3 in GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2145 // 4 pass -> 5 in GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2146 // 8 pass -> 9 in GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2147 // 16 pass -> 17 in GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2153 // 2 pass -> 2 in GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2154 // 4 pass -> 4 in GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2155 // 8 pass -> 8 in GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2156 // 16 pass -> 16 in GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()
2162 // 2 pass -> 4 in GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates()
2163 // 4 pass -> 6 in GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates()
2164 // 8 pass -> 10 in GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates()
2165 // 16 pass -> 18 in GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates()
2170 // 2 pass -> 5 in GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates()
2171 // 4 pass -> 7 in GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates()
2172 // 8 pass -> 11 in GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates()
2173 // 16 pass -> 19 in GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates()
2179 unsigned Opc = MI->getOpcode(); in checkMAIHazards90A()
2194 int WaitStatesNeededForUse = VALUWritesExecWaitStates - in checkMAIHazards90A()
2202 for (const MachineOperand &Use : MI->explicit_uses()) { in checkMAIHazards90A()
2237 WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates - in checkMAIHazards90A()
2247 unsigned Opc1 = MI1->getOpcode(); in checkMAIHazards90A()
2356 WaitStatesNeededForUse = NeedWaitStates - NumWaitStates; in checkMAIHazards90A()
2363 // Pad neighboring MFMA with noops for better inter-wave performance. in checkMAIHazards90A()
2380 for (const MachineOperand &Op : MI->explicit_uses()) { in checkMAILdStHazards()
2390 int WaitStatesNeededForUse = AccVgprReadLdStWaitStates - in checkMAILdStHazards()
2408 WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates - in checkMAILdStHazards()
2417 // 2 pass -> 4 in GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates()
2418 // 4 pass -> 6 in GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates()
2419 // 8 pass -> 10 in GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates()
2420 // 16 pass -> 18 in GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates()
2425 // 2 pass -> 5 in GFX940_XDL_N_PassWriteVgprVALUWawWaitStates()
2426 // 4 pass -> 7 in GFX940_XDL_N_PassWriteVgprVALUWawWaitStates()
2427 // 8 pass -> 11 in GFX940_XDL_N_PassWriteVgprVALUWawWaitStates()
2428 // 16 pass -> 19 in GFX940_XDL_N_PassWriteVgprVALUWawWaitStates()
2433 // 2 pass -> 5 in GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates()
2434 // 4 pass -> 7 in GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates()
2435 // 8 pass -> 11 in GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates()
2436 // 16 pass -> 19 in GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates()
2441 // 2 pass -> 4 in GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates()
2442 // 4 pass -> 6 in GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates()
2443 // 8 pass -> 10 in GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates()
2444 // 16 pass -> 18 in GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates()
2452 auto IsDGEMMFn = [](const MachineInstr &MI) -> bool { in checkMAIVALUHazards()
2503 int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), in checkMAIVALUHazards()
2519 for (const MachineOperand &Use : MI->explicit_uses()) { in checkMAIVALUHazards()
2529 if (DOT->getOpcode() == MI->getOpcode()) { in checkMAIVALUHazards()
2530 if (&Use - &MI->getOperand(0) != SrcCIdx) in checkMAIVALUHazards()
2536 int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; in checkMAIVALUHazards()
2541 // is a DGEMM instruction in-between a VALU and a VMEM instruction it in checkMAIVALUHazards()
2548 DMFMABetweenVALUWriteVMEMRead - in checkMAIVALUHazards()
2566 if (isDGEMM(MFMA->getOpcode())) { in checkMAIVALUHazards()
2603 int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; in checkMAIVALUHazards()
2611 unsigned Opc = MI->getOpcode(); in checkMAIVALUHazards()
2617 int WaitStatesNeededForUse = DMFMAToFMA64WaitStates - in checkMAIVALUHazards()
2625 for (const MachineOperand &Def : MI->defs()) { in checkMAIVALUHazards()
2644 if (DOT && DOT->getOpcode() != MI->getOpcode()) in checkMAIVALUHazards()
2645 WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite - in checkMAIVALUHazards()
2655 if (isDGEMM(MFMA->getOpcode())) { in checkMAIVALUHazards()
2688 int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef; in checkMAIVALUHazards()
2706 if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg)) in checkMAIVALUHazards()
2734 int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse; in checkMAIVALUHazards()
2742 if (!SU->isInstr()) in ShouldPreferAnother()
2754 MachineInstr *MI = SU->getInstr(); in ShouldPreferAnother()
2781 if (!SDSTOp || !SDSTOp->isReg()) in fixVALUMaskWriteHazard()
2784 const Register HazardReg = SDSTOp->getReg(); in fixVALUMaskWriteHazard()
2822 return TRI.regsOverlap(SSRCOp->getReg(), HazardReg); in fixVALUMaskWriteHazard()
2878 auto NextMI = std::next(MI->getIterator()); in fixVALUMaskWriteHazard()
2881 BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(), in fixVALUMaskWriteHazard()
2886 if (MI->getOpcode() == AMDGPU::S_GETPC_B64) { in fixVALUMaskWriteHazard()
2888 while (NextMI != MI->getParent()->end() && in fixVALUMaskWriteHazard()
2889 NextMI->isBundledWithPred()) { in fixVALUMaskWriteHazard()
2890 for (auto &Operand : NextMI->operands()) { in fixVALUMaskWriteHazard()
2903 MachineBasicBlock &EntryMBB = MF->front(); in ensureEntrySetPrio()
2922 MachineBasicBlock *MBB = MI->getParent(); in fixRequiredExportPriority()
2923 MachineFunction *MF = MBB->getParent(); in fixRequiredExportPriority()
2924 auto CC = MF->getFunction().getCallingConv(); in fixRequiredExportPriority()
2939 auto It = MI->getIterator(); in fixRequiredExportPriority()
2940 switch (MI->getOpcode()) { in fixRequiredExportPriority()
2947 if (MF->getFrameInfo().hasCalls()) in fixRequiredExportPriority()
2952 auto &PrioOp = MI->getOperand(0); in fixRequiredExportPriority()
2955 (It != MBB->begin() && TII.isEXP(*std::prev(It))); in fixRequiredExportPriority()
2975 if (NextMI != MBB->end()) { in fixRequiredExportPriority()
2980 if (NextMI->getOpcode() == AMDGPU::S_SETPRIO && in fixRequiredExportPriority()
2981 NextMI->getOperand(0).getImm() == PostExportPriority) in fixRequiredExportPriority()
2983 EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM; in fixRequiredExportPriority()
2986 const DebugLoc &DL = MI->getDebugLoc(); in fixRequiredExportPriority()