//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling mutation to pair VOPD
/// instructions back to back. It also contains subroutines useful in the
/// creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//

#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"

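// For illustration only (a sketch; this file does not itself emit VOPD
// encodings): on GFX11, two independent candidate instructions such as
//   v_mov_b32 v0, v2
//   v_add_f32 v1, v3, v4
// that pass the checks below and are scheduled back to back can later be
// merged into a single dual-issue VOPD instruction, e.g.
//   v_dual_mov_b32 v0, v2 :: v_dual_add_f32 v1, v3, v4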
bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against the scalar bus limit.
  SmallVector<const MachineOperand *> UniqueLiterals;
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  SmallVector<Register> UniqueScalarRegs;
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions.
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
      return false;

  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

  auto InstInfo =
      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

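  // Scan each component (X = FirstMI, Y = SecondMI) and record every unique
  // scalar-bus source: SGPR src0 operands, non-inline literals, and the
  // implicit use of VCC. The limits enforced afterwards are at most one
  // unique literal and at most two scalar-bus sources in total.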
  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else {
      if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
        addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
  }

  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
  // source-cache.
  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;

  if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
    return false;

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}

/// Check if the instruction pair, FirstMI and SecondMI, should be scheduled
/// together. When FirstMI is unspecified, check whether SecondMI may be part
/// of a fused pair at all.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
  unsigned Opc2 = SecondMI.getOpcode();
  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);

  // One instruction case.
  if (!FirstMI)
    return SecondCanBeVOPD.Y;

  unsigned Opc = FirstMI->getOpcode();
  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);

  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
    return false;

  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
}

namespace {
/// Adapts the design from MacroFusion: puts valid candidate instructions
/// back to back so they can easily be turned into VOPD instructions.
/// Greedily pairs instruction candidates; an O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer

  VOPDPairingMutation(
      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    const TargetInstrInfo &TII = *DAG->TII;
    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
      return;
    }

    std::vector<SUnit>::iterator ISUI, JSUI;
    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
      const MachineInstr *IMI = ISUI->getInstr();
      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
        continue;
      if (!hasLessThanNumFused(*ISUI, 2))
        continue;

      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
        if (JSUI->isBoundaryNode())
          continue;
        const MachineInstr *JMI = JSUI->getInstr();
        if (!hasLessThanNumFused(*JSUI, 2) ||
            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
          continue;
        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
          break;
      }
    }
    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
  }
};
} // namespace

std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
}
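
// Typical use (a sketch; the exact registration point depends on how the
// target wires up its scheduler, so treat the surrounding context as an
// assumption rather than the one true hook):
//   DAG->addMutation(createVOPDPairingMutation());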