//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//

#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"

bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI,
                                   bool IsVOPD3) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  if (IsVOPD3 && !ST.hasVOPD3())
    return false;
  if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
    return false;
  if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
    return false;

  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against scalar bus limit
  SmallVector<const MachineOperand *> UniqueLiterals;
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  SmallVector<Register> UniqueScalarRegs;
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
      return false;

  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

  auto InstInfo =
      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
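  // For each component (X = FirstMI, Y = SecondMI), collect the scalar
  // registers and literals it reads (both count against the scalar bus
  // limit) and, for VOPD3, reject operands and modifiers the encoding
  // cannot express.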
  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else if (!TII.isInlineConstant(Src0)) {
      if (IsVOPD3)
        return false;
      addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      if (IsVOPD3)
        return false;

      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);

    if (IsVOPD3) {
      for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
        const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
        if (!Src)
          continue;
        if (OpName == AMDGPU::OpName::src2) {
          if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
            continue;
          if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
            UniqueScalarRegs.push_back(Src->getReg());
            continue;
          }
        }
        if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
          return false;
      }

      for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
                          AMDGPU::OpName::op_sel}) {
        if (TII.hasModifiersSet(MI, OpName))
          return false;
      }

      // Neg is allowed, other modifiers are not. NB: even though sext has the
      // same value as neg, there are no combinable instructions with sext.
      for (auto OpName :
           {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
            AMDGPU::OpName::src2_modifiers}) {
        const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
        if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
          return false;
      }
    }
  }

  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
  // source-cache.
  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
  bool AllowSameVGPR = ST.hasGFX1250Insts();

  if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
                                 IsVOPD3))
    return false;

  if (IsVOPD3) {
    // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
    if (AMDGPU::hasNamedOperand(SecondMI.getOpcode(), AMDGPU::OpName::bitop3)) {
      const MachineOperand &Src2 =
          *TII.getNamedOperand(SecondMI, AMDGPU::OpName::src2);
      if (!Src2.isImm() || Src2.getImm())
        return false;
    }
    if (AMDGPU::hasNamedOperand(FirstMI.getOpcode(), AMDGPU::OpName::bitop3)) {
      const MachineOperand &Src2 =
          *TII.getNamedOperand(FirstMI, AMDGPU::OpName::src2);
      if (!Src2.isImm() || Src2.getImm())
        return false;
    }
  }

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}
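// Illustrative example (not taken from this file or its tests): two
// independent wave32 moves such as
//
//   v_mov_b32_e32 v0, v2
//   v_mov_b32_e32 v1, v3
//
// use no literals or scalar registers and have compatible destination and
// source register banks, so they pass these constraints and can later be
// merged by the GCNCreateVOPD pass into something like
//
//   v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3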
/// Check whether the instruction pair, FirstMI and SecondMI, should be
/// scheduled together. If FirstMI is unspecified, check whether SecondMI may
/// be part of a fused pair at all.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
  const GCNSubtarget &ST = STII.getSubtarget();
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
  unsigned Opc2 = SecondMI.getOpcode();

  const auto checkVOPD = [&](bool VOPD3) -> bool {
    auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);

    // One instruction case
    if (!FirstMI)
      return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;

    unsigned Opc = FirstMI->getOpcode();
    auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);

    if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
          (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
      return false;

    return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
  };

  return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
}

namespace {
/// Adapts the design from MacroFusion: puts valid candidate instructions
/// back-to-back so they can easily be turned into VOPD instructions.
/// Greedily pairs instruction candidates; O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer

  VOPDPairingMutation(
      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    const TargetInstrInfo &TII = *DAG->TII;
    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
      return;
    }

    std::vector<SUnit>::iterator ISUI, JSUI;
    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
      const MachineInstr *IMI = ISUI->getInstr();
      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
        continue;
      if (!hasLessThanNumFused(*ISUI, 2))
        continue;

      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
        if (JSUI->isBoundaryNode())
          continue;
        const MachineInstr *JMI = JSUI->getInstr();
        if (!hasLessThanNumFused(*JSUI, 2) ||
            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
          continue;
        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
          break;
      }
    }
    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
  }
};
} // namespace

std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
}
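// Note on usage (illustrative, outside the scope of this file): the mutation
// returned by createVOPDPairingMutation() is typically attached to the GCN
// machine scheduler along the lines of
//
//   DAG->addMutation(createVOPDPairingMutation());
//
// so that candidate pairs are placed back to back before GCNCreateVOPD runs.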