1 //===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file contains the AMDGPU DAG scheduling 10 /// mutation to pair VOPD instructions back to back. It also contains 11 // subroutines useful in the creation of VOPD instructions 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "GCNVOPDUtils.h" 16 #include "AMDGPUSubtarget.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIInstrInfo.h" 20 #include "Utils/AMDGPUBaseInfo.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineOperand.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/CodeGen/MacroFusion.h" 28 #include "llvm/CodeGen/ScheduleDAG.h" 29 #include "llvm/CodeGen/ScheduleDAGMutation.h" 30 #include "llvm/CodeGen/TargetInstrInfo.h" 31 #include "llvm/MC/MCInst.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "gcn-vopd-utils" 36 37 bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, 38 const MachineInstr &FirstMI, 39 const MachineInstr &SecondMI) { 40 namespace VOPD = AMDGPU::VOPD; 41 42 const MachineFunction *MF = FirstMI.getMF(); 43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 44 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo()); 45 const MachineRegisterInfo &MRI = MF->getRegInfo(); 46 // Literals also count against scalar bus limit 47 SmallVector<const MachineOperand *> UniqueLiterals; 48 auto addLiteral = [&](const MachineOperand &Op) { 49 for (auto &Literal : UniqueLiterals) { 50 if (Literal->isIdenticalTo(Op)) 51 return; 52 } 53 UniqueLiterals.push_back(&Op); 54 }; 55 SmallVector<Register> UniqueScalarRegs; 56 assert([&]() -> bool { 57 for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); 58 MII != FirstMI.getParent()->instr_end(); ++MII) { 59 if (&*MII == &SecondMI) 60 return true; 61 } 62 return false; 63 }() && "Expected FirstMI to precede SecondMI"); 64 // Cannot pair dependent instructions 65 for (const auto &Use : SecondMI.uses()) 66 if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI)) 67 return false; 68 69 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) { 70 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI; 71 const MachineOperand &Operand = MI.getOperand(OperandIdx); 72 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg())) 73 return Operand.getReg(); 74 return Register(); 75 }; 76 77 auto InstInfo = 78 AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc()); 79 80 for (auto CompIdx : VOPD::COMPONENTS) { 81 const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI; 82 83 const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0); 84 if (Src0.isReg()) { 85 if (!TRI->isVectorRegister(MRI, Src0.getReg())) { 86 if (!is_contained(UniqueScalarRegs, Src0.getReg())) 87 UniqueScalarRegs.push_back(Src0.getReg()); 88 } 89 } else { 90 if (!TII.isInlineConstant(MI, VOPD::Component::SRC0)) 91 addLiteral(Src0); 92 } 93 94 if (InstInfo[CompIdx].hasMandatoryLiteral()) { 95 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex(); 96 addLiteral(MI.getOperand(CompOprIdx)); 97 } 98 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC)) 99 UniqueScalarRegs.push_back(AMDGPU::VCC_LO); 100 } 101 102 if (UniqueLiterals.size() > 1) 103 return false; 104 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) 105 return false; 106 if (InstInfo.hasInvalidOperand(getVRegIdx)) 107 return false; 108 109 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI 110 << "\n\tY: " << SecondMI << "\n"); 111 return true; 112 } 113 114 /// Check if the instr pair, FirstMI and SecondMI, should be scheduled 115 /// together. Given SecondMI, when FirstMI is unspecified, then check if 116 /// SecondMI may be part of a fused pair at all. 117 static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, 118 const TargetSubtargetInfo &TSI, 119 const MachineInstr *FirstMI, 120 const MachineInstr &SecondMI) { 121 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); 122 unsigned Opc2 = SecondMI.getOpcode(); 123 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); 124 125 // One instruction case 126 if (!FirstMI) 127 return SecondCanBeVOPD.Y; 128 129 unsigned Opc = FirstMI->getOpcode(); 130 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); 131 132 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || 133 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) 134 return false; 135 136 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); 137 } 138 139 namespace { 140 /// Adapts design from MacroFusion 141 /// Puts valid candidate instructions back-to-back so they can easily 142 /// be turned into VOPD instructions 143 /// Greedily pairs instruction candidates. O(n^2) algorithm. 144 struct VOPDPairingMutation : ScheduleDAGMutation { 145 ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer 146 147 VOPDPairingMutation( 148 ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer 149 : shouldScheduleAdjacent(shouldScheduleAdjacent) {} 150 151 void apply(ScheduleDAGInstrs *DAG) override { 152 const TargetInstrInfo &TII = *DAG->TII; 153 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); 154 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) { 155 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n"); 156 return; 157 } 158 159 std::vector<SUnit>::iterator ISUI, JSUI; 160 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { 161 const MachineInstr *IMI = ISUI->getInstr(); 162 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) 163 continue; 164 if (!hasLessThanNumFused(*ISUI, 2)) 165 continue; 166 167 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { 168 if (JSUI->isBoundaryNode()) 169 continue; 170 const MachineInstr *JMI = JSUI->getInstr(); 171 if (!hasLessThanNumFused(*JSUI, 2) || 172 !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) 173 continue; 174 if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) 175 break; 176 } 177 } 178 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n"); 179 } 180 }; 181 } // namespace 182 183 std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { 184 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent); 185 } 186