xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp (revision e3f4a63af63bea70bc86b6c790b14aa5ee99fcd0)
1 //===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file contains the AMDGPU DAG scheduling
10 /// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "GCNVOPDUtils.h"
16 #include "AMDGPUSubtarget.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIInstrInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineOperand.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/MacroFusion.h"
28 #include "llvm/CodeGen/ScheduleDAG.h"
29 #include "llvm/CodeGen/ScheduleDAGMutation.h"
30 #include "llvm/CodeGen/TargetInstrInfo.h"
31 #include "llvm/MC/MCInst.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "gcn-vopd-utils"
36 
/// Check whether \p FirstMI (component X) and \p SecondMI (component Y)
/// satisfy the register and operand constraints required to combine them into
/// a single VOPD (or, when \p IsVOPD3 is set, VOPD3) instruction. FirstMI is
/// expected to precede SecondMI in the same basic block (asserted below).
/// Returns true when pairing is legal.
bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI, bool IsVOPD3) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // The VOPD3 encoding is only present on subtargets that advertise it.
  if (IsVOPD3 && !ST.hasVOPD3())
    return false;
  // Plain VOPD cannot encode VOP3-form instructions, and DPP instructions are
  // never pairable.
  if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
    return false;
  if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
    return false;

  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against scalar bus limit
  SmallVector<const MachineOperand *> UniqueLiterals;
  // Record a literal operand, de-duplicating identical literals so a repeated
  // literal only counts once against the limit.
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  // Non-vector (scalar) register sources consumed by either component.
  SmallVector<Register> UniqueScalarRegs;
  // Debug-only sanity check: SecondMI must be reachable from FirstMI by
  // walking forward within FirstMI's basic block.
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
      return false;

  // Resolve a (component, operand index) pair to the vector register that
  // operand uses; returns a null Register for non-vector operands.
  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

  auto InstInfo =
      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

  // Collect scalar-bus users (scalar registers, literals, implicit VCC) for
  // both components and enforce the VOPD3-only operand restrictions.
  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else if (!TII.isInlineConstant(Src0)) {
      // Non-inline immediate, i.e. a literal. VOPD3 rejects literals.
      if (IsVOPD3)
        return false;
      addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      // A mandatory literal is likewise incompatible with VOPD3.
      if (IsVOPD3)
        return false;

      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
    // An implicit read of VCC also occupies the scalar bus.
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);

    if (IsVOPD3) {
      // For VOPD3, src1 and src2 (when present) must be VGPRs, with two
      // exceptions: a bitop3 src2 is skipped here (checked against zero
      // further below), and V_CNDMASK_B32_e64's src2 is counted as a
      // scalar-bus user instead.
      for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
        const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
        if (!Src)
          continue;
        if (OpName == AMDGPU::OpName::src2) {
          if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
            continue;
          if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
            UniqueScalarRegs.push_back(Src->getReg());
            continue;
          }
        }
        if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
          return false;
      }

      // None of clamp, omod, or op_sel may be set in VOPD3.
      for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
                          AMDGPU::OpName::op_sel}) {
        if (TII.hasModifiersSet(MI, OpName))
          return false;
      }

      // Neg is allowed, other modifiers are not. NB: even though sext has the
      // same value as neg, there are no combinable instructions with sext.
      for (auto OpName :
           {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
            AMDGPU::OpName::src2_modifiers}) {
        const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
        if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
          return false;
      }
    }
  }

  // Scalar bus limits: at most one literal, and at most two scalar-bus users
  // (literals plus scalar registers) in total.
  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
  // source-cache.
  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
  bool AllowSameVGPR = ST.hasGFX1250Insts();

  if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
                                 IsVOPD3))
    return false;

  if (IsVOPD3) {
    // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
    if (AMDGPU::hasNamedOperand(SecondMI.getOpcode(), AMDGPU::OpName::bitop3)) {
      const MachineOperand &Src2 =
          *TII.getNamedOperand(SecondMI, AMDGPU::OpName::src2);
      if (!Src2.isImm() || Src2.getImm())
        return false;
    }
    if (AMDGPU::hasNamedOperand(FirstMI.getOpcode(), AMDGPU::OpName::bitop3)) {
      const MachineOperand &Src2 =
          *TII.getNamedOperand(FirstMI, AMDGPU::OpName::src2);
      if (!Src2.isImm() || Src2.getImm())
        return false;
    }
  }

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}
184 
185 /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
186 /// together. Given SecondMI, when FirstMI is unspecified, then check if
187 /// SecondMI may be part of a fused pair at all.
188 static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
189                                        const TargetSubtargetInfo &TSI,
190                                        const MachineInstr *FirstMI,
191                                        const MachineInstr &SecondMI) {
192   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
193   const GCNSubtarget &ST = STII.getSubtarget();
194   unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
195   unsigned Opc2 = SecondMI.getOpcode();
196 
197   const auto checkVOPD = [&](bool VOPD3) -> bool {
198     auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
199 
200     // One instruction case
201     if (!FirstMI)
202       return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
203 
204     unsigned Opc = FirstMI->getOpcode();
205     auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
206 
207     if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
208           (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
209       return false;
210 
211     return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
212   };
213 
214   return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
215 }
216 
217 namespace {
218 /// Adapts design from MacroFusion
219 /// Puts valid candidate instructions back-to-back so they can easily
220 /// be turned into VOPD instructions
221 /// Greedily pairs instruction candidates. O(n^2) algorithm.
222 struct VOPDPairingMutation : ScheduleDAGMutation {
223   MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
224 
225   VOPDPairingMutation(
226       MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
227       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
228 
229   void apply(ScheduleDAGInstrs *DAG) override {
230     const TargetInstrInfo &TII = *DAG->TII;
231     const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
232     if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
233       LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
234       return;
235     }
236 
237     std::vector<SUnit>::iterator ISUI, JSUI;
238     for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
239       const MachineInstr *IMI = ISUI->getInstr();
240       if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
241         continue;
242       if (!hasLessThanNumFused(*ISUI, 2))
243         continue;
244 
245       for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
246         if (JSUI->isBoundaryNode())
247           continue;
248         const MachineInstr *JMI = JSUI->getInstr();
249         if (!hasLessThanNumFused(*JSUI, 2) ||
250             !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
251           continue;
252         if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
253           break;
254       }
255     }
256     LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
257   }
258 };
259 } // namespace
260 
261 std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
262   return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
263 }
264