//===- GCNVOPDUtils.cpp - GCN VOPD Utils ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling mutation to pair VOPD
/// instructions back to back. It also contains subroutines useful in the
/// creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//

#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"

bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against scalar bus limit
  SmallVector<const MachineOperand *> UniqueLiterals;
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  SmallVector<Register> UniqueScalarRegs;
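  // Sanity check (asserts builds only): SecondMI must appear after FirstMI
  // in the same basic block; the walk below only scans forward from FirstMI.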
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
      return false;

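  // Return the operand's register if it is a vector register, or a null
  // Register otherwise; used by InstInfo.hasInvalidOperand() below.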
  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

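  // Combined operand info for the prospective X/Y component pair.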
  auto InstInfo =
      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

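  // Walk both components, collecting the unique scalar registers and unique
  // literals they read; these compete for the shared scalar bus.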
  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else {
      if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
        addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
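    // An implicit read of VCC also consumes a scalar bus slot; record it as
    // VCC_LO, since pairing is only done in wave32 mode (see apply() below).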
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
  }

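  // Reject pairs that exceed the scalar bus limits: at most one unique
  // literal, and at most two scalar bus reads (literals plus scalar
  // registers) in total.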
  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;
  if (InstInfo.hasInvalidOperand(getVRegIdx))
    return false;

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}

/// Check if the instruction pair, FirstMI and SecondMI, should be scheduled
/// together. When FirstMI is unspecified (nullptr), check whether SecondMI
/// may be part of a fused pair at all.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
  unsigned Opc2 = SecondMI.getOpcode();
  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);

  // One-instruction case: can SecondMI be the second half of some VOPD pair?
  if (!FirstMI)
    return SecondCanBeVOPD.Y;

  unsigned Opc = FirstMI->getOpcode();
  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);

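  // A pair is viable only if one instruction can occupy the X slot and the
  // other the Y slot of a VOPD encoding.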
  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
    return false;

  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
}

namespace {
/// Adapts the design of MacroFusion: puts valid candidate instructions
/// back to back so they can easily be turned into VOPD instructions.
/// Greedily pairs instruction candidates; an O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
  ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer

  VOPDPairingMutation(
      ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    const TargetInstrInfo &TII = *DAG->TII;
    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
      return;
    }

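    // Greedy O(n^2) scan: for each SUnit that could form a VOPD pair, search
    // forward for the first partner that passes the constraint checks and
    // fuse the two so they are scheduled back to back.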
    std::vector<SUnit>::iterator ISUI, JSUI;
    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
      const MachineInstr *IMI = ISUI->getInstr();
      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
        continue;
      if (!hasLessThanNumFused(*ISUI, 2))
        continue;

      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
        if (JSUI->isBoundaryNode())
          continue;
        const MachineInstr *JMI = JSUI->getInstr();
        if (!hasLessThanNumFused(*JSUI, 2) ||
            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
          continue;
        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
          break;
      }
    }
    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
  }
};
} // namespace

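// A minimal usage sketch (illustrative, not part of this file): a target
// wanting this pairing would typically install the mutation on its
// scheduling DAG, e.g. from a createMachineScheduler() hook:
//
//   DAG->addMutation(createVOPDPairingMutation());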
std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
}