xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp (revision d9a42747950146bf03cda7f6e25d219253f8a57a)
1 //===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Combine VALU pairs into VOPD instructions
11 /// Only works on wave32
12 /// Has register requirements, we reject creating VOPD if the requirements are
13 /// not met.
14 /// shouldCombineVOPD mutator in postRA machine scheduler puts candidate
15 /// instructions for VOPD back-to-back
16 ///
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "AMDGPU.h"
21 #include "GCNSubtarget.h"
22 #include "GCNVOPDUtils.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/ADT/StringMap.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Debug.h"
34 #include <utility>
35 
// Short identifier for this pass. It is also handed to INITIALIZE_PASS at the
// bottom of the file as the pass argument; by LLVM convention LLVM_DEBUG and
// STATISTIC key off this macro as well.
36 #define DEBUG_TYPE "gcn-create-vopd"
// Counter bumped once for every pair that doReplace() fuses into a VOPD.
37 STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created.");
38 
39 using namespace llvm;
40 
41 namespace {
42 
43 class GCNCreateVOPD : public MachineFunctionPass {
44 private:
45 public:
46   static char ID;
47   const GCNSubtarget *ST = nullptr;
48 
49   GCNCreateVOPD() : MachineFunctionPass(ID) {}
50 
51   void getAnalysisUsage(AnalysisUsage &AU) const override {
52     AU.setPreservesCFG();
53     MachineFunctionPass::getAnalysisUsage(AU);
54   }
55 
56   StringRef getPassName() const override {
57     return "GCN Create VOPD Instructions";
58   }
59 
60   bool doReplace(const SIInstrInfo *SII,
61                  std::pair<MachineInstr *, MachineInstr *> &Pair) {
62     auto *FirstMI = Pair.first;
63     auto *SecondMI = Pair.second;
64     unsigned Opc1 = FirstMI->getOpcode();
65     unsigned Opc2 = SecondMI->getOpcode();
66     int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
67                                         AMDGPU::getVOPDOpcode(Opc2));
68     assert(NewOpcode != -1 &&
69            "Should have previously determined this as a possible VOPD\n");
70 
71     auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
72                             FirstMI->getDebugLoc(), SII->get(NewOpcode))
73                         .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
74     VOPDInst.add(FirstMI->getOperand(0))
75         .add(SecondMI->getOperand(0))
76         .add(FirstMI->getOperand(1));
77 
78     switch (Opc1) {
79     case AMDGPU::V_MOV_B32_e32:
80       break;
81     case AMDGPU::V_FMAMK_F32:
82     case AMDGPU::V_FMAAK_F32:
83       VOPDInst.add(FirstMI->getOperand(2));
84       VOPDInst.add(FirstMI->getOperand(3));
85       break;
86     default:
87       VOPDInst.add(FirstMI->getOperand(2));
88       break;
89     }
90 
91     VOPDInst.add(SecondMI->getOperand(1));
92 
93     switch (Opc2) {
94     case AMDGPU::V_MOV_B32_e32:
95       break;
96     case AMDGPU::V_FMAMK_F32:
97     case AMDGPU::V_FMAAK_F32:
98       VOPDInst.add(SecondMI->getOperand(2));
99       VOPDInst.add(SecondMI->getOperand(3));
100       break;
101     default:
102       VOPDInst.add(SecondMI->getOperand(2));
103       break;
104     }
105 
106     VOPDInst.copyImplicitOps(*FirstMI);
107     VOPDInst.copyImplicitOps(*SecondMI);
108 
109     LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
110                       << *Pair.first << "\tY: " << *Pair.second << "\n");
111     FirstMI->eraseFromParent();
112     SecondMI->eraseFromParent();
113     ++NumVOPDCreated;
114     return true;
115   }
116 
117   bool runOnMachineFunction(MachineFunction &MF) override {
118     if (skipFunction(MF.getFunction()))
119       return false;
120     ST = &MF.getSubtarget<GCNSubtarget>();
121     if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
122       return false;
123     LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
124 
125     const SIInstrInfo *SII = ST->getInstrInfo();
126     bool Changed = false;
127 
128     SmallVector<std::pair<MachineInstr *, MachineInstr *>> ReplaceCandidates;
129 
130     for (auto &MBB : MF) {
131       auto MII = MBB.begin(), E = MBB.end();
132       while (MII != E) {
133         auto *FirstMI = &*MII;
134         MII = next_nodbg(MII, MBB.end());
135         if (MII == MBB.end())
136           break;
137         if (FirstMI->isDebugInstr())
138           continue;
139         auto *SecondMI = &*MII;
140         unsigned Opc = FirstMI->getOpcode();
141         unsigned Opc2 = SecondMI->getOpcode();
142         llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
143         llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
144         std::pair<MachineInstr *, MachineInstr *> Pair;
145 
146         if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
147           Pair = {FirstMI, SecondMI};
148         else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
149           Pair = {SecondMI, FirstMI};
150         else
151           continue;
152         // checkVOPDRegConstraints cares about program order, but doReplace
153         // cares about X-Y order in the constituted VOPD
154         if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
155           ReplaceCandidates.push_back(Pair);
156           ++MII;
157         }
158       }
159     }
160     for (auto &Pair : ReplaceCandidates) {
161       Changed |= doReplace(SII, Pair);
162     }
163 
164     return Changed;
165   }
166 };
167 
168 } // namespace
169 
// Out-of-class definition of the static pass identifier declared inside
// GCNCreateVOPD.
170 char GCNCreateVOPD::ID = 0;
171 
// Reference in the llvm namespace bound to GCNCreateVOPD::ID. The class itself
// sits in an anonymous namespace, so this is the handle visible outside it.
// NOTE(review): presumably declared in AMDGPU.h — confirm.
172 char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
173 
// Registers the pass under DEBUG_TYPE ("gcn-create-vopd") with its descriptive
// name. NOTE(review): the trailing `false, false` are assumed to be the macro's
// CFG-only and is-analysis flags — confirm against llvm/PassSupport.h.
174 INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
175                 false, false)
176