xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //=== lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/Combiner.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/Target/TargetMachine.h"
27 #define DEBUG_TYPE "amdgpu-regbank-combiner"
28 
29 using namespace llvm;
30 using namespace MIPatternMatch;
31 
32 class AMDGPURegBankCombinerHelper {
33 protected:
34   MachineIRBuilder &B;
35   MachineFunction &MF;
36   MachineRegisterInfo &MRI;
37   const RegisterBankInfo &RBI;
38   const TargetRegisterInfo &TRI;
39   CombinerHelper &Helper;
40 
41 public:
42   AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
43       : B(B), MF(B.getMF()), MRI(*B.getMRI()),
44         RBI(*MF.getSubtarget().getRegBankInfo()),
45         TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
46 
47   bool isVgprRegBank(Register Reg);
48 
49   struct MinMaxMedOpc {
50     unsigned Min, Max, Med;
51   };
52 
53   struct Med3MatchInfo {
54     unsigned Opc;
55     Register Val0, Val1, Val2;
56   };
57 
58   MinMaxMedOpc getMinMaxPair(unsigned Opc);
59 
60   template <class m_Cst, typename CstTy>
61   bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
62                 Register &Val, CstTy &K0, CstTy &K1);
63 
64   bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
65   void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
66 };
67 
68 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
69   return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
70 }
71 
72 AMDGPURegBankCombinerHelper::MinMaxMedOpc
73 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
74   switch (Opc) {
75   default:
76     llvm_unreachable("Unsupported opcode");
77   case AMDGPU::G_SMAX:
78   case AMDGPU::G_SMIN:
79     return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
80   case AMDGPU::G_UMAX:
81   case AMDGPU::G_UMIN:
82     return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
83   }
84 }
85 
86 template <class m_Cst, typename CstTy>
87 bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
88                                            MachineRegisterInfo &MRI,
89                                            MinMaxMedOpc MMMOpc, Register &Val,
90                                            CstTy &K0, CstTy &K1) {
91   // 4 operand commutes of: min(max(Val, K0), K1).
92   // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
93   // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
94   // 4 operand commutes of: max(min(Val, K1), K0).
95   // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
96   // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
97   return mi_match(
98       MI, MRI,
99       m_any_of(
100           m_CommutativeBinOp(
101               MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
102               m_Cst(K1)),
103           m_CommutativeBinOp(
104               MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
105               m_Cst(K0))));
106 }
107 
108 bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
109     MachineInstr &MI, Med3MatchInfo &MatchInfo) {
110   Register Dst = MI.getOperand(0).getReg();
111   if (!isVgprRegBank(Dst))
112     return false;
113 
114   if (MRI.getType(Dst).isVector())
115     return false;
116 
117   MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
118   Register Val;
119   Optional<ValueAndVReg> K0, K1;
120   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
121   if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
122     return false;
123 
124   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
125     return false;
126   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
127     return false;
128 
129   MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
130   return true;
131 }
132 
133 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
134                                             Med3MatchInfo &MatchInfo) {
135   B.setInstrAndDebugLoc(MI);
136   B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
137                {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
138   MI.eraseFromParent();
139 }
140 
141 class AMDGPURegBankCombinerHelperState {
142 protected:
143   CombinerHelper &Helper;
144   AMDGPURegBankCombinerHelper &RegBankHelper;
145 
146 public:
147   AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
148                                    AMDGPURegBankCombinerHelper &RegBankHelper)
149       : Helper(Helper), RegBankHelper(RegBankHelper) {}
150 };
151 
152 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
153 #include "AMDGPUGenRegBankGICombiner.inc"
154 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
155 
156 namespace {
157 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
158 #include "AMDGPUGenRegBankGICombiner.inc"
159 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
160 
161 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
162   GISelKnownBits *KB;
163   MachineDominatorTree *MDT;
164 
165 public:
166   AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
167 
168   AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
169                                   const AMDGPULegalizerInfo *LI,
170                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
171       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
172                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
173         KB(KB), MDT(MDT) {
174     if (!GeneratedRuleCfg.parseCommandLineOption())
175       report_fatal_error("Invalid rule identifier");
176   }
177 
178   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
179                MachineIRBuilder &B) const override;
180 };
181 
182 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
183                                               MachineInstr &MI,
184                                               MachineIRBuilder &B) const {
185   CombinerHelper Helper(Observer, B, KB, MDT);
186   AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
187   AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
188                                            RegBankHelper);
189 
190   if (Generated.tryCombineAll(Observer, MI, B))
191     return true;
192 
193   return false;
194 }
195 
196 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
197 #include "AMDGPUGenRegBankGICombiner.inc"
198 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
199 
200 // Pass boilerplate
201 // ================
202 
203 class AMDGPURegBankCombiner : public MachineFunctionPass {
204 public:
205   static char ID;
206 
207   AMDGPURegBankCombiner(bool IsOptNone = false);
208 
209   StringRef getPassName() const override {
210     return "AMDGPURegBankCombiner";
211   }
212 
213   bool runOnMachineFunction(MachineFunction &MF) override;
214 
215   void getAnalysisUsage(AnalysisUsage &AU) const override;
216 private:
217   bool IsOptNone;
218 };
219 } // end anonymous namespace
220 
221 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
222   AU.addRequired<TargetPassConfig>();
223   AU.setPreservesCFG();
224   getSelectionDAGFallbackAnalysisUsage(AU);
225   AU.addRequired<GISelKnownBitsAnalysis>();
226   AU.addPreserved<GISelKnownBitsAnalysis>();
227   if (!IsOptNone) {
228     AU.addRequired<MachineDominatorTree>();
229     AU.addPreserved<MachineDominatorTree>();
230   }
231   MachineFunctionPass::getAnalysisUsage(AU);
232 }
233 
234 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
235   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
236   initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
237 }
238 
239 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
240   if (MF.getProperties().hasProperty(
241           MachineFunctionProperties::Property::FailedISel))
242     return false;
243   auto *TPC = &getAnalysis<TargetPassConfig>();
244   const Function &F = MF.getFunction();
245   bool EnableOpt =
246       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
247 
248   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
249   const AMDGPULegalizerInfo *LI
250     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
251 
252   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
253   MachineDominatorTree *MDT =
254       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
255   AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
256                                          F.hasMinSize(), LI, KB, MDT);
257   Combiner C(PCInfo, TPC);
258   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
259 }
260 
261 char AMDGPURegBankCombiner::ID = 0;
262 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
263                       "Combine AMDGPU machine instrs after regbankselect",
264                       false, false)
265 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
266 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
267 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
268                     "Combine AMDGPU machine instrs after regbankselect", false,
269                     false)
270 
271 namespace llvm {
272 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
273   return new AMDGPURegBankCombiner(IsOptNone);
274 }
275 } // end namespace llvm
276