//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

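// Operands of a matched (select (fcmp Pred LHS, RHS), True, False) that is a
// candidate for the fmin_legacy/fmax_legacy combine.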
struct FMinFMaxLegacyInfo {
  Register LHS;
  Register RHS;
  Register True;
  Register False;
  CmpInst::Predicate Pred;
};

// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

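  // Only the relational predicates can map onto a min/max; reject equality,
  // ordered/unordered tests, and the always-false/always-true predicates.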
  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                              const FMinFMaxLegacyInfo &Info) {
  auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
    MachineIRBuilder MIB(MI);
    MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
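    // E.g. select (fcmp ult x, y), x, y --> fmin_legacy(y, x): with a NaN
    // input the unordered compare is true and the select picks x, and
    // fmin_legacy (assuming the DX9 rule min(a, b) = a < b ? a : b) also
    // returns x when its compare fails.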
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // legacy instructions return the second operand when the compare with a
    // NaN fails, so order the operands based on the compare type the hardware
    // uses.
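    // E.g. select (fcmp olt x, y), x, y --> fmin_legacy(x, y): a NaN operand
    // fails both compares, so the select and the legacy min both produce y.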
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF, CombinerHelper &Helper) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
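    // The combine is only safe if the source is provably an unsigned byte,
    // i.e. every bit above the low 8 is known to be zero.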
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

static void applyUCharToFloat(MachineInstr &MI) {
  MachineIRBuilder B(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = B.getMRI()->getType(DstReg);
  LLT SrcTy = B.getMRI()->getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
// boilerplate.
struct CvtF32UByteMatchInfo {
  Register CvtVal;
  unsigned ShiftOffset;
};

static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF,
                              CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

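  // Fold a constant byte-aligned shift into the opcode's byte index. The
  // G_AMDGPU_CVT_F32_UBYTE* opcodes are numbered consecutively, so e.g.
  // cvt_f32_ubyte0 (lshr $x, 8) --> cvt_f32_ubyte1 $x.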
  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

static void applyCvtF32UByteN(MachineInstr &MI,
                              const CvtF32UByteMatchInfo &MatchInfo) {
  MachineIRBuilder B(MI);
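  // Again relies on the consecutive numbering of the ubyte opcodes: the byte
  // index directly selects the new opcode.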
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);

  if (Generated.tryCombineAll(Observer, MI, B, Helper))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, a 64-bit shift is a quarter-rate instruction. In the
    // common case, splitting it into a move and a 32-bit shift is faster and
    // the same code size.
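    // E.g. a 64-bit shift by 32 or more only ever needs one of the 32-bit
    // halves of the source.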
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm