xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp -------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level,
105ffd83dbSDimitry Andric // after the legalizer.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
14*e8d8bef9SDimitry Andric #include "AMDGPU.h"
155ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h"
16*e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
17*e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
185ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
235ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
245ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
25*e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
265ffd83dbSDimitry Andric 
275ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric using namespace llvm;
305ffd83dbSDimitry Andric using namespace MIPatternMatch;
315ffd83dbSDimitry Andric 
32*e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper {
33*e8d8bef9SDimitry Andric protected:
34*e8d8bef9SDimitry Andric   MachineIRBuilder &B;
35*e8d8bef9SDimitry Andric   MachineFunction &MF;
36*e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI;
37*e8d8bef9SDimitry Andric   CombinerHelper &Helper;
38*e8d8bef9SDimitry Andric 
39*e8d8bef9SDimitry Andric public:
40*e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
41*e8d8bef9SDimitry Andric       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
42*e8d8bef9SDimitry Andric 
435ffd83dbSDimitry Andric   struct FMinFMaxLegacyInfo {
445ffd83dbSDimitry Andric     Register LHS;
455ffd83dbSDimitry Andric     Register RHS;
465ffd83dbSDimitry Andric     Register True;
475ffd83dbSDimitry Andric     Register False;
485ffd83dbSDimitry Andric     CmpInst::Predicate Pred;
495ffd83dbSDimitry Andric   };
505ffd83dbSDimitry Andric 
515ffd83dbSDimitry Andric   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
52*e8d8bef9SDimitry Andric   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
53*e8d8bef9SDimitry Andric   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
54*e8d8bef9SDimitry Andric                                          const FMinFMaxLegacyInfo &Info);
55*e8d8bef9SDimitry Andric 
56*e8d8bef9SDimitry Andric   bool matchUCharToFloat(MachineInstr &MI);
57*e8d8bef9SDimitry Andric   void applyUCharToFloat(MachineInstr &MI);
58*e8d8bef9SDimitry Andric 
59*e8d8bef9SDimitry Andric   // FIXME: Should be able to have 2 separate matchdatas rather than custom
60*e8d8bef9SDimitry Andric   // struct boilerplate.
61*e8d8bef9SDimitry Andric   struct CvtF32UByteMatchInfo {
62*e8d8bef9SDimitry Andric     Register CvtVal;
63*e8d8bef9SDimitry Andric     unsigned ShiftOffset;
64*e8d8bef9SDimitry Andric   };
65*e8d8bef9SDimitry Andric 
66*e8d8bef9SDimitry Andric   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
67*e8d8bef9SDimitry Andric   void applyCvtF32UByteN(MachineInstr &MI,
68*e8d8bef9SDimitry Andric                          const CvtF32UByteMatchInfo &MatchInfo);
69*e8d8bef9SDimitry Andric };
70*e8d8bef9SDimitry Andric 
71*e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
72*e8d8bef9SDimitry Andric     MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
735ffd83dbSDimitry Andric   // FIXME: Combines should have subtarget predicates, and we shouldn't need
745ffd83dbSDimitry Andric   // this here.
755ffd83dbSDimitry Andric   if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
765ffd83dbSDimitry Andric     return false;
775ffd83dbSDimitry Andric 
785ffd83dbSDimitry Andric   // FIXME: Type predicate on pattern
795ffd83dbSDimitry Andric   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
805ffd83dbSDimitry Andric     return false;
815ffd83dbSDimitry Andric 
825ffd83dbSDimitry Andric   Register Cond = MI.getOperand(1).getReg();
835ffd83dbSDimitry Andric   if (!MRI.hasOneNonDBGUse(Cond) ||
845ffd83dbSDimitry Andric       !mi_match(Cond, MRI,
855ffd83dbSDimitry Andric                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
865ffd83dbSDimitry Andric     return false;
875ffd83dbSDimitry Andric 
885ffd83dbSDimitry Andric   Info.True = MI.getOperand(2).getReg();
895ffd83dbSDimitry Andric   Info.False = MI.getOperand(3).getReg();
905ffd83dbSDimitry Andric 
915ffd83dbSDimitry Andric   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
925ffd83dbSDimitry Andric       !(Info.LHS == Info.False && Info.RHS == Info.True))
935ffd83dbSDimitry Andric     return false;
945ffd83dbSDimitry Andric 
955ffd83dbSDimitry Andric   switch (Info.Pred) {
965ffd83dbSDimitry Andric   case CmpInst::FCMP_FALSE:
975ffd83dbSDimitry Andric   case CmpInst::FCMP_OEQ:
985ffd83dbSDimitry Andric   case CmpInst::FCMP_ONE:
995ffd83dbSDimitry Andric   case CmpInst::FCMP_ORD:
1005ffd83dbSDimitry Andric   case CmpInst::FCMP_UNO:
1015ffd83dbSDimitry Andric   case CmpInst::FCMP_UEQ:
1025ffd83dbSDimitry Andric   case CmpInst::FCMP_UNE:
1035ffd83dbSDimitry Andric   case CmpInst::FCMP_TRUE:
1045ffd83dbSDimitry Andric     return false;
1055ffd83dbSDimitry Andric   default:
1065ffd83dbSDimitry Andric     return true;
1075ffd83dbSDimitry Andric   }
1085ffd83dbSDimitry Andric }
1095ffd83dbSDimitry Andric 
110*e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
111*e8d8bef9SDimitry Andric     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
112*e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
113*e8d8bef9SDimitry Andric   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
114*e8d8bef9SDimitry Andric     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
1155ffd83dbSDimitry Andric   };
1165ffd83dbSDimitry Andric 
1175ffd83dbSDimitry Andric   switch (Info.Pred) {
1185ffd83dbSDimitry Andric   case CmpInst::FCMP_ULT:
1195ffd83dbSDimitry Andric   case CmpInst::FCMP_ULE:
1205ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1215ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1225ffd83dbSDimitry Andric     else
1235ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1245ffd83dbSDimitry Andric     break;
1255ffd83dbSDimitry Andric   case CmpInst::FCMP_OLE:
1265ffd83dbSDimitry Andric   case CmpInst::FCMP_OLT: {
1275ffd83dbSDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
1285ffd83dbSDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
1295ffd83dbSDimitry Andric     // so permute it based on the compare type the hardware uses.
1305ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1315ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1325ffd83dbSDimitry Andric     else
1335ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1345ffd83dbSDimitry Andric     break;
1355ffd83dbSDimitry Andric   }
1365ffd83dbSDimitry Andric   case CmpInst::FCMP_UGE:
1375ffd83dbSDimitry Andric   case CmpInst::FCMP_UGT: {
1385ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1395ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1405ffd83dbSDimitry Andric     else
1415ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1425ffd83dbSDimitry Andric     break;
1435ffd83dbSDimitry Andric   }
1445ffd83dbSDimitry Andric   case CmpInst::FCMP_OGT:
1455ffd83dbSDimitry Andric   case CmpInst::FCMP_OGE: {
1465ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1475ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1485ffd83dbSDimitry Andric     else
1495ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1505ffd83dbSDimitry Andric     break;
1515ffd83dbSDimitry Andric   }
1525ffd83dbSDimitry Andric   default:
1535ffd83dbSDimitry Andric     llvm_unreachable("predicate should not have matched");
1545ffd83dbSDimitry Andric   }
1555ffd83dbSDimitry Andric 
1565ffd83dbSDimitry Andric   MI.eraseFromParent();
1575ffd83dbSDimitry Andric }
1585ffd83dbSDimitry Andric 
159*e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
1605ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1615ffd83dbSDimitry Andric 
1625ffd83dbSDimitry Andric   // TODO: We could try to match extracting the higher bytes, which would be
1635ffd83dbSDimitry Andric   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1645ffd83dbSDimitry Andric   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1655ffd83dbSDimitry Andric   // about in practice.
1665ffd83dbSDimitry Andric   LLT Ty = MRI.getType(DstReg);
1675ffd83dbSDimitry Andric   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
1685ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
1695ffd83dbSDimitry Andric     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
1705ffd83dbSDimitry Andric     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
1715ffd83dbSDimitry Andric     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
1725ffd83dbSDimitry Andric     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
1735ffd83dbSDimitry Andric   }
1745ffd83dbSDimitry Andric 
1755ffd83dbSDimitry Andric   return false;
1765ffd83dbSDimitry Andric }
1775ffd83dbSDimitry Andric 
178*e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
179*e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
1805ffd83dbSDimitry Andric 
1815ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
1825ffd83dbSDimitry Andric 
1835ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1845ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
185*e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(DstReg);
186*e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
1875ffd83dbSDimitry Andric   if (SrcTy != S32)
1885ffd83dbSDimitry Andric     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
1895ffd83dbSDimitry Andric 
1905ffd83dbSDimitry Andric   if (Ty == S32) {
1915ffd83dbSDimitry Andric     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
1925ffd83dbSDimitry Andric                    {SrcReg}, MI.getFlags());
1935ffd83dbSDimitry Andric   } else {
1945ffd83dbSDimitry Andric     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
1955ffd83dbSDimitry Andric                              {SrcReg}, MI.getFlags());
1965ffd83dbSDimitry Andric     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
1975ffd83dbSDimitry Andric   }
1985ffd83dbSDimitry Andric 
1995ffd83dbSDimitry Andric   MI.eraseFromParent();
2005ffd83dbSDimitry Andric }
2015ffd83dbSDimitry Andric 
202*e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
203*e8d8bef9SDimitry Andric     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2045ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
2055ffd83dbSDimitry Andric 
2065ffd83dbSDimitry Andric   // Look through G_ZEXT.
2075ffd83dbSDimitry Andric   mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2085ffd83dbSDimitry Andric 
2095ffd83dbSDimitry Andric   Register Src0;
2105ffd83dbSDimitry Andric   int64_t ShiftAmt;
2115ffd83dbSDimitry Andric   bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2125ffd83dbSDimitry Andric   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2135ffd83dbSDimitry Andric     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2145ffd83dbSDimitry Andric 
2155ffd83dbSDimitry Andric     unsigned ShiftOffset = 8 * Offset;
2165ffd83dbSDimitry Andric     if (IsShr)
2175ffd83dbSDimitry Andric       ShiftOffset += ShiftAmt;
2185ffd83dbSDimitry Andric     else
2195ffd83dbSDimitry Andric       ShiftOffset -= ShiftAmt;
2205ffd83dbSDimitry Andric 
2215ffd83dbSDimitry Andric     MatchInfo.CvtVal = Src0;
2225ffd83dbSDimitry Andric     MatchInfo.ShiftOffset = ShiftOffset;
2235ffd83dbSDimitry Andric     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2245ffd83dbSDimitry Andric   }
2255ffd83dbSDimitry Andric 
2265ffd83dbSDimitry Andric   // TODO: Simplify demanded bits.
2275ffd83dbSDimitry Andric   return false;
2285ffd83dbSDimitry Andric }
2295ffd83dbSDimitry Andric 
230*e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
231*e8d8bef9SDimitry Andric     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
232*e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
2335ffd83dbSDimitry Andric   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2345ffd83dbSDimitry Andric 
2355ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
2365ffd83dbSDimitry Andric   Register CvtSrc = MatchInfo.CvtVal;
237*e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2385ffd83dbSDimitry Andric   if (SrcTy != S32) {
2395ffd83dbSDimitry Andric     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2405ffd83dbSDimitry Andric     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2415ffd83dbSDimitry Andric   }
2425ffd83dbSDimitry Andric 
2435ffd83dbSDimitry Andric   assert(MI.getOpcode() != NewOpc);
2445ffd83dbSDimitry Andric   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2455ffd83dbSDimitry Andric   MI.eraseFromParent();
2465ffd83dbSDimitry Andric }
2475ffd83dbSDimitry Andric 
248*e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState {
249*e8d8bef9SDimitry Andric protected:
250*e8d8bef9SDimitry Andric   CombinerHelper &Helper;
251*e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
252*e8d8bef9SDimitry Andric 
253*e8d8bef9SDimitry Andric public:
254*e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelperState(
255*e8d8bef9SDimitry Andric       CombinerHelper &Helper,
256*e8d8bef9SDimitry Andric       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
257*e8d8bef9SDimitry Andric       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
258*e8d8bef9SDimitry Andric };
259*e8d8bef9SDimitry Andric 
2605ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
2615ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
2625ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
2635ffd83dbSDimitry Andric 
2645ffd83dbSDimitry Andric namespace {
2655ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
2665ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
2675ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
2685ffd83dbSDimitry Andric 
269*e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
2705ffd83dbSDimitry Andric   GISelKnownBits *KB;
2715ffd83dbSDimitry Andric   MachineDominatorTree *MDT;
2725ffd83dbSDimitry Andric 
2735ffd83dbSDimitry Andric public:
2745ffd83dbSDimitry Andric   AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
2755ffd83dbSDimitry Andric 
2765ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
2775ffd83dbSDimitry Andric                                   const AMDGPULegalizerInfo *LI,
2785ffd83dbSDimitry Andric                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
2795ffd83dbSDimitry Andric       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
2805ffd83dbSDimitry Andric                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
2815ffd83dbSDimitry Andric         KB(KB), MDT(MDT) {
2825ffd83dbSDimitry Andric     if (!GeneratedRuleCfg.parseCommandLineOption())
2835ffd83dbSDimitry Andric       report_fatal_error("Invalid rule identifier");
2845ffd83dbSDimitry Andric   }
2855ffd83dbSDimitry Andric 
2865ffd83dbSDimitry Andric   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
2875ffd83dbSDimitry Andric                MachineIRBuilder &B) const override;
2885ffd83dbSDimitry Andric };
2895ffd83dbSDimitry Andric 
2905ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
2915ffd83dbSDimitry Andric                                               MachineInstr &MI,
2925ffd83dbSDimitry Andric                                               MachineIRBuilder &B) const {
293*e8d8bef9SDimitry Andric   CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
294*e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
295*e8d8bef9SDimitry Andric   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
296*e8d8bef9SDimitry Andric                                                  PostLegalizerHelper);
2975ffd83dbSDimitry Andric 
298*e8d8bef9SDimitry Andric   if (Generated.tryCombineAll(Observer, MI, B))
2995ffd83dbSDimitry Andric     return true;
3005ffd83dbSDimitry Andric 
3015ffd83dbSDimitry Andric   switch (MI.getOpcode()) {
3025ffd83dbSDimitry Andric   case TargetOpcode::G_SHL:
3035ffd83dbSDimitry Andric   case TargetOpcode::G_LSHR:
3045ffd83dbSDimitry Andric   case TargetOpcode::G_ASHR:
3055ffd83dbSDimitry Andric     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
3065ffd83dbSDimitry Andric     // common case, splitting this into a move and a 32-bit shift is faster and
3075ffd83dbSDimitry Andric     // the same code size.
3085ffd83dbSDimitry Andric     return Helper.tryCombineShiftToUnmerge(MI, 32);
3095ffd83dbSDimitry Andric   }
3105ffd83dbSDimitry Andric 
3115ffd83dbSDimitry Andric   return false;
3125ffd83dbSDimitry Andric }
3135ffd83dbSDimitry Andric 
3145ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3155ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3165ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3175ffd83dbSDimitry Andric 
3185ffd83dbSDimitry Andric // Pass boilerplate
3195ffd83dbSDimitry Andric // ================
3205ffd83dbSDimitry Andric 
3215ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
3225ffd83dbSDimitry Andric public:
3235ffd83dbSDimitry Andric   static char ID;
3245ffd83dbSDimitry Andric 
3255ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
3265ffd83dbSDimitry Andric 
3275ffd83dbSDimitry Andric   StringRef getPassName() const override {
3285ffd83dbSDimitry Andric     return "AMDGPUPostLegalizerCombiner";
3295ffd83dbSDimitry Andric   }
3305ffd83dbSDimitry Andric 
3315ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
3325ffd83dbSDimitry Andric 
3335ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
3345ffd83dbSDimitry Andric private:
3355ffd83dbSDimitry Andric   bool IsOptNone;
3365ffd83dbSDimitry Andric };
3375ffd83dbSDimitry Andric } // end anonymous namespace
3385ffd83dbSDimitry Andric 
3395ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
3405ffd83dbSDimitry Andric   AU.addRequired<TargetPassConfig>();
3415ffd83dbSDimitry Andric   AU.setPreservesCFG();
3425ffd83dbSDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
3435ffd83dbSDimitry Andric   AU.addRequired<GISelKnownBitsAnalysis>();
3445ffd83dbSDimitry Andric   AU.addPreserved<GISelKnownBitsAnalysis>();
3455ffd83dbSDimitry Andric   if (!IsOptNone) {
3465ffd83dbSDimitry Andric     AU.addRequired<MachineDominatorTree>();
3475ffd83dbSDimitry Andric     AU.addPreserved<MachineDominatorTree>();
3485ffd83dbSDimitry Andric   }
3495ffd83dbSDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
3505ffd83dbSDimitry Andric }
3515ffd83dbSDimitry Andric 
3525ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
3535ffd83dbSDimitry Andric   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
3545ffd83dbSDimitry Andric   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
3555ffd83dbSDimitry Andric }
3565ffd83dbSDimitry Andric 
3575ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
3585ffd83dbSDimitry Andric   if (MF.getProperties().hasProperty(
3595ffd83dbSDimitry Andric           MachineFunctionProperties::Property::FailedISel))
3605ffd83dbSDimitry Andric     return false;
3615ffd83dbSDimitry Andric   auto *TPC = &getAnalysis<TargetPassConfig>();
3625ffd83dbSDimitry Andric   const Function &F = MF.getFunction();
3635ffd83dbSDimitry Andric   bool EnableOpt =
3645ffd83dbSDimitry Andric       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
3655ffd83dbSDimitry Andric 
3665ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3675ffd83dbSDimitry Andric   const AMDGPULegalizerInfo *LI
3685ffd83dbSDimitry Andric     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
3695ffd83dbSDimitry Andric 
3705ffd83dbSDimitry Andric   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
3715ffd83dbSDimitry Andric   MachineDominatorTree *MDT =
3725ffd83dbSDimitry Andric       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
3735ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
3745ffd83dbSDimitry Andric                                          F.hasMinSize(), LI, KB, MDT);
3755ffd83dbSDimitry Andric   Combiner C(PCInfo, TPC);
3765ffd83dbSDimitry Andric   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
3775ffd83dbSDimitry Andric }
3785ffd83dbSDimitry Andric 
3795ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0;
3805ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
3815ffd83dbSDimitry Andric                       "Combine AMDGPU machine instrs after legalization",
3825ffd83dbSDimitry Andric                       false, false)
3835ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
3845ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
3855ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
3865ffd83dbSDimitry Andric                     "Combine AMDGPU machine instrs after legalization", false,
3875ffd83dbSDimitry Andric                     false)
3885ffd83dbSDimitry Andric 
3895ffd83dbSDimitry Andric namespace llvm {
3905ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
3915ffd83dbSDimitry Andric   return new AMDGPUPostLegalizerCombiner(IsOptNone);
3925ffd83dbSDimitry Andric }
3935ffd83dbSDimitry Andric } // end namespace llvm
394