xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level,
105ffd83dbSDimitry Andric // after the legalizer.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
14e8d8bef9SDimitry Andric #include "AMDGPU.h"
15*349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h"
165ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h"
17e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
18e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
255ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
26e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
275ffd83dbSDimitry Andric 
285ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
295ffd83dbSDimitry Andric 
305ffd83dbSDimitry Andric using namespace llvm;
315ffd83dbSDimitry Andric using namespace MIPatternMatch;
325ffd83dbSDimitry Andric 
33e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper {
34e8d8bef9SDimitry Andric protected:
35e8d8bef9SDimitry Andric   MachineIRBuilder &B;
36e8d8bef9SDimitry Andric   MachineFunction &MF;
37e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI;
38*349cc55cSDimitry Andric   AMDGPUCombinerHelper &Helper;
39e8d8bef9SDimitry Andric 
40e8d8bef9SDimitry Andric public:
41*349cc55cSDimitry Andric   AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
42*349cc55cSDimitry Andric                                     AMDGPUCombinerHelper &Helper)
43e8d8bef9SDimitry Andric       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
44e8d8bef9SDimitry Andric 
455ffd83dbSDimitry Andric   struct FMinFMaxLegacyInfo {
465ffd83dbSDimitry Andric     Register LHS;
475ffd83dbSDimitry Andric     Register RHS;
485ffd83dbSDimitry Andric     Register True;
495ffd83dbSDimitry Andric     Register False;
505ffd83dbSDimitry Andric     CmpInst::Predicate Pred;
515ffd83dbSDimitry Andric   };
525ffd83dbSDimitry Andric 
535ffd83dbSDimitry Andric   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
54e8d8bef9SDimitry Andric   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
55e8d8bef9SDimitry Andric   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
56e8d8bef9SDimitry Andric                                          const FMinFMaxLegacyInfo &Info);
57e8d8bef9SDimitry Andric 
58e8d8bef9SDimitry Andric   bool matchUCharToFloat(MachineInstr &MI);
59e8d8bef9SDimitry Andric   void applyUCharToFloat(MachineInstr &MI);
60e8d8bef9SDimitry Andric 
61e8d8bef9SDimitry Andric   // FIXME: Should be able to have 2 separate matchdatas rather than custom
62e8d8bef9SDimitry Andric   // struct boilerplate.
63e8d8bef9SDimitry Andric   struct CvtF32UByteMatchInfo {
64e8d8bef9SDimitry Andric     Register CvtVal;
65e8d8bef9SDimitry Andric     unsigned ShiftOffset;
66e8d8bef9SDimitry Andric   };
67e8d8bef9SDimitry Andric 
68e8d8bef9SDimitry Andric   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
69e8d8bef9SDimitry Andric   void applyCvtF32UByteN(MachineInstr &MI,
70e8d8bef9SDimitry Andric                          const CvtF32UByteMatchInfo &MatchInfo);
71fe6060f1SDimitry Andric 
72fe6060f1SDimitry Andric   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
73e8d8bef9SDimitry Andric };
74e8d8bef9SDimitry Andric 
75e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
76e8d8bef9SDimitry Andric     MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
775ffd83dbSDimitry Andric   // FIXME: Combines should have subtarget predicates, and we shouldn't need
785ffd83dbSDimitry Andric   // this here.
795ffd83dbSDimitry Andric   if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
805ffd83dbSDimitry Andric     return false;
815ffd83dbSDimitry Andric 
825ffd83dbSDimitry Andric   // FIXME: Type predicate on pattern
835ffd83dbSDimitry Andric   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
845ffd83dbSDimitry Andric     return false;
855ffd83dbSDimitry Andric 
865ffd83dbSDimitry Andric   Register Cond = MI.getOperand(1).getReg();
875ffd83dbSDimitry Andric   if (!MRI.hasOneNonDBGUse(Cond) ||
885ffd83dbSDimitry Andric       !mi_match(Cond, MRI,
895ffd83dbSDimitry Andric                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
905ffd83dbSDimitry Andric     return false;
915ffd83dbSDimitry Andric 
925ffd83dbSDimitry Andric   Info.True = MI.getOperand(2).getReg();
935ffd83dbSDimitry Andric   Info.False = MI.getOperand(3).getReg();
945ffd83dbSDimitry Andric 
955ffd83dbSDimitry Andric   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
965ffd83dbSDimitry Andric       !(Info.LHS == Info.False && Info.RHS == Info.True))
975ffd83dbSDimitry Andric     return false;
985ffd83dbSDimitry Andric 
995ffd83dbSDimitry Andric   switch (Info.Pred) {
1005ffd83dbSDimitry Andric   case CmpInst::FCMP_FALSE:
1015ffd83dbSDimitry Andric   case CmpInst::FCMP_OEQ:
1025ffd83dbSDimitry Andric   case CmpInst::FCMP_ONE:
1035ffd83dbSDimitry Andric   case CmpInst::FCMP_ORD:
1045ffd83dbSDimitry Andric   case CmpInst::FCMP_UNO:
1055ffd83dbSDimitry Andric   case CmpInst::FCMP_UEQ:
1065ffd83dbSDimitry Andric   case CmpInst::FCMP_UNE:
1075ffd83dbSDimitry Andric   case CmpInst::FCMP_TRUE:
1085ffd83dbSDimitry Andric     return false;
1095ffd83dbSDimitry Andric   default:
1105ffd83dbSDimitry Andric     return true;
1115ffd83dbSDimitry Andric   }
1125ffd83dbSDimitry Andric }
1135ffd83dbSDimitry Andric 
114e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
115e8d8bef9SDimitry Andric     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
116e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
117e8d8bef9SDimitry Andric   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
118e8d8bef9SDimitry Andric     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
1195ffd83dbSDimitry Andric   };
1205ffd83dbSDimitry Andric 
1215ffd83dbSDimitry Andric   switch (Info.Pred) {
1225ffd83dbSDimitry Andric   case CmpInst::FCMP_ULT:
1235ffd83dbSDimitry Andric   case CmpInst::FCMP_ULE:
1245ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1255ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1265ffd83dbSDimitry Andric     else
1275ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1285ffd83dbSDimitry Andric     break;
1295ffd83dbSDimitry Andric   case CmpInst::FCMP_OLE:
1305ffd83dbSDimitry Andric   case CmpInst::FCMP_OLT: {
1315ffd83dbSDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
1325ffd83dbSDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
1335ffd83dbSDimitry Andric     // so permute it based on the compare type the hardware uses.
1345ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1355ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1365ffd83dbSDimitry Andric     else
1375ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1385ffd83dbSDimitry Andric     break;
1395ffd83dbSDimitry Andric   }
1405ffd83dbSDimitry Andric   case CmpInst::FCMP_UGE:
1415ffd83dbSDimitry Andric   case CmpInst::FCMP_UGT: {
1425ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1435ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1445ffd83dbSDimitry Andric     else
1455ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1465ffd83dbSDimitry Andric     break;
1475ffd83dbSDimitry Andric   }
1485ffd83dbSDimitry Andric   case CmpInst::FCMP_OGT:
1495ffd83dbSDimitry Andric   case CmpInst::FCMP_OGE: {
1505ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1515ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1525ffd83dbSDimitry Andric     else
1535ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1545ffd83dbSDimitry Andric     break;
1555ffd83dbSDimitry Andric   }
1565ffd83dbSDimitry Andric   default:
1575ffd83dbSDimitry Andric     llvm_unreachable("predicate should not have matched");
1585ffd83dbSDimitry Andric   }
1595ffd83dbSDimitry Andric 
1605ffd83dbSDimitry Andric   MI.eraseFromParent();
1615ffd83dbSDimitry Andric }
1625ffd83dbSDimitry Andric 
163e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
1645ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1655ffd83dbSDimitry Andric 
1665ffd83dbSDimitry Andric   // TODO: We could try to match extracting the higher bytes, which would be
1675ffd83dbSDimitry Andric   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1685ffd83dbSDimitry Andric   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1695ffd83dbSDimitry Andric   // about in practice.
1705ffd83dbSDimitry Andric   LLT Ty = MRI.getType(DstReg);
1715ffd83dbSDimitry Andric   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
1725ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
1735ffd83dbSDimitry Andric     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
1745ffd83dbSDimitry Andric     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
1755ffd83dbSDimitry Andric     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
1765ffd83dbSDimitry Andric     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
1775ffd83dbSDimitry Andric   }
1785ffd83dbSDimitry Andric 
1795ffd83dbSDimitry Andric   return false;
1805ffd83dbSDimitry Andric }
1815ffd83dbSDimitry Andric 
182e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
183e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
1845ffd83dbSDimitry Andric 
1855ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
1865ffd83dbSDimitry Andric 
1875ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1885ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
189e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(DstReg);
190e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
1915ffd83dbSDimitry Andric   if (SrcTy != S32)
1925ffd83dbSDimitry Andric     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
1935ffd83dbSDimitry Andric 
1945ffd83dbSDimitry Andric   if (Ty == S32) {
1955ffd83dbSDimitry Andric     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
1965ffd83dbSDimitry Andric                    {SrcReg}, MI.getFlags());
1975ffd83dbSDimitry Andric   } else {
1985ffd83dbSDimitry Andric     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
1995ffd83dbSDimitry Andric                              {SrcReg}, MI.getFlags());
2005ffd83dbSDimitry Andric     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
2015ffd83dbSDimitry Andric   }
2025ffd83dbSDimitry Andric 
2035ffd83dbSDimitry Andric   MI.eraseFromParent();
2045ffd83dbSDimitry Andric }
2055ffd83dbSDimitry Andric 
206e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
207e8d8bef9SDimitry Andric     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2085ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
2095ffd83dbSDimitry Andric 
2105ffd83dbSDimitry Andric   // Look through G_ZEXT.
2115ffd83dbSDimitry Andric   mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2125ffd83dbSDimitry Andric 
2135ffd83dbSDimitry Andric   Register Src0;
2145ffd83dbSDimitry Andric   int64_t ShiftAmt;
2155ffd83dbSDimitry Andric   bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2165ffd83dbSDimitry Andric   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2175ffd83dbSDimitry Andric     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2185ffd83dbSDimitry Andric 
2195ffd83dbSDimitry Andric     unsigned ShiftOffset = 8 * Offset;
2205ffd83dbSDimitry Andric     if (IsShr)
2215ffd83dbSDimitry Andric       ShiftOffset += ShiftAmt;
2225ffd83dbSDimitry Andric     else
2235ffd83dbSDimitry Andric       ShiftOffset -= ShiftAmt;
2245ffd83dbSDimitry Andric 
2255ffd83dbSDimitry Andric     MatchInfo.CvtVal = Src0;
2265ffd83dbSDimitry Andric     MatchInfo.ShiftOffset = ShiftOffset;
2275ffd83dbSDimitry Andric     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2285ffd83dbSDimitry Andric   }
2295ffd83dbSDimitry Andric 
2305ffd83dbSDimitry Andric   // TODO: Simplify demanded bits.
2315ffd83dbSDimitry Andric   return false;
2325ffd83dbSDimitry Andric }
2335ffd83dbSDimitry Andric 
234e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
235e8d8bef9SDimitry Andric     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
236e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
2375ffd83dbSDimitry Andric   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2385ffd83dbSDimitry Andric 
2395ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
2405ffd83dbSDimitry Andric   Register CvtSrc = MatchInfo.CvtVal;
241e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2425ffd83dbSDimitry Andric   if (SrcTy != S32) {
2435ffd83dbSDimitry Andric     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2445ffd83dbSDimitry Andric     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2455ffd83dbSDimitry Andric   }
2465ffd83dbSDimitry Andric 
2475ffd83dbSDimitry Andric   assert(MI.getOpcode() != NewOpc);
2485ffd83dbSDimitry Andric   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2495ffd83dbSDimitry Andric   MI.eraseFromParent();
2505ffd83dbSDimitry Andric }
2515ffd83dbSDimitry Andric 
252fe6060f1SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
253fe6060f1SDimitry Andric     MachineInstr &MI, Register &Reg) {
254fe6060f1SDimitry Andric   const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
255fe6060f1SDimitry Andric       MF.getSubtarget().getTargetLowering());
256fe6060f1SDimitry Andric   Reg = MI.getOperand(1).getReg();
257fe6060f1SDimitry Andric   return TLI->isCanonicalized(Reg, MF);
258fe6060f1SDimitry Andric }
259fe6060f1SDimitry Andric 
260e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState {
261e8d8bef9SDimitry Andric protected:
262*349cc55cSDimitry Andric   AMDGPUCombinerHelper &Helper;
263e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
264e8d8bef9SDimitry Andric 
265e8d8bef9SDimitry Andric public:
266e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelperState(
267*349cc55cSDimitry Andric       AMDGPUCombinerHelper &Helper,
268e8d8bef9SDimitry Andric       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
269e8d8bef9SDimitry Andric       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
270e8d8bef9SDimitry Andric };
271e8d8bef9SDimitry Andric 
2725ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
2735ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
2745ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
2755ffd83dbSDimitry Andric 
2765ffd83dbSDimitry Andric namespace {
2775ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
2785ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
2795ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
2805ffd83dbSDimitry Andric 
281e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
2825ffd83dbSDimitry Andric   GISelKnownBits *KB;
2835ffd83dbSDimitry Andric   MachineDominatorTree *MDT;
2845ffd83dbSDimitry Andric 
2855ffd83dbSDimitry Andric public:
2865ffd83dbSDimitry Andric   AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
2875ffd83dbSDimitry Andric 
2885ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
2895ffd83dbSDimitry Andric                                   const AMDGPULegalizerInfo *LI,
2905ffd83dbSDimitry Andric                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
2915ffd83dbSDimitry Andric       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
2925ffd83dbSDimitry Andric                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
2935ffd83dbSDimitry Andric         KB(KB), MDT(MDT) {
2945ffd83dbSDimitry Andric     if (!GeneratedRuleCfg.parseCommandLineOption())
2955ffd83dbSDimitry Andric       report_fatal_error("Invalid rule identifier");
2965ffd83dbSDimitry Andric   }
2975ffd83dbSDimitry Andric 
2985ffd83dbSDimitry Andric   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
2995ffd83dbSDimitry Andric                MachineIRBuilder &B) const override;
3005ffd83dbSDimitry Andric };
3015ffd83dbSDimitry Andric 
3025ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
3035ffd83dbSDimitry Andric                                               MachineInstr &MI,
3045ffd83dbSDimitry Andric                                               MachineIRBuilder &B) const {
305*349cc55cSDimitry Andric   AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
306e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
307e8d8bef9SDimitry Andric   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
308e8d8bef9SDimitry Andric                                                  PostLegalizerHelper);
3095ffd83dbSDimitry Andric 
310e8d8bef9SDimitry Andric   if (Generated.tryCombineAll(Observer, MI, B))
3115ffd83dbSDimitry Andric     return true;
3125ffd83dbSDimitry Andric 
3135ffd83dbSDimitry Andric   switch (MI.getOpcode()) {
3145ffd83dbSDimitry Andric   case TargetOpcode::G_SHL:
3155ffd83dbSDimitry Andric   case TargetOpcode::G_LSHR:
3165ffd83dbSDimitry Andric   case TargetOpcode::G_ASHR:
3175ffd83dbSDimitry Andric     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
3185ffd83dbSDimitry Andric     // common case, splitting this into a move and a 32-bit shift is faster and
3195ffd83dbSDimitry Andric     // the same code size.
3205ffd83dbSDimitry Andric     return Helper.tryCombineShiftToUnmerge(MI, 32);
3215ffd83dbSDimitry Andric   }
3225ffd83dbSDimitry Andric 
3235ffd83dbSDimitry Andric   return false;
3245ffd83dbSDimitry Andric }
3255ffd83dbSDimitry Andric 
3265ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3275ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3285ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3295ffd83dbSDimitry Andric 
3305ffd83dbSDimitry Andric // Pass boilerplate
3315ffd83dbSDimitry Andric // ================
3325ffd83dbSDimitry Andric 
3335ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
3345ffd83dbSDimitry Andric public:
3355ffd83dbSDimitry Andric   static char ID;
3365ffd83dbSDimitry Andric 
3375ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
3385ffd83dbSDimitry Andric 
3395ffd83dbSDimitry Andric   StringRef getPassName() const override {
3405ffd83dbSDimitry Andric     return "AMDGPUPostLegalizerCombiner";
3415ffd83dbSDimitry Andric   }
3425ffd83dbSDimitry Andric 
3435ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
3445ffd83dbSDimitry Andric 
3455ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
3465ffd83dbSDimitry Andric private:
3475ffd83dbSDimitry Andric   bool IsOptNone;
3485ffd83dbSDimitry Andric };
3495ffd83dbSDimitry Andric } // end anonymous namespace
3505ffd83dbSDimitry Andric 
3515ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
3525ffd83dbSDimitry Andric   AU.addRequired<TargetPassConfig>();
3535ffd83dbSDimitry Andric   AU.setPreservesCFG();
3545ffd83dbSDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
3555ffd83dbSDimitry Andric   AU.addRequired<GISelKnownBitsAnalysis>();
3565ffd83dbSDimitry Andric   AU.addPreserved<GISelKnownBitsAnalysis>();
3575ffd83dbSDimitry Andric   if (!IsOptNone) {
3585ffd83dbSDimitry Andric     AU.addRequired<MachineDominatorTree>();
3595ffd83dbSDimitry Andric     AU.addPreserved<MachineDominatorTree>();
3605ffd83dbSDimitry Andric   }
3615ffd83dbSDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
3625ffd83dbSDimitry Andric }
3635ffd83dbSDimitry Andric 
3645ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
3655ffd83dbSDimitry Andric   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
3665ffd83dbSDimitry Andric   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
3675ffd83dbSDimitry Andric }
3685ffd83dbSDimitry Andric 
3695ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
3705ffd83dbSDimitry Andric   if (MF.getProperties().hasProperty(
3715ffd83dbSDimitry Andric           MachineFunctionProperties::Property::FailedISel))
3725ffd83dbSDimitry Andric     return false;
3735ffd83dbSDimitry Andric   auto *TPC = &getAnalysis<TargetPassConfig>();
3745ffd83dbSDimitry Andric   const Function &F = MF.getFunction();
3755ffd83dbSDimitry Andric   bool EnableOpt =
3765ffd83dbSDimitry Andric       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
3775ffd83dbSDimitry Andric 
3785ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3795ffd83dbSDimitry Andric   const AMDGPULegalizerInfo *LI
3805ffd83dbSDimitry Andric     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
3815ffd83dbSDimitry Andric 
3825ffd83dbSDimitry Andric   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
3835ffd83dbSDimitry Andric   MachineDominatorTree *MDT =
3845ffd83dbSDimitry Andric       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
3855ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
3865ffd83dbSDimitry Andric                                          F.hasMinSize(), LI, KB, MDT);
3875ffd83dbSDimitry Andric   Combiner C(PCInfo, TPC);
3885ffd83dbSDimitry Andric   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
3895ffd83dbSDimitry Andric }
3905ffd83dbSDimitry Andric 
3915ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0;
3925ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
3935ffd83dbSDimitry Andric                       "Combine AMDGPU machine instrs after legalization",
3945ffd83dbSDimitry Andric                       false, false)
3955ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
3965ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
3975ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
3985ffd83dbSDimitry Andric                     "Combine AMDGPU machine instrs after legalization", false,
3995ffd83dbSDimitry Andric                     false)
4005ffd83dbSDimitry Andric 
4015ffd83dbSDimitry Andric namespace llvm {
4025ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
4035ffd83dbSDimitry Andric   return new AMDGPUPostLegalizerCombiner(IsOptNone);
4045ffd83dbSDimitry Andric }
4055ffd83dbSDimitry Andric } // end namespace llvm
406