xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
14e8d8bef9SDimitry Andric #include "AMDGPU.h"
155ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h"
16e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
17e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
185ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
235ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
245ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
25e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
265ffd83dbSDimitry Andric 
275ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric using namespace llvm;
305ffd83dbSDimitry Andric using namespace MIPatternMatch;
315ffd83dbSDimitry Andric 
32e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper {
33e8d8bef9SDimitry Andric protected:
34e8d8bef9SDimitry Andric   MachineIRBuilder &B;
35e8d8bef9SDimitry Andric   MachineFunction &MF;
36e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI;
37e8d8bef9SDimitry Andric   CombinerHelper &Helper;
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric public:
40e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
41e8d8bef9SDimitry Andric       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
42e8d8bef9SDimitry Andric 
435ffd83dbSDimitry Andric   struct FMinFMaxLegacyInfo {
445ffd83dbSDimitry Andric     Register LHS;
455ffd83dbSDimitry Andric     Register RHS;
465ffd83dbSDimitry Andric     Register True;
475ffd83dbSDimitry Andric     Register False;
485ffd83dbSDimitry Andric     CmpInst::Predicate Pred;
495ffd83dbSDimitry Andric   };
505ffd83dbSDimitry Andric 
515ffd83dbSDimitry Andric   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
52e8d8bef9SDimitry Andric   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
53e8d8bef9SDimitry Andric   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
54e8d8bef9SDimitry Andric                                          const FMinFMaxLegacyInfo &Info);
55e8d8bef9SDimitry Andric 
56e8d8bef9SDimitry Andric   bool matchUCharToFloat(MachineInstr &MI);
57e8d8bef9SDimitry Andric   void applyUCharToFloat(MachineInstr &MI);
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric   // FIXME: Should be able to have 2 separate matchdatas rather than custom
60e8d8bef9SDimitry Andric   // struct boilerplate.
61e8d8bef9SDimitry Andric   struct CvtF32UByteMatchInfo {
62e8d8bef9SDimitry Andric     Register CvtVal;
63e8d8bef9SDimitry Andric     unsigned ShiftOffset;
64e8d8bef9SDimitry Andric   };
65e8d8bef9SDimitry Andric 
66e8d8bef9SDimitry Andric   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
67e8d8bef9SDimitry Andric   void applyCvtF32UByteN(MachineInstr &MI,
68e8d8bef9SDimitry Andric                          const CvtF32UByteMatchInfo &MatchInfo);
69*fe6060f1SDimitry Andric 
70*fe6060f1SDimitry Andric   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
71e8d8bef9SDimitry Andric };
72e8d8bef9SDimitry Andric 
73e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
74e8d8bef9SDimitry Andric     MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
755ffd83dbSDimitry Andric   // FIXME: Combines should have subtarget predicates, and we shouldn't need
765ffd83dbSDimitry Andric   // this here.
775ffd83dbSDimitry Andric   if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
785ffd83dbSDimitry Andric     return false;
795ffd83dbSDimitry Andric 
805ffd83dbSDimitry Andric   // FIXME: Type predicate on pattern
815ffd83dbSDimitry Andric   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
825ffd83dbSDimitry Andric     return false;
835ffd83dbSDimitry Andric 
845ffd83dbSDimitry Andric   Register Cond = MI.getOperand(1).getReg();
855ffd83dbSDimitry Andric   if (!MRI.hasOneNonDBGUse(Cond) ||
865ffd83dbSDimitry Andric       !mi_match(Cond, MRI,
875ffd83dbSDimitry Andric                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
885ffd83dbSDimitry Andric     return false;
895ffd83dbSDimitry Andric 
905ffd83dbSDimitry Andric   Info.True = MI.getOperand(2).getReg();
915ffd83dbSDimitry Andric   Info.False = MI.getOperand(3).getReg();
925ffd83dbSDimitry Andric 
935ffd83dbSDimitry Andric   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
945ffd83dbSDimitry Andric       !(Info.LHS == Info.False && Info.RHS == Info.True))
955ffd83dbSDimitry Andric     return false;
965ffd83dbSDimitry Andric 
975ffd83dbSDimitry Andric   switch (Info.Pred) {
985ffd83dbSDimitry Andric   case CmpInst::FCMP_FALSE:
995ffd83dbSDimitry Andric   case CmpInst::FCMP_OEQ:
1005ffd83dbSDimitry Andric   case CmpInst::FCMP_ONE:
1015ffd83dbSDimitry Andric   case CmpInst::FCMP_ORD:
1025ffd83dbSDimitry Andric   case CmpInst::FCMP_UNO:
1035ffd83dbSDimitry Andric   case CmpInst::FCMP_UEQ:
1045ffd83dbSDimitry Andric   case CmpInst::FCMP_UNE:
1055ffd83dbSDimitry Andric   case CmpInst::FCMP_TRUE:
1065ffd83dbSDimitry Andric     return false;
1075ffd83dbSDimitry Andric   default:
1085ffd83dbSDimitry Andric     return true;
1095ffd83dbSDimitry Andric   }
1105ffd83dbSDimitry Andric }
1115ffd83dbSDimitry Andric 
112e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
113e8d8bef9SDimitry Andric     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
114e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
115e8d8bef9SDimitry Andric   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
116e8d8bef9SDimitry Andric     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
1175ffd83dbSDimitry Andric   };
1185ffd83dbSDimitry Andric 
1195ffd83dbSDimitry Andric   switch (Info.Pred) {
1205ffd83dbSDimitry Andric   case CmpInst::FCMP_ULT:
1215ffd83dbSDimitry Andric   case CmpInst::FCMP_ULE:
1225ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1235ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1245ffd83dbSDimitry Andric     else
1255ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1265ffd83dbSDimitry Andric     break;
1275ffd83dbSDimitry Andric   case CmpInst::FCMP_OLE:
1285ffd83dbSDimitry Andric   case CmpInst::FCMP_OLT: {
1295ffd83dbSDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
1305ffd83dbSDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
1315ffd83dbSDimitry Andric     // so permute it based on the compare type the hardware uses.
1325ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1335ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1345ffd83dbSDimitry Andric     else
1355ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1365ffd83dbSDimitry Andric     break;
1375ffd83dbSDimitry Andric   }
1385ffd83dbSDimitry Andric   case CmpInst::FCMP_UGE:
1395ffd83dbSDimitry Andric   case CmpInst::FCMP_UGT: {
1405ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1415ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1425ffd83dbSDimitry Andric     else
1435ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1445ffd83dbSDimitry Andric     break;
1455ffd83dbSDimitry Andric   }
1465ffd83dbSDimitry Andric   case CmpInst::FCMP_OGT:
1475ffd83dbSDimitry Andric   case CmpInst::FCMP_OGE: {
1485ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1495ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1505ffd83dbSDimitry Andric     else
1515ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1525ffd83dbSDimitry Andric     break;
1535ffd83dbSDimitry Andric   }
1545ffd83dbSDimitry Andric   default:
1555ffd83dbSDimitry Andric     llvm_unreachable("predicate should not have matched");
1565ffd83dbSDimitry Andric   }
1575ffd83dbSDimitry Andric 
1585ffd83dbSDimitry Andric   MI.eraseFromParent();
1595ffd83dbSDimitry Andric }
1605ffd83dbSDimitry Andric 
161e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
1625ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1635ffd83dbSDimitry Andric 
1645ffd83dbSDimitry Andric   // TODO: We could try to match extracting the higher bytes, which would be
1655ffd83dbSDimitry Andric   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1665ffd83dbSDimitry Andric   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1675ffd83dbSDimitry Andric   // about in practice.
1685ffd83dbSDimitry Andric   LLT Ty = MRI.getType(DstReg);
1695ffd83dbSDimitry Andric   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
1705ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
1715ffd83dbSDimitry Andric     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
1725ffd83dbSDimitry Andric     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
1735ffd83dbSDimitry Andric     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
1745ffd83dbSDimitry Andric     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
1755ffd83dbSDimitry Andric   }
1765ffd83dbSDimitry Andric 
1775ffd83dbSDimitry Andric   return false;
1785ffd83dbSDimitry Andric }
1795ffd83dbSDimitry Andric 
180e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
181e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
1825ffd83dbSDimitry Andric 
1835ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
1845ffd83dbSDimitry Andric 
1855ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1865ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
187e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(DstReg);
188e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
1895ffd83dbSDimitry Andric   if (SrcTy != S32)
1905ffd83dbSDimitry Andric     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
1915ffd83dbSDimitry Andric 
1925ffd83dbSDimitry Andric   if (Ty == S32) {
1935ffd83dbSDimitry Andric     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
1945ffd83dbSDimitry Andric                    {SrcReg}, MI.getFlags());
1955ffd83dbSDimitry Andric   } else {
1965ffd83dbSDimitry Andric     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
1975ffd83dbSDimitry Andric                              {SrcReg}, MI.getFlags());
1985ffd83dbSDimitry Andric     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
1995ffd83dbSDimitry Andric   }
2005ffd83dbSDimitry Andric 
2015ffd83dbSDimitry Andric   MI.eraseFromParent();
2025ffd83dbSDimitry Andric }
2035ffd83dbSDimitry Andric 
204e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
205e8d8bef9SDimitry Andric     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2065ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
2075ffd83dbSDimitry Andric 
2085ffd83dbSDimitry Andric   // Look through G_ZEXT.
2095ffd83dbSDimitry Andric   mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2105ffd83dbSDimitry Andric 
2115ffd83dbSDimitry Andric   Register Src0;
2125ffd83dbSDimitry Andric   int64_t ShiftAmt;
2135ffd83dbSDimitry Andric   bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2145ffd83dbSDimitry Andric   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2155ffd83dbSDimitry Andric     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2165ffd83dbSDimitry Andric 
2175ffd83dbSDimitry Andric     unsigned ShiftOffset = 8 * Offset;
2185ffd83dbSDimitry Andric     if (IsShr)
2195ffd83dbSDimitry Andric       ShiftOffset += ShiftAmt;
2205ffd83dbSDimitry Andric     else
2215ffd83dbSDimitry Andric       ShiftOffset -= ShiftAmt;
2225ffd83dbSDimitry Andric 
2235ffd83dbSDimitry Andric     MatchInfo.CvtVal = Src0;
2245ffd83dbSDimitry Andric     MatchInfo.ShiftOffset = ShiftOffset;
2255ffd83dbSDimitry Andric     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2265ffd83dbSDimitry Andric   }
2275ffd83dbSDimitry Andric 
2285ffd83dbSDimitry Andric   // TODO: Simplify demanded bits.
2295ffd83dbSDimitry Andric   return false;
2305ffd83dbSDimitry Andric }
2315ffd83dbSDimitry Andric 
232e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
233e8d8bef9SDimitry Andric     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
234e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
2355ffd83dbSDimitry Andric   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2365ffd83dbSDimitry Andric 
2375ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
2385ffd83dbSDimitry Andric   Register CvtSrc = MatchInfo.CvtVal;
239e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2405ffd83dbSDimitry Andric   if (SrcTy != S32) {
2415ffd83dbSDimitry Andric     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2425ffd83dbSDimitry Andric     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2435ffd83dbSDimitry Andric   }
2445ffd83dbSDimitry Andric 
2455ffd83dbSDimitry Andric   assert(MI.getOpcode() != NewOpc);
2465ffd83dbSDimitry Andric   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2475ffd83dbSDimitry Andric   MI.eraseFromParent();
2485ffd83dbSDimitry Andric }
2495ffd83dbSDimitry Andric 
250*fe6060f1SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
251*fe6060f1SDimitry Andric     MachineInstr &MI, Register &Reg) {
252*fe6060f1SDimitry Andric   const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
253*fe6060f1SDimitry Andric       MF.getSubtarget().getTargetLowering());
254*fe6060f1SDimitry Andric   Reg = MI.getOperand(1).getReg();
255*fe6060f1SDimitry Andric   return TLI->isCanonicalized(Reg, MF);
256*fe6060f1SDimitry Andric }
257*fe6060f1SDimitry Andric 
258e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState {
259e8d8bef9SDimitry Andric protected:
260e8d8bef9SDimitry Andric   CombinerHelper &Helper;
261e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
262e8d8bef9SDimitry Andric 
263e8d8bef9SDimitry Andric public:
264e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelperState(
265e8d8bef9SDimitry Andric       CombinerHelper &Helper,
266e8d8bef9SDimitry Andric       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
267e8d8bef9SDimitry Andric       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
268e8d8bef9SDimitry Andric };
269e8d8bef9SDimitry Andric 
2705ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
2715ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
2725ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
2735ffd83dbSDimitry Andric 
2745ffd83dbSDimitry Andric namespace {
2755ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
2765ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
2775ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
2785ffd83dbSDimitry Andric 
279e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
2805ffd83dbSDimitry Andric   GISelKnownBits *KB;
2815ffd83dbSDimitry Andric   MachineDominatorTree *MDT;
2825ffd83dbSDimitry Andric 
2835ffd83dbSDimitry Andric public:
2845ffd83dbSDimitry Andric   AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
2855ffd83dbSDimitry Andric 
2865ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
2875ffd83dbSDimitry Andric                                   const AMDGPULegalizerInfo *LI,
2885ffd83dbSDimitry Andric                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
2895ffd83dbSDimitry Andric       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
2905ffd83dbSDimitry Andric                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
2915ffd83dbSDimitry Andric         KB(KB), MDT(MDT) {
2925ffd83dbSDimitry Andric     if (!GeneratedRuleCfg.parseCommandLineOption())
2935ffd83dbSDimitry Andric       report_fatal_error("Invalid rule identifier");
2945ffd83dbSDimitry Andric   }
2955ffd83dbSDimitry Andric 
2965ffd83dbSDimitry Andric   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
2975ffd83dbSDimitry Andric                MachineIRBuilder &B) const override;
2985ffd83dbSDimitry Andric };
2995ffd83dbSDimitry Andric 
3005ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
3015ffd83dbSDimitry Andric                                               MachineInstr &MI,
3025ffd83dbSDimitry Andric                                               MachineIRBuilder &B) const {
303e8d8bef9SDimitry Andric   CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
304e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
305e8d8bef9SDimitry Andric   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
306e8d8bef9SDimitry Andric                                                  PostLegalizerHelper);
3075ffd83dbSDimitry Andric 
308e8d8bef9SDimitry Andric   if (Generated.tryCombineAll(Observer, MI, B))
3095ffd83dbSDimitry Andric     return true;
3105ffd83dbSDimitry Andric 
3115ffd83dbSDimitry Andric   switch (MI.getOpcode()) {
3125ffd83dbSDimitry Andric   case TargetOpcode::G_SHL:
3135ffd83dbSDimitry Andric   case TargetOpcode::G_LSHR:
3145ffd83dbSDimitry Andric   case TargetOpcode::G_ASHR:
3155ffd83dbSDimitry Andric     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
3165ffd83dbSDimitry Andric     // common case, splitting this into a move and a 32-bit shift is faster and
3175ffd83dbSDimitry Andric     // the same code size.
3185ffd83dbSDimitry Andric     return Helper.tryCombineShiftToUnmerge(MI, 32);
3195ffd83dbSDimitry Andric   }
3205ffd83dbSDimitry Andric 
3215ffd83dbSDimitry Andric   return false;
3225ffd83dbSDimitry Andric }
3235ffd83dbSDimitry Andric 
3245ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3255ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3265ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3275ffd83dbSDimitry Andric 
3285ffd83dbSDimitry Andric // Pass boilerplate
3295ffd83dbSDimitry Andric // ================
3305ffd83dbSDimitry Andric 
3315ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
3325ffd83dbSDimitry Andric public:
3335ffd83dbSDimitry Andric   static char ID;
3345ffd83dbSDimitry Andric 
3355ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
3365ffd83dbSDimitry Andric 
3375ffd83dbSDimitry Andric   StringRef getPassName() const override {
3385ffd83dbSDimitry Andric     return "AMDGPUPostLegalizerCombiner";
3395ffd83dbSDimitry Andric   }
3405ffd83dbSDimitry Andric 
3415ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
3425ffd83dbSDimitry Andric 
3435ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
3445ffd83dbSDimitry Andric private:
3455ffd83dbSDimitry Andric   bool IsOptNone;
3465ffd83dbSDimitry Andric };
3475ffd83dbSDimitry Andric } // end anonymous namespace
3485ffd83dbSDimitry Andric 
3495ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
3505ffd83dbSDimitry Andric   AU.addRequired<TargetPassConfig>();
3515ffd83dbSDimitry Andric   AU.setPreservesCFG();
3525ffd83dbSDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
3535ffd83dbSDimitry Andric   AU.addRequired<GISelKnownBitsAnalysis>();
3545ffd83dbSDimitry Andric   AU.addPreserved<GISelKnownBitsAnalysis>();
3555ffd83dbSDimitry Andric   if (!IsOptNone) {
3565ffd83dbSDimitry Andric     AU.addRequired<MachineDominatorTree>();
3575ffd83dbSDimitry Andric     AU.addPreserved<MachineDominatorTree>();
3585ffd83dbSDimitry Andric   }
3595ffd83dbSDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
3605ffd83dbSDimitry Andric }
3615ffd83dbSDimitry Andric 
3625ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
3635ffd83dbSDimitry Andric   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
3645ffd83dbSDimitry Andric   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
3655ffd83dbSDimitry Andric }
3665ffd83dbSDimitry Andric 
3675ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
3685ffd83dbSDimitry Andric   if (MF.getProperties().hasProperty(
3695ffd83dbSDimitry Andric           MachineFunctionProperties::Property::FailedISel))
3705ffd83dbSDimitry Andric     return false;
3715ffd83dbSDimitry Andric   auto *TPC = &getAnalysis<TargetPassConfig>();
3725ffd83dbSDimitry Andric   const Function &F = MF.getFunction();
3735ffd83dbSDimitry Andric   bool EnableOpt =
3745ffd83dbSDimitry Andric       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
3755ffd83dbSDimitry Andric 
3765ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3775ffd83dbSDimitry Andric   const AMDGPULegalizerInfo *LI
3785ffd83dbSDimitry Andric     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
3795ffd83dbSDimitry Andric 
3805ffd83dbSDimitry Andric   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
3815ffd83dbSDimitry Andric   MachineDominatorTree *MDT =
3825ffd83dbSDimitry Andric       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
3835ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
3845ffd83dbSDimitry Andric                                          F.hasMinSize(), LI, KB, MDT);
3855ffd83dbSDimitry Andric   Combiner C(PCInfo, TPC);
3865ffd83dbSDimitry Andric   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
3875ffd83dbSDimitry Andric }
3885ffd83dbSDimitry Andric 
3895ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0;
3905ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
3915ffd83dbSDimitry Andric                       "Combine AMDGPU machine instrs after legalization",
3925ffd83dbSDimitry Andric                       false, false)
3935ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
3945ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
3955ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
3965ffd83dbSDimitry Andric                     "Combine AMDGPU machine instrs after legalization", false,
3975ffd83dbSDimitry Andric                     false)
3985ffd83dbSDimitry Andric 
3995ffd83dbSDimitry Andric namespace llvm {
4005ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
4015ffd83dbSDimitry Andric   return new AMDGPUPostLegalizerCombiner(IsOptNone);
4025ffd83dbSDimitry Andric }
4035ffd83dbSDimitry Andric } // end namespace llvm
404