xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp (revision 4824e7fd18a1223177218d4aec1b3c6c5c4a444e)
//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
14e8d8bef9SDimitry Andric #include "AMDGPU.h"
15349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h"
165ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h"
17e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
18e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
255ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
26*4824e7fdSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
27e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
305ffd83dbSDimitry Andric 
315ffd83dbSDimitry Andric using namespace llvm;
325ffd83dbSDimitry Andric using namespace MIPatternMatch;
335ffd83dbSDimitry Andric 
34e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper {
35e8d8bef9SDimitry Andric protected:
36e8d8bef9SDimitry Andric   MachineIRBuilder &B;
37e8d8bef9SDimitry Andric   MachineFunction &MF;
38e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI;
39349cc55cSDimitry Andric   AMDGPUCombinerHelper &Helper;
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric public:
42349cc55cSDimitry Andric   AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
43349cc55cSDimitry Andric                                     AMDGPUCombinerHelper &Helper)
44e8d8bef9SDimitry Andric       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
45e8d8bef9SDimitry Andric 
465ffd83dbSDimitry Andric   struct FMinFMaxLegacyInfo {
475ffd83dbSDimitry Andric     Register LHS;
485ffd83dbSDimitry Andric     Register RHS;
495ffd83dbSDimitry Andric     Register True;
505ffd83dbSDimitry Andric     Register False;
515ffd83dbSDimitry Andric     CmpInst::Predicate Pred;
525ffd83dbSDimitry Andric   };
535ffd83dbSDimitry Andric 
545ffd83dbSDimitry Andric   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
55e8d8bef9SDimitry Andric   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
56e8d8bef9SDimitry Andric   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
57e8d8bef9SDimitry Andric                                          const FMinFMaxLegacyInfo &Info);
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric   bool matchUCharToFloat(MachineInstr &MI);
60e8d8bef9SDimitry Andric   void applyUCharToFloat(MachineInstr &MI);
61e8d8bef9SDimitry Andric 
62*4824e7fdSDimitry Andric   bool matchRcpSqrtToRsq(MachineInstr &MI,
63*4824e7fdSDimitry Andric                          std::function<void(MachineIRBuilder &)> &MatchInfo);
64*4824e7fdSDimitry Andric 
65e8d8bef9SDimitry Andric   // FIXME: Should be able to have 2 separate matchdatas rather than custom
66e8d8bef9SDimitry Andric   // struct boilerplate.
67e8d8bef9SDimitry Andric   struct CvtF32UByteMatchInfo {
68e8d8bef9SDimitry Andric     Register CvtVal;
69e8d8bef9SDimitry Andric     unsigned ShiftOffset;
70e8d8bef9SDimitry Andric   };
71e8d8bef9SDimitry Andric 
72e8d8bef9SDimitry Andric   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
73e8d8bef9SDimitry Andric   void applyCvtF32UByteN(MachineInstr &MI,
74e8d8bef9SDimitry Andric                          const CvtF32UByteMatchInfo &MatchInfo);
75fe6060f1SDimitry Andric 
76fe6060f1SDimitry Andric   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
77e8d8bef9SDimitry Andric };
78e8d8bef9SDimitry Andric 
79e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
80e8d8bef9SDimitry Andric     MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
815ffd83dbSDimitry Andric   // FIXME: Combines should have subtarget predicates, and we shouldn't need
825ffd83dbSDimitry Andric   // this here.
835ffd83dbSDimitry Andric   if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
845ffd83dbSDimitry Andric     return false;
855ffd83dbSDimitry Andric 
865ffd83dbSDimitry Andric   // FIXME: Type predicate on pattern
875ffd83dbSDimitry Andric   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
885ffd83dbSDimitry Andric     return false;
895ffd83dbSDimitry Andric 
905ffd83dbSDimitry Andric   Register Cond = MI.getOperand(1).getReg();
915ffd83dbSDimitry Andric   if (!MRI.hasOneNonDBGUse(Cond) ||
925ffd83dbSDimitry Andric       !mi_match(Cond, MRI,
935ffd83dbSDimitry Andric                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
945ffd83dbSDimitry Andric     return false;
955ffd83dbSDimitry Andric 
965ffd83dbSDimitry Andric   Info.True = MI.getOperand(2).getReg();
975ffd83dbSDimitry Andric   Info.False = MI.getOperand(3).getReg();
985ffd83dbSDimitry Andric 
995ffd83dbSDimitry Andric   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
1005ffd83dbSDimitry Andric       !(Info.LHS == Info.False && Info.RHS == Info.True))
1015ffd83dbSDimitry Andric     return false;
1025ffd83dbSDimitry Andric 
1035ffd83dbSDimitry Andric   switch (Info.Pred) {
1045ffd83dbSDimitry Andric   case CmpInst::FCMP_FALSE:
1055ffd83dbSDimitry Andric   case CmpInst::FCMP_OEQ:
1065ffd83dbSDimitry Andric   case CmpInst::FCMP_ONE:
1075ffd83dbSDimitry Andric   case CmpInst::FCMP_ORD:
1085ffd83dbSDimitry Andric   case CmpInst::FCMP_UNO:
1095ffd83dbSDimitry Andric   case CmpInst::FCMP_UEQ:
1105ffd83dbSDimitry Andric   case CmpInst::FCMP_UNE:
1115ffd83dbSDimitry Andric   case CmpInst::FCMP_TRUE:
1125ffd83dbSDimitry Andric     return false;
1135ffd83dbSDimitry Andric   default:
1145ffd83dbSDimitry Andric     return true;
1155ffd83dbSDimitry Andric   }
1165ffd83dbSDimitry Andric }
1175ffd83dbSDimitry Andric 
118e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
119e8d8bef9SDimitry Andric     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
120e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
121e8d8bef9SDimitry Andric   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
122e8d8bef9SDimitry Andric     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
1235ffd83dbSDimitry Andric   };
1245ffd83dbSDimitry Andric 
1255ffd83dbSDimitry Andric   switch (Info.Pred) {
1265ffd83dbSDimitry Andric   case CmpInst::FCMP_ULT:
1275ffd83dbSDimitry Andric   case CmpInst::FCMP_ULE:
1285ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1295ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1305ffd83dbSDimitry Andric     else
1315ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1325ffd83dbSDimitry Andric     break;
1335ffd83dbSDimitry Andric   case CmpInst::FCMP_OLE:
1345ffd83dbSDimitry Andric   case CmpInst::FCMP_OLT: {
1355ffd83dbSDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
1365ffd83dbSDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
1375ffd83dbSDimitry Andric     // so permute it based on the compare type the hardware uses.
1385ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1395ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1405ffd83dbSDimitry Andric     else
1415ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1425ffd83dbSDimitry Andric     break;
1435ffd83dbSDimitry Andric   }
1445ffd83dbSDimitry Andric   case CmpInst::FCMP_UGE:
1455ffd83dbSDimitry Andric   case CmpInst::FCMP_UGT: {
1465ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1475ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1485ffd83dbSDimitry Andric     else
1495ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1505ffd83dbSDimitry Andric     break;
1515ffd83dbSDimitry Andric   }
1525ffd83dbSDimitry Andric   case CmpInst::FCMP_OGT:
1535ffd83dbSDimitry Andric   case CmpInst::FCMP_OGE: {
1545ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1555ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1565ffd83dbSDimitry Andric     else
1575ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1585ffd83dbSDimitry Andric     break;
1595ffd83dbSDimitry Andric   }
1605ffd83dbSDimitry Andric   default:
1615ffd83dbSDimitry Andric     llvm_unreachable("predicate should not have matched");
1625ffd83dbSDimitry Andric   }
1635ffd83dbSDimitry Andric 
1645ffd83dbSDimitry Andric   MI.eraseFromParent();
1655ffd83dbSDimitry Andric }
1665ffd83dbSDimitry Andric 
167e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
1685ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1695ffd83dbSDimitry Andric 
1705ffd83dbSDimitry Andric   // TODO: We could try to match extracting the higher bytes, which would be
1715ffd83dbSDimitry Andric   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1725ffd83dbSDimitry Andric   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1735ffd83dbSDimitry Andric   // about in practice.
1745ffd83dbSDimitry Andric   LLT Ty = MRI.getType(DstReg);
1755ffd83dbSDimitry Andric   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
1765ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
1775ffd83dbSDimitry Andric     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
1785ffd83dbSDimitry Andric     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
1795ffd83dbSDimitry Andric     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
1805ffd83dbSDimitry Andric     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
1815ffd83dbSDimitry Andric   }
1825ffd83dbSDimitry Andric 
1835ffd83dbSDimitry Andric   return false;
1845ffd83dbSDimitry Andric }
1855ffd83dbSDimitry Andric 
186e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
187e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
1885ffd83dbSDimitry Andric 
1895ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
1905ffd83dbSDimitry Andric 
1915ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1925ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
193e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(DstReg);
194e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
1955ffd83dbSDimitry Andric   if (SrcTy != S32)
1965ffd83dbSDimitry Andric     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
1975ffd83dbSDimitry Andric 
1985ffd83dbSDimitry Andric   if (Ty == S32) {
1995ffd83dbSDimitry Andric     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
2005ffd83dbSDimitry Andric                    {SrcReg}, MI.getFlags());
2015ffd83dbSDimitry Andric   } else {
2025ffd83dbSDimitry Andric     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
2035ffd83dbSDimitry Andric                              {SrcReg}, MI.getFlags());
2045ffd83dbSDimitry Andric     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
2055ffd83dbSDimitry Andric   }
2065ffd83dbSDimitry Andric 
2075ffd83dbSDimitry Andric   MI.eraseFromParent();
2085ffd83dbSDimitry Andric }
2095ffd83dbSDimitry Andric 
210*4824e7fdSDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
211*4824e7fdSDimitry Andric     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
212*4824e7fdSDimitry Andric 
213*4824e7fdSDimitry Andric   auto getRcpSrc = [=](const MachineInstr &MI) {
214*4824e7fdSDimitry Andric     MachineInstr *ResMI = nullptr;
215*4824e7fdSDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
216*4824e7fdSDimitry Andric         MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
217*4824e7fdSDimitry Andric       ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
218*4824e7fdSDimitry Andric 
219*4824e7fdSDimitry Andric     return ResMI;
220*4824e7fdSDimitry Andric   };
221*4824e7fdSDimitry Andric 
222*4824e7fdSDimitry Andric   auto getSqrtSrc = [=](const MachineInstr &MI) {
223*4824e7fdSDimitry Andric     MachineInstr *SqrtSrcMI = nullptr;
224*4824e7fdSDimitry Andric     mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
225*4824e7fdSDimitry Andric     return SqrtSrcMI;
226*4824e7fdSDimitry Andric   };
227*4824e7fdSDimitry Andric 
228*4824e7fdSDimitry Andric   MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
229*4824e7fdSDimitry Andric   // rcp(sqrt(x))
230*4824e7fdSDimitry Andric   if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
231*4824e7fdSDimitry Andric     MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
232*4824e7fdSDimitry Andric       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
233*4824e7fdSDimitry Andric           .addUse(SqrtSrcMI->getOperand(0).getReg())
234*4824e7fdSDimitry Andric           .setMIFlags(MI.getFlags());
235*4824e7fdSDimitry Andric     };
236*4824e7fdSDimitry Andric     return true;
237*4824e7fdSDimitry Andric   }
238*4824e7fdSDimitry Andric 
239*4824e7fdSDimitry Andric   // sqrt(rcp(x))
240*4824e7fdSDimitry Andric   if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
241*4824e7fdSDimitry Andric     MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
242*4824e7fdSDimitry Andric       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
243*4824e7fdSDimitry Andric           .addUse(RcpSrcMI->getOperand(0).getReg())
244*4824e7fdSDimitry Andric           .setMIFlags(MI.getFlags());
245*4824e7fdSDimitry Andric     };
246*4824e7fdSDimitry Andric     return true;
247*4824e7fdSDimitry Andric   }
248*4824e7fdSDimitry Andric 
249*4824e7fdSDimitry Andric   return false;
250*4824e7fdSDimitry Andric }
251*4824e7fdSDimitry Andric 
252e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
253e8d8bef9SDimitry Andric     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2545ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
2555ffd83dbSDimitry Andric 
2565ffd83dbSDimitry Andric   // Look through G_ZEXT.
2575ffd83dbSDimitry Andric   mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2585ffd83dbSDimitry Andric 
2595ffd83dbSDimitry Andric   Register Src0;
2605ffd83dbSDimitry Andric   int64_t ShiftAmt;
2615ffd83dbSDimitry Andric   bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2625ffd83dbSDimitry Andric   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2635ffd83dbSDimitry Andric     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2645ffd83dbSDimitry Andric 
2655ffd83dbSDimitry Andric     unsigned ShiftOffset = 8 * Offset;
2665ffd83dbSDimitry Andric     if (IsShr)
2675ffd83dbSDimitry Andric       ShiftOffset += ShiftAmt;
2685ffd83dbSDimitry Andric     else
2695ffd83dbSDimitry Andric       ShiftOffset -= ShiftAmt;
2705ffd83dbSDimitry Andric 
2715ffd83dbSDimitry Andric     MatchInfo.CvtVal = Src0;
2725ffd83dbSDimitry Andric     MatchInfo.ShiftOffset = ShiftOffset;
2735ffd83dbSDimitry Andric     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2745ffd83dbSDimitry Andric   }
2755ffd83dbSDimitry Andric 
2765ffd83dbSDimitry Andric   // TODO: Simplify demanded bits.
2775ffd83dbSDimitry Andric   return false;
2785ffd83dbSDimitry Andric }
2795ffd83dbSDimitry Andric 
280e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
281e8d8bef9SDimitry Andric     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
282e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
2835ffd83dbSDimitry Andric   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2845ffd83dbSDimitry Andric 
2855ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
2865ffd83dbSDimitry Andric   Register CvtSrc = MatchInfo.CvtVal;
287e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2885ffd83dbSDimitry Andric   if (SrcTy != S32) {
2895ffd83dbSDimitry Andric     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2905ffd83dbSDimitry Andric     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2915ffd83dbSDimitry Andric   }
2925ffd83dbSDimitry Andric 
2935ffd83dbSDimitry Andric   assert(MI.getOpcode() != NewOpc);
2945ffd83dbSDimitry Andric   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2955ffd83dbSDimitry Andric   MI.eraseFromParent();
2965ffd83dbSDimitry Andric }
2975ffd83dbSDimitry Andric 
298fe6060f1SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
299fe6060f1SDimitry Andric     MachineInstr &MI, Register &Reg) {
300fe6060f1SDimitry Andric   const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
301fe6060f1SDimitry Andric       MF.getSubtarget().getTargetLowering());
302fe6060f1SDimitry Andric   Reg = MI.getOperand(1).getReg();
303fe6060f1SDimitry Andric   return TLI->isCanonicalized(Reg, MF);
304fe6060f1SDimitry Andric }
305fe6060f1SDimitry Andric 
306e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState {
307e8d8bef9SDimitry Andric protected:
308349cc55cSDimitry Andric   AMDGPUCombinerHelper &Helper;
309e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
310e8d8bef9SDimitry Andric 
311e8d8bef9SDimitry Andric public:
312e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelperState(
313349cc55cSDimitry Andric       AMDGPUCombinerHelper &Helper,
314e8d8bef9SDimitry Andric       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
315e8d8bef9SDimitry Andric       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
316e8d8bef9SDimitry Andric };
317e8d8bef9SDimitry Andric 
3185ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
3195ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3205ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
3215ffd83dbSDimitry Andric 
3225ffd83dbSDimitry Andric namespace {
3235ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
3245ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3255ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
3265ffd83dbSDimitry Andric 
327e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
3285ffd83dbSDimitry Andric   GISelKnownBits *KB;
3295ffd83dbSDimitry Andric   MachineDominatorTree *MDT;
3305ffd83dbSDimitry Andric 
3315ffd83dbSDimitry Andric public:
3325ffd83dbSDimitry Andric   AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
3335ffd83dbSDimitry Andric 
3345ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
3355ffd83dbSDimitry Andric                                   const AMDGPULegalizerInfo *LI,
3365ffd83dbSDimitry Andric                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
3375ffd83dbSDimitry Andric       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
3385ffd83dbSDimitry Andric                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
3395ffd83dbSDimitry Andric         KB(KB), MDT(MDT) {
3405ffd83dbSDimitry Andric     if (!GeneratedRuleCfg.parseCommandLineOption())
3415ffd83dbSDimitry Andric       report_fatal_error("Invalid rule identifier");
3425ffd83dbSDimitry Andric   }
3435ffd83dbSDimitry Andric 
3445ffd83dbSDimitry Andric   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
3455ffd83dbSDimitry Andric                MachineIRBuilder &B) const override;
3465ffd83dbSDimitry Andric };
3475ffd83dbSDimitry Andric 
3485ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
3495ffd83dbSDimitry Andric                                               MachineInstr &MI,
3505ffd83dbSDimitry Andric                                               MachineIRBuilder &B) const {
351349cc55cSDimitry Andric   AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
352e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
353e8d8bef9SDimitry Andric   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
354e8d8bef9SDimitry Andric                                                  PostLegalizerHelper);
3555ffd83dbSDimitry Andric 
356e8d8bef9SDimitry Andric   if (Generated.tryCombineAll(Observer, MI, B))
3575ffd83dbSDimitry Andric     return true;
3585ffd83dbSDimitry Andric 
3595ffd83dbSDimitry Andric   switch (MI.getOpcode()) {
3605ffd83dbSDimitry Andric   case TargetOpcode::G_SHL:
3615ffd83dbSDimitry Andric   case TargetOpcode::G_LSHR:
3625ffd83dbSDimitry Andric   case TargetOpcode::G_ASHR:
3635ffd83dbSDimitry Andric     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
3645ffd83dbSDimitry Andric     // common case, splitting this into a move and a 32-bit shift is faster and
3655ffd83dbSDimitry Andric     // the same code size.
3665ffd83dbSDimitry Andric     return Helper.tryCombineShiftToUnmerge(MI, 32);
3675ffd83dbSDimitry Andric   }
3685ffd83dbSDimitry Andric 
3695ffd83dbSDimitry Andric   return false;
3705ffd83dbSDimitry Andric }
3715ffd83dbSDimitry Andric 
3725ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3735ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3745ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3755ffd83dbSDimitry Andric 
3765ffd83dbSDimitry Andric // Pass boilerplate
3775ffd83dbSDimitry Andric // ================
3785ffd83dbSDimitry Andric 
3795ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
3805ffd83dbSDimitry Andric public:
3815ffd83dbSDimitry Andric   static char ID;
3825ffd83dbSDimitry Andric 
3835ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
3845ffd83dbSDimitry Andric 
3855ffd83dbSDimitry Andric   StringRef getPassName() const override {
3865ffd83dbSDimitry Andric     return "AMDGPUPostLegalizerCombiner";
3875ffd83dbSDimitry Andric   }
3885ffd83dbSDimitry Andric 
3895ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
3905ffd83dbSDimitry Andric 
3915ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
3925ffd83dbSDimitry Andric private:
3935ffd83dbSDimitry Andric   bool IsOptNone;
3945ffd83dbSDimitry Andric };
3955ffd83dbSDimitry Andric } // end anonymous namespace
3965ffd83dbSDimitry Andric 
3975ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
3985ffd83dbSDimitry Andric   AU.addRequired<TargetPassConfig>();
3995ffd83dbSDimitry Andric   AU.setPreservesCFG();
4005ffd83dbSDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
4015ffd83dbSDimitry Andric   AU.addRequired<GISelKnownBitsAnalysis>();
4025ffd83dbSDimitry Andric   AU.addPreserved<GISelKnownBitsAnalysis>();
4035ffd83dbSDimitry Andric   if (!IsOptNone) {
4045ffd83dbSDimitry Andric     AU.addRequired<MachineDominatorTree>();
4055ffd83dbSDimitry Andric     AU.addPreserved<MachineDominatorTree>();
4065ffd83dbSDimitry Andric   }
4075ffd83dbSDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
4085ffd83dbSDimitry Andric }
4095ffd83dbSDimitry Andric 
4105ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
4115ffd83dbSDimitry Andric   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
4125ffd83dbSDimitry Andric   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
4135ffd83dbSDimitry Andric }
4145ffd83dbSDimitry Andric 
4155ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
4165ffd83dbSDimitry Andric   if (MF.getProperties().hasProperty(
4175ffd83dbSDimitry Andric           MachineFunctionProperties::Property::FailedISel))
4185ffd83dbSDimitry Andric     return false;
4195ffd83dbSDimitry Andric   auto *TPC = &getAnalysis<TargetPassConfig>();
4205ffd83dbSDimitry Andric   const Function &F = MF.getFunction();
4215ffd83dbSDimitry Andric   bool EnableOpt =
4225ffd83dbSDimitry Andric       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
4235ffd83dbSDimitry Andric 
4245ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4255ffd83dbSDimitry Andric   const AMDGPULegalizerInfo *LI
4265ffd83dbSDimitry Andric     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
4275ffd83dbSDimitry Andric 
4285ffd83dbSDimitry Andric   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
4295ffd83dbSDimitry Andric   MachineDominatorTree *MDT =
4305ffd83dbSDimitry Andric       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
4315ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
4325ffd83dbSDimitry Andric                                          F.hasMinSize(), LI, KB, MDT);
4335ffd83dbSDimitry Andric   Combiner C(PCInfo, TPC);
4345ffd83dbSDimitry Andric   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
4355ffd83dbSDimitry Andric }
4365ffd83dbSDimitry Andric 
4375ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0;
4385ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
4395ffd83dbSDimitry Andric                       "Combine AMDGPU machine instrs after legalization",
4405ffd83dbSDimitry Andric                       false, false)
4415ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
4425ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
4435ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
4445ffd83dbSDimitry Andric                     "Combine AMDGPU machine instrs after legalization", false,
4455ffd83dbSDimitry Andric                     false)
4465ffd83dbSDimitry Andric 
4475ffd83dbSDimitry Andric namespace llvm {
4485ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
4495ffd83dbSDimitry Andric   return new AMDGPUPostLegalizerCombiner(IsOptNone);
4505ffd83dbSDimitry Andric }
4515ffd83dbSDimitry Andric } // end namespace llvm
452